2013-03-15 25 views
2

我正在構建一個對用戶配置文件進行「匹配」的應用程序。例如,以下是我的架構。(問題標題:在 SQL Server 中匹配/分組多個鍵值對)

用戶(簡化版本) 編號
姓
名字

用戶配置 標識
用戶ID
SomeOtherFields

UserProfileFields 標識
UserProfileId
Key(鍵)
Value(值)

用戶配置(UserProfile)表用於保存一些標準信息(例如出生日期等)。

UserProfileFields 基本上是一個鍵及其值的列表,用來構成一個字典,看起來大致如下(同樣,爲了這個問題做了簡化):

UserProfileID | Key  | Value  
123   | food  | Pizza 
123   | food  | Indian 
4453   | drink  | Coke 
44850   | drink  | Orange Juice 
88493   | food  | Pizza 
448382  | food  | Chinese 

所以,從上面的數據可以看到,配置文件 123 與 88493 在 food 上匹配——它們都有 food | Pizza。

有沒有辦法高效地查詢這個表,以獲得「匹配」的列表?

我設想這個查詢每天運行一次,並把結果存儲在一個單獨的表中。

例如:

Matches(匹配表)

MatchID | ProfileID 
1  | 123 
1  | 88493 

我猜查詢大概是類似下面這樣的:

-- Asker's rough sketch, not a working query: SQL Server rejects SELECT * when
-- the statement groups by a single column, and the answers below write the
-- column as [Key]. Grouping on Key alone would also ignore Value.
SELECT * FROM UserProfileFields 
GROUP BY Key 

……之類的東西,但不知道在比如說一百萬行的數據上,這樣做的效率如何?

回答

1
-- Lists every profile that shares a ([Key], Value) pair with at least one
-- other profile. Each shared pair receives its own MatchID.
WITH SharedPairs
AS
(
    -- ([Key], Value) pairs held by two or more different profiles.
    SELECT [Key], Value
    FROM UserProfileFields
    GROUP BY [Key], Value
    HAVING COUNT(DISTINCT UserProfileID) > 1
),
Matches
AS
(
    SELECT a.UserProfileID,
           a.[Key],
           a.Value,
           -- Rank on the whole pair: ranking on [Key] alone would give
           -- food/Pizza and food/Indian matches the same MatchID.
           DENSE_RANK() OVER(ORDER BY a.[Key], a.Value) MatchID
    FROM UserProfileFields a
    INNER JOIN SharedPairs b
        ON a.[Key] = b.[Key]
       AND a.Value = b.Value
)
SELECT MatchID, UserProfileID
FROM Matches
2

這樣應該就能解決這個問題。

-- ============================================================================ 
-- BEGIN: SETUP TEST DATA 
-- ============================================================================ 
-- Sample table: one (profile, key, value) triple per row.
CREATE TABLE UserProfileFields
(
    UserProfileID int,
    [Key]         varchar(5),
    Value         varchar(12)
);

-- Seed the eight sample rows used by the solutions below.
INSERT INTO UserProfileFields (UserProfileID, [Key], Value)
VALUES
    (123,    'food',  'Pizza'),
    (123,    'food',  'Indian'),
    (4453,   'drink', 'Coke'),
    (44850,  'drink', 'Orange Juice'),
    (88493,  'food',  'Pizza'),
    (448382, 'food',  'Chinese'),
    (88493,  'drink', 'Coke'),
    (88493,  'drink', 'Orange Juice');

--/*
-- Double the table 17 times: 8 rows x 2^17 = 1,048,576 rows.
DECLARE @DoublingsLeft int;
SET @DoublingsLeft = 17;
WHILE @DoublingsLeft > 0
BEGIN
    -- Re-insert every existing row, doubling the row count.
    INSERT INTO UserProfileFields (UserProfileID, [Key], Value)
    SELECT UserProfileID, [Key], Value
    FROM UserProfileFields;

    SET @DoublingsLeft = @DoublingsLeft - 1;
END
--*/
-- SELECT COUNT(*) FROM UserProfileFields WITH (NOLOCK) 
-- ============================================================================ 
-- END: SETUP TEST DATA 
-- ============================================================================ 




-- ============================================================================ 
-- BEGIN: Solution if Key, Value, and UserProfileID do NOT make up a unique key 
-- ============================================================================ 
-- Match list for a table in which (Key, Value, UserProfileID) may repeat.
-- Result: one row per (profile, shared pair), with one MatchID per pair.
SET NOCOUNT ON;
IF OBJECT_ID('tempdb..#DistinctValues', 'U') IS NOT NULL DROP TABLE #DistinctValues;
IF OBJECT_ID('tempdb..#Matches', 'U') IS NOT NULL DROP TABLE #Matches;

-- Collapse repeated rows so each (Key, UserProfileID, Value) appears once.
-- NOTE(review): NOLOCK allows dirty reads; kept as written - confirm that is
-- acceptable for this job.
SELECT [Key], UserProfileID, Value
    INTO #DistinctValues
    FROM UserProfileFields WITH (NOLOCK)
GROUP BY [Key], UserProfileID, Value;

-- Keep rows whose pair is also held by a different profile.
-- EXISTS emits each row once; an inner self-join would emit it once per
-- other matching profile and duplicate rows when 3+ profiles share a pair.
SELECT A.[Key], A.Value, A.UserProfileID
    INTO #Matches
    FROM #DistinctValues A
    WHERE EXISTS (
        SELECT 1
        FROM #DistinctValues B
        WHERE B.[Key]          = A.[Key]
          AND B.Value          = A.Value
          AND B.UserProfileID <> A.UserProfileID
    );

-- Number the shared pairs: every distinct (Key, Value) gets its own MatchID.
SELECT DENSE_RANK() OVER(ORDER BY A.[Key], A.Value) [MatchID]
     ,A.UserProfileID
     ,A.[Key]
     ,A.Value
    FROM #Matches A;
-- ============================================================================ 
-- END: Solution if Key, Value, and UserProfileID do NOT make up a unique key 
-- ============================================================================ 




-- ============================================================================ 
-- BEGIN: Solution if Key, Value, and UserProfileID make up a unique key 
-- ============================================================================ 
-- Match list for a table in which (Key, Value, UserProfileID) is unique,
-- so no de-duplication pass is needed before matching.
-- NOTE(review): this script creates #Matches with SELECT ... INTO in an earlier
-- section too; SQL Server may refuse to compile both in a single batch, so run
-- this section as its own batch - confirm.
IF OBJECT_ID('tempdb..#Matches', 'U') IS NOT NULL DROP TABLE #Matches;

-- Keep rows whose pair is also held by a different profile.
-- EXISTS emits each row once; an inner self-join would emit it once per
-- other matching profile and duplicate rows when 3+ profiles share a pair.
-- NOTE(review): NOLOCK allows dirty reads; kept as written - confirm that is
-- acceptable for this job.
SELECT A.[Key], A.Value, A.UserProfileID
    INTO #Matches
    FROM UserProfileFields A WITH (NOLOCK)
    WHERE EXISTS (
        SELECT 1
        FROM UserProfileFields B WITH (NOLOCK)
        WHERE B.[Key]          = A.[Key]
          AND B.Value          = A.Value
          AND B.UserProfileID <> A.UserProfileID
    );

-- Number the shared pairs: every distinct (Key, Value) gets its own MatchID.
SELECT DENSE_RANK() OVER(ORDER BY A.[Key], A.Value) [MatchID]
     ,A.UserProfileID
     ,A.[Key]
     ,A.Value
    FROM #Matches A;
-- ============================================================================ 
-- END: Solution if Key, Value, and UserProfileID make up a unique key 
-- ============================================================================ 
0

另一個方案是使用 EXISTS() 運算符並配合覆蓋索引,這有助於避免過多的數據排序。

-- Index keyed on ([Key], Value) with UserProfileID as an included column, so
-- pair lookups that also need the profile id can be served from the index.
CREATE NONCLUSTERED INDEX ix_Key_Value_UserProfileFields
    ON dbo.UserProfileFields ([Key], Value)
    INCLUDE (UserProfileID);

-- Returns every profile row whose ([Key], Value) pair is also held by a
-- different profile, numbering each shared pair with its own MatchID.
SELECT DENSE_RANK() OVER(ORDER BY t.[Key], t.Value) AS MatchID, t.UserProfileID
FROM dbo.UserProfileFields t
WHERE EXISTS (
       SELECT 1
       FROM dbo.UserProfileFields t2
       WHERE t2.[Key] = t.[Key]
       AND t2.Value = t.Value
       -- Require another profile: counting rows (COUNT(*) > 1) would treat a
       -- pair repeated within a single profile as a match with itself.
       AND t2.UserProfileID <> t.UserProfileID
      )

enter image description here

SQLFiddle 上的演示