2013-02-12 17 views
4

標題:從合併連接的結果集中消除 NULL 字段。我創建了以下視圖「user_details_merged」:

-- Merge partial user records from user_details.
-- Every row is kept; each attribute that is NULL on the row is filled from
-- another user_details row that matches on an attribute the row does have.
-- COALESCE always prefers the row's own value over the donor's value.
SELECT DISTINCT
    COALESCE(base.user_name, user_name_src.user_name) AS user_name,
    COALESCE(base.email, email_src.email) AS email,
    COALESCE(base.first_name, name_src.first_name) AS first_name,
    COALESCE(base.last_name, name_src.last_name) AS last_name
FROM user_details AS base

-- Donor rows for a missing user_name: same email, or same first + last name.
LEFT JOIN user_details AS user_name_src
    ON base.user_name IS NULL
    AND (
        (
            base.email IS NOT NULL
            AND user_name_src.email = base.email
        )
        OR (
            base.first_name IS NOT NULL
            AND base.last_name IS NOT NULL
            AND user_name_src.first_name = base.first_name
            AND user_name_src.last_name = base.last_name
        )
    )

-- Donor rows for a missing email: same user_name, or same first + last name.
LEFT JOIN user_details AS email_src
    ON base.email IS NULL
    AND (
        (
            base.user_name IS NOT NULL
            AND email_src.user_name = base.user_name
        )
        OR (
            base.first_name IS NOT NULL
            AND base.last_name IS NOT NULL
            AND email_src.first_name = base.first_name
            AND email_src.last_name = base.last_name
        )
    )

-- Donor rows for a missing name (both parts NULL): same email, or same user_name.
LEFT JOIN user_details AS name_src
    ON base.first_name IS NULL
    AND base.last_name IS NULL
    AND (
        (
            base.email IS NOT NULL
            AND name_src.email = base.email
        )
        OR (
            base.user_name IS NOT NULL
            AND name_src.user_name = base.user_name
        )
    )

ORDER BY
    user_name ASC,
    email ASC,
    first_name ASC,
    last_name ASC

這個視圖會合併我的各列,把下面第一個表的數據變成第二個表的結果:

user_name | email | first_name | last_name 
a    b  NULL   NULL 
NULL   b  c   d 
a   NULL  e   f 
NULL   x  y   z 

user_name | email | first_name | last_name 
a    b  NULL   NULL 
NULL   b  c   d 
a   NULL  e   f 
NULL   x  y   z 

a    b  c   d 
a    b  e   f 

我要的是:

user_name | email | first_name | last_name 
NULL   x  y   z 
a    b  c   d 
a    b  e   f 

也就是說:如果存在數據相同但信息更完整的行,就去掉那些含 NULL 的行;但當沒有其他行包含更多信息時,仍然保留「NULL x y z」這樣的行。

下面這第二個視圖正是我需要的結果:

-- Returns the distinct rows of user_details_merged (alias a) that pass the three
-- "no more complete row exists" checks below and that join to at least one other row b.
-- In every check, ISNULL(a.col, b.col) turns a NULL column of a into a comparison of
-- b.col with itself, so that column only has to be non-NULL in the candidate row.
-- NOTE(review): each NOT EXISTS subquery re-declares the alias b, which shadows the
-- outer LEFT JOIN alias b inside that subquery.
SELECT DISTINCT a.user_name,a.email,a.first_name,a.last_name FROM 
user_details_merged a 
LEFT JOIN user_details_merged b 
ON 
(
    -- Check 1: a has a user_name, or no row with a user_name agrees with a on
    -- email, first_name and last_name.
    (
    a.user_name IS NOT NULL OR 
    NOT EXISTS (SELECT user_name FROM user_details_merged b WHERE b.user_name IS NOT NULL AND 
     b.email=ISNULL(a.email,b.email) AND 
     b.first_name=isnull(a.first_name,b.first_name) AND 
     b.last_name=isnull(a.last_name,b.last_name)) 
    ) 

    AND 

    -- Check 2: a has an email, or no row with an email agrees with a on
    -- user_name, first_name and last_name.
    (
    a.email IS NOT NULL OR 
    NOT EXISTS (SELECT email FROM user_details_merged b WHERE b.email IS NOT NULL AND 
     b.user_name=ISNULL(a.user_name,b.user_name) AND 
     b.first_name=isnull(a.first_name,b.first_name) AND 
     b.last_name=isnull(a.last_name,b.last_name)) 
    ) 

    AND 

    -- Check 3: a has both name parts, or no row with an email agrees with a on
    -- user_name and email.
    -- NOTE(review): this subquery filters on b.email IS NOT NULL rather than on the
    -- name columns; it may have been intended to test b.first_name / b.last_name
    -- IS NOT NULL - confirm with the author.
    (
    (a.first_name IS NOT NULL AND a.last_name IS NOT NULL) OR 
    NOT EXISTS (SELECT email FROM user_details_merged b WHERE b.email IS NOT NULL AND 
     b.user_name=ISNULL(a.user_name,b.user_name) AND 
     b.email=ISNULL(a.email,b.email)) 
     -- AND b.first_name=isnull(a.first_name,b.first_name) AND b.last_name=isnull(a.last_name,b.last_name)) 
    ) 

    -- The joined row b must differ from a. Comparisons against NULL evaluate to
    -- UNKNOWN, so a pair only qualifies when at least one column is non-NULL on
    -- both sides and has different values.
    AND NOT (a.first_name = b.first_name AND a.last_name = b.last_name AND a.email = b.email AND a.user_name = b.user_name) 

) 

-- Discards a-rows for which the LEFT JOIN found no b-row (all b columns NULL),
-- which makes the join behave like an inner join.
WHERE coalesce(b.user_name,b.email,b.first_name,b.last_name) IS NOT NULL 

主要問題是,提供數據的 user_details 本身是一個視圖,由許多不同表的 UNION 組成。有些表只包含用戶名和電子郵件,有些只有電子郵件和名字/姓氏,等等。這就是爲什麼沒有唯一鍵,而且由於使用了 UNION,我也無法爲該視圖建立索引。這使得最後一個視圖無法在一小時內執行完。我目前的解決方法是一個存儲過程,它將 user_details_merged 視圖的數據存儲在臨時表中,並讓上面的第二個視圖使用該表中的數據。這樣我可以把 8000 行的執行時間減少到 7 秒。

其他建議?

非常感謝你;)

+0

對不起,我不明白你的問題。「GROUP BY MAX 列」是什麼意思?你想達到什麼目的? – Melanie 2013-02-12 17:20:14

+0

嗨。你可以通過 SELECT MAX(user_name), email, MAX(first_name), MAX(last_name) ... GROUP BY email 來消除包含 NULL 的行。但是,這樣要麼會保留所有 user_name = NULL 的行,要麼會丟掉同一個 user_name 下的不同結果 – 5andr0 2013-02-12 17:22:51

+0

看起來你是想對信息去重。最大的問題是弄清楚你的標識(ID)是什麼。就我所看到的,電子郵件是共同點。有沒有理由不把它當作鍵來使用? – Heidi 2013-02-13 06:35:06

回答

1

哎唷!這是一個混亂的數據模型。最好的解決方案是修復數據模型,從根本上避免需要這樣複雜的查詢。然而,由於應用程序的依賴關係,這樣做往往更加複雜,所以我假設這個方案已經被考慮過了。

  • 我用4行樣本來提出一種替代解決方案。
  • 然後,我添加了只有名字和姓氏(first_name、last_name)有值的行,這些行暴露了上面的查詢遺漏的一種情況。
  • 我還將相同的6行復制到12K行以上,因爲數據模型似乎支持這種情況。這最終導致上面的查詢運行了2個多小時,然後我終於放棄並停止了它。
  • 我按照我的解決方案運行了12K行,並在不到一秒的時間內返回了預期結果。

那麼,閒話少說:

-- =================================================================================
-- BEGIN: SETUP TEST DATA
-- Recreates dbo.user_details, loads the 6 sample rows and then doubles the table
-- 11 times (6 * 2^11 = 12288 rows) so the solution can be timed.
-- Every statement targets dbo.user_details explicitly so that the drop check, the
-- CREATE and the INSERTs cannot resolve to different schemas.
-- =================================================================================
SET NOCOUNT ON

IF OBJECT_ID('dbo.user_details', 'U') IS NOT NULL DROP TABLE dbo.user_details;
GO

CREATE TABLE dbo.user_details (
    user_name  char(1) NULL,
    email      char(1) NULL,
    first_name char(1) NULL,
    last_name  char(1) NULL
)
GO

-- Explicit column list: the load no longer depends on the table's column order.
INSERT INTO dbo.user_details (user_name, email, first_name, last_name)
          SELECT 'a'  AS user_name, 'b'  AS email, NULL AS first_name, NULL AS last_name
UNION ALL SELECT NULL, 'b',  'c',  'd'
UNION ALL SELECT 'a',  NULL, 'e',  'f'
UNION ALL SELECT NULL, 'x',  'y',  'z'
UNION ALL SELECT NULL, NULL, 'y',  'z'   -- name only; the same name also appears with email 'x'
UNION ALL SELECT NULL, NULL, 'a',  'z'   -- name only; appears nowhere else
GO

-- The "--/*" and "--*/" markers below are a toggle: delete the leading "--" from the
-- first marker and the whole performance block becomes a block comment.
--/*
-- TURN 6 ROWS INTO OVER 12K ROWS TO TEST PERFORMANCE
DECLARE @count int;
SET @count = 0;

WHILE @count < 11
BEGIN
    -- Each pass re-inserts the current contents, doubling the row count.
    INSERT INTO dbo.user_details (user_name, email, first_name, last_name)
    SELECT user_name, email, first_name, last_name
      FROM dbo.user_details;

    SET @count = @count + 1;
END
--*/
-- =================================================================================
-- END: SETUP TEST DATA
-- =================================================================================


-- =================================================================================
-- BEGIN: NEW SOLUTION FINAL: <1sec on 12288 rows
-- Step 1 of 3: split user_details into four small DISTINCT work tables.
-- Column names are written exactly as declared (last_name, not last_Name) so the
-- script also runs on servers/databases with a case-sensitive collation.
-- =================================================================================
IF OBJECT_ID('tempdb..#useremail', 'U') IS NOT NULL DROP TABLE #useremail;
IF OBJECT_ID('tempdb..#email', 'U') IS NOT NULL DROP TABLE #email;
IF OBJECT_ID('tempdb..#user', 'U') IS NOT NULL DROP TABLE #user;
IF OBJECT_ID('tempdb..#name', 'U') IS NOT NULL DROP TABLE #name;


-- GET YOUR UNIQUE user_name AND email KEY
-- Every distinct (user_name, email) pair, including pairs where one or both are NULL.
SELECT DISTINCT src.user_name, src.email
    INTO #useremail
    FROM user_details AS src;


-- GET YOUR UNIQUE email VALUES
-- Names seen together with each non-NULL email.
SELECT DISTINCT src.email, src.first_name, src.last_name
    INTO #email
    FROM user_details AS src
WHERE src.email IS NOT NULL;


-- GET YOUR UNIQUE user_name VALUES
-- Names seen together with each non-NULL user_name.
SELECT DISTINCT src.user_name, src.first_name, src.last_name
    INTO #user
    FROM user_details AS src
WHERE src.user_name IS NOT NULL;


-- GET YOUR UNIQUE first_name AND last_name VALUES NOT PART OF THE KEY
-- Complete names that appear on rows carrying neither a user_name nor an email.
SELECT DISTINCT src.first_name, src.last_name
    INTO #name
    FROM user_details AS src
WHERE src.first_name IS NOT NULL
    AND src.last_name IS NOT NULL
    AND src.user_name IS NULL
    AND src.email IS NULL;


-- Step 2 of 3: remove work-table rows that are made redundant by a more complete row.
-- Column names are written exactly as declared (last_name, not last_Name) so the
-- script also runs under a case-sensitive collation.
-- To preview what a statement would remove, replace "DELETE A" with "SELECT A.*".

-- CLEAN UP YOUR UNIQUE user_name AND email KEY
-- Drop half-filled keys (user_name only / email only) when a complete
-- (user_name, email) pair exists for the same user_name or the same email.
DELETE A
    FROM #useremail A
    JOIN (
     SELECT user_name, email
      FROM #useremail
     WHERE user_name IS NOT NULL
      AND email IS NOT NULL
     ) B
    ON (A.user_name = B.user_name AND A.email     IS NULL)
    OR (A.email     = B.email     AND A.user_name IS NULL);


-- CLEAN UP YOUR UNIQUE email VALUES
-- Drop the nameless row of an email when the same email also has a complete name.
DELETE A
    FROM #email A
    JOIN (
     SELECT email
      FROM #email
     WHERE first_name IS NOT NULL
      AND last_name IS NOT NULL
     ) B
    ON A.email = B.email
    AND A.first_name IS NULL
    AND A.last_name IS NULL;


-- CLEAN UP YOUR UNIQUE user_name VALUES
-- Drop the nameless row of a user_name when the same user_name also has a complete name.
DELETE A
    FROM #user A
    JOIN (
     SELECT user_name
      FROM #user
     WHERE first_name IS NOT NULL
      AND last_name IS NOT NULL
     ) B
    ON A.user_name = B.user_name
    AND A.first_name IS NULL
    AND A.last_name IS NULL;


-- CLEAN UP YOUR UNIQUE #name VALUES
-- A name-only row is redundant when the same name is already attached to a user_name ...
DELETE A
    FROM #name A
    JOIN #user B
    ON A.first_name = B.first_name
    AND A.last_name = B.last_name;

-- ... or to an email.
DELETE A
    FROM #name A
    JOIN #email B
    ON A.first_name = B.first_name
    AND A.last_name = B.last_name;


-- GET YOUR DATA
-- Step 3 of 3: assemble the result from the work tables.
-- UNION (not UNION ALL) is kept on purpose: the first two branches can yield the
-- same row, and UNION removes those duplicates.

-- Branch 1: keys with the names recorded for their user_name.
SELECT
    ue.user_name,
    ue.email,
    un.first_name,
    un.last_name
FROM #useremail AS ue
INNER JOIN #user AS un
    ON ue.user_name = un.user_name

UNION

-- Branch 2: keys with the names recorded for their email.
SELECT
    ue.user_name,
    ue.email,
    em.first_name,
    em.last_name
FROM #useremail AS ue
INNER JOIN #email AS em
    ON ue.email = em.email

UNION

-- Branch 3: remaining name-only rows, which have no key at all.
SELECT
    NULL AS [user_name],
    NULL AS [email],
    nm.first_name,
    nm.last_name
FROM #name AS nm
-- =================================================================================
-- END: NEW SOLUTION FINAL
-- =================================================================================
+0

+1,因爲提供了 DDL 和測試數據。你聽說過 sqlfiddle.com 嗎? – jmoreno 2013-03-12 05:42:54

+0

感謝提及sqlfiddle.com。它看起來像一個非常有用的小工具。 – 2013-03-13 11:34:05

0

如果我這次理解正確的話,你可以用下面的查詢來解決:

-- Lists the distinct (user_name, email, first_name, last_name) combinations for rows
-- of user_details that have an email and a complete name; a NULL user_name on the
-- row is replaced by B.USER_NAME.
-- NOTE(review): B is not correlated to A (CROSS JOIN without any matching condition),
-- so an A row with a NULL user_name is paired with the user_name of every row in the
-- table, including NULL ones.
SELECT DISTINCT
    ISNULL(A.USER_NAME, B.USER_NAME),
    A.EMAIL,
    A.FIRST_NAME,
    A.LAST_NAME
FROM user_details A
CROSS JOIN user_details B
WHERE A.LAST_NAME IS NOT NULL
    AND A.FIRST_NAME IS NOT NULL
    AND A.EMAIL IS NOT NULL
+0

抱歉,但它不這樣工作。即使存在「a b c d」,我仍然會得到「NULL b c d」:/我必須檢查每列 – 5andr0 2013-02-12 19:13:38

0

嘗試對 user_details 使用外連接。

+0

不起作用。已經嘗試過 – 5andr0 2013-02-12 19:08:07