2011-07-05 184 views
2

我想寫一個查詢,從下表(valid_columns)刪除重複的記錄,每個 name 只保留 col_order 最小的那一筆記錄。(標題:SQL 刪除子查詢,刪除重複記錄)

例如,在下表中,我想刪除重複的行 region 2、region 3 和 job 3,只保留每個 name 中 col_order 最小的記錄。

例如,輸入表 valid_columns 看起來是這樣的:

name col_order 
------------- 
job  1 
job  3 
status 2 
cust 2 
county 1 
state 1 
region 1 
region 2 
region 3 
so  4 

所需的輸出:

name col_order 
------------- 
job  1  
status 2 
cust 2 
county 1 
state 1 
region 1 
so  4 

我正在修復一個錯誤,但想不通這段 SQL 哪裡出了問題。它目前使用 DELETE 語句搭配子查詢,查詢如下:

-- 3) 刪除重複列

-- Intended to remove every row except the lowest col_order per name.
-- NOTE(review): inside the subquery the unqualified `name` and `col_order`
-- bind to the innermost table (firstValid), so both predicates compare
-- firstValid with itself and the subquery is not correlated to the row
-- being deleted. TOP 1 ... ORDER BY col_order ASC therefore yields the
-- table-wide minimum col_order, and every row whose col_order differs
-- from that single value is deleted.
DELETE 
FROM valid_columns 
WHERE NOT (col_order = (SELECT TOP 1 col_order 
      FROM valid_columns firstValid 
      WHERE name = firstValid.name 
      AND col_order = firstValid.col_order 
      ORDER BY col_order ASC)) 

但是,執行後表中只剩下以下記錄,這是不正確的:

name col_order 
------------- 
job  1 
county 1 
state 1 
region 1 

非常感謝

回答

1
-- Remove every row whose col_order is above the lowest col_order
-- recorded for the same name (T-SQL aliased-target DELETE form).
DELETE t1
FROM valid_columns AS t1
WHERE (
        SELECT MIN(lowest.col_order)
        FROM valid_columns AS lowest
        WHERE lowest.name = t1.name
      ) < t1.col_order;

編輯: 可以簡化爲這樣:

-- Same delete without the second FROM: the correlated subquery is
-- compared against the fully qualified target table.
DELETE FROM valid_columns
WHERE (
        SELECT MIN(lowest.col_order)
        FROM valid_columns AS lowest
        WHERE lowest.name = valid_columns.name
      ) < valid_columns.col_order;

DELETE 語句可以使用第二個 FROM 子句,根據另一個表中相關記錄的值來刪除記錄。在這種情況下,第二個 FROM 並非真正必需(我有時用它來爲表名取別名,因爲我不喜歡多打字)。

-- Generic pattern: delete the TableA rows that have a match in TableB
-- on CriteriaA.
DELETE a
FROM TableA AS a
INNER JOIN TableB AS b
    ON b.CriteriaA = a.CriteriaA;

你也可以試試這個寫法(如果你需要大量執行這類操作,它可能會更快):

-- Delete a row whenever another row with the same name has a smaller
-- col_order.
DELETE FROM valid_columns
WHERE EXISTS (
    SELECT 1
    FROM valid_columns AS smaller
    WHERE smaller.name = valid_columns.name
      AND smaller.col_order < valid_columns.col_order
);
+0

在 WHERE 子句中使用相關子查詢讓我有點困惑,你能稍微展開說明你的答案嗎?它看起來是可行的。 – bobbo

0

試試這個(你可以使用select來替換delete,以確保在刪除之前得到正確的結果)。

-- Delete every row whose col_order is greater than the minimum col_order
-- for its name, leaving only the lowest-ordered row per name.
-- SQL Server does not accept an alias directly after DELETE FROM <table>;
-- the alias is declared in a second FROM clause and named as the target.
DELETE t1
FROM [valid_columns] AS t1
WHERE t1.col_order > (SELECT MIN(t2.col_order)
                      FROM [valid_columns] AS t2
                      WHERE t2.name = t1.name);
1
-- Scratch table standing in for valid_columns
DECLARE @T TABLE (Name varchar(10), col_order int);

-- Load the sample rows from the question
INSERT INTO @T (Name, col_order)
          SELECT 'job',    1
UNION ALL SELECT 'job',    3
UNION ALL SELECT 'status', 2
UNION ALL SELECT 'cust',   2
UNION ALL SELECT 'county', 1
UNION ALL SELECT 'state',  1
UNION ALL SELECT 'region', 1
UNION ALL SELECT 'region', 2
UNION ALL SELECT 'region', 3
UNION ALL SELECT 'so',     4;

-- Number the rows of each Name by ascending col_order, then delete
-- everything after the first row of each Name through the CTE.
WITH ranked AS
(
    SELECT ROW_NUMBER() OVER (PARTITION BY Name ORDER BY col_order) AS name_rank
    FROM @T
)
DELETE FROM ranked
WHERE name_rank > 1;

-- Show what is left
SELECT Name, col_order
FROM @T;

或用一個子查詢,而不是CTE

-- Same row_number() delete, with a derived table in place of the CTE:
-- rows numbered 2 and up within each Name are removed.
DELETE ranked
FROM (
        SELECT ROW_NUMBER() OVER (PARTITION BY Name ORDER BY col_order) AS name_rank
        FROM valid_columns
     ) AS ranked
WHERE ranked.name_rank > 1;
0

這應該能滿足你的需求:

-- Keep only the lowest col_order per name.
-- The comparison is against MIN(col_order): comparing against MAX would
-- delete the low rows and keep the highest one, the opposite of the goal.
-- The alias is declared in a second FROM clause because SQL Server does
-- not accept an alias directly after DELETE FROM <table>.
DELETE a
FROM valid_columns AS a
WHERE a.col_order > (SELECT MIN(b.col_order)
                     FROM valid_columns AS b
                     WHERE b.name = a.name);

不過,我建議在測試之前先備份數據。

0

或者您可以使用遊標遍歷表並在臨時表中插入遇到的第一個值(確保臨時表具有爲名稱列指定的唯一約束)。

編輯:爲方便起見,我附上了代碼片段……

-- @Ti holds the input rows; @Tf receives exactly one row per name
-- (enforced by the unique constraint on @Tf.name).
declare @Ti table(name varchar(10), col_order int);
declare @Tf table(name varchar(10) unique not null, col_order int not null);

-- Cursor fetch targets
declare @name varchar(10);
declare @col_order int;

-- Sample data
insert into @Ti (name, col_order)
select 'job',    1 union all
select 'job',    3 union all
select 'status', 2 union all
select 'cust',   2 union all
select 'county', 1 union all
select 'state',  1 union all
select 'region', 1 union all
select 'region', 2 union all
select 'region', 3 union all
select 'so',     4;

-- Input rows, for reference
select name, col_order from @Ti;

-- Walk the rows lowest col_order first within each name. Without an
-- ORDER BY the fetch order is undefined, so "first row encountered" would
-- not reliably be the lowest col_order. Columns are listed explicitly so
-- they line up with the FETCH ... INTO variables.
declare i cursor for
    select name, col_order
    from @Ti
    order by name, col_order;

open i;
fetch next from i into @name, @col_order;

while @@FETCH_STATUS = 0
begin
    -- Only the first row seen for a name is copied; later rows are skipped.
    if not exists(select * from @Tf where name = @name)
    begin
        insert into @Tf(name, col_order)
        values (@name, @col_order);
    end

    fetch next from i into @name, @col_order;
end

close i;
deallocate i;

-- De-duplicated result
select name, col_order from @Tf;
0

使用二進制校驗和(BINARY_CHECKSUM)刪除重複記錄(適用於任何 SQL Server 版本)

 

-- Sample temp table containing exact duplicate rows
CREATE TABLE #t1 (ID INT NULL, VALUE VARCHAR(2));

INSERT INTO #t1 (ID, VALUE)
SELECT 1, 'aa' UNION ALL
SELECT 2, 'bb' UNION ALL
SELECT 1, 'aa' UNION ALL
SELECT 1, 'aa' UNION ALL
SELECT 3, 'cc' UNION ALL
SELECT 3, 'cc';
GO

-- The arguments of BINARY_CHECKSUM() are the columns that define a duplicate.
-- To compare the full row, change BINARY_CHECKSUM(ID, VALUE) -> BINARY_CHECKSUM(*).
-- NOTE(review): rows are matched by checksum only; different rows that happen
-- to share a checksum would be treated as duplicates -- confirm this is
-- acceptable for the data being cleaned.

-- for SQL Server 2000+ a loop 
-- Save one row per duplicated checksum together with how many rows share it.

SELECT BINARY_CHECKSUM(ID, VALUE) AS ChkSum, COUNT(*) AS Cnt 
INTO #t2 
FROM #t1 
GROUP BY BINARY_CHECKSUM(ID, VALUE) HAVING COUNT(*)>1 

-- @ChkSum: checksum currently being processed; @rc: number of rows to delete for it.
DECLARE @ChkSum BIGINT, @rc INT 

-- Get the first checksum and set the rowcount to the count - 1,
-- because we want to leave one row of each duplicate group in place.

SELECT TOP 1 @ChkSum = ChkSum, @rc = Cnt-1 FROM #t2 

-- Loop until every duplicated checksum recorded in #t2 has been processed.
WHILE EXISTS (SELECT * FROM #t2) 
BEGIN  
    -- Limit the next statements to @rc rows (one less than the group size).
    -- NOTE(review): Microsoft documents SET ROWCOUNT's effect on DELETE as
    -- slated for removal in a future release; DELETE TOP (n) is the
    -- suggested replacement -- verify against the target server version.
    SET ROWCOUNT @rc 
    DELETE FROM #t1 WHERE BINARY_CHECKSUM(ID, VALUE) = @ChkSum 
    -- Remove the processed checksum from the work table. The row limit is
    -- still in effect here, but #t2 holds one row per checksum and @rc >= 1.
    DELETE #t2 WHERE ChkSum = @ChkSum 
    -- Pick the next duplicated checksum and its delete count.
    SELECT TOP 1 @ChkSum = ChkSum, @rc = Cnt-1 FROM #t2  
END 
-- Restore unlimited row processing for the rest of the session.
SET ROWCOUNT 0 
GO 

-- Rows remaining after the loop-based cleanup
SELECT ID, VALUE FROM #t1;

-- for SQL Server 2005+ a CTE
-- Number the rows inside each duplicate group and delete all but the first.
-- A group is defined by the checksum AND the underlying columns: partitioning
-- on the checksum alone would let two different rows that collide on the
-- same checksum be treated as duplicates and one of them be deleted.
WITH Numbered
AS
(
    SELECT ROW_NUMBER() OVER (PARTITION BY ChkSum, ID, VALUE ORDER BY ChkSum) AS RN
    FROM (
        SELECT ID, VALUE, BINARY_CHECKSUM(ID, VALUE) AS ChkSum
        FROM #t1
    ) t
)
DELETE FROM Numbered WHERE RN > 1;
GO

-- Final contents: one row per distinct (ID, VALUE)
SELECT ID, VALUE FROM #t1;

-- Clean up the temp tables created by this demo
DROP TABLE #t1;
DROP TABLE #t2;