鑑於您使用的是 SQL Server 2008,您有兩種方法可以輕鬆解決問題,而且幾乎(甚至完全)無需更改您的應用程序。
第一種可能的解決方案是創建第二個表,其結構與第一個表相同,但額外帶有一個代理標識鍵(identity),以及一個使用 ignore_dup_key 選項的唯一性約束;該選項會替您完成所有消除重複項的工作。
下面是一個可以在 SSMS 中運行的示例,您可以藉此觀察實際效果:
-- start from a clean slate: remove temp tables left over from a previous run
-- (object_id returns null when the temp table does not exist in this session)
if object_id('tempdb..#test1') is not null
    drop table #test1;
if object_id('tempdb..#test2') is not null
    drop table #test2;
go
-- sample heap (no key, no index) seeded with five rows, one of which is an
-- exact duplicate of another, including the null in col3
create table #test1
(
    col1 int
   ,col2 varchar(50)
   ,col3 char(3)
);

insert into #test1 (col1, col2, col3)
values
    (250, 'Joe''s IT Consulting and Bait Shop', null)
   ,(120, 'Mary''s Dry Cleaning and Taxidermy', 'ACK')
   ,(250, 'Joe''s IT Consulting and Bait Shop', null) -- duplicate of the first row
   ,(666, 'The Honest Politician', 'LIE')
   ,(100, 'My Invisible Friend', 'WHO')
;
go
-- secondary table used to filter out duplicates while copying rows across
create table #test2
(
    -- surrogate identity key so every surviving row has its own identifier
    sk int not null identity primary key
   ,col1 int
   ,col2 varchar(50)
   ,col3 char(3)
    -- uniqueness constraint over the columns that define a duplicate.
    -- ignore_dup_key = on makes an insert skip rows that would violate the
    -- constraint (with a warning) instead of failing the whole statement.
    -- the constraint is intentionally left unnamed: constraint names are
    -- schema-scoped objects in tempdb and are not made unique per session,
    -- so a fixed name would make concurrent sessions running this script
    -- fail with "There is already an object named ...".
   ,unique (col1, col2, col3) with (ignore_dup_key = on)
);
go
-- copy every row from the original table into the de-duplicating table.
-- rows rejected by the ignore_dup_key uniqueness constraint on #test2 are
-- dropped silently apart from a warning message, so seeing that warning
-- means duplicates were present and have been ignored
insert into #test2 (col1, col2, col3)
select
    src.col1
   ,src.col2
   ,src.col3
from #test1 as src;
go
或者,您也可以不使用第二個表,直接就地刪除重複項,但其性能對您的需求而言可能太慢。下面是該方法的示例代碼,同樣可以在 SSMS 中運行:
-- remove any #test1 left over from a previous run so the demo is repeatable
if object_id('tempdb..#test1') is not null
    drop table #test1;
go
-- sample heap (no key, no index) holding five rows; the third row repeats
-- the first one exactly, null col3 included
create table #test1
(
    col1 int
   ,col2 varchar(50)
   ,col3 char(3)
);

insert into #test1 (col1, col2, col3)
values
    (250, 'Joe''s IT Consulting and Bait Shop', null)
   ,(120, 'Mary''s Dry Cleaning and Taxidermy', 'ACK')
   ,(250, 'Joe''s IT Consulting and Bait Shop', null) -- duplicate of the first row
   ,(666, 'The Honest Politician', 'LIE')
   ,(100, 'My Invisible Friend', 'WHO')
;
go
-- temporary scaffolding for the in-place clean-up:
--   * sk gives each row a unique identity value, so otherwise identical
--     rows can be told apart and ordered
--   * IX_test1 covers the columns that define a duplicate
-- note(review): PK_test1 is a named constraint on a temp table; such names
-- are schema-scoped in tempdb and can collide if two sessions run this at
-- the same time. the name is kept because the clean-up step drops it by name.
alter table #test1
    add sk int not null identity
        constraint PK_test1 primary key clustered;
create index IX_test1
    on #test1 (col1, col2, col3);
go
-- optional step: rebuild both indexes before the delete.
-- whether this helps performance depends on the data; measure before relying on it
alter index PK_test1
    on #test1 rebuild;
alter index IX_test1
    on #test1 rebuild;
go
-- remove duplicates in place: number the rows inside each group of identical
-- (col1, col2, col3) values by ascending sk, then delete everything except
-- the first row of each group. deleting through the cte removes the
-- underlying rows of #test1.
with numbered as
(
    select
        t.sk
        -- every column that takes part in "uniqueness" belongs in the partition
       ,row_number() over
        (
            partition by t.col1, t.col2, t.col3
            order by t.sk
        ) as copy_no
    from #test1 as t
)
delete from numbered
where copy_no > 1;
go
-- undo the temporary scaffolding so #test1 returns to its original shape.
-- order matters: the nonclustered index goes first, then the primary key
-- constraint that depends on sk, and only then the sk column itself
drop index IX_test1
    on #test1;
alter table #test1
    drop constraint PK_test1;
alter table #test1
    drop column sk;
go
另外,請在大容量導入完成之後再向臨時表添加索引(這樣更快) – CResults 2010-04-07 15:36:40
@CResults:是的,我本應提到這一點…… – gbn 2010-04-07 15:42:12
那麼這個確實有意義且易於實現。謝謝。 – kscott 2010-04-07 15:57:58