SQL插入，但避免重複

我想做一些快速插入，但避免向表中插入重複記錄。爲了便於討論，我們稱這張表爲MarketPrices。我一直在嘗試兩種做法，但不知道如何進行基準測試來判斷哪一種更快。

-- Approach 1: insert the incoming quote unless an identical row already exists.
-- EXCEPT compares entire rows, so every stored row is paired with both flag
-- values (0 and 1); for a @IsMarketOpen of 0 or 1 the flag therefore never
-- decides whether the row counts as a duplicate.
INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
SELECT @SecurityCode, @BuyPrice, @SellPrice, @IsMarketOpen
EXCEPT
SELECT mp.SecurityCode, mp.BuyPrice, mp.SellPrice, flag.bool
FROM MarketPrices AS mp
CROSS JOIN (
    SELECT 0 AS bool
    UNION
    SELECT 1 AS bool
) AS flag

-- Approach 2: look up an existing row with the same security code and prices,
-- then insert only when the lookup found nothing.
-- The check and the insert are two separate statements; nothing here
-- protects against a concurrent session inserting the same row in between.
DECLARE @MktId int
SET @MktId = (SELECT SecurityId FROM MarketPrices
       where SecurityCode = @SecurityCode
       and BuyPrice = @BuyPrice
       and SellPrice = @SellPrice)

-- @MktId stays NULL when no matching row exists.
IF (@MktId is NULL)
BEGIN
    INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
    VALUES
    (@SecurityCode, @BuyPrice, @SellPrice, @IsMarketOpen)
END

假設@whatever是在存儲過程中的輸入參數。

我希望能夠在BuyPrice或SellPrice或兩者都不同於以前的每一次發生時爲每個SecurityCode插入新記錄。我不關心IsMarketOpen。

上述任何一種方法有沒有什麼明顯不妥之處？其中一個比另一個快嗎？

來源

2009-11-06 Ravi

記住，第二種方法應該被包含在一個事務中，否則你可能會遇到併發問題。 – 2009-11-06 16:37:57

難道你不能只是創建一個唯一的索引？我沒有在MS SQL的經驗，但我認爲應該有這樣的傾向 – 2009-11-06 16:41:19

@valya：有趣的是，人們竟會懷疑SQL Server連最簡單的事情都做不到。我甚至不確定在*沒有*唯一索引支持的情況下是否可能實現一個關係數據庫引擎。 – Tomalak 2009-11-06 16:50:31

EDIT：爲防止併發環境中的race conditions，請在相關子查詢或EXCEPT所用的SELECT中使用WITH (UPDLOCK)。我在下面寫的測試腳本不需要它，因爲它使用只對當前連接可見的臨時表，但是在真實環境中，對用戶表進行操作時，這是非常必要的。

MERGE不需要UPDLOCK。

受mcl的回答（建立唯一索引，並讓數據庫拋出錯誤）啓發，我決定對conditional inserts與try/catch進行基準測試比較。

結果似乎表明條件插入優於try/catch，但情況因人而異。這是一個非常簡單的場景（只有一列、表很小等），而且只在一臺機器上執行，等等。

下面是結果（SQL Server 2008中，構建10.0.1600.2）：

duplicates (short table)  
    try/catch:    14440 milliseconds/100000 inserts 
    conditional insert:  2983 milliseconds/100000 inserts 
    except:     2966 milliseconds/100000 inserts 
    merge:      2983 milliseconds/100000 inserts 

uniques 
    try/catch:     3920 milliseconds/100000 inserts 
    conditional insert:  3860 milliseconds/100000 inserts 
    except:     3873 milliseconds/100000 inserts 
    merge:      3890 milliseconds/100000 inserts 

    straight insert:   3173 milliseconds/100000 inserts 

duplicates (tall table) 
    try/catch:    14436 milliseconds/100000 inserts 
    conditional insert:  3063 milliseconds/100000 inserts 
    except:     3063 milliseconds/100000 inserts 
    merge:      3030 milliseconds/100000 inserts

請注意，即使在不重複的插入上，try/catch的開銷也比條件插入略多一些。我想知道這是否因版本、CPU、內核數量等而異。

我沒有對IF形式的條件插入做基準測試，只測了WHERE形式。我認爲IF變種會顯示更多的開銷，因爲a）你會有兩個語句，b）你需要將兩個語句包裝在一個事務中，並將隔離級別設置爲可序列化（！）。如果有人想要測試這一點，需要將臨時表更改爲常規用戶表（可序列化不適用於本地臨時表）。

下面是腳本：

-- Benchmark setup. Tested on SQL 2008.
-- To run on SQL 2005, comment out the statements that use MERGE.
SET NOCOUNT ON

-- Recreate the single-column work table; the primary key on col1 is what
-- rejects duplicate values in the tests that follow.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL
    DROP TABLE #temp

CREATE TABLE #temp (col1 int PRIMARY KEY)
GO

------------------------------------------------------- 

-- duplicate insert test against a table w/ 1 record 

------------------------------------------------------- 

-- Seed the table with the one row that every insert below collides with.
INSERT #temp VALUES (1)
GO

-- try/catch: insert unconditionally and swallow the resulting error.
DECLARE @val int, @iter int, @started datetime, @elapsed_ms int
SET @val = 1
SET @iter = 0
SET @started = GETDATE()
WHILE @iter < 100000
BEGIN
    SET @iter = @iter + 1
    BEGIN TRY
        INSERT #temp SELECT @val
    END TRY
    BEGIN CATCH
        -- error deliberately ignored
    END CATCH
END
SET @elapsed_ms = DATEDIFF(millisecond, @started, GETDATE())
RAISERROR('duplicates (short table), try/catch: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @iter) WITH NOWAIT
GO

-- conditional insert: NOT EXISTS filter in the same statement.
DECLARE @val int, @iter int, @started datetime, @elapsed_ms int
SET @val = 1
SET @iter = 0
SET @started = GETDATE()
WHILE @iter < 100000
BEGIN
    SET @iter = @iter + 1
    INSERT #temp
    SELECT @val
    WHERE NOT EXISTS (SELECT * FROM #temp WHERE col1 = @val)
END
SET @elapsed_ms = DATEDIFF(millisecond, @started, GETDATE())
RAISERROR('duplicates (short table), conditional insert: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @iter) WITH NOWAIT
GO

-- except: subtract the existing rows from the candidate row.
DECLARE @val int, @iter int, @started datetime, @elapsed_ms int
SET @val = 1
SET @iter = 0
SET @started = GETDATE()
WHILE @iter < 100000
BEGIN
    SET @iter = @iter + 1
    INSERT #temp
    SELECT @val
    EXCEPT
    SELECT col1 FROM #temp
END
SET @elapsed_ms = DATEDIFF(millisecond, @started, GETDATE())
RAISERROR('duplicates (short table), except: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @iter) WITH NOWAIT
GO

-- merge: insert only when the target has no matching row.
-- comment this batch out for SQL 2005
DECLARE @val int, @iter int, @started datetime, @elapsed_ms int
SET @val = 1
SET @iter = 0
SET @started = GETDATE()
WHILE @iter < 100000
BEGIN
    SET @iter = @iter + 1
    MERGE #temp t
    USING (SELECT @val) s (col1)
        ON t.col1 = s.col1
    WHEN NOT MATCHED BY TARGET THEN
        INSERT VALUES (col1);
END
SET @elapsed_ms = DATEDIFF(millisecond, @started, GETDATE())
RAISERROR('duplicates (short table), merge: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @iter) WITH NOWAIT
GO

------------------------------------------------------- 

-- unique insert test against an initially empty table 

------------------------------------------------------- 

-- straight insert: baseline with no duplicate protection in the statement.
-- Each batch empties the table first, then inserts the values 1..100000.
TRUNCATE TABLE #temp
DECLARE @n int, @started datetime, @elapsed_ms int
SET @n = 0
SET @started = GETDATE()
WHILE @n < 100000
BEGIN
    SET @n = @n + 1
    INSERT #temp SELECT @n
END
SET @elapsed_ms = DATEDIFF(millisecond, @started, GETDATE())
RAISERROR('uniques, straight insert: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @n) WITH NOWAIT
GO

-- try/catch: same insert wrapped in an error handler that ignores failures.
TRUNCATE TABLE #temp
DECLARE @n int, @started datetime, @elapsed_ms int
SET @n = 0
SET @started = GETDATE()
WHILE @n < 100000
BEGIN
    SET @n = @n + 1
    BEGIN TRY
        INSERT #temp SELECT @n
    END TRY
    BEGIN CATCH
        -- error deliberately ignored
    END CATCH
END
SET @elapsed_ms = DATEDIFF(millisecond, @started, GETDATE())
RAISERROR('uniques, try/catch: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @n) WITH NOWAIT
GO

-- conditional insert: NOT EXISTS filter in the same statement.
TRUNCATE TABLE #temp
DECLARE @n int, @started datetime, @elapsed_ms int
SET @n = 0
SET @started = GETDATE()
WHILE @n < 100000
BEGIN
    SET @n = @n + 1
    INSERT #temp
    SELECT @n
    WHERE NOT EXISTS (SELECT * FROM #temp WHERE col1 = @n)
END
SET @elapsed_ms = DATEDIFF(millisecond, @started, GETDATE())
RAISERROR('uniques, conditional insert: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @n) WITH NOWAIT
GO

-- except: subtract the existing rows from the candidate row.
TRUNCATE TABLE #temp
DECLARE @n int, @started datetime, @elapsed_ms int
SET @n = 0
SET @started = GETDATE()
WHILE @n < 100000
BEGIN
    SET @n = @n + 1
    INSERT #temp
    SELECT @n
    EXCEPT
    SELECT col1 FROM #temp
END
SET @elapsed_ms = DATEDIFF(millisecond, @started, GETDATE())
RAISERROR('uniques, except: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @n) WITH NOWAIT
GO

-- comment this batch out for SQL 2005
-- uniques, merge: insert the values 1..100000 into an empty table via MERGE.
truncate table #temp
declare @x int, @now datetime, @duration int
-- Start the counter at 0, like the other uniques batches. Starting at 1
-- skipped the value 1, so only 99999 rows were inserted while the message
-- below reported @x (100000) inserts.
select @x = 0, @now = getdate()
while @x < 100000 begin
    set @x = @x+1
    merge #temp t using (select @x) s (col1) on t.col1 = s.col1 when not matched by target then insert values (col1);
end
set @duration = datediff(ms,@now,getdate())
raiserror('uniques, merge: %i milliseconds/%i inserts',-1,-1,@duration, @x) with nowait
go

------------------------------------------------------- 

-- duplicate insert test against a table w/ 100000 records 

------------------------------------------------------- 

-- These batches do not truncate #temp: they run against whatever rows the
-- preceding test left in the table and repeatedly try to insert the value 1.

-- try/catch: insert unconditionally and swallow the resulting error.
DECLARE @val int, @iter int, @started datetime, @elapsed_ms int
SET @val = 1
SET @iter = 0
SET @started = GETDATE()
WHILE @iter < 100000
BEGIN
    SET @iter = @iter + 1
    BEGIN TRY
        INSERT #temp SELECT @val
    END TRY
    BEGIN CATCH
        -- error deliberately ignored
    END CATCH
END
SET @elapsed_ms = DATEDIFF(millisecond, @started, GETDATE())
RAISERROR('duplicates (tall table), try/catch: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @iter) WITH NOWAIT
GO

-- conditional insert: NOT EXISTS filter in the same statement.
DECLARE @val int, @iter int, @started datetime, @elapsed_ms int
SET @val = 1
SET @iter = 0
SET @started = GETDATE()
WHILE @iter < 100000
BEGIN
    SET @iter = @iter + 1
    INSERT #temp
    SELECT @val
    WHERE NOT EXISTS (SELECT * FROM #temp WHERE col1 = @val)
END
SET @elapsed_ms = DATEDIFF(millisecond, @started, GETDATE())
RAISERROR('duplicates (tall table), conditional insert: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @iter) WITH NOWAIT
GO

-- except: subtract the existing rows from the candidate row.
DECLARE @val int, @iter int, @started datetime, @elapsed_ms int
SET @val = 1
SET @iter = 0
SET @started = GETDATE()
WHILE @iter < 100000
BEGIN
    SET @iter = @iter + 1
    INSERT #temp
    SELECT @val
    EXCEPT
    SELECT col1 FROM #temp
END
SET @elapsed_ms = DATEDIFF(millisecond, @started, GETDATE())
RAISERROR('duplicates (tall table), except: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @iter) WITH NOWAIT
GO

-- merge: insert only when the target has no matching row.
-- comment this batch out for SQL 2005
DECLARE @val int, @iter int, @started datetime, @elapsed_ms int
SET @val = 1
SET @iter = 0
SET @started = GETDATE()
WHILE @iter < 100000
BEGIN
    SET @iter = @iter + 1
    MERGE #temp t
    USING (SELECT @val) s (col1)
        ON t.col1 = s.col1
    WHEN NOT MATCHED BY TARGET THEN
        INSERT VALUES (col1);
END
SET @elapsed_ms = DATEDIFF(millisecond, @started, GETDATE())
RAISERROR('duplicates (tall table), merge: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @iter) WITH NOWAIT
GO

來源

2009-11-06 17:31:51

在這裏使用唯一索引的主要原因是爲了保證數據的完整性。我懷疑try/catch塊中的失敗插入不會成爲大多數應用程序的瓶頸，尤其是在沒有大量嘗試插入重複的情況下（因爲您的基準測試顯示類似的性能案件）。但是我懷疑擁有一個沒有強制的數據模型會在某個時候出現問題。另外，在SQL Server 2008上，我會建議探索MERGE在其他策略中的使用。 – mlibby 2009-11-06 17:47:42

@mcl re：獨特的索引，我完全同意，他應該有一個數據完整性的索引，如果他想要合理的性能，他將需要一個索引。回覆：MERGE，我只是測試了它，它在所有場景中執行*非常類似於條件插入。 – 2009-11-06 17:51:51

謝謝你們，我希望我能接受你們的答案。爲了數據完整性，我將在其上放置一個唯一的索引，然後使用條件插入，因爲它在性能和可讀性方面似乎是最好的。 – Ravi 2009-11-06 23:34:48

EDIT：爲防止race conditions在併發環境中，請在相關子查詢中使用WITH (UPDLOCK)。

我認爲這將是標準的方法：

-- Single-statement conditional insert: add the row only when no row with the
-- same security code and prices exists.
-- UPDLOCK by itself can only lock rows that already exist. HOLDLOCK adds
-- serializable (key-range) locking, so a concurrent session cannot insert
-- the same key between the existence check and the insert.
-- NOTE(review): the range lock stays narrow only with an index covering
-- (SecurityCode, BuyPrice, SellPrice) - confirm one exists.
INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
SELECT @SecurityCode, @BuyPrice, @SellPrice, @IsMarketOpen
WHERE NOT EXISTS (
    SELECT * FROM MarketPrices WITH (UPDLOCK, HOLDLOCK)
    WHERE SecurityCode = @SecurityCode
    AND BuyPrice = @BuyPrice
    AND SellPrice = @SellPrice
)

如果您的任何字段可以爲NULL，您必須把這一點加入條件中。

你的第一種方法很有趣，但EXCEPT的要求讓你不得不大費周章。這個方法本質上是一樣的，但它能讓你繞開列匹配問題。

或者：

-- EXCEPT-based conditional insert: the candidate row is compared on the three
-- key columns only, and @IsMarketOpen is attached afterwards, so the flag
-- takes no part in the duplicate check.
-- UPDLOCK by itself can only lock rows that already exist. HOLDLOCK adds
-- serializable (key-range) locking, so a concurrent session cannot insert
-- the same key between the comparison and the insert.
INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
SELECT SecurityCode, BuyPrice, SellPrice, @IsMarketOpen
FROM (
    SELECT @SecurityCode, @BuyPrice, @SellPrice
    EXCEPT
    SELECT SecurityCode, BuyPrice, SellPrice FROM MarketPrices WITH (UPDLOCK, HOLDLOCK)
) a (SecurityCode, BuyPrice, SellPrice)

在這種情況下，EXCEPT的好處是它能處理NULL值，而無需您做任何額外的編碼。爲了在第一個例子中達到同樣的效果，你需要對每一對值既測試NULL又測試相等，寫起來很冗長。

你的第二種方法是可以的，但你不需要變量。看到Tomalak的解決方案，他很好地清理了它。此外，如果這是一個問題，您需要明確處理併發插入的可能性。

來源

2009-11-06 16:33:15

我任何時候都會選擇語義清晰的解決方案。你的兩個方案在我看來都相當晦澀（雖然後者比前者好）。

-- Insert the quote only when no row with the same security code and prices
-- is present. The existence check and the insert are separate statements.
IF NOT EXISTS (
    SELECT 1
    FROM MarketPrices AS mp
    WHERE mp.SecurityCode = @SecurityCode
      AND mp.BuyPrice = @BuyPrice
      AND mp.SellPrice = @SellPrice
)
    INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
    VALUES (@SecurityCode, @BuyPrice, @SellPrice, @IsMarketOpen)

只要在SecurityCode, BuyPrice, SellPrice上建立複合索引，EXISTS查詢應該相當快。

基準測試是一個計時WHILE循環的問題，我會說。測試一下，看看你自己。

來源

2009-11-06 16:33:58 Tomalak

另一種選擇：在相關字段（SecurityCode，BuyPrice，SellPrice）上創建一個唯一索引，發出一個簡單的插入，並讓數據庫確定記錄是否重複。插入嘗試插入重複時會失敗。

使用代碼（無論是外部語言還是SQL處理程序）來保證唯一性不夠嚴格，最終會導致您希望防止的重複。

來源

2009-11-06 16:50:15 mlibby

我在想你可能是對的，特別是當涉及到併發插入時 – Ravi 2009-11-06 16:57:17

我很想看到這個基準。假設一個唯一的索引，它有更多的開銷：條件插入的WHERE子句，或TRY/CATCH塊的異常處理？如果您希望99％的插入*爲*不重複，我想TRY/CATCH塊可能更有效。 – 2009-11-06 16:59:03

我打算做到這一點，當我回家 - 將在這裏發佈結果 – Ravi 2009-11-06 17:04:46

如果你不需要捕獲重複項，你總是可以創建一個將「忽略重複」設置爲true的唯一索引。SQL Server將爲您處理此問題。

來源

2010-12-11 08:26:51 IamIC

下面我把Only inserting a row if it's not already there中的最高票答案加入到了Peter Radocchia的出色回答中。

結論是：當沒有實際衝突時（即你預期衝突非常罕見——這是uniques情況），race safe with try/catch技術比race safe with updlock, holdlock技術略快（〜1％）；而當總是發生衝突時（這是duplicates情況），它則稍慢（〜20％）。這並沒有將鎖升級等複雜問題考慮在內。

下面是結果（SQL Server 2014，build 12.0.2000.8）：

duplicates (short table)  
    try/catch:      15546 milliseconds/100000 inserts 
    conditional insert:    1460 milliseconds/100000 inserts 
    except:       1490 milliseconds/100000 inserts 
    merge:       1420 milliseconds/100000 inserts 
    race safe with try/catch:   1650 milliseconds/100000 inserts 
    race safe with updlock, holdlock: 1330 milliseconds/100000 inserts 

uniques 
    try/catch:      2266 milliseconds/100000 inserts 
    conditional insert:    2156 milliseconds/100000 inserts 
    except:       2273 milliseconds/100000 inserts 
    merge:       2136 milliseconds/100000 inserts 
    race safe with try/catch:   2400 milliseconds/100000 inserts 
    race safe with updlock, holdlock: 2430 milliseconds/100000 inserts 

    straight insert:     1686 milliseconds/100000 inserts 

duplicates (tall table) 
    try/catch:      15826 milliseconds/100000 inserts 
    conditional insert:    1530 milliseconds/100000 inserts 
    except:       1506 milliseconds/100000 inserts 
    merge:       1443 milliseconds/100000 inserts 
    race safe with try/catch:   1636 milliseconds/100000 inserts 
    race safe with updlock, holdlock: 1426 milliseconds/100000 inserts

重複（短表）部分：

-- race safe with try/catch: conditional insert inside an error handler.
DECLARE @val int, @iter int, @started datetime, @elapsed_ms int
SET @val = 1
SET @iter = 0
SET @started = GETDATE()
WHILE @iter < 100000
BEGIN
    SET @iter = @iter + 1
    BEGIN TRY
        INSERT #temp
        SELECT @val
        WHERE NOT EXISTS (SELECT * FROM #temp WHERE col1 = @val)
    END TRY
    BEGIN CATCH
        -- Error 2627 (SQL Server's PRIMARY KEY / UNIQUE constraint violation)
        -- is ignored; any other error is re-raised.
        IF ERROR_NUMBER() <> 2627
            THROW;
    END CATCH
END
SET @elapsed_ms = DATEDIFF(millisecond, @started, GETDATE())
RAISERROR('duplicates (short table), race safe with try/catch: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @iter) WITH NOWAIT
GO

-- race safe with updlock, holdlock: conditional insert with locking hints
-- on the existence check.
DECLARE @val int, @iter int, @started datetime, @elapsed_ms int
SET @val = 1
SET @iter = 0
SET @started = GETDATE()
WHILE @iter < 100000
BEGIN
    SET @iter = @iter + 1
    INSERT #temp
    SELECT @val
    WHERE NOT EXISTS (SELECT * FROM #temp WITH (UPDLOCK, HOLDLOCK) WHERE col1 = @val)
END
SET @elapsed_ms = DATEDIFF(millisecond, @started, GETDATE())
RAISERROR('duplicates (short table), race safe with updlock, holdlock: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @iter) WITH NOWAIT
GO

不重複部分

-- race safe with try/catch: empty the table, then insert 1..100000 through a
-- conditional insert inside an error handler.
TRUNCATE TABLE #temp
DECLARE @n int, @started datetime, @elapsed_ms int
SET @n = 0
SET @started = GETDATE()
WHILE @n < 100000
BEGIN
    SET @n = @n + 1
    BEGIN TRY
        INSERT #temp
        SELECT @n
        WHERE NOT EXISTS (SELECT * FROM #temp WHERE col1 = @n)
    END TRY
    BEGIN CATCH
        -- Error 2627 (SQL Server's PRIMARY KEY / UNIQUE constraint violation)
        -- is ignored; any other error is re-raised.
        IF ERROR_NUMBER() <> 2627
            THROW;
    END CATCH
END
SET @elapsed_ms = DATEDIFF(millisecond, @started, GETDATE())
RAISERROR('uniques, race safe with try/catch: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @n) WITH NOWAIT
GO

-- race safe with updlock, holdlock: same workload, with locking hints on the
-- existence check instead of an error handler.
TRUNCATE TABLE #temp
DECLARE @n int, @started datetime, @elapsed_ms int
SET @n = 0
SET @started = GETDATE()
WHILE @n < 100000
BEGIN
    SET @n = @n + 1
    INSERT #temp
    SELECT @n
    WHERE NOT EXISTS (SELECT * FROM #temp WITH (UPDLOCK, HOLDLOCK) WHERE col1 = @n)
END
SET @elapsed_ms = DATEDIFF(millisecond, @started, GETDATE())
RAISERROR('uniques, race safe with updlock, holdlock: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @n) WITH NOWAIT
GO

重複（高表）部分

-- race safe with try/catch: conditional insert inside an error handler.
-- No truncate here; the batch runs against the rows already in #temp.
DECLARE @val int, @iter int, @started datetime, @elapsed_ms int
SET @val = 1
SET @iter = 0
SET @started = GETDATE()
WHILE @iter < 100000
BEGIN
    SET @iter = @iter + 1
    BEGIN TRY
        INSERT #temp
        SELECT @val
        WHERE NOT EXISTS (SELECT * FROM #temp WHERE col1 = @val)
    END TRY
    BEGIN CATCH
        -- Error 2627 (SQL Server's PRIMARY KEY / UNIQUE constraint violation)
        -- is ignored; any other error is re-raised.
        IF ERROR_NUMBER() <> 2627
            THROW;
    END CATCH
END
SET @elapsed_ms = DATEDIFF(millisecond, @started, GETDATE())
RAISERROR('duplicates (tall table), race safe with try/catch: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @iter) WITH NOWAIT
GO

-- race safe with updlock, holdlock: conditional insert with locking hints
-- on the existence check.
DECLARE @val int, @iter int, @started datetime, @elapsed_ms int
SET @val = 1
SET @iter = 0
SET @started = GETDATE()
WHILE @iter < 100000
BEGIN
    SET @iter = @iter + 1
    INSERT #temp
    SELECT @val
    WHERE NOT EXISTS (SELECT * FROM #temp WITH (UPDLOCK, HOLDLOCK) WHERE col1 = @val)
END
SET @elapsed_ms = DATEDIFF(millisecond, @started, GETDATE())
RAISERROR('duplicates (tall table), race safe with updlock, holdlock: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @iter) WITH NOWAIT
GO

來源

2015-05-18 20:35:29

SQL插入，但避免重複

回答

相關問題