2013-06-05 112 views
0

我有兩個包含重疊數據的表。其中一個表有90%的記錄與另一個表重複。我需要找出該表中那10%的唯一記錄,並將其移至父表中。這兩個表都有4億多行、300列。我目前嘗試的方法是添加一個標誌字段,用來唯一標識需要傳輸的記錄,但我需要更新該字段,並且在邏輯上遇到了困難。以下是我迄今爲止所寫的代碼,它會導致無限循環。兩個表中都沒有空值。

標題:在大型表上使用循環進行批量更新

-- Flags, in batches, every row of Table1 that has no identical row in table2.
--
-- Placeholders: "Table1", "table2" and "Fields" stand for the real table names
-- and for the list of columns that decides whether two rows are duplicates.
-- When "Fields" is more than one column, qualify each column with the alias
-- (t1.col_a, t1.col_b ... / t2.col_a, t2.col_b ...).
--
-- Why the original never terminated:
--   * the EXISTS subquery was not correlated to the row being updated, so it
--     was true for every row as long as any difference existed at all;
--   * rows that were already flagged were not excluded, so each pass simply
--     re-updated the same rows and @@ROWCOUNT never reached 0.
Declare @counter int
Declare @RowsEffected int
Declare @RowsCnt int
Declare @Err int
Declare @BatchSize int

SELECT @counter = 1
SELECT @RowsEffected = 0
SELECT @BatchSize = 10000000

-- Make sure a stale session-level limit does not silently cap the batches;
-- the batch size is enforced with UPDATE TOP (...) instead of SET ROWCOUNT.
SET ROWCOUNT 0

WHILE (@counter > 0)
BEGIN

    UPDATE TOP (@BatchSize) t1
    SET Existsflg = 1
    FROM Table1 AS t1
    WHERE (t1.Existsflg IS NULL OR t1.Existsflg <> 1)   -- skip rows flagged by an earlier batch
      AND EXISTS (
            -- Correlated: yields a row only when THIS t1 row is absent from table2.
            SELECT t1.Fields
            EXCEPT
            SELECT t2.Fields
            FROM table2 AS t2
          )

    -- Capture both values in one statement; either one is reset by the next statement.
    SELECT @RowsCnt = @@ROWCOUNT, @Err = @@ERROR

    IF @Err <> 0
    BEGIN
        Print 'Problem Updating the records'
        BREAK   -- do not keep retrying a failing statement
    END

    IF @RowsCnt = 0
        SELECT @counter = 0          -- nothing left to flag: leave the loop
    ELSE
        SELECT @RowsEffected = @RowsEffected + @RowsCnt

    PRINT 'The total number of rows effected :' + convert(varchar(20), @RowsEffected)

    -- Pause between batches to give other workloads room, but not after the last one.
    IF @counter > 0
        WAITFOR DELAY '00:00:10'
END
Go

謝謝!

回答

0

這是我曾經用過的做法。

我沒有使用 SET ROWCOUNT,而是使用 SELECT TOP (N) 以及 WHILE EXISTS。

我的「源」表 dbo.Employee 位於另一臺服務器上。

GO
USE [$(DestinationDatabaseName)]
GO

/*
    READ ME !!!

    Before running this script, replace
        $(SourceServer).$(SourceDatabaseName)
    with the server and database name that hold the SOURCE data,
    for example: [OtherServer].[OtherDatabase]
*/

--SubFolder: SQLReplicateReplacer
PRINT '[uspEmployeeReplicateReplacer]'
GO

-- Remove any previous version of the procedure so the CREATE below can run.
IF EXISTS (
    SELECT 1
    FROM sys.objects AS o
    WHERE o.object_id = OBJECT_ID(N'[dbo].[uspEmployeeReplicateReplacer]')
      AND o.type IN (N'P', N'PC')
)
BEGIN
    DROP PROCEDURE [dbo].[uspEmployeeReplicateReplacer]
END
GO


/* 


declare @numberRowsAffected int 
declare @ErrorNumber int 

exec [dbo].[uspEmployeeReplicateReplacer] @numberRowsAffected output , @ErrorNumber output 

print @numberRowsAffected 
print @ErrorNumber 
print '' 

*/ 



CREATE PROCEDURE [dbo].[uspEmployeeReplicateReplacer] (
    @numberRowsAffected int OUTPUT,   -- total rows copied into dbo.Employee by this call
    @ErrorNumber        int OUTPUT    -- 0 on success, otherwise the @@ERROR value of the failing insert
)
AS
/*
    Copies, in small batches, every row of the source Employee table that does
    not yet exist in the local dbo.Employee table (matched on EmployeeUUID).

    Each pass stages up to @ManualReplicationRowCount missing rows in a temp
    table and then inserts them into dbo.Employee. The loop stops when
      - no missing rows remain,
      - an insert raises an error (reported through @ErrorNumber),
      - a batch inserts nothing (prevents a pointless spin), or
      - @MaximumLoopCounter passes have run (safety cap).

    @numberRowsAffected is accumulated over ALL batches; previously it was
    overwritten on each pass and therefore held only the size of the last batch.

    The source is read WITH (NOLOCK), as in the original; dirty reads are
    therefore possible.
*/

SET NOCOUNT ON;

SELECT @ErrorNumber = 0;

DECLARE @ErrorTracker   int;
DECLARE @batchRowCount  int;   -- rows inserted by the current pass
DECLARE @insertRowCount int;   -- running total over all passes
DECLARE @updateRowCount int;   -- stays 0 while the optional update step below is disabled

SELECT @batchRowCount  = 0;
SELECT @insertRowCount = 0;
SELECT @updateRowCount = 0;


IF OBJECT_ID('tempdb..#Employeeupdate') IS NOT NULL
BEGIN
    DROP TABLE #Employeeupdate;
END

-- Staging table for one batch of missing rows.
CREATE TABLE #Employeeupdate (
    EmployeeKeyID       int IDENTITY (1,1),
    EmployeeUUID        uniqueidentifier,
    EmployeeLabel       varchar(64),
    EmployeeDescription varchar(128)
);


DECLARE @ManualReplicationRowCount int;
/* I put this value in a stored procedure, so I could change it in one place */
/* EXEC dbo.uspInternalSettingGetManualReplicationRowCount @ManualReplicationRowCount output */
SELECT @ManualReplicationRowCount = 1000;   -- batch size

DECLARE @MaximumLoopCounter int;
SELECT @MaximumLoopCounter = 10000;         -- hard cap on the number of passes


WHILE (@MaximumLoopCounter > 0) AND EXISTS
(
    -- Is there at least one source row that is still missing locally?
    SELECT TOP 1 NULL
    FROM [$(SourceServer)].[$(SourceDatabaseName)].dbo.Employee AS vart WITH (NOLOCK)
    WHERE NOT EXISTS
    (
        SELECT NULL
        FROM dbo.Employee AS dest WITH (NOLOCK)   -- destinationTable
        WHERE
            /*
            destinationTable.SOMEUNIQUECOLUMN1 = sourceTable.SOMEUNIQUECOLUMN1
            and
            destinationTable.SOMEUNIQUECOLUMN2 = sourceTable.SOMEUNIQUECOLUMN2
            */
            dest.EmployeeUUID = vart.EmployeeUUID
    )
)
BEGIN

    SELECT @MaximumLoopCounter = @MaximumLoopCounter - 1;

    -- Empty the staging table left over from the previous pass.
    TRUNCATE TABLE #Employeeupdate;

    -- Stage the next batch of rows that exist in the source but not locally.
    INSERT INTO #Employeeupdate
    (
        EmployeeUUID,
        EmployeeLabel,
        EmployeeDescription
    )
    SELECT TOP (@ManualReplicationRowCount)
        vart.EmployeeUUID,
        vart.EmployeeLabel,
        vart.EmployeeDescription
    FROM [$(SourceServer)].[$(SourceDatabaseName)].dbo.Employee AS vart WITH (NOLOCK)
    WHERE NOT EXISTS
    (
        SELECT NULL
        FROM dbo.Employee AS dest WITH (NOLOCK)   -- destinationTable
        WHERE
            /*
            destinationTable.SOMEUNIQUECOLUMN1 = sourceTable.SOMEUNIQUECOLUMN1
            and
            destinationTable.SOMEUNIQUECOLUMN2 = sourceTable.SOMEUNIQUECOLUMN2
            */
            dest.EmployeeUUID = vart.EmployeeUUID
    );

    -- NOCOUNT is switched off only around the real insert so the caller
    -- sees one "rows affected" message per batch.
    SET NOCOUNT OFF;

    INSERT INTO dbo.Employee
    (
        EmployeeUUID,
        EmployeeLabel,
        EmployeeDescription
    )
    SELECT
        staged.EmployeeUUID,
        staged.EmployeeLabel,
        staged.EmployeeDescription
    FROM #Employeeupdate AS staged;

    -- Both values must be read in the same statement; the next statement resets them.
    SELECT @batchRowCount = @@ROWCOUNT, @ErrorTracker = @@ERROR;

    SET NOCOUNT ON;

    IF @ErrorTracker <> 0
    BEGIN
        SELECT @ErrorNumber = @ErrorTracker;
        SELECT @MaximumLoopCounter = 0;   --Bail Out !!!
    END
    ELSE
    BEGIN
        -- Accumulate so the output parameter reports the total, not just the last batch.
        SET @insertRowCount = @insertRowCount + @batchRowCount;

        -- The EXISTS check saw missing rows but nothing was inserted;
        -- stop instead of repeating the same empty pass.
        IF @batchRowCount = 0
            SELECT @MaximumLoopCounter = 0;
    END

END --End While Loop


/*
    Optional step (disabled): refresh rows that already exist in the destination.

SET NOCOUNT OFF

UPDATE dest
SET
    --EmployeeUUID = vart.EmployeeUUID,
    EmployeeLabel = vart.EmployeeLabel,
    EmployeeDescription = vart.EmployeeDescription
FROM dbo.Employee AS dest
INNER JOIN [$(SourceServer)].[$(SourceDatabaseName)].dbo.Employee AS vart WITH (NOLOCK)
    ON dest.EmployeeUUID = vart.EmployeeUUID   --Relationship

SELECT @updateRowCount = @@ROWCOUNT

SET NOCOUNT ON

*/


SELECT @numberRowsAffected = @insertRowCount + @updateRowCount;

print '/#Employeeupdate COUNT/';
print @numberRowsAffected;
print '-------------------------';


IF OBJECT_ID('tempdb..#Employeeupdate') IS NOT NULL
BEGIN
    DROP TABLE #Employeeupdate;
END


SET NOCOUNT OFF;


GO 

-- Allow the configured database user to execute the procedure.
GRANT EXECUTE
    ON OBJECT::[dbo].[uspEmployeeReplicateReplacer]
    TO $(DBUSERNAME)

GO
0

由於你要更新的數據量很大,我建議分批進行,每批 100 萬到 500 萬行。

在這種情況下,我會這樣做:

a)添加一個名爲 processed 的新列(bit 類型),所有已處理的行都會更新該列

b)將 100 萬行選入臨時表(這一步可能不是必需的,但會讓過程更清晰一些)

c)將所有非重複記錄插入另一個表

d)更新這些行,將其標記爲已處理