我在導入具有嵌套關係的Excel數據時遇到性能緩慢的問題。(標題:批量導入具有嵌套關係的Excel數據)
我需要向兩個主表以及另外四個表插入數據,這四個表與主表之間存在一對多和多對多的關係。
在向兩個主表插入數據之前,我會先檢查導入的四個表的數據是已存在的還是新的;導入文件中這四個表的數據可能重複,而相同的數據不能被多次插入。
這就是性能緩慢的原因。
我該如何解決這個問題?
我在導入具有嵌套關係的Excel數據時遇到性能緩慢的問題。(標題:批量導入具有嵌套關係的Excel數據)
我需要向兩個主表以及另外四個表插入數據,這四個表與主表之間存在一對多和多對多的關係。
在向兩個主表插入數據之前,我會先檢查導入的四個表的數據是已存在的還是新的;導入文件中這四個表的數據可能重複,而相同的數據不能被多次插入。
這就是性能緩慢的原因。
我該如何解決這個問題?
我曾經不得不處理這種情況,數據量大到包含數百萬條記錄。以下是多次浪費數週時間後得到的經驗:
1)只要力所能及,就不要使用Excel。它速度很慢,並且消耗大量內存。僅有50000條記錄的單張工作表,加載文件時最終就可能消耗超過2 GB的內存,而導入單張工作表則需要30、40、50分鐘甚至更久。考慮將數據轉換爲CSV並使用SqlBulkCopy
導入。它可以在幾秒到幾分鐘內處理大量記錄,而不是幾個小時。
2)就改善這種情況下的性能而言,使用實體框架(Entity Framework)能做的並不多。我發現最好、最快的方法是將每張工作表加載到數據庫中的臨時表,然後構建SQL,把數據批量插入到最終的目標表中。可以將中間插入的結果捕獲到輸出表中,這樣就能取得從臨時表執行連接或向相關表插入數據時所需的鍵。你可以「借用」一些EF自動生成的SQL,但需要對它進行微調。
3)儘管SQL不喜歡循環,我還是把SQL語句寫成在循環中運行,每次插入比如十萬條記錄,這樣插入運行得更快。
爲了讓你有個概念,以下是將每張工作表以CSV形式批量導入之後的做法:
首先根據需要定義變量,並存儲相關類型表中的值:
-- Batch window over TempClassMemberRecords.rawID: each loop pass handles the rows
-- with rawID BETWEEN @Min AND @Max.
-- NOTE(review): @RecordsPerLoop and @uploadUser are not declared in this excerpt;
-- presumably they are parameters of the enclosing procedure -- confirm.
DECLARE @Max INT = @RecordsPerLoop
DECLARE @Min INT = 0
-- Number of staged rows; used as the loop bound.
DECLARE @TotalRECORD INT = (
        SELECT COUNT(*)
        FROM TempClassMemberRecords
        )
-- Country used to resolve @PrimaryCountryId (matched against name or code below).
DECLARE @Country VARCHAR(50)
SET @Country = 'USA'
-- Const variables for class member inserts: lookup keys are resolved once here
-- so the batch inserts do not repeat these sub-queries for every row.
DECLARE @DefaultCommPreference VARCHAR(50) = (
        -- communication preference copied from the Actors row with PKID = 0
        SELECT TOP 1 CommPreference
        FROM Actors
        WHERE PKID = 0
        )
    ,@PrimaryActorTypeId INT = (
        SELECT TOP 1 PKId
        FROM ActorTypes
        WHERE ActorTypeName = 'PrimaryClaimant'
        )
    ,@SecondaryActorTypeId INT = (
        SELECT TOP 1 PKId
        FROM ActorTypes
        WHERE ActorTypeName = 'CoClaimant'
        )
    ,@HomePhoneTypeId INT = (
        SELECT TOP 1 PKId
        FROM PhoneTypes
        WHERE PhoneTypeName = 'Home'
        )
    ,@WorkPhoneTypeId INT = (
        SELECT TOP 1 PKId
        FROM PhoneTypes
        WHERE PhoneTypeName = 'Work'
        )
    -- 0 when @Country matches neither a country name nor a country code
    ,@PrimaryCountryId INT = IsNull((
            SELECT TOP 1 PKId
            FROM Countries
            WHERE @Country IN (
                    CountryName
                    ,CountryCode
                    )
            ), 0)
    -- 0 when no country with code 'USA' exists
    ,@DefaultCountryId INT = IsNull((
            SELECT TOP 1 PKId
            FROM Countries
            WHERE CountryCode = 'USA'
            ), 0)
    ,@SubmitTypeId INT = (
        SELECT TOP 1 PKId
        FROM ClaimSubmitTypes
        WHERE SubmitTypeName = 'Bulk'
        )
    ,@ClaimStatusId INT = (
        SELECT TOP 1 PKId
        FROM ClaimStatusTypes
        WHERE StatusName = 'Active'
        )
    ,@ModifiedBy VARCHAR(20) = @uploadUser
    ,@ModifiedDate DATETIME = GETDATE()
    -- case code of the row with the highest PKId in Cases
    ,@CaseCode VARCHAR(50) = (
        SELECT TOP 1 CaseCode
        FROM Cases
        ORDER BY PKId DESC
        ) + ''
    ,@IndividualClaimantType INT = (
        SELECT TOP 1 PKId
        FROM claimanttypes
        WHERE ClaimantTypeName = 'Individual'
        )
    ,@CompanyClaimantType INT = (
        SELECT TOP 1 PKId
        FROM claimanttypes
        WHERE ClaimantTypeName = 'Corporation'
        )
    ,@Checked BIT = 0
    -- Next PKId after the current maximum in Entities. MAX() over an empty table is
    -- NULL, so default to 0 to get a starting id of 1 instead of NULL on a first load.
    -- NOLOCK: this is a dirty read and may miss rows inserted concurrently.
    ,@startingPKId INT = (
        SELECT ISNULL(MAX(PKId), 0) + 1
        FROM dbo.Entities WITH (NOLOCK)
        );
--Record per group insert: when everything fits in one batch, clamp the upper bound
IF (@TotalRECORD <= @RecordsPerLoop)
    SET @Max = @TotalRECORD
在循環中運行插入:
-- our main loop
-- Moves staged rows from TempClassMemberRecords into Entities, Actors,
-- Entities2Actors and Addresses one rawID window (@Min..@Max) at a time.
-- Each window runs in its own transaction; a failed window is rolled back,
-- reported as an error, and the loop stops.
WHILE (@Min <= @TotalRECORD)
BEGIN
    -- Per-batch scratch tables: recreate them empty for every pass.
    IF OBJECT_ID('tempdb..#EntityIds') IS NOT NULL
        DROP TABLE #EntityIds
    IF OBJECT_ID('tempdb..#RefNumRepository') IS NOT NULL
        DROP TABLE #RefNumRepository
    IF OBJECT_ID('tempdb..#ActorIds') IS NOT NULL
        DROP TABLE #ActorIds
    IF OBJECT_ID('tempdb..#SecondaryActorIds') IS NOT NULL
        DROP TABLE #SecondaryActorIds

    -- mid = staging rawID, eid = generated Entities.pkid
    CREATE TABLE #EntityIds (
        pkid INT identity(1, 1) NOT NULL
        ,mid INT
        ,eid INT
        )
    -- mid = staging rawID, aid = generated Actors.pkid
    CREATE TABLE #ActorIds (
        pkid INT identity(1, 1) NOT NULL
        ,mid INT
        ,aid INT
        )
    CREATE TABLE #SecondaryActorIds (
        pkid INT identity(1, 1) NOT NULL
        ,mid INT
        ,aid INT
        )
    CREATE TABLE #RefNumRepository (
        pkid INT identity(1, 1) NOT NULL
        ,RefNum VARCHAR(50)
        )

    BEGIN TRANSACTION
    BEGIN TRY
        -- Mark a batch worth of unused reference numbers as used and capture them.
        UPDATE TOP (@RecordsPerLoop + 1) RefNumRepository
        SET IsUsed = 1
        OUTPUT deleted.RefNum
        INTO #RefNumRepository(RefNum)
        WHERE IsUsed = 0;

        PRINT 'Entities'
        -- Source is loaded with rawID so OUTPUT can pair each new Entities.pkid
        -- with the staging row it came from.
        INSERT INTO Entities (
            ModifiedBy
            ,ModifiedDate
            ,RecordOwnerName
            ,IsConflictOfInterest
            ,FKClaimantTypeId
            ,OtherClaimantType
            ,InstitutionAccountNumber
            ,RefNum
            ,FKSubmitTypeId
            ,FKClaimStatusTypeId
            ,RecordType
            ,ClaimNum
            ,FilingDate
            ,FirstName
            ,Lastname
            ,Email
            ,SSN
            ,Source
            ,ClaimDataCertifiedDate
            )
        OUTPUT Inserted.pkid
            ,Inserted.source
        INTO #EntityIds(eid, mid)
        SELECT @ModifiedBy
            ,@ModifiedDate
            ,NULL
            ,1
            ,CASE
                WHEN IsNull(company, '') = ''
                    THEN @IndividualClaimantType
                ELSE @CompanyClaimantType
                END
            ,NULL
            ,NULL
            ,''
            ,@SubmitTypeId
            ,@ClaimStatusId
            ,'CM'
            ,NULL
            ,@ModifiedDate
            ,IsNull(fname, '')
            ,IsNull(lname, '')
            ,IsNull(Email, '')
            ,IsNull(ssn, '')
            ,rawID
            ,@ModifiedDate
        FROM TempClassMemberRecords
        WHERE rawID BETWEEN @Min
                AND @Max
            AND IsProcessed IS NULL

        EXEC dbo.[USP_AssignClassMemberRefNums] @startingPKId

        PRINT 'Actors'
        -- bulk insert our range of class members into Actors while inserting the primary key into our temp table
        -- (attention is loaded with rawid so OUTPUT can map each new actor to its staging row)
        INSERT INTO Actors (
            FKActorTypeId
            ,ModifiedBy
            ,ModifiedDate
            ,LastName
            ,FirstName
            ,MiddleName
            ,CommPreference
            ,IsPayee
            ,IsUSCitizen
            ,ein
            ,ssn
            ,company
            ,attention
            ,NotificationsBlocked
            ,SearchName
            ,ClientAcctNumber
            )
        OUTPUT Inserted.pkid
            ,inserted.attention
        INTO #ActorIds(aid, mid)
        SELECT @PrimaryActorTypeId
            ,@ModifiedBy
            ,@ModifiedDate
            ,IsNull(lname, '')
            ,IsNull(fname, '')
            ,''
            ,IsNull(@DefaultCommPreference, 'Mail')
            ,1
            ,NULL
            ,IsNull(ein, '')
            ,IsNull(ssn, '')
            ,IsNull(company, '')
            ,rawid
            ,0
            -- SearchName: company when present, otherwise "last, first".
            -- Each name part is NULL-guarded: a bare lname + ', ' + fname turns
            -- NULL as soon as either part is NULL.
            ,CASE
                WHEN len(ISNULL(company, '')) > 0
                    THEN company
                WHEN len(ISNULL(lname, '')) > 0
                    OR len(ISNULL(fname, '')) > 0
                    THEN ISNULL(lname, '') + ', ' + ISNULL(fname, '')
                ELSE ''
                END
            ,ACCTNUM
        FROM TempClassMemberRecords
        WHERE (
                isnull(company, '') <> ''
                OR isNull(fname, '') <> ''
                OR isNull(lname, '') <> ''
                )
            AND rawid BETWEEN @Min
                AND @Max
            AND IsProcessed IS NULL

        PRINT 'Entities2Actors'
        -- bulk insert the relations of Entities to Actors in Entities2Actors
        INSERT INTO Entities2Actors (
            FKEntityId
            ,FKActorId
            ,IsActorBeneficiary
            ,ModifiedBy
            ,ModifiedDate
            )
        SELECT e.eid
            ,a.aid
            ,1
            ,@ModifiedBy
            ,@ModifiedDate
        FROM #EntityIds e
        INNER JOIN #ActorIds a ON e.mid = a.mid
        -- etc...

        PRINT 'Addressed'
        --Bulk Insert into Address table for Primary Actor Address
        -- NOTE(review): @PrimaryCountryId is initialised with IsNull(..., 0) in the
        -- declarations, so the ISNULL fallback below looks unreachable -- confirm intent.
        INSERT INTO Addresses (
            FKActorId
            ,ModifiedBy
            ,ModifiedDate
            ,Address1
            ,Address2
            ,City
            ,STATE
            ,Zip
            ,Zip4
            ,FKCountryId
            )
        SELECT a.aid
            ,@ModifiedBy
            ,@ModifiedDate
            ,IsNull(Address, '')
            ,IsNull(Address2, '')
            ,IsNull(City, '')
            ,IsNull([State], '')
            ,IsNull(Zip, '')
            ,IsNull(Zip4, '')
            ,ISNULL(@PrimaryCountryId, @DefaultCountryId)
        FROM #ActorIds a
        INNER JOIN TempClassMemberRecords c ON a.mid = c.rawId
        -- etc...

        -- Flag the window as done so it is never picked up again.
        UPDATE tempClassMemberRecords
        SET IsProcessed = 1
        WHERE rawid BETWEEN @Min
                AND @Max
            AND IsProcessed IS NULL

        -- Advance the window to the next batch.
        SET @Min = @Max + 1
        SET @Max = @Max + @RecordsPerLoop

        COMMIT TRANSACTION

        -- Short pause between batches.
        WAITFOR DELAY '000:00:00.400'
    END TRY
    BEGIN CATCH
        -- Only roll back if a transaction is still open (a failure after COMMIT has none).
        IF @@TRANCOUNT > 0
            ROLLBACK TRANSACTION

        -- Severity 16 so the caller sees a real error; severity 1 is informational only.
        RAISERROR (N'Error in moving data from Temporary table to Main tables.', -- Message text.
                16,
                1);
        PRINT 'Failed with error: ' + ERROR_MESSAGE()

        -- The window was not advanced, so continuing would retry the same failing
        -- batch forever; stop the loop instead.
        BREAK
    END CATCH
END
謝謝 @Higgins,我現在很忙,之後會試試。對我來說,記錄數最多不會超過5000。 –
使用 .NET 的 ReadAllLines() 方法將整個文件讀入一個字符串數組,然後運行 Parallel.For 循環並行處理所有行。
/// <summary>
/// Reads every file in <paramref name="FolderPath"/> that matches
/// <paramref name="FileExtension"/> and hands each line to InsertDataCheck,
/// processing the lines of a file in parallel.
/// </summary>
/// <param name="FolderPath">Directory to scan for input files.</param>
/// <param name="FileExtension">Search pattern passed to DirectoryInfo.GetFiles.</param>
/// <returns>
/// true when every file was processed without an exception; false when an
/// exception was caught (its message is written to the console).
/// </returns>
private bool ProcessFile(string FolderPath, string FileExtension)
{
    try
    {
        //all files with requisite file extension
        DirectoryInfo dinfo = new DirectoryInfo(FolderPath);
        foreach (FileInfo file in dinfo.GetFiles(FileExtension))
        {
            // Load the whole file up front; the reader is opened and closed internally.
            string[] allLines = File.ReadAllLines(file.FullName);

            // InsertDataCheck is invoked concurrently from multiple threads here.
            Parallel.ForEach(allLines, line => InsertDataCheck(line));
        }
        return true;
    }
    catch (AggregateException ex)
    {
        // Parallel loops wrap worker failures; report each underlying cause
        // rather than the generic aggregate message.
        foreach (Exception inner in ex.Flatten().InnerExceptions)
        {
            Console.WriteLine(inner.Message);
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
    return false;
}
/// <summary>
/// Placeholder hook for handling a single input line; the body is intentionally
/// left empty in this sample.
/// </summary>
/// <param name="Line">One line of text read from an input file.</param>
private void InsertDataCheck(string Line)
{
// Decide, based on your own condition, whether this line should be inserted,
// and then perform the insert.
}
謝謝 @Ajay,我現在很忙,之後會試試。 –
可以說明一下具體有多慢嗎? –