LINQ 查詢的性能問題:我有一個函數,先從一個目錄獲取文件列表,然後在列表中搜索匹配的文件名,但性能很糟糕。
下面是函數:
/// <summary>
/// Scans <paramref name="serverDirectory"/> (including all subdirectories) and reports, for each
/// permit number, whether a file exists whose name starts with that permit number.
/// </summary>
/// <param name="permitNumbers">Permit numbers to look for.</param>
/// <param name="serverDirectory">Root directory whose files are listed recursively.</param>
/// <param name="type">
/// Selects the prefix length compared against the permit numbers: the first 5 characters of the
/// file name for Well and DrillerLog, the first 14 for RasterLog. Any other value yields no results.
/// </param>
/// <returns>
/// One fileStatus (built with 1 and the file name) per matching file, followed by one fileStatus
/// (built with 0 and an empty string) per permit number that had no matching file. The list is
/// empty when the directory does not exist, contains no files, or no permit numbers were supplied.
/// NOTE(review): the 1/0 argument presumably is a "found" flag or count - confirm against fileStatus.
/// </returns>
public List<fileStatus> checkFilesStatus(List<string> permitNumbers, string serverDirectory, fileType type)
{
XmlConfigurator.Configure();
log.Debug(string.Format("Beginning checkFilesStatus with following parameters > permitNumbers: {0} > serverDirectory: {1} > type: {2}", string.Join(",", permitNumbers.ToArray()), serverDirectory, type.ToString()));
List<fileStatus> results = new List<fileStatus>();
DirectoryInfo dirInfo = new DirectoryInfo(serverDirectory);
if (dirInfo.Exists)
{
// GET LIST OF ALL FILES IN DIRECTORY
string[] files = System.IO.Directory.GetFiles(serverDirectory, "*", System.IO.SearchOption.AllDirectories);
// The complete file list is joined into a single log message, even for very large directories.
log.Debug(string.Format("List of all files in directory: {0}", string.Join(",", files)));
if (files.Length > 0 && permitNumbers.Count > 0)
{
log.Debug("Checking for matching files");
// CHECK FOR MATCHING FILES
switch (type)
{
case fileType.Well:
// Keeps files whose name (the text after the last backslash) has at least 5 characters and
// whose first 5 characters appear in permitNumbers.
// Performance: permitNumbers is a List, so each Contains call is a linear scan, giving
// roughly files x permitNumbers string comparisons. The file-name Substring is also
// recomputed several times per file.
var matchingFiles = (from f in files
where f.Substring(f.LastIndexOf("\\") + 1).Length > 4
where permitNumbers.Contains(f.Substring(f.LastIndexOf("\\") + 1, 5))
select new fileStatus(fileType.Well, f.Substring(f.LastIndexOf("\\") + 1, 5), 1, f.Substring(f.LastIndexOf("\\") + 1)));
// Deferred query: nothing is cached, so every enumeration re-runs matchingFiles from scratch.
var permitNumbersWithMatches = (from x in matchingFiles
select x.PermitNumber);
// Performance: Contains on the deferred permitNumbersWithMatches sequence re-executes the whole
// matchingFiles query for each permit number, which multiplies the cost above by the
// number of permits again.
var nonMatchingFiles = (from p in permitNumbers
where !permitNumbersWithMatches.Contains(p)
select new fileStatus(fileType.Well, p, 0, string.Empty));
// AddRange is where the deferred queries actually execute.
results.AddRange(matchingFiles);
results.AddRange(nonMatchingFiles);
break;
case fileType.DrillerLog:
// Same logic and same cost profile as the Well case; only the fileType passed to fileStatus differs.
matchingFiles = (from f in files
where f.Substring(f.LastIndexOf("\\") + 1).Length > 4
where permitNumbers.Contains(f.Substring(f.LastIndexOf("\\") + 1, 5))
select new fileStatus(fileType.DrillerLog, f.Substring(f.LastIndexOf("\\") + 1, 5), 1, f.Substring(f.LastIndexOf("\\") + 1)));
permitNumbersWithMatches = (from x in matchingFiles
select x.PermitNumber);
nonMatchingFiles = (from p in permitNumbers
where !permitNumbersWithMatches.Contains(p)
select new fileStatus(fileType.DrillerLog, p, 0, string.Empty));
results.AddRange(matchingFiles);
results.AddRange(nonMatchingFiles);
break;
case fileType.RasterLog:
// Same logic again, but the compared prefix is the first 14 characters of the file name.
matchingFiles = (from f in files
where f.Substring(f.LastIndexOf("\\") + 1).Length > 13
where permitNumbers.Contains(f.Substring(f.LastIndexOf("\\") + 1, 14))
select new fileStatus(fileType.RasterLog, f.Substring(f.LastIndexOf("\\") + 1, 14), 1, f.Substring(f.LastIndexOf("\\") + 1)));
permitNumbersWithMatches = (from x in matchingFiles
select x.PermitNumber);
nonMatchingFiles = (from p in permitNumbers
where !permitNumbersWithMatches.Contains(p)
select new fileStatus(fileType.RasterLog, p, 0, string.Empty));
results.AddRange(matchingFiles);
results.AddRange(nonMatchingFiles);
break;
default:
// Any other file type produces no results.
break;
}
log.Debug("Done checking for matching files");
}
}
return results;
}
一旦執行到爲「matchingFiles」賦值的 LINQ 查詢,程序就會掛起。「permitNumbers」是一個很大的集合(約 5000 個),「files」同樣是一個很大的集合。
我能做些什麼來加快速度?
根據下面提供的建議,我將函數修改如下,現在性能符合預期。非常感謝你! =)
/// <summary>
/// Scans <paramref name="serverDirectory"/> (including all subdirectories) and reports, for each
/// permit number, whether a file exists whose name starts with that permit number.
/// </summary>
/// <param name="permitNumbers">Permit numbers to look for; duplicates are collapsed.</param>
/// <param name="serverDirectory">Root directory whose files are listed recursively.</param>
/// <param name="type">
/// Selects the prefix length compared against the permit numbers: the first 5 characters of the
/// file name for Well and DrillerLog, the first 14 for RasterLog. Any other value yields no results.
/// </param>
/// <returns>
/// One fileStatus (built with 1 and the file name) per distinct matching file name, followed by
/// one fileStatus (built with 0 and an empty string) per permit number that had no matching file.
/// The list is empty when the directory does not exist, contains no files, or no permit numbers
/// were supplied.
/// </returns>
public List<fileStatus> checkFilesStatus(List<string> permitNumbers, string serverDirectory, fileType type)
{
    // Hash set gives constant-time membership tests instead of a linear scan per file.
    HashSet<string> numbers = new HashSet<string>(permitNumbers);
    XmlConfigurator.Configure();
    log.Debug(string.Format("Beginning checkFilesStatus with following parameters > permitNumbers: {0} > serverDirectory: {1} > type: {2}", string.Join(",", permitNumbers.ToArray()), serverDirectory, type.ToString()));
    List<fileStatus> results = new List<fileStatus>();
    DirectoryInfo dirInfo = new DirectoryInfo(serverDirectory);
    if (!dirInfo.Exists)
    {
        return results;
    }

    // GET LIST OF ALL FILES IN DIRECTORY
    string[] files = System.IO.Directory.GetFiles(serverDirectory, "*", System.IO.SearchOption.AllDirectories);
    // NOTE(review): de-duplicating by name collapses same-named files that live in different
    // subdirectories into a single result - confirm this is intended.
    HashSet<string> fileNames = new HashSet<string>(files.Select(f => Path.GetFileName(f)));
    log.Debug(string.Format("List of all files in directory: {0}", string.Join(",", files)));
    if (fileNames.Count == 0 || numbers.Count == 0)
    {
        return results;
    }

    log.Debug("Checking for matching files");

    // The file types differ only in how many leading characters of the file name form the
    // permit number, so resolve that once instead of repeating the whole query per case.
    int prefixLength;
    switch (type)
    {
        case fileType.Well:
        case fileType.DrillerLog:
            prefixLength = 5;
            break;
        case fileType.RasterLog:
            prefixLength = 14;
            break;
        default:
            prefixLength = 0; // unsupported type: nothing to check
            break;
    }

    if (prefixLength > 0)
    {
        // CHECK FOR MATCHING FILES
        // Built eagerly into a list so the filter runs, and each fileStatus is created, exactly
        // once; a deferred query would be re-executed by both Except and AddRange below.
        List<fileStatus> matchingFiles = new List<fileStatus>();
        foreach (string fileName in fileNames)
        {
            if (fileName.Length < prefixLength)
            {
                continue;
            }

            string permitNumber = fileName.Substring(0, prefixLength);
            if (numbers.Contains(permitNumber))
            {
                matchingFiles.Add(new fileStatus(type, permitNumber, 1, fileName));
            }
        }

        // Permit numbers for which no file was found.
        IEnumerable<fileStatus> nonMatchingFiles = numbers
            .Except(matchingFiles.Select(x => x.PermitNumber))
            .Select(p => new fileStatus(type, p, 0, string.Empty));

        results.AddRange(matchingFiles);
        results.AddRange(nonMatchingFiles);
    }

    log.Debug("Done checking for matching files");
    return results;
}
「一旦它到達LINQ查詢」 哪一個?你有幾個。另外,5000並不是一個「非常大的集合」。 – 2014-10-03 15:06:56
你是否分析了代碼?哪個linq查詢很慢? – 2014-10-03 15:07:06
就是一執行到爲「matchingFiles」賦值的 LINQ 查詢的時候。 – 2014-10-03 15:08:07