我在代碼中添加了多線程部分。多線程速度問題
/// <summary>
/// Processes one group of sequences on a dedicated thread by calling
/// SearchAlgorithm.SearchInBothDirections for every entry, and keeps the
/// per-sequence results until the caller collects them.
/// </summary>
/// <remarks>
/// The result dictionaries are filled by the worker thread without locking.
/// Read them only after <see cref="Join"/> has returned.
/// </remarks>
public class ThreadClassSeqGroups
{
    /// <summary>Sequences to process, keyed by sequence name.</summary>
    public Dictionary<string, string> seqGroup;

    /// <summary>Complete models found for each sequence, keyed by sequence name.</summary>
    public Dictionary<string, List<SearchAlgorithm.CandidateStr>> completeModels;

    /// <summary>Partial models found for each sequence, keyed by sequence name.</summary>
    public Dictionary<string, List<SearchAlgorithm.CandidateStr>> partialModels;

    // Worker thread created by Run; stays null until Run has been called.
    private Thread nativeThread;

    /// <summary>Creates a worker for the given group with empty result dictionaries.</summary>
    /// <param name="seqs">Sequences to process, keyed by sequence name.</param>
    public ThreadClassSeqGroups(Dictionary<string, string> seqs)
    {
        seqGroup = seqs;
        completeModels = new Dictionary<string, List<SearchAlgorithm.CandidateStr>>();
        partialModels = new Dictionary<string, List<SearchAlgorithm.CandidateStr>>();
    }

    /// <summary>
    /// Starts a new thread that runs <see cref="_run"/> with the given search
    /// parameters and returns immediately.
    /// </summary>
    public void Run(DescrStrDetail dsd, DescrStrDetail.SortUnit primarySeedSu,
        List<ushort> secondarySeedOrder, double partialCutoff)
    {
        nativeThread = new Thread(() => this._run(dsd, primarySeedSu, secondarySeedOrder, partialCutoff));
        // NOTE(review): every worker is raised to Highest priority. When there are more
        // workers than cores this may starve the main thread and other processes;
        // consider leaving the default priority and re-measuring.
        nativeThread.Priority = ThreadPriority.Highest;
        nativeThread.Start();
    }

    /// <summary>
    /// Searches every sequence of the group and stores the complete and partial
    /// models under the sequence name. Executes on whichever thread calls it.
    /// </summary>
    public void _run(DescrStrDetail dsd, DescrStrDetail.SortUnit primarySeedSu,
        List<ushort> secondarySeedOrder, double partialCutoff)
    {
        // Use the executing thread rather than nativeThread: the field is null when this
        // public method is invoked directly instead of through Run.
        Thread current = Thread.CurrentThread;
        int groupSize = this.seqGroup.Count;
        int seqCount = 0;
        foreach (KeyValuePair<string, string> p in seqGroup)
        {
            Console.WriteLine("ThreadID {0} (priority:{1}):\t#{2}/{3} SeqName: {4}",
                current.ManagedThreadId, current.Priority.ToString(), ++seqCount, groupSize, p.Key);

            // Upper-case with the invariant culture so the normalized sequence does not
            // depend on the machine's regional settings, then map T to U.
            string normalizedSeq = p.Value.ToUpperInvariant().Replace('T', 'U');

            List<SearchAlgorithm.CandidateStr> tmpCompleteModels, tmpPartialModels;
            SearchAlgorithm.SearchInBothDirections(
                normalizedSeq, dsd, primarySeedSu, secondarySeedOrder, partialCutoff,
                out tmpCompleteModels, out tmpPartialModels);
            completeModels.Add(p.Key, tmpCompleteModels);
            partialModels.Add(p.Key, tmpPartialModels);
        }
    }

    /// <summary>Blocks the caller until the worker thread started by <see cref="Run"/> ends.</summary>
    /// <exception cref="InvalidOperationException">Run has not been called yet.</exception>
    public void Join()
    {
        if (nativeThread == null)
        {
            throw new InvalidOperationException("Run must be called before Join.");
        }
        nativeThread.Join();
    }
}
/// <summary>
/// Driver: partitions the input sequences into groups, searches each group on its
/// own worker thread, then merges the per-group results.
/// </summary>
class Program
{
    /// <summary>Group size passed to SplitSeqFasta when partitioning the input.</summary>
    public static int _paramSeqGroupSize = 2000;

    /// <summary>Launches one worker per sequence group and gathers all results.</summary>
    /// <param name="rawSeqs">All input sequences, keyed by sequence name.</param>
    static void Main(Dictionary<string, string> rawSeqs)
    {
        // Partition the full input (name -> sequence) into groups.
        Dictionary<string, string>[] rawSeqGroups = SplitSeqFasta(rawSeqs, _paramSeqGroupSize);

        // Launch every worker before waiting on any of them.
        var threadSeqGroups = new MultiThreading.ThreadClassSeqGroups[rawSeqGroups.Length];
        int slot = 0;
        foreach (Dictionary<string, string> group in rawSeqGroups)
        {
            threadSeqGroups[slot] = new MultiThreading.ThreadClassSeqGroups(group);
            threadSeqGroups[slot].Run(dsd, primarySeedSu, secondarySeedOrder, _paramPartialCutoff);
            slot++;
        }

        // Wait for each worker in launch order and fold its results into the totals.
        var allCompleteModels = new Dictionary<string, List<SearchAlgorithm.CandidateStr>>();
        var allPartialModels = new Dictionary<string, List<SearchAlgorithm.CandidateStr>>();
        foreach (MultiThreading.ThreadClassSeqGroups worker in threadSeqGroups)
        {
            worker.Join();
            foreach (KeyValuePair<string, List<SearchAlgorithm.CandidateStr>> entry in worker.completeModels)
            {
                allCompleteModels.Add(entry.Key, entry.Value);
            }
            foreach (KeyValuePair<string, List<SearchAlgorithm.CandidateStr>> entry in worker.partialModels)
            {
                allPartialModels.Add(entry.Key, entry.Value);
            }
        }
    }
}
但是,多線程的速度比單線程要慢得多,CPU的負載一般是10%。
例如:
輸入文件包含2500串
_paramSeqGroupSize = 3000,主線程 + 1 個計算線程花費 200 秒
_paramSeqGroupSize = 400,主線程 + 7 個計算線程花費的時間要多得多(我在超過 10 分鐘後將其終止)。
我的實現有問題嗎?如何加快速度?
謝謝。
SearchAlgorithm.SearchInBothDirections是做什麼的? – 2012-07-27 15:24:31
使用類似DotTrace的分析器,它會告訴你時間消耗在哪裏。 – 2012-07-27 15:26:17
@ Bryan:SearchAlgorithm.SearchInBothDirections正在對給定字符串進行深入搜索,返回兩個候選清單列表作爲輸出參數 – Mavershang 2012-07-27 15:29:30