我已經為一個簡單的函數實現了普通版本和並行版本,該函數根據32bppArgb位圖計算直方圖。在1920x1080的圖像上,普通版本大約需要0.03秒,而並行版本卻需要0.07秒。
(標題:並行化直方圖函數)
線程開銷真的有這麼大嗎?除了Parallel.For之外,還有其他構造可以加速這個過程嗎?由於我處理的是30fps的視頻,我需要加快速度。
下面是簡化的代碼:
/// <summary>
/// Per-channel (alpha, red, green, blue) histogram of a bitmap whose pixels are
/// stored as 4 bytes each in B, G, R, A byte order (e.g. Format32bppArgb).
/// </summary>
/// <remarks>
/// Each of <see cref="A"/>, <see cref="R"/>, <see cref="G"/> and <see cref="B"/> holds
/// 256 counters, one per possible channel value. The <c>Max*</c> fields hold the largest
/// counter of the corresponding channel and <see cref="MaxT"/> the largest counter overall.
/// A single instance must not be used for several computations at the same time.
/// </remarks>
public sealed class Histogram
{
    /// <summary>Number of distinct values an 8-bit channel can take.</summary>
    private const int LevelCount = 256;

    /// <summary>Bytes occupied by one pixel in the layout this class reads.</summary>
    private const int BytesPerPixel = 4;

    public int MaxA = 0;
    public int MaxR = 0;
    public int MaxG = 0;
    public int MaxB = 0;
    public int MaxT = 0;
    public int [] A = null;
    public int [] R = null;
    public int [] G = null;
    public int [] B = null;

    /// <summary>Guards the merge of per-worker counts into the public arrays.</summary>
    private readonly object mergeLock = new object();

    /// <summary>Creates an empty histogram with 256 zeroed counters per channel.</summary>
    public Histogram()
    {
        this.A = new int [LevelCount];
        this.R = new int [LevelCount];
        this.G = new int [LevelCount];
        this.B = new int [LevelCount];
        this.Initialize();
    }

    /// <summary>Resets every counter and every maximum to zero.</summary>
    public void Initialize()
    {
        this.MaxA = 0;
        this.MaxR = 0;
        this.MaxG = 0;
        this.MaxB = 0;
        this.MaxT = 0;
        System.Array.Clear(this.A, 0, this.A.Length);
        System.Array.Clear(this.R, 0, this.R.Length);
        System.Array.Clear(this.G, 0, this.G.Length);
        System.Array.Clear(this.B, 0, this.B.Length);
    }

    /// <summary>
    /// Locks the whole bitmap as Format32bppArgb, computes its histogram and unlocks it again.
    /// </summary>
    /// <param name="bitmap">Bitmap to analyse.</param>
    /// <param name="parallel">True to spread the rows over several worker threads.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="bitmap"/> is null.</exception>
    public void ComputeHistogram (System.Drawing.Bitmap bitmap, bool parallel = false)
    {
        if (bitmap == null)
        {
            throw new System.ArgumentNullException("bitmap");
        }

        System.Drawing.Imaging.BitmapData data = bitmap.LockBits
        (
            new System.Drawing.Rectangle(0, 0, bitmap.Width, bitmap.Height),
            System.Drawing.Imaging.ImageLockMode.ReadOnly,
            System.Drawing.Imaging.PixelFormat.Format32bppArgb
        );

        try
        {
            this.ComputeHistogram(data, parallel);
        }
        finally
        {
            // Runs on success and on failure alike, so the bits are unlocked exactly once.
            bitmap.UnlockBits(data);
        }
    }

    /// <summary>
    /// Computes the histogram of already locked pixel data, replacing any previous result.
    /// </summary>
    /// <param name="data">
    /// Locked pixel data. The pixels are read as 4 bytes each in B, G, R, A order; the
    /// pixel format is not checked, so the caller must supply data in such a layout.
    /// </param>
    /// <param name="parallel">True to spread the rows over several worker threads.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="data"/> is null.</exception>
    public void ComputeHistogram (System.Drawing.Imaging.BitmapData data, bool parallel = false)
    {
        if (data == null)
        {
            throw new System.ArgumentNullException("data");
        }

        // Snapshot everything the workers need so the lambdas capture plain values.
        System.IntPtr scan0 = data.Scan0;
        int width = data.Width;
        int height = data.Height;

        // Keep the sign: a negative stride means the bitmap is stored bottom-up and each
        // following scan line lies at a LOWER address than Scan0. Widened to long so that
        // the row offset cannot overflow on very large images.
        long stride = data.Stride;

        this.Initialize();

        if (parallel)
        {
            this.AccumulateParallel(scan0, stride, width, height);
        }
        else
        {
            this.AccumulateSequential(scan0, stride, width, height);
        }

        this.MaxA = LargestCount(this.A);
        this.MaxR = LargestCount(this.R);
        this.MaxG = LargestCount(this.G);
        this.MaxB = LargestCount(this.B);
        this.MaxT = System.Math.Max
        (
            System.Math.Max(this.MaxA, this.MaxR),
            System.Math.Max(this.MaxG, this.MaxB)
        );
    }

    /// <summary>Counts all rows on the calling thread, directly into the public arrays.</summary>
    private unsafe void AccumulateSequential(System.IntPtr scan0, long stride, int width, int height)
    {
        byte* origin = (byte*) scan0;
        for (int y = 0; y < height; y++)
        {
            AccumulateRow(origin + (y * stride), width, this.B, this.G, this.R, this.A);
        }
    }

    /// <summary>
    /// Counts the rows on several worker threads. Every worker fills its own private set
    /// of counters, which is added to the public arrays once when that worker finishes.
    /// </summary>
    private unsafe void AccumulateParallel(System.IntPtr scan0, long stride, int width, int height)
    {
        System.Threading.Tasks.ParallelOptions options = new System.Threading.Tasks.ParallelOptions();
        options.MaxDegreeOfParallelism = System.Environment.ProcessorCount;

        System.Threading.Tasks.Parallel.For<ChannelCounts>
        (
            0,
            height,
            options,
            // Private counters per worker: incrementing shared arrays from several threads
            // loses updates (++ is not atomic) and makes the cores fight over cache lines.
            () => new ChannelCounts(),
            (y, loopState, local) =>
            {
                byte* row = ((byte*) scan0) + (y * stride);
                AccumulateRow(row, width, local.B, local.G, local.R, local.A);
                return local;
            },
            local =>
            {
                // Executed once per worker, so the lock is taken only a handful of times.
                lock (this.mergeLock)
                {
                    AddInto(this.A, local.A);
                    AddInto(this.R, local.R);
                    AddInto(this.G, local.G);
                    AddInto(this.B, local.B);
                }
            }
        );
    }

    /// <summary>
    /// Adds the pixels of one scan line to the given counters. Only the
    /// <paramref name="pixelCount"/> real pixels are visited, never stride padding.
    /// </summary>
    private static unsafe void AccumulateRow(byte* row, int pixelCount, int [] b, int [] g, int [] r, int [] a)
    {
        byte* end = row + ((long) pixelCount * BytesPerPixel);
        for (byte* pixel = row; pixel < end; pixel += BytesPerPixel)
        {
            b [pixel [0]]++;
            g [pixel [1]]++;
            r [pixel [2]]++;
            a [pixel [3]]++;
        }
    }

    /// <summary>Adds each counter of <paramref name="source"/> to the matching counter of <paramref name="target"/>.</summary>
    private static void AddInto(int [] target, int [] source)
    {
        for (int i = 0; i < source.Length; i++)
        {
            target [i] += source [i];
        }
    }

    /// <summary>Returns the largest value in <paramref name="counts"/>, or 0 if none is positive.</summary>
    private static int LargestCount(int [] counts)
    {
        int largest = 0;
        for (int i = 0; i < counts.Length; i++)
        {
            if (largest < counts [i])
            {
                largest = counts [i];
            }
        }
        return largest;
    }

    /// <summary>Private set of counters owned by a single worker of the parallel loop.</summary>
    private sealed class ChannelCounts
    {
        public readonly int [] A = new int [LevelCount];
        public readonly int [] R = new int [LevelCount];
        public readonly int [] G = new int [LevelCount];
        public readonly int [] B = new int [LevelCount];
    }
}
您是否嘗試過讓每個線程一次計算多於1行?讓每個線程處理10-20行可能會快一點。 – 2013-02-15 16:05:25
我已經把一個運行1920次、包含四條語句的循環作為一個工作單元了。不確定還能怎樣組織它。有什麼建議嗎? – 2013-02-15 16:07:50
對於傳入「Parallel.For」的lambda,嘗試在其中從'y'循環到'y' + N(N是您需要通過實驗找到的最佳行數)。當然,這意味着要把'Parallel.For'的第二個參數從'data.Height'調整為相應的塊數。 – 2013-02-15 16:10:13