使用clEnqueueNDRangeKernel函數無法提升OpenCL中的計算速度。clEnqueueNDRangeKernel比clEnqueueTask更快。儘管如此,我仍然無法讓執行時間低於16毫秒,即使增大global_item_size也無濟於事。速度在global_item_size = 3時就不再提升了,就這麼簡單,但我認爲增大global_size應該會讓它運行得更快。我錯了嗎?我該如何解決它?
/*
 * Collapses 4-byte input pixels into one output byte per pixel.
 *
 * For every group of four consecutive input bytes, the integer average of the
 * first three bytes is written to the output; the fourth byte is ignored.
 *
 * pDataIn  - input bytes, read in groups of four.
 * pDataOut - output bytes, one per input group.
 * InSize   - number of bytes available in pDataIn.
 * OutSize  - assumed to be the number of bytes available in pDataOut
 *            (TODO confirm against the host-side clSetKernelArg calls).
 *
 * Work is distributed with a stride of get_global_size(0) pixels, so the
 * result is the same for any global work size: no pixel is skipped when the
 * input does not divide evenly, and no work-item ever starts in the middle of
 * a pixel.
 */
__kernel void red_to_green(__global unsigned char *pDataIn, __global unsigned char *pDataOut, unsigned int InSize, unsigned int OutSize)
{
    const unsigned int gid = get_global_id(0);
    const unsigned int gsize = get_global_size(0);

    /* Only whole 4-byte groups are processed, so pDataIn[i + 2] stays in range. */
    unsigned int pixelCount = InSize / 4;
    /* Never write past the end of the output buffer. */
    if (pixelCount > OutSize)
    {
        pixelCount = OutSize;
    }

    /* Neighbouring work-items handle neighbouring pixels on each pass. */
    for (unsigned int pixel = gid; pixel < pixelCount; pixel += gsize)
    {
        const unsigned int i = pixel * 4;
        pDataOut[pixel] = (pDataIn[i] + pDataIn[i + 1] + pDataIn[i + 2]) / 3;
    }
}
vector<unsigned char> pDataIn;
vector<unsigned char> pDataOut;
SizeIn = pDataIn.size();
SizeOut = pDataOut.size();
const size_t cycles_max = 100;
clock_t t4 = clock();
for (int i = 0; i<cycles_max; i++){
double start_time = clock();
double search_time = 0;
//float last_time = 0;
//execute opencl kernel
//ret = clEnqueueTask(command_queue, kernel, 0, NULL, NULL);
size_t global_item_size = 3;
size_t local_item_size = 1;
ret = clEnqueueNDRangeKernel(command_queue,kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL);
//copy from buffer
ret = clEnqueueReadBuffer(command_queue, memobj1, CL_TRUE, 0, pDataOut.size(), pDataOut.data(), 0, NULL, NULL);
ret = clFinish(command_queue);
double end_time = clock(); // конечное время
search_time = end_time - start_time;
//float last_time = last_time + search_time;
cout << search_time << " ms" << endl;
}
clock_t t5 = clock();
double time_seconds2 = (t5-t4)*CLOCKS_PER_SEC/cycles_max;
cout << "Average time: " << time_seconds2/1000 << " ms" <<endl;
WriteBmpFile(L"3840x2160_ndrange.bmp", iWidth, iHeight, 8, pDataOut.size(), pDataOut.data(), false);
system("PAUSE");
你在什麼平臺上運行?操作系統是什麼,是在CPU還是GPU上執行,具體是哪個型號的CPU或GPU? –
警告你'clock()'不測量掛鐘時間;它實際上測量(用戶區)進程消耗的CPU時間,並且可以大於或小於掛鐘時間。當您有多個線程全部計算綁定時,CPU時間會超過掛鐘時間;當進程通常是I/O限制時,CPU時間少於掛鐘時間,等待內核爲其執行某些操作。所以如果你在CPU下運行內核,'clock()'會計算這個值,但如果你在GPU下運行,它不會。 –
我正在使用Windows 8和Visual Studio 12,在GPU(Nvidia GT 740M)上執行。嗯,這很有趣,謝謝,那我該如何測量在GPU上運行的時間? – Generwp