0
我想用 OpenCL 對圖像做卷積。在主機端設置參數並多次啟動內核,和在內核裡直接多次執行同一個函數,兩者有什麼區別?
/*
 * Kernel entry point that runs convolution() three times within a single
 * kernel launch, swapping the roles of in1 and in2 between calls:
 *   call 1: in1 -> in2,  call 2: in2 -> in1,  call 3: in1 -> in2.
 * mask, height, width, kernelSize, lMem, localHeight and localWidth are
 * forwarded unchanged to every call.
 *
 * NOTE(review): nothing here synchronizes work-groups between the calls.
 * OpenCL barriers only cover work-items of one work-group, so if
 * convolution() reads neighbouring pixels produced by other work-groups,
 * calls 2 and 3 may read data that has not been written yet (and overwrite
 * data other groups are still reading). Confirm against convolution(); the
 * usual remedy is one pass per kernel launch.
 */
__kernel void convolution_read4(__global uchar *in1, __global uchar* in2,
__constant float* mask, int height, int width, int kernelSize,
__local float* lMem, int localHeight, int localWidth)
{
/* Pass 1: read in1, write in2. */
convolution(in1, in2, mask, height, width, kernelSize, lMem, localHeight, localWidth);
/* Pass 2: read in2, write in1 (buffers swapped). */
convolution(in2, in1, mask, height, width, kernelSize, lMem, localHeight, localWidth);
/* Pass 3: read in1, write in2 again. */
convolution(in1, in2, mask, height, width, kernelSize, lMem, localHeight, localWidth);
}
上面的內核代碼在一次內核啟動中連續調用同一個函數 `convolution` 3 次。
// Runs the convolution kernel three times, ping-ponging the two image
// buffers between launches. Every OpenCL status code is OR-ed into `err`
// so that a failure in any setArg/enqueue/finish call leaves `err` non-zero
// instead of being overwritten by a later successful call.

// Pass 1: d_inputImage -> d_outputImage. Bind all nine kernel arguments.
err = kernel.setArg(0, d_inputImage);
err |= kernel.setArg(1, d_outputImage);
err |= kernel.setArg(2, d_filter);
err |= kernel.setArg(3, Height);
err |= kernel.setArg(4, Width);
err |= kernel.setArg(5, kernelSize);
// Size plus NULL pointer: argument 6 is the kernel's __local buffer.
err |= kernel.setArg(6, localSize, NULL);
err |= kernel.setArg(7, localHeight);
err |= kernel.setArg(8, localWidth);

// Global size: image extent minus padding, passed through roundUp with the
// work-group size (presumably rounding up to a multiple of it -- confirm).
int totalWorkItemX = roundUp(Width - paddingPixels, wgWidth);
int totalWorkItemY = roundUp(Height - paddingPixels , wgHeight);
cl::NDRange globalws(totalWorkItemX, totalWorkItemY);
cl::NDRange localws(wgWidth, wgHeight);
err |= queue.enqueueNDRangeKernel(kernel, cl::NullRange,
globalws, localws, NULL, NULL);

// Pass 2: swap the buffers, d_outputImage -> d_inputImage.
// NOTE(review): assumes `queue` is in-order so this launch starts only
// after pass 1 has completed -- confirm where the queue is created.
err |= kernel.setArg(1, d_inputImage);
err |= kernel.setArg(0, d_outputImage);
err |= queue.enqueueNDRangeKernel(kernel, cl::NullRange,
globalws, localws, NULL, NULL);

// Pass 3: swap back, d_inputImage -> d_outputImage.
err |= kernel.setArg(0, d_inputImage);
err |= kernel.setArg(1, d_outputImage);
err |= queue.enqueueNDRangeKernel(kernel, cl::NullRange,
globalws, localws, NULL, NULL);

// Block until all three launches have finished; keep its status as well.
err |= queue.finish();
上面的主機代碼同樣執行 3 次 `convolution` 函數(每次啟動前交換輸入與輸出緩衝區),但內核代碼改成了下面這樣:
/*
 * Kernel entry point that performs exactly one convolution() call per
 * launch, reading from in1 and writing to in2. All remaining arguments
 * (mask, height, width, kernelSize, lMem, localHeight, localWidth) are
 * forwarded unchanged.
 */
__kernel void convolution_read4(__global uchar *in1, __global uchar* in2,
__constant float* mask, int height, int width, int kernelSize,
__local float* lMem, int localHeight, int localWidth)
{
/* Single pass: in1 -> in2. */
convolution(in1, in2, mask, height, width, kernelSize, lMem, localHeight, localWidth);
}
我認爲這兩種寫法是等價的,但第一種寫法產生了錯誤的輸出。我不知道這兩者有什麼區別。