2017-07-26 19 views
1

我試圖使用OpenCL在GPU上運行這份代碼的一部分。目前我想運行的是負責YCbCr到RGB轉換的函數。但OpenCL內核會導致應用程序無限期地運行,只有關閉IDE後才會停止。

請注意,截至目前我沒有試圖優化GPU代碼。我只是想要一個與CPU上的輸出相同的輸出。

該功能最初是這樣寫的:

/**
 * Convert a block of planar YCbCr samples into packed 32-bit pixels.
 *
 * The block is (8 * nb_MCU_H) samples wide and (8 * nb_MCU_V) samples high,
 * stored row by row. Each output word is laid out as 0x00RRGGBB (the top
 * byte is left at zero).
 *
 * @param YCbCr_MCU  three input planes: [0] = Y, [1] = Cb, [2] = Cr
 * @param RGB_MCU    output buffer, one uint32_t per sample
 * @param nb_MCU_H   block width in units of 8 samples
 * @param nb_MCU_V   block height in units of 8 samples
 */
void YCbCr_to_ARGB(uint8_t *YCbCr_MCU[3], uint32_t *RGB_MCU, uint32_t nb_MCU_H, uint32_t nb_MCU_V)
{
    const uint8_t *MCU_Y = YCbCr_MCU[0];
    const uint8_t *MCU_Cb = YCbCr_MCU[1];
    const uint8_t *MCU_Cr = YCbCr_MCU[2];
    const uint32_t width = 8 * nb_MCU_H;
    const uint32_t height = 8 * nb_MCU_V;

    /*
     * Counters and the sample index are 32-bit: with 8-bit counters the
     * index wraps once the block holds more than 256 samples, and the loops
     * never terminate once a dimension reaches 256.
     */
    for (uint32_t i = 0; i < height; i++) {
        for (uint32_t j = 0; j < width; j++) {
            const uint32_t index = i * width + j;
            const int luma = MCU_Y[index];
            const int cb = MCU_Cb[index] - 128;
            const int cr = MCU_Cr[index] - 128;

            /*
             * NOTE(review): the blue coefficient is kept at 1.7772 to
             * preserve existing output; the JFIF value is 1.772 - confirm.
             */
            int R = cr * 1.402f + luma;
            int B = cb * 1.7772f + luma;
            int G = luma - cb * 0.34414f - cr * 0.71414f;

            /* Saturate each channel to the 0..255 range. */
            if (R > 255)
                R = 255;
            if (R < 0)
                R = 0;
            if (G > 255)
                G = 255;
            if (G < 0)
                G = 0;
            if (B > 255)
                B = 255;
            if (B < 0)
                B = 0;

            RGB_MCU[index] = ((uint32_t)R << 16) | ((uint32_t)G << 8) | (uint32_t)B;
        }
    }
}

這個函數用到的變量在main.c中以下列方式聲明:

// Host-side array of three byte pointers, all initially NULL.
// presumably one pointer per plane (Y, Cb, Cr) — matches how YCbCr_to_ARGB indexes it.
cl_uchar* YCbCr_MCU[3] = { NULL, NULL, NULL}; 
// Host-side output pixel buffer, initially NULL.
cl_uint* RGB_MCU = NULL; 

這些變量的內存以下列方式分配:

if (screen_init_needed == 1) 
{ 
        screen_init_needed = 0; 

..... 
..... 
//Some code 

for (index = 0 ; index < SOF_section.n ; index++) { 
YCbCr_MCU[index] = malloc(MCU_sx * MCU_sy * max_ss_h * max_ss_v); 

YCbCr_MCU_ds[index] = malloc(MCU_sx * MCU_sy * max_ss_h * max_ss_v); 

} 

RGB_MCU = malloc (MCU_sx * MCU_sy * max_ss_h * max_ss_v * sizeof(cl_int)); 
} 

break; 
} 

我直接複製並將其粘貼到我的.cl文件中,並做了一些小改動以使其與OpenCL標準一致。我修改的OpenCL代碼是這樣的:

/*
 * First OpenCL port of the CPU function: the signature is copied almost
 * verbatim, so argument 0 is declared as an array of pointers.
 * NOTE(review): this is the version the compiler rejects — kernel arguments
 * may not be pointers to pointers.
 */
__kernel void YCbCr_to_ARGB(__global uchar* YCbCr_MCU[3], __global uint* RGB_MCU, uint nb_MCU_H, uint nb_MCU_V) 
    {   
     __global uchar *MCU_Y, *MCU_Cb, *MCU_Cr; 
     int R, G, B; 
     uint ARGB; 
     uchar index, i, j; 

     /* Pick the three plane pointers out of the pointer array. */
     MCU_Y = YCbCr_MCU[0]; 
     MCU_Cb = YCbCr_MCU[1]; 
     MCU_Cr = YCbCr_MCU[2]; 

//Same code as the first code snippet 
     ...... 
     ...... 
     ...... 

    } 

當我構建並運行使用上述內核代碼(.cl文件)的應用程序時,出現了錯誤。其中一個錯誤指出OpenCL不允許內核參數是指向指針的指針。

爲了避開這些錯誤,我再次修改我的代碼看起來像這樣:

/*
 * Second OpenCL port: argument 0 is now a plain uchar array parameter
 * (i.e. a single pointer to bytes) instead of an array of pointers.
 * NOTE(review): MCU_Y, MCU_Cb and MCU_Cr therefore all point into the SAME
 * buffer, at byte offsets 0, 1 and 2, rather than at three separate planes.
 */
__kernel void YCbCr_to_ARGB(__global uchar YCbCr_MCU[3], __global uint* RGB_MCU, uint nb_MCU_H, uint nb_MCU_V) 
{   
      __global uchar *MCU_Y, *MCU_Cb, *MCU_Cr; 
      int R, G, B; 
      uint ARGB; 
      uchar index, i, j; 

      /* Addresses of bytes 0, 1 and 2 of the one input buffer. */
      MCU_Y = &YCbCr_MCU[0]; 
      MCU_Cb = &YCbCr_MCU[1]; 
      MCU_Cr = &YCbCr_MCU[2]; 

    //Same code as the first code snippet 
      ...... 
      ...... 
      ...... 
} 

當我建立並再次運行應用程序,我沒有得到任何錯誤。這促使我寫這個內核的主機代碼。

它看起來像這樣:

// Create the kernel object for the "YCbCr_to_ARGB" entry point of `program`.
color_kernel= clCreateKernel(program, "YCbCr_to_ARGB", &ret); 

// Device buffer for the YCbCr input of YCbCr_to_ARGB.
// NOTE(review): the requested size is 3 * sizeof(cl_uchar), i.e. only 3 bytes;
// the kernel indexes one byte per pixel in each plane — confirm the intended size.
cl_mem colorMCU_GPU= clCreateBuffer(context, CL_MEM_READ_WRITE, 3 * sizeof(cl_uchar), NULL, &ret); 


// Device buffer for the RGB output of YCbCr_to_ARGB.
// NOTE(review): sizeof(cl_uint) holds a single pixel, while the kernel writes
// one cl_uint per pixel — confirm the intended size.
cl_mem RGB_GPU= clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uint), NULL, &ret); 

我在main.c中原函數被調用的位置設置內核參數。此內核的其餘執行步驟如下:

// Host-side launch of the colour-conversion kernel; the CPU call it replaces
// is kept commented out at the end of this fragment.
if(color&&(SOF_section.n>1) 
{ 
// Blocking copy of 3 bytes from the host address YCbCr_MCU to the device.
// NOTE(review): YCbCr_MCU is declared as an array of three pointers, so these
// bytes would come from the pointer array itself, not from pixel data — confirm.
ret = clEnqueueWriteBuffer(command_queue, colorMCU_GPU, CL_TRUE, 0, 3 * sizeof(cl_uchar), YCbCr_MCU, 0, NULL, NULL); 

// Blocking copy of one cl_uint from RGB_MCU to the device.
ret = clEnqueueWriteBuffer(command_queue, RGB_GPU, CL_TRUE, 0, sizeof(cl_uint), RGB_MCU, 0, NULL, NULL); 

// Bind the four kernel arguments.
// NOTE(review): the plain `ret =` on argument 2 discards the status
// accumulated for arguments 0 and 1.
ret = clSetKernelArg(color_kernel, 0, sizeof(cl_mem), (void *)&colorMCU_GPU); 
ret |= clSetKernelArg(color_kernel, 1, sizeof(cl_mem), (void *)&RGB_GPU); 
ret = clSetKernelArg(color_kernel, 2, sizeof(cl_uint), (void *)&max_ss_h); 
ret |= clSetKernelArg(color_kernel, 3, sizeof(cl_uint), (void *)&max_ss_v); 

// Run the kernel as a single task.
ret = clEnqueueTask(command_queue, color_kernel, 0, NULL, NULL); 

// Blocking read of one cl_uint of output back into RGB_MCU.
ret = clEnqueueReadBuffer(command_queue, RGB_GPU, CL_TRUE, 0, sizeof(cl_uint), RGB_MCU, 0, NULL, NULL); 

// Original CPU call, disabled:
//YCbCr_to_ARGB(YCbCr_MCU, RGB_MCU, max_ss_h, max_ss_v); 

我用這些參數構建並運行代碼後,程序一直無限期地運行(它的輸出本應是一段影片,但使用這段代碼時我只能看到黑屏)。我必須關閉Eclipse並重新打開它,才能繼續對代碼進行其他修改。

是什麼導致程序行爲像這樣?無論如何要安全地在GPU上運行此功能?

更新:

我跟着安德斯Cedronius的意見,改變了以下列方式我的內核代碼:

/*
 * Per-work-item version of the conversion: the two loops are gone and each
 * work item converts the pixel at row get_global_id(0), column
 * get_global_id(1), provided it lies inside the
 * (8 * nb_MCU_V) x (8 * nb_MCU_H) area.
 *
 * NOTE(review): argument 0 is still a single uchar buffer, so MCU_Y, MCU_Cb
 * and MCU_Cr point at byte offsets 0, 1 and 2 of that one buffer rather than
 * at three separate planes.
 * NOTE(review): index, i and j are uchar, so the global ids and the computed
 * index are truncated to 8 bits.
 */
__kernel void YCbCr_to_ARGB(__global uchar YCbCr_MCU[3], __global uint* RGB_MCU, uint nb_MCU_H, uint nb_MCU_V) 
{ 
    /* Debug trace; executed by every work item. */
    printf("Doing color conversion\n"); 

    __global uchar *MCU_Y, *MCU_Cb, *MCU_Cr; 
    int R, G, B; 
    uint ARGB; 
    uchar index, i, j; 

    /* Row and column of the pixel handled by this work item. */
    i= get_global_id(0); 
    j= get_global_id(1); 

    MCU_Y = &YCbCr_MCU[0]; 
    MCU_Cb = &YCbCr_MCU[1]; 
    MCU_Cr = &YCbCr_MCU[2]; 

    /* Work items outside the pixel area do nothing. */
    if (i < 8 * nb_MCU_V && j < 8 * nb_MCU_H) 
    { 
      index = i * (8 * nb_MCU_H) + j; 
      R = (MCU_Cr[index] - 128) * 1.402f + MCU_Y[index]; 
      B = (MCU_Cb[index] - 128) * 1.7772f + MCU_Y[index]; 
      G = MCU_Y[index] - (MCU_Cb[index] - 128) * 0.34414f - 
       (MCU_Cr[index] - 128) * 0.71414f; 


      /* Saturate each channel to 0..255 */ 
      if (R > 255) 
       R = 255; 
      if (R < 0) 
       R = 0; 
      if (G > 255) 
       G = 255; 
      if (G < 0) 
       G = 0; 
      if (B > 255) 
       B = 255; 
      if (B < 0) 
       B = 0; 
      /* Pack as 0x00RRGGBB. */
      ARGB = ((R & 0xFF) << 16) | ((G & 0xFF) << 8) | (B & 0xFF); 
      // ARGB = 0xFF << 8; 
     RGB_MCU[(i * (8 * nb_MCU_H) + j)] = ARGB; 


} 

/* Debug trace; executed by every work item. */
printf("Finished color conversion\n"); 
} 

現在調用內核我的主機代碼如下所示:

color_kernel= clCreateKernel(program, "YCbCr_to_ARGB", &ret); 

我以下面的方式設置工作大小和內核參數:

// Blocking upload of 3 bytes from the host address YCbCr_MCU.
ret = clEnqueueWriteBuffer(command_queue, colorMCU_GPU, CL_TRUE, 0, 3*sizeof(cl_uchar), YCbCr_MCU, 0, NULL, NULL); 
chk(ret, "clEnqueueWriteBuffer"); 

// Blocking upload of one cl_uint from RGB_MCU.
ret = clEnqueueWriteBuffer(command_queue, RGB_GPU, CL_TRUE, 0, sizeof(cl_uint), RGB_MCU, 0, NULL, NULL); 
chk(ret, "clEnqueueWriteBuffer"); 


// Bind the four arguments of color_kernel.
// NOTE(review): the plain `ret =` on argument 2 discards the status
// accumulated for arguments 0 and 1, and the result is never passed to chk().
ret = clSetKernelArg(color_kernel, 0, sizeof(cl_mem), (void *)&colorMCU_GPU); 
ret |= clSetKernelArg(color_kernel, 1, sizeof(cl_mem), (void *)&RGB_GPU); 
ret = clSetKernelArg(color_kernel, 2, sizeof(cl_uint), (void *)&max_ss_h); 
ret |= clSetKernelArg(color_kernel, 3, sizeof(cl_uint), (void *)&max_ss_v); 


// 2-D global work size of 1 x 1, i.e. a single work item.
size_t itemColor[2] = {1, 1}; 

// NOTE(review): this enqueues `kernel`, not `color_kernel`, so the arguments
// set above belong to a kernel that is not the one being launched here.
ret = clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, itemColor, NULL, 0, NULL, NULL); 
chk(ret, "clEnqueueNDRange"); 

// Blocking read of one cl_uint of output back into RGB_MCU.
ret = clEnqueueReadBuffer(command_queue, RGB_GPU, CL_TRUE, 0, sizeof(cl_uint), RGB_MCU, 0, NULL, NULL); 


// Wait for everything queued so far to complete.
clFinish(command_queue); 

我運行了這段代碼,不再出現黑屏。但是,「YCbCr到RGB」的內核似乎沒有被執行:就連我的printf語句也沒有顯示在輸出控制檯上,就好像我的代碼中根本沒有顏色轉換函數一樣。

更新:

原來我忘了在clEnqueueNDRangeKernel調用中更改內核的名稱。改正名稱之後,printf語句出現在控制檯上了。但是,我仍然沒有得到正確的輸出結果。

// 2-D global work size of 1 x 1: only the work item with ids (0, 0) runs,
// so at most one pixel is converted.
size_t itemColor[2] = {1, 1}; 

// Launch color_kernel over that range; the local work size is left to the runtime (NULL).
ret = clEnqueueNDRangeKernel(command_queue, color_kernel, 2, NULL, itemColor, NULL, 0, NULL, NULL); 

chk(ret, "clEnqueueNDRange"); 

// Wait for the kernel to complete.
clFinish(command_queue); 

更新:

我也跟着pmdj的建議,修改了自己的內核代碼。現在看起來是這樣的:

/*
 * OpenCL kernel: convert planar YCbCr samples to packed 0x00RRGGBB pixels.
 *
 * Launch it over a 2-D range. The work item with global id (row, col)
 * converts the sample at index row * (8 * nb_MCU_H) + col. Work items that
 * fall outside the (8 * nb_MCU_V) x (8 * nb_MCU_H) area return without
 * touching memory, so the global size may be rounded up freely.
 *
 * Y_GPU, Cb_GPU, Cr_GPU  input planes, one byte per sample
 * RGB_MCU                output, one uint per sample
 * nb_MCU_H, nb_MCU_V     block width / height in units of 8 samples
 */
__kernel void YCbCr_to_ARGB(__global uchar* Y_GPU, __global uchar* Cb_GPU, __global uchar* Cr_GPU, __global uint* RGB_MCU, uint nb_MCU_H, uint nb_MCU_V)
{
    const uint width = 8 * nb_MCU_H;
    const uint height = 8 * nb_MCU_V;

    /* Keep the ids at full width: storing them in a uchar wraps at 256. */
    const size_t row = get_global_id(0);
    const size_t col = get_global_id(1);

    /* Strict bounds: row == height or col == width is already outside. */
    if (row >= height || col >= width)
        return;

    const size_t index = row * width + col;
    const int luma = Y_GPU[index];
    const int cb = Cb_GPU[index] - 128;
    const int cr = Cr_GPU[index] - 128;

    /*
     * NOTE(review): the blue coefficient is kept at 1.7772 so the result
     * matches the CPU implementation; the JFIF value is 1.772 - confirm.
     */
    int R = cr * 1.402f + luma;
    int B = cb * 1.7772f + luma;
    int G = luma - cb * 0.34414f - cr * 0.71414f;

    /* Saturate each channel to 0..255. */
    R = clamp(R, 0, 255);
    G = clamp(G, 0, 255);
    B = clamp(B, 0, 255);

    RGB_MCU[index] = ((uint)R << 16) | ((uint)G << 8) | (uint)B;
}

在主機代碼,我創建了4個新的變量分配內存:

// Host staging buffers for the three input planes, one byte per sample.
// NOTE(review): the malloc results are not checked, and the memory is
// uninitialised until something writes to it.
Y_ForGPU= (cl_uchar *)malloc(MCU_sx * MCU_sy * max_ss_h * max_ss_v); 

Cb_ForGPU= (cl_uchar *)malloc(MCU_sx * MCU_sy * max_ss_h * max_ss_v); 

Cr_ForGPU= (cl_uchar *)malloc(MCU_sx * MCU_sy * max_ss_h * max_ss_v); 

// Host buffer for the RGB result, sized at sizeof(cl_int) bytes per sample.
RGB_testing= (cl_uint *)malloc (MCU_sx * MCU_sy * max_ss_h * max_ss_v * sizeof(cl_int)); 

我以下列方式創建緩衝區:

// One device buffer per input plane. CL_MEM_COPY_HOST_PTR copies the host
// memory at the moment the buffer is created.
// NOTE(review): later changes to Y_ForGPU / Cb_ForGPU / Cr_ForGPU are not
// picked up unless the buffers are written again — confirm these are
// created after the planes are filled.
// NOTE(review): each call overwrites `ret` without checking it.
cl_mem for_Y= clCreateBuffer(context, CL_MEM_READ_WRITE| CL_MEM_COPY_HOST_PTR, (MCU_sx * MCU_sy * max_ss_h * max_ss_v), Y_ForGPU, &ret); 


cl_mem for_Cb= clCreateBuffer(context, CL_MEM_READ_WRITE| CL_MEM_COPY_HOST_PTR, (MCU_sx * MCU_sy * max_ss_h * max_ss_v), Cb_ForGPU , &ret); 

cl_mem for_Cr= clCreateBuffer(context, CL_MEM_READ_WRITE| CL_MEM_COPY_HOST_PTR, (MCU_sx * MCU_sy * max_ss_h * max_ss_v), Cr_ForGPU, &ret); 

// Device buffer for the RGB output, created without initial contents.
cl_mem RGB_GPU= clCreateBuffer(context, CL_MEM_READ_WRITE, (MCU_sx * MCU_sy * max_ss_h * max_ss_v * sizeof(cl_int)), NULL, &ret); 

然後我設置內核參數,執行內核並將計算的數據發送回主機:

// Bind the six kernel arguments: three input planes, the output buffer and
// the two size factors. Status codes are OR-ed into `ret`.
ret = clSetKernelArg(color_kernel, 0, sizeof(cl_mem), &for_Y); 
ret |= clSetKernelArg(color_kernel, 1, sizeof(cl_mem), &for_Cb); 
ret |= clSetKernelArg(color_kernel, 2, sizeof(cl_mem), &for_Cr); 
ret |= clSetKernelArg(color_kernel, 3, sizeof(cl_mem), &RGB_GPU); 
ret |= clSetKernelArg(color_kernel, 4, sizeof(cl_uint), &max_ss_h); 
ret |= clSetKernelArg(color_kernel, 5, sizeof(cl_uint), &max_ss_v); 


// Fixed 100 x 100 global work size.
// NOTE(review): this is hard-coded rather than derived from max_ss_h /
// max_ss_v — confirm it covers the whole pixel area.
const size_t itemColor[2] = {100, 100}; 

// Launch the kernel and wait for it to finish.
// NOTE(review): the accumulated `ret` from clSetKernelArg is overwritten here unchecked.
ret = clEnqueueNDRangeKernel(command_queue, color_kernel, 2, NULL, itemColor, NULL, 0, NULL, NULL); 
clFinish(command_queue); 

// Blocking read of the full RGB result into the host buffer RGB_testing.
ret = clEnqueueReadBuffer(command_queue, RGB_GPU, CL_TRUE, 0, (MCU_sx * MCU_sy * max_ss_h * max_ss_v * sizeof(cl_int)), RGB_testing, 0, NULL, NULL); 

但是,現在我的代碼突然終止。爲什麼會發生這種情況?

更新:

我的代碼現在正在工作。這些問題可能是由於指針的差異而發生的。我設置Y,Cb,Cr和RGB變量(我創建的)等於主機代碼中的原始變量。

// Point the three plane variables at the existing YCbCr_MCU planes
// (pointer assignment only; no data is copied).

Y_ForGPU= YCbCr_MCU[0]; 
Cb_ForGPU= YCbCr_MCU[1]; 
Cr_ForGPU= YCbCr_MCU[2]; 

// Likewise make the RGB result variable refer to the existing RGB_MCU buffer.

RGB_testing= RGB_MCU; 
+3

你不應該在內核程序中使用內部循環。內核程序應該只對單個「元素」執行YUV->RGB的數學運算,然後使用clEnqueueNDRangeKernel把工作項分配到GPU上。 –

+0

@AndersCedronius我已更新我的代碼以包含您的建議。但是,我的內核現在還沒有被識別。我在上面的問題中包含了細節。 –

+0

不要只是瞎猜。如果你想做出有意義的東西,就拿起一本書開始閱讀,否則你將一事無成。 – Jovasa

回答

1

我不知道這是否是你的問題的唯一原因(可能還有更多我尚未發現的),但你的YCbCr_MCU內核參數存在類型不匹配。內核確實不能有指向指針的指針參數,但僅僅刪除*並不能解決問題。

特別是在內核行

MCU_Cb = &YCbCr_MCU[1]; 

得到的是YCbCr_MCU所指位置之後1個字節的地址,而從主機代碼的開頭來看,YCbCr_MCU實際上指向的是指針數組的開頭,而不是像素數組。

ret = clSetKernelArg(color_kernel, 0, sizeof(cl_mem), (void *)&colorMCU_GPU); 

看起來YCbCr_MCU應該是由3個指針組成的數組,分別指向包含源像素Y、Cb、Cr平面的數組。您需要把它們作爲3個直接指向這3個數組的指針傳給內核,而不是傳遞指向3個指針的指針。換句話說,把它拆成Y、Cb和Cr三個參數,並在主機上分別設置爲colorMCU_GPU[0]到colorMCU_GPU[2]。

+0

根據您給我的建議,我對代碼進行了更改。當我運行它時,我的程序突然終止。爲什麼會發生這種情況?這可能是錯誤的內存分配的結果? –

相關問題