-1
我編寫 OpenCL 程序已經有一段時間了,現在想寫一個簡單的程序:讀取 .pam 格式的圖像,然後把它保存下來。
OpenCL Error -40
現在的問題是:當我運行程序並給出要打開的圖像文件名時,在創建第一個 2D 圖像(clCreateImage2D)時會返回錯誤 -40。
下面的代碼:
/* Horizontal rule printed before and after the benchmark table in main(). */
#define SEPARATOR "==============================================\n"
/* Copy the image description from src to dst: width, height, channels,
 * depth, maxval and data_size. The pixel pointer dst->data is left untouched,
 * so dst describes an image of the same dimensions but owns no pixel data
 * of its own after this call. */
void copy_img_info(imgInfo* src, imgInfo *dst){
    /* Geometry. */
    dst->width     = src->width;
    dst->height    = src->height;

    /* Pixel layout. */
    dst->channels  = src->channels;
    dst->depth     = src->depth;
    dst->maxval    = src->maxval;

    /* Size of the pixel payload (the payload itself is not copied). */
    dst->data_size = src->data_size;
}
/* Enqueue the "op" kernel over the whole input image.
 *
 * que          command queue the kernel is enqueued on
 * k_op         kernel object; argument 0 is set to src, argument 1 to dst
 * info_open    description of the input image; its height and width size
 *              the global work range
 * info_to_save description of the output image (currently unused)
 * _lws         side of the square work-group; any value <= 0 selects the
 *              default of 16
 * src, dst     input and output image objects
 * num_events,
 * wait_list    events the kernel must wait for
 *
 * Returns the event associated with the enqueued kernel; the caller is
 * responsible for releasing it. Every OpenCL return code is handed to
 * ocl_check() together with a label.
 */
cl_event launch_op(cl_command_queue que, cl_kernel k_op,
                   imgInfo info_open, imgInfo info_to_save,
                   int _lws,
                   cl_mem src, cl_mem dst,
                   cl_int num_events, const cl_event* wait_list){
    cl_int err;
    cl_event evt_kernel;

    (void)info_to_save; /* kept for interface compatibility */

    /* A negative _lws would otherwise be converted to an enormous size_t
     * and rejected by the runtime, so treat it like "not specified". */
    const size_t side = _lws > 0 ? (size_t)_lws : 16;
    const size_t lws[] = { side, side };

    /* NOTE: dimension 0 spans the image HEIGHT and dimension 1 the WIDTH.
     * The kernel relies on exactly this ordering (it uses global id 1 as the
     * x coordinate and global id 0 as the y coordinate), so the two must
     * only ever be changed together. */
    const size_t gws[] = {
        round_mul_up(info_open.height, lws[0]),
        round_mul_up(info_open.width, lws[1])
    };

    err = clSetKernelArg(k_op, 0, sizeof(src), &src);
    ocl_check(err, "Set op k arg 0");
    err = clSetKernelArg(k_op, 1, sizeof(dst), &dst);
    ocl_check(err, "Set op k arg 1");

    err = clEnqueueNDRangeKernel(que, k_op, 2, NULL, gws, lws,
                                 num_events, wait_list, &evt_kernel);
    ocl_check(err, "Enqueue kernel op");

    return evt_kernel;
}
int main(int argc, char* argv[]) {
if(argc <2){
printf("Inserire nome del file\n");
exit(EXIT_FAILURE);
}
int _lws;
if(argc <3)
_lws= 0;
else{
_lws= atoi(argv[2]);
printf("Sarà usato %d come local work size\n",_lws);
}
imgInfo info_open, info_to_save;
cl_event evt_fill[2], evt_upload, evt_op, evt_download;
cl_image_format format = {
.image_channel_data_type = CL_UNSIGNED_INT16,
.image_channel_order = CL_RGBA
};
cl_int err;
cl_platform_id p = select_platform();
cl_device_id d = select_device(p);
cl_context ctx = create_context(p, d);
cl_command_queue que = create_queue(ctx, d);
cl_program prog = create_program("Kernels/image_tr.ocl",NULL , ctx, d);
if(load_pam(argv[1], &info_open)){
printf("Errore durante apertura file\n");
exit(EXIT_FAILURE);
}
copy_img_info(&info_open, &info_to_save);
/* ALLOCATION OF BUFFERS */
cl_mem image_to_open = clCreateImage2D(ctx, CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY , &format, info_open.width, info_open.height,
info_open.width, NULL, &err);
ocl_check(err, "Allocate image to open");
cl_mem image_to_save = clCreateImage2D(ctx, CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY , &format, info_to_save.width, info_to_save.height,
info_to_save.width, NULL, &err);
ocl_check(err, "Allocate image to save");
/* ALLOCATION OF BUFFERS */
/* FILL IMAGES */
const size_t fill_color[]={0,0,0,0};
const size_t origin[3]= {0,0,0};
const size_t region[3]= { info_open.width, info_open.height, 1};
err = clEnqueueFillImage(que, image_to_open, fill_color , origin, region, 0, NULL, evt_fill);
ocl_check(err, "Enqueue Fill Buffer to open");
err = clEnqueueFillImage(que, image_to_save, fill_color , origin, region, 0, NULL, evt_fill + 1);
ocl_check(err, "Enqueue Fill Buffer to save");
/* FILL IMAGES */
/* UPLOAD IMAGE ON GPU */
err = clEnqueueWriteImage(que, image_to_open, CL_TRUE, origin, region,
info_open.width, 0, info_open.data, 1, evt_fill, &evt_upload );
ocl_check(err, "Upload image on GPU");
/* UPLOAD IMAGE ON GPU */
/* CREATION OF KERNELS */
cl_kernel k_op = clCreateKernel(prog, "op", &err);
ocl_check(err, "Creation of kernel op");
/* CREATION OF KERNELS */
/* LAUNCH CUSTOMS KERNELS */
const cl_event evt_wait_list [] = {
evt_fill, evt_upload
};
evt_op = launch_op(que, k_op, info_open, info_to_save, _lws, image_to_open, image_to_save, 2, evt_wait_list);
/* LAUNCH CUSTOMS KERNELS */
/* DOWNLOAD FROM GPU */
err = clEnqueueReadImage(que, image_to_save, CL_TRUE, origin, region, info_to_save.width, 0,
info_to_save.data, 1, &evt_op, &evt_download);
ocl_check(err, "Download from device");
/* DOWNLOAD FROM GPU */
/* SAVE PAM FILE */
if(save_pam("ocl_image_tr.pam",&info_to_save))
printf("Errore salvataggio file Pam\n");
/* SAVE PAM FILE */
/* BENCHMARKING */
cl_ulong runtime_fill[2]={
runtime_ns(*evt_fill), runtime_ns(*(evt_fill+1))
};
cl_ulong runtime_upload= runtime_ns(evt_upload);
cl_ulong runtime_op = runtime_ns(evt_op);
cl_ulong runtime_download = runtime_ns(evt_download);
printf(SEPARATOR);
printf(" Kernel \t Runtime \t Bandwidth \t GFLOPS\n");
printf(" Fill_1 \t %gms \t %gGB/s \t %g GFLOPS\n", 1.0e-6*runtime_fill[0],
(double)(info_open.height*info_open.width*sizeof(cl_short4))/(runtime_fill[0]),
(double)(info_open.height*info_open.width*sizeof(cl_short4))/(runtime_fill[0]));
printf(" Fill_2 \t %gms \t %gGB/s \t %g GFLOPS\n", 1.0e-6*runtime_fill[1],
(double)(info_to_save.height*info_to_save.width*sizeof(cl_short4))/(runtime_fill[1]),
(double)(info_to_save.height*info_to_save.width*sizeof(cl_short4))/(runtime_fill[1]));
printf(" Upload \t %gms \t %gGB/s \t %g GFLOPS\n", 1.0e-6*runtime_upload,
(double)(info_open.height*info_open.width*sizeof(cl_short4))/(runtime_upload),
(double)(info_open.height*info_open.width*sizeof(cl_short4))/(runtime_upload));
printf(" Op \t %gms \t %gGB/s \t %g GFLOPS\n", 1.0e-6*runtime_op,
(double)(info_to_save.height*info_to_save.width*sizeof(cl_short4))/(runtime_op),
(double)(info_to_save.height*info_to_save.width*sizeof(cl_short4))/(runtime_op));
printf(" Download \t %gms \t %gGB/s \t %g GFLOPS\n", 1.0e-6*runtime_download,
(double)(info_to_save.height*info_to_save.width*sizeof(cl_short4))/(runtime_download),
(double)(info_to_save.height*info_to_save.width*sizeof(cl_short4))/(runtime_download));
printf(SEPARATOR);
/* BENCHMARKING */
/* CLEANING... */
clReleaseEvent(evt_op);
clReleaseEvent(evt_download);
clReleaseEvent(*evt_wait_list);
clReleaseEvent(*(evt_wait_list+1));
clReleaseEvent(*evt_fill);
clReleaseEvent(*(evt_fill+1));
clReleaseMemObject(image_to_open);
clReleaseMemObject(image_to_save);
clReleaseDevice(d);
clReleaseKernel(k_op);
clReleaseProgram(prog);
clReleaseContext(ctx);
clReleaseCommandQueue(que);
return 0;
}
這裏是內核:
/* Copy src into dst, one work-item per pixel.
 *
 * The host enqueues this kernel with global dimension 0 covering the image
 * height and dimension 1 covering the image width, both rounded up to a
 * multiple of the work-group size. Global id 1 is therefore the x coordinate
 * and global id 0 the y coordinate. */
__kernel void op(image2d_t read_only src, image2d_t write_only dst){
    const sampler_t sampler_ui = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST ;

    const int x = get_global_id(1);
    const int y = get_global_id(0);

    /* The global range is padded, so some work-items fall outside the image.
     * With CLK_ADDRESS_NONE an out-of-range access is undefined, hence a
     * work-item must leave as soon as EITHER coordinate is out of range,
     * and an index equal to the size is already out of range. */
    if (x >= get_image_width(src) || y >= get_image_height(src))
        return;

    const int2 pos = (int2)(x, y);
    const uint4 pix = read_imageui(src, sampler_ui, pos);
    write_imageui(dst, pos, pix);
}
我使用的頭文件是我的 GPGPU 老師編寫的,無法在此提供。程序給出的錯誤如下:
Allocate image to open - error -40
這條信息是我在分配圖像時打印出來的。這是怎麼回事?
錯誤 -40 是 CL_INVALID_IMAGE_SIZE,但圖像的尺寸只有 479x400 或 1280x900。
這是一個**最小**示例嗎? – loki
我忘了說明:我使用的是 OpenCL 1.1 和 NVIDIA CUDA 7.5。