2013-10-19 25 views
0

我是 OpenCL 新手,在呼叫 clCreateKernel 時遇到一個問題:它回傳 CL_INVALID_PROGRAM_EXECUTABLE,希望有人能幫忙。程式碼是根據 http://www.cs.bris.ac.uk/home/simonm/workshops/OpenCL_lecture3.pdf 中最後一個優化版本寫成的。(標題:OpenCL clCreateKernel 拋出 CL_INVALID_PROGRAM_EXECUTABLE)

下面是代碼:

#define ORDER 10 // Order of the square matrices A, B, and C 
#define AVAL  3.0  // A elements are constant and equal to AVAL 
#define BVAL  5.0  // B elements are constant and equal to BVAL 
#define TOL  (0.001) // tolerance used in floating point comparisons 
#define DIM  2  // Max dim for NDRange 
#define COUNT 1  // number of times to do each multiplication 
#define SUCCESS 1 
#define FAILURE 0 


// Helper functions 

/*
 * Fill the host matrices with their starting values.
 *
 * The matrices are stored row-major in flat arrays:
 *   A has Ndim rows and Pdim columns; every element is set to AVAL.
 *   B has Pdim rows and Mdim columns; every element is set to BVAL.
 *   C has Ndim rows and Mdim columns; every element is set to 0.
 *
 * Each array must have room for rows * columns floats. A dimension that is
 * zero or negative simply results in no writes to the affected arrays.
 */
void initmat(int Mdim, int Ndim, int Pdim, float *A, float *B, float *C) 
{ 
    int i, j; 

    /*
     * The row stride of a row-major matrix is its column count. Using the
     * row count instead only happens to work when the matrix is square.
     */
    for (i = 0; i < Ndim; i++) {
        for (j = 0; j < Pdim; j++) {
            A[(size_t)i * (size_t)Pdim + (size_t)j] = (float)AVAL;
        }
    }

    for (i = 0; i < Pdim; i++) {
        for (j = 0; j < Mdim; j++) {
            B[(size_t)i * (size_t)Mdim + (size_t)j] = (float)BVAL;
        }
    }

    for (i = 0; i < Ndim; i++) {
        for (j = 0; j < Mdim; j++) {
            C[(size_t)i * (size_t)Mdim + (size_t)j] = 0.0f;
        }
    }
} 

// Definicion de la funcion: 
/*
 * Load the entire contents of "kernel.cl" (looked up relative to the current
 * working directory) into memory.
 *
 * Returns a heap-allocated, NUL-terminated buffer that the caller owns and
 * must release with free(). Returns NULL if the file cannot be opened, its
 * length cannot be determined, memory cannot be allocated, or a read error
 * occurs. An empty file yields an empty string, not NULL.
 *
 * Progress and error messages are written to stdout.
 */
char * readKernel(void) 
{ 
    char *source = NULL; 
    long file_size; 
    size_t source_length; 
    size_t bytes_read; 

    /* Binary mode so the byte count from ftell() matches what fread() returns. */
    FILE *fp = fopen("kernel.cl", "rb"); 
    if (fp == NULL) 
    { 
        printf("Cannot Open Kernel.cl\n"); 
        return NULL; 
    } 
    printf("Kernel.cl Opened\n"); 

    /* Measure the file by seeking to its end, then rewind for reading. */
    if (fseek(fp, 0, SEEK_END) != 0 
        || (file_size = ftell(fp)) < 0 
        || fseek(fp, 0, SEEK_SET) != 0) 
    { 
        printf("Cannot determine Kernel.cl length\n"); 
        fclose(fp); 
        return NULL; 
    } 
    source_length = (size_t) file_size; 

    if (source_length == 0) 
    { 
        printf("Kernel.cl is empty\n"); 
    } 
    else 
    { 
        printf("Kernel.cl length: %zu bytes\n", source_length); 
    } 

    /* One extra zeroed byte guarantees NUL termination. */
    source = calloc(source_length + 1, 1); 
    if (source == NULL) 
    { 
        printf("Memory allocation failed"); 
        fclose(fp); 
        return NULL; 
    } 

    bytes_read = fread(source, 1, source_length, fp); 
    if (bytes_read != source_length && ferror(fp)) 
    { 
        printf("Cannot read Kernel.cl\n"); 
        free(source); 
        fclose(fp); 
        return NULL; 
    } 
    source[bytes_read] = '\0'; 

    fclose(fp); 
    printf("Kernel.cl Read\n"); 

    return source; 
} 
int main(int argc, char **argv) 
{ 



    // Declare and iniciate data 
    float *A, *B, *C; 
    int Mdim, Ndim, Pdim; 
    int err, szA, szB, szC; 
    size_t global[DIM]; 
    size_t local[DIM]; 
    cl_device_id device_id; 
    cl_context context; 
    cl_command_queue commands; 
    cl_program program; 
    cl_kernel kernel; 
    cl_uint nd; 
    cl_mem a_in, b_in, c_out; 
    Ndim = ORDER; 
    Pdim = ORDER; 
    Mdim = ORDER; 
    szA = Ndim*Pdim; 
    szB = Pdim*Mdim; 
    szC = Ndim*Mdim; 
    A = (float *)malloc(szA*sizeof(float)); 
    B = (float *)malloc(szB*sizeof(float)); 
    C = (float *)malloc(szC*sizeof(float)); 
    const char* C_elem_KernelSource = 
    "__kernel        \n" 
    "void mmul(       \n" 
    " const int Mdim,     \n" 
    " const int Ndim,     \n" 
    " const int Pdim,     \n" 
    " __global float* A,     \n" 
    " __global float* B,     \n" 
    " __global float* C,     \n" 
    " __local float* Bwrk)    \n" 
    "{          \n" 
    " int k,j;       \n" 
    " int i = get_global_id(0);   \n" 
    " int iloc = get_local_id(0);  \n" 
    " int nloc = get_local_size(0);  \n" 
    " float Awrk[10];     \n" 
    " float tmp;       \n" 
    " for (k=0; k<Pdim; k++)    \n" 
    "  Awrk[k] = A[i*Ndim+k];   \n" 
    " for (j=0; j<Mdim; j++){   \n" 
    "  for (k=iloc; k<Pdim; k=k+nloc) \n" 
    "   Bwrk[k] = B[k*Pdim+j];  \n" 
    "  barrier(CLK_LOCAL_MEM_FENCE); \n" 
    "  tmp = 0.0f;     \n" 
    "  for (k=0; k<Pdim; k++)   \n" 
    "   tmp += Awrk[k] * Bwrk[k]; \n" 
    "  C[i*Ndim+j] += tmp;   \n" 
    "}          \n" 
    ; 

    initmat(Mdim, Ndim, Pdim, A, B, C); 
    // Setup the plataform 
    cl_uint num_platforms; 
    if(clGetPlatformIDs(0, NULL, &num_platforms) != CL_SUCCESS) 
    { 
     printf("Unable to get platform!\n"); 
    }else{ 
     printf("Plataformas Disponibles: %u \n", num_platforms); 
    } 
    //Identificador 
    cl_platform_id platform_id; 
    clGetPlatformIDs(1, &platform_id, &num_platforms); 
    printf("Plataformas creada\n"); 

    err = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL); 
    if (err==CL_SUCCESS){ 
     printf("Device creado \n"); 
    }else { 
     printf("Error %d \n", err); 
    } 
    context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &err); 
    if (err==CL_SUCCESS){ 
     printf("Contexto creado \n"); 
    }else { 
     printf("Error creando contexto \n"); 
    } 
    commands = clCreateCommandQueue(context, device_id, 0, &err); 
    if (err==CL_SUCCESS){ 
     printf("cola de comandos creadas \n"); 
    }else { 
     printf("Error creando cola de comandos \n"); 
    } 

    // Setup buffers and write A and B matrices to the device memory 
    a_in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * szA, NULL, NULL); 
    b_in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * szB, NULL, NULL); 
    c_out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * szC, NULL, NULL); 
    err = clEnqueueWriteBuffer(commands, a_in, CL_TRUE, 0, sizeof(float) * szA, A, 0, NULL, NULL); 
    err = clEnqueueWriteBuffer(commands, b_in, CL_TRUE, 0, sizeof(float) * szB, B, 0, NULL, NULL); 
    // Build the program, define the kernel and setup arguments 
    program = clCreateProgramWithSource(context, 1, (const char **) &C_elem_KernelSource, NULL, &err); 
    if (err==CL_SUCCESS){ 
     printf("programa creado \n"); 
    }else { 
     printf("Error generado %d creando programa\n", err); 
    } 
    //Compila el programa en el dispositivo elegido 
    clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); 
    if (err==CL_SUCCESS){ 
     printf("programa compilado 1\n"); 
    }else { 
     printf("Error generado %d compilando programa 1\n", err); 
    } 
    kernel = clCreateKernel(program, "mmul", &err); 
    if (err==CL_SUCCESS){ 
     printf("Kernel creado \n"); 
    }else { 
     printf("Error generado %d creando kernel\n", err); 
    } 
    err = clSetKernelArg(kernel, 0, sizeof(int), &Mdim); 
    err |= clSetKernelArg(kernel, 1, sizeof(int), &Ndim); 
    err |= clSetKernelArg(kernel, 2, sizeof(int), &Pdim); 
    err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &a_in); 
    err |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &b_in); 
    err |= clSetKernelArg(kernel, 5, sizeof(cl_mem), &c_out); 
    err |= clSetKernelArg(kernel, 6, sizeof(float)*Pdim, NULL); 
    if (err==CL_SUCCESS){ 
     printf("Argumentos del Kernel configurados \n"); 
    }else { 
     printf("Error configurando argumentos del kernel \n"); 
    } 
    //Run the kernel and collect results 
    // 1D ND Range set to dimensions of C matrix 
    //Local Dim set to 250 so number of work-groups match number of 
    //compute units (4 in this case) for our order 1000 matrices 
    //Pass local memory to kernels. This requires a change to the kernel 
    //argument list … a new call to clSetKernelArg is needed 
    printf("Encolando Kernel:\n"); 
    global[0] = (size_t) Ndim; global[1] = (size_t) Mdim; local[0] = (size_t) 2; 
    err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, global, local, 0, NULL, NULL); 
    if (err==CL_SUCCESS){ 
     printf("Kernel enviado a device \n"); 
    }else { 
     printf("Error enviando kernel a device \n"); 
    } 
    clFinish(commands); 
    err = clEnqueueReadBuffer(commands, c_out, CL_TRUE, 0, sizeof(float) * szC, C, 0, NULL, NULL); 
    //test_results(A, B, c_out); 


} 

謝謝

+0

剛開始學習 OpenCL 時,我建議你直接使用 C++ 綁定。在真正進階的大型專案中,使用舊的 C API 會變得一團糟。 – DarkZeros

+0

有時建置日誌(build log)會包含格式良好的錯誤訊息,指出編譯問題(例如語法錯誤)。[請搜尋 clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG]。 – Tim

回答

3

主要問題是,第 112 行的左大括號沒有對應的右大括號:

" for (j=0; j<Mdim; j++){   \n" 

另外請注意,第 34 行宣告的指標在未初始化的情況下就被使用:

size_t *source_length; 

在第 170 行,應該在 clBuildProgram() 呼叫前加上 err =,這樣錯誤檢查才能如預期運作。之後,您可以加入邏輯,在建置失敗時使用 clGetProgramBuildInfo() 取得詳細資訊。