2012-10-25 178 views
1

是否可以使用NVIDIA的nvcc編譯器編譯.cl文件?我正在嘗試在CUDA平臺下設置Visual Studio 2010來編寫OpenCL代碼。但是當我選擇CUDA C/C++編譯器來編譯和生成.cl文件時,它會報錯,提示nvcc不存在。問題出在哪裏?能否使用NVIDIA的nvcc編譯器編譯並生成.cl文件?

+0

也許改用'nvcc.exe'就可以了? –

+1

是的,你可以用'nvcc'編譯cl。不要只給出'nvcc'這個名字,而要給出'nvcc'可執行文件的完整路徑。 – ahmad

+0

謝謝艾哈邁德。我非常感謝你的幫助。 但是,如果我必須使用nvidia的gpu進行opencl編碼,那麼我是否還必須安裝CUDA 4.2?或者GPU計算SDK就足夠了? –

回答

1

您應該可以使用nvcc編譯OpenCL代碼。通常,我建議C兼容代碼使用文件擴展名.c,C++兼容代碼使用.cpp(*),但nvcc提供了文件擴展名覆蓋選項(-x ...),因此我們可以修改這一行爲。下面是一個在CUDA 8.0.61、RHEL 7、Tesla K20x上可以運行的示例:

$ cat t4.cpp 
#include <CL/opencl.h> 
#include <stdint.h> 
#include <stdio.h> 
#include <inttypes.h> 
#include <stdlib.h> 

/*
 * OpenCL C source for the single kernel `test_rotate`, handed to
 * clCreateProgramWithSource() at run time.
 *
 * Kernel arguments: a __global ulong pointer d_count and two ulong scalars,
 * loops and patt.  Starting from n = patt, each of the `loops` iterations
 * ANDs n with 107 shifted left by (patt + i % 7); finally n + loops is
 * stored in d_count[0].
 *
 * The adjacent literals below are concatenated by the compiler into one
 * string with no newlines.
 */
const char source[] =
    "__kernel void test_rotate("
    "__global ulong *d_count, ulong loops, ulong patt)"
    "{ ulong n = patt;"
    " for (ulong i = 0; i<loops; i++) n &= (107 << (patt+(i%7)));"
    " d_count[0] = n + loops;}"
;

int main(int argc, char *argv[]) 
{ 
    cl_platform_id platform; 
    cl_device_id device; 
    cl_context context; 
    cl_command_queue queue1, queue2; 
    cl_program program; 
    cl_mem mem1, mem2; 
    cl_kernel kernel; 

    bool two_kernels = false; 
    unsigned long long loops = 1000; 
    if (argc > 1) loops *= atoi(argv[1]); 
    if (argc > 2) two_kernels = true; 
    if (two_kernels) printf("running two kernels\n"); 
    else printf("running one kernel\n"); 
    printf("running %lu loops\n", loops); 
    unsigned long long pattern = 1; 
    clGetPlatformIDs(1, &platform, NULL); 
    clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL); 
    context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL); 
    queue1 = clCreateCommandQueue(context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, NULL); 
    queue2 = clCreateCommandQueue(context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, NULL); 

    const char *sources[1] = {source}; 
    program = clCreateProgramWithSource(context, 1, sources, NULL, NULL); 
    clBuildProgram(program, 1, &device, NULL, NULL, NULL); 
    mem1 = clCreateBuffer(context, CL_MEM_READ_WRITE, 1*sizeof(cl_ulong), NULL, NULL); 
    mem2 = clCreateBuffer(context, CL_MEM_READ_WRITE, 1*sizeof(cl_ulong), NULL, NULL); 
    kernel = clCreateKernel(program, "test_rotate", NULL); 
    const size_t work_size[1] = {1}; 
    clSetKernelArg(kernel, 0, sizeof(mem1), &mem1); 
    clSetKernelArg(kernel, 1, sizeof(loops), &loops); 
    clSetKernelArg(kernel, 2, sizeof(pattern), &pattern); 

    clEnqueueNDRangeKernel(queue1, kernel, 1, NULL, work_size, work_size, 0, NULL, NULL); 
    if (two_kernels){ 
    clSetKernelArg(kernel, 0, sizeof(mem2), &mem2); 
    clSetKernelArg(kernel, 1, sizeof(loops), &loops); 
    clSetKernelArg(kernel, 2, sizeof(pattern), &pattern); 

    clEnqueueNDRangeKernel(queue2, kernel, 1, NULL, work_size, work_size, 0, NULL, NULL); 
    } 
    cl_ulong *buf1 = (cl_ulong *)clEnqueueMapBuffer(queue1, mem1, true, CL_MAP_READ, 0, 1*sizeof(cl_ulong), 0, NULL, NULL, NULL); 
    cl_ulong *buf2 = (cl_ulong *)clEnqueueMapBuffer(queue2, mem2, true, CL_MAP_READ, 0, 1*sizeof(cl_ulong), 0, NULL, NULL, NULL); 
    printf("result1: %lu\n", buf1[0]); 
    printf("result2: %lu\n", buf2[0]); 
    clEnqueueUnmapMemObject(queue1, mem1, buf1, 0, NULL, NULL); 
    clEnqueueUnmapMemObject(queue2, mem2, buf2, 0, NULL, NULL); 
    return 0; 
} 
$ nvcc -arch=sm_35 -o t4 t4.cpp -lOpenCL 
$ ./t4 
running one kernel 
running 1000 loops 
result1: 1000 
result2: 0 
$ cp t4.cpp t4.cl 
$ nvcc -arch=sm_35 -x cu -o t4 t4.cl -lOpenCL 
$ ./t4 
running one kernel 
running 1000 loops 
result1: 1000 
result2: 0 
$ 

請注意,這裏的代碼並沒有做任何有意義或重要的事情,所以我希望大家不要就代碼本身的功能提問。它僅用於演示如何編譯符合C++規範的OpenCL代碼。

(*)(因爲這樣的文件也可以很容易地用常規的主機編譯器(例如GNU編譯器)處理,只需加上相應的包含路徑和鏈接選項即可。)

+0

謝謝,這樣可行!我是從OpenCL轉過來的,之前沒有體會到'nvcc'單一源文件的風格 :-) 建議您回覆評論時@提及對方,我還以爲自己被忽略了 ;-) –