2012-05-21 59 views
0

我試圖在運行時用 dlsym 加載 CUDA 驅動程序 API 函數,但遇到了一個奇怪的錯誤。下面這段代碼(用 nvcc 編譯)在我的系統上可以順利運行。(問題標題:CUDA 驅動程序調用返回錯誤 1(cudaErrorMissingConfiguration))

#include <stdio.h>
#include <stdlib.h>

#include <cuda.h>

int main(int argc,char *argv[]){ 

    if(argc<3){ 
    printf("Usage: ./test.cu <ptx_file> <cuda_device>\n"); 
    exit(0); 
    } 

    // Error code 
    CUresult error; 

    // My number 
    unsigned int h_var=7; 

    // Initialize driver API 
    error = cuInit(0); 
    if((int)error!=0){ 
    printf("Error! cuInit returned: %d\n",(int)error); 
    exit(0); 
    } 

    // Get Cuda Device and give handle 
    CUdevice cu_device; 
    error = cuDeviceGet(&cu_device,atoi(argv[2])); 
    if((int)error!=0){ 
    printf("Error! cuDeviceGet returned: %d\n",(int)error); 
    exit(0); 
    } 

    // Create context to run on device 
    CUcontext cu_context; 
    error = cuCtxCreate(&cu_context, 0, cu_device); 
    if((int)error!=0){ 
    printf("Error! cuCtxCreate returned: %d\n",(int)error); 
    exit(0); 
    } 

    // Load ptx code 
    CUmodule cu_module; 
    error = cuModuleLoad(&cu_module,argv[1]); 
    if((int)error!=0){ 
    printf("Error! cuModuleLoad returned: %d\n",(int)error); 
    exit(0); 
    } 

    // Get kernel function 
    CUfunction func; 
    error = cuModuleGetFunction(&func,cu_module,"testing"); 
    if((int)error!=0){ 
    printf("Error! cuModuleGetFunction returned: %d\n",(int)error); 
    exit(0); 
    } 

    CUdeviceptr var; 

    // Allocate device memory 
    unsigned int size = sizeof(unsigned int); 
    error = cuMemAlloc(&var, size); 
    if((int)error!=0){ 
    printf("Error! cuMemAlloc returned: %d\n",(int)error); 
    exit(0); 
    } 

    // Copy variable to host 
    error = cuMemcpyHtoD(var,&h_var,size); 
    if((int)error!=0){ 
    printf("Error! cuMemcpyHtoD returned: %d\n",(int)error); 
    exit(0); 
    } 

    // Lauch kernel 
    void *args[] = {&var}; 
    error = cuLaunchKernel(func, 1, 1, 1, 1, 1, 1, 0, NULL, args, NULL); 
    if((int)error!=0){ 
    printf("Error! cuLaunchKernel returned: %d\n",(int)error); 
    exit(0); 
    } 

    // Get result to host 
    error = cuMemcpyDtoH(&h_var,var,size); 
    if((int)error!=0){ 
    printf("Error! cuMemcpyDtoH returned: %d\n",(int)error); 
    exit(0); 
    } 

    // Free device memory 
    error = cuMemFree(var); 
    if((int)error!=0){ 
    printf("Error! cuMemFree returned: %d\n",(int)error); 
    exit(0); 
    } 

    // Destroy context 
    error = cuCtxDestroy(cu_context); 
    if((int)error!=0){ 
    printf("Error! cuCtxDestroy returned: %d\n",(int)error); 
    exit(0); 
    } 

    // Print result 
    printf("var: %d\n",h_var); 
} 

我寫的 PTX 代碼如下(只是一個簡單的加法,用來測試它是否能正常工作):

.version 1.4
.target sm_10, map_f64_to_f32

// Kernel entry point: adds 3, in place, to the 32-bit unsigned integer
// that `mynum` points to.
//   mynum - 64-bit global-memory address of the value to update.
// The value is accessed as .u32 because the host allocates and copies
// sizeof(unsigned int) bytes; 64-bit loads/stores would touch memory past
// the end of that allocation.
.entry testing (
    .param .u64 mynum) 
{ 

    .reg .u64 %r;               // device address of the value
    .reg .u32 %i;               // the value itself
    ld.param.u64 %r,[mynum]; 
    ld.global.u32 %i,[%r]; 
    add.u32 %i,%i,3; 
    st.global.u32 [%r+0],%i; 
    exit; 

} 

到目前爲止一切正常。然後,我修改了這段代碼,改用 dlsym 來加載每一個函數,像這樣:

/* Lauch kernel */ 
    void *args[] = {&var}; 
    int (*_cuLaunchKernel)(void *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, void *, void **, void **); 
    *(void **)(&_cuLaunchKernel) = dlsym(dlhandle, "cuLaunchKernel"); 
    (*_cuLaunchKernel)(cu_func,1 , 1, 1, 1, 1, 1, 0, NULL, args, NULL); 


/* Get result to host */ 
    int (*_cuMemcpyDtoH)(void *, void *, size_t); 
    *(void **)(&_cuMemcpyDtoH) = dlsym(dlhandle, "cuMemcpyHtoD"); 
    error = (*_cuMemcpyDtoH)(&h_var,var,size); 

(其餘代碼遵循相同的邏輯。)我的所有函數都返回 0,這意味着一切正常,只有最後一個 _cuMemcpyDtoH 返回錯誤 1(= cudaErrorMissingConfiguration)。有人可以解釋這個錯誤的含義,以及它爲什麼會發生嗎?有沒有辦法解決它?爲什麼只有在運行時加載函數時才會出現這個錯誤?

謝謝。

我的系統: NVCC 版本:4.1;GPU:GTX 480;NVRM 版本:NVIDIA UNIX x86_64 Kernel Module 285.05.32;GCC 版本:gcc version 4.5.2 (Ubuntu/Linaro 4.5.2-8ubuntu4)

回答

0

對於你正在使用的驅動程序 API,錯誤碼 1 表示 CUDA_ERROR_INVALID_VALUE(cudaErrorMissingConfiguration 是運行時 API 中數值爲 1 的錯誤碼,不適用於驅動程序 API)。

出現這個錯誤的原因是下面這一行:

*(void **)(&_cuMemcpyDtoH) = dlsym(dlhandle, "cuMemcpyHtoD"); 

---你加載了錯誤的函數:你用的是 HtoD,而不是 DtoH。

相關問題