CUDA NVCC編譯器錯誤

-1

爲什麼當我嘗試編譯cuda程序時會拋出這些錯誤：clang: error: unsupported option '-dumpspecs'和clang: error: no input files？CUDA NVCC編譯器錯誤

我按照一個網上教程的說明，在終端上輸入：nvcc -o hello matrix_product.cu，但似乎不起作用……我是否必須添加一些環境變量或類似的東西？

而且file.cu的代碼是：

// Makes every std name used below (string, cout, endl) available unqualified.
// NOTE(review): the #include lines and the BLOCK_SIZE definition are not
// visible in this excerpt — confirm they exist above this point.
using namespace std; 

// Forward declaration so main() can call the error reporter that is defined
// at the end of the file.
// NOTE(review): the parameter is a non-const reference, so a string literal
// cannot bind to it; callers must pass a named string object.
void CheckCudaError(string &e); 


/*
 * productMatrix: every thread stages one element of matrix_a and one element
 * of matrix_b into shared-memory tiles, then accumulates a BLOCK_SIZE-term
 * dot product from those tiles and stores it in matrix_c.
 *
 * Launch layout: the tiles are indexed directly with threadIdx.x/threadIdx.y,
 *   so the block must be BLOCK_SIZE x BLOCK_SIZE threads.
 * Shared memory: two static int tiles of BLOCK_SIZE * BLOCK_SIZE elements.
 *
 * matrix_a, matrix_b: device input buffers.
 * matrix_c:           device output buffer.
 *
 * Fixes relative to the previous version:
 *   - the accumulator is zero-initialised (it was read uninitialised);
 *   - the dot-product loop indexes the tiles with its own counter `i`
 *     (it used an undeclared identifier `e`).
 *
 * NOTE(review): the flat offset below equals
 *   (blockIdx.x + blockIdx.y) * BLOCK_SIZE + threadIdx.x + threadIdx.y,
 * which is symmetric in its block and thread indices, so different threads
 * address the same element. For a gx x gy grid the largest offset is
 *   (gx + gy - 2) * BLOCK_SIZE + 2 * (BLOCK_SIZE - 1),
 * so every buffer must hold more elements than that. The kernel receives no
 * size argument and therefore cannot bounds-check; confirm the intended
 * row/column layout against the caller.
 */
__global__ void productMatrix(int *matrix_a, int *matrix_b, int *matrix_c)
{
    const int blockidx = blockIdx.x;
    const int blockidy = blockIdx.y;

    const int threadx = threadIdx.x;
    const int thready = threadIdx.y;

    // Same element offset is used for both loads and for the final store.
    const int offset = blockidx * BLOCK_SIZE + threadx + blockidy * BLOCK_SIZE + thready;

    __shared__ int Asub[BLOCK_SIZE][BLOCK_SIZE];
    __shared__ int Bsub[BLOCK_SIZE][BLOCK_SIZE];

    Asub[threadx][thready] = matrix_a[offset];
    Bsub[threadx][thready] = matrix_b[offset];

    // All tile entries must be written before any thread reads them.
    __syncthreads();

    int suma = 0;

    for (int i = 0; i < BLOCK_SIZE; ++i)
    {
        suma += Asub[i][thready] * Bsub[threadx][i];
    }

    // Barrier kept from the original; it is reached by every thread of the block.
    __syncthreads();

    matrix_c[offset] = suma;
}


/*
 * Adapter for CheckCudaError: that function takes a non-const string
 * reference, so a literal label has to be materialised as a named string
 * before it can be passed.
 */
static void reportCudaError(const char *label)
{
    string message(label);
    CheckCudaError(message);
}

/*
 * Host driver: fills two host buffers, copies them to the device, launches
 * productMatrix, copies the result back and prints it.
 *
 * Returns 0 on completion, 1 if the host buffers cannot be allocated.
 * CUDA failures are reported through CheckCudaError and do not stop the run
 * (same policy as before).
 */
int main(){

    // Host buffers (h_*) and device buffers (d_*).
    int *h_a, *h_b;
    int *d_a, *d_b, *d_c;

    int NumBlocks = 100 * 100/BLOCK_SIZE;
    int num_elements = NumBlocks * BLOCK_SIZE;
    const size_t num_bytes = num_elements * sizeof(int);


    // Allocate the host buffers. malloc returns void*, which C++ does not
    // convert implicitly, hence the casts.
    h_a = (int *)malloc(num_bytes);
    h_b = (int *)malloc(num_bytes);
    // Host allocation failures are not visible to cudaGetLastError, so they
    // are detected directly.
    if (!h_a || !h_b)
    {
        cout << "malloc_host_error" << endl;
        free(h_a);
        free(h_b);
        return 1;
    }


    // Fill the host buffers.
    for (int i = 0; i < num_elements; ++i)
    {
        h_a[i] = i;
        h_b[i] = num_elements - 1 - i;
    }


    // Reserve device memory for the two inputs and the output.
    cudaMalloc(&d_a, num_bytes);
    cudaMalloc(&d_b, num_bytes);
    cudaMalloc(&d_c, num_bytes);
    reportCudaError("malloc_device_error");


    // Copy the already-filled host buffers to the device.
    cudaMemcpy(d_a, h_a, num_bytes, cudaMemcpyHostToDevice);
    cudaMemcpy(d_b, h_b, num_bytes, cudaMemcpyHostToDevice);
    reportCudaError("memcpy_error");


    // h_b is no longer needed on the host; h_a is reused for the result.
    free(h_b);


    // Launch geometry. Locals are named grid/block so they do not shadow the
    // CUDA built-ins gridDim/blockDim.
    // NOTE(review): with a NumBlocks x NumBlocks grid the kernel's flat offset
    // can reach 2 * num_elements - 2, which is past the num_elements that were
    // allocated — confirm the intended grid size / indexing scheme.
    dim3 grid (NumBlocks, NumBlocks);
    dim3 block (BLOCK_SIZE, BLOCK_SIZE);
    productMatrix <<< grid, block >>> (d_a, d_b, d_c);
    reportCudaError("Calling_device_function_error");


    // Wait for every thread to finish before copying back.
    // cudaDeviceSynchronize replaces the misspelled (and, when spelled
    // correctly, deprecated) cudaThreadSynchronize call.
    cudaDeviceSynchronize();
    reportCudaError("Syncronize_threads_error");


    // Copy the result back to the host.
    cudaMemcpy(h_a, d_c, num_bytes, cudaMemcpyDeviceToHost);
    reportCudaError("mempcy_host_error");


    // Print the result; values are space-separated so they stay readable.
    for (int i = 0; i < num_elements; ++i) cout << h_a[i] << ' ';
    cout << endl;


    // Release device memory.
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);
    reportCudaError("free_device_error");

    // Release host memory.
    free(h_a);

    return 0;
}


/*
 * Reports the most recent CUDA runtime error, if there is one.
 *
 * e: caller-supplied label identifying the step that was just executed.
 *
 * Prints "<label>: <CUDA error description>" to standard output when the
 * runtime has an error recorded; prints nothing otherwise. The previous
 * version printed only the label, which hid the actual failure reason.
 */
void CheckCudaError(string &e)
{
    // Fetching the last error also resets the runtime's stored error state.
    const cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess)
    {
        cout << e << ": " << cudaGetErrorString(err) << endl;
    }
}

來源

2014-02-07 Marc Ortiz Torres

似乎 NVCC 會向系統編譯器（gcc）查詢系統信息（-dumpspecs），但你的系統上有一個假裝成 gcc 的 clang，因此調用失敗。我確定 NVIDIA 有相關的說明，但是你可以嘗試在 PATH 的最前面添加一個包含真正 gcc 的目錄。 –

Dup http://stackoverflow.com/q/19649541/1918193這已經是一個重複... –

請嘗試明確地將 NVCC 指向 clang 編譯器。

NVCC := nvcc -ccbin /usr/bin/clang

也許

NVCC := nvcc -ccbin /usr/local/cuda/bin/clang

另外也別忘添加所有必要包括：

nvcc -I/usr/local/cuda-5.0/include -I. -I.. -I../../common/inc -o MonteCarlo_kernel.o -c MonteCarlo_kernel.cu

來源

2014-02-07 17:57:01 4pie0

這與問題完全沒有關係。指定包含文件的方式怎麼可能改變錯誤 'clang: error: unsupported option '-dumpspecs''？（並注意編譯器是 clang，而不是 gcc，你使用的是 clang 嗎？） – talonmies

好吧，現在你已經編輯了問題的*精確原因*作爲解決方案，這與你的第一個答案同樣錯誤。這裏的要點是，nvcc會嘗試確定主機編譯器是gcc還是clang，但是要做到這一點，它必須明確指向clang，而不是gcc的符號鏈接，這正是問題的根本原因以及你的答案將會做什麼。 – talonmies

@talonmies更正，請檢閱 – 4pie0

CUDA NVCC編譯器錯誤

回答

相關問題