2013-03-20 85 views
0

我正在嘗試使用CUDA和C編寫一個基本的矩陣乘法程序。代碼本身目前還沒有真正做任何事情,但至少應該能夠編譯。在對這個問題進行了一些研究之後,我認爲問題在於CUDA頭文件沒有被正確包含,這說明我的Makefile存在問題。我對CUDA(以及C)都非常缺乏經驗,所以任何幫助都將不勝感激。
CUDA Makefile包含錯誤

命令 make matrixMult1 的輸出:

c99 -I. -I/usr/local/cuda/include -c matrixMult1.c -o matrixMult1.o 
matrixMult1.c: In function 'main': 
matrixMult1.c:77: warning: implicit declaration of function 'cudaMalloc' 
matrixMult1.c:82: warning: implicit declaration of function 'cudaMemcpy' 
matrixMult1.c:83: error: 'cudaMemcpyHostToDevice' undeclared (first use in this 
function) 
matrixMult1.c:83: error: (Each undeclared identifier is reported only once 
matrixMult1.c:83: error: for each function it appears in.) 
matrixMult1.c:106: warning: implicit declaration of function 'cudaFree' 
make: *** [matrixMult1.o] Error 1 

Makefile:

# Build rules for matrixMult1: host C code that calls the CUDA runtime API.
GCC = c99
CUDA_INSTALL_PATH := /usr/local/cuda
INCLUDES := -I. -I$(CUDA_INSTALL_PATH)/include
CUDA_LIBS := -L$(CUDA_INSTALL_PATH)/lib -lcudart

# Compile step: the CUDA include path has to be passed here, not only at link time.
# $@ expands to the target name (matrixMult1.o). Recipe lines must begin with a tab.
matrixMult1.o: matrixMult1.c
	$(GCC) $(INCLUDES) -c matrixMult1.c -o $@

# Link step: $@ expands to matrixMult1; the CUDA runtime library is linked in here.
matrixMult1: matrixMult1.o
	$(GCC) -o $@ matrixMult1.o $(CUDA_LIBS)

C程序:

//******************************************************************** 
// matrixMult1.c 
// 
// A basic matrix multiplication program. 
//******************************************************************** 

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "cuda.h"
#include <cuda_runtime.h>

#define WA 3 
#define HA 3 
#define WB 3 
#define HB WA 
#define WC WB 
#define HC HA 

void initMatrix(float * matrix, int numIndices); 

//--------------------------------------------------------------------
// printMatrix - Prints a labelled matrix to stdout, one row per line.
//
// PRE: label is the text printed after "Matrix "; matrix points to at
//  least numIndices floats; width is the number of values per
//  printed row and must be non-zero.
// POST: A "\nMatrix <label>:\n" heading followed by every value has
//  been written to stdout; values in a row are separated by
//  " | " and each full row ends with a newline.
//--------------------------------------------------------------------
static void printMatrix(const char * label, const float * matrix,
                        unsigned int numIndices, unsigned int width) {

    printf("\nMatrix %s:\n", label);

    for (unsigned int i = 0; i < numIndices; i++) {
        printf("%f ", matrix[i]);

        /* End the row after every width-th value */
        if (((i + 1) % width) == 0) {
            printf("\n");
        } else {
            printf(" | ");
        }
    }
}

//*************
// Main Program
//*************
//
// Builds two random matrices A and B on the host, prints them, copies
// A, B and a zero-filled C to device memory, prints C and releases
// all host and device memory.
//
// Returns EXIT_SUCCESS, or EXIT_FAILURE if host memory could not be
// allocated.
int main(int argc, char** argv) {

    /* Command-line arguments are not used */
    (void) argc;
    (void) argv;

    /* Set random seed */
    srand(2013);

    /* Compute element counts and byte sizes for matrices A, B, and C */
    const unsigned int sizeA = WA * HA;
    const unsigned int sizeB = WB * HB;
    const unsigned int sizeC = WC * HC;
    const unsigned int memoryA = sizeof(float) * sizeA;
    const unsigned int memoryB = sizeof(float) * sizeB;
    const unsigned int memoryC = sizeof(float) * sizeC;

    /*
    Allocate host memory for matrices A, B, and C.  C is never written
    by this program, so it is zero-filled with calloc to make sure the
    print and the device copy below never read indeterminate memory.
    */
    float * matrixA = malloc(memoryA);
    float * matrixB = malloc(memoryB);
    float * matrixC = calloc(sizeC, sizeof(float));

    if (matrixA == NULL || matrixB == NULL || matrixC == NULL) {
        fprintf(stderr, "Failed to allocate host memory for the matrices\n");
        free(matrixA);
        free(matrixB);
        free(matrixC);
        return EXIT_FAILURE;
    }

    /* Initialize matrices A and B */
    initMatrix(matrixA, (int) sizeA);
    initMatrix(matrixB, (int) sizeB);

    /* Print matrices A and B, each wrapped on its own width */
    printMatrix("A", matrixA, sizeA, WA);
    printMatrix("B", matrixB, sizeB, WB);

    /*
    Allocate device memory.  The pointers start out as NULL so they
    hold a defined value even if an allocation fails.
    NOTE(review): the status codes returned by the CUDA calls below
    are not checked - consider comparing them against cudaSuccess.
    */
    float* deviceMemA = NULL;
    float* deviceMemB = NULL;
    float* deviceMemC = NULL;
    cudaMalloc((void**) &deviceMemA, memoryA);
    cudaMalloc((void**) &deviceMemB, memoryB);
    cudaMalloc((void**) &deviceMemC, memoryC);

    /* Copy host memory to device */
    cudaMemcpy(deviceMemA, matrixA, memoryA,
               cudaMemcpyHostToDevice);
    cudaMemcpy(deviceMemB, matrixB, memoryB,
               cudaMemcpyHostToDevice);
    cudaMemcpy(deviceMemC, matrixC, memoryC,
               cudaMemcpyHostToDevice);

    /* Print matrix C */
    printMatrix("C", matrixC, sizeC, WC);
    printf("\n");

    /* Free up memory */
    free(matrixA);
    free(matrixB);
    free(matrixC);
    cudaFree(deviceMemA);
    cudaFree(deviceMemB);
    cudaFree(deviceMemC);

    return EXIT_SUCCESS;
}

//--------------------------------------------------------------------
// initMatrix - Fills a matrix with pseudo-random float values.
//
// PRE: matrix points to storage for at least numIndices floats;
//  numIndices is the number of elements to fill.
// POST: The first numIndices elements of matrix each hold the result
//  of rand() divided by RAND_MAX, taken from successive rand()
//  calls.  Nothing is written when numIndices is zero or
//  negative.
//--------------------------------------------------------------------
void initMatrix(float * matrix, int numIndices) {

    /* Divisor that scales a rand() result by RAND_MAX */
    const float scale = (float)RAND_MAX;
    int idx = 0;

    /* Walk the elements in order, drawing one random value for each */
    while (idx < numIndices) {
        matrix[idx] = rand() / scale;
        idx++;
    }
}
+0

Makefile沒有錯,有問題的是你的代碼。您尚未將任何CUDA頭文件包含到您的代碼中。編譯器抱怨常量未定義,這真的令人驚訝嗎? – talonmies 2013-03-20 09:19:14

+0

在您的代碼中添加 `#include <cuda_runtime.h>`。 – 2013-03-20 09:21:27

+0

我試過cuda.h和cuda_runtime.h,都導致'沒有這樣的文件或目錄'錯誤。我也檢查過以確保這些文件位於/ usr/local/cuda/include /中。我在stackoverflow中編輯了代碼,以反映我的更改和對錯誤輸出的更改。 – 2013-03-20 09:27:26

回答

1

兩個問題在這裏:

  1. 你沒有在你的代碼中加入適當的頭文件(你修正了這個頭文件)
  2. 你的Makefile實際上已經壞了。它應該是這個樣子:
# Build rules for matrixMult1: host C code that calls the CUDA runtime API.
GCC = c99
CUDA_INSTALL_PATH := /usr/local/cuda
INCLUDES := -I. -I$(CUDA_INSTALL_PATH)/include
CUDA_LIBS := -L$(CUDA_INSTALL_PATH)/lib -lcudart

# Compile step: the CUDA include path has to be passed here, not only at link time.
# $@ expands to the target name (matrixMult1.o). Recipe lines must begin with a tab.
matrixMult1.o: matrixMult1.c
	$(GCC) $(INCLUDES) -c matrixMult1.c -o $@

# Link step: $@ expands to matrixMult1; the CUDA runtime library is linked in here.
matrixMult1: matrixMult1.o
	$(GCC) -o $@ matrixMult1.o $(CUDA_LIBS)

[免責聲明:未經測試,使用風險自負]

目前的問題是,包含路徑僅在構建的鏈接階段被指定,而編譯階段並沒有用到它。

請注意,這些更改還能預先避免因爲從未鏈接CUDA運行時庫而在鏈接階段出現的符號缺失錯誤。另外,根據您使用的是32位還是64位主機操作系統,您可能需要將庫路徑更改爲$(CUDA_INSTALL_PATH)/lib64,以確保鏈接正常工作。

+0

我用這個Makefile修訂版的結果更新了帖子。我不再收到'沒有這樣的文件或目錄'錯誤,但它又回到了原來的錯誤狀態。我正在運行32位Ubuntu 9.10。我不認爲我遇到的是符號缺失錯誤,我是不是漏掉了什麼? – 2013-03-20 09:50:03

1

CUDA程序需要用nvcc編譯。雖然你的程序還沒有包含任何CUDA內核,但我相信這就是你最終想要實現的。

將文件從matrixMult1.c重命名爲matrixMult1.cu,移除#include "cuda.h"這一行(使用nvcc編譯的程序不需要任何CUDA專用的包含文件),並使用nvcc代替gcc進行編譯(例如,在Makefile文件的開頭設定GCC = nvcc)。