2011-04-05 197 views
6

用CUDA實現矩陣乘法後。我試圖用CUBLAS來實現它(感謝這裏的一些人的建議)。CUBLAS矩陣乘法

我可以乘以平方矩陣,但是(是的,再一次...)我在使用非平方矩陣時遇到了困難。當矩陣A的寬度(A * B = C)變化時,唯一適用的非矩形矩陣乘法就是這種類型。

我沒有得到任何錯誤,但由此產生的矩陣返回錯誤的值。這裏是我的代碼(它基本上是簡單的CUBLAS SDK示例的改編):

#include <stdlib.h> 
#include <stdio.h> 
#include "cublas.h" 
#define HA 2 
#define WA 9 
#define WB 2 
#define HB WA 
#define WC WB 
#define HC HA 
#define index(i,j,ld) (((j)*(ld))+(i)) 

void printMat(float*P,int uWP,int uHP){ 
//printf("\n %f",P[1]); 
int i,j; 
for(i=0;i<uHP;i++){ 

    printf("\n"); 

    for(j=0;j<uWP;j++) 
     printf("%f ",P[index(i,j,uHP)]); 
     //printf("%f ",P[i*uWP+j]); 
} 
} 




int main (int argc, char** argv) { 
    cublasStatus status; 
     int i,j; 
     cublasInit(); 

     float *A = (float*)malloc(HA*WA*sizeof(float)); 
     float *B = (float*)malloc(HB*WB*sizeof(float)); 
     float *C = (float*)malloc(HC*WC*sizeof(float)); 
    if (A == 0) { 
     fprintf (stderr, "!!!! host memory allocation error (A)\n"); 
     return EXIT_FAILURE; 
    } 
    if (B == 0) { 
     fprintf (stderr, "!!!! host memory allocation error (A)\n"); 
     return EXIT_FAILURE; 
    } 
    if (C == 0) { 
     fprintf (stderr, "!!!! host memory allocation error (A)\n"); 
     return EXIT_FAILURE; 
     } 


     for (i=0;i<HA;i++) 
    for (j=0;j<WA;j++) 
     A[index(i,j,HA)] = (float) index(i,j,HA); 
     for (i=0;i<HB;i++) 
    for (j=0;j<WB;j++) 
     B[index(i,j,HB)] = (float) index(i,j,HB); 
    /* 
    for (i=0;i<HA*WA;i++) 
    A[i]=(float) i; 
    for (i=0;i<HB*WB;i++) 
    B[i]=(float) i;   */ 


     float* AA; float* BB; float* CC; 

    /*ALLOCATE ON THE DEVICE*/ 
    status=cublasAlloc(HA*WA,sizeof(float),(void**)&AA); 
     if (status != CUBLAS_STATUS_SUCCESS) { 
     fprintf (stderr, "!!!! device memory allocation error (A)\n"); 
     return EXIT_FAILURE; 
     } 

     status=cublasAlloc(HB*WB,sizeof(float),(void**)&BB); 
     if (status != CUBLAS_STATUS_SUCCESS) { 
     fprintf (stderr, "!!!! device memory allocation error (A)\n"); 
     return EXIT_FAILURE; 
     } 

     status=cublasAlloc(HC*WC,sizeof(float),(void**)&CC); 
     if (status != CUBLAS_STATUS_SUCCESS) { 
     fprintf (stderr, "!!!! device memory allocation error (A)\n"); 
     return EXIT_FAILURE; 
     } 

    /*SET MATRIX*/ 
     status=cublasSetMatrix(HA,WA,sizeof(float),A,HA,AA,HA); 
     if (status != CUBLAS_STATUS_SUCCESS) { 
     fprintf (stderr, "!!!! device memory allocation error (A)\n"); 
     return EXIT_FAILURE; 
     } 

     status=cublasSetMatrix(HB,WB,sizeof(float),B,HB,BB,HB); 
     if (status != CUBLAS_STATUS_SUCCESS) { 
     fprintf (stderr, "!!!! device memory allocation error (A)\n"); 
     return EXIT_FAILURE; 
     } 

    /*KERNEL*/ 
     cublasSgemm('n','n',HA,WB,WA,1,AA,HA,BB,HB,0,CC,HC); 

     status = cublasGetError(); 
     if (status != CUBLAS_STATUS_SUCCESS) { 
     fprintf (stderr, "!!!! kernel execution error.\n"); 
     return EXIT_FAILURE; 
     } 
     cublasGetMatrix(HC,WC,sizeof(float),CC,HC,C,HC); 
     if (status != CUBLAS_STATUS_SUCCESS) { 
     fprintf (stderr, "!!!! device read error (A)\n"); 
     return EXIT_FAILURE; 
     } 


    /* PERFORMANCE OUTPUT*/ 

    printf("\nMatriz A:\n"); 
    printMat(A,WA,HA); 
    printf("\nMatriz B:\n"); 
    printMat(B,WB,HB); 
    printf("\nMatriz C:\n"); 
    printMat(C,WC,HC); 

     free(A); free(B); free (C); 
     status = cublasFree(AA); 
     if (status != CUBLAS_STATUS_SUCCESS) { 
     fprintf (stderr, "!!!! memory free error (A)\n"); 
     return EXIT_FAILURE; 
     } 
     status = cublasFree(BB); 
     if (status != CUBLAS_STATUS_SUCCESS) { 
     fprintf (stderr, "!!!! memory free error (B)\n"); 
     return EXIT_FAILURE; 
     } 
     status = cublasFree(CC); 
     if (status != CUBLAS_STATUS_SUCCESS) { 
     fprintf (stderr, "!!!! memory free error (C)\n"); 
    return EXIT_FAILURE; 
    } 

     /* Shutdown */ 
     status = cublasShutdown(); 
     if (status != CUBLAS_STATUS_SUCCESS) { 
     fprintf (stderr, "!!!! shutdown error (A)\n"); 
     return EXIT_FAILURE; 
     } 

    if (argc > 1) { 
     if (!strcmp(argv[1], "-noprompt") ||!strcmp(argv[1], "-qatest")) 
     { 
    return EXIT_SUCCESS; 
     } 
     } 
     else 
     { 
      printf("\nPress ENTER to exit...\n"); 
      getchar(); 
     } 

return EXIT_SUCCESS; 


    } 

有什麼想法?另外,是否有人在CUBLAS中的矩陣乘法實現工作,所以我可以比較?提前致謝。

+0

+1 for cuBLAS'dgemm'例子:-) – fommil 2013-09-13 20:10:54

回答

7

我不明白你爲什麼認爲你發佈的代碼不起作用。當我編譯並運行它時,如果我將相同的矩陣輸入到matlab並計算它們的乘積,那麼生成的可執行文件會生成與我得到的相同的輸出。 CUBLAS是一個FORTRAN BLAS,它期望按列主要順序輸入(並且您的代碼是列主要順序)。如果結果不符合您的要求,則必須在某處混淆列和行主要排序。

+0

你是完全正確的我是;)。之前我必須使用MATLAB(專業專欄),但我從來不必區分行專業和專業專業排序 – Bernardo 2011-04-05 16:18:07