2014-05-12 149 views
2

我有矩陣的CSR座標。與MKL的矩陣乘法

/* alloc space for COO matrix */ 
int *coo_rows = (int*) malloc(K.n_rows * sizeof(int)); 
int *coo_cols = (int*) malloc(K.n_rows * sizeof(int)); 
float *coo_vals = (float*) malloc(K.n_rows * sizeof(float)); 

/*Load coo values*/ 

int *rowptrs = (int*) malloc((N_unique+1)*sizeof(int)); 
int *colinds = (int*) malloc(K.n_rows *sizeof(int)); 
double *vals = (double*) malloc(K.n_rows *sizeof(double)); 

/* take csr values */ 
int job[] = { 
     2, // job(1)=2 (coo->csr with sorting) 
     0, // job(2)=1 (one-based indexing for csr matrix) 
     0, // job(3)=1 (one-based indexing for coo matrix) 
     0, // empty 
     n1, // job(5)=nnz (sets nnz for csr matrix) 
     0 // job(6)=0 (all output arrays filled) 
     }; 
int info; 
mkl_scsrcoo(job, &n, vals, colinds, rowptrs, &n1, coo_vals, coo_rows, coo_cols, &info); 
assert(info == 0 && "Converted COO->CSR"); 

現在我想用beta = 0;

/* function declaration */ 
void mkl_dcsrmm (char *transa, MKL_INT *m, MKL_INT *n, MKL_INT *k, double *alpha, char *matdescra, double *val, MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, double *b, MKL_INT *ldb, double *beta, double *c, MKL_INT *ldc); 

應用mkl_dcsrmm函數來計算C := alpha*A*B + beta*C因爲現在我有。

int A_rows = ..., A_cols = ..., C_cols = ... 
double alpha = 1.0; 

mkl_dcsrmm ((char*)"N", &A_rows, &C_cols, &A_cols, &alpha, char *matdescra, vals, coo_cols, rowptrs, colinds , double *b, MKL_INT *ldb, double *beta, double *c, MKL_INT *ldc); 

我在填寫輸入時遇到了一些困難。你能幫我填補其餘的投入嗎?

我想更詳細地瞭解一個具體的輸入是matdescra。我從cspblas_ccsr借了下面的代碼

char matdescra[6]; 
matdescra[0] = 'g'; 
matdescra[1] = 'l'; 
matdescra[2] = 'n'; 
matdescra[3] = 'c'; 

但我對此有一些疑問。我工作的矩陣A不是三角形的,這個char數組的初始化讓你做出這樣的聲明,我應該如何配置matdescra數組的參數?

回答

3

這是我使用的,以及對我有用的東西。

char matdescra[6] = {'g', 'l', 'n', 'c', 'x', 'x'}; 
/* https://software.intel.com/sites/products/documentation/hpc/mkl/mklman/GUID-34C8DB79-0139-46E0-8B53-99F3BEE7B2D4.htm#TBL2-6 
G: General. D: Diagonal 
L/U Lower/Upper triangular (ignored with G) 
N: non-unit diagonal (ignored with G) 
C: zero-based indexing. 
*/ 

完整的例子

下面是一個完整的例子。我首先通過用指定密度的非零元素填充密集矩陣來創建一個隨機矩陣。然後我將它轉換爲CSR格式的稀疏矩陣。最後,我使用mkl_dcsrmm來進行乘法運算。作爲可能的檢查(檢查未完成),我使用密集矩陣使用cblas_dgemm函數進行相同的乘法運算。

#include "mkl.h" 
#include "mkl_spblas.h" 
#include <stddef.h> // For NULL 
#include <stdlib.h> // for rand() 
#include <assert.h> 
#include <stdio.h> 
#include <limits.h> 


// Compute C = A * B; where A is sparse and B is dense. 
int main() { 
    MKL_INT m=10, n=5, k=11; 
    const double sparsity = 0.9; ///< @param sparsity Values below which are set to zero (sampled from uniform(0,1)-distribution). 
    double *A_dense; 
    double *B; 
    double *C; 
    double alpha = 1.0; 
    double beta = 0.0; 
    const int allignment = 64; 

    // Seed the RNG to always be the same 
    srand(42); 

    // Allocate memory to matrices 
    A_dense = (double *)mkl_malloc(m*k*sizeof(double), allignment); 
    B = (double *)mkl_malloc(k*n*sizeof(double), allignment); 
    C = (double *)mkl_malloc(m*n*sizeof(double), allignment); 
    if (A_dense == NULL || B == NULL || C == NULL) { 
     printf("ERROR: Can't allocate memory for matrices. Aborting... \n\n"); 
     mkl_free(A_dense); 
     mkl_free(B); 
     mkl_free(C); 
     return 1; 
    } 

    // Initializing matrix data 
    int i; 
    int nzmax = 0; 
    for (i = 0; i < (m*k); i++) { 
     double val = rand()/(double)RAND_MAX; 
     if (val < sparsity) { 
      A_dense[i] = 0.0; 
     } else { 
      A_dense[i] = val; 
      nzmax++; 
     } 
    } 
    for (i = 0; i < (k*n); i++) { 
      B[i] = rand(); 
    } 
    for (i = 0; i < (m*n); i++) { 
      C[i] = 0.0; 
    } 

    // Convert A to a sparse matrix in CSR format. 

    // INFO: https://software.intel.com/sites/products/documentation/hpc/mkl/mklman/GUID-AD67DD8D-4C22-4232-8D3F-AF97DC2ABBC8.htm#GUID-AD67DD8D-4C22-4232-8D3F-AF97DC2ABBC8 
    MKL_INT job[6]; 
    job[0] = 0; // convert TO CSR. 
    job[1] = 0; // Zero-based indexing for input. 
    job[2] = 0; // Zero-based indexing for output. 
    job[3] = 2; // adns is a whole matrix A. 
    job[4] = nzmax; // Maximum number of non-zero elements allowed. 
    job[5] = 3; // all 3 arays are generated for output. 

    /* JOB: conversion parameters 
    * m: number of rows of A. 
    * k: number of columns of A. 
    * adns: (input/output). Array containing non-zero elements of the matrix A. 
    * lda: specifies the leading dimension of adns. must be at least max(1, m). 
    * acsr: (input/output) array containing non-zero elements of the matrix A. 
    * ja: array containing the column indices. 
    * ia length m+1, rowIndex. 
    * OUTPUT: 
    * info: 0 if successful. i if interrupted at i-th row because of lack of space. 
    */ 
    int info = -1; 
    printf("nzmax:\t %d\n", nzmax); 

    double *A_sparse = mkl_malloc(nzmax * sizeof(double), allignment); 
    if (A_sparse == NULL) { 
     printf("ERROR: Could not allocate enough space to A_sparse.\n"); 
     return 1; 
    } 
    MKL_INT *A_sparse_cols = mkl_malloc(nzmax * sizeof(MKL_INT), allignment); 
    if (A_sparse_cols == NULL) { 
     printf("ERROR: Could not allocate enough space to A_sparse_cols.\n"); 
     return 1; 
    } 
    MKL_INT *A_sparse_rowInd = mkl_malloc((m+1) * sizeof(MKL_INT), allignment); 
    if (A_sparse_rowInd == NULL) { 
     printf("ERROR: Could not allocate enough space to A_sparse_rowInd.\n"); 
     return 1; 
    } 
    mkl_ddnscsr(job, &m, &k, A_dense, &k, A_sparse, A_sparse_cols, A_sparse_rowInd, &info); 
    if(info != 0) { 
     printf("WARNING: info=%d, expected 0.\n", info); 
    } 
    assert(info == 0); 

    char transa = 'n'; 
    MKL_INT ldb = n, ldc=n; 
    char matdescra[6] = {'g', 'l', 'n', 'c', 'x', 'x'}; 
    /* https://software.intel.com/sites/products/documentation/hpc/mkl/mklman/GUID-34C8DB79-0139-46E0-8B53-99F3BEE7B2D4.htm#TBL2-6 
    G: General. D: Diagonal 
    L/U Lower/Upper triangular (ignored with G) 
    N: non-unit diagonal (ignored with G) 
    C: zero-based indexing. 
    */ 


    mkl_dcsrmm(&transa, &m, &n, &m, &alpha, matdescra, A_sparse, A_sparse_cols, 
      A_sparse_rowInd, &(A_sparse_rowInd[1]), B, &ldb, &beta, C, &ldc); 
    // The same computation in dense format 
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 
       m, n, k, alpha, A_dense, k, B, n, beta, C, n); 

    mkl_free(A_dense); 
    mkl_free(A_sparse); 
    mkl_free(A_sparse_cols); 
    mkl_free(A_sparse_rowInd); 
    mkl_free(B); 
    mkl_free(C); 
    return 0; 
} 
+0

感謝您的回覆。你能幫我填補剩下的參數嗎? – Thoth

+0

@Thoth,是的,我添加了一個完整的示例。 – Unapiedra

+0

@ Unapiedra謝謝。我想運行代碼,並確保我沒有任何問題:)。我會盡快將你的帖子標記爲答案。再次感謝! – Thoth