2013-10-16 118 views
0

我需要執行維數爲10,000 X 10,000的兩個矩陣的矩陣乘法,每個元素從範圍1到10,000隨機生成。我需要用線程(25個線程)和沒有線程來完成,並比較時間。 我正在使用一個簡單的矩陣乘法算法O(n^3)。 沒有線程的程序一直在執行(超過一天),當我嘗試運行它時,線程版本會中止。 它工作正常的1000×1000矩陣10000×10000的矩陣乘法

我用我的大學服務器 這裏的CC prog.cc -lpthread -lposix4編譯它的非線程版本

/* 編譯:Unix服務器

該程序執行兩個10000×10000矩陣的矩陣乘法,無線程 該程序的目的是演示使用線程 反對不使用線程在不同數據上執行相同計算的性能增益。 */

#include <pthread.h> 
#include <iostream.h> 
#include <semaphore.h> 
#include <unistd.h> 
#include<math.h> 

int main() 
{ 
    double **A;//Matrix A 
    double **B;//Matrix B 
    double **C;//Output Matrix C 
    const int MATRIX_DIMENSION = 5000; 

    //Assign Matrix A first dimension 
    //----------------------------------------------------------- 
    A = new double*[MATRIX_DIMENSION]; 
    //Assign second dimension 
    for(int i = 0; i < MATRIX_DIMENSION; i++) 
    { 
     A[i] = new double[MATRIX_DIMENSION]; 
    } 
    //Assign Matrix B first dimension 
    B = new double*[MATRIX_DIMENSION]; 
    //Assign second dimension 
    for(int i = 0; i < MATRIX_DIMENSION; i++) 
    { 
     B[i] = new double[MATRIX_DIMENSION]; 
    } 
    //Assign Matrix C first dimension 
    C = new double*[MATRIX_DIMENSION]; 
    //Assign second dimension 
    for(int i = 0; i < MATRIX_DIMENSION; i++) 
    { 
     C[i] = new double[MATRIX_DIMENSION]; 
    } 
    //----------------------------------------------------------- 

    //Generate random numbers for matrices A and B and assign C to 0 
    for(int i=0;i<MATRIX_DIMENSION;i++) 
    { 
     for(int j=0;j<MATRIX_DIMENSION;j++) 
     { 
      A[i][j] = rand() % 10000; 
      B[i][j] = rand() % 10000; 
      C[i][j] = 0; // initialize C to zero 
     } 
    } 
    //----------------------------------------------------------- 
    //Do the matrix multiplication 

    for(int i=0;i<MATRIX_DIMENSION;i++) 
    { 
     for(int j=0 ;j<MATRIX_DIMENSION; j++) 
     { 
      for(int k=0;k<MATRIX_DIMENSION;k++) 
      { 
       C[i][j]+=A[i][k]*B[k][j]; 
      } 
     }                                                                                   
    }        

    //----------------------------------------------------------- 
    //delete the dynamic memory of A 
    for (int i = 0; i < MATRIX_DIMENSION; i++) 
    { 
     delete[] A[i]; 
    } 
    delete[] A; 
    //delete the dynamic memory of B 
    for (int i = 0; i < MATRIX_DIMENSION; i++) 
    { 
     delete[] B[i]; 
    } 
    delete[] B; 
    //delete the dynamic memory of C 
    for (int i = 0; i < MATRIX_DIMENSION; i++) 
    { 
     delete[] C[i]; 
    } 
    delete[] C; 
    //----------------------------------------------------------- 
    return(0); 
} 

這裏的線程版本 /* 名稱: 編譯:Unix服務器

這個程序執行兩個10000 * 10000矩陣的矩陣乘法無緒 該計劃的目的是演示使用線程 針對不使用線程對不同數據執行相同計算的性能增益。 */

#include <pthread.h> 
#include <iostream.h> 
#include <semaphore.h> 
#include <unistd.h> 
#include<math.h> 

//Global variables 
    double **A;//Matrix A 
    double **B;//Matrix B 
    double **C;//Output Matrix C 
    const int MATRIX_DIMENSION = 10000; //We need a 10000 X 10000 Matrix 
    const int NUM_THREADS = 25; // One thread completes 1/25th of the work 
    const int THREAD_DIMENSION = MATRIX_DIMENSION/NUM_THREADS; //Array that each thread will operate on 
    pthread_t * thread[NUM_THREADS]; 

    /*************************************************************************** 
    Function that does matrix multiplication of 1/25th of the whole matrix, 
    The division is done by dividing the Matrix into row's all 1/25 of the total matrix 
    Each row of Matrix A operates on all the columns of Matrix B to get corresponding elements of Matrix C 
    Parameter : arg, this is used as and index for which part of the Matrix this particular thread operates on 
    Return type: void 
    ****************************************************************************/ 
void *MatrixMul (void * arg) 
{ 
    int index; 
    index = (int) arg; 
    int operation_Lower_Limit = ((index+1) * THREAD_DIMENSION) - THREAD_DIMENSION ; //Multiplication starting row 
    int operation_Upper_Limit = ((index+1) * THREAD_DIMENSION) - 1; //Multiplication ending row 

    for(int i=operation_Lower_Limit;i<=operation_Upper_Limit;i++) //only 1/25th of Matrix A is used 
    { 
     for(int j=0 ;j<MATRIX_DIMENSION; j++) // The whole B matrix is used 
     { 
      for(int k=0;k<MATRIX_DIMENSION;k++) 
      { 
       C[i][j]+=A[i][k]*B[k][j]; 
      } 
     }                                                                                   
    } 
    return NULL; 
} 

int main() 
{ 

    srand(time(0)); 
    //Assign memory for threads 
    for(int i=0;i < NUM_THREADS;i++) 
    { 
    thread[i] = new pthread_t; 
    } 

    //Assign Matrix A first dimension 
    //----------------------------------------------------------- 
    A = new double*[MATRIX_DIMENSION]; 
    //Assign second dimension 
    for(int i = 0; i < MATRIX_DIMENSION; i++) 
    { 
     A[i] = new double[MATRIX_DIMENSION]; 
    } 
    //Assign Matrix B first dimension 
    B = new double*[MATRIX_DIMENSION]; 
    //Assign second dimension 
    for(int i = 0; i < MATRIX_DIMENSION; i++) 
    { 
     B[i] = new double[MATRIX_DIMENSION]; 
    } 
    //Assign Matrix C first dimension 
    C = new double*[MATRIX_DIMENSION]; 
    //Assign second dimension 
    for(int i = 0; i < MATRIX_DIMENSION; i++) 
    { 
     C[i] = new double[MATRIX_DIMENSION]; 
    } 
    //----------------------------------------------------------- 



    //Generate random numbers for matrices A and B and assign C to 0 
    for(int i=0;i<MATRIX_DIMENSION;i++) 
    { 
     for(int j=0;j<MATRIX_DIMENSION;j++) 
     { 
      A[i][j] = rand() % 10000; 
      B[i][j] = rand() % 10000; 
      C[i][j] = 0; // initialize C to zero 
     } 
    } 
    //----------------------------------------------------------- 
    //Do the matrix multiplication 

    for(int i=0;i<NUM_THREADS;i++) 
    { 
    pthread_create(thread[ i ], NULL, (MatrixMul), (void *) (i)); 
    } 


    //wait for all the threads to complete execution 
    for(int i=0;i<NUM_THREADS;i++) 
    { 
    pthread_join(*thread[i],NULL); 
    } 

    //----------------------------------------------------------- 
    //delete the dynamic memory of A 
    for (int i = 0; i < MATRIX_DIMENSION; i++) 
    { 
     delete[] A[i]; 
    } 
    delete[] A; 
    //delete the dynamic memory of B 
    for (int i = 0; i < MATRIX_DIMENSION; i++) 
    { 
     delete[] B[i]; 
    } 
    delete[] B; 
    //delete the dynamic memory of C 
    for (int i = 0; i < MATRIX_DIMENSION; i++) 
    { 
     delete[] C[i]; 
    } 
    delete[] C; 
    //----------------------------------------------------------- 
    return(0); 
} 
+0

你在運行什麼樣的系統? – DashControl

+0

@DashControl我有一個遠程連接到我的大學unix服務器使用膩子。 –

回答

0

您的程序是否內存不足?如果是這樣,你可能會釋​​放一些內存,因爲你正在執行乘法?

+1

感謝您的回覆。但是如果內存不足,我不會得到分段錯誤嗎? –

+0

@AjayKarthik這只是一個猜測,但是分配一個10000x10000的矩陣大概是7Gb我是吧?而你正在分配3(!)個。 –

+0

我已經將變量更改爲int類型。現在它們每個應該有大約1.5 GB,總共大約4.5 GB。我在我的本地ubuntu機器上運行了沒有線程的乘法運算,它具有4GB RAM。但是這不應該成爲一個問題,因爲操作系統使用虛擬內存的權利?該計劃現在運行了大約5個小時。我的教授說完成執行需要更長的時間。我會看看它如何去。謝謝 –