當我嘗試使用OpenMP乘以兩個大方形矩陣時,並行方式比序列化方法花費的時間要多得多。我做錯了什麼?使用OpenMP乘以矩陣需要比序列化的方式花費更多的時間
對4核心(超線程)機器的簡單測試將導致並行計算大約需要100秒,而串行計算需要10秒!
這是我的並行代碼:
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define MATSIZE 500
#define MAXRAND 100
int main (int argc, char *argv[])
{
double startTime = 0.0, stopTime = 0.0;
startTime = omp_get_wtime();
int i, j, k;
static int a[MATSIZE][MATSIZE],b[MATSIZE][MATSIZE],c[MATSIZE][MATSIZE];
srand(time(NULL));
#pragma omp parallel shared(a,b,c) private(i,j,k)
{
#pragma omp for
for (i=0; i<MATSIZE; i++)
for (j=0; j<MATSIZE; j++){
a[i][j]= rand()%MAXRAND;
b[i][j]= rand()%MAXRAND;
c[i][j]= 0;
}
}
printf("Matrix A:\n");
for (i=0; i<MATSIZE; i++){
for (j=0; j<MATSIZE; j++)
printf("%d ", a[i][j]);
printf("\n");
}
printf("******************************************************\n");
printf("Matrix B:\n");
for (i=0; i<MATSIZE; i++){
for (j=0; j<MATSIZE; j++)
printf("%d ", b [i][j]);
printf("\n");
}
printf("******************************************************\n");
#pragma omp parallel shared(a,b,c) private(i,j,k)
{
#pragma omp for
for (i=0; i<MATSIZE; i++){
for(j=0; j<MATSIZE; j++)
for (k=0; k<MATSIZE; k++){
c[i][j] += a[i][k] * b[k][j];
printf(".");
}
}
}
printf("\nResult Matrix:\n");
for (i=0; i<MATSIZE; i++){
for (j=0; j<MATSIZE; j++)
printf("%d ", c[i][j]);
printf("\n");
}
stopTime = omp_get_wtime();
printf("Elapsed time = %f \n", stopTime - startTime);
}
這裏是串行一個:
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define MATSIZE 500
#define MAXRAND 100
int main (int argc, char *argv[])
{
double startTime = 0.0, stopTime = 0.0;
startTime = omp_get_wtime();
int i, j, k;
static int a[MATSIZE][MATSIZE],b[MATSIZE][MATSIZE],c[MATSIZE][MATSIZE];
srand(time(NULL));
for (i=0; i<MATSIZE; i++)
for (j=0; j<MATSIZE; j++){
a[i][j]= rand()%MAXRAND;
b[i][j]= rand()%MAXRAND;
c[i][j]= 0;
}
printf("Matrix A:\n");
for (i=0; i<MATSIZE; i++){
for (j=0; j<MATSIZE; j++)
printf("%d ", a[i][j]);
printf("\n");
}
printf("******************************************************\n");
printf("Matrix B:\n");
for (i=0; i<MATSIZE; i++){
for (j=0; j<MATSIZE; j++)
printf("%d ", b [i][j]);
printf("\n");
}
printf("******************************************************\n");
for (i=0; i<MATSIZE; i++){
for(j=0; j<MATSIZE; j++)
for (k=0; k<MATSIZE; k++){
c[j][i] += a[j][k] * b[k][i];
printf(".");
}
}
printf("\nResult Matrix:\n");
for (i=0; i<MATSIZE; i++){
for (j=0; j<MATSIZE; j++)
printf("%d ", c[i][j]);
printf("\n");
}
stopTime = omp_get_wtime();
printf("Elapsed time = %f \n", stopTime - startTime);
}
你爲什麼在你的內循環中有一個'printf'? *這是*吃你所有的運行時間,而不是矩陣乘法。 – user2357112