2014-09-12 31 views
2

我有下面的代碼(問題:MPI_Gatherv 的位移未正常工作),我用以下命令編譯和運行:

mpicc -std=c99 region.c 
mpirun -n 4 region 

$ mpirun -version

mpirun (Open MPI) 1.6.5

$ mpicc --version

gcc (Ubuntu 4.8.2-19ubuntu1) 4.8.2

#include <stdio.h> 
#include <stdlib.h> 
#include <mpi.h> 

/* Per-process MPI state and image geometry shared by every routine below. */
int rank;                       /* this process's rank in MPI_COMM_WORLD */
int size;                       /* number of processes in MPI_COMM_WORLD */
int dims[2];                    /* process-grid shape filled in by MPI_Dims_create */
int coords[2];                  /* not referenced by the visible code */
int image_size[2] = {8, 8};     /* full image: {rows, columns} */
int local_image_size[2];        /* per-process tile: image_size[i] / dims[i] */

/* Derived MPI datatype handles used by this program. */
MPI_Datatype border_row_t,      /* not referenced by the visible code */
      border_col_t,             /* not referenced by the visible code */
      subarray_type,            /* one tile inside the full image; send type in distribute_image() */
      recv_type;                /* strided tile layout; receive type for MPI_Gatherv in gather_region() */

/* Pixel buffers; all start out NULL and are allocated in load_and_allocate_images(). */
unsigned char *image;           /* full source image, allocated on rank 0 only */
unsigned char *region;          /* full-size gather target, allocated on rank 0 only */
unsigned char *local_region;    /* this rank's tile, allocated on every rank */

void create_types() { 
    int starts[2] = {0, 0}; 
    MPI_Type_create_subarray(2, image_size, local_image_size, starts, MPI_ORDER_C, MPI_UNSIGNED_CHAR, &subarray_type); 
    MPI_Type_commit(&subarray_type); 


    MPI_Type_vector(local_image_size[0], local_image_size[1], image_size[1], MPI_UNSIGNED_CHAR, &recv_type); 
    MPI_Type_commit(&recv_type); 
} 

void distribute_image(){ 
    if (0 == rank) { 
     MPI_Request request; 
     int num_hor_segments = image_size[0]/local_image_size[0]; 
     int num_vert_segments = image_size[1]/local_image_size[1]; 
     int dest_rank=0; 
     for (int vert=0; vert<num_vert_segments; vert++) { 
      for (int hor=0; hor<num_hor_segments; hor++) { 
       MPI_Isend((image+(local_image_size[0]*hor)+(local_image_size[1]*image_size[1]*vert)), 1, subarray_type, dest_rank, 0, MPI_COMM_WORLD, &request); 
       dest_rank++; 
      } 
     } 
    } 
    MPI_Status status; 
    MPI_Recv(local_region, local_image_size[0]*local_image_size[1], MPI_UNSIGNED_CHAR, 0, 0, MPI_COMM_WORLD, &status); 
} 

void gather_region(){ 

    int counts[4]={1,1,1,1}; 
    int disps[4]={0,4,32,36}; 

    MPI_Gatherv(local_region,local_image_size[0]*local_image_size[1], MPI_UNSIGNED_CHAR, region,counts,disps,recv_type,0,MPI_COMM_WORLD); 

    if (0==rank) { 
     printf("Actually returned:\n"); 
     for (int i=0; i<image_size[0]*image_size[1]; i++) { 
      printf("%d\t", *(region+i)); 
      if ((i+1)%image_size[0]==0) printf("\n"); 
     } 
    } 

} 

/*
 * Start MPI and record this process's place in MPI_COMM_WORLD.
 *
 * Fills the globals `rank` and `size`, then lets MPI_Dims_create choose a
 * two-dimensional process-grid shape for `size` processes and stores it in
 * `dims`.
 */
void init_mpi(int argc, char** argv){ 
    enum { GRID_NDIMS = 2 };

    MPI_Init(&argc, &argv); 

    /* The two queries are independent of each other. */
    MPI_Comm_rank(MPI_COMM_WORLD, &rank); 
    MPI_Comm_size(MPI_COMM_WORLD, &size); 

    MPI_Dims_create(size, GRID_NDIMS, dims); 
} 

void load_and_allocate_images(int argc, char** argv){ 

    if(rank == 0){ 
     image = (unsigned char*) malloc(sizeof(unsigned char*) * image_size[0] * image_size[1]); 
     for (unsigned char i=0; i<image_size[0]*image_size[1]; i++) { 
      image[i] = i; 
      printf("%d\t", *(image+i)); 
      if((i+1)%image_size[0]==0) printf("\n"); 
     } 
     printf("\n\n"); 
     region = (unsigned char*)calloc(sizeof(unsigned char),image_size[0]*image_size[1]); 
    } 
    local_image_size[0] = image_size[0]/dims[0]; 
    local_image_size[1] = image_size[1]/dims[1]; 

    int lsize = local_image_size[0]*local_image_size[1]; 
    int lsize_border = (local_image_size[0] + 2)*(local_image_size[1] + 2); 
    local_region = (unsigned char*)calloc(sizeof(unsigned char),lsize_border); 
} 

void cleanup() { 
    MPI_Type_free(&subarray_type); 
    MPI_Type_free(&recv_type); 
} 

/*
 * Program entry point: start MPI, allocate the buffers, build the datatypes,
 * scatter the image from rank 0, gather it back into `region`, then release
 * the datatypes and shut MPI down.
 */
int main(int argc, char** argv){ 
    /* Setup */
    init_mpi(argc, argv); 
    load_and_allocate_images(argc, argv); 
    create_types(); 

    /* Round trip: root -> all ranks -> root */
    distribute_image(); 
    gather_region(); 

    /* Teardown */
    cleanup(); 
    MPI_Finalize(); 

    return 0; 
} 

當我的0,4,32和36的位移運行gatherv我得到以下

分佈式矢量:

0 1 2 3 4 5 6 7 
8 9 10 11 12 13 14 15 
16 17 18 19 20 21 22 23 
24 25 26 27 28 29 30 31 
32 33 34 35 36 37 38 39 
40 41 42 43 44 45 46 47 
48 49 50 51 52 53 54 55 
56 57 58 59 60 61 62 63 

實際返回:

0 1 2 3 0 0 0 0 
8 9 10 11 0 0 0 0 
16 17 18 19 0 0 0 0 
24 25 26 27 0 0 0 0 
0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 

如果我把位移改爲 0、1、32、36,我得到如下結果:

分佈式矢量:

0 1 2 3 4 5 6 7 
8 9 10 11 12 13 14 15 
16 17 18 19 20 21 22 23 
24 25 26 27 28 29 30 31 
32 33 34 35 36 37 38 39 
40 41 42 43 44 45 46 47 
48 49 50 51 52 53 54 55 
56 57 58 59 60 61 62 63 

實際返回:

爲什麼位移 1 在返回的向量中變成了 28?這讓我很困惑。

+1

如果你用 valgrind 運行這個程序(`mpirun -np 4 valgrind ./a.out`),你會看到你覆蓋了內存;但是如果你把 create_types 的最後兩行改爲 `MPI_Datatype tmptype; MPI_Type_vector(local_image_size[0], local_image_size[1], image_size[1], MPI_UNSIGNED_CHAR, &tmptype); MPI_Type_create_resized(tmptype, 0, sizeof(unsigned char), &recv_type); MPI_Type_commit(&recv_type);`,你就會得到預期的行爲。好奇爲什麼?這與位移的單位有關;參見[這個問題的答案](http://stackoverflow.com/questions/9269399/sending-blocks-of-2d-array-in-c-using-mpi/9271753)。 – 2014-09-12 15:28:41

回答

2

MPI_GATHERV 中的位移以接收數據類型的範圍(extent)爲單位指定。由 MPI_Type_vector(local_image_size[0], local_image_size[1], image_size[1], MPI_UNSIGNED_CHAR, &recv_type); 創建的數據類型的範圍爲 {(local_image_size[0]-1) * image_size[1] + local_image_size[1]} * extent(MPI_UNSIGNED_CHAR)。給定以下數值:

local_image_size[0] = 4 
local_image_size[1] = 4 
image_size[1] = 8 
extent(MPI_UNSIGNED_CHAR) = 1 byte 

這使得 recv_type 的範圍爲 (4-1) * 8 + 4,即 28 字節。因此,位移 1 指定的是距接收緩衝區起始處 28 個字節的位置。

可以通過在MPI_Type_create_resized上強制一個不同的「可見」範圍來「調整」某個類型的大小。正確執行2D分解的整個過程在this answer中有很好的描述。