2012-11-14 37 views
2

我正在嘗試使用opemMP做一個「諧波進展總和」問題的並行版本。 但輸出依賴於輸入而不同。 (並行和串行)諧波進展總和C++ openMP

程序:

#include "stdafx.h" 
#include <iostream> 
#include <sstream> 
#include <omp.h> 
#include <time.h> 

#define d 10 //Numbers of Digits (Example: 5 => 0,xxxxx) 
#define n 1000 //Value of N (Example: 5 => 1/1 + 1/2 + 1/3 + 1/4 + 1/5) 

using namespace std; 

void HPSSeguencial(char* output) { 
    long unsigned int digits[d + 11]; 

    for (int digit = 0; digit < d + 11; ++digit) 
     digits[digit] = 0; 

    for (int i = 1; i <= n; ++i) { 
     long unsigned int remainder = 1; 
     for (long unsigned int digit = 0; digit < d + 11 && remainder; ++digit) { 
      long unsigned int div = remainder/i; 
      long unsigned int mod = remainder % i; 
      digits[digit] += div; 
      remainder = mod * 10; 
     } 
    } 


    for (int i = d + 11 - 1; i > 0; --i) { 
     digits[i - 1] += digits[i]/10; 
     digits[i] %= 10; 
    } 
    if (digits[d + 1] >= 5) { 
     ++digits[d]; 
    } 


    for (int i = d; i > 0; --i) { 
     digits[i - 1] += digits[i]/10; 
     digits[i] %= 10; 
    } 
    stringstream stringstreamA; 
    stringstreamA << digits[0] << ","; 


    for (int i = 1; i <= d; ++i) { 
     stringstreamA << digits[i]; 
    } 
    string stringA = stringstreamA.str(); 
    stringA.copy(output, stringA.size()); 
} 

void HPSParallel(char* output) { 
    long unsigned int digits[d + 11]; 

    for (int digit = 0; digit < d + 11; ++digit) 
     digits[digit] = 0; 

    int i; 
    long unsigned int digit; 
    long unsigned int remainder; 
    #pragma omp parallel for private(i, remainder, digit) 
    for (i = 1; i <= n; ++i) { 
     remainder = 1; 
     for (digit = 0; digit < d + 11 && remainder; ++digit) { 
      long unsigned int div = remainder/i; 
      long unsigned int mod = remainder % i; 
      digits[digit] += div; 
      remainder = mod * 10; 
     } 
    } 

    for (int i = d + 11 - 1; i > 0; --i) { 
     digits[i - 1] += digits[i]/10; 
     digits[i] %= 10; 
    } 
    if (digits[d + 1] >= 5) { 
     ++digits[d]; 
    } 

    for (int i = d; i > 0; --i) { 
     digits[i - 1] += digits[i]/10; 
     digits[i] %= 10; 
    } 
    stringstream stringstreamA; 
    stringstreamA << digits[0] << ","; 

    for (int i = 1; i <= d; ++i) { 
     stringstreamA << digits[i]; 
    } 
    string stringA = stringstreamA.str(); 
    stringA.copy(output, stringA.size()); 
} 

int main() { 
    //Sequential Method 
    cout << "Sequential Method: " << endl; 
    char outputSeguencial[d + 10]; 
    HPSSeguencial(outputSeguencial); 
    cout << outputSeguencial << endl; 

    //Cleaning vector 
    string stringA = ""; 
    stringA.copy(outputSeguencial, stringA.size()); 

    //Parallel Method 
    cout << "Parallel Method: " << endl; 
    char outputParallel[d + 10]; 
    HPSParallel(outputParallel); 
    cout << outputParallel << endl; 

    system("PAUSE"); 
    return 0; 
} 

實例:

輸入:

#define d 10 
#define n 1000 

輸出:

Sequential Method: 
7,4854708606╠╠╠╠╠╠╠╠╠╠╠╠ 
Parallel Method: 
6,6631705861╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠ÇJ^ 

輸入:

#define d 12 
#define n 7 

輸出:

Sequential Method: 
2,592857142857╠╠╠╠╠╠╠╠╠╠╠╠╠╠ÀÂ♂ü─¨@ 
Parallel Method: 
2,592857142857╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠ÇJJ 

個問候

Pastecode更新digits陣列時

http://pastecode.org/index.php/view/62768285

回答

2

你的線程在彼此的腳趾步驟。因此,一些附加信息會丟失,並且會得到虛假結果(幾乎可以肯定,在不同的運行中會得到不同的結果)。

您必須將寫入同步到digits,例如,與原子(或關鍵)部分:

// ... <snip> 
#pragma omp parallel for private(i, remainder, digit) 
for (i = 1; i <= n; ++i) { 
    remainder = 1; 
    for (digit = 0; digit < d + 11 && remainder; ++digit) { 
     long unsigned int div = remainder/i; 
     long unsigned int mod = remainder % i; 
     #pragma omp atomic  // <- HERE, could also be #pragma omp critical 
     digits[digit] += div; 
     remainder = mod * 10; 
    } 
} 
// <snip> ... 

這樣一次只有一個線程可以更新數組。對於這樣的任務來說,這可能會抵消在多個線程中分割任務的任何收益。

+0

一些編譯器(閱讀GCC)在x86上用'lock addl'實現原子'+ ='。關鍵部分通過互斥體實現,包括運行時函數調用,因此比原子方法慢。實際上,由於內部循環是規則的,所以線程會在幾次迭代之後延遲同步,並且不會有等待。緩存一致性會損害並行增益。 –

1

由於Daniel Fischer指出,你有寫衝突,但你可以避免它比omp critical節更優雅,例如,通過給每個線程它自己的副本digits,並在循環結束時將它們聚合在一起。

+1

更優雅的解決方案是在Fortran中重寫代碼,因爲OpenMP支持通過Fortran中的數組變量進行減少:) –