2014-09-12 35 views
-1

我需要轉換這個程序運行一個迭代,將迭代步驟分成4個線程。如果迭代是n,那麼我使用4個線程執行它。該程序平均需要4.7秒才能運行。所有4個線程都可以訪問這個總和,並且在更新時有一個問題。對於pi的值,我得到的是1.5而不是3.1457,而且線程不會減少時間。請幫我錯誤轉換pi的例子多線程來提高速度在c + +

#include "stdafx.h" 
#include <iostream> 
#include <chrono> 
#include <thread> 
#include <functional> 
#include <mutex> 
//std::mutex m; 
long num_rects = 100000000; 

struct params 
{ 
    int start; 
    int end; 
    double mid; 
    double height; 
    double width; 
    params(int st,int en) 
    { 
     start = st; 
     end = en; 
     width = 1.0/(double)num_rects; 
    } 

}; 

double sum = 0.0; 


void sub1(params param){ 

    for (int i = param.start; i < param.end; i++) 
    { 
     param.mid = (i + 0.5)*param.width; 
     param.height = 4.0/(1.0 + param.mid*param.mid); 

     //m.lock(); 
     sum += param.height; 
     //m.unlock(); 
    } 
} 


int _tmain(int argc, _TCHAR* argv[]) 
{ 
    int i; 
    double mid, height, width; 
    double area; 

    auto begin = std::chrono::high_resolution_clock::now(); 
    params par(0, num_rects/4); 
    std::thread t(sub1, par); 

    params par1(num_rects/4, num_rects/2); 
    std::thread t1(sub1, par1); 

    params par2(num_rects/2, (num_rects *3)/ 4); 
    std::thread t2(sub1, par2); 

    params par3((num_rects * 3)/4, num_rects); 
    std::thread t3(sub1, par3); 

    t.join(); 
    t1.join(); 
    t2.join(); 
    t3.join(); 

    /* 
    sub1(par); 
    sub1(par1); 
    sub1(par2); 
    sub1(par3); 
    */ 


    width = 1.0/(double)num_rects; 
    area = sum*width; 
    std::cout << area << std::endl; 
    auto end = std::chrono::high_resolution_clock::now(); 
    std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count() << "ms" << std::endl; 
    std::cin.get(); 
    return 0; 
} 
+0

你在'sub1'註釋掉周圍'sum'鎖。當鎖定到位時你會得到正確的答案嗎? – dohashi 2014-09-12 14:47:04

+0

隨着鎖定代碼變得非常慢。 – ram123 2014-09-12 16:06:55

+0

你是否第一次閱讀[pi]上的wikipage(https://en.wikipedia.org/wiki/Pi)?你知道[bignums](http://en.wikipedia.org/wiki/Bignum)嗎? [GMPlib](http://gmplib.org/)? F.Bellard關於[pi計算]的頁面(http://bellard.org/pi/)??? – 2014-09-13 10:24:10

回答

0

您正在從競爭條件的痛苦寫總和,所以2個線程可能會覆蓋和使用不同的值,然後將更新值被覆蓋。

這個改變應該可以工作。

double sub1(params param){ 
    double sum = 0.0; // thread local 

    for (int i = param.start; i < param.end; i++) 
    { 
     param.mid = (i + 0.5)*param.width; 
     param.height = 4.0/(1.0 + param.mid*param.mid); 

     sum += param.height; 
    } 
    return sum; 
} 

#include <future> 
int SubMain() { 
    int i; 
    double mid, height, width; 
    double area; 

    auto begin = std::chrono::high_resolution_clock::now(); 
    params par(0, num_rects/4); 
    std::future<double> fut1 = std::async (sub1, par); 

    params par1(num_rects/4, num_rects/2); 
    std::future<double> fut2 = std::async (sub1, par1); 

    params par2(num_rects/2, (num_rects *3)/ 4); 
    std::future<double> fut3 = std::async (sub1, par2); 

    params par3((num_rects * 3)/4, num_rects); 
    std::future<double> fut4 = std::async (sub1, par3); 

    sum = fut1.get() + fut2.get() + fut3.get() + fut4.get(); 

    width = 1.0/(double)num_rects; 
    area = sum*width; 
    std::cout << area << std::endl; 
    auto end = std::chrono::high_resolution_clock::now(); 
    std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count() << "ms" << std::endl; 
    std::cin.get(); 
    return 0; 
} 
+0

在鎖定代碼中更新總和時,高度值發生變化會導致死鎖情況。謝謝它的作品。此外,我會嘗試將線程增加到最佳狀態。現在領帶減少了1.6秒。現在需要3秒。 – ram123 2014-09-12 16:31:20

+0

您是否正在使用完全優化運行它?我只有848ms。 – Surt 2014-09-12 16:46:44

+0

是的,我正在調試模式下運行 – ram123 2014-09-12 17:04:44

0

做了一些chnages到蘇爾特的代碼,這是最終的優化版本

double sub1(params param){ 
     double sum = 0.0; // thread local 

     for (int i = param.start; i < param.end; i++) 
     { 
      param.mid = (i + 0.5)*param.width; 
      param.height = 4.0/(1.0 + param.mid*param.mid); 

      sum += param.height; 
     } 
     return sum; 
    } 

#include <future> 
#include <vector> 
    int SubMain() { 
     int i; 
     double mid, height, width; 
     double area; 

     auto begin = std::chrono::high_resolution_clock::now(); 

     std::vector<std::future<double>> futures; 
     double k = 0; 
     for (int j = 0; j < 4; j++) 
     { 

      params par(num_rects *k, num_rects *(k + 0.25)); 
      k += 0.25; 
      futures.push_back(std::async(sub1, par));   

     } 

     for (std::vector<std::future<double>> ::iterator it = futures.begin(); it != futures.end(); it++) 
     { 
      sum += it->get(); 
     } 

    /* params par(0, num_rects/4); 
     std::future<double> fut1 = std::async(sub1, par); 

     params par1(num_rects/4, num_rects/2); 
     std::future<double> fut2 = std::async(sub1, par1); 

     params par2(num_rects/2, (num_rects * 3)/4); 
     std::future<double> fut3 = std::async(sub1, par2); 

     params par3((num_rects * 3)/4, num_rects); 
     std::future<double> fut4 = std::async(sub1, par3); 

     sum = fut1.get() + fut2.get() + fut3.get() + fut4.get();*/ 




     width = 1.0/(double)num_rects; 
     area = sum*width; 
     std::cout << area << std::endl; 
     auto end = std::chrono::high_resolution_clock::now(); 
     std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count() << "ms" << std::endl; 
     std::cin.get(); 
     return 0; 
    }