2012-06-15 113 views
0

我正在使用 OpenMP 來加速程序中的通量計算。我基本上是想讓 OpenMP 同時執行左、右兩側的通量計算。但實際上,加上 #pragma 指令之後,以下代碼反而需要更多的時間。我應該如何修改才能正確使用它?(問題:如何讓不同的 OpenMP 線程執行不同的任務)

// Parallel region requesting a team of two threads. Inside it, FluxP is
// filled from the `l`-suffixed (presumably left-cell) quantities and FluxM
// from the `r`-suffixed (presumably right-cell) quantities.
// NOTE(review): each `single` block below is executed by only one thread of
// the team and, with no `nowait` clause, ends with an implicit barrier, so
// the two blocks run one after the other rather than concurrently.
#pragma omp parallel num_threads(2) 
{ 

#pragma omp single 
{//first condition: fill FluxP[0..4], branch chosen by lcellMach
//cerr<<"Executed thread 0"<<endl; 
     // Taken when lcellMach is above 1 or within EPSILON of 1; EPSILON is a
     // tolerance so that values numerically close to 1 are treated like 1.
     if ((fabs(lcellMach-1.0)<EPSILON) || ((lcellMach-1.0) > 0.0)){//purpose of Epsilon!!!! 
       FluxP[0] = rhol * vnl; 
       FluxP[1] = rhol * ul * vnl + Pl*nx; 
       FluxP[2] = rhol * vl * vnl + Pl*ny; 
       FluxP[3] = rhol * wl * vnl + Pl*nz; 
       FluxP[4] = rhol * ((GAMMA * Pl/(rhol * (GAMMA-1.0))) + ((ul*ul + vl*vl + wl*wl)/2.0)) * vnl; 
     // Taken when lcellMach is below -1 or within EPSILON of -1.
     }else if ((fabs(lcellMach+1.0)<EPSILON) || ((lcellMach+1.0) < 0.0)){ 
       FluxP[0] = FluxP[1] = FluxP[2] = FluxP[3] = FluxP[4] = 0.0;// If flow direction is opposite the Flux + is zero 
     // Remaining case: lcellMach lies strictly between the two thresholds.
     }else { 
       // ql is the sum of the squared velocity components ul, vl, wl.
       double ql = (ul*ul + vl*vl + wl*wl);// how did this come 
       // FluxP[0] is computed first; FluxP[1..4] are each FluxP[0] times a factor.
       FluxP[0] = rhol * lcell_a * (lcellMach+1.0)*(lcellMach+1.0)/4.0; 
       FluxP[1] = FluxP[0] * (ul + (nx*(0.0-vnl + 2.0*lcell_a)/GAMMA)); 
       FluxP[2] = FluxP[0] * (vl + (ny*(0.0-vnl + 2.0*lcell_a)/GAMMA)); 
       FluxP[3] = FluxP[0] * (wl + (nz*(0.0-vnl + 2.0*lcell_a)/GAMMA)); 
       FluxP[4] = FluxP[0] * ( ((ql - vnl*vnl)/2.0) + (((GAMMA-1.0)*vnl + 2.0*lcell_a)*((GAMMA-1.0)*vnl + 2.0*lcell_a)/(2.0*(GAMMA*GAMMA-1.0))) ); 
     } 
}//end of 1st 
#pragma omp single 
{//second condition: fill FluxM, branch chosen by rcellMach
//cerr<<"Executed thread 1"<<endl; 
     // Taken when rcellMach is below -1 or within EPSILON of -1.
     if ((fabs(rcellMach+1.0)<EPSILON) || ((rcellMach+1.0) < 0.0)) { 
       FluxM[0] = rhor * vnr; 
       FluxM[1] = rhor * ur * vnr + Pr*nx; 
       FluxM[2] = rhor * vr * vnr + Pr*ny; 
       FluxM[3] = rhor * wr * vnr + Pr*nz; 
       FluxM[4] = rhor * ((GAMMA * Pr/(rhor * (GAMMA-1.0))) + ((ur*ur + vr*vr + wr*wr)/2.0)) * vnr; 
     // Taken when rcellMach is above 1 or within EPSILON of 1: FluxM is zeroed.
     }else if ((fabs(rcellMach-1.0)<EPSILON) || ((rcellMach-1.0) > 0.0)) { 
       FluxM[0] = FluxM[1] = FluxM[2] = FluxM[3] = FluxM[4] = 0.0; 
     // Remaining case: FluxM is formed as tempFlux minus tempFluxP.
     }else { 
       // tempFlux uses the same expressions as the first branch above.
       tempFlux[0] = rhor * vnr; 
       tempFlux[1] = rhor * ur * vnr + Pr*nx; 
       tempFlux[2] = rhor * vr * vnr + Pr*ny; 
       tempFlux[3] = rhor * wr * vnr + Pr*nz; 
       tempFlux[4] = rhor * ((GAMMA * Pr/(rhor * (GAMMA-1.0))) + ((ur*ur + vr*vr + wr*wr)/2.0)) * vnr; 

       // tempFluxP has the same form as the FluxP else-branch, evaluated
       // with the `r`-suffixed quantities; qr is the sum of squared ur, vr, wr.
       double qr = (ur*ur + vr*vr + wr*wr); 
       tempFluxP[0] = rhor * rcell_a * (rcellMach+1.0)*(rcellMach+1.0)/4.0; 
       tempFluxP[1] = tempFluxP[0] * (ur + (nx*(0.0-vnr + 2.0*rcell_a)/GAMMA)); 
       tempFluxP[2] = tempFluxP[0] * (vr + (ny*(0.0-vnr + 2.0*rcell_a)/GAMMA)); 
       tempFluxP[3] = tempFluxP[0] * (wr + (nz*(0.0-vnr + 2.0*rcell_a)/GAMMA)); 
       tempFluxP[4] = tempFluxP[0] * ( ((qr - vnr*vnr)/2.0) + (((GAMMA-1.0)*vnr + 2.0*rcell_a)*((GAMMA-1.0)*vnr + 2.0*rcell_a)/(2.0*(GAMMA*GAMMA-1.0))) ); 

       // NOTE(review): O is defined outside this snippet; only indices 0..4
       // of tempFlux/tempFluxP are filled here, so presumably O == 5 — confirm.
       for (int j=0; j<O; j++) FluxM[j] = tempFlux[j] - tempFluxP[j]; 
     } 
} 
}//pragma: end of the parallel region

需要緊急幫助。謝謝。

+2

`#pragma omp single`:omp single 指令標識的是一段只能由**單個可用線程**執行的代碼。 – nhahtdh

+0

那我在這裏應該怎麼做?我需要讓目前放在 single 區塊內的這兩個代碼塊並行執行。 –

+0

這可以做到;@nhahtdh 的評論指出了問題所在,Hristo 的答案正確地說明了如何實現你想做的事情。但是,在這裏做並行化似乎層次過低。據推測,在某個更高層的代碼中,你正在對許多單元進行這種通量計算,對嗎?那個循環似乎才是更值得並行化的目標。 –

回答

1

你需要的是sections結構:

// Combined parallel + sections construct requesting two threads: each
// `section` block below is executed once by one thread of the team, so the
// two independent blocks can run on different threads.
#pragma omp parallel sections num_threads(2) 
{ 
    // First independent task (placeholder for the FluxP computation).
    #pragma omp section 
    { 
     ... code that updates FluxP ... 
    } 
    // Second independent task (placeholder for the FluxM computation).
    #pragma omp section 
    { 
     ... code that updates FluxM ... 
    } 
} 

但是,你的代碼看起來並不會在計算上花費很多時間(例如,其中沒有大型的 for 循環),因此 OpenMP 帶來的開銷會超過並行計算所節省的時間,並行版本很可能比串行執行更慢。

相關問題