2013-10-18 54 views
0

OpenCL的新手問題:)OpenCL的崩潰我的電腦

我試着寫在OpenCL內核像

__kernel void NLLSQ 
(
    __global double* image, 
    __global double* nllsqResult 
) 
{ 
    //Do some stuff 
} 

,直到我試圖把一個循環的正常工作,即:

__kernel void NLLSQ 
(
    __global double* image, 
    __global double* nllsqResult 
) 
{ 
    for (int i = 0; i < 2; i++) 
    { 
     //Do some stuff 
    } 
} 

這導致我的電腦崩潰顯示器變黑。我認爲問題在於我向顯卡發送了太多的工作。

我完整的代碼看起來像這樣

#ifdef cl_khr_fp64 
    #pragma OPENCL EXTENSION cl_khr_fp64 : enable 
#elif defined(cl_amd_fp64) 
    #pragma OPENCL EXTENSION cl_amd_fp64 : enable 
#else 
    #error "Double precision doubleing point not supported by OpenCL implementation." 
#endif 

int2 clipPixel(int2 coordinate, int width, int height) 
{ 
    coordinate.x = max(0, coordinate.x); 
    coordinate.y = max(0, coordinate.y); 
    coordinate.x = min(width, coordinate.x); //1911 
    coordinate.y = min(height, coordinate.y); //1071 
    return coordinate; 
} 

int Coord2Index(int X, int Y, int width) 
{ 
    return (width * Y) + X; 
} 



//2D Gaussian 'bubble' Function 
double f(int x, int y, double a, double b, double s) 
{ 
    return a + b*exp(-(x*x+y*y)/(s*s)); 
} 

// (∂f/∂b) 
double dfdb(int x, int y, double s) 
{ 
    return exp(-(x*x+y*y)/(s*s)); 
} 

// (∂f/∂σ) 
double dfds(int x, int y, double b, double s) 
{ 
    double v = -(x*x + y*y); 
    return b * exp(v/(s*s))*-2*v/(s*s*s); 
} 

//Non-Linear Least Squares 
__kernel void NLLSQ 
(
    __global double* image, 
    __global double* nllsqResult 
) 
{ 
    const int x = get_global_id(0); 
    const int y = get_global_id(1); 

    int index = Coord2Index(x, y, 1912); 
    int jacIndex = 0; 
    int dyIndex = 0; 
    int indexRslt = Coord2Index(x, y, 1904); 

    double dY[81]; 
    double J[81][3]; 
    double JTJ[3][3]; 
    double3 B = (double3)(0, 1, 1); //initial guess 
    double JTdY[3]; 

    //Creates the dY vector 
    for (int j = -4; j <= 4; j++) 
    { 
     for (int i = -4; i <= 4; i++) 
     { 
      dY[dyIndex] = image[index] - f(i, j, B.x, B.y, B.z); 
      dyIndex = dyIndex + 1; 
     } 
    } 

    //Creates the Jacobian 
    for (int j = -4; j <= 4; j++) 
    { 
     for (int i = -4; i <= 4; i++) 
     { 
      index = Coord2Index(x + i + 4, y + j + 4, 1912); 

      J[jacIndex][0] = 1; 
      J[jacIndex][1] = dfdb(i, j, B.z); 
      J[jacIndex][2] = dfds(i, j, B.y, B.z); 

      jacIndex = jacIndex + 1; 
     } 
    } 

    //Now to solve (JT * J) * ΔB = JT * ΔY for ΔB .... 

    JTdY[0] = 0; 
    JTdY[1] = 0; 
    JTdY[2] = 0; 
    //Create JTJ 
    for (int i = 0; i < 81; i++) 
    { 
     JTJ[0][0] = J[i][0] * J[i][0]; 
     JTJ[0][1] = J[i][0] * J[i][1]; 
     JTJ[0][2] = J[i][0] * J[i][2]; 

     JTJ[1][0] = J[i][1] * J[i][0]; 
     JTJ[1][1] = J[i][1] * J[i][1]; 
     JTJ[1][2] = J[i][1] * J[i][2]; 

     JTJ[2][0] = J[i][2] * J[i][0]; 
     JTJ[2][1] = J[i][2] * J[i][1]; 
     JTJ[2][2] = J[i][2] * J[i][2]; 

     //JT * ΔY 
     JTdY[0] = J[i][0] * dY[i]; 
     JTdY[1] = J[i][1] * dY[i]; 
     JTdY[2] = J[i][2] * dY[i]; 
    } 

    //TO DO: might have to make this next part more general if I decide not to use a 9x9 bubble template size 
    // Also not sure what to do when det(A) = 0 (is that even possible?) 

    // (JT * J) * ΔB = JT * ΔY is a system of the form Ax = b 
    // A = (JT * J), ΔB = x, JT * ΔY = b 
    //Solve using cramer's rule http://en.wikipedia.org/wiki/Cramer%27s_rule 
    // xi = det(Ai)/det(A) 

    //determinant of A 
    double detA = 
    JTJ[0][0] * (JTJ[1][1] * JTJ[2][2] - JTJ[1][2] * JTJ[2][1]) - 
    JTJ[0][1] * (JTJ[1][0] * JTJ[2][2] - JTJ[1][2] * JTJ[2][0]) + 
    JTJ[0][2] * (JTJ[1][0] * JTJ[2][1] - JTJ[1][1] * JTJ[2][0]) ; 

    double detA1 = 
     JTdY[0] * (JTJ[1][1] * JTJ[2][2] - JTJ[1][2] * JTJ[2][1]) - 
    JTJ[0][1] * ( JTdY[1] * JTJ[2][2] - JTJ[1][2] * JTdY[2] ) + 
    JTJ[0][2] * ( JTdY[1] * JTJ[2][1] - JTJ[1][1] * JTdY[2] ) ; 

    double detA2 = 
    JTJ[0][0] * (JTdY[1] * JTJ[2][2] - JTJ[1][2] * JTdY[2] ) - 
    JTdY[0] * (JTJ[1][0] * JTJ[2][2] - JTJ[1][2] * JTJ[2][0]) + 
    JTJ[0][2] * (JTJ[1][0] * JTdY[2] - JTdY[1] * JTJ[2][0]) ; 

    double detA3 = 
    JTJ[0][0] * (JTJ[1][1] * JTdY[2] - JTdY[1] * JTJ[2][1]) - 
    JTJ[0][1] * (JTJ[1][0] * JTdY[2] - JTdY[1] * JTJ[2][0]) + 
    JTdY[0] * (JTJ[1][0] * JTJ[2][1] - JTJ[1][1] * JTJ[2][0]) ; 

    // B(k+1) = B(k) + ΔB 
    B.x = B.x + (detA1/detA); 
    B.y = B.y + (detA2/detA); 
    B.z = B.z + (detA3/detA); 

    nllsqResult[indexRslt] = B.z; 
} 

我想用一個for循環這樣

//Non-Linear Least Squares 
__kernel void NLLSQ 
(
    __global double* image, 
    __global double* nllsqResult 
) 
{ 
    const int x = get_global_id(0); 
    const int y = get_global_id(1); 

    int index = Coord2Index(x, y, 1912); 
    int jacIndex = 0; 
    int dyIndex = 0; 
    int indexRslt = Coord2Index(x, y, 1904); 

    double dY[81]; 
    double J[81][3]; 
    double JTJ[3][3]; 
    double3 B = (double3)(0, 1, 1); //initial guess 
    double JTdY[3]; 

    //Creates the dY vector 
    for (int j = -4; j <= 4; j++) 
    { 
     for (int i = -4; i <= 4; i++) 
     { 
      dY[dyIndex] = image[index] - f(i, j, B.x, B.y, B.z); 
      dyIndex = dyIndex + 1; 
     } 
    } 

     for (int iters = 0; iters < 10; iters++) //FOR LOOP ADDED HERE 
     { 
     jacIndex = 0; 
    //Creates the Jacobian 
    for (int j = -4; j <= 4; j++) 
    { 
     for (int i = -4; i <= 4; i++) 
     { 
      index = Coord2Index(x + i + 4, y + j + 4, 1912); 

      J[jacIndex][0] = 1; 
      J[jacIndex][1] = dfdb(i, j, B.z); 
      J[jacIndex][2] = dfds(i, j, B.y, B.z); 

      jacIndex = jacIndex + 1; 
     } 
    } 

    //Now to solve (JT * J) * ΔB = JT * ΔY for ΔB .... 

    JTdY[0] = 0; 
    JTdY[1] = 0; 
    JTdY[2] = 0; 
    //Create JTJ 
    for (int i = 0; i < 81; i++) 
    { 
     JTJ[0][0] = J[i][0] * J[i][0]; 
     JTJ[0][1] = J[i][0] * J[i][1]; 
     JTJ[0][2] = J[i][0] * J[i][2]; 

     JTJ[1][0] = J[i][1] * J[i][0]; 
     JTJ[1][1] = J[i][1] * J[i][1]; 
     JTJ[1][2] = J[i][1] * J[i][2]; 

     JTJ[2][0] = J[i][2] * J[i][0]; 
     JTJ[2][1] = J[i][2] * J[i][1]; 
     JTJ[2][2] = J[i][2] * J[i][2]; 

     //JT * ΔY 
     JTdY[0] = J[i][0] * dY[i]; 
     JTdY[1] = J[i][1] * dY[i]; 
     JTdY[2] = J[i][2] * dY[i]; 
    } 

    //TO DO: might have to make this next part more general if I decide not to use a 9x9 bubble template size 
    // Also not sure what to do when det(A) = 0 (is that even possible?) 

    // (JT * J) * ΔB = JT * ΔY is a system of the form Ax = b 
    // A = (JT * J), ΔB = x, JT * ΔY = b 
    //Solve using cramer's rule http://en.wikipedia.org/wiki/Cramer%27s_rule 
    // xi = det(Ai)/det(A) 

    //determinant of A 
    double detA = 
    JTJ[0][0] * (JTJ[1][1] * JTJ[2][2] - JTJ[1][2] * JTJ[2][1]) - 
    JTJ[0][1] * (JTJ[1][0] * JTJ[2][2] - JTJ[1][2] * JTJ[2][0]) + 
    JTJ[0][2] * (JTJ[1][0] * JTJ[2][1] - JTJ[1][1] * JTJ[2][0]) ; 

    double detA1 = 
     JTdY[0] * (JTJ[1][1] * JTJ[2][2] - JTJ[1][2] * JTJ[2][1]) - 
    JTJ[0][1] * ( JTdY[1] * JTJ[2][2] - JTJ[1][2] * JTdY[2] ) + 
    JTJ[0][2] * ( JTdY[1] * JTJ[2][1] - JTJ[1][1] * JTdY[2] ) ; 

    double detA2 = 
    JTJ[0][0] * (JTdY[1] * JTJ[2][2] - JTJ[1][2] * JTdY[2] ) - 
    JTdY[0] * (JTJ[1][0] * JTJ[2][2] - JTJ[1][2] * JTJ[2][0]) + 
    JTJ[0][2] * (JTJ[1][0] * JTdY[2] - JTdY[1] * JTJ[2][0]) ; 

    double detA3 = 
    JTJ[0][0] * (JTJ[1][1] * JTdY[2] - JTdY[1] * JTJ[2][1]) - 
    JTJ[0][1] * (JTJ[1][0] * JTdY[2] - JTdY[1] * JTJ[2][0]) + 
    JTdY[0] * (JTJ[1][0] * JTJ[2][1] - JTJ[1][1] * JTJ[2][0]) ; 

    // B(k+1) = B(k) + ΔB 
    B.x = B.x + (detA1/detA); 
    B.y = B.y + (detA2/detA); 
    B.z = B.z + (detA3/detA); 
     } 

    nllsqResult[indexRslt] = B.z; 
} 
+0

你在Windows或Linux?以及您正在使用哪種OpenCL實現? –

+0

我正在使用windows – sav

+0

我使用cloo for C# – sav

回答

1

它接縫你的內核需要從長,超時檢測和恢復機制Windows開始運行。您可以通過更改註冊表值來禁用TDR,如下所述:MSDN但是,如果您的TDS可用,您的屏幕可能會掛起,直到內核計算完成。如果你的內核有一個infinit循環,那麼沒有任何東西可以阻止它,並且既然你沒有任何迴應,你的計算機將會非常困難。很好,有電源和重置按鈕。

+0

此刻我必須重新啓動計算機。我猜TDR的'恢復'部分不會在踢:) – sav

相關問題