實現Strassen算法

我想在單個核心上快速地進行矩陣乘法。我在網上查了一下，找到了幾種算法，發現Strassen算法是唯一真正有人實際實現的算法。我看了幾個例子，並得出了下面的解決方案。我做了一個簡單的基準測試，它會生成兩個隨機填充的500x500矩陣。Strassen算法耗時18秒，而普通的（高中課本上的）算法只用了0.4秒就完成了。其他人實現該算法後的結果都很樂觀，那麼我的實現錯在哪裏，我該如何讓它更快？ 實現Strassen算法

/**
 * Returns C = A * B (A is this matrix), computed with Strassen's
 * divide-and-conquer scheme: seven recursive products of half-sized
 * blocks instead of eight.
 *
 * Odd dimensions are handled by rounding the block size up; reads past
 * the edge of A or B go through saveGet, which returns 0 there, so the
 * blocks are implicitly zero-padded.
 *
 * @param B        right-hand operand; its row count must equal this
 *                 matrix's column count
 * @param LEAFSIZE cutoff: once either dimension of A is at most this
 *                 value the classic times() is used; values below 1
 *                 are treated as 1 so the recursion always terminates
 * @return the product matrix with A.M rows and B.N columns
 * @throws RuntimeException if the inner dimensions do not agree
 */
private Matrix strassenTimes(Matrix B, int LEAFSIZE) {
    Matrix A = this;
    // Same precondition as times(): columns of A must match rows of B.
    if (A.N != B.M) throw new RuntimeException("Illegal matrix dimensions.");

    // A 1x1 block halves to a 1x1 block again ((1 + 1) / 2 == 1), so a
    // cutoff below 1 would recurse forever; clamp it.
    int cutoff = Math.max(LEAFSIZE, 1);
    if (A.N <= cutoff || A.M <= cutoff) {
        return A.times(B);
    }

    // Block sizes, rounded up so that odd dimensions are covered.
    int newAcols = (A.N + 1) / 2;
    int newArows = (A.M + 1) / 2;
    Matrix a11 = new Matrix(newArows, newAcols);
    Matrix a12 = new Matrix(newArows, newAcols);
    Matrix a21 = new Matrix(newArows, newAcols);
    Matrix a22 = new Matrix(newArows, newAcols);

    int newBcols = (B.N + 1) / 2;
    int newBrows = (B.M + 1) / 2;
    Matrix b11 = new Matrix(newBrows, newBcols);
    Matrix b12 = new Matrix(newBrows, newBcols);
    Matrix b21 = new Matrix(newBrows, newBcols);
    Matrix b22 = new Matrix(newBrows, newBcols);

    // Split A into quadrants (1-based indices; saveGet pads with 0).
    for (int i = 1; i <= newArows; i++) {
        for (int j = 1; j <= newAcols; j++) {
            a11.setElement(i, j, A.saveGet(i, j));                       // top left
            a12.setElement(i, j, A.saveGet(i, j + newAcols));            // top right
            a21.setElement(i, j, A.saveGet(i + newArows, j));            // bottom left
            a22.setElement(i, j, A.saveGet(i + newArows, j + newAcols)); // bottom right
        }
    }

    // Split B into quadrants the same way.
    for (int i = 1; i <= newBrows; i++) {
        for (int j = 1; j <= newBcols; j++) {
            b11.setElement(i, j, B.saveGet(i, j));                       // top left
            b12.setElement(i, j, B.saveGet(i, j + newBcols));            // top right
            b21.setElement(i, j, B.saveGet(i + newBrows, j));            // bottom left
            b22.setElement(i, j, B.saveGet(i + newBrows, j + newBcols)); // bottom right
        }
    }

    // The seven Strassen products.
    Matrix p1 = a11.add(a22).strassenTimes(b11.add(b22), cutoff);   // (a11 + a22)(b11 + b22)
    Matrix p2 = a21.add(a22).strassenTimes(b11, cutoff);            // (a21 + a22) b11
    Matrix p3 = a11.strassenTimes(b12.minus(b22), cutoff);          // a11 (b12 - b22)
    Matrix p4 = a22.strassenTimes(b21.minus(b11), cutoff);          // a22 (b21 - b11)
    Matrix p5 = a11.add(a12).strassenTimes(b22, cutoff);            // (a11 + a12) b22
    Matrix p6 = a21.minus(a11).strassenTimes(b11.add(b12), cutoff); // (a21 - a11)(b11 + b12)
    Matrix p7 = a12.minus(a22).strassenTimes(b21.add(b22), cutoff); // (a12 - a22)(b21 + b22)

    // Combine the products into the four result quadrants.
    Matrix c11 = p1.add(p4).add(p7).minus(p5); // p1 + p4 + p7 - p5
    Matrix c12 = p3.add(p5);                   // p3 + p5
    Matrix c21 = p2.add(p4);                   // p2 + p4
    Matrix c22 = p1.add(p3).add(p6).minus(p2); // p1 + p3 + p6 - p2

    // Stitch the quadrants back together, dropping the zero padding.
    int rows = c11.nrRows();
    int cols = c11.nrColumns();

    Matrix C = new Matrix(A.M, B.N);
    for (int i = 1; i <= A.M; i++) {
        boolean top = i <= rows;
        for (int j = 1; j <= B.N; j++) {
            boolean left = j <= cols;
            int el;
            if (top) {
                el = left ? c11.get(i, j) : c12.get(i, j - cols);
            } else {
                // The column offset is cols; the original subtracted rows here.
                el = left ? c21.get(i - rows, j) : c22.get(i - rows, j - cols);
            }
            C.setElement(i, j, el);
        }
    }
    return C;
}

這個小基準測試的代碼如下：

// Benchmark: multiply two 500x500 matrices filled with random values
// from nextInt(20) using both algorithms, print the wall-clock time of
// each in milliseconds, then print how many result elements differ.
final int rowsA = 500;
final int inner = 500; // columns of A, which is also the rows of B
final int colsB = 500;
Matrix a = new Matrix(rowsA, inner);
Matrix b = new Matrix(inner, colsB);

Random random = new Random();

// Fill A first, then B (matrix indices are 1-based).
for (int row = 1; row <= rowsA; row++) {
    for (int col = 1; col <= inner; col++) {
        a.setElement(row, col, random.nextInt(20));
    }
}
for (int row = 1; row <= inner; row++) {
    for (int col = 1; col <= colsB; col++) {
        b.setElement(row, col, random.nextInt(20));
    }
}

// Strassen multiplication.
System.out.println("strassen: A x B");
long start = System.currentTimeMillis();
Matrix c = a.strassenTimes(b);
long strassenMillis = System.currentTimeMillis() - start;
System.out.println("time = " + strassenMillis);

// Classic triple-loop multiplication.
System.out.println("normal: A x B");
start = System.currentTimeMillis();
Matrix d = a.times(b);
long normalMillis = System.currentTimeMillis() - start;
System.out.println("time = " + normalMillis);

System.out.println("nr of different elements = " + c.compare(d));

結果如下：

strassen: A x B 
time = 18372 
normal: A x B 
time = 308 
nr of different elements = 0

我知道代碼很多，但如果大家能幫幫我，我會非常高興 ;）

編輯1： 爲了完整起見，我添加了上面代碼使用的一些方法。

/**
 * Returns the element at 1-based row {@code r} and column {@code c}.
 *
 * @param r row index, from 1 to nrRows()
 * @param c column index, from 1 to nrColumns()
 * @return the stored value
 * @throws ArrayIndexOutOfBoundsException if either index is out of range
 */
public int get(int r, int c) {
    if (c > nrColumns() || r > nrRows() || c <= 0 || r <= 0) {
        // The message used to say "set", copied from setElement; this is a read.
        throw new ArrayIndexOutOfBoundsException("matrix is of size (" +
                nrRows() + ", " + nrColumns() + "), but tries to get element(" + r + ", " + c + ")");
    }

    // The public API is 1-based; the backing array is 0-based.
    return content[r - 1][c - 1];
}

/**
 * Lenient read: returns the element at 1-based position (r, c), or 0
 * when the position lies outside the matrix instead of throwing.
 */
private int saveGet(int r, int c) {
    boolean inside = c <= nrColumns() && r <= nrRows() && c > 0 && r > 0;
    // The backing array is 0-based, hence the -1 on both indices.
    return inside ? content[r - 1][c - 1] : 0;
}

/**
 * Stores {@code n} at 1-based row {@code r} and column {@code c}.
 *
 * @throws ArrayIndexOutOfBoundsException if either index is out of range
 */
public void setElement(int r, int c, int n) {
    boolean inside = c <= nrColumns() && r <= nrRows() && c > 0 && r > 0;
    if (!inside) {
        String message = "matrix is of size (" + nrRows() + ", " + nrColumns()
                + "), but tries to set element(" + r + ", " + c + ")";
        throw new ArrayIndexOutOfBoundsException(message);
    }
    // Translate the 1-based position to the 0-based backing array.
    content[r - 1][c - 1] = n;
}

/**
 * Returns a new matrix holding the element-wise sum of this matrix and
 * {@code B}. Neither operand is modified.
 *
 * @throws RuntimeException if the two matrices differ in shape
 */
public Matrix add(Matrix B) {
    boolean sameShape = B.M == this.M && B.N == this.N;
    if (!sameShape) {
        throw new RuntimeException("Illegal matrix dimensions.");
    }

    Matrix sum = new Matrix(M, N);
    for (int row = 0; row < M; row++) {
        for (int col = 0; col < N; col++) {
            sum.content[row][col] = this.content[row][col] + B.content[row][col];
        }
    }
    return sum;
}

來源

2015-03-13 martijnn2008

Matrix類在哪裏？我在上面粘貼的代碼中沒有看到它...... – 2015-03-13 20:28:07

所有那些新的Matrix（）實例，在每一層遞歸中都不會很快，我認爲。 – IVlad 2015-03-13 20:30:29

您的Strassen實現從創建新的矩陣，複製矩陣元素和進行遞歸調用中有大量（理論上）不需要的開銷。無論如何，一個高效的Strassen實現只比天真的算法快一點，並且只適用於足夠大的矩陣。 – 2015-03-13 20:30:38

我應該爲Strassen算法選擇另一個葉子大小，所以我做了一個小實驗。看起來葉子大小256最適合問題中的代碼。下圖顯示了不同葉子大小下的結果，每次都使用一個1025 x 1025的隨機矩陣。

leaf size

我將葉子大小爲256的Strassen算法與樸素的矩陣乘法算法進行了比較，看它是否真的是一種改進。事實證明確實有改進，下面是在不同大小的隨機矩陣上的結果（大小以10爲步長遞增，每個大小重複50次）。 matrix size

下面是樸素矩陣乘法算法的代碼：

/**
 * Returns C = A * B (A is this matrix) using the classic triple loop.
 *
 * The loops run in i-k-j order so the innermost loop walks one row of
 * B and one row of C sequentially instead of striding down a column of
 * B. Integer addition is associative even when it wraps, so every
 * element ends up with the same value as in i-j-k order.
 *
 * @param B right-hand operand; its row count must equal this matrix's
 *          column count
 * @return a new matrix with A.M rows and B.N columns
 * @throws RuntimeException if the inner dimensions do not agree
 */
public Matrix times(Matrix B) {
    Matrix A = this;
    if (A.N != B.M) throw new RuntimeException("Illegal matrix dimensions.");
    Matrix C = new Matrix(A.M, B.N);
    for (int i = 0; i < C.M; i++) {
        int[] aRow = A.content[i];
        int[] cRow = C.content[i];
        for (int k = 0; k < A.N; k++) {
            int aik = aRow[k];
            int[] bRow = B.content[k];
            for (int j = 0; j < C.N; j++) {
                // Accumulate onto whatever the Matrix constructor left in C.
                cRow[j] += aik * bRow[j];
            }
        }
    }
    return C;
}

我仍然認爲實現中還可以做其他改進，但事實證明葉子大小是一個非常重要的因素。所有實驗都是在一臺運行Ubuntu 14.04的機器上完成的，其規格如下：

CPU: Intel(R) Core(TM) i7-2600K CPU @ 3.40GHz 
Memory: 2 x 4GB DDR3 1333 MHz

來源

2015-04-18 16:07:31 martijnn2008

實現Strassen算法

回答

相關問題