2016-11-16 33 views
0

我正在寫一個基於圖的反向傳播神經網絡,作爲個人項目,目前還停留在前向傳播階段。代碼可以編譯,但大約一半的運行能成功,另一半會在最後一步崩潰,看起來是死在某個釋放內存的清理步驟裏。我是虛函數和static_cast的新手,所以想知道問題是否出在這些部分。GDB的輸出是:「Program received signal SIGABRT, Aborted. 0x00000000100404740 in __gnu_cxx::new_allocator::deallocate(double*, unsigned long) ()」,程序還會間歇性地報「Aborted (core dumped)」。會不會是static_cast的錯?

代碼前半部分的函數應該不是問題所在,因爲它們在我更簡單的舊版神經網絡(不使用圖)中工作正常。我猜問題出在某個struct裏。

更新:如果用123而不是基於時間的值作爲隨機數生成器的種子,程序每次都能正常運行;種子爲124時則每次都失敗。去掉隨機性、把權重設爲常數,程序同樣每次都能運行。我很困惑!

#include <bits/stdc++.h> 
using namespace std; 

// Debug helper: prints `<expression text> = <value>` and a newline to cout.
// The argument is parenthesized so expressions containing operators that bind
// more loosely than `<<` (comparisons, ?:) still expand correctly.
#define p(x) cout << #x << " = " << (x) << endl
// Function-like minimum. Both arguments and the whole expansion are
// parenthesized so the macro composes inside larger expressions, e.g.
// `2 * min(a, b)`.
// NOTE: each argument may still be evaluated twice, and a macro named `min`
// breaks any later call spelled `std::min(...)`; prefer std::min in new code.
#define min(a,b) ((a) < (b) ? (a) : (b))
// Shorthand aliases for the nested-vector tensors used throughout the file.
using d1 = vector<double>;  // rank 1: flat list of doubles
using d2 = vector<d1>;      // rank 2: list of d1 rows
using d3 = vector<d2>;      // rank 3: list of d2 slices
using i1 = vector<int>;     // rank 1: flat list of ints

int argmax(d1 x){ 
    p(x.size()); 
    int maxIndex=0; 
    double maxValue=x[0]; 
    for (int i=1; i<x.size(); i++){ 
    if (x[i] > maxValue){ 
     maxValue = x[i]; 
     maxIndex = i; 
    } 
    } 
    return maxIndex; 
} 

// Builds a rank-1 tensor holding n zeros.
d1 zeros(int n){
    d1 out(n, 0.0);
    return out;
}

// Builds a rows x cols rank-2 tensor filled with zeros.
d2 zeros(int rows, int cols){
    const d1 blankRow(cols, 0.0);
    return d2(rows, blankRow);
}

// Builds a rank-3 tensor of zeros: x slices, each rows x cols.
d3 zeros(int x, int rows, int cols){
    const d1 blankRow(cols, 0.0);
    const d2 blankSlice(rows, blankRow);
    return d3(x, blankSlice);
}

void print(d1 x){ 
    for (double d: x) 
    cout << d << endl; 
    cout << endl; 
} 

void print(d2 x){ 
    for (auto row: x){ 
    for (double d: row){ 
     cout << d << " "; 
    } 
    cout << endl; 
    } 
    cout << endl; 
} 

void print(d3 x){ 
    for (d2 X: x) 
    print(X); 
} 



void toRank2(d1&x, int rows, d2& y){ 
    for (int i=0; i<x.size()/rows; i++){ 
    y.emplace_back(); 
    for (int row=0; row<rows; row++){ 
     y[i].push_back(x[i*rows+row]); 
    } 
    } 
} 

void toRank3(d1& x, int rows, int cols, d3& y){ 
    for (int i=0; i<x.size()/rows/cols; i++){ 
    y.emplace_back(); 
    for (int row=0; row<rows; row++){ 
     y[i].emplace_back(); 
     for (int col=0; col<cols; col++){ 
     y[i][row].push_back(x[i*rows*cols+row*cols+col]); 
     } 
    } 
    } 
} 

// Draws `size` samples from a normal distribution with the given mean and
// standard deviation.
// The engine is a function-local static seeded once, from time(NULL), on the
// first call, so later calls continue the same random stream.
// The distribution is rebuilt on every call: it used to be static as well,
// which silently froze the mean/standard_deviation of the first call for all
// later calls.
d1 getRandomDoubles(int size, double mean=0, double standard_deviation=1){
    // The int conversion mirrors the original `int seed = time(NULL)`.
    static default_random_engine generator(static_cast<int>(time(NULL)));
    normal_distribution<double> distribution(mean, standard_deviation);
    d1 data(size);
    generate(data.begin(), data.end(), [&distribution]() { return distribution(generator); });
    return data;
}

// Rank-2 variant: requests rows*cols samples from the rank-1 overload
// (forwarding mean and standard_deviation) and regroups them with toRank2
// into chunks of `cols` values each.
d2 getRandomDoubles(int rows, int cols, double mean=0, double standard_deviation=1){
    d2 shaped;
    d1 flat = getRandomDoubles(rows*cols, mean, standard_deviation);
    toRank2(flat, cols, shaped);
    return shaped;
}

// Rank-3 variant: requests depth*rows*cols samples from the rank-1 overload
// (forwarding mean and standard_deviation) and regroups them with toRank3
// into rows x cols slices.
d3 getRandomDoubles(int depth, int rows, int cols, double mean=0, double standard_deviation=1){
    d3 shaped;
    d1 flat = getRandomDoubles(depth*rows*cols, mean, standard_deviation);
    toRank3(flat, rows, cols, shaped);
    return shaped;
}

// Base vertex of the computation graph. A node keeps non-owning pointers to
// the nodes it reads from (parents) and to the nodes that read from it
// (children).
struct Node{
    vector<Node*> parents, children;
    bool ready=false;  // not consulted by any code visible in this file

    // Polymorphic base: a virtual destructor keeps destruction through a
    // Node* well defined.
    virtual ~Node() = default;

    // Links n as a child of this node and records this node as a parent of n.
    // The order of add_child calls fixes each parent's index in n.parents,
    // which the derived update_state() implementations rely on.
    void add_child(Node& n){
        children.push_back(&n);
        n.parents.push_back(this);
    }

    // Calls update_state() on every child in insertion order. There is no
    // readiness tracking, so a child with several parents is recomputed once
    // for each parent that propagates.
    void forward_propagate(){
        cout << "starting r2 forward" <<endl;
        for (Node* n: children){
            cout << "loop" << endl;
            n->update_state();
        }
        cout << "exiting r2 forward" << endl;
    }

    // Recomputes this node's value. The base version holds no value and only
    // forwards the pass to its children.
    virtual void update_state(){
        forward_propagate();
    }
};

struct r1:Node{ 
    vector<double> state; 
    int r; 

    r1(){} 

    r1(int R){ 
    r=R; 
    state = vector<double>(r); 
    } 
}; 

struct r2:Node{ 
    vector<vector<double>> state; 
    int r,c; 

    r2(){} 
    r2(int R, int C){ 
    r=R; 
    c=C; 
    state = zeros(r, c); 
    } 
}; 

struct r3:Node{ 
    d3 state; 
    int r, c, d; 
    r3(){} 
    r3(int R, int C, int D){ 
    r=R; 
    c=C; 
    d=D; 
    state = zeros(R,C,D); 
    } 
}; 

struct MatrixProduct1_1: r1{ 
    MatrixProduct1_1(int n):r1(n){} 

    void update_state() override{ 
    cout << "mat11" << endl; 
    d2& W = static_cast<r2*>(parents[0])->state; 
    d1& x = static_cast<r1*>(parents[1])->state; 
    state = zeros(r); 
    for (int i=0; i<W.size(); i++) 
    for (int j=0; j<W[0].size(); j++) 
    state[i] += W[i][j]*x[j]; 
    forward_propagate(); 
    } 
}; 

struct MatrixProduct2_1: r1{ 
    MatrixProduct2_1(int n):r1(n){} 

    void update_state() override{ 
    cout << "matt21" << endl; 
    d3& W = static_cast<r3*>(parents[0])->state; 
    d2& x = static_cast<r2*>(parents[1])->state; 
    state = zeros(r); 
    for (int i=0; i<W.size(); i++) 
    for (int j=0; j<W[0].size(); j++) 
    for (int k=0; k<W[0][0].size(); k++) 
    state[k] += W[i][j][k]*x[i][j]; 
    forward_propagate(); 
    } 
}; 

struct Convolution: r2{ 
    Convolution(int r, int c): r2(r, c){} 
    void update_state() override{ 
    cout << "convolving" << endl; 
    state = zeros(r, c); 
    d2& W = static_cast<r2*>(parents[0])->state; 
    d2& x = static_cast<r2*>(parents[1])->state; 

    int wCenterX = W[0].size()/2; 
    int wCenterY = W.size()/2; 
    int rows = x.size(), cols = x[0].size(); 
    int wRows = W.size(), wCols = W[0].size(); 

    //#pragma omp parallel for 
    for(int i=0; i < rows; i++) 
    for(int j=0; j < cols; j++) 
    for(int m=0; m < W.size(); m++){ 
     int mm = W.size() - 1 - m; 
     for(int n=0; n < wCols; n++){ 
     int nn = wCols - 1 - n; 
     int ii = i + (m - wCenterY); 
     int jj = j + (n - wCenterX); 
     if (ii >= 0 && ii < rows && jj >= 0 && jj < cols) 
     state[i][j] += x[ii][jj] * W[mm][nn]; 
     } 
    } 
    forward_propagate(); 
    } 
}; 


struct RELU: r2{ 
    RELU(int r, int c):r2(r, c){} 
    void update_state() override{ 
    cout << "relu2" << endl; 
    state = zeros(r,c); 
    d2& x = static_cast<r2*>(parents[0])->state; 
    for (int i=0; i<state.size(); i++) 
    for (int j=0; j<state[0].size(); j++) 
    if (x[i][j] > 0) 
    state[i][j] = x[i][j]; 
    forward_propagate(); 
    } 
}; 

struct Softmax: r1{ 
    Softmax(int r):r1(r){} 
    void update_state() override{ 
    cout << "softmax" << endl; 
    state = zeros(r); 
    p(parents.size()); 
    d1& x = static_cast<r1*>(parents[0])->state; 
    cout << "got state" << endl; 
    //p(x.size()); 
    //print(x); 

    p(x.size()); 
    cout << "argmax " << argmax(x) << endl; 
    double largest = x[argmax(x)]; 
    double lndenom = largest; 
    double expsum = 0; 
    cout << "starting expsum" << endl; 
    for (int i=0; i<x.size(); i++) 
    //expsum += exp(x[i]-largest); 
    expsum += x[i] - largest; 
    cout << "next loop " << endl; 
    for (int i=0; i<x.size(); i++) 
    // state[i] = exp(x[i]-largest)/expsum; 
    state[i] = x[i]-largest; 
    cout << "forward proping" << endl; 
    cout << "weird" << endl; 
    // forward_propagate(); 
    cout << "done with softmax" <<endl; 
    } 
}; 

struct Add1: r1{ 
    Add1(int r):r1(r){} 
    void update_state() override{ 
    cout << "add1ing" << endl; 
    d1& x = static_cast<r1*>(parents[0])->state; 
    d1& y = static_cast<r1*>(parents[1])->state; 
    for (int i=0; i<r; i++) 
    state[i] = x[i]+y[i]; 
    forward_propagate(); 
    } 
}; 

struct Add2: r2{ 
    Add2(int r, int c): r2(r, c){} 
    void update_state() override{ 
    d2& x = static_cast<r2*>(parents[0])->state; 
    d2& y = static_cast<r2*>(parents[1])->state; 
    for (int i=0; i<x.size(); i++) 
    for (int j=0; j<x[0].size(); j++) 
    state[i][j] = x[i][j] + y[i][j]; 
    forward_propagate(); 
    } 
}; 

struct MaxPool: r2{ 
    MaxPool(int r, int c): r2(r, c){} 
    void update_state() override{ 
    d2& x = static_cast<r2*>(parents[0])->state; 
    for (int i=0; i<x.size(); i+=2) 
    for (int j=0; j<x[0].size(); j+=2) 
    state[i/2][j/2] = max(max(x[i][j], x[i+1][j]), max(x[i+1][j], x[i+1][j+1])); 
    forward_propagate(); 
    } 
}; 

// Wires up a small demo network
//   x -> Convolution -> Add2 -> RELU -> MaxPool -> MatrixProduct2_1 -> Add1 -> Softmax
// with randomly initialised inputs, weights and biases, then runs one forward
// pass starting from the input node x.
// The order of the add_child calls matters: it fixes the parents[0] /
// parents[1] slots that each node's update_state() reads.
int main(){
    Node root;
    r2 x;
    x.state = getRandomDoubles(28,28);
    r2 wConv;
    wConv.state = getRandomDoubles(10, 10);
    root.add_child(x);
    root.add_child(wConv);
    Convolution c(28,28);
    wConv.add_child(c);  // parents[0] of c: kernel
    x.add_child(c);      // parents[1] of c: input
    Add2 a(28,28);
    r2 bConv(28,28);
    bConv.state = getRandomDoubles(28,28);
    c.add_child(a);
    bConv.add_child(a);
    RELU r(28,28);
    a.add_child(r);
    MaxPool max(14, 14);
    r.add_child(max);
    // The pooled activation feeding `full` is 14 x 14 (see `max` above), so
    // the fully connected weights are 10 x 14 x 14. They used to be
    // 10 x 28 x 28, which does not match the pooled input.
    r3 wFull(10,14,14);
    wFull.state = getRandomDoubles(10,14,14);
    MatrixProduct2_1 full(10);
    wFull.add_child(full);  // parents[0] of full: weights
    max.add_child(full);    // parents[1] of full: input
    r1 bFull(10);
    bFull.state = getRandomDoubles(10);
    Add1 aFull(10);
    aFull.state[0] = 123;  // placeholder; overwritten when aFull updates
    full.add_child(aFull);
    bFull.add_child(aFull);
    Softmax s(10);
    aFull.add_child(s);
    x.forward_propagate();
    cout << "returning main";
}
+1

'min'宏是出了名的容易出錯,而這裏的寫法(參數和整個表達式都沒有加括號)尤其糟糕。它也沒有存在的必要,因爲已經有現成的'std::min'。這還不是你唯一重新發明的輪子——'argmax'其實就是'std::max_element'。 – MSalters

回答

1

static_cast應該是很少需要的。這也不例外。你的節點真的應該知道他們的鄰居有什麼類型。

我沒法一眼看出具體的問題,不過我對神經網絡很熟悉,而像struct MatrixProduct1_1: r1這樣的代碼幾乎就是一個危險信號。爲什麼它是一個struct,又爲什麼要繼承自r1?在神經網絡理論中,矩陣乘積只是用來表達兩層節點之間的全連接。另外,節點的激活值通常是標量。

激活函數可以用繼承來實現,但那樣應該直接從Node繼承。這意味着你就不能再保留r1..r3這些類型了,不過我本來也不明白它們的用途。

TLDR:這些類型都搞砸了,你用static_cast來隱藏它,但這隻會使它編譯,它不會使它正確。

-1

解決了!這個錯誤是由MatrixProduct2_1中使用了錯誤的索引引起的。我從神經網絡的末端開始逐個刪除節點,確定了哪一個節點是bug的來源,然後對傳給vector::operator[]的參數加了斷言。結果發現我在越界訪問,從而導致未定義行爲。至於爲什麼某些種子下程序能跑完(結果大概也是不正確的),我完全不清楚。

我還修改了main中創建wFull時的形狀,以及爲MatrixProduct2_1的權重調用getRandomDoubles時的參數。完整的新版本:

#include <bits/stdc++.h> 
using namespace std; 

// Debug helper: prints `<expression text> = <value>` and a newline to cout.
// The argument is parenthesized so expressions containing operators that bind
// more loosely than `<<` (comparisons, ?:) still expand correctly.
#define p(x) cout << #x << " = " << (x) << endl
// The hand-written min macro stays disabled; std::min covers its use.
//#define min(a,b) a<b ? a : b
// Shorthand aliases for the nested-vector tensors used throughout the file.
using d1 = vector<double>;  // rank 1: flat list of doubles
using d2 = vector<d1>;      // rank 2: list of d1 rows
using d3 = vector<d2>;      // rank 3: list of d2 slices
using i1 = vector<int>;     // rank 1: flat list of ints
// Seed handed to the random engine inside getRandomDoubles. main() overwrites
// it with atoi(argv[1]) when a command-line argument is given.
int seed; 
// When true, getRandomDoubles stores time(NULL) into `seed` on each call; the
// engine itself is a function-local static and is only seeded once.
// main() clears this flag when a seed is passed on the command line.
bool time_seed = true; 

int argmax(d1 x){ 
    p(x.size()); 
    int maxIndex=0; 
    double maxValue=x[0]; 
    for (int i=1; i<x.size(); i++){ 
    if (x[i] > maxValue){ 
     maxValue = x[i]; 
     maxIndex = i; 
    } 
    } 
    return maxIndex; 
} 

// Builds a rank-1 tensor holding n zeros.
d1 zeros(int n){
    d1 out(n, 0.0);
    return out;
}

// Builds a rows x cols rank-2 tensor filled with zeros.
d2 zeros(int rows, int cols){
    const d1 blankRow(cols, 0.0);
    return d2(rows, blankRow);
}

// Builds a rank-3 tensor of zeros: x slices, each rows x cols.
d3 zeros(int x, int rows, int cols){
    const d1 blankRow(cols, 0.0);
    const d2 blankSlice(rows, blankRow);
    return d3(x, blankSlice);
}

void print(d1 x){ 
    for (double d: x) 
    cout << d << endl; 
    cout << endl; 
} 

void print(d2 x){ 
    for (auto row: x){ 
    for (double d: row){ 
     cout << d << " "; 
    } 
    cout << endl; 
    } 
    cout << endl; 
} 

void print(d3 x){ 
    for (d2 X: x) 
    print(X); 
} 



void toRank2(d1&x, int rows, d2& y){ 
    for (int i=0; i<x.size()/rows; i++){ 
    y.emplace_back(); 
    for (int row=0; row<rows; row++){ 
     y[i].push_back(x[i*rows+row]); 
    } 
    } 
} 

void toRank3(d1& x, int rows, int cols, d3& y){ 
    for (int i=0; i<x.size()/rows/cols; i++){ 
    y.emplace_back(); 
    for (int row=0; row<rows; row++){ 
     y[i].emplace_back(); 
     for (int col=0; col<cols; col++){ 
     y[i][row].push_back(x[i*rows*cols+row*cols+col]); 
     } 
    } 
    } 
} 

// Draws `size` samples from a normal distribution with the given mean and
// standard deviation.
// The engine is a function-local static seeded once, on the first call, from
// the global `seed` (which is refreshed from time(NULL) while `time_seed` is
// set), so later calls continue the same random stream.
// The distribution is rebuilt on every call: it used to be static as well,
// which silently froze the mean/standard_deviation of the first call for all
// later calls.
// NOTE(review): the default mean here is 1 while the rank-2/rank-3 overloads
// default to 0 - confirm which one is intended.
d1 getRandomDoubles(int size, double mean=1, double standard_deviation=1){
    if (time_seed)
        seed=time(NULL);
    static default_random_engine generator(seed);
    normal_distribution<double> distribution(mean, standard_deviation);
    d1 data(size);
    generate(data.begin(), data.end(), [&distribution]() { return distribution(generator); });
    return data;
}

// Rank-2 variant: requests rows*cols samples from the rank-1 overload
// (forwarding mean and standard_deviation) and regroups them with toRank2
// into chunks of `cols` values each.
d2 getRandomDoubles(int rows, int cols, double mean=0, double standard_deviation=1){
    d2 shaped;
    d1 flat = getRandomDoubles(rows*cols, mean, standard_deviation);
    toRank2(flat, cols, shaped);
    return shaped;
}

// Rank-3 variant: requests depth*rows*cols samples from the rank-1 overload
// (forwarding mean and standard_deviation) and regroups them with toRank3
// into rows x cols slices.
d3 getRandomDoubles(int depth, int rows, int cols, double mean=0, double standard_deviation=1){
    d3 shaped;
    d1 flat = getRandomDoubles(depth*rows*cols, mean, standard_deviation);
    toRank3(flat, rows, cols, shaped);
    return shaped;
}

// Base vertex of the computation graph. A node keeps non-owning pointers to
// the nodes it reads from (parents) and to the nodes that read from it
// (children).
struct Node{
    vector<Node*> parents, children;
    bool ready=false;  // not consulted by any code visible in this file

    // Polymorphic base: a virtual destructor keeps destruction through a
    // Node* well defined.
    virtual ~Node() = default;

    // Links n as a child of this node and records this node as a parent of n.
    // The order of add_child calls fixes each parent's index in n.parents,
    // which the derived update_state() implementations rely on.
    void add_child(Node& n){
        children.push_back(&n);
        n.parents.push_back(this);
    }

    // Calls update_state() on every child in insertion order. There is no
    // readiness tracking, so a child with several parents is recomputed once
    // for each parent that propagates.
    void forward_propagate(){
        cout << "starting r2 forward" <<endl;
        for (Node* n: children){
            cout << "loop" << endl;
            n->update_state();
        }
        cout << "exiting r2 forward" << endl;
    }

    // Recomputes this node's value. The base version holds no value and only
    // forwards the pass to its children.
    virtual void update_state(){
        forward_propagate();
    }
};

struct r1:Node{ 
    vector<double> state; 
    int r; 

    r1(){} 

    r1(int R){ 
    r=R; 
    state = vector<double>(r); 
    } 
}; 

struct r2:Node{ 
    vector<vector<double>> state; 
    int r,c; 

    r2(){} 
    r2(int R, int C){ 
    r=R; 
    c=C; 
    state = zeros(r, c); 
    } 
}; 

struct r3:Node{ 
    d3 state; 
    int r, c, d; 
    r3(){} 
    r3(int R, int C, int D){ 
    r=R; 
    c=C; 
    d=D; 
    state = zeros(R,C,D); 
    } 
}; 

struct MatrixProduct1_1: r1{ 
    MatrixProduct1_1(int n):r1(n){} 

    void update_state() override{ 
    cout << "mat11" << endl; 
    d2& W = static_cast<r2*>(parents[0])->state; 
    d1& x = static_cast<r1*>(parents[1])->state; 
    state = zeros(r); 
    for (int i=0; i<W.size(); i++) 
    for (int j=0; j<W[0].size(); j++) 
    state[i] += W[i][j]*x[j]; 
    forward_propagate(); 
    } 
}; 

struct MatrixProduct2_1: r1{ 
    MatrixProduct2_1(int n):r1(n){} 

    void update_state() override{ 
    cout << "matt21" << endl; 
    d3& W = static_cast<r3*>(parents[0])->state; 
    d2& x = static_cast<r2*>(parents[1])->state; 
    p(x.size()); 
    p(W.size()); 
    p(x[0].size()); 
    p(W[0].size()); 
    p(W[0][0].size()); 
    p(state.size()); 
    // assert (x.size()==W.size()); 
    // assert (x[0].size()==W[0].size()); 
    // assert (state.size()==W[0][0].size()); 
    assert (state.size() == W.size()); 

    state = zeros(r); 
    for (int i=0; i<W.size(); i++) 
    for (int j=0; j<W[0].size(); j++) 
    for (int k=0; k<W[0][0].size(); k++) 
    state[i] += W[i][j][k]*x[j][k]; 
    forward_propagate(); 
    } 
}; 

struct Convolution: r2{ 
    Convolution(int r, int c): r2(r, c){} 
    void update_state() override{ 
    cout << "convolving" << endl; 
    state = zeros(r, c); 
    d2& W = static_cast<r2*>(parents[0])->state; 
    d2& x = static_cast<r2*>(parents[1])->state; 

    int wCenterX = W[0].size()/2; 
    int wCenterY = W.size()/2; 
    int rows = x.size(), cols = x[0].size(); 
    int wRows = W.size(), wCols = W[0].size(); 

    //#pragma omp parallel for 
    for(int i=0; i < rows; i++) 
    for(int j=0; j < cols; j++) 
    for(int m=0; m < W.size(); m++){ 
     int mm = W.size() - 1 - m; 
     for(int n=0; n < wCols; n++){ 
     int nn = wCols - 1 - n; 
     int ii = i + (m - wCenterY); 
     int jj = j + (n - wCenterX); 
     if (ii >= 0 && ii < rows && jj >= 0 && jj < cols) 
     state[i][j] += x[ii][jj] * W[mm][nn]; 
     } 
    } 
    forward_propagate(); 
    } 
}; 


struct RELU: r2{ 
    RELU(int r, int c):r2(r, c){} 
    void update_state() override{ 
    cout << "relu2" << endl; 
    state = zeros(r,c); 
    d2& x = static_cast<r2*>(parents[0])->state; 
    for (int i=0; i<state.size(); i++) 
    for (int j=0; j<state[0].size(); j++) 
    if (x[i][j] > 0) 
    state[i][j] = x[i][j]; 
    forward_propagate(); 
    } 
}; 

struct Softmax: r1{ 
    Softmax(int r):r1(r){} 
    void update_state() override{ 
    cout << "softmax" << endl; 
    state = zeros(r); 
    p(parents.size()); 
    d1& x = static_cast<r1*>(parents[0])->state; 
    cout << "got state" << endl; 
    //p(x.size()); 
    //print(x); 

    p(x.size()); 
    cout << "argmax " << argmax(x) << endl; 
    double largest = x[argmax(x)]; 
    double lndenom = largest; 
    double expsum = 0; 
    cout << "starting expsum" << endl; 
    for (int i=0; i<x.size(); i++) 
    expsum += exp(x[i]-largest); 
    //expsum += x[i] - largest; 
    cout << "next loop " << endl; 
    for (int i=0; i<x.size(); i++) 
    state[i] = exp(x[i]-largest)/expsum; 
    //state[i] = x[i]-largest; 
    // state[i] = 3; 
    cout << "forward proping" << endl; 
    cout << "weird" << endl; 
    forward_propagate(); 
    cout << "done with softmax" <<endl; 
    } 
}; 

struct Add1: r1{ 
    Add1(int r):r1(r){} 
    void update_state() override{ 
    cout << "add1ing" << endl; 
    d1& x = static_cast<r1*>(parents[0])->state; 
    d1& y = static_cast<r1*>(parents[1])->state; 
    for (int i=0; i<r; i++) 
    state[i] = x[i]+y[i]; 
    forward_propagate(); 
    } 
}; 

struct Add2: r2{ 
    Add2(int r, int c): r2(r, c){} 
    void update_state() override{ 
    d2& x = static_cast<r2*>(parents[0])->state; 
    d2& y = static_cast<r2*>(parents[1])->state; 
    for (int i=0; i<x.size(); i++) 
    for (int j=0; j<x[0].size(); j++) 
    state[i][j] = x[i][j] + y[i][j]; 
    forward_propagate(); 
    } 
}; 

struct MaxPool: r2{ 
    MaxPool(int r, int c): r2(r, c){} 
    void update_state() override{ 
    d2& x = static_cast<r2*>(parents[0])->state; 
    for (int i=0; i<x.size(); i+=2) 
    for (int j=0; j<x[0].size(); j+=2) 
    state[i/2][j/2] = max(max(x[i][j], x[i+1][j]), max(x[i+1][j], x[i+1][j+1])); 
    forward_propagate(); 
    } 
}; 

// Wires up the demo network
//   x -> Convolution -> Add2 -> RELU -> MaxPool -> MatrixProduct2_1 -> Add1 -> Softmax
// with randomly initialised inputs, weights and biases, runs one forward pass
// starting at x and prints the softmax output.
// An optional first command-line argument is parsed with atoi and used as the
// random seed instead of the current time.
// Statement order matters here: each add_child call fixes a parents[] slot
// that update_state() reads, and every getRandomDoubles call draws from one
// shared random engine.
int main(int argc, char *argv[]){ 
    if (argc>1){ 
    // Fixed seed requested: store it and stop getRandomDoubles from
    // replacing it with time(NULL).
    seed = atoi(argv[1]); 
    time_seed = false; 
    } 
    Node root; 
    r2 x; 
    x.state = getRandomDoubles(28,28); 
    //x.state[0][0]-=1000; 
    r2 wConv; 
    wConv.state = getRandomDoubles(10, 10); 
    root.add_child(x); 
    root.add_child(wConv); 
    Convolution c(28,28); 
    // Convolution reads the kernel from parents[0] and the input from parents[1].
    wConv.add_child(c); 
    x.add_child(c); 
    Add2 a(28,28); 
    r2 bConv(28,28); 
    bConv.state = getRandomDoubles(28,28); 
    c.add_child(a); 
    bConv.add_child(a); 
    RELU r(28,28); 
    a.add_child(r); 
    MaxPool max(14, 14); 
    r.add_child(max); 
// print(max.state); 
    // Weight tensor sized 10 x 14 x 14, matching the 14 x 14 state of `max`.
    r3 wFull(10,14,14); 
    wFull.state = getRandomDoubles(10,14,14); 
    //print(wFull.state); 
    // return 0; 
    MatrixProduct2_1 full(10); 
    // MatrixProduct2_1 reads the weights from parents[0] and the input from parents[1].
    wFull.add_child(full); 
    max.add_child(full); 
    //print(full.state); //suspiciously zero 
    r1 bFull(10); 
    bFull.state = getRandomDoubles(10); 
    Add1 aFull(10); 
    // Placeholder value; Add1::update_state overwrites state[0..r) when it runs.
    aFull.state[0] = 123; 
    full.add_child(aFull); 
    bFull.add_child(aFull); 
    Softmax s(10); 
    aFull.add_child(s); 
    // d1& x =   static_cast<r1*>(parents[0])->state; 
    // d1& asdf = static_cast<r1*>(s.parents[0])->state; 
    // print(asdf); 
    //root.forward_propagate(); 
    // Start the pass at the input node; propagation reaches s through the
    // chain of children.
    x.forward_propagate(); 
    //print(aFull.state); 
    print(s.state); 
    cout << "returning main"; 
}