I'm working on a project in CUDA C that trains a multilayer neural network, and the problem is that when I try to compile the program I get this error: undefined reference to `setup(int, char **)'
facetrain.o: In function `backprop_face':
facetrain.c:(.text+0x127): undefined reference to `bpnn_train_kernel'
backprop_kernel.o: In function `bpnn_train_kernel(BPNN*, float*, float*)':
tmpxft_0002fa78_00000000-4_backprop_kernel.cudafe1.cpp:(.text+0x6e6): undefined reference to `bpnn_layerforward(float*, float*, float**, int, int)'
tmpxft_0002fa78_00000000-4_backprop_kernel.cudafe1.cpp:(.text+0x703): undefined reference to `bpnn_output_error(float*, float*, float*, int, float*)'
tmpxft_0002fa78_00000000-4_backprop_kernel.cudafe1.cpp:(.text+0x72a): undefined reference to `bpnn_hidden_error(float*, int, float*, int, float**, float*, float*)'
tmpxft_0002fa78_00000000-4_backprop_kernel.cudafe1.cpp:(.text+0x745): undefined reference to `bpnn_adjust_weights(float*, int, float*, int, float**, float**)'
backprop_kernel.o: In function `main':
tmpxft_0002fa78_00000000-4_backprop_kernel.cudafe1.cpp:(.text+0x9a5): undefined reference to `setup(int, char**)'
collect2: ld returned 1 exit status
make: *** [backprop] Error 1
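One detail worth noting from the log itself: the unresolved symbols are reported with full C++ signatures (e.g. `setup(int, char**)`, `bpnn_layerforward(float*, float*, float**, int, int)`), which usually means the references coming from the nvcc-compiled object (backprop_kernel.o) are C++ name-mangled while the definitions in the .c files are plain C symbols; the unmangled reference to `bpnn_train_kernel` from facetrain.o points the other way for that function. A minimal sketch of the declarations with C linkage, assuming `setup` and the `bpnn_*` helpers are defined in C sources such as facetrain.c and backprop.c (not confirmed here), might look like this:

/* Hedged sketch, not the project's actual headers: declare the C-defined
 * helpers with C linkage so the nvcc-compiled (C++) object looks for the
 * unmangled symbols that a C compiler emits for the .c files. */
#ifdef __cplusplus
extern "C" {
#endif
void bpnn_layerforward(float *l1, float *l2, float **conn, int n1, int n2);
void bpnn_output_error(float *delta, float *target, float *output, int nj, float *err);
void bpnn_hidden_error(float *delta_h, int nh, float *delta_o, int no, float **who, float *hidden, float *err);
void bpnn_adjust_weights(float *delta, int ndelta, float *ly, int nly, float **w, float **oldw);
int setup(int argc, char **argv);
float **alloc_2d_dbl(int m, int n);
float squash(float x);
#ifdef __cplusplus
}
#endif

/* Conversely, facetrain.c (compiled as C) calls bpnn_train_kernel, so the
 * definition in the .cu file would also need C linkage, e.g.: */
extern "C" void bpnn_train_kernel(BPNN *net, float *eo, float *eh);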
Here is the code of backprop_kernel.cu:
////////////////////////////////////////////////////////////////////////////////
extern void bpnn_layerforward(float *l1, float *l2, float **conn, int n1, int n2);
extern void bpnn_output_error(float *delta, float *target, float *output, int nj, float *err);
extern void bpnn_hidden_error(float *delta_h, int nh, float *delta_o, int no, float **who, float *hidden, float *err);
extern void bpnn_adjust_weights(float *delta, int ndelta, float *ly, int nly, float **w, float **oldw);
extern int setup(int argc, char** argv);
extern float **alloc_2d_dbl(int m, int n);
extern float squash(float x);
double gettime() {
  struct timeval t;
  gettimeofday(&t, NULL);
  return t.tv_sec + t.tv_usec * 1e-6;
}
unsigned int num_threads = 0;
unsigned int num_blocks = 0;
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char** argv)
{
  setup(argc, argv);
}
void bpnn_train_kernel(BPNN *net, float *eo, float *eh)
{
  int in, hid, out;
  float out_err, hid_err;
  in = net->input_n;
  hid = net->hidden_n;
  out = net->output_n;
#ifdef GPU
  int m = 0;
  float *input_hidden_cuda;
  float *input_cuda;
  float *output_hidden_cuda;
  float *partial_sum;
  float *hidden_partial_sum;
  float *hidden_delta_cuda;
  float *input_prev_weights_cuda;
  float sum;
  float *input_weights_one_dim;
  float *input_weights_prev_one_dim;
  num_blocks = in / 16;
  dim3 grid(1, num_blocks);
  dim3 threads(16, 16);
  input_weights_one_dim = (float *) malloc((in + 1) * (hid + 1) * sizeof(float));
  input_weights_prev_one_dim = (float *) malloc((in + 1) * (hid + 1) * sizeof(float));
  partial_sum = (float *) malloc(num_blocks * WIDTH * sizeof(float));
  // this preprocessing stage is added to correct the bug of a wrong memcpy using the two-dimensional net->input_weights
  for (int k = 0; k <= in; k++) {
    for (int j = 0; j <= hid; j++) {
      input_weights_one_dim[m] = net->input_weights[k][j];
      input_weights_prev_one_dim[m] = net->input_prev_weights[k][j];
      m++;
    }
  }
  cudaMalloc((void**) &input_cuda, (in + 1) * sizeof(float));
  cudaMalloc((void**) &output_hidden_cuda, (hid + 1) * sizeof(float));
  cudaMalloc((void**) &input_hidden_cuda, (in + 1) * (hid + 1) * sizeof(float));
  cudaMalloc((void**) &hidden_partial_sum, num_blocks * WIDTH * sizeof(float));
#endif
#ifdef CPU
printf("Performing CPU computation\n");
bpnn_layerforward(net->input_units, net->hidden_units,net->input_weights, in, hid);
#endif
#ifdef GPU
printf("Performing GPU computation\n");
//printf("in= %d, hid = %d, numblocks = %d\n", in, hid, num_blocks);
cudaMemcpy(input_cuda, net->input_units, (in + 1) * sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(input_hidden_cuda, input_weights_one_dim, (in + 1) * (hid + 1) * sizeof(float), cudaMemcpyHostToDevice);
bpnn_layerforward_CUDA<<< grid, threads >>>(input_cuda,
output_hidden_cuda,
input_hidden_cuda,
hidden_partial_sum,
in,
hid);
cudaThreadSynchronize();
cudaError_t error = cudaGetLastError();
if (error != cudaSuccess) {
printf("bpnn kernel error: %s\n", cudaGetErrorString(error));
exit(EXIT_FAILURE);
}
cudaMemcpy(partial_sum, hidden_partial_sum, num_blocks * WIDTH * sizeof(float), cudaMemcpyDeviceToHost);
for (int j = 1; j <= hid; j++) {
sum = 0.0;
for (int k = 0; k < num_blocks; k++) {
sum += partial_sum[k * hid + j-1] ;
}
sum += net->input_weights[0][j];
net-> hidden_units[j] = float(1.0/(1.0 + exp(-sum)));
}
#endif
  bpnn_layerforward(net->hidden_units, net->output_units, net->hidden_weights, hid, out);
  bpnn_output_error(net->output_delta, net->target, net->output_units, out, &out_err);
  bpnn_hidden_error(net->hidden_delta, hid, net->output_delta, out, net->hidden_weights, net->hidden_units, &hid_err);
  bpnn_adjust_weights(net->output_delta, out, net->hidden_units, hid, net->hidden_weights, net->hidden_prev_weights);
#ifdef CPU
  bpnn_adjust_weights(net->hidden_delta, hid, net->input_units, in, net->input_weights, net->input_prev_weights);
#endif
#ifdef GPU
  cudaMalloc((void**) &hidden_delta_cuda, (hid + 1) * sizeof(float));
  cudaMalloc((void**) &input_prev_weights_cuda, (in + 1) * (hid + 1) * sizeof(float));
  cudaMemcpy(hidden_delta_cuda, net->hidden_delta, (hid + 1) * sizeof(float), cudaMemcpyHostToDevice);
  cudaMemcpy(input_prev_weights_cuda, input_weights_prev_one_dim, (in + 1) * (hid + 1) * sizeof(float), cudaMemcpyHostToDevice);
  cudaMemcpy(input_hidden_cuda, input_weights_one_dim, (in + 1) * (hid + 1) * sizeof(float), cudaMemcpyHostToDevice);
  bpnn_adjust_weights_cuda<<< grid, threads >>>(hidden_delta_cuda,
                                                hid,
                                                input_cuda,
                                                in,
                                                input_hidden_cuda,
                                                input_prev_weights_cuda);
  cudaMemcpy(net->input_units, input_cuda, (in + 1) * sizeof(float), cudaMemcpyDeviceToHost);
  cudaMemcpy(input_weights_one_dim, input_hidden_cuda, (in + 1) * (hid + 1) * sizeof(float), cudaMemcpyDeviceToHost);
  cudaFree(input_cuda);
  cudaFree(output_hidden_cuda);
  cudaFree(input_hidden_cuda);
  cudaFree(hidden_partial_sum);
  cudaFree(input_prev_weights_cuda);
  cudaFree(hidden_delta_cuda);
  free(partial_sum);
  free(input_weights_one_dim);
  free(input_weights_prev_one_dim);
#endif
}
If you need any more information or code, let me know. Thanks in advance!
Welcome to Stack Overflow! Please see [How to create a Minimal, Complete, and Verifiable example](/help/mcve). –