2013-05-13 56 views
0

這是我的代碼。當我試圖填充陣列時,出現了 malloc.c 3906 錯誤:

using namespace std; 
#include <iostream> 
#include <stdio.h> 
#include <stdlib.h> 
#define N 8000 

// Fills data[0 .. count-1] on the host, one value per rand() call.
//
// Each stored value is the *integer* quotient rand() / RAND_MAX, so it is 0
// for every draw except the single case rand() == RAND_MAX, where it is 1.
// A count of zero or less writes nothing.
//
// NOTE(review): the truncating division makes the data almost entirely zeros;
// confirm whether a wider range of random integers was intended.
void fillArray(int *data, int count){
    int *cursor = data;
    for(int remaining = count; remaining > 0; --remaining){
        const int sample = rand();
        *cursor++ = sample / RAND_MAX;
    }
}

// Kernel: one thread per element, indexed by the flat 1D global thread index.
// Threads whose index is below N read a[idx] and b[idx] and form their sum;
// threads at or beyond N do nothing.
//
// NOTE(review): the sum is kept only in a thread-local variable and is never
// stored anywhere, so the kernel has no observable result. Confirm whether an
// output array was intended.
__global__ void add(int* a, int *b){
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if(idx >= N)
        return;

    int sum = a[idx] + b[idx];
    (void) sum;
}

// Kernel: one thread per element, indexed by the flat 1D global thread index.
// Threads whose index is below N read a[idx] and b[idx] and form a[idx] - b[idx];
// threads at or beyond N do nothing.
//
// NOTE(review): the difference is kept only in a thread-local variable and is
// never stored anywhere, so the kernel has no observable result. Confirm
// whether an output array was intended.
__global__ void subtract(int* a, int *b){
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if(idx >= N)
        return;

    int difference = a[idx] - b[idx];
    (void) difference;
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what){
    if(status == cudaSuccess)
        return true;
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Times one launch of the `add` kernel with CUDA events.
//
// Parameters:
//   devA, devB       - accepted for interface compatibility only. As before,
//                      the incoming pointer values are never dereferenced:
//                      the function works on its own N-element device buffers.
//                      NOTE(review): if the caller's data should actually be
//                      processed, add a cudaMemcpy from these pointers once
//                      the caller is known to own N valid ints behind each.
//   blocksPerGrid    - grid size used for the launch.
//   threadsPerBlock  - block size used for the launch.
//
// Returns the elapsed time in milliseconds between the two events that
// bracket the kernel launch, or -1.0f if any CUDA call fails (the failure is
// reported on stderr).
//
// Changes from the previous version:
//   * `start` is recorded before the launch and `stop` after it; previously
//     both were recorded and synchronised before the kernel was launched, so
//     the measured interval never included the kernel.
//   * device buffers are held in locals, zero-filled so the kernel reads
//     defined memory, and released before returning (previously leaked).
//   * every runtime call and the launch itself are checked.
float duration(int *devA, int *devB, int blocksPerGrid, int threadsPerBlock){
    (void) devA;
    (void) devB;

    const size_t bytes = N * sizeof(int);
    int *bufA = 0;
    int *bufB = 0;
    cudaEvent_t start = 0;
    cudaEvent_t stop = 0;
    float elapsedTime = -1.0f;

    // Short-circuit chain: the first failing call stops the setup.
    bool ok = cudaOk(cudaMalloc((void**) &bufA, bytes), "cudaMalloc(A)")
           && cudaOk(cudaMalloc((void**) &bufB, bytes), "cudaMalloc(B)")
           && cudaOk(cudaMemset(bufA, 0, bytes), "cudaMemset(A)")
           && cudaOk(cudaMemset(bufB, 0, bytes), "cudaMemset(B)")
           && cudaOk(cudaEventCreate(&start), "cudaEventCreate(start)")
           && cudaOk(cudaEventCreate(&stop), "cudaEventCreate(stop)")
           && cudaOk(cudaEventRecord(start, 0), "cudaEventRecord(start)");

    if(ok){
        add<<<blocksPerGrid, threadsPerBlock>>>(bufA, bufB);

        // A launch returns no status itself; an invalid configuration is
        // reported through cudaGetLastError().
        ok = cudaOk(cudaGetLastError(), "add kernel launch")
          && cudaOk(cudaEventRecord(stop, 0), "cudaEventRecord(stop)")
          // Wait for `stop` so the kernel has finished before reading the time.
          && cudaOk(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)")
          && cudaOk(cudaEventElapsedTime(&elapsedTime, start, stop), "cudaEventElapsedTime");
    }

    if(!ok)
        elapsedTime = -1.0f;

    // Release everything that was successfully created.
    if(stop)
        cudaEventDestroy(stop);
    if(start)
        cudaEventDestroy(start);
    if(bufB)
        cudaFree(bufB);
    if(bufA)
        cudaFree(bufA);

    return elapsedTime;
}



// Host entry point: allocates a and b, fills them on the CPU with fillArray,
// then calls duration() and prints the value it returns.
int main(void) { 

    // NOTE(review): `new int(N)` allocates ONE int initialised to N, not an
    // array of N ints (that would be `new int[N]`). The fillArray calls below
    // therefore write past the end of each single-int allocation, which is the
    // heap corruption behind the malloc assertion discussed in this thread.
    int *a = new int(N); 
    int *b = new int(N); 
    float dur = 0 ; 



    fillArray(a, N); 
    fillArray(b, N); 

    // Launch configuration passed on: N blocks of 1 thread each.
    dur = duration(a,b,N,1); 

    cout << "Global memory version:\n"; 
    cout << "Process completed in " << dur; 
    cout << "for a data set of " << N << " integers."; 

    // NOTE(review): a and b are never released before returning.

    return 0; 
} 

正如你所看到的,我在 CPU 端用 fillArray 函數填充陣列。但 fillArray 函數給出了以下錯誤:

malloc.c 3906 : sYSMalloc: Assertion bla bla 

我在這裏遺漏了什麼?我只是試圖填充數組,可能是什麼問題?即使我刪除 duration 函數中對 add 內核的調用,仍然會得到這個錯誤。這裏有什麼問題?

+2

請把完整的錯誤信息寫出來,這樣別人才能幫助你。如果你在 Linux 上,可以使用 valgrind 來查找這種類型的錯誤 – qwr 2013-05-13 05:23:35

+0

可能的重複:[爲什麼我會得到一個 C malloc 斷言失敗?](http://stackoverflow.com/questions/2987207/why-do-i-get-a-c-malloc-assertion-failure) – talonmies 2013-05-13 05:33:27

+0

這個問題與 CUDA 有關,你應該重新給它加上標籤(retag)。 – talonmies 2013-05-13 06:11:02

回答

2

錯誤在於您的 a 和 b 陣列的創建。正如 @qwr 和 @talonmies 所說,使用 valgrind(或任何 Windows 上的替代工具)可以幫助你找到這類錯誤的來源:

==8288== Invalid write of size 4 
==8288== at 0x400DD2: fillArray(int*, int) (kernel.cu:11) 
==8288== by 0x400F79: main (kernel.cu:63) 
==8288== Address 0x62783e4 is 0 bytes after a block of size 4 alloc'd 
==8288== at 0x4C2BA77: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) 
==8288== by 0x400F41: main (kernel.cu:57) 
==8288== 
==8288== Invalid write of size 4 
==8288== at 0x400DD2: fillArray(int*, int) (kernel.cu:11) 
==8288== by 0x400F8A: main (kernel.cu:64) 
==8288== Address 0x6278434 is 0 bytes after a block of size 4 alloc'd 
==8288== at 0x4C2BA77: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) 
==8288== by 0x400F55: main (kernel.cu:58) 

如果更改:

int *a = new int(N); 
int *b = new int(N); 

爲:

int *a = new int[N]; 
int *b = new int[N]; 

錯誤消失。事實上,你並沒有爲數組分配內存,而只是爲一個整數分配了內存(new int(N) 創建的是一個初始值爲 N 的 int)。因爲錯誤既可能發生在 GPU 端,也可能發生在 CPU 端,所以在調試 CUDA 代碼時,需要同時使用 GPU/設備調試工具(cuda-memcheck、cuda-gdb)和 CPU/主機工具(valgrind)。不要忘記使用 nvcc 的兩個調試標誌進行編譯:設備代碼用 -G,主機代碼用 -g。

作爲良好的習慣,你還應該在 main 函數結束前 delete 你的陣列:

delete [] a; 
delete [] b; 
相關問題