我是CUDA的新手,當我嘗試將陣列從主機複製到設備時出現錯誤。 誤差:無重載的函數 「cudaMalloc」 實例相匹配的參數listargument類型有:(INT(*)[1048576],無符號長整數)從CUDA主機複製陣列到設備
#include <assert.h>
#include <cuda.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <time.h>
#include <unistd.h>
#include <curand.h>
#include <curand_kernel.h>
#define N (1024*1024)
#define M (1000000)
/**************************************************/
/* this GPU kernel function is used to initialize the random states */
__global__ void init(unsigned int seed, curandState_t* states) {
/* we have to initialize the state */
curand_init(seed, /* the seed can be the same for each core, here we pass the time in from the CPU */
blockIdx.x, /* the sequence number should be different for each core (unless you want all
cores to get the same sequence of numbers for some reason - use thread id! */
0, /* the offset is how much extra we advance in the sequence for each call, can be 0 */
&states[blockIdx.x]);
}
/* this GPU kernel takes an array of states, and an array of ints, and puts a random int into each */
__global__ void randoms(curandState_t* states, unsigned int* numbers) {
/* curand works like rand - except that it takes a state as a parameter */
numbers[blockIdx.x] = curand(&states[blockIdx.x]) %2000;
};
/*******************************************************/
__global__ void cudakernel(int *buf)
{
int i = threadIdx.x + blockIdx.x * blockDim.x;
// buf[i] = rand();
for(int j = 0; j < M; j++)
buf[i] = buf[i] * buf[i] - 0.25f;
}
int main()
{
/*****************************************************/
/* CUDA's random number library uses curandState_t to keep track of the seed value
we will store a random state for every thread */
curandState_t* states;
/* allocate space on the GPU for the random states */
cudaMalloc((void**) &states, N * sizeof(curandState_t));
/* invoke the GPU to initialize all of the random states */
init<<<N, 1>>>(time(0), states);
/* allocate an array of unsigned ints on the CPU and GPU */
// unsigned int cpu_nums[N];//getting error in median relared to type of int
unsigned int* gpu_nums;
int cpu_nums[N];
cudaMalloc((void**) &gpu_nums, N * sizeof(unsigned int));
/* invoke the kernel to get some random numbers */
randoms<<<N, 1>>>(states, gpu_nums);
/* copy the random numbers back */
cudaMemcpy(cpu_nums, gpu_nums, N * sizeof(unsigned int), cudaMemcpyDeviceToHost);
/******************************************************************************/
int data[N];// int count = 0;
int cpunums[N],i;
for (i=0;i<=N;i++)
cpunums[i]=cpu_nums[i];
cudaMalloc(&cpunums, N * sizeof(int));
cudakernel<<<N/256, 256>>>(cpunums);
cudaMemcpy(data, cpunums, N * sizeof(int), cudaMemcpyDeviceToHost);
cudaFree(cpunums);
int sel;
printf("Enter an index: ");
scanf("%d", &sel);
printf("data[%d] = %f\n", sel, data[sel]);
}
我試圖複製cpunums [I]從宿主數組到設備agine affter我genti從設備的隨機數字。
我試圖調用設備的功能,但我有很多errors.so我試過這種方式。
閱讀錯誤信息並思考一分鐘。 cudaMalloc與將主機陣列複製到設備有什麼關係? – talonmies