3
我試圖用Clang 3.0將簡單的CUDA程序轉換爲LLVM IR。程序如下(問題主題:Clang中的CUDA支持):
#include <stdio.h>
#include <stdlib.h>
#include <clang/test/SemaCUDA/cuda.h>
// Kernel: stores the calling thread's flat 1-D global index
// (blockIdx.x * blockDim.x + threadIdx.x) into the single int pointed to by `a`.
// Every thread writes to the same address, so the stored value is only
// well-defined when exactly one thread is launched.
__global__ void kernfunc(int *a)
{
    // Index of the first thread of this block within the whole grid.
    const unsigned int blockStart = blockIdx.x * blockDim.x;
    *a = blockStart + threadIdx.x;
}
// Reports a failed CUDA runtime call on stderr.
// Returns 1 when `status` is an error, 0 when it is cudaSuccess.
static int cudaFailed(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return 0;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return 1;
}

// Host driver: allocates one int on the host and one on the device, copies the
// host value to the device, launches kernfunc with a single thread, copies the
// result back and prints it.
// Returns 0 on success, 1 if any allocation, copy or launch fails.
int main()
{
    // All locals are declared up front so that `goto cleanup` never jumps
    // over an initialization.
    int *h_a = NULL, *d_a = NULL, n;
    int rc = 1;

    n = sizeof(int);
    h_a = (int*)malloc(n);
    if (h_a == NULL) {
        fprintf(stderr, "malloc failed\n");
        return 1;
    }
    *h_a = 5;

    // cudaMalloc takes the address of the device pointer, i.e. a void**.
    if (cudaFailed(cudaMalloc((void**)&d_a, n), "cudaMalloc"))
        goto cleanup;
    if (cudaFailed(cudaMemcpy(d_a, h_a, n, cudaMemcpyHostToDevice), "cudaMemcpy (host to device)"))
        goto cleanup;

    // Kernel call: one block of one thread. The kernel is named kernfunc.
    kernfunc<<<1,1>>>(d_a);
    // A launch does not return an error itself; query it explicitly.
    if (cudaFailed(cudaGetLastError(), "kernfunc launch"))
        goto cleanup;

    // The blocking copy back also reports errors raised while the kernel ran.
    if (cudaFailed(cudaMemcpy(h_a, d_a, n, cudaMemcpyDeviceToHost), "cudaMemcpy (device to host)"))
        goto cleanup;

    printf("%d", *h_a);
    rc = 0;

cleanup:
    // cudaFree on a null pointer is documented as a no-op, so this is safe
    // even when cudaMalloc failed.
    cudaFree(d_a);
    free(h_a);
    return rc;
}
應該包含哪些額外的頭文件?Clang 3.0目前不支持哪部分代碼?