新的thrust :: tabulate功能適用於我在主機上,但不適用於設備。該設備是K20x,計算能力爲3.5。主機是一臺128GB內存的Ubuntu機器。幫幫我?推力1.7在CUDA設備上列表失敗
我認爲統一尋址不是問題,因爲我可以在設備上對統一尋址的陣列進行排序。
#include <iostream>
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/tabulate.h>
#include <thrust/version.h>
using namespace std;
// Print an expression's name then its value, possible followed by a
// comma or endl. Ex: cout << PRINTC(x) << PRINTN(y);
#define PRINT(arg) #arg "=" << (arg)
#define PRINTC(arg) #arg "=" << (arg) << ", "
#define PRINTN(arg) #arg "=" << (arg) << endl
// Execute an expression and check for CUDA errors.
#define CE(exp) { \
cudaError_t e; e = (exp); \
if (e != cudaSuccess) { \
cerr << #exp << " failed at line " << __LINE__ << " with error " << cudaGetErrorString(e) << endl; \
exit(1); \
} \
}
const int N(10);
int main(void) {
int major = THRUST_MAJOR_VERSION;
int minor = THRUST_MINOR_VERSION;
cout << "Thrust v" << major << "." << minor
<< ", CUDA_VERSION: " << CUDA_VERSION << ", CUDA_ARCH: " << __CUDA_ARCH__
<< endl;
cout << PRINTN(N);
cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, 0);
if (!prop.unifiedAddressing) {
cerr << "Unified addressing not available." << endl;
exit(1);
}
cudaGetDeviceProperties(&prop, 0);
if (!prop.canMapHostMemory) {
cerr << "Can't map host memory." << endl;
exit(1);
}
cudaSetDeviceFlags(cudaDeviceMapHost);
int *p, *q;
CE(cudaHostAlloc(&p, N*sizeof(int), cudaHostAllocMapped));
CE(cudaHostAlloc(&q, N*sizeof(int), cudaHostAllocMapped));
thrust::tabulate(thrust::host, p, p+N, thrust::negate<int>());
thrust::tabulate(thrust::device, q, q+N, thrust::negate<int>());
for (int i=0; i<N; i++)
cout << PRINTC(i) << PRINTC(p[i]) << PRINTN(q[i]);
}
輸出:
Thrust v1.7, CUDA_VERSION: 6000, CUDA_ARCH: 0
N=10
i=0, p[i]=0, q[i]=0
i=1, p[i]=-1, q[i]=0
i=2, p[i]=-2, q[i]=0
i=3, p[i]=-3, q[i]=0
i=4, p[i]=-4, q[i]=0
i=5, p[i]=-5, q[i]=0
i=6, p[i]=-6, q[i]=0
i=7, p[i]=-7, q[i]=0
i=8, p[i]=-8, q[i]=0
i=9, p[i]=-9, q[i]=0
下不會將任何信息內容添加到我的職位,但之前計算器會接受它是必需的:大部分的程序是錯誤檢查和版本檢查。