我想做一些推力操作,但我不確定如何。推力操作空主機陣列
現在,我收到時許陣列全是零(在h_a陣列)
我:
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <iostream>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <thrust/device_ptr.h>
#include <thrust/fill.h>
#include <thrust/transform.h>
#include <thrust/functional.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/copy.h>
#include <thrust/generate.h>
template <typename T>
struct square
{
__host__ __device__
T operator()(const T& x) const
{
return x * x;
}
};
int
main(
int argc,
const char * argv[])
{
const size_t NbOfPoints = 256;
int BlocksPerGridX = 16;
int BlocksPerGridY = 16;
int ThreadsPerBlockX = 16;
int ThreadsPerBlockY = 16;
// generate random data on the host
thrust::host_vector<float> h_Kx (NbOfPoints);
thrust::generate(h_Kx.begin(), h_Kx.end(), rand);
thrust::host_vector<float> h_Ky (NbOfPoints);
thrust::generate(h_Ky.begin(), h_Ky.end(), rand);
// transfer to device
thrust::device_vector<float> dev_Kx = h_Kx;
thrust::device_vector<float> dev_Ky = h_Ky;
// create arrays for holding the number of threads per block in each dimension
int * X , * Y;
cudaMalloc((void **) &X, ThreadsPerBlockX * BlocksPerGridX * sizeof(*X));
cudaMalloc((void **) &Y, ThreadsPerBlockY * BlocksPerGridY * sizeof(*Y));
// wrap raw pointer with a device_ptr
thrust::device_ptr<int> dev_X (X);
thrust::device_ptr<int> dev_Y (Y);
// use device_ptr in Thrust algorithms
thrust::fill(dev_X, dev_X + (ThreadsPerBlockX * BlocksPerGridX) , (int) 0);
thrust::fill(dev_Y, dev_Y + (ThreadsPerBlockY * BlocksPerGridY) , (int) 0);
// setup arguments
square<float> square_op;
// create various vectors
thrust::device_vector<int> distX (NbOfPoints);
thrust::device_vector<int> distY (NbOfPoints);
thrust::device_vector<unsigned int> Tmp (NbOfPoints);
thrust::host_vector<unsigned int> h_a (NbOfPoints);
thrust::device_vector<unsigned int> distXSquared (NbOfPoints);
thrust::device_vector<unsigned int> distYSquared (NbOfPoints);
// compute distX = dev_Kx - dev_X and distY = dev_Ky - dev_Y
thrust::transform(dev_Kx.begin(), dev_Kx.begin(), dev_X , distX.begin() , thrust::minus<float>());
thrust::transform(dev_Ky.begin(), dev_Ky.begin(), dev_Y , distY.begin() , thrust::minus<float>());
//square distances
thrust::transform(distX.begin(), distX.end(), distXSquared.begin(), square_op);
thrust::transform(distY.begin(), distY.end(), distYSquared.begin(), square_op);
// compute Tmp = distX + distY
thrust::transform(distXSquared.begin() ,distXSquared.begin() , distYSquared.begin() , Tmp.begin() , thrust::plus<unsigned int>());
thrust::copy(Tmp.begin(), Tmp.end(), h_a.begin());
for (int i = 0; i < 5; i ++)
printf("\n temp = %u",h_a[ i ]);
return 0;
}
UPDATE:
除了羅伯特Crovella的編輯,你必須編輯整數:
square<int> square_op;
thrust::transform(dev_Kx.begin(), dev_Kx.end(), dev_X , distX.begin() , thrust::minus<int>());
thrust::transform(dev_Ky.begin(), dev_Ky.end(), dev_Y , distY.begin() , thrust::minus<int>());
什麼*確切*是那些「各種錯誤」? – 2014-12-09 12:26:28
@Park Young-Bae:我更新了.. – George 2014-12-09 12:29:37
發表一個例子說有人可以自己編譯和運行它有多難?我絕望,即使在[SO]問了200 *個問題之後,你仍然沒有明白這個地方是如何工作的。 – talonmies 2014-12-09 14:38:05