使用 CUDA Thrust 置換迭代器複製陣列的特定元素

我有一個 glm::vec3 的陣列，其中包含 count * 3 個元素。我還有另一個陣列，其中包含要複製的元素的 int 索引。舉個例子：

// Source buffer: 9 vec3 values, grouped as 3 "items" of 3 consecutive elements.
thrust::device_vector<glm::vec3> vals(9); 
// vals contains 9 vec3, which represent 3 "items" 
// vals[0], vals[1], vals[2] are the first "item", 
// vals[3], vals[4], vals[5] are the second "item"... 

// Item indices (not element indices) selecting which items to copy.
int idcs[] = {0, 2}; 
// index 0 and 2 should be copied, i.e. 
// vals[0..2] and vals[6..8]

我試圖使用置換迭代器，但我不能讓它工作。我的做法是：

// Asker's attempt: use idcs to permute vals directly. As the question goes on
// to explain, this selects a single element per index (vals[0] and vals[2])
// rather than the three elements that make up each item.
thrust::copy(
    thrust::make_permutation_iterator(vals, idcs), 
    thrust::make_permutation_iterator(vals, idcs + 2), 
    target.begin() 
);

但這當然只會複製 vals[0] 和 vals[2]，而不是 vals[0]、vals[1]、vals[2] 以及 vals[6]、vals[7]、vals[8]。

是否可以使用Thrust將所需值從一個緩衝區複製到另一個緩衝區？

來源

2014-07-17 wondering

我認爲，我們可以將strided ranges的想法與您的permutation iterator方法結合起來，實現您想要的目標。

基本思想是：用你的置換迭代器方法來選擇要複製的項目「組」，再用 3 個跨步範圍（strided range）迭代器組合成一個 zip 迭代器，來選取每個組中的 3 個元素。我們需要一個用於輸入的 zip 迭代器，以及一個用於輸出的 zip 迭代器。下面是一個完整的可運行示例，其中以 uint3 代替 glm::vec3：

$ cat t484.cu 
#include <vector_types.h> 
#include <thrust/device_vector.h> 
#include <thrust/host_vector.h> 
#include <iostream> 
#include <thrust/copy.h> 
#include <thrust/iterator/permutation_iterator.h> 
#include <thrust/iterator/counting_iterator.h> 
#include <thrust/iterator/transform_iterator.h> 
#include <thrust/functional.h> 


// Number of uint3 elements in the test input (main() sizes h_vals with it);
// with 3 elements per item this gives 6 items.
#define DSIZE 18 


template <typename Iterator> 
class strided_range 
{ 
    public: 

    typedef typename thrust::iterator_difference<Iterator>::type difference_type; 

    struct stride_functor : public thrust::unary_function<difference_type,difference_type> 
    { 
     difference_type stride; 

     stride_functor(difference_type stride) 
      : stride(stride) {} 

     __host__ __device__ 
     difference_type operator()(const difference_type& i) const 
     { 
      return stride * i; 
     } 
    }; 

    typedef typename thrust::counting_iterator<difference_type>     CountingIterator; 
    typedef typename thrust::transform_iterator<stride_functor, CountingIterator> TransformIterator; 
    typedef typename thrust::permutation_iterator<Iterator,TransformIterator>  PermutationIterator; 

    // type of the strided_range iterator 
    typedef PermutationIterator iterator; 

    // construct strided_range for the range [first,last) 
    strided_range(Iterator first, Iterator last, difference_type stride) 
     : first(first), last(last), stride(stride) {} 

    iterator begin(void) const 
    { 
     return PermutationIterator(first, TransformIterator(CountingIterator(0), stride_functor(stride))); 
    } 

    iterator end(void) const 
    { 
     return begin() + ((last - first) + (stride - 1))/stride; 
    } 

    protected: 
    Iterator first; 
    Iterator last; 
    difference_type stride; 
}; 

// Iterator type of the uint3 device vectors; main() uses it to instantiate
// strided_range for both the input and the output buffer.
typedef thrust::device_vector<uint3>::iterator Iter; 

int main(){ 
// set up test data 
    int idcs[] = {0, 2, 5}; 
    unsigned num_idcs = sizeof(idcs)/sizeof(int); 
    thrust::host_vector<uint3> h_vals(DSIZE); 
    for (int i = 0; i < DSIZE; i ++) { 
    h_vals[i].x = i; 
    h_vals[i].y = 100+i; 
    h_vals[i].z = 1000+i;} 
    thrust::device_vector<uint3> d_target(num_idcs*3); 
    thrust::host_vector<int> h_idcs(idcs, idcs + num_idcs); 
    thrust::device_vector<int> d_idcs = h_idcs; 
    thrust::device_vector<uint3> d_vals = h_vals; 
// set up strided ranges for input, output 
    strided_range<Iter> item_1(d_vals.begin() , d_vals.end(), 3); 
    strided_range<Iter> item_2(d_vals.begin()+1, d_vals.end(), 3); 
    strided_range<Iter> item_3(d_vals.begin()+2, d_vals.end(), 3); 
// set up strided ranges for output 
    strided_range<Iter> out_1(d_target.begin() , d_target.end(), 3); 
    strided_range<Iter> out_2(d_target.begin()+1, d_target.end(), 3); 
    strided_range<Iter> out_3(d_target.begin()+2, d_target.end(), 3); 
// copy from input to output 
    thrust::copy(thrust::make_permutation_iterator(thrust::make_zip_iterator(thrust::make_tuple(item_1.begin(), item_2.begin(), item_3.begin())), d_idcs.begin()), thrust::make_permutation_iterator(thrust::make_zip_iterator(thrust::make_tuple(item_1.begin(), item_2.begin(), item_3.begin())), d_idcs.end()), thrust::make_zip_iterator(thrust::make_tuple(out_1.begin(), out_2.begin(), out_3.begin()))); 
// print out results 
    thrust::host_vector<uint3> h_target = d_target; 
    for (int i = 0; i < h_target.size(); i++) 
    std::cout << "index: " << i << " x: " << h_target[i].x << " y: " << h_target[i].y << " z: " << h_target[i].z << std::endl; 
    return 0; 
} 
$ nvcc -arch=sm_20 -o t484 t484.cu 
$ ./t484 
index: 0 x: 0 y: 100 z: 1000 
index: 1 x: 1 y: 101 z: 1001 
index: 2 x: 2 y: 102 z: 1002 
index: 3 x: 6 y: 106 z: 1006 
index: 4 x: 7 y: 107 z: 1007 
index: 5 x: 8 y: 108 z: 1008 
index: 6 x: 15 y: 115 z: 1015 
index: 7 x: 16 y: 116 z: 1016 
index: 8 x: 17 y: 117 z: 1017 
$

來源

2014-07-17 03:23:57

使用 CUDA Thrust 置換迭代器複製陣列的特定元素

回答

相關問題