下面你可以找到一個完整的工作示例。我希望這對未來的用戶有所幫助。
#include <thrust/device_vector.h>
#include <thrust/reduce.h>
// --- Defining key tuple type
typedef thrust::tuple<int,int> Tuple;
typedef thrust::host_vector<Tuple>::iterator dIter1;
typedef thrust::host_vector<float>::iterator dIter2;
/************************************/
/* EQUALITY OPERATOR BETWEEN TUPLES */
/************************************/
struct BinaryPredicate
{
__host__ __device__ bool operator()
(const Tuple& lhs, const Tuple& rhs)
{
return (thrust::get<0>(lhs) == thrust::get<0>(rhs)) && (thrust::get<1>(lhs) == thrust::get<1>(rhs));
}
};
/********/
/* MAIN */
/********/
int main()
{
const int N = 7;
thrust::host_vector<Tuple> keys_input(N);
thrust::host_vector<float> values_input(N);
int keys1_input[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys 1
int keys2_input[N] = {1, 5, 3, 8, 2, 2, 1}; // input keys 2
float input_values[N] = {9., 8., 7., 6., 5., 4., 3.}; // input values
for (int i=0; i<N; i++) {
keys_input[i] = thrust::make_tuple(keys1_input[i], keys2_input[i]);
values_input[i] = input_values[i];
}
for (int i=0; i<N; i++) printf("%i %i\n", thrust::get<0>(keys_input[i]), thrust::get<1>(keys_input[i]));
thrust::host_vector<Tuple> keys_output(N);
thrust::host_vector<float> values_output(N);
thrust::pair<dIter1, dIter2> new_end;
new_end = thrust::reduce_by_key(keys_input.begin(),
keys_input.end(),
values_input.begin(),
keys_output.begin(),
values_output.begin(),
BinaryPredicate(),
thrust::plus<float>());
int Nkeys = new_end.first - keys_output.begin();
printf("\n\n");
for (int i = 0; i < Nkeys; i++) printf("%i; %f\n", i, values_output[i]);
printf("\n\n");
for (int i = 0; i < Nkeys; i++) printf("%i %i\n", thrust::get<0>(keys_output[i]), thrust::get<1>(keys_output[i]));
return 0;
}
EDIT
上述工作例子稱爲host_vector
的。下面是一個完整的例子,考慮關鍵和值向量是常規的數組的情況。
#include <thrust/device_vector.h>
#include <thrust/reduce.h>
#include "Utilities.cuh"
// --- Defining key tuple type
typedef thrust::tuple<int, int> Tuple;
typedef thrust::device_vector<Tuple>::iterator dIter1;
typedef thrust::device_vector<float>::iterator dIter2;
/************************************/
/* EQUALITY OPERATOR BETWEEN TUPLES */
/************************************/
struct BinaryPredicate
{
__host__ __device__ bool operator()
(const Tuple& lhs, const Tuple& rhs)
{
return (thrust::get<0>(lhs) == thrust::get<0>(rhs)) && (thrust::get<1>(lhs) == thrust::get<1>(rhs));
}
};
/********/
/* MAIN */
/********/
int main()
{
const int N = 7;
// --- Keys and input values on the host: allocation and definition
int h_keys1_input[N] = { 1, 3, 3, 3, 2, 2, 1 }; // --- Input keys 1 - host side
int h_keys2_input[N] = { 1, 5, 3, 8, 2, 2, 1 }; // --- Input keys 2 - host side
float h_input_values[N] = { 9., 8., 7., 6., 5., 4., 3. }; // --- Input values - host side
// --- Keys and input values on the device: allocation
int *d_keys1_input; gpuErrchk(cudaMalloc(&d_keys1_input, N * sizeof(int))); // --- Input keys 1 - device side
int *d_keys2_input; gpuErrchk(cudaMalloc(&d_keys2_input, N * sizeof(int))); // --- Input keys 2 - device side
float *d_input_values; gpuErrchk(cudaMalloc(&d_input_values, N * sizeof(float))); // --- Input values - device side
// --- Keys and input values: host -> device
gpuErrchk(cudaMemcpy(d_keys1_input, h_keys1_input, N * sizeof(int), cudaMemcpyHostToDevice));
gpuErrchk(cudaMemcpy(d_keys2_input, h_keys2_input, N * sizeof(int), cudaMemcpyHostToDevice));
gpuErrchk(cudaMemcpy(d_input_values, h_input_values, N * sizeof(float), cudaMemcpyHostToDevice));
// --- From raw pointers to device_ptr
thrust::device_ptr<int> dev_ptr_keys1 = thrust::device_pointer_cast(d_keys1_input);
thrust::device_ptr<int> dev_ptr_keys2 = thrust::device_pointer_cast(d_keys2_input);
thrust::device_ptr<float> dev_ptr_values = thrust::device_pointer_cast(d_input_values);
// --- Declare outputs
thrust::device_vector<Tuple> d_keys_output(N);
thrust::device_vector<float> d_values_output(N);
thrust::pair<dIter1, dIter2> new_end;
auto begin = thrust::make_zip_iterator(thrust::make_tuple(dev_ptr_keys1, dev_ptr_keys2));
auto end = thrust::make_zip_iterator(thrust::make_tuple(dev_ptr_keys1 + N, dev_ptr_keys2 + N));
new_end = thrust::reduce_by_key(begin,
end,
dev_ptr_values,
d_keys_output.begin(),
d_values_output.begin(),
BinaryPredicate(),
thrust::plus<float>());
int Nkeys = new_end.first - d_keys_output.begin();
printf("\n\n");
for (int i = 0; i < Nkeys; i++) {
float output = d_values_output[i];
printf("%i; %f\n", i, output);
}
thrust::host_vector<Tuple> h_keys_output(d_keys_output);
printf("\n\n");
for (int i = 0; i < Nkeys; i++) {
int key1 = thrust::get<0>(h_keys_output[i]);
int key2 = thrust::get<1>(h_keys_output[i]);
printf("%i %i\n", key1, key2);
}
return 0;
}
如果按鍵排序,則需要* three *個矢量,其中兩個用於生成元組,另一個用於存儲元素以執行排序。 – talonmies
這不是一個排序問題,而是一個減少問題。當我做一個reduce_by_key時,它會產生一個錯誤的結果向量。根據你所說的我不知道我該如何解決這個問題。對不起我的英語不好 :) ! – user1413067