我需要對元組數組進行排序,所以我爲元組定義了一個運算符並使用thrust::sort
進行排序。元組上的推力排序非常緩慢
所以我發現,對數組進行排序顯然比對數組排序要慢得多。這裏是我的代碼:
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/set_operations.h>
#include <thrust/reduce.h>
#include <thrust/unique.h>
#include <thrust/binary_search.h>
#include <thrust/gather.h>
#include <thrust/transform.h>
#include <thrust/functional.h>
#include <thrust/sort.h>
#include <thrust/execution_policy.h>
#include <iostream>
static const int size = 100000;
#define mzi(x) thrust::make_zip_iterator(x)
#define mt(...) thrust::make_tuple(__VA_ARGS__)
typedef thrust::tuple<int, int> IntTuple;
typedef thrust::device_vector<IntTuple>::iterator TupleIterator;
typedef thrust::device_vector<int>::iterator IntIterator;
typedef thrust::tuple<IntIterator, IntIterator> IteratorTuple;
typedef thrust::zip_iterator<IteratorTuple> ZipIterator;
struct TupleComp
{
__host__ __device__
bool operator()(const IntTuple& t1, const IntTuple& t2)
{
return t1.get<0>() != t2.get<0>() ? t1.get<0>() < t2.get<0>() : t1.get<1>() > t2.get<1>();
}
};
int main()
{
timespec start;
clock_gettime(0, &start);
thrust::device_vector<int> dataA1(size);
thrust::device_vector<int> dataA2(size);
thrust::device_vector<int> dataB1(size);
thrust::device_vector<int> dataB2(size);
srand(time(NULL));
for (int i = 0; i < size; i++)
{
//dataA[i] = dataA[i - 1] + (rand() % 100);
dataA1[i] = (rand() % 100);
dataA2[i] = (rand() % 100);
dataB1[i] = (rand() % 100);
dataB2[i] = (rand() % 100);
std::cout << dataA1[i] << "\t" << dataA2[i] << "\t" << dataB1[i] << "\t" << dataB2[i];
std::cout << std::endl;
}
timespec end;
clock_gettime(0, &end);
std::cout << "gendb took: " << end.tv_sec - start.tv_sec << "s" << end.tv_nsec - start.tv_nsec << "ns" << std::endl;
ZipIterator beginA = mzi(mt(dataA1.begin(), dataA2.begin()));
ZipIterator beginB = mzi(mt(dataB1.begin(), dataB2.begin()));
ZipIterator endA = mzi(mt(dataA1.end(), dataA2.end()));
ZipIterator endB = mzi(mt(dataB1.end(), dataB2.end()));
thrust::device_vector<IntTuple> A(size);
thrust::device_vector<IntTuple> B(size);
clock_gettime(0, &start);
thrust::copy(beginA, endA, A.begin());
thrust::copy(beginB, endB, B.begin());
clock_gettime(0, &end);
std::cout << "thrust::copy took: " << end.tv_sec - start.tv_sec << "s" << end.tv_nsec - start.tv_nsec << "ns" << std::endl;
clock_gettime(0, &start);
thrust::sort(A.begin(), A.end());
clock_gettime(0, &end);
std::cout << "A thrust::sort took: " << end.tv_sec - start.tv_sec << "s" << end.tv_nsec - start.tv_nsec << "ns" << std::endl;
clock_gettime(0, &start);
thrust::sort(B.begin(), B.end(), TupleComp());
clock_gettime(0, &end);
std::cout << "B thrust::sort took: " << end.tv_sec - start.tv_sec << "s" << end.tv_nsec - start.tv_nsec << "ns" << std::endl;
clock_gettime(0, &start);
thrust::sort(dataA1.begin(), dataA1.end());
clock_gettime(0, &end);
std::cout << "regular thrust::sort took: " << end.tv_sec - start.tv_sec << "s" << end.tv_nsec - start.tv_nsec << "ns" << std::endl;
clock_gettime(0, &start);
thrust::sort(beginA, endA, TupleComp());
thrust::sort(beginB, endB, TupleComp());
clock_gettime(0, &end);
std::cout << "thrust::sort took: " << end.tv_sec - start.tv_sec << "s" << end.tv_nsec - start.tv_nsec << "ns" << std::endl;
}
我理解的元組排序是〜10X比普通排序慢倍。
我不明白爲什麼。運營商的類型複雜程度是否直接受到運營商的影響?儘管如此,我的操作員並不比常規比較器慢10倍。
注: 這不只是10倍倍慢: 100000這是〜10倍速度較慢 達到1000000這是〜20倍速度較慢
我還發現,應對兩個數組到元組的數組和排序數組相反,它的速度快了150%,而thrust :: copy幾乎沒有任何效果(0.3爲1M)。
注2:
我改變了我的運營商這樣的:
struct TupleComp
{
__host__ __device__
bool operator()(const IntTuple& t1, const IntTuple& t2)
{
if(t1.get<0>() < t2.get<0>())
return true;
if(t1.get<0>() > t2.get<0>())
return false;
return t1.get<1>() > t2.get<1>();
}
};
現在的排序是約112.5%的速度,這可能是因爲equals
上的第一個值是很少發生的,這一般來說,檢查運營商的方式較少。
比較似乎不公平,因爲他們不使用相同的順序邏輯,也沒有相同的數據... – Jarod42
爲什麼不呢?這個結果是一些測試的平均值。他們中沒有一個依靠技巧或一些確切的排列順序,都是隨機的和公平的。改變比較器的確增加了平均速度,爲什麼它不公平? – Vladp
如果我理解正確,那麼是的,我做了各種各樣的排序,但在我寫的結果中,我只比較了排序(zip vs tuple,mycomp),這種公平的比較,我也評論了其他代碼以查看分別進行分類。我發佈的代碼立即擁有我的所有測試。 – Vladp