2017-01-17 195 views
3

我有一個this question的後續行動。在R中,您可以在輔助鍵之後訂購重複項。我如何在Rcpp中實現這個目標?我正在嘗試以下內容(受Kevin Ushey對原始問題的回答的啓發),這似乎適用於簡單情況。我怎麼能概括這個?rcpp排列順序

// [[Rcpp::export]] 
Rcpp::IntegerVector order_(Rcpp::NumericVector x, Rcpp::NumericVector y) { 
    // sort x 
    Rcpp::NumericVector sorted_x = clone(x).sort(); 
    // find order of x only 
    Rcpp::IntegerVector order = Rcpp::match(sorted_x, x); 
    // find order of y for each order of x 
    for (int i = 1; i <= Rcpp::max(order); ++i){ 
    Rcpp::LogicalVector duplicates = order == i; 
    int k = which_max(duplicates); 
    Rcpp::NumericVector duplicated_y = y[duplicates]; 
    Rcpp::NumericVector sorted_dy = clone(duplicated_y).sort(); 
    Rcpp::IntegerVector order_y = Rcpp::match(sorted_dy, duplicated_y); 
    for (int j = 0; j < order_y.length(); ++j){ 
     order(k + j) = order(k + j) + order_y(j) - 1; 
    } 
    } 
    return order; 
} 
+1

你不侷限於RCPP類型。我幾乎可以肯定,Boost或C++標準庫將有一個'次級密鑰訂單'選項,所以我會尋找並使用它。 _I.e._ [這是一個老問題的答案](http://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes)顯示C++ 11 lambda的使用,它可以讓你插入自定義比較器。 –

回答

3

如果我明白你問什麼,一個辦法是充分利用comparison operators of std::tuple,應該「做正確的事」。

// [[Rcpp::plugins(cpp11)]] 
#include <Rcpp.h> 
#include <tuple> 

template <typename... Ts> 
struct Point { 
    std::size_t index; 

    using tuple_t = std::tuple<Ts...>; 
    tuple_t data; 

    template <typename... Vs> 
    Point(std::size_t i, const Vs&... vs) 
     : index(i), 
      data(std::forward<decltype(vs[i])>(vs[i])...) 
    {} 

    bool operator<(const Point& other) const { 
     return data < other.data; 
    } 
}; 

template <typename... Ts> 
class PointVector { 
public: 
    std::size_t sz; 
    using vector_t = std::vector<Point<Ts...>>; 
    vector_t data; 

    template <typename... Vs> 
    PointVector(const Vs&... vs) 
     : sz(min_size(vs...)) 
    { 
     data.reserve(sz); 
     for (std::size_t i = 0; i < sz; i++) { 
      data.emplace_back(i, vs...); 
     } 
    } 

    Rcpp::IntegerVector sorted_index() const { 
     vector_t tmp(data); 
     std::stable_sort(tmp.begin(), tmp.end()); 

     Rcpp::IntegerVector res(sz); 
     for (std::size_t i = 0; i < sz; i++) { 
      res[i] = tmp[i].index + 1; 
     } 

     return res; 
    } 

private: 
    template <typename V> 
    std::size_t min_size(const V& v) { 
     return v.size(); 
    } 

    template <typename T, typename S, typename... Vs> 
    std::size_t min_size(const T& t, const S& s, const Vs&... vs) { 
     return t.size() < s.size() ? 
      min_size(t, vs...) : 
      min_size(s, vs...); 
    } 
}; 

using namespace Rcpp; 

// [[Rcpp::export]] 
IntegerVector order2(NumericVector x, NumericVector y) { 
    PointVector<double, double> pv(x, y); 
    return pv.sorted_index(); 
} 

// [[Rcpp::export]] 
IntegerVector order3(NumericVector x, NumericVector y, NumericVector z) { 
    PointVector<double, double, double> pv(x, y, z); 
    return pv.sorted_index(); 
} 

使用此數據來證明,

x <- rep(1:2 + 0.5, 10) 
y <- rep(1:4 + 0.5, 5) 
z <- rep(1:5 + 0.5, 4) 
(df <- data.frame(x, y, z)) 
#  x y z 
# 1 1.5 1.5 1.5 
# 2 2.5 2.5 2.5 
# 3 1.5 3.5 3.5 
# 4 2.5 4.5 4.5 
# 5 1.5 1.5 5.5 
# 6 2.5 2.5 1.5 
# 7 1.5 3.5 2.5 
# 8 2.5 4.5 3.5 
# 9 1.5 1.5 4.5 
# 10 2.5 2.5 5.5 
# 11 1.5 3.5 1.5 
# 12 2.5 4.5 2.5 
# 13 1.5 1.5 3.5 
# 14 2.5 2.5 4.5 
# 15 1.5 3.5 5.5 
# 16 2.5 4.5 1.5 
# 17 1.5 1.5 2.5 
# 18 2.5 2.5 3.5 
# 19 1.5 3.5 4.5 
# 20 2.5 4.5 5.5 

C++版本產生了相同的結果基礎R當量:

all.equal(base::order(x, y, z), order3(x, y, z)) 
# [1] TRUE 

df[order3(x, y, z),] 
#  x y z 
# 1 1.5 1.5 1.5 
# 17 1.5 1.5 2.5 
# 13 1.5 1.5 3.5 
# 9 1.5 1.5 4.5 
# 5 1.5 1.5 5.5 
# 11 1.5 3.5 1.5 
# 7 1.5 3.5 2.5 
# 3 1.5 3.5 3.5 
# 19 1.5 3.5 4.5 
# 15 1.5 3.5 5.5 
# 6 2.5 2.5 1.5 
# 2 2.5 2.5 2.5 
# 18 2.5 2.5 3.5 
# 14 2.5 2.5 4.5 
# 10 2.5 2.5 5.5 
# 16 2.5 4.5 1.5 
# 12 2.5 4.5 2.5 
# 8 2.5 4.5 3.5 
# 4 2.5 4.5 4.5 
# 20 2.5 4.5 5.5 

編輯:這是一個稍微更可口的版本,它允許你寫auto pv = MakePointVector(...),而不是指定所有類型的參數:

// Point class as before 

template <typename... Vecs> 
class PointVector { 
public: 
    std::size_t sz; 
    using point_t = Point<typename std::remove_reference<decltype(Vecs{}[0])>::type...>; 
    using vector_t = std::vector<point_t>; 
    vector_t data; 

    template <typename... Vs> 
    PointVector(const Vecs&... vs) 
     : sz(min_size(vs...)) 
    { 
     data.reserve(sz); 
     for (std::size_t i = 0; i < sz; i++) { 
      data.emplace_back(i, vs...); 
     } 
    } 

    Rcpp::IntegerVector sorted_index() const { 
     vector_t tmp(data); 
     std::stable_sort(tmp.begin(), tmp.end()); 

     Rcpp::IntegerVector res(sz); 
     for (std::size_t i = 0; i < sz; i++) { 
      res[i] = tmp[i].index + 1; 
     } 

     return res; 
    } 

private: 
    template <typename V> 
    std::size_t min_size(const V& v) { 
     return v.size(); 
    } 

    template <typename T, typename S, typename... Vs> 
    std::size_t min_size(const T& t, const S& s, const Vs&... vs) { 
     return t.size() < s.size() ? 
      min_size(t, vs...) : 
      min_size(s, vs...); 
    } 
}; 

template <typename... Vecs> 
PointVector<Vecs...> MakePointVector(const Vecs&... vecs) { 
    return PointVector<Vecs...>(vecs...); 
} 

using namespace Rcpp; 

// [[Rcpp::export]] 
IntegerVector order2(NumericVector x, NumericVector y) { 
    auto pv = MakePointVector(x, y); 
    return pv.sorted_index(); 
} 

// [[Rcpp::export]] 
IntegerVector order3(NumericVector x, NumericVector y, NumericVector z) { 
    auto pv = MakePointVector(x, y, z); 
    return pv.sorted_index(); 
} 

/*** R 

all.equal(base::order(x, y), order2(x, y)) 
# [1] TRUE 

all.equal(base::order(x, y, z), order3(x, y, z)) 
# [1] TRUE 

*/