2014-01-13 42 views
0

我有一個二維問題,正在透過實作 CUDA 內核來求解。由於問題是二維的,如果能在列主序(column-major)儲存下用 d_A[i][j] 取代 d_A[i + m*j] 來存取元素,內核程式碼的可讀性會更高。如果只用 cudaMalloc 分配記憶體,d_A[i][j] 這種寫法無法運作。我應該使用哪個函式?如果能附上範例將不勝感激。在 C++ 中,這通常是透過分配二維記憶體來實現的,例如 double** A = new double[10][10];。如何在 CUDA 內核中以 d_A[i][j] 的形式存取記憶體?

這與 cudaMallocPitch 有關嗎?還是說 pitch(帶間距)版本只是用來改善二維資料的對齊與合併(coalesced)記憶體存取?

+1

最簡單的做法可能就是直接使用計算出來的索引。要建立維度可變、動態分配且使用方便的二維陣列相當複雜。如果在編譯時就知道尺寸,可以借助編譯器讓它容易處理[(3D例子)](http://stackoverflow.com/questions/12924155/sending-3d-array-to-cuda-kernel/12925014#12925014)。`cudaMallocPitch` 仍然只做一維分配,只是建立了帶間距(pitched)的儲存,以最佳化每一行(row)的對齊。 –

回答

3

你可以先定義一個支援步幅(stride)的向量類別,然後讓二維矩陣的 operator[] 回傳一個步幅已正確設定的向量;第二個 [] 實際上是在該向量上呼叫的。以下是一個例子:

// Marks a function as callable from both host code and CUDA device code.
#define _devhost_ __host__ __device__
// Signed integer type used for sizes, indices and byte offsets.
typedef long SizeT;

// Returns `ptr` advanced by `offset` BYTES (not elements), reinterpreted
// back as a pointer to const T.
template<typename T>
_devhost_ const T* pointer_offset(const T* ptr, SizeT offset) {
    // Go through a byte pointer so the arithmetic is done in bytes.
    const uint8_t* const raw = reinterpret_cast<const uint8_t*>(ptr);
    return reinterpret_cast<const T*>(raw + offset);
}

// Template tag choosing between the NonConst and Const variants of a
// container. The Const specializations in this file expose read-only
// accessors.
enum ConstEnum {
    NonConst = 0,
    Const = 1
};

// Template tag for the ownership variant of a container. Only NonOwner is
// used by the specializations visible in this file.
enum OwnerEnum {
    NonOwner = 0,
    Owner = 1
};

// Template tag selecting whether a vector carries a strip. The strip itself
// is expressed as a number of bytes, not a number of elements.
enum StripEnum {
    NonStrip = 0,
    Strip = 1
};

// Primary Vector template; it is only declared here, and behaviour comes
// from the partial specializations. With the defaults, Vector<T, A> means
// the NonConst, NonOwner, NonStrip variant.
template<typename ValueType,
         typename Alloc,
         ConstEnum IsConst = NonConst,
         OwnerEnum IsOwner = NonOwner,
         StripEnum HasStrip = NonStrip>
class Vector;

// Primary DenseMatrix template; it is only declared here, and behaviour
// comes from the partial specializations. With the defaults,
// DenseMatrix<T, A> means the NonConst, NonOwner variant.
template<typename ValueType,
         typename Alloc,
         ConstEnum IsConst = NonConst,
         OwnerEnum IsOwner = NonOwner>
class DenseMatrix;

// Read-only, contiguous view over `len_` elements of ValueType starting at
// `ptr_`. The class neither allocates nor frees the memory it refers to.
template<typename ValueType, typename Alloc>
class Vector<ValueType, Alloc, Const> {
protected:
    ValueType* ptr_;  // first element; stored non-const, exposed only as const
    SizeT len_;       // number of elements

public:
    // Builds an empty view (null pointer, zero length).
    _devhost_ Vector() : ptr_(0), len_(0) {}

    // Builds a view over `len` elements starting at `ptr`.
    _devhost_ Vector(const ValueType* ptr, SizeT len)
        : ptr_(const_cast<ValueType*>(ptr)), len_(len) {}

    // Returns element `i`; no bounds checking is performed.
    _devhost_ const ValueType& operator[] (SizeT i) const {
        return *(ptr_ + i);
    }

    // Number of elements in the view.
    _devhost_ SizeT size() const { return len_; }

    // Pointer to the first element.
    _devhost_ const ValueType* data() const { return ptr_; }
};

// Read-only vector view whose consecutive elements are `strip_` BYTES apart
// instead of being packed back to back.
template<typename ValueType, typename Alloc>
class Vector<ValueType, Alloc, Const, NonOwner, Strip>
    : public Vector<ValueType, Alloc, Const> {

protected:
    SizeT strip_;  // distance in bytes between consecutive elements
    typedef Vector<ValueType, Alloc, Const> Base;

    // Unqualified name lookup does not search base classes that depend on
    // template arguments, so the inherited members are pulled in explicitly.
    using Base::ptr_;
    using Base::len_;

public:
    // Empty view; the strip defaults to one packed element.
    _devhost_ Vector() : Base(), strip_(sizeof(ValueType)) {}

    // View over `len` elements starting at `ptr`, `strip` bytes apart.
    // With the default strip the elements are treated as contiguous.
    _devhost_ Vector(const ValueType* ptr, SizeT len,
                     SizeT strip = sizeof(ValueType))
        : Base(ptr, len), strip_(strip) {}

    // Returns element `i`, located `i * strip_` bytes past `ptr_`.
    // No bounds checking is performed.
    _devhost_ const ValueType& operator[] (SizeT i) const {
        const SizeT byte_offset = i * strip_;
        return *pointer_offset(ptr_, byte_offset);
    }

    // NOTE: the inherited size() and data() remain callable, but their
    // results may be misleading when the elements are not contiguous.
};

// Read-only view of a dense matrix stored in column-major order: element
// (irow, icol) lives at vals_[irow + icol * nrows_]. The class neither
// allocates nor frees the memory it refers to.
template<typename ValueType, typename Alloc>
class DenseMatrix<ValueType, Alloc, Const> {
protected:
    ValueType* vals_;       // first element; stored non-const, exposed as const
    SizeT nrows_, ncols_;   // matrix dimensions

public:
    // Builds an empty matrix (null data pointer, 0 x 0).
    _devhost_ DenseMatrix() : vals_(0), nrows_(0), ncols_(0) {}

    // Builds a view over an n_rows x n_cols column-major array at `vals`.
    // The original code initialized vals_ from itself instead of from the
    // `vals` argument, leaving the data pointer indeterminate.
    _devhost_ DenseMatrix(const ValueType* vals, SizeT n_rows, SizeT n_cols)
        : vals_(const_cast<ValueType*>(vals)),
          nrows_(n_rows), ncols_(n_cols) {}

    _devhost_ SizeT num_rows() const {return nrows_;}
    _devhost_ SizeT num_cols() const {return ncols_;}
    // Total number of elements (rows * columns).
    _devhost_ SizeT numel() const {return nrows_ * ncols_;}

    // Pointer to the first element of the underlying storage.
    _devhost_ const ValueType* data() const {return vals_;}

    // Returns element (irow, icol); no bounds checking is performed.
    _devhost_ const ValueType& at(SizeT irow, SizeT icol) const {
        return vals_[irow + icol * nrows_];
    }

    typedef Vector<ValueType, Alloc, Const, NonOwner, Strip> ConstIndexer;

    // Returns a view of row `irow`, so that m[irow][icol] reads the same
    // element as at(irow, icol). Consecutive elements of a row are one full
    // column apart, i.e. nrows_ * sizeof(ValueType) bytes.
    _devhost_ ConstIndexer operator[] (SizeT irow) const {
        return ConstIndexer(vals_ + irow, ncols_, nrows_ * sizeof(ValueType));
    }

    // Returns a view of `n_cols` consecutive columns starting at `icol`.
    _devhost_ DenseMatrix<ValueType, Alloc, Const> get_cols(SizeT icol,
            SizeT n_cols) const {
        return DenseMatrix<ValueType, Alloc, Const>(vals_ + icol * nrows_,
                nrows_, n_cols);
    }

    // Returns a contiguous view of column `icol` (nrows_ elements).
    _devhost_ Vector<ValueType, Alloc, Const> get_col(SizeT icol) const {
        return Vector<ValueType, Alloc, Const>(vals_ + icol * nrows_, nrows_);
    }
};
1

如果您在意的只是可讀性,我會直接用一條 #define 巨集:

// Column-major element access: A(i,j) is element (i,j) of d_A, where m is
// the number of rows. The arguments are parenthesized so that expressions
// such as A(r, c + 1) expand with the intended precedence.
#define A(i,j) d_A[(i) + m*(j)]