2012-05-05 37 views
2

我有一堆 numpy 數組,它們是一組 Python 對象的屬性。在 Cython 中,爲了準備用 prange 處理(這需要 nogil),我想創建一個內存視圖:它在第一維上是「間接」的,其餘維度則引用各個 numpy 數組中的數據。假設 objects 是一個對象列表,其中每個對象都有 vector 屬性。(標題:從 numpy 數組中組裝一個 cython memoryview)

我想要做的事,如:

# Sketch of the desired usage, not working code: `vectors` is declared but
# never bound to a buffer, and the index `i` is not defined in this fragment.
cdef double[ ::cython.view.indirect, ::1 ] vectors 
for object in objects: 
    vectors[ i ] = object.vector 

但我應該如何初始化 `vectors` 才能做到這一點?這到底可不可行?或者,也許一個內存視圖只能是單一對象的內存視圖……在這種情況下還有另外一個問題——如何動態地創建一個由內存視圖組成的數組?

+0

其實 - 我現在相信,這是不可能的。目前,我正在使用'double *'數組,並使用來自每個向量的'data'和'stride'信息。不過,我擔心這是否是線程安全的。 – shaunc

回答

3

使用下面的代碼,就可以完成這項任務:

cimport stackoverflow_contrib

# The right-hand side is parenthesised so the assignment may span several
# lines; a bare line break after `=` is a syntax error.  The loop variable is
# named `obj` so it does not shadow the builtin `object` type.
cdef double[::cython.view.indirect, ::1] vectors = (
    stackoverflow_contrib.OnceIndirect([obj.vector for obj in objects])
)

其中stackoverflow_contrib.pyx如下:

from libc.stdlib cimport malloc, free 
from libc.string cimport strcmp 

from cython.view cimport memoryview 
from cpython cimport buffer 

cdef class OnceIndirect:
    """Expose several equally laid-out buffers as one buffer that is indirect
    in its first dimension.

    Every row is wrapped in a Cython ``memoryview`` and kept in ``_objects``;
    ``buf`` is a C table holding one data pointer per row.  ``__getbuffer__``
    publishes that table with ``ndim = 1 + row.ndim``, so the instance can be
    assigned to a typed memoryview such as ``double[::view.indirect, ::1]``.
    """
    cdef object _objects          # list of memoryviews, one per row
    cdef void** buf               # table of per-row data pointers
    cdef int ndim                 # 1 + number of dimensions of a row
    cdef int n_rows
    cdef int buf_len              # size of ``buf`` in bytes
    cdef Py_ssize_t* shape
    cdef Py_ssize_t* strides
    cdef Py_ssize_t* suboffsets
    cdef Py_ssize_t itemsize
    cdef bytes format             # None when no format string is available
    cdef int is_readonly

    def __cinit__(self, object rows, want_writable=True, want_format=True, allow_indirect=False):
        """
        Set want_writable to False if you don't want writable data. (This may
        prevent copies.)
        Set want_format to False if your input doesn't support PyBUF_FORMAT (unlikely)
        Set allow_indirect to True if you are ok with the memoryview being indirect
        in dimensions other than the first. (This may prevent copies.)

        Raises IndexError if ``rows`` is empty, MemoryError if a C allocation
        fails, and AssertionError (from ``assert_similar``) if the rows do not
        share one layout.
        """
        cdef memoryview example_obj
        cdef memoryview obj
        cdef int dim
        cdef int i = 0

        demand = buffer.PyBUF_INDIRECT if allow_indirect else buffer.PyBUF_STRIDES
        if want_writable:
            demand |= buffer.PyBUF_WRITABLE
        if want_format:
            demand |= buffer.PyBUF_FORMAT
        self._objects = [memoryview(row, demand) for row in rows]
        self.n_rows = len(self._objects)
        if self.n_rows == 0:
            # The layout is taken from the first row, so one row is required.
            raise IndexError("OnceIndirect requires at least one row")

        example_obj = self._objects[0]
        self.ndim = 1 + example_obj.view.ndim
        self.itemsize = example_obj.itemsize

        # If an allocation fails, the pointers obtained so far are released by
        # __dealloc__; the remaining ones are still NULL and free(NULL) is a no-op.
        self.buf_len = sizeof(void*) * self.n_rows
        self.buf = <void**>malloc(self.buf_len)
        self.shape = <Py_ssize_t*>malloc(sizeof(Py_ssize_t) * self.ndim)
        self.strides = <Py_ssize_t*>malloc(sizeof(Py_ssize_t) * self.ndim)
        self.suboffsets = <Py_ssize_t*>malloc(sizeof(Py_ssize_t) * self.ndim)
        if (self.buf == NULL or self.shape == NULL
                or self.strides == NULL or self.suboffsets == NULL):
            raise MemoryError()

        # A NULL format pointer must not be converted to a bytes object.
        if want_format and example_obj.view.format != NULL:
            self.format = example_obj.view.format
        else:
            self.format = None

        # Dimension 0 walks the pointer table; suboffset 0 tells the consumer
        # to dereference the pointer found there before indexing further.
        self.shape[0] = self.n_rows
        self.strides[0] = sizeof(void*)
        self.suboffsets[0] = 0
        # The remaining dimensions copy the layout of the first row.
        for dim in range(1, self.ndim):
            self.shape[dim] = example_obj.view.shape[dim - 1]
            self.strides[dim] = example_obj.view.strides[dim - 1]
            if example_obj.view.suboffsets == NULL:
                self.suboffsets[dim] = -1
            else:
                self.suboffsets[dim] = example_obj.view.suboffsets[dim - 1]

        for obj in self._objects:
            assert_similar(example_obj, obj)
            # The combined buffer is read-only as soon as any row is.
            self.is_readonly |= obj.view.readonly
            self.buf[i] = obj.view.buf
            i += 1

    def __getbuffer__(self, Py_buffer* buff, int flags):
        """Fill ``buff`` with the indirect view over all rows.

        Raises BufferError when the consumer did not request PyBUF_INDIRECT,
        requests a writable buffer over read-only data, or requests a format
        string that was not collected at construction time.
        """
        cdef Py_ssize_t n_items = 1
        cdef int dim

        if (flags & buffer.PyBUF_INDIRECT) != buffer.PyBUF_INDIRECT:
            raise BufferError("don't want to copy data")
        if flags & buffer.PyBUF_WRITABLE and self.is_readonly:
            raise BufferError("couldn't provide writable, you should have demanded it earlier")
        if flags & buffer.PyBUF_FORMAT:
            if self.format is None:
                raise BufferError("couldn't provide format, you should have demanded it earlier")
            buff.format = self.format
        else:
            buff.format = NULL

        # Py_buffer.len is the size of the logical array:
        # product(shape) * itemsize, not the size of the pointer table.
        for dim in range(self.ndim):
            n_items *= self.shape[dim]

        buff.buf = <void*>self.buf
        buff.obj = self
        buff.len = n_items * self.itemsize
        buff.readonly = self.is_readonly
        buff.ndim = self.ndim
        buff.shape = self.shape
        buff.strides = self.strides
        buff.suboffsets = self.suboffsets
        buff.itemsize = self.itemsize
        buff.internal = NULL

    def __dealloc__(self):
        # free(NULL) is a no-op, so this is safe after a failed __cinit__.
        free(self.buf)
        free(self.shape)
        free(self.strides)
        free(self.suboffsets)

cdef int assert_similar(memoryview left_, memoryview right_) except -1:
    """Check that two memoryviews describe the same buffer layout.

    Compares ndim, then shape and strides per dimension, then suboffsets and
    the format string.  Returns 0 on success and raises AssertionError on the
    first mismatch.  A NULL suboffsets or format pointer only matches another
    NULL pointer; NULL pointers are never dereferenced.
    """
    cdef Py_buffer left = left_.view
    cdef Py_buffer right = right_.view
    cdef int i

    assert left.ndim == right.ndim, (left.ndim, right.ndim)
    for i in range(left.ndim):
        assert left.shape[i] == right.shape[i], (left_.shape, right_.shape)
        assert left.strides[i] == right.strides[i], (left_.strides, right_.strides)

    if left.suboffsets == NULL or right.suboffsets == NULL:
        # Either both sides lack suboffsets or the layouts differ.
        assert left.suboffsets == NULL and right.suboffsets == NULL, (left_.suboffsets, right_.suboffsets)
    else:
        for i in range(left.ndim):
            assert left.suboffsets[i] == right.suboffsets[i], (left_.suboffsets, right_.suboffsets)

    if left.format == NULL or right.format == NULL:
        # strcmp() and bytes() must not be handed a NULL pointer.
        assert left.format == NULL and right.format == NULL, "only one of the two buffers provides a format string"
    else:
        assert strcmp(left.format, right.format) == 0, (bytes(left.format), bytes(right.format))
    return 0

from cython cimport view 

cimport numpy as np 
import numpy as np 

def show_memoryview(object x):
    """Print the shape, strides, suboffsets and itemsize of ``x`` as a dict.

    ``x`` may be any object exposing those four attributes.
    """
    # print(...) with one argument gives the same output whether it is parsed
    # as a Python 2 print statement or as a Python 3 function call.
    print(dict(shape=x.shape, strides=x.strides, suboffsets=x.suboffsets, itemsize=x.itemsize))

def go():
    """Demonstrate OnceIndirect on three float64 arrays of shape (2, 10).

    Builds an indirect view over the arrays, writes through it, checks that
    the write is visible in the underlying numpy array, and stacks two
    indirect views into a doubly indirect one.

    Returns the tuple ``(rows, big_view, big_view2)``.
    """
    row0 = np.array(range(20), dtype=np.float64).reshape(2, 10)
    row1 = np.array(range(20, 40), dtype=np.float64).reshape(2, 10)
    row2 = np.array(range(40, 60), dtype=np.float64).reshape(2, 10)
    small_view = memoryview(row0, buffer.PyBUF_STRIDES)
    show_memoryview(small_view)
    rows = [row0, row1, row2]
    big_view = OnceIndirect(rows)
    cdef double[::view.indirect, :, :] big_view2 = big_view
    show_memoryview(big_view2)
    print(row1)
    # Writing through the indirect view modifies row1 itself.
    big_view2[1, 0, 1] += 200
    print(row1)
    cdef double[:, :] row1_view = big_view2[1]
    assert row1_view[0, 1] >= 200
    cdef double[::view.indirect, :, :] big_view3 = OnceIndirect([row0, row1, row0])
    # allow_indirect=True is needed because the inputs are already indirect.
    cdef double[::view.indirect, ::view.indirect, :, :] dub = OnceIndirect([big_view2, big_view3], allow_indirect=True)
    show_memoryview(dub)
    # big_view2 can be indexed and sliced in Cython and Python code
    # note big_view2 is a cython memoryview object not a OnceIndirect object because it was implicitly cast to one
    # rows, big_view, big_view2 all refer to the same data!
    return (rows, big_view, big_view2)
+0

你能詳細說一下嗎?對代碼進行高層次的概述會比較有幫助,而不必逐行進行。 – kynan

+0

嗨kynan,'OnceIndirect'類定義了'__getbuffer__'方法,在Cython中這意味着OnceIndirect的行爲類似於memoryview。請看這些例子:https://github.com/cython/cython/blob/e8e96dfcb3f5218ad33c0331720cd02790879227/tests/buffers/buffer.pyx https://github.com/cython/cython/blob/cbc0665c95ff461df9d1e1066da7628d640547c2/Cython/Includes/numpy.pxd 。 '__getbuffer__'函數填充一個'Py_buffer'對象,告訴Cython如何訪問數據。有關這些字段的詳細說明,請參閱http://docs.python.org/3.3/c-api/buffer.html。 – r3m0t

+0

你顯式地用'malloc' /'free'管理內部數據結構,而不是用受管理的方式分配內存,是否有特別的原因?我這樣問是因爲[這個問題](http://stackoverflow.com/questions/18462785/what-is-the-recommended-way-of-allocating-memory-for-a-typed-memory-view):我在其中詢問爲類型化內存視圖分配內存的最佳方式。 – kynan