我正在用 Cython 重寫一些 Python 代碼,卻發現用 Cython 聲明一個 numpy 數組會產生出乎意料的大量開銷。
按照文檔(documentation)中的建議,我開始做優化,把原來的 Python 數組換成帶 Cython 類型聲明的數組。
具體來說,以下寫法被認爲是聲明 numpy 數組的「最佳」方式:
# cython: profile=True
# cython: boundscheck=False
# cython: wraparound=False
import numpy as np
cimport numpy as np
# Minimal reproduction: the function only declares a typed NumPy buffer
# and then returns, so that the C code generated for the declaration can be
# inspected in isolation.
cpdef test():
    # Buffer-typed local: 1-D ndarray with element type np.int_t, bound to
    # a fresh 10-element array from np.empty (contents are uninitialised).
    cdef np.ndarray[np.int_t, ndim=1] seeds_idx = np.empty(10, dtype=np.int)
    # Nothing else is done with seeds_idx.
    pass
然而,用 `cython -a my_file.pyx` 進行分析(profiling)後,
爲上面的代碼生成的 HTML 文件顯示如下內容:
+10: cdef np.ndarray[np.int_t, ndim=1] seeds_idx = np.empty(10, dtype=np.int)
__pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 10, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_empty); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 10, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
__pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 10, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 10, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_3);
__pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_int); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 10, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_4);
__Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_dtype, __pyx_t_4) < 0) __PYX_ERR(0, 10, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
__pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_tuple_, __pyx_t_1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 10, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_4);
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 10, __pyx_L1_error)
__pyx_t_5 = ((PyArrayObject *)__pyx_t_4);
{
__Pyx_BufFmt_StackElem __pyx_stack[1];
if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_seeds_idx.rcbuffer->pybuffer, (PyObject*)__pyx_t_5, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {
__pyx_v_seeds_idx = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_seeds_idx.rcbuffer->pybuffer.buf = NULL;
__PYX_ERR(0, 10, __pyx_L1_error)
} else {__pyx_pybuffernd_seeds_idx.diminfo[0].strides = __pyx_pybuffernd_seeds_idx.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_seeds_idx.diminfo[0].shape = __pyx_pybuffernd_seeds_idx.rcbuffer->pybuffer.shape[0];
}
}
__pyx_t_5 = 0;
__pyx_v_seeds_idx = ((PyArrayObject *)__pyx_t_4);
__pyx_t_4 = 0;
/* … */
__pyx_tuple_ = PyTuple_Pack(1, __pyx_int_10); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 10, __pyx_L1_error)
__Pyx_GOTREF(__pyx_tuple_);
__Pyx_GIVEREF(__pyx_tuple_);
以上結果是在 Python 2.7、Cython 0.24 和 numpy 1.10.4 的環境下得到的。
另一方面,如果只是簡單地聲明 `seeds_idx = np.empty(10)`,
生成的結果如下:
+10: seeds_idx = np.empty(10)
__pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 10, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_empty); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 10, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
__pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 10, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
__pyx_v_seeds_idx = __pyx_t_1;
__pyx_t_1 = 0;
/* … */
__pyx_tuple_ = PyTuple_Pack(1, __pyx_int_10); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 10, __pyx_L1_error)
__Pyx_GOTREF(__pyx_tuple_);
__Pyx_GIVEREF(__pyx_tuple_);
這裏到底是哪裏出了問題(如果確實有問題的話)?謝謝!
沒有什麼錯,numpy數組是複雜(但非常高效)的數據結構。你可以嘗試使用[typed memoryviews](http://docs.cython.org/src/userguide/memoryviews.html),它們通常更快,並且可以很容易地轉換爲numpy數組。 –
另一點值得注意的是,分配數組**確實**有開銷。使用數組很快,但分配數組可能會稍慢一些,所以儘量避免不必要的分配。 – DavidW
我明白了:聲明時會有一點小開銷,但之後的賦值、訪問等操作要快得多。 – Gioker