2016-01-28 52 views
0

所以我想加快我的Python腳本從文件加載數據並將其存儲在數組中。但是我意識到如果我重複1020次左右的程序,不要問我爲什麼會出現分段錯誤。用於加載數據的代碼是:Ctypes分段錯誤

import os,sys 
import numpy as np 
import pandas as pd 
import ctypes as ct 

VERSION = 0.1 

try: 
    path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "_vec") 
except NameError: 
    path = "./_im7" 

if not(sys.platform in ('win32', 'cygwin')): 
    path += '.so.'+str(VERSION) 
    libfunctions = ct.cdll.LoadLibrary(path) 
else: 
    libfunctions = ct.cdll.LoadLibrary(path) 

libfunctions.readvec.restype = ct.c_void_p 
libfunctions.readvec.argtypes = [ct.c_char_p, np.ctypeslib.ndpointer(ct.c_float), \ 
    ct.c_int,ct.c_int,ct.c_int] 


def readvecCT(filename,nx,ny,nz): 
    # we are supposing for the moment that the naming scheme PIV__vxy.case PIV__vxy.geo not changes should that 
    # not be the case appropriate changes have to be made to the corresponding file 

    # ctypes 
    data_temp = np.zeros((2*ny*nx,1),dtype=np.dtype('f4')) 
    libfunctions.readvec(ct.c_char_p(filename),data_temp,ct.c_int(nx), ct.c_int(ny),ct.c_int(3)) 

    # W value 
    if (nz)>1: 
     return data_temp[:nx*ny].reshape(ny, nx), data_temp[nx*ny:2*nx*ny].reshape(ny, nx), data_temp[2*nx*ny:].reshape(ny, nx) 
    else:  
     return data_temp[:nx*ny].reshape(ny, nx), data_temp[nx*ny:].reshape(ny, nx)   

底層C函數:

#include <stdio.h> 


void readvec(const char *fname, float *data, int nx, int ny,int skiprows) { 
    //void cfun(const double * indata, int rowcount, int colcount, double * outdata) { 
    int i,j,check; 
    size_t length; 
    FILE *file; 
    char buffer[1024]; 
    char *buffer_ptr = &buffer[0]; 
    //puts("Here we go!"); 

    file = fopen(fname, "r"); 
    //printf("Nx: %d Ny: %d skiprows: %d \n",nx,ny,skiprows); 
    //printf("Filename %s \n",fname); 
    for (i=0;i<=skiprows;i++){ 
    check=getline(&buffer_ptr,&length,file); 
    //printf("buffer: %s \n",buffer); 
    if (check==-1){ 
     puts("ERROR"); 
    } 
    } 

    for (i = 0; i < ny; i++) { 
    for (j=0;j<nx;j++){ 
     check=fscanf(file,"%f",&data[i*nx+j]); 
     //printf("Data %s\n",buffer); 
     if (check==-1){ 
     puts("ERROR"); 
     } 
    } 
    } 

    for (i = 0; i < ny; i++) { 
    for (j=0;j<nx;j++){ 
     check=fscanf(file,"%f",&data[ny*nx+i*nx+j]); 
     if (check==-1){ 
     puts("ERROR"); 
     } 
    } 
    } 

    //fclose(file); 
    //puts("Done!"); 
} 

和一個簡單的測試,產生錯誤:

import time 
import numpy as np 
import libvec.libvec as vec 
import matplotlib.pyplot as plt 


tmp_geo = '/mnt/shared/projects/MORPHING/Users/jschelle/raw_treated_201509/RES_u_8-ms-dp-4-mmce_sma_0-mm-mcf_0-hz/u_8-ms-dp-4-mmce_sma_0-mm-mcf_0-hz_20150824_seq_01/ParaView/PIV__vxy.geo' 
tmp_file = '/mnt/shared/projects/MORPHING/Users/jschelle/raw_treated_201509/RES_u_8-ms-dp-4-mmce_sma_0-mm-mcf_0-hz/u_8-ms-dp-4-mmce_sma_0-mm-mcf_0-hz_20150824_seq_01/ParaView/PIV__vxy_01019.vec' 
tmp_case = '/mnt/shared/projects/MORPHING/Users/jschelle/raw_treated_201509/RES_u_8-ms-dp-4-mmce_sma_0-mm-mcf_0-hz/u_8-ms-dp-4-mmce_sma_0-mm-mcf_0-hz_20150824_seq_01/ParaView/PIV__vxy.case' 

x,y,z = vec.readgeo(tmp_geo) 
nx = len(x) 
ny = len(y) 
nz = len(z) 
iterations = 1100 

start_time = time.time() 

for i in range(iterations): 
    U,V = vec.readvecCT(tmp_file,nx,ny,nz); 

print("Ctypes --- %f seconds ---" % ((time.time() - start_time)/iterations)) 

現在,當迭代次數是1000沒有問題發生,但在1100我得到分段錯誤。我想它與內存管理有關,但我不知道如何修復它,甚至不知道從哪裏開始!任何幫助將不勝感激。

THX很多提前

Ĵ

+0

您是否曾經在C函數中初始化'length'的值?看起來這個人的價值可能是不確定的。 – JCVanHamme

+0

我會在'check = fscanf(file,「%f」,&data [ny * nx + i * nx + j])''行上檢查索引'ny * nx + i * nx + j''。它是否低於'2 * ny * nx'? –

+0

您忘記初始化'size_t length = 1024',所以'getline'會在堆上重新分配''buffer_ptr',導致內存泄漏。無論哪種方式,如果地址已經改變,你需要'釋放'分配的內存,否則你有內存泄漏。但是,這不會導致段錯誤。 – eryksun

回答

1

事實證明:

fclose(file) 

有人評論因此這並不會直接導致一個錯誤,但在同一時間之後導致段錯誤n次迭代

非常感謝大家!