在Linux下的命令行中運行我的Python腳本時,出現了分段錯誤(Segmentation fault)——可能是由一個很長的循環造成的。我確切地知道問題出在哪裏,但不知道爲什麼會出錯。我嘗試了一些在網上(包括本網站)搜索到的方法,但仍然無法解決。所以,請幫幫我——預先感謝。以下是部分代碼(問題標題:在.so文件中使用C模塊時出現分段錯誤):
analyzer.py
(程序從這個文件開始運行):
from classify import BayesClassifier
class Analyzer:
    """Entry-point wrapper that owns a trained ``BayesClassifier``.

    The classifier is exposed as the ``classify`` attribute.
    """

    # Path that used to be hard-coded in ``__init__``; kept as the default so
    # that ``Analyzer()`` behaves exactly as before.
    DEFAULT_TRAINING_TABLE = '/home/user/yakamoz/srcanalyzer/classification/training.tab'

    def __init__(self, training_table=DEFAULT_TRAINING_TABLE):
        """Create the classifier from the given training table.

        Args:
            training_table: Path of the training table passed straight to
                ``BayesClassifier``. Defaults to ``DEFAULT_TRAINING_TABLE``.
        """
        self.classify = BayesClassifier(training_table)
if __name__ == '__main__':
    analyzer = Analyzer()
    # Sample input: a paragraph of Chinese text. According to the author its
    # content has no influence on the segmentation fault, so it can be read
    # as if it were any ordinary paragraph (e.g. in English).
    sample_text = "市委常委、紀委書記楊娟高度評價我縣基層黨風廉政建設:\
務實創新成效顯著作者:縣紀委辦公室發佈時間:11月27日下午,\
市委常委、紀委書記楊娟率領市紀委副書記蔣玉平、王友富,\
市紀委常委、祕書長任斌,市紀委機關黨委書記劉林建一行來我\
縣調研基層黨風廉政建設。調研中,楊娟高度評價我縣基層黨風廉政建設,\
認爲工作務實創新,成效顯著。縣委書記陳朝先,縣委副書記季代雙,縣委常委、\
紀委書記韓忠明陪同調研。楊娟一行先後來到兩河鎮、西部花都、兩江廣場、\
工業園區等地實地調研我縣基層黨風廉政建設,檢閱我縣「兩化」互動、「三化」\
聯動發展成果。查閱相關資料在兩河鎮,楊娟認真聽取了兩河片區紀工委\
日常工作開展情況的彙報,仔細翻閱了巡查工作日記和接訪記錄。楊娟指出,\
設置鄉鎮片區紀工委是加強基層紀檢組織建設的創新舉措。\
鹽亭在全市率先設置、運行紀工委以來,在化解農村信訪矛盾,理順羣衆情緒,\
強化基層辦案工作等方面取得了明顯成效。她要求,要總結提煉片區紀工委的經驗,\
進一步明確職能職責,在機構設置、人員配備、制度建設等方面進行探索實踐,\
爲全市基層紀檢組織建設提供有益經驗借鑑。楊娟還饒有興趣地參觀了兩河鎮\
的機關廉政文化建設"
    # classify_text() returns a pair; only its first element is printed.
    label = analyzer.classify.classify_text(sample_text)[0]
    print(str(label))
classify.py
(該文件被上面給出的 analyzer.py 使用):
# -*- coding:utf-8 -*-
from match import WordMatch
import cPickle
import math
class BayesClassifier:
    """Text classifier that accumulates per-class word counts.

    The training table is iterated as rows where ``row[0]`` is a
    ``{word: count}`` mapping and ``row[1]`` is the class label.

    NOTE(review): ``classify_text`` calls ``self.__count(cls, vector)``,
    which is not part of the code shown here. It is assumed to exist in the
    full class and to return a score comparable with ``>``.
    """

    def __init__(self, table_name):
        """Load the pickled training table at ``table_name`` and train on it.

        Args:
            table_name: Path of the pickled training table.
        """
        # Per-instance state. These used to be mutable class attributes, so
        # every instance shared (and re-trained into) the same dictionaries.
        self.__trainingdata = {}        # cls -> {word: accumulated count}
        self.__classifywordscount = {}  # cls -> total of all word counts
        self.__classifydoccount = {}    # cls -> number of trained documents

        # SECURITY: unpickling can execute arbitrary code; only load training
        # tables from a trusted source.
        # The file mode is left as 'r' to stay consistent with however the
        # table was written; ``with`` guarantees the handle is closed.
        with open(table_name, 'r') as table_file:
            self.trainingtable = cPickle.load(table_file)

        for row in self.trainingtable:
            self.train(row[1], row[0])
        print('training finished')

        self.matrix = self.get_matrix()
        self.vector_count = len(self.matrix)
        self.doc_count = len(self.trainingtable)
        self.match = WordMatch(self.matrix)

    def get_matrix(self):
        """Return ``{word: 0}`` for every word seen in the training table."""
        return {word: 0 for row in self.trainingtable for word in row[0]}

    def doc_to_vector(self, content):
        """Return a new ``{keyword: count}`` dict for the keywords in ``content``."""
        return dict(self.match.find(content))

    def train(self, cls, vector):
        """Add one document's word counts to the statistics of class ``cls``.

        Args:
            cls: Class label of the document.
            vector: ``{word: count}`` mapping for the document.
        """
        word_counts = self.__trainingdata.setdefault(cls, {})
        self.__classifywordscount.setdefault(cls, 0)
        self.__classifydoccount[cls] = self.__classifydoccount.get(cls, 0) + 1
        for word, count in vector.items():
            self.__classifywordscount[cls] += count
            word_counts[word] = word_counts.get(word, 0) + count

    def classify_text(self, content):
        """Return ``(label, score)`` for the best-scoring class.

        When no class scores above the initial threshold the label is
        ``"unknown classification"`` and the score is the threshold itself.
        """
        best_score = -1 << 32
        best_cls = "unknown classification"
        # The keyword search does not depend on the class, so run it once
        # instead of once per class.
        vector = self.doc_to_vector(content)
        for cls in self.__trainingdata:
            # Pass a fresh copy each time, as the original did: __count is
            # not visible here and might modify its argument.
            prob = self.__count(cls, dict(vector))
            if prob > best_score:
                best_cls = cls
                best_score = prob
        return best_cls, best_score
match.py
(這段代碼被 classify.py 引用):
# -*- coding:utf-8 -*-
import os
import re
import util.ahocorasick.x64 as ahocorasick
# util.ahocorasick.x64 is a folder where .so file locates
class WordMatch(object):
    """Keyword matcher backed by an ``ahocorasick.KeywordTree``."""

    def __init__(self, arg):
        """Build the keyword tree from ``arg``.

        Args:
            arg: Either a list/dict whose non-empty items (dict keys) are the
                keywords, or the path of a text file holding one keyword per
                line (blank lines are skipped).

        NOTE(review): on an invalid argument or a missing file this only
        prints a message and returns, leaving a tree on which ``make()`` was
        never called. That best-effort behaviour is kept as-is.
        """
        self.__tree = ahocorasick.KeywordTree()
        if isinstance(arg, (list, dict)):
            for item in arg:
                if item:
                    self.__tree.add(item)
        elif isinstance(arg, basestring):
            if not os.path.isfile(arg):
                print('the path of the input file does not exist')
                return
            # ``with`` closes the file even if reading or add() raises.
            with open(arg) as fp:
                for line in fp:
                    line = line.strip()
                    if line:
                        self.__tree.add(line)
        else:
            print('parameter fault')
            return
        self.__tree.make()

    def _findall(self, content):
        """Return the list of keywords found in ``content``.

        A non-string ``content`` prints a message and yields an empty list.
        """
        if not isinstance(content, basestring):
            print('AC automation requires string ')
            return []
        hit_list = []
        for start, end in self.__tree.findall(content):
            keyword = content[start:end]  # slice once, reuse below
            if keyword:
                hit_list.append(keyword)
        return hit_list

    def find(self, content):
        """Return ``{keyword: occurrences}`` for the keywords in ``content``."""
        counts = {}
        for keyword in self._findall(content):
            counts[keyword] = counts.get(keyword, 0) + 1
        return counts
__init__.py
(該文件位於 util.ahocorasick.x64 文件夾下):
import _ahocorasick
__all__ = ['KeywordTree']
# A high level version of the keyword tree. Most of the methods here
# are just delegated over to the underlying C KeywordTree
#(in the .so file, which is not shown here).
class KeywordTree(object):
    """Python-level wrapper around the C ``_ahocorasick.KeywordTree``.

    ``add``, ``make`` and ``zerostate`` delegate directly to the wrapped C
    object; ``findall`` drives its ``search`` method in a loop.
    """
    def __init__(self):
        # The wrapped C-level tree (implemented in the .so file).
        self.__tree = _ahocorasick.KeywordTree();
    def add(self, s):
        """Delegate to the C tree's ``add`` and return its result."""
        return self.__tree.add(s)
    def make(self):
        """Delegate to the C tree's ``make`` and return its result."""
        return self.__tree.make()
    def zerostate(self):
        """Delegate to the C tree's ``zerostate`` and return its result."""
        return self.__tree.zerostate()
    ##### !! The author reports that the segmentation fault occurs in this
    ##### !! method. The "#print spot_N" markers below are the debugging
    ##### !! positions discussed in the question; they are kept in place.
    def __findall_helper(self, sourceBlock, allow_overlaps, search_function):
        """Helper function that captures the common logic behind the
        two findall methods.

        Generator: repeatedly calls
        ``search_function(sourceBlock, startpos, startstate)`` and yields
        ``match[0:2]`` for every truthy result, stopping at the first falsy
        one. ``match[1]`` becomes the next ``startpos``; ``match[2]`` becomes
        the next ``startstate`` only when ``allow_overlaps`` is true,
        otherwise the search restarts from ``self.zerostate()``.
        """
        startpos = 0
        startstate = self.zerostate()
        loop_times = 0  # debugging counter; only incremented, never read
        while True:
            #print spot_1
            match = search_function(sourceBlock, startpos, startstate)
            #print spot_2
            if not match:
                break
            yield match[0:2]
            startpos = match[1]
            if allow_overlaps: #which in my case is always false
                startstate = match[2]
            else:
                loop_times = loop_times + 1
                #print spot_3
                startstate = self.zerostate()
                #print spot_4
            #print loop_times
    def findall(self, sourceBlock, allow_overlaps=0):
        """Return a generator over the matches in ``sourceBlock``.

        Uses the C tree's ``search`` method as the search function.
        """
        return self.__findall_helper(sourceBlock, allow_overlaps,self.__tree.search)
讓我困惑的是得到的不同結果:我已經發現問題出在 3. __init__.py
中,或更確切地說,出在 __findall_helper(self, sourceBlock, allow_overlaps, search_function)
中。
取消以下任意一行的註釋:
#print spot_1
#print spot_2
#print spot_4
就可以消除分段錯誤,並且循環是有限的(match 可以爲 None
),但取消註釋 #print spot_3
卻不行(看起來是一個無限循環)。我的問題是:
print
語句在Python中有副作用嗎?我發現只要在上面提到的三個位置(spot_1
、spot_2
或 spot_4
)之一加入一條 print
語句,就可以消除該故障。順便說一句,這是我偶然發現的,起初代碼中並沒有 print
。
以下是使用 gdb
得到的 backtrace
:
(gdb) r analyzer.py
Starting program: /usr/local/bin/python analyzer.py
[Thread debugging using libthread_db enabled]
Detaching after fork from child process 11499.
training finished
Program received signal SIGSEGV, Segmentation fault.
0x00007ffff178956d in ahocorasick_KeywordTree_search_helper (state=0x85c730,
string=0x8967d4 "【中國環保在線 市場行情】「我國將在2016年啓動全國碳市場。全國碳交 易市場的首批行業企業將由電力、冶金、有色、建材、化工5個傳統制造業和航", <incomplete sequence \347\251>..., n=140733193395828, startpos=118366835,
out_start=0x7fffffffd468, out_end=0x7fffffffd460, out_last_state=0x7fffffffd458) at aho-corasick.c:216
216 aho-corasick.c: No such file or directory.
in aho-corasick.c
Missing separate debuginfos, use: debuginfo-install glibc-2.12- 1.149.el6.x86_64
(gdb) bt
#0 0x00007ffff178956d in ahocorasick_KeywordTree_search_helper (state=0x85c730, string=0x8967d4 "【中國環保在線 市場行情】「我國將在2016年啓動全國碳市場。全國碳交 易市場的首批行業企業將由電力、冶金、有色、建材、化工5個傳統制造業和航", <incomplete sequence \347\251>..., n=140733193395828, startpos=118366835, out_start=0x7fffffffd468, out_end=0x7fffffffd460, out_last_state=0x7fffffffd458) at aho-corasick.c:216
#1 0x00007ffff178a2b1 in ahocorasick_KeywordTree_basesearch (self=0x7ffff7f6c230, args=0x7ffff0ca1a50, kwargs=0x0, helper=0x7ffff1789525<ahocorasick_KeywordTree_search_helper>) at py_wrapper.c:190
#2 0x00007ffff178a358 in ahocorasick_KeywordTree_search (self=0x7ffff7f6c230, args=0x7ffff0ca1a50, kwargs=0x0) at py_wrapper.c:212
#3 0x00000000004a7103 in call_function (f=<value optimized out>, throwflag=<value optimized out>) at Python/ceval.c:4013
#4 PyEval_EvalFrameEx (f=<value optimized out>, throwflag=<value optimized out>) at Python/ceval.c:2666
#5 0x0000000000507e8d in gen_send_ex (gen=0x7904640, arg=0x0, exc=<value optimized out>) at Objects/genobject.c:84
#6 0x00000000004a25da in PyEval_EvalFrameEx (f=<value optimized out>, throwflag=<value optimized out>) at Python/ceval.c:2497
#7 0x00000000004a805b in fast_function (f=<value optimized out>, throwflag=<value optimized out>) at Python/ceval.c:4099
#8 call_function (f=<value optimized out>, throwflag=<value optimized out>) at Python/ceval.c:4034
#9 PyEval_EvalFrameEx (f=<value optimized out>, throwflag=<value optimized out>) at Python/ceval.c:2666
#10 0x00000000004a8bd7 in PyEval_EvalCodeEx (co=0x7ffff1ff54b0, globals=<value optimized out>, locals=<value optimized out>, args=<value optimized out>, argcount=3, kws=0x9984520, kwcount=0, defs=0x7ffff2016968, defcount=1, closure=0x0) at Python/ceval.c:3253
#11 0x00000000004a6dce in fast_function (f=<value optimized out>, throwflag=<value optimized out>) at Python/ceval.c:4109
#12 call_function (f=<value optimized out>, throwflag=<value optimized out>) at Python/ceval.c:4034
#13 PyEval_EvalFrameEx (f=<value optimized out>, throwflag=<value optimized out>) at Python/ceval.c:2666
#14 0x00000000004a805b in fast_function (f=<value optimized out>, throwflag=<value optimized out>) at Python/ceval.c:4099
#15 call_function (f=<value optimized out>, throwflag=<value optimized out>) at Python/ceval.c:4034
#16 PyEval_EvalFrameEx (f=<value optimized out>, throwflag=<value optimized out>) at Python/ceval.c:2666
#17 0x00000000004a8bd7 in PyEval_EvalCodeEx (co=0x7ffff7ec4130, globals=<value optimized out>, locals=<value optimized out>, args=<value optimized out>, argcount=0, kws=0x0, kwcount=0, defs=0x0, defcount=0, closure=0x0) at Python/ceval.c:3253
#18 0x00000000004a8ce2 in PyEval_EvalCode (co=<value optimized out>, globals=<value optimized out>, locals=<value optimized out>) at Python/ceval.c:667
#19 0x00000000004c91fe in run_mod (fp=0x880ee0, filename=0x7fffffffe30c "analyzer.py", start=<value optimized out>, globals=0x7fc140, locals=0x7fc140, closeit=1, flags=0x7fffffffdea0) at Python/pythonrun.c:1346
#20 PyRun_FileExFlags (fp=0x880ee0, filename=0x7fffffffe30c "analyzer.py", start=<value optimized out>, globals=0x7fc140, locals=0x7fc140, closeit=1,flags=0x7fffffffdea0) at Python/pythonrun.c:1332
#21 0x00000000004c9414 in PyRun_SimpleFileExFlags (fp=0x880ee0, filename=0x7fffffffe30c "analyzer.py", closeit=1, flags=0x7fffffffdea0)at Python/pythonrun.c:936
#22 0x0000000000414a4f in Py_Main (argc=<value optimized out>, argv=<value optimized out>) at Modules/main.c:599
#23 0x0000003fd281ed5d in __libc_start_main() from /lib64/libc.so.6
#24 0x0000000000413bc9 in _start()
代碼量太大了,而且我看不出哪裏明顯是在訪問 '.so'。你能給我們一個[最小的、完整的、可驗證的例子](http://stackoverflow.com/help/mcve)嗎? – abarnert
要回答你的問題:不,「print」語句在Python中沒有副作用。 – gustafbstrom
你能用 gdb 獲取一下發生段錯誤位置的回溯嗎?運行 'gdb $(which python)',輸入 'r analyzer.py',程序停下後再輸入 'bt'。 – maxy