2012-08-01 16 views
1

當我運行(末尾整個代碼)下面的代碼,這一行:如何解決這個Python繼承AttributeError:X實例沒有屬性'con'?

res=self.con.execute(

從這個函數(其中getfeatures返回dictionary):

def fcount(self,f,cat): 
    res=self.con.execute(
     'select count from fc where feature="%s" and category="%s"' 
     %(f,cat)).fetchone() 
    if res==None: return 0 
    else: return float(res[0]) 

可生產這樣的錯誤:

AttributeError: naivebayes instance has no attribute 'con'    

首先,我以爲這是一個pysqlite2問題。但我已經安裝了pysqlite2,當我運行一個pysqlite2測試時,我確定了。我也嘗試使用內置的sqlite3的,而不是pysqlite2(做一個import sqlite3聲明和self.con=sqlite3.connect(":memory:")更換self.con=sqlite.connect(dbfile),但它也不能工作。

所以,前一個問題,我得到一個客戶留言說這是不是一個pysqlite2問題,buth繼承問題,但由於在naivebayes 初始化()被重新顯式調用超類(分類)來擴展它的行爲,這樣說:

class naivebayes(classifier): 

    def __init__(self,getfeatures): 
    classifier.__init__(self,getfeatures) 

我不明白是什麼繼承問題,究竟如何解決?

PS - 代碼不是我的。它來自(優秀的)「編程集體智慧」一書。我只是從raw.github.com/cataska/programming-collective-intelligence-code/...複製它,並將代碼的一部分(fisherclassifier,因爲我只使用naivebayes分類器)。

感謝您的任何幫助。

這裏整個代碼:

from pysqlite2 import dbapi2 as sqlite 

import re 
import math 

def getfeatures(doc): 
    splitter=re.compile('\\W*') 
    # Split the words by non-alpha characters 
    words=[s.lower() for s in splitter.split(doc) 
      if len(s)>2 and len(s)<20] 
    # Return the unique set of words only 
# return dict([(w,1) for w in words]).iteritems() 
    return dict([(w,1) for w in words]) 

class classifier: 
    def __init__(self,getfeatures,filename=None): 
    # Counts of feature/category combinations 
    self.fc={} 
    # Counts of documents in each category 
    self.cc={} 
    self.getfeatures=getfeatures 

    def setdb(self,dbfile): 
    self.con=sqlite.connect(dbfile) 
    self.con.execute('create table if not exists fc(feature,category,count)') 
    self.con.execute('create table if not exists cc(category,count)') 


    def incf(self,f,cat): 
    count=self.fcount(f,cat) 
    if count==0: 
     self.con.execute("insert into fc values ('%s','%s',1)" 
         % (f,cat)) 
    else: 
     self.con.execute(
     "update fc set count=%d where feature='%s' and category='%s'" 
     % (count+1,f,cat)) 

    def fcount(self,f,cat): 
    res=self.con.execute(
     'select count from fc where feature="%s" and category="%s"' 
     %(f,cat)).fetchone() 
    if res==None: return 0 
    else: return float(res[0]) 

    def incc(self,cat): 
    count=self.catcount(cat) 
    if count==0: 
     self.con.execute("insert into cc values ('%s',1)" % (cat)) 
    else: 
     self.con.execute("update cc set count=%d where category='%s'" 
         % (count+1,cat)) 

    def catcount(self,cat): 
    res=self.con.execute('select count from cc where category="%s"' 
         %(cat)).fetchone() 
    if res==None: return 0 
    else: return float(res[0]) 

    def categories(self): 
    cur=self.con.execute('select category from cc'); 
    return [d[0] for d in cur] 

    def totalcount(self): 
    res=self.con.execute('select sum(count) from cc').fetchone(); 
    if res==None: return 0 
    return res[0] 


    def train(self,item,cat): 
    features=self.getfeatures(item) 
    # Increment the count for every feature with this category 
    for f in features.keys(): 
## for f in features: 
     self.incf(f,cat) 
    # Increment the count for this category 
    self.incc(cat) 
    self.con.commit() 

    def fprob(self,f,cat): 
    if self.catcount(cat)==0: return 0 

    # The total number of times this feature appeared in this 
    # category divided by the total number of items in this category 
    return self.fcount(f,cat)/self.catcount(cat) 

    def weightedprob(self,f,cat,prf,weight=1.0,ap=0.5): 
    # Calculate current probability 
    basicprob=prf(f,cat) 

    # Count the number of times this feature has appeared in 
    # all categories 
    totals=sum([self.fcount(f,c) for c in self.categories()]) 

    # Calculate the weighted average 
    bp=((weight*ap)+(totals*basicprob))/(weight+totals) 
    return bp 




class naivebayes(classifier): 

    def __init__(self,getfeatures): 
    classifier.__init__(self,getfeatures) 
    self.thresholds={} 

    def docprob(self,item,cat): 
    features=self.getfeatures(item) 

    # Multiply the probabilities of all the features together 
    p=1 
    for f in features: p*=self.weightedprob(f,cat,self.fprob) 
    return p 

    def prob(self,item,cat): 
    catprob=self.catcount(cat)/self.totalcount() 
    docprob=self.docprob(item,cat) 
    return docprob*catprob 

    def setthreshold(self,cat,t): 
    self.thresholds[cat]=t 

    def getthreshold(self,cat): 
    if cat not in self.thresholds: return 1.0 
    return self.thresholds[cat] 

    def classify(self,item,default=None): 
    probs={} 
    # Find the category with the highest probability 
    max=0.0 
    for cat in self.categories(): 
     probs[cat]=self.prob(item,cat) 
     if probs[cat]>max: 
     max=probs[cat] 
     best=cat 

    # Make sure the probability exceeds threshold*next best 
    for cat in probs: 
     if cat==best: continue 
     if probs[cat]*self.getthreshold(best)>probs[best]: return default 
    return best 


def sampletrain(cl): 
    cl.train('Nobody owns the water.','good') 
    cl.train('the quick rabbit jumps fences','good') 
    cl.train('buy pharmaceuticals now','bad') 
    cl.train('make quick money at the online casino','bad') 
    cl.train('the quick brown fox jumps','good') 


nb = naivebayes(getfeatures) 

sampletrain(nb) 

#print ('\nbuy is classified as %s'%nb.classify('buy')) 
#print ('\nquick is classified as %s'%nb.classify('quick')) 

##print getfeatures('Nobody owns the water.') 

回答

1

只是追加classifier.__init__方法與self.setdb('autocreated_db_file')

class classifier:            
    def __init__(self,getfeatures,filename=None): 
    ... 
    self.setdb('autocreated_db_file') 
相關問題