I built a small project with Scrapy. The problem is that my spider crawls the pages and scrapes the data, but nothing gets saved to my database. I am using MySQL as the database.

I think there is something missing in my pipelines.py file:

from scrapy import log
from twisted.enterprise import adbapi

import MySQLdb.cursors

# the required Pipeline settings.
class MySQLStorePipeline(object):

    def __init__(self, *args, **kwargs):
        # db settings; note that port must be an integer,
        # MySQLdb raises a TypeError when it is passed as a string
        self.dbpool = adbapi.ConnectionPool('MySQLdb',
            db='project2',
            user='root',
            passwd='',
            host='127.0.0.1',
            port=3306,
            cursorclass=MySQLdb.cursors.DictCursor,
            charset='utf8',
            use_unicode=True
        )

    def process_item(self, item, spider):
        # run the db query in a thread pool
        query = self.dbpool.runInteraction(self._conditional_insert, item)
        query.addErrback(self.handle_error)
        return item

    def _conditional_insert(self, tx, item):
        # `sites` is not defined anywhere in this pipeline, so this check
        # raises a NameError; an XPath test like this belongs in the spider
        if sites.get('//div[@class="abTbl "]'):
            tx.execute(
                "insert into crawlerapp_directory (Catogory, Bussiness_name, Description, Number, Web_url) "
                "values (%s, %s, %s, %s, %s)",
                (item['Catogory'][0],
                 item['Bussiness_name'][0],
                 item['Description'][0],
                 item['Number'][0],
                 item['Web_url'][0],
                 )
            )
            # execute() returns a row count; lastrowid holds the new
            # primary key needed for the foreign-key inserts below
            insert_id = tx.lastrowid

            # insert into the foreign-key table Adress
            tx.execute(
                "insert into crawlerapp_adress (directory_id, adress_name) "
                "values (%s, %s)",
                (insert_id,
                 item['adress_name'][0]
                 )
            )

            # insert into the foreign-key table Photos
            tx.execute(
                "insert into crawlerapp_photos (directory_id, Photo_path, Photo_name) "
                "values (%s, %s, %s)",
                (insert_id,
                 item['Photo_path'][0],
                 item['Photo_name'][0]
                 )
            )
            log.msg("Item stored in db: %s" % item, level=log.DEBUG)

    def handle_error(self, e):
        log.err(e)

Please guide me so that the scraped data gets saved to my database.
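
One thing worth checking when a pipeline never seems to fire: Scrapy only calls process_item on pipelines that are enabled in the project's settings.py. A minimal sketch, assuming the project package is named project2 to match the database settings above (older Scrapy releases expect a list of class paths, newer ones a dict that maps each path to an order value):

# settings.py -- the pipeline must be registered here,
# otherwise Scrapy never calls it; the module path below
# is an assumption based on the project name
ITEM_PIPELINES = {
    'project2.pipelines.MySQLStorePipeline': 300,
}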

Answer


Try this; it worked for me:

import MySQLdb
from scrapy import log


class MySpiderPipeline(object):
    def __init__(self):
        # host_name, user_name, password and db_name are placeholders
        # for your own connection details
        self.conn = MySQLdb.connect(host_name,
                                    user_name,
                                    password,
                                    db_name,
                                    charset="utf8", use_unicode=True)
        self.cursor = self.conn.cursor()

    def open_spider(self, spider):
        pass

    def close_spider(self, spider):
        self.conn.close()

    def process_item(self, item, spider):
        try:
            # a parameterized query lets the driver quote and escape
            # the values, so no manual escaping of quotes is needed
            sql = """INSERT INTO tutorial (`title`, `link`, `desc`, `last_updated`)
                     VALUES (%s, %s, %s, %s)"""
            self.cursor.execute(sql, (item['title'][0],
                                      item['link'][0],
                                      item['desc'][0],
                                      item['last_updated']))
            self.conn.commit()
        except MySQLdb.Error, e:
            print "DB write failure. Error %d: %s" % (e.args[0], e.args[1])
            log.msg("Error %d: %s" % (e.args[0], e.args[1]), level=log.CRITICAL)
        return item
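
The INSERT above assumes a tutorial table with those four columns already exists. A minimal sketch of a matching schema, run once before crawling; the column types are assumptions, and `desc` needs backticks because it is a reserved word in MySQL:

import MySQLdb

# one-off setup: create a table that matches the INSERT in the pipeline;
# the column types below are assumptions, adjust them to your data
conn = MySQLdb.connect(host_name, user_name, password, db_name,
                       charset="utf8", use_unicode=True)
cur = conn.cursor()
cur.execute("""
    CREATE TABLE IF NOT EXISTS tutorial (
        id INT AUTO_INCREMENT PRIMARY KEY,
        `title` VARCHAR(255),
        `link` VARCHAR(255),
        `desc` TEXT,
        `last_updated` VARCHAR(64)
    )
""")
conn.commit()
conn.close()

Committing once per item, as the pipeline above does, is simple and safe; for large crawls it is common to batch the inserts and commit in close_spider instead.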