嗯，我遇到了同樣的問題，所以我使用 type() 動態地創建蜘蛛類：
from scrapy.contrib.spiders import CrawlSpider
import urlparse
class GenericSpider(CrawlSpider):
    """Generic spider factory: uses type() to build a new spider class per domain.

    The class-level attributes below are placeholders; ``create()`` returns a
    per-domain subclass with concrete values filled in.
    """
    name = 'generic'
    allowed_domains = []
    start_urls = []

    @classmethod
    def create(cls, link):
        """Return a new spider *class* (not an instance) targeting ``link``'s site.

        Parameters
        ----------
        link : str
            A full URL, e.g. ``'http://www.google.com'``.

        Returns
        -------
        type
            A subclass of ``cls`` whose ``name``/``allowed_domains``/``start_urls``
            are bound to the link's domain. For ``www.google.com`` the class is
            named ``GoogleComGenericSpider``.
        """
        # Work on both Python 3 (urllib.parse) and Python 2 (urlparse).
        try:
            from urllib.parse import urlparse
        except ImportError:  # Python 2 fallback
            from urlparse import urlparse

        domain = urlparse(link).netloc.lower()
        # Derive a clean class name: drop any explicit port (a ':' is not a
        # valid identifier character) and a leading 'www.', then CamelCase
        # the remaining host: www.google.com -> GoogleComGenericSpider.
        host = domain.split(':', 1)[0]
        if host.startswith('www.'):
            host = host[4:]
        class_name = host.title().replace('.', '') + cls.__name__
        # NOTE: the spider's name/allowed_domains keep the raw netloc
        # (including 'www.' and any port), matching the original behavior.
        return type(class_name, (cls,), {
            'allowed_domains': [domain],
            'start_urls': [link],
            'name': domain,
        })
所以，要為「http://www.google.com」創建一個蜘蛛，我只需這樣做：
In [3]: google_spider = GenericSpider.create('http://www.google.com')
In [4]: google_spider
Out[4]: __main__.GoogleComGenericSpider
In [5]: google_spider.name
Out[5]: 'www.google.com'
希望這會有所幫助