這是我從網上找到的代碼,執行時出現了下面的錯誤。我是網頁抓取的新手,所以完全搞不清楚問題出在哪裡。有人能告訴我我的代碼哪裡出錯了嗎?謝謝你的幫助!標題:Python 紐約時報網頁抓取錯誤(「bytes 轉 str」)
from nytimesarticle import articleAPI
# Client for the NYT Article Search API, constructed with the API key.
api = articleAPI('a0de895aa110431eb2344303c7105a9f')

# Sample search for 'Obama', filtered on headline and on three sources,
# with a begin_date of 20111231.
# NOTE(review): the traceback recorded further down in this file is raised
# from inside nytimesarticle (_format_fq) while it processes this call's
# arguments, not by a statement of this script itself.
articles = api.search(
    q='Obama',
    fq={
        'headline': 'Obama',
        'source': ['Reuters', 'AP', 'The New York Times'],
    },
    begin_date=20111231,
)
def parse_articles(articles):
    """Flatten an Article Search response into a list of row dictionaries.

    Args:
        articles: Decoded API response; the documents are read from
            ``articles['response']['docs']``.

    Returns:
        A list with one dict per document. Every dict carries the same keys,
        in this order: ``id``, ``abstract``, ``headline``, ``desk``, ``date``,
        ``section``, ``snippet``, ``source``, ``type``, ``url``,
        ``word_count``, ``locations`` and ``subjects``.

    Raises:
        KeyError: If the response or a document lacks a field that is read
            by direct indexing (everything except ``abstract``/``snippet``).
    """
    news = []
    for doc in articles['response']['docs']:
        keywords = doc['keywords']
        news.append({
            'id': doc['_id'],
            # Text is kept as ``str``. Calling ``.encode("utf8")`` here would
            # produce ``bytes`` on Python 3, which the csv module writes out
            # as the literal text ``b'...'``.
            # ``abstract`` and ``snippet`` are always present (None when the
            # document has no value) so that every row has identical columns.
            'abstract': doc.get('abstract'),
            'headline': doc['headline']['main'],
            'desk': doc['news_desk'],
            'date': doc['pub_date'][0:10],  # cutting time of day.
            'section': doc['section_name'],
            'snippet': doc.get('snippet'),
            'source': doc['source'],
            'type': doc['type_of_material'],
            'url': doc['web_url'],
            'word_count': doc['word_count'],
            # Keyword values whose name contains 'glocations'.
            'locations': [
                kw['value'] for kw in keywords if 'glocations' in kw['name']
            ],
            # Keyword values whose name contains 'subject'.
            'subjects': [
                kw['value'] for kw in keywords if 'subject' in kw['name']
            ],
        })
    return news
def get_articles(date, query):
    """Collect the parsed articles matching *query* for one calendar year.

    Args:
        date: The year as a string (e.g. ``'1999'``); ``'0101'`` and
            ``'1231'`` are appended to form the begin and end dates.
        query: Search text passed to the API as ``q``.

    Returns:
        A list of article dicts as produced by ``parse_articles``, in the
        order the pages were fetched (the search is sorted ``'oldest'``).
    """
    all_articles = []
    # NYT limits pager to first 100 pages. But rarely will you find over
    # 100 pages of results anyway.
    for page in range(0, 100):
        response = api.search(
            q=query,
            fq={'source': ['Reuters', 'AP', 'The New York Times']},
            begin_date=date + '0101',
            end_date=date + '1231',
            sort='oldest',
            page=str(page),
        )
        page_articles = parse_articles(response)
        if not page_articles:
            # An empty page means the result set is exhausted, so the
            # remaining requests would only return more empty pages.
            break
        all_articles.extend(page_articles)
    return all_articles
import csv

# Gather every 'Amnesty International' article, one year at a time,
# for the years 1980 through 2013.
Amnesty_all = []
for i in range(1980, 2014):
    print('Processing' + str(i) + '...')
    Amnesty_year = get_articles(str(i), 'Amnesty International')
    Amnesty_all.extend(Amnesty_year)

# Column names are the union of the keys of all rows, in first-seen order,
# so a row carrying a key that the first row lacks cannot make DictWriter
# raise ValueError.
keys = []
for row in Amnesty_all:
    for key in row:
        if key not in keys:
            keys.append(key)

if Amnesty_all:
    # On Python 3 the csv module needs a text-mode file opened with
    # newline=''; a file opened with 'wb' makes the writer raise TypeError.
    with open('amnesty-mentions.csv', 'w', newline='',
              encoding='utf-8') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(Amnesty_all)
else:
    # Nothing was collected; indexing Amnesty_all[0] would raise IndexError.
    print('No articles found; amnesty-mentions.csv was not written.')
這是我在 Python 3.4 中執行時的輸出:
OUTPUT:
Traceback (most recent call last):
File "/Users/niharika/Documents/nyt.py", line 7, in <module>
begin_date = 20111231)
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/nytimesarticle.py", line 111, in search
API_ROOT, response_format, self._options(**kwargs), key
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/nytimesarticle.py", line 84, in _options
v = _format_fq(v)
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/nytimesarticle.py", line 69, in _format_fq
d[k] = '"' + v + '"'
TypeError: Can't convert 'bytes' object to str implicitly
>>>
源代碼:http://dlab.berkeley.edu/blog/scraping-new-york-times-articles-python-tutorial
可能的重複:[Python3 錯誤:TypeError:無法將 'bytes' 對象隱式轉換爲 str](http://stackoverflow.com/questions/16699362/python3-error-typeerror-cant-convert-bytes-object-to-str-implicitly) –