當我運行這個python程序時,我總是收到一個錯誤, 它說我在'/path/to/times-testing.log'沒有任何文件或目錄。 我似乎不明白,誰能幫我解決這個問題。 預先感謝您!Python腳本錯誤?
繼承人的代碼:
import urllib2
import json
import datetime
import time
import sys, os
import logging
from urllib2 import HTTPError
from ConfigParser import SafeConfigParser
# helper function to iterate through dates
def daterange(start_date, end_date):
if start_date <= end_date:
for n in range((end_date - start_date).days + 1):
yield start_date + datetime.timedelta(n)
else:
for n in range((start_date - end_date).days + 1):
yield start_date - datetime.timedelta(n)
# helper function to get json into a form I can work with
def convert(input):
if isinstance(input, dict):
return {convert(key): convert(value) for key, value in input.iteritems()}
elif isinstance(input, list):
return [convert(element) for element in input]
elif isinstance(input, unicode):
return input.encode('utf-8')
else:
return input
# helpful function to figure out what to name individual JSON files
def getJsonFileName(date, page, json_file_path):
json_file_name = ".".join([date,str(page),'json'])
json_file_name = "".join([json_file_path,json_file_name])
return json_file_name
# helpful function for processing keywords, mostly
def getMultiples(items, key):
values_list = ""
if len(items) > 0:
num_keys = 0
for item in items:
if num_keys == 0:
values_list = item[key]
else:
values_list = "; ".join([values_list,item[key]])
num_keys += 1
return values_list
# get the articles from the NYTimes Article API
def getArticles(date, query, api_key, json_file_path):
# LOOP THROUGH THE 101 PAGES NYTIMES ALLOWS FOR THAT DATE
for page in range(101):
for n in range(5): # 5 tries
try:
request_string = "http://api.nytimes.com/svc/search/v2/articlesearch.json?begin_date=" + date + "&end_date=" + date + "&page=" + str(page) + "&api-key=" + api_key
response = urllib2.urlopen(request_string)
content = response.read()
if content:
articles = convert(json.loads(content))
# if there are articles here
if len(articles["response"]["docs"]) >= 1:
json_file_name = getJsonFileName(date, page, json_file_path)
json_file = open(json_file_name, 'w')
json_file.write(content)
json_file.close()
# if no more articles, go to next date
else:
return
time.sleep(3) # wait so we don't overwhelm the API
except HTTPError as e:
logging.error("HTTPError on page %s on %s (err no. %s: %s) Here's the URL of the call: %s", page, date, e.code, e.reason, request_string)
if e.code == 403:
print "Script hit a snag and got an HTTPError 403. Check your log file for more info."
return
if e.code == 429:
print "Waiting. You've probably reached an API limit."
time.sleep(30) # wait 30 seconds and try again
except:
logging.error("Error on %s page %s: %s", date, file_number, sys.exc_info()[0])
continue
# parse the JSON files you stored into a tab-delimited file
def parseArticles(date, tsv_file_name, json_file_path):
for file_number in range(101):
# get the articles and put them into a dictionary
try:
file_name = getJsonFileName(date,file_number, json_file_path)
if os.path.isfile(file_name):
in_file = open(file_name, 'r')
articles = convert(json.loads(in_file.read()))
in_file.close()
else:
break
except IOError as e:
logging.error("IOError in %s page %s: %s %s", date, file_number, e.errno, e.strerror)
continue
# if there are articles in that document, parse them
if len(articles["response"]["docs"]) >= 1:
# open the tsv for appending
try:
out_file = open(tsv_file_name, 'ab')
except IOError as e:
logging.error("IOError: %s %s %s %s", date, file_number, e.errno, e.strerror)
continue
# loop through the articles putting what we need in a tsv
try:
for article in articles["response"]["docs"]:
# if (article["source"] == "The New York Times" and article["document_type"] == "article"):
keywords = ""
keywords = getMultiples(article["keywords"],"value")
# should probably pull these if/else checks into a module
variables = [
article["pub_date"],
keywords,
str(article["headline"]["main"]).decode("utf8").replace("\n","") if "main" in article["headline"].keys() else "",
str(article["source"]).decode("utf8") if "source" in article.keys() else "",
str(article["document_type"]).decode("utf8") if "document_type" in article.keys() else "",
article["web_url"] if "web_url" in article.keys() else "",
str(article["news_desk"]).decode("utf8") if "news_desk" in article.keys() else "",
str(article["section_name"]).decode("utf8") if "section_name" in article.keys() else "",
str(article["snippet"]).decode("utf8").replace("\n","") if "snippet" in article.keys() else "",
str(article["lead_paragraph"]).decode("utf8").replace("\n","") if "lead_paragraph" in article.keys() else "",
]
line = "\t".join(variables)
out_file.write(line.encode("utf8")+"\n")
except KeyError as e:
logging.error("KeyError in %s page %s: %s %s", date, file_number, e.errno, e.strerror)
continue
except (KeyboardInterrupt, SystemExit):
raise
except:
logging.error("Error on %s page %s: %s", date, file_number, sys.exc_info()[0])
continue
out_file.close()
else:
break
# Main function where stuff gets done
def main():
config = SafeConfigParser()
script_dir = os.path.dirname(__file__)
config_file = os.path.join(script_dir, 'config/settings.cfg')
config.read(config_file)
json_file_path = config.get('files','json_folder')
tsv_file_name = config.get('files','tsv_file')
log_file = config.get('files','logfile')
api_key = config.get('nytimes','api_key')
start = datetime.date(year = int(config.get('nytimes','start_year')), month = int(config.get('nytimes','start_month')), day = int(config.get('nytimes','start_day')))
end = datetime.date(year = int(config.get('nytimes','end_year')), month = int(config.get('nytimes','end_month')), day = int(config.get('nytimes','end_day')))
query = config.get('nytimes','query')
logging.basicConfig(filename=log_file, level=logging.INFO)
logging.info("Getting started.")
try:
# LOOP THROUGH THE SPECIFIED DATES
for date in daterange(start, end):
date = date.strftime("%Y%m%d")
logging.info("Working on %s." % date)
getArticles(date, query, api_key, json_file_path)
parseArticles(date, tsv_file_name, json_file_path)
except:
logging.error("Unexpected error: %s", str(sys.exc_info()[0]))
finally:
logging.info("Finished.")
if __name__ == '__main__' :
main()
它會產生編譯時有下列錯誤:
Rakeshs-MacBook-Air:get-nytimes-articles-master niharika$ python getTimesArticles.py
Traceback (most recent call last):
File "getTimesArticles.py", line 180, in <module>
main()
File "getTimesArticles.py", line 164, in main
logging.basicConfig(filename=log_file, level=logging.INFO)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/logging/__init__.py", line 1545, in basicConfig
hdlr = FileHandler(filename, mode)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/logging/__init__.py", line 911, in __init__
StreamHandler.__init__(self, self._open())
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/logging/__init__.py", line 941, in _open
stream = open(self.baseFilename, self.mode)
IOError: [Errno 2] No such file or directory: '/path/to/times-testing.log'
Rakeshs-MacBook-Air:get-nytimes-articles-master niharika$
maby logfile不存在觸摸日誌文件'touch /path/to/times-testing.log' – mtt2p
config/settings.cfg文件的內容是什麼?特別是,與關鍵日誌文件關聯的值是多少?磁盤上是否存在相應的文件? – Antwane
是的,我有settings.cfc在/desktop/nytpy/config/settings.cfc –