2017-02-15 151 views
-3

當我運行這個python程序時,我總是收到一個錯誤, 它說我在'/path/to/times-testing.log'沒有任何文件或目錄。 我似乎不明白,誰能幫我解決這個問題。 預先感謝您!Python腳本錯誤?

繼承人的代碼:

import urllib2 
import json 
import datetime 
import time 
import sys, os 
import logging 
from urllib2 import HTTPError 
from ConfigParser import SafeConfigParser 


# helper function to iterate through dates 
def daterange(start_date, end_date): 
    if start_date <= end_date: 
     for n in range((end_date - start_date).days + 1): 
      yield start_date + datetime.timedelta(n) 
    else: 
     for n in range((start_date - end_date).days + 1): 
      yield start_date - datetime.timedelta(n) 

# helper function to get json into a form I can work with  
def convert(input): 
    if isinstance(input, dict): 
     return {convert(key): convert(value) for key, value in input.iteritems()} 
    elif isinstance(input, list): 
     return [convert(element) for element in input] 
    elif isinstance(input, unicode): 
     return input.encode('utf-8') 
    else: 
     return input 

# helpful function to figure out what to name individual JSON files   
def getJsonFileName(date, page, json_file_path): 
    json_file_name = ".".join([date,str(page),'json']) 
    json_file_name = "".join([json_file_path,json_file_name]) 
    return json_file_name 

# helpful function for processing keywords, mostly  
def getMultiples(items, key): 
    values_list = "" 
    if len(items) > 0: 
     num_keys = 0 
     for item in items: 
      if num_keys == 0: 
       values_list = item[key]     
      else: 
       values_list = "; ".join([values_list,item[key]]) 
      num_keys += 1 
    return values_list 

# get the articles from the NYTimes Article API  
def getArticles(date, query, api_key, json_file_path): 
    # LOOP THROUGH THE 101 PAGES NYTIMES ALLOWS FOR THAT DATE 
    for page in range(101): 
     for n in range(5): # 5 tries 
      try: 
       request_string = "http://api.nytimes.com/svc/search/v2/articlesearch.json?begin_date=" + date + "&end_date=" + date + "&page=" + str(page) + "&api-key=" + api_key 
       response = urllib2.urlopen(request_string) 
       content = response.read() 
       if content: 
        articles = convert(json.loads(content)) 
        # if there are articles here 
        if len(articles["response"]["docs"]) >= 1: 
         json_file_name = getJsonFileName(date, page, json_file_path) 
         json_file = open(json_file_name, 'w') 
         json_file.write(content) 
         json_file.close() 
        # if no more articles, go to next date 
        else: 
         return 
       time.sleep(3) # wait so we don't overwhelm the API 
      except HTTPError as e: 
       logging.error("HTTPError on page %s on %s (err no. %s: %s) Here's the URL of the call: %s", page, date, e.code, e.reason, request_string) 
       if e.code == 403: 
        print "Script hit a snag and got an HTTPError 403. Check your log file for more info." 
        return 
       if e.code == 429: 
        print "Waiting. You've probably reached an API limit." 
        time.sleep(30) # wait 30 seconds and try again 
      except: 
       logging.error("Error on %s page %s: %s", date, file_number, sys.exc_info()[0]) 
       continue 

# parse the JSON files you stored into a tab-delimited file 
def parseArticles(date, tsv_file_name, json_file_path): 

    for file_number in range(101): 
     # get the articles and put them into a dictionary 
     try: 
      file_name = getJsonFileName(date,file_number, json_file_path) 
      if os.path.isfile(file_name): 
       in_file = open(file_name, 'r') 
       articles = convert(json.loads(in_file.read())) 
       in_file.close() 
      else: 
       break 
     except IOError as e: 
      logging.error("IOError in %s page %s: %s %s", date, file_number, e.errno, e.strerror) 
      continue 

     # if there are articles in that document, parse them 
     if len(articles["response"]["docs"]) >= 1: 

      # open the tsv for appending 
      try: 
       out_file = open(tsv_file_name, 'ab') 

      except IOError as e: 
       logging.error("IOError: %s %s %s %s", date, file_number, e.errno, e.strerror) 
       continue 

      # loop through the articles putting what we need in a tsv 
      try: 
       for article in articles["response"]["docs"]: 
        # if (article["source"] == "The New York Times" and article["document_type"] == "article"): 
        keywords = "" 
        keywords = getMultiples(article["keywords"],"value") 

        # should probably pull these if/else checks into a module 
        variables = [ 
         article["pub_date"], 
         keywords, 
         str(article["headline"]["main"]).decode("utf8").replace("\n","") if "main" in article["headline"].keys() else "", 
         str(article["source"]).decode("utf8") if "source" in article.keys() else "", 
         str(article["document_type"]).decode("utf8") if "document_type" in article.keys() else "", 
         article["web_url"] if "web_url" in article.keys() else "", 
         str(article["news_desk"]).decode("utf8") if "news_desk" in article.keys() else "", 
         str(article["section_name"]).decode("utf8") if "section_name" in article.keys() else "", 
         str(article["snippet"]).decode("utf8").replace("\n","") if "snippet" in article.keys() else "", 
         str(article["lead_paragraph"]).decode("utf8").replace("\n","") if "lead_paragraph" in article.keys() else "", 
         ] 
        line = "\t".join(variables) 
        out_file.write(line.encode("utf8")+"\n") 
      except KeyError as e: 
       logging.error("KeyError in %s page %s: %s %s", date, file_number, e.errno, e.strerror) 
       continue 
      except (KeyboardInterrupt, SystemExit): 
       raise 
      except: 
       logging.error("Error on %s page %s: %s", date, file_number, sys.exc_info()[0]) 
       continue 

      out_file.close() 
     else: 
      break 

# Main function where stuff gets done 

def main(): 

    config = SafeConfigParser() 
    script_dir = os.path.dirname(__file__) 
    config_file = os.path.join(script_dir, 'config/settings.cfg') 
    config.read(config_file) 

    json_file_path = config.get('files','json_folder') 
    tsv_file_name = config.get('files','tsv_file') 
    log_file = config.get('files','logfile') 

    api_key = config.get('nytimes','api_key')  
    start = datetime.date(year = int(config.get('nytimes','start_year')), month = int(config.get('nytimes','start_month')), day = int(config.get('nytimes','start_day'))) 
    end = datetime.date(year = int(config.get('nytimes','end_year')), month = int(config.get('nytimes','end_month')), day = int(config.get('nytimes','end_day'))) 
    query = config.get('nytimes','query') 

    logging.basicConfig(filename=log_file, level=logging.INFO) 

    logging.info("Getting started.") 
    try: 
     # LOOP THROUGH THE SPECIFIED DATES 
     for date in daterange(start, end): 
      date = date.strftime("%Y%m%d") 
      logging.info("Working on %s." % date) 
      getArticles(date, query, api_key, json_file_path) 
      parseArticles(date, tsv_file_name, json_file_path) 
    except: 
     logging.error("Unexpected error: %s", str(sys.exc_info()[0])) 
    finally: 
     logging.info("Finished.") 

if __name__ == '__main__' : 
    main() 

它會產生編譯時有下列錯誤:

Rakeshs-MacBook-Air:get-nytimes-articles-master niharika$ python getTimesArticles.py 
Traceback (most recent call last): 
    File "getTimesArticles.py", line 180, in <module> 
    main() 
    File "getTimesArticles.py", line 164, in main 
    logging.basicConfig(filename=log_file, level=logging.INFO) 
    File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/logging/__init__.py", line 1545, in basicConfig 
    hdlr = FileHandler(filename, mode) 
    File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/logging/__init__.py", line 911, in __init__ 
    StreamHandler.__init__(self, self._open()) 
    File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/logging/__init__.py", line 941, in _open 
    stream = open(self.baseFilename, self.mode) 
IOError: [Errno 2] No such file or directory: '/path/to/times-testing.log' 
Rakeshs-MacBook-Air:get-nytimes-articles-master niharika$ 
+0

maby logfile不存在觸摸日誌文件'touch /path/to/times-testing.log' – mtt2p

+0

config/settings.cfg文件的內容是什麼?特別是,與關鍵日誌文件關聯的值是多少?磁盤上是否存在相應的文件? – Antwane

+0

是的,我有settings.cfc在/desktop/nytpy/config/settings.cfc –

回答

0

main()功能:

​​

打開config/settings.cfg文件並獲取名稱的日誌文件,這似乎是/path/to/times-testing.log。您可能需要創建該文件夾(可能不是最好的主意)或將其配置爲指向正確的文件。

+0

謝謝,你的回答非常有見地,我找到了settings.cfg文件,裏面有一個參數,指定logfile = /path/to/times-testing.log json_folder =/full/path/ tsv_file =/full/path/output.tsv,我是否應該創建新的空文件,即output.tsv和times-testing.log文件並添加路徑?我應該爲Json_folder參數設置什麼,謝謝! –

+0

嗯,這完全取決於你和你想要找到這些文件的位置,真的。 :)通常,日誌存儲在UNIX系統(例如macOS)的'/ var/log /'文件夾中。 – sxn