2012-12-08 51 views
0

有誰能告訴我我做錯了什麼?我不斷收到此代碼的錯誤。在Python中下載多個文件時出錯

我試圖從primaryschoolgames下載所有的SWF的只是作爲一個實驗,但我似乎無法做到這一點:

#!/usr/bin/env python 
# encoding: utf-8 

import sys, getopt 
import os, urllib, urllib2, re, string, math 

help_message = ''' 
''' 
no_param = ''' 
''' 

verbose = False 
fakeMode = False 
curPath = os.getcwd() + "/" 

urlRegex = '' 
FileRegex = '' 
outputPath = '' 
currentFile = '' 

def removeDuplicates(seq): 
# Not order preserving 
keys = {} 
for e in seq: 
    keys[e] = 1 
return keys.keys() 

def go(filename): 
print "Having a look at " + string.capwords(filename) 

global urlRegex, FileRegex, outputPath, currentFile 

url = 'http://cdn.primarygames.com' + filename 

urlRegex = '/'+filename+'/.+/download' 
FileRegex = '/'+filename+'/(.*?)/download' 
outputPath = curPath+"Swfs"+"/" 

if not os.path.exists(outputPath): 
    os.makedirs(outputPath) 

filelist = [] 

while(len(url)): 
    # looping system 
    newlist, url = scrapePage(url, filename) 
    filelist.extend(newlist) 

print 'Found %s Files.' % len(filelist) 

for swf in filelist: 
    swfurl = swf['url'] 
    name = swf['name'] 
    currentFile = name 
    #print 'Downloading '+name, 
    if not fakeMode: 
     #print '' 
     urllib.urlretrieve('http://cdn.primarygames.com' + swfurl, outputPath+name) 
    else: 
     print 'Not downloading %s.' % name 
print "All done with %s!" % filename 

def scrapePage(url, filename): 
print 'Looking through '+url 
html = urllib2.urlopen(url).read() 
swflist = re.findall(urlRegex, html) 
swflist = removeDuplicates(swflist) 

swfs = [] 

for swfurl in swflist: 
    r = re.compile(FileRegex) 
    swfname = r.search(swfurl).group(1) 
    swfname = swfname.replace('-', ' ') 
    name = filename + "/" + swfname + ".swf" 
    name = string.capwords(name) 
    swf.append({'name':name,'url':swfurl}) 

r = re.compile(nextRegex) 
result = r.search(html) 
if result: 
    nextUrl = 'http://cdn.primarygames.com' + result.group(1) 
else: 
    nextUrl = '' 

return swfs, nextUrl 


def main(argv=None): 
global verbose, fakeMode 

if argv is None: 
    argv = sys.argv 
try: 
    try: 
     opts, args = getopt.getopt(argv[1:], "ho:vf", ["help", "output="]) 
    except getopt.error, msg: 
     raise Usage(msg) 

    # option processing 
    for option, value in opts: 
     if option == "-v": 
      verbose = True 
     if option in ("-f", "--fake"): 
      fakeMode = True 
     if option in ("-h", "--help"): 
      raise Usage(help_message) 
     if option in ("-o", "--output"): 
      output = value 

    if len(args): 
     swfs = args 
    else: 
     raise Usage(no_param) 

except Usage, err: 
    print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg) 
    if err.msg != help_message: 
     print >> sys.stderr, "\t for help use --help" 
    return 2 

for swf in swfs: 
    go(swf) 


if __name__ == "__main__": 
sys.exit(main()) 

這是我不斷收到錯誤:

Having a look at * 
Looking through http://cdn.primarygames.com/* 
Traceback (most recent call last): 
File "C:\PrimarySchoolGames Swf Downloader.py" 
, line 129, in <module> 
sys.exit(main()) 
File "C:\PrimarySchoolGames Swf Downloader.py" 
, line 125, in main 
go(swf) 
File "C:\PrimarySchoolGames Swf Downloader.py" 
, line 48, in go 
newlist, url = scrapePage(url, filename) 
File "C:\Users\Terrii\Desktop\VB Extra's\PrimarySchoolGames Swf Downloader.py" 
, line 67, in scrapePage 
html = urllib2.urlopen(url).read() 
File "C:\Python27\lib\urllib2.py", line 126, in urlopen 
return _opener.open(url, data, timeout) 
File "C:\Python27\lib\urllib2.py", line 400, in open 
response = self._open(req, data) 
File "C:\Python27\lib\urllib2.py", line 418, in _open 
'_open', req) 
File "C:\Python27\lib\urllib2.py", line 378, in _call_chain 
result = func(*args) 
File "C:\Python27\lib\urllib2.py", line 1207, in http_open 
return self.do_open(httplib.HTTPConnection, req) 
File "C:\Python27\lib\urllib2.py", line 1177, in do_open 
raise URLError(err) 
urllib2.URLError: <urlopen error [Errno 11004] getaddrinfo failed> 
+2

如果你想在你的帖子中修正縮進,你可能會得到一個答案。現在的情況是,不可能說出發生了什麼。 – MikeHunter

回答

1

失敗getaddrinfo通常表明您提供的URL有問題。由於我能夠解決地址,你確定你不在代理服務器後面嗎?這可能會導致DNS查找失敗,從而導致此消息。

Python和決定在Windows上使用的代理服務器:

在Windows環境中,如果沒有代理設置環境變量,從註冊表中的Internet設置 部分獲得 代理設置。

如需更多幫助,請與@MikeHunter合作。我試圖修復你的代碼,但由於我必須實現你的Exception-Class才能讓代碼運行,我認爲你應該重新縮進你的代碼並提供更多信息。抱歉。