2017-01-06 85 views
0
from urllib.request import Request, urlopen, urlretrieve 
from bs4 import BeautifulSoup 
def save_picture(self, word):
    """Save the first Google Images thumbnail for *word* as ``<word>.jpg``.

    The search term is percent-encoded before it is placed in the URL, so
    non-ASCII words such as "mañana" and words containing spaces or ``&``
    produce a valid request.

    Args:
        self: Unused; kept so existing callers keep working.
        word: The search term. Also used as the output file's base name.

    Raises:
        urllib.error.URLError: If the search page or the image cannot be
            fetched.
        AttributeError: If the result page contains no ``<img>`` element.
    """
    # Function-scope import: the file-level import only pulls in
    # urllib.request names.
    from urllib.parse import quote_plus

    # http.client encodes the request line as ASCII, so a raw non-ASCII
    # character in the URL raises UnicodeEncodeError. Percent-encode it.
    search_string = "https://www.google.nl/search?q={}&tbm=isch&tbs=isz:m".format(
        quote_plus(word)
    )

    request = Request(search_string, headers={'User-Agent': 'Mozilla/5.0'})
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(request) as response:
        raw_website = response.read()

    soup = BeautifulSoup(raw_website, "html.parser")
    # The first <img> in the returned markup is taken as the thumbnail.
    image = soup.find("img").get("src")

    urlretrieve(image, "{}.jpg".format(word))

urllib.request 的 urlopen 在處理 Unicode 字符串時失敗。我寫了上面的函數,用來從谷歌圖片保存第一張縮略圖(thumbnail)。然而,問題是,當我輸入一個非 ASCII 的單詞(例如:mañana)時,它會失敗。

錯誤消息來自urllib模塊內。我使用python 3.6

Traceback (most recent call last): File "c:\users\xxx\Desktop\script.py", line 19, in main() File "c:\users\xxx\Desktop\script.py", line 16, in main save_picture("mañana") File "c:\users\xxx\Desktop\script.py", line 8, in save_picture raw_website = urlopen(request).read() File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 223, in urlopen return opener.open(url, data, timeout) File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 526, in open response = self._open(req, data) File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 544, in _open '_open', req) File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 504, in _call_chain result = func(*args) File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 1361, in https_open context=self._context, check_hostname=self._check_hostname) File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 1318, in do_open encode_chunked=req.has_header('Transfer-encoding')) File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\http\client.py", line 1239, in request self._send_request(method, url, body, headers, encode_chunked) File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\http\client.py", line 1250, in _send_request self.putrequest(method, url, **skips) File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\http\client.py", line 1117, in putrequest self._output(request.encode('ascii')) UnicodeEncodeError: 'ascii' codec can't encode character '\xf1' in position 16: ordinal not in range(128)

編輯:查閱資料之後我才發現,這個任務有好幾個庫可用:urllib、urllib2 和 requests(還有可通過 pip 安裝的 urllib3)。我遇到這個錯誤,是因爲我正在使用已棄用(deprecated)的庫嗎?

EDIT2:添加了完整的追溯

+0

請發佈完整的回溯(traceback),這樣我們才有上下文。 –

回答

0
import requests 
import mimetypes 
from bs4 import BeautifulSoup 

def save_picture(self, word):
    """Save the first Google Images thumbnail for *word* to disk.

    The file is written to the current directory as ``<word><ext>``, where
    the extension is derived from the image response's Content-Type header
    and falls back to ``.jpg`` when the type is missing or unknown.

    Args:
        self: Unused; kept so existing callers keep working.
        word: The search term. Also used as the output file's base name.

    Raises:
        requests.HTTPError: If the search page or the image request returns
            an HTTP error status.
        AttributeError: If the result page contains no ``<img>`` element.
    """
    # Pass the query via ``params`` so requests percent-encodes it; this
    # keeps the URL valid for non-ASCII words and for '&', '#' or spaces.
    response = requests.get(
        "https://www.google.nl/search",
        params={"q": word, "tbm": "isch", "tbs": "isz:m"},
        headers={'User-Agent': 'Mozilla/5.0'},
    )
    response.raise_for_status()

    # Find the thumbnail for the first hit.
    soup = BeautifulSoup(response.text, "html.parser")
    image_location = soup.find("img").get("src")

    # Download the image; fail loudly rather than saving an error page.
    image = requests.get(image_location)
    image.raise_for_status()

    # Drop any parameters (e.g. "; charset=...") before guessing, and fall
    # back to ".jpg" so the file name never ends in "None".
    content_type = image.headers.get('content-type', '')
    mime_type = content_type.split(';', 1)[0].strip()
    ext = mimetypes.guess_extension(mime_type) or ".jpg"

    with open(f"{word}{ext}", 'wb') as fd:
        for chunk in image.iter_content(chunk_size=8192):
            fd.write(chunk)

我用 requests 庫改寫了這個函數,它如預期地能處理 Unicode 字符串。不過,保存文件的部分有點冗長。