from urllib.request import Request, urlopen, urlretrieve
from bs4 import BeautifulSoup
def save_picture(self, word):
search_string = "https://www.google.nl/search?q={}&tbm=isch&tbs=isz:m".format(word)
request = Request(search_string, headers={'User-Agent': 'Mozilla/5.0'})
raw_website = urlopen(request).read()
soup = BeautifulSoup(raw_website, "html.parser")
image = soup.find("img").get("src")
urlretrieve(image, "{}.jpg".format(word))
的失敗的urlopen我寫上面的函數從谷歌圖片保存第一TUMBNAIL圖像。然而,問題是,當我輸入一個非ansii字時會失敗,例如:mañanaurllib.request裏的Unicode字符串
錯誤消息來自urllib模塊內。我使用python 3.6
Traceback (most recent call last): File "c:\users\xxx\Desktop\script.py", line 19, in main() File "c:\users\xxx\Desktop\script.py", line 16, in main save_picture("mañana") File "c:\users\xxx\Desktop\script.py", line 8, in save_picture raw_website = urlopen(request).read() File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 223, in urlopen return opener.open(url, data, timeout) File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 526, in open response = self._open(req, data) File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 544, in _open '_open', req) File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 504, in _call_chain result = func(*args) File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 1361, in https_open context=self._context, check_hostname=self._check_hostname) File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 1318, in do_open encode_chunked=req.has_header('Transfer-encoding')) File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\http\client.py", line 1239, in request self._send_request(method, url, body, headers, encode_chunked) File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\http\client.py", line 1250, in _send_request self.putrequest(method, url, **skips) File "C:\Users\xxx\AppData\Local\Programs\Python\Python36\lib\http\client.py", line 1117, in putrequest self._output(request.encode('ascii')) UnicodeEncodeError: 'ascii' codec can't encode character '\xf1' in position 16: ordinal not in range(128)
編輯:讀了之後我才發現有這個任務,urllib的,urllib2的和請求幾個庫(也通過PIP:urllib3)。我得到這個錯誤,因爲我正在使用折舊的庫嗎?
EDIT2:添加了完整的追溯
發佈完整的回溯,所以我們有上下文。 –