1
我目前正在從事一項需要從PubMed下載幾千條引用的項目。我目前使用BioPython並寫了這個代碼:使用BioPython時Urllib錯誤
from Bio import Entrez
from Bio import Medline
from pandas import *
from sys import argv
import os
Entrez.email = "email"
df = read_csv("/Users/.../Desktop/sr_dataset/adhd/excluded/adhdExcluded.csv")
i=0
withoutMesh = 0
withoutMeshID = ""
withoutAbstract = 0
withoutAbstractID = ""
path = '/Users/.../Desktop/sr_dataset/adhd/excluded'
for index, row in df.iterrows():
print (row.id)
handle = Entrez.efetch(db="pubmed",rettype="medline",retmode="text", id=str(row.id))
records = Medline.parse(handle)
for record in records:
try:
abstract = str(record["AB"])
except:
abstract = "none"
withoutAbstract = withoutAbstract +1
withoutAbstractID = withoutAbstractID + str(row.id) + "\n"
try:
title = str(record["TI"])
except:
title = "none"
try:
mesh = str(record["MH"])
except:
mesh = "none"
withoutMesh = withoutMesh +1
withoutMeshID = withoutMeshID + str(row.id) + "\n"
filename= str(row.id) + '.txt'
filename = os.path.join(path, filename)
file = open(filename, "w")
output = "title: "+str(title) + "\n\n" + "abstract: "+str(abstract) + "\n\n" + "mesh: "+str(mesh) + "\n\n"
file.write(output)
file.close()
print (i)
i=i+1
filename = os.path.join(path, "overview.txt")
file = open(filename, "w")
output = "Without MeSH terms:" + str(withoutMesh) + "\n" + "ID's: "+str(withoutMeshID) + "\n\n" + "Without abstract: "+str(withoutAbstract) + "\n" + "ID's: "+str(withoutAbstractID)
file.write(output)
file.close()
的代碼適用於前幾百個表中的行,但隨後停止執行,我收到的錯誤是:
Traceback (most recent call last):
File "/Users/.../anaconda/lib/python3.5/urllib/request.py", line 1254, in do_open
h.request(req.get_method(), req.selector, req.data, headers)
File "/Users/.../anaconda/lib/python3.5/http/client.py", line 1106, in request
self._send_request(method, url, body, headers)
File "/Users/.../anaconda/lib/python3.5/http/client.py", line 1151, in _send_request
self.endheaders(body)
File "/Users/.../anaconda/lib/python3.5/http/client.py", line 1102, in endheaders
self._send_output(message_body)
File "/Users/.../anaconda/lib/python3.5/http/client.py", line 934, in _send_output
self.send(msg)
File "/Users/.../anaconda/lib/python3.5/http/client.py", line 877, in send
self.connect()
File "/Users/.../anaconda/lib/python3.5/http/client.py", line 1260, in connect
server_hostname=server_hostname)
File "/Users/.../anaconda/lib/python3.5/ssl.py", line 377, in wrap_socket
_context=self)
File "/Users/.../anaconda/lib/python3.5/ssl.py", line 752, in __init__
self.do_handshake()
File "/Users/.../anaconda/lib/python3.5/ssl.py", line 988, in do_handshake
self._sslobj.do_handshake()
File "/Users/.../anaconda/lib/python3.5/ssl.py", line 633, in do_handshake
self._sslobj.do_handshake()
ConnectionResetError: [Errno 54] Connection reset by peer
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/.../Desktop/sr_dataset/ace_inhibitor/excluded/pumbedMedline.py", line 18, in <module>
handle = Entrez.efetch(db="pubmed",rettype="medline",retmode="text", id=str(row.id))
File "/Users/.../anaconda/lib/python3.5/site-packages/biopython-1.68-py3.5-macosx-10.6-x86_64.egg/Bio/Entrez/__init__.py", line 180, in efetch
return _open(cgi, variables, post=post)
File "/Users/.../anaconda/lib/python3.5/site-packages/biopython-1.68-py3.5-macosx-10.6-x86_64.egg/Bio/Entrez/__init__.py", line 524, in _open
handle = _urlopen(cgi)
File "/Users/.../anaconda/lib/python3.5/urllib/request.py", line 163, in urlopen
return opener.open(url, data, timeout)
File "/Users/.../anaconda/lib/python3.5/urllib/request.py", line 466, in open
response = self._open(req, data)
File "/Users/.../anaconda/lib/python3.5/urllib/request.py", line 484, in _open
'_open', req)
File "/Users/.../anaconda/lib/python3.5/urllib/request.py", line 444, in _call_chain
result = func(*args)
File "/Users/.../anaconda/lib/python3.5/urllib/request.py", line 1297, in https_open
context=self._context, check_hostname=self._check_hostname)
File "/Users/.../anaconda/lib/python3.5/urllib/request.py", line 1256, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 54] Connection reset by peer>
這裏有CSV文件的第幾列:
id
10029645
10073846
10078088
10080457
10088066
...
是完全回溯嗎?什麼是錯誤信息? –
@ cricket_007已添加完整的信息。 – testing
請參閱此帖子的評論http://stackoverflow.com/q/21334966/2308683 –