-4
import requests
from lxml import html
SEARCH_URL = "https://www.yellowpages.com/search"
def crawl(name, state, page=1):
params={'search_terms': name, 'geo_location_terms': state, 'page': page}
data = requests.get(SEARCH_URL, params=params).text
tree = html.fromstring(data)
for items in tree.xpath("//div[@class='info']"):
name = items.findtext(".//span[@itemprop='name']")
address = items.findtext(".//span[@class='street-address']")
phone = items.findtext(".//div[@itemprop='telephone']")
showing = items.findtext("//*[@id='main-content']/div[2]/div[4]/p/text()")
yield (name, address, phone, showing)
def search(name, state, pages=1):
page = 1
while page is not pages:
for result in crawl(name, state, page=page):
print result
page +=1
if __name__ == '__main__':
search('pizza', 'tx', pages=10)
Traceback (most recent call last):
File "C:/Python27/Scripts/yellowpages.py", line 31, in <module>
search('pizza', 'tx', pages=10)
File "C:/Python27/Scripts/yellowpages.py", line 25, in search
for result in crawl(name, state, page=page):
File "C:/Python27/Scripts/yellowpages.py", line 16, in crawl
showing = items.findtext("//*[@id='main-content']/div[2]/div[4]/p/text()")
File "src\lxml\lxml.etree.pyx", line 1550, in lxml.etree._Element.findtext (src\lxml\lxml.etree.c:59189)
File "C:\Python27\lib\site-packages\lxml\_elementpath.py", line 320, in findtext
el = find(elem, path, namespaces)
File "C:\Python27\lib\site-packages\lxml\_elementpath.py", line 302, in find
it = iterfind(elem, path, namespaces)
File "C:\Python27\lib\site-packages\lxml\_elementpath.py", line 291, in iterfind
selector = _build_path_iterator(path, namespaces)
File "C:\Python27\lib\site-packages\lxml\_elementpath.py", line 260, in _build_path_iterator
raise SyntaxError("cannot use absolute path on element")
SyntaxError: cannot use absolute path on element
請問您可以分享SEARCH_URL嗎? –
如果是語法錯誤,它與xpath無關。提供完整的錯誤追蹤,以便有人可以提供幫助。 – Rahul