Lycos 對其搜索結果進行了加密。不過,你可以改用 Google 試試。
import urllib, urllib2
from urllib import urlopen
from bs4 import BeautifulSoup
import re
from time import sleep
from random import choice, random
def scrapping_google(query):
    """Fetch one Google result page for *query*, print it, and return it.

    The function requests the search page, prints the text of the
    ``resultStats`` element, then prints the link and the snippet of every
    ``<li class="g">`` entry found in the page.

    Args:
        query: Search phrase. It is URL-quoted with ``urllib.quote_plus``
            before being placed in the request URL.

    Returns:
        dict: maps each printed result link (the ``href`` of the first
        anchor in the entry) to that entry's snippet text.

    Raises:
        urllib2.URLError: propagated from ``urllib2.urlopen`` when the
            request cannot be completed.
    """
    g_url = "http://www.google.com/search?q=%s&num=100&hl=en&start=0" % (
        urllib.quote_plus(query))
    # A browser-like User-Agent is sent with the request.
    request = urllib2.Request(
        g_url,
        None,
        {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:35.0) Gecko/20100101 Firefox/35.0'})

    open_url = urllib2.urlopen(request)
    try:
        read_url = open_url.read()
    finally:
        # Release the connection even if reading fails.
        open_url.close()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    g_soup = BeautifulSoup(read_url, 'html.parser')
    remove_tag = re.compile(r'<.*?>')
    g_dict = {}

    scrap_count = g_soup.find('div', attrs={'id': 'resultStats'})
    count = remove_tag.sub('', str(scrap_count)).replace('.', '')
    # Drops the last 16 characters of the stats text.
    # NOTE(review): presumably a fixed trailing phrase of the stats line -
    # confirm against the live markup.
    only_count = count[0:-16]
    # Single-argument print keeps the original output (two spaces after the
    # colon) while staying valid syntax with or without print_function.
    print('Prediction result:  %s' % only_count)
    print('\n')

    for li in g_soup.findAll('li', attrs={'class': 'g'}):
        links = li.find('a')
        href = links.get('href') if links is not None else None
        if href is None:
            # Entry without a usable anchor: skip it instead of crashing.
            continue
        print(href)

        scrap_content = li.find('span', attrs={'class': 'st'})
        # Tags are removed with the regex and periods are stripped as well;
        # an entry without a snippet span yields the text 'None'.
        content = remove_tag.sub('', str(scrap_content)).replace('.', '')
        print(content)

        # Record the pair so the returned dict actually carries the results.
        g_dict[href] = content

    return g_dict
if __name__ == '__main__':
    # Script entry point: run a sample search. scrapping_google prints what
    # it finds; its return value is kept in fetch_links.
    fetch_links = scrapping_google('jokowi')
請詳細說明,例如提供代碼示例等。 – theglauber
可以請您演示一下腳本嗎? – aayoubi
搜索引擎是否提供實際的編程 API,而不必去解析/讀取爲人類最終用戶編寫的頁面? – bot403