2
我正在練習爬取(scrape)一個網站,遇到了一個奇怪的情況,與 BeautifulSoup 和 requests.post 有關。
import requests
from bs4 import BeautifulSoup
import json
class n_auction(object):
    """Small scraper that POSTs a search to the auction list endpoint.

    ``search_request`` and ``headers`` are plain dicts, so a caller can
    adjust the search filters or HTTP headers before calling ``scrape``.
    """

    # Endpoint that receives the search POST.
    LIST_URL = 'http://goodauction.land.naver.com/auction/ax_list.php'

    def __init__(self):
        """Set up the default search parameters and request headers."""
        # Field names (including the 'recieveCode' spelling) are sent to the
        # server verbatim, so they must not be "corrected" here.
        self.search_request = {
            'lawsup': 0,
            'lesson': 0,
            'next_biddate1': '',
            'next_biddate2': '',
            'state': 91,
            'b_count1': 0,
            'b_count2': 0,
            'b_area1': '',
            'b_area2': '',
            'special': 0,
            'e_area1': '',
            'e_area2': '',
            'si': 11,
            'gu': 0,
            'dong': 0,
            'apt_no': 0,
            'order': '',
            'start': 60,
            'total_record_val': 850,
            'detail_search': '',
            'detail_class': '',
            'recieveCode': '',
        }
        self.headers = {
            'User-Agent': 'Mozilla/5.0',
            'Referer': 'http://goodauction.land.naver.com/auction/ca_list.php',
        }

    def scrape(self, max_pages):
        """Request ``max_pages`` result pages, printing each one.

        For every page the raw response text is printed, followed by the
        BeautifulSoup parse of that same text.

        Args:
            max_pages: Number of requests to issue. Zero or a negative value
                issues no request at all.

        Returns:
            None. Output goes to stdout.
        """
        # Iterating over range() guarantees the counter advances; the previous
        # ``while pageno < max_pages`` never incremented pageno and so looped
        # forever on the first page.
        for pageno in range(max_pages):
            # NOTE(review): 'start' receives the loop counter, as before. If
            # the server treats it as a record offset rather than a page
            # index, multiply by the page size -- confirm against the site.
            self.search_request['start'] = pageno

            # NOTE(review): the body is sent as a JSON string. If the endpoint
            # expects ordinary form fields, pass the dict itself as ``data``
            # so requests form-encodes it -- confirm against the site.
            payload = json.dumps(self.search_request)
            r = requests.post(self.LIST_URL, data=payload, headers=self.headers)
            print(r.text)

            # Name the parser explicitly: without it bs4 picks whichever
            # parser happens to be installed, so the parsed tree (and what
            # survives malformed markup) can differ from machine to machine.
            s = BeautifulSoup(r.text, 'html.parser')
            print(s)
if __name__ == '__main__':
    # Script entry point: build the scraper and run it with max_pages=30.
    n_auction().scrape(30)
但是經過 BeautifulSoup 解析之後,部分內容不見了(如下圖所示的值)。
這讓我很困惑,請幫幫我~~
另外,順便說一句,你的程式碼看起來並沒有遞增 'pageno'! – jinksPadlock
謝謝~~我回覆晚了,哈哈。你很棒!祝你有美好的一天〜 –