我想抓取一個網站,該網站首先要求我填寫表單,提交之後才能取得所需的 HTML 頁面: http://fcainfoweb.nic.in/PMSver2/Reports/Report_Menu_web.aspx (標題:使用 Python 的 Scrapy 抓取需要先填寫表單的網頁)
我已經寫了下面的代碼,但不知道哪裡出錯了。請幫忙:
import scrapy
class SpidyQuotesViewStateSpider(scrapy.Spider):
    """Drive the report-menu web form through its postbacks and scrape the result.

    The page is submitted three times, each time re-using the form found in
    the previous response:

    1. ``parse``       -- select the report type radio button.
    2. ``parse_tags``  -- select the report option in the drop-down.
    3. ``parse_date``  -- fill in the date and press the "Get Data" button.

    ``parse_results`` then extracts the result panel from the final page.

    Every step goes through ``FormRequest.from_response`` so that the hidden
    state inputs of the form (``__VIEWSTATE``, ``__EVENTVALIDATION``, ...)
    are copied from the response automatically and never have to be listed
    by hand.
    """

    name = 'spidyquotes-viewstate'
    start_urls = ['http://fcainfoweb.nic.in/PMSver2/Reports/Report_Menu_web.aspx']
    download_delay = 1.5

    # Selections that are re-sent unchanged with every postback.
    # Values are written in plain (decoded) form: FormRequest url-encodes
    # formdata itself, so a literal '+' would be transmitted as '%2B'.
    _report_fields = {
        'ctl00$MainContent$Ddl_Rpt_type': 'Retail',
        'ctl00$MainContent$ddl_Language': 'English',
        'ctl00$MainContent$Rbl_Rpt_type': 'Price report',
    }

    def _toolkit_hidden_field(self, response):
        """Return the value of the script-manager hidden input, or None.

        A ``None`` value passed in ``formdata`` makes ``from_response`` leave
        the field out of the request instead of sending it.
        """
        return response.css(
            'input#ctl00_MainContent_ToolkitScriptManager1_HiddenField::attr(value)'
        ).extract_first()

    def parse(self, response):
        """Post back the report-type radio button selection."""
        formdata = dict(self._report_fields)
        formdata['ctl00_MainContent_ToolkitScriptManager1_HiddenField'] = (
            self._toolkit_hidden_field(response)
        )
        formdata['__EVENTTARGET'] = 'ctl00$MainContent$Rbl_Rpt_type$0'
        yield scrapy.FormRequest.from_response(
            response,
            formdata=formdata,
            # The postback is identified by __EVENTTARGET; do not let Scrapy
            # additionally "click" the first submit control of the form.
            dont_click=True,
            callback=self.parse_tags,
        )

    def parse_tags(self, response):
        """Post back the report-option drop-down selection."""
        formdata = dict(self._report_fields)
        formdata['ctl00_MainContent_ToolkitScriptManager1_HiddenField'] = (
            self._toolkit_hidden_field(response)
        )
        formdata['__EVENTTARGET'] = 'ctl00$MainContent$Ddl_Rpt_Option0'
        formdata['ctl00$MainContent$Ddl_Rpt_Option0'] = 'Daily Prices'
        yield scrapy.FormRequest.from_response(
            response,
            formdata=formdata,
            dont_click=True,
            callback=self.parse_date,
        )

    def parse_date(self, response):
        """Fill in the report date and submit the form via the data button."""
        formdata = dict(self._report_fields)
        formdata['__EVENTTARGET'] = ''
        formdata['ctl00$MainContent$Ddl_Rpt_Option0'] = 'Daily Prices'
        formdata['ctl00$MainContent$Txt_FrmDate'] = '01/02/2017'
        # The button is sent explicitly, so automatic clicking stays off to
        # avoid submitting a second (or different) button as well.
        formdata['ctl00$MainContent$btn_getdata1'] = 'Get Data'
        # from_response (rather than a bare FormRequest) carries over the
        # hidden state inputs of the current page into this final postback.
        yield scrapy.FormRequest.from_response(
            response,
            formdata=formdata,
            dont_click=True,
            callback=self.parse_results,
        )

    def parse_results(self, response):
        """Yield the HTML of every result panel found on the report page."""
        # NOTE(review): 'div.Panel1' matches on CSS class; confirm against the
        # live page whether Panel1 is a class or an id ('div#Panel1').
        for panel_html in response.css('div.Panel1').extract():
            yield {'html': panel_html}
你得到一個錯誤?發生什麼不同於你的期望? (人們可能不會運行你的代碼來試圖找出你的問題是什麼......你需要指定問題) –
ValueError:沒有找到 <form> 元素(No <form> element found)
因此,您是在錯誤頁面上尋找「form」元素......也許應該先查清楚,爲什麼抓取到的是錯誤頁面而不是您想要的頁面? –