2017-08-08 59 views
1

我是使用 Python 操作網站的新手。我想檢查某個網站上是否有更新的數據，如果有，就把它下載下來。檢查數據是否更新並不是問題；問題在於填寫完下拉菜單之後，我嘗試下載數據時失敗了。該網站使用手風琴式（accordion）表格內帶超鏈接的圖像來啓動下載。我嘗試過的所有方法都無法啓動下載，並且出現了各種錯誤。如何使用 Python 選中這個帶超鏈接的圖像來下載文件？

# Script: open the FEMA MSC advanced-search page, fill in the three
# drop-downs, run the search, report whether a known data identifier appears
# in the resulting page, and read the href of the state-wide download link.
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.ui import Select
##from selenium.webdriver.common.keys import Keys

driver = webdriver.Chrome()

driver.get('http://msc.fema.gov/portal/advanceSearch#searchresultsanchor')

## Fill in drop boxes
# Every selection is followed by a fixed pause.
# NOTE(review): presumably the pause lets the page populate the next
# drop-down; Selenium explicit waits would be more robust than sleeps.
select = Select(driver.find_element_by_id('selstate'))
select.select_by_index(18)
time.sleep(5)

select1 = Select(driver.find_element_by_id('selcounty'))
select1.select_by_index(1)
time.sleep(5)

select2 = Select(driver.find_element_by_id('selcommunity'))
select2.select_by_index(1)
time.sleep(5)

## Submit the search form and pause before reading the page
driver.find_element_by_css_selector('.btn.btn-primary').click()
time.sleep(5)

# Snapshot the rendered page and parse it with BeautifulSoup.
content = driver.page_source
soup = BeautifulSoup(content, "lxml")

# Identifier to look for in the page text.
cdate = "NFHL_19_20170621"

#elem = driver.find_element_by_xpath("""//*[@id="nfhl_state_list"]/table/tbody/tr[1]/td/table[2]/tbody/tr/td[2]/table/tbody/tr[4]/td[1]""")
#print(elem.text)

## Search the web page to see if the data has been updated
# The call form of print with a single argument behaves the same on
# Python 2 and Python 3.
if cdate in str(soup):
    print('found')
else:
    print('not found')

## Download state wide data
# The selector must be a single string literal: a quoted string cannot span
# two source lines, and "nth-child" must not be split.
link = driver.find_element_by_css_selector(
    '#nfhl_state_list > tr:nth-child(1) > td:nth-child(5) > a'
).get_attribute('href')
strlink = str(link)

回答

0

試試這個：我只是先把各層區塊展開，讓下載（DL）圖像顯示出來，然後點擊它。另請注意，應儘量使用顯式等待（explicit waits）：http://selenium-python.readthedocs.io/waits.html#explicit-waits

import time 
from selenium.webdriver.support.select import Select as WebDriverSelect 
# Open a browser session. `WebDriver`, `options` and `config` are not defined
# in this snippet and must be supplied by the surrounding environment.
driver = WebDriver(
    desired_capabilities=options.secure_options.to_capabilities(),
    command_executor=config.command_executor,
)

driver.get('http://msc.fema.gov/portal/advanceSearch#searchresultsanchor')

# Wrap the three drop-downs up front, in the same order as they are looked up
# on the page: state, county, community.
state, county, community = (
    WebDriverSelect(driver.find_element_by_id(dropdown_id))
    for dropdown_id in ('selstate', 'selcounty', 'selcommunity')
)

state.select_by_visible_text('INDIANA')
county.select_by_visible_text('ADAMS COUNTY')
time.sleep(2)
community.select_by_index(1)

# Click the search button and then the two nested toggles, pausing two
# seconds after each click.
for element_id in ('mainSearch', 'eff_root', 'eff_nfhl_state_root'):
    driver.find_element_by_id(element_id).click()
    time.sleep(2)

# Finally click the image inside the state-wide list body.
driver.find_element_by_css_selector('tbody[id="nfhl_state_list"] img').click()