2017-10-07 44 views
-3

我是 Python 的新手,需要一段網頁抓取程式碼來保存每週的動態地圖。這是我感興趣的網站。目的是進入頁面,選擇季節,選擇週次,並將圖像下載到本地文件夾。我會把這張圖像整合進一份用 SAS 製作的自動化每週報告中。主題:用 Python 抓取帶有導航控制器的網頁。

先謝謝你!

+1

你需要什麼樣的幫助? –

+0

我需要程式碼方面的幫助……這樣我就可以在 Spyder 上運行它來保存 .png 文件。 – Arthuro

+0

你到目前爲止嘗試過什麼?您應該提供某種代碼示例,以顯示您在哪裏遇到問題。沒有這一點,你不可能在這裏得到任何幫助。 –

回答

0
import sys 
import os 
import time 
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile 
from selenium import webdriver 
import arrow 

# Page that getImage() opens in the browser.
BASE_URL = 'https://gis.cdc.gov/grasp/fluview/main.html'
# Directory handed to Firefox as the "browser.download.dir" preference in getImage().
# NOTE(review): "/Users/" looks like a placeholder -- confirm the real target folder.
DOWNLOAD_PATH = "/Users/"

def closeWebDriver(driver):
    """Shut down the WebDriver session held by *driver*.

    ``quit()`` is called on every platform. The previous implementation
    called ``close()`` on non-Windows systems, which only closes the current
    window and does not end the WebDriver session.

    Args:
        driver: Object exposing a ``quit()`` method (a Selenium WebDriver).
    """
    driver.quit()

def getImage():
    """Open the FluView page in Firefox and click its "Download Image" button.

    Steps:
      1. Build a Firefox profile that stores PNG downloads in DOWNLOAD_PATH
         without asking.
      2. Load BASE_URL and check the displayed season with isValidTimeFrame();
         stop early if it does not match.
      3. Move the week slider back with selectFirstWeek().
      4. Find the button labelled "download image" (case-insensitive) inside
         the ``downloads-help-area`` element and click it via downloadImage().

    The browser is always closed through closeWebDriver(), including when a
    step raises.

    Returns:
        None
    """
    profile = FirefoxProfile()

    # Let PNG files download into DOWNLOAD_PATH without a prompt or panel.
    profile.set_preference("browser.download.panel.shown", False)
    profile.set_preference("browser.helperApps.neverAsk.openFile", "image/png")
    profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "image/png")
    # folderList=2 tells Firefox to use the directory from browser.download.dir.
    profile.set_preference("browser.download.folderList", 2)
    profile.set_preference("browser.download.dir", DOWNLOAD_PATH)

    driver = webdriver.Firefox(firefox_profile=profile)
    try:
        driver.get(BASE_URL)

        # Fixed pause before touching the page; presumably gives the
        # script-driven UI time to render.
        time.sleep(5)

        if not isValidTimeFrame(driver):
            print('Not the time to download yet!')
            return

        selectFirstWeek(driver)

        print('- Consume the web.')
        wrapper = driver.find_element_by_class_name('downloads-help-area')

        for el in wrapper.find_elements_by_class_name('downloads-button'):
            # Compare text with text. Encoding the label to bytes first makes
            # this comparison always False on Python 3.
            if el.text.strip().lower() == 'download image':
                downloadImage(el)
                break

        # Pause before the browser is closed; presumably lets the download finish.
        time.sleep(5)
    finally:
        # Runs on the early return above and on errors, so the browser
        # never stays open.
        closeWebDriver(driver)


def isValidTimeFrame(driver):
    """Return True if the season label on the page matches the current year.

    The text of the ``seasons-button`` element is stripped, lower-cased and
    compared with ``"<YYYY>-<YY>"`` built from the current local year and the
    year after it (for example ``"2017-18"`` when run during 2017). The label
    is also printed.

    Args:
        driver: Object exposing ``find_element_by_class_name`` (a Selenium
            WebDriver).

    Returns:
        bool: True when the label equals the expected season string.
    """
    import datetime  # local import: only this function needs it

    seasons_button = driver.find_element_by_class_name('seasons-button')
    # Keep the label as text. Encoding it to bytes made the comparison below
    # always False on Python 3 (bytes never equal str).
    time_frame = seasons_button.text.strip().lower()
    print(time_frame)

    current_year = datetime.date.today().year
    compare_year = '%d-%02d' % (current_year, (current_year + 1) % 100)

    # NOTE(review): early in a calendar year the page presumably still shows
    # the season that started the previous year, which this check rejects --
    # confirm whether that is intended.
    return time_frame == compare_year

def selectFirstWeek(driver, max_clicks=60, settle_seconds=1):
    """Click the "previous" control until the week slider reports a value below 2.

    The slider value is read *before* each click, so no extra click is sent
    once the first week is already shown. The loop is bounded so a slider
    that never reaches that value cannot hang the script.

    Args:
        driver: Object exposing ``find_element_by_id`` (a Selenium WebDriver).
            The elements ``prevMap`` and ``weekSlider`` are looked up on it.
        max_clicks: Upper bound on the number of clicks sent.
        settle_seconds: Seconds to sleep after the loop finishes.

    Raises:
        ValueError, TypeError: If the slider's ``value`` property cannot be
            converted with ``int()``.
    """
    prev = driver.find_element_by_id('prevMap')
    week = driver.find_element_by_id('weekSlider')

    for _ in range(max_clicks):
        current_number = week.get_property('value')
        print('- Week: %s' % current_number)
        if int(current_number) < 2:
            break
        prev.click()
    else:
        # Only reached when the bound ran out without hitting the break.
        print('- Stopped after %d clicks without reaching the first week.' % max_clicks)

    time.sleep(settle_seconds)


def downloadImage(el):
    """Announce the element's label on stdout, then click the element.

    Args:
        el: Object exposing a ``text`` attribute and a ``click()`` method.
    """
    label = el.text
    print('- Click on ' + label)
    el.click()


# Script entry point: starts the download as soon as this file is executed.
# Because the call is unguarded, it also runs when the file is imported.
getImage()