2017-05-31 20 views
0

我能夠抓取數據,如果我應用每個下拉列表的靜態值,但現在我試圖通過每個下拉和存儲結果值獲取動態值循環到mysqldb。循環動態Ajax爲基礎的下拉值使用Python硒和抓取數據到MySQL數據庫

問題:我無法循環下拉菜單和商店的每一個元素導致數據庫,如果我對每次三個下拉通過靜態值,那麼我能夠做到的。

enter image description here

工作代碼靜態值

import time 
from selenium import webdriver 
import requests 
from bs4 import BeautifulSoup 
import MySQLdb 

url = "http://xlnindia.gov.in/frm_G_Cold_S_Query.aspx" 
browser = webdriver.Chrome() 
browser.get(url) 

#=========State==================== 
state = browser.find_element_by_id("ddlState") 
state.send_keys("GJ") 

#=========District==================== 
district = browser.find_element_by_id("ddldistrict") 
district.send_keys("AD2") 

#=========Taluka==================== 
category = browser.find_element_by_id("ddltaluka") 
category.send_keys("AMB") 

button = browser.find_element_by_id("btnSearch") 
button.click() 


# Open database connection 
db = MySQLdb.connect(host="localhost", # your host, usually localhost 
        user="root", # your username 
         passwd="", # your password 
         db="test") # name of the data base 

cursor=db.cursor() 


time.sleep(10) 
browser.save_screenshot(browser.title + ".JPEG") 
html = browser.page_source 

soup = BeautifulSoup(html, "html.parser") 
table = soup.findChildren('table')[3] 

for row in table.findAll("tr")[1:]: 
    cells = row.findAll("td") 
    name = cells[0].find(text=True) 
    city = cells[1].find(text=True) 
    licence = cells[2].find(text=True) 
    owner = cells[3].find(text=True) 
    cursor.execute ("INSERT INTO distributors (name, city, licence, owner) VALUES (%s, %s, %s, %s);", (name, city, licence, owner)) 


#print(table.prettify()) 
#print(html) 

db.commit() 
db.close() 

browser.close() 
browser.quit() 

試圖通過循環下拉的各元素來傳遞動態值

import time 
from selenium import webdriver 
import requests 
from bs4 import BeautifulSoup 
import MySQLdb 

url = "http://xlnindia.gov.in/frm_G_Cold_S_Query.aspx" 
browser = webdriver.Chrome() 
browser.get(url) 

#======================================================= 
#Array of Stats Dropdown 
states_array = [] 
el = browser.find_element_by_id('ddlState') 
for option in el.find_elements_by_tag_name('option'): 
    states_array.append(option.get_attribute("value")) 


#print(states_array) 
#======================================================= 

#======================================================= 
#Array of Districts Dropdown 
district_array = [] 
el = browser.find_element_by_id('ddldistrict') 
for option in el.find_elements_by_tag_name('option')[1:]: 
    district_array.append(option.get_attribute("value")) 

#print(district_array) 
#======================================================= 


state = browser.find_element_by_id("ddlState") 
district = browser.find_element_by_id("ddldistrict") 
category = browser.find_element_by_id("ddltaluka") 


# Open database connection 
db = MySQLdb.connect(host="localhost", # your host, usually localhost 
        user="root", # your username 
         passwd="", # your password 
         db="test") # name of the data base 
cursor=db.cursor() 
taluka_array = [] 

for i in range(len(states_array)): 
    state.send_keys(states_array[i]) 
for j in range(len(district_array)): 
    district.send_keys(district_array[j]) 
    district.click() 
    el = browser.find_element_by_id('ddltaluka') 
     for option in el.find_elements_by_tag_name('option')[1:]: 
      taluka_array.append(option.get_attribute("value")) 
     print(taluka_array) 
for k in range(len(taluka_array)): 
    category.send_keys(taluka_array[k]) 
    button = browser.find_element_by_id("btnSearch") 
    button.click() 
    html = browser.page_source 
    soup = BeautifulSoup(html, "html.parser") 
    table = soup.findChildren('table')[3] 
    for row in table.findAll("tr")[1:]: 
     cells = row.findAll("td") 
     name = cells[0].find(text=True) 
     city = cells[1].find(text=True) 
     licence = cells[2].find(text=True) 
     owner = cells[3].find(text=True) 
     cursor.execute ("INSERT INTO distributors (name, city, licence, owner) VALUES (%s, %s, %s, %s);", (name, city, licence, owner)) 
    db.commit() 
    db.close() 
+1

聽起來像一個計劃!你的問題是? – Todor

+0

我無法循環它的動態,你可以看到我的嘗試 –

+0

你能更加明確 - 你期望發生什麼,實際發生了什麼,你正在面臨什麼具體問題 - 加入這個問題,而不是另一評論。 – Todor

回答

1

首先找到td標記過濾掉states然後獲取option標記的值。在這裏你可以得到所有的狀態。對於每個州去那個國家的網址,找到地區。區域在第二個td標記中,所以使用find_next_sibling()並再次獲得option標記的值。所以你也可以找到每個州的所有地區。和其他你已經做的事情。

import time 
from bs4 import BeautifulSoup 
from selenium import webdriver 

url = "http://xlnindia.gov.in/frm_G_Cold_S_Query.aspx" 
browser = webdriver.Chrome() 
browser.get(url) 

time.sleep(5) 
html = browser.page_source 
soup = BeautifulSoup(html, "lxml") 

states = [ x["value"] for x in soup.find("td", bgcolor="#ffe0c0").find_all("option") ] 

for state in states: 

    browser.get(url + "?ST=" + state) 
    time.sleep(5) 

    html = browser.page_source 
    soup = BeautifulSoup(html, "lxml") 

    districts = [ x["value"] for x in soup.find("td", bgcolor="#ffe0c0").find_next_sibling().find_all("option") ] 
    districts = districts[1:] 

    for dist in districts: 

     browser.get(url + "?ST=" + state) 
     district = browser.find_element_by_id("ddldistrict") 
     district.send_keys(dist) 

     button = browser.find_element_by_id("btnSearch") 
     button.click() 

browser.close() 
browser.quit() 
+0

謝謝兄弟........ –

+0

@JunedAnsari最受歡迎。 :) –

相關問題