2017-06-22 26 views
-1
import re
import sqlite3
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# from selenium.common.exceptions import TimeoutException
# from selenium.common.exceptions import ElementNotVisibleException




def _parse_image_count(counter_text):
    """Extract the total from lightbox counter text such as "1 of 32".

    Returns the total as an int, or None when the text contains no
    "<current> of <total>" pair.
    """
    found = re.search(r'(\d+)\s+of\s+(\d+)', counter_text or '')
    return int(found.group(2)) if found else None


def scrape(driver, nmlsNo):
    """Search guests.themls.com for an MLS number and count the listing's photos.

    Args:
        driver: Selenium WebDriver used for the whole session.
        nmlsNo: MLS number typed into the site's search box.

    Returns:
        The photo total parsed from the lightbox counter as an int, or None
        when the search reports no listings or the counter text cannot be
        parsed.

    Raises:
        selenium.common.exceptions.TimeoutException: if the details page or
            the lightbox counter does not appear within the wait timeout.

    Note:
        On the "no listings" path this function quits ``driver`` itself.
    """
    wait = WebDriverWait(driver, 10)

    driver.get("http://guests.themls.com/")
    time.sleep(1)
    search_field = driver.find_element(By.ID, 'txtSearchBox')
    search_field.send_keys(nmlsNo)
    time.sleep(1)
    driver.find_element(By.ID, 'btnSearch').click()
    # Fixed pauses kept from the original flow while the result page loads.
    # (A second-window/tab switch was sketched here once but is not used.)
    time.sleep(1)
    time.sleep(3)

    driver.get_screenshot_as_file('pic1.png')
    no_listings = driver.find_element(By.ID, 'MainContent_listview').text
    if no_listings == 'No Listings Found!':
        print('no listings')
        driver.quit()
        # TODO: fall back to a secondary web site when nothing is found.
        return None

    prop_link = driver.find_element(
        By.XPATH,
        '//*[@id="MainContent_dtListView"]/span/div/ul[1]/li[2]/span[1]/a/span[1]',
    )
    prop_link.click()
    print('ready')

    # The click navigates to the details page; wait for it instead of
    # querying immediately.
    mls_label_sibling = wait.until(EC.presence_of_element_located((
        By.XPATH,
        '//*[@id="MainContent_frmDetails_lblMLSnum"]/following-sibling::span',
    )))
    # MLS number as displayed on the page; kept for the planned image
    # download step, which builds URLs of the form
    # http://mediaservice.themls.com/default.aspx?LARGEDIR=/<mls>/<mls>-<i>.JPG
    data_nmlsNo = mls_label_sibling.text

    wait.until(
        EC.element_to_be_clickable((By.ID, 'MainContent_frmDetails_imgDisplay'))
    ).click()

    # The counter is plain element text, not an HTML attribute, so
    # get_attribute('text') yields None. Poll until the lightbox has
    # actually rendered a non-empty counter, then read it via .text.
    counter_text = wait.until(
        lambda d: d.find_element(
            By.ID, 'lightbox-image-details-currentNumber'
        ).text.strip()
    )
    print('here' + str(counter_text))

    return _parse_image_count(counter_text)

def init_driver():
    """Build and return a Chrome WebDriver for the scraper.

    The browser binary is taken from '/usr/bin/google-chrome-stable' and the
    window-size argument is passed to Chrome. A ``WebDriverWait`` with a
    5-second timeout is attached to the returned driver as ``driver.wait``.
    """
    chrome_opts = webdriver.ChromeOptions()
    chrome_opts.add_argument('window-size=1200x600')
    # chrome_opts.add_argument('headless')  # turn on headless when running on server
    chrome_opts.binary_location = '/usr/bin/google-chrome-stable'

    browser = webdriver.Chrome(chrome_options=chrome_opts)
    # Ad-hoc attribute so callers can reach a shared explicit wait.
    browser.wait = WebDriverWait(browser, 5)
    return browser

if __name__ == "__main__":
    # Start chromedriver (headless is intended when the script runs on a server).
    driver = init_driver()
    try:
        scrape(driver, "17-241220")
    finally:
        # Always shut the browser down so no Chrome/chromedriver process is
        # left behind, including when scrape() raises. scrape() may already
        # have quit the driver on its "no listings" path.
        # NOTE(review): a second quit() is expected to be harmless for the
        # Chrome driver -- confirm for the installed Selenium version.
        driver.quit()

Selenium:在 else 分支中無法抓取燈箱(lightbox)裏的文字

  # Asks the lightbox counter element for an attribute named 'text'; the OUTPUT section of this page shows the result printing as None.
  no_img = driver.find_element_by_xpath('//*[@id="lightbox-image-details-currentNumber"]').get_attribute('text') 

我用這行代碼來取得圖片的數量,並打算用這個數量來決定檢索圖片的循環次數。

如何取得圖片總數,或者取得「1 of 32」這段文字,以便從中解析出總數?

我試過 .text 和 get_attribute('text')。我想可能是焦點沒有切換到燈箱上,我的 driver 仍然停留在主頁面。

我不必使用這種方法,但它看起來很簡單。

以下再次列出相關代碼:

# Excerpt of the else-branch of scrape(): open the first search result,
# then try to read the lightbox image counter.
propLink = driver.find_element_by_xpath('//*[@id="MainContent_dtListView"]/span/div/ul[1]/li[2]/span[1]/a/span[1]') 
propLink.click() 
print('ready') 
# time.sleep(3) 
# Text of the span that follows the MLS-number label on the details page.
data_nmlsNo = driver.find_element_by_xpath('//*[@id="MainContent_frmDetails_lblMLSnum"]/following-sibling::span').text 
# .click() returns None, so imgClick is always None; the click itself opens the lightbox.
imgClick = driver.find_element_by_xpath('//*[@id="MainContent_frmDetails_imgDisplay"]').click() 
# Requests an attribute named 'text' from the counter element; per the
# OUTPUT below this comes back as None.
no_img = driver.find_element_by_xpath('//*[@id="lightbox-image-details-currentNumber"]').get_attribute('text') 
print('here'+ str(no_img)) 

OUTPUT:

ready 
hereNone 
[Finished in 14.6s] 

這是該頁面的網址: http://guests.themls.com/Details/CA/CULVER-CITY/4042-WASATCH-AVE/17-241220/17-241220.aspx

+0

問題是? –

+0

如何取得圖片總數,或者取得「1 of 32」這段文字,以便從中解析出總數? –

回答

0

下面這段代碼在我這裏可以正常運行,XPath 是我直接從頁面上取的。請注意,我使用的是 PhantomJS 而不是 Chrome 作爲無頭瀏覽器,但您也可以改用普通瀏覽器嘗試:

# Open the listing's details page in PhantomJS, click the main image to open
# the lightbox, and read the "<current> of <total>" counter text.
browser = webdriver.PhantomJS('phantomjs')
browser.implicitly_wait(10)  # it's always a good idea to set an implicit wait
browser.get('http://guests.themls.com/Details/CA/CULVER-CITY/4042-WASATCH-AVE/17-241220/17-241220.aspx')
browser.find_element_by_xpath('//*[@id="MainContent_frmDetails_imgDisplay"]').click()
text_1_to_32 = browser.find_element_by_xpath('//*[@id="lightbox-image-details-currentNumber"]').text
# Keep everything after the first 'of ' (the total). partition() never raises,
# so a counter without 'of ' is reported explicitly instead of surfacing as a
# bare IndexError from split(...)[1].
_, separator, only_32 = text_1_to_32.partition('of ')
if not separator:
    raise ValueError('unexpected lightbox counter text: %r' % (text_1_to_32,))