2017-03-11 84 views
1
from bs4 import BeautifulSoup 
import requests 
import time 

urls = ['http://www.soku.com/search_playlist/q_python_orderby_1_limitdate_0?site=14&page={}&spm=a2h0k.8191403.0.00'.format(str(i)) for i in range(1,30,1)] 

def UUrl(urls): 

    def Url(url): 
     single_urls = [] 
     time.sleep(1) 
     wb_data = requests.get(url) 
     soup = BeautifulSoup(wb_data.text,'lxml') 
     for single_urls in soup.find_all(class_ = "album_tit"): 
      single_url = (single_urls.a.get('href')) 
      return single_url 
      # print(single_url) 

    for url in urls: 
     Url(url) 

def get_url_title(urls,data = None): 
    urlsss = UUrl(urls) 
    for surl in urlsss: 
     wb_data = requests.get(surl) 
     soup = BeautifulSoup(wb_data.text,'lxml') 
     urlss = soup.find_all(class_="title short-title") 
     titles = soup.find_all(class_="title short-title") 

     for t_url,title in zip(urlss,titles): 
      data = { 
       'title':title.get_text(), 
       'url': (t_url.a.get('href')) 
      } 
      print(data) 

get_url_title(urls) 

回答

1

這意味着你正在遍歷一個空值。 soup.findall函數可能沒有返回結果。如果發生這種情況,函數返回非類型,就像python的null。然後你試圖對不存在的東西進行循環。在你的代碼中有幾個區域可能會引發這個錯誤,但基本上它只是意味着for循環中的表達式IN後面的變量沒有任何價值。你可以做一個。如果soup.find_all(class_ =「album_tit」)是NoneType:print(「find all function not returns a value」)