1
from bs4 import BeautifulSoup
import requests
import time
# Search-result page URLs to crawl: one per page number, pages 1 through 29
# (the upper bound of range() is exclusive).
urls = [
    'http://www.soku.com/search_playlist/q_python_orderby_1_limitdate_0?site=14&page={}&spm=a2h0k.8191403.0.00'.format(page)
    for page in range(1, 30)
]
def UUrl(urls):
    """Collect playlist links from every search-result page in *urls*.

    Each page is downloaded and parsed, and every element carrying the
    ``album_tit`` class is inspected; the ``href`` of the anchor inside
    it is collected.

    Args:
        urls: Iterable of search-result page URLs.

    Returns:
        A list of href strings, ordered by page and then by position in
        the page. The list is empty when *urls* is empty or no page
        contains a matching link.
    """
    def Url(url):
        """Return the hrefs of all ``album_tit`` anchors on one page."""
        time.sleep(1)  # throttle: wait one second before each page fetch
        wb_data = requests.get(url)
        soup = BeautifulSoup(wb_data.text, 'lxml')
        links = []
        for album in soup.find_all(class_="album_tit"):
            anchor = album.a
            if anchor is None:
                # Matched element has no <a> child; nothing to collect.
                continue
            href = anchor.get('href')
            if href:
                links.append(href)
        return links

    single_urls = []
    for url in urls:
        single_urls.extend(Url(url))
    return single_urls
def get_url_title(urls, data=None):
    """Print the title and link of every entry on each playlist page.

    The playlist page addresses are obtained by calling ``UUrl(urls)``.
    Each of those pages is downloaded and parsed, and for every element
    with the classes ``title short-title`` a dict with the keys
    ``'title'`` (the element's text) and ``'url'`` (the ``href`` of its
    anchor) is printed.

    Args:
        urls: Iterable of search-result page URLs, passed on to ``UUrl``.
        data: Unused; kept so existing callers keep working. Any value
            passed in is ignored.

    Returns:
        None. Results are written to standard output.
    """
    # Treat a missing result from UUrl as "no playlist pages" instead of
    # failing with a TypeError when iterating None.
    playlist_urls = UUrl(urls) or []
    for surl in playlist_urls:
        wb_data = requests.get(surl)
        soup = BeautifulSoup(wb_data.text, 'lxml')
        # The same element provides both the title text and the link, so
        # a single lookup is enough.
        for entry in soup.find_all(class_="title short-title"):
            anchor = entry.a
            if anchor is None:
                # No link inside this title element; skip it.
                continue
            data = {
                'title': entry.get_text(),
                'url': anchor.get('href'),
            }
            print(data)
# Start the scrape over the search-result pages listed in `urls`.
get_url_title(urls)