0
我是一名學生記者,也是 Python 的新手。我一直在試圖弄清楚如何使用 for 循環,在我大學每日犯罪日誌的所有當前頁面上抓取每一條犯罪記錄。但是,它只抓取了第一頁。我看過別人的代碼和問題,仍然弄不清楚自己遺漏了什麼。感謝任何幫助。使用 Python 和 BS4 循環抓取多個頁面。
import urllib.request
import requests
import csv
import bs4
import numpy as np
import pandas as pd
from pandas import DataFrame
# Scrape every page of the daily crime log into one DataFrame, `allCrimes`.
#
# Each entry pairs an output column name with the CSS class string of the
# <div> that holds that field on the page; the text itself lives in a nested
# <span class="field-content">.
FIELD_CLASSES = [
    ("Incident#", "views-field views-field-title"),
    ("Occurred", "views-field views-field-field-occurred"),
    ("reported", "views-field views-field-field-reported"),
    ("nature of incident", "views-field views-field-field-nature-of-incident"),
    ("offenses", "views-field views-field-field-offenses"),
    ("location", "views-field views-field-field-location"),
    ("disposition", "views-field views-field-field-case-disposition"),
]

# Upper bound taken from the original loop: pages 0..26 are requested.
# NOTE(review): the original comment read "Number of pagers plus" (truncated);
# confirm the real page count on the site.
NUM_PAGES = 27

# "{}" is filled with the page index. The original literal hard-coded
# "page=0" without a placeholder, so .format(num) changed nothing and every
# iteration re-fetched the first page.
PAGE_URL = "http://police.psu.edu/daily-crime-log?field_reported_value[value]&page={}"

# Rows are collected across ALL pages and turned into a DataFrame once at the
# end. Building the frame inside the loop (as before) discarded the previous
# page's rows on every iteration.
rows = []
for num in range(NUM_PAGES):
    url = PAGE_URL.format(num)

    # Fetch the page once (the original downloaded it twice and ignored the
    # first copy) and close the connection as soon as the body is read.
    with urllib.request.urlopen(url) as response:
        source = response.read()
    bs_tree = bs4.BeautifulSoup(source, "lxml")

    # One list of <div> tags per field, in FIELD_CLASSES order.
    field_divs = [bs_tree.find_all("div", class_=css) for _, css in FIELD_CLASSES]

    # Walk the field lists in lockstep: the i-th div of each list is treated
    # as belonging to the i-th incident. zip stops at the shortest list.
    # NOTE(review): this assumes every incident carries all seven fields, as
    # the original index-based loop did; verify against the page markup.
    for cells in zip(*field_divs):
        rows.append(
            [cell.find("span", class_="field-content").get_text() for cell in cells]
        )

allCrimes = pd.DataFrame(rows, columns=[name for name, _ in FIELD_CLASSES])