所以我有這個程序搜索年度報告(10-K)SEC SEC埃德加數據庫,並返回列表框中的40個不同項目的列表。好吧,我想創建一個顯示在列表框中的下一個40個項目,其中下面的代碼完成了「未來40」按鈕:問題與按鈕命令Tkinter Python
def Next():
global entryWidget
page = 'http://www.sec.gov/cgi-bin/browse-edgar?company=&match=&CIK=' + entryWidget.get().strip() + '&filenum=&State=&Country=&SIC=&owner=exclude&Find=Find+Companies&action=getcompany'
sock = urllib.urlopen(page)
raw = sock.read()
soup = BeautifulSoup(raw)
npar = str(soup.find(value="Next 40"))
index = npar.find('/cgi')
index2 = npar.find('count=40') + len('count=40')
nextpage = 'http://www.sec.gov' + npar[index:index2]
sock2 = urllib.urlopen(nextpage)
raw2 = sock2.read()
soup2 = BeautifulSoup(raw2)
psoup = str(soup2.findAll(nowrap=True))
myparser = MyParser()
myparser.parse(psoup)
filinglist = myparser.get_descriptions()
linklist = myparser.get_hyperlinks()
filinglist = [s for s in filinglist if s != 'Documents']
filinglist = [s for s in filinglist if s != 'Documents Interactive Data']
filinglist = [s for s in filinglist if not re.match(r'\d{3}-', s)]
linklist = [s for s in linklist if not s.startswith('/cgi-')]
Lb1.delete(0, END)
counter = 0
while counter < len(filinglist):
Lb1.insert(counter, filinglist[counter])
counter = counter +1
當按下按鈕時,你可以看到,它讀取原始鏈接(頁面)而不是在html網站(頁面)上查找「Next 40」超鏈接。然後解析新的html文檔(nextpage),然後獲取項目名稱和關聯的鏈接。現在,此代碼成功從原始頁面轉到下一頁,但它只能顯示下一頁。
那麼我怎麼能夠使(nextpage)進入原始(頁面),然後能夠列出(nextnextpage)html文件中的項目,每次我按下'下一步'按鈕?對不起,如果這是令人困惑的,我真的不知道任何其他方式來解釋它。
欲瞭解更多的解釋,這裏是我想要解析的實際站點鏈接:http://www.sec.gov/cgi-bin/browse-edgar ... getcompany 我想要'下一步'按鈕來繼續檢索該網站'下一個40'按鈕的HTML超鏈接。
這裏是我的情況下,整個程序代碼,您需要:
import BeautifulSoup
from BeautifulSoup import BeautifulSoup
import urllib
import sgmllib
from Tkinter import *
import tkMessageBox
import re
class MyParser(sgmllib.SGMLParser):
def parse(self, psoup):
self.feed(psoup)
self.close()
def __init__(self, verbose=0):
sgmllib.SGMLParser.__init__(self, verbose)
self.descriptions = []
self.hyperlinks = []
self.inside_td_element = 0
self.starting_description = 0
def start_td(self, attributes):
for name, value in attributes:
if name == "nowrap":
self.inside_td_element = 1
self.starting_description = 1
def end_td(self):
self.inside_td_element = 0
def start_a(self, attributes):
for name, value in attributes:
if name == "href":
self.hyperlinks.append(value)
def handle_data(self, data):
if self.inside_td_element:
if self.starting_description:
self.descriptions.append(data)
self.starting_description = 0
else:
self.descriptions[-1] += data
def get_descriptions(self):
return self.descriptions
def get_hyperlinks(self):
return self.hyperlinks
def Submit():
global entryWidget
if entryWidget.get().strip() == "":
tkMessageBox.showerror("Tkinter Entry Widget", "Enter a text value")
else:
page = 'http://www.sec.gov/cgi-bin/browse-edgar?company=&match=&CIK=' + entryWidget.get().strip() + '&filenum=&State=&Country=&SIC=&owner=exclude&Find=Find+Companies&action=getcompany'
sock = urllib.urlopen(page)
raw = sock.read()
soup = BeautifulSoup(raw)
psoup = str(soup.findAll(nowrap=True))
myparser = MyParser()
myparser.parse(psoup)
filinglist = myparser.get_descriptions()
linklist = myparser.get_hyperlinks()
filinglist = [s for s in filinglist if s != 'Documents']
filinglist = [s for s in filinglist if s != 'Documents Interactive Data']
filinglist = [s for s in filinglist if not re.match(r'\d{3}-', s)]
linklist = [s for s in linklist if not s.startswith('/cgi-')]
counter = 0
while counter < len(filinglist):
Lb1.insert(counter, filinglist[counter])
counter = counter +1
downloadbutton.configure(state=NORMAL)
nextbutton.configure(state=NORMAL)
def Next():
global entryWidget
page = 'http://www.sec.gov/cgi-bin/browse-edgar?company=&match=&CIK=' + entryWidget.get().strip() + '&filenum=&State=&Country=&SIC=&owner=exclude&Find=Find+Companies&action=getcompany'
sock = urllib.urlopen(page)
raw = sock.read()
soup = BeautifulSoup(raw)
npar = str(soup.find(value="Next 40"))
index = npar.find('/cgi')
index2 = npar.find('count=40') + len('count=40')
nextpage = 'http://www.sec.gov' + npar[index:index2]
sock2 = urllib.urlopen(nextpage)
raw2 = sock2.read()
soup2 = BeautifulSoup(raw2)
psoup = str(soup2.findAll(nowrap=True))
myparser = MyParser()
myparser.parse(psoup)
filinglist = myparser.get_descriptions()
linklist = myparser.get_hyperlinks()
filinglist = [s for s in filinglist if s != 'Documents']
filinglist = [s for s in filinglist if s != 'Documents Interactive Data']
filinglist = [s for s in filinglist if not re.match(r'\d{3}-', s)]
linklist = [s for s in linklist if not s.startswith('/cgi-')]
Lb1.delete(0, END)
counter = 0
while counter < len(filinglist):
Lb1.insert(counter, filinglist[counter])
counter = counter +1
previousbutton.configure(state=NORMAL)
nextbutton.configure(state=DISABLED)
def Previous():
global entryWidget
page = 'http://www.sec.gov/cgi-bin/browse-edgar?company=&match=&CIK=' + entryWidget.get().strip() + '&filenum=&State=&Country=&SIC=&owner=exclude&Find=Find+Companies&action=getcompany'
sock = urllib.urlopen(page)
raw = sock.read()
soup = BeautifulSoup(raw)
psoup = str(soup.findAll(nowrap=True))
myparser = MyParser()
myparser.parse(psoup)
filinglist = myparser.get_descriptions()
linklist = myparser.get_hyperlinks()
filinglist = [s for s in filinglist if s != 'Documents']
filinglist = [s for s in filinglist if s != 'Documents Interactive Data']
filinglist = [s for s in filinglist if not re.match(r'\d{3}-', s)]
linklist = [s for s in linklist if not s.startswith('/cgi-')]
Lb1.delete(0, END)
counter = 0
while counter < len(filinglist):
Lb1.insert(counter, filinglist[counter])
counter = counter +1
nextbutton.configure(state=NORMAL)
previousbutton.configure(state=DISABLED)
if __name__ == "__main__":
root = Tk()
root.title("SEC Edgar Search")
root["padx"] = 10
root["pady"] = 25
top = Frame(root)
bottom = Frame(root)
bottom2 = Frame(root)
top.pack(side=TOP)
bottom.pack(side=BOTTOM, fill=BOTH, expand=True)
bottom2.pack(side=BOTTOM, fill=BOTH, expand=True)
textFrame = Frame(root)
entryLabel = Label(textFrame)
entryLabel["text"] = "Ticker symbol:"
entryLabel.pack(side=TOP)
entryWidget = Entry(textFrame)
entryWidget["width"] = 15
entryWidget.pack(side=LEFT)
textFrame.pack()
scrollbar = Scrollbar(root)
scrollbar.pack(side=RIGHT, fill=Y)
Lb1 = Listbox(root, width=20, height=15, yscrollcommand=scrollbar.set, selectmode=EXTENDED)
Lb1.pack()
scrollbar.config(command=Lb1.yview)
submitbutton = Button(root, text="Submit", command=Submit)
submitbutton.pack(in_=bottom2, side=TOP)
downloadbutton = Button(root, text="Download")
downloadbutton.pack(in_=bottom2, side=TOP)
downloadbutton.configure(state=DISABLED)
previousbutton = Button(root, text="Previous 40", command=Previous)
previousbutton.pack(in_=bottom, side=LEFT)
previousbutton.configure(state=DISABLED)
nextbutton = Button(root, text="Next 40", command=Next)
nextbutton.pack(in_=bottom, side=LEFT)
nextbutton.configure(state=DISABLED)
root.mainloop()
我嘗試使用以下代碼創建一個新類:Class Application():def submit(self):。 。 。等等。但我不斷收到此異常:Tkinter回調中的異常 Traceback(最近調用最後一次): 文件「C:\ Python27 \ lib \ lib-tk \ Tkinter.py」,第1410行,在__call__中 return self.func * args) TypeError:無約束方法必須使用Application實例作爲第一個參數調用Submit()(取而代之)。任何想法是什麼造成這個? – kr21
是的,這個作品非常完美,非常感謝! – kr21