2017-02-16 77 views
1

我想搜尋一個必須與標籤的文字內容完全相符的特定字串。如何只用字串 'RESULTS' 進行搜尋,並讓它返回標籤 'h4'?也就是說,在 BeautifulSoup 中取得直接包含所搜尋文字的那個標籤。

# Reproduction from the question: parse a fragment whose only text is
# "RESULTS", nested as table > tbody > tr > td > div > h4.
soup = BeautifulSoup('<table><tbody><tr><td class="fulltext-body-paragraph"><a name="44"></a><div class="fulltext-LEVEL1"><h4>RESULTS</h4></div></td></tr></tbody></table>') 

# find() is given a predicate and returns the first element that satisfies it;
# the transcript line below shows that element is <html>, not <h4>.
# NOTE(review): presumably .text concatenates the text of all descendants, so
# every ancestor of <h4> also compares equal to 'RESULTS' and the outermost
# one wins -- confirm against the bs4 documentation.
soup.find(lambda el: el.text == 'RESULTS').name 
Out: 'html' # I would like it to return 'h4' 

回答

0

這個(https://stackoverflow.com/a/13349041/7573286)可以解決你的問題嗎?

from bs4 import BeautifulSoup 
from pprint import pprint 
import re 

# Sample document: four <h2> headings, only the first of which contains "cool".
html_text = """ 
<h2>this is cool #12345678901</h2> 
<h2>this is nothing</h2> 
<h2>this is interesting #126666678901</h2> 
<h2>this is blah #124445678901</h2> 
""" 

# Name the parser explicitly so the parse tree does not depend on which
# optional parsers happen to be installed.
soup = BeautifulSoup(html_text, "html.parser")

# Even though the OP was not looking for 'cool', it's more understandable to work with item zero.
pattern = re.compile(r'cool')

# NOTE(review): the '#>>' lines below are the outputs recorded with the
# original snippet and have not been re-verified here. The bs4 documentation
# says that combining a tag name with a text/string filter returns the matching
# Tag rather than its string, so the results for the find('h2', text=pattern)
# calls may differ under bs4 -- confirm before relying on them.

# Searching by text alone yields the matching string node; dump its attributes.
pprint(soup.find(text=pattern).__dict__)
#>> {'next': u'\n',
#>> 'nextSibling': None,
#>> 'parent': <h2>this is cool #12345678901</h2>,
#>> 'previous': <h2>this is cool #12345678901</h2>,
#>> 'previousSibling': None}

# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(soup.find('h2'))
#>> <h2>this is cool #12345678901</h2>
print(soup.find('h2', text=pattern))
#>> this is cool #12345678901
print(soup.find('h2', text=pattern).parent)
#>> <h2>this is cool #12345678901</h2>
print(soup.find('h2', text=pattern) == soup.find('h2'))
#>> False
print(soup.find('h2', text=pattern) == soup.find('h2').text)
#>> True
print(soup.find('h2', text=pattern).parent == soup.find('h2'))
#>> True