from lxml import html
# Sample markup: three sibling <ul> lists, each containing one
# <li class="name"> item followed by two <li> items without a class.
text = '''<ul>
<li class="name"> James </li>
<li> Male </li>
<li> 5'8" </li>
</ul>
<ul>
<li class="name"> James </li>
<li> Male </li>
<li> 5'8" </li>
</ul>
<ul>
<li class="name"> James </li>
<li> Male </li>
<li> 5'8" </li>
</ul>'''

# Parse the markup string into an lxml element tree.
tree = html.fromstring(text)

# Select every <ul> that has a direct <li> child whose class attribute
# equals "name", then print the text of that list's <li> children.
for ul in tree.xpath('//ul[li[@class="name"]]'):
    # The relative path "li/text()" is evaluated against the current <ul>;
    # it yields the text nodes of its direct <li> children as a list of
    # strings, with surrounding whitespace left intact.
    print(ul.xpath("li/text()"))
# Output:
[' James ', ' Male ', ' 5\'8" ']
[' James ', ' Male ', ' 5\'8" ']
[' James ', ' Male ', ' 5\'8" ']