你應該對需要按字面匹配的字符串進行轉義,因爲有些字符在正則表達式中具有特殊含義(例如 REMOVE_LIST 中的 ?):
使用 re.escape 來轉義這類字符:
>>> import re
>>> re.escape('?')
'\\?'
>>> re.search('?', 'Lexington?')
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "C:\Python27\lib\re.py", line 142, in search
    return _compile(pattern, flags).search(string)
  File "C:\Python27\lib\re.py", line 242, in _compile
    raise error, v # invalid expression
sre_constants.error: nothing to repeat
>>> re.search(r'\?', 'Lexington?')
<_sre.SRE_Match object at 0x0000000002C68100>
>>>
>>> import re
>>> ask = "What's the weather like in Lexington, SC?"
>>> REMOVE_LIST = ["like", "in", "how's", "hows", "weather", "the", "whats", "what's", "?"]
>>> remove = '|'.join(map(re.escape, REMOVE_LIST))
>>> regex = re.compile(r'\b(' + remove + r')\b', flags=re.IGNORECASE)
>>> out = regex.sub("", ask)
>>> print out
     Lexington, SC?