2017-07-13 45 views
1

使用Python中的序列匹配找到最大公共字符串：我試圖在Python中用difflib.SequenceMatcher比較string1和string2，從而取得兩者的最大公共字符串。

string1="""ERROR agave_util.py:64 Timed out waiting for HA alert generated CRITICAL ha_test_util.py:44 HA alert generated, Stack:File "/main/qa/py/qa/agave/nutanix_test_runner_worker.py", line 909, in <module> main(FLAGS, sync_state) File "/main/qa/py/qa/agave/nutanix_test_runner_worker.py", line 878, in main worker.run(sync_state) File "/main/qa/py/qa/agave/nutanix_test_runner_worker.py", line 326, in run if not self.__test_phase_wrapper(test_method): File "/main/qa/py/qa/agave/nutanix_test_runner_worker.py", line 502, in __test_phase_wrapper func() File "/main/qa/test/agave/acropolis_tests/ha/best_effort_power_off_test.py", line 87, in test_stargate_master_power_off self._host_power_off_test_cycle(host_of_stargate_master) File "/main/qa/test/agave/acropolis_tests/ha/best_effort_power_off_test.py", line 27, in _host_power_off_test_cycle self.ha_util.power_off_and_check_ha(host) File "/main/.python/qa/util/agave_tools/ha_test_util.py", line 469, in power_off_and_check_ha self.wait_for_ha_alert(cutoff_usecs=latest_alert_start, **kwargs) File "/main/.python/qa/util/agave_tools/ha_test_util.py", line 418, in wait_for_ha_alert interval=interval, File "/main/.python/qa/util/agave_tools/ha_test_util.py", line 44, in wait_for_true CHECK(result, message) ERROR nutanix_test_runner_worker.py:595 Test failed: 1exc_type: <type 'exceptions.SystemExit'>exc_value: 1stack: File "/main/qa/py/qa/agave/nutanix_test_runner_worker.py", line 502, in __test_phase_wrapper func() File "/main/qa/test/agave/acropolis_tests/ha/best_effort_power_off_test.py", line 87, in test_stargate_master_power_off self._host_power_off_test_cycle(host_of_stargate_master) File "/main/qa/test/agave/acropolis_tests/ha/best_effort_power_off_test.py", line 27, in _host_power_off_test_cycle self.ha_util.power_off_and_check_ha(host) File "/main/.python/qa/util/agave_tools/ha_test_util.py", line 469, in power_off_and_check_ha self.wait_for_ha_alert(cutoff_usecs=latest_alert_start, **kwargs) File 
"/main/.python/qa/util/agave_tools/ha_test_util.py", line 418, in wait_for_ha_alert interval=interval, File "/main/.python/qa/util/agave_tools/ha_test_util.py", line 44, in wait_for_true CHECK(result, message) File "/main/.python/util/base/log.py", line 204, in CHECK FATAL(log_msg, **kwargs) File "/main/.python/util/base/log.py", line 185, in FATAL sys.exit(1) ERROR nutanix_test.py:696 Failed to get gflags from 10.5.132.157. ERROR nutanix_test.py:696 Failed to get gflags from 10.5.132.157. ERROR nutanix_test.py:1699 Failed to save cluster configuration""" 

string2="""ERROR agave_util.py:64 Timed out waiting for VMs [u'vm_353ca5', u'vm_e02d7f'] power on CRITICAL ha_test_util.py:44 VMs [u'vm_353ca5', u'vm_e02d7f'] power on, Stack:File "/main/qa/py/qa/agave/nutanix_test_runner_worker.py", line 909, in <module> main(FLAGS, sync_state) File "/main/qa/py/qa/agave/nutanix_test_runner_worker.py", line 878, in main worker.run(sync_state) File "/main/qa/py/qa/agave/nutanix_test_runner_worker.py", line 326, in run if not self.__test_phase_wrapper(test_method): File "/main/qa/py/qa/agave/nutanix_test_runner_worker.py", line 502, in __test_phase_wrapper func() File "/main/qa/test/agave/acropolis_tests/ha/best_effort_power_off_test.py", line 67, in test_zoo_keeper_leader_power_off self._host_power_off_test_cycle(leader_host) File "/main/qa/test/agave/acropolis_tests/ha/best_effort_power_off_test.py", line 27, in _host_power_off_test_cycle self.ha_util.power_off_and_check_ha(host) File "/main/.python/qa/util/agave_tools/ha_test_util.py", line 468, in power_off_and_check_ha self.verify_vms_not_on_host(host_vms, host) File "/main/.python/qa/util/agave_tools/ha_test_util.py", line 617, in verify_vms_not_on_host self.wait_for_vms_power_on(vm_names, per_vm_timeout) File "/main/.python/qa/util/agave_tools/ha_test_util.py", line 599, in wait_for_vms_power_on interval=15) File "/main/.python/qa/util/agave_tools/ha_test_util.py", line 44, in wait_for_true CHECK(result, message) ERROR nutanix_test_runner_worker.py:595 Test failed: 1exc_type: <type 'exceptions.SystemExit'>exc_value: 1stack: File "/main/qa/py/qa/agave/nutanix_test_runner_worker.py", line 502, in __test_phase_wrapper func() File "/main/qa/test/agave/acropolis_tests/ha/best_effort_power_off_test.py", line 67, in test_zoo_keeper_leader_power_off self._host_power_off_test_cycle(leader_host) File "/main/qa/test/agave/acropolis_tests/ha/best_effort_power_off_test.py", line 27, in _host_power_off_test_cycle self.ha_util.power_off_and_check_ha(host) File 
"/main/.python/qa/util/agave_tools/ha_test_util.py", line 468, in power_off_and_check_ha self.verify_vms_not_on_host(host_vms, host) File "/main/.python/qa/util/agave_tools/ha_test_util.py", line 617, in verify_vms_not_on_host self.wait_for_vms_power_on(vm_names, per_vm_timeout) File "/main/.python/qa/util/agave_tools/ha_test_util.py", line 599, in wait_for_vms_power_on interval=15) File "/main/.python/qa/util/agave_tools/ha_test_util.py", line 44, in wait_for_true CHECK(result, message) File "/main/.python/util/base/log.py", line 204, in CHECK FATAL(log_msg, **kwargs) File "/main/.python/util/base/log.py", line 185, in FATAL sys.exit(1) ERROR nutanix_test.py:696 Failed to get gflags from 10.5.132.156. ERROR nutanix_test.py:696 Failed to get gflags from 10.5.132.156. ERROR nutanix_test.py:1699 Failed to save cluster configuration""" 

# Search the full ranges of string1 and string2 for their longest common
# contiguous block. match.a is the start offset of that block in string1 and
# match.size is its length, so the slice below prints the matched text.
match = SequenceMatcher(None, string1, string2).find_longest_match(0, len(string1), 0, len(string2))
# Use the function-call form of print so the snippet runs on Python 3 as
# well as Python 2 (the bare `print match` statement is Python 2 only).
print(match)
print(string1[match.a: match.a + match.size])

# Shortened versions of the two log messages being compared.
string1 = """ERROR agave_util.py:64 Timed out waiting for HA alert generated CRITICAL ha_test_util.py:44,"""
string2 = """ERROR agave_util.py:64 Timed out waiting for VMs [u'vm_353ca5', u'vm_e02d7f'] power on CRITICAL ha_test_util.py:44"""

# Find the longest contiguous block common to both strings, searching the
# whole of each string.
matcher = SequenceMatcher(None, string1, string2)
match = matcher.find_longest_match(0, len(string1), 0, len(string2))

# match.a is the block's start offset in string1; match.size is its length.
start, end = match.a, match.a + match.size
print(string1[start:end])

[前兩個字符串]返回的最長子串是 CRITICAL ha_test_util.py:44；而當我把 string1 和 string2 截短之後 [第6、7行]，它返回的是 ERROR agave_util.py:64 Timed out waiting for

基本上我的問題是：爲什麼在第一種情況下，序列匹配沒有返回正確的匹配結果？

+1

'SequenceMatcher'從哪裏來？我從來沒有聽說過它，也懶得去搜索，因爲有可能搜到的是別的、你並沒有使用的東西…… – Alfe

+1

@Alfe - 很可能是它來自內置的difflib模塊。 – zwer

+0

@zwer 對，我已經在看那個了 ;-) 我的評論也是想提醒大家：提問時有必要說明這類信息。其他人可能僅僅因爲嫌查找麻煩，就直接忽略了這個問題。 – Alfe

回答

1

您遇到的是SequenceMatcher的自動垃圾（autojunk）啓發式所帶來的效果（在您的情況下是負面效果）。引自官方文檔（docs）：

自動垃圾啓發式：SequenceMatcher支持一種啓發式方法，可自動將某些序列項視爲垃圾。該啓發式會統計每個單獨的項在序列中出現的次數。如果某個項的重複出現（第一次之後）佔序列的1%以上，並且序列長度至少爲200個項，則該項被標記爲「流行」（popular），並在序列匹配時被視爲垃圾。在創建SequenceMatcher時，可以通過將autojunk參數設置爲False來關閉此啓發式功能。

在SequenceMatcher構造函數中，autojunk默認爲True。如果您改用autojunk=False再試一次，就會得到預期的最長匹配：

from difflib import SequenceMatcher 

string1 = """ERROR agave_util.py:64 Timed out waiting for HA alert generated CRITICAL ha_test_util.py:44 HA alert generated, Stack:File "/main/qa/py/qa/agave/nutanix_test_runner_worker.py", line 909, in <module> main(FLAGS, sync_state) File "/main/qa/py/qa/agave/nutanix_test_runner_worker.py", line 878, in main worker.run(sync_state) File "/main/qa/py/qa/agave/nutanix_test_runner_worker.py", line 326, in run if not self.__test_phase_wrapper(test_method): File "/main/qa/py/qa/agave/nutanix_test_runner_worker.py", line 502, in __test_phase_wrapper func() File "/main/qa/test/agave/acropolis_tests/ha/best_effort_power_off_test.py", line 87, in test_stargate_master_power_off self._host_power_off_test_cycle(host_of_stargate_master) File "/main/qa/test/agave/acropolis_tests/ha/best_effort_power_off_test.py", line 27, in _host_power_off_test_cycle self.ha_util.power_off_and_check_ha(host) File "/main/.python/qa/util/agave_tools/ha_test_util.py", line 469, in power_off_and_check_ha self.wait_for_ha_alert(cutoff_usecs=latest_alert_start, **kwargs) File "/main/.python/qa/util/agave_tools/ha_test_util.py", line 418, in wait_for_ha_alert interval=interval, File "/main/.python/qa/util/agave_tools/ha_test_util.py", line 44, in wait_for_true CHECK(result, message) ERROR nutanix_test_runner_worker.py:595 Test failed: 1exc_type: <type 'exceptions.SystemExit'>exc_value: 1stack: File "/main/qa/py/qa/agave/nutanix_test_runner_worker.py", line 502, in __test_phase_wrapper func() File "/main/qa/test/agave/acropolis_tests/ha/best_effort_power_off_test.py", line 87, in test_stargate_master_power_off self._host_power_off_test_cycle(host_of_stargate_master) File "/main/qa/test/agave/acropolis_tests/ha/best_effort_power_off_test.py", line 27, in _host_power_off_test_cycle self.ha_util.power_off_and_check_ha(host) File "/main/.python/qa/util/agave_tools/ha_test_util.py", line 469, in power_off_and_check_ha self.wait_for_ha_alert(cutoff_usecs=latest_alert_start, **kwargs) File 
"/main/.python/qa/util/agave_tools/ha_test_util.py", line 418, in wait_for_ha_alert interval=interval, File "/main/.python/qa/util/agave_tools/ha_test_util.py", line 44, in wait_for_true CHECK(result, message) File "/main/.python/util/base/log.py", line 204, in CHECK FATAL(log_msg, **kwargs) File "/main/.python/util/base/log.py", line 185, in FATAL sys.exit(1) ERROR nutanix_test.py:696 Failed to get gflags from 10.5.132.157. ERROR nutanix_test.py:696 Failed to get gflags from 10.5.132.157. ERROR nutanix_test.py:1699 Failed to save cluster configuration""" 
string2 = """ERROR agave_util.py:64 Timed out waiting for VMs [u'vm_353ca5', u'vm_e02d7f'] power on CRITICAL ha_test_util.py:44 VMs [u'vm_353ca5', u'vm_e02d7f'] power on, Stack:File "/main/qa/py/qa/agave/nutanix_test_runner_worker.py", line 909, in <module> main(FLAGS, sync_state) File "/main/qa/py/qa/agave/nutanix_test_runner_worker.py", line 878, in main worker.run(sync_state) File "/main/qa/py/qa/agave/nutanix_test_runner_worker.py", line 326, in run if not self.__test_phase_wrapper(test_method): File "/main/qa/py/qa/agave/nutanix_test_runner_worker.py", line 502, in __test_phase_wrapper func() File "/main/qa/test/agave/acropolis_tests/ha/best_effort_power_off_test.py", line 67, in test_zoo_keeper_leader_power_off self._host_power_off_test_cycle(leader_host) File "/main/qa/test/agave/acropolis_tests/ha/best_effort_power_off_test.py", line 27, in _host_power_off_test_cycle self.ha_util.power_off_and_check_ha(host) File "/main/.python/qa/util/agave_tools/ha_test_util.py", line 468, in power_off_and_check_ha self.verify_vms_not_on_host(host_vms, host) File "/main/.python/qa/util/agave_tools/ha_test_util.py", line 617, in verify_vms_not_on_host self.wait_for_vms_power_on(vm_names, per_vm_timeout) File "/main/.python/qa/util/agave_tools/ha_test_util.py", line 599, in wait_for_vms_power_on interval=15) File "/main/.python/qa/util/agave_tools/ha_test_util.py", line 44, in wait_for_true CHECK(result, message) ERROR nutanix_test_runner_worker.py:595 Test failed: 1exc_type: <type 'exceptions.SystemExit'>exc_value: 1stack: File "/main/qa/py/qa/agave/nutanix_test_runner_worker.py", line 502, in __test_phase_wrapper func() File "/main/qa/test/agave/acropolis_tests/ha/best_effort_power_off_test.py", line 67, in test_zoo_keeper_leader_power_off self._host_power_off_test_cycle(leader_host) File "/main/qa/test/agave/acropolis_tests/ha/best_effort_power_off_test.py", line 27, in _host_power_off_test_cycle self.ha_util.power_off_and_check_ha(host) File 
"/main/.python/qa/util/agave_tools/ha_test_util.py", line 468, in power_off_and_check_ha self.verify_vms_not_on_host(host_vms, host) File "/main/.python/qa/util/agave_tools/ha_test_util.py", line 617, in verify_vms_not_on_host self.wait_for_vms_power_on(vm_names, per_vm_timeout) File "/main/.python/qa/util/agave_tools/ha_test_util.py", line 599, in wait_for_vms_power_on interval=15) File "/main/.python/qa/util/agave_tools/ha_test_util.py", line 44, in wait_for_true CHECK(result, message) File "/main/.python/util/base/log.py", line 204, in CHECK FATAL(log_msg, **kwargs) File "/main/.python/util/base/log.py", line 185, in FATAL sys.exit(1) ERROR nutanix_test.py:696 Failed to get gflags from 10.5.132.156. ERROR nutanix_test.py:696 Failed to get gflags from 10.5.132.156. ERROR nutanix_test.py:1699 Failed to save cluster configuration""" 

# Build the matcher with the automatic junk heuristic switched off
# (autojunk=False), then search the full ranges of both strings for the
# longest common contiguous block.
matcher = SequenceMatcher(None, string1, string2, autojunk=False)
match = matcher.find_longest_match(0, len(string1), 0, len(string2))
print(match)

輸出:

Match(a=110, b=156, size=534) 

爲了確認這一點，我們可以檢查所有匹配塊，並找出其中最長的一個：

>>> max(SequenceMatcher(None, string1, string2, autojunk=False).get_matching_blocks(), 
...  key=lambda m: m.size) 
Match(a=110, b=156, size=534) 

爲了用一個更簡單的例子說明autojunk的影響，讓我們看看這裏發生了什麼：

>>> a = "aa:bb:cc" + ":"*200 
>>> b = "aa:bb" + ":"*200 
>>> SequenceMatcher(None, a, b).find_longest_match(0, len(a), 0, len(b)) 
Match(a=0, b=0, size=6)  # : is classified as junk 
>>> SequenceMatcher(None, a, b, autojunk=False).find_longest_match(0, len(a), 0, len(b)) 
Match(a=8, b=5, size=200) # : is NOT classified as junk 

在第一種情況下（默認autojunk=True），字符 : 被認爲是垃圾字符（它佔這個長度至少爲200個項的序列的1%以上），結果是，「看起來合理」（"looks right to people"）的最長匹配只有6個字符（即最開始的六個）。

在第二種情況下(顯式爲autojunk=False),垃圾啓發式是關閉的,所以最長匹配是最後200個字符。

如果您對較短的序列（短於200個字符）重複相同的測試，就會發現autojunk的取值沒有任何影響，因爲此時垃圾啓發式不會生效（請參閱source）。

+1

接受!!!!!謝謝 :-) – syam