您可以分析使用regex
類似這樣的例子您的數據:
我假設你的數據是:
a = "|hiddenField|__VIEWSTATE|/wEPDwUKLTUzNjYxMTI2OA8WCB4IdndHcnVwb3MyiQYAAQAAAP////8BAAAAAAAAAAwCAAAASUJTQS5OZXRGb3JjZS5Nb2RlbCwgVmVyc2lvbj0xLjAuMC4wLCBDdWx0dXJlPW5ldXRyYWwsIFB1YmxpY0tleVRva2VuPW51bGwEAQAAAJ8BU3lzdGVtLkNvbGxlY3Rpb25zLkdlbmVyaWMuTGlzdGAxW1tCU0EuTmV0Rm9yY2UuTW9kZWwuQ29yZS5FbnRpdGllcy5HcnVwb1Byb21vdG9yYSwgQlNBLk5ldEZvcmNlLk1vZGVsLCBWZXJzaW9uPTEuMC4wLjAsIEN1bHR1cmU9bmV1dHJhbCwgUHVibGljS2V5VG9rZW49bnVsbF1dAwAAAAZfaXRlbXMFX3NpemUIX3ZlcnNpb24EAAAxQlNBLk5ldEZvcmNlLk1vZGVsLkNvcmUuRW50aXRpZXMuR3J1cG9Qcm9tb3RvcmFbXQIAAAAICAkDAAAA|512|hiddenField|__VIEWSTATE1|AQAAAAEAAAAHAwAAAAABAAAABAAAAAQvQlNBLk5ldEZvcmNlLk1vZGVsLkNvcmUuRW50aXRpZXMuR3J1cG9Qcm9tb3RvcmECAAAACQQAAAANAwUEAAAAL0JTQS5OZXRGb3JjZS5Nb2RlbC5Db3JlLkVudGl0aWVzLkdydXBvUHJvbW90b3JhBgAAABw8SWRHcnVwb3Byb20+a19fQmFja2luZ0ZpZWxkFTxOb21lPmtfX0JhY2tpbmdGaWVsZCA8Q2RVc3VhcmlvR2VzdG9yPmtfX0JhY2tpbmdGaWVsZCA8SWRVc3VhcmlvR2VzdG9yPmtfX0JhY2tpbmdGaWVsZBo8RHRDcmlhY2FvPmtfX0JhY2tpbmdGaWVsZBg8RG9taW5pbz5rX19CYWNraW5nRmllbGQAAQEDAwEFDlN5c3RlbS5EZWNpbWFsD1N5c3RlbS5EYXRlVGltZQIAAAABMgYFAAAAEUdSVVBPIFBSSU1FQ09SQkFOBgYAAAAKMDAw|512|hiddenField|__VIEWSTATE2|MDEwNDk4MQgFAzEzMQgNgCrlF50A0AgGBwAAAAtQUklNRUNPUkJBTgseCWNvZEdlc3RvcgUKMDAwMDEwNDk4MR4HQ1JDUGFnZSgpWlN5c3RlbS5VSW50MzIsIG1zY29ybGliLCBWZXJzaW9uPTQuMC4wLjAsIEN1bHR1cmU9bmV1dHJhbCwgUHVibGljS2V5VG9rZW49Yjc3YTVjNTYxOTM0ZTA4OQoyOTAyOTk0ODc3HgpDUkNDb250ZW50BRJkZGxHcnVwb3NGaWx0cm8yX18WAmYPZBYCZg9kFgICAw9kFgICBQ9kFggCAw8PFgIeD0NvbW1hbmRBcmd1bWVudAURNC4xLjAuMDgxMy4wODAwLjBkZAIFDw8WAh4EVGV4dAVwVm9jw6ogZXN0w6EgZW0gPiA8c3Ryb25nPkhvbWU8L3N0cm9uZz4gID4gPHN0cm9uZz5SZWxhdMOzcmlvczwvc3Ryb25nPiA+IDxzdHJvbmc+UG9zacOnw6NvIGRl|512|"
然後,你可以做:
import re
obj = re.findall('\|hiddenField|\|(.*?)\|\d+\|', a)
final = {k[0]:k[1] for k in [k.split('|') for k in obj if k != '']}
for k in final.items():
print(k)
輸出:
('__VIEWSTATE1', 'AQAAAAEAAAAHAwAAAAABAAAABAAAAAQvQlNBLk5ldEZvcmNlLk1vZGVsLkNvcmUuRW50aXRpZXMuR3J1cG9Qcm9tb3RvcmECAAAACQQAAAANAwUEAAAAL0JTQS5OZXRGb3JjZS5Nb2RlbC5Db3JlLkVudGl0aWVzLkdydXBvUHJvbW90b3JhBgAAABw8SWRHcnVwb3Byb20+a19fQmFja2luZ0ZpZWxkFTxOb21lPmtfX0JhY2tpbmdGaWVsZCA8Q2RVc3VhcmlvR2VzdG9yPmtfX0JhY2tpbmdGaWVsZCA8SWRVc3VhcmlvR2VzdG9yPmtfX0JhY2tpbmdGaWVsZBo8RHRDcmlhY2FvPmtfX0JhY2tpbmdGaWVsZBg8RG9taW5pbz5rX19CYWNraW5nRmllbGQAAQEDAwEFDlN5c3RlbS5EZWNpbWFsD1N5c3RlbS5EYXRlVGltZQIAAAABMgYFAAAAEUdSVVBPIFBSSU1FQ09SQkFOBgYAAAAKMDAw')
('__VIEWSTATE', '/wEPDwUKLTUzNjYxMTI2OA8WCB4IdndHcnVwb3MyiQYAAQAAAP////8BAAAAAAAAAAwCAAAASUJTQS5OZXRGb3JjZS5Nb2RlbCwgVmVyc2lvbj0xLjAuMC4wLCBDdWx0dXJlPW5ldXRyYWwsIFB1YmxpY0tleVRva2VuPW51bGwEAQAAAJ8BU3lzdGVtLkNvbGxlY3Rpb25zLkdlbmVyaWMuTGlzdGAxW1tCU0EuTmV0Rm9yY2UuTW9kZWwuQ29yZS5FbnRpdGllcy5HcnVwb1Byb21vdG9yYSwgQlNBLk5ldEZvcmNlLk1vZGVsLCBWZXJzaW9uPTEuMC4wLjAsIEN1bHR1cmU9bmV1dHJhbCwgUHVibGljS2V5VG9rZW49bnVsbF1dAwAAAAZfaXRlbXMFX3NpemUIX3ZlcnNpb24EAAAxQlNBLk5ldEZvcmNlLk1vZGVsLkNvcmUuRW50aXRpZXMuR3J1cG9Qcm9tb3RvcmFbXQIAAAAICAkDAAAA')
('__VIEWSTATE2', 'MDEwNDk4MQgFAzEzMQgNgCrlF50A0AgGBwAAAAtQUklNRUNPUkJBTgseCWNvZEdlc3RvcgUKMDAwMDEwNDk4MR4HQ1JDUGFnZSgpWlN5c3RlbS5VSW50MzIsIG1zY29ybGliLCBWZXJzaW9uPTQuMC4wLjAsIEN1bHR1cmU9bmV1dHJhbCwgUHVibGljS2V5VG9rZW49Yjc3YTVjNTYxOTM0ZTA4OQoyOTAyOTk0ODc3HgpDUkNDb250ZW50BRJkZGxHcnVwb3NGaWx0cm8yX18WAmYPZBYCZg9kFgICAw9kFgICBQ9kFggCAw8PFgIeD0NvbW1hbmRBcmd1bWVudAURNC4xLjAuMDgxMy4wODAwLjBkZAIFDw8WAh4EVGV4dAVwVm9jw6ogZXN0w6EgZW0gPiA8c3Ryb25nPkhvbWU8L3N0cm9uZz4gID4gPHN0cm9uZz5SZWxhdMOzcmlvczwvc3Ryb25nPiA+IDxzdHJvbmc+UG9zacOnw6NvIGRl')
然而,如果要分析所有的數據像|hiddenField|field_name|field_value|digits
你可以這樣做:
import re
obj = re.findall('\|hiddenField|\|(.*?)\|(\d+)\|', a)
final = {k[0]:{'field_value': k[1], 'digits': k[2]} for k in [k[0].split("|") + [k[1]] for k in obj if k != ('','')]}
for k in final.items():
print(k)
輸出:
('__VIEWSTATE', {'field_value': '/wEPDwUKLTUzNjYxMTI2OA8WCB4IdndHcnVwb3MyiQYAAQAAAP////8BAAAAAAAAAAwCAAAASUJTQS5OZXRGb3JjZS5Nb2RlbCwgVmVyc2lvbj0xLjAuMC4wLCBDdWx0dXJlPW5ldXRyYWwsIFB1YmxpY0tleVRva2VuPW51bGwEAQAAAJ8BU3lzdGVtLkNvbGxlY3Rpb25zLkdlbmVyaWMuTGlzdGAxW1tCU0EuTmV0Rm9yY2UuTW9kZWwuQ29yZS5FbnRpdGllcy5HcnVwb1Byb21vdG9yYSwgQlNBLk5ldEZvcmNlLk1vZGVsLCBWZXJzaW9uPTEuMC4wLjAsIEN1bHR1cmU9bmV1dHJhbCwgUHVibGljS2V5VG9rZW49bnVsbF1dAwAAAAZfaXRlbXMFX3NpemUIX3ZlcnNpb24EAAAxQlNBLk5ldEZvcmNlLk1vZGVsLkNvcmUuRW50aXRpZXMuR3J1cG9Qcm9tb3RvcmFbXQIAAAAICAkDAAAA', 'digits': '512'})
('__VIEWSTATE2', {'field_value': 'MDEwNDk4MQgFAzEzMQgNgCrlF50A0AgGBwAAAAtQUklNRUNPUkJBTgseCWNvZEdlc3RvcgUKMDAwMDEwNDk4MR4HQ1JDUGFnZSgpWlN5c3RlbS5VSW50MzIsIG1zY29ybGliLCBWZXJzaW9uPTQuMC4wLjAsIEN1bHR1cmU9bmV1dHJhbCwgUHVibGljS2V5VG9rZW49Yjc3YTVjNTYxOTM0ZTA4OQoyOTAyOTk0ODc3HgpDUkNDb250ZW50BRJkZGxHcnVwb3NGaWx0cm8yX18WAmYPZBYCZg9kFgICAw9kFgICBQ9kFggCAw8PFgIeD0NvbW1hbmRBcmd1bWVudAURNC4xLjAuMDgxMy4wODAwLjBkZAIFDw8WAh4EVGV4dAVwVm9jw6ogZXN0w6EgZW0gPiA8c3Ryb25nPkhvbWU8L3N0cm9uZz4gID4gPHN0cm9uZz5SZWxhdMOzcmlvczwvc3Ryb25nPiA+IDxzdHJvbmc+UG9zacOnw6NvIGRl', 'digits': '512'})
('__VIEWSTATE1', {'field_value': 'AQAAAAEAAAAHAwAAAAABAAAABAAAAAQvQlNBLk5ldEZvcmNlLk1vZGVsLkNvcmUuRW50aXRpZXMuR3J1cG9Qcm9tb3RvcmECAAAACQQAAAANAwUEAAAAL0JTQS5OZXRGb3JjZS5Nb2RlbC5Db3JlLkVudGl0aWVzLkdydXBvUHJvbW90b3JhBgAAABw8SWRHcnVwb3Byb20+a19fQmFja2luZ0ZpZWxkFTxOb21lPmtfX0JhY2tpbmdGaWVsZCA8Q2RVc3VhcmlvR2VzdG9yPmtfX0JhY2tpbmdGaWVsZCA8SWRVc3VhcmlvR2VzdG9yPmtfX0JhY2tpbmdGaWVsZBo8RHRDcmlhY2FvPmtfX0JhY2tpbmdGaWVsZBg8RG9taW5pbz5rX19CYWNraW5nRmllbGQAAQEDAwEFDlN5c3RlbS5EZWNpbWFsD1N5c3RlbS5EYXRlVGltZQIAAAABMgYFAAAAEUdSVVBPIFBSSU1FQ09SQkFOBgYAAAAKMDAw', 'digits': '512'})
使用開頭的單詞'your_string.split( 「|」)[2]'和'[3]' – Julien
犯錯,似乎是包含在這串4種類型。 'hiddenField','__VIEWSTATE',一些長文本字符串和一個數字。你試圖收集哪些信息,以及你試圖把它變成什麼格式? – dmlicht