如果輸入格式非常一致(如範例所示),那麼您用 `re` 模組大概就足夠了。
對於任何更複雜的情況,您可能需要考慮更強大的解析器,例如 pyparsing。
編輯:以下是一個使用正則表達式的非常簡單的有限狀態機解析器;它會處理空白行、非巢狀的 `select;` 與 `end;` 語句,以及第一個和後續的 `when` 子句。我沒有處理 `label` 語句,因爲我不確定它們的作用——是把 V 變量重新命名爲 X 嗎?
import re
class SasTranslator:
    """Translate SAS ``select; when(...) ...; end;`` blocks into Python text.

    The translator is a small finite-state machine that is fed one input
    line per call (see ``__call__``).  Each ``when(var op number) target =
    number;`` clause becomes an ``if`` (first clause of a block) or ``elif``
    (later clauses) statement followed by the assignment.

    Return convention of ``__call__`` and of every handler:

    * a non-empty string -- the translated Python text for the line;
    * ``""``             -- the line was recognized but produces no output
                            (blank line, ``select;``, ``end;``);
    * ``None``           -- only from ``__call__``: no pattern matched.

    Structural errors (nested ``select;``, stray ``end;``, empty block,
    ``when`` outside a block) raise ``ValueError``.
    """

    def __init__(self):
        # modes:
        #   0  not in START..END
        #   1  in START..END, no CASE seen yet
        #   2  in START..END, CASE already found
        self.mode = 0
        # Zero-based index of the line most recently passed to __call__;
        # -1 until the first call.  Used in error messages.
        self.offset = -1

    def handle_blank(self, match):
        """Handle a blank line: emit an empty output line."""
        return ""

    def handle_start(self, match):
        """Handle ``select;``: enter a block.

        Returns ``""`` (recognized, nothing to emit).  Returning ``None``
        here would be indistinguishable from "no pattern matched" in
        ``__call__`` and make callers report the line as unknown.

        Raises:
            ValueError: if a block is already open (nesting is unsupported).
        """
        if self.mode != 0:
            raise ValueError("Found 'select;' in select block, line {}".format(self.offset))
        self.mode = 1
        return ""

    def handle_end(self, match):
        """Handle ``end;``: leave the current block.

        Returns ``""`` (recognized, nothing to emit).

        Raises:
            ValueError: if no block is open, or if the block contained no
                ``when`` clause.
        """
        if self.mode == 0:
            raise ValueError("Found 'end;' with no opening 'select;', line {}".format(self.offset))
        if self.mode == 1:
            raise ValueError("Found empty 'select;' .. 'end;', line {}".format(self.offset))
        self.mode = 0
        return ""

    def handle_case(self, match):
        """Handle a ``when(...)`` clause and return the Python text for it.

        The first clause of a block is emitted as ``if``, every later one as
        ``elif``.  The result is two lines joined by a newline: the test and
        the assignment.

        Raises:
            ValueError: if the clause appears outside ``select;`` .. ``end;``.
        """
        if self.mode == 0:
            raise ValueError("Found 'when' clause outside 'select;' .. 'end;', line {}".format(self.offset))
        # Mode 1 means no clause has been seen yet in this block.
        test = "if" if self.mode == 1 else "elif"
        self.mode = 2
        test_var, op, test_val, assign_var, assign_val = match.groups()
        return (
            "{test} {test_var} {op} {test_val}:\n"
            " {assign_var} = {assign_val}".format(
                test=test,
                test_var=test_var,
                op=op,
                test_val=test_val,
                assign_var=assign_var,
                assign_val=assign_val,
            )
        )

    #
    # Build a dispatch table for the handlers
    #
    # Raw strings keep the regex escapes (\s, \w, \d, \() from being read as
    # (invalid) string escapes.  All patterns are applied with .match(), so
    # they are anchored at the start of the line; START and END accept
    # leading whitespace just like CASE does, so indented input is handled.
    BLANK = re.compile(r"\s*$")
    START = re.compile(r"\s*select;\s*$")
    END = re.compile(r"\s*end;\s*$")
    CASE = re.compile(r"\s*when\((\w+)\s*([<>=]+)\s*([\d.-]+)\s*\)\s*(\w+)\s*=\s*([\d.-]+)\s*;\s*$")

    # Patterns are tried in order; the first match wins.  The handlers are
    # stored as plain functions, so __call__ passes ``self`` explicitly.
    dispatch_table = [
        (BLANK, handle_blank),
        (START, handle_start),
        (END, handle_end),
        (CASE, handle_case),
    ]

    def __call__(self, line):
        """
        Translate a single line of input

        Returns the translated text, ``""`` for a recognized line that emits
        nothing, or ``None`` when no pattern matches the line.  Handlers may
        raise ``ValueError`` for structurally invalid input.
        """
        self.offset += 1
        for test, handler in SasTranslator.dispatch_table:
            match = test.match(line)
            if match is not None:
                return handler(self, match)
        # nothing matched!
        return None
def main():
    """Translate ``my_file.sas`` line by line and print the result.

    Every line is passed to a single ``SasTranslator`` instance.  Whatever
    it returns is printed, except ``None``, for which the original line
    (trailing whitespace removed) is printed behind an ``***unknown***``
    marker.
    """
    with open("my_file.sas") as source:
        translate = SasTranslator()
        for raw_line in source:
            converted = translate(raw_line)
            if converted is None:
                # The translator did not produce anything for this line.
                print("***unknown*** {}".format(raw_line.rstrip()))
                continue
            print(converted)
# Run the translation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
對您的範例輸入運行它,會產生以下輸出:
if X_1 <= 6.7278:
V_1 = -0.0594
elif X_1 <= 19.5338:
V_1 = 0.0604
elif X_1 <= 45.1458:
V_1 = 0.1755
elif X_1 <= 83.5638:
V_1 = 0.2867
elif X_1 <= 203.0878:
V_1 = 0.395
elif X_1 > 203.0878:
V_1 = 0.5011
***unknown*** label V_1 ="X_1 ";
if X_2 <= 0.0836:
V_2 = 0.0562
elif X_2 <= 0.1826:
V_2 = 0.07
elif X_2 <= 0.2486:
V_2 = 0.0836
elif X_2 <= 0.3146:
V_2 = 0.0969
elif X_2 <= 0.3806:
V_2 = 0.1095
elif X_2 <= 0.4466:
V_2 = 0.1212
elif X_2 <= 0.5126:
V_2 = 0.132
elif X_2 <= 0.5786:
V_2 = 0.1419
elif X_2 <= 0.6446:
V_2 = 0.1511
elif X_2 <= 0.7106:
V_2 = 0.1596
elif X_2 <= 0.8526:
V_2 = 0.1679
elif X_2 > 0.8526:
V_2 = 0.176
***unknown*** label V_2 ="X_2 ";
根據您使用它的頻率,或許值得用 `bisect` 寫一個二分查找函數,並把 `select;` .. `end;` 區塊改爲翻譯成那種形式(不過您需要非常小心,確認比較運算符符合您的預期!)——類似於:
# Illustrative target form only: index_into is not defined anywhere in this
# text.  Presumably it would locate X_1 among the five thresholds (second
# argument) and return the corresponding one of the six values (third
# argument) -- TODO confirm, including how a value equal to a threshold
# is treated (<= vs <).
V_1 = index_into(
X_1,
[ 6.7278, 19.5338, 45.1458, 83.5638, 203.0878 ],
[-0.0594, 0.0604, 0.1755, 0.2867, 0.395, 0.5011]
)
這可能會顯着加快運行速度(特別是隨着選項數量的增加),並且更容易理解和維護。
謝謝,將研究,同時,你能幫我寫一些代碼讓我開始。謝謝。 – TongZZZ
這真的很有幫助,比我想象的要好得多,我可以輕鬆地添加更多組件,非常感謝!我也會看看 bisect。 – TongZZZ