2016-07-06 586 views
3

給定一個帶有 print() 語句的 Python 腳本,我希望能夠遍歷該腳本,並在每個 print() 語句之後插入一行註釋,顯示該語句的輸出(也就是:用註釋來標註 Python print() 的輸出)。舉例來說,假設有一個名爲 example.py 的腳本:

a, b = 1, 2 

print('a + b:', a + b) 

c, d = 3, 4 

print('c + d:', c + d) 

所需的輸出將是:

a, b = 1, 2 

print('a + b:', a + b) 
# a + b: 3 

c, d = 3, 4 

print('c + d:', c + d) 
# c + d: 7 

這裏是我的嘗試,它適用於像上面的一個簡單的例子:

import sys 
from io import StringIO 

def intercept_stdout(func):
    """Decorate *func* so that calling it returns the text it printed.

    The wrapper swaps ``sys.stdout`` for an in-memory ``StringIO`` buffer,
    calls *func* with the given arguments and returns everything written to
    the buffer as a single string.  The return value of *func* itself is
    discarded.

    ``sys.stdout`` is restored in a ``finally`` clause, so an exception
    raised by *func* propagates to the caller without leaving the
    interpreter printing into a dead buffer.
    """
    def wrapper(*args, **kwargs):
        "call the wrapped function and return its captured stdout"
        original_stdout = sys.stdout
        capture_stdout = StringIO()
        sys.stdout = capture_stdout
        try:
            func(*args, **kwargs)
        finally:
            # always undo the redirection, even when func() fails
            sys.stdout = original_stdout
        return capture_stdout.getvalue()

    return wrapper


@intercept_stdout
def exec_target(name):
    """Execute the script stored at path *name*.

    The function is decorated with ``intercept_stdout``, so the decorated
    call yields whatever that decorator returns for the executed script.

    The script runs in its own fresh namespace with ``__name__`` set to
    ``'__main__'``.  Running it with a bare ``exec(source)`` inside this
    function would store the script's top-level names in this function's
    locals, where functions defined by the script cannot find each other.
    Compiling with the real file name also makes tracebacks point at *name*.
    """
    with open(name, 'r') as f:
        source = f.read()
    # NOTE: exec() runs arbitrary code -- only use this on trusted scripts.
    exec(compile(source, name, 'exec'), {'__name__': '__main__'})


def read_target(name):
    """Read the script at path *name* and return its source as a list of lines.

    Every returned line ends with a newline: if the file's last line lacks
    one, it is appended so that a comment inserted after that line starts
    on a line of its own.  An empty file yields an empty list.
    """
    with open(name) as f:
        source = f.readlines()

    # guard against an empty file before looking at the last line
    if source and not source[-1].endswith('\n'):
        source[-1] += '\n'

    return source


def annotate_source(target):
    """Return the source of *target* with a comment under each print() line.

    Only lines that begin with ``print(`` are annotated, and each such line
    receives exactly one line of the script's captured output, taken in
    order.  Multi-line output, print() calls that are not at the start of a
    line and print() calls inside functions are therefore not matched up
    correctly.
    """
    source_lines = read_target(target)

    # positions of the lines that open with a print( call
    print_rows = [idx for idx, text in enumerate(source_lines)
                  if len(text) > 6 and text.startswith('print(')]

    # run the script once and cut its output into lines
    captured = exec_target(target).split('\n')

    pieces = []
    pending = 0  # position in print_rows of the next line to annotate
    for idx, text in enumerate(source_lines):
        pieces.append(text)
        if pending < len(print_rows) and idx == print_rows[pending]:
            pieces.append('# ' + captured.pop(0) + '\n')
            pending += 1

    return ''.join(pieces)


if __name__ == '__main__':
    target_script = 'example.py'
    # Build the annotated text before opening the output file: opening with
    # 'w' truncates immediately, so opening first would leave an empty
    # annotated_example.py behind whenever annotation fails.
    annotated = annotate_source(target_script)
    with open('annotated_example.py', 'w') as f:
        f.write(annotated)

但是,對於含有跨越多行的 print() 語句、或 print() 不在行首的腳本,它就會失敗。理想情況下,它甚至應該能處理函數內的 print() 語句。看看下面的例子:

print('''print to multiple lines, first line 
second line 
third line''') 

print('print from partial line, first part') if True else 0 

1 if False else print('print from partial line, second part') 

print('print from compound statement, first part'); pass 

pass; print('print from compound statement, second part') 

def foo(): 
    print('bar') 

foo() 

理想情況下,輸出應該是這樣的:

print('''print to multiple lines, first line 
second line 
third line''') 
# print to multiple lines, first line 
# second line 
# third line 

print('print from partial line, first part') if True else 0 
# print from partial line, first part 

1 if False else print('print from partial line, second part') 
# print from partial line, second part 

print('print from compound statement, first part'); pass 
# print from compound statement, first part 

pass; print('print from compound statement, second part') 
# print from compound statement, second part 

def foo(): 
    print('bar') 

foo() 
# bar 

但上面的腳本卻把它弄亂成這樣:

print('''print to multiple lines, first line 
# print to multiple lines, first line 
second line 
third line''') 

print('print from partial line, first part') if True else 0 
# second line 

1 if False else print('print from partial line, second part') 

print('print from compound statement, first part'); pass 
# third line 

pass; print('print from compound statement, second part') 

def foo(): 
    print('bar') 

foo() 

什麼方法可以使這個過程更強大?

+3

你會期望它在像'def foo(a,b):print(a,b)'這樣的情況下執行什麼操作,其中'foo'可以被多次調用? – Brian

+1

對於無法事先知道輸出值的 print,你打算如何顯示其輸出?例如 'print(randint(0,100))'? – xgord

+0

@xgord這些仍然會顯示出來,但對於每個run-through會有所不同。我主要是在每次使用結果相同的情況下使用它,但它們仍然可以用於展示示例輸出。 – Alec

回答

5

你有沒有考慮使用 inspect 模塊?如果你能接受註釋總是放在最頂層調用的旁邊,並且要註釋的文件足夠簡單,那麼就可以得到合理的結果。以下是我的嘗試:它覆蓋內置的 print 函數,並查看堆棧跟蹤,以確定 print 是從哪裏被調用的:

import inspect 
import sys 
from io import StringIO 

# filename -> {line number -> [text produced by each print reached from it]}
file_changes = {}


def anno_print(old_print, *args, **kwargs):
    """Stand-in for ``print`` that records output instead of showing it.

    The text that ``old_print(*args, **kwargs)`` writes to ``sys.stdout`` is
    captured and appended to ``file_changes[filename][line_number]``, where
    *filename* and *line_number* come from the second-outermost frame of the
    current call stack.  That attributes a print made deep inside nested
    calls to the outer statement that started the call chain.

    Nothing is written to the real stdout, and ``sys.stdout`` is restored
    even if ``old_print`` raises.  Returns ``None``.
    """
    # getouterframes() lists frames from the current one outwards, so [-1]
    # is the outermost frame and [-2] the frame directly inside it.
    caller = inspect.getouterframes(inspect.currentframe())[-2]
    filename, line_number = caller[1], caller[2]

    orig_stdout = sys.stdout
    capture_stdout = StringIO()
    sys.stdout = capture_stdout
    try:
        old_print(*args, **kwargs)
    finally:
        # never leave stdout redirected, even when printing fails
        sys.stdout = orig_stdout

    per_file = file_changes.setdefault(filename, {})
    per_file.setdefault(line_number, []).append(capture_stdout.getvalue())

def make_annotated_file(old_source, new_source):
    """Write a copy of *old_source* to *new_source* with outputs as comments.

    After every source line that has entries in
    ``file_changes[old_source]`` (keyed by 1-based line number), each
    recorded output is written as ``#``-prefixed comment lines; a
    multi-line output becomes one comment line per output line.

    A file with no recorded output at all is copied unchanged (apart from a
    newline added to an unterminated last line).  The source is read
    completely before the destination is opened, and both files are closed
    even if writing fails.
    """
    # a script that never printed has no entry: treat it as "no annotations"
    changes = file_changes.get(old_source, {})

    with open(old_source) as old_source_F:
        content = old_source_F.readlines()

    with open(new_source, 'w') as new_source_F:
        for line_num, line in enumerate(content, start=1):
            new_source_F.write(line)
            # keep the first comment off the end of an unterminated last line
            if not line.endswith('\n'):
                new_source_F.write('\n')
            for output in changes.get(line_num, ()):
                # drop only a real trailing newline; print(..., end='')
                # output must not lose its last character
                if output.endswith('\n'):
                    output = output[:-1]
                new_source_F.write('#' + output.replace('\n', '\n#') + '\n')



if __name__=='__main__':
    # Patch print through the `builtins` module: the `__builtins__` name is
    # an implementation detail that may be either a module or a dict.
    import builtins

    target_source = "foo.py"
    old_print = builtins.print
    builtins.print = lambda *args, **kwargs: anno_print(old_print, *args, **kwargs)
    try:
        with open(target_source) as f:
            code = compile(f.read(), target_source, 'exec')
        # Run the target in its own namespace so it cannot overwrite names
        # this script still needs (old_print, target_source, ...).
        # NOTE: exec() runs arbitrary code -- only use on trusted scripts.
        exec(code, {'__name__': '__main__'})
    finally:
        # restore the real print even if the target script raises
        builtins.print = old_print
    make_annotated_file(target_source, "foo_annotated.py")

如果我對下面這個文件「foo.py」運行它:

def foo(): 
    print("a") 
    print("b") 

def cool(): 
    foo() 
    print("c") 

def doesnt_print(): 
    a = 2 + 3 

print(1+2) 
foo() 
doesnt_print() 
cool() 

則輸出的「foo_annotated.py」爲:

def foo(): 
    print("a") 
    print("b") 

def cool(): 
    foo() 
    print("c") 

def doesnt_print(): 
    a = 2 + 3 

print(1+2) 
#3 
foo() 
#a 
#b 
doesnt_print() 
cool() 
#a 
#b 
#c 
+0

太棒了! 'inspect.getouterframes()'看起來很不錯。我也喜歡你直接覆蓋'print()'的決定,而不是像我一樣單獨跟蹤'stdout'。到目前爲止,我發現的唯一真實的邊緣情況是,在print()中的一個字符串像原始問題中的第二個例子那樣跨越多行。 – Alec

+1

哦,是的,這是寫入註釋文件時的格式化問題。我編輯了原來的回答:現在使用 `output = output[:-1].replace('\n', '\n#')` 來輸出。 –

+0

修復它,謝謝!還有一件小事:當我現在運行你的例子(以及當我運行其他的時候),我得到了在同一行上打印的最後一個函數的第一個註釋(比如'cool()#a')。任何想法發生了什麼? – Alec

1

感謝 @Lennart 的反饋,我幾乎已經讓它運作起來了……它逐行迭代,只要當前塊傳給 exec() 時會引發 SyntaxError,就把後續的行併入,形成越來越長的塊。把它貼在這裏供其他人參考:

import sys 
from io import StringIO 

def intercept_stdout(func):
    """Decorate *func* so that calling it returns the text it printed.

    The wrapper swaps ``sys.stdout`` for an in-memory ``StringIO`` buffer,
    calls *func* with the given arguments and returns everything written to
    the buffer as a single string.  The return value of *func* itself is
    discarded.

    ``sys.stdout`` is restored in a ``finally`` clause, so an exception
    raised by *func* propagates to the caller without leaving the
    interpreter printing into a dead buffer.
    """
    def wrapper(*args, **kwargs):
        "call the wrapped function and return its captured stdout"
        original_stdout = sys.stdout
        capture_stdout = StringIO()
        sys.stdout = capture_stdout
        try:
            func(*args, **kwargs)
        finally:
            # always undo the redirection, even when func() fails
            sys.stdout = original_stdout
        return capture_stdout.getvalue()

    return wrapper

@intercept_stdout
def exec_line(source, block_globals):
    """Run the code in *source* using *block_globals* as its global namespace.

    Names the code defines are stored in *block_globals*, so passing the
    same dict to successive calls lets later blocks see earlier ones.  The
    ``intercept_stdout`` decorator determines what the decorated call
    returns.
    """
    # NOTE: exec() runs arbitrary code -- only use on trusted source.
    exec(source, block_globals)

def read_target(name):
    """Read the script at path *name* and return its source as a list of lines.

    Every returned line ends with a newline: if the file's last line lacks
    one, it is appended so that a comment inserted after that line starts
    on a line of its own.  An empty file yields an empty list.
    """
    with open(name) as f:
        source = f.readlines()

    # guard against an empty file before looking at the last line
    if source and not source[-1].endswith('\n'):
        source[-1] += '\n'

    return source

def get_blocks(target, block_globals):
    """Split source lines into top-level blocks and return their index ranges.

    *target* is the script as a list of lines (each ending in a newline).
    The result is a list of ``(start, end)`` pairs, half-open indices into
    *target*, that cover every line exactly once and in order:

    * each top-level statement is one block, including its decorators and
      all continuation / body lines (a whole ``def`` is a single block);
    * several statements sharing a line (``a; b``) form one block;
    * every blank or comment-only line between statements is a block of
      its own.

    Boundaries come from the ``ast`` parser rather than from trial
    ``exec()`` calls.  Trial execution split a ``def`` after its first body
    line, recursed without bound once the remaining lines could not
    compile, and ran every block's side effects an extra time.  Nothing is
    executed here; *block_globals* is accepted only for backward
    compatibility and is left untouched.

    Raises ``SyntaxError`` if the source cannot be parsed.  Requires
    Python 3.8+ for the ``end_lineno`` attribute of AST nodes.
    """
    import ast  # local import keeps this function self-contained

    tree = ast.parse(''.join(target))

    blocks = []
    cursor = 0  # index of the first line not yet assigned to a block
    for node in tree.body:
        # decorators sit above the line reported for a def/class
        first_lines = [node.lineno]
        first_lines.extend(d.lineno for d in getattr(node, 'decorator_list', []))
        first = min(first_lines) - 1
        last = node.end_lineno

        if first < cursor:
            # starts on a line already claimed by the previous statement
            # ("a; b"): fold it into that block
            prev_start, prev_end = blocks[-1]
            blocks[-1] = (prev_start, max(prev_end, last))
            cursor = max(cursor, last)
            continue

        # blank / comment-only lines in the gap become one-line blocks
        blocks.extend((i, i + 1) for i in range(cursor, first))
        blocks.append((first, last))
        cursor = last

    # trailing blank / comment-only lines
    blocks.extend((i, i + 1) for i in range(cursor, len(target)))
    return blocks

def annotate_source(target, block_globals=None):
    """Return the source of *target* with printed output added as comments.

    The script is split into blocks with ``get_blocks``; each block is then
    executed with ``exec_line`` and every line it printed is inserted after
    the block as a ``# ...`` comment.

    *block_globals* is the namespace shared by all blocks.  It defaults to a
    fresh dict per call: a mutable ``{}`` default would be created once and
    leak names from one annotated script into the next.
    """
    if block_globals is None:
        block_globals = {}

    target_source = read_target(target)

    # start and end indices of each block
    outputs = get_blocks(target_source, block_globals)
    code_blocks = [''.join(target_source[start:end]) for start, end in outputs]

    annotated_source = []
    for block in code_blocks:
        annotated_source.append(block)
        printed_lines = exec_line(block, block_globals).split('\n')
        # output ending in a newline leaves one empty trailing piece
        if printed_lines and printed_lines[-1] == '':
            printed_lines.pop()
        annotated_source.extend('# ' + line + '\n' for line in printed_lines)

    # return new annotated source as a string
    return ''.join(annotated_source)

def main():
    """Annotate ``example.py`` and save the result as ``annotated_example.py``."""
    # edit these two paths: the script to annotate and where to save the result
    source_path, output_path = 'example.py', 'annotated_example.py'

    annotated = annotate_source(source_path)
    with open(output_path, 'w') as out_file:
        out_file.write(annotated)


if __name__ == '__main__':
    main()

它適用於上述兩個例子中的每一個。然而,試圖執行以下時:

def foo(): 
    print('bar') 
    print('baz') 

foo() 

而不是給我所需要的輸出:

def foo(): 
    print('bar') 
    print('baz') 

foo() 
# bar 
# baz 

它失敗了,並產生一個很長的回溯:

Traceback (most recent call last): 
    File "ex.py", line 55, in eval_blocks 
    exec(''.join(full_source[start_index:end_index]), block_globals) 
    File "<string>", line 1 
    print('baz') 
    ^
IndentationError: unexpected indent 

During handling of the above exception, another exception occurred: 

Traceback (most recent call last): 
    File "ex.py", line 55, in eval_blocks 
    exec(''.join(full_source[start_index:end_index]), block_globals) 
    File "<string>", line 1 
    print('baz') 
    ^
IndentationError: unexpected indent 

During handling of the above exception, another exception occurred: 

... 

Traceback (most recent call last): 
    File "ex.py", line 55, in eval_blocks 
    exec(''.join(full_source[start_index:end_index]), block_globals) 
    File "<string>", line 1 
    print('baz') 
    ^
IndentationError: unexpected indent 

During handling of the above exception, another exception occurred: 

Traceback (most recent call last): 
    File "ex.py", line 102, in <module> 
    main() 
    File "ex.py", line 97, in main 
    new_code = annotate_source(target_script) 
    File "ex.py", line 74, in annotate_source 
    outputs = get_blocks(target_source, block_globals) 
    File "ex.py", line 65, in get_blocks 
    outputs.append((eval_blocks(i, i+1, target, block_globals), i, lines)) 
    File "ex.py", line 16, in wrapper 
    func(*args, **kwargs) 
    File "ex.py", line 59, in eval_blocks 
    full_source, block_globals) 
    File "ex.py", line 16, in wrapper 
    func(*args, **kwargs) 

... 

    File "ex.py", line 16, in wrapper 
    func(*args, **kwargs) 
    File "ex.py", line 55, in eval_blocks 
    exec(''.join(full_source[start_index:end_index]), block_globals) 
RecursionError: maximum recursion depth exceeded while calling a Python object 

看起來像這種情況由於def foo(): print('bar')是有效的代碼,所以print('baz')沒有被包含在該函數中,導致它失敗並帶有IndentationError。任何想法如何避免這個問題?我懷疑它可能需要潛入ast如上所述,但會喜歡進一步的輸入或使用示例。

1

通過使用現有的Python解析器從代碼中提取頂級語句,可以使它變得更容易。例如,標準庫中的ast模塊。但是,ast會丟失一些信息,如評論。

使用源代碼轉換(您正在做的)構建的庫可能更適合於此處。 redbaron就是一個很好的例子。

要讓全局變量能夠傳遞給下一次 exec() 調用,你必須使用它的第二個參數(見文檔):

# One namespace dict shared by every exec() call: exec() stores the names a
# statement defines in this dict, so later statements can see them.
# `statements` is not defined in this snippet -- presumably an iterable of
# source strings, one per top-level statement.
environment = {} 
for statement in statements: 
    exec(statement, environment) 
+0

ast和redbaron的偉大建議(我以前只使用'ast.literal_eval()',將不得不消化一些更高級的功能)。有沒有辦法從'exec()'中提取環境,以便將它們鏈接在一起? – Alec

+1

當然,exec 會修改你傳給它的字典。所以,當你把一個空字典傳給 exec 時,它在執行之後就會包含該環境。 – Lennart

+0

這是一種解脫! (我擔心我不得不攔截exec()的默認'None'返回值。) – Alec

1

看起來 except SyntaxError 不足以判斷一個函數是否完整,因爲只要遇到第一個不會產生語法錯誤的行,它就會結束該塊。你需要的是確保整個函數都包含在同一個塊中。要做到這一點:

  • 檢查當前塊是否是一個函數。檢查第一行是否以def開頭。

  • 檢查 full_source 中的下一行的縮進是否大於或等於該函數第二行(即定義函數體縮進的那一行)的縮進。也就是說,eval_blocks 會檢查下一行代碼的縮進是否更深或相等,如果是,該行就仍位於函數內。

get_blocks的代碼可能是這個樣子:

# function for finding num of spaces at beginning (could be in global spectrum) 
def get_front_whitespace(string):
    """Return the indentation width of *string* in columns.

    Only leading spaces and tabs are counted.  A space is one column; a tab
    advances to the next multiple of eight columns, which is how Python
    itself measures indentation.  A flat "+8 per tab" would over-count
    mixed indentation such as ``' \\t'``, which is 8 columns wide, not 9.

    Returns 0 for an empty string or a string with no leading whitespace.
    """
    spaces = 0
    for char in string:
        if char == ' ':
            spaces += 1
        elif char == '\t':
            # jump to the next tab stop (multiple of 8)
            spaces += 8 - spaces % 8
        else:
            # first non-whitespace character ends the indentation
            break
    return spaces

... 

def get_blocks(target, block_globals):
    """Split source lines into executable blocks and return their index ranges.

    *target* is the script as a list of lines.  Starting from each line not
    yet assigned to a block, the block grows one line at a time until

    * ``exec()`` accepts it without a ``SyntaxError``, and
    * if it starts with ``def``, the next non-blank line is indented less
      than the function body, so the whole function stays in one block.

    Each candidate block is executed in *block_globals* while it grows, so
    side effects of the script happen during this call.  Printed output is
    swallowed by the ``intercept_stdout`` decorator, and indentation is
    measured with ``get_front_whitespace``; both are expected to be defined
    elsewhere in the script.

    Returns a list of ``(start, end)`` half-open index pairs into *target*.
    Raises ``SyntaxError`` if a block still does not compile once the end of
    the source has been reached.
    """
    outputs = []
    lines = 1  # length of the block currently being built

    def body_continues(start_index, end_index, full_source):
        "tell whether the first non-blank line after the block is still in the def"
        def_indent = get_front_whitespace(full_source[start_index])
        # the function body's indentation is set by the first non-blank
        # line after the "def" line
        func_indent = next((get_front_whitespace(text)
                            for text in full_source[start_index + 1:]
                            if text.strip()), def_indent)
        if func_indent <= def_indent:
            # one-line def ("def f(): pass"): nothing after it belongs to it
            return False
        # blank lines say nothing about nesting, so look past them; the
        # slice starts at end_index because end_index is exclusive
        next_index_spaces = next((get_front_whitespace(text)
                                  for text in full_source[end_index:]
                                  if text.strip()), None)
        return next_index_spaces is not None and next_index_spaces >= func_indent

    @intercept_stdout
    def eval_blocks(start_index, end_index, full_source, block_globals):
        "grow the block starting at start_index until it is complete"
        nonlocal lines
        block_is_func = full_source[start_index].startswith('def ')
        while True:
            try:
                exec(''.join(full_source[start_index:end_index]), block_globals)
            except SyntaxError:
                # incomplete block: add a line, unless none are left, in
                # which case the source itself is invalid
                if end_index >= len(full_source):
                    raise
            else:
                if not (block_is_func
                        and body_continues(start_index, end_index, full_source)):
                    return
            lines += 1
            end_index = start_index + lines

    for i, _ in enumerate(target):
        # skip lines already swallowed by the previous block
        if lines > 1:
            lines -= 1
            continue
        eval_blocks(i, i + 1, target, block_globals)
        outputs.append((i, i + lines))

    return outputs

不過,如果函數的最後一行就是文件的末尾,向前索引(例如 get_front_whitespace(full_source[end_index + 1]))可能會引發 IndexError,因此需要先檢查索引是否越界。

這也可用於可能有同樣問題的選擇語句和循環:只需在檢查行首是否爲 def 的同時,也檢查 if、for、while 即可。這會導致註釋位於縮進區域之後,但由於縮進區域內的打印輸出取決於調用時所用的變量,我認爲無論如何都應該把輸出放在縮進區域之外。