2017-03-31 31 views
0

在 Linux/bash 下,如何獲取一個目錄及其內容的純文本表示形式?(請注意,這裏的「純文本」是指「UTF-8」。)也就是說:如何將目錄及其內容歸檔/打包爲純文本格式?

換句話說,我如何將一個目錄(連同其內容,包括二進制文件)「打包」或「歸檔」爲一個純文本文件,以便之後可以將其「解壓」,並得到與原來相同的目錄及內容?

+0

我想到的是 `tar` 和 `uuencode` 的組合(或者更現代的 base64 或十六進制編碼器)... – twalberg

回答

0

我對此感興趣了一段時間,我想我最終設法寫出了一個可在 Python 2.7 和 3.4 中使用的腳本 - 不過,我仍然想知道是否還有其他類似的現成工具。該腳本已作爲一個 gist 發佈(其中有更多的註釋):

https://gist.github.com/anonymous/1a68bf2c9134fd5312219c8f68713632

否則,我在這裏張貼(下文)略有刪節版本以供參考。

用法:將目錄歸檔/打包成一個 .json 文本文件:

python archdir2text-json.py -a /tmp > myarchdir.json 

...從 .json 文本文件解壓到當前(調用時所在的)目錄:

python archdir2text-json.py -u myarchdir.json 

二進制文件作爲base64處理。

這裏是腳本:

archdir2text-json.py

#!/usr/bin/env python 

import argparse
import base64
import codecs
import errno
import inspect
import os
import pprint
import stat
import sys
import textwrap

class SmartDescriptionFormatter(argparse.RawDescriptionHelpFormatter):
    """Help formatter that re-wraps descriptions marked with a leading ``R|``.

    Descriptions without the marker are laid out exactly as
    ``argparse.RawDescriptionHelpFormatter`` would lay them out.
    """

    def _fill_text(self, text, width, indent):
        """Return *text* formatted for the help output.

        If *text* starts with ``R|``, the marker is stripped, the remainder is
        split into lines, and every line is treated as its own paragraph that
        is wrapped to *width* with *indent* prepended to each output line.
        Empty lines are preserved as empty output lines.  Any other text is
        delegated to ``RawDescriptionHelpFormatter._fill_text`` untouched.
        """
        if not text.startswith('R|'):
            return argparse.RawDescriptionHelpFormatter._fill_text(
                self, text, width, indent)

        wrapped_lines = []
        for paragraph in text[2:].splitlines():
            pieces = textwrap.wrap(
                paragraph, width,
                initial_indent=indent, subsequent_indent=indent)
            # wrap() returns [] for an empty/blank paragraph; keep the blank
            # line so paragraph breaks survive in the help output.
            wrapped_lines.extend(pieces or [""])
        return '\n'.join(wrapped_lines)

# Byte values treated as "text": a handful of control characters
# (BEL, BS, TAB, LF, FF, CR, ESC) plus everything from 0x20 to 0xff
# except DEL (0x7f).
textchars = bytearray([7, 8, 9, 10, 12, 13, 27]) + bytearray(
    value for value in range(0x20, 0x100) if value != 0x7f
)


def is_binary_string(bytes):
    """Return True if the byte string holds any byte not in ``textchars``.

    The parameter keeps its original name (which shadows the builtin) so
    that the callable's signature is unchanged.
    """
    # Delete every "text" byte; whatever is left over is non-text.
    leftover = bytes.translate(None, textchars)
    return len(leftover) > 0


# Working directory captured once, when the module is loaded.
cwd = os.getcwd()

if os.name == 'nt':
    # pywin32 is only needed (and only importable) on Windows.
    import win32api
    import win32con


def folder_is_hidden(p):
    """Return True if the path *p* should be considered hidden.

    On Windows (``os.name == 'nt'``) the file attributes of *p* are queried
    and the entry counts as hidden when the HIDDEN or SYSTEM attribute is
    set.  On every other platform an entry is hidden when its base name
    starts with a dot.  The result is always a real ``bool``.
    """
    if os.name == 'nt':
        attribute = win32api.GetFileAttributes(p)
        hidden_mask = (win32con.FILE_ATTRIBUTE_HIDDEN
                       | win32con.FILE_ATTRIBUTE_SYSTEM)
        return bool(attribute & hidden_mask)
    return os.path.basename(p).startswith('.')  # linux-osx

def _read_file_payload(path):
    """Return ``(ftype, content)`` describing the non-directory at *path*.

    ``ftype`` is ``"fifo"``, ``"txt"``, ``"bin"`` or, when the file could not
    be inspected or read, the text of the I/O error.  ``content`` is the
    decoded text for ``"txt"``, the base64 text for ``"bin"`` and ``None``
    when nothing is stored.
    """
    try:
        if stat.S_ISFIFO(os.stat(path).st_mode):
            # FIFOs are recorded by type only; their contents are not read.
            return "fifo", None
        with open(path, 'rb') as handle:
            data = handle.read()
    except (IOError, OSError) as e:
        return str(e), None

    if not is_binary_string(data):
        try:
            return "txt", data.decode("utf-8")
        except UnicodeDecodeError:
            # No control bytes, but not valid UTF-8 either (e.g. Latin-1):
            # store it as base64 instead of losing the content.
            pass
    return "bin", base64.b64encode(data).decode("utf-8")


def path_hierarchy(path):
    """Return a JSON-serialisable dict describing *path* and all below it.

    Every entry has the keys ``type``, ``name`` and ``path``.  Directories
    get ``type == 'folder'`` and a ``children`` list (sorted by name so the
    output is reproducible); sub-directories for which ``folder_is_hidden``
    is true are left out.  If listing the directory fails for a reason other
    than "not a directory", the error text is appended to ``type`` and no
    ``children`` key is set.  Anything that is not a directory gets
    ``type == 'file'`` plus ``ftype`` and, for ``"txt"``/``"bin"`` files,
    ``content`` (UTF-8 text, or base64 for binary data).
    """
    hierarchy = {
        'type': 'folder',
        # abspath() so that '.' or a trailing separator still yields a real
        # directory name instead of '.' or ''.
        'name': os.path.basename(os.path.abspath(path)),
        'path': path,
    }

    try:
        entries = sorted(os.listdir(path))
    except OSError as e:
        if e.errno == errno.ENOTDIR:
            hierarchy['type'] = 'file'
        else:
            hierarchy['type'] += " " + str(e)
    else:
        children = []
        for entry in entries:
            child = os.path.join(path, entry)
            if os.path.isdir(child) and folder_is_hidden(child):
                continue
            children.append(path_hierarchy(child))
        hierarchy['children'] = children

    if hierarchy['type'] == 'file':
        ftype, content = _read_file_payload(path)
        if content is not None:
            hierarchy['content'] = content
        hierarchy['ftype'] = ftype
    return hierarchy

def _safe_target(relpath, name):
    """Return *relpath* joined with *name*, refusing names that could escape it.

    Raises ``ValueError`` for an empty name, ``..``, a name containing a path
    separator, or a name carrying a drive prefix.
    """
    separators = [sep for sep in (os.sep, os.altsep) if sep]
    if (name in ("", "..")
            or any(sep in name for sep in separators)
            or os.path.splitdrive(name)[0]):
        raise ValueError("unsafe entry name in archive: %r" % (name,))
    return os.path.join(relpath, name)


def recurse_unpack(inobj, relpath=""):
    """Recreate on disk the entry *inobj* (and its children) under *relpath*.

    *inobj* is one dict of the structure found in the .json archive.
    Entries with ``type == 'folder'`` become directories (an already existing
    directory is reused) and their ``children`` are unpacked recursively.
    Entries with ``type == 'file'`` are written as UTF-8 text when ``ftype``
    is ``"txt"`` and as base64-decoded bytes when it is ``"bin"``; any other
    ``ftype`` or ``type`` is skipped without creating anything.  Progress is
    reported on stderr.

    *relpath* is the prefix prepended to the entry name; it is expected to be
    empty or to end with a path separator.

    Raises ``ValueError`` if an entry name could place the output outside
    *relpath* (the archive is external input and must not be trusted).
    """
    if inobj['type'] == "folder":
        rpname = _safe_target(relpath, inobj['name'])
        sys.stderr.write("folder name: " + rpname + os.linesep)
        try:
            os.mkdir(rpname)
        except OSError as e:
            # Unpacking over an existing directory is fine; anything else
            # (permissions, a plain file in the way, ...) is a real error.
            if e.errno != errno.EEXIST or not os.path.isdir(rpname):
                raise
        for tchild in inobj['children']:
            recurse_unpack(tchild, relpath=rpname + os.sep)
    elif inobj['type'] == "file":
        rfname = _safe_target(relpath, inobj['name'])
        sys.stderr.write("file name: " + rfname + os.linesep)
        if inobj['ftype'] == "txt":
            with codecs.open(rfname, "w", "utf-8") as text_file:
                text_file.write(inobj['content'])
        elif inobj['ftype'] == "bin":
            with open(rfname, "wb") as bin_file:
                bin_file.write(base64.b64decode(inobj['content']))

# Script entry point: "-a" archives the given paths to JSON on stdout,
# "-u" unpacks the given .json archive(s) into the current directory.
if __name__ == '__main__':
    import json
    import sys

    parser = argparse.ArgumentParser(
        formatter_class=SmartDescriptionFormatter,
        description=(
            "R|Command-line App that packs/archives (and vice-versa) a directory to a plain-text .json file; should work w/ both Python 2.7 and 3.4 \n"
            "\n"
            "see full help text in https://gist.github.com/anonymous/1a68bf2c9134fd5312219c8f68713632"
        ),
    )

    parser.add_argument(
        'input_paths', type=str, nargs='*', default=['.'],
        help='Paths to files/directories to include in the archive; or path to .json archive file')

    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        '-a', '--archive', action='store_true',
        help="Interpret input_paths as paths to files/directories, and archive them to a .json file (output to stdout)")
    group.add_argument(
        '-u', '--unpack', action='store_true',
        help="Interpret input_paths as path to an archive .json file, and unpack it in the current directory")

    args = parser.parse_args()

    # Both modes only accept paths that exist; report and drop the rest.
    valid_input_paths = []
    for p in args.input_paths:
        if os.path.exists(p):
            valid_input_paths.append(p)
        else:
            sys.stderr.write("Ignoring invalid input path: " + p + os.linesep)

    if args.archive:
        sys.stderr.write("Encoding input path(s): " + str(valid_input_paths) + os.linesep)
        path_hier_arr = [path_hierarchy(vp) for vp in valid_input_paths]
        outjson = json.dumps(path_hier_arr, indent=2, sort_keys=True, separators=(',', ': '))
        print(outjson)
    elif args.unpack:
        for vp in valid_input_paths:
            # Read the archive as UTF-8 regardless of the locale encoding.
            with codecs.open(vp, "r", "utf-8") as data_file:
                data = json.load(data_file)
            for datachunk in data:
                recurse_unpack(datachunk)