0
在Linux/bash
下,如何獲取其內容目錄的純文本表示形式? (請注意,這裏的「純文本」是指「UTF-8」)。歸檔/打包內容爲純文本格式的目錄?
換句話說,我如何將一個目錄(包含內容 - 包括二進制文件)「打包」或「歸檔」爲純文本文件 - 這樣我可以稍後「解壓」它,並獲得與其內容?
在Linux/bash
下,如何獲取其內容目錄的純文本表示形式? (請注意,這裏的「純文本」是指「UTF-8」)。歸檔/打包內容爲純文本格式的目錄?
換句話說,我如何將一個目錄(包含內容 - 包括二進制文件)「打包」或「歸檔」爲純文本文件 - 這樣我可以稍後「解壓」它,並獲得與其內容?
我對此感興趣了一段時間,我想我最終設法制作了一個可在Python 2.7和3.4中使用的腳本 - 但是,我仍然想知道是否還有其他的東西一樣。這是作爲一個要點(有一些更多的評論):
https://gist.github.com/anonymous/1a68bf2c9134fd5312219c8f68713632
否則,我在這裏張貼(下文)略有刪節版本以供參考。
用法是:歸檔/包成一個以.json文本文件:
python archdir2text-json.py -a /tmp > myarchdir.json
...從以.json文本文件解壓到當前(主叫)目錄:
python archdir2text-json.py -u myarchdir.json
二進制文件作爲base64處理。
這裏是腳本:
archdir2text-json.py
#!/usr/bin/env python
import pprint, inspect
import argparse
import os
import stat
import errno
import base64
import codecs
class SmartDescriptionFormatter(argparse.RawDescriptionHelpFormatter):
def _fill_text(self, text, width, indent):
if text.startswith('R|'):
paragraphs = text[2:].splitlines()
rebroken = [argparse._textwrap.wrap(tpar, width) for tpar in paragraphs]
rebrokenstr = []
for tlinearr in rebroken:
if (len(tlinearr) == 0):
rebrokenstr.append("")
else:
for tlinepiece in tlinearr:
rebrokenstr.append(tlinepiece)
return '\n'.join(rebrokenstr)
return argparse.RawDescriptionHelpFormatter._fill_text(self, text, width, indent)
textchars = bytearray({7,8,9,10,12,13,27} | set(range(0x20, 0x100)) - {0x7f})
is_binary_string = lambda bytes: bool(bytes.translate(None, textchars))
cwd = os.getcwd()
if os.name == 'nt':
import win32api, win32con
def folder_is_hidden(p):
if os.name== 'nt':
attribute = win32api.GetFileAttributes(p)
return attribute & (win32con.FILE_ATTRIBUTE_HIDDEN | win32con.FILE_ATTRIBUTE_SYSTEM)
else:
return os.path.basename(p).startswith('.') #linux-osx
def path_hierarchy(path):
hierarchy = {
'type': 'folder',
'name': os.path.basename(path),
'path': path,
}
try:
cleared_contents = [contents
for contents in os.listdir(path)
if not(
os.path.isdir(os.path.join(path, contents))
and
folder_is_hidden(os.path.join(path, contents))
)]
hierarchy['children'] = [
path_hierarchy(os.path.join(path, contents))
for contents in cleared_contents
]
except OSError as e:
if e.errno == errno.ENOTDIR:
hierarchy['type'] = 'file'
else:
hierarchy['type'] += " " + str(e)
if hierarchy['type'] == 'file':
isfifo = stat.S_ISFIFO(os.stat(hierarchy['path']).st_mode)
if isfifo:
ftype = "fifo"
else:
try:
data = open(hierarchy['path'], 'rb').read()
ftype = "bin" if is_binary_string(data) else "txt"
if (ftype == "txt"):
hierarchy['content'] = data.decode("utf-8")
else:
hierarchy['content'] = base64.b64encode(data).decode("utf-8")
except Exception as e:
ftype = str(e)
hierarchy['ftype'] = ftype
return hierarchy
def recurse_unpack(inobj, relpath=""):
if (inobj['type'] == "folder"):
rpname = relpath + inobj['name']
sys.stderr.write("folder name: " + rpname + os.linesep);
os.mkdir(rpname)
for tchild in inobj['children']:
recurse_unpack(tchild, relpath=relpath+inobj['name']+os.sep)
elif (inobj['type'] == "file"):
rfname = relpath + inobj['name']
sys.stderr.write("file name: " + rfname + os.linesep)
if inobj['ftype'] == "txt":
with codecs.open(rfname, "w", "utf-8") as text_file:
text_file.write(inobj['content'])
elif inobj['ftype'] == "bin":
with open(rfname, "wb") as bin_file:
bin_file.write(base64.b64decode(inobj['content']))
if __name__ == '__main__':
import json
import sys
parser = argparse.ArgumentParser(formatter_class=SmartDescriptionFormatter, description="""R|Command-line App that packs/archives (and vice-versa) a directory to a plain-text .json file; should work w/ both Python 2.7 and 3.4
see full help text in https://gist.github.com/anonymous/1a68bf2c9134fd5312219c8f68713632""")
parser.add_argument('input_paths', type=str, nargs='*', default=['.'],
help='Paths to files/directories to include in the archive; or path to .json archive file')
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument('-a', '--archive', action='store_true', help="Interpret input_paths as paths to files/directories, and archive them to a .json file (output to stdout)")
group.add_argument('-u', '--unpack', action='store_true', help="Interpret input_paths as path to an archive .json file, and unpack it in the current directory")
args = parser.parse_args()
if (args.archive):
valid_input_paths = []
for p in args.input_paths:
if os.path.isdir(p) or os.path.exists(p):
valid_input_paths.append(p)
else:
sys.stderr.write("Ignoring invalid input path: " + p + os.linesep)
sys.stderr.write("Encoding input path(s): " + str(valid_input_paths) + os.linesep)
path_hier_arr = [path_hierarchy(vp) for vp in valid_input_paths]
outjson = json.dumps(path_hier_arr, indent=2, sort_keys=True, separators=(',', ': '))
print(outjson)
elif (args.unpack):
valid_input_paths = []
for p in args.input_paths:
if os.path.isdir(p) or os.path.exists(p):
valid_input_paths.append(p)
else:
sys.stderr.write("Ignoring invalid input path: " + p + os.linesep)
for vp in valid_input_paths:
with open(vp) as data_file:
data = json.load(data_file)
for datachunk in data:
recurse_unpack(datachunk)
tar'和'uuencode'的'的組合想到(或更現代的鹼-64或十六進制編碼器以及).. 。 – twalberg