我有一個複雜的Python管道(我無法修改它的代碼),它會調用多個其他腳本和可執行文件。問題在於,它要對8000多個目錄進行科學分析,運行時間非常長。因此,我用multiprocessing模塊寫了一個簡單的包裝器(可能不是最高效的,但似乎可以工作)。多個Python進程之間的os.chdir
from os import path, listdir, mkdir, system
from os.path import join as osjoin, exists, isfile
from GffTools import Gene, Element, Transcript
from GffTools import read as gread, write as gwrite, sort as gsort
from re import match
from multiprocessing import JoinableQueue, Process
from sys import argv, exit
# Absolute paths used by every worker.
# inbase holds one sub-directory per job; each is expected to contain
# "<name>.dna.fa" and "informants.mfa" (see the command built in abfgp()).
inbase = "/.../abfgp_in"
# outbase receives one output sub-directory per job plus one log file per worker.
outbase = "/.../abfgp_out"
abfgp_cmd = "python /.../abfgp-2.rev/abfgp.py"
refGff = "/.../B0510_manual_reindexed_noSeq.gff"
# Work queue shared by all worker processes; it is filled with directory names.
Q = JoinableQueue()
# Number of queued jobs; workers use it to print a progress counter.
i = 0
# Number of worker processes, taken from the first command-line argument.
# A missing argument raises IndexError and a non-numeric one raises ValueError;
# both are reported the same way instead of letting IndexError escape.
try:
    num_p = int(argv[1])
except (IndexError, ValueError):
    exit("Wrong CPU argument")
def abfgp(id_, pid):
    """Run the external abfgp.py script for one input directory.

    The script (which in turn calls a lot of third-party software) is started
    as a child process. Its standard output is appended to the log file of the
    calling worker, ``<outbase>/log_process_<pid>``.

    id_ -- name of the job; used as the sub-directory name below ``inbase``
           and ``outbase`` and as the ``--target`` value.
    pid -- number of the calling worker; only used to pick the log file.

    Returns None. A failed run is reported by printing "ABFGP FAILED".
    """
    # Function-scope imports keep this block self-contained.
    import shlex
    from subprocess import call

    out = osjoin(outbase, id_)
    if not exists(out):
        mkdir(out)
    # logfile
    log = osjoin(outbase, "log_process_%s" % (pid))
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact; abfgp_cmd itself is "interpreter script", so it
    # is split into separate arguments first.
    cmd = shlex.split(abfgp_cmd) + [
        "--dna", osjoin(inbase, id_, id_ + ".dna.fa"),
        "--multifasta", osjoin(inbase, id_, "informants.mfa"),
        "--target", id_,
        "-o", out,
        "-q",
    ]
    try:
        # call the script, appending its stdout to the worker log
        with open(log, "a") as logfile:
            status = call(cmd, stdout=logfile)
    except (OSError, IOError):
        # The log could not be opened or the program could not be started.
        status = None
    # A failing command is signalled through its exit status, not through an
    # exception, so the status has to be checked explicitly.
    if status != 0:
        print("ABFGP FAILED")
    return
# parse the output
# NOTE(review): the body of this function was left out of the post, so nothing
# beyond "parses the output" can be documented here. It is called once per
# entry of genedirs after all workers have finished; presumably id_ is the
# same directory name that was passed to abfgp() - confirm against the full file.
def extractGff(id_):
# code not relevant
# function called by multiple processes, using the Queue
def run(Q, pid):
    """Worker loop: take directory names from Q and run abfgp() on each.

    Q   -- queue of directory names shared by all workers.
    pid -- number of this worker, passed on to abfgp() for its log file.

    Returns when no job has been available for one second. Exits the process
    with "Interrupted Child" on KeyboardInterrupt.
    """
    # The module is called Queue on Python 2 and queue on Python 3.
    try:
        from Queue import Empty
    except ImportError:
        from queue import Empty

    while True:
        try:
            # Do not test Q.empty() and then block in get(): another worker
            # may take the last job in between, leaving this worker blocked
            # forever (and the parent stuck in join()). A timed get() ends
            # the loop cleanly instead.
            d = Q.get(True, 1)
            # progress counter: jobs queued minus jobs still waiting
            print("%s\t=>>\t%s" % (str(i - Q.qsize()), d))
            abfgp(d, pid)
            Q.task_done()
        except Empty:
            return
        except KeyboardInterrupt:
            exit("Interrupted Child")
# Every entry of the input directory becomes one job on the queue.
genedirs = list(listdir(inbase))
# Reference annotation; not used in the code shown here.
genes = gread(refGff)
for d in genedirs:
    Q.put(d)
    indir = osjoin(inbase, d)
    outdir = osjoin(outbase, d)
    # i ends up as the total number of queued jobs
    i += 1
# Start num_p worker processes, numbered from 1.
procs = []
for pid in range(num_p):
    try:
        p = Process(target=run, args=(Q, pid + 1))
        # Daemonic children are terminated automatically when this process exits.
        p.daemon = True
        # Start before recording the process: terminate() in the handler
        # below must never be called on a process that was not started.
        p.start()
        procs.append(p)
    except KeyboardInterrupt:
        print("Aborting start of child processes")
        for x in procs:
            x.terminate()
        exit("Interrupted")
# Wait until every worker has finished.
try:
    for p in procs:
        p.join()
except KeyboardInterrupt:
    # Only a user interrupt is handled here; any other error should surface
    # with its traceback instead of being reported as "Interrupted".
    print("Terminating child processes")
    for x in procs:
        x.terminate()
    exit("Interrupted")
# All workers are done: parse the result of every queued directory in turn.
print("Parsing output...")
for d in genedirs:
    extractGff(d)
現在的問題是,abfgp.py使用了os.chdir函數,這似乎破壞了並行處理。我收到很多錯誤,指出某些(輸入/輸出)文件或目錄無法讀取或寫入——儘管我是通過os.system()調用該腳本的,我原以爲這樣產生的獨立進程可以避免這種情況。
我該如何解決這些chdir干擾問題?
編輯:我可能會把os.system()改爲subprocess.Popen(cwd="..."),並傳入正確的目錄。希望這能起作用。
謝謝。
爲什麼使用'os.system'而不是'subprocess.call'?不用字符串插值的話,會省事得多。 –
好建議,你說得對 :),但正如我所說,我原以爲os.system能解決chdir干擾的問題 – Sander