2016-02-07 69 views
-2

我有大約2.5M文件用.py腳本處理。如何加速引用數百萬個文件的.py腳本?

我正在使用超級計算機,所以問題不在於運算能力,而是Python進程每處理一個文件都要重新啓動和關閉一次,白白浪費了時間。

我正在使用循環來逐一取得文件夾中要用腳本轉換的每個文件。${line}來自一個列表文件,其中每一行對應文件夾中的一個文件。

有沒有辦法只啓動一次.py腳本就處理所有文件,而不是在循環中反覆調用Python腳本?

這是我的循環代碼:

### LOOP ### 
# Runs the conversion script once per ligand, which starts a fresh pythonsh
# process for every input file.
# NOTE(review): as shown, `while :` loops forever and nothing visible here
# assigns ${line}; presumably the real script reads it from a list file
# (e.g. `while read line`) -- confirm against the original.
while : 
do 
# -l: ligand file, -U '': cleanup type set to empty, -A hydrogens: repair type
# (option meanings taken from the Python script's usage text).
pythonsh ${RAMDISK}/script.py -l ${RAMDISK}/${line}.pdb -U '' -A hydrogens 

done 

exit 

這個Python腳本只是一個把.pdb文件轉換成.pdbqt文件的工具,我是在Autodock4自帶的AutodockTools裏找到它的。

+0

你問Python腳本,但你顯示bash腳本?如何嘗試幫助? – Clodion

+0

是你的問題如何在Python中做循環? – kay

+4

如果瓶頸是Python腳本的進程管理,那麼只需將循環移動到Python腳本,以便不再需要一直打開和關閉。 – poke

回答

1

我修改了腳本,添加了一個-i命令行選項。

這將允許您指定一個包含配體文件名(每行一個)的文本文件,並在不重新啓動Python的情況下處理它們。

您現在應該可以這樣調用它:

pythonsh ./newscript.py -i ./list_of_files.txt -U '' -A hydrogens 

注:這是未經測試!它應該按照給定的方式工作,但要非常謹慎!

#!/usr/bin/env python 
# 
# 
# 
# $Header: /opt/cvs/python/packages/share1.5/AutoDockTools/Utilities24/prepare_ligand4.py,v 1.5.4.1 2009/04/15 17:41:57 rhuey Exp $ 
# 
# Modified 2016/02/07 
# Hugh Bothwell http://stackoverflow.com/users/33258 
# Added -i commandline option to process multiple files 
# 
import os 

from MolKit import Read 
from AutoDockTools.MoleculePreparation import AD4LigandPreparation 

# Default values for every command-line setting.
# The option-parsing code under ``__main__`` rebinds these module-level
# names, and process_file() reads them as globals, so the names must stay
# exactly as they are.

# --- input / output selection ---
ligand_filename = None   # -l: a single ligand file
ligand_listfile = None   # -i: text file containing ligand filenames
outputfilename = None    # -o: output filename

# --- reporting ---
verbose = None           # -v: verbose output

# Not referenced anywhere else in the visible part of this script.
add_bonds = False

# --- molecule preparation ---
repairs = ''                     # -A: repairs to make (bonds and/or hydrogens)
charges_to_add = "gasteiger"     # -C switches this to None (no charges added)
preserve_charge_types = ""       # -p: atom types whose charges are preserved
cleanup = 'nphs_lps'             # -U: merge nphs_lps, nphs or lps

# --- torsion tree ---
allowed_bonds = 'backbone'       # -B: named bond type(s) allowed to rotate
root = "auto"                    # -R: root
bonds_to_inactivate = ''         # -I: bonds to inactivate
inactivate_all_torsions = False  # -Z: inactivate all torsions

# --- fragments ---
check_for_fragments = False          # -F: check for fragments
attach_nonbonded_fragments = False   # -g: attach nonbonded fragments

# --- run mode ---
mode = "automatic"               # -M: mode

# -d: dictionary.
# NOTE: this name shadows the builtin ``dict`` for the whole module; it is
# kept because other code in this file refers to it by this name.
dict = None

def process_file(fname): 
    mols = Read(fname) 
    if verbose: print 'read ', fname 
    mol = mols[0] 
    if len(mols)>1: 
     if verbose: 
      print "more than one molecule in file" 
     #use the one molecule with the most atoms 
     ctr = 1 
     for m in mols[1:]: 
      ctr += 1 
      if len(m.allAtoms)>len(mol.allAtoms): 
       mol = m 
       if verbose: 
        print "mol set to ", ctr, "th molecule with", len(mol.allAtoms), "atoms" 
    coord_dict = {} 
    for a in mol.allAtoms: coord_dict[a] = a.coords 

    mol.buildBondsByDistance() 
    if charges_to_add is not None: 
     preserved = {} 
     preserved_types = preserve_charge_types.split(',') 
     for t in preserved_types: 
      if not len(t): continue 
      ats = mol.allAtoms.get(lambda x: x.autodock_element==t) 
      for a in ats: 
       if a.chargeSet is not None: 
        preserved[a] = [a.chargeSet, a.charge] 

    if verbose: 
     print "setting up LPO with mode=", mode, 
     print "and outputfilename= ", outputfilename 
     print "and check_for_fragments=", check_for_fragments 
     print "and bonds_to_inactivate=", bonds_to_inactivate 
    LPO = AD4LigandPreparation(mol, mode, repairs, charges_to_add, 
          cleanup, allowed_bonds, root, 
          outputfilename=outputfilename, 
          dict=dict, check_for_fragments=check_for_fragments, 
          bonds_to_inactivate=bonds_to_inactivate, 
          inactivate_all_torsions=inactivate_all_torsions, 
          attach_nonbonded_fragments=attach_nonbonded_fragments) 
    #do something about atoms with too many bonds (?) 
    #FIX THIS: could be peptide ligand (???) 
    #   ??use isPeptide to decide chargeSet?? 
    if charges_to_add is not None: 
     #restore any previous charges 
     for atom, chargeList in preserved.items(): 
      atom._charges[chargeList[0]] = chargeList[1] 
      atom.chargeSet = chargeList[0] 
    if verbose: print "returning ", mol.returnCode 
    bad_list = [] 
    for a in mol.allAtoms: 
     if a.coords!=coord_dict[a]: bad_list.append(a) 
    if len(bad_list): 
     print len(bad_list), ' atom coordinates changed!'  
     for a in bad_list: 
      print a.name, ":", coord_dict[a], ' -> ', a.coords 
    else: 
     if verbose: print "No change in atomic coordinates" 
    if mol.returnCode != 0: 
     sys.stderr.write(mol.returnMsg + "\n") 
    # sys.exit(mol.returnCode) 

if __name__ == '__main__': 
    import sys 
    import getopt 

    def usage(): 
     "Print helpful, accurate usage statement to stdout." 
     print "Usage: prepare_ligand4.py -l filename" 
     print 
     print " Description of command..." 
     print "   -l  ligand_filename  (.pdb or .mol2 or .pdbq format)" 
     print "   -i  list_of_filenames.txt (.pdb or .mol2 or .pdbq format)" 
     print " Optional parameters:" 
     print "  [-v] verbose output" 
     print "  [-o pdbqt_filename] (default output filename is ligand_filename_stem + .pdbqt)" 
     print "  [-d] dictionary to write types list and number of active torsions " 

     print "  [-A] type(s) of repairs to make:\n\t\t bonds_hydrogens, bonds, hydrogens (default is to do no repairs)" 
     print "  [-C] do not add charges (default is to add gasteiger charges)" 
     print "  [-p] preserve input charges on atom type, eg -p Zn" 
     print "    (default is not to preserve charges on any specific atom type)" 
     print "  [-U] cleanup type:\n\t\t nphs_lps, nphs, lps, '' (default is 'nphs_lps') " 
     print "  [-B] type(s) of bonds to allow to rotate " 
     print "    (default sets 'backbone' rotatable and 'amide' + 'guanidinium' non-rotatable)" 
     print "  [-R] index for root" 
     print "  [-F] check for and use largest non-bonded fragment (default is not to do this)" 
     print "  [-M] interactive (default is automatic output)" 
     print "  [-I] string of bonds to inactivate composed of " 
     print "     of zero-based atom indices eg 5_13_2_10 " 
     print "     will inactivate atoms[5]-atoms[13] bond " 
     print "        and atoms[2]-atoms[10] bond " 
     print "      (default is not to inactivate any specific bonds)" 
     print "  [-Z] inactivate all active torsions  " 
     print "      (default is leave all rotatable active except amide and guanidinium)" 
     print "  [-g] attach all nonbonded fragments " 
     print "      (default is not to do this)" 

    # process command arguments 
    try: 
     opt_list, args = getopt.getopt(sys.argv[1:], 'l:i:vo:d:A:Cp:U:B:R:MFI:Zgh') 
    except getopt.GetoptError, msg: 
     print 'prepare_ligand4.py: %s' %msg 
     usage() 
     sys.exit(2) 

    #'l:vo:d:A:CKU:B:R:MFI:Zg' 
    for o, a in opt_list: 
     #print "o=", o, " a=", a 
     if o in ('-l', '--l'): 
      ligand_filename = a 
      if verbose: print 'set ligand_filename to ', a 
     if o in ('-i', '--i'): 
      ligand_listfile = a 
      if verbose: print 'set ligand_listfile to ', a 
     if o in ('-v', '--v'): 
      verbose = True 
      if verbose: print 'set verbose to ', True 
     if o in ('-o', '--o'): 
      outputfilename = a 
      if verbose: print 'set outputfilename to ', a 
     if o in ('-d', '--d'): 
      dict = a 
      if verbose: print 'set dict to ', a 
     if o in ('-A', '--A'): 
      repairs = a 
      if verbose: print 'set repairs to ', a 
     if o in ('-C', '--C'): 
      charges_to_add = None 
      if verbose: print 'do not add charges' 
     if o in ('-p', '--p'): 
      preserve_charge_types+=a 
      preserve_charge_types+=',' 
      if verbose: print 'preserve initial charges on ', preserve_charge_types 
     if o in ('-U', '--U'): 
      cleanup = a 
      if verbose: print 'set cleanup to merge ', a 
     if o in ('-B', '--B'): 
      allowed_bonds = a 
      if verbose: print 'allow ', a, 'bonds set to rotate' 
     if o in ('-R', '--R'): 
      root = a 
      if verbose: print 'set root to ', root 
     if o in ('-F', '--F'): 
      check_for_fragments = True 
      if verbose: print 'set check_for_fragments to True' 
     if o in ('-M', '--M'): 
      mode = a 
      if verbose: print 'set mode to ', a 
     if o in ('-I', '--I'): 
      bonds_to_inactivate = a 
      if verbose: print 'set bonds_to_inactivate to ', a 
     if o in ('-Z', '--Z'): 
      inactivate_all_torsions = True 
      if verbose: print 'set inactivate_all_torsions to ', inactivate_all_torsions 
     if o in ('-g', '--g'): 
      attach_nonbonded_fragments = True 
      if verbose: print 'set attach_nonbonded_fragments to ', attach_nonbonded_fragments 
     if o in ('-h', '--'): 
      usage() 
      sys.exit() 

    if ligand_filename: 
     process_file(ligand_filename) 
    elif ligand_listfile: 
     # Python 2.5 does not support `with` 
     # with open(ligand_listfile) as inf: 
     #  for fname in inf: 
     #   process_file(fname.rstrip()) 
     inf = open(ligand_listfile) 
     for fname in inf: 
      process_file(fname.rstrip()) 
     inf.close() 
    else: 
     print 'prepare_ligand4: either -l (ligand filename) or -i (ligand listfile) must be specified.' 
     usage() 
     sys.exit() 

# To execute this command type: 
# prepare_ligand4.py -l pdb_file -v 
+0

我正在做一個save.tar,需要幾個小時。我稍後會試用。謝謝 – Grego

+0

setting PYTHONHOME environment ./prepare_ligand4v2.py:220: Warning: 'with' will become a reserved keyword in Python 2.6 File "./prepare_ligand4v2.py", line 220 with open(ligand_listfile) as inf: ^ SyntaxError: invalid syntax – Grego

+0

(wince)顯然你使用Python 2.5(大約2008)。我會更新腳本以不使用'with'。 –

相關問題