> from Bio import SeqIO
> from Bio.Blast import NCBIXML
> infile_path = '/home/edson/ungulate/ungulate.fa' # this is a file
> which contain unaligned nucleotide sequences outfile_path =
> '/home/edson/ungulate/tblastn_result.fa'
> for seq_record in SeqIO.parse(infile_path, 'fasta'):
> flag = seq_record.description # a flag is sequence identifier in a fasta file format
with open(outfile_path, 'a') as outfile:
with open('/home/edson/ungulate/tblastn_result.xml') as tblastn_file:
tblastn_records = NCBIXML.parse(tblastn_file)
for tblastn_record in tblastn_records:
for alignment in tblastn_record.alignments[:4]:
for hsp in alignment.hsps:
if flag in alignment.title:
# this cross check if sequence identifier is present in an XML file
> sub_record = seq_record.seq[hsp.sbjct_start:hsp.sbjct_end]
# this takes sequences in an infile path and slice them based on tblastn output
> outfile.write('>' + seq_record.description + '\n')
> outfile.write(str(sub_record + '\n'))
爲什麼大於號?你是否點擊了報價按鈕,然後是代碼按鈕? – user2357112
「tblastn」和「alignment.hsps」(典型)的「len」是什麼?你的程序哪一部分很慢? – goncalopp
您分配'flag'的循環會在第一次之後的每次迭代中覆蓋'flag'的舊值。你想要一個標誌列表嗎?或者'for'循環只有一次迭代? – user2357112