import zipfile
from collections import defaultdict
from pprint import pprint
import re
from glob import glob
filenames = glob("/batch3/*C*_paired_fastqc.zip")
pattern = re.compile(r"([A-Z0-9-_]+)_L\d{3}\.*")
grouped = defaultdict(list)
for filename in filenames:
match = pattern.search(filename)
if match:
key = match.group(1)
grouped[key].append(filename)
pprint(grouped)
for i, g in enumerate(sorted(grouped.keys())):
for f in grouped[g]:
print f
print "--------"
輸出:文件名匹配不組
/batch3/0046-CL7_S7_L003_R1_001_output_paired_fastqc.zip
/batch3/0046-CL7_S7_L001_R1_001_output_paired_fastqc.zip
/batch3/0046-CL7_S7_L002_R2_001_output_paired_fastqc.zip
/batch3/0046-CL7_S7_L003_R2_001_output_paired_fastqc.zip
/batch3/0046-CL7_S7_L004_R2_001_output_paired_fastqc.zip
/batch3/0046-CL7_S7_L001_R2_001_output_paired_fastqc.zip
/batch3/0046-CL7_S7_L002_R1_001_output_paired_fastqc.zip
/batch3/0046-CL7_S7_L004_R1_001_output_paired_fastqc.zip
--------
/batch3/0047-CLI_S8_L002_R1_001_output_paired_fastqc.zip
/batch3/0047-CLI_S8_L004_R2_001_output_paired_fastqc.zip
/batch3/0047-CLI_S8_L002_R2_001_output_paired_fastqc.zip
/batch3/0047-CLI_S8_L003_R2_001_output_paired_fastqc.zip
/batch3/0047-CLI_S8_L004_R1_001_output_paired_fastqc.zip
/batch3/0047-CLI_S8_L001_R2_001_output_paired_fastqc.zip
/batch3/0047-CLI_S8_L003_R1_001_output_paired_fastqc.zip
/batch3/0047-CLI_S8_L001_R1_001_output_paired_fastqc.zip
不過,我想有:
/batch3/0046-CL7_S7_L001_R1_001_output_paired_fastqc.zip
/batch3/0046-CL7_S7_L001_R2_001_output_paired_fastqc.zip
---------
/batch3/0046-CL7_S7_L002_R1_001_output_paired_fastqc.zip
/batch3/0046-CL7_S7_L002_R2_001_output_paired_fastqc.zip
---------
/batch3/0046-CL7_S7_L003_R1_001_output_paired_fastqc.zip
/batch3/0046-CL7_S7_L003_R2_001_output_paired_fastqc.zip
---------
/batch3/0046-CL7_S7_L004_R1_001_output_paired_fastqc.zip
/batch3/0046-CL7_S7_L004_R2_001_output_paired_fastqc.zip
不幸的是,我無法弄清楚如何匹配文件名。
你所需的輸出不會出現你輸入對應。你試圖完成什麼? –
您的輸出結果不正確......它具有多個相同的文件名。看起來你正在分組在S7,S8等......當你想要L001,L002等時,所以正則表達式選擇了錯誤的部分。嘗試''_L \ d {3} _「'來代替。 – tdelaney
是的,修復輸出 – user977828