我想知道並行寫入文件是否高效。實際上,硬盤同一時間只有一個可用的讀取頭,因此 HDD 一次只能完成一項任務。但下面的測試(用 Python 編寫)與我的預期相矛盾,所以我想問:並行寫入文件是否高效?
要複製的文件大小約為 1 GB。
腳本 1(並行(//)任務:對同一個文件逐行讀取並逐行寫入,共 10 次):
#!/usr/bin/env python
from multiprocessing import Pool
def read_and_write(copy_filename,
                   source="/env/cns/bigtmp1/ERR000916_2.fastq",
                   dest_dir="/env/cns/bigtmp1"):
    """Copy ``source`` line by line into ``<dest_dir>/<copy_filename>.fastq``.

    Args:
        copy_filename: Base name (without extension) of the copy to create.
        source: Path of the file to read. Defaults to the input file used
            by this script.
        dest_dir: Directory that receives the copy. Defaults to the
            directory used by this script.

    Returns:
        ``copy_filename`` unchanged, so a caller such as ``Pool.map`` can
        tell which copy was written.
    """
    destination = "{}/{}.fastq".format(dest_dir, copy_filename)
    with open(source, "r") as fori:
        with open(destination, "w") as fout:
            for line in fori:
                # Lines yielded by file iteration keep their trailing
                # newline; appending another "\n" would double every line
                # break and the output would no longer match the input.
                fout.write(line)
    return copy_filename
def main():
    """Create ten copies of the input file using four worker processes."""
    f_names = ["test_jm_{}".format(i) for i in range(10)]
    pool = Pool(processes=4)
    try:
        # map() blocks until every copy has been written.
        pool.map(read_and_write, f_names)
    finally:
        # Shut the workers down explicitly rather than leaving the pool's
        # processes and pipes to be cleaned up at interpreter exit.
        pool.close()
        pool.join()


if __name__ == "__main__":
    main()
腳本 2(順序任務:對同一個文件逐行讀取並逐行寫入,共 10 次):
#!/usr/bin/env python
def read_and_write(copy_filename,
                   source="/env/cns/bigtmp1/ERR000916_2.fastq",
                   dest_dir="/env/cns/bigtmp1"):
    """Copy ``source`` line by line into ``<dest_dir>/<copy_filename>.fastq``.

    Args:
        copy_filename: Base name (without extension) of the copy to create.
        source: Path of the file to read. Defaults to the input file used
            by this script.
        dest_dir: Directory that receives the copy. Defaults to the
            directory used by this script.

    Returns:
        ``copy_filename`` unchanged, so the caller can tell which copy was
        written.
    """
    destination = "{}/{}.fastq".format(dest_dir, copy_filename)
    with open(source, "r") as fori:
        with open(destination, "w") as fout:
            for line in fori:
                # Lines yielded by file iteration keep their trailing
                # newline; appending another "\n" would double every line
                # break and the output would no longer match the input.
                fout.write(line)
    return copy_filename
def main():
    """Create the ten copies one after another in a single process."""
    for index in range(10):
        read_and_write("test_jm_{}".format(index))


if __name__ == "__main__":
    main()
腳本 3(並行(//)任務:將同一個文件複製 10 次):
#!/usr/bin/env python
from shutil import copyfile
from multiprocessing import Pool
def read_and_write(copy_filename,
                   source="/env/cns/bigtmp1/ERR000916_2.fastq",
                   dest_dir="/env/cns/bigtmp1"):
    """Copy ``source`` to ``<dest_dir>/<copy_filename>.fastq`` with ``copyfile``.

    Args:
        copy_filename: Base name (without extension) of the copy to create.
        source: Path of the file to copy. Defaults to the input file used
            by this script.
        dest_dir: Directory that receives the copy. Defaults to the
            directory used by this script.

    Returns:
        ``copy_filename`` unchanged, so a caller such as ``Pool.map`` can
        tell which copy was written.
    """
    copyfile(source, "{}/{}.fastq".format(dest_dir, copy_filename))
    return copy_filename
def main():
    """Create ten copies of the input file using four worker processes."""
    f_names = ["test_jm_{}".format(i) for i in range(10)]
    pool = Pool(processes=4)
    try:
        # map() blocks until every copy has been written.
        pool.map(read_and_write, f_names)
    finally:
        # Shut the workers down explicitly rather than leaving the pool's
        # processes and pipes to be cleaned up at interpreter exit.
        pool.close()
        pool.join()


if __name__ == "__main__":
    main()
腳本 4(順序任務:將同一個文件複製 10 次):
#!/usr/bin/env python
from shutil import copyfile
def read_and_write(copy_filename,
                   source="/env/cns/bigtmp1/ERR000916_2.fastq",
                   dest_dir="/env/cns/bigtmp1"):
    """Copy ``source`` to ``<dest_dir>/<copy_filename>.fastq`` with ``copyfile``.

    Args:
        copy_filename: Base name (without extension) of the copy to create.
        source: Path of the file to copy. Defaults to the input file used
            by this script.
        dest_dir: Directory that receives the copy. Defaults to the
            directory used by this script.

    Returns:
        ``copy_filename`` unchanged, so the caller can tell which copy was
        written.
    """
    copyfile(source, "{}/{}.fastq".format(dest_dir, copy_filename))
    return copy_filename
def main():
    """Create the ten copies one after another in a single process."""
    for index in range(10):
        read_and_write("test_jm_{}".format(index))


if __name__ == "__main__":
    main()
結果:
$ # // task to read and write line by line 10 times a same file
$ time python read_write_1.py
real 1m46.484s
user 3m40.865s
sys 0m29.455s
$ rm test_jm*
$ # task to read and write line by line 10 times a same file
$ time python read_write_2.py
real 4m16.530s
user 3m41.303s
sys 0m24.032s
$ rm test_jm*
$ # // task to copy 10 times a same file
$ time python read_write_3.py
real 1m35.890s
user 0m10.615s
sys 0m36.361s
$ rm test_jm*
$ # task to copy 10 times a same file
$ time python read_write_4.py
real 1m40.660s
user 0m7.322s
sys 0m25.020s
$ rm test_jm*
這些初步結果似乎表明,並行(//)IO 讀寫更高效。
感謝您的指點。
您有什麼證據表明硬盤只有單個讀取頭? –
您寫入文件的任何數據都會先存放在計算機的 RAM 中,直到操作系統抽空處理並將其實際寫入磁盤。 – kindall
你期待什麼結果? – AChampion