2015-09-15 101 views
1

Java 創建 100MB 壓縮 CSV 文件的性能問題

我需要用 Java 在 5 秒內創建一個約 100MB 的壓縮文件,其中包含一個 CSV 文件。我已經能夠生成包含 CSV 文件的 test.zip,但生成這個 zip 文件花費的時間太長(約 30 秒)。以下是我目前編寫的代碼:

/* Build the ZIP archive entirely in memory; nothing is staged on disk. */
ByteArrayOutputStream baos = new ByteArrayOutputStream();
ZipOutputStream zipOutputStream = new ZipOutputStream(baos);

/* The archive holds a single entry. The entry name is only the name the
 * CSV gets inside the archive (no such file exists on disk), so it carries
 * a .csv extension rather than .zip.
 */
ZipEntry zipEntry = new ZipEntry("Test.csv");
zipOutputStream.putNextEntry(zipEntry);

/* Write the CSV text straight into the zip stream, encoded as UTF-8. */
BufferedWriter bufferedWriter = new BufferedWriter(new OutputStreamWriter(zipOutputStream, "UTF-8"));
CsvListWriter csvListWriter = new CsvListWriter(bufferedWriter, CsvPreference.EXCEL_PREFERENCE);

/* Write the CSV header to the generated CSV file. */
csvListWriter.writeHeader(CSVGeneratorConstant.CSV_HEADERS);

/* Generate 7,000,000 rows of 6 fields each and time the whole generation. */
long startTime = System.currentTimeMillis();

for (int rowIdx = 0; rowIdx < 7000000; rowIdx++) {
    final List<String> rowContent = new LinkedList<String>();
    for (int colIdx = 0; colIdx < 6; colIdx++) {
        String str = "R" + rowIdx + "C" + colIdx + " FieldContent";
        rowContent.add(str);
    }
    csvListWriter.write(rowContent);
}

/* Close before measuring. Until the writers are closed, text still held
 * in the BufferedWriter and the data the ZIP stream writes on close have
 * not reached baos, so both the elapsed time and the size would be
 * under-reported. Closing from the outermost wrapper inwards; the later
 * close() calls are harmless if an outer close already propagated.
 */
csvListWriter.close();
bufferedWriter.close();
zipOutputStream.close();
baos.close();

long stopTime = System.currentTimeMillis();
long elapsedTime = stopTime - startTime;
System.out.println("time==" + elapsedTime/1000f + "Seconds");

/* baos now holds the complete archive, so this is the real ZIP size. */
System.out.println("Size=====" + baos.size()/(Math.pow(1024, 2)) + "MB");

我目前使用的是 Super CSV 庫,但我也嘗試過不使用 Super CSV 庫、直接在內存中創建壓縮文件,結果同樣沒有改善。你能幫幫我嗎?

+0

你確定你的機器能做到嗎?你能從命令行嘗試同樣的操作嗎?順便說一句,'mb' = 'milli-bits'(毫比特),'MB' = 'Mega-Bytes'(兆字節)。 –

+2

與其先構建一個字符串列表,爲什麼不直接寫入 ZipOutputStream 呢?這會爲你節省很多時間。 –

+3

當您對程序進行 CPU 性能分析(profiling)時,看到了什麼?哪一部分耗時最多? –

回答

0

您的測試數據大約爲 1GB,壓縮後約爲 100MB。取決於您的硬件,可能根本無法在 5 秒內完成。

我寫了一個簡單粗略(quick and dirty)的基準測試,用來說明寫入 zip 文件對性能的影響。

  • 使用 String.join() 寫入 CSV:9.6s
  • 使用 Super CSV 寫入 CSV:12.7s
  • 使用 String.join() 寫入 zip 內的 CSV:18.6s
  • 使用 Super CSV 寫入 zip 內的 CSV:22.5s

看起來,使用 Super CSV 會帶來少量開銷(〜122%),但僅僅是寫入 zip 文件就幾乎使耗時翻倍(〜190%),無論是否使用 Super CSV 都是如此。

下面是這 4 種場景的代碼。與您提供的代碼不同,我直接寫入文件(我沒有發現寫入磁盤與寫入內存(即 ByteArrayOutputStream)之間有任何差異)。在 Super CSV 的示例中我也省略了 BufferedWriter,因爲 Super CSV 內部已經使用了它;另外我使用了 try-with-resources 讓代碼更簡潔。

/**
 * Benchmark: writes 7,000,000 rows of 6 generated fields to the plain file
 * {@code supercsv.csv} through a {@code CsvListWriter} and prints the
 * elapsed wall-clock time in seconds.
 *
 * @throws Exception if the file cannot be opened or written
 */
@Test
public void testWriteToCsvFileWithSuperCSV() throws Exception {
    final long begin = System.currentTimeMillis();

    try (FileOutputStream out = new FileOutputStream(new File("supercsv.csv"));
         ICsvListWriter csv = new CsvListWriter(
                 new OutputStreamWriter(out, "UTF-8"), CsvPreference.EXCEL_PREFERENCE)) {
        int row = 0;
        while (row < 7000000) {
            // One fresh list per row, filled with the six generated cells.
            final List<String> fields = new LinkedList<>();
            for (int col = 0; col < 6; col++) {
                fields.add("R" + row + "C" + col + " FieldContent");
            }
            csv.write(fields);
            row++;
        }
    }

    // Measured after the try block, so closing the resources is included.
    final long elapsedMillis = System.currentTimeMillis() - begin;
    System.out.println("Writing to CSV with Super CSV took " + (elapsedMillis / 1000f) + " seconds");
}

/**
 * Benchmark: writes 7,000,000 rows of 6 generated fields through a
 * {@code CsvListWriter} into the entry {@code supercsvwithinzip.csv} of the
 * archive {@code supercsv.zip}, then prints the elapsed wall-clock time in
 * seconds.
 *
 * @throws Exception if the archive cannot be opened or written
 */
@Test
public void testWriteToCsvFileWithinZipWithSuperCSV() throws Exception {
    final long begin = System.currentTimeMillis();

    try (FileOutputStream archive = new FileOutputStream(new File("supercsv.zip"));
         ZipOutputStream zip = new ZipOutputStream(archive);
         ICsvListWriter csv = new CsvListWriter(
                 new OutputStreamWriter(zip, "UTF-8"), CsvPreference.EXCEL_PREFERENCE)) {

        // Open the single archive entry before any CSV text is written.
        zip.putNextEntry(new ZipEntry("supercsvwithinzip.csv"));

        int row = 0;
        while (row < 7000000) {
            final List<String> fields = new LinkedList<>();
            for (int col = 0; col < 6; col++) {
                fields.add("R" + row + "C" + col + " FieldContent");
            }
            csv.write(fields);
            row++;
        }
    }

    // Measured after the try block, so closing the resources is included.
    final long elapsedMillis = System.currentTimeMillis() - begin;
    System.out.println("Writing to CSV within zip file with Super CSV took " + (elapsedMillis / 1000f) + " seconds");
}

/**
 * Benchmark: writes 7,000,000 comma-joined rows of 6 generated fields to
 * the plain file {@code join.csv} using {@code String.join} and a
 * {@code BufferedWriter}, then prints the elapsed wall-clock time in
 * seconds.
 *
 * @throws Exception if the file cannot be opened or written
 */
@Test
public void testWriteToCsvFileWithStringJoin() throws Exception {
    final long begin = System.currentTimeMillis();

    try (FileOutputStream out = new FileOutputStream(new File("join.csv"));
         BufferedWriter lines = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"))) {

        int row = 0;
        while (row < 7000000) {
            final List<String> fields = new LinkedList<>();
            for (int col = 0; col < 6; col++) {
                fields.add("R" + row + "C" + col + " FieldContent");
            }
            // No quoting or escaping: fields are joined verbatim with commas.
            final String line = String.join(",", fields) + "\n";
            lines.append(line);
            row++;
        }
    }

    // Measured after the try block, so flushing and closing are included.
    final long elapsedMillis = System.currentTimeMillis() - begin;
    System.out.println("Writing to CSV with String.join() took " + (elapsedMillis / 1000f) + " seconds");
}

/**
 * Benchmark: writes 7,000,000 comma-joined rows of 6 generated fields into
 * the entry {@code joinwithinzip.csv} of the archive {@code join.zip}
 * using {@code String.join} and a {@code BufferedWriter}, then prints the
 * elapsed wall-clock time in seconds.
 *
 * @throws Exception if the archive cannot be opened or written
 */
@Test
public void testWriteToCsvFileWithinZipWithStringJoin() throws Exception {
    final long begin = System.currentTimeMillis();

    try (FileOutputStream archive = new FileOutputStream(new File("join.zip"));
         ZipOutputStream zip = new ZipOutputStream(archive);
         BufferedWriter lines = new BufferedWriter(new OutputStreamWriter(zip, "UTF-8"))) {

        // Open the single archive entry before any CSV text is written.
        zip.putNextEntry(new ZipEntry("joinwithinzip.csv"));

        int row = 0;
        while (row < 7000000) {
            final List<String> fields = new LinkedList<>();
            for (int col = 0; col < 6; col++) {
                fields.add("R" + row + "C" + col + " FieldContent");
            }
            // No quoting or escaping: fields are joined verbatim with commas.
            final String line = String.join(",", fields) + "\n";
            lines.append(line);
            row++;
        }
    }

    // Measured after the try block, so flushing and closing are included.
    final long elapsedMillis = System.currentTimeMillis() - begin;
    System.out.println("Writing to CSV within zip with String.join() took " + (elapsedMillis / 1000f) + " seconds");
}