2013-04-27 195 views
1

嗨,我正在編寫我們自己的霍夫曼編碼項目。我目前無法將二進制1和0寫入輸出文件。它適用於較小的輸入文件,但對於非常大的文件,它不會將任何內容寫入輸出文件。負責書寫的方法是compress方法。任何幫助,將不勝感激。謝謝!寫大二進制數據到文件

package proj3; 

import java.io.File; 
import java.io.FileInputStream; 
import java.io.FileNotFoundException; 
import java.io.FileOutputStream; 
import java.io.IOException; 
import java.util.ArrayList; 
import java.util.Collections; 
import java.util.LinkedHashMap; 
import java.util.Map; 
import java.util.Scanner; 
import java.util.PriorityQueue; 

public class Project3 { 

//variables for PriorityQueue and Huffman Tree 
private static PriorityQueue<BinaryNode<Character>> queue; 
private static BinaryNode<Character> huffTree; 
private static Map<Character, String> table = new LinkedHashMap<Character, String>(); 

/** 
* Method for creating Huffman Tree 
* @param counts Map that contains all characters and their frequencies 
* @return the Huffman Tree 
*/ 
public static BinaryNode<Character> makeTree(Map<Character, Integer> counts) 
{ 
    queue = new PriorityQueue<BinaryNode<Character>>(); 

    for(Character c : counts.keySet()) 
    { 
     BinaryNode<Character> tree = new BinaryNode<Character>(c, counts.get(c), null, null); 
     queue.add(tree); 
    } 

    while(!queue.isEmpty()) 
    { 
     if(queue.size() >= 2) 
     { 
      BinaryNode<Character> n1 = queue.remove(); 
      BinaryNode<Character> n2 = queue.remove(); 
      Integer weight = n1.getFreq() + n2.getFreq(); 
      huffTree = new BinaryNode<Character>(null, weight, n1, n2); 
      queue.add(huffTree); 
     } 
     if(queue.size() == 1) 
     { 
      return queue.remove(); 
     } 
    } 
    return huffTree; 
} 

public static void encode(BinaryNode<Character> node, String s) 
{ 
    if(!node.isLeaf()) 
    { 
     encode(node.getLeft(), s + "0"); 
     encode(node.getRight(), s + "1"); 
    } 
    else 
    { 
     table.put(node.getElement(), s); 
    } 
} 

public static void compress(String in, String out) throws IOException 
{ 
    try 
    { 
     File outFile = new File(out); 
     FileOutputStream compressedFile = new FileOutputStream(outFile); 
     Scanner infile = new Scanner(new FileInputStream(in)); 
     while(infile.hasNext()) 
     { 
      infile.useDelimiter(""); 
      String str = infile.next(); 
      Character character = str.charAt(0); 
      for(Character c : table.keySet()) 
      { 
       if(c == character){ 
        compressedFile.write(table.get(c).getBytes()); 
        compressedFile.flush(); 
       } 
      } 
     } 
     for(Byte b : table.get('^').getBytes()) 
     { 
      compressedFile.write(b); 
     } 
     infile.close(); 
     compressedFile.close(); 
    } 
    catch (FileNotFoundException e) 
    { 
     System.err.println("File not found."); 
     e.printStackTrace(); 
    } 
} 

public static void decompress(String s) 
{ 

} 

public static void printEncodings(Map<Character, String> m) 
{ 
    ArrayList<Character> chars = new ArrayList<Character>(); 

    System.out.println("Character Encodings"); 
    System.out.println("---------------------"); 
    for(Character c : m.keySet()) 
    { 
     chars.add(c); 
     Collections.sort(chars); 
    } 
    for(Character c : chars) 
    { 
     System.out.print(c + "\t" + m.get(c) + "\n"); 
    } 
    System.out.println(); 
    System.out.println("Total Characters: " + chars.size()); 
} 

/** 
* Method for creating map with character and its frequencies 
* @param s the file name to be opened 
* @return the Map containing characters and frequencies 
*/ 
public static Map<Character, Integer> charCount(String s){ 

    Map<Character, Integer> counts = new LinkedHashMap<Character, Integer>(); 
    ArrayList<Character> chars = new ArrayList<Character>(); 

    try { 
     Scanner file = new Scanner(new FileInputStream(s)); 
     while(file.hasNext()){ 
      file.useDelimiter(""); 
      String str = file.next(); 
      Character c = str.charAt(0); 
      if(counts.containsKey(c)){ 
       counts.put(c, counts.get(c) + 1); 
      } 
      else{ 
       counts.put(c, 1); 
      } 
     } 
     counts.put('^', 1); 
     System.out.println("Character Frequencies"); 
     System.out.println("---------------------"); 
     for(Character c : counts.keySet()) 
     { 
      chars.add(c); 
      Collections.sort(chars); 
     } 
     for(Character c : chars){ 
      System.out.println(c + "\t" + counts.get(c)); 
     } 
     System.out.println(); 
     System.out.println("Total characters: " + chars.size() + "\n"); 
     file.close(); 
    } 
    catch (FileNotFoundException e) { 
     System.err.println("File not found."); 
     System.exit(0); 
    } 
    return counts; 
} 

public static void main(String[] args){ 

    if(args.length != 3) 
    { 
     throw new IllegalArgumentException("Invalid number of arguments."); 
    } 
    encode(makeTree(charCount(args[0])), ""); 
    printEncodings(table); 
    try { 
     compress(args[0], args[1]); 
    } catch (IOException e) { 
     e.printStackTrace(); 
    } 
} 

} 
+1

方案是否結束?你是否已經通過該計劃來了解它的功能? – 2013-04-27 20:56:55

+0

是的,它打印出所有正確的字符頻率和編碼表 – somtingwong 2013-04-27 21:00:23

+0

大數據寫入時它是空文件嗎? – Kowser 2013-04-27 21:21:01

回答

0

我很確定你正在面對compress()方法中的性能/內存問題。編碼打印在你的主要方法中,但我相信文件輸出會卡住。我可以看到優化至少三種可能在你的代碼:

  1. 您使用的是Scanner讀取由字符輸入文件字符,但您不使用由掃描儀提供的任何解析功能。改爲使用InputStreamReader

  2. 您正在循環查找Huffman表中的一組鍵並檢查相等性。您可以簡單地使用當前字符來獲取它映射到的字符串並省略循環。

  3. 您不需要循環字節數組將其寫入輸出文件。 write()方法FileOutputStream可以將整個字節數組作爲參數。

憑藉我卑微的Java技能,我寧願以下面的方式實現它;請注意,這是未經測試的代碼,因爲我沒有你BinaryNode類:

public static void compress(String in, String out) throws IOException 
{ 
    try 
     { 
      File outFile = new File(out); 
      FileOutputStream compressedFile = new FileOutputStream(outFile); 

      // 1. Use a Reader instead of a Scanner; 
      // make sure to use the correct charset 
      FileInputStream fis= new FileInputStream(in); 
      Reader reader = new InputStreamReader(fis,   
       Charset.forName("US-ASCII")); 

      // use BufferedReader for even better performance 
      Reader bufferedReader = new BufferedReader(reader); 

      int r; 
      while ((r = bufferedReader.read()) != -1) { 
       char ch= (char) r; 

       // 2. Get the string for this character directly instead of 
       // looping the keySet and checking for equivalence 
       String s= table.get(ch); 
       if (s != null) { 
        // 3. Write entire array of bytes instead of 
        // looping and writing bytes one by one 
        compressedFile.write(s.getBytes()); 
       } 
      } 
      fis.close(); 
      compressedFile.close(); 
     } 
    ... 
+0

我發誓我已經把什麼東西你已經寫了,但它仍然不工作.....是否有可能我的代碼的其他部分是如此低效,以至於它不適用於大文件? – somtingwong 2013-04-27 22:31:36

+0

我認爲不,其餘部分應該足夠高效。檢查以下內容:1)當args.length!= 3時您正在投擲,但只在您的代碼中使用2個參數。 2)args [0]是否指向現有文件?是否存在args [1]中的所有目錄? 3)是否在修改代碼後清理/重新編譯項目? 4)BinaryNode是否實現java.lang.comparable(PriorityQueue必需)? 5)你是否在調試模式下運行代碼並檢查任何日誌或控制檯輸出是否有錯誤/堆棧跟蹤? 6)輸出文件是以0 KB寫入還是缺失? 7)你的代碼是否終止? – Marcellus 2013-04-28 11:14:41

0

很可能需要調用compressedFile.flush()

提高它像這樣

  if(c == character){ 
       compressedFile.write(table.get(c).getBytes()); 
       compressedFile.flush(); 
      } 

另外考慮使用try/catch。將提高執行的完整性。

+0

沒有對不起,它仍然沒有寫任何東西:( – somtingwong 2013-04-27 21:13:55

+0

@ user1813076:再次更新 – Kowser 2013-04-27 21:23:48

+0

對不起,但我相信我更新完全按照你寫的,但它仍然無法正常工作.... – somtingwong 2013-04-27 21:31:57