2017-05-13 203 views
0

我有一個大約 1 GB 的大型數據集,需要將其從 .txt 轉換爲 .arff 格式。我找到的所有資料都是先將 .txt 轉換爲 .csv,然後再轉換爲 .arff,但在線轉換器支持的最大文件大小只有 100 MB。有人能告訴我該如何轉換這 1 GB 的數據嗎?(標題:將 .txt 中的大數據集轉換爲 .arff 格式)

回答

0

您可以使用此代碼段將txt文件轉換爲csv。

import java.io.*; 

/**
 * Command-line entry point that converts a tab-separated text file into three
 * CSV files, one per filter mode understood by {@code Read.exec}.
 *
 * <p>Usage: {@code java Convert [inputFile [outputPrefix]]}. Without arguments
 * the program reads {@code soal01.txt} and writes {@code jawaban1.csv},
 * {@code jawaban2.csv} and {@code jawaban3.csv}.
 */
public class Convert {

    /** Input file used when no path is given on the command line. */
    private static final String DEFAULT_INPUT = "soal01.txt";

    /** Output file name prefix used when none is given on the command line. */
    private static final String DEFAULT_OUTPUT_PREFIX = "jawaban";

    /** First and last filter mode passed to {@code Read.exec}. */
    private static final int FIRST_MODE = 1;
    private static final int LAST_MODE = 3;

    /**
     * Runs the conversion once per filter mode.
     *
     * @param args optional: {@code args[0]} is the tab-separated input file,
     *             {@code args[1]} is the prefix of the generated CSV files
     */
    public static void main(String[] args) {
        String input = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        String prefix = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PREFIX;

        System.out.println("Initialize ...");
        Read read = new Read(input);
        Print print = new Print();
        // The same writer object is re-targeted at a new file on every pass,
        // so it only has to be attached to the reader once.
        read.setPrint(print);

        for (int mode = FIRST_MODE; mode <= LAST_MODE; mode++) {
            String output = prefix + mode + ".csv";
            try {
                print.setFileName(output);
                read.exec(mode);
            } catch (IOException ex) {
                // Keep going: a failure on one output file should not prevent
                // the remaining modes from being produced.
                System.err.println("Could not write " + output);
                ex.printStackTrace();
            }
        }
        System.out.println("Finished");
    }
}

class Print { 

protected String fileName; 

protected FileWriter writer; 

public Print() {} 

public Print(String fileName) throws IOException { 
    this.fileName = fileName; 
    this.writer = new FileWriter(this.fileName); 
} 

public String getFileName() { 
    return this.fileName; 
} 

public void setFileName(String fileName) throws IOException { 
    this.fileName = fileName; 
    this.writer = new FileWriter(this.fileName); 
} 

public void close() throws IOException { 
    this.writer.flush(); 
    this.writer.close(); 
} 

public void addRow(String[] c) throws IOException { 
    int l = c.length; 
    for(int i = 0; i < l; i++) { 
     this.writer.append(c[i]); 
     if(i != (l - 1)) { 
      this.writer.append(","); 
     } 
    } 
    this.writer.append('\n'); 
    } 
} 

/**
 * Reads a tab-separated text file line by line and forwards the rows that
 * pass a filter to a {@code Print} instance.
 *
 * <p>The file is streamed one line at a time, so its size is not limited by
 * the available heap.
 */
class Read {

    /** Rows are kept in modes 2 and 3 only when column 1 is strictly greater than this value. */
    private static final double THRESHOLD = 20;

    /** Path of the tab-separated input file. */
    protected String fileName;

    /** Reader used by the most recent {@link #exec(Integer)} call; closed when that call returns. */
    protected BufferedReader bufferedReader;

    /** Destination for the rows that pass the filter. */
    protected Print print;

    /**
     * @param fileName path of the tab-separated input file
     */
    public Read(String fileName) {
        this.fileName = fileName;
    }

    /**
     * @param print    destination for the rows that pass the filter
     * @param fileName path of the tab-separated input file
     */
    public Read(Print print, String fileName) {
        this.print = print;
        this.fileName = fileName;
    }

    /** Sets the destination for the rows that pass the filter. */
    public void setPrint(Print print) {
        this.print = print;
    }

    /** @return the destination for the rows that pass the filter */
    public Print getPrint() {
        return this.print;
    }

    /**
     * Copies the rows selected by {@code type} from the input file to the
     * configured {@code Print}, then closes both.
     *
     * <ul>
     *   <li>{@code 1}: every row is copied.</li>
     *   <li>{@code 2}: rows with at least two columns whose column 1 parses as
     *       a number greater than 20.</li>
     *   <li>{@code 3}: as mode 2, but the row must also have at least three
     *       columns.</li>
     *   <li>any other value: no row is copied.</li>
     * </ul>
     *
     * <p>In modes other than 1, rows whose column 1 is not numeric (for
     * example a header line or free text) are skipped instead of aborting the
     * whole conversion. I/O errors are reported on standard error and end the
     * run; the output is closed in every case.
     *
     * @param type the filter mode; must not be {@code null}
     * @throws IllegalStateException if no {@code Print} has been set
     * @throws NullPointerException  if {@code type} is {@code null}
     */
    public void exec(Integer type) {
        if (this.print == null) {
            throw new IllegalStateException("No Print target set; call setPrint first");
        }
        final int mode = type.intValue();

        try {
            this.bufferedReader = new BufferedReader(new FileReader(this.fileName));
            String line;
            while ((line = this.bufferedReader.readLine()) != null) {
                String[] columns = line.split("\t");
                if (accepts(mode, columns)) {
                    this.print.addRow(columns);
                }
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        } finally {
            // Both resources are released here so that neither leaks when the
            // input cannot be opened or an unexpected exception escapes.
            try {
                if (this.bufferedReader != null) {
                    this.bufferedReader.close();
                }
            } catch (IOException ex) {
                ex.printStackTrace();
            }
            try {
                this.print.close();
            } catch (IOException ex) {
                ex.printStackTrace();
            }
        }
    }

    /** Decides whether a row, already split into columns, is kept in the given mode. */
    private static boolean accepts(int mode, String[] columns) {
        if (mode == 1) {
            return true;
        }
        if (columns.length < 2) {
            return false;
        }
        double value;
        try {
            value = Double.parseDouble(columns[1]);
        } catch (NumberFormatException ex) {
            // Header line or text column: it cannot satisfy the numeric filter.
            return false;
        }
        if (!(value > THRESHOLD)) {
            return false;
        }
        if (mode == 2) {
            return true;
        }
        // String.split never yields null elements, so the length check alone
        // establishes that column 2 exists.
        return mode == 3 && columns.length >= 3;
    }
}
+0

謝謝你的幫助,但我運行這段代碼時出現了以下錯誤: Exception in thread "main" java.lang.NumberFormatException: For input string: "青少年分享如何基督教影響到他們生活" at sun.misc.FloatingDecimal.readJavaFormatString(FloatingDecimal.java:2043) at sun.misc.FloatingDecimal.parseDouble(FloatingDecimal.java:110) at java.lang.Double.parseDouble(Double.java:538) at Read.exec(Convert.java:97) at Convert.main(Convert.java:12) – medooSa

相關問題