2011-05-09 31 views

回答

4
/**
 * A {@link BufferedReader} that remembers every line it has returned and
 * replaces any repeated line with an empty string.
 *
 * <p>Because a suppressed duplicate is reported as {@code ""}, callers cannot
 * distinguish it from a genuinely empty line in the input.
 */
public class UniqueLineReader extends BufferedReader {
    // Every non-null line handed out so far.
    Set<String> lines = new HashSet<String>();

    /**
     * Wraps the given reader.
     *
     * @param arg0 the underlying character source
     */
    public UniqueLineReader(Reader arg0) {
        super(arg0);
    }

    /**
     * Reads the next line of the underlying reader.
     *
     * @return the line if it has not been seen before, {@code ""} if it is a
     *         repeat of an earlier line, or {@code null} at end of stream
     * @throws IOException if the underlying reader fails
     */
    @Override
    public String readLine() throws IOException {
        String line = super.readLine();
        if (line == null) {
            // Never store null in the set: end of stream must be reported as
            // null on every call, not only on the first one.
            return null;
        }
        return lines.add(line) ? line : "";
    }

    // for testing..

    /**
     * Prints the distinct, non-empty lines of a file to standard output.
     *
     * @param args optional; {@code args[0]} is the file to read, defaulting to
     *             {@code "test.txt"} when absent
     */
    public static void main(String args[]) {
        String path = args.length > 0 ? args[0] : "test.txt";
        // try-with-resources closes the reader (and the file) on every path.
        try (UniqueLineReader br =
                new UniqueLineReader(new InputStreamReader(new FileInputStream(path)))) {
            String strLine;
            // Read file line by line
            while ((strLine = br.readLine()) != null) {
                // Skip the "" placeholder returned for duplicates (compare by value).
                if (!strLine.isEmpty()) {
                    System.out.println(strLine);
                }
            }
        } catch (IOException e) {
            System.err.println("Error: " + e.getMessage());
        }
    }

}

修改的版本:

/**
 * A {@link BufferedReader} whose {@link #readLine()} silently skips any line
 * that has already been returned, so callers only ever see each distinct line
 * once, in first-occurrence order.
 */
public class UniqueLineReader extends BufferedReader {
    // Every line handed out so far.
    Set<String> lines = new HashSet<String>();

    /**
     * Wraps the given reader.
     *
     * @param arg0 the underlying character source
     */
    public UniqueLineReader(Reader arg0) {
        super(arg0);
    }

    /**
     * Reads lines from the underlying reader until one is found that has not
     * been returned before.
     *
     * @return the next previously unseen line, or {@code null} at end of stream
     * @throws IOException if the underlying reader fails
     */
    @Override
    public String readLine() throws IOException {
        String line;
        // Stop on null explicitly: putting null into the set would make every
        // call after end of stream loop forever.
        while ((line = super.readLine()) != null) {
            if (lines.add(line)) {
                return line;
            }
        }
        return null;
    }

    /**
     * Prints the distinct lines of a file to standard output.
     *
     * @param args optional; {@code args[0]} is the file to read, defaulting to
     *             {@code "/home/emil/Desktop/ff.txt"} when absent
     */
    public static void main(String args[]) {
        String path = args.length > 0 ? args[0] : "/home/emil/Desktop/ff.txt";
        // try-with-resources closes the reader (and the file) on every path.
        try (UniqueLineReader br =
                new UniqueLineReader(new InputStreamReader(new FileInputStream(path)))) {
            String strLine;
            // Read file line by line
            while ((strLine = br.readLine()) != null) {
                System.out.println(strLine);
            }
        } catch (IOException e) {
            System.err.println("Error: " + e.getMessage());
        }
    }
}
1

使用BufferedReader讀取文本文件並將其存儲在LinkedHashSet中。打印出來。

下面是一個例子:

/**
 * Removes repeated lines from a block of text while keeping the order in
 * which each line first appeared.
 */
public class DuplicateRemover {

    /**
     * Returns {@code aHunk} with every repeated line dropped.
     *
     * <p>Lines are separated on {@code "\n"} only, so a {@code "\r"} before the
     * newline stays part of the line. Every line in the result is terminated
     * by {@code "\n"}, even if the last input line was not.
     *
     * @param aHunk the text to filter; must not be {@code null}
     * @return the distinct lines in first-occurrence order, or {@code ""} for
     *         empty input
     */
    public String stripDuplicates(String aHunk) {
        // "".split("\n") yields one empty element, which would otherwise turn
        // empty input into a lone "\n".
        if (aHunk.isEmpty()) {
            return "";
        }

        // LinkedHashSet drops repeats but iterates in insertion order.
        Set<String> uniqueLines =
                new LinkedHashSet<String>(Arrays.asList(aHunk.split("\n")));

        StringBuilder result = new StringBuilder(aHunk.length() + 1);
        for (String line : uniqueLines) {
            result.append(line).append('\n');
        }
        return result.toString();
    }

}

這裏有一些用來驗證的單元測試(請忽略我邪惡的複製粘貼 ;)):

import org.junit.Test; 
import static org.junit.Assert.*; 

/**
 * Unit tests for {@code DuplicateRemover.stripDuplicates}.
 */
public class DuplicateRemoverTest {

    /** Repeats are dropped from input whose lines are in alphabetical order. */
    @Test
    public void removesDuplicateLines() {
        assertStripsTo("a\nb\nc\nd\n", "a\nb\nc\nb\nd\n");
    }

    /** Repeats are dropped and first-occurrence order is kept for unsorted input. */
    @Test
    public void removesDuplicateLinesUnalphabetized() {
        assertStripsTo("z\nb\nc\n", "z\nb\nc\nb\nz\n");
    }

    /** Runs a fresh remover over {@code input} and compares the result with {@code expected}. */
    private static void assertStripsTo(String expected, String input) {
        DuplicateRemover remover = new DuplicateRemover();
        assertEquals(expected, remover.stripDuplicates(input));
    }

}
+0

嗯排序在文件中的內容,讓我。不知道。 – Mike 2011-05-09 02:03:30

2

如果你把每一行放進 LinkedHashSet,它會忽略重複的行(因爲它是一個 Set),同時保留順序(因爲它是鏈接的)。如果你只是想知道某一行之前是否出現過,可以在讀取過程中把各行放入一個普通的 Set,並跳過 Set 中已經包含的行。

0

這裏我用一個 HashSet 來存儲已經見過的行

Scanner scan;//input 
// Lines seen so far; used only for the membership test, so the set's own
// iteration order is irrelevant.
Set<String> lines = new HashSet<String>();
// Collects the distinct lines in the order they were read.
StringBuilder strb = new StringBuilder();
while (scan.hasNextLine()) {
    String line = scan.nextLine();
    // add() returns false when the line is already in the set.
    if (lines.add(line)) {
        // nextLine() strips the line terminator, so put one back; otherwise
        // all kept lines would be fused into a single line.
        strb.append(line).append('\n');
    }
}
+0

但是我們可以確保輸入行和輸出行的順序與散列保持一致嗎? – 2011-05-09 01:50:59

+0

我也將它們添加到一個字符串生成器中,當你遍歷整個文本時,你拋棄了這個集合並保留'strb.toString()' – 2011-05-09 01:53:48

+0

當你添加到一個集合時,你不需要檢查如果它已經在那裏。另外,HashSets不保證順序。 – Kal 2011-05-09 01:55:31

1

這裏是另一種解決方案。讓我們來使用UNIX!

# Remove duplicate lines, keeping the first occurrence of each.
# - Write to a temporary file first: redirecting straight back onto the input
#   truncates it before it is read, leaving an empty file.
# - awk is used instead of uniq because uniq only collapses ADJACENT repeats.
awk '!seen[$0]++' MyFile.java > MyFile.java.tmp && mv MyFile.java.tmp MyFile.java

編輯:哦等等,我重新讀了一遍這個問題。既然我做到了與語言無關,這算是一個合法的解決方案嗎?

+0

我想你可以在這裏使用類似的解決方案:http://stackoverflow.com/questions/1088113/is-there-a-java-library-of-unix-functions。不過,如果你在UNIX系統上,我會嘗試爲腳本編寫鉤子。 – Mike 2011-05-09 02:13:43

1

使用新的 Java Stream API 可以輕鬆地刪除文本或文件中的重複行。Stream 支持多種聚合操作,例如排序和去重,並且可以配合 Java 現有的數據結構及其方法使用。下面的例子可以用 Stream API 來刪除重複行或對結果排序

package removeword; 

import java.io.IOException; 
import java.nio.file.Files; 
import java.nio.file.OpenOption; 
import java.nio.file.Path; 
import java.nio.file.Paths; 
import java.util.Arrays; 
import java.util.Scanner; 
import java.util.stream.Stream; 
import static java.nio.file.StandardOpenOption.*; 
import static java.util.stream.Collectors.joining; 

/**
 * Copies a text file to another file with duplicate lines removed, using the
 * Stream API. The first occurrence of each line is kept, in its original order.
 */
public class Java8UniqueWords {

    /**
     * Reads the source file, drops repeated lines, prints the result and
     * writes it to the destination file.
     *
     * @param args optional; {@code args[0]} is the source file and
     *             {@code args[1]} the destination file. Missing arguments fall
     *             back to {@code C:/Users/source.txt} and
     *             {@code C:/Users/removedDouplicate_file.txt}.
     * @throws IOException if the source cannot be read or the destination
     *                     cannot be written
     */
    public static void main(String[] args) throws IOException {
        Path sourcePath = Paths.get(args.length > 0 ? args[0] : "C:/Users/source.txt");
        Path changedPath =
                Paths.get(args.length > 1 ? args[1] : "C:/Users/removedDouplicate_file.txt");

        // Files.lines holds the file open, so the stream must be closed.
        // Optional variations: add .map(line -> line.toLowerCase()) before
        // distinct() to apply a String method to each line first, or .sorted()
        // after it to sort the distinct lines.
        try (Stream<String> lines = Files.lines(sourcePath).distinct()) {
            final String uniqueWords = lines.collect(joining("\n"));
            System.out.println("Final Output:" + uniqueWords);
            // Files.lines decodes as UTF-8, so encode the same way on output.
            // CREATE lets the destination be absent; without it the write fails
            // when the file does not exist yet.
            Files.write(
                    changedPath,
                    uniqueWords.getBytes(java.nio.charset.StandardCharsets.UTF_8),
                    CREATE, WRITE, TRUNCATE_EXISTING);
        }
    }
}