2013-10-10 74 views
-6

這是我的porter stemmer程序....我得到了char()函數中字符數組的輸出....我試圖將它轉換爲字符串...但它需要較舊的值,而不是新的值。例如 - 「看」是源於「看」.....「看」是在字符數組(b [c]在莖())但同時將其轉換爲字符串取值爲「正在查看」
程序包文件;如何將char數組轉換爲java中的字符串

import java.io.BufferedReader; 
import java.io.BufferedWriter; 
import java.io.FileReader; 
import java.io.FileWriter; 
import java.util.StringTokenizer; 
import java.util.Vector; 

/** 
* 
* @author sky 
*/ 

public class stemmer { 



    public static String line1,line,element,sentence,str; 
    private char[] b; 
    private int i,  /* offset into b */ 
       i_end, /* offset to end of stemmed word */ 
       j, k; 
    private static final int INC = 50; 
        /* unit of size whereby b is increased */ 

    public stemmer() 
    { 
     //b = new char[INC]; 
     i = 0; 
     i_end = 0; 
    } 

    /** 
    * Add a character to the word being stemmed. When you are finished 
    * adding characters, you can call stem(void) to stem the word. 
    */ 

    public void add(char ch) 
    { 
     System.out.println("in add() function"); 
     if (i == b.length) 
     { 
      char[] new_b = new char[i+INC]; 
     for (int c = 0; c < i; c++) 
     new_b[c] = b[c]; 
     b = new_b; 
     } 
     b[i++] = ch; 

    } 



    /** Adds wLen characters to the word being stemmed contained in a portion 
    * of a char[] array. This is like repeated calls of add(char ch), but 
    * faster. 
    */ 

    public void add(char[] w, int wLen) 
    { if (i+wLen >= b.length) 
     { 
     char[] new_b = new char[i+wLen+INC]; 
     for (int c = 0; c < i; c++) 
      new_b[c] = b[c]; 
      b = new_b; 
     } 
     for (int c = 0; c < wLen; c++) 
      b[i++] = w[c]; 
    } 
public void addstring(String s1) 
{ 
    b=new char[s1.length()]; 
    for(int k=0;k<s1.length();k++) 
    { 
     b[k] = s1.charAt(k); 
    //System.out.println(b[k]); 
    } 
    i=s1.length(); 
} 
    /** 
    * After a word has been stemmed, it can be retrieved by toString(), 
    * or a reference to the internal buffer can be retrieved by getResultBuffer 
    * and getResultLength (which is generally more efficient.) 
    */ 
    public String toString() { return new String(b,0,i_end); } 

    /** 
    * Returns the length of the word resulting from the stemming process. 
    */ 
    public int getResultLength() { return i_end; } 

    /** 
    * Returns a reference to a character buffer containing the results of 
    * the stemming process. You also need to consult getResultLength() 
    * to determine the length of the result. 
    */ 
    public char[] getResultBuffer() { return b; } 

    /* cons(i) is true <=> b[i] is a consonant. */ 

    private final boolean cons(int i) 
    { switch (b[i]) 
     { case 'a': case 'e': case 'i': case 'o': case 'u': return false; 
     case 'y': return (i==0) ? true : !cons(i-1); 
     default: return true; 
     } 
    } 

    /* m() measures the number of consonant sequences between 0 and j. if c is 
     a consonant sequence and v a vowel sequence, and <..> indicates arbitrary 
     presence, 

     <c><v>  gives 0 
     <c>vc<v>  gives 1 
     <c>vcvc<v> gives 2 
     <c>vcvcvc<v> gives 3 
     .... 
    */ 

    private final int m() 
    { int n = 0; 
     int i = 0; 
     while(true) 
     { if (i > j) return n; 
     if (! cons(i)) break; i++; 
     } 
     i++; 
     while(true) 
     { while(true) 
     { if (i > j) return n; 
       if (cons(i)) break; 
       i++; 
     } 
     i++; 
     n++; 
     while(true) 
     { if (i > j) return n; 
      if (! cons(i)) break; 
      i++; 
     } 
     i++; 
     } 
    } 

    /* vowelinstem() is true <=> 0,...j contains a vowel */ 

    private final boolean vowelinstem() 
    { int i; for (i = 0; i <= j; i++) if (! cons(i)) return true; 
     return false; 
    } 

    /* doublec(j) is true <=> j,(j-1) contain a double consonant. */ 

    private final boolean doublec(int j) 
    { if (j < 1) return false; 
     if (b[j] != b[j-1]) return false; 
     return cons(j); 
    } 

    /* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant 
     and also if the second c is not w,x or y. this is used when trying to 
     restore an e at the end of a short word. e.g. 

     cav(e), lov(e), hop(e), crim(e), but 
     snow, box, tray. 

    */ 

    private final boolean cvc(int i) 
    { if (i < 2 || !cons(i) || cons(i-1) || !cons(i-2)) return false; 
     { int ch = b[i]; 
     if (ch == 'w' || ch == 'x' || ch == 'y') return false; 
     } 
     return true; 
    } 

    private final boolean ends(String s) 
    { 

     int l = s.length(); 
    int o = k-l+1; 
     if (o < 0) 
      return false; 
     for (int i = 0; i < l; i++) 
      if (b[o+i] != s.charAt(i)) 
      return false; 
     j = k-l; 
     return true; 
    } 

    /* setto(s) sets (j+1),...k to the characters in the string s, readjusting 
     k. */ 

    private final void setto(String s) 
    { int l = s.length(); 
     int o = j+1; 
     for (int i = 0; i < l; i++) 
      b[o+i] = s.charAt(i); 
     k = j+l; 
    } 

    /* r(s) is used further down. */ 

    private final void r(String s) { if (m() > 0) setto(s); } 

    /* step1() gets rid of plurals and -ed or -ing. e.g. 

      caresses -> caress 
      ponies -> poni 
      ties  -> ti 
      caress -> caress 
      cats  -> cat 

      feed  -> feed 
      agreed -> agree 
      disabled -> disable 

      matting -> mat 
      mating -> mate 
      meeting -> meet 
      milling -> mill 
      messing -> mess 

      meetings -> meet 

    */ 

    private final void step1() 
    { 

     if (b[k] == 's') 
     { if (ends("sses")) k -= 2; else 
     if (ends("ies")) setto("i"); else 
     if (b[k-1] != 's') k--; 
     } 
     if (ends("eed")) { if (m() > 0) k--; } else 
     if ((ends("ed") || ends("ing")) && vowelinstem()) 
     { k = j; 
     if (ends("at")) setto("ate"); else 
     if (ends("bl")) setto("ble"); else 
     if (ends("iz")) setto("ize"); else 
     if (doublec(k)) 
     { k--; 
      { int ch = b[k]; 
       if (ch == 'l' || ch == 's' || ch == 'z') k++; 
      } 
     } 
     else if (m() == 1 && cvc(k)) setto("e"); 
    } 

    } 

    /* step2() turns terminal y to i when there is another vowel in the stem. */ 

    private final void step2() { if (ends("y") && vowelinstem()) b[k] = 'i'; } 

    /* step3() maps double suffices to single ones. so -ization (= -ize plus 
     -ation) maps to -ize etc. note that the string before the suffix must give 
     m() > 0. */ 

    private final void step3() { if (k == 0) return; /* For Bug 1 */ switch (b[k-1]) 
    { 
     case 'a': if (ends("ational")) { r("ate"); break; } 
       if (ends("tional")) { r("tion"); break; } 
       break; 
     case 'c': if (ends("enci")) { r("ence"); break; } 
       if (ends("anci")) { r("ance"); break; } 
       break; 
     case 'e': if (ends("izer")) { r("ize"); break; } 
       break; 
     case 'l': if (ends("bli")) { r("ble"); break; } 
       if (ends("alli")) { r("al"); break; } 
       if (ends("entli")) { r("ent"); break; } 
       if (ends("eli")) { r("e"); break; } 
       if (ends("ousli")) { r("ous"); break; } 
       break; 
     case 'o': if (ends("ization")) { r("ize"); break; } 
       if (ends("ation")) { r("ate"); break; } 
       if (ends("ator")) { r("ate"); break; } 
       break; 
     case 's': if (ends("alism")) { r("al"); break; } 
       if (ends("iveness")) { r("ive"); break; } 
       if (ends("fulness")) { r("ful"); break; } 
       if (ends("ousness")) { r("ous"); break; } 
       break; 
     case 't': if (ends("aliti")) { r("al"); break; } 
       if (ends("iviti")) { r("ive"); break; } 
       if (ends("biliti")) { r("ble"); break; } 
       break; 
     case 'g': if (ends("logi")) { r("log"); break; } 
    } } 

    /* step4() deals with -ic-, -full, -ness etc. similar strategy to step3. */ 

    private final void step4() { switch (b[k]) 
    { 
     case 'e': if (ends("icate")) { r("ic"); break; } 
       if (ends("ative")) { r(""); break; } 
       if (ends("alize")) { r("al"); break; } 
       break; 
     case 'i': if (ends("iciti")) { r("ic"); break; } 
       break; 
     case 'l': if (ends("ical")) { r("ic"); break; } 
       if (ends("ful")) { r(""); break; } 
       break; 
     case 's': if (ends("ness")) { r(""); break; } 
       break; 
    } } 

    /* step5() takes off -ant, -ence etc., in context <c>vcvc<v>. */ 

    private final void step5() 
    { if (k == 0) return; /* for Bug 1 */ switch (b[k-1]) 
     { case 'a': if (ends("al")) break; return; 
      case 'c': if (ends("ance")) break; 
        if (ends("ence")) break; return; 
      case 'e': if (ends("er")) break; return; 
      case 'i': if (ends("ic")) break; return; 
      case 'l': if (ends("able")) break; 
        if (ends("ible")) break; return; 
      case 'n': if (ends("ant")) break; 
        if (ends("ement")) break; 
        if (ends("ment")) break; 
        /* element etc. not stripped before the m */ 
        if (ends("ent")) break; return; 
      case 'o': if (ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't')) break; 
            /* j >= 0 fixes Bug 2 */ 
        if (ends("ou")) break; return; 
        /* takes care of -ous */ 
      case 's': if (ends("ism")) break; return; 
      case 't': if (ends("ate")) break; 
        if (ends("iti")) break; return; 
      case 'u': if (ends("ous")) break; return; 
      case 'v': if (ends("ive")) break; return; 
      case 'z': if (ends("ize")) break; return; 
      default: return; 
     } 
     if (m() > 1) k = j; 
    } 

    /* step6() removes a final -e if m() > 1. */ 

    private final void step6() 
    { j = k; 
     if (b[k] == 'e') 
     { int a = m(); 
     if (a > 1 || a == 1 && !cvc(k-1)) k--; 
     } 
     if (b[k] == 'l' && doublec(k) && m() > 1) k--; 
    } 

    /** Stem the word placed into the Stemmer buffer through calls to add(). 
    * Returns true if the stemming process resulted in a word different 
    * from the input. You can retrieve the result with 
    * getResultLength()/getResultBuffer() or toString(). 
    */ 
    public void stem() 

    { 
     //System.out.println(i); 
     k = i - 1; 
     if (k > 1) 
     { 
      step1(); 
      step2(); 
      step3(); 
      step4(); 
      step5(); 
      step6(); 
     } 
     for(int c=0;c<=k;c++) 
     { 
     // System.out.print(b[c]); 

     } 
     str=new String(b); 
//System.out.println(str); 
     sentence+=str+" "; 
    System.out.println(sentence); 

     i_end = k+1; i = 0; 
    } 
    public static void main(String[] args) 
    { 
     stemmer s = new stemmer(); 

     try 
     { 
    BufferedReader br = new BufferedReader(new FileReader("D:/output.txt")); 
    BufferedWriter output = new BufferedWriter(new FileWriter("D:/output1.txt")); 
    String separator = System.getProperty("line.separator"); 
     while ((line = br.readLine()) != null) 
      { 
        StringTokenizer st2 = new StringTokenizer(line, "\n"); 

        while (st2.hasMoreElements()) 
        { 
         String line1 = (String) st2.nextElement(); 
         System.out.println(line1); 
         StringTokenizer st3 = new StringTokenizer(line1, " "); 
         // String sentence= new String(); 
        while (st3.hasMoreTokens()) 
        { 
         element=st3.nextToken(); 
         s.addstring(element); 
         s.stem(); 
         } 
    output.append(separator+sentence); 
    sentence=""; 
        } 
      } 
     output.close(); 
     } 

    //System.out.println(element); 


//  s.addstring(element); 
//  s.stem(); 
//  s.addstring("walks"); 
//  s.stem(); 
     //System.out.println("Output " +s.b); 


     catch(Exception e) 
     { 

     } 
} 
} 
+3

這個問題很混亂。我們不知道「我」是什麼,或者各種步驟方法做了什麼,或者期望的輸出是什麼。請閱讀http://tinyurl.com/so-list –

+0

隨着更多的信息,它會更容易回答 – JulianG

+0

請寫一個SSCCE:http://sscce.org/ –

回答

4

要轉換char陣列String使用下面的語句:

String str=new String(char[]); 

docs

+3

當提供代碼時,最好是該代碼應該工作,例如'char [] arr = {'a','b'}; String str = new String(arr);'。 – Dukeling

相關問題