2014-06-12 47 views
0

我需要編寫一個實用程序,從給定的輸入字符串中刪除一些特殊字符,但我不清楚該如何着手處理這個任務。我拿到了一個功能相同的數據庫存儲過程,需要在 Java 代碼中複製同樣的算法。我把這個存儲過程貼在下面。(標題:Java 刪除特殊的阿拉伯字符)

-- Validates the name passed in name_a character by character and then
-- rewrites it in place (name_a is IN OUT) by applying a fixed sequence of
-- letter and word substitutions.
--
-- Parameters:
--   name_a  in/out  the name to check; on success it holds the rewritten text.
-- Raises:
--   -20000 (message text from display_message(9)) as soon as a character
--   whose ascii() code is outside the accepted set is found.
--
-- NOTE(review): the accepted codes (193-218, 225-234, 247-250) presumably
-- denote Arabic letters and lam-alef ligatures in a single-byte Arabic
-- database character set -- TODO confirm which character set is in use.
-- NOTE(review): this excerpt stops before the procedure's closing "end;".
create or replace procedure dbimm.check_arabic_letters (name_a in out varchar2) as 
     -- number(3) holds at most three digits, so these counters top out at 999.
     pos  number(3); 
     strlen number(3); 
     nxtchar char(1); 
     ascval number(3); 
begin 
     -- Helper defined outside this excerpt; presumably collapses runs of
     -- spaces in name_a -- verify against its definition.
     replace_mult_spaces(name_a); 
     -- The length is captured once here and reused by the end-of-string checks
     -- below; the replacements made before those checks are one character for
     -- one character, so the value is still accurate there.
     strlen := length(name_a); 
     pos := 1; 
     -- Validation pass: every character must fall in one of the accepted code
     -- ranges, otherwise the whole call is rejected.
     while pos <= strlen loop 
     nxtchar := substr(name_a, pos, 1); 
     ascval := ascii(nxtchar); 
     -- dbms_output.put_line(to_char(ascval)); 
     -- Besides the two letter ranges, the accepted codes are 32 (space),
     -- 38 (&), 40 and 41 (parentheses), 47 (/) and 247-250.
     if (ascval between 193 and 218) or 
      (ascval between 225 and 234) or 
      (ascval in (32,38,40,41,47,247, 248, 249, 250)) 
     then 
      pos := pos + 1; 
     else 
      raise_application_error(-20000,display_message(9)); 
     end if; 
     end loop; 
     -- Replace the first letter with the second wherever it is followed by a
     -- space, then handle the same letter in the last position of the string.
     name_a := replace(name_a, 'ي ','ى '); 
     if substr(name_a, strlen) = 'ي' then 
      name_a := substr(name_a, 1, strlen - 1) || 'ى'; 
     end if; 
     -- Same treatment for a second letter pair: before a space, then at the end.
     name_a := replace(name_a, 'ة ', 'ه '); 
     if substr(name_a, strlen) = 'ة' then 
      name_a := substr(name_a, 1, strlen - 1) || 'ه'; 
     end if; 

     /* Old code commented by Mobeen 
     name_a := replace(name_a, ' عبد ',' عبد'); 
     if instr(name_a,'عبد ') = 1 and length(name_a) > 4 then 
      name_a := substr(name_a, 1, 3) || substr(name_a,5); 
     end if; 
     */ 
     ------- 

    -- Fold three variant letters into the single letter used as replacement.
    name_a := replace(name_a,'أ','ا'); 
     name_a := replace(name_a,'إ','ا'); 
     name_a := replace(name_a,'آ','ا'); 
     --m name_a := replace(name_a,'لا','?'); 
     -- Expand the single characters with codes 247-250 into the two-letter
     -- sequence; this can make name_a longer than strlen.
     name_a := replace(name_a,chr(250),'لا'); 
     name_a := replace(name_a,chr(247),'لا'); 
     name_a := replace(name_a,chr(248),'لا'); 
     name_a := replace(name_a,chr(249),'لا'); 
     -- Code 63 is '?'. It is not among the codes accepted by the validation
     -- loop above, so a value that passed validation cannot contain it.
     name_a := replace(name_a,chr(63),'لا'); 

     --- New Code added by Patrick 
     -- Remove the space between the two parts of this word pair: first when
     -- it is preceded by a space, then when it opens the string.
     name_a := replace(name_a, ' عبد ال', ' عبدال'); 
     if substr(name_a,1,6)= 'عبد ال' then --start 
     name_a:= 'عبدال'||substr(name_a,7); 
     end if; 
     ---- 

     -- Shorten the three-letter word to its two-letter form in all three
     -- positions: between spaces, at the start, and at the end of the string.
     name_a := replace(name_a, ' ابن ',' بن '); --middle 
     if substr(name_a,1,4)='ابن ' then --start 
     name_a:='بن '||substr(name_a,5); 
     end if; 
     if substr(name_a,-4)=' ابن' then --end 
     name_a:=substr(name_a,1,length(name_a)-4)||' بن'; 
     end if; 
     ------- 

我已經開始在自己的 Java 類中複製同樣的邏輯。

public class ReplaceSpecialArabicCharacUtil {

    /**
     * Work-in-progress port of the database procedure used for the blacklist.
     * At this stage it performs a single regular-expression substitution on
     * the supplied text and returns the result.
     *
     * @param nameInArabic name of the applicant in Arabic, e.g. first name or last name
     * @return the text after the substitution has been applied
     */
    public static String removeSpecialArabicCharacters(String nameInArabic) {
        // NOTE(review): the pattern appears to put the space BEFORE the letter,
        // while the sample string in main() has the space AFTER it, which
        // would explain why the sample is left untouched -- confirm.
        final String pattern = " ې";
        final String replacement = "ی ";
        return nameInArabic.replaceAll(pattern, replacement);
    }

    /**
     * Driver intended for trying out the algorithm. It currently only
     * declares a sample string; nothing is invoked or printed.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        final String sample = "ې ";
        // The call that would print the UTF-8 bytes of the converted sample is disabled.
    }

}

replaceAll 似乎無法正確處理空格。我不確定自己解決這個問題的思路是否正確。有人能幫幫我嗎?我希望以正確的方式編寫這個實用程序。

感謝, 本

回答

1

我已盡我所能用 Java 代碼模仿你的存儲過程,只有 replace_mult_spaces 除外,因爲我不知道它的作用。

注意:當你複製粘貼時,你肯定會發現編譯錯誤,因爲我的IDE和StackOverflow並不真正支持阿拉伯字符。所以你必須自己調整代碼,直到你達到你想要的結果。

下面是與你的存儲過程等價的 Java 代碼:

public class ReplaceSpecialArabicCharacUtil {

    // Letters are written as Unicode escapes so the file survives editors and
    // web pages that reorder or mangle right-to-left text.
    private static final char ALEF = '\u0627';
    private static final char ALEF_MADDA = '\u0622';
    private static final char ALEF_HAMZA_ABOVE = '\u0623';
    private static final char ALEF_HAMZA_BELOW = '\u0625';
    private static final char YEH = '\u064A';
    private static final char ALEF_MAKSURA = '\u0649';
    private static final char TEH_MARBUTA = '\u0629';
    private static final char HEH = '\u0647';

    /** The two-letter sequence lam + alef that replaces every ligature. */
    private static final String LAM_ALEF = "\u0644\u0627";
    /** The word ain-beh-dal ("abd"). */
    private static final String ABD = "\u0639\u0628\u062F";
    /** The prefix alef-lam ("al"). */
    private static final String AL = "\u0627\u0644";
    /** The word alef-beh-noon ("ibn"). */
    private static final String IBN = "\u0627\u0628\u0646";
    /** The word beh-noon ("bin"). */
    private static final String BIN = "\u0628\u0646";

    /**
     * Returns the character codes the database procedure accepts: 193-218,
     * 225-234, and 32, 38, 40, 41, 47, 247, 248, 249, 250.
     *
     * These are codes in the database character set, not Unicode code
     * points; see {@link #toLegacyCode(char)} for the mapping used here.
     *
     * @return a new mutable list holding the accepted codes
     */
    public static List<Integer> getValidAsciiValues() {
        List<Integer> validAsciiValues = new ArrayList<Integer>();
        for (int i = 193; i <= 218; i++) {
            validAsciiValues.add(i);
        }
        for (int i = 225; i <= 234; i++) {
            validAsciiValues.add(i);
        }

        validAsciiValues.add(32);
        validAsciiValues.add(38);
        validAsciiValues.add(40);
        validAsciiValues.add(41);
        validAsciiValues.add(47);
        validAsciiValues.add(247);
        validAsciiValues.add(248);
        validAsciiValues.add(249);
        validAsciiValues.add(250);

        return validAsciiValues;
    }

    /**
     * Maps a Java (UTF-16) character to the single-byte code the procedure's
     * ascii() call would see, or -1 when there is no counterpart.
     *
     * NOTE(review): assumes the database character set follows the
     * ISO-8859-6 / ASMO-708 layout, where codes 193-218 are U+0621..U+063A
     * and codes 225-234 are U+0641..U+064A -- TODO confirm against the
     * database's NLS_CHARACTERSET.
     * NOTE(review): codes 247-250 are presumed to be lam-alef ligatures
     * (the procedure replaces each of them with lam + alef); the Unicode
     * presentation forms U+FEF5..U+FEFC are mapped onto them. All four codes
     * are treated alike downstream, so the exact pairing does not matter.
     */
    private static int toLegacyCode(char c) {
        if (c < 0x80) {
            return c; // plain ASCII keeps its code
        }
        if (c >= 0x0621 && c <= 0x063A) {
            return 193 + (c - 0x0621);
        }
        if (c >= 0x0641 && c <= 0x064A) {
            return 225 + (c - 0x0641);
        }
        if (c >= 0xFEF5 && c <= 0xFEFC) {
            return 247 + (c - 0xFEF5) / 2; // isolated and final form share a code
        }
        return -1;
    }

    /**
     * Replaces {@code from} with {@code to} wherever {@code from} is directly
     * followed by a space, and also when it is the last character of the text.
     */
    private static String replaceBeforeSpaceOrAtEnd(String text, char from, char to) {
        String result = text.replace(from + " ", to + " ");
        if (result.endsWith(String.valueOf(from))) {
            result = result.substring(0, result.length() - 1) + to;
        }
        return result;
    }

    /**
     * Java port of the database procedure check_arabic_letters: validates the
     * name and returns its normalized form.
     *
     * The steps, in the procedure's order, are:
     * 1. reject any character outside the accepted set;
     * 2. yeh becomes alef maksura before a space or at the end;
     * 3. teh marbuta becomes heh before a space or at the end;
     * 4. alef with madda / hamza above / hamza below becomes plain alef;
     * 5. lam-alef ligatures become the two letters lam + alef;
     * 6. "abd al..." is joined into one word (after a space or at the start);
     * 7. "ibn" becomes "bin" (between spaces, at the start, at the end).
     *
     * Java strings are immutable, so the result is returned; the earlier
     * void version silently discarded every change. Callers that ignore the
     * return value still compile.
     *
     * The procedure's call to replace_mult_spaces is not ported because its
     * definition is not available here. Its replacement of '?' (code 63) is
     * omitted as well: that code is not accepted by validation, so it can
     * never be present once step 1 has passed.
     *
     * @param name_a the name to normalize; null or empty is returned unchanged
     * @return the normalized name
     * @throws AssertionError if the name contains a character that is not accepted
     */
    public static String removeSpecialArabicCharacters(String name_a) {
        if (name_a == null || name_a.isEmpty()) {
            return name_a;
        }

        // Build the accepted-code list once instead of once per character.
        List<Integer> validCodes = getValidAsciiValues();
        for (int pos = 0; pos < name_a.length(); pos++) {
            if (!validCodes.contains(toLegacyCode(name_a.charAt(pos)))) {
                throw new AssertionError("The string contains invalid characters");
            }
        }

        name_a = replaceBeforeSpaceOrAtEnd(name_a, YEH, ALEF_MAKSURA);
        name_a = replaceBeforeSpaceOrAtEnd(name_a, TEH_MARBUTA, HEH);

        // Variant -> plain alef (the earlier version had the arguments reversed).
        name_a = name_a.replace(ALEF_HAMZA_ABOVE, ALEF);
        name_a = name_a.replace(ALEF_HAMZA_BELOW, ALEF);
        name_a = name_a.replace(ALEF_MADDA, ALEF);

        // Every ligature expands to two letters, so a String replacement is needed.
        for (char ligature = '\uFEF5'; ligature <= '\uFEFC'; ligature++) {
            name_a = name_a.replace(String.valueOf(ligature), LAM_ALEF);
        }

        // String.replace is literal; replaceAll would treat its argument as a regex.
        name_a = name_a.replace(" " + ABD + " " + AL, " " + ABD + AL);
        if (name_a.startsWith(ABD + " " + AL)) {
            // The prefix "abd al" is six characters long including the space.
            name_a = ABD + AL + name_a.substring(6);
        }

        name_a = name_a.replace(" " + IBN + " ", " " + BIN + " ");
        if (name_a.startsWith(IBN + " ")) {
            name_a = BIN + " " + name_a.substring(4);
        }
        if (name_a.endsWith(" " + IBN)) {
            name_a = name_a.substring(0, name_a.length() - 4) + " " + BIN;
        }

        return name_a;
    }
}

您可以把兩段代碼並排比較,以便更好地理解。

+0

我想了解的是,這樣的工具應該如何設計? – benz

+0

您可以使用一個 Map,以原始阿拉伯字符作爲鍵,以要替換成的字符作爲值。然後在您的 removeSpecialArabicCharacters 方法中調用 nameInArabic.replaceAll(nameInArabic,myArabicReplaceMap.get(nameInArabic)); –

+0

我明白,@加布裏埃爾。我的意思是,如果仔細看問題,就會發現這個數據庫存儲過程有一些嚴格的規則,甚至連空格也要仔細處理:當某個特定序列出現在中間時,需要對它進行轉換。如果你通讀一遍存儲過程的代碼,就能正確理解我要找的是什麼。 – benz