2014-04-02 59 views
1

如何使 Lucene 索引字段不區分大小寫?我的意思是,有沒有辦法在查詢中只把字段名稱轉爲小寫,而不改變字段的值?

我不能將整個查詢轉換爲小寫,因爲它會影響其他使用空白分析器的查詢。

Query.extractTerms() 方法會返回詞項(Term)的陣列,但如果輸入包含通配符(例如 *),這個方法就行不通。

我需要這個功能,因爲我索引中的字段名稱都已轉爲小寫。

例如:如果我的字段以「actor」建立索引,那麼包含「Actor:abc」以及「ACTOR:abc」的查詢都應該能夠得到結果。

任何想法?

回答

0

解決方案是創建您自己的分析器(Analyzer),並在其過濾器鏈中加入 LowerCaseFilter。

下面是一個自訂的法語分析器示例,它不區分大小寫:

import org.apache.lucene.analysis.Analyzer; 
import org.apache.lucene.analysis.TokenStream; 
import org.apache.lucene.analysis.Tokenizer; 
import org.apache.lucene.analysis.core.LowerCaseFilter; 
import org.apache.lucene.analysis.core.StopFilter; 
import org.apache.lucene.analysis.fr.FrenchAnalyzer; 
import org.apache.lucene.analysis.fr.FrenchLightStemFilter; 
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; 
import org.apache.lucene.analysis.standard.StandardFilter; 
import org.apache.lucene.analysis.standard.StandardTokenizer; 
import org.apache.lucene.analysis.util.ElisionFilter; 
import org.apache.lucene.util.Version; 

import java.io.Reader; 

/**
 * Case-insensitive variant of {@link org.apache.lucene.analysis.fr.FrenchAnalyzer}
 * that additionally folds accented characters to their ASCII equivalents.
 *
 * <p>Filter chain, in order: standard tokenization, elision removal
 * (e.g. {@code l'avion} -> {@code avion}), lowercasing, French stop-word
 * removal, ASCII folding, and light French stemming.
 */
public class CustomFrenchAnalyzer extends Analyzer {

    /**
     * Lucene compatibility version handed to every version-aware component.
     */
    private final Version matchVersion;

    /**
     * Constructs a new analyzer.
     *
     * @param matchVersion Lucene compatibility version used when building the
     *                     tokenizer and filters
     */
    public CustomFrenchAnalyzer(final Version matchVersion) {
        this.matchVersion = matchVersion;
    }

    /**
     * Builds the tokenizer and filter chain for one field.
     *
     * @param s      name of the field being analyzed, as supplied by Lucene;
     *               unused because every field gets the same chain
     * @param reader source of the text to analyze
     * @return the tokenizer together with the fully decorated token stream
     */
    @Override
    protected final TokenStreamComponents createComponents(final String s, final Reader reader) {
        final Tokenizer source = new StandardTokenizer(matchVersion, reader);
        TokenStream result = new StandardFilter(matchVersion, source);
        result = new ElisionFilter(result, FrenchAnalyzer.DEFAULT_ARTICLES);
        // Lowercase BEFORE stop-word removal (the same order FrenchAnalyzer uses):
        // the default stop set holds lowercase words, so capitalized stop words
        // such as "Le" would otherwise slip through into the index.
        result = new LowerCaseFilter(matchVersion, result);
        // Stop words are removed before ASCII folding so that accented stop
        // words still match their entries in the stop set.
        result = new StopFilter(matchVersion, result, FrenchAnalyzer.getDefaultStopSet());
        result = new ASCIIFoldingFilter(result);
        result = new FrenchLightStemFilter(result);

        // Tokens are already lowercase at this point, so no second
        // LowerCaseFilter pass is needed.
        return new TokenStreamComponents(source, result);
    }
}