解決方案是建立您自己的分析器(Analyzer),並在過濾鏈中加入 LowerCaseFilter。
以下是一個不區分大小寫的自訂法語分析器範例:
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.fr.FrenchLightStemFilter;
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.util.Version;
import java.io.Reader;
/**
 * Completes {@link org.apache.lucene.analysis.fr.FrenchAnalyzer} with accent management.
 *
 * <p>The analysis chain is: standard tokenization, elision removal, lower-casing,
 * French stop-word removal, ASCII folding (accent stripping) and light French stemming.
 */
public class CustomFrenchAnalyzer extends Analyzer {

    /**
     * Lucene compatibility version handed to every version-aware component of the chain.
     */
    private final Version matchVersion;

    /**
     * Constructs a new analyzer.
     *
     * @param matchVersion compatibility version
     */
    public CustomFrenchAnalyzer(final Version matchVersion) {
        this.matchVersion = matchVersion;
    }

    /**
     * Builds the tokenizer and filter chain used to analyze a field.
     *
     * @param s      name of the field being analyzed (not used: every field gets the same chain)
     * @param reader source of the text to tokenize
     * @return the components wrapping the tokenizer and the fully filtered token stream
     */
    @Override
    protected final TokenStreamComponents createComponents(final String s, final Reader reader) {
        final Tokenizer source = new StandardTokenizer(matchVersion, reader);
        TokenStream result = new StandardFilter(matchVersion, source);
        result = new ElisionFilter(result, FrenchAnalyzer.DEFAULT_ARTICLES);
        // Lower-case BEFORE stop-word removal (same order as FrenchAnalyzer) so that
        // capitalised stop words such as "Le" or "Les" are filtered out as well.
        result = new LowerCaseFilter(matchVersion, result);
        // Stop words are removed before ASCII folding so that accented entries of the
        // stop set are still compared against their accented token form.
        result = new StopFilter(matchVersion, result, FrenchAnalyzer.getDefaultStopSet());
        result = new ASCIIFoldingFilter(result);
        result = new FrenchLightStemFilter(result);
        // Tokens are already lower-cased above; no second LowerCaseFilter is needed.
        return new TokenStreamComponents(source, result);
    }
}