我有一個簡單的自定義分析器,似乎正確地在SQL服務器的索引中生成語音哈希。它似乎大多數嘗試查詢使用我的自定義分析器生成的索引不返回結果。我一直無法找到類似的情況,所以我一定會做錯的。用自定義分析器/過濾器搜索返回沒有結果
自定義過濾器:
internal class SoundexFilter : TokenFilter
{
private readonly ITermAttribute _termAttr;
private Queue<Token> soundexTokenQueue
= new Queue<Token>();
public SoundexFilter(TokenStream input)
: base(input)
{
_termAttr = AddAttribute<ITermAttribute>();
}
public override bool IncrementToken()
{
if (input.IncrementToken())
{
string currentTerm = _termAttr.Term;
var hash = Soundex.For(currentTerm);
Console.WriteLine("Original: {0}, Hash: {1}", currentTerm, hash);
soundexTokenQueue.Enqueue(new Token(hash, 0, hash.Length));
return true;
}
else if (soundexTokenQueue.Count > 0)
{
var token = soundexTokenQueue.Dequeue();
_termAttr.SetTermBuffer(token.Term);
_termAttr.SetTermLength(token.TermLength());
return true;
}
return false;
}
}
定製分析器:
public class SoundexAnalyzer : Analyzer
{
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
//create the tokenizer
TokenStream result = new StandardTokenizer(Version.LUCENE_30, reader);
//add in filters
result = new StandardFilter(result);
// Add soundex filter
result = new SoundexFilter(result);
return result;
}
}
簡單的測試程序:
public class Program
{
private const string NAME = "John Smith";
private const string SEARCH_NAME = "John Smith";
private Analyzer _analyzer = new SoundexAnalyzer();
private Directory _directory = new RAMDirectory();
internal void Run(string[] args)
{
using (var writer = new IndexWriter(_directory, _analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
{
var field = new Field("Name", NAME, Field.Store.YES, Field.Index.ANALYZED);
var document = new Document();
document.Add(field);
writer.AddDocument(document);
// Unnecessary but helps imply intent
writer.Commit();
}
using (var searcher = new IndexSearcher(_directory))
{
var parser = new QueryParser(Version.LUCENE_30, "Name", _analyzer);
var query = parser.Parse(SEARCH_NAME);
var docs = searcher.Search(query, 10);
Console.WriteLine("\nReturned Docs:");
foreach (var scoreDoc in docs.ScoreDocs)
{
var doc = searcher.Doc(scoreDoc.Doc);
Console.WriteLine(doc.Get("Name"));
}
}
}
private static void Main(string[] args)
{
new Program().Run(args);
}
}
該成功使用此代碼是唯一的搜索完全匹配像NAME = "John"
和SEARCH_NAME = "John"
。
奇怪的是,在Luke中搜索標準分析器的語音哈希工作正常,因此寫入必須按預期工作(或者至少是我的期望)。
我已經做了相當多的研究,沒有什麼幫助。任何想法我失蹤?