2012-10-08 25 views
3

我編寫了下面的代碼,用來測試向文檔添加字段時 SetBoost 方法的效果。(標題:Lucene.Net 中對 Field 調用 SetBoost 不會影響搜索結果)

using System; 
using System.Collections.Generic; 
using System.IO; 
using System.Linq; 
using System.Text; 
using Lucene; 
using Lucene.Net; 
using Lucene.Net.Analysis; 
using Lucene.Net.Analysis.Standard; 
using Lucene.Net.Documents; 
using Lucene.Net.Index; 
using Lucene.Net.QueryParsers; 
using Lucene.Net.Search; 
using Lucene.Net.Store; 
using Directory = Lucene.Net.Store.Directory; 
using Version = Lucene.Net.Util.Version; 

namespace LuceneTest
{
    /// <summary>
    /// Console program that indexes five sample products, each with a boosted
    /// "title" field, and prints the id, title and score of every document whose
    /// "title" or "synopsis" field contains a term starting with "special".
    /// </summary>
    public class LuceneTest
    {
        /// <summary>Boost applied to the "title" field of every sample product.</summary>
        private const float TitleBoost = 2f;

        /// <summary>Maximum number of hits requested from the searcher.</summary>
        private const int MaxHits = 200;

        /// <summary>
        /// Rebuilds the index under "&lt;current directory&gt;\Lucene", runs the
        /// search, prints the hits and waits for Enter before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var products = new[]
            {
                CreateProduct("1", "Special One", "special synopsis"),
                CreateProduct("2", "Special Two", "special synopsis"),
                CreateProduct("3", "Normal One", "special synopsis"),
                CreateProduct("4", "Normal Two", "special synopsis"),
                CreateProduct("5", "Special Three", "normal synopsis")
            };

            var indexPath = Path.Combine(Environment.CurrentDirectory, "Lucene");
            Directory directory = FSDirectory.Open(new DirectoryInfo(indexPath));
            try
            {
                IndexProducts(directory, products);

                Console.WriteLine("searching...");
                PrintMatches(directory);
                Console.WriteLine("done...");
                Console.ReadLine();
            }
            finally
            {
                // Release the directory even if indexing or searching throws.
                directory.Close();
            }
        }

        /// <summary>
        /// Builds one product document with a stored, un-analyzed "Id" field and
        /// stored, analyzed "title" and "synopsis" fields. The title field gets
        /// <see cref="TitleBoost"/>.
        /// </summary>
        /// <param name="id">Value of the "Id" field.</param>
        /// <param name="title">Value of the "title" field.</param>
        /// <param name="synopsis">Value of the "synopsis" field.</param>
        /// <returns>The populated document, not yet added to any index.</returns>
        private static Document CreateProduct(string id, string title, string synopsis)
        {
            var product = new Document();
            product.Add(new Field("Id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));

            var titleField = new Field("title", title, Field.Store.YES, Field.Index.ANALYZED);
            titleField.SetBoost(TitleBoost);
            product.Add(titleField);

            product.Add(new Field("synopsis", synopsis, Field.Store.YES, Field.Index.ANALYZED));
            return product;
        }

        /// <summary>
        /// Creates a fresh index in <paramref name="directory"/> (any existing
        /// index is overwritten) and writes the given documents to it.
        /// </summary>
        /// <param name="directory">Directory that receives the index.</param>
        /// <param name="products">Documents to add, in order.</param>
        private static void IndexProducts(Directory directory, Document[] products)
        {
            Analyzer analyzer = new StandardAnalyzer();

            // create == true: start from an empty index on every run.
            var writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
            try
            {
                foreach (var product in products)
                {
                    writer.AddDocument(product);
                }
                writer.Optimize();
            }
            finally
            {
                // Close the writer on the failure path too.
                writer.Close();
            }
        }

        /// <summary>
        /// Searches "title" and "synopsis" for the prefix "special" and writes
        /// "Id title score" for each hit to the console.
        /// </summary>
        /// <param name="directory">Directory holding the index to search.</param>
        private static void PrintMatches(Directory directory)
        {
            var indexReader = IndexReader.Open(directory, true);
            try
            {
                var indexSearcher = new IndexSearcher(indexReader);
                try
                {
                    // NOTE: PrefixQuery is rewritten to a constant-score query, so the
                    // title boost set at index time does not influence hit.score here.
                    // Calling SetRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE)
                    // on each PrefixQuery, or using TermQuery, makes the boost count.
                    var fieldMatches = new BooleanQuery();
                    fieldMatches.Add(new BooleanClause(new PrefixQuery(new Term("title", "special")), BooleanClause.Occur.SHOULD));
                    fieldMatches.Add(new BooleanClause(new PrefixQuery(new Term("synopsis", "special")), BooleanClause.Occur.SHOULD));

                    var query = new BooleanQuery();
                    query.Add(new BooleanClause((Query)fieldMatches, BooleanClause.Occur.MUST));

                    TopDocs results = indexSearcher.Search(query, (Filter)null, MaxHits);
                    foreach (var hit in results.ScoreDocs)
                    {
                        var document = indexSearcher.Doc(hit.doc);
                        Console.WriteLine(document.Get("Id") + " " + document.Get("title") + " " + hit.score);
                    }
                }
                finally
                {
                    indexSearcher.Close();
                }
            }
            finally
            {
                // The searcher was built from an existing reader, so the reader
                // is closed explicitly here.
                indexReader.Close();
            }
        }
    }
}

我使用的是 Lucene.Net 2.9.4.1 版本。我在 title 字段上設置了權重提升(boost)。當我在 title 和 synopsis 字段中搜索詞項「special」時,我期望產品 1、2、5 排在最前面,但實際得到的結果如下:

searching... 
1 Special One 1.414214 
2 Special Two 1.414214 
3 Normal One 0.3535534 
4 Normal Two 0.3535534 
5 Special Three 0.3535534 
done... 

即使產品 5 的標題中包含「special」一詞,它的得分仍然與產品 3 和 4 相同,並沒有排在它們前面。

任何幫助或想法,將不勝感激。 謝謝

回答

4

我相信問題在於您使用的是 PrefixQuery。前綴查詢會被重寫爲常量評分查詢(constant score query),因此字段的 boost 不會影響得分。您可以自己設置重寫方法,例如:

// Build the prefix query on the "title" field.
PrefixQuery pquery = new PrefixQuery(new Term("title", "special"));
// Switch from the constant-score rewrite to a scoring BooleanQuery rewrite so
// field boosts take effect. Lucene.Net 2.9.x uses the PascalCase
// SetRewriteMethod (the Java API spells it setRewriteMethod).
pquery.SetRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);

或者您可以嘗試使用 TermQuery 而不是 PrefixQuery。無論採用哪種方式,您都應該能看到字段級的權重提升(boost)生效。

哦,有一點需要注意的是,如果你想明白爲什麼結果會按照它們的方式進行評分,那麼你應該看看Searcher.explain。評分變得複雜,這是理解和調整它的非常方便的工具。

+0

謝謝。我嘗試了兩種方法,現在產生了預期的結果。解釋方法也有助於瞭解它是如何得出結果的。 – user1408767

0

《Lucene in Action》第二版第 49 頁,第 2.5.2 節「增強字段」(Boosting fields)中寫道:「但請記住,當你想更改字段或文檔的 boost 時,必須先完全刪除整個文檔再重新添加,或者使用 updateDocument 方法,它做的是同樣的事情。」

由於您使用相同的索引文件進行測試,我認爲您需要在打開索引之前調用writer.updateDocument。

+0

IndexWriter 構造函數的第三個參數('create' 標誌)被設置爲 true,表示任何現有的索引都會被覆蓋,所以每次運行測試、開始寫入時,索引都是空的。 – femtoRgon