2015-10-27 59 views
17

我們最近升級了工作中所用的CMS,因此不得不從Lucene.Net V2.3.1.301遷移到V2.9.4.1。(標題:Lucene會返回得分非正的文檔)

在原來的解決方案中,我們使用CustomScoreQuery來實現各種內置查詢無法完成的過濾(地理位置GEO、多個日期範圍等)。

自從由舊版本的Lucene遷移到新版本之後,我們在檢查結果時發現:即使文檔的得分為0甚至是負數,Lucene也會把它們返回。

[圖片:enter image description here]

下面是一段用來演示該問題的示例代碼:

/// <summary>
/// Opens the on-disk index, runs the event-date query for 2015-11-23..2015-11-25,
/// prints the number of matches to the console, and then waits for a key press.
/// </summary>
/// <remarks>
/// The index path is hard-coded. The searcher and the directory are closed before
/// the method waits for input, so the index files are released even if the search throws.
/// </remarks>
public LuceneTest()
    {
        Lucene.Net.Store.Directory luceneIndexDirectory = FSDirectory.Open(new System.IO.DirectoryInfo(@"C:\inetpub\wwwroot\Project\build\Data\indexes\all_site_search_en"));
        try
        {
            IndexSearcher searcher = new IndexSearcher(luceneIndexDirectory, true);
            try
            {
                Query dateQuery = ComposeEventDateQuery(new DateTime(2015, 11, 23), new DateTime(2015, 11, 25), searcher);

                // Static setting on BooleanQuery; applied before the search runs, as in the original flow.
                BooleanQuery.SetMaxClauseCount(10000);

                // Ask for at most the top 1000 hits; TotalHits still reports every match.
                TopDocs hitsFound = searcher.Search(dateQuery, 1000);
                System.Console.WriteLine(String.Format("Found {0} matches with the date filters", hitsFound.TotalHits));
            }
            finally
            {
                searcher.Close();
            }
        }
        finally
        {
            luceneIndexDirectory.Close();
        }

        System.Console.ReadKey();
    }



    /// <summary>
    /// Builds a boolean query whose single required (MUST) clause is an
    /// <see cref="EventDateQuery1"/> wrapping a term query on <c>_language</c> = <c>en</c>.
    /// </summary>
    /// <param name="fromDate">Start of the date window passed to the event-date query.</param>
    /// <param name="ToDate">End of the date window passed to the event-date query.</param>
    /// <param name="MySearcher">Searcher handed through to the event-date query.</param>
    /// <returns>The composed boolean query.</returns>
    public static Query ComposeEventDateQuery(DateTime fromDate, DateTime ToDate, IndexSearcher MySearcher)
    {
        Query languageTerm = new TermQuery(new Lucene.Net.Index.Term("_language", "en"));

        // The "match non-events" flag is fixed to false for this query.
        Query dateWindow = new EventDateQuery1(languageTerm, MySearcher, fromDate, ToDate, false);

        BooleanQuery composed = new BooleanQuery();
        composed.Add(dateWindow, BooleanClause.Occur.MUST);
        return composed;
    }


    /// <summary>
    /// Custom-score query that carries a date window (from/to, both truncated to whole days)
    /// and a "match non-events" flag, and passes them to
    /// <see cref="EventDateQueryCustomScoreProvider"/> for scoring.
    /// </summary>
    /// <remarks>
    /// NOTE(review): <c>Equals</c>/<c>GetHashCode</c> are inherited from the base class; if instances
    /// are ever used as cache keys, confirm that the inherited implementations are sufficient.
    /// </remarks>
    public class EventDateQuery1 : CustomScoreQuery
    {
        // Format used for both dates inside the generated key.
        private const string DateFormat = "yyyyMMdd";

        // Stored from the constructor; not read anywhere inside this class.
        private readonly Searcher _searcher;
        private readonly DateTime _fromDT;
        private readonly DateTime _toDT;
        private readonly bool _shouldMatchNonEvents;

        /// <summary>
        /// Creates a query whose window starts at <paramref name="fromDT"/> and ends
        /// <paramref name="dateRange"/> days later.
        /// </summary>
        /// <param name="subQuery">Query wrapped by this custom-score query.</param>
        /// <param name="searcher">Searcher associated with the query.</param>
        /// <param name="fromDT">Start of the window; only the date part is kept.</param>
        /// <param name="shouldMatchNonEvents">Flag forwarded to the score provider.</param>
        /// <param name="dateRange">Length of the window in days (default 14).</param>
        public EventDateQuery1(Query subQuery, Searcher searcher, DateTime fromDT, bool shouldMatchNonEvents, int dateRange = 14)
            : this(subQuery, searcher, fromDT, fromDT.AddDays(dateRange), shouldMatchNonEvents)
        {
        }

        /// <summary>
        /// Creates a query with an explicit start and end date.
        /// </summary>
        /// <param name="subQuery">Query wrapped by this custom-score query.</param>
        /// <param name="searcher">Searcher associated with the query.</param>
        /// <param name="fromDT">Start of the window; only the date part is kept.</param>
        /// <param name="toDT">End of the window; only the date part is kept.</param>
        /// <param name="shouldMatchNonEvents">Flag forwarded to the score provider.</param>
        public EventDateQuery1(Query subQuery, Searcher searcher, DateTime fromDT, DateTime toDT, bool shouldMatchNonEvents)
            : base(subQuery)
        {
            _searcher = searcher;
            _fromDT = fromDT.Date;
            _toDT = toDT.Date;
            _shouldMatchNonEvents = shouldMatchNonEvents;
        }

        /// <summary>Returns the same key as <see cref="GenerateUniqueKey"/>.</summary>
        public override string ToString()
        {
            return GenerateUniqueKey();
        }

        /// <summary>Returns the same key as <see cref="GenerateUniqueKey"/>; <paramref name="field"/> is ignored.</summary>
        public override string ToString(string field)
        {
            return GenerateUniqueKey();
        }

        /// <summary>Returns the same key as <see cref="GenerateUniqueKey"/>.</summary>
        public override string Name()
        {
            return GenerateUniqueKey();
        }

        /// <summary>
        /// Builds the key <c>EventDateQuery_{from}_{to}_{flag}</c>, with both dates rendered as
        /// <c>yyyyMMdd</c>.
        /// </summary>
        /// <returns>The key string for this query's window and flag.</returns>
        public string GenerateUniqueKey()
        {
            // Format with the invariant culture so the key does not change with the
            // current thread culture (a culture's default calendar can alter the year/month/day digits).
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            string fromKey = _fromDT.ToString(DateFormat, invariant);
            string toKey = _toDT.ToString(DateFormat, invariant);

            return String.Format("EventDateQuery_{0}_{1}_{2}", fromKey, toKey, _shouldMatchNonEvents.ToString());
        }

        /// <summary>
        /// Creates the score provider for <paramref name="reader"/>, passing along the
        /// date window and the "match non-events" flag.
        /// </summary>
        protected override CustomScoreProvider GetCustomScoreProvider(IndexReader reader)
        {
            return new EventDateQueryCustomScoreProvider(reader, _fromDT, _toDT, _shouldMatchNonEvents);
        }
    }

    /// <summary>
    /// Score provider created by the event-date query. It stores the date window and the
    /// "match non-events" flag, and replaces the incoming scores with the value
    /// computed by <see cref="myScore"/>.
    /// </summary>
    public class EventDateQueryCustomScoreProvider : CustomScoreProvider
    {
        private readonly string _dateFormat = "yyyyMMdd";

        // Scores handed back for a matching / non-matching document.
        private float MatchFloat = 1f;
        private float NoMatchFloat = 0f;

        private DateTime _fromDT;
        private DateTime _toDT;
        private bool _shouldMatchNonEvents = true;

        /// <summary>
        /// Stores the window (date parts only) and the flag for later scoring.
        /// </summary>
        /// <param name="reader">Index reader passed to the base provider.</param>
        /// <param name="fromDT">Start of the window; only the date part is kept.</param>
        /// <param name="toDT">End of the window; only the date part is kept.</param>
        /// <param name="shouldMatchNonEvents">Flag stored on the provider.</param>
        public EventDateQueryCustomScoreProvider(IndexReader reader, DateTime fromDT, DateTime toDT, bool shouldMatchNonEvents)
            : base(reader)
        {
            _shouldMatchNonEvents = shouldMatchNonEvents;
            _fromDT = fromDT.Date;
            _toDT = toDT.Date;
        }

        /// <summary>Ignores both incoming scores and returns <see cref="myScore"/> for the document.</summary>
        public override float CustomScore(int doc, float subQueryScore, float valSrcScore)
        {
            float score = myScore(doc);
            return score;
        }

        /// <summary>Ignores all incoming scores and returns <see cref="myScore"/> for the document.</summary>
        public override float CustomScore(int doc, float subQueryScore, float[] valSrcScores)
        {
            float score = myScore(doc);
            return score;
        }

        /// <summary>
        /// Placeholder scoring used only to demonstrate the run: documents with an id
        /// below 10 score 1, every other document scores 0.
        /// </summary>
        /// <param name="doc">Document id being scored.</param>
        /// <returns>1 for ids below 10, otherwise 0.</returns>
        public float myScore(int doc)
        {
            return doc < 10 ? MatchFloat : NoMatchFloat;
        }
    }

如果有人能就如何讓Lucene不返回這些文檔提供建議,我們將不勝感激。在此先行致謝。

回答

6

您可以編寫一個自定義Collector,只收集得分大於0的文檔,然後把這個收集器的實例傳給Search()方法。此處(here)有一個這類Collector的實現。

不過,文檔(documentation)建議:如果您並不需要全部結果,就不要採用這種方案。您的情況很可能正是如此,因為您只取了前1000個文檔。