2012-09-25 58 views
2

我剛接觸 Lucene,所以不確定這是否可行:我有一個索引,想在索引的某個子集(該子集由過濾器定義)中取得某個短語出現的總次數。我可以用 FilteredQuery 搭配我的過濾器和 PhraseQuery 來搜索該短語,從而統計出包含此短語的文檔數,但我似乎找不到方法來計算每個文檔中的匹配次數。(標題:查找與 Lucene 查詢匹配的總數)

回答

3

您可以這樣做,詳情請參閱LUCENE-2590

例如,您可以參考該功能的單元測試(unit tests)中的示例代碼。

下面是我複製過來的、與短語搜索相關的代碼。

這是收集器(Collector):

/**
 * A {@link Collector} decorator that, for every collected document, records the
 * frequency reported by each registered sub-scorer and then forwards the
 * document to the wrapped collector.
 *
 * <p>Results are exposed through {@link #docCounts}: the key is the document id
 * offset by the current {@code docBase}, the value maps each registered
 * sub-query to the frequency its scorer reported for that document
 * ({@code 0.0f} when the scorer was not positioned on the document).
 */
private static class CountingCollector extends Collector {
    /** The wrapped collector; every callback is forwarded to it. */
    private final Collector other;
    /** Offset added to per-reader doc ids, as last passed to {@link #setNextReader}. */
    private int docBase;

    /** Per-document frequencies, keyed by {@code doc + docBase}. */
    public final Map<Integer, Map<Query, Float>> docCounts = new HashMap<Integer, Map<Query, Float>>();

    /** Sub-scorers registered by the visitor for the scorer passed to the latest {@link #setScorer} call. */
    private final Map<Query, Scorer> subScorers = new HashMap<Query, Scorer>();
    private final ScorerVisitor<Query, Query, Scorer> visitor = new MockScorerVisitor();
    /** Clause kinds whose scorers should be tracked. */
    private final EnumSet<Occur> collect;

    /**
     * Visitor that registers a child scorer under its query whenever the
     * clause kind it was visited as is contained in {@code collect}.
     */
    private class MockScorerVisitor extends ScorerVisitor<Query, Query, Scorer> {

        @Override
        public void visitOptional(Query parent, Query child, Scorer scorer) {
            if (collect.contains(Occur.SHOULD)) {
                subScorers.put(child, scorer);
            }
        }

        @Override
        public void visitProhibited(Query parent, Query child, Scorer scorer) {
            if (collect.contains(Occur.MUST_NOT)) {
                subScorers.put(child, scorer);
            }
        }

        @Override
        public void visitRequired(Query parent, Query child, Scorer scorer) {
            if (collect.contains(Occur.MUST)) {
                subScorers.put(child, scorer);
            }
        }

    }

    /**
     * Creates a collector that tracks sub-scorers of every {@link Occur} kind.
     *
     * @param other collector that receives every forwarded callback
     */
    public CountingCollector(Collector other) {
        this(other, EnumSet.allOf(Occur.class));
    }

    /**
     * Creates a collector that tracks only sub-scorers of the given kinds.
     *
     * @param other   collector that receives every forwarded callback
     * @param collect clause kinds whose scorers should be tracked
     */
    public CountingCollector(Collector other, EnumSet<Occur> collect) {
        this.other = other;
        this.collect = collect;
    }

    /**
     * Forwards the scorer to the wrapped collector and re-registers the
     * sub-scorers reachable from it.
     *
     * @param scorer the scorer whose sub-scorers are visited
     * @throws IOException if the wrapped collector throws it
     */
    @Override
    public void setScorer(Scorer scorer) throws IOException {
        other.setScorer(scorer);
        // Drop scorers registered for a previous scorer tree. Without this, a
        // clause that is not visited for the new scorer would keep its stale
        // scorer in the map, and collect() could report that scorer's freq()
        // whenever its old docID() happens to equal the current doc.
        // NOTE(review): presumably setScorer is invoked once per index segment
        // - confirm against the Lucene version in use.
        subScorers.clear();
        scorer.visitScorers(visitor);
    }

    /**
     * Records the frequency of every registered sub-scorer for {@code doc},
     * stores the result under {@code doc + docBase}, then forwards the
     * document to the wrapped collector.
     *
     * @param doc the document id being collected
     * @throws IOException if a scorer or the wrapped collector throws it
     */
    @Override
    public void collect(int doc) throws IOException {
        final Map<Query, Float> freqs = new HashMap<Query, Float>();
        for (Map.Entry<Query, Scorer> ent : subScorers.entrySet()) {
            Scorer value = ent.getValue();
            int matchId = value.docID();
            // A scorer not positioned on this document contributes 0.
            freqs.put(ent.getKey(), matchId == doc ? value.freq() : 0.0f);
        }
        docCounts.put(doc + docBase, freqs);
        other.collect(doc);
    }

    /**
     * Remembers the new doc-id offset and forwards the call to the wrapped
     * collector.
     *
     * @param reader  the reader passed through to the wrapped collector
     * @param docBase offset added to doc ids when storing into {@link #docCounts}
     * @throws IOException if the wrapped collector throws it
     */
    @Override
    public void setNextReader(IndexReader reader, int docBase)
            throws IOException {
        this.docBase = docBase;
        other.setNextReader(reader, docBase);
    }

    /** Returns whatever the wrapped collector answers. */
    @Override
    public boolean acceptsDocsOutOfOrder() {
        return other.acceptsDocsOutOfOrder();
    }
}

單元測試如下:

/**
 * Runs the phrase query "b c" on field "f" through a {@code CountingCollector}
 * and checks the recorded per-document frequencies: documents are verified in
 * pairs, expecting a frequency of 2 at the first index of each pair and a
 * frequency of 1 at the second.
 */
@Test
public void testPhraseQuery() throws Exception {
    PhraseQuery phrase = new PhraseQuery();
    phrase.add(new Term("f", "b"));
    phrase.add(new Term("f", "c"));

    CountingCollector counting =
            new CountingCollector(TopScoreDocCollector.create(10, true));
    s.search(phrase, null, counting);

    // Every document in the searcher is expected to have an entry.
    final int totalDocs = s.maxDoc();
    assertEquals(totalDocs, counting.docCounts.size());

    // Walk the documents two at a time: (first, second) of each pair.
    for (int first = 0; first < totalDocs; first += 2) {
        Map<Query, Float> firstFreqs = counting.docCounts.get(first);
        assertEquals(1, firstFreqs.size());
        assertEquals(2.0F, firstFreqs.get(phrase), FLOAT_TOLERANCE);

        Map<Query, Float> secondFreqs = counting.docCounts.get(first + 1);
        assertEquals(1, secondFreqs.size());
        assertEquals(1.0F, secondFreqs.get(phrase), FLOAT_TOLERANCE);
    }
}