2
我是新來的lucene,所以我不知道它是否可能,但我有一個索引,我想獲得短語的總數在一個子集索引(該子集由過濾器定義)。 我可以使用FilteredQuery與我的過濾器和PhraseQuery來搜索短語,因此我可以計算出發生此短語的文檔,但我似乎無法找到一種方法來計算每個文檔的匹配數量。查找與lucene查詢匹配的總數
我是新來的lucene,所以我不知道它是否可能,但我有一個索引,我想獲得短語的總數在一個子集索引(該子集由過濾器定義)。 我可以使用FilteredQuery與我的過濾器和PhraseQuery來搜索短語,因此我可以計算出發生此短語的文檔,但我似乎無法找到一種方法來計算每個文檔的匹配數量。查找與lucene查詢匹配的總數
您可以這樣做,詳情請參閱LUCENE-2590。
例如,您可以查看該功能的unit tests示例代碼。
我複製下面短語搜索相關的代碼,
這是集電極,
private static class CountingCollector extends Collector {
private final Collector other;
private int docBase;
public final Map<Integer, Map<Query, Float>> docCounts = new HashMap<Integer, Map<Query, Float>>();
private final Map<Query, Scorer> subScorers = new HashMap<Query, Scorer>();
private final ScorerVisitor<Query, Query, Scorer> visitor = new MockScorerVisitor();
private final EnumSet<Occur> collect;
private class MockScorerVisitor extends ScorerVisitor<Query, Query, Scorer> {
@Override
public void visitOptional(Query parent, Query child, Scorer scorer) {
if (collect.contains(Occur.SHOULD))
subScorers.put(child, scorer);
}
@Override
public void visitProhibited(Query parent, Query child, Scorer scorer) {
if (collect.contains(Occur.MUST_NOT))
subScorers.put(child, scorer);
}
@Override
public void visitRequired(Query parent, Query child, Scorer scorer) {
if (collect.contains(Occur.MUST))
subScorers.put(child, scorer);
}
}
public CountingCollector(Collector other) {
this(other, EnumSet.allOf(Occur.class));
}
public CountingCollector(Collector other, EnumSet<Occur> collect) {
this.other = other;
this.collect = collect;
}
@Override
public void setScorer(Scorer scorer) throws IOException {
other.setScorer(scorer);
scorer.visitScorers(visitor);
}
@Override
public void collect(int doc) throws IOException {
final Map<Query, Float> freqs = new HashMap<Query, Float>();
for (Map.Entry<Query, Scorer> ent : subScorers.entrySet()) {
Scorer value = ent.getValue();
int matchId = value.docID();
freqs.put(ent.getKey(), matchId == doc ? value.freq() : 0.0f);
}
docCounts.put(doc + docBase, freqs);
other.collect(doc);
}
@Override
public void setNextReader(IndexReader reader, int docBase)
throws IOException {
this.docBase = docBase;
other.setNextReader(reader, docBase);
}
@Override
public boolean acceptsDocsOutOfOrder() {
return other.acceptsDocsOutOfOrder();
}
}
單元測試是,
@Test
public void testPhraseQuery() throws Exception {
PhraseQuery q = new PhraseQuery();
q.add(new Term("f", "b"));
q.add(new Term("f", "c"));
CountingCollector c = new CountingCollector(TopScoreDocCollector.create(10,
true));
s.search(q, null, c);
final int maxDocs = s.maxDoc();
assertEquals(maxDocs, c.docCounts.size());
for (int i = 0; i < maxDocs; i++) {
Map<Query, Float> doc0 = c.docCounts.get(i);
assertEquals(1, doc0.size());
assertEquals(2.0F, doc0.get(q), FLOAT_TOLERANCE);
Map<Query, Float> doc1 = c.docCounts.get(++i);
assertEquals(1, doc1.size());
assertEquals(1.0F, doc1.get(q), FLOAT_TOLERANCE);
}
}