0
有人可以提示我如何在 Lucene 中應用僞相關反饋(pseudo relevance feedback)嗎?我在 Google 上找不到太多幫助。我正在使用 Similarity(相似性)類。Lucene 中是否有可以通過擴展來實現反饋的類?謝謝。在 Lucene 中實現反饋
有人可以提示我如何在 Lucene 中應用僞相關反饋(pseudo relevance feedback)嗎?我在 Google 上找不到太多幫助。我正在使用 Similarity(相似性)類。Lucene 中是否有可以通過擴展來實現反饋的類?謝謝。在 Lucene 中實現反饋
假設你指的是這種相關反饋方法(this relevance feedback method):一旦你得到了原始查詢的 TopDocs,
就遍歷你想要的結果數量(假設我們想從原始查詢的前25個文檔中取出前25個詞項),並對每個文檔調用 IndexReader.getTermVectors(int),
它會返回你需要的信息。然後遍歷其中的每個字段及其詞項;把詞項頻率存儲在哈希映射中,是我首先想到的實現方式。
類似這樣:
//Pseudo relevance feedback: run the query, harvest terms from the term vectors of
//the top hits, then build an expanded query from the best-scoring terms.

//Get the original results: the top 25 hits are treated as the relevant set
TopDocs docs = indexsearcher.search(query,25);
//Candidate expansion terms, keyed by "field:term"
HashMap<String,ScorePair> map = new HashMap<String,ScorePair>();
for (int i = 0; i < docs.scoreDocs.length; i++) {
    //getTermVectors returns null when the document was indexed without term vectors
    org.apache.lucene.index.Fields vectors = indexreader.getTermVectors(docs.scoreDocs[i].doc);
    if (vectors == null) {
        continue;
    }
    //Iterate fields for each result
    FieldsEnum fields = vectors.iterator();
    String fieldname;
    //next() yields null once the fields are exhausted; Java needs the explicit comparison
    while ((fieldname = fields.next()) != null) {
        //For each field, iterate its terms (null = do not reuse an existing enum)
        TermsEnum terms = fields.terms().iterator(null);
        while (terms.next() != null) {
            String text = terms.term().utf8ToString();
            //A term vector describes a single document, so its own docFreq() says
            //nothing about the whole index; ask the index reader instead.
            int docFreq = indexreader.docFreq(new Term(fieldname, text));
            //and store it
            putTermInMap(fieldname, text, docFreq, map);
        }
    }
}
List<ScorePair> byScore = new ArrayList<ScorePair>(map.values());
//Highest score first, so the loop below picks the strongest candidates
Collections.sort(byScore, Collections.reverseOrder());
BooleanQuery bq = new BooleanQuery();
//Perhaps we want to give the original query a bit of a boost
query.setBoost(5);
bq.add(query,BooleanClause.Occur.SHOULD);
//Never read past the end of the list when fewer than 25 terms were collected
int expansionTerms = Math.min(25, byScore.size());
for (int i = 0; i < expansionTerms; i++) {
    //Add the best found terms to the final query
    ScorePair pair = byScore.get(i);
    bq.add(new TermQuery(new Term(pair.field,pair.term)),BooleanClause.Occur.SHOULD);
}
}
//Say, we want to score based on tf/idf
/**
 * Records one more occurrence of a field/term pair in the candidate map.
 * The map key is "field:term". A pair seen for the first time gets a new
 * ScorePair built from freq; a pair already present only has its count bumped.
 */
void putTermInMap(String field, String term, int freq, Map<String,ScorePair> map) {
    final String compositeKey = field + ":" + term;
    if (!map.containsKey(compositeKey)) {
        map.put(compositeKey, new ScorePair(freq, field, term));
        return;
    }
    map.get(compositeKey).increment();
}
/**
 * A candidate expansion term (field + term text) together with the numbers used
 * to rank it. Natural ordering is ascending by {@link #score()}.
 * Deliberately a non-static inner class: the constructor reads the enclosing
 * instance's indexreader.
 */
private class ScorePair implements Comparable<ScorePair> {
    //How many times this field:term pair has been recorded
    int count = 0;
    double idf;
    String field;
    String term;

    /**
     * @param docfreq document frequency of the term, used for the idf factor
     * @param field   field the term belongs to
     * @param term    term text
     */
    ScorePair(int docfreq, String field, String term) {
        count++;
        //Standard Lucene idf calculation, squared. This is calculated once per field:term.
        //Java has no power operator for doubles ('^' is XOR), so square by multiplying.
        double rawIdf = 1 + Math.log(indexreader.numDocs()/((double)docfreq + 1));
        idf = rawIdf * rawIdf;
        this.field = field;
        this.term = term;
    }

    void increment() { count++; }

    /** TF/IDF style score: sqrt(count) * idf. */
    double score() {
        return Math.sqrt(count) * idf;
    }

    /** Orders by score, lowest first; equal scores compare as 0 as the contract requires. */
    @Override
    public int compareTo(ScorePair pair) {
        return Double.compare(this.score(), pair.score());
    }
}
(我並不是說這段代碼在當前狀態下可以直接運行)
謝謝,我會嘗試一下。 – j10
你能說得詳細一點嗎?你想要做什麼? – javanna
是的,請定義「僞反饋」 – phani
我想通過使用反饋來擴展我的查詢。我需要某種方法來擴展查詢(任何方法都可以) – j10