2012-10-05 46 views
3

我有一個UUID字段,我以以下格式添加到我的文檔中:372d325c-e01b-432f-98bd-bc4c949f15b8。但是,當我嘗試通過UUID查詢文檔時,無論如何嘗試轉義表達式,它都不會返回它們。例如:在lucene中搜索UUID不起作用

+uuid:372d325c-e01b-432f-98bd-bc4c949f15b8 
+uuid:"372d325c-e01b-432f-98bd-bc4c949f15b8" 
+uuid:372d325c\-e01b\-432f\-98bd\-bc4c949f15b8 
+uuid:(372d325c-e01b-432f-98bd-bc4c949f15b8) 
+uuid:("372d325c-e01b-432f-98bd-bc4c949f15b8") 

甚至完全跳過QueryParser、直接像這樣使用TermQuery也不行:

new TermQuery(new Term("uuid", uuid.toString())) 

或者

new TermQuery(new Term("uuid", QueryParser.escape(uuid.toString()))) 

以上這些搜索都不會返回任何文檔,但如果我只搜索UUID的某一部分,就會返回文檔。例如,以下這些查詢會返回結果:

+uuid:372d325c 
+uuid:e01b 
+uuid:432f 

我應該如何索引這些文檔,才能通過UUID把它們查出來?我考慮過重新格式化UUID以去掉連字符,但還沒有實現。

+0

你是否檢查該字段是如何獲取索引的?是否有可能uuid被lucene標記器拉開? – jtahlborn

+0

以下是我目前將UUID添加到索引的方式:doc.add(new Field("uuid", id.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED))。我在另一個項目中使用完全相同的方案,運行良好,不同之處在於那個項目中的ID不是UUID,也不包含連字符。 – chubbsondubs

+0

如果該字段未被分析(因此未被分詞),那麼查詢 +uuid:372d325c 應該返回空結果才對。 一般規則是確保索引和搜索時使用相同的分析器。 您是否確認過:使用Field.Index.NOT_ANALYZED進行索引,然後使用 new TermQuery(new Term("uuid", uuid.toString())) 進行搜索時,確實返回空結果? –

回答

1

我讓它正常工作的唯一方法是使用WhitespaceAnalyzer而不是StandardAnalyzer,然後像這樣使用TermQuery:

// Build the writer configuration with WhitespaceAnalyzer instead of StandardAnalyzer;
// per the surrounding answer, this keeps the hyphenated UUID from being split into pieces.
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, new WhitespaceAnalyzer(Version.LUCENE_36)) 
     // CREATE_OR_APPEND: create a new index if none exists, otherwise append to the existing one.
     .setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); 
// `writer` and `directory` are declared elsewhere (not shown in this snippet).
writer = new IndexWriter(directory, config); 

然後搜索:

TopDocs docs = searcher.search(new TermQuery(new Term("uuid", uuid.toString())), 1); 

WhitespaceAnalyzer可以防止Lucene按連字符把UUID拆開。另一種選擇是去掉UUID中的連字符,但使用WhitespaceAnalyzer也能達到我的目的。

0

按照Lucene Query Syntax rules,查詢

+uuid:372d325c\-e01b\-432f\-98bd\-bc4c949f15b8 

應該工作。

我猜如果它不起作用,那是因爲文檔插入索引時,uuid字段沒有按預期的方式填充。你能確定這個字段裏究竟插入了什麼嗎?您可以使用Luke來瀏覽索引,並查看uuid字段實際存儲的值。

+0

我已在Luke中確認該字段的值已保存並存在於文檔中。另一個佐證是:我可以通過搜索原始問題中提到的UUID片段來取回文檔。 – chubbsondubs

0

如果您計劃將UUID字段作爲查找鍵,則需要讓Lucene將整個字段索引爲單個字符串,而不進行標記化。這是通過爲您的UUID字段設置正確的FieldType來完成的。在Lucene 4+中,你可以使用StringField。

import java.io.IOException; 
import java.util.UUID; 
import junit.framework.Assert; 
import org.apache.lucene.analysis.Analyzer; 
import org.apache.lucene.analysis.standard.StandardAnalyzer; 
import org.apache.lucene.document.Document; 
import org.apache.lucene.document.Field; 
import org.apache.lucene.document.StringField; 
import org.apache.lucene.document.TextField; 
import org.apache.lucene.index.DirectoryReader; 
import org.apache.lucene.index.IndexWriter; 
import org.apache.lucene.index.IndexWriterConfig; 
import org.apache.lucene.index.Term; 
import org.apache.lucene.queryparser.classic.ParseException; 
import org.apache.lucene.queryparser.classic.QueryParser; 
import org.apache.lucene.search.IndexSearcher; 
import org.apache.lucene.search.Query; 
import org.apache.lucene.search.TopDocs; 
import org.apache.lucene.store.Directory; 
import org.apache.lucene.store.RAMDirectory; 
import org.apache.lucene.util.Version; 
import org.junit.Test; 

/**
 * Demonstrates using a UUID as an exact-match lookup key in a Lucene index.
 *
 * <p>Written against Lucene 4.7 on Java 7. The UUID is indexed as a {@link StringField},
 * so the whole value is indexed as a single un-tokenized term and can be targeted with a
 * {@link Term} (here via {@link IndexWriter#updateDocument}).
 */
public class LuceneUUIDFieldLookupTest {

    private Directory directory;
    private Analyzer analyzer;

    /**
     * Indexes a document keyed by a random UUID, replaces it through the UUID term, and
     * verifies that the replacement is the only live document left in the index.
     *
     * @throws IOException    if the index cannot be written or read
     * @throws ParseException if a query string cannot be parsed
     */
    @Test
    public void testUsingUUIDAsLookupKey() throws IOException, ParseException {
        directory = new RAMDirectory();
        analyzer = new StandardAnalyzer(Version.LUCENE_47);

        UUID docUUID = UUID.randomUUID();
        String docContentText1 = "Stack Overflow is a question and answer site for professional and enthusiast programmers.";

        index(docUUID, docContentText1);

        QueryParser parser = new QueryParser(Version.LUCENE_47, MyIndexedFields.DOC_TEXT_FIELD.name(), analyzer);

        Document docRetrieved1 = findOnlyMatch(parser.parse("programmers"));
        Assert.assertEquals(docContentText1, docRetrieved1.get(MyIndexedFields.DOC_TEXT_FIELD.name()));
        Assert.assertEquals(docUUID.toString(), docRetrieved1.get(MyIndexedFields.MY_UUID_FIELD.name()));

        String docContentText2 = "TechCrunch is a leading technology media property, dedicated to ... according to a new report from the Wall Street Journal confirmed by Google to TechCrunch.";
        reindex(docUUID, docContentText2);

        // A fresh reader is opened inside findOnlyMatch: a reader opened before the update
        // only sees a snapshot of the directory taken at open time.
        Document docRetrieved2 = findOnlyMatch(parser.parse("technology"));
        Assert.assertEquals(docContentText2, docRetrieved2.get(MyIndexedFields.DOC_TEXT_FIELD.name()));
        Assert.assertEquals(docUUID.toString(), docRetrieved2.get(MyIndexedFields.MY_UUID_FIELD.name()));

        // The point of the test: the UUID term must have matched the first document so that
        // updateDocument replaced it instead of merely adding a second document.
        try (DirectoryReader reader = DirectoryReader.open(directory)) {
            Assert.assertEquals(1, reader.numDocs());
            TopDocs staleHits = new IndexSearcher(reader).search(parser.parse("programmers"), 1);
            Assert.assertEquals(0, staleHits.scoreDocs.length);
        }
    }

    /**
     * Runs {@code query} against a freshly opened reader, asserts that exactly one document
     * matches, and returns that document with its stored fields loaded.
     *
     * <p>The reader is closed even when an assertion fails.
     */
    private Document findOnlyMatch(Query query) throws IOException {
        try (DirectoryReader reader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            TopDocs hits = indexSearcher.search(query, Integer.MAX_VALUE);
            Assert.assertEquals(1, hits.scoreDocs.length);

            int internalDocId = hits.scoreDocs[0].doc;
            return indexSearcher.doc(internalDocId);
        }
    }

    /**
     * Replaces whatever document is currently indexed under {@code myUUID} with a new
     * document carrying {@code docContentText}.
     */
    private void reindex(UUID myUUID, String docContentText) throws IOException {
        try (IndexWriter indexWriter = new IndexWriter(directory, getIndexWriterConfig())) {
            Term uuidTerm = new Term(MyIndexedFields.MY_UUID_FIELD.name(), myUUID.toString());
            indexWriter.updateDocument(uuidTerm, buildDoc(myUUID, docContentText));
        } // writer is closed automatically
    }

    /** Adds a new document for {@code myUUID} without touching existing documents. */
    private void index(UUID myUUID, String docContentText) throws IOException {
        try (IndexWriter indexWriter = new IndexWriter(directory, getIndexWriterConfig())) {
            indexWriter.addDocument(buildDoc(myUUID, docContentText));
        } // writer is closed automatically
    }

    /** Creates a new writer configuration; a new one is built for every {@link IndexWriter}. */
    private IndexWriterConfig getIndexWriterConfig() {
        return new IndexWriterConfig(Version.LUCENE_47, analyzer)
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }

    /**
     * Builds the Lucene document: the UUID as a stored, un-tokenized key field and the text
     * as a stored, analyzed full-text field.
     */
    private Document buildDoc(UUID myUUID, String docContentText) {
        Document doc = new Document();

        // StringField indexes the whole value as one term; Store.YES lets it be read back
        // from search results.
        doc.add(new StringField(
                MyIndexedFields.MY_UUID_FIELD.name(),
                myUUID.toString(),
                Field.Store.YES));

        doc.add(new TextField(
                MyIndexedFields.DOC_TEXT_FIELD.name(),
                docContentText,
                Field.Store.YES));

        return doc;
    }

    /** Names of the fields stored in every indexed document. */
    enum MyIndexedFields {

        MY_UUID_FIELD,
        DOC_TEXT_FIELD
    }
}