2016-09-23 23 views
1

過去幾天我開始在Java中使用Lucene 6.2.0,並嘗試解析包含標籤的.trectext文件。我相信我使用了正確的解析器來解析標籤,但是在創建一個新的Lucene文檔時遇到了這個奇怪的錯誤,它說 `Cannot instantiate the type Document`。我懇請有人幫我解決這個問題,在互聯網上我沒有看到這方面的任何建議,Eclipse也沒有提出任何更正。不知道我哪裏錯了。我粘貼了下面的代碼。(標題:無法實例化Document錯誤 — Lucene 6.2.0 API)

import javax.xml.parsers.DocumentBuilderFactory; 
import javax.xml.parsers.DocumentBuilder; 
import org.apache.lucene.analysis.Analyzer; 
import org.apache.lucene.analysis.standard.StandardAnalyzer; 
import org.apache.lucene.document.StringField; 
import org.apache.lucene.index.DirectoryReader; 
import org.apache.lucene.index.IndexReader; 
import org.apache.lucene.index.IndexWriter; 
import org.apache.lucene.index.IndexWriterConfig; 
import org.apache.lucene.index.IndexWriterConfig.OpenMode; 
import org.apache.lucene.index.MultiFields; 
import org.apache.lucene.index.Term; 
import org.apache.lucene.index.Terms; 
import org.apache.lucene.store.Directory; 
import org.apache.lucene.store.FSDirectory; 
import org.apache.lucene.document.Field; 
import org.w3c.dom.Document; 
import org.w3c.dom.NodeList; 
import org.w3c.dom.Node; 
import org.w3c.dom.Element; 
import java.io.File; 
import java.io.FileFilter; 
import java.io.FileReader; 
import java.io.IOException; 
import java.nio.file.Paths; 
import java.io.Reader; 
import java.util.Iterator; 

public class Indexing { 
public static void main(String argv[]) { 
    String[] tags={"DOCNO","HEAD","BYLINE","DATELINE","TEXT"}; 
    try { 
    String indexPath="C:\\Users\\sujit\\Desktop\\lucene_indexed"; //Path to create the Lucene Document Index. 
    File fXmlFile = new File("C:\\Users\\sujit\\Desktop\\sample.txt"); //Path to find the document to be indexed. 

    DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); 
    DocumentBuilder dBuilder = dbFactory.newDocumentBuilder(); 
    Document doc = dBuilder.parse(fXmlFile); 
    doc.getDocumentElement().normalize(); 

    Analyzer analyzer=new StandardAnalyzer(); 
    Directory dir = FSDirectory.open(Paths.get(indexPath)); 
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer); 
    iwc.setOpenMode(OpenMode.CREATE); 
    IndexWriter writer=new IndexWriter(dir,iwc); 
    String DocNo="" ,Head="",ByLine="",DateLine="",Text=""; 

    //System.out.println("Root element :" + doc.getDocumentElement().getNodeName()); 
    NodeList nList = doc.getElementsByTagName("DOC"); 
    //System.out.println("----------------------------"); 
    for (int temp = 0; temp < nList.getLength(); temp++) { 
     //**Place where I see the error ** 
     Document luceneDoc=new Document(); 

     Node nNode = nList.item(temp); 
     System.out.println("\nCurrent Element :" + nNode.getNodeName()); 
     if (nNode.getNodeType() == Node.ELEMENT_NODE) { 
      Element eElement = (Element) nNode; 
      for(int tagNo=0;tagNo<tags.length;tagNo++){ 

       for(int j=0;j<eElement.getElementsByTagName(tags[tagNo]).getLength();j++){ 
        if(tags[tagNo]==tags[0]) 
         DocNo+=eElement.getElementsByTagName(tags[tagNo]).item(j).getTextContent(); 
        else if(tags[tagNo]==tags[1]) 
         Head+=eElement.getElementsByTagName(tags[tagNo]).item(j).getTextContent(); 
        else if(tags[tagNo]==tags[2]) 
         ByLine+=eElement.getElementsByTagName(tags[tagNo]).item(j).getTextContent(); 
        else if(tags[tagNo]==tags[3]) 
         DateLine+=eElement.getElementsByTagName(tags[tagNo]).item(j).getTextContent(); 
        else if(tags[tagNo]==tags[4]) 
         Text+=eElement.getElementsByTagName(tags[tagNo]).item(j).getTextContent(); 
       } 
       System.out.println(DocNo+Head+ByLine+DateLine+Text+"\n"); 
       luceneDoc.add(new StringField("DOCNO",DocNo,Field.Store.YES)); 
       luceneDoc.add(new StringField("HEAD",Head,Field.Store.YES)); 
       luceneDoc.add(new StringField("BYLINE",ByLine,Field.Store.YES)); 
       luceneDoc.add(new StringField("DATELINE",DateLine,Field.Store.YES)); 
       luceneDoc.add(new StringField("TEXT",Text,Field.Store.YES)); 
       writer.addDocument(luceneDoc); 
       DocNo="";Head="";ByLine="";DateLine="";Text=""; 
      } 
      writer.close(); 
     } 
    } 
    } catch (Exception e) { 
    e.printStackTrace(); 
    } 
    } 

} 

回答

1

你導入的是org.w3c.dom.Document,而不是org.apache.lucene.document.Document。既然你似乎兩者都需要,你可以用完整的限定名(例如 `new org.apache.lucene.document.Document()`)引用其中的一個,而不是導入它。

+0

謝謝,這解決了我的問題。 –

相關問題