使用 Lucene 3.1 對 MS Excel 檔案建立索引並進行搜索

我有一個 MS Excel 工作表，包含以下幾列：

title,cast,director,genre.

這個 Excel 工作表使用 JXL 庫解析。索引建立的過程正常，但是當我搜索時，總是得到 0 個匹配結果。我不知道自己哪裡出錯了。代碼如下：

import java.io.File;
import java.io.IOException;
import java.util.Collections;

import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;
import jxl.read.biff.BiffException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class ExcelParser { 

    Directory index; 
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31); 
    IndexWriterConfig c = new IndexWriterConfig(Version.LUCENE_31, analyzer); 

    public void parse(String filePath) throws IndexOutOfBoundsException, 
      BiffException, IOException { 
     index = FSDirectory.open(new File("d:\\index")); 
     Sheet contentSheet = Workbook.getWorkbook(new File(filePath)).getSheet(
       0); 
     indexDocs(contentSheet); 

    } 

    void indexDocs(Sheet contentSheet) throws CorruptIndexException, 
      IOException { 
     String currentColumn = ""; 
     IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, 
       analyzer); 
     IndexWriter writer = new IndexWriter(index, iwc); 

     for (int i = 0; i < contentSheet.getColumns(); i++) { 
      Cell[] xlCells = contentSheet.getColumn(i); 
      currentColumn = xlCells[0].getContents(); 
      StringBuffer sb = new StringBuffer(); 

      for (int j = 1; j < xlCells.length; j++) { 
       sb.append(xlCells[j].getContents() + " "); 

      } 
      addDoc(writer, sb.toString(), currentColumn); 


     } 
     writer.close(); 
    } 





    void searcher(String querystr, String onField) throws ParseException, 
      CorruptIndexException, IOException { 

     IndexSearcher searcher = new IndexSearcher(FSDirectory.open(new File(
       "d:\\index"))); 

     Query q = new QueryParser(Version.LUCENE_31, onField, analyzer) 
       .parse(querystr); 

     int hitsPerPage = 2; 

     TopScoreDocCollector collector = TopScoreDocCollector.create(
       hitsPerPage, true); 

     searcher.search(q, collector); 
     ScoreDoc[] hits = collector.topDocs().scoreDocs; 
     System.out.println("Found " + hits.length + " hits."); 
     for (int i = 0; i < hits.length; ++i) { 
      int docId = hits[i].doc; 
      Document d = searcher.doc(docId); 
      System.out.println((i + 1) + ". " + d.get("title")); 
     } 
     searcher.close(); 

    } 

    private static void addDoc(IndexWriter w, String value, String fieldName) 
      throws IOException { 
     Document doc = new Document(); 
     doc.add(new Field(fieldName, value, Field.Store.YES, 
       Field.Index.ANALYZED)); 
     w.addDocument(doc); 
    } 

    public static void main(String[] args) throws IndexOutOfBoundsException, 
      BiffException, IOException { 
     ExcelParser p = new ExcelParser(); 

     p.parse("d:\\movieList.xls"); 

     try { 
      p.searcher("the", "title"); 
     } catch (ParseException e) { 

      e.printStackTrace(); 
     } 

    } 

}

來源

2011-10-01 Prashanth B C