2014-07-22 57 views
0

我是 GATE 的新手。我想從文檔創建一個語料庫。我有大量的文檔,因此每次都手動加載並創建語料庫非常困難。有沒有簡單的方法可以直接創建語料庫?(標題:在 GATE 中加載語言語料庫)

回答

0
/**
 * Data store that backs the corpus used for processing.
 * Starts out {@code null}; {@code LoadSerialDataStore} assigns it by either
 * creating a new serial data store or opening an existing one.
 */
private static DataStore ProcessingDataStore = null; 

/**
 * Corpus that documents are added to for processing.
 * Starts out {@code null}; {@code LoadSerialDataStore} assigns it by either
 * adopting a new corpus into {@code ProcessingDataStore} or loading one
 * from it.
 */
private static Corpus ProcessingCorpus = null; 


/**
 * Points {@code ProcessingDataStore} and {@code ProcessingCorpus} at the
 * serial data store rooted at the given directory.
 *
 * <p>If the directory does not exist it is created, a new serial data store
 * is created inside it, and a new corpus with an empty name is adopted into
 * the store and synced. If the directory already exists it is opened as an
 * existing serial data store and the corpus is loaded from it through
 * {@code CorpusUtil.loadSerialCorpus}.
 *
 * @param dataStoreDirPath path of the data store directory
 * @throws java.io.IOException if the directory is missing and cannot be
 *         created; previously this case only logged a warning and returned,
 *         leaving both static fields {@code null} for the caller to trip over
 * @throws Exception if any of the data store or corpus calls fail
 */
private static void LoadSerialDataStore(String dataStoreDirPath)
     throws Exception {
    File dataStoreDirFile = new File(dataStoreDirPath);
    String storageUrl = dataStoreDirFile.toURI().toString();
    String dataStoreClassName = SerialDataStore.class.getName();

    if (dataStoreDirFile.exists()) {
        // NOTE(review): an existing directory is assumed to already contain
        // a serial data store; an empty or unrelated directory is not
        // handled specially here - confirm this is acceptable.
        ProcessingDataStore = Factory.openDataStore(dataStoreClassName,
                storageUrl);
        // NOTE(review): Factory.openDataStore presumably returns an already
        // opened store, which would make this call redundant - confirm
        // against the GATE version in use before removing it.
        ProcessingDataStore.open();
        ProcessingCorpus = CorpusUtil.loadSerialCorpus(ProcessingDataStore);
        return;
    }

    if (!dataStoreDirFile.mkdirs()) {
        logger.log(Level.WARNING,
                "Data store directory creation false!");
        // Fail loudly: continuing would leave ProcessingDataStore and
        // ProcessingCorpus null and surface later as an unrelated
        // NullPointerException.
        throw new java.io.IOException(
                "Could not create data store directory: "
                        + dataStoreDirFile.getAbsolutePath());
    }

    ProcessingDataStore = Factory.createDataStore(dataStoreClassName,
            storageUrl);
    // NOTE(review): Factory.createDataStore presumably returns an already
    // opened store, which would make this call redundant - confirm against
    // the GATE version in use before removing it.
    ProcessingDataStore.open();
    // Adopt a fresh, unnamed corpus into the store and persist it right
    // away so that later runs find it when the store is re-opened.
    ProcessingCorpus = (Corpus) ProcessingDataStore.adopt(
            Factory.newCorpus(""), null);
    ProcessingDataStore.sync(ProcessingCorpus);
}

/**
 * Loads (or creates) the serial data store at the given directory and adds
 * one new document, built from {@code content}, to the processing corpus.
 *
 * <p>The document is given an empty feature map, added to
 * {@code ProcessingCorpus}, unloaded from the corpus again, and finally
 * released through {@code Factory.deleteResource}.
 *
 * @param dataStoreDirPath path of the data store directory
 * @throws IllegalStateException if the processing corpus is still
 *         {@code null} after loading the data store
 * @throws Exception if loading the data store or creating the document
 *         fails; the original omitted this clause although
 *         {@code LoadSerialDataStore} declares a checked exception, so the
 *         method could not compile
 */
private static void CreateSerialDataStore(String dataStoreDirPath)
     throws Exception {
     LoadSerialDataStore(dataStoreDirPath);
     if (ProcessingCorpus == null) {
          throw new IllegalStateException(
                    "Processing corpus was not initialised for data store directory: "
                              + dataStoreDirPath);
     }

     // NOTE(review): `content` is not declared anywhere in the visible
     // source; presumably a field holding the document text - confirm.
     Document tempDocument = Factory.newDocument(content);
     try {
          FeatureMap featureMap = Factory.newFeatureMap();
          tempDocument.setFeatures(featureMap);
          ProcessingCorpus.add(tempDocument);
          // NOTE(review): unloading is presumably what writes the document
          // to the data store and frees its memory - confirm against the
          // corpus implementation returned by the data store.
          ProcessingCorpus.unloadDocument(tempDocument);
     } finally {
          // Release the temporary document even when adding or unloading
          // fails, so a failed run does not leave the resource registered.
          Factory.deleteResource(tempDocument);
     }
}