0
我是 GATE 的新手。我想從文檔創建一個語料庫。我有大量的文檔,因此每次都手動加載文檔並創建語料庫非常困難。有沒有簡單的方法可以直接創建語料庫?——在 GATE 中加載語言語料庫
我是 GATE 的新手。我想從文檔創建一個語料庫。我有大量的文檔,因此每次都手動加載文檔並創建語料庫非常困難。有沒有簡單的方法可以直接創建語料庫?——在 GATE 中加載語言語料庫
/**
 * Data store that holds the saved records for processing.
 * Starts out as {@code null} and is assigned by {@code LoadSerialDataStore}.
 */
private static DataStore ProcessingDataStore = null;
/**
 * Corpus used for processing.
 * Starts out as {@code null} and is assigned by {@code LoadSerialDataStore}.
 */
private static Corpus ProcessingCorpus = null;
/**
 * Points {@code ProcessingDataStore} and {@code ProcessingCorpus} at the serial data store
 * located in the given directory.
 *
 * <p>If the directory already exists, the data store in it is opened and the corpus is
 * obtained through {@code CorpusUtil.loadSerialCorpus}. Otherwise the directory is created,
 * a new serial data store is created inside it, and a new corpus with an empty name is
 * adopted by that store and synced. If the directory cannot be created, a warning is logged
 * and the method returns without touching either static field.
 *
 * @param dataStoreDirPath path of the data store directory
 * @throws Exception if any of the data store or corpus operations fails
 */
private static void LoadSerialDataStore(String dataStoreDirPath)
throws Exception {
    final File storeDir = new File(dataStoreDirPath);
    final String storeClassName = SerialDataStore.class.getName();

    if (storeDir.exists()) {
        // The directory is already there: open the store it contains and load its corpus.
        final String existingStoreUrl = storeDir.toURI().toString();
        ProcessingDataStore = (SerialDataStore) Factory.openDataStore(
                storeClassName, existingStoreUrl);
        ProcessingDataStore.open();
        ProcessingCorpus = CorpusUtil.loadSerialCorpus(ProcessingDataStore);
        return;
    }

    if (!storeDir.mkdirs()) {
        logger.log(Level.WARNING,
                "Data store directory creation false!");
        return;
    }

    // The URL is built only after the directory exists, exactly as the code did before.
    final String newStoreUrl = storeDir.toURI().toString();
    ProcessingDataStore = (SerialDataStore) Factory.createDataStore(
            storeClassName, newStoreUrl);
    ProcessingDataStore.open();

    // Hand a new, empty-named corpus over to the store and persist it right away.
    final Object adoptedCorpus = ProcessingDataStore.adopt(Factory.newCorpus(""), null);
    ProcessingCorpus = (Corpus) adoptedCorpus;
    ProcessingDataStore.sync(ProcessingCorpus);
}
/**
 * Loads (or creates) the serial data store in the given directory and adds one new document
 * to the processing corpus.
 *
 * <p>The document is built from {@code content}, which is not declared in this method;
 * NOTE(review): presumably a field declared elsewhere in the class - confirm.
 *
 * <p>NOTE(review): the corpus itself is not synced to the data store after the document is
 * added here; confirm that a caller syncs {@code ProcessingCorpus} afterwards.
 *
 * @param dataStoreDirPath path of the data store directory
 * @throws IllegalStateException if the data store cannot be loaded, if no corpus is available
 *         after loading, or if the document cannot be created or added; the original failure
 *         is kept as the cause
 */
private static void CreateSerialDataStore(String dataStoreDirPath){
    Document tempDocument = null;
    try {
        LoadSerialDataStore(dataStoreDirPath);

        // LoadSerialDataStore returns without assigning the corpus when it cannot create the
        // directory, so fail with a clear message instead of a bare NullPointerException.
        if (ProcessingCorpus == null) {
            throw new IllegalStateException(
                    "No corpus available for data store directory: " + dataStoreDirPath);
        }

        tempDocument = Factory.newDocument(content);
        FeatureMap featureMap = Factory.newFeatureMap();
        tempDocument.setFeatures(featureMap);

        ProcessingCorpus.add(tempDocument);
        ProcessingCorpus.unloadDocument(tempDocument);
    } catch (RuntimeException e) {
        // Unchecked failures (including the guard above) propagate unchanged.
        throw e;
    } catch (Exception e) {
        // The method signature declares no checked exceptions, so wrap and keep the cause.
        throw new IllegalStateException(
                "Failed to add document to data store at " + dataStoreDirPath, e);
    } finally {
        // Always release the temporary document, even when adding or unloading failed.
        if (tempDocument != null) {
            Factory.deleteResource(tempDocument);
        }
    }
}