2017-07-12 70 views
1

我在 CentOS 7.3.1611 (Core) 上安裝了 DSE 5.1,並安裝了 Solr 6.0.1。我的 annotator 項目的 UIMA 是在 Eclipse 中配置的。

(標籤:dse search apache uima integration)

我下面Solr的文檔: https://wiki.apache.org/solr/Solr4UIMA

用 UIMA 創建的項目包含一個簡單的註釋器(annotator),用於提取人名,並且已經在 UIMA 的 CAS Visual Debugger 中驗證可以正常工作。該項目已打包成 jar 並複製到 Solr 的 lib 目錄(DSE_HOME/solr/lib)中;Solr 的 UIMA 集成庫(SOLR_HOME/contrib/uima/lib、SOLR_HOME/contrib/uima/lucene-lib、SOLR_HOME/dist/solr-uima-version)也複製到了這個目錄。

我在 Cassandra 中創建的表如下:

-- Table indexed by the Solr core described in this post. Column names match
-- the field names declared in the Solr schema; solr_query has no schema field.
CREATE TABLE uima_solr.person_annotator (
    id int PRIMARY KEY,
    apellido text,
    nombre text,
    nombrecompleto text,
    solr_query text,
    -- Target of the UIMA field mapping ("uimaname") in the Solr config.
    uimaname set<text>
);

Solr 核心使用的 schema 如下:

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Index schema for the person annotator core. -->
<schema name="autoSolrSchema" version="1.5">
  <types>
    <!-- Text type: standard tokenization followed by lower-casing. -->
    <fieldType class="org.apache.solr.schema.TextField" name="TextField">
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>
    <fieldType class="org.apache.solr.schema.TrieIntField" name="TrieIntField"/>
  </types>
  <fields>
    <field indexed="true" multiValued="false" name="nombrecompleto" stored="true" type="TextField"/>
    <field indexed="true" multiValued="false" name="apellido" stored="true" type="TextField"/>
    <field indexed="true" multiValued="false" name="nombre" stored="true" type="TextField"/>
    <field docValues="true" indexed="true" multiValued="false" name="id" stored="true" type="TrieIntField"/>
    <!-- Catch-all search field. It receives values from three copyField rules
         below, so it must be multiValued: a single-valued copyField destination
         is rejected as soon as a second source value is copied into it. -->
    <field indexed="true" multiValued="true" name="all" stored="false" type="TextField"/>
    <!-- Receives the values mapped by the UIMA update processor. -->
    <field indexed="true" multiValued="true" name="uimaname" stored="true" type="TextField"/>
  </fields>
  <uniqueKey>id</uniqueKey>
  <defaultSearchField>all</defaultSearchField>
  <!-- Funnel the three name fields into the catch-all field. -->
  <copyField source="nombrecompleto" dest="all"/>
  <copyField source="apellido" dest="all"/>
  <copyField source="nombre" dest="all"/>
</schema>

而且solr_config如下:

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Solr core configuration for the person annotator core, including the
     UIMA update request processor chain. -->
<config>
  <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
  <luceneMatchVersion>LUCENE_6_0_0</luceneMatchVersion>
  <dseTypeMappingVersion>2</dseTypeMappingVersion>
  <directoryFactory class="solr.StandardDirectoryFactory" name="DirectoryFactory"/>

  <indexConfig>
    <rt>false</rt>
    <useCompoundFile>false</useCompoundFile>
    <ramBufferSizeMB>512</ramBufferSizeMB>
    <mergeFactor>10</mergeFactor>
    <reopenReaders>true</reopenReaders>
    <deletionPolicy class="solr.SolrDeletionPolicy">
      <str name="maxCommitsToKeep">1</str>
      <str name="maxOptimizedCommitsToKeep">0</str>
    </deletionPolicy>
    <infoStream file="INFOSTREAM.txt">false</infoStream>
  </indexConfig>

  <jmx/>

  <updateHandler class="solr.DirectUpdateHandler2">
    <autoSoftCommit>
      <maxTime>10000</maxTime>
    </autoSoftCommit>
  </updateHandler>

  <query>
    <maxBooleanClauses>1024</maxBooleanClauses>
    <filterCache class="solr.SolrFilterCache" highWaterMarkMB="256" lowWaterMarkMB="128"/>
    <enableLazyFieldLoading>true</enableLazyFieldLoading>
    <useColdSearcher>true</useColdSearcher>
    <maxWarmingSearchers>16</maxWarmingSearchers>
  </query>

  <requestDispatcher handleSelect="true">
    <requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048000"/>
    <httpCaching never304="true"/>
  </requestDispatcher>

  <!-- Query handlers: the default Solr search handler and the handler
       registered under the name "solr_query". -->
  <requestHandler class="solr.SearchHandler" default="true" name="search">
    <lst name="defaults">
      <int name="rows">10</int>
    </lst>
  </requestHandler>
  <requestHandler class="com.datastax.bdp.search.solr.handler.component.CqlSearchHandler" name="solr_query">
    <lst name="defaults">
      <int name="rows">10</int>
    </lst>
  </requestHandler>

  <!-- The plain /update handler is declared further down, bound to the UIMA chain. -->
  <!--<requestHandler class="solr.UpdateRequestHandler" name="/update"/>-->
  <requestHandler class="solr.UpdateRequestHandler" name="/update/csv" startup="lazy"/>
  <requestHandler class="solr.UpdateRequestHandler" name="/update/json" startup="lazy"/>
  <requestHandler class="solr.FieldAnalysisRequestHandler" name="/analysis/field" startup="lazy"/>
  <requestHandler class="solr.DocumentAnalysisRequestHandler" name="/analysis/document" startup="lazy"/>
  <requestHandler class="solr.admin.AdminHandlers" name="/admin/"/>
  <requestHandler class="solr.PingRequestHandler" name="/admin/ping">
    <lst name="invariants">
      <str name="qt">search</str>
      <str name="q">solrpingquery</str>
    </lst>
    <lst name="defaults">
      <str name="echoParams">all</str>
    </lst>
  </requestHandler>
  <requestHandler class="solr.DumpRequestHandler" name="/debug/dump">
    <lst name="defaults">
      <str name="echoParams">explicit</str>
      <str name="echoHandler">true</str>
    </lst>
  </requestHandler>

  <admin>
    <defaultQuery>*:*</defaultQuery>
  </admin>

  <!-- UIMA enrichment chain (marked as the default chain). It runs the analysis
       engine on the "nombrecompleto" field and writes the "name" feature of
       org.apache.uima.annotator.person annotations into the "uimaname" field,
       then logs and executes the update. -->
  <updateRequestProcessorChain default="true" name="uima">
    <processor class="org.apache.solr.uima.processor.UIMAUpdateRequestProcessorFactory">
      <lst name="uimaConfig">
        <lst name="runtimeParameters"></lst>
        <!-- Under $SOLR_HOME/solr/example-->
        <str name="analysisEngine">desc/descPersonAnnotator.xml</str>
        <!-- false: an analysis failure is reported instead of being skipped. -->
        <bool name="ignoreErrors">false</bool>
        <lst name="analyzeFields">
          <bool name="merge">false</bool>
          <arr name="fields">
            <str>nombrecompleto</str>
          </arr>
        </lst>
        <lst name="fieldMappings">
          <lst name="type">
            <str name="name">org.apache.uima.annotator.person</str>
            <lst name="mapping">
              <str name="feature">name</str>
              <str name="field">uimaname</str>
            </lst>
          </lst>
        </lst>
      </lst>
    </processor>
    <processor class="solr.LogUpdateProcessorFactory"/>
    <processor class="solr.RunUpdateProcessorFactory"/>
  </updateRequestProcessorChain>

  <!-- Bind /update to the UIMA chain. The parameter that selects a chain is
       "update.chain"; the older "update.processor" name used previously is
       not read by current Solr releases. -->
  <requestHandler class="solr.UpdateRequestHandler" name="/update">
    <lst name="defaults">
      <str name="update.chain">uima</str>
    </lst>
  </requestHandler>
</config>

當我使用 CQL 插入數據時,數據已在 Lucene 中正確索引,搜索也正常,但 UIMA 註釋器不起作用。另外,當我通過 Solr 命令把文檔上傳到 Solr 時,文檔被成功索引,在 CQL 中搜索也能正常工作,但 UIMA 仍然不起作用。我檢查了日誌,其中沒有顯示任何錯誤。

我在Solr Apache發行版(v6.0.1)中做了相同的過程,它按預期工作。

我沒有看到DSE_HOME中的核心文件以便於編輯文件,並且我沒有在DSE Search中成功完成UIMA集成,我在DSE搜索的核心配置中缺少了什麼?

回答

1

答案已經找到:不要遵循 Solr 文檔中的步驟,而是使用 DSE 文檔,特別是 Update request processor and field transformer (FIT) [2017-07-17]。

DSE 提供了一個示例,可在此處(here)找到 [2017-07-17]。

上述鏈接中的步驟展示瞭如何在索引時對字段進行映射:把 UIMA 項目的 jar 集成到 FIT 項目中,並將相應的類添加到 solrconfig.xml 中。

此過程允許使用Solr命令和cql與solr_query在添加的元數據上執行搜索。