2012-07-05 95 views
1

我想索引存儲在數據庫(MySQL)中的二進制文件,但沒有成功。我的 Solr 配置如下。Solr 的文件結構(問題:從數據庫索引二進制文件,沒有任何報錯):

+solr
    +bookledger (core0)
        -conf
        +lib (all necessary libraries)
            +contrib
            +dist
    +data
        +bookledger
            -index
            -spellchecker
        +ktimatologio
            -index
            -spellchecker
    +ktimatologio (core1)
        -conf
        +lib (all necessary libraries)
            +contrib
            +dist

正如你所看到的,這個配置是多核(multi-core)Solr 設置。在 bookledger(core0)上,我已經成功索引了存儲在數據庫中的二進制文件。在第二個核心上,當我執行全量導入(full-import)時,沒有看到任何錯誤!但是當我查詢二進制內容時,輸出卻是這樣的:[B@660b1b14。我在這裏漏掉了什麼?

先謝謝了,

湯姆

solr.xml 文件:

<?xml version="1.0" encoding="UTF-8"?>
<!-- Multi-core registry: declares the two cores and where each keeps its
     instance directory and its index data. -->
<solr persistent="false">
  <cores adminPath="/admin/cores">
    <core name="ktimatologio"
          instanceDir="ktimatologio"
          dataDir="../data/ktimatologio"/>
    <core name="bookledger"
          instanceDir="bookledger"
          dataDir="../data/bookledger"/>
  </cores>
</solr>

solrconfig.xml 文件:

<?xml version="1.0" encoding="UTF-8" ?> 

<config> 

    <!-- Defaults to "true" unless the solr.abortOnConfigurationError
         property supplies another value. -->
    <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>

    <luceneMatchVersion>LUCENE_36</luceneMatchVersion>

    <!-- Solr contrib jars, picked from lib/dist/ by file-name pattern. -->
    <lib regex="apache-solr-cell-\d.*\.jar" dir="lib/dist/"/>
    <lib regex="apache-solr-clustering-\d.*\.jar" dir="lib/dist/"/>
    <lib regex="apache-solr-dataimporthandler-\d.*\.jar" dir="lib/dist/"/>
    <lib regex="apache-solr-langid-\d.*\.jar" dir="lib/dist/"/>
    <lib regex="apache-solr-velocity-\d.*\.jar" dir="lib/dist/"/>
    <lib regex="apache-solr-dataimporthandler-extras-\d.*\.jar" dir="lib/dist/"/>

    <!-- Third-party dependencies of each contrib: every jar in the directory. -->
    <lib regex=".*\.jar" dir="lib/contrib/extraction/lib/"/>
    <lib regex=".*\.jar" dir="lib/contrib/clustering/lib/"/>
    <lib regex=".*\.jar" dir="lib/contrib/dataimporthandler/lib/"/>
    <lib regex=".*\.jar" dir="lib/contrib/langid/lib/"/>
    <lib regex=".*\.jar" dir="lib/contrib/velocity/lib/"/>
    <!-- NOTE(review): the two tika patterns below point at the same directory
         as the ".*\.jar" entry for lib/contrib/extraction/lib/ above, so they
         look redundant; kept unchanged. -->
    <lib regex="tika-core-\d.*\.jar" dir="lib/contrib/extraction/lib/"/>
    <lib regex="tika-parsers-\d.*\.jar" dir="lib/contrib/extraction/lib/"/>

    <!-- Empty default: the value comes from the solr.data.dir property when set. -->
    <dataDir>${solr.data.dir:}</dataDir>



    <!-- Directory implementation; solr.StandardDirectoryFactory unless the
         solr.directoryFactory property names another class. -->
    <directoryFactory class="${solr.directoryFactory:solr.StandardDirectoryFactory}"
                      name="DirectoryFactory"/>

    <!-- No index settings are overridden here. -->
    <indexConfig/>

    <jmx/>

    <!-- The default high-performance update handler; no commit policy is configured. -->
    <updateHandler class="solr.DirectUpdateHandler2"/>

    <query>
      <maxBooleanClauses>1024</maxBooleanClauses>

      <!-- Three caches, all with size and initialSize 512 and autowarmCount 0. -->
      <filterCache class="solr.FastLRUCache" size="512" initialSize="512" autowarmCount="0"/>
      <queryResultCache class="solr.LRUCache" size="512" initialSize="512" autowarmCount="0"/>
      <documentCache class="solr.LRUCache" size="512" initialSize="512" autowarmCount="0"/>

      <enableLazyFieldLoading>true</enableLazyFieldLoading>
      <queryResultWindowSize>20</queryResultWindowSize>
      <queryResultMaxDocsCached>200</queryResultMaxDocsCached>

      <!-- newSearcher: the list of warming queries is empty. -->
      <listener event="newSearcher" class="solr.QuerySenderListener">
        <arr name="queries"/>
      </listener>

      <!-- firstSearcher: a single placeholder warming query. -->
      <listener event="firstSearcher" class="solr.QuerySenderListener">
        <arr name="queries">
          <lst>
            <str name="q">static firstSearcher warming in solrconfig.xml</str>
          </lst>
        </arr>
      </listener>

      <useColdSearcher>false</useColdSearcher>
      <maxWarmingSearchers>2</maxWarmingSearchers>
    </query>

    <requestDispatcher>
      <!-- NOTE(review): remote streaming is switched on and the multipart
           upload limit is 2048000 KB; confirm both are intended for this
           deployment. -->
      <requestParsers multipartUploadLimitInKB="2048000"
                      enableRemoteStreaming="true"/>
    </requestDispatcher>

    <!-- DataImportHandler endpoint; its import definition is read from data-config.xml. -->
    <requestHandler class="org.apache.solr.handler.dataimport.DataImportHandler"
                    name="/dataimport">
      <lst name="defaults">
        <str name="config">data-config.xml</str>
      </lst>
    </requestHandler>

    <!-- Main search endpoint: echoes explicit parameters and returns 100 rows by default. -->
    <requestHandler class="solr.SearchHandler" name="/select">
      <lst name="defaults">
        <str name="echoParams">explicit</str>
        <int name="rows">100</int>
      </lst>
    </requestHandler>

    <!-- Velocity-based browse UI.
         NOTE(review): apart from "id", the field names used in these defaults
         (text, features, name, sku, manu, cat, manu_exact, inStock, price,
         popularity, manufacturedate_dt) are not declared in the schema.xml
         shown on this page; confirm whether this handler is used by this core. -->
    <requestHandler class="solr.SearchHandler" name="/browse">
      <lst name="defaults">
        <str name="echoParams">explicit</str>

        <!-- VelocityResponseWriter settings -->
        <str name="wt">velocity</str>
        <str name="v.template">browse</str>
        <str name="v.layout">layout</str>
        <str name="title">Solritas</str>

        <!-- Query parsing -->
        <str name="df">text</str>
        <str name="defType">edismax</str>
        <str name="q.alt">*:*</str>
        <str name="rows">10</str>
        <str name="fl">*,score</str>

        <!-- More-like-this -->
        <str name="mlt.qf">
          text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
        </str>
        <str name="mlt.fl">text,features,name,sku,id,manu,cat</str>
        <int name="mlt.count">3</int>

        <str name="qf">
          text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
        </str>

        <!-- Faceting -->
        <str name="facet">on</str>
        <str name="facet.field">cat</str>
        <str name="facet.field">manu_exact</str>
        <str name="facet.query">ipod</str>
        <str name="facet.query">GB</str>
        <str name="facet.mincount">1</str>
        <str name="facet.pivot">cat,inStock</str>
        <str name="facet.range.other">after</str>

        <str name="facet.range">price</str>
        <int name="f.price.facet.range.start">0</int>
        <int name="f.price.facet.range.end">600</int>
        <int name="f.price.facet.range.gap">50</int>

        <str name="facet.range">popularity</str>
        <int name="f.popularity.facet.range.start">0</int>
        <int name="f.popularity.facet.range.end">10</int>
        <int name="f.popularity.facet.range.gap">3</int>

        <str name="facet.range">manufacturedate_dt</str>
        <str name="f.manufacturedate_dt.facet.range.start">NOW/YEAR-10YEARS</str>
        <str name="f.manufacturedate_dt.facet.range.end">NOW</str>
        <str name="f.manufacturedate_dt.facet.range.gap">+1YEAR</str>
        <str name="f.manufacturedate_dt.facet.range.other">before</str>
        <str name="f.manufacturedate_dt.facet.range.other">after</str>

        <!-- Highlighting defaults -->
        <str name="hl">on</str>
        <str name="hl.fl">text features name</str>
        <str name="f.name.hl.fragsize">0</str>
        <str name="f.name.hl.alternateField">name</str>
      </lst>

      <!-- The spellcheck component runs after the standard components. -->
      <arr name="last-components">
        <str>spellcheck</str>
      </arr>
    </requestHandler>


    <!-- Update endpoints, one per input format. -->
    <requestHandler class="solr.XmlUpdateRequestHandler" name="/update"/>

    <requestHandler class="solr.BinaryUpdateRequestHandler" name="/update/javabin"/>

    <requestHandler class="solr.CSVRequestHandler" name="/update/csv" startup="lazy"/>

    <requestHandler class="solr.JsonUpdateRequestHandler" name="/update/json" startup="lazy"/>

    <!-- Extraction endpoint (Solr Cell). -->
    <requestHandler class="solr.extraction.ExtractingRequestHandler"
                    name="/update/extract"
                    startup="lazy">
      <lst name="defaults">
        <!-- The extracted body is mapped to the "text" field; a stored field
             is needed to return the text or to highlight it. -->
        <str name="fmap.content">text</str>
        <str name="lowernames">true</str>
        <str name="uprefix">ignored_</str>

        <!-- Link hrefs are captured; div attributes go to ignored_. -->
        <str name="captureAttr">true</str>
        <str name="fmap.a">links</str>
        <str name="fmap.div">ignored_</str>
      </lst>
    </requestHandler>

    <requestHandler class="solr.XsltUpdateRequestHandler" name="/update/xslt" startup="lazy"/>

    <!-- Analysis endpoints. -->
    <requestHandler class="solr.FieldAnalysisRequestHandler" name="/analysis/field" startup="lazy"/>
    <requestHandler class="solr.DocumentAnalysisRequestHandler" name="/analysis/document" startup="lazy"/>

    <!-- Registers all the standard admin request handlers under /admin/. -->
    <requestHandler class="solr.admin.AdminHandlers" name="/admin/"/>

    <!-- Ping / health check: the query is fixed through "invariants". -->
    <requestHandler class="solr.PingRequestHandler" name="/admin/ping">
      <lst name="invariants">
        <str name="q">solrpingquery</str>
      </lst>
      <lst name="defaults">
        <str name="echoParams">all</str>
      </lst>
    </requestHandler>

    <!-- Echoes the request contents back to the client. -->
    <requestHandler class="solr.DumpRequestHandler" name="/debug/dump">
      <lst name="defaults">
        <str name="echoParams">explicit</str>
        <str name="echoHandler">true</str>
      </lst>
    </requestHandler>


    <!-- Spell checking.
         NOTE(review): the field type "textSpell" and the field "name" are not
         declared in the schema.xml shown on this page; confirm they exist for
         this core. -->
    <searchComponent class="solr.SpellCheckComponent" name="spellcheck">
      <str name="queryAnalyzerFieldType">textSpell</str>

      <lst name="spellchecker">
        <str name="name">default</str>
        <str name="field">name</str>
        <str name="spellcheckIndexDir">spellchecker</str>
      </lst>
    </searchComponent>

    <!-- Search handler that appends the spellcheck component. -->
    <requestHandler class="solr.SearchHandler" name="/spell" startup="lazy">
      <lst name="defaults">
        <str name="df">text</str>
        <str name="spellcheck.onlyMorePopular">false</str>
        <str name="spellcheck.extendedResults">false</str>
        <str name="spellcheck.count">1</str>
      </lst>
      <arr name="last-components">
        <str>spellcheck</str>
      </arr>
    </requestHandler>

    <!-- Term vectors. -->
    <searchComponent class="solr.TermVectorComponent" name="tvComponent"/>

    <!-- Search handler that appends the term vector component. -->
    <requestHandler class="solr.SearchHandler" name="/tvrh" startup="lazy">
      <lst name="defaults">
        <str name="df">text</str>
        <bool name="tv">true</bool>
      </lst>
      <arr name="last-components">
        <str>tvComponent</str>
      </arr>
    </requestHandler>


    <!-- Clustering component; enabled only when solr.clustering.enabled is
         set (the default is false). -->
    <searchComponent class="solr.clustering.ClusteringComponent"
                     name="clustering"
                     enable="${solr.clustering.enabled:false}">
      <!-- Engine "default" (only one engine may carry that name): Lingo -->
      <lst name="engine">
        <str name="name">default</str>
        <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
        <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
        <str name="carrot.lexicalResourcesDir">clustering/carrot2</str>
        <str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
      </lst>

      <!-- Engine "stc": STC -->
      <lst name="engine">
        <str name="name">stc</str>
        <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
      </lst>
    </searchComponent>

    <!-- Search handler that appends the clustering component; gated by the
         same solr.clustering.enabled property. -->
    <requestHandler class="solr.SearchHandler"
                    name="/clustering"
                    enable="${solr.clustering.enabled:false}"
                    startup="lazy">
      <lst name="defaults">
        <bool name="clustering">true</bool>
        <str name="clustering.engine">default</str>
        <bool name="clustering.results">true</bool>

        <!-- Title field -->
        <str name="carrot.title">name</str>
        <str name="carrot.url">id</str>
        <!-- Field to cluster on -->
        <str name="carrot.snippet">features</str>
        <!-- Produce summaries -->
        <bool name="carrot.produceSummary">true</bool>
        <!-- Maximum number of labels per cluster (disabled) -->
        <!--<int name="carrot.numDescriptions">5</int>-->
        <!-- Sub-clusters are not produced -->
        <bool name="carrot.outputSubClusters">false</bool>

        <str name="df">text</str>
        <str name="defType">edismax</str>
        <str name="qf">
          text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
        </str>
        <str name="q.alt">*:*</str>
        <str name="rows">10</str>
        <str name="fl">*,score</str>
      </lst>
      <arr name="last-components">
        <str>clustering</str>
      </arr>
    </requestHandler>


    <!-- Terms component and a handler that runs only that component. -->
    <searchComponent class="solr.TermsComponent" name="terms"/>

    <requestHandler class="solr.SearchHandler" name="/terms" startup="lazy">
      <lst name="defaults">
        <bool name="terms">true</bool>
      </lst>
      <arr name="components">
        <str>terms</str>
      </arr>
    </requestHandler>

    <!-- Query elevation: queries are analyzed with the "string" field type
         and the rules are read from elevate.xml. -->
    <searchComponent class="solr.QueryElevationComponent" name="elevator">
      <str name="queryFieldType">string</str>
      <str name="config-file">elevate.xml</str>
    </searchComponent>

    <!-- Search handler that appends the elevator component. -->
    <requestHandler class="solr.SearchHandler" name="/elevate" startup="lazy">
      <lst name="defaults">
        <str name="echoParams">explicit</str>
        <str name="df">text</str>
      </lst>
      <arr name="last-components">
        <str>elevator</str>
      </arr>
    </requestHandler>

    <!-- Highlighting Component

     http://wiki.apache.org/solr/HighlightingParameters
    -->
    <searchComponent class="solr.HighlightComponent" name="highlight">
      <highlighting>
        <!-- Standard fragmenter (the default): fragment size 100 -->
        <fragmenter name="gap"
                    default="true"
                    class="solr.highlight.GapFragmenter">
          <lst name="defaults">
            <int name="hl.fragsize">100</int>
          </lst>
        </fragmenter>

        <!-- Regular-expression-based fragmenter (for sentence extraction) -->
        <fragmenter name="regex"
                    class="solr.highlight.RegexFragmenter">
          <lst name="defaults">
            <!-- slightly smaller fragsizes work better because of slop -->
            <int name="hl.fragsize">70</int>
            <!-- allow 50% slop on fragment sizes -->
            <float name="hl.regex.slop">0.5</float>
            <!-- a basic sentence pattern -->
            <str name="hl.regex.pattern">[-\w ,/\n\&quot;&apos;]{20,200}</str>
          </lst>
        </fragmenter>

        <!-- Standard formatter (the default): wraps matches in <em> -->
        <formatter name="html"
                   default="true"
                   class="solr.highlight.HtmlFormatter">
          <lst name="defaults">
            <str name="hl.simple.pre"><![CDATA[<em>]]></str>
            <str name="hl.simple.post"><![CDATA[</em>]]></str>
          </lst>
        </formatter>

        <!-- Standard encoder -->
        <encoder name="html"
                 class="solr.highlight.HtmlEncoder"/>

        <!-- Standard fragListBuilder (the default) -->
        <fragListBuilder name="simple"
                         default="true"
                         class="solr.highlight.SimpleFragListBuilder"/>

        <!-- Single fragListBuilder -->
        <fragListBuilder name="single"
                         class="solr.highlight.SingleFragListBuilder"/>

        <!-- Default tag FragmentsBuilder -->
        <fragmentsBuilder name="default"
                          default="true"
                          class="solr.highlight.ScoreOrderFragmentsBuilder">
          <!--
          <lst name="defaults">
            <str name="hl.multiValuedSeparatorChar">/</str>
          </lst>
          -->
        </fragmentsBuilder>

        <!-- Multi-colored tag FragmentsBuilder.
             The second colour was spelled "lawgreen", which is not a CSS
             colour name; it is corrected to "lawngreen" here. -->
        <fragmentsBuilder name="colored"
                          class="solr.highlight.ScoreOrderFragmentsBuilder">
          <lst name="defaults">
            <str name="hl.tag.pre"><![CDATA[
       <b style="background:yellow">,<b style="background:lawngreen">,
       <b style="background:aquamarine">,<b style="background:magenta">,
       <b style="background:palegreen">,<b style="background:coral">,
       <b style="background:wheat">,<b style="background:khaki">,
       <b style="background:lime">,<b style="background:deepskyblue">]]></str>
            <str name="hl.tag.post"><![CDATA[</b>]]></str>
          </lst>
        </fragmentsBuilder>

        <!-- Default boundary scanner: scans up to 10 characters for one of
             the listed boundary characters (punctuation, space, tab, LF, CR). -->
        <boundaryScanner name="default"
                         default="true"
                         class="solr.highlight.SimpleBoundaryScanner">
          <lst name="defaults">
            <str name="hl.bs.maxScan">10</str>
            <str name="hl.bs.chars">.,!? &#9;&#10;&#13;</str>
          </lst>
        </boundaryScanner>

        <!-- Break-iterator boundary scanner: WORD boundaries, locale en/US -->
        <boundaryScanner name="breakIterator"
                         class="solr.highlight.BreakIteratorBoundaryScanner">
          <lst name="defaults">
            <str name="hl.bs.type">WORD</str>
            <str name="hl.bs.language">en</str>
            <str name="hl.bs.country">US</str>
          </lst>
        </boundaryScanner>
      </highlighting>
    </searchComponent>

    <!-- JSON responses are served with a text/plain content type. -->
    <queryResponseWriter name="json" class="solr.JSONResponseWriter">
      <str name="content-type">text/plain; charset=UTF-8</str>
    </queryResponseWriter>

    <!-- Velocity response writer, loaded lazily. -->
    <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"/>

    <!-- XSLT response writer; xsltCacheLifetimeSeconds is set to 5.
         A dangling comment terminator that preceded this element (the tail of
         a removed comment) has been dropped: it was being parsed as stray
         character data inside <config>. -->
    <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
      <int name="xsltCacheLifetimeSeconds">5</int>
    </queryResponseWriter>

    <!-- Admin UI: the default query matches every document. -->
    <admin>
      <defaultQuery>*:*</defaultQuery>
    </admin>

</config> 

schema.xml 文件:

<?xml version="1.0" encoding="UTF-8" ?> 

<schema name="ktimatologio" version="1.5"> 

    <!-- Field type declarations. Element names are spelled "fieldType"
         throughout; three declarations previously used the all-lowercase
         "fieldtype" spelling. -->
    <types>

      <!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
      <fieldType name="string" class="solr.StrField" sortMissingLast="true"/>

      <!-- boolean type: "true" or "false" -->
      <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>

      <!-- Binary data type. The data should be sent/retrieved as Base64 encoded Strings -->
      <fieldType name="binary" class="solr.BinaryField"/>

      <!-- Trie numeric types with precisionStep 0 -->
      <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
      <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
      <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
      <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>

      <!-- Trie numeric types with precisionStep 8 -->
      <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
      <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
      <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
      <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>

      <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>

      <!-- A Trie based date field for faster date range queries and date faceting. -->
      <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>

      <!-- Plain numeric and date types -->
      <fieldType name="pint" class="solr.IntField"/>
      <fieldType name="plong" class="solr.LongField"/>
      <fieldType name="pfloat" class="solr.FloatField"/>
      <fieldType name="pdouble" class="solr.DoubleField"/>
      <fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>

      <!-- Sortable numeric types -->
      <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
      <fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
      <fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
      <fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>

      <fieldType name="random" class="solr.RandomSortField" indexed="true"/>

      <!-- Greek -->
      <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
        <analyzer>
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <!-- greek specific lowercase for sigma -->
          <filter class="solr.GreekLowerCaseFilterFactory"/>
          <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
          <filter class="solr.GreekStemFilterFactory"/>
        </analyzer>
      </fieldType>

      <!-- Mixed English/Greek text type used by the document fields of this core.
           NOTE(review): the index and query analyzers apply the same
           English/Greek filters in a different order, and only the query
           analyzer expands synonyms; confirm that this is intended. -->
      <fieldType name="text_ktimatologio" class="solr.TextField" positionIncrementGap="100">
        <analyzer type="index">
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt" enablePositionIncrements="true"/>
          <filter class="solr.LowerCaseFilterFactory"/>
          <filter class="solr.EnglishPossessiveFilterFactory"/>
          <filter class="solr.GreekLowerCaseFilterFactory"/>
          <filter class="solr.GreekStemFilterFactory"/>
          <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
          <filter class="solr.PorterStemFilterFactory"/>
        </analyzer>

        <analyzer type="query">
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
          <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt" enablePositionIncrements="true"/>
          <filter class="solr.GreekLowerCaseFilterFactory"/>
          <filter class="solr.GreekStemFilterFactory"/>
          <filter class="solr.LowerCaseFilterFactory"/>
          <filter class="solr.EnglishPossessiveFilterFactory"/>
          <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
          <filter class="solr.PorterStemFilterFactory"/>
        </analyzer>
      </fieldType>

      <!-- Neither stored nor indexed -->
      <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField"/>

      <!-- Spatial and currency types -->
      <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
      <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
      <fieldType name="geohash" class="solr.GeoHashField"/>
      <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml"/>
    </types>



<!-- Document fields. All are indexed and stored; "content" is the only
     multi-valued field. -->
<fields>
  <!-- Identifiers -->
  <field name="id"            type="string"            indexed="true" stored="true" multiValued="false"/>
  <field name="solr_id"       type="string"            indexed="true" stored="true" multiValued="false"/>

  <!-- Analyzed text -->
  <field name="title"         type="text_ktimatologio" indexed="true" stored="true"/>
  <field name="model"         type="text_ktimatologio" indexed="true" stored="true" multiValued="false"/>
  <field name="type"          type="text_ktimatologio" indexed="true" stored="true"/>
  <field name="url"           type="text_ktimatologio" indexed="true" stored="true"/>
  <field name="content"       type="text_ktimatologio" indexed="true" stored="true" multiValued="true"/>

  <!-- Kept as a plain string -->
  <field name="last_modified" type="string"            indexed="true" stored="true"/>
</fields>

<!-- Documents are keyed by solr_id. -->
<uniqueKey>solr_id</uniqueKey>

<!-- Queries without an explicit field search "content"; terms are OR-ed by default. -->
<defaultSearchField>content</defaultSearchField>
<solrQueryParser defaultOperator="OR"/>

<!-- The title is also copied into "content". -->
<copyField source="title" dest="content"/>

</schema> 

data-config.xml 文件:

<dataConfig> 

    <!-- JDBC connection to the MySQL database "ktimatologio".
         It is named "db" so that the dataSource="db" references on the
         entities below resolve to a declared data source.
         NOTE(review): the credentials (root user, plain-text password) are
         stored directly in this file; confirm this is acceptable for the
         deployment. -->
    <dataSource name="db"
                type="JdbcDataSource"
                driver="com.mysql.jdbc.Driver"
                url="jdbc:mysql://127.0.0.1:3306/ktimatologio"
                user="root"
                password="1a2b3c4d"
                autoCommit="true"
                batchSize="-1"
                convertType="false"/>

    <!-- Data source referenced by the nested Tika entity, which reads a
         column of its parent row through the dataField attribute. -->
    <dataSource name="fieldReader" type="FieldStreamDataSource"/>


    <document> 

    <!-- Rows of type 'text': the "body" column is aliased as "content" and
         solr_id is built as "<id>_<model>". HTML stripping is requested on
         every column except id and solr_id. -->
    <entity name="aitiologikes_ektheseis"
            dataSource="db"
            transformer="HTMLStripTransformer"
            query="select id, title, model, type, url, last_modified, CONCAT_WS('_',id,model) AS solr_id, body AS content from aitiologikes_ektheseis where type = 'text'"
            deltaQuery="select id, title, model, type, url, last_modified, CONCAT_WS('_',id,model) AS solr_id, body AS content from aitiologikes_ektheseis where type = 'text' and last_modified &gt; '${dataimporter.last_index_time}'"
            deltaImportQuery="select id, title, model, type, url, last_modified, CONCAT_WS('_',id,model) AS solr_id, body AS content from aitiologikes_ektheseis where type = 'text' and id='${dataimporter.delta.id}'">
      <field column="id" name="id"/>
      <field column="solr_id" name="solr_id"/>
      <field column="title" name="title" stripHTML="true"/>
      <field column="model" name="model" stripHTML="true"/>
      <field column="type" name="type" stripHTML="true"/>
      <field column="url" name="url" stripHTML="true"/>
      <field column="last_modified" name="last_modified" stripHTML="true"/>
      <field column="content" name="content" stripHTML="true"/>
    </entity>

    <!-- Rows of type 'bin': the "bin_con" column is aliased as "content" and
         handed to a nested TikaEntityProcessor entity through dataField.
         NOTE(review): this is the configuration the question is about. The
         answer further down this page reports that it only works once the
         blob is aliased as "text" and the nested entity maps column "text"
         to the "content" field, with the other field mappings placed on the
         outer entity; confirm against the TikaEntityProcessor documentation. -->
    <entity name="aitiologikes_ektheseis_bin"
            dataSource="db"
            transformer="TemplateTransformer"
            query="select id, title, model, type, url, last_modified, CONCAT_WS('_',id,model) AS solr_id, bin_con AS content from aitiologikes_ektheseis where type = 'bin'"
            deltaQuery="select id, title, model, type, url, last_modified, CONCAT_WS('_',id,model) AS solr_id, bin_con AS content from aitiologikes_ektheseis where type = 'bin' and last_modified &gt; '${dataimporter.last_index_time}'"
            deltaImportQuery="select id, title, model, type, url, last_modified, CONCAT_WS('_',id,model) AS solr_id, bin_con AS content from aitiologikes_ektheseis where type = 'bin' and id='${dataimporter.delta.id}'">

      <entity processor="TikaEntityProcessor"
              dataSource="fieldReader"
              dataField="aitiologikes_ektheseis_bin.content"
              format="text">
        <field column="id" name="id"/>
        <field column="solr_id" name="solr_id"/>
        <field column="title" name="title" stripHTML="true"/>
        <field column="model" name="model" stripHTML="true"/>
        <field column="type" name="type" stripHTML="true"/>
        <field column="url" name="url" stripHTML="true"/>
        <field column="last_modified" name="last_modified" stripHTML="true"/>
        <field column="content" name="content" stripHTML="true"/>
      </entity>

    </entity>

    </document> 


</dataConfig> 

回答

1

我終於找到了解決辦法。請注意 data-config.xml 中實體的查詢(query)和列(column)定義:

.... 
    <!-- The entity as it appeared in the question: the blob column is aliased
         as "content", and all field mappings sit on the nested Tika entity.
         NOTE(review): the text after this snippet states that the alias had
         to become "text" for the extraction to work. -->
    <entity name="aitiologikes_ektheseis_bin"
            dataSource="db"
            transformer="TemplateTransformer"
            query="select id, title, model, type, url, last_modified, CONCAT_WS('_',id,model) AS solr_id, bin_con AS content from aitiologikes_ektheseis where type = 'bin'"
            deltaQuery="select id, title, model, type, url, last_modified, CONCAT_WS('_',id,model) AS solr_id, bin_con AS content from aitiologikes_ektheseis where type = 'bin' and last_modified &gt; '${dataimporter.last_index_time}'"
            deltaImportQuery="select id, title, model, type, url, last_modified, CONCAT_WS('_',id,model) AS solr_id, bin_con AS content from aitiologikes_ektheseis where type = 'bin' and id='${dataimporter.delta.id}'">

      <entity processor="TikaEntityProcessor"
              dataSource="fieldReader"
              dataField="aitiologikes_ektheseis_bin.content"
              format="text">
        <field column="id" name="id"/>
        <field column="solr_id" name="solr_id"/>
        <field column="title" name="title" stripHTML="true"/>
        <field column="model" name="model" stripHTML="true"/>
        <field column="type" name="type" stripHTML="true"/>
        <field column="url" name="url" stripHTML="true"/>
        <field column="last_modified" name="last_modified" stripHTML="true"/>
        <field column="content" name="content" stripHTML="true"/>
      </entity>

    </entity>

    </document> 


</dataConfig> 

爲了讓 Tika「看到」內容並提取它,我必須把列別名「content」改成「text」。還有一點,正確的語法是:

<!-- Corrected entity: the blob column "bin_con" is aliased as "text". The
     database columns are mapped on the outer entity, and the nested Tika
     entity reads the blob through dataField and maps its "text" column to
     the "content" field. -->
<entity name="aitiologikes_ektheseis_bin"
        dataSource="db"
        transformer="TemplateTransformer"
        query="select id, title, model, type, url, last_modified, CONCAT_WS('_',id,model) AS solr_id, bin_con AS text from aitiologikes_ektheseis where type = 'bin'"
        deltaQuery="select id, title, model, type, url, last_modified, CONCAT_WS('_',id,model) AS solr_id, bin_con AS text from aitiologikes_ektheseis where type = 'bin' and last_modified &gt; '${dataimporter.last_index_time}'"
        deltaImportQuery="select id, title, model, type, url, last_modified, CONCAT_WS('_',id,model) AS solr_id, bin_con AS text from aitiologikes_ektheseis where type = 'bin' and id='${dataimporter.delta.id}'">

  <!-- Columns taken straight from the database row -->
  <field column="id" name="id"/>
  <field column="solr_id" name="solr_id"/>
  <field column="title" name="title"/>
  <field column="model" name="model"/>
  <field column="type" name="type"/>
  <field column="url" name="url"/>
  <field column="last_modified" name="last_modified"/>

  <!-- Text extraction from the blob -->
  <entity processor="TikaEntityProcessor"
          dataSource="fieldReader"
          dataField="aitiologikes_ektheseis_bin.text"
          format="text">
    <field column="text" name="content"/>
  </entity>

</entity>

    </document> 


</dataConfig> 

希望這對某人有所幫助。祝好,湯姆