2014-01-30 52 views
0

我是Solr的新手。我使用的是Spring 3.x和Solr 4.6。

Spring Solr拼寫檢查組件匹配最接近(closest)的詞

下面是我的schema.xml:

<schema name="customer_site_address" version="1.5"> 

<!-- Field declarations. Only id, text and _version_ are indexed; every other
     field is stored for retrieval but is not directly searchable. -->
<fields> 
    <field name="id" type="long" indexed="true" stored="true" required="true" multiValued="false" /> 
    <field name="name" type="string" indexed="false" stored="true" required="true" multiValued="false"/> 
    <field name="number" type="string" indexed="false" stored="true" required="true" multiValued="false"/> 
    <field name="address" type="text_general" indexed="false" stored="true" required="true" multiValued="false"/> 
    <field name="city" type="string" indexed="false" stored="true" multiValued="false"/> 
    <field name="state" type="string" indexed="false" stored="true" multiValued="false"/> 
    <field name="zipcode" type="string" indexed="false" stored="true" multiValued="false"/> 
    <field name="country" type="string" indexed="false" stored="true" multiValued="false"/> 
    <field name="latlng" type="string" indexed="false" stored="true" multiValued="false"/> 
    <!-- Catch-all search field: indexed but not stored, multi-valued, analyzed as text_general. -->
    <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/> 
    <field name="_version_" type="long" indexed="true" stored="true"/> 
</fields> 

<!-- Unique key: the id field identifies each document. --> 
<uniqueKey>id</uniqueKey> 
<!-- Copy the name, number and address parts into the catch-all "text" field.
     latlng is the only stored field that is not copied. -->
<copyField source="name" dest="text"/> 
<copyField source="number" dest="text"/> 
<copyField source="address" dest="text"/> 
<copyField source="city" dest="text"/> 
<copyField source="state" dest="text"/> 
<copyField source="zipcode" dest="text"/> 
<copyField source="country" dest="text"/> 

<!-- Field type definitions: long, string, and the analyzed text_general type. -->
<types> 
    <!-- Long --> 
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0" /> 
    <!-- String --> 
    <fieldType name="string" class="solr.StrField" sortMissingLast="true" /> 
    <!-- Text: analyzed type with separate index-time and query-time analyzer chains. --> 
    <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100"> 
     <!-- Configures the analysis done at the index phase --> 
     <analyzer type="index"> 
      <!-- Uses word break rules of the Unicode Text Segmentation algorithm 
       when splitting text into words. --> 
      <tokenizer class="solr.StandardTokenizerFactory" /> 
      <!-- Alternative tokenizers, left disabled: -->
      <!-- <tokenizer class="solr.KeywordTokenizerFactory"/> --> 
      <!-- <tokenizer class="solr.WhitespaceTokenizerFactory"/> --> 
      <!-- Removes words found from stopwords.txt file. This filter is case 
       insensitive. --> 
      <filter class="solr.StopFilterFactory" ignoreCase="true" 
       words="stopwords.txt" enablePositionIncrements="true" /> 

      <!-- Strips every apostrophe from each token. -->
      <filter class="solr.PatternReplaceFilterFactory" pattern="'" replacement="" replace="all" /> 
      <!-- Splits tokens into word and number parts (also on case changes) and
           additionally emits the concatenated word runs and number runs.
           This filter is applied at index time only. -->
      <filter class="solr.WordDelimiterFilterFactory" 
       generateWordParts="1" 
       generateNumberParts="1" 
       catenateWords="1" 
       catenateNumbers="1" 
       catenateAll="0" 
       splitOnCaseChange="1" 
      /> 
      <!-- Transforms text to lower case --> 
      <filter class="solr.LowerCaseFilterFactory" /> 

      <!-- Emits front-anchored n-grams of 1 to 15 characters for every token.
           NOTE(review): the "text" field of this type is also the spellcheck
           dictionary source, so these prefixes become dictionary terms. The
           author reports that removing this filter got rid of fragment
           suggestions such as "f r" and "f r g". -->
      <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="15" side="front"/> 
     </analyzer> 
     <!-- Configures the analysis done at the query time --> 
     <analyzer type="query"> 
      <!-- Uses word break rules of the Unicode Text Segmentation algorithm 
       when splitting text into words. --> 
      <tokenizer class="solr.StandardTokenizerFactory" /> 
      <!-- Alternative tokenizers, left disabled: -->
      <!-- <tokenizer class="solr.KeywordTokenizerFactory"/> --> 
      <!-- <tokenizer class="solr.WhitespaceTokenizerFactory"/> --> 
      <!-- Removes words found from stopwords.txt file. This filter is case 
       insensitive. --> 
      <filter class="solr.StopFilterFactory" ignoreCase="true" 
       words="stopwords.txt" enablePositionIncrements="true" /> 
      <!-- Applies synonyms found from the synonyms.txt file. --> 
      <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" 
       ignoreCase="true" expand="true" /> 
      <!-- Transforms text to lower case --> 
      <filter class="solr.LowerCaseFilterFactory" /> 

      <!-- Same 1 to 15 character front n-grams as the index chain.
           NOTE(review): n-gramming at query time as well turns every prefix of
           a query token into a search term; confirm that this is intended. -->
      <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="15" side="front"/> 
     </analyzer> 
    </fieldType> 
</types> 

下面是solrconfig.xml中的SearchComponent:

<searchComponent name="spellcheck" class="solr.SpellCheckComponent"> 
<str name="queryAnalyzerFieldType">text_general</str> 

<!-- Spellchecker "default": a DirectSolrSpellChecker that reads its terms from
     the "text" field. The parameter notes below follow the descriptions in the
     stock Solr example solrconfig.xml; verify them against the Solr version in use. -->
<lst name="spellchecker"> 
    <str name="name">default</str> 
    <str name="field">text</str> 
    <str name="classname">solr.DirectSolrSpellChecker</str> 
    <!-- Distance measure; "internal" selects the built-in one. -->
    <str name="distanceMeasure">internal</str> 
    <!-- Minimum accuracy for a term to count as a valid suggestion. -->
    <float name="accuracy">0.5</float> 
    <!-- Maximum number of edits considered when enumerating candidate terms. -->
    <int name="maxEdits">2</int> 
    <!-- Minimum prefix a candidate must share with the query term. -->
    <int name="minPrefix">1</int> 
    <!-- Maximum number of inspections per result. -->
    <int name="maxInspections">5</int> 
    <!-- Minimum length of a query term to be considered for correction. -->
    <int name="minQueryLength">4</int> 
    <!-- Maximum document-frequency threshold for a query term to be considered for correction. -->
    <float name="maxQueryFrequency">0.01</float> 
</lst> 

<!-- Spellchecker "wordbreak": can break or combine words, using terms from the
     "text" field. See the "/spell" handler for usage.
     NOTE(review): with breakWords enabled and 1-character edge n-grams in the
     "text" field, this checker is the likely source of suggestions such as
     "f r g o"; confirm. --> 
<lst name="spellchecker"> 
    <str name="name">wordbreak</str> 
    <str name="classname">solr.WordBreakSolrSpellChecker</str>  
    <str name="field">text</str> 
    <str name="combineWords">true</str> 
    <str name="breakWords">true</str> 
    <int name="maxChanges">10</int> 
</lst> 

下面是solrconfig.xml中的RequestHandler:

<requestHandler name="/spell" class="solr.SearchHandler" startup="lazy"> 
<!-- Default request parameters for the /spell handler: search the "text"
     field and run spellchecking with collation enabled. -->
<lst name="defaults"> 
    <str name="df">text</str> 
    <!-- Two dictionaries are listed: "default" and "wordbreak". -->
    <str name="spellcheck.dictionary">default</str> 
    <str name="spellcheck.dictionary">wordbreak</str> 
    <str name="spellcheck">on</str> 
    <str name="spellcheck.extendedResults">true</str>  
    <str name="spellcheck.count">10</str> 
    <str name="spellcheck.alternativeTermCount">5</str> 
    <str name="spellcheck.maxResultsForSuggest">5</str>  
    <!-- Collation settings: collation on, with extended results, 10 tries and 5 collations. -->
    <str name="spellcheck.collate">true</str> 
    <str name="spellcheck.collateExtendedResults">true</str> 
    <str name="spellcheck.maxCollationTries">10</str> 
    <str name="spellcheck.maxCollations">5</str>   
</lst> 
<!-- Registers the "spellcheck" search component in the handler's last-components list. -->
<arr name="last-components"> 
    <str>spellcheck</str> 
</arr> 

現在,當我針對城市名 frgo 調用 /spell 時,會生成下面的查詢字符串。

qt=/spell&spellcheck.q=frgo&spellcheck=true&mm=100% 

我得到下列選項:(當前結果)

[f r, f r g, fargo, f r g o, farg] 

應該給我:(預期)

[fargo] 

另一種情況:當我輸入 wset frgo 時,會生成以下查詢字符串。

qt=/spell&spellcheck.q=wset+frgo&spellcheck=true&mm=100% 

我得到下列選項:(當前結果)

[w s, w s e, west, w s e t, wert, f r, f r g, fargo, f r g o, farg] 

應該給我:(預期)

[West, West Fargo] 

我也嘗試了通過谷歌找到的一些解決方案,但我想是我的配置有誤。我也試過在 index 和 query 分析器中使用 solr.KeywordTokenizerFactory 和 solr.WhitespaceTokenizerFactory。

請指導我。

編輯:

我刪除了以下過濾器之後,它在某種程度上可以工作了。

<filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="15" side="front"/> 

現在,如果我輸入 wset frgo,會生成下面的查詢字符串。

qt=/spell&spellcheck.q=wset+frgo&spellcheck=true 

我得到下列選項:(當前結果)

[west, fargo, farg] 

應該給我:(預期)

[west, fargo, west fargo] 

仍然沒有返回 West Fargo。

請指引我

感謝

+0

衣櫃(closet)裏的一個詞? – hippietrail

回答

0

請在 Solr 管理界面中檢查該字段,確認 West Fargo 是否已被索引。

+0

我正在使用嵌入式Solr服務器 –

0

在您的 wordbreak 拼寫檢查器配置中,將這項配置

<str name="breakWords">true</str> 

改為

<str name="breakWords">false</str> 

或者,您也可以從 "/spell" 處理程序中完全移除 wordbreak 拼寫檢查器。