Spring Solr拼寫檢查組件匹配衣櫃字

我是Solr的新手。我使用的是Spring 3.x和Solr 4.6。

下面是我的Schema.xml：

<schema name="customer_site_address" version="1.5"> 

<fields> 
    <field name="id" type="long" indexed="true" stored="true" required="true" multiValued="false" /> 
    <field name="name" type="string" indexed="false" stored="true" required="true" multiValued="false"/> 
    <field name="number" type="string" indexed="false" stored="true" required="true" multiValued="false"/> 
    <field name="address" type="text_general" indexed="false" stored="true" required="true" multiValued="false"/> 
    <field name="city" type="string" indexed="false" stored="true" multiValued="false"/> 
    <field name="state" type="string" indexed="false" stored="true" multiValued="false"/> 
    <field name="zipcode" type="string" indexed="false" stored="true" multiValued="false"/> 
    <field name="country" type="string" indexed="false" stored="true" multiValued="false"/> 
    <field name="latlng" type="string" indexed="false" stored="true" multiValued="false"/> 
    <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/> 
    <field name="_version_" type="long" indexed="true" stored="true"/> 
</fields> 

<!-- Configure unique key --> 
<uniqueKey>id</uniqueKey> 
<copyField source="name" dest="text"/> 
<copyField source="number" dest="text"/> 
<copyField source="address" dest="text"/> 
<copyField source="city" dest="text"/> 
<copyField source="state" dest="text"/> 
<copyField source="zipcode" dest="text"/> 
<copyField source="country" dest="text"/> 

<types> 
    <!-- Long --> 
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0" /> 
    <!-- String --> 
    <fieldType name="string" class="solr.StrField" sortMissingLast="true" /> 
    <!-- Text --> 
    <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100"> 
     <!-- Configures the analysis done at the index phase --> 
     <analyzer type="index"> 
      <!-- Uses word break rules of the Unicode Text Segmentation algorithm 
       when splitting text into words. --> 
      <tokenizer class="solr.StandardTokenizerFactory" /> 
      <!-- <tokenizer class="solr.KeywordTokenizerFactory"/> --> 
      <!-- <tokenizer class="solr.WhitespaceTokenizerFactory"/> --> 
      <!-- Removes words found from stopwords.txt file. This filter is case 
       insensitive. --> 
      <filter class="solr.StopFilterFactory" ignoreCase="true" 
       words="stopwords.txt" enablePositionIncrements="true" /> 

      <filter class="solr.PatternReplaceFilterFactory" pattern="'" replacement="" replace="all" /> 
      <filter class="solr.WordDelimiterFilterFactory" 
       generateWordParts="1" 
       generateNumberParts="1" 
       catenateWords="1" 
       catenateNumbers="1" 
       catenateAll="0" 
       splitOnCaseChange="1" 
      /> 
      <!-- Transforms text to lower case --> 
      <filter class="solr.LowerCaseFilterFactory" /> 

      <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="15" side="front"/> 
     </analyzer> 
     <!-- Configures the analysis done at the query time --> 
     <analyzer type="query"> 
      <!-- Uses word break rules of the Unicode Text Segmentation algorithm 
       when splitting text into words. --> 
      <tokenizer class="solr.StandardTokenizerFactory" /> 
      <!-- <tokenizer class="solr.KeywordTokenizerFactory"/> --> 
      <!-- <tokenizer class="solr.WhitespaceTokenizerFactory"/> --> 
      <!-- Removes words found from stopwords.txt file. This filter is case 
       insensitive. --> 
      <filter class="solr.StopFilterFactory" ignoreCase="true" 
       words="stopwords.txt" enablePositionIncrements="true" /> 
      <!-- Applies synonyms found from the synonyms.txt file. --> 
      <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" 
       ignoreCase="true" expand="true" /> 
      <!-- Transforms text to lower case --> 
      <filter class="solr.LowerCaseFilterFactory" /> 

      <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="15" side="front"/> 
     </analyzer> 
    </fieldType> 
</types>

下面是solrconfig.xml中的SearchComponent：

<searchComponent name="spellcheck" class="solr.SpellCheckComponent"> 
<str name="queryAnalyzerFieldType">text_general</str> 

<lst name="spellchecker"> 
    <str name="name">default</str> 
    <str name="field">text</str> 
    <str name="classname">solr.DirectSolrSpellChecker</str> 
    <str name="distanceMeasure">internal</str> 
    <float name="accuracy">0.5</float> 
    <int name="maxEdits">2</int> 
    <int name="minPrefix">1</int> 
    <int name="maxInspections">5</int> 
    <int name="minQueryLength">4</int> 
    <float name="maxQueryFrequency">0.01</float> 
</lst> 

<!-- a spellchecker that can break or combine words. See "/spell" handler below for usage --> 
<lst name="spellchecker"> 
    <str name="name">wordbreak</str> 
    <str name="classname">solr.WordBreakSolrSpellChecker</str>  
    <str name="field">text</str> 
    <str name="combineWords">true</str> 
    <str name="breakWords">true</str> 
    <int name="maxChanges">10</int> 
</lst>

下面是solrconfig.xml中的RequestHandler：

<requestHandler name="/spell" class="solr.SearchHandler" startup="lazy"> 
<lst name="defaults"> 
    <str name="df">text</str> 
    <str name="spellcheck.dictionary">default</str> 
    <str name="spellcheck.dictionary">wordbreak</str> 
    <str name="spellcheck">on</str> 
    <str name="spellcheck.extendedResults">true</str>  
    <str name="spellcheck.count">10</str> 
    <str name="spellcheck.alternativeTermCount">5</str> 
    <str name="spellcheck.maxResultsForSuggest">5</str>  
    <str name="spellcheck.collate">true</str> 
    <str name="spellcheck.collateExtendedResults">true</str> 
    <str name="spellcheck.maxCollationTries">10</str> 
    <str name="spellcheck.maxCollations">5</str>   
</lst> 
<arr name="last-components"> 
    <str>spellcheck</str> 
</arr>

現在，當我用城市名frgo調用/spell時，會產生下面的查詢字符串。

qt=/spell&spellcheck.q=frgo&spellcheck=true&mm=100%

我得到下列選項：（當前結果）

[f r, f r g, fargo, f r g o, farg]

應該給我：（預期）

[fargo]

另一種情況：當我輸入wset frgo時，會生成以下查詢字符串

qt=/spell&spellcheck.q=wset+frgo&spellcheck=true&mm=100%

我得到下列選項：（當前結果）

[w s, w s e, west, w s e t, wert, f r, f r g, fargo, f r g o, farg]

應該給我：（預期）

[West, West Fargo]

我也嘗試過通過谷歌找到的一些解決方案，但我想是我的配置有誤。我也試過在index和query分析器中使用solr.KeywordTokenizerFactory和solr.WhitespaceTokenizerFactory。

請指導我。

編輯：

我刪除了以下過濾器之後，它在某種程度上可以工作了。

<filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="15" side="front"/>

現在，如果我輸入wset frgo，會產生下面的查詢字符串

qt=/spell&spellcheck.q=wset+frgo&spellcheck=true

我得到下列選項：（當前結果）

[west, fargo, farg]

應該給我：（預期）

[west, fargo, west fargo]

但West Fargo仍然沒有返回。

請指引我

感謝

來源

2014-01-30 Ankur Raiyani

一個字在衣櫃裏？ – hippietrail

請在Solr管理界面中檢查該字段，確認West Fargo是否已被索引。

來源

2014-02-10 12:56:02 gemini

我正在使用嵌入式Solr服務器 –

請在您的wordbreak拼寫檢查器配置中更改此設置：

<str name="breakWords">true</str>

改爲

<str name="breakWords">false</str>

或者，你也可以從「/spell」處理程序中完全移除wordbreak拼寫檢查器。

來源

2014-11-14 15:45:17 Rajesh

Spring Solr拼寫檢查組件匹配衣櫃字

回答

相關問題