我是 Solr 的新手。我使用的是 Spring 3.x 和 Solr 4.6。（問題：Spring Solr 拼寫檢查組件如何匹配最接近的詞）
下面是我的 schema.xml：
<schema name="customer_site_address" version="1.5">
  <!-- Schema for the customer_site_address core. Only "id", "text" and
       "_version_" are indexed; every other field is stored for display only
       and is made searchable by being copied into the catch-all "text" field. -->
  <fields>
    <field name="id" type="long" indexed="true" stored="true" required="true" multiValued="false" />
    <field name="name" type="string" indexed="false" stored="true" required="true" multiValued="false"/>
    <field name="number" type="string" indexed="false" stored="true" required="true" multiValued="false"/>
    <field name="address" type="text_general" indexed="false" stored="true" required="true" multiValued="false"/>
    <field name="city" type="string" indexed="false" stored="true" multiValued="false"/>
    <field name="state" type="string" indexed="false" stored="true" multiValued="false"/>
    <field name="zipcode" type="string" indexed="false" stored="true" multiValued="false"/>
    <field name="country" type="string" indexed="false" stored="true" multiValued="false"/>
    <field name="latlng" type="string" indexed="false" stored="true" multiValued="false"/>
    <!-- Catch-all search field: indexed but not stored, multi-valued because
         several source fields are copied into it (see the copyField rules). -->
    <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
    <field name="_version_" type="long" indexed="true" stored="true"/>
  </fields>

  <!-- Configure unique key -->
  <uniqueKey>id</uniqueKey>

  <!-- Everything except id and latlng is copied into "text". -->
  <copyField source="name" dest="text"/>
  <copyField source="number" dest="text"/>
  <copyField source="address" dest="text"/>
  <copyField source="city" dest="text"/>
  <copyField source="state" dest="text"/>
  <copyField source="zipcode" dest="text"/>
  <copyField source="country" dest="text"/>

  <types>
    <!-- Long -->
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0" />

    <!-- String -->
    <fieldType name="string" class="solr.StrField" sortMissingLast="true" />

    <!-- Text -->
    <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
      <!-- Configures the analysis done at the index phase -->
      <analyzer type="index">
        <!-- Uses word break rules of the Unicode Text Segmentation algorithm
             when splitting text into words. -->
        <tokenizer class="solr.StandardTokenizerFactory" />
        <!-- <tokenizer class="solr.KeywordTokenizerFactory"/> -->
        <!-- <tokenizer class="solr.WhitespaceTokenizerFactory"/> -->
        <!-- Removes words found from stopwords.txt file. This filter is case
             insensitive. -->
        <filter class="solr.StopFilterFactory" ignoreCase="true"
                words="stopwords.txt" enablePositionIncrements="true" />
        <!-- Strips every apostrophe from each token. -->
        <filter class="solr.PatternReplaceFilterFactory" pattern="'" replacement="" replace="all" />
        <!-- Splits tokens into word and number parts (also on case changes)
             and additionally emits the concatenated word runs and number runs;
             catenateAll is off. -->
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="1"
                catenateNumbers="1"
                catenateAll="0"
                splitOnCaseChange="1"
        />
        <!-- Transforms text to lower case -->
        <filter class="solr.LowerCaseFilterFactory" />
        <!-- Emits front prefixes of 1 to 15 characters for each token.
             NOTE(review): with minGramSize="1" single letters and partial
             words become index terms. The spellcheckers in solrconfig.xml
             read this field, and the accompanying question reports
             suggestions such as "f r g" and "farg" that improved once this
             filter was removed. Presumably the spellcheck dictionary should
             be built from a separate field without n-grams; confirm. -->
        <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="15" side="front"/>
      </analyzer>
      <!-- Configures the analysis done at the query time.
           NOTE(review): this chain is not symmetric with the index chain: it
           has no PatternReplace or WordDelimiter filter and adds synonyms.
           Confirm that is intended. -->
      <analyzer type="query">
        <!-- Uses word break rules of the Unicode Text Segmentation algorithm
             when splitting text into words. -->
        <tokenizer class="solr.StandardTokenizerFactory" />
        <!-- <tokenizer class="solr.KeywordTokenizerFactory"/> -->
        <!-- <tokenizer class="solr.WhitespaceTokenizerFactory"/> -->
        <!-- Removes words found from stopwords.txt file. This filter is case
             insensitive. -->
        <filter class="solr.StopFilterFactory" ignoreCase="true"
                words="stopwords.txt" enablePositionIncrements="true" />
        <!-- Applies synonyms found from the synonyms.txt file. -->
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
                ignoreCase="true" expand="true" />
        <!-- Transforms text to lower case -->
        <filter class="solr.LowerCaseFilterFactory" />
        <!-- NOTE(review): the query text is also expanded into front prefixes
             here; edge n-grams are usually applied on the index side only.
             Confirm this is wanted. -->
        <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="15" side="front"/>
      </analyzer>
    </fieldType>
  </types>
</schema>
下面是 solrconfig.xml 中的 SearchComponent：
<!-- Spellcheck component with two dictionaries ("default" and "wordbreak"),
     both reading terms from the "text" field. Query terms handed to the
     spellcheckers are analyzed with the text_general field type. -->
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
  <str name="queryAnalyzerFieldType">text_general</str>

  <!-- Term-level suggester working directly against the index of "text".
       NOTE(review): its suggestions can only be terms that exist in that
       field, so whatever the field's index analyzer emits is a candidate;
       verify against the schema. -->
  <lst name="spellchecker">
    <str name="name">default</str>
    <str name="field">text</str>
    <str name="classname">solr.DirectSolrSpellChecker</str>
    <str name="distanceMeasure">internal</str>
    <float name="accuracy">0.5</float>
    <int name="maxEdits">2</int>
    <int name="minPrefix">1</int>
    <int name="maxInspections">5</int>
    <int name="minQueryLength">4</int>
    <float name="maxQueryFrequency">0.01</float>
  </lst>

  <!-- a spellchecker that can break or combine words. See "/spell" handler below for usage -->
  <lst name="spellchecker">
    <str name="name">wordbreak</str>
    <str name="classname">solr.WordBreakSolrSpellChecker</str>
    <str name="field">text</str>
    <str name="combineWords">true</str>
    <str name="breakWords">true</str>
    <int name="maxChanges">10</int>
  </lst>
</searchComponent>
下面是 solrconfig.xml 中的 RequestHandler：
<!-- Lazily started search handler at /spell that runs the "spellcheck"
     component after the handler's regular components. -->
<requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
  <lst name="defaults">
    <str name="df">text</str>
    <!-- spellcheck.dictionary is given twice so that both the "default" and
         the "wordbreak" dictionaries are requested. -->
    <str name="spellcheck.dictionary">default</str>
    <str name="spellcheck.dictionary">wordbreak</str>
    <str name="spellcheck">on</str>
    <str name="spellcheck.extendedResults">true</str>
    <str name="spellcheck.count">10</str>
    <str name="spellcheck.alternativeTermCount">5</str>
    <str name="spellcheck.maxResultsForSuggest">5</str>
    <!-- Collation settings: ask for whole-query rewrites, at most 5 of them,
         with up to 10 tries. -->
    <str name="spellcheck.collate">true</str>
    <str name="spellcheck.collateExtendedResults">true</str>
    <str name="spellcheck.maxCollationTries">10</str>
    <str name="spellcheck.maxCollations">5</str>
  </lst>
  <arr name="last-components">
    <str>spellcheck</str>
  </arr>
</requestHandler>
現在，當我以城市名 frgo 調用 /spell 時，會產生下面的查詢字符串：
qt=/spell&spellcheck.q=frgo&spellcheck=true&mm=100%
我得到下列選項:(當前結果)
[f r, f r g, fargo, f r g o, farg]
應該給我:(預期)
[fargo]
另一種情況：當我輸入 wset frgo 時，會生成以下查詢字符串：
qt=/spell&spellcheck.q=wset+frgo&spellcheck=true&mm=100%
我得到下列選項:(當前結果)
[w s, w s e, west, w s e t, wert, f r, f r g, fargo, f r g o, farg]
應該給我:(預期)
[West, West Fargo]
我也嘗試了通過谷歌找到的一些解決方案，但我想是我的配置有誤。我還試過在 index 和 query 分析器中使用 solr.KeywordTokenizerFactory 和 solr.WhitespaceTokenizerFactory。
請指導我。
編輯:
我刪除了以下過濾器之後，它在某種程度上可以正常工作了。
<filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="15" side="front"/>
現在，如果我輸入 wset frgo，會產生下面的查詢字符串：
qt=/spell&spellcheck.q=wset+frgo&spellcheck=true
我得到下列選項:(當前結果)
[west, fargo, farg]
應該給我:(預計)
[west, fargo, west fargo]
但 West Fargo 仍然沒有被返回。
請指引我。
感謝
一個字在衣櫃裏? – hippietrail