2016-08-15 108 views
1

Solr omitNorms = false 不起作用

據我所知,omitNorms 默認設置爲 false,但不知何故它根本不影響分數。所有結果總是顯示相同的分數。

{ 
     "MediaOutletName":"Guardian Money", 
     "score":6.101774}, 
     { 
     "MediaOutletName":"The Guardian", 
     "score":6.101774}, 
     { 
     "MediaOutletName":"Farmers Guardian", 
     "score":6.101774}, 
     { 
     "MediaOutletName":"Guardian Online", 
     "score":6.101774}, 
     { 
     "MediaOutletName":"Thames Guardian", 
     "score":6.101774}, 
     { 

在這個例子中,較短的名稱(此處原文的名稱已遺失,推測爲「The Guardian」)應該排在最前面,並且得分高於其他結果。

字段類型定義(含分析器與過濾器):

<fieldType name="text_general" class="solr.TextField" omitNorms="false" positionIncrementGap="100"> 
    <analyzer type="index"> 
     <tokenizer class="solr.WhitespaceTokenizerFactory"/> 
     <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> 
     <filter class="solr.LowerCaseFilterFactory"/> 
     <filter class="solr.WordDelimiterFilterFactory" types="wdfftypes.txt" generateNumberParts="0" stemEnglishPossessive="0" splitOnCaseChange="1" preserveOriginal="1" catenateAll="1" catenateWords="1" catenateNumbers="1" generateWordParts="1" splitOnNumerics="1"/> 
    </analyzer> 
    <analyzer type="query"> 
     <tokenizer class="solr.WhitespaceTokenizerFactory"/> 
     <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> 
     <filter class="solr.LowerCaseFilterFactory"/> 
     <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt" tokenizerFactory="solr.WhitespaceTokenizerFactory"/> 
     <filter class="solr.WordDelimiterFilterFactory" types="wdfftypes.txt" generateNumberParts="1" stemEnglishPossessive="0" splitOnCaseChange="1" preserveOriginal="1" catenateAll="1" catenateWords="1" catenateNumbers="1" generateWordParts="1" splitOnNumerics="1"/> 
    </analyzer> 
    </fieldType> 

域定義

<field name="MediaOutletName" omitNorms="false" type="text_general" multiValued="false" indexed="true" stored="true"/> 

調試查詢

{ 
 
    "responseHeader":{ 
 
    "status":0, 
 
    "QTime":0, 
 
    "params":{ 
 
     "q":"MediaOutletName:Guardian", 
 
     "indent":"on", 
 
     "fl":"MediaOutletName_s,score", 
 
     "omit":"undefined", 
 
     "wt":"json", 
 
     "debugQuery":"on", 
 
     "_":"1471275424357"}}, 
 
    "response":{"numFound":55,"start":0,"maxScore":6.101774,"docs":[ 
 
     { 
 
     "MediaOutletName_s":"Guardian Money", 
 
     "score":6.101774}, 
 
     { 
 
     "MediaOutletName_s":"The Guardian", 
 
     "score":6.101774}, 
 
     { 
 
     "MediaOutletName_s":"Farmers Guardian", 
 
     "score":6.101774}, 
 
     { 
 
     "MediaOutletName_s":"Guardian Online", 
 
     "score":6.101774}, 
 
     { 
 
     "MediaOutletName_s":"Thames Guardian", 
 
     "score":6.101774}, 
 
     { 
 
     "MediaOutletName_s":"Nenagh Guardian", 
 
     "score":6.101774}, 
 
     { 
 
     "MediaOutletName_s":"News Guardian", 
 
     "score":6.101774}, 
 
     { 
 
     "MediaOutletName_s":"Gorey Guardian", 
 
     "score":6.101774}, 
 
     { 
 
     "MediaOutletName_s":"Cornish Guardian", 
 
     "score":6.101774}, 
 
     { 
 
     "MediaOutletName_s":"Somerset Guardian", 
 
     "score":6.101774}] 
 
    }, 
 
    "debug":{ 
 
    "rawquerystring":"MediaOutletName:Guardian", 
 
    "querystring":"MediaOutletName:Guardian", 
 
    "parsedquery":"MediaOutletName:guardian", 
 
    "parsedquery_toString":"MediaOutletName:guardian", 
 
    "explain":{ 
 
     "301":"\n6.101774 = weight(MediaOutletName:guardian in 110) [], result of:\n 6.101774 = score(doc=110,freq=1.0 = termFreq=1.0\n), product of:\n 5.4649 = idf(docFreq=55, docCount=13111)\n 1.116539 = tfNorm, computed from:\n  1.0 = termFreq=1.0\n  1.2 = parameter k1\n  0.75 = parameter b\n  3.436885 = avgFieldLength\n  2.56 = fieldLength\n", 
 
     "332":"\n6.101774 = weight(MediaOutletName:guardian in 125) [], result of:\n 6.101774 = score(doc=125,freq=1.0 = termFreq=1.0\n), product of:\n 5.4649 = idf(docFreq=55, docCount=13111)\n 1.116539 = tfNorm, computed from:\n  1.0 = termFreq=1.0\n  1.2 = parameter k1\n  0.75 = parameter b\n  3.436885 = avgFieldLength\n  2.56 = fieldLength\n", 
 
     "4897":"\n6.101774 = weight(MediaOutletName:guardian in 1016) [], result of:\n 6.101774 = score(doc=1016,freq=1.0 = termFreq=1.0\n), product of:\n 5.4649 = idf(docFreq=55, docCount=13111)\n 1.116539 = tfNorm, computed from:\n  1.0 = termFreq=1.0\n  1.2 = parameter k1\n  0.75 = parameter b\n  3.436885 = avgFieldLength\n  2.56 = fieldLength\n", 
 
     "6923":"\n6.101774 = weight(MediaOutletName:guardian in 2270) [], result of:\n 6.101774 = score(doc=2270,freq=1.0 = termFreq=1.0\n), product of:\n 5.4649 = idf(docFreq=55, docCount=13111)\n 1.116539 = tfNorm, computed from:\n  1.0 = termFreq=1.0\n  1.2 = parameter k1\n  0.75 = parameter b\n  3.436885 = avgFieldLength\n  2.56 = fieldLength\n", 
 
     "8553":"\n6.101774 = weight(MediaOutletName:guardian in 2970) [], result of:\n 6.101774 = score(doc=2970,freq=1.0 = termFreq=1.0\n), product of:\n 5.4649 = idf(docFreq=55, docCount=13111)\n 1.116539 = tfNorm, computed from:\n  1.0 = termFreq=1.0\n  1.2 = parameter k1\n  0.75 = parameter b\n  3.436885 = avgFieldLength\n  2.56 = fieldLength\n", 
 
     "8680":"\n6.101774 = weight(MediaOutletName:guardian in 3045) [], result of:\n 6.101774 = score(doc=3045,freq=1.0 = termFreq=1.0\n), product of:\n 5.4649 = idf(docFreq=55, docCount=13111)\n 1.116539 = tfNorm, computed from:\n  1.0 = termFreq=1.0\n  1.2 = parameter k1\n  0.75 = parameter b\n  3.436885 = avgFieldLength\n  2.56 = fieldLength\n", 
 
     "8686":"\n6.101774 = weight(MediaOutletName:guardian in 3049) [], result of:\n 6.101774 = score(doc=3049,freq=1.0 = termFreq=1.0\n), product of:\n 5.4649 = idf(docFreq=55, docCount=13111)\n 1.116539 = tfNorm, computed from:\n  1.0 = termFreq=1.0\n  1.2 = parameter k1\n  0.75 = parameter b\n  3.436885 = avgFieldLength\n  2.56 = fieldLength\n", 
 
     "8961":"\n6.101774 = weight(MediaOutletName:guardian in 3203) [], result of:\n 6.101774 = score(doc=3203,freq=1.0 = termFreq=1.0\n), product of:\n 5.4649 = idf(docFreq=55, docCount=13111)\n 1.116539 = tfNorm, computed from:\n  1.0 = termFreq=1.0\n  1.2 = parameter k1\n  0.75 = parameter b\n  3.436885 = avgFieldLength\n  2.56 = fieldLength\n", 
 
     "9253":"\n6.101774 = weight(MediaOutletName:guardian in 3396) [], result of:\n 6.101774 = score(doc=3396,freq=1.0 = termFreq=1.0\n), product of:\n 5.4649 = idf(docFreq=55, docCount=13111)\n 1.116539 = tfNorm, computed from:\n  1.0 = termFreq=1.0\n  1.2 = parameter k1\n  0.75 = parameter b\n  3.436885 = avgFieldLength\n  2.56 = fieldLength\n", 
 
     "9344":"\n6.101774 = weight(MediaOutletName:guardian in 3448) [], result of:\n 6.101774 = score(doc=3448,freq=1.0 = termFreq=1.0\n), product of:\n 5.4649 = idf(docFreq=55, docCount=13111)\n 1.116539 = tfNorm, computed from:\n  1.0 = termFreq=1.0\n  1.2 = parameter k1\n  0.75 = parameter b\n  3.436885 = avgFieldLength\n  2.56 = fieldLength\n"}, 
 
    "QParser":"LuceneQParser", 
 
    "timing":{ 
 
     "time":0.0, 
 
     "prepare":{ 
 
     "time":0.0, 
 
     "query":{ 
 
      "time":0.0}, 
 
     "facet":{ 
 
      "time":0.0}, 
 
     "facet_module":{ 
 
      "time":0.0}, 
 
     "mlt":{ 
 
      "time":0.0}, 
 
     "highlight":{ 
 
      "time":0.0}, 
 
     "stats":{ 
 
      "time":0.0}, 
 
     "expand":{ 
 
      "time":0.0}, 
 
     "debug":{ 
 
      "time":0.0}}, 
 
     "process":{ 
 
     "time":0.0, 
 
     "query":{ 
 
      "time":0.0}, 
 
     "facet":{ 
 
      "time":0.0}, 
 
     "facet_module":{ 
 
      "time":0.0}, 
 
     "mlt":{ 
 
      "time":0.0}, 
 
     "highlight":{ 
 
      "time":0.0}, 
 
     "stats":{ 
 
      "time":0.0}, 
 
     "expand":{ 
 
      "time":0.0}, 
 
     "debug":{ 
 
      "time":0.0}}}}}

謝謝,

+0

索引時omitNorms是否設置爲false? 'debugQuery'顯示什麼? – MatsLindh

+0

@MatsLindh 請看一下,我已經更新了我的代碼。我分別在 omitNorms 爲「true」和「false」的情況下重新導入了很多次數據,結果沒有任何變化。正如你所看到的,每個文檔都顯示相同的 2.56 = fieldLength。 – Oyeme

回答

1

在 Lucene/Solr/Elastic 中談到字段長度時,指的是詞元(token)的數量,而不是字段中的字節數。您的所有示例都包含兩個詞元,因此經過長度歸一化之後得到的分數相同。

如果該字段是單值的,那麼您可以另外索引一個整數字段,存放以字節爲單位的字段長度,並在排序時將其用作決勝條件(tie breaker)。我不認爲有函數查詢可以在運行時獲取字段原始值的長度(至少以前沒有)。

+0

在這種情況下,它將打破具有多個排序字段的排序邏輯。例如:score desc, fieldLength asc, Importance(重要性)asc, IsParent(共 4 個排序選項) – Oyeme

+0

我不確定我是否理解了你的意思——如果分數因爲長度歸一化而不同,第三和第四個排序參數同樣不會起任何作用(這與你把 fieldLength 作爲顯式排序條件所得到的結果是一樣的)? – MatsLindh

+0

按長度排序只適用於查詢簡單的情況。在我的情況下,查詢更加複雜,我不能使用字段長度作爲排序參數。 (((MediaOutletName:Guardian*^20)) OR ((MediaOutletName:Guardian^20))) OR (((About:Guardian*)) OR ((About:Guardian))) OR (((貢獻:Guardian*)) OR ((讀者:Guardian)) OR ((請注意:Guardian)) OR ((請注意:Guardian)) OR ((MediaOutletNameAlias:Guardian*^20)) OR ((MediaOutletNameAlias:Guardian^20))) OR (((Content:Guardian*)) OR ((Content:Guardian))) – Oyeme