2016-09-22 36 views
0

在試用 AWS Elasticsearch(2.3)時,我使用了一些樣本數據 https://www.elastic.co/guide/en/kibana/3.0/snippets/shakespeare.json ,其映射如下。問題是:Elasticsearch 的 terms 聚合返回的桶,文檔數非常低。

$ curl --url "https://my_es_id.us-east-1.es.amazonaws.com/shakespeare/_mapping" 

{ 
    "shakespeare": { 
     "mappings": { 
      "act": { 
       "properties": { 
        "line_id": { 
         "type": "integer" 
        }, 
        "line_number": { 
         "type": "string" 
        }, 
        "play_name": { 
         "fields": { 
          "raw": { 
           "index": "not_analyzed", 
           "type": "string" 
          } 
         }, 
         "type": "string" 
        }, 
        "speaker": { 
         "fields": { 
          "raw": { 
           "index": "not_analyzed", 
           "type": "string" 
          } 
         }, 
         "type": "string" 
        }, 
        "speech_number": { 
         "type": "integer" 
        }, 
        "text_entry": { 
         "type": "string" 
        } 
       } 
      }, 
      "line": { 
       "properties": { 
        "line_id": { 
         "type": "integer" 
        }, 
        "line_number": { 
         "type": "string" 
        }, 
        "play_name": { 
         "type": "string" 
        }, 
        "speaker": { 
         "type": "string" 
        }, 
        "speech_number": { 
         "type": "integer" 
        }, 
        "text_entry": { 
         "type": "string" 
        } 
       } 
      }, 
      "scene": { 
       "properties": { 
        "line_id": { 
         "type": "integer" 
        }, 
        "line_number": { 
         "type": "string" 
        }, 
        "play_name": { 
         "type": "string" 
        }, 
        "speaker": { 
         "type": "string" 
        }, 
        "speech_number": { 
         "type": "integer" 
        }, 
        "text_entry": { 
         "type": "string" 
        } 
       } 
      } 
     } 
    } 
} 

現在,當我運行查詢以獲取整個數據集中每位說話者(speaker)的計數時,得到以下結果。

$ curl -XPOST "https://my_es_id.us-east-1.es.amazonaws.com/shakespeare/_search" -d' 
{ 
    "aggs" : { 
     "speakers" : { 
      "terms" : { "field" : "speaker.raw"} 
     } 
    } 
}' 

{ 
    "_shards": { 
     "failed": 0, 
     "successful": 5, 
     "total": 5 
    }, 
    "aggregations": { 
     "speakers": { 
      "buckets": [ 
       { 
        "doc_count": 4, 
        "key": "BASTARD" 
       }, 
       { 
        "doc_count": 3, 
        "key": "HAMLET" 
       }, 
       { 
        "doc_count": 3, 
        "key": "KING HENRY VIII" 
       }, 
       { 
        "doc_count": 3, 
        "key": "OF SYRACUSE" 
       }, 
       { 
        "doc_count": 3, 
        "key": "PROSPERO" 
       }, 
       { 
        "doc_count": 3, 
        "key": "WARWICK" 
       }, 
       { 
        "doc_count": 2, 
        "key": "ADRIANO DE ARMADO" 
       }, 
       { 
        "doc_count": 2, 
        "key": "ARCHBISHOP OF YORK" 
       }, 
       { 
        "doc_count": 2, 
        "key": "AUFIDIUS" 
       }, 
       { 
        "doc_count": 2, 
        "key": "BENEDICK" 
       } 
      ], 
      "doc_count_error_upper_bound": 0, 
      "sum_other_doc_count": 153 
     } 
    }, 
    "hits": { 
     "hits": [ 
      { 
       "_id": "0", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 1, 
        "line_number": "", 
        "play_name": "Henry IV", 
        "speaker": "", 
        "speech_number": "", 
        "text_entry": "ACT I" 
       }, 
       "_type": "act" 
      }, 
      { 
       "_id": "14", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 15, 
        "line_number": "1.1.12", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "Did lately meet in the intestine shock" 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "19", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 20, 
        "line_number": "1.1.17", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "The edge of war, like an ill-sheathed knife," 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "22", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 23, 
        "line_number": "1.1.20", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "Whose soldier now, under whose blessed cross" 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "24", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 25, 
        "line_number": "1.1.22", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "Forthwith a power of English shall we levy;" 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "25", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 26, 
        "line_number": "1.1.23", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "Whose arms were moulded in their mothers womb" 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "26", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 27, 
        "line_number": "1.1.24", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "To chase these pagans in those holy fields" 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "29", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 30, 
        "line_number": "1.1.27", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "For our advantage on the bitter cross." 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "40", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 41, 
        "line_number": "1.1.38", 
        "play_name": "Henry IV", 
        "speaker": "WESTMORELAND", 
        "speech_number": 2, 
        "text_entry": "Whose worst was, that the noble Mortimer," 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "41", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 42, 
        "line_number": "1.1.39", 
        "play_name": "Henry IV", 
        "speaker": "WESTMORELAND", 
        "speech_number": 2, 
        "text_entry": "Leading the men of Herefordshire to fight" 
       }, 
       "_type": "line" 
      } 
     ], 
     "max_score": 1.0, 
     "total": 111396 
    }, 
    "timed_out": false, 
    "took": 28 
} 

聚合桶中的文檔計數看起來非常低。我期望看到的是下列說話者(speaker)及其文檔數(以下數字是我直接對整個數據集逐一統計說話者得出的):

GLOUCESTER 1920 
HAMLET 1582 
IAGO 1161 
FALSTAFF 1117 
KING HENRY V 1086 
BRUTUS 1051 
OTHELLO 928 
MARK ANTONY 927 
KING HENRY VI 917 
DUKE VINCENTIO 909 

我已經花了幾個小時在網上搜索這個問題,但仍然找不出原因。我究竟做錯了什麼?

回答

0

根本原因是映射中的錯誤以及搜索數據的方式。映射中的 'speaker.raw' 只爲 doc_type 'act' 設置,而它本應爲 doc_type 'line' 設置;同時,搜索不應覆蓋所有類型,而應只針對 doc_type 'line'。

詳細的解答:

按照該網頁上的例子:https://www.elastic.co/guide/en/elasticsearch/guide/current/aggregations-and-analysis.html ,我意識到這個 bug 出在映射上。

前:

  • 我沒有意識到原始數據集包含多個 doc_type。
  • 在映射中,只有 doc_type 'act' 的 'speaker' 字段帶有 not_analyzed 版本。
  • 我在搜索時沒有指定任何 doc_type。
  • 我原以爲結果會對 doc_type 'line' 中的說話者進行分桶,而實際上該 doc_type 根本沒有 'speaker.raw' 屬性。
  • 鑑於此,問題中給出的計數也是錯誤的。

後:

  • 新映射爲每個 doc_type(act/scene/line)的 'speaker' 字段添加了一個多字段(multi-field),即 speaker.raw,它是 not_analyzed 的。
  • 新的搜索正確地只在 doc_type 'line' 中搜索說話者,這才是最初的意圖。
  • Elasticsearch 的結果現在與我手動從此數據集統計得到的計數一致。doc_type 'line' 中前 10 位說話者的當前計數如下:

    GLOUCESTER 1907, HAMLET 1572, IAGO 1153, FALSTAFF 1109, KING HENRY V 1076, BRUTUS 1043, OTHELLO 928, MARK ANTONY 915, KING HENRY VI 909, DUKE VINCENTIO 901

這裏是正確的映射:

{ 
    "shakespeare" : { 
    "mappings" : { 
     "line" : { 
     "properties" : { 
      "line_id" : { 
      "type" : "integer" 
      }, 
      "line_number" : { 
      "type" : "string" 
      }, 
      "play_name" : { 
      "type" : "string", 
      "fields" : { 
       "raw" : { 
       "type" : "string", 
       "index" : "not_analyzed" 
       } 
      } 
      }, 
      "speaker" : { 
      "type" : "string", 
      "fields" : { 
       "raw" : { 
       "type" : "string", 
       "index" : "not_analyzed" 
       } 
      } 
      }, 
      "speech_number" : { 
      "type" : "integer" 
      }, 
      "text_entry" : { 
      "type" : "string" 
      } 
     } 
     }, 
     "act" : { 
     "properties" : { 
      "line_id" : { 
      "type" : "integer" 
      }, 
      "line_number" : { 
      "type" : "string" 
      }, 
      "play_name" : { 
      "type" : "string", 
      "fields" : { 
       "raw" : { 
       "type" : "string", 
       "index" : "not_analyzed" 
       } 
      } 
      }, 
      "speaker" : { 
      "type" : "string", 
      "fields" : { 
       "raw" : { 
       "type" : "string", 
       "index" : "not_analyzed" 
       } 
      } 
      }, 
      "speech_number" : { 
      "type" : "integer" 
      }, 
      "text_entry" : { 
      "type" : "string" 
      } 
     } 
     }, 
     "scene" : { 
     "properties" : { 
      "line_id" : { 
      "type" : "integer" 
      }, 
      "line_number" : { 
      "type" : "string" 
      }, 
      "play_name" : { 
      "type" : "string", 
      "fields" : { 
       "raw" : { 
       "type" : "string", 
       "index" : "not_analyzed" 
       } 
      } 
      }, 
      "speaker" : { 
      "type" : "string", 
      "fields" : { 
       "raw" : { 
       "type" : "string", 
       "index" : "not_analyzed" 
       } 
      } 
      }, 
      "speech_number" : { 
      "type" : "integer" 
      }, 
      "text_entry" : { 
      "type" : "string" 
      } 
     } 
     } 
    } 
    } 
} 

使用新的映射後,結果看起來是正確的:

curl -XPOST "https://my_es_id/shakespeare/line/_search" -d' 
{ 
    "aggs" : { 
     "speakers" : { 
      "terms" : { "field" : "speaker.raw"} 
     } 
    } 
}' 
{ 
    "_shards": { 
     "failed": 0, 
     "successful": 5, 
     "total": 5 
    }, 
    "aggregations": { 
     "speakers": { 
      "buckets": [ 
       { 
        "doc_count": 1907, 
        "key": "GLOUCESTER" 
       }, 
       { 
        "doc_count": 1572, 
        "key": "HAMLET" 
       }, 
       { 
        "doc_count": 1153, 
        "key": "IAGO" 
       }, 
       { 
        "doc_count": 1109, 
        "key": "FALSTAFF" 
       }, 
       { 
        "doc_count": 1076, 
        "key": "KING HENRY V" 
       }, 
       { 
        "doc_count": 1043, 
        "key": "BRUTUS" 
       }, 
       { 
        "doc_count": 928, 
        "key": "OTHELLO" 
       }, 
       { 
        "doc_count": 915, 
        "key": "MARK ANTONY" 
       }, 
       { 
        "doc_count": 909, 
        "key": "KING HENRY VI" 
       }, 
       { 
        "doc_count": 901, 
        "key": "DUKE VINCENTIO" 
       } 
      ], 
      "doc_count_error_upper_bound": 461, 
      "sum_other_doc_count": 94715 
     } 
    }, 
    "hits": { 
     "hits": [ 
      { 
       "_id": "14", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 15, 
        "line_number": "1.1.12", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "Did lately meet in the intestine shock" 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "19", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 20, 
        "line_number": "1.1.17", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "The edge of war, like an ill-sheathed knife," 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "22", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 23, 
        "line_number": "1.1.20", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "Whose soldier now, under whose blessed cross" 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "24", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 25, 
        "line_number": "1.1.22", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "Forthwith a power of English shall we levy;" 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "25", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 26, 
        "line_number": "1.1.23", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "Whose arms were moulded in their mothers womb" 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "26", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 27, 
        "line_number": "1.1.24", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "To chase these pagans in those holy fields" 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "29", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 30, 
        "line_number": "1.1.27", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "For our advantage on the bitter cross." 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "40", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 41, 
        "line_number": "1.1.38", 
        "play_name": "Henry IV", 
        "speaker": "WESTMORELAND", 
        "speech_number": 2, 
        "text_entry": "Whose worst was, that the noble Mortimer," 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "41", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 42, 
        "line_number": "1.1.39", 
        "play_name": "Henry IV", 
        "speaker": "WESTMORELAND", 
        "speech_number": 2, 
        "text_entry": "Leading the men of Herefordshire to fight" 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "44", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 45, 
        "line_number": "1.1.42", 
        "play_name": "Henry IV", 
        "speaker": "WESTMORELAND", 
        "speech_number": 2, 
        "text_entry": "A thousand of his people butchered;" 
       }, 
       "_type": "line" 
      } 
     ], 
     "max_score": 1.0, 
     "total": 106228 
    }, 
    "timed_out": false, 
    "took": 48 
} 
+1

你介意添加一個答案,說明你是如何解決的嗎?這將有助於將來找到你這個問題的其他人 :) –