2016-08-25 23 views
0

Elasticsearch:按字段中字符串的一部分(而不是整個字段)進行分組。以下是我的索引結構的一部分:

[ 
    { 
     "Id":"1", 
     "Path":"/Series/Current/SerieA/foo/foo", 
     "PlayCount":100 
    }, 
    { 
     "Id":"2", 
     "Path":"/Series/Current/SerieA/bar/foo", 
     "PlayCount":1000 
    }, 
    { 
     "Id":"3", 
     "Path":"/Series/Current/SerieA/bar/bar", 
     "PlayCount":50 
    }, 
    { 
     "Id":"4", 
     "Path":"/Series/Current/SerieB/bla/bla", 
     "PlayCount":300 
    }, 
    { 
     "Id":"5", 
     "Path":"/Series/Current/SerieB/goo/boo", 
     "PlayCount":200 
    }, 
    { 
     "Id":"6", 
     "Path":"/Series/Current/SerieC/foo/zoo", 
     "PlayCount":100 
    } 
] 

我想執行一個聚合,得到每個系列(Serie)的「PlayCount」總和,例如:

[
  {
    "key": "SerieA",
    "TotalPlayCount": 1150
  },
  {
    "key": "SerieB",
    "TotalPlayCount": 500
  },
  {
    "key": "SerieC",
    "TotalPlayCount": 100
  }
]

這是我嘗試的查詢,但這顯然不是正確的做法,因爲查詢失敗了:

{
  "size": 0,
  "query": {
    "filtered": {
      "query": {
        "regexp": {
          "Path": "/Series/Current/.*"
        }
      }
    }
  },
  "aggs": {
    "play_count_for_current_series": {
      "terms": {
        "field": "Path",
        "regexp": "/Series/Current/([^/]+)"
      },
      "aggs": {
        "Total_play": {
          "sum": {
            "field": "PlayCount"
          }
        }
      }
    }
  }
}

有沒有辦法做到這一點?

回答

0

我的建議如下:

DELETE test
PUT /test
{
  "settings": {
    "analysis": {
      "filter": {
        "my_special_filter": {
          "type": "pattern_capture",
          "preserve_original": false,
          "patterns": [
            "/Series/Current/([^/]+)"
          ]
        }
      },
      "analyzer": {
        "my_special_analyzer": {
          "tokenizer": "whitespace",
          "filter": [
            "my_special_filter"
          ]
        }
      }
    }
  },
  "mappings": {
    "test": {
      "properties": {
        "Path": {
          "type": "string",
          "fields": {
            "for_aggregations": {
              "type": "string",
              "analyzer": "my_special_analyzer"
            },
            "raw": {
              "type": "string",
              "index": "not_analyzed"
            }
          }
        }
      }
    }
  }
}

創建一個特殊的分析器,它使用 pattern_capture 過濾器,只捕獲你感興趣的詞項(terms)。因爲我不想更改該字段當前的映射,所以我在 fields 部分添加了一個使用此特殊分析器的子字段(for_aggregations)。我還添加了一個 not_analyzed 的 raw 子字段,它有助於查詢本身。

POST test/test/_bulk 
{"index":{}} 
{"Id":"1","Path":"/Series/Current/SerieA/foo/foo","PlayCount":100} 
{"index":{}} 
{"Id":"2","Path":"/Series/Current/SerieA/bar/foo","PlayCount":1000} 
{"index":{}} 
{"Id":"3","Path":"/Series/Current/SerieA/bar/bar","PlayCount":50} 
{"index":{}} 
{"Id":"4","Path":"/Series/Current/SerieB/bla/bla","PlayCount":300} 
{"index":{}} 
{"Id":"5","Path":"/Series/Current/SerieB/goo/boo","PlayCount":200} 
{"index":{}} 
{"Id":"6","Path":"/Series/Current/SerieC/foo/zoo","PlayCount":100} 
{"index":{}} 
{"Id":"7","Path":"/Sersdasdies/Curradent/SerieC/foo/zoo","PlayCount":100} 

至於查詢,你不需要正則表達式查詢,因爲聚合所使用的子字段只包含你需要的 SerieX 詞項。

GET /test/test/_search
{
  "size": 0,
  "query": {
    "filtered": {
      "query": {
        "regexp": {
          "Path.raw": "/Series/Current/.*"
        }
      }
    }
  },
  "aggs": {
    "play_count_for_current_series": {
      "terms": {
        "field": "Path.for_aggregations"
      },
      "aggs": {
        "Total_play": {
          "sum": {
            "field": "PlayCount"
          }
        }
      }
    }
  }
}

其結果是:

"play_count_for_current_series": { 
    "doc_count_error_upper_bound": 0, 
    "sum_other_doc_count": 0, 
    "buckets": [ 
     { 
      "key": "SerieA", 
      "doc_count": 3, 
      "Total_play": { 
       "value": 1150 
      } 
     }, 
     { 
      "key": "SerieB", 
      "doc_count": 2, 
      "Total_play": { 
       "value": 500 
      } 
     }, 
     { 
      "key": "SerieC", 
      "doc_count": 1, 
      "Total_play": { 
       "value": 100 
      } 
     } 
    ] 
    }