2016-08-16 52 views
1

目前我們正在使用AWS RDS(MySQL)+ Pandas。我們有訂單、客戶、產品等表格。爲了基於各種過濾器(總共18個過濾器)獲得客戶和廣告系列,查詢花費了太多時間。僅「訂單」表本身就有數百萬行的數量級。因此,爲了加快速度,我們開始用Elasticsearch做概念驗證(PoC),因爲我們的過濾器包含了大量文本搜索,例如「產品名稱」、「供應商名稱」等。 加速切片客戶的切片mysql

我們所面臨的問題是: 1)按AOV(平均訂單價值)區間進行過濾,同時返回相關(relevant)文檔的詳細資訊 2)按訂單數量進行過濾 3)按first_order_date和last_order_date進行過濾

我們的文件結構是

{ 
    "order_id":"6", 
    "customer_id":"1", 
    "customer_name":"shailendra", 
    "mailing_addres":"[email protected]", 
    "actual_order_date":"2000-04-30", 
    "is_veg":"0", 
    "total_amount":"2499", 
    "store_id":"276", 
    "city_id":"12", 
    "payment_mode":"cod", 
    "is_elite":"0", 
    "product":["1","2"], 
    "coupon_id":"", 
    "client_source":"1", 
    "vendor_id":"", 
    "vendor_name":"", 
    "brand_id":"", 
    "third_party_source":"" 

} 

這是查詢

{ 
    "aggs": { 
     "customer_ids":{ 
      "terms":{ 
       "field":"customer_id" 
      } 
     } 
    } 
} 

它返回結果爲

{ 
    "took": 13, 
    "timed_out": false, 
    "_shards": { 
    "total": 5, 
    "successful": 5, 
    "failed": 0 
    }, 
    "hits": { 
    "total": 8, 
    "max_score": 1, 
    "hits": [ 
     { 
     "_index": "customers4", 
     "_type": "details", 
     "_id": "5", 
     "_score": 1, 
     "_source": { 
      "order_id": "5", 
      "customer_id": "5", 
      "customer_name": "ashish", 
      "mailing_addres": "[email protected]", 
      "actual_order_date": "2016-05-30", 
      "is_veg": "1", 
      "total_amount": "300", 
      "store_id": "2", 
      "city_id": "", 
      "payment_mode": "cod", 
      "is_elite": "0", 
      "product": [ 
      "1", 
      "2" 
      ], 
      "coupon_id": "", 
      "client_source": "1", 
      "vendor_id": "", 
      "brand_id": "", 
      "third_party_source": "" 
     } 
     }, 
     { 
     "_index": "customers4", 
     "_type": "details", 
     "_id": "8", 
     "_score": 1, 
     "_source": { 
      "order_id": "8", 
      "customer_id": "2", 
      "customer_name": "nikhil", 
      "mailing_addres": "[email protected]", 
      "actual_order_date": "2016-05-30", 
      "is_veg": "0", 
      "total_amount": "249", 
      "store_id": "2", 
      "city_id": "", 
      "payment_mode": "cod", 
      "is_elite": "0", 
      "product": [ 
      "1", 
      "2" 
      ], 
      "coupon_id": "", 
      "client_source": "1", 
      "vendor_id": "", 
      "brand_id": "", 
      "third_party_source": "" 
     } 
     }, 
     { 
     "_index": "customers4", 
     "_type": "details", 
     "_id": "2", 
     "_score": 1, 
     "_source": { 
      "order_id": "2", 
      "customer_id": "2", 
      "customer_name": "nikhil", 
      "mailing_addres": "[email protected]", 
      "actual_order_date": "2016-01-30", 
      "is_veg": "1", 
      "total_amount": "255", 
      "store_id": "1", 
      "city_id": "", 
      "payment_mode": "cod", 
      "is_elite": "0", 
      "product": [ 
      "1", 
      "2", 
      "3" 
      ], 
      "coupon_id": "", 
      "client_source": "1", 
      "vendor_id": "", 
      "brand_id": "", 
      "third_party_source": "" 
     } 
     }, 
     { 
     "_index": "customers4", 
     "_type": "details", 
     "_id": "4", 
     "_score": 1, 
     "_source": { 
      "order_id": "4", 
      "customer_id": "4", 
      "customer_name": "vivek", 
      "mailing_addres": "[email protected]", 
      "actual_order_date": "2016-04-30", 
      "is_veg": "0", 
      "total_amount": "249", 
      "store_id": "2", 
      "city_id": "", 
      "payment_mode": "cod", 
      "is_elite": "0", 
      "product": [ 
      "1", 
      "2" 
      ], 
      "coupon_id": "", 
      "client_source": "1", 
      "vendor_id": "", 
      "brand_id": "", 
      "third_party_source": "" 
     } 
     }, 
     { 
     "_index": "customers4", 
     "_type": "details", 
     "_id": "6", 
     "_score": 1, 
     "_source": { 
      "order_id": "7", 
      "customer_id": "1", 
      "customer_name": "shailendra", 
      "mailing_addres": "[email protected]", 
      "actual_order_date": "2016-05-30", 
      "is_veg": "0", 
      "total_amount": "249", 
      "store_id": "2", 
      "city_id": "", 
      "payment_mode": "cod", 
      "is_elite": "0", 
      "product": [ 
      "1", 
      "2" 
      ], 
      "coupon_id": "", 
      "client_source": "1", 
      "vendor_id": "", 
      "brand_id": "", 
      "third_party_source": "" 
     } 
     }, 
     { 
     "_index": "customers4", 
     "_type": "details", 
     "_id": "1", 
     "_score": 1, 
     "_source": { 
      "order_id": "1", 
      "customer_id": "1", 
      "customer_name": "shailendra", 
      "mailing_addres": "[email protected]", 
      "actual_order_date": "2016-01-30", 
      "is_veg": "1", 
      "total_amount": "251", 
      "store_id": "1", 
      "city_id": "", 
      "payment_mode": "cod", 
      "is_elite": "0", 
      "product": [ 
      "1", 
      "2", 
      "3" 
      ], 
      "coupon_id": "", 
      "client_source": "1", 
      "vendor_id": "", 
      "brand_id": "", 
      "third_party_source": "" 
     } 
     }, 
     { 
     "_index": "customers4", 
     "_type": "details", 
     "_id": "7", 
     "_score": 1, 
     "_source": { 
      "order_id": "6", 
      "customer_id": "4", 
      "customer_name": "vivek", 
      "mailing_addres": "[email protected]", 
      "actual_order_date": "2016-05-30", 
      "is_veg": "0", 
      "total_amount": "249", 
      "store_id": "2", 
      "city_id": "", 
      "payment_mode": "cod", 
      "is_elite": "0", 
      "product": [ 
      "1", 
      "2" 
      ], 
      "coupon_id": "", 
      "client_source": "1", 
      "vendor_id": "", 
      "brand_id": "", 
      "third_party_source": "" 
     } 
     }, 
     { 
     "_index": "customers4", 
     "_type": "details", 
     "_id": "3", 
     "_score": 1, 
     "_source": { 
      "order_id": "3", 
      "customer_id": "3", 
      "customer_name": "manish", 
      "mailing_addres": "[email protected]", 
      "actual_order_date": "2016-03-30", 
      "is_veg": "0", 
      "total_amount": "249", 
      "store_id": "2", 
      "city_id": "", 
      "payment_mode": "cod", 
      "is_elite": "0", 
      "product": [ 
      "1", 
      "2" 
      ], 
      "coupon_id": "", 
      "client_source": "1", 
      "vendor_id": "", 
      "brand_id": "", 
      "third_party_source": "" 
     } 
     } 
    ] 
    }, 
    "aggregations": { 
    "customer_ids": { 
     "doc_count_error_upper_bound": 0, 
     "sum_other_doc_count": 0, 
     "buckets": [ 
     { 
      "key": "1", 
      "doc_count": 2 
     }, 
     { 
      "key": "2", 
      "doc_count": 2 
     }, 
     { 
      "key": "4", 
      "doc_count": 2 
     }, 
     { 
      "key": "3", 
      "doc_count": 1 
     }, 
     { 
      "key": "5", 
      "doc_count": 1 
     } 
     ] 
    } 
    } 
} 

如您所見,這裏的聚合結果只返回了文檔計數(doc_count)。我們希望同時得到文檔的所有字段以及文檔數。

+1

您可以顯示迄今爲止嘗試過哪些查詢以及它們如何達到您想要的效果嗎? – Val

+0

這是基本的查詢:{ "aggs": { "customer_ids": { "terms": { "field": "customer_id" } } } } – user6720627

回答

0

您可以使用top_hits aggregation來檢索每個客戶存儲桶的文檔。

{ 
    "aggs": { 
     "customer_ids":{ 
      "terms":{ 
       "field":"customer_id" 
      }, 
      "aggs": { 
       "docs": { 
       "top_hits": { 
        "size": 3 
       } 
       } 
      } 
     } 
    } 
} 
+0

感謝。只是想知道在應用分組之後,我們是否可以根據兩個日期之間該客戶的max(actual_order_date)過濾客戶? – user6720627

+0

您只需要兩個日期之間該客戶的最近訂單日期? – Val

+0

是的,兩個日期之間的客戶的最後訂單日期和兩個日期之間的客戶的第一個訂單日期 – user6720627