2017-01-19 126 views
5

BigQuery:查詢執行期間資源超出(Resources exceeded during query execution)

我一直嘗試把我們一些較大型的分析程式碼放到 BigQuery 上執行,但在許多州的資料上,因為資料量太大,我不斷遇到問題。我們談的是橫跨多年的資料。也許只是我的查詢寫得不夠好,但我需要的是依照特定分組計算出總和。

在查詢中需要更改哪些內容以防止資源超出錯誤?

-- Quote-placement statistics per company and rating class at ZIP3 and ZIP5
-- level (BigQuery Legacy SQL; note the bracketed table reference).
-- The inner query de-duplicates the filtered rows with GROUP BY and computes
-- every metric as a window function; the outer query turns the *_COUNT columns
-- into shares of appearances and collapses the repeated rows with GROUP BY.
-- This is the window-function formulation that the question reports as
-- failing with "resources exceeded".
SELECT
    COMPANY_NAME,
    RATING_CLASS,
    COMPANY_KEY,
    -- State info & calculations.
    -- OVER() with no PARTITION BY sums across every row of this result.
    -- NOTE(review): a ZIP5 total is repeated on every output row for that ZIP5,
    -- so this sum may count the same ZIP5 more than once -- confirm intended.
    SUM(ZIP5_MED_SUPP_TOOL_NUM_QUOTE) OVER() AS STATE_MED_SUPP_TOOL_NUM_QUOTE,
    -- ZIP3 info & calculations (counts divided by appearances give shares).
    ZIP3,
    ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
    ZIP3_TOTAL_RESULT_APPEARANCE,
    ZIP3_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST,
    ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST,
    ZIP3_AVG_RANK AS ZIP3_AVG_RANK,
    ZIP3_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5,
    ZIP3_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10,
    ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF,
    ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST,
    ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK,
    ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5,
    ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10,
    ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
    -- ZIP5 info & calculations (same metrics at ZIP5 granularity).
    ZIP5,
    ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
    ZIP5_TOTAL_RESULT_APPEARANCE,
    ZIP5_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST,
    ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST,
    ZIP5_AVG_RANK AS ZIP5_AVG_RANK,
    ZIP5_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5,
    ZIP5_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10,
    ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF,
    ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST,
    ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK,
    ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5,
    ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10,
    ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF
FROM (
    SELECT
        COMPANY_NAME,
        COMPANY_KEY,
        RATING_CLASS,
        -- ZIP3: quote count per ZIP3, everything else per (ZIP3, company, rating class).
        ZIP3,
        COUNT(DISTINCT logging_key) OVER (PARTITION BY ZIP3) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
        COUNT(*) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOTAL_RESULT_APPEARANCE,
        SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END)
            OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_LOWEST_COUNT,
        -- The non-discounted averages read the non-discounted columns; they
        -- previously read the discounted_* columns and so merely duplicated
        -- the ZIP3_DISCOUNTED_AVG_* outputs below.
        AVG(ratio_to_min) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_RATIO_TO_LOWEST,
        AVG(rate_order) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_RANK,
        SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END)
            OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOP5_COUNT,
        SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END)
            OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOP10_COUNT,
        AVG(cent_diff) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_CENT_DIFF,
        SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END)
            OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_LOWEST_COUNT,
        AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
        AVG(discounted_rate_order) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_RANK,
        SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END)
            OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_TOP5_COUNT,
        SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END)
            OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_TOP10_COUNT,
        AVG(discounted_cent_diff) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
        -- ZIP5: same metrics partitioned by ZIP5 instead of ZIP3.
        ZIP5,
        COUNT(DISTINCT logging_key) OVER (PARTITION BY ZIP5) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
        COUNT(*) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOTAL_RESULT_APPEARANCE,
        SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END)
            OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_LOWEST_COUNT,
        AVG(ratio_to_min) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_RATIO_TO_LOWEST,
        AVG(rate_order) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_RANK,
        SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END)
            OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOP5_COUNT,
        SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END)
            OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOP10_COUNT,
        AVG(cent_diff) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_CENT_DIFF,
        SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END)
            OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_LOWEST_COUNT,
        AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
        AVG(discounted_rate_order) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_RANK,
        SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END)
            OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_TOP5_COUNT,
        SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END)
            OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_TOP10_COUNT,
        AVG(discounted_cent_diff) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF
    FROM
        [csgapi:qh_med_supp_tool.v2_TX]
    WHERE
        -- Half-open range: a row stamped exactly 2017-01-01T00:00:00Z belongs
        -- to the next period, so the upper bound is exclusive.
        SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
        AND SEARCH_TIMESTAMP < TIMESTAMP('2017-01-01T00:00:00.000Z')
        -- Excluded user keys (literal list kept as given).
        AND user_key NOT IN ("6522869941010432", "6277136540237824")
    -- De-duplicates the filtered rows before the window functions see them.
    GROUP BY
        COMPANY_NAME,
        COMPANY_KEY,
        RATING_CLASS,
        ZIP3,
        ZIP5,
        LOWEST,
        RATIO_TO_MIN,
        RATE_ORDER,
        TOP5,
        TOP10,
        CENT_DIFF,
        DISCOUNTED_LOWEST,
        DISCOUNTED_RATIO_TO_MIN,
        DISCOUNTED_RATE_ORDER,
        DISCOUNTED_TOP5,
        DISCOUNTED_TOP10,
        DISCOUNTED_CENT_DIFF,
        LOGGING_KEY)
-- The window results repeat on every inner row; grouping by all output
-- columns collapses them to one row per distinct combination.
GROUP BY
    COMPANY_NAME,
    COMPANY_KEY,
    RATING_CLASS,
    -- ZIP3 General
    ZIP3,
    ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
    ZIP3_TOTAL_RESULT_APPEARANCE,
    ZIP3_LOWEST,
    ZIP3_AVG_RATIO_TO_LOWEST,
    ZIP3_AVG_RANK,
    ZIP3_TOP5,
    ZIP3_TOP10,
    ZIP3_AVG_CENT_DIFF,
    ZIP3_DISCOUNTED_LOWEST,
    ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    ZIP3_DISCOUNTED_AVG_RANK,
    ZIP3_DISCOUNTED_TOP5,
    ZIP3_DISCOUNTED_TOP10,
    ZIP3_DISCOUNTED_AVG_CENT_DIFF,
    -- ZIP5 General
    ZIP5,
    ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
    ZIP5_TOTAL_RESULT_APPEARANCE,
    ZIP5_LOWEST,
    ZIP5_AVG_RATIO_TO_LOWEST,
    ZIP5_AVG_RANK,
    ZIP5_TOP5,
    ZIP5_TOP10,
    ZIP5_AVG_CENT_DIFF,
    ZIP5_DISCOUNTED_LOWEST,
    ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    ZIP5_DISCOUNTED_AVG_RANK,
    ZIP5_DISCOUNTED_TOP5,
    ZIP5_DISCOUNTED_TOP10,
    ZIP5_DISCOUNTED_AVG_CENT_DIFF

更新查詢與建議的修復:

-- Quote-placement statistics per company and rating class at ZIP3 and ZIP5
-- level, plus a per-state quote count (BigQuery Legacy SQL).
-- Instead of window functions, each metric set is pre-aggregated in its own
-- subquery over the same table and LEFT JOINed onto `main`, the list of
-- distinct (company, rating class, ZIP3, ZIP5, state) combinations.
-- The WHERE filter is repeated in every subquery; keep the copies identical
-- so that all metrics are computed over exactly the rows `main` is built from.
SELECT
    main.COMPANY_NAME AS COMPANY_NAME,
    main.COMPANY_KEY AS COMPANY_KEY,
    main.RATING_CLASS AS RATING_CLASS,
    state_count.STATE_MED_SUPP_TOOL_NUM_QUOTE AS STATE_MED_SUPP_TOOL_NUM_QUOTE,
    -- ZIP3 (counts divided by appearances give shares of appearances)
    main.ZIP3 AS ZIP3,
    zip3_count.ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
    zip3_sub.ZIP3_TOTAL_RESULT_APPEARANCE,
    zip3_sub.ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST,
    zip3_sub.ZIP3_TOP5_COUNT/zip3_sub.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5,
    zip3_sub.ZIP3_LOWEST_COUNT/zip3_sub.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST,
    zip3_sub.ZIP3_TOP10_COUNT/zip3_sub.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10,
    zip3_sub.ZIP3_AVG_RANK AS ZIP3_AVG_RANK,
    zip3_sub.ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF,
    zip3_sub.ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    zip3_sub.ZIP3_DISCOUNTED_TOP5_COUNT/zip3_sub.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5,
    zip3_sub.ZIP3_DISCOUNTED_LOWEST_COUNT/zip3_sub.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST,
    zip3_sub.ZIP3_DISCOUNTED_TOP10_COUNT/zip3_sub.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10,
    zip3_sub.ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK,
    zip3_sub.ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
    -- ZIP5 (same metrics at ZIP5 granularity)
    main.ZIP5 AS ZIP5,
    zip5_count.ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
    zip5_sub.ZIP5_TOTAL_RESULT_APPEARANCE,
    zip5_sub.ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST,
    zip5_sub.ZIP5_TOP5_COUNT/zip5_sub.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5,
    zip5_sub.ZIP5_LOWEST_COUNT/zip5_sub.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST,
    zip5_sub.ZIP5_TOP10_COUNT/zip5_sub.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10,
    zip5_sub.ZIP5_AVG_RANK AS ZIP5_AVG_RANK,
    zip5_sub.ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF,
    zip5_sub.ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    zip5_sub.ZIP5_DISCOUNTED_TOP5_COUNT/zip5_sub.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5,
    zip5_sub.ZIP5_DISCOUNTED_LOWEST_COUNT/zip5_sub.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST,
    zip5_sub.ZIP5_DISCOUNTED_TOP10_COUNT/zip5_sub.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10,
    zip5_sub.ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK,
    zip5_sub.ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF
FROM (
    -- main: one row per distinct company / rating class / ZIP3 / ZIP5 / state.
    SELECT
        COMPANY_NAME,
        COMPANY_KEY,
        RATING_CLASS,
        ZIP3,
        ZIP5,
        STATE
    FROM
        [csgapi:qh_med_supp_tool.v2_TX]
    WHERE
        -- NOTE(review): the window starts at 06:00Z but ends at 05:00Z; if both
        -- are meant to be local midnight at the same UTC offset, one of them is
        -- off by an hour -- confirm. The upper bound is exclusive (half-open).
        SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
        AND SEARCH_TIMESTAMP < TIMESTAMP('2017-01-01T05:00:00.000Z')
        AND user_key NOT IN ("6522869941010432",
            "6277136540237824",
            "4872666167115776",
            "6396348765044736",
            "6139303562313728",
            "4988973881491456")
        AND portal_key NOT IN ("5878607637381120")
    GROUP BY
        COMPANY_NAME,
        COMPANY_KEY,
        RATING_CLASS,
        ZIP3,
        ZIP5,
        STATE
    ) AS main
LEFT JOIN (
    -- zip3_sub: appearance counts and averages per (ZIP3, company, rating class).
    -- Grouped on exactly the join keys: grouping additionally by COMPANY_NAME
    -- (not selected, not joined on) could split a key's aggregates and
    -- duplicate joined rows.
    SELECT
        ZIP3,
        COMPANY_KEY,
        RATING_CLASS,
        COUNT(*) AS ZIP3_TOTAL_RESULT_APPEARANCE,
        -- Non-discounted averages read the non-discounted columns; they
        -- previously duplicated the ZIP3_DISCOUNTED_AVG_* values.
        AVG(ratio_to_min) AS ZIP3_AVG_RATIO_TO_LOWEST,
        SUM(CASE WHEN TOP5 = TRUE THEN 1 ELSE 0 END) AS ZIP3_TOP5_COUNT,
        SUM(CASE WHEN LOWEST = TRUE THEN 1 ELSE 0 END) AS ZIP3_LOWEST_COUNT,
        SUM(CASE WHEN TOP10 = TRUE THEN 1 ELSE 0 END) AS ZIP3_TOP10_COUNT,
        AVG(rate_order) AS ZIP3_AVG_RANK,
        AVG(cent_diff) AS ZIP3_AVG_CENT_DIFF,
        AVG(discounted_ratio_to_min) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
        SUM(CASE WHEN DISCOUNTED_TOP5 = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_TOP5_COUNT,
        SUM(CASE WHEN DISCOUNTED_LOWEST = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_LOWEST_COUNT,
        SUM(CASE WHEN DISCOUNTED_TOP10 = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_TOP10_COUNT,
        AVG(discounted_rate_order) AS ZIP3_DISCOUNTED_AVG_RANK,
        AVG(discounted_cent_diff) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF
    FROM
        [csgapi:qh_med_supp_tool.v2_TX]
    WHERE
        -- Same filter as main (previously only two user keys were excluded
        -- here, so the metrics included rows main had dropped).
        SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
        AND SEARCH_TIMESTAMP < TIMESTAMP('2017-01-01T05:00:00.000Z')
        AND user_key NOT IN ("6522869941010432",
            "6277136540237824",
            "4872666167115776",
            "6396348765044736",
            "6139303562313728",
            "4988973881491456")
        AND portal_key NOT IN ("5878607637381120")
    GROUP BY
        ZIP3,
        COMPANY_KEY,
        RATING_CLASS
    ) AS zip3_sub
ON
    main.ZIP3 = zip3_sub.ZIP3
    AND main.COMPANY_KEY = zip3_sub.COMPANY_KEY
    AND main.RATING_CLASS = zip3_sub.RATING_CLASS
LEFT JOIN (
    -- zip3_count: exact number of distinct logging keys per ZIP3.
    SELECT
        ZIP3,
        EXACT_COUNT_DISTINCT(logging_key) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE
    FROM
        [csgapi:qh_med_supp_tool.v2_TX]
    WHERE
        SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
        AND SEARCH_TIMESTAMP < TIMESTAMP('2017-01-01T05:00:00.000Z')
        AND user_key NOT IN ("6522869941010432",
            "6277136540237824",
            "4872666167115776",
            "6396348765044736",
            "6139303562313728",
            "4988973881491456")
        AND portal_key NOT IN ("5878607637381120")
    GROUP BY
        ZIP3) AS zip3_count
ON
    main.ZIP3 = zip3_count.ZIP3
LEFT JOIN (
    -- zip5_sub: appearance counts and averages per (ZIP5, company, rating class).
    SELECT
        ZIP5,
        COMPANY_KEY,
        RATING_CLASS,
        COUNT(*) AS ZIP5_TOTAL_RESULT_APPEARANCE,
        AVG(ratio_to_min) AS ZIP5_AVG_RATIO_TO_LOWEST,
        SUM(CASE WHEN TOP5 = TRUE THEN 1 ELSE 0 END) AS ZIP5_TOP5_COUNT,
        SUM(CASE WHEN LOWEST = TRUE THEN 1 ELSE 0 END) AS ZIP5_LOWEST_COUNT,
        SUM(CASE WHEN TOP10 = TRUE THEN 1 ELSE 0 END) AS ZIP5_TOP10_COUNT,
        AVG(rate_order) AS ZIP5_AVG_RANK,
        AVG(cent_diff) AS ZIP5_AVG_CENT_DIFF,
        AVG(discounted_ratio_to_min) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
        SUM(CASE WHEN DISCOUNTED_TOP5 = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_TOP5_COUNT,
        SUM(CASE WHEN DISCOUNTED_LOWEST = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_LOWEST_COUNT,
        SUM(CASE WHEN DISCOUNTED_TOP10 = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_TOP10_COUNT,
        AVG(discounted_rate_order) AS ZIP5_DISCOUNTED_AVG_RANK,
        AVG(discounted_cent_diff) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF
    FROM
        [csgapi:qh_med_supp_tool.v2_TX]
    WHERE
        SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
        AND SEARCH_TIMESTAMP < TIMESTAMP('2017-01-01T05:00:00.000Z')
        AND user_key NOT IN ("6522869941010432",
            "6277136540237824",
            "4872666167115776",
            "6396348765044736",
            "6139303562313728",
            "4988973881491456")
        AND portal_key NOT IN ("5878607637381120")
    GROUP BY
        ZIP5,
        COMPANY_KEY,
        RATING_CLASS
    ) AS zip5_sub
ON
    main.ZIP5 = zip5_sub.ZIP5
    AND main.COMPANY_KEY = zip5_sub.COMPANY_KEY
    AND main.RATING_CLASS = zip5_sub.RATING_CLASS
LEFT JOIN (
    -- zip5_count: exact number of distinct logging keys per ZIP5.
    SELECT
        ZIP5,
        EXACT_COUNT_DISTINCT(logging_key) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE
    FROM
        [csgapi:qh_med_supp_tool.v2_TX]
    WHERE
        SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
        AND SEARCH_TIMESTAMP < TIMESTAMP('2017-01-01T05:00:00.000Z')
        AND user_key NOT IN ("6522869941010432",
            "6277136540237824",
            "4872666167115776",
            "6396348765044736",
            "6139303562313728",
            "4988973881491456")
        AND portal_key NOT IN ("5878607637381120")
    GROUP BY
        ZIP5) AS zip5_count
ON
    main.ZIP5 = zip5_count.ZIP5
LEFT JOIN (
    -- state_count: exact number of distinct logging keys per state.
    SELECT
        STATE,
        EXACT_COUNT_DISTINCT(logging_key) AS STATE_MED_SUPP_TOOL_NUM_QUOTE
    FROM
        [csgapi:qh_med_supp_tool.v2_TX]
    WHERE
        SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
        AND SEARCH_TIMESTAMP < TIMESTAMP('2017-01-01T05:00:00.000Z')
        AND user_key NOT IN ("6522869941010432",
            "6277136540237824",
            "4872666167115776",
            "6396348765044736",
            "6139303562313728",
            "4988973881491456")
        AND portal_key NOT IN ("5878607637381120")
    GROUP BY
        STATE) AS state_count
ON
    main.STATE = state_count.STATE

說明:由於 BigQuery 是共享資源,它會先估算一個查詢大致需要多少資源,再據此分配資源。把查詢拆成對同一張表的多個連接之後,計算便能由比原先估計更多的資源來執行。至於爲什麼限制查詢的做法效果更好,技術性的說明請參閱 Jordan Tigani 在 Stack Overflow 上的回答。

+0

只是好奇,這個查詢大約處理了多少 GB 的資料? –

+0

對於你的查詢爲什麼如此昂貴,我有幾個想法。其中一個原因可能是它嚴重過度設計了。但是,在不清楚你想在這裏實現什麼(就邏輯而言)的情況下,很難下最後的結論,也很容易判斷錯誤。我建議你詳細說明你想用這個查詢得到什麼,這樣我們才能幫上忙,而不是像現在這樣盲目摸索 :o) –

回答

2

不知爲何,我覺得下面的查詢就是你需要的。我可能是錯的,因爲這其實只是在相對盲目的情況下嘗試對邏輯做逆向工程,所以如果我錯了,請不要太苛責。
我無法測試,但我覺得它的開銷夠低,應該可以真正執行成功。

-- Quote-placement statistics per company and rating class at ZIP3 and ZIP5
-- level (BigQuery Legacy SQL), written as pre-aggregated subqueries LEFT
-- JOINed onto `main`, the distinct (company, rating class, ZIP3, ZIP5)
-- combinations, instead of window functions over the raw rows.
-- The WHERE filter is repeated in every subquery; keep the copies identical.
SELECT
    main.COMPANY_NAME AS COMPANY_NAME,
    main.COMPANY_KEY AS COMPANY_KEY,
    main.RATING_CLASS AS RATING_CLASS,
    -- OVER() with no PARTITION BY sums across every row of this result.
    -- NOTE(review): a ZIP5 total is repeated on every output row for that ZIP5,
    -- so this sum may count the same ZIP5 more than once -- confirm intended.
    SUM(ZIP5_MED_SUPP_TOOL_NUM_QUOTE) OVER() AS STATE_MED_SUPP_TOOL_NUM_QUOTE,
    -- ZIP3 (counts divided by appearances give shares of appearances)
    main.ZIP3 AS ZIP3,
    ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
    ZIP3_TOTAL_RESULT_APPEARANCE,
    ZIP3_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST,
    ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST,
    ZIP3_AVG_RANK AS ZIP3_AVG_RANK,
    ZIP3_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5,
    ZIP3_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10,
    ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF,
    ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST,
    ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK,
    ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5,
    ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10,
    ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
    -- ZIP5 (same metrics at ZIP5 granularity)
    main.ZIP5 AS ZIP5,
    ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
    ZIP5_TOTAL_RESULT_APPEARANCE,
    ZIP5_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST,
    ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST,
    ZIP5_AVG_RANK AS ZIP5_AVG_RANK,
    ZIP5_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5,
    ZIP5_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10,
    ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF,
    ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST,
    ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK,
    ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5,
    ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10,
    ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF
FROM (
    -- main: one row per distinct company / rating class / ZIP3 / ZIP5.
    SELECT COMPANY_NAME, COMPANY_KEY, RATING_CLASS, ZIP3, ZIP5
    FROM [csgapi:qh_med_supp_tool.v2_TX]
    -- Half-open range: a row stamped exactly 2017-01-01T00:00:00Z belongs to
    -- the next period, so the upper bound is exclusive.
    WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
    AND SEARCH_TIMESTAMP < TIMESTAMP('2017-01-01T00:00:00.000Z')
    AND user_key NOT IN ("6522869941010432", "6277136540237824")
    GROUP BY COMPANY_NAME, COMPANY_KEY, RATING_CLASS, ZIP3, ZIP5
) AS main
LEFT JOIN (
    -- zip3_sub: appearance counts and averages per (ZIP3, company, rating class).
    SELECT
    ZIP3, company_key, rating_class,
    COUNT(*)                                                  AS ZIP3_TOTAL_RESULT_APPEARANCE,
    SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END)            AS ZIP3_LOWEST_COUNT,
    -- Non-discounted averages read the non-discounted columns; they
    -- previously duplicated the ZIP3_DISCOUNTED_AVG_* values.
    AVG(ratio_to_min)                                         AS ZIP3_AVG_RATIO_TO_LOWEST,
    AVG(rate_order)                                           AS ZIP3_AVG_RANK,
    SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END)              AS ZIP3_TOP5_COUNT,
    SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END)             AS ZIP3_TOP10_COUNT,
    AVG(cent_diff)                                            AS ZIP3_AVG_CENT_DIFF,
    SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_LOWEST_COUNT,
    AVG(discounted_ratio_to_min)                              AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    AVG(discounted_rate_order)                                AS ZIP3_DISCOUNTED_AVG_RANK,
    SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END)   AS ZIP3_DISCOUNTED_TOP5_COUNT,
    SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END)  AS ZIP3_DISCOUNTED_TOP10_COUNT,
    AVG(discounted_cent_diff)                                 AS ZIP3_DISCOUNTED_AVG_CENT_DIFF
    FROM [csgapi:qh_med_supp_tool.v2_TX]
    WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
    AND SEARCH_TIMESTAMP < TIMESTAMP('2017-01-01T00:00:00.000Z')
    AND user_key NOT IN ("6522869941010432", "6277136540237824")
    GROUP BY ZIP3, company_key, rating_class
) AS zip3_sub
ON main.ZIP3 = zip3_sub.ZIP3 AND main.company_key = zip3_sub.company_key AND main.rating_class = zip3_sub.rating_class
LEFT JOIN (
    -- zip5_sub: appearance counts and averages per (ZIP5, company, rating class).
    SELECT
    ZIP5, company_key, rating_class,
    COUNT(*)                                                  AS ZIP5_TOTAL_RESULT_APPEARANCE,
    SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END)            AS ZIP5_LOWEST_COUNT,
    AVG(ratio_to_min)                                         AS ZIP5_AVG_RATIO_TO_LOWEST,
    AVG(rate_order)                                           AS ZIP5_AVG_RANK,
    SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END)              AS ZIP5_TOP5_COUNT,
    SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END)             AS ZIP5_TOP10_COUNT,
    AVG(cent_diff)                                            AS ZIP5_AVG_CENT_DIFF,
    SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_LOWEST_COUNT,
    AVG(discounted_ratio_to_min)                              AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    AVG(discounted_rate_order)                                AS ZIP5_DISCOUNTED_AVG_RANK,
    SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END)   AS ZIP5_DISCOUNTED_TOP5_COUNT,
    SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END)  AS ZIP5_DISCOUNTED_TOP10_COUNT,
    AVG(discounted_cent_diff)                                 AS ZIP5_DISCOUNTED_AVG_CENT_DIFF
    FROM [csgapi:qh_med_supp_tool.v2_TX]
    WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
    AND SEARCH_TIMESTAMP < TIMESTAMP('2017-01-01T00:00:00.000Z')
    AND user_key NOT IN ("6522869941010432", "6277136540237824")
    GROUP BY ZIP5, company_key, rating_class
) AS zip5_sub
ON main.ZIP5 = zip5_sub.ZIP5 AND main.company_key = zip5_sub.company_key AND main.rating_class = zip5_sub.rating_class
LEFT JOIN (
    -- zip3_count: distinct logging keys per ZIP3. In Legacy SQL COUNT(DISTINCT)
    -- is a statistical approximation; EXACT_COUNT_DISTINCT() returns the exact
    -- figure at a higher cost.
    SELECT ZIP3, COUNT(DISTINCT logging_key) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE
    FROM [csgapi:qh_med_supp_tool.v2_TX]
    WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
    AND SEARCH_TIMESTAMP < TIMESTAMP('2017-01-01T00:00:00.000Z')
    AND user_key NOT IN ("6522869941010432", "6277136540237824")
    GROUP BY ZIP3
) AS zip3_count
ON main.ZIP3 = zip3_count.ZIP3
LEFT JOIN (
    -- zip5_count: distinct logging keys per ZIP5 (approximate, as above).
    SELECT ZIP5, COUNT(DISTINCT logging_key) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE
    FROM [csgapi:qh_med_supp_tool.v2_TX]
    WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
    AND SEARCH_TIMESTAMP < TIMESTAMP('2017-01-01T00:00:00.000Z')
    AND user_key NOT IN ("6522869941010432", "6277136540237824")
    GROUP BY ZIP5
) AS zip5_count
ON main.ZIP5 = zip5_count.ZIP5

此外,請注意:在 BigQuery Legacy SQL 中,COUNT(DISTINCT) 函數是概率性的,它給出的是統計近似值,不保證精確。
你可以改用 EXACT_COUNT_DISTINCT() 函數,它會給出精確的數字,但在後端的開銷會稍微大一些。

當然,整個查詢也可以改寫成 BigQuery Standard SQL;在 Standard SQL 中,COUNT(DISTINCT) 會產生精確的計數,而且依照 BigQuery 團隊的建議,Standard SQL 也是最推薦使用的方言。

+0

哇,你真是讓我大開眼界。考慮到 BigQuery 分配資源的方式,這完全說得通。感謝你與我們分享這一點。我已經貼出更新後的查詢,以呈現你建議的修改。它可以正常運作,平均執行時間從 86 秒降到了 30 秒。 – SmittySmee

相關問題