2016-05-10 175 views
0

我有一張近700萬行的表,需要優化 MySQL 查詢。表結構如下:

-- Sales transaction table queried by the per-section sales report.
-- Primary key is the auto-increment saleId; every other column is nullable.
CREATE TABLE `ERS_SALES_TRANSACTIONS` (
    `saleId` int(12) NOT NULL AUTO_INCREMENT,
    `ERS_COMPANY_CODE` int(3) DEFAULT NULL,
    `SALE_SECTION` varchar(128) DEFAULT NULL,
    `SALE_DATE` date DEFAULT NULL,
    `SALE_STOCKAGE_EXACT` int(4) DEFAULT NULL,
    `SALE_NET_AMOUNT` decimal(11,2) DEFAULT NULL,
    `SALE_ABSOLUTE_CDATE` date DEFAULT NULL,
    PRIMARY KEY (`saleId`),
    -- Single-column index on the company code.
    KEY `index_location` (`ERS_COMPANY_CODE`),
    KEY `idx-erscode-salesec` (`SALE_SECTION`,`ERS_COMPANY_CODE`) USING BTREE,
    -- The comma after this key was missing, which made the whole statement a syntax error.
    KEY `idx-saledate-section` (`SALE_DATE`,`SALE_SECTION`) USING BTREE,
    -- Composite index added to speed up the report: filter column first, then the GROUP BY column.
    KEY `idx_quick_sales_transactions` (`ERS_COMPANY_CODE`,`SALE_SECTION`,`SALE_DATE`,`SALE_STOCKAGE_EXACT`,`SALE_NET_AMOUNT`)
) ENGINE=InnoDB;

下面這個查詢執行時間超過7秒,有沒有什麼辦法可以加速?

-- Per-section sales report for company 48.
-- Every output column is a conditional sum of SALE_NET_AMOUNT: a row contributes
-- its amount when it falls inside the column's date range (and, where given,
-- the stock-age or creation-date range); otherwise it contributes 0.
SELECT
    s.SALE_SECTION,

    -- Period 1: sales dated 2016-01-16 .. 2016-04-30, bucketed by SALE_STOCKAGE_EXACT
    SUM(CASE WHEN s.SALE_DATE BETWEEN '2016-01-16' AND '2016-04-30'
              AND s.SALE_STOCKAGE_EXACT BETWEEN 0 AND 90
             THEN s.SALE_NET_AMOUNT ELSE 0 END) AS fs1_pd1_sale,
    SUM(CASE WHEN s.SALE_DATE BETWEEN '2016-01-16' AND '2016-04-30'
              AND s.SALE_STOCKAGE_EXACT BETWEEN 91 AND 180
             THEN s.SALE_NET_AMOUNT ELSE 0 END) AS fs2_pd1_sale,
    SUM(CASE WHEN s.SALE_DATE BETWEEN '2016-01-16' AND '2016-04-30'
              AND s.SALE_STOCKAGE_EXACT BETWEEN 181 AND 365
             THEN s.SALE_NET_AMOUNT ELSE 0 END) AS os1_pd1_sale,
    SUM(CASE WHEN s.SALE_DATE BETWEEN '2016-01-16' AND '2016-04-30'
              AND s.SALE_STOCKAGE_EXACT BETWEEN 366 AND 9999
             THEN s.SALE_NET_AMOUNT ELSE 0 END) AS os2_pd1_sale,
    SUM(CASE WHEN s.SALE_DATE BETWEEN '2016-01-16' AND '2016-04-30'
             THEN s.SALE_NET_AMOUNT ELSE 0 END) AS TOTAL_PD1_SALE,

    -- Period 2: sales dated 2016-04-01 .. 2016-04-30, same stock-age buckets
    SUM(CASE WHEN s.SALE_DATE BETWEEN '2016-04-01' AND '2016-04-30'
              AND s.SALE_STOCKAGE_EXACT BETWEEN 0 AND 90
             THEN s.SALE_NET_AMOUNT ELSE 0 END) AS fs1_pd2_sale,
    SUM(CASE WHEN s.SALE_DATE BETWEEN '2016-04-01' AND '2016-04-30'
              AND s.SALE_STOCKAGE_EXACT BETWEEN 91 AND 180
             THEN s.SALE_NET_AMOUNT ELSE 0 END) AS fs2_pd2_sale,
    SUM(CASE WHEN s.SALE_DATE BETWEEN '2016-04-01' AND '2016-04-30'
              AND s.SALE_STOCKAGE_EXACT BETWEEN 181 AND 365
             THEN s.SALE_NET_AMOUNT ELSE 0 END) AS os1_pd2_sale,
    SUM(CASE WHEN s.SALE_DATE BETWEEN '2016-04-01' AND '2016-04-30'
              AND s.SALE_STOCKAGE_EXACT BETWEEN 366 AND 9999
             THEN s.SALE_NET_AMOUNT ELSE 0 END) AS os2_pd2_sale,
    SUM(CASE WHEN s.SALE_DATE BETWEEN '2016-04-01' AND '2016-04-30'
             THEN s.SALE_NET_AMOUNT ELSE 0 END) AS TOTAL_PD2_SALE,

    -- Achieved: sales dated 2016-05-01 .. 2016-05-31, bucketed by SALE_ABSOLUTE_CDATE
    SUM(CASE WHEN s.SALE_DATE BETWEEN '2016-05-01' AND '2016-05-31'
              AND s.SALE_ABSOLUTE_CDATE BETWEEN '2016-03-01' AND '2016-05-31'
             THEN s.SALE_NET_AMOUNT ELSE 0 END) AS fs1_achived_sale,
    SUM(CASE WHEN s.SALE_DATE BETWEEN '2016-05-01' AND '2016-05-31'
              AND s.SALE_ABSOLUTE_CDATE BETWEEN '2015-12-01' AND '2016-02-29'
             THEN s.SALE_NET_AMOUNT ELSE 0 END) AS fs2_achived_sale,
    SUM(CASE WHEN s.SALE_DATE BETWEEN '2016-05-01' AND '2016-05-31'
              AND s.SALE_ABSOLUTE_CDATE BETWEEN '2015-06-01' AND '2015-11-30'
             THEN s.SALE_NET_AMOUNT ELSE 0 END) AS os1_achived_sale,
    SUM(CASE WHEN s.SALE_DATE BETWEEN '2016-05-01' AND '2016-05-31'
              AND s.SALE_ABSOLUTE_CDATE BETWEEN '2006-12-26' AND '2015-05-31'
             THEN s.SALE_NET_AMOUNT ELSE 0 END) AS os2_achived_sale,
    SUM(CASE WHEN s.SALE_DATE BETWEEN '2016-05-01' AND '2016-05-31'
             THEN s.SALE_NET_AMOUNT ELSE 0 END) AS Total_ACHIVED_SALE
FROM ERS_SALES_TRANSACTIONS s
WHERE s.ERS_COMPANY_CODE = 48
GROUP BY s.SALE_SECTION

下面是該查詢的 EXPLAIN 結果:

{ 
"data": 
[ 
    { 
     "id": 1, 
     "select_type": "SIMPLE", 
     "table": "A", 
     "type": "ref", 
     "possible_keys": "index_location,idx-erscode-salesec,idx-saledate-section", 
     "key": "index_location", 
     "key_len": "5", 
     "ref": "const", 
     "rows": 1411944, 
     "Extra": "Using where; Using temporary; Using filesort" 
    } 
] 
} 

加入複合索引之後,時間減少到4.03秒。下面是新的執行計劃:

{ 
"data": 
[ 
    { 
     "id": 1, 
     "select_type": "SIMPLE", 
     "table": "A", 
     "type": "ref", 
     "possible_keys": "index_location,idx-erscode-salesec,idx-saledate-section,idx_quick_sales_transactions", 
     "key_len": "5", 
     "key": "idx_quick_sales_transactions", 
     "ref": "const", 
     "rows": 1306058, 
     "Extra": "Using where" 
    } 
] 

}

+0

試着去掉所有的 'SUM(IF(...',改用(外)自連接代替 – JimmyB

+0

鍵'index_location'是ERS_COMPANY_CODE的索引 – sam

回答

1

在這一點上我不同意 JimmyB。在我看來,你的查詢寫得很好。

這取決於公司48有多少條記錄:如果數量很大(例如佔全表記錄的50%),就應該順序讀取整張表;如果沒有那麼多(例如只佔全部記錄的1%),就應該使用 ERS_COMPANY_CODE 上的索引。

由於 DBMS 決定使用 ERS_COMPANY_CODE 上的索引,實際情況應該是後者。

您可以嘗試通過創建組合索引來進一步加快查詢速度。索引至少應包含 (ERS_COMPANY_CODE, SALE_SECTION),以便讓 GROUP BY 更快。甚至可以把查詢用到的所有字段都加進去,這樣所有數據都可以從索引中取得,不必再訪問表本身。

-- Covering index for the per-section sales report.
-- Column order: the equality filter (ERS_COMPANY_CODE) first, then the GROUP BY
-- column (SALE_SECTION), then the remaining columns the query only reads.
-- SALE_ABSOLUTE_CDATE is included because the *_achived_sale sums test it;
-- without it the index does not cover the query and MySQL must still visit
-- the table rows.
CREATE INDEX idx_quick_sales_transactions ON ERS_SALES_TRANSACTIONS (
    ERS_COMPANY_CODE,
    SALE_SECTION,
    SALE_DATE,
    SALE_STOCKAGE_EXACT,
    SALE_NET_AMOUNT,
    SALE_ABSOLUTE_CDATE
);
+0

如果我需要爲其他查詢建立組合索引,該如何確定列的順序? – sam

+0

我通常按以下順序排列索引中的列:1. 'WHERE' 子句中的列;2. 'GROUP BY' 子句中的列;3. 'HAVING' 子句中的列;4. 'SELECT' 列表中的列。也就是說,檢索數據優先,其次是聚合,最後才是顯示結果 –

+0

好的,我會記下來。謝謝你的幫助 – sam

0
-- Self-join sketch of the per-section report: one LEFT OUTER JOIN per output
-- column, each restricted in its ON clause to that column's date range and
-- stock-age range. The "..." lines are elisions in the snippet, not SQL.
-- NOTE(review): each join matches on (ERS_COMPANY_CODE, SALE_SECTION) only, so
-- every `sales` row pairs with every matching row of each joined alias; the
-- SUMs look like they are inflated by that fan-out instead of equalling the
-- SUM(IF(...)) totals - verify before relying on the results.
-- NOTE(review): a section with no matching joined rows yields NULL here, not 0.
SELECT 
    sales.SALE_SECTION, 
    SUM(fs1_pd1.SALE_NET_AMOUNT) AS fs1_pd1_sale, 
    SUM(fs2_pd1.SALE_NET_AMOUNT) AS fs2_pd1_sale, 
... 
FROM ERS_SALES_TRANSACTIONS sales 

-- Rows dated 2016-01-16 .. 2016-04-30 with SALE_STOCKAGE_EXACT 0 .. 90
LEFT OUTER JOIN ERS_SALES_TRANSACTIONS fs1_pd1 ON sales.ERS_COMPANY_CODE = fs1_pd1.ERS_COMPANY_CODE AND sales.SALE_SECTION = fs1_pd1.SALE_SECTION 
    AND fs1_pd1.SALE_DATE BETWEEN '2016-01-16' AND '2016-04-30' 
    AND fs1_pd1.SALE_STOCKAGE_EXACT BETWEEN 0 AND 90 

-- Rows dated 2016-01-16 .. 2016-04-30 with SALE_STOCKAGE_EXACT 91 .. 180
LEFT OUTER JOIN ERS_SALES_TRANSACTIONS fs2_pd1 ON sales.ERS_COMPANY_CODE = fs2_pd1.ERS_COMPANY_CODE AND sales.SALE_SECTION = fs2_pd1.SALE_SECTION 
    AND fs2_pd1.SALE_DATE BETWEEN '2016-01-16' AND '2016-04-30' 
    AND fs2_pd1.SALE_STOCKAGE_EXACT BETWEEN 91 AND 180 
... 
    -- Restrict the driving rows to company 48, then aggregate per section
    WHERE sales.ERS_COMPANY_CODE = 48 
    GROUP BY sales.SALE_SECTION 

這種方式,優化器可以使用多個索引進行查詢。

不過,我建議先試試 @Thorsten Kettner 建議的複合索引,因爲它能達到相同的效果,而複雜程度要低得多。

+0

抱歉,這個查詢比我原來的查詢花的時間更長;在添加覆蓋索引之前和之後我都測試過 – sam

2

我不確定是否有辦法加快速度。不過,你可以嘗試使用索引。我會推薦在 ERS_SALES_TRANSACTIONS(ERS_COMPANY_CODE, SALE_SECTION, SALE_DATE, SALE_NET_AMOUNT) 上建立一個索引。

這是查詢的覆蓋索引,這意味着查詢用到的所有列都位於索引中,因此數據庫引擎不需要訪問原始數據頁面。(注意:要真正覆蓋上面的查詢,索引還需要包含 SALE_STOCKAGE_EXACT 和 SALE_ABSOLUTE_CDATE,因爲查詢也讀取了這兩列。)

但是,性能仍然取決於與特定公司代碼匹配的行數,尤其取決於聚合時文件排序(filesort)的性能。

+0

好的,添加複合索引後查詢時間減少到4.03秒,但仍然很長 – sam

+0

你有沒有一張表,其中每個 'ERS_COMPANY_CODE' 和 'SALE_SECTION' 的組合只有一行? –

+0

company_code可以有多個sale_section – sam