2017-01-20 54 views
1

【MySQL 中位數查詢:檢索每個分組 ID 的中位數】目前的中位數查詢返回類似於以下內容的結果:

vendor_id | invoice_total 中位數
97        | 418

我希望中位數查詢的結果類似於下面的 AVG 函數查詢(每個 vendor_id 各返回一行):

-- Mean invoice_total per vendor: one output row for each distinct vendor_id.
SELECT
    vendor_id,
    avg(invoice_total)
FROM
    invoices
GROUP BY
    vendor_id;

中位數查詢:

-- Median invoice_total for a single vendor (vendor_id = 97).
-- The vendor's invoices are sorted by invoice_total and numbered 1..n; the
-- middle row (odd n) or the two middle rows (even n) are then averaged.
SELECT
    t3.vendor_id,
    AVG(t3.middle_values) AS median
FROM (
    SELECT
        t1.invoice_total AS middle_values,
        t1.vendor_id
    FROM (
        -- Number the rows only after they have been sorted: assigning @row in
        -- the same SELECT that carries the ORDER BY does not guarantee that
        -- the numbers follow the sort order.
        SELECT
            @row := @row + 1 AS `row`,
            sorted.invoice_total,
            sorted.vendor_id
        FROM (
            SELECT
                iv.invoice_total,
                iv.vendor_id
            FROM invoices AS iv
            WHERE iv.vendor_id = 97
            ORDER BY iv.invoice_total
            -- Some servers (e.g. MariaDB) ignore ORDER BY in a derived table
            -- that has no LIMIT; an "unlimited" LIMIT keeps the sort in place.
            LIMIT 18446744073709551615
        ) AS sorted
        CROSS JOIN (SELECT @row := 0) AS r  -- resets the counter to 0
    ) AS t1
    CROSS JOIN (
        -- Number of invoices for the vendor; used to locate the middle rows.
        SELECT COUNT(*) AS cnt
        FROM invoices AS iv
        WHERE iv.vendor_id = 97
    ) AS t2
    -- Keeps 1 row for odd-sized sets, or 2 rows for even-sized sets.
    WHERE t1.`row` >= t2.cnt / 2
      AND t1.`row` <= (t2.cnt / 2) + 1
) AS t3
-- vendor_id is selected next to an aggregate, so it must be grouped
-- (required when ONLY_FULL_GROUP_BY is enabled).
GROUP BY t3.vendor_id;

我認爲最關鍵的部分是第三個(也就是嵌套最深的)SELECT 語句。

-- Attempt at numbering invoices across all vendors.
-- NOTE: @row is neither initialised nor reset inside this statement, so the
-- counter depends on the session's existing value and keeps running across
-- vendor_id boundaries instead of restarting for each vendor.
SELECT
    @row := @row + 1 AS `row`
FROM (
    SELECT
        @row := @row + 1 AS `row`,
        vendor_id,
        invoice_total
    FROM invoices
    ORDER BY vendor_id, invoice_total
) AS t
INNER JOIN invoices AS inv
    ON inv.vendor_id = t.vendor_id;

如果 @row 計數器能在查詢每次切換到不同的 vendor_id 時被重置,那將是向前邁進的一大步。

表:

-- invoices: keyed by an auto-incrementing invoice_id; every row references a
-- vendor (vendor_id -> vendors) and a terms record (terms_id -> terms).
-- payment_date is the only nullable column.
CREATE TABLE IF NOT EXISTS `invoices` (
    `invoice_id`        INT(11)       NOT NULL AUTO_INCREMENT,
    `vendor_id`         INT(11)       NOT NULL,
    `invoice_number`    VARCHAR(50)   NOT NULL,
    `invoice_date`      DATE          NOT NULL,
    `invoice_total`     DECIMAL(9,2)  NOT NULL,
    `payment_total`     DECIMAL(9,2)  NOT NULL DEFAULT '0.00',
    `credit_total`      DECIMAL(9,2)  NOT NULL DEFAULT '0.00',
    `terms_id`          INT(11)       NOT NULL,
    `invoice_due_date`  DATE          NOT NULL,
    `payment_date`      DATE          DEFAULT NULL,

    PRIMARY KEY (`invoice_id`),

    -- Secondary indexes: one per foreign-key column, plus invoice_date.
    KEY `invoices_fk_vendors` (`vendor_id`),
    KEY `invoices_fk_terms` (`terms_id`),
    KEY `invoices_invoice_date_ix` (`invoice_date`),

    -- Referential integrity to the terms and vendors tables.
    CONSTRAINT `invoices_fk_terms`
        FOREIGN KEY (`terms_id`) REFERENCES `terms` (`terms_id`),
    CONSTRAINT `invoices_fk_vendors`
        FOREIGN KEY (`vendor_id`) REFERENCES `vendors` (`vendor_id`)
) ENGINE=InnoDB AUTO_INCREMENT=119 DEFAULT CHARSET=latin1;

插入:

-- Sample rows for `invoices`.
-- Columns are listed explicitly so the statement does not depend on the
-- table's physical column order. All rows go in as one multi-row INSERT.
INSERT INTO `invoices`
    (`invoice_id`, `vendor_id`, `invoice_number`, `invoice_date`, `invoice_total`,
     `payment_total`, `credit_total`, `terms_id`, `invoice_due_date`, `payment_date`)
VALUES
    (118,  97, '456792',    '2011-08-03',   565.60,   0.00, 0.00, 2, '2011-09-02', NULL),
    (117,  97, '456791',    '2011-08-03',  4390.00,   0.00, 0.00, 2, '2011-09-02', NULL),
    (116,  97, '456701',    '2011-08-02',   270.50,   0.00, 0.00, 2, '2011-09-01', NULL),
    (115,  97, '456789',    '2011-08-01',  8344.50,   0.00, 0.00, 2, '2011-08-31', NULL),
    (114, 123, '963253249', '2011-08-02',   127.75, 127.75, 0.00, 3, '2011-09-01', '2011-09-04'),
    (113,  37, '547480102', '2011-08-01',   224.00,   0.00, 0.00, 3, '2011-08-31', NULL),
    (112, 110, '0-2436',    '2011-07-31', 10976.06,   0.00, 0.00, 3, '2011-08-30', NULL),
    (111, 123, '263253257', '2011-07-30',    22.57,  22.57, 0.00, 3, '2011-08-29', '2011-09-03');

回答

1

試試用下面的查詢,在每個 vendor_id 內分配行號:

-- Number each vendor's invoices 1..n in ascending invoice_total order.
-- @prev_vid holds the vendor_id of the previously processed row; @rn is the
-- running counter: incremented while the vendor is unchanged, restarted at 1
-- when the vendor changes.
SELECT
    t.*,
    -- The inner IF returns 1 on both branches; it exists only so that
    -- @prev_vid is updated to the current vendor_id when the vendor changes.
    @rn := IF(vendor_id = @prev_vid,
              @rn + 1,
              IF(@prev_vid := vendor_id, 1, 1)) AS rn
FROM
    (SELECT
        *
     FROM
        invoices
     ORDER BY vendor_id, invoice_total
     -- Some servers (e.g. MariaDB) ignore ORDER BY in a derived table that
     -- has no LIMIT; an "unlimited" LIMIT keeps the rows sorted for numbering.
     LIMIT 18446744073709551615) AS t
    CROSS JOIN
    -- One-row derived table that initialises both session variables.
    (SELECT @rn := 0, @prev_vid := -1) AS init;

這樣,您的最終查詢就變成:

-- Median invoice_total for every vendor.
-- Step 1 (t1): number each vendor's invoices 1..n in ascending invoice_total
--              order using session variables.
-- Step 2 (t2): count the invoices per vendor.
-- Step 3:      keep only the middle row (odd n) or the two middle rows
--              (even n) per vendor and average them.
-- On MySQL 8.0+ the variable-based numbering can be replaced by
-- ROW_NUMBER() OVER (PARTITION BY vendor_id ORDER BY invoice_total).
SELECT
    t1.vendor_id,
    AVG(t1.invoice_total) AS median
FROM
    (SELECT
        t.vendor_id,
        t.invoice_total,
        -- Alias is `rn`, not `row`: ROW is a reserved word from MySQL 8.0.2.
        -- The inner IF returns 1 either way; it only updates @prev_vid.
        @rn := IF(t.vendor_id = @prev_vid,
                  @rn + 1,
                  IF(@prev_vid := t.vendor_id, 1, 1)) AS rn
     FROM
        (SELECT
            vendor_id,
            invoice_total
         FROM
            invoices
         ORDER BY vendor_id, invoice_total
         -- Some servers (e.g. MariaDB) ignore ORDER BY in a derived table
         -- that has no LIMIT; an "unlimited" LIMIT keeps the sort in place.
         LIMIT 18446744073709551615) AS t
        CROSS JOIN
        (SELECT @rn := 0, @prev_vid := -1) AS init) AS t1
    INNER JOIN
    (SELECT
        vendor_id,
        COUNT(*) AS cnt
     FROM
        invoices
     GROUP BY vendor_id) AS t2
        ON t1.vendor_id = t2.vendor_id
       -- cnt / 2 is a decimal division, so for odd cnt only the single
       -- middle row falls inside [cnt/2, cnt/2 + 1]; for even cnt, two rows.
       AND t1.rn >= t2.cnt / 2
       AND t1.rn <= (t2.cnt / 2) + 1
GROUP BY t1.vendor_id;