試試下面的查詢。
還有一些細節可以再打磨,但整體上應該能給你一個良好的開端!
// BigQuery legacy SQL demo: turns flat sales rows into nested, repeated records.
//   1. Innermost query: for every (store_nbr, sls_dt, item_sku, sls_amt) the
//      discounts are packed into one string of disc_nbr,disc_amt pairs joined by ;
//   2. Middle query: every item becomes one string item_sku|sls_amt|discounts and
//      NEST collects those strings per (store_nbr, sls_dt) into the items field.
//   3. JS function: splits the packed strings again and emits them as the
//      items / items.discounts records declared in the output schema.
SELECT *
FROM JS(
  ( // input table
    SELECT
      store_nbr,
      sls_dt,
      NEST(CONCAT(STRING(item_sku), '|', STRING(sls_amt), '|', STRING(discounts))) AS items
    FROM (
      SELECT
        store_nbr,
        sls_dt,
        item_sku,
        sls_amt,
        GROUP_CONCAT(CONCAT(STRING(disc_nbr), ',', STRING(disc_amt)), ';') AS discounts
      FROM
        // inline sample rows (a comma between tables is a UNION ALL in legacy SQL);
        // the last entry carries no trailing comma
        (SELECT 1234 AS store_nbr, "2014-01-01 00:00:00" AS sls_dt, 3456 AS item_sku, 9.99 AS sls_amt, 1 AS disc_nbr, 0.99 AS disc_amt),
        (SELECT 1234 AS store_nbr, "2014-01-01 00:00:00" AS sls_dt, 3456 AS item_sku, 9.99 AS sls_amt, 2 AS disc_nbr, 1.00 AS disc_amt),
        (SELECT 1234 AS store_nbr, "2014-01-01 00:00:00" AS sls_dt, 2345 AS item_sku, 7.99 AS sls_amt, 1 AS disc_nbr, 0.59 AS disc_amt),
        (SELECT 1234 AS store_nbr, "2014-01-01 00:00:00" AS sls_dt, 4567 AS item_sku, 7.99 AS sls_amt, 1 AS disc_nbr, 0.59 AS disc_amt),
        (SELECT 1234 AS store_nbr, "2014-01-01 00:00:00" AS sls_dt, 4567 AS item_sku, 7.99 AS sls_amt, 2 AS disc_nbr, 0.69 AS disc_amt),
        (SELECT 1234 AS store_nbr, "2014-01-01 00:00:00" AS sls_dt, 4567 AS item_sku, 7.99 AS sls_amt, 3 AS disc_nbr, 0.79 AS disc_amt),
        (SELECT 2345 AS store_nbr, "2014-01-02 00:00:00" AS sls_dt, 3456 AS item_sku, 9.99 AS sls_amt, 1 AS disc_nbr, 0.99 AS disc_amt),
        (SELECT 2345 AS store_nbr, "2014-01-02 00:00:00" AS sls_dt, 3456 AS item_sku, 9.99 AS sls_amt, 2 AS disc_nbr, 1.00 AS disc_amt),
        (SELECT 2345 AS store_nbr, "2014-01-02 00:00:00" AS sls_dt, 4567 AS item_sku, 7.99 AS sls_amt, 1 AS disc_nbr, 0.59 AS disc_amt)
      GROUP BY store_nbr, sls_dt, item_sku, sls_amt
    )
    GROUP BY store_nbr, sls_dt
  ),
  store_nbr, sls_dt, items, // input columns
  "[ // output schema
    {'name': 'store_nbr', 'type': 'INTEGER'},
    {'name': 'sls_dt', 'type': 'STRING'},
    {'name': 'items', 'type': 'RECORD',
     'mode': 'REPEATED',
     'fields': [
       {'name': 'sku', 'type': 'STRING'},
       {'name': 'sls_amt', 'type': 'FLOAT'},
       {'name': 'discounts', 'type': 'RECORD',
        'mode': 'REPEATED',
        'fields': [
          {'name': 'disc_nbr', 'type': 'INTEGER'},
          {'name': 'disc_amt', 'type': 'FLOAT'}
        ]
       }]
    }]",
  "function(row, emit) { // function
    // Each entry of row.items is a packed string: sku|sls_amt|discounts
    var items = [];
    for (var i = 0; i < row.items.length; i++) {
      // All locals are declared with var so nothing leaks into the global scope.
      var parts = row.items[i].split('|');
      var discounts = [];
      // parts[2] holds disc_nbr,disc_amt pairs separated by ;
      var pairs = parts[2].split(';');
      for (var j = 0; j < pairs.length; j++) {
        var discount = pairs[j].split(',');
        discounts.push({
          disc_nbr: parseInt(discount[0], 10), // explicit radix: always parse as decimal
          disc_amt: parseFloat(discount[1])
        });
      }
      items.push({sku: parts[0], sls_amt: parseFloat(parts[1]), discounts: discounts});
    }
    emit({
      store_nbr: row.store_nbr,
      sls_dt: row.sls_dt,
      items: items
    });
  }"
)
結果如下,
輸出的結構(schema)與預期相符。
感謝米哈伊爾。這個方法非常好用。我之前不知道這可以在 BigQuery 中完成。 – venky
可以請你看看這個問題嗎? http://stackoverflow.com/questions/43492894/structure-of-table-in-bigquery –
@Williams - 當然可以 - 請考慮為這個答案投票 - 同時我會盡快查看您的問題 –