2016-04-26 82 views
1
-- Fragment 1: a derived table aliased b with one row per batting.id, where
-- HAB is the sum of the per-row ratio hits/ab (not SUM(hits)/SUM(ab)).
-- NOTE(review): this parenthesized subquery is not yet attached to any
-- FROM or JOIN clause, so it cannot run on its own.
(SELECT 
    id, 
    SUM(hits/ab) AS HAB 
FROM batting 
GROUP BY id 
) b 

-- Fragment 2: rows of master (aliased a) that have bmonth >= 0 and a
-- non-NULL bstate, grouped by bmonth and bstate.
-- NOTE(review): id is selected but is neither grouped nor aggregated;
-- Hive will most likely reject this -- confirm.
SELECT id, bmonth, bstate FROM master a 

WHERE bmonth >= 0 AND bstate is NOT NULL 
GROUP By bmonth,bstate 

到目前爲止,我只寫出了上面這段雜亂的查詢,但不知道該如何建立連接(JOIN)並繼續往下做,也不知道該從哪裏着手。應該使用連接還是子查詢?請參考下面的表結構協助我。(問題標題:Hive 連接還是子查詢的困惑)

看:

-- External table over the comma-delimited data stored under
-- /home/hduser/hivetest/batting. Created only if it does not already exist.
CREATE EXTERNAL TABLE IF NOT EXISTS batting (
    id         STRING,
    year       INT,
    team       STRING,
    league     STRING,
    games      INT,
    ab         INT,
    runs       INT,
    hits       INT,
    doubles    INT,
    triples    INT,
    homeruns   INT,
    rbi        INT,
    sb         INT,
    cs         INT,
    walks      INT,
    strikeouts INT,
    ibb        INT,
    hbp        INT,
    sh         INT,
    sf         INT,
    gidp       INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
LOCATION '/home/hduser/hivetest/batting';

-- External table over the comma-delimited data stored under
-- /home/hduser/hivetest/master. Created only if it does not already exist.
CREATE EXTERNAL TABLE IF NOT EXISTS master (
    id        STRING,
    byear     INT,
    bmonth    INT,
    bday      INT,
    bcountry  STRING,
    bstate    STRING,
    bcity     STRING,
    dyear     INT,
    dmonth    INT,
    dday      INT,
    dcountry  STRING,
    dstate    STRING,
    dcity     STRING,
    fname     STRING,
    lname     STRING,
    name      STRING,
    weight    INT,
    height    INT,
    bats      STRING,
    throws    STRING,
    debut     STRING,
    finalgame STRING,
    retro     STRING,
    bbref     STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
LOCATION '/home/hduser/hivetest/master';

回答

1

首先要確保至少有 3 名球員來自同一個州,並且出生於同一個月份。按州和月份分組,統計每組的 id 數量,並只保留 count(id) >= 3 的結果:

-- (bstate, bmonth) pairs that appear in master with at least three
-- non-NULL ids.
SELECT
    bstate,
    bmonth
FROM master
GROUP BY
    bstate,
    bmonth
HAVING COUNT(id) >= 3

然後,將上述結果集與擊球表(batting)連接,按月份和州分組,按「安打總數 / 打數總數」排序,並取第一行。

-- For every (bmonth, bstate) pair shared by at least three master rows,
-- compute SUM(hits) / SUM(ab) over the batting rows of all players in that
-- pair, then return the single pair with the smallest ratio.
-- NOTE(review): a pair whose SUM(ab) is 0 presumably yields a NULL ratio;
-- confirm how NULL sorts under "order by hb" before relying on the result.
select
    a.bmonth,
    a.bstate,
    -- ab lives in batting (alias c); master.bats is a STRING column
    SUM(c.hits) / SUM(c.ab) hb
from (select bmonth, bstate
      from master
      group by bmonth, bstate
      having count(id) >= 3) a
-- master's columns are named bstate / bmonth; it has no state / month column
join master b
    on a.bstate = b.bstate
   and a.bmonth = b.bmonth
join batting c
    on b.id = c.id
group by a.bmonth, a.bstate
order by hb
limit 1;
+0

嗨,感謝你的回答,但我遇到了解析錯誤(ParseException):執行失敗,無法識別 from 來源中「(」「(」「select」附近的輸入 – dedpo

+0

@dedpo 我已經編輯了答案,請現在再試一次。抱歉,我目前無法訪問集羣 –

+0

這不是預期的輸出,但它對於如何執行多連接非常有用 – dedpo

0

下面是該查詢

-- Per-id ratio SUM(hits) / SUM(ab) over the batting rows of every master
-- row that has bmonth >= 0 and a non-NULL bstate.
select
    p.id,
    sum(p.hits) / sum(p.ab) as output
from (
    select
        m.id,
        b.ab,
        b.hits
    from master m
    join batting b
        on m.id = b.id
    where m.bmonth >= 0
      and m.bstate is not null
) p  -- Hive requires a subquery in the FROM clause to be given a name
group by p.id