2016-07-22 65 views
0

首先,我是Hive的新手。創建HIVE表時JSON解析錯誤

我通過Apache水槽獲取twitter數據。

{ 
"filter_level": "low", 
"retweeted": false, 
"in_reply_to_screen_name": null, 
"possibly_sensitive": false, 
"truncated": false, 
"lang": "en", 
"in_reply_to_status_id_str": null, 
"id": 756378998838530048, 
"in_reply_to_user_id_str": null, 
"timestamp_ms": "1469169780822", 
"in_reply_to_status_id": null, 
"created_at": "Fri Jul 22 06:43:00 +0000 2016", 
"favorite_count": 0, 
"place": null, 
"coordinates": null, 
"text": "#furry pokemon sex mermaid sex position", 
"contributors": null, 
"geo": null, 
"entities": { 
    "symbols": [], 
    "urls": [{ 
     "expanded_url": "http://14.gerase.tk", 
     "indices": [40, 63], 
     "display_url": "14.gerase.tk", 
     "url": "" 
    }], 
    "hashtags": [{ 
     "text": "furry", 
     "indices": [0, 6] 
    }], 
    "user_mentions": [] 
}, 
"is_quote_status": false, 
"source": "<a href=\"http://twitter.com\" rel=\"nofollow\">Twitter Web Client<\/a>", 
"favorited": false, 
"in_reply_to_user_id": null, 
"retweet_count": 0, 
"id_str": "756378998838530048", 
"user": { 
    "location": null, 
    "default_profile": true, 
    "statuses_count": 3436, 
    "profile_background_tile": false, 
    "lang": "ru", 
    "profile_link_color": "2B7BB9", 
    "id": 752318303280955392, 
    "following": null, 
    "favourites_count": 0, 
    "protected": false, 
    "profile_text_color": "333333", 
    "verified": false, 
    "description": null, 
    "contributors_enabled": false, 
    "profile_sidebar_border_color": "C0DEED", 
    "name": "Мария Виноградова", 
    "profile_background_color": "F5F8FA", 
    "created_at": "Mon Jul 11 01:47:15 +0000 2016", 
    "default_profile_image": false, 
    "followers_count": 5, 
    "profile_image_url_https": "https://pbs.twimg.com/profile_images/753398763201425408/X_2mAGt1_normal.jpg", 
    "geo_enabled": false, 
    "profile_background_image_url": "", 
    "profile_background_image_url_https": "", 
    "follow_request_sent": null, 
    "url": null, 
    "utc_offset": null, 
    "time_zone": null, 
    "notifications": null, 
    "profile_use_background_image": true, 
    "friends_count": 21, 
    "profile_sidebar_fill_color": "DDEEF6", 
    "screen_name": "afinafedorova2", 
    "id_str": "752318303280955392", 
    "profile_image_url": "http://pbs.twimg.com/profile_images/753398763201425408/X_2mAGt1_normal.jpg", 
    "listed_count": 5, 
    "is_translator": false 
} 

}

https://github.com/cloudera/cdh-twitter-example 提到這是蜂房查詢我想運行我使用HiveJSONSerDe。

create external table twitterdata(
filter_level string, 
retweeted boolean, 
in_reply_to_screen_name string, 
possibly_sensitive boolean, 
trauncated boolean, 
lang string, 
in_reply_to_status_id_str string, 
id bigint, 
in_reply_to_user_id_str string, 
timestamp_ms string, 
in_reply_to_status_id int, 
created_at string, 
favourite_count int, 
place string, 
coordinates string, 
text string, 
contributors string, 
geo string, 
entities STRUCT< 
     urls:ARRAY<STRUCT<expanded_url:STRING,url:STRING>>, 
    hashtags:ARRAY<STRUCT<text:STRING>>, 
     user_mentions:ARRAY<STRUCT<screen_name:STRING,name:STRING>>>, 
is_quote_status boolean, 
source string, 
favotited boolean, 
in_reply_to_user_id int, 
retweet_count int, 
id_str string, 
user STRUCT< 
    location:STRING, 
    default_profile:boolean, 
    statuses_count:int, 
    profile_background_tile:boolean, 
    lang: string, 
    profile_link_color: string, 
    id: bigint, 
    following: string, 
    protected: boolean, 
    profile_text_color: string, 
    verified: boolean, 
    description: string, 
    contributors_enabled: boolean, 
    name: string, 
    created_at: string, 
    default_profile_image: boolean, 
    followers_count: int, 
    profile_image_url_https: string, 
    geo_enabled: boolean 
    url: string, 
    time_zone: string, 
    friends_count: int, 
    screen_name: string, 
    id_str: string, 
    listed_count: int, 
    is_translator: boolean> 
) 
ROW FORMAT SERDE 'com.cloudera.hive.serde.JSONSerDe' 
STORED AS TEXTFILE 
LOCATION '/Twitter/Pokemon/'; 

但它顯示有關解析JSON腳本的錯誤。

FAILED: Parse Error: line 31:2 mismatched input 'location' expecting identifier in column specification 

我找不到在創建表查詢中的錯誤。請幫忙。

回答

0

您可能想看看數據文件中的那一行。這很可能是您的JSON文件不完整或與模式不匹配。

如果您想持續將數據加載到配置單元表,您可能還需要檢查StreamSets。您可以使用它來捕獲JSON數據,自動轉換爲Avro並填充和更新配置單元表。

完全披露,我是該項目的提交者。

+1

實際上,「位置」本身是配置單元中的關鍵字,不能用作任何屬性名稱。對不起,我忘了編輯我的問題。 :) – Bihan