我試圖使用 Hive JSON SerDe 將 Twitter JSON 載入 Hive 表。我先把 JSON 導入一個以 ROW FORMAT SERDE 定義的表,然後再將其導入另一個以 RCFile 格式存儲的表。這在一定程度上可以運作,但隨後會出現如下的 ClassCastException(Hive JSON SerDe - ClassCastException:java.lang.Integer 不能轉換爲 java.lang.Double):
java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row [Error getting row data with exception java.lang.ClassCastException: java.lang.Integer cannot be cast to java.lang.Double
at org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaDoubleObjectInspector.get(JavaDoubleObjectInspector.java:40)
at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:259)
at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:307)
at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:354)
at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:354)
at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:354)
at org.apache.hadoop.hive.serde2.SerDeUtils.getJSONString(SerDeUtils.java:220)
at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:667)
at org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:141)
at org.apache.hadoop
下面是我用來定義 SerDe 表的模式(schema):
-- gh_raw: external Hive table over raw Twitter status JSON, one JSON document
-- per row, deserialized by the OpenX JSON SerDe from /user/ahanna/gh_raw.
--
-- Schema notes:
--   * Numeric Twitter ids (user.id, user_mentions.id) are declared bigint.
--     Twitter ids are 64-bit values and do not fit in a 32-bit int.
--   * `user` and `location` are backtick-quoted; `user` is a reserved keyword
--     in newer Hive releases, and quoting does not change the field name.
--   * coordinates / bounding_box arrays are declared double.
--     NOTE(review): presumably the SerDe hands back java.lang.Integer for JSON
--     numbers written without a decimal point (e.g. [10, 20]), which would
--     explain an Integer -> Double ClassCastException when these columns are
--     read. Confirm against the SerDe version in use; this cannot be fixed
--     from the schema alone.
CREATE EXTERNAL TABLE gh_raw (
    coordinates struct<
        coordinates: array<double>,
        type: string>,
    created_at string,
    entities struct<
        hashtags: array<struct<text: string>>,
        media: array<struct<
            display_url: string,
            expanded_url: string,
            media_url: string,
            media_url_https: string,
            sizes: struct<
                large: struct<
                    h: int,
                    resize: string,
                    w: int>,
                medium: struct<
                    h: int,
                    resize: string,
                    w: int>,
                small: struct<
                    h: int,
                    resize: string,
                    w: int>,
                thumb: struct<
                    h: int,
                    resize: string,
                    w: int>>,
            type: string,
            url: string>>,
        urls: array<struct<
            display_url: string,
            expanded_url: string,
            url: string>>,
        user_mentions: array<struct<
            id: bigint,
            name: string,
            screen_name: string>>>,
    geo struct<
        coordinates: array<double>,
        type: string>,
    id_str string,
    in_reply_to_screen_name string,
    in_reply_to_status_id_str string,
    in_reply_to_user_id_str string,
    place struct<
        attributes: struct<
            locality: string,
            region: string,
            street_address: string>,
        bounding_box: struct<
            coordinates: array<array<array<double>>>,
            type: string>,
        country: string,
        country_code: string,
        full_name: string,
        name: string,
        place_type: string,
        url: string>,
    possibly_sensitive boolean,
    retweeted_status struct<
        coordinates: struct<
            coordinates: array<double>,
            type: string>,
        created_at: string,
        entities: struct<
            hashtags: array<struct<
                text: string>>,
            media: array<struct<
                display_url: string,
                expanded_url: string,
                media_url: string,
                media_url_https: string,
                sizes: struct<
                    large: struct<
                        h: int,
                        resize: string,
                        w: int>,
                    medium: struct<
                        h: int,
                        resize: string,
                        w: int>,
                    small: struct<
                        h: int,
                        resize: string,
                        w: int>,
                    thumb: struct<
                        h: int,
                        resize: string,
                        w: int>>,
                type: string,
                url: string>>,
            urls: array<struct<
                display_url: string,
                expanded_url: string,
                url: string>>,
            user_mentions: array<struct<
                id: bigint,
                name: string,
                screen_name: string>>>,
        favorited: boolean,
        geo: struct<
            coordinates: array<double>,
            type: string>,
        id_str: string,
        in_reply_to_screen_name: string,
        in_reply_to_status_id_str: string,
        in_reply_to_user_id_str: string,
        place: struct<
            attributes: struct<
                locality: string,
                region: string,
                street_address: string>,
            bounding_box: struct<
                coordinates: array<array<array<double>>>,
                type: string>,
            country: string,
            country_code: string,
            full_name: string,
            name: string,
            place_type: string,
            url: string>,
        possibly_sensitive: boolean,
        scopes: struct<
            followers: boolean>,
        source: string,
        text: string,
        truncated: boolean,
        `user`: struct<
            contributors_enabled: boolean,
            created_at: string,
            default_profile: boolean,
            default_profile_image: boolean,
            description: string,
            favourites_count: int,
            followers_count: int,
            friends_count: int,
            geo_enabled: boolean,
            id: bigint,
            id_str: string,
            is_translator: boolean,
            lang: string,
            listed_count: int,
            `location`: string,
            name: string,
            profile_background_color: string,
            profile_background_image_url: string,
            profile_background_image_url_https: string,
            profile_background_tile: boolean,
            profile_banner_url: string,
            profile_image_url: string,
            profile_image_url_https: string,
            profile_link_color: string,
            profile_sidebar_border_color: string,
            profile_sidebar_fill_color: string,
            profile_text_color: string,
            profile_use_background_image: boolean,
            protected: boolean,
            screen_name: string,
            statuses_count: int,
            time_zone: string,
            url: string,
            utc_offset: int,
            verified: boolean>>,
    source string,
    text string,
    truncated boolean,
    `user` struct<
        contributors_enabled: boolean,
        created_at: string,
        default_profile: boolean,
        default_profile_image: boolean,
        description: string,
        favourites_count: int,
        followers_count: int,
        friends_count: int,
        geo_enabled: boolean,
        id: bigint,
        id_str: string,
        is_translator: boolean,
        lang: string,
        listed_count: int,
        `location`: string,
        name: string,
        profile_background_color: string,
        profile_background_image_url: string,
        profile_background_image_url_https: string,
        profile_background_tile: boolean,
        profile_banner_url: string,
        profile_image_url: string,
        profile_image_url_https: string,
        profile_link_color: string,
        profile_sidebar_border_color: string,
        profile_sidebar_fill_color: string,
        profile_text_color: string,
        profile_use_background_image: boolean,
        protected: boolean,
        screen_name: string,
        statuses_count: int,
        time_zone: string,
        url: string,
        utc_offset: int,
        verified: boolean>
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
LOCATION '/user/ahanna/gh_raw';
我猜它是在遇到一組座標(coordinates)或邊界框(bounding_box)時崩潰的。
我認爲這是我所使用的 JSON SerDe 的一個 bug,但我不確定。我還從源碼編譯了另一個版本,其作者聲稱已經修復了這個問題,但仍然不行:https://github.com/brndnmtthws/Hive-JSON-Serde