2
對於分區的Avro Hive表,Avro模式中具有大寫字符的字段名將被撤回爲空。我想知道是否有一些設置/解決方法我缺少,或者這只是Hive上下文的一個錯誤。Spark Hive上下文 - 帶分區和大寫字段名稱的Avro表
我已經嘗試添加以下的DDL:
WITH SERDEPROPERTIES ('casesensitive'='FieldName')
...並設置spark.sql.caseSensitive至真/假
星火版本1.5.0 蜂巢1.1版本0.0
您可以通過在蜂巢運行以下DDL重現該問題:
-- Hive DDL using partitions
CREATE TABLE avro_partitions (Field string)
PARTITIONED BY (part string)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
TBLPROPERTIES ('avro.schema.literal'=
'{ "type":"record", "name":"avro_partitions", "namespace":"default", "fields":[ {"name":"Field", "type":"string"} ] }');
INSERT INTO avro_partitions PARTITION (part='01') VALUES('test');
-- Hive DDL without partitions
CREATE TABLE avro_no_partitions (Field string)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
TBLPROPERTIES ('avro.schema.literal'=
'{ "type":"record", "name":"avro_no_partitions", "namespace":"default", "fields":[ {"name":"Field", "type":"string"} ] }');
INSERT INTO avro_no_partitions VALUES('test');
... &然後試圖從使用SQL的Spark(火花殼)表中進行選擇:
sqlContext.sql("select * from default.avro_partitions").show
+-----+----+
|field|part|
+-----+----+
| null| 01|
+-----+----+
sqlContext.sql("select * from default.avro_no_partitions").show
+-----+
|field|
+-----+
| test|
+-----+