
I have a file containing data that needs to be loaded into a Hive table. I wrote a custom SerDe (essentially a modification of Hive's existing Regex SerDe) to help me load the data. When I try to create a Hive table that uses the custom SerDe, I get an error.

Here is what I wrote:

package my.hive.customserde;

import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.MissingFormatArgumentException;
import java.util.Properties;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

// DateTimeFormatConverter and DateFormatConverter are my own helper classes
// (same package), used below to reformat datetime/date strings.
public class FIASC2 extends AbstractSerDe {

    public static final Log LOG = LogFactory.getLog(FIASC2.class.getName()); 

    // Fixed width of each column, in characters (the 18 widths sum to 139)
    int[] colwidths = {1, 10, 6, 12, 8, 14, 16, 6, 6, 2, 10, 10, 19, 2, 2, 6, 8, 1};
    String outputformat = "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s %10$s %11$s %12$s %13$s %14$s %15$s "
     + "%16$s %17$s %18$s";


    // Indexes of the datetime and date columns, boxed so that the
    // Arrays.asList(...).contains(i) checks in deserialize() compare values
    Integer[] datetimecols = {5};
    Integer[] datecols = {17};
    String cols; 
    int numColumns; 
    int totalcolwidth = 0; 

    List<String> columnNames; 
    List<TypeInfo> columnTypes; 

    ArrayList<String> row; 
    StructObjectInspector rowOI; 

    Object[] outputFields; 
    Text outputRowText; 


    @Override
    public void initialize(Configuration conf, Properties tbl) throws SerDeException { 
     LOG.debug("Initializing SerDe"); 
     // Get column names 
     String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS); 
     String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); 
     LOG.debug("Columns : " + columnNameProperty + " Types : " + columnTypeProperty);

     if(columnNameProperty.length() == 0) { 
      columnNames = new ArrayList<String>(); 
     } 
     else { 
      columnNames = Arrays.asList(columnNameProperty.split(",")); 
     } 

     columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); 

     assert columnNames.size() == columnTypes.size(); 
     assert colwidths.length == columnNames.size(); 

     numColumns = columnNames.size(); 

     // Total width of one fixed-width record is the sum of the column widths
     for (int i = 0; i < numColumns; i++) {
      totalcolwidth += colwidths[i];
     }

     List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size()); 

     for (int i = 0; i < numColumns; i++) { 
      columnOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); 
     } 

     rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs); 

     row = new ArrayList<String>(numColumns); 

     for(int i = 0; i < numColumns; i++) { 
      row.add(null); 
     } 

     outputFields = new Object[numColumns]; 
     outputRowText = new Text(); 
    }


    @Override
    public Object deserialize(Writable blob) throws SerDeException {
     Text rowText = (Text) blob;
     int index = 0; 

     String line = rowText.toString();

     // Skip lines shorter than one full fixed-width record
     if (line.length() < totalcolwidth) {
      return null;
     }

     // Skip header ("H") and trailer ("T") records
     if (line.startsWith("H") || line.startsWith("T")) {
      return null;
     }

     for (int i = 0; i < numColumns; i++) {
      int len = colwidths[i];
      String col = line.substring(index, index + len);
      // Convert the datetime string into the format expected by the Hive table
      if (Arrays.asList(datetimecols).contains(i)) {
       DateTimeFormatConverter dtc = new DateTimeFormatConverter(); 
       try { 
        col = dtc.convertCurrToNew(col); 
       } catch (ParseException e) { 
        LOG.error("Unable to parse Date Time string : " + col); 
        e.printStackTrace(); 
       } 
      } 
      if(Arrays.asList(datecols).contains(i)) { 
       DateFormatConverter dtc = new DateFormatConverter(); 
       try { 
        col = dtc.convertCurrToNew(col); 
       } catch (ParseException e) { 
        LOG.error("Unable to parse Date String : " + col); 
        e.printStackTrace(); 
       } 
      } 
      row.set(i, col); 
      index += len; 
     } 

     return row; 
    } 


    @Override
    public ObjectInspector getObjectInspector() throws SerDeException { 
     return rowOI; 
    } 


    @Override 
    public SerDeStats getSerDeStats() { 
     // TODO Auto-generated method stub 
     return null; 
    } 

    @Override
    public Class<? extends Writable> getSerializedClass() { 
     return Text.class; 
    } 


    @Override 
    public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException { 
     if(outputformat == null) { 
      throw new SerDeException("Cannot write into table because no output format was specified"); 
     } 

     StructObjectInspector outputRowOI = (StructObjectInspector) objInspector; 
     List<? extends StructField> outputFieldRefs = outputRowOI.getAllStructFieldRefs(); 

     if(outputFieldRefs.size() != numColumns) { 
      throw new SerDeException("Output format does not have the same number fields as the number of columns"); 
     } 

     for(int i = 0; i < numColumns; i++) { 
      Object field = outputRowOI.getStructFieldData(obj, outputFieldRefs.get(i)); 
      ObjectInspector fieldOI = outputFieldRefs.get(i).getFieldObjectInspector(); 

      StringObjectInspector fieldStringOI = (StringObjectInspector) fieldOI; 


      outputFields[i] = fieldStringOI.getPrimitiveJavaObject(field); 
     } 

     String outputRowString = null; 

     try { 
      outputRowString = String.format(outputformat, outputFields); 
     } catch (MissingFormatArgumentException e) { 
      throw new SerDeException("The table contains " + numColumns + "columns but the output format requires more", e); 
     } 

     outputRowText.set(outputRowString); 

     return outputRowText; 
    } 

}
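
For reference, here is a minimal, hypothetical harness (a sketch, not part of the original question) that exercises the SerDe outside Hive. The class name FIASC2Harness, the all-zeros sample record, and the literal property keys (the values behind serdeConstants.LIST_COLUMNS and LIST_COLUMN_TYPES) are assumptions; the column names are taken from the CREATE TABLE script further down.

package my.hive.customserde;

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;

public class FIASC2Harness {
    public static void main(String[] args) throws Exception {
        FIASC2 serde = new FIASC2();

        Properties tbl = new Properties();
        tbl.setProperty("columns",
            "record_type,fin_id,corp_id,merc_id,term_id,tran_time,cashcard_number,"
            + "ttc,tcc,tran_type,tran_amount,deposit_amount,pan,account_type,"
            + "response_code,card_balance,settlement_date,tran_mode");
        // Hive stores the type list colon-separated; this SerDe reads every column as a string
        tbl.setProperty("columns.types",
            "string:string:string:string:string:string:string:string:string:"
            + "string:string:string:string:string:string:string:string:string");

        serde.initialize(new Configuration(), tbl);

        // One record is 139 characters (the sum of colwidths); shorter lines and
        // header/trailer lines starting with "H" or "T" make deserialize() return null
        StringBuilder line = new StringBuilder();
        for (int i = 0; i < 139; i++) {
            line.append('0');
        }
        System.out.println(serde.deserialize(new Text(line.toString())));
    }
}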

Rest assured that I have imported every class the SerDe needs.

When I try to create the table, I get an error saying "Unable to get field from serde: my.hive.customserde.FIASC2".

Here is the stack trace:

2015-08-25 15:57:51,995 ERROR [HiveServer2-Background-Pool: Thread-57]: metadata.Table (Table.java:getCols(608)) - Unable to get field from serde: my.hive.customserde.FIASC2 
java.lang.NullPointerException 
    at org.apache.hadoop.hive.metastore.MetaStoreUtils.getFieldsFromDeserializer(MetaStoreUtils.java:1257) 
    at org.apache.hadoop.hive.ql.metadata.Table.getCols(Table.java:605) 
    at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:694) 
    at org.apache.hadoop.hive.ql.exec.DDLTask.createTable(DDLTask.java:4135) 
    at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:306) 
    at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:160) 
    at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:88) 
    at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1653) 
    at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1412) 
    at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1195) 
    at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1059) 
    at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1054) 
    at org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:154) 
    at org.apache.hive.service.cli.operation.SQLOperation.access$100(SQLOperation.java:71) 
    at org.apache.hive.service.cli.operation.SQLOperation$1$1.run(SQLOperation.java:206) 
    at java.security.AccessController.doPrivileged(Native Method) 
    at javax.security.auth.Subject.doAs(Subject.java:422) 
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657) 
    at org.apache.hive.service.cli.operation.SQLOperation$1.run(SQLOperation.java:218) 
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) 
    at java.util.concurrent.FutureTask.run(FutureTask.java:266) 
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) 
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) 
    at java.lang.Thread.run(Thread.java:745) 
2015-08-25 15:57:51,996 ERROR [HiveServer2-Background-Pool: Thread-57]: exec.DDLTask (DDLTask.java:failed(520)) - org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.NullPointerException 
    at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:720) 
    at org.apache.hadoop.hive.ql.exec.DDLTask.createTable(DDLTask.java:4135) 
    at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:306) 
    at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:160) 
    at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:88) 
    at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1653) 
    at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1412) 
    at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1195) 
    at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1059) 
    at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1054) 
    at org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:154) 
    at org.apache.hive.service.cli.operation.SQLOperation.access$100(SQLOperation.java:71) 
    at org.apache.hive.service.cli.operation.SQLOperation$1$1.run(SQLOperation.java:206) 
    at java.security.AccessController.doPrivileged(Native Method) 
    at javax.security.auth.Subject.doAs(Subject.java:422) 
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657) 
    at org.apache.hive.service.cli.operation.SQLOperation$1.run(SQLOperation.java:218) 
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) 
    at java.util.concurrent.FutureTask.run(FutureTask.java:266) 
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) 
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) 
    at java.lang.Thread.run(Thread.java:745) 
Caused by: java.lang.NullPointerException 
    at org.apache.hadoop.hive.metastore.MetaStoreUtils.getFieldsFromDeserializer(MetaStoreUtils.java:1257) 
    at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:695) 
    ... 21 more 

I understand that the table creation failed, but does anyone know why I am getting this error? I tried googling it, but that did not help much.

In case it helps, here is the CREATE TABLE script I am using:

create table if not exists fiasc2(
record_type varchar(1), 
fin_id varchar(16), 
corp_id varchar(8), 
merc_id varchar(16), 
term_id varchar(8), 
tran_time timestamp, 
cashcard_number varchar(16), 
ttc varchar(8), 
tcc varchar(8), 
tran_type varchar(2), 
tran_amount varchar(16), 
deposit_amount varchar(16), 
pan varchar(32), 
account_type varchar(2), 
response_code varchar(2), 
card_balance varchar(8), 
settlement_date date, 
tran_mode varchar(1)) 
row format serde 'my.hive.customserde.FIASC2' 
location '/user/hive/fiasc2_test'; 

Answers


This sounds familiar. You are returning a null SerDeStats, which is the only thing I can see here that could be null, and it rings a bell: I think my JSON SerDe had the same problem when Hive introduced SerDeStats a few versions back. Try:

// add this to the members
private SerDeStats stats;

// ...

public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    // ...
    // add this in initialize()
    stats = new SerDeStats();
}

// and of course here
@Override
public SerDeStats getSerDeStats() {
    return stats;
}
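
Assembled, the change looks like this (a minimal sketch of how the fragments fit into your class; everything else in FIASC2 stays as it is):

public class FIASC2 extends AbstractSerDe {

    // non-null stats object handed back to Hive
    private SerDeStats stats;

    @Override
    public void initialize(Configuration conf, Properties tbl) throws SerDeException {
        stats = new SerDeStats();
        // ... the rest of the existing initialize() body, unchanged ...
    }

    @Override
    public SerDeStats getSerDeStats() {
        // an empty SerDeStats object instead of null
        return stats;
    }
}

That way, any caller that asks the SerDe for its stats gets an empty SerDeStats object rather than a null it might dereference.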