0
// Read one JSON-serialized user record per line from HDFS.
JavaRDD<String> hbaseFile = jsc.textFile(HDFS_MASTER + HBASE_FILE);
// Map each line to a (rowKey, KeyValue) pair for the single "age" column.
JavaPairRDD<ImmutableBytesWritable, KeyValue> putJavaRDD = hbaseFile.mapToPair(line -> convertToKVCol1(line, COLUMN_AGE));
// BUG FIX: sortByKey() returns a NEW sorted RDD (RDDs are immutable). The original
// code discarded the result and wrote the unsorted RDD, but HFileOutputFormat2
// requires records in total row-key order — keep and write the sorted RDD instead.
JavaPairRDD<ImmutableBytesWritable, KeyValue> sortedRDD = putJavaRDD.sortByKey(true);
sortedRDD.saveAsNewAPIHadoopFile(stagingFolder, ImmutableBytesWritable.class, KeyValue.class, HFileOutputFormat2.class, conf);
/**
 * Deserializes one JSON line into an InspurUserEntity and builds the HFile cell
 * for a single column qualifier under a composite row key.
 * Row-key layout: {@code department_level1 + "_" + department_level2 + "_" + id}.
 *
 * @param beanString one line of JSON representing an InspurUserEntity
 * @param column     column qualifier to emit (e.g. COLUMN_AGE)
 * @return a (row-key wrapper, KeyValue) pair ready for HFileOutputFormat2
 */
private static Tuple2<ImmutableBytesWritable, KeyValue> convertToKVCol1(String beanString, byte[] column) {
    InspurUserEntity user = gson.fromJson(beanString, InspurUserEntity.class);
    // Build the composite row key once and reuse its byte form for both the
    // writable wrapper and the KeyValue itself.
    StringBuilder keyBuilder = new StringBuilder();
    keyBuilder.append(user.getDepartment_level1())
              .append('_')
              .append(user.getDepartment_level2())
              .append('_')
              .append(user.getId());
    byte[] rowKeyBytes = Bytes.toBytes(keyBuilder.toString());
    KeyValue cell = new KeyValue(rowKeyBytes, COLUMN_FAMILY, column, Bytes.toBytes(user.getAge()));
    return new Tuple2<>(new ImmutableBytesWritable(rowKeyBytes), cell);
}
以上是我的代碼,目前只能為單個列建立 HFile。有沒有辦法在 Spark 中,對同一個 rowKey 建立包含多個列的 HFile?也就是:Spark 中一個 rowKey 對應多列。
謝謝你的幫助。我是 MapReduce 和 Spark 的新手。你有沒有關於如何使用數組(而不是單個 ImmutableBytesWritable)的例子?謝謝大家 –
這是我的代碼:return new Tuple2<>(new ImmutableBytesWritable(rowKeyBytes), new KeyValue(xxxx)); 請問這裡要如何改用數組? –