2012-06-17 80 views
6

我試圖在套用某些過濾器之後,從HBase的行中獲取所選的列(即:在HBase中獲取已過濾的行)。考慮如下所示的一張表:

ename:fname, ename:lname, salary:gross, salary:da, salary:ta

我想取得工資總額(salary:gross)> 1500 的所有員工的列表,爲此我寫了下面的代碼。我面臨的問題是:當我按列過濾時,輸出中只會得到被過濾的那一列。這是有道理的,因爲過濾器正是爲此而設計的;但是我希望能獲得所需的多個列,同時只根據某個特定的列進行過濾,例如我剛纔提到的工資總額 > 1500。

輸出應該是所有符合條件的員工的列表,並且包含下面這一組列:

ename:lname, ename:fname, salary:gross, salary:ta

目前代碼

import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.hbase.HBaseConfiguration; 
import org.apache.hadoop.hbase.KeyValue; 
import org.apache.hadoop.hbase.client.Get; 
import org.apache.hadoop.hbase.client.HTable; 
import org.apache.hadoop.hbase.client.Result; 
import org.apache.hadoop.hbase.client.ResultScanner; 
import org.apache.hadoop.hbase.client.Scan; 
import org.apache.hadoop.hbase.filter.BinaryComparator; 
import org.apache.hadoop.hbase.filter.RegexStringComparator; 
import org.apache.hadoop.hbase.filter.SubstringComparator; 
import org.apache.hadoop.hbase.filter.CompareFilter; 
import org.apache.hadoop.hbase.filter.Filter; 
import org.apache.hadoop.hbase.filter.QualifierFilter; 
import org.apache.hadoop.hbase.filter.FamilyFilter; 
import org.apache.hadoop.hbase.filter.FilterList; 
import org.apache.hadoop.hbase.filter.ValueFilter; 
import org.apache.hadoop.hbase.util.Bytes; 


import java.io.IOException; 
import java.util.ArrayList; 
import java.util.List; 


/**
 * Scans the "emp" table with a FamilyFilter + QualifierFilter + ValueFilter
 * combination and prints every returned cell.
 *
 * <p>All three filters are evaluated per cell, so with MUST_PASS_ALL only the
 * cells that are in family "salary", have qualifier "gross" AND carry a
 * matching value are returned. The other columns of a matching row are
 * dropped, which is why the output contains nothing but salary:gross.
 *
 * <p>NOTE: BinaryComparator compares raw bytes lexicographically. The values
 * are printed via Bytes.toString, i.e. they are handled as strings, so "900"
 * would compare as greater than "1500".
 */
public class MyQualifierFilterExample {

    /**
     * Runs the filtered scan and prints one line per returned KeyValue.
     *
     * @param args unused
     * @throws IOException if the table cannot be opened, scanned or closed
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();

        HTable table = new HTable(conf, "emp");
        try {
            List<Filter> filters = new ArrayList<Filter>();

            // Keep only cells of the "salary" column family ...
            Filter famFilter = new FamilyFilter(CompareFilter.CompareOp.EQUAL,
                    new BinaryComparator(Bytes.toBytes("salary")));
            filters.add(famFilter);

            // ... whose qualifier is "gross" ...
            Filter colFilter = new QualifierFilter(CompareFilter.CompareOp.EQUAL,
                    new BinaryComparator(Bytes.toBytes("gross")));
            filters.add(colFilter);

            // ... and whose value is >= "1500" (byte-wise comparison).
            Filter valFilter = new ValueFilter(CompareFilter.CompareOp.GREATER_OR_EQUAL,
                    new BinaryComparator(Bytes.toBytes("1500")));
            filters.add(valFilter);

            FilterList fl = new FilterList(FilterList.Operator.MUST_PASS_ALL, filters);

            Scan scan = new Scan();
            scan.setFilter(fl);

            ResultScanner scanner = table.getScanner(scan);
            try {
                System.out.println("Scanning table... ");
                for (Result result : scanner) {
                    for (KeyValue kv : result.raw()) {
                        System.out.println("kv:" + kv + ", Key: " + Bytes.toString(kv.getRow())
                                + ", Value: " + Bytes.toString(kv.getValue()));
                    }
                }
            } finally {
                // Release the scanner even if iteration fails.
                scanner.close();
            }
        } finally {
            // Release the table's client-side resources.
            table.close();
        }
        System.out.println("Completed ");
    }
}

輸出

Scanning table... 
kv:101/salary:gross/1339876269770/Put/vlen=4, Key: 101, Value: 2000 
kv:102/salary:gross/1339876277659/Put/vlen=4, Key: 102, Value: 2400 
kv:105/salary:gross/1339876300585/Put/vlen=4, Key: 105, Value: 2300 
kv:106/salary:gross/1339876310004/Put/vlen=4, Key: 106, Value: 2900 
Completed 

解決方案 1

import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.hbase.HBaseConfiguration; 
import org.apache.hadoop.hbase.KeyValue; 
import org.apache.hadoop.hbase.client.Get; 
import org.apache.hadoop.hbase.client.HTable; 
import org.apache.hadoop.hbase.client.Result; 
import org.apache.hadoop.hbase.client.ResultScanner; 
import org.apache.hadoop.hbase.client.Scan; 
import org.apache.hadoop.hbase.filter.BinaryComparator; 
import org.apache.hadoop.hbase.filter.RegexStringComparator; 
import org.apache.hadoop.hbase.filter.SubstringComparator; 
import org.apache.hadoop.hbase.filter.CompareFilter; 
import org.apache.hadoop.hbase.filter.Filter; 
import org.apache.hadoop.hbase.filter.QualifierFilter; 
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter; 
import org.apache.hadoop.hbase.filter.FamilyFilter; 
import org.apache.hadoop.hbase.filter.FilterList; 
import org.apache.hadoop.hbase.filter.ValueFilter; 
import org.apache.hadoop.hbase.util.Bytes; 


import java.io.IOException; 
import java.util.ArrayList; 
import java.util.List; 


public class MyQualifierFilterExample { 

    public static void main(String[] args) throws IOException { 
    Configuration conf = HBaseConfiguration.create(); 

    HTable table = new HTable(conf, "emp"); 

    List<Filter> filters = new ArrayList<Filter>(); 

    SingleColumnValueFilter colValFilter = new SingleColumnValueFilter(Bytes.toBytes("salary"), Bytes.toBytes("gross") 
      , CompareFilter.CompareOp.GREATER_OR_EQUAL, new BinaryComparator(Bytes.toBytes("1300"))); 
    colValFilter.setFilterIfMissing(false); 
    filters.add(colValFilter);   

    Filter colValFilter2 = new SingleColumnValueFilter(Bytes.toBytes("salary"), Bytes.toBytes("da") 
      , CompareFilter.CompareOp.GREATER_OR_EQUAL, new BinaryComparator(Bytes.toBytes("150"))); 
    filters.add(colValFilter2); 

    //Filter colValFilter3 = new SingleColumnValueFilter(Bytes.toBytes("ename"), Bytes.toBytes("fname") 
    //  , CompareFilter.CompareOp.GREATER_OR_EQUAL, new SubstringComparator("jack")); 
    //filters.add(colValFilter3); 

    FilterList fl = new FilterList(FilterList.Operator.MUST_PASS_ALL, filters); 


    Scan scan = new Scan(); 
    scan.setFilter(fl); 
    scan.addColumn(Bytes.toBytes("ename"), Bytes.toBytes("fname")); 
    scan.addColumn(Bytes.toBytes("ename"), Bytes.toBytes("lname")); 
    scan.addColumn(Bytes.toBytes("salary"), Bytes.toBytes("gross")); 
    scan.addColumn(Bytes.toBytes("salary"), Bytes.toBytes("da")); 

    ResultScanner scanner = table.getScanner(scan); 
    String key = new String("~"); 
    String keyFlag = new String("~"); 
    System.out.println("Scanning table... "); 
    for (Result result : scanner) { 
     //System.out.println("getRow:"+Bytes.toString(result.getRow())); 
     key = "~"; 
     for (KeyValue kv : result.raw()) { 

      if (key.compareTo(keyFlag)==0) 
      { 
       key = Bytes.toString(kv.getRow()); 
       System.out.print("Key: " + key); 
      } 
      //System.out.print("Family - "+Bytes.toString(kv.getFamily())); 

      //System.out.print(", Buffer - "+Bytes.toString(kv.getBuffer())); 
      //System.out.print(", FamilyOffset - " + kv.getFamilyOffset()); 
      System.out.print(", "+Bytes.toString(kv.getFamily())+"."+Bytes.toString(kv.getQualifier())); 
      System.out.print("=" +Bytes.toString(kv.getValue())); 
     } 
     System.out.println(""); 
     System.out.println("-------------------"); 
    } 

    scanner.close(); 
    System.out.println("Completed "); 
    } 
} 

輸出:

Scanning table... 
Key: 103, ename.fname=peter, ename.lname=parker, salary.da=190, salary.gross=1400 
------------------- 
Key: 105, ename.fname=harry, ename.lname=potter, salary.da=154, salary.gross=2300 
------------------- 
Completed 
+0

你的薪水值確實是以字符串的字節形式保存的嗎?這可能是個問題,因爲按字典序比較時,字符串「900」大於字符串「1500」。 – kichik

回答

0

ValueFilter:該過濾器只會返回值符合指定條件的列。

這就是爲什麼您只能獲取您在過濾器中指定的列。

如果我理解錯了請告訴我:你想要做的是,當薪水 > 1500 時檢索該行的所有列,對嗎?

0

您的需求是關係型的。所以,我建議您在HBase之上使用一個封裝層,讓事情變得簡單。

考慮使用:Apache Phoenix。它是HBase的高性能SQL包裝器,使用它可以運行如下查詢:select * from emp where salary>1500

1

您應該將 SingleColumnValueFilter 與 addFamily(或 addColumn)組合使用。

見下文(我目前無法在我這邊測試它):

Scan scan = new Scan();

// Emit a row only when its salary:gross value is greater than "1500".
// NOTE: the byte[] value is compared byte-wise (lexicographically), so this
// only orders numbers correctly if the stored encoding sorts that way.
SingleColumnValueFilter filter = new SingleColumnValueFilter(
    Bytes.toBytes("salary"),
    Bytes.toBytes("gross"),
    CompareFilter.CompareOp.GREATER,
    Bytes.toBytes("1500")
);
// To prevent the entire row from being emitted
// if the column is not found on a row.
// setFilterIfMissing is a method of the filter, not of Scan.
filter.setFilterIfMissing(true);
scan.setFilter(filter);

// Columns to return. The filtered column (salary:gross) must be part of the
// scan so that the filter can see its value.
scan.addFamily(Bytes.toBytes("ename"));
scan.addColumn(Bytes.toBytes("salary"), Bytes.toBytes("da"));
scan.addColumn(Bytes.toBytes("salary"), Bytes.toBytes("gross"));