Author: stack
Date: Tue Sep 18 21:41:20 2012
New Revision: 1387369

URL: http://svn.apache.org/viewvc?rev=1387369&view=rev
Log:
HBASE-6592 [shell] Add means of custom formatting output by column
Modified:
    hbase/trunk/hbase-server/src/main/ruby/hbase/table.rb
    hbase/trunk/hbase-server/src/main/ruby/shell/commands/get.rb
    hbase/trunk/hbase-server/src/main/ruby/shell/commands/scan.rb
    hbase/trunk/hbase-server/src/test/ruby/hbase/table_test.rb

Modified: hbase/trunk/hbase-server/src/main/ruby/hbase/table.rb
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/ruby/hbase/table.rb?rev=1387369&r1=1387368&r2=1387369&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/ruby/hbase/table.rb (original)
+++ hbase/trunk/hbase-server/src/main/ruby/hbase/table.rb Tue Sep 18 21:41:20 2012
@@ -113,6 +113,7 @@ EOF
       @table = org.apache.hadoop.hbase.client.HTable.new(configuration, table_name)
       @name = table_name
       @shell = shell
+      @converters = Hash.new()
     end
 
     # Note the below methods are prefixed with '_' to hide them from the average user, as
@@ -187,7 +188,8 @@ EOF
     def _get_internal(row, *args)
       get = org.apache.hadoop.hbase.client.Get.new(row.to_s.to_java_bytes)
       maxlength = -1
-
+      @converters.clear()
+
       # Normalize args
       args = args.first if args.first.kind_of?(Hash)
       if args.kind_of?(String) || args.kind_of?(Array)
@@ -299,6 +301,7 @@ EOF
 
       limit = args.delete("LIMIT") || -1
       maxlength = args.delete("MAXLENGTH") || -1
+      @converters.clear()
 
       if args.any?
         filter = args["FILTER"]
@@ -450,6 +453,7 @@ EOF
     # Returns family and (when has it) qualifier for a column name
     def parse_column_name(column)
       split = org.apache.hadoop.hbase.KeyValue.parseColumn(column.to_java_bytes)
+      set_converter(split) if split.length > 1
       return split[0], (split.length > 1) ? split[1] : nil
     end
 
@@ -474,9 +478,42 @@ EOF
       if kv.isDelete
         val = "timestamp=#{kv.getTimestamp}, type=#{org.apache.hadoop.hbase.KeyValue::Type::codeToType(kv.getType)}"
       else
-        val = "timestamp=#{kv.getTimestamp}, value=#{org.apache.hadoop.hbase.util.Bytes::toStringBinary(kv.getValue)}"
+        val = "timestamp=#{kv.getTimestamp}, value=#{convert(column, kv)}"
       end
       (maxlength != -1) ? val[0, maxlength] : val
     end
+
+    def convert(column, kv)
+      #use org.apache.hadoop.hbase.util.Bytes as the default class
+      klazz_name = 'org.apache.hadoop.hbase.util.Bytes'
+      #use org.apache.hadoop.hbase.util.Bytes::toStringBinary as the default convertor
+      converter = 'toStringBinary'
+      if @converters.has_key?(column)
+        # lookup the CONVERTER for certain column - "cf:qualifier"
+        matches = /c\((.+)\)\.(.+)/.match(@converters[column])
+        if matches.nil?
+          # cannot match the pattern of 'c(className).functionname'
+          # use the default klazz_name
+          converter = @converters[column]
+        else
+          klazz_name = matches[1]
+          converter = matches[2]
+        end
+      end
+      method = eval(klazz_name).method(converter)
+      return method.call(kv.getValue) # apply the converter
+    end
+
+    # if the column spec contains CONVERTER information, to get rid of :CONVERTER info from column pair.
+    # 1. return back normal column pair as usual, i.e., "cf:qualifier[:CONVERTER]" to "cf" and "qualifier" only
+    # 2. register the CONVERTER information based on column spec - "cf:qualifier"
+    def set_converter(column)
+      family = String.from_java_bytes(column[0])
+      parts = org.apache.hadoop.hbase.KeyValue.parseColumn(column[1])
+      if parts.length > 1
+        @converters["#{family}:#{String.from_java_bytes(parts[0])}"] = String.from_java_bytes(parts[1])
+        column[1] = parts[0]
+      end
+    end
   end
 end

Modified: hbase/trunk/hbase-server/src/main/ruby/shell/commands/get.rb
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/ruby/shell/commands/get.rb?rev=1387369&r1=1387368&r2=1387369&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/ruby/shell/commands/get.rb (original)
+++ hbase/trunk/hbase-server/src/main/ruby/shell/commands/get.rb Tue Sep 18 21:41:20 2012
@@ -36,8 +36,23 @@ a dictionary of column(s), timestamp, ti
   hbase> get 't1', 'r1', 'c1', 'c2'
   hbase> get 't1', 'r1', ['c1', 'c2']
 
+Besides the default 'toStringBinary' format, 'get' also supports custom formatting by
+column. A user can define a FORMATTER by adding it to the column name in the get
+specification. The FORMATTER can be stipulated:
+
+ 1. either as a org.apache.hadoop.hbase.util.Bytes method name (e.g, toInt, toString)
+ 2. or as a custom class followed by method name: e.g. 'c(MyFormatterClass).format'.
+
+Example formatting cf:qualifier1 and cf:qualifier2 both as Integers:
+  hbase> get 't1', 'r1' {COLUMN => ['cf:qualifier1:toInt',
+    'cf:qualifier2:c(org.apache.hadoop.hbase.util.Bytes).toInt'] }
+
+Note that you can specify a FORMATTER by column only (cf:qualifer). You cannot specify
+a FORMATTER for all columns of a column family.
+
 The same commands also can be run on a reference to a table (obtained via get_table or
- create_table). Suppose you had a reference t to table 't1', the corresponding commands would be:
+create_table). Suppose you had a reference t to table 't1', the corresponding commands
+would be:
 
   hbase> t.get 'r1'
   hbase> t.get 'r1', {TIMERANGE => [ts1, ts2]}

Modified: hbase/trunk/hbase-server/src/main/ruby/shell/commands/scan.rb
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/ruby/shell/commands/scan.rb?rev=1387369&r1=1387368&r2=1387369&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/ruby/shell/commands/scan.rb (original)
+++ hbase/trunk/hbase-server/src/main/ruby/shell/commands/scan.rb Tue Sep 18 21:41:20 2012
@@ -42,8 +42,10 @@ Some examples:
   hbase> scan '.META.', {COLUMNS => 'info:regioninfo'}
   hbase> scan 't1', {COLUMNS => ['c1', 'c2'], LIMIT => 10, STARTROW => 'xyz'}
   hbase> scan 't1', {COLUMNS => 'c1', TIMERANGE => [1303668804, 1303668904]}
-  hbase> scan 't1', {FILTER => "(PrefixFilter ('row2') AND (QualifierFilter (>=, 'binary:xyz'))) AND (TimestampsFilter ( 123, 456))"}
-  hbase> scan 't1', {FILTER => org.apache.hadoop.hbase.filter.ColumnPaginationFilter.new(1, 0)}
+  hbase> scan 't1', {FILTER => "(PrefixFilter ('row2') AND
+    (QualifierFilter (>=, 'binary:xyz'))) AND (TimestampsFilter ( 123, 456))"}
+  hbase> scan 't1', {FILTER =>
+    org.apache.hadoop.hbase.filter.ColumnPaginationFilter.new(1, 0)}
 
 For experts, there is an additional option -- CACHE_BLOCKS -- which
 switches block caching for the scanner on (true) or off (false). By
@@ -58,13 +60,29 @@ Disabled by default. Example:
 
   hbase> scan 't1', {RAW => true, VERSIONS => 10}
 
-Scan can also be used directly from a table, by first getting a reference to a table, like such:
+Besides the default 'toStringBinary' format, 'scan' supports custom formatting
+by column. A user can define a FORMATTER by adding it to the column name in
+the scan specification. The FORMATTER can be stipulated:
+
+ 1. either as a org.apache.hadoop.hbase.util.Bytes method name (e.g, toInt, toString)
+ 2. or as a custom class followed by method name: e.g. 'c(MyFormatterClass).format'.
+
+Example formatting cf:qualifier1 and cf:qualifier2 both as Integers:
+  hbase> scan 't1', {COLUMNS => ['cf:qualifier1:toInt',
+    'cf:qualifier2:c(org.apache.hadoop.hbase.util.Bytes).toInt'] }
+
+Note that you can specify a FORMATTER by column only (cf:qualifer). You cannot
+specify a FORMATTER for all columns of a column family.
+
+Scan can also be used directly from a table, by first getting a reference to a
+table, like such:
 
   hbase> t = get_table 't'
   hbase> t.scan
 
-Note in the above situation, you can still provide all the filtering, columns, options, etc as
-described above.
+Note in the above situation, you can still provide all the filtering, columns,
+options, etc as described above.
+
 EOF
       end
 

Modified: hbase/trunk/hbase-server/src/test/ruby/hbase/table_test.rb
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/ruby/hbase/table_test.rb?rev=1387369&r1=1387368&r2=1387369&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/ruby/hbase/table_test.rb (original)
+++ hbase/trunk/hbase-server/src/test/ruby/hbase/table_test.rb Tue Sep 18 21:41:20 2012
@@ -311,6 +311,22 @@ module Hbase
       @test_table._get_internal('1') { |col, val| res[col] = val }
       assert_equal(res.keys.sort, [ 'x:a', 'x:b' ])
     end
+
+    define_test "get should support COLUMNS with value CONVERTER information" do
+      @test_table.put(1, "x:c", [1024].pack('N'))
+      @test_table.put(1, "x:d", [98].pack('N'))
+      begin
+        res = @test_table._get_internal('1', ['x:c:toInt'], ['x:d:c(org.apache.hadoop.hbase.util.Bytes).toInt'])
+        assert_not_nil(res)
+        assert_kind_of(Hash, res)
+        assert_not_nil(/value=1024/.match(res['x:c']))
+        assert_not_nil(/value=98/.match(res['x:d']))
+      ensure
+        # clean up newly added columns for this test only.
+        @test_table.delete(1, "x:c")
+        @test_table.delete(1, "x:d")
+      end
+    end
 
     #-------------------------------------------------------------------------------
 
@@ -417,5 +433,22 @@ module Hbase
       res = @test_table._scan_internal { |row, cells| rows[row] = cells }
       assert_equal(rows.keys.size, res)
     end
+
+    define_test "scan should support COLUMNS with value CONVERTER information" do
+      @test_table.put(1, "x:c", [1024].pack('N'))
+      @test_table.put(1, "x:d", [98].pack('N'))
+      begin
+        res = @test_table._scan_internal COLUMNS => ['x:c:toInt', 'x:d:c(org.apache.hadoop.hbase.util.Bytes).toInt']
+        assert_not_nil(res)
+        assert_kind_of(Hash, res)
+        assert_not_nil(/value=1024/.match(res['1']['x:c']))
+        assert_not_nil(/value=98/.match(res['1']['x:d']))
+      ensure
+        # clean up newly added columns for this test only.
+        @test_table.delete(1, "x:c")
+        @test_table.delete(1, "x:d")
+      end
+end
+
   end
 end