This is an automated email from the ASF dual-hosted git repository.

elserj pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new 31917b0  HBASE-25278 Add CACHE_BLOCKS option to count shell command
31917b0 is described below

commit 31917b0a8ab1d7f90ab6997dcc80a38dcea98013
Author: Josh Elser <els...@apache.org>
AuthorDate: Thu Nov 12 16:04:26 2020 -0500

    HBASE-25278 Add CACHE_BLOCKS option to count shell command
    
    Expose an argument on the `count` command which is passed to the
    `setCacheBlocks` method on the Scan which the count command uses.
    
    This is a quick and dirty approach to read all of the blocks for a table
    into the block cache.
    
    * Raise an error when the value isn't a boolean or the expected string
    
    Closes #2650
    
    Signed-off-by: Zach York <zy...@apache.org>
    Signed-off-by: Peter Somogyi <psomo...@apache.org>
---
 hbase-shell/src/main/ruby/hbase/table.rb          |  6 ++---
 hbase-shell/src/main/ruby/shell/commands/count.rb | 27 +++++++++++++++++++++--
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/hbase-shell/src/main/ruby/hbase/table.rb b/hbase-shell/src/main/ruby/hbase/table.rb
index 4e8a186..d779261 100644
--- a/hbase-shell/src/main/ruby/hbase/table.rb
+++ b/hbase-shell/src/main/ruby/hbase/table.rb
@@ -303,18 +303,18 @@ EOF
 
     #----------------------------------------------------------------------------------------------
     # Count rows in a table
-    def _count_internal(interval = 1000, scan = nil)
+    def _count_internal(interval = 1000, scan = nil, cacheBlocks=false)
       raise(ArgumentError, 'Scan argument should be org.apache.hadoop.hbase.client.Scan') \
         unless scan.nil? || scan.is_a?(org.apache.hadoop.hbase.client.Scan)
       # We can safely set scanner caching with the first key only filter
 
       if scan.nil?
         scan = org.apache.hadoop.hbase.client.Scan.new
-        scan.setCacheBlocks(false)
+        scan.setCacheBlocks(cacheBlocks)
         scan.setCaching(10)
         scan.setFilter(org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter.new)
       else
-        scan.setCacheBlocks(false)
+        scan.setCacheBlocks(cacheBlocks)
         filter = scan.getFilter
         firstKeyOnlyFilter = org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter.new
         if filter.nil?
diff --git a/hbase-shell/src/main/ruby/shell/commands/count.rb b/hbase-shell/src/main/ruby/shell/commands/count.rb
index 03840d0..7052358 100644
--- a/hbase-shell/src/main/ruby/shell/commands/count.rb
+++ b/hbase-shell/src/main/ruby/shell/commands/count.rb
@@ -49,6 +49,17 @@ t to table 't1', the corresponding commands would be:
  hbase> t.count FILTER => "
     (QualifierFilter (>=, 'binary:xyz')) AND (TimestampsFilter ( 123, 456))"
  hbase> t.count COLUMNS => ['c1', 'c2'], STARTROW => 'abc', STOPROW => 'xyz'
+
+By default, this operation does not cause any new blocks to be read into
+the RegionServer block cache. This is typically the desired action; however,
+if you want to force all blocks for a table to be loaded into the block cache
+on-demand, you can pass the 'CACHE_BLOCKS' option with a value of 'true'. A value
+of 'false' is the default and will result in no blocks being cached. This
+command can be used in conjunction with all other options.
+
+hbase> count 'ns1:t1', CACHE_BLOCKS => true
+hbase> count 'ns1:t1', CACHE_BLOCKS => 'true'
+hbase> count 'ns1:t1', INTERVAL => 100000, CACHE_BLOCKS => false
 EOF
       end
 
@@ -60,17 +71,29 @@ EOF
         # If the second parameter is an integer, then it is the old command syntax
         params = { 'INTERVAL' => params } if params.is_a?(Integer)
 
+        # Try to be nice and convert a string to a bool
+        if params.include?('CACHE_BLOCKS') and params['CACHE_BLOCKS'].is_a?(String)
+          if params['CACHE_BLOCKS'].downcase == 'true'
+            params['CACHE_BLOCKS'] = true
+          elsif params['CACHE_BLOCKS'].downcase == 'false'
+            params['CACHE_BLOCKS'] = false
+          else
+            raise(ArgumentError, "Expected CACHE_BLOCKS value to be a boolean or the string 'true' or 'false'")
+          end
+        end
+
         # Merge params with defaults
         params = {
           'INTERVAL' => 1000,
-          'CACHE' => 10
+          'CACHE' => 10,
+          'CACHE_BLOCKS' => false
         }.merge(params)
 
         scan = table._hash_to_scan(params)
         # Call the counter method
         @start_time = Time.now
         formatter.header
-        count = table._count_internal(params['INTERVAL'].to_i, scan) do |cnt, row|
+        count = table._count_internal(params['INTERVAL'].to_i, scan, params['CACHE_BLOCKS']) do |cnt, row|
           formatter.row(["Current count: #{cnt}, row: #{row}"])
         end
         formatter.footer(count)

Reply via email to