This is an automated email from the ASF dual-hosted git repository.
healchow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-inlong.git
The following commit(s) were added to refs/heads/master by this push:
new afabed04f [INLONG-4557][Sort] Fix HBase connector dependency and pom
relocation problem (#4560)
afabed04f is described below
commit afabed04f4854dcfd97b4b6f29e1128bfd10f5ff
Author: pacino <[email protected]>
AuthorDate: Tue Jun 7 23:21:20 2022 +0800
[INLONG-4557][Sort] Fix HBase connector dependency and pom relocation
problem (#4560)
---
inlong-sort/pom.xml | 6 +
inlong-sort/sort-connectors/hbase/pom.xml | 15 +
.../hbase/src/main/resources/hbase-default.xml | 1816 ++++++++++++++++++++
licenses/inlong-sort/LICENSE | 6 +-
4 files changed, 1839 insertions(+), 4 deletions(-)
diff --git a/inlong-sort/pom.xml b/inlong-sort/pom.xml
index f4deb44e9..ccee96823 100644
--- a/inlong-sort/pom.xml
+++ b/inlong-sort/pom.xml
@@ -45,6 +45,7 @@
<debezium.version>1.5.4.Final</debezium.version>
<kafka.clients.version>2.7.0</kafka.clients.version>
<rat.basedir>${basedir}</rat.basedir>
+ <hbase.version>2.2.3</hbase.version>
</properties>
<dependencyManagement>
<dependencies>
@@ -68,6 +69,11 @@
<artifactId>kafka-clients</artifactId>
<version>${kafka.clients.version}</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-client</artifactId>
+ <version>${hbase.version}</version>
+ </dependency>
</dependencies>
</dependencyManagement>
diff --git a/inlong-sort/sort-connectors/hbase/pom.xml
b/inlong-sort/sort-connectors/hbase/pom.xml
index d4517f709..89546c9a9 100644
--- a/inlong-sort/sort-connectors/hbase/pom.xml
+++ b/inlong-sort/sort-connectors/hbase/pom.xml
@@ -31,6 +31,10 @@
<name>Apache InLong - Sort-connector-hbase</name>
<packaging>jar</packaging>
+ <properties>
+ <zookeeper.version>3.4.14</zookeeper.version>
+ </properties>
+
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
@@ -38,6 +42,17 @@
</dependency>
</dependencies>
+ <dependencyManagement>
+ <dependencies>
+ <dependency>
+ <!-- HBase only works with ZooKeeper 3.4 -->
+ <groupId>org.apache.zookeeper</groupId>
+ <artifactId>zookeeper</artifactId>
+ <version>${zookeeper.version}</version>
+ </dependency>
+ </dependencies>
+ </dependencyManagement>
+
<build>
<plugins>
<plugin>
diff --git
a/inlong-sort/sort-connectors/hbase/src/main/resources/hbase-default.xml
b/inlong-sort/sort-connectors/hbase/src/main/resources/hbase-default.xml
new file mode 100644
index 000000000..2d7e21d63
--- /dev/null
+++ b/inlong-sort/sort-connectors/hbase/src/main/resources/hbase-default.xml
@@ -0,0 +1,1816 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+OVERVIEW
+
+The important configs. are listed near the top. You should change
+at least the setting for hbase.tmp.dir. Other settings will change
+dependent on whether you are running hbase in standalone mode or
+distributed. See the hbase reference guide for requirements and
+guidance making configuration.
+
+This file does not contain all possible configurations. The file would be
+much larger if it carried everything. The absent configurations will only be
+found through source code reading. The idea is that such configurations are
+exotic and only those who would go to the trouble of reading a particular
+section in the code would be knowledgeable or invested enough in ever wanting
+to alter such configurations, so we do not list them here. Listing all
+possible configurations would overwhelm and obscure the important.
+-->
+
+<configuration>
+ <!--Configs you will likely change are listed here at the top of the
file.
+ -->
+ <property >
+ <name>hbase.tmp.dir</name>
+ <value>${java.io.tmpdir}/hbase-${user.name}</value>
+ <description>Temporary directory on the local filesystem.
+ Change this setting to point to a location more
permanent
+ than '/tmp', the usual resolve for java.io.tmpdir, as
the
+ '/tmp' directory is cleared on machine
restart.</description>
+ </property>
+ <property >
+ <name>hbase.rootdir</name>
+ <value>${hbase.tmp.dir}/hbase</value>
+ <description>The directory shared by region servers and into
+ which HBase persists. The URL should be
'fully-qualified'
+ to include the filesystem scheme. For example, to
specify the
+ HDFS directory '/hbase' where the HDFS instance's
namenode is
+ running at namenode.example.org on port 9000, set this
value to:
+ hdfs://namenode.example.org:9000/hbase. By default, we
write
+ to whatever ${hbase.tmp.dir} is set to -- usually /tmp
--
+ so change this configuration or else all data will be
lost on
+ machine restart.</description>
+ </property>
+ <property >
+ <name>hbase.cluster.distributed</name>
+ <value>false</value>
+ <description>The mode the cluster will be in. Possible values
are
+ false for standalone mode and true for distributed
mode. If
+ false, startup will run all HBase and ZooKeeper daemons
together
+ in the one JVM.</description>
+ </property>
+ <property>
+ <name>hbase.zookeeper.quorum</name>
+ <value>localhost</value>
+ <description>Comma separated list of servers in the ZooKeeper
ensemble
+ (This config. should have been named
hbase.zookeeper.ensemble).
+ For example,
"host1.mydomain.com,host2.mydomain.com,host3.mydomain.com".
+ By default this is set to localhost for local and
pseudo-distributed modes
+ of operation. For a fully-distributed setup, this
should be set to a full
+ list of ZooKeeper ensemble servers. If HBASE_MANAGES_ZK
is set in hbase-env.sh
+ this is the list of servers which hbase will start/stop
ZooKeeper on as
+ part of cluster start/stop. Client-side, we will take
this list of
+ ensemble members and put it together with the
hbase.zookeeper.property.clientPort
+ config. and pass it into zookeeper constructor as the
connectString
+ parameter.</description>
+ </property>
+ <!--The above are the important configurations for getting hbase up
+ and running -->
+
+ <property>
+ <name>zookeeper.recovery.retry.maxsleeptime</name>
+ <value>60000</value>
+ <description>Max sleep time, in milliseconds, before retrying ZooKeeper
operations.
+ A maximum is needed here so that the sleep time won't grow
unboundedly.
+ </description>
+ </property>
+ <property>
+ <name>hbase.local.dir</name>
+ <value>${hbase.tmp.dir}/local/</value>
+ <description>Directory on the local filesystem to be used
+ as a local storage.</description>
+ </property>
+
+ <!--Master configurations-->
+ <property >
+ <name>hbase.master.port</name>
+ <value>16000</value>
+ <description>The port the HBase Master should bind
to.</description>
+ </property>
+ <property>
+ <name>hbase.master.info.port</name>
+ <value>16010</value>
+ <description>The port for the HBase Master web UI.
+ Set to -1 if you do not want a UI instance
run.</description>
+ </property>
+ <property>
+ <name>hbase.master.info.bindAddress</name>
+ <value>0.0.0.0</value>
+ <description>The bind address for the HBase Master web UI
+ </description>
+ </property>
+ <property>
+ <name>hbase.master.logcleaner.ttl</name>
+ <value>600000</value>
+ <description>How long a WAL remains in the archive
({hbase.rootdir}/oldWALs) directory,
+ after which it will be cleaned by a Master thread. The
value is in milliseconds.</description>
+ </property>
+ <property>
+ <name>hbase.master.procedurewalcleaner.ttl</name>
+ <value>604800000</value>
+ <description>How long a Procedure WAL will remain in the
+ archive directory, after which it will be cleaned
+ by a Master thread. The value is in
milliseconds.</description>
+ </property>
+ <property>
+ <name>hbase.master.infoserver.redirect</name>
+ <value>true</value>
+ <description>Whether or not the Master listens to the Master web
+ UI port (hbase.master.info.port) and redirects requests
to the web
+ UI server shared by the Master and RegionServer.
Config. makes
+ sense when Master is serving Regions (not the
default).</description>
+ </property>
+ <property>
+ <name>hbase.master.fileSplitTimeout</name>
+ <value>600000</value>
+ <description>Splitting a region, how long to wait on the
file-splitting
+ step before aborting the attempt. Default: 600000. This
setting used
+ to be known as hbase.regionserver.fileSplitTimeout in
hbase-1.x.
+ Split is now run master-side, hence the rename. (If a
+ 'hbase.regionserver.fileSplitTimeout' setting is found, it will be
used to
+ prime the current 'hbase.master.fileSplitTimeout'
+ configuration.)</description>
+ </property>
+
+ <!--RegionServer configurations-->
+ <property>
+ <name>hbase.regionserver.port</name>
+ <value>16020</value>
+ <description>The port the HBase RegionServer binds
to.</description>
+ </property>
+ <property>
+ <name>hbase.regionserver.info.port</name>
+ <value>16030</value>
+ <description>The port for the HBase RegionServer web UI.
+ Set to -1 if you do not want the RegionServer UI to
run.</description>
+ </property>
+ <property>
+ <name>hbase.regionserver.info.bindAddress</name>
+ <value>0.0.0.0</value>
+ <description>The address for the HBase RegionServer web
UI</description>
+ </property>
+ <property>
+ <name>hbase.regionserver.info.port.auto</name>
+ <value>false</value>
+ <description>Whether or not the Master or RegionServer
+ UI should search for a port to bind to. Enables
automatic port
+ search if hbase.regionserver.info.port is already in
use.
+ Useful for testing, turned off by default.</description>
+ </property>
+ <property>
+ <name>hbase.regionserver.handler.count</name>
+ <value>30</value>
+ <description>Count of RPC Listener instances spun up on
RegionServers.
+ Same property is used by the Master for count of master
handlers.
+ Too many handlers can be counter-productive. Make it a
multiple of
+ CPU count. If mostly read-only, handlers count close to
cpu count
+ does well. Start with twice the CPU count and tune from
there.</description>
+ </property>
+ <property>
+ <name>hbase.ipc.server.callqueue.handler.factor</name>
+ <value>0.1</value>
+ <description>Factor to determine the number of call queues.
+ A value of 0 means a single queue shared between all
the handlers.
+ A value of 1 means that each handler has its own
queue.</description>
+ </property>
+ <property>
+ <name>hbase.ipc.server.callqueue.read.ratio</name>
+ <value>0</value>
+ <description>Split the call queues into read and write queues.
+ The specified interval (which should be between 0.0 and
1.0)
+ will be multiplied by the number of call queues.
+ A value of 0 indicates that the call queues are not split,
meaning that both read and write
+ requests will be pushed to the same set of queues.
+ A value lower than 0.5 means that there will be less
read queues than write queues.
+ A value of 0.5 means there will be the same number of
read and write queues.
+ A value greater than 0.5 means that there will be more
read queues than write queues.
+ A value of 1.0 means that all the queues except one are
used to dispatch read requests.
+
+ Example: Given the total number of call queues being 10
+ a read.ratio of 0 means that: the 10 queues will
contain both read/write requests.
+ a read.ratio of 0.3 means that: 3 queues will contain
only read requests
+ and 7 queues will contain only write requests.
+ a read.ratio of 0.5 means that: 5 queues will contain
only read requests
+ and 5 queues will contain only write requests.
+ a read.ratio of 0.8 means that: 8 queues will contain
only read requests
+ and 2 queues will contain only write requests.
+ a read.ratio of 1 means that: 9 queues will contain
only read requests
+ and 1 queue will contain only write requests.
+ </description>
+ </property>
+ <property>
+ <name>hbase.ipc.server.callqueue.scan.ratio</name>
+ <value>0</value>
+ <description>Given the number of read call queues, calculated
from the total number
+ of call queues multiplied by the callqueue.read.ratio,
the scan.ratio property
+ will split the read call queues into small-read and
long-read queues.
+ A value lower than 0.5 means that there will be less
long-read queues than short-read queues.
+ A value of 0.5 means that there will be the same number
of short-read and long-read queues.
+ A value greater than 0.5 means that there will be more
long-read queues than short-read queues.
+ A value of 0 or 1 means that the same set of
queues is used for gets and scans.
+
+ Example: Given the total number of read call queues
being 8
+ a scan.ratio of 0 or 1 means that: 8 queues will
contain both long and short read requests.
+ a scan.ratio of 0.3 means that: 2 queues will contain
only long-read requests
+ and 6 queues will contain only short-read requests.
+ a scan.ratio of 0.5 means that: 4 queues will contain
only long-read requests
+ and 4 queues will contain only short-read requests.
+ a scan.ratio of 0.8 means that: 6 queues will contain
only long-read requests
+ and 2 queues will contain only short-read requests.
+ </description>
+ </property>
+ <property>
+ <name>hbase.regionserver.msginterval</name>
+ <value>3000</value>
+ <description>Interval between messages from the RegionServer to
Master
+ in milliseconds.</description>
+ </property>
+ <property>
+ <name>hbase.regionserver.logroll.period</name>
+ <value>3600000</value>
+ <description>Period at which we will roll the commit log
regardless
+ of how many edits it has.</description>
+ </property>
+ <property>
+ <name>hbase.regionserver.logroll.errors.tolerated</name>
+ <value>2</value>
+ <description>The number of consecutive WAL close errors we will
allow
+ before triggering a server abort. A setting of 0 will
cause the
+ region server to abort if closing the current WAL
writer fails during
+ log rolling. Even a small value (2 or 3) will allow a
region server
+ to ride over transient HDFS errors.</description>
+ </property>
+ <property>
+ <name>hbase.regionserver.global.memstore.size</name>
+ <value></value>
+ <description>Maximum size of all memstores in a region server
before new
+ updates are blocked and flushes are forced. Defaults to
40% of heap (0.4).
+ Updates are blocked and flushes are forced until size
of all memstores
+ in a region server hits
hbase.regionserver.global.memstore.size.lower.limit.
+ The default value in this configuration has been
intentionally left empty in order to
+ honor the old
hbase.regionserver.global.memstore.upperLimit property if present.
+ </description>
+ </property>
+ <property>
+ <name>hbase.regionserver.global.memstore.size.lower.limit</name>
+ <value></value>
+ <description>Maximum size of all memstores in a region server
before flushes
+ are forced. Defaults to 95% of
hbase.regionserver.global.memstore.size
+ (0.95). A 100% value for this value causes the minimum
possible flushing
+ to occur when updates are blocked due to memstore
limiting. The default
+ value in this configuration has been intentionally left
empty in order to
+ honor the old
hbase.regionserver.global.memstore.lowerLimit property if
+ present.
+ </description>
+ </property>
+ <property>
+ <name>hbase.systemtables.compacting.memstore.type</name>
+ <value>NONE</value>
+ <description>Determines the type of memstore to be used for
system tables like
+ META, namespace tables etc. By default NONE is the type
and hence we use the
+ default memstore for all the system tables. If we need
to use compacting
+ memstore for system tables then set this property to
BASIC/EAGER
+ </description>
+ </property>
+ <property>
+ <name>hbase.regionserver.optionalcacheflushinterval</name>
+ <value>3600000</value>
+ <description>
+ Maximum amount of time an edit lives in memory before
being automatically flushed.
+ Default 1 hour. Set it to 0 to disable automatic
flushing.
+ </description>
+ </property>
+ <property>
+ <name>hbase.regionserver.dns.interface</name>
+ <value>default</value>
+ <description>The name of the Network Interface from which a
region server
+ should report its IP address.</description>
+ </property>
+ <property>
+ <name>hbase.regionserver.dns.nameserver</name>
+ <value>default</value>
+ <description>The host name or IP address of the name server
(DNS)
+ which a region server should use to determine the host
name used by the
+ master for communication and display
purposes.</description>
+ </property>
+ <property>
+ <name>hbase.regionserver.regionSplitLimit</name>
+ <value>1000</value>
+ <description>
+ Limit for the number of regions after which no more
region splitting
+ should take place. This is not a hard limit for the
number of regions
+ but acts as a guideline for the regionserver to stop
splitting after
+ a certain limit. Default is set to 1000.
+ </description>
+ </property>
+
+ <!--ZooKeeper configuration-->
+ <property>
+ <name>zookeeper.session.timeout</name>
+ <value>90000</value>
+ <description>ZooKeeper session timeout in milliseconds. It is
used in two different ways.
+ First, this value is used in the ZK client that HBase
uses to connect to the ensemble.
+ It is also used by HBase when it starts a ZK server and
it is passed as the 'maxSessionTimeout'.
+ See
https://zookeeper.apache.org/doc/current/zookeeperProgrammers.html#ch_zkSessions.
+ For example, if an HBase region server connects to a ZK
ensemble that's also managed
+ by HBase, then the session timeout will be the one
specified by this configuration.
+ But, a region server that connects to an ensemble
managed with a different configuration
+ will be subjected to that ensemble's maxSessionTimeout.
So, even though HBase might propose
+ using 90 seconds, the ensemble can have a max timeout
lower than this and it will take
+ precedence. The current default maxSessionTimeout that
ZK ships with is 40 seconds, which is lower than
+ HBase's.
+ </description>
+ </property>
+ <property>
+ <name>zookeeper.znode.parent</name>
+ <value>/hbase</value>
+ <description>Root ZNode for HBase in ZooKeeper. All of HBase's
ZooKeeper
+ files that are configured with a relative path will go
under this node.
+ By default, all of HBase's ZooKeeper file paths are
configured with a
+ relative path, so they will all go under this directory
unless changed.
+ </description>
+ </property>
+ <property>
+ <name>zookeeper.znode.acl.parent</name>
+ <value>acl</value>
+ <description>Root ZNode for access control lists.</description>
+ </property>
+ <property>
+ <name>hbase.zookeeper.dns.interface</name>
+ <value>default</value>
+ <description>The name of the Network Interface from which a
ZooKeeper server
+ should report its IP address.</description>
+ </property>
+ <property>
+ <name>hbase.zookeeper.dns.nameserver</name>
+ <value>default</value>
+ <description>The host name or IP address of the name server
(DNS)
+ which a ZooKeeper server should use to determine the
host name used by the
+ master for communication and display
purposes.</description>
+ </property>
+ <!--
+ The following three properties are used together to create the list of
+ host:peer_port:leader_port quorum servers for ZooKeeper.
+ -->
+ <property>
+ <name>hbase.zookeeper.peerport</name>
+ <value>2888</value>
+ <description>Port used by ZooKeeper peers to talk to each other.
+ See
https://zookeeper.apache.org/doc/r3.3.3/zookeeperStarted.html#sc_RunningReplicatedZooKeeper
+ for more information.</description>
+ </property>
+ <property>
+ <name>hbase.zookeeper.leaderport</name>
+ <value>3888</value>
+ <description>Port used by ZooKeeper for leader election.
+ See
https://zookeeper.apache.org/doc/r3.3.3/zookeeperStarted.html#sc_RunningReplicatedZooKeeper
+ for more information.</description>
+ </property>
+ <!-- End of properties used to generate ZooKeeper host:port quorum
list. -->
+
+ <!--
+ Beginning of properties that are directly mapped from ZooKeeper's zoo.cfg.
+ All properties with an "hbase.zookeeper.property." prefix are converted for
+ ZooKeeper's configuration. Hence, if you want to add an option from
zoo.cfg,
+ e.g. "initLimit=10" you would append the following to your configuration:
+ <property>
+ <name>hbase.zookeeper.property.initLimit</name>
+ <value>10</value>
+ </property>
+ -->
+ <property>
+ <name>hbase.zookeeper.property.initLimit</name>
+ <value>10</value>
+ <description>Property from ZooKeeper's config zoo.cfg.
+ The number of ticks that the initial synchronization
phase can take.</description>
+ </property>
+ <property>
+ <name>hbase.zookeeper.property.syncLimit</name>
+ <value>5</value>
+ <description>Property from ZooKeeper's config zoo.cfg.
+ The number of ticks that can pass between sending a
request and getting an
+ acknowledgment.</description>
+ </property>
+ <property>
+ <name>hbase.zookeeper.property.dataDir</name>
+ <value>${hbase.tmp.dir}/zookeeper</value>
+ <description>Property from ZooKeeper's config zoo.cfg.
+ The directory where the snapshot is
stored.</description>
+ </property>
+ <property>
+ <name>hbase.zookeeper.property.clientPort</name>
+ <value>2181</value>
+ <description>Property from ZooKeeper's config zoo.cfg.
+ The port at which the clients will
connect.</description>
+ </property>
+ <property>
+ <name>hbase.zookeeper.property.maxClientCnxns</name>
+ <value>300</value>
+ <description>Property from ZooKeeper's config zoo.cfg.
+ Limit on number of concurrent connections (at the
socket level) that a
+ single client, identified by IP address, may make to a
single member of
+ the ZooKeeper ensemble. Set high to avoid zk connection
issues running
+ standalone and pseudo-distributed.</description>
+ </property>
+ <!-- End of properties that are directly mapped from ZooKeeper's
zoo.cfg -->
+
+ <!--Client configurations-->
+ <property>
+ <name>hbase.client.write.buffer</name>
+ <value>2097152</value>
+ <description>Default size of the BufferedMutator write buffer
in bytes.
+ A bigger buffer takes more memory -- on both the client
and server
+ side since server instantiates the passed write buffer
to process
+ it -- but a larger buffer size reduces the number of
RPCs made.
+ For an estimate of server-side memory-used, evaluate
+ hbase.client.write.buffer *
hbase.regionserver.handler.count</description>
+ </property>
+ <property>
+ <name>hbase.client.pause</name>
+ <value>100</value>
+ <description>General client pause value. Used mostly as value
to wait
+ before running a retry of a failed get, region lookup,
etc.
+ See hbase.client.retries.number for description of how
we backoff from
+ this initial pause amount and how this pause works w/
retries.</description>
+ </property>
+ <property>
+ <name>hbase.client.pause.cqtbe</name>
+ <value></value>
+ <description>Whether or not to use a special client pause for
+ CallQueueTooBigException (cqtbe). Set this property to
a higher value
+ than hbase.client.pause if you observe frequent CQTBE
from the same
+ RegionServer and the call queue there stays
full.</description>
+ </property>
+ <property>
+ <name>hbase.client.retries.number</name>
+ <value>15</value>
+ <description>Maximum retries. Used as maximum for all retryable
+ operations such as the getting of a cell's value,
starting a row update,
+ etc. Retry interval is a rough function based on
hbase.client.pause. At
+ first we retry at this interval but then with backoff,
we pretty quickly reach
+ retrying every ten seconds. See
HConstants#RETRY_BACKOFF for how the backoff
+ ramps up. Change this setting and hbase.client.pause
to suit your workload.</description>
+ </property>
+ <property>
+ <name>hbase.client.max.total.tasks</name>
+ <value>100</value>
+ <description>The maximum number of concurrent mutation tasks a
single HTable instance will
+ send to the cluster.</description>
+ </property>
+ <property>
+ <name>hbase.client.max.perserver.tasks</name>
+ <value>2</value>
+ <description>The maximum number of concurrent mutation tasks a
single HTable instance will
+ send to a single region server.</description>
+ </property>
+ <property>
+ <name>hbase.client.max.perregion.tasks</name>
+ <value>1</value>
+ <description>The maximum number of concurrent mutation tasks
the client will
+ maintain to a single Region. That is, if there is
already
+ hbase.client.max.perregion.tasks writes in progress for
this region, new puts
+ won't be sent to this region until some writes
finish.</description>
+ </property>
+ <property>
+ <name>hbase.client.perserver.requests.threshold</name>
+ <value>2147483647</value>
+ <description>The max number of concurrent pending requests for
one server in all client threads
+ (process level). Requests exceeding this limit fail with
ServerTooBusyException immediately to prevent
+ user's threads being occupied and blocked by only one
slow region server. If you use a fixed
+ number of threads to access HBase in a synchronous way,
set this to a suitable value which is
+ related to the number of threads will help you. See
+ https://issues.apache.org/jira/browse/HBASE-16388 for
details.</description>
+ </property>
+ <property>
+ <name>hbase.client.scanner.caching</name>
+ <value>2147483647</value>
+ <description>Number of rows that we try to fetch when calling
next
+ on a scanner if it is not served from (local, client)
memory. This configuration
+ works together with
hbase.client.scanner.max.result.size to try and use the
+ network efficiently. The default value is
Integer.MAX_VALUE so that
+ the network will fill the chunk size defined by
hbase.client.scanner.max.result.size
+ rather than be limited by a particular number of rows
since the size of rows varies
+ table to table. If you know ahead of time that you will
not require more than a certain
+ number of rows from a scan, this configuration should
be set to that row limit via
+ Scan#setCaching. Higher caching values will enable
faster scanners but will eat up more
+ memory and some calls of next may take longer and
longer times when the cache is empty.
+ Do not set this value such that the time between
invocations is greater than the scanner
+ timeout; i.e.
hbase.client.scanner.timeout.period</description>
+ </property>
+ <property>
+ <name>hbase.client.keyvalue.maxsize</name>
+ <value>10485760</value>
+ <description>Specifies the combined maximum allowed size of a
KeyValue
+ instance. This is to set an upper boundary for a single
entry saved in a
+ storage file. Since they cannot be split it helps
avoiding that a region
+ cannot be split any further because the data is too
large. It seems wise
+ to set this to a fraction of the maximum region size.
Setting it to zero
+ or less disables the check.</description>
+ </property>
+ <property>
+ <name>hbase.server.keyvalue.maxsize</name>
+ <value>10485760</value>
+ <description>Maximum allowed size of an individual cell,
inclusive of value and all key
+ components. A value of 0 or less disables the check.
+ The default value is 10MB.
+ This is a safety setting to protect the server from OOM
situations.
+ </description>
+ </property>
+ <property>
+ <name>hbase.client.scanner.timeout.period</name>
+ <value>60000</value>
+ <description>Client scanner lease period in
milliseconds.</description>
+ </property>
+ <property>
+ <name>hbase.client.localityCheck.threadPoolSize</name>
+ <value>2</value>
+ </property>
+
+ <!--Miscellaneous configuration-->
+ <property>
+ <name>hbase.bulkload.retries.number</name>
+ <value>10</value>
+ <description>Maximum retries. This is the maximum number of
iterations
+ that atomic bulk loads are attempted in the face of
splitting operations.
+ 0 means never give up.</description>
+ </property>
+ <property>
+ <name>hbase.master.balancer.maxRitPercent</name>
+ <value>1.0</value>
+ <description>The max percent of regions in transition when
balancing.
+ The default value is 1.0, so there is no balancer
throttling. If this config is set to 0.01,
+ it means that at most 1% of regions are in
transition when balancing.
+ Then the cluster's availability is at least 99% when
balancing.</description>
+ </property>
+ <property>
+ <name>hbase.balancer.period
+ </name>
+ <value>300000</value>
+ <description>Period at which the region balancer runs in the
Master.</description>
+ </property>
+ <property>
+ <name>hbase.normalizer.period</name>
+ <value>300000</value>
+ <description>Period at which the region normalizer runs in the
Master.</description>
+ </property>
+ <property>
+ <name>hbase.regions.slop</name>
+ <value>0.001</value>
+ <description>Rebalance if any regionserver has average +
(average * slop) regions.
+ The default value of this parameter is 0.001 in
StochasticLoadBalancer (the default load balancer),
+ while the default is 0.2 in other load balancers (i.e.,
SimpleLoadBalancer).</description>
+ </property>
+ <property>
+ <name>hbase.server.thread.wakefrequency</name>
+ <value>10000</value>
+ <description>Time to sleep in between searches for work (in
milliseconds).
+ Used as sleep interval by service threads such as log
roller.</description>
+ </property>
+ <property>
+ <name>hbase.server.versionfile.writeattempts</name>
+ <value>3</value>
+ <description>
+ How many times to retry attempting to write a version
file
+ before just aborting. Each attempt is separated by the
+ hbase.server.thread.wakefrequency
milliseconds.</description>
+ </property>
+ <property>
+ <name>hbase.hregion.memstore.flush.size</name>
+ <value>134217728</value>
+ <description>
+ Memstore will be flushed to disk if size of the memstore
+ exceeds this number of bytes. Value is checked by a
thread that runs
+ every hbase.server.thread.wakefrequency.</description>
+ </property>
+ <property>
+
<name>hbase.hregion.percolumnfamilyflush.size.lower.bound.min</name>
+ <value>16777216</value>
+ <description>
+ If FlushLargeStoresPolicy is used and there are
multiple column families,
+ then every time that we hit the total memstore limit,
we find out all the
+ column families whose memstores exceed a "lower bound"
and only flush them
+ while retaining the others in memory. The "lower bound"
will be
+ "hbase.hregion.memstore.flush.size /
column_family_number" by default
+ unless value of this property is larger than that. If
none of the families
+ have their memstore size more than lower bound, all the
memstores will be
+ flushed (just as usual).
+ </description>
+ </property>
+ <property>
+ <name>hbase.hregion.preclose.flush.size</name>
+ <value>5242880</value>
+ <description>
+ If the memstores in a region are this size or larger
when we go
+ to close, run a "pre-flush" to clear out memstores
before we put up
+ the region closed flag and take the region offline. On
close,
+ a flush is run under the close flag to empty memory.
During
+ this time the region is offline and we are not taking
on any writes.
+ If the memstore content is large, this flush could take
a long time to
+ complete. The preflush is meant to clean out the bulk
of the memstore
+ before putting up the close flag and taking the region
offline so the
+ flush that runs under the close flag has little to
do.</description>
+ </property>
+ <property>
+ <name>hbase.hregion.memstore.block.multiplier</name>
+ <value>4</value>
+ <description>
+ Block updates if memstore has
hbase.hregion.memstore.block.multiplier
+ times hbase.hregion.memstore.flush.size bytes. Useful
preventing
+ runaway memstore during spikes in update traffic.
Without an
+ upper-bound, memstore fills such that when it flushes
the
+ resultant flush files take a long time to compact or
split, or
+ worse, we OOME.</description>
+ </property>
+ <property>
+ <name>hbase.hregion.memstore.mslab.enabled</name>
+ <value>true</value>
+ <description>
+ Enables the MemStore-Local Allocation Buffer,
+ a feature which works to prevent heap fragmentation
under
+ heavy write loads. This can reduce the frequency of
stop-the-world
+ GC pauses on large heaps.</description>
+ </property>
+ <property>
+ <name>hbase.hregion.max.filesize</name>
+ <value>10737418240</value>
+ <description>
+ Maximum HFile size. If the sum of the sizes of a
region's HFiles has grown to exceed this
+ value, the region is split in two.</description>
+ </property>
+ <property>
+ <name>hbase.hregion.majorcompaction</name>
+ <value>604800000</value>
+ <description>Time between major compactions, expressed in
milliseconds. Set to 0 to disable
+ time-based automatic major compactions. User-requested
and size-based major compactions will
+ still run. This value is multiplied by
hbase.hregion.majorcompaction.jitter to cause
+ compaction to start at a somewhat-random time during a
given window of time. The default value
+ is 7 days, expressed in milliseconds. If major
compactions are causing disruption in your
+ environment, you can configure them to run at off-peak
times for your deployment, or disable
+ time-based major compactions by setting this parameter
to 0, and run major compactions in a
+ cron job or by another external mechanism.</description>
+ </property>
+ <property>
+ <name>hbase.hregion.majorcompaction.jitter</name>
+ <value>0.50</value>
+ <description>A multiplier applied to
hbase.hregion.majorcompaction to cause compaction to occur
+ a given amount of time either side of
hbase.hregion.majorcompaction. The smaller the number,
+ the closer the compactions will happen to the
hbase.hregion.majorcompaction
+ interval.</description>
+ </property>
+ <property>
+ <name>hbase.hstore.compactionThreshold</name>
+ <value>3</value>
+ <description> If more than this number of StoreFiles exist in
any one Store
+ (one StoreFile is written per flush of MemStore), a
compaction is run to rewrite all
+ StoreFiles into a single StoreFile. Larger values delay
compaction, but when compaction does
+ occur, it takes longer to complete.</description>
+ </property>
+ <property>
+ <name>hbase.regionserver.compaction.enabled</name>
+ <value>true</value>
+ <description>Enable/disable compactions by setting
true/false.
+ We can further switch compactions dynamically with the
+ compaction_switch shell command.</description>
+ </property>
+ <property>
+ <name>hbase.hstore.flusher.count</name>
+ <value>2</value>
+ <description> The number of flush threads. With fewer threads,
the MemStore flushes will be
+ queued. With more threads, the flushes will be executed
in parallel, increasing the load on
+ HDFS, and potentially causing more compactions.
</description>
+ </property>
+ <property>
+ <name>hbase.hstore.blockingStoreFiles</name>
+ <value>16</value>
+ <description> If more than this number of StoreFiles exist in
any one Store (one StoreFile
+ is written per flush of MemStore), updates are blocked
for this region until a compaction is
+ completed, or until hbase.hstore.blockingWaitTime has
been exceeded.</description>
+ </property>
+ <property>
+ <name>hbase.hstore.blockingWaitTime</name>
+ <value>90000</value>
+ <description> The time for which a region will block updates
after reaching the StoreFile limit
+ defined by hbase.hstore.blockingStoreFiles. After this
time has elapsed, the region will stop
+ blocking updates even if a compaction has not been
completed.</description>
+ </property>
+ <property>
+ <name>hbase.hstore.compaction.min</name>
+ <value>3</value>
+ <description>The minimum number of StoreFiles which must be
eligible for compaction before
+ compaction can run. The goal of tuning
hbase.hstore.compaction.min is to avoid ending up with
+ too many tiny StoreFiles to compact. Setting this value
to 2 would cause a minor compaction
+ each time you have two StoreFiles in a Store, and this
is probably not appropriate. If you
+ set this value too high, all the other values will need
to be adjusted accordingly. For most
+ cases, the default value is appropriate. In previous
versions of HBase, the parameter
+ hbase.hstore.compaction.min was named
hbase.hstore.compactionThreshold.</description>
+ </property>
+ <property>
+ <name>hbase.hstore.compaction.max</name>
+ <value>10</value>
+ <description>The maximum number of StoreFiles which will be
selected for a single minor
+ compaction, regardless of the number of eligible
StoreFiles. Effectively, the value of
+ hbase.hstore.compaction.max controls the length of time
it takes a single compaction to
+ complete. Setting it larger means that more StoreFiles
are included in a compaction. For most
+ cases, the default value is appropriate.</description>
+ </property>
+ <property>
+ <name>hbase.hstore.compaction.min.size</name>
+ <value>134217728</value>
+ <description>A StoreFile (or a selection of StoreFiles, when
using ExploringCompactionPolicy)
+ smaller than this size will always be eligible for
minor compaction.
+ HFiles this size or larger are evaluated by
hbase.hstore.compaction.ratio to determine if
+ they are eligible. Because this limit represents the
"automatic include" limit for all
+ StoreFiles smaller than this value, this value may need
to be reduced in write-heavy
+ environments where many StoreFiles in the 1-2 MB range
are being flushed, because every
+ StoreFile will be targeted for compaction and the
resulting StoreFiles may still be under the
+ minimum size and require further compaction. If this
parameter is lowered, the ratio check is
+ triggered more quickly. This addressed some issues seen
in earlier versions of HBase but
+ changing this parameter is no longer necessary in most
situations. Default: 128 MB expressed
+ in bytes.</description>
+ </property>
+ <property>
+ <name>hbase.hstore.compaction.max.size</name>
+ <value>9223372036854775807</value>
+ <description>A StoreFile (or a selection of StoreFiles, when
using ExploringCompactionPolicy)
+ larger than this size will be excluded from compaction.
The effect of
+ raising hbase.hstore.compaction.max.size is fewer,
larger StoreFiles that do not get
+ compacted often. If you feel that compaction is
happening too often without much benefit, you
+ can try raising this value. Default: the value of
Long.MAX_VALUE, expressed in bytes.</description>
+ </property>
+ <property>
+ <name>hbase.hstore.compaction.ratio</name>
+ <value>1.2F</value>
+ <description>For minor compaction, this ratio is used to
determine whether a given StoreFile
+ which is larger than hbase.hstore.compaction.min.size
is eligible for compaction. Its
+ effect is to limit compaction of large StoreFiles. The
value of hbase.hstore.compaction.ratio
+ is expressed as a floating-point decimal. A large
ratio, such as 10, will produce a single
+ giant StoreFile. Conversely, a low value, such as .25,
will produce behavior similar to the
+ BigTable compaction algorithm, producing four
StoreFiles. A moderate value of between 1.0 and
+ 1.4 is recommended. When tuning this value, you are
balancing write costs with read costs.
+ Raising the value (to something like 1.4) will have
more write costs, because you will
+ compact larger StoreFiles. However, during reads, HBase
will need to seek through fewer
+ StoreFiles to accomplish the read. Consider this
approach if you cannot take advantage of
+ Bloom filters. Otherwise, you can lower this value to
something like 1.0 to reduce the
+ background cost of writes, and use Bloom filters to
control the number of StoreFiles touched
+ during reads. For most cases, the default value is
appropriate.</description>
+ </property>
+ <property>
+ <name>hbase.hstore.compaction.ratio.offpeak</name>
+ <value>5.0F</value>
+ <description>Allows you to set a different (by default, more
aggressive) ratio for determining
+ whether larger StoreFiles are included in compactions
during off-peak hours. Works in the
+ same way as hbase.hstore.compaction.ratio. Only applies
if hbase.offpeak.start.hour and
+ hbase.offpeak.end.hour are also enabled.</description>
+ </property>
+ <property>
+ <name>hbase.hstore.time.to.purge.deletes</name>
+ <value>0</value>
+ <description>The amount of time to delay purging of delete
markers with future timestamps. If
+ unset, or set to 0, all delete markers, including those
with future timestamps, are purged
+ during the next major compaction. Otherwise, a delete
marker is kept until the major compaction
+ which occurs after the marker's timestamp plus the
value of this setting, in milliseconds.
+ </description>
+ </property>
+ <property>
+ <name>hbase.offpeak.start.hour</name>
+ <value>-1</value>
+ <description>The start of off-peak hours, expressed as an
integer between 0 and 23, inclusive.
+ Set to -1 to disable off-peak.</description>
+ </property>
+ <property>
+ <name>hbase.offpeak.end.hour</name>
+ <value>-1</value>
+ <description>The end of off-peak hours, expressed as an integer
between 0 and 23, inclusive. Set
+ to -1 to disable off-peak.</description>
+ </property>
+ <property>
+ <name>hbase.regionserver.thread.compaction.throttle</name>
+ <value>2684354560</value>
+ <description>There are two different thread pools for
compactions, one for large compactions and
+ the other for small compactions. This helps to keep
compaction of lean tables (such as
+ hbase:meta) fast. If a compaction is larger than this
threshold, it
+ goes into the large compaction pool. In most cases, the
default value is appropriate. Default:
+ 2 x hbase.hstore.compaction.max x
hbase.hregion.memstore.flush.size (which defaults to 128MB).
+ The value field assumes that the value of
hbase.hregion.memstore.flush.size is unchanged from
+ the default.</description>
+ </property>
+ <property>
+ <name>hbase.regionserver.majorcompaction.pagecache.drop</name>
+ <value>true</value>
+ <description>Specifies whether to drop pages read/written into
the system page cache by
+ major compactions. Setting it to true helps prevent
major compactions from
+ polluting the page cache, which is almost always
required, especially for clusters
+ with low/moderate memory to storage ratio.</description>
+ </property>
+ <property>
+ <name>hbase.regionserver.minorcompaction.pagecache.drop</name>
+ <value>true</value>
+ <description>Specifies whether to drop pages read/written into
the system page cache by
+ minor compactions. Setting it to true helps prevent
minor compactions from
+ polluting the page cache, which is most beneficial on
clusters with low
+ memory to storage ratio or very write heavy clusters.
You may want to set it to
+ false under moderate to low write workload when bulk of
the reads are
+ on the most recently written data.</description>
+ </property>
+ <property>
+ <name>hbase.hstore.compaction.kv.max</name>
+ <value>10</value>
+ <description>The maximum number of KeyValues to read and then
write in a batch when flushing or
+ compacting. Set this lower if you have big KeyValues
and problems with Out Of Memory
+ Exceptions. Set this higher if you have wide, small
rows. </description>
+ </property>
+ <property>
+ <name>hbase.storescanner.parallel.seek.enable</name>
+ <value>false</value>
+ <description>
+ Enables StoreFileScanner parallel-seeking in
StoreScanner,
+ a feature which can reduce response latency under
special conditions.</description>
+ </property>
+ <property>
+ <name>hbase.storescanner.parallel.seek.threads</name>
+ <value>10</value>
+ <description>
+ The default thread pool size if the parallel-seeking
feature is enabled.</description>
+ </property>
+ <property>
+ <name>hfile.block.cache.size</name>
+ <value>0.4</value>
+ <description>Percentage of maximum heap (-Xmx setting) to
allocate to block cache
+ used by a StoreFile. Default of 0.4 means allocate 40%.
+ Set to 0 to disable but it's not recommended; you need
at least
+ enough cache to hold the storefile
indices.</description>
+ </property>
+ <property>
+ <name>hfile.block.index.cacheonwrite</name>
+ <value>false</value>
+ <description>This allows non-root multi-level index
blocks to be put into the block
+ cache at the time the index is being
written.</description>
+ </property>
+ <property>
+ <name>hfile.index.block.max.size</name>
+ <value>131072</value>
+ <description>When the size of a leaf-level, intermediate-level,
or root-level
+ index block in a multi-level block index grows to this
size, the
+ block is written out and a new block is
started.</description>
+ </property>
+ <property>
+ <name>hbase.bucketcache.ioengine</name>
+ <value></value>
+ <description>Where to store the contents of the bucketcache.
One of: offheap,
+ file, files or mmap. If a file or files, set it to
file(s):PATH_TO_FILE.
+ mmap means the content will be in an mmaped file. Use
mmap:PATH_TO_FILE.
+ See
http://hbase.apache.org/book.html#offheap.blockcache for more information.
+ </description>
+ </property>
+ <property>
+ <name>hbase.bucketcache.size</name>
+ <value></value>
+ <description>A float that EITHER represents a percentage of
total heap memory
+ size to give to the cache (if &lt; 1.0) OR, it is the
total capacity in
+ megabytes of BucketCache. Default: 0.0</description>
+ </property>
+ <property>
+ <name>hbase.bucketcache.bucket.sizes</name>
+ <value></value>
+ <description>A comma-separated list of sizes for buckets for
the bucketcache.
+ Can be multiple sizes. List block sizes in order from
smallest to largest.
+ The sizes you use will depend on your data access
patterns.
+ Must be a multiple of 256 else you will run into
+ 'java.io.IOException: Invalid HFile block magic' when
you go to read from cache.
+ If you specify no values here, then you pick up the
default bucketsizes set
+ in code (See BucketAllocator#DEFAULT_BUCKET_SIZES).
+ </description>
+ </property>
+ <property>
+ <name>hfile.format.version</name>
+ <value>3</value>
+ <description>The HFile format version to use for new files.
+ Version 3 adds support for tags in hfiles (See
http://hbase.apache.org/book.html#hbase.tags).
+ Also see the configuration
'hbase.replication.rpc.codec'.
+ </description>
+ </property>
+ <property>
+ <name>hfile.block.bloom.cacheonwrite</name>
+ <value>false</value>
+ <description>Enables cache-on-write for inline blocks of a
compound Bloom filter.</description>
+ </property>
+ <property>
+ <name>io.storefile.bloom.block.size</name>
+ <value>131072</value>
+ <description>The size in bytes of a single block ("chunk") of a
compound Bloom
+ filter. This size is approximate, because Bloom blocks
can only be
+ inserted at data block boundaries, and the number of
keys per data
+ block varies.</description>
+ </property>
+ <property>
+ <name>hbase.rs.cacheblocksonwrite</name>
+ <value>false</value>
+ <description>Whether an HFile block should be added to the
block cache when the
+ block is finished.</description>
+ </property>
+ <property>
+ <name>hbase.rpc.timeout</name>
+ <value>60000</value>
+ <description>This is for the RPC layer to define how long
(millisecond) HBase client applications
+ take for a remote call to time out. It uses pings to
check connections
+ but will eventually throw a
TimeoutException.</description>
+ </property>
+ <property>
+ <name>hbase.client.operation.timeout</name>
+ <value>1200000</value>
+ <description>Operation timeout is a top-level restriction
(millisecond) that makes sure a
+ blocking operation in Table will not be blocked more
than this. In each operation, if rpc
+ request fails because of timeout or other reason, it
will retry until success or throw
+ RetriesExhaustedException. But if the total blocking
time reaches the operation timeout
+ before retries are exhausted, it will break early and throw
SocketTimeoutException.</description>
+ </property>
+ <property>
+ <name>hbase.cells.scanned.per.heartbeat.check</name>
+ <value>10000</value>
+ <description>The number of cells scanned in between heartbeat
checks. Heartbeat
+ checks occur during the processing of scans to
determine whether or not the
+ server should stop scanning in order to send back a
heartbeat message to the
+ client. Heartbeat messages are used to keep the
client-server connection alive
+ during long running scans. Small values mean that the
heartbeat checks will
+ occur more often and thus will provide a tighter bound
on the execution time of
+ the scan. Larger values mean that the heartbeat checks
occur less frequently.
+ </description>
+ </property>
+ <property>
+ <name>hbase.rpc.shortoperation.timeout</name>
+ <value>10000</value>
+ <description>This is another version of "hbase.rpc.timeout".
For RPC operations
+ within the cluster, we rely on this configuration to set a
short timeout limit
+ for short operations. For example, a short rpc timeout for
a region server trying
+ to report to the active master can speed up the master
failover process.</description>
+ </property>
+ <property>
+ <name>hbase.ipc.client.tcpnodelay</name>
+ <value>true</value>
+ <description>Set no delay on rpc socket connections. See
+
http://docs.oracle.com/javase/1.5.0/docs/api/java/net/Socket.html#getTcpNoDelay()</description>
+ </property>
+ <property>
+ <name>hbase.regionserver.hostname</name>
+ <value></value>
+ <description>This config is for experts: don't set its value
unless you really know what you are doing.
+ When set to a non-empty value, this represents the
(external facing) hostname for the underlying server.
+ See https://issues.apache.org/jira/browse/HBASE-12954
for details.</description>
+ </property>
+ <property>
+
<name>hbase.regionserver.hostname.disable.master.reversedns</name>
+ <value>false</value>
+ <description>This config is for experts: don't set its value
unless you really know what you are doing.
+ When set to true, regionserver will use the current
node hostname for the servername and HMaster will
+ skip reverse DNS lookup and use the hostname sent by
regionserver instead. Note that this config and
+ hbase.regionserver.hostname are mutually exclusive. See
https://issues.apache.org/jira/browse/HBASE-18226
+ for more details.</description>
+ </property>
+ <!-- The following properties configure authentication information for
+ HBase processes when using Kerberos security. There are no default
+ values, included here for documentation purposes -->
+ <property>
+ <name>hbase.master.keytab.file</name>
+ <value></value>
+ <description>Full path to the kerberos keytab file to use for
logging in
+ the configured HMaster server principal.</description>
+ </property>
+ <property>
+ <name>hbase.master.kerberos.principal</name>
+ <value></value>
+ <description>Ex. "hbase/_HOST@EXAMPLE.COM". The kerberos
principal name
+ that should be used to run the HMaster process. The
principal name should
+ be in the form: user/hostname@DOMAIN. If "_HOST" is
used as the hostname
+ portion, it will be replaced with the actual hostname
of the running
+ instance.</description>
+ </property>
+ <property>
+ <name>hbase.regionserver.keytab.file</name>
+ <value></value>
+ <description>Full path to the kerberos keytab file to use for
logging in
+ the configured HRegionServer server
principal.</description>
+ </property>
+ <property>
+ <name>hbase.regionserver.kerberos.principal</name>
+ <value></value>
+ <description>Ex. "hbase/_HOST@EXAMPLE.COM". The kerberos
principal name
+ that should be used to run the HRegionServer process.
The principal name
+ should be in the form: user/hostname@DOMAIN. If
"_HOST" is used as the
+ hostname portion, it will be replaced with the actual
hostname of the
+ running instance. An entry for this principal must
exist in the file
+ specified in
hbase.regionserver.keytab.file</description>
+ </property>
+ <!-- Additional configuration specific to HBase security -->
+ <property>
+ <name>hadoop.policy.file</name>
+ <value>hbase-policy.xml</value>
+ <description>The policy configuration file used by RPC servers
to make
+ authorization decisions on client requests. Only used
when HBase
+ security is enabled.</description>
+ </property>
+ <property>
+ <name>hbase.superuser</name>
+ <value></value>
+ <description>List of users or groups (comma-separated), who are
allowed
+ full privileges, regardless of stored ACLs, across the
cluster.
+ Only used when HBase security is enabled.</description>
+ </property>
+ <property>
+ <name>hbase.auth.key.update.interval</name>
+ <value>86400000</value>
+ <description>The update interval for master key for
authentication tokens
+ in servers in milliseconds. Only used when HBase
security is enabled.</description>
+ </property>
+ <property>
+ <name>hbase.auth.token.max.lifetime</name>
+ <value>604800000</value>
+ <description>The maximum lifetime in milliseconds after which an
+ authentication token expires. Only used when HBase
security is enabled.</description>
+ </property>
+ <property>
+ <name>hbase.ipc.client.fallback-to-simple-auth-allowed</name>
+ <value>false</value>
+ <description>When a client is configured to attempt a secure
connection, but attempts to
+ connect to an insecure server, that server may instruct
the client to
+ switch to SASL SIMPLE (unsecure) authentication. This
setting controls
+ whether or not the client will accept this instruction
from the server.
+ When false (the default), the client will not allow the
fallback to SIMPLE
+ authentication, and will abort the
connection.</description>
+ </property>
+ <property>
+ <name>hbase.ipc.server.fallback-to-simple-auth-allowed</name>
+ <value>false</value>
+ <description>When a server is configured to require secure
connections, it will
+ reject connection attempts from clients using SASL
SIMPLE (unsecure) authentication.
+ This setting allows secure servers to accept SASL
SIMPLE connections from clients
+ when the client requests. When false (the default),
the server will not allow the fallback
+ to SIMPLE authentication, and will reject the
connection. WARNING: This setting should ONLY
+ be used as a temporary measure while converting clients
over to secure authentication. It
+ MUST BE DISABLED for secure operation.</description>
+ </property>
+ <property>
+ <name>hbase.display.keys</name>
+ <value>true</value>
+ <description>When this is set to true the webUI and such will
display all start/end keys
+ as part of the table details, region names, etc. When
this is set to false,
+ the keys are hidden.</description>
+ </property>
+ <property>
+ <name>hbase.coprocessor.enabled</name>
+ <value>true</value>
+ <description>Enables or disables coprocessor loading. If 'false'
+ (disabled), any other coprocessor related configuration
will be ignored.
+ </description>
+ </property>
+ <property>
+ <name>hbase.coprocessor.user.enabled</name>
+ <value>true</value>
+ <description>Enables or disables user (aka. table) coprocessor
loading.
+ If 'false' (disabled), any table coprocessor attributes
in table
+ descriptors will be ignored. If
"hbase.coprocessor.enabled" is 'false'
+ this setting has no effect.
+ </description>
+ </property>
+ <property>
+ <name>hbase.coprocessor.region.classes</name>
+ <value></value>
+ <description>A comma-separated list of Coprocessors that are
loaded by
+ default on all tables. For any override coprocessor
method, these classes
+ will be called in order. After implementing your own
Coprocessor, just put
+ it in HBase's classpath and add the fully qualified
class name here.
+ A coprocessor can also be loaded on demand by setting
HTableDescriptor.</description>
+ </property>
+ <property>
+ <name>hbase.coprocessor.master.classes</name>
+ <value></value>
+ <description>A comma-separated list of
+ org.apache.hadoop.hbase.coprocessor.MasterObserver
coprocessors that are
+ loaded by default on the active HMaster process. For
any implemented
+ coprocessor methods, the listed classes will be called
in order. After
+ implementing your own MasterObserver, just put it in
HBase's classpath
+ and add the fully qualified class name
here.</description>
+ </property>
+ <property>
+ <name>hbase.coprocessor.abortonerror</name>
+ <value>true</value>
+ <description>Set to true to cause the hosting server (master or
regionserver)
+ to abort if a coprocessor fails to load, fails to
initialize, or throws an
+ unexpected Throwable object. Setting this to false will
allow the server to
+ continue execution but the system wide state of the
coprocessor in question
+ will become inconsistent as it will be properly
executing in only a subset
+ of servers, so this is most useful for debugging
only.</description>
+ </property>
+ <property>
+ <name>hbase.rest.port</name>
+ <value>8080</value>
+ <description>The port for the HBase REST server.</description>
+ </property>
+ <property>
+ <name>hbase.rest.readonly</name>
+ <value>false</value>
+ <description>Defines the mode the REST server will be started
in. Possible values are:
+ false: All HTTP methods are permitted -
GET/PUT/POST/DELETE.
+ true: Only the GET method is permitted.</description>
+ </property>
+ <property>
+ <name>hbase.rest.threads.max</name>
+ <value>100</value>
+ <description>The maximum number of threads of the REST server
thread pool.
+ Threads in the pool are reused to process REST
requests. This
+ controls the maximum number of requests processed
concurrently.
+ It may help to control the memory used by the REST
server to
+ avoid OOM issues. If the thread pool is full, incoming
requests
+ will be queued up and wait for some free
threads.</description>
+ </property>
+ <property>
+ <name>hbase.rest.threads.min</name>
+ <value>2</value>
+ <description>The minimum number of threads of the REST server
thread pool.
+ The thread pool always has at least this number of
threads so
+ the REST server is ready to serve incoming
requests.</description>
+ </property>
+ <property>
+ <name>hbase.rest.support.proxyuser</name>
+ <value>false</value>
+ <description>Enables running the REST server to support
proxy-user mode.</description>
+ </property>
+ <property skipInDoc="true">
+ <name>hbase.defaults.for.version</name>
+ <value>2.2.3</value>
+ <description>This defaults file was compiled for version
${project.version}. This variable is used
+ to make sure that a user doesn't have an old version of
hbase-default.xml on the
+ classpath.</description>
+ </property>
+ <property>
+ <name>hbase.defaults.for.version.skip</name>
+ <value>false</value>
+ <description>Set to true to skip the
'hbase.defaults.for.version' check.
+ Setting this to true can be useful in contexts other
than
+ the other side of a maven generation; i.e. running in an
+ IDE. You'll want to set this boolean to true to avoid
+ seeing the RuntimeException complaint:
"hbase-default.xml file
+ seems to be for an old version of HBase
(\${hbase.version}), this
+ version is X.X.X-SNAPSHOT"</description>
+ </property>
+ <property>
+ <name>hbase.table.lock.enable</name>
+ <value>true</value>
+ <description>Set to true to enable locking the table in
zookeeper for schema change operations.
+ Table locking from master prevents concurrent schema
modifications from corrupting table
+ state.</description>
+ </property>
+ <property>
+ <name>hbase.table.max.rowsize</name>
+ <value>1073741824</value>
+ <description>
+ Maximum size of a single row in bytes (default is 1 GB)
for Get'ting
+ or Scan'ning without in-row scan flag set. If row size
exceeds this limit
+ RowTooBigException is thrown to client.
+ </description>
+ </property>
+ <property>
+ <name>hbase.thrift.minWorkerThreads</name>
+ <value>16</value>
+ <description>The "core size" of the thread pool. New threads
are created on every
+ connection until this many threads are
created.</description>
+ </property>
+ <property>
+ <name>hbase.thrift.maxWorkerThreads</name>
+ <value>1000</value>
+ <description>The maximum size of the thread pool. When the
pending request queue
+ overflows, new threads are created until their number
reaches this number.
+ After that, the server starts dropping
connections.</description>
+ </property>
+ <property>
+ <name>hbase.thrift.maxQueuedRequests</name>
+ <value>1000</value>
+ <description>The maximum number of pending Thrift connections
waiting in the queue. If
+ there are no idle threads in the pool, the server
queues requests. Only
+ when the queue overflows, new threads are added, up to
+ hbase.thrift.maxWorkerThreads threads.</description>
+ </property>
+ <property>
+ <name>hbase.regionserver.thrift.framed</name>
+ <value>false</value>
+ <description>Use Thrift TFramedTransport on the server side.
+ This is the recommended transport for thrift servers
and requires a similar setting
+ on the client side. Changing this to false will select
the default transport,
+ vulnerable to DoS when malformed requests are issued
due to THRIFT-601.
+ </description>
+ </property>
+ <property>
+
<name>hbase.regionserver.thrift.framed.max_frame_size_in_mb</name>
+ <value>2</value>
+ <description>Default frame size when using framed transport, in
MB</description>
+ </property>
+ <property>
+ <name>hbase.regionserver.thrift.compact</name>
+ <value>false</value>
+ <description>Use Thrift TCompactProtocol binary serialization
protocol.</description>
+ </property>
+ <property>
+ <name>hbase.rootdir.perms</name>
+ <value>700</value>
+ <description>FS Permissions for the root data subdirectory in a
secure (kerberos) setup.
+ When master starts, it creates the rootdir with these
permissions or sets the permissions
+ if they do not match.</description>
+ </property>
+ <property>
+ <name>hbase.wal.dir.perms</name>
+ <value>700</value>
+ <description>FS Permissions for the root WAL directory in a
secure(kerberos) setup.
+ When master starts, it creates the WAL dir with these
permissions or sets the permissions
+ if they do not match.</description>
+ </property>
+ <property>
+ <name>hbase.data.umask.enable</name>
+ <value>false</value>
+ <description>Enable, if true, that file permissions should be
assigned
+ to the files written by the regionserver</description>
+ </property>
+ <property>
+ <name>hbase.data.umask</name>
+ <value>000</value>
+ <description>File permissions that should be used to write data
+ files when hbase.data.umask.enable is true</description>
+ </property>
+ <property>
+ <name>hbase.snapshot.enabled</name>
+ <value>true</value>
+ <description>Set to true to allow snapshots to be taken /
restored / cloned.</description>
+ </property>
+ <property>
+ <name>hbase.snapshot.restore.take.failsafe.snapshot</name>
+ <value>true</value>
+ <description>Set to true to take a snapshot before the restore
operation.
+ The snapshot taken will be used in case of failure, to
restore the previous state.
+ At the end of the restore operation this snapshot will
be deleted.</description>
+ </property>
+ <property>
+ <name>hbase.snapshot.restore.failsafe.name</name>
+
<value>hbase-failsafe-{snapshot.name}-{restore.timestamp}</value>
+ <description>Name of the failsafe snapshot taken by the restore
operation.
+ You can use the {snapshot.name}, {table.name} and
{restore.timestamp} variables
+ to create a name based on what you are
restoring.</description>
+ </property>
+ <property>
+ <name>hbase.snapshot.working.dir</name>
+ <value></value>
+ <description>Location where the snapshotting process will
occur. The location of the
+ completed snapshots will not change, but the temporary
directory where the snapshot
+ process occurs will be set to this location. This can
be a separate filesystem than
+ the root directory, for performance increase purposes.
See HBASE-21098 for more
+ information</description>
+ </property>
+ <property>
+ <name>hbase.server.compactchecker.interval.multiplier</name>
+ <value>1000</value>
+ <description>The number that determines how often we scan to
see if compaction is necessary.
+ Normally, compactions are done after some events (such
as memstore flush), but if
+ region didn't receive a lot of writes for some time, or
due to different compaction
+ policies, it may be necessary to check it periodically.
The interval between checks is
+ hbase.server.compactchecker.interval.multiplier
multiplied by
+ hbase.server.thread.wakefrequency.</description>
+ </property>
+ <property>
+ <name>hbase.lease.recovery.timeout</name>
+ <value>900000</value>
+ <description>How long we wait on dfs lease recovery in total
before giving up.</description>
+ </property>
+ <property>
+ <name>hbase.lease.recovery.dfs.timeout</name>
+ <value>64000</value>
+ <description>How long between dfs recover lease invocations.
Should be larger than the sum of
+ the time it takes for the namenode to issue a block
recovery command as part of
+ datanode; dfs.heartbeat.interval and the time it takes
for the primary
+ datanode, performing block recovery to timeout on a
dead datanode; usually
+ dfs.client.socket-timeout. See the end of HBASE-8389
for more.</description>
+ </property>
+ <property>
+ <name>hbase.column.max.version</name>
+ <value>1</value>
+ <description>New column family descriptors will use this value
as the default number of versions
+ to keep.</description>
+ </property>
+ <property>
+ <name>dfs.client.read.shortcircuit</name>
+ <value>false</value>
+ <description>
+ If set to true, this configuration parameter enables
short-circuit local
+ reads.
+ </description>
+ </property>
+ <property>
+ <name>dfs.domain.socket.path</name>
+ <value>none</value>
+ <description>
+ This is a path to a UNIX domain socket that will be
used for
+ communication between the DataNode and local HDFS
clients, if
+ dfs.client.read.shortcircuit is set to true. If the
string "_PORT" is
+ present in this path, it will be replaced by the TCP
port of the DataNode.
+ Be careful about permissions for the directory that
hosts the shared
+ domain socket; dfsclient will complain if open to other
users than the HBase user.
+ </description>
+ </property>
+ <property>
+ <name>hbase.dfs.client.read.shortcircuit.buffer.size</name>
+ <value>131072</value>
+ <description>If the DFSClient configuration
+ dfs.client.read.shortcircuit.buffer.size is unset, we
will
+ use what is configured here as the short circuit read
default
+ direct byte buffer size. DFSClient native default is
1MB; HBase
+ keeps its HDFS files open so number of file blocks *
1MB soon
+ starts to add up and threaten OOME because of a
shortage of
+ direct memory. So, we set it down from the default.
Make
+ it > the default hbase block size set in the
HColumnDescriptor
+ which is usually 64k.
+ </description>
+ </property>
+ <property>
+ <name>hbase.regionserver.checksum.verify</name>
+ <value>true</value>
+ <description>
+ If set to true (the default), HBase verifies the
checksums for hfile
+ blocks. HBase writes checksums inline with the data
when it writes out
+ hfiles. HDFS (as of this writing) writes checksums to a
separate file
+ than the data file necessitating extra seeks. Setting
this flag saves
+ some on i/o. Checksum verification by HDFS will be
internally disabled
+ on hfile streams when this flag is set. If the
hbase-checksum verification
+ fails, we will switch back to using HDFS checksums (so
do not disable HDFS
+ checksums! And besides this feature applies to hfiles
only, not to WALs).
+ If this parameter is set to false, then hbase will not
verify any checksums,
+ instead it will depend on checksum verification being
done in the HDFS client.
+ </description>
+ </property>
+ <property>
+ <name>hbase.hstore.bytes.per.checksum</name>
+ <value>16384</value>
+ <description>
+ Number of bytes in a newly created checksum chunk for
HBase-level
+ checksums in hfile blocks.
+ </description>
+ </property>
+ <property>
+ <name>hbase.hstore.checksum.algorithm</name>
+ <value>CRC32C</value>
+ <description>
+ Name of an algorithm that is used to compute checksums.
Possible values
+ are NULL, CRC32, CRC32C.
+ </description>
+ </property>
+ <property>
+ <name>hbase.client.scanner.max.result.size</name>
+ <value>2097152</value>
+ <description>Maximum number of bytes returned when calling a
scanner's next method.
+ Note that when a single row is larger than this limit
the row is still returned completely.
+ The default value is 2MB, which is good for 1GbE
networks.
+ With faster and/or high latency networks this value
should be increased.
+ </description>
+ </property>
+ <property>
+ <name>hbase.server.scanner.max.result.size</name>
+ <value>104857600</value>
+ <description>Maximum number of bytes returned when calling a
scanner's next method.
+ Note that when a single row is larger than this limit
the row is still returned completely.
+ The default value is 100MB.
+ This is a safety setting to protect the server from OOM
situations.
+ </description>
+ </property>
+ <property>
+ <name>hbase.status.published</name>
+ <value>false</value>
+ <description>
+ This setting activates the publication by the master of
the status of the region server.
+ When a region server dies and its recovery starts, the
master will push this information
+ to the client application, to let them cut the
connection immediately instead of waiting
+ for a timeout.
+ </description>
+ </property>
+ <property>
+ <name>hbase.status.multicast.address.ip</name>
+ <value>226.1.1.3</value>
+ <description>
+ Multicast address to use for the status publication by
multicast.
+ </description>
+ </property>
+ <property>
+ <name>hbase.status.multicast.address.port</name>
+ <value>16100</value>
+ <description>
+ Multicast port to use for the status publication by
multicast.
+ </description>
+ </property>
+ <property>
+ <name>hbase.dynamic.jars.dir</name>
+ <value>${hbase.rootdir}/lib</value>
+ <description>
+ The directory from which the custom filter JARs can be
loaded
+ dynamically by the region server without the need to
restart. However,
+ an already loaded filter/co-processor class would not
be un-loaded. See
+ HBASE-1936 for more details.
+
+ Does not apply to coprocessors.
+ </description>
+ </property>
+ <property>
+ <name>hbase.security.authentication</name>
+ <value>simple</value>
+ <description>
+ Controls whether or not secure authentication is
enabled for HBase.
+ Possible values are 'simple' (no authentication), and
'kerberos'.
+ </description>
+ </property>
+ <property>
+ <name>hbase.master.loadbalance.bytable</name>
+ <value>false</value>
+ <description>Factor Table name when the balancer runs.
+ Default: false.
+ </description>
+ </property>
+ <property>
+ <name>hbase.rest.csrf.enabled</name>
+ <value>false</value>
+ <description>
+ Set to true to enable protection against cross-site
request forgery (CSRF)
+ </description>
+ </property>
+ <property>
+ <name>hbase.rest-csrf.browser-useragents-regex</name>
+ <value>^Mozilla.*,^Opera.*</value>
+ <description>
+ A comma-separated list of regular expressions used to
match against an HTTP
+ request's User-Agent header when protection against
cross-site request
+ forgery (CSRF) is enabled for REST server by setting
+ hbase.rest.csrf.enabled to true. If the incoming
User-Agent matches
+ any of these regular expressions, then the request is
considered to be sent
+ by a browser, and therefore CSRF prevention is
enforced. If the request's
+ User-Agent does not match any of these regular
expressions, then the request
+ is considered to be sent by something other than a
browser, such as scripted
+ automation. In this case, CSRF is not a potential
attack vector, so
+ the prevention is not enforced. This helps achieve
backwards-compatibility
+ with existing automation that has not been updated to
send the CSRF
+ prevention header.
+ </description>
+ </property>
+ <property>
+ <name>hbase.security.exec.permission.checks</name>
+ <value>false</value>
+ <description>
+ If this setting is enabled and ACL based access control
is active (the
+ AccessController coprocessor is installed either as a
system coprocessor
+ or on a table as a table coprocessor) then you must
grant all relevant
+ users EXEC privilege if they require the ability to
execute coprocessor
+ endpoint calls. EXEC privilege, like any other
permission, can be
+ granted globally to a user, or to a user on a per table
or per namespace
+ basis. For more information on coprocessor endpoints,
see the coprocessor
+ section of the HBase online manual. For more
information on granting or
+ revoking permissions using the AccessController, see
the security
+ section of the HBase online manual.
+ </description>
+ </property>
+ <property>
+ <name>hbase.procedure.regionserver.classes</name>
+ <value></value>
+ <description>A comma-separated list of
+
org.apache.hadoop.hbase.procedure.RegionServerProcedureManager procedure
managers that are
+ loaded by default on the active HRegionServer process.
The lifecycle methods (init/start/stop)
+ will be called by the active HRegionServer process to
perform the specific globally barriered
+ procedure. After implementing your own
RegionServerProcedureManager, just put it in
+ HBase's classpath and add the fully qualified class
name here.
+ </description>
+ </property>
+ <property>
+ <name>hbase.procedure.master.classes</name>
+ <value></value>
+ <description>A comma-separated list of
+
org.apache.hadoop.hbase.procedure.MasterProcedureManager procedure managers
that are
+ loaded by default on the active HMaster process. A
procedure is identified by its signature and
+ users can use the signature and an instant name to
trigger an execution of a globally barriered
+ procedure. After implementing your own
MasterProcedureManager, just put it in HBase's classpath
+ and add the fully qualified class name
here.</description>
+ </property>
+ <property>
+ <name>hbase.regionserver.storefile.refresh.period</name>
+ <value>0</value>
+ <description>
+ The period (in milliseconds) for refreshing the store
files for the secondary regions. 0
+ means this feature is disabled. Secondary regions see
new files (from flushes and
+ compactions) from primary once the secondary region
refreshes the list of files in the
+ region (there is no notification mechanism). But too
frequent refreshes might cause
+ extra Namenode pressure. If the files cannot be
refreshed for longer than HFile TTL
+ (hbase.master.hfilecleaner.ttl) the requests are
rejected. Configuring HFile TTL to a larger
+ value is also recommended with this setting.
+ </description>
+ </property>
+ <property>
+ <name>hbase.region.replica.replication.enabled</name>
+ <value>false</value>
+ <description>
+ Whether asynchronous WAL replication to the secondary
region replicas is enabled or not.
+ If this is enabled, a replication peer named
"region_replica_replication" will be created
+ which will tail the logs and replicate the mutations to
region replicas for tables that
+ have region replication > 1. If this is enabled once,
disabling this replication also
+ requires disabling the replication peer using shell or
Admin java class.
+ Replication to secondary region replicas works over
standard inter-cluster replication.
+ </description>
+ </property>
+ <property>
+ <name>hbase.security.visibility.mutations.checkauths</name>
+ <value>false</value>
+ <description>
+ This property, if enabled, will check whether the labels
in the visibility
+ expression are associated with the user issuing the
mutation
+ </description>
+ </property>
+ <property>
+ <name>hbase.http.max.threads</name>
+ <value>16</value>
+ <description>
+ The maximum number of threads that the HTTP Server will
create in its
+ ThreadPool.
+ </description>
+ </property>
+ <property>
+ <name>hbase.replication.source.maxthreads</name>
+ <value>10</value>
+ <description>
+ The maximum number of threads any replication source
will use for
+ shipping edits to the sinks in parallel. This also
limits the number of
+ chunks each replication batch is broken into. Larger
values can improve
+ the replication throughput between the master and slave
clusters. The
+ default of 10 will rarely need to be changed.
+ </description>
+ </property>
+ <!-- Static Web User Filter properties. -->
+ <property>
+ <name>hbase.http.staticuser.user</name>
+ <value>dr.stack</value>
+ <description>
+ The user name to filter as, on static web filters
+ while rendering content. An example use is the HDFS
+ web UI (user to be used for browsing files).
+ </description>
+ </property>
+ <property>
+ <name>hbase.regionserver.handler.abort.on.error.percent</name>
+ <value>0.5</value>
+ <description>The percent of region server RPC threads failed to
abort RS.
+ -1 Disable aborting; 0 Abort if even a single handler
has died;
+ 0.x Abort only when this percent of handlers have died;
+ 1 Abort only if all of the handlers have died.</description>
+ </property>
+ <!-- Mob properties. -->
+ <property>
+ <name>hbase.mob.file.cache.size</name>
+ <value>1000</value>
+ <description>
+ Number of opened file handlers to cache.
+ A larger value will benefit reads by providing more
file handlers per mob
+ file cache and would reduce frequent file opening and
closing.
+ However, if this is set too high, this could lead to a
"too many opened file handlers" error.
+ The default value is 1000.
+ </description>
+ </property>
+ <property>
+ <name>hbase.mob.cache.evict.period</name>
+ <value>3600</value>
+ <description>
+ The amount of time in seconds before the mob cache
evicts cached mob files.
+ The default value is 3600 seconds.
+ </description>
+ </property>
+ <property>
+ <name>hbase.mob.cache.evict.remain.ratio</name>
+ <value>0.5f</value>
+ <description>
+ The ratio (between 0.0 and 1.0) of files that remains
cached after an eviction
+ is triggered when the number of cached mob files
exceeds the hbase.mob.file.cache.size.
+ The default value is 0.5f.
+ </description>
+ </property>
+ <property>
+ <name>hbase.master.mob.ttl.cleaner.period</name>
+ <value>86400</value>
+ <description>
+ The period that ExpiredMobFileCleanerChore runs. The
unit is second.
+ The default value is one day. The MOB file name uses
only the date part of
+ the file creation time in it. We use this time for
deciding TTL expiry of
+ the files. So the removal of TTL expired files might be
delayed. The max
+ delay might be 24 hrs.
+ </description>
+ </property>
+ <property>
+ <name>hbase.mob.compaction.mergeable.threshold</name>
+ <value>1342177280</value>
+ <description>
+ If the size of a mob file is less than this value, it's
regarded as a small
+ file and needs to be merged in mob compaction. The
default value is 1280MB.
+ </description>
+ </property>
+ <property>
+ <name>hbase.mob.delfile.max.count</name>
+ <value>3</value>
+ <description>
+ The max number of del files that is allowed in the mob
compaction.
+ In the mob compaction, when the number of existing del
files is larger than
+ this value, they are merged until number of del files
is not larger than this value.
+ The default value is 3.
+ </description>
+ </property>
+ <property>
+ <name>hbase.mob.compaction.batch.size</name>
+ <value>100</value>
+ <description>
+ The max number of the mob files that is allowed in a
batch of the mob compaction.
+ The mob compaction merges the small mob files to bigger
ones. If the number of the
+ small files is very large, it could lead to a "too many
opened file handlers" in the merge.
+ And the merge has to be split into batches. This value
limits the number of mob files
+ that are selected in a batch of the mob compaction. The
default value is 100.
+ </description>
+ </property>
+ <property>
+ <name>hbase.mob.compaction.chore.period</name>
+ <value>604800</value>
+ <description>
+ The period that MobCompactionChore runs. The unit is
second.
+ The default value is one week.
+ </description>
+ </property>
+ <property>
+ <name>hbase.mob.compaction.threads.max</name>
+ <value>1</value>
+ <description>
+ The max number of threads used in MobCompactor.
+ </description>
+ </property>
+ <property>
+ <name>hbase.snapshot.master.timeout.millis</name>
+ <value>300000</value>
+ <description>
+ Timeout for master for the snapshot procedure execution.
+ </description>
+ </property>
+ <property>
+ <name>hbase.snapshot.region.timeout</name>
+ <value>300000</value>
+ <description>
+ Timeout for regionservers to keep threads in snapshot
request pool waiting.
+ </description>
+ </property>
+ <property>
+ <name>hbase.rpc.rows.warning.threshold</name>
+ <value>5000</value>
+ <description>
+ Number of rows in a batch operation above which a
warning will be logged.
+ </description>
+ </property>
+ <property>
+ <name>hbase.master.wait.on.service.seconds</name>
+ <value>30</value>
+ <description>Default is 5 minutes. Make it 30 seconds for
tests. See
+ HBASE-19794 for some context.</description>
+ </property>
+
+ <!--NOTE: The HBase client tries to load the classes that are configured in
hbase-default.xml. -->
+ <!--But all of these classes have already been shaded and can't be
loaded by their default names, -->
+ <!--so the following classes are Flink shaded classes.-->
+ <property>
+ <name>hbase.master.logcleaner.plugins</name>
+
<value>org.apache.flink.hbase.shaded.org.apache.hadoop.hbase.master.cleaner.TimeToLiveLogCleaner,org.apache.flink.hbase.shaded.org.apache.hadoop.hbase.master.cleaner.TimeToLiveProcedureWALCleaner</value>
+ <description>A comma-separated list of BaseLogCleanerDelegate
invoked by
+ the LogsCleaner service. These WAL cleaners are called
in order,
+ so put the cleaner that prunes the most files in front.
To
+ implement your own BaseLogCleanerDelegate, just put it
in HBase's classpath
+ and add the fully qualified class name here. Always add
the above
+ default log cleaners in the list.</description>
+ </property>
+ <property>
+ <name>hbase.master.hfilecleaner.plugins</name>
+
<value>org.apache.flink.hbase.shaded.org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner</value>
+ <description>A comma-separated list of BaseHFileCleanerDelegate
invoked by
+ the HFileCleaner service. These HFiles cleaners are
called in order,
+ so put the cleaner that prunes the most files in front.
To
+ implement your own BaseHFileCleanerDelegate, just put
it in HBase's classpath
+ and add the fully qualified class name here. Always add
the above
+ default log cleaners in the list as they will be
overwritten in
+ hbase-site.xml.</description>
+ </property>
+ <property>
+ <name>hbase.regionserver.hlog.reader.impl</name>
+
<value>org.apache.flink.hbase.shaded.org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader</value>
+ <description>The WAL file reader implementation.</description>
+ </property>
+ <property>
+ <name>hbase.regionserver.hlog.writer.impl</name>
+
<value>org.apache.flink.hbase.shaded.org.apache.hadoop.hbase.regionserver.wal.ProtobufLogWriter</value>
+ <description>The WAL file writer implementation.</description>
+ </property>
+ <property>
+ <name>hbase.regionserver.region.split.policy</name>
+
<value>org.apache.flink.hbase.shaded.org.apache.hadoop.hbase.regionserver.SteppingSplitPolicy</value>
+ <description>
+ A split policy determines when a region should be
split. The various
+ other split policies that are available currently are
BusyRegionSplitPolicy,
+ ConstantSizeRegionSplitPolicy,
DisabledRegionSplitPolicy,
+ DelimitedKeyPrefixRegionSplitPolicy,
KeyPrefixRegionSplitPolicy, and
+ SteppingSplitPolicy. DisabledRegionSplitPolicy blocks
manual region splitting.
+ </description>
+ </property>
+ <property>
+ <name>hbase.status.publisher.class</name>
+
<value>org.apache.flink.hbase.shaded.org.apache.hadoop.hbase.master.ClusterStatusPublisher$MulticastPublisher</value>
+ <description>
+ Implementation of the status publication with a
multicast message.
+ </description>
+ </property>
+ <property>
+ <name>hbase.status.listener.class</name>
+
<value>org.apache.flink.hbase.shaded.org.apache.hadoop.hbase.client.ClusterStatusListener$MulticastListener</value>
+ <description>
+ Implementation of the status listener with a multicast
message.
+ </description>
+ </property>
+ <property>
+ <name>hbase.rest.filter.classes</name>
+
<value>org.apache.flink.hbase.shaded.org.apache.hadoop.hbase.rest.filter.GzipFilter</value>
+ <description>
+ Servlet filters for REST service.
+ </description>
+ </property>
+ <property>
+ <name>hbase.master.loadbalancer.class</name>
+
<value>org.apache.flink.hbase.shaded.org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer</value>
+ <description>
+ Class used to execute the regions balancing when the
period occurs.
+ See the class comment for more on how it works
+
http://hbase.apache.org/devapidocs/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.html
+ It replaces the DefaultLoadBalancer as the default
(since renamed
+ as the SimpleLoadBalancer).
+ </description>
+ </property>
+ <property>
+ <name>hbase.coordinated.state.manager.class</name>
+
<value>org.apache.flink.hbase.shaded.org.apache.hadoop.hbase.coordination.ZkCoordinatedStateManager</value>
+ <description>Fully qualified name of class implementing
coordinated state manager.</description>
+ </property>
+ <property>
+ <name>hbase.http.filter.initializers</name>
+
<value>org.apache.flink.hbase.shaded.org.apache.hadoop.hbase.http.lib.StaticUserWebFilter</value>
+ <description>
+ A comma separated list of class names. Each class in
the list must extend
+ org.apache.hadoop.hbase.http.FilterInitializer. The
corresponding Filter will
+ be initialized. Then, the Filter will be applied to all
user facing jsp
+ and servlet web pages.
+ The ordering of the list defines the ordering of the
filters.
+ The default StaticUserWebFilter adds a user principal as
defined by the
+ hbase.http.staticuser.user property.
+ </description>
+ </property>
+ <property>
+ <name>hbase.replication.rpc.codec</name>
+
<value>org.apache.flink.hbase.shaded.org.apache.hadoop.hbase.codec.KeyValueCodecWithTags</value>
+ <description>
+ The codec that is to be used when replication is
enabled so that
+ the tags are also replicated. This is used along with
HFileV3 which
+ supports tags in them. If tags are not used or if the
hfile version used
+ is HFileV2 then KeyValueCodec can be used as the
replication codec. Note that
+ using KeyValueCodecWithTags for replication when there
are no tags causes no harm.
+ </description>
+ </property>
+ <property>
+ <name>hbase.master.normalizer.class</name>
+
<value>org.apache.flink.hbase.shaded.org.apache.hadoop.hbase.master.normalizer.SimpleRegionNormalizer</value>
+ <description>
+ Class used to execute the region normalization when the
period occurs.
+ See the class comment for more on how it works
+
http://hbase.apache.org/devapidocs/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.html
+ </description>
+ </property>
+ <property>
+ <name>hbase.mob.compactor.class</name>
+
<value>org.apache.flink.hbase.shaded.org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactor</value>
+ <description>
+ Implementation of mob compactor, the default one is
PartitionedMobCompactor.
+ </description>
+ </property>
+</configuration>
\ No newline at end of file
diff --git a/licenses/inlong-sort/LICENSE b/licenses/inlong-sort/LICENSE
index 1a771951d..b95f13d3a 100644
--- a/licenses/inlong-sort/LICENSE
+++ b/licenses/inlong-sort/LICENSE
@@ -620,7 +620,7 @@ The text of each license is the standard Apache 2.0 license.
com.google.guava:guava:19.0 - Guava: Google Core Libraries for Java
(https://github.com/google/guava), (The Apache Software License, Version 2.0)
com.google.inject:guice:3.0 - Google Guice - Core Library
(https://github.com/google/guice), (The Apache Software License, Version 2.0)
com.google.inject.extensions:guice-servlet:3.0 - Google Guice - Extensions -
Servlet (https://github.com/google/guice), (The Apache Software License,
Version 2.0)
- org.apache.hbase:hbase-client:2.4.12 - Apache HBase - Client
(https://hbase.apache.org/), (Apache License, Version 2.0)
+ org.apache.hbase:hbase-client:2.2.3 - Apache HBase - Client
(https://hbase.apache.org/), (Apache License, Version 2.0)
org.apache.hbase:hbase-common:2.4.12 - Apache HBase - Common
(https://hbase.apache.org/), (Apache License, Version 2.0)
org.apache.hbase:hbase-hadoop-compat:2.4.12 - Apache HBase - Hadoop
Compatibility
(https://hbase.apache.org/hbase-build-configuration/hbase-hadoop-compat),
(Apache License, Version 2.0)
org.apache.hbase:hbase-hadoop2-compat:2.4.12 - Apache HBase - Hadoop Two
Compatibility
(https://hbase.apache.org/hbase-build-configuration/hbase-hadoop2-compat),
(Apache License, Version 2.0)
@@ -728,9 +728,7 @@ The text of each license is the standard Apache 2.0 license.
org.apache.velocity:velocity-engine-core:2.3 - Apache Velocity - Engine
(https://github.com/apache/velocity-engine/tree/2.3/velocity-engine-core),
(Apache License, Version 2.0)
com.fasterxml.woodstox:woodstox-core:5.0.3 - Woodstox
(https://github.com/FasterXML/woodstox/tree/woodstox-core-5.0.3), (The Apache
License, Version 2.0)
xml-apis:xml-apis:1.4.01 - XML Commons External Components XML APIs
(http://xml.apache.org/commons/components/external/), (The Apache Software
License, Version 2.0), (Apache 2.0, The SAX License, The W3C License)
- org.apache.zookeeper:zookeeper:3.6.3 - Apache ZooKeeper - Server
(https://github.com/apache/zookeeper/tree/release-3.6.3/zookeeper-server),
(Apache License, Version 2.0)
- org.apache.zookeeper:zookeeper-jute:3.6.3 - Apache ZooKeeper - Jute
(https://github.com/apache/zookeeper/tree/release-3.6.3/zookeeper-jute),
(Apache License, Version 2.0)
-
+ org.apache.zookeeper:zookeeper:3.4.14 - Apache ZooKeeper - Server
(https://github.com/apache/zookeeper/tree/release-3.4.14/zookeeper-server),
(Apache License, Version 2.0)
========================================================================
Apache 2.0 licenses