This is an automated email from the ASF dual-hosted git repository.
zghao pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2 by this push:
new b6afc5f HBASE-21810 bulkload support set hfile compression on client
b6afc5f is described below
commit b6afc5fbc1e7343192d8eaa355ee21b0b760f722
Author: chenyechao <[email protected]>
AuthorDate: Mon Mar 11 19:37:17 2019 +0800
HBASE-21810 bulkload support set hfile compression on client
Signed-off-by: Guanghao Zhang <[email protected]>
---
.../hadoop/hbase/mapreduce/HFileOutputFormat2.java | 14 ++++++--
.../hbase/mapreduce/TestHFileOutputFormat2.java | 42 ++++++++++++++++++++++
2 files changed, 54 insertions(+), 2 deletions(-)
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java
index cdccb13..59b1994 100644
--- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java
@@ -159,9 +159,11 @@ public class HFileOutputFormat2
// This constant is public since the client can modify this when setting
// up their conf object and thus refer to this symbol.
// It is present for backwards compatibility reasons. Use it only to
- // override the auto-detection of datablock encoding.
+ // override the auto-detection of datablock encoding and compression.
public static final String DATABLOCK_ENCODING_OVERRIDE_CONF_KEY =
"hbase.mapreduce.hfileoutputformat.datablock.encoding";
+ public static final String COMPRESSION_OVERRIDE_CONF_KEY =
+ "hbase.mapreduce.hfileoutputformat.compression";
/**
* Keep locality while generating HFiles for bulkload. See HBASE-12596
@@ -209,6 +211,13 @@ public class HFileOutputFormat2
Compression.Algorithm.NONE.getName());
final Algorithm defaultCompression = HFileWriterImpl
.compressionByName(defaultCompressionStr);
+ String compressionStr = conf.get(COMPRESSION_OVERRIDE_CONF_KEY);
+ final Algorithm overriddenCompression;
+ if (compressionStr != null) {
+ overriddenCompression = Compression.getCompressionAlgorithmByName(compressionStr);
+ } else {
+ overriddenCompression = null;
+ }
final boolean compactionExclude = conf.getBoolean(
"hbase.mapreduce.hfileoutputformat.compaction.exclude", false);
@@ -383,7 +392,8 @@ public class HFileOutputFormat2
new Path(Bytes.toString(tableName), Bytes.toString(family)));
}
WriterLength wl = new WriterLength();
- Algorithm compression = compressionMap.get(tableAndFamily);
+ Algorithm compression = overriddenCompression;
+ compression = compression == null ? compressionMap.get(tableAndFamily) : compression;
compression = compression == null ? defaultCompression : compression;
BloomType bloomType = bloomTypeMap.get(tableAndFamily);
bloomType = bloomType == null ? BloomType.NONE : bloomType;
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java
index 679b231..76c9359 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java
@@ -1493,5 +1493,47 @@ public class TestHFileOutputFormat2 {
return null;
}
+
+ @Test
+ public void TestConfigureCompression() throws Exception {
+ Configuration conf = new Configuration(this.util.getConfiguration());
+ RecordWriter<ImmutableBytesWritable, Cell> writer = null;
+ TaskAttemptContext context = null;
+ Path dir = util.getDataTestDir("TestConfigureCompression");
+ String hfileoutputformatCompression = "gz";
+
+ try {
+ conf.set(HFileOutputFormat2.OUTPUT_TABLE_NAME_CONF_KEY, TABLE_NAMES[0].getNameAsString());
+ conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
+
+ conf.set(HFileOutputFormat2.COMPRESSION_OVERRIDE_CONF_KEY, hfileoutputformatCompression);
+
+ Job job = Job.getInstance(conf);
+ FileOutputFormat.setOutputPath(job, dir);
+ context = createTestTaskAttemptContext(job);
+ HFileOutputFormat2 hof = new HFileOutputFormat2();
+ writer = hof.getRecordWriter(context);
+ final byte[] b = Bytes.toBytes("b");
+
+ KeyValue kv = new KeyValue(b, b, b, HConstants.LATEST_TIMESTAMP, b);
+ writer.write(new ImmutableBytesWritable(), kv);
+ writer.close(context);
+ writer = null;
+ FileSystem fs = dir.getFileSystem(conf);
+ RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(dir, true);
+ while (iterator.hasNext()) {
+ LocatedFileStatus keyFileStatus = iterator.next();
+ HFile.Reader reader =
+ HFile.createReader(fs, keyFileStatus.getPath(), new CacheConfig(conf), true, conf);
+ assertEquals(reader.getCompressionAlgorithm().getName(), hfileoutputformatCompression);
+ }
+ } finally {
+ if (writer != null && context != null) {
+ writer.close(context);
+ }
+ dir.getFileSystem(conf).delete(dir, true);
+ }
+
+ }
}