This is an automated email from the ASF dual-hosted git repository.
wlo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git
The following commit(s) were added to refs/heads/master by this push:
new 26fd24458 [GOBBLIN-1749] Add dependency for handling xz-compressed Avro file (#3609)
26fd24458 is described below
commit 26fd2445850c91a04dc1016b0ffc3e7f7a3fc915
Author: Kengo Seki <[email protected]>
AuthorDate: Wed Feb 15 03:17:04 2023 +0900
[GOBBLIN-1749] Add dependency for handling xz-compressed Avro file (#3609)
* Add dependency on xz for handling xz-compressed Avro files
* Fix unit test to ensure all codecs are correctly supported
* Update AvroHdfsDataWriter's documentation to cover all compression codecs
---
gobblin-core/build.gradle | 1 +
.../gobblin/writer/AvroHdfsDataWriterTest.java | 29 ++++++++++++++--------
gobblin-docs/sinks/AvroHdfsDataWriter.md | 8 +++---
gradle/scripts/dependencyDefinitions.gradle | 3 ++-
4 files changed, 25 insertions(+), 16 deletions(-)
diff --git a/gobblin-core/build.gradle b/gobblin-core/build.gradle
index 7daa1a9b3..482e77daf 100644
--- a/gobblin-core/build.gradle
+++ b/gobblin-core/build.gradle
@@ -57,6 +57,7 @@ dependencies {
compile externalDependency.oltu
compile externalDependency.opencsv
compile externalDependency.hadoopHdfs
+ compile externalDependency.xz
runtimeOnly externalDependency.protobuf
testRuntime externalDependency.hadoopAws
diff --git
a/gobblin-core/src/test/java/org/apache/gobblin/writer/AvroHdfsDataWriterTest.java
b/gobblin-core/src/test/java/org/apache/gobblin/writer/AvroHdfsDataWriterTest.java
index ee9eb4962..e8b78c734 100644
---
a/gobblin-core/src/test/java/org/apache/gobblin/writer/AvroHdfsDataWriterTest.java
+++
b/gobblin-core/src/test/java/org/apache/gobblin/writer/AvroHdfsDataWriterTest.java
@@ -32,6 +32,7 @@ import org.apache.hadoop.fs.Path;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import com.google.gson.Gson;
@@ -54,7 +55,6 @@ public class AvroHdfsDataWriterTest {
private static final Type FIELD_ENTRY_TYPE = new TypeToken<Map<String,
Object>>() {}.getType();
private Schema schema;
- private DataWriter<GenericRecord> writer;
private String filePath;
private State properties;
@@ -83,24 +83,31 @@ public class AvroHdfsDataWriterTest {
properties.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR,
TestConstants.TEST_OUTPUT_DIR);
properties.setProp(ConfigurationKeys.WRITER_FILE_PATH, this.filePath);
properties.setProp(ConfigurationKeys.WRITER_FILE_NAME,
TestConstants.TEST_FILE_NAME);
+ }
- // Build a writer to write test records
- this.writer = new
AvroDataWriterBuilder().writeTo(Destination.of(Destination.DestinationType.HDFS,
properties))
-
.writeInFormat(WriterOutputFormat.AVRO).withWriterId(TestConstants.TEST_WRITER_ID).withSchema(this.schema)
- .withBranches(1).forBranch(0).build();
+ @DataProvider(name = "codecs")
+ private String[] codecs() {
+ return new String[]{"null", "deflate", "snappy", "bzip2", "xz",
"zstandard"};
}
- @Test
- public void testWrite() throws IOException {
+ @Test(dataProvider = "codecs")
+ public void testWrite(String codec) throws IOException {
+ properties.setProp(ConfigurationKeys.WRITER_CODEC_TYPE, codec);
+ DataWriterBuilder<Schema, GenericRecord> builder = new
AvroDataWriterBuilder()
+ .writeTo(Destination.of(Destination.DestinationType.HDFS,
properties))
+
.writeInFormat(WriterOutputFormat.AVRO).withWriterId(TestConstants.TEST_WRITER_ID)
+ .withSchema(this.schema).withBranches(1).forBranch(0);
+ DataWriter<GenericRecord> writer = builder.build();
+
// Write all test records
for (String record : TestConstants.JSON_RECORDS) {
- this.writer.write(convertRecord(record));
+ writer.write(convertRecord(record));
}
- Assert.assertEquals(this.writer.recordsWritten(), 3);
+ Assert.assertEquals(writer.recordsWritten(), 3);
- this.writer.close();
- this.writer.commit();
+ writer.close();
+ writer.commit();
File outputFile =
new File(TestConstants.TEST_OUTPUT_DIR + Path.SEPARATOR +
this.filePath, TestConstants.TEST_FILE_NAME);
diff --git a/gobblin-docs/sinks/AvroHdfsDataWriter.md
b/gobblin-docs/sinks/AvroHdfsDataWriter.md
index 9eea07066..26ae7e7a0 100644
--- a/gobblin-docs/sinks/AvroHdfsDataWriter.md
+++ b/gobblin-docs/sinks/AvroHdfsDataWriter.md
@@ -15,8 +15,8 @@ For more info, see
[`AvroHdfsDataWriter`](https://github.com/apache/gobblin/sear
# Configuration
-| Key | Type | Description | Default Value |
-|-----|------|-------------|---------------|
-| writer.codec.type | One of null,deflate,snappy,bzip2,xz | Type of the compression codec | deflate |
-| writer.deflate.level | 1-9 | The compression level for the "deflate" codec | 9 |
+| Key | Type | Description | Default Value |
+|----------------------|-----------------------------------------------|-----------------------------------------------|---------------|
+| writer.codec.type | One of null,deflate,snappy,bzip2,xz,zstandard | Type of the compression codec | deflate |
+| writer.deflate.level | 1-9 | The compression level for the "deflate" codec | 9 |
diff --git a/gradle/scripts/dependencyDefinitions.gradle
b/gradle/scripts/dependencyDefinitions.gradle
index b15f76e0c..dddda7bc9 100644
--- a/gradle/scripts/dependencyDefinitions.gradle
+++ b/gradle/scripts/dependencyDefinitions.gradle
@@ -210,7 +210,8 @@ ext.externalDependency = [
],
"postgresConnector": "org.postgresql:postgresql:42.1.4",
"testContainers": "org.testcontainers:testcontainers:1.17.3",
- "testContainersMysql": "org.testcontainers:mysql:1.17.3"
+ "testContainersMysql": "org.testcontainers:mysql:1.17.3",
+ "xz": "org.tukaani:xz:1.8"
]
if (!isDefaultEnvironment)