This is an automated email from the ASF dual-hosted git repository.
wlo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git
The following commit(s) were added to refs/heads/master by this push:
new 315d9599e [GOBBLIN-1953] Add an exception message to orc writer
validation GTE (#3826)
315d9599e is described below
commit 315d9599e08521dd71ae6dccb09b7f7254961564
Author: Matthew Ho <[email protected]>
AuthorDate: Tue Nov 14 13:13:45 2023 -0800
[GOBBLIN-1953] Add an exception message to orc writer validation GTE (#3826)
---
.../gobblin/writer/GobblinBaseOrcWriter.java | 2 +
.../gobblin/writer/GobblinBaseOrcWriterTest.java | 68 +++++++++++++++++-----
2 files changed, 57 insertions(+), 13 deletions(-)
diff --git
a/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GobblinBaseOrcWriter.java
b/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GobblinBaseOrcWriter.java
index bb6c11aaa..2ed31b93b 100644
---
a/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GobblinBaseOrcWriter.java
+++
b/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GobblinBaseOrcWriter.java
@@ -399,6 +399,8 @@ public abstract class GobblinBaseOrcWriter<S, D> extends
FsDataWriter<D> {
HadoopUtils.deletePath(fs, filePath, false);
GobblinEventBuilder eventBuilder = new
GobblinEventBuilder(CORRUPTED_ORC_FILE_DELETION_EVENT,
GobblinBaseOrcWriter.ORC_WRITER_NAMESPACE);
eventBuilder.addMetadata("filePath", filePath.toString());
+ eventBuilder.addMetadata("exceptionType",
e.getClass().getCanonicalName());
+ eventBuilder.addMetadata("exceptionMessage", e.getMessage());
EventSubmitter.submit(metricContext, eventBuilder);
throw e;
diff --git
a/gobblin-modules/gobblin-orc/src/test/java/org/apache/gobblin/writer/GobblinBaseOrcWriterTest.java
b/gobblin-modules/gobblin-orc/src/test/java/org/apache/gobblin/writer/GobblinBaseOrcWriterTest.java
index 813870597..1448848cf 100644
---
a/gobblin-modules/gobblin-orc/src/test/java/org/apache/gobblin/writer/GobblinBaseOrcWriterTest.java
+++
b/gobblin-modules/gobblin-orc/src/test/java/org/apache/gobblin/writer/GobblinBaseOrcWriterTest.java
@@ -26,8 +26,11 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.orc.FileFormatException;
import org.apache.orc.OrcFile;
+import org.mockito.Mock;
import org.mockito.Mockito;
import org.testng.Assert;
+import org.testng.annotations.AfterTest;
+import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import com.google.common.io.Files;
@@ -36,31 +39,70 @@ import org.apache.gobblin.metrics.MetricContext;
import org.apache.gobblin.metrics.event.GobblinEventBuilder;
import static
org.apache.gobblin.writer.GobblinBaseOrcWriter.CORRUPTED_ORC_FILE_DELETION_EVENT;
+import static org.mockito.MockitoAnnotations.openMocks;
public class GobblinBaseOrcWriterTest {
+ Configuration conf;
+ FileSystem fs;
+ File tmpDir;
+ File orcFile;
+ Path orcFilePath;
+
+ @Mock
+ MetricContext mockContext;
+
+ AutoCloseable closeable;
+
+ @BeforeTest
+ public void setup() throws IOException {
+ this.closeable = openMocks(this);
+ this.conf = new Configuration();
+ this.fs = FileSystem.getLocal(conf);
+ this.tmpDir = Files.createTempDir();
+ this.orcFile = new File(tmpDir, "test.orc");
+ this.orcFilePath = new Path(orcFile.getAbsolutePath());
+ }
+
+ @AfterTest
+ public void tearDown()
+ throws Exception {
+ this.closeable.close();
+ }
@Test
- public void testOrcValidation()
+ public void testOrcValidationOnlyHeader()
throws IOException {
- Configuration conf = new Configuration();
- FileSystem fs = FileSystem.getLocal(conf);
- File tmpDir = Files.createTempDir();
- File corruptedOrcFile = new File(tmpDir, "test.orc");
- try (FileWriter writer = new FileWriter(corruptedOrcFile)) {
- // write a corrupted ORC file that only contains the header but without
content
+ try (FileWriter writer = new FileWriter(orcFile)) {
+ // write a corrupted ORC file that only contains the header
writer.write(OrcFile.MAGIC);
}
- OrcFile.ReaderOptions readerOptions = new OrcFile.ReaderOptions(conf);
+ Assert.assertThrows(FileFormatException.class, () ->
GobblinBaseOrcWriter.assertOrcFileIsValid(
+ fs, orcFilePath, new OrcFile.ReaderOptions(conf), mockContext));
+
+ GobblinEventBuilder eventBuilder = new
GobblinEventBuilder(CORRUPTED_ORC_FILE_DELETION_EVENT,
GobblinBaseOrcWriter.ORC_WRITER_NAMESPACE);
+ eventBuilder.addMetadata("filePath", orcFilePath.toString());
+ eventBuilder.addMetadata("exceptionType",
"org.apache.orc.FileFormatException");
+ eventBuilder.addMetadata("exceptionMessage", String.format("Not a valid
ORC file %s (maxFileLength= 9223372036854775807)", orcFilePath));
+ Mockito.verify(mockContext,
Mockito.times(1)).submitEvent(eventBuilder.build());
+ }
+
+ @Test
+ public void testOrcValidationWithContent() throws IOException {
+ try (FileWriter writer = new FileWriter(orcFile)) {
+ // write a corrupted ORC file that only contains the header and invalid
protobuf content
+ writer.write(OrcFile.MAGIC);
+ writer.write("\n");
+ }
- MetricContext mockContext = Mockito.mock(MetricContext.class);
- Path p = new Path(corruptedOrcFile.getAbsolutePath());
- Assert.assertThrows(FileFormatException.class,
- () -> GobblinBaseOrcWriter.assertOrcFileIsValid(fs, p, readerOptions,
mockContext));
+
Assert.assertThrows(com.google.protobuf25.InvalidProtocolBufferException.class,
+ () -> GobblinBaseOrcWriter.assertOrcFileIsValid(fs, orcFilePath, new
OrcFile.ReaderOptions(conf), mockContext));
GobblinEventBuilder eventBuilder = new
GobblinEventBuilder(CORRUPTED_ORC_FILE_DELETION_EVENT,
GobblinBaseOrcWriter.ORC_WRITER_NAMESPACE);
- eventBuilder.addMetadata("filePath", p.toString());
+ eventBuilder.addMetadata("filePath", orcFilePath.toString());
+ eventBuilder.addMetadata("exceptionType",
"com.google.protobuf25.InvalidProtocolBufferException");
+ eventBuilder.addMetadata("exceptionMessage", "Protocol message tag had
invalid wire type.");
Mockito.verify(mockContext, Mockito.times(1))
.submitEvent(eventBuilder.build());
}