This is an automated email from the ASF dual-hosted git repository.

wlo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git


The following commit(s) were added to refs/heads/master by this push:
     new 315d9599e [GOBBLIN-1953] Add an exception message to orc writer 
validation GTE (#3826)
315d9599e is described below

commit 315d9599e08521dd71ae6dccb09b7f7254961564
Author: Matthew Ho <[email protected]>
AuthorDate: Tue Nov 14 13:13:45 2023 -0800

    [GOBBLIN-1953] Add an exception message to orc writer validation GTE (#3826)
---
 .../gobblin/writer/GobblinBaseOrcWriter.java       |  2 +
 .../gobblin/writer/GobblinBaseOrcWriterTest.java   | 68 +++++++++++++++++-----
 2 files changed, 57 insertions(+), 13 deletions(-)

diff --git 
a/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GobblinBaseOrcWriter.java
 
b/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GobblinBaseOrcWriter.java
index bb6c11aaa..2ed31b93b 100644
--- 
a/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GobblinBaseOrcWriter.java
+++ 
b/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GobblinBaseOrcWriter.java
@@ -399,6 +399,8 @@ public abstract class GobblinBaseOrcWriter<S, D> extends 
FsDataWriter<D> {
       HadoopUtils.deletePath(fs, filePath, false);
       GobblinEventBuilder eventBuilder = new 
GobblinEventBuilder(CORRUPTED_ORC_FILE_DELETION_EVENT, 
GobblinBaseOrcWriter.ORC_WRITER_NAMESPACE);
       eventBuilder.addMetadata("filePath", filePath.toString());
+      eventBuilder.addMetadata("exceptionType", 
e.getClass().getCanonicalName());
+      eventBuilder.addMetadata("exceptionMessage", e.getMessage());
       EventSubmitter.submit(metricContext, eventBuilder);
 
       throw e;
diff --git 
a/gobblin-modules/gobblin-orc/src/test/java/org/apache/gobblin/writer/GobblinBaseOrcWriterTest.java
 
b/gobblin-modules/gobblin-orc/src/test/java/org/apache/gobblin/writer/GobblinBaseOrcWriterTest.java
index 813870597..1448848cf 100644
--- 
a/gobblin-modules/gobblin-orc/src/test/java/org/apache/gobblin/writer/GobblinBaseOrcWriterTest.java
+++ 
b/gobblin-modules/gobblin-orc/src/test/java/org/apache/gobblin/writer/GobblinBaseOrcWriterTest.java
@@ -26,8 +26,11 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.orc.FileFormatException;
 import org.apache.orc.OrcFile;
+import org.mockito.Mock;
 import org.mockito.Mockito;
 import org.testng.Assert;
+import org.testng.annotations.AfterTest;
+import org.testng.annotations.BeforeTest;
 import org.testng.annotations.Test;
 
 import com.google.common.io.Files;
@@ -36,31 +39,70 @@ import org.apache.gobblin.metrics.MetricContext;
 import org.apache.gobblin.metrics.event.GobblinEventBuilder;
 
 import static 
org.apache.gobblin.writer.GobblinBaseOrcWriter.CORRUPTED_ORC_FILE_DELETION_EVENT;
+import static org.mockito.MockitoAnnotations.openMocks;
 
 
 public class GobblinBaseOrcWriterTest {
+  Configuration conf;
+  FileSystem fs;
+  File tmpDir;
+  File orcFile;
+  Path orcFilePath;
+
+  @Mock
+  MetricContext mockContext;
+
+  AutoCloseable closeable;
+
+  @BeforeTest
+  public void setup() throws IOException {
+    this.closeable = openMocks(this);
+    this.conf = new Configuration();
+    this.fs = FileSystem.getLocal(conf);
+    this.tmpDir = Files.createTempDir();
+    this.orcFile = new File(tmpDir, "test.orc");
+    this.orcFilePath = new Path(orcFile.getAbsolutePath());
+  }
+
+  @AfterTest
+  public void tearDown()
+      throws Exception {
+    this.closeable.close();
+  }
 
   @Test
-  public void testOrcValidation()
+  public void testOrcValidationOnlyHeader()
       throws IOException {
-    Configuration conf = new Configuration();
-    FileSystem fs = FileSystem.getLocal(conf);
-    File tmpDir = Files.createTempDir();
-    File corruptedOrcFile = new File(tmpDir, "test.orc");
-    try (FileWriter writer = new FileWriter(corruptedOrcFile)) {
-      // write a corrupted ORC file that only contains the header but without 
content
+    try (FileWriter writer = new FileWriter(orcFile)) {
+      // write a corrupted ORC file that only contains the header
       writer.write(OrcFile.MAGIC);
     }
 
-    OrcFile.ReaderOptions readerOptions = new OrcFile.ReaderOptions(conf);
+    Assert.assertThrows(FileFormatException.class, () -> 
GobblinBaseOrcWriter.assertOrcFileIsValid(
+        fs, orcFilePath, new OrcFile.ReaderOptions(conf), mockContext));
+
+    GobblinEventBuilder eventBuilder = new 
GobblinEventBuilder(CORRUPTED_ORC_FILE_DELETION_EVENT, 
GobblinBaseOrcWriter.ORC_WRITER_NAMESPACE);
+    eventBuilder.addMetadata("filePath", orcFilePath.toString());
+    eventBuilder.addMetadata("exceptionType", 
"org.apache.orc.FileFormatException");
+    eventBuilder.addMetadata("exceptionMessage", String.format("Not a valid 
ORC file %s (maxFileLength= 9223372036854775807)", orcFilePath));
+    Mockito.verify(mockContext, 
Mockito.times(1)).submitEvent(eventBuilder.build());
+  }
+
+  @Test
+  public void testOrcValidationWithContent() throws IOException {
+    try (FileWriter writer = new FileWriter(orcFile)) {
+      // write a corrupted ORC file that contains the header followed by invalid 
protobuf content
+      writer.write(OrcFile.MAGIC);
+      writer.write("\n");
+    }
 
-    MetricContext mockContext = Mockito.mock(MetricContext.class);
-    Path p = new Path(corruptedOrcFile.getAbsolutePath());
-    Assert.assertThrows(FileFormatException.class,
-        () -> GobblinBaseOrcWriter.assertOrcFileIsValid(fs, p, readerOptions, 
mockContext));
+    
Assert.assertThrows(com.google.protobuf25.InvalidProtocolBufferException.class,
+        () -> GobblinBaseOrcWriter.assertOrcFileIsValid(fs, orcFilePath, new 
OrcFile.ReaderOptions(conf), mockContext));
 
     GobblinEventBuilder eventBuilder = new 
GobblinEventBuilder(CORRUPTED_ORC_FILE_DELETION_EVENT, 
GobblinBaseOrcWriter.ORC_WRITER_NAMESPACE);
-    eventBuilder.addMetadata("filePath", p.toString());
+    eventBuilder.addMetadata("filePath", orcFilePath.toString());
+    eventBuilder.addMetadata("exceptionType", 
"com.google.protobuf25.InvalidProtocolBufferException");
+    eventBuilder.addMetadata("exceptionMessage", "Protocol message tag had 
invalid wire type.");
     Mockito.verify(mockContext, Mockito.times(1))
         .submitEvent(eventBuilder.build());
   }

Reply via email to