This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-1.8
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/branch-1.8 by this push:
     new 89398a079 ORC-1489: Assign a writer id to CUDF
89398a079 is described below

commit 89398a0799b5a2cac66fe691611907d51fefef38
Author: zhangyiqun <[email protected]>
AuthorDate: Tue Aug 29 20:33:30 2023 -0700

    ORC-1489: Assign a writer id to CUDF
    
    ### What changes were proposed in this pull request?
    
    This PR assigns a writer ID to the CUDF writer.
    
    ### Why are the changes needed?
    
    This helps identify which writer produced a given ORC file, and it lets
    readers apply writer-specific handling to files created by different writers.
    
    ### How was this patch tested?
    
    Added unit tests to TestVectorOrcFile.
    
    Closes #1594 from guiyanakuang/ORC-1489.
    
    Lead-authored-by: zhangyiqun <[email protected]>
    Co-authored-by: Yiqun Zhang <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
    (cherry picked from commit 5d163d2f3a64272a8f6e7e839df1e062df399f0b)
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 c++/include/orc/Common.hh                                | 1 +
 c++/src/Common.cc                                        | 2 ++
 c++/src/Reader.cc                                        | 2 +-
 java/core/src/java/org/apache/orc/OrcFile.java           | 4 ++++
 java/core/src/java/org/apache/orc/OrcUtils.java          | 3 +++
 java/core/src/test/org/apache/orc/TestVectorOrcFile.java | 6 ++++++
 proto/orc_proto.proto                                    | 4 ++++
 site/specification/ORCv1.md                              | 1 +
 site/specification/ORCv2.md                              | 1 +
 9 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/c++/include/orc/Common.hh b/c++/include/orc/Common.hh
index e51e37e71..c0b916cbd 100644
--- a/c++/include/orc/Common.hh
+++ b/c++/include/orc/Common.hh
@@ -72,6 +72,7 @@ namespace orc {
     PRESTO_WRITER = 2,
     SCRITCHLEY_GO = 3,
     TRINO_WRITER = 4,
+    CUDF_WRITER = 5,
     UNKNOWN_WRITER = INT32_MAX
   };
 
diff --git a/c++/src/Common.cc b/c++/src/Common.cc
index 477bfd3b4..31a4f820a 100644
--- a/c++/src/Common.cc
+++ b/c++/src/Common.cc
@@ -82,6 +82,8 @@ namespace orc {
         return "Scritchley Go";
       case TRINO_WRITER:
         return "Trino";
+      case CUDF_WRITER:
+        return "CUDF";
       default: {
         std::ostringstream buffer;
         buffer << "Unknown(" << id << ")";
diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index e53dc4c86..625ecfa76 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -632,7 +632,7 @@ namespace orc {
   WriterId ReaderImpl::getWriterId() const {
     if (footer->has_writer()) {
       uint32_t id = footer->writer();
-      if (id > WriterId::TRINO_WRITER) {
+      if (id > WriterId::CUDF_WRITER) {
         return WriterId::UNKNOWN_WRITER;
       } else {
        return static_cast<WriterId>(id);
diff --git a/java/core/src/java/org/apache/orc/OrcFile.java 
b/java/core/src/java/org/apache/orc/OrcFile.java
index a1a00e92f..49cd2a373 100644
--- a/java/core/src/java/org/apache/orc/OrcFile.java
+++ b/java/core/src/java/org/apache/orc/OrcFile.java
@@ -132,6 +132,7 @@ public class OrcFile {
     PRESTO(2),   // Presto writer
     SCRITCHLEY_GO(3), // Go writer from https://github.com/scritchley/orc
     TRINO(4),   // Trino writer
+    CUDF(5),    // CUDF writer
     UNKNOWN(Integer.MAX_VALUE);
 
     private final int id;
@@ -189,6 +190,9 @@ public class OrcFile {
     // Trino Writer
     TRINO_ORIGINAL(WriterImplementation.TRINO, 6),
 
+    // CUDF Writer
+    CUDF_ORIGINAL(WriterImplementation.CUDF, 6),
+
     // Don't use any magic numbers here except for the below:
     FUTURE(WriterImplementation.UNKNOWN, Integer.MAX_VALUE); // a version from 
a future writer
 
diff --git a/java/core/src/java/org/apache/orc/OrcUtils.java 
b/java/core/src/java/org/apache/orc/OrcUtils.java
index c121537d3..7dde0bc0f 100644
--- a/java/core/src/java/org/apache/orc/OrcUtils.java
+++ b/java/core/src/java/org/apache/orc/OrcUtils.java
@@ -414,6 +414,9 @@ public class OrcUtils {
       case 4:
         base = "Trino";
         break;
+      case 5:
+        base = "CUDF";
+        break;
       default:
         base = String.format("Unknown(%d)", writer);
         break;
diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java 
b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
index 9a1431c68..8eae7a7cd 100644
--- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
+++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
@@ -3597,6 +3597,8 @@ public class TestVectorOrcFile {
         OrcFile.WriterImplementation.from(2));
     assertEquals(OrcFile.WriterImplementation.TRINO,
             OrcFile.WriterImplementation.from(4));
+    assertEquals(OrcFile.WriterImplementation.CUDF,
+        OrcFile.WriterImplementation.from(5));
     assertEquals(OrcFile.WriterImplementation.UNKNOWN,
         OrcFile.WriterImplementation.from(99));
 
@@ -3615,6 +3617,8 @@ public class TestVectorOrcFile {
         OrcFile.WriterVersion.from(OrcFile.WriterImplementation.PRESTO, 6));
     assertEquals(OrcFile.WriterVersion.TRINO_ORIGINAL,
             OrcFile.WriterVersion.from(OrcFile.WriterImplementation.TRINO, 6));
+    assertEquals(OrcFile.WriterVersion.CUDF_ORIGINAL,
+        OrcFile.WriterVersion.from(OrcFile.WriterImplementation.CUDF, 6));
     assertEquals(OrcFile.WriterVersion.FUTURE,
         OrcFile.WriterVersion.from(OrcFile.WriterImplementation.UNKNOWN, 0));
 
@@ -3633,6 +3637,8 @@ public class TestVectorOrcFile {
         OrcFile.WriterVersion.PRESTO_ORIGINAL));
     assertTrue(OrcFile.WriterVersion.HIVE_12055.includes(
         OrcFile.WriterVersion.TRINO_ORIGINAL));
+    assertTrue(OrcFile.WriterVersion.HIVE_12055.includes(
+        OrcFile.WriterVersion.CUDF_ORIGINAL));
   }
 
   @ParameterizedTest
diff --git a/proto/orc_proto.proto b/proto/orc_proto.proto
index ff05657a5..45d7d2a05 100644
--- a/proto/orc_proto.proto
+++ b/proto/orc_proto.proto
@@ -367,6 +367,7 @@ message Footer {
   // 2 = Presto
   // 3 = Scritchley Go from https://github.com/scritchley/orc
   // 4 = Trino
+  // 5 = CUDF
   optional uint32 writer = 9;
 
   // information about the encryption in this file
@@ -432,6 +433,9 @@ message PostScript {
   // Version of the Trino writer:
   //   6 = original
   //
+  // Version of the CUDF writer:
+  //   6 = original
+  //
   optional uint32 writerVersion = 6;
 
   // the number of bytes in the encrypted stripe statistics
diff --git a/site/specification/ORCv1.md b/site/specification/ORCv1.md
index fd18ae0b8..9df424962 100644
--- a/site/specification/ORCv1.md
+++ b/site/specification/ORCv1.md
@@ -136,6 +136,7 @@ message Footer {
  // 2 = Presto
  // 3 = Scritchley Go from https://github.com/scritchley/orc
  // 4 = Trino
+ // 5 = CUDF
  optional uint32 writer = 9;
  // information about the encryption in this file
  optional Encryption encryption = 10;
diff --git a/site/specification/ORCv2.md b/site/specification/ORCv2.md
index 73d89cde4..fd2963898 100644
--- a/site/specification/ORCv2.md
+++ b/site/specification/ORCv2.md
@@ -156,6 +156,7 @@ message Footer {
  // 2 = Presto
  // 3 = Scritchley Go from https://github.com/scritchley/orc
  // 4 = Trino
+ // 5 = CUDF
  optional uint32 writer = 9;
  // information about the encryption in this file
  optional Encryption encryption = 10;

Reply via email to