This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-1.7
in repository https://gitbox.apache.org/repos/asf/orc.git

commit 488184b33eb3d662e89ecc10f2c3e5fab6f34bf5
Author: zhangyiqun <[email protected]>
AuthorDate: Tue Aug 29 20:33:30 2023 -0700

    ORC-1489: Assign a writer id to CUDF
    
    ### What changes were proposed in this pull request?
    
    This PR assigns a writer id to CUDF.
    
    ### Why are the changes needed?
    
    This helps identify which writer produced a specific ORC file, and it also
    lets readers apply special handling to files created by different writers.
    
    ### How was this patch tested?
    
    Added unit tests.
    
    Closes #1594 from guiyanakuang/ORC-1489.
    
    Lead-authored-by: zhangyiqun <[email protected]>
    Co-authored-by: Yiqun Zhang <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
    (cherry picked from commit 5d163d2f3a64272a8f6e7e839df1e062df399f0b)
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 c++/include/orc/Common.hh                                | 1 +
 c++/src/Common.cc                                        | 2 ++
 c++/src/Reader.cc                                        | 2 +-
 java/core/src/java/org/apache/orc/OrcFile.java           | 4 ++++
 java/core/src/java/org/apache/orc/OrcUtils.java          | 3 +++
 java/core/src/test/org/apache/orc/TestVectorOrcFile.java | 6 ++++++
 proto/orc_proto.proto                                    | 4 ++++
 site/specification/ORCv1.md                              | 1 +
 site/specification/ORCv2.md                              | 1 +
 9 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/c++/include/orc/Common.hh b/c++/include/orc/Common.hh
index 5b580b891..601dad569 100644
--- a/c++/include/orc/Common.hh
+++ b/c++/include/orc/Common.hh
@@ -72,6 +72,7 @@ namespace orc {
     PRESTO_WRITER = 2,
     SCRITCHLEY_GO = 3,
     TRINO_WRITER = 4,
+    CUDF_WRITER = 5,
     UNKNOWN_WRITER = INT32_MAX
   };
 
diff --git a/c++/src/Common.cc b/c++/src/Common.cc
index 477bfd3b4..31a4f820a 100644
--- a/c++/src/Common.cc
+++ b/c++/src/Common.cc
@@ -82,6 +82,8 @@ namespace orc {
         return "Scritchley Go";
       case TRINO_WRITER:
         return "Trino";
+      case CUDF_WRITER:
+        return "CUDF";
       default: {
         std::ostringstream buffer;
         buffer << "Unknown(" << id << ")";
diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index 82001b9f7..ab5dc0eb2 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -584,7 +584,7 @@ namespace orc {
   WriterId ReaderImpl::getWriterId() const {
     if (footer->has_writer()) {
       uint32_t id = footer->writer();
-      if (id > WriterId::TRINO_WRITER) {
+      if (id > WriterId::CUDF_WRITER) {
         return WriterId::UNKNOWN_WRITER;
       } else {
        return static_cast<WriterId>(id);
diff --git a/java/core/src/java/org/apache/orc/OrcFile.java b/java/core/src/java/org/apache/orc/OrcFile.java
index c888b93a1..579750ecb 100644
--- a/java/core/src/java/org/apache/orc/OrcFile.java
+++ b/java/core/src/java/org/apache/orc/OrcFile.java
@@ -132,6 +132,7 @@ public class OrcFile {
     PRESTO(2),   // Presto writer
     SCRITCHLEY_GO(3), // Go writer from https://github.com/scritchley/orc
     TRINO(4),   // Trino writer
+    CUDF(5),    // CUDF writer
     UNKNOWN(Integer.MAX_VALUE);
 
     private final int id;
@@ -189,6 +190,9 @@ public class OrcFile {
     // Trino Writer
     TRINO_ORIGINAL(WriterImplementation.TRINO, 6),
 
+    // CUDF Writer
+    CUDF_ORIGINAL(WriterImplementation.CUDF, 6),
+
     // Don't use any magic numbers here except for the below:
     FUTURE(WriterImplementation.UNKNOWN, Integer.MAX_VALUE); // a version from a future writer
 
diff --git a/java/core/src/java/org/apache/orc/OrcUtils.java b/java/core/src/java/org/apache/orc/OrcUtils.java
index 358407fc9..1fd6862d6 100644
--- a/java/core/src/java/org/apache/orc/OrcUtils.java
+++ b/java/core/src/java/org/apache/orc/OrcUtils.java
@@ -418,6 +418,9 @@ public class OrcUtils {
       case 4:
         base = "Trino";
         break;
+      case 5:
+        base = "CUDF";
+        break;
       default:
         base = String.format("Unknown(%d)", writer);
         break;
diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
index 7c8d8cf1e..f1aa38a2a 100644
--- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
+++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
@@ -3595,6 +3595,8 @@ public class TestVectorOrcFile {
         OrcFile.WriterImplementation.from(2));
     assertEquals(OrcFile.WriterImplementation.TRINO,
             OrcFile.WriterImplementation.from(4));
+    assertEquals(OrcFile.WriterImplementation.CUDF,
+        OrcFile.WriterImplementation.from(5));
     assertEquals(OrcFile.WriterImplementation.UNKNOWN,
         OrcFile.WriterImplementation.from(99));
 
@@ -3613,6 +3615,8 @@ public class TestVectorOrcFile {
         OrcFile.WriterVersion.from(OrcFile.WriterImplementation.PRESTO, 6));
     assertEquals(OrcFile.WriterVersion.TRINO_ORIGINAL,
             OrcFile.WriterVersion.from(OrcFile.WriterImplementation.TRINO, 6));
+    assertEquals(OrcFile.WriterVersion.CUDF_ORIGINAL,
+        OrcFile.WriterVersion.from(OrcFile.WriterImplementation.CUDF, 6));
     assertEquals(OrcFile.WriterVersion.FUTURE,
         OrcFile.WriterVersion.from(OrcFile.WriterImplementation.UNKNOWN, 0));
 
@@ -3631,6 +3635,8 @@ public class TestVectorOrcFile {
         OrcFile.WriterVersion.PRESTO_ORIGINAL));
     assertTrue(OrcFile.WriterVersion.HIVE_12055.includes(
         OrcFile.WriterVersion.TRINO_ORIGINAL));
+    assertTrue(OrcFile.WriterVersion.HIVE_12055.includes(
+        OrcFile.WriterVersion.CUDF_ORIGINAL));
   }
 
   @ParameterizedTest
diff --git a/proto/orc_proto.proto b/proto/orc_proto.proto
index ff05657a5..45d7d2a05 100644
--- a/proto/orc_proto.proto
+++ b/proto/orc_proto.proto
@@ -367,6 +367,7 @@ message Footer {
   // 2 = Presto
   // 3 = Scritchley Go from https://github.com/scritchley/orc
   // 4 = Trino
+  // 5 = CUDF
   optional uint32 writer = 9;
 
   // information about the encryption in this file
@@ -432,6 +433,9 @@ message PostScript {
   // Version of the Trino writer:
   //   6 = original
   //
+  // Version of the CUDF writer:
+  //   6 = original
+  //
   optional uint32 writerVersion = 6;
 
   // the number of bytes in the encrypted stripe statistics
diff --git a/site/specification/ORCv1.md b/site/specification/ORCv1.md
index 472e11e5b..b789faa74 100644
--- a/site/specification/ORCv1.md
+++ b/site/specification/ORCv1.md
@@ -136,6 +136,7 @@ message Footer {
  // 2 = Presto
  // 3 = Scritchley Go from https://github.com/scritchley/orc
  // 4 = Trino
+ // 5 = CUDF
  optional uint32 writer = 9;
  // information about the encryption in this file
  optional Encryption encryption = 10;
diff --git a/site/specification/ORCv2.md b/site/specification/ORCv2.md
index 703cea01d..b98dea953 100644
--- a/site/specification/ORCv2.md
+++ b/site/specification/ORCv2.md
@@ -156,6 +156,7 @@ message Footer {
  // 2 = Presto
  // 3 = Scritchley Go from https://github.com/scritchley/orc
  // 4 = Trino
+ // 5 = CUDF
  optional uint32 writer = 9;
  // information about the encryption in this file
  optional Encryption encryption = 10;

Reply via email to