This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-1.7
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/branch-1.7 by this push:
     new be72554  ORC-1127: [C++] add missing version of UNSTABLE-PRE-2.0 (#1064)
be72554 is described below

commit be72554bee51707a2de684e2bcfdcf668a7eab58
Author: Quanlong Huang <[email protected]>
AuthorDate: Wed Mar 16 00:58:25 2022 +0800

    ORC-1127: [C++] add missing version of UNSTABLE-PRE-2.0 (#1064)
    
    ### What changes were proposed in this pull request?
    
    This adds the UNSTABLE-PRE-2.0 version to the C++ client. It is part of #1062 but has been extracted here for easy backporting.
    Ref: https://github.com/apache/orc/pull/1062#discussion_r824362393
    
    ### Why are the changes needed?
    
    Without this change, the C++ client will read the UNSTABLE-PRE-2.0 version as 1.9999, which is inconsistent with the Java client. See examples in the JIRA description.
    
    ### How was this patch tested?
    
    Add unit test in TestFileMetadata.cc with a test file generated by Hive.
    
    (cherry picked from commit 9d45c92402cc8d62b363bebab09f7936b1792e5f)
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 c++/include/orc/Common.hh      |   1 +
 c++/src/Common.cc              |  18 +++++++++++++++++-
 examples/decimal64_v2.orc      | Bin 0 -> 738 bytes
 tools/test/TestFileMetadata.cc |  33 +++++++++++++++++++++++++++++++++
 4 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/c++/include/orc/Common.hh b/c++/include/orc/Common.hh
index f865b30..5b580b8 100644
--- a/c++/include/orc/Common.hh
+++ b/c++/include/orc/Common.hh
@@ -34,6 +34,7 @@ namespace orc {
   public:
     static const FileVersion& v_0_11();
     static const FileVersion& v_0_12();
+    static const FileVersion& UNSTABLE_PRE_2_0();
 
     FileVersion(uint32_t major, uint32_t minor) :
                 majorVersion(major), minorVersion(minor) {
diff --git a/c++/src/Common.cc b/c++/src/Common.cc
index dbf0737..477bfd3 100644
--- a/c++/src/Common.cc
+++ b/c++/src/Common.cc
@@ -131,8 +131,11 @@ namespace orc {
   }
 
   std::string FileVersion::toString() const {
+    if (majorVersion == 1 && minorVersion == 9999) {
+      return "UNSTABLE-PRE-2.0";
+    }
     std::stringstream ss;
-    ss << getMajor() << '.' << getMinor();
+    ss << majorVersion << '.' << minorVersion;
     return ss.str();
   }
   
@@ -145,4 +148,17 @@ namespace orc {
     static FileVersion version(0,12);
     return version;
   }
+
+  /**
+   * Do not use this format except for testing. It will not be compatible
+   * with other versions of the software. While we iterate on the ORC 2.0
+   * format, we will make incompatible format changes under this version
+   * without providing any forward or backward compatibility.
+   *
+   * When 2.0 is released, this version identifier will be completely removed.
+  */
+  const FileVersion& FileVersion::UNSTABLE_PRE_2_0() {
+    static FileVersion version(1, 9999);
+    return version;
+  }
 }
diff --git a/examples/decimal64_v2.orc b/examples/decimal64_v2.orc
new file mode 100644
index 0000000..196be7f
Binary files /dev/null and b/examples/decimal64_v2.orc differ
diff --git a/tools/test/TestFileMetadata.cc b/tools/test/TestFileMetadata.cc
index d9ec4e4..e525549 100644
--- a/tools/test/TestFileMetadata.cc
+++ b/tools/test/TestFileMetadata.cc
@@ -238,6 +238,39 @@ TEST (TestFileMetadata, testNoFormat) {
   EXPECT_EQ("", error);
 }
 
+TEST (TestFileMetadata, testV2Format) {
+  const std::string pgm = findProgram("tools/src/orc-metadata");
+  const std::string file = findExample("decimal64_v2.orc");
+  const std::string expected_out =
+    "{ \"name\": \"" + file + "\",\n"
+    "  \"type\": \"struct<a:bigint,b:decimal(12,0),c:decimal(20,2),d:decimal(12,2),e:decimal(2,2)>\",\n"
+    "  \"attributes\": {},\n"
+    "  \"rows\": 10,\n"
+    "  \"stripe count\": 1,\n"
+    "  \"format\": \"UNSTABLE-PRE-2.0\", \"writer version\": \"ORC-135\", \"software version\": \"ORC Java\",\n"
+    "  \"compression\": \"zlib\", \"compression block\": 262144,\n"
+    "  \"file length\": 738,\n"
+    "  \"content\": 377, \"stripe stats\": 130, \"footer\": 204, \"postscript\": 26,\n"
+    "  \"row index stride\": 10000,\n"
+    "  \"user metadata\": {\n"
+    "  },\n"
+    "  \"stripes\": [\n"
+    "    { \"stripe\": 0, \"rows\": 10,\n"
+    "      \"offset\": 3, \"length\": 374,\n"
+    "      \"index\": 192, \"data\": 112, \"footer\": 70\n"
+    "    }\n"
+    "  ]\n"
+    "}\n";
+  const std::string expected_err = "Warning: ORC file " + file +
+    " was written in an unknown format version UNSTABLE-PRE-2.0\n";
+
+  std::string output;
+  std::string error;
+  EXPECT_EQ(0, runProgram({pgm, file}, output, error)) << error;
+  EXPECT_EQ(expected_out, output);
+  EXPECT_EQ(expected_err, error);
+}
+
 TEST (TestFileMetadata, testAttributes) {
   const std::string pgm = findProgram("tools/src/orc-metadata");
   const std::string file = findExample("complextypes_iceberg.orc");

Reply via email to