This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push:
new 9d45c92 ORC-1127: [C++] add missing version of UNSTABLE-PRE-2.0 (#1064)
9d45c92 is described below
commit 9d45c92402cc8d62b363bebab09f7936b1792e5f
Author: Quanlong Huang <[email protected]>
AuthorDate: Wed Mar 16 00:58:25 2022 +0800
ORC-1127: [C++] add missing version of UNSTABLE-PRE-2.0 (#1064)
### What changes were proposed in this pull request?
This adds the UNSTABLE-PRE-2.0 version to the C++ client. It was originally part of
#1062 but has been extracted into its own change to make backporting easier.
Ref: https://github.com/apache/orc/pull/1062#discussion_r824362393
### Why are the changes needed?
Without this change, the C++ client will read the UNSTABLE-PRE-2.0 version
as 1.9999, which is inconsistent with the Java client. See examples in the JIRA
description.
### How was this patch tested?
Added a unit test in TestFileMetadata.cc that uses a test file generated by Hive.
---
c++/include/orc/Common.hh | 1 +
c++/src/Common.cc | 18 +++++++++++++++++-
examples/decimal64_v2.orc | Bin 0 -> 738 bytes
tools/test/TestFileMetadata.cc | 33 +++++++++++++++++++++++++++++++++
4 files changed, 51 insertions(+), 1 deletion(-)
diff --git a/c++/include/orc/Common.hh b/c++/include/orc/Common.hh
index b3ee4e5..e51e37e 100644
--- a/c++/include/orc/Common.hh
+++ b/c++/include/orc/Common.hh
@@ -34,6 +34,7 @@ namespace orc {
public:
static const FileVersion& v_0_11();
static const FileVersion& v_0_12();
+ static const FileVersion& UNSTABLE_PRE_2_0();
FileVersion(uint32_t major, uint32_t minor) :
majorVersion(major), minorVersion(minor) {
diff --git a/c++/src/Common.cc b/c++/src/Common.cc
index dbf0737..477bfd3 100644
--- a/c++/src/Common.cc
+++ b/c++/src/Common.cc
@@ -131,8 +131,11 @@ namespace orc {
}
std::string FileVersion::toString() const {
+ if (majorVersion == 1 && minorVersion == 9999) {
+ return "UNSTABLE-PRE-2.0";
+ }
std::stringstream ss;
- ss << getMajor() << '.' << getMinor();
+ ss << majorVersion << '.' << minorVersion;
return ss.str();
}
@@ -145,4 +148,17 @@ namespace orc {
static FileVersion version(0,12);
return version;
}
+
+ /**
+ * Do not use this format except for testing. It will not be compatible
+ * with other versions of the software. While we iterate on the ORC 2.0
+ * format, we will make incompatible format changes under this version
+ * without providing any forward or backward compatibility.
+ *
+ * When 2.0 is released, this version identifier will be completely removed.
+ */
+ const FileVersion& FileVersion::UNSTABLE_PRE_2_0() {
+ static FileVersion version(1, 9999);
+ return version;
+ }
}
diff --git a/examples/decimal64_v2.orc b/examples/decimal64_v2.orc
new file mode 100644
index 0000000..196be7f
Binary files /dev/null and b/examples/decimal64_v2.orc differ
diff --git a/tools/test/TestFileMetadata.cc b/tools/test/TestFileMetadata.cc
index d9ec4e4..e525549 100644
--- a/tools/test/TestFileMetadata.cc
+++ b/tools/test/TestFileMetadata.cc
@@ -238,6 +238,39 @@ TEST (TestFileMetadata, testNoFormat) {
EXPECT_EQ("", error);
}
+TEST (TestFileMetadata, testV2Format) {
+ const std::string pgm = findProgram("tools/src/orc-metadata");
+ const std::string file = findExample("decimal64_v2.orc");
+ const std::string expected_out =
+ "{ \"name\": \"" + file + "\",\n"
+ " \"type\": \"struct<a:bigint,b:decimal(12,0),c:decimal(20,2),d:decimal(12,2),e:decimal(2,2)>\",\n"
+ " \"attributes\": {},\n"
+ " \"rows\": 10,\n"
+ " \"stripe count\": 1,\n"
+ " \"format\": \"UNSTABLE-PRE-2.0\", \"writer version\": \"ORC-135\", \"software version\": \"ORC Java\",\n"
+ " \"compression\": \"zlib\", \"compression block\": 262144,\n"
+ " \"file length\": 738,\n"
+ " \"content\": 377, \"stripe stats\": 130, \"footer\": 204, \"postscript\": 26,\n"
+ " \"row index stride\": 10000,\n"
+ " \"user metadata\": {\n"
+ " },\n"
+ " \"stripes\": [\n"
+ " { \"stripe\": 0, \"rows\": 10,\n"
+ " \"offset\": 3, \"length\": 374,\n"
+ " \"index\": 192, \"data\": 112, \"footer\": 70\n"
+ " }\n"
+ " ]\n"
+ "}\n";
+ const std::string expected_err = "Warning: ORC file " + file +
+ " was written in an unknown format version UNSTABLE-PRE-2.0\n";
+
+ std::string output;
+ std::string error;
+ EXPECT_EQ(0, runProgram({pgm, file}, output, error)) << error;
+ EXPECT_EQ(expected_out, output);
+ EXPECT_EQ(expected_err, error);
+}
+
TEST (TestFileMetadata, testAttributes) {
const std::string pgm = findProgram("tools/src/orc-metadata");
const std::string file = findExample("complextypes_iceberg.orc");