This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push:
new 7114f16 ORC-781: [C++] Make type annotations available from C++
7114f16 is described below
commit 7114f16ae502acf21b2b1815227655266787500e
Author: boroknagyz <[email protected]>
AuthorDate: Tue Apr 20 16:53:21 2021 +0200
ORC-781: [C++] Make type annotations available from C++
ORC-522 added support for type annotations, but only to the Java ORC
library. This patch adds support for type annotations for the C++
ORC library.
Both reads and writes are supported. orc-metadata now also prints out
the type attributes.
This closes #682
---
c++/include/orc/Type.hh | 6 ++
c++/src/TypeImpl.cc | 63 +++++++++--
c++/src/TypeImpl.hh | 12 +++
c++/src/Writer.cc | 7 ++
c++/test/CMakeLists.txt | 1 +
c++/test/TestAttributes.cc | 215 ++++++++++++++++++++++++++++++++++++++
examples/complextypes_iceberg.orc | Bin 0 -> 1734 bytes
tools/src/FileMetadata.cc | 45 ++++++++
tools/test/TestFileMetadata.cc | 65 +++++++++++-
9 files changed, 405 insertions(+), 9 deletions(-)
diff --git a/c++/include/orc/Type.hh b/c++/include/orc/Type.hh
index c0cbf2d..cfef512 100644
--- a/c++/include/orc/Type.hh
+++ b/c++/include/orc/Type.hh
@@ -58,6 +58,12 @@ namespace orc {
virtual uint64_t getMaximumLength() const = 0;
virtual uint64_t getPrecision() const = 0;
virtual uint64_t getScale() const = 0;
+ virtual Type& setAttribute(const std::string& key,
+ const std::string& value) = 0;
+ virtual bool hasAttributeKey(const std::string& key) const = 0;
+ virtual Type& removeAttribute(const std::string& key) = 0;
+ virtual std::vector<std::string> getAttributeKeys() const = 0;
+ virtual std::string getAttributeValue(const std::string& key) const = 0;
virtual std::string toString() const = 0;
/**
diff --git a/c++/src/TypeImpl.cc b/c++/src/TypeImpl.cc
index 363190d..3049174 100644
--- a/c++/src/TypeImpl.cc
+++ b/c++/src/TypeImpl.cc
@@ -121,6 +121,42 @@ namespace orc {
return scale;
}
+ // Records `value` under `key`, replacing any value already stored for that
+ // key, and returns this type so that calls can be chained.
+ Type& TypeImpl::setAttribute(const std::string& key,
+                              const std::string& value) {
+   auto inserted = attributes.emplace(key, value);
+   if (!inserted.second) {
+     // The key was already present: overwrite the existing value.
+     inserted.first->second = value;
+   }
+   return *this;
+ }
+
+ // Returns true when an attribute named `key` is set on this type.
+ bool TypeImpl::hasAttributeKey(const std::string& key) const {
+   return attributes.count(key) != 0;
+ }
+
+ // Removes the attribute named `key` and returns this type for chaining.
+ // Throws std::range_error when no such attribute is set.
+ Type& TypeImpl::removeAttribute(const std::string& key) {
+   // erase(key) reports the number of removed entries; zero means the key
+   // was not present.
+   if (attributes.erase(key) == 0) {
+     throw std::range_error("Key not found: " + key);
+   }
+   return *this;
+ }
+
+ // Returns a copy of every attribute key set on this type, in the iteration
+ // order of the underlying attribute container.
+ std::vector<std::string> TypeImpl::getAttributeKeys() const {
+   std::vector<std::string> keys;
+   keys.reserve(attributes.size());
+   for (auto it = attributes.begin(); it != attributes.end(); ++it) {
+     keys.push_back(it->first);
+   }
+   return keys;
+ }
+
+ // Returns a copy of the value stored for `key`.
+ // Throws std::range_error when no such attribute is set.
+ std::string TypeImpl::getAttributeValue(const std::string& key) const {
+   const auto found = attributes.find(key);
+   if (found != attributes.end()) {
+     return found->second;
+   }
+   throw std::range_error("Key not found: " + key);
+ }
+
void TypeImpl::setIds(uint64_t _columnId, uint64_t _maxColumnId) {
columnId = static_cast<int64_t>(_columnId);
maximumColumnId = static_cast<int64_t>(_maxColumnId);
@@ -352,6 +388,7 @@ namespace orc {
std::string printProtobufMessage(const google::protobuf::Message& message);
std::unique_ptr<Type> convertType(const proto::Type& type,
const proto::Footer& footer) {
+ std::unique_ptr<Type> ret;
switch (static_cast<int64_t>(type.kind())) {
case proto::Type_Kind_BOOLEAN:
@@ -365,24 +402,27 @@ namespace orc {
case proto::Type_Kind_BINARY:
case proto::Type_Kind_TIMESTAMP:
case proto::Type_Kind_DATE:
- return std::unique_ptr<Type>
+ ret = std::unique_ptr<Type>
(new TypeImpl(static_cast<TypeKind>(type.kind())));
+ break;
case proto::Type_Kind_CHAR:
case proto::Type_Kind_VARCHAR:
- return std::unique_ptr<Type>
+ ret = std::unique_ptr<Type>
(new TypeImpl(static_cast<TypeKind>(type.kind()),
type.maximumlength()));
+ break;
case proto::Type_Kind_DECIMAL:
- return std::unique_ptr<Type>
+ ret = std::unique_ptr<Type>
(new TypeImpl(DECIMAL, type.precision(), type.scale()));
+ break;
case proto::Type_Kind_LIST:
case proto::Type_Kind_MAP:
case proto::Type_Kind_UNION: {
TypeImpl* result = new TypeImpl(static_cast<TypeKind>(type.kind()));
- std::unique_ptr<Type> return_value = std::unique_ptr<Type>(result);
+ ret = std::unique_ptr<Type>(result);
if (type.kind() == proto::Type_Kind_LIST && type.subtypes_size() != 1)
throw ParseError("Illegal LIST type that doesn't contain one subtype");
if (type.kind() == proto::Type_Kind_MAP && type.subtypes_size() != 2)
@@ -394,23 +434,28 @@ namespace orc {
(type.subtypes(i))),
footer));
}
- return return_value;
+ break;
}
case proto::Type_Kind_STRUCT: {
TypeImpl* result = new TypeImpl(STRUCT);
- std::unique_ptr<Type> return_value = std::unique_ptr<Type>(result);
+ ret = std::unique_ptr<Type>(result);
for(int i=0; i < type.subtypes_size(); ++i) {
result->addStructField(type.fieldnames(i),
convertType(footer.types(static_cast<int>
(type.subtypes(i))),
footer));
}
- return return_value;
+ break;
}
default:
throw NotImplementedYet("Unknown type kind");
}
+ for (int i = 0; i < type.attributes_size(); ++i) {
+ const auto& attribute = type.attributes(i);
+ ret->setAttribute(attribute.key(), attribute.value());
+ }
+ return ret;
}
/**
@@ -496,6 +541,10 @@ namespace orc {
throw NotImplementedYet("Unknown type kind");
}
result->setIds(fileType->getColumnId(), fileType->getMaximumColumnId());
+ for (auto& key : fileType->getAttributeKeys()) {
+ const auto& value = fileType->getAttributeValue(key);
+ result->setAttribute(key, value);
+ }
return std::unique_ptr<Type>(result);
}
diff --git a/c++/src/TypeImpl.hh b/c++/src/TypeImpl.hh
index c42d80a..18a3e71 100644
--- a/c++/src/TypeImpl.hh
+++ b/c++/src/TypeImpl.hh
@@ -40,6 +40,7 @@ namespace orc {
uint64_t maxLength;
uint64_t precision;
uint64_t scale;
+ std::map<std::string, std::string> attributes;
public:
/**
@@ -76,6 +77,17 @@ namespace orc {
uint64_t getScale() const override;
+ Type& setAttribute(const std::string& key,
+ const std::string& value) override;
+
+ bool hasAttributeKey(const std::string& key) const override;
+
+ Type& removeAttribute(const std::string& key) override;
+
+ std::vector<std::string> getAttributeKeys() const override;
+
+ std::string getAttributeValue(const std::string& key) const override;
+
std::string toString() const override;
Type* addStructField(const std::string& fieldName,
diff --git a/c++/src/Writer.cc b/c++/src/Writer.cc
index 730d7ff..f6d127f 100644
--- a/c++/src/Writer.cc
+++ b/c++/src/Writer.cc
@@ -621,6 +621,13 @@ namespace orc {
throw std::logic_error("Unknown type.");
}
+ for (auto& key : t.getAttributeKeys()) {
+ const auto& value = t.getAttributeValue(key);
+ auto protoAttr = protoType.add_attributes();
+ protoAttr->set_key(key);
+ protoAttr->set_value(value);
+ }
+
int pos = static_cast<int>(index);
*footer.add_types() = protoType;
diff --git a/c++/test/CMakeLists.txt b/c++/test/CMakeLists.txt
index badee89..8b5ee7b 100644
--- a/c++/test/CMakeLists.txt
+++ b/c++/test/CMakeLists.txt
@@ -21,6 +21,7 @@ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX11_FLAGS} ${WARN_FLAGS}")
add_executable (orc-test
MemoryInputStream.cc
MemoryOutputStream.cc
+ TestAttributes.cc
TestBufferedOutputStream.cc
TestBloomFilter.cc
TestByteRle.cc
diff --git a/c++/test/TestAttributes.cc b/c++/test/TestAttributes.cc
new file mode 100644
index 0000000..f7d0b47
--- /dev/null
+++ b/c++/test/TestAttributes.cc
@@ -0,0 +1,215 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "orc/OrcFile.hh"
+
+#include "MemoryInputStream.hh"
+#include "MemoryOutputStream.hh"
+
+#include "wrap/gmock.h"
+#include "wrap/gtest-wrapper.h"
+
+#include <cmath>
+#include <sstream>
+
+namespace orc {
+ const int DEFAULT_MEM_STREAM_SIZE = 10 * 1024 * 1024; // 10M
+
+ // Fixture for the type-attribute tests: a schema is written to a shared
+ // in-memory stream and read back through the regular reader API.
+ class TypeAttributes : public ::testing::Test {
+ public:
+   ~TypeAttributes();
+ protected:
+   static void SetUpTestCase() {
+     memStream.reset();
+   }
+
+   static void TearDownTestCase() {}
+
+   // Opens a reader over whatever is currently held in the memory stream.
+   std::unique_ptr<Reader> createReader() {
+     std::unique_ptr<InputStream> inStream(
+         new MemoryInputStream (memStream.getData(), memStream.getLength()));
+     ReaderOptions options;
+     return orc::createReader(std::move(inStream), options);
+   }
+
+   // Creates a row reader with default options.
+   std::unique_ptr<RowReader> createRowReader(
+       const std::unique_ptr<Reader>& reader) {
+     RowReaderOptions rowReaderOpts;
+     return reader->createRowReader(rowReaderOpts);
+   }
+
+   // Creates a row reader restricted to the given type ids.
+   std::unique_ptr<RowReader> createRowReader(
+       const std::unique_ptr<Reader>& reader,
+       const std::list<uint64_t>& includeTypes) {
+     RowReaderOptions rowReaderOpts;
+     rowReaderOpts.includeTypes(includeTypes);
+     return reader->createRowReader(rowReaderOpts);
+   }
+
+   // Writes a file with schema `type` and no rows into the memory stream.
+   void writeFileWithType(Type& type) {
+     // The stream is static and SetUpTestCase() resets it only once for the
+     // whole test case, so reset it here to make sure this file does not
+     // follow bytes left over from an earlier test.
+     memStream.reset();
+     WriterOptions options;
+     auto writer = createWriter(type, &memStream, options);
+     writer->close();
+   }
+
+   // Follows `path` from `root`, taking the subtype at each listed index,
+   // and returns the type that is reached.
+   const Type* getTypeByPath(const Type* root,
+                             const std::vector<uint64_t>& path) {
+     const Type* ret = root;
+     for (uint64_t idx : path) {
+       ret = ret->getSubtype(idx);
+     }
+     return ret;
+   }
+
+ private:
+   static MemoryOutputStream memStream;
+ };
+
+ TypeAttributes::~TypeAttributes() {}
+ MemoryOutputStream TypeAttributes::memStream(DEFAULT_MEM_STREAM_SIZE);
+
+ // Round-trips a single attribute on a primitive column.
+ TEST_F(TypeAttributes, writeSimple) {
+   // Schema: struct<i:int>, with the attribute id=1 set on the int column.
+   auto column = createPrimitiveType(INT);
+   column->setAttribute("id", "1");
+   auto schema = createStructType();
+   schema->addStructField("i", std::move(column));
+   writeFileWithType(*schema);
+
+   // The attribute must be visible on the schema reported by the reader ...
+   auto reader = createReader();
+   const Type& fileSchema = reader->getType();
+   EXPECT_EQ("1", fileSchema.getSubtype(0)->getAttributeValue("id"));
+
+   // ... and on the schema selected by a row reader that includes type 1.
+   auto rowReader = createRowReader(reader, {1});
+   const Type& selectedSchema = rowReader->getSelectedType();
+   EXPECT_EQ("1", selectedSchema.getSubtype(0)->getAttributeValue("id"));
+ }
+
+ // Round-trips several attributes on one column, one of which is removed
+ // again before the file is written.
+ TEST_F(TypeAttributes, writeMultipleAttributes) {
+   auto column = createPrimitiveType(STRING);
+   // setAttribute()/removeAttribute() return the type, so the calls chain.
+   column->setAttribute("foo", "xfoo")
+       .setAttribute("bar", "xbar")
+       .setAttribute("baz", "xbaz")
+       .removeAttribute("bar");
+   auto schema = createStructType();
+   schema->addStructField("str", std::move(column));
+   writeFileWithType(*schema);
+
+   auto reader = createReader();
+   auto rowReader = createRowReader(reader, {1});
+   const Type* readColumn = rowReader->getSelectedType().getSubtype(0);
+   const auto keysRead = readColumn->getAttributeKeys();
+
+   // Only the two attributes that were not removed may come back.
+   EXPECT_EQ(2, keysRead.size());
+   EXPECT_FALSE(readColumn->hasAttributeKey("bar"));
+   EXPECT_TRUE(readColumn->hasAttributeKey("foo"));
+   EXPECT_TRUE(readColumn->hasAttributeKey("baz"));
+   EXPECT_EQ("xfoo", readColumn->getAttributeValue("foo"));
+   EXPECT_EQ("xbaz", readColumn->getAttributeValue("baz"));
+ }
+
+ // Checks that `type` and each of its descendants has exactly one attribute,
+ // and that the attribute value equals 'x' + <attribute key>.
+ void testTypeHasXAttr(const Type* type) {
+   auto keys = type->getAttributeKeys();
+   // Fatal assertion: keys.front() below is only valid on a non-empty vector,
+   // so stop checking this type when the count is wrong.
+   ASSERT_EQ(1, keys.size());
+   const auto& key = keys.front();
+   EXPECT_EQ('x' + key, type->getAttributeValue(key));
+   for (uint64_t i = 0; i < type->getSubtypeCount(); ++i) {
+     testTypeHasXAttr(type->getSubtype(i));
+   }
+ }
+
+ // Round-trips attributes set on every level of a nested schema.
+ TEST_F(TypeAttributes, writeAttributesForNestedTypes) {
+   // Let's create
+   //   struct<list:array<struct<myMap:map<int,union<long,float>>>>>
+   // where every type below the root carries one attribute.
+   auto intType = createPrimitiveType(INT);
+   intType->setAttribute("i", "xi");
+   // The first union member is a LONG, as the schema above states.
+   auto longType = createPrimitiveType(LONG);
+   longType->setAttribute("l", "xl");
+   auto floatType = createPrimitiveType(FLOAT);
+   floatType->setAttribute("f", "xf");
+   auto unionType = createUnionType();
+   unionType->setAttribute("u", "xu");
+   unionType->addUnionChild(std::move(longType));
+   unionType->addUnionChild(std::move(floatType));
+   auto mapType = createMapType(std::move(intType), std::move(unionType));
+   mapType->setAttribute("m", "xm");
+   auto innerStructType = createStructType();
+   innerStructType->setAttribute("is", "xis");
+   innerStructType->addStructField("myMap", std::move(mapType));
+   auto listType = createListType(std::move(innerStructType));
+   listType->setAttribute("l", "xl");
+   auto rootStructType = createStructType();
+   rootStructType->addStructField("list", std::move(listType));
+   writeFileWithType(*rootStructType);
+
+   auto reader = createReader();
+   auto rowReader = createRowReader(reader);
+   auto& root = rowReader->getSelectedType();
+
+   // Looks up attribute `key` on the type reached by following the subtype
+   // indexes in `path` from the root.
+   auto getVal = [this, &root] (const std::vector<uint64_t>& path,
+                                const std::string& key) {
+     auto t = getTypeByPath(&root, path);
+     return t->getAttributeValue(key);
+   };
+   EXPECT_EQ("xl", getVal({0}, "l"));
+   EXPECT_EQ("xis", getVal({0, 0}, "is"));
+   EXPECT_EQ("xm", getVal({0, 0, 0}, "m"));
+   EXPECT_EQ("xi", getVal({0, 0, 0, 0}, "i"));
+   EXPECT_EQ("xu", getVal({0, 0, 0, 1}, "u"));
+   EXPECT_EQ("xl", getVal({0, 0, 0, 1, 0}, "l"));
+   EXPECT_EQ("xf", getVal({0, 0, 0, 1, 1}, "f"));
+ }
+
+ // Visits `t` and its descendants depth-first and appends the numeric value
+ // of each "iceberg.id" attribute to `fieldIds`. A type without that
+ // attribute is expected to have column id 0.
+ void collectFieldIds(const Type* t, std::vector<uint64_t>* fieldIds) {
+   const std::string ICEBERG_ID = "iceberg.id";
+   if (!t->hasAttributeKey(ICEBERG_ID)) {
+     EXPECT_EQ(0, t->getColumnId());
+   } else {
+     const std::string id = t->getAttributeValue(ICEBERG_ID);
+     fieldIds->push_back(static_cast<uint64_t>(stoi(id)));
+   }
+   const uint64_t childCount = t->getSubtypeCount();
+   for (uint64_t child = 0; child < childCount; ++child) {
+     collectFieldIds(t->getSubtype(child), fieldIds);
+   }
+ }
+
+ // Reads the bundled example file and checks that the "iceberg.id"
+ // attributes found in its schema are exactly the values 1..29.
+ TEST_F(TypeAttributes, readExampleFile) {
+   // Use ORC_EXAMPLE_DIR when it is set, otherwise a relative default.
+   const char* exampleDir = std::getenv("ORC_EXAMPLE_DIR");
+   std::string path = exampleDir ? exampleDir : "../../../examples";
+   path += "/complextypes_iceberg.orc";
+
+   std::unique_ptr<orc::Reader> reader =
+       orc::createReader(readLocalFile(path.c_str()), ReaderOptions());
+   auto rowReader = createRowReader(reader);
+   std::vector<uint64_t> fieldIds;
+   collectFieldIds(&rowReader->getSelectedType(), &fieldIds);
+
+   EXPECT_EQ(29, fieldIds.size());
+   // Once sorted, the ids must form the sequence 1, 2, 3, ...
+   std::sort(fieldIds.begin(), fieldIds.end());
+   uint64_t expectedId = 1;
+   for (uint64_t actualId : fieldIds) {
+     EXPECT_EQ(expectedId, actualId);
+     ++expectedId;
+   }
+ }
+}
diff --git a/examples/complextypes_iceberg.orc
b/examples/complextypes_iceberg.orc
new file mode 100644
index 0000000..2829e50
Binary files /dev/null and b/examples/complextypes_iceberg.orc differ
diff --git a/tools/src/FileMetadata.cc b/tools/src/FileMetadata.cc
index c4784fd..292979a 100644
--- a/tools/src/FileMetadata.cc
+++ b/tools/src/FileMetadata.cc
@@ -93,6 +93,47 @@ void printRawTail(std::ostream& out,
out << tail.DebugString();
}
+// Escapes `text` for use between double quotes in JSON output: quotes,
+// backslashes and control characters are replaced by escape sequences.
+static std::string escapeJsonText(const std::string& text) {
+  static const char HEX_DIGITS[] = "0123456789abcdef";
+  std::string escaped;
+  escaped.reserve(text.size());
+  for (char ch : text) {
+    const unsigned char byte = static_cast<unsigned char>(ch);
+    if (ch == '"') {
+      escaped += "\\\"";
+    } else if (ch == '\\') {
+      escaped += "\\\\";
+    } else if (ch == '\n') {
+      escaped += "\\n";
+    } else if (ch == '\r') {
+      escaped += "\\r";
+    } else if (ch == '\t') {
+      escaped += "\\t";
+    } else if (byte < 0x20) {
+      // Remaining control characters must be written as \u00XX.
+      escaped += "\\u00";
+      escaped += HEX_DIGITS[byte >> 4];
+      escaped += HEX_DIGITS[byte & 0x0f];
+    } else {
+      escaped += ch;
+    }
+  }
+  return escaped;
+}
+
+// Writes the attributes of `type` and of all its descendants to `out` as
+// members of a JSON object that the caller has already opened. Each type
+// that has attributes produces one member, keyed by the dotted path `name`,
+// whose value is an object of attribute key/value strings.
+//
+// Child paths are built from the parent path: struct children use their
+// field name, list children "_elem", map children "_key" and "_value", and
+// children of any other kind "_field_<index>".
+//
+// `hasAnyAttributes` must point to a flag that is false before the first
+// call; it is set once a member has been written so that later members are
+// preceded by a comma.
+void printAttributes(std::ostream& out, const orc::Type& type,
+                     const std::string name, bool* hasAnyAttributes) {
+  const auto attributeKeys = type.getAttributeKeys();
+  if (!attributeKeys.empty()) {
+    // 'hasAnyAttributes' is only needed to deal with commas properly.
+    if (*hasAnyAttributes) {
+      out << ',';
+    } else {
+      *hasAnyAttributes = true;
+    }
+    out << "\n \"" << escapeJsonText(name) << "\": {";
+    for (uint64_t i = 0; i < attributeKeys.size(); ++i) {
+      if (i != 0) {
+        out << ", ";
+      }
+      const auto& key = attributeKeys[i];
+      const auto& value = type.getAttributeValue(key);
+      // Keys and values come from the file, so escape them for JSON.
+      out << "\"" << escapeJsonText(key) << "\": \""
+          << escapeJsonText(value) << "\"";
+    }
+    out << '}';
+  }
+  for (uint64_t i = 0; i < type.getSubtypeCount(); ++i) {
+    const auto& child = *type.getSubtype(i);
+    std::string fieldName;
+    if (type.getKind() == orc::STRUCT) {
+      fieldName = type.getFieldName(i);
+    } else if (type.getKind() == orc::LIST) {
+      fieldName = "_elem";
+    } else if (type.getKind() == orc::MAP) {
+      fieldName = i == 0 ? "_key" : "_value";
+    } else {
+      fieldName = "_field_" + std::to_string(i);
+    }
+    std::string childName = (name.empty() ? "" : name + '.') + fieldName;
+    printAttributes(out, child, childName, hasAnyAttributes);
+  }
+}
+
void printMetadata(std::ostream & out, const char*filename, bool verbose) {
std::unique_ptr<orc::Reader> reader =
orc::createReader(orc::readFile(filename), orc::ReaderOptions());
@@ -100,6 +141,10 @@ void printMetadata(std::ostream & out, const char*filename, bool verbose) {
uint64_t numberColumns = reader->getType().getMaximumColumnId() + 1;
out << " \"type\": \""
<< reader->getType().toString() << "\",\n";
+ out << " \"attributes\": {";
+ bool hasAnyAttributes = false;
+ printAttributes(out, reader->getType(), /*name=*/"", &hasAnyAttributes);
+ out << "},\n";
out << " \"rows\": " << reader->getNumberOfRows() << ",\n";
uint64_t stripeCount = reader->getNumberOfStripes();
out << " \"stripe count\": " << stripeCount << ",\n";
diff --git a/tools/test/TestFileMetadata.cc b/tools/test/TestFileMetadata.cc
index 45d6d5b..87f2539 100644
--- a/tools/test/TestFileMetadata.cc
+++ b/tools/test/TestFileMetadata.cc
@@ -164,6 +164,7 @@ TEST (TestFileMetadata, testJson) {
const std::string expected =
"{ \"name\": \"" + file + "\",\n"
" \"type\":
\"struct<userid:bigint,string1:string,subtype:double,decimal1:decimal(0,0),ts:timestamp>\",\n"
+ " \"attributes\": {},\n"
" \"rows\": 25000,\n"
" \"stripe count\": 5,\n"
" \"format\": \"0.12\", \"writer version\": \"original\",\n"
@@ -205,14 +206,13 @@ TEST (TestFileMetadata, testJson) {
EXPECT_EQ("", error);
}
-
-
TEST (TestFileMetadata, testNoFormat) {
const std::string pgm = findProgram("tools/src/orc-metadata");
const std::string file = findExample("orc_no_format.orc");
const std::string expected =
"{ \"name\": \"" + file + "\",\n"
" \"type\":
\"struct<_col0:array<string>,_col1:map<int,string>,_col2:struct<name:string,score:int>>\",\n"
+ " \"attributes\": {},\n"
" \"rows\": 5,\n"
" \"stripe count\": 1,\n"
" \"format\": \"0.11\", \"writer version\": \"original\",\n"
@@ -237,3 +237,64 @@ TEST (TestFileMetadata, testNoFormat) {
EXPECT_EQ(expected, output);
EXPECT_EQ("", error);
}
+
+// Runs orc-metadata on the example file that carries type attributes and
+// compares the complete output, including the "attributes" section, with
+// the expected text.
+TEST (TestFileMetadata, testAttributes) {
+  const std::string pgm = findProgram("tools/src/orc-metadata");
+  const std::string file = findExample("complextypes_iceberg.orc");
+  const std::string expected =
+    "{ \"name\": \"" + file + "\",\n"
+    " \"type\": \"struct<id:bigint,int_array:array<int>,int_array_array:array<array<int>>,int_map:map<string,int>,int_map_array:array<map<string,int>>,nested_struct:struct<a:int,b:array<int>,c:struct<d:array<array<struct<e:int,f:string>>>>,g:map<string,struct<h:struct<i:array<double>>>>>>\",\n"
+    " \"attributes\": {\n"
+    " \"id\": {\"iceberg.id\": \"1\", \"iceberg.long-type\": \"LONG\", \"iceberg.required\": \"false\"},\n"
+    " \"int_array\": {\"iceberg.id\": \"2\", \"iceberg.required\": \"false\"},\n"
+    " \"int_array._elem\": {\"iceberg.id\": \"7\", \"iceberg.required\": \"false\"},\n"
+    " \"int_array_array\": {\"iceberg.id\": \"3\", \"iceberg.required\": \"false\"},\n"
+    " \"int_array_array._elem\": {\"iceberg.id\": \"8\", \"iceberg.required\": \"false\"},\n"
+    " \"int_array_array._elem._elem\": {\"iceberg.id\": \"9\", \"iceberg.required\": \"false\"},\n"
+    " \"int_map\": {\"iceberg.id\": \"4\", \"iceberg.required\": \"false\"},\n"
+    " \"int_map._key\": {\"iceberg.id\": \"10\", \"iceberg.required\": \"true\"},\n"
+    " \"int_map._value\": {\"iceberg.id\": \"11\", \"iceberg.required\": \"false\"},\n"
+    " \"int_map_array\": {\"iceberg.id\": \"5\", \"iceberg.required\": \"false\"},\n"
+    " \"int_map_array._elem\": {\"iceberg.id\": \"12\", \"iceberg.required\": \"false\"},\n"
+    " \"int_map_array._elem._key\": {\"iceberg.id\": \"13\", \"iceberg.required\": \"true\"},\n"
+    " \"int_map_array._elem._value\": {\"iceberg.id\": \"14\", \"iceberg.required\": \"false\"},\n"
+    " \"nested_struct\": {\"iceberg.id\": \"6\", \"iceberg.required\": \"false\"},\n"
+    " \"nested_struct.a\": {\"iceberg.id\": \"15\", \"iceberg.required\": \"false\"},\n"
+    " \"nested_struct.b\": {\"iceberg.id\": \"16\", \"iceberg.required\": \"false\"},\n"
+    " \"nested_struct.b._elem\": {\"iceberg.id\": \"19\", \"iceberg.required\": \"false\"},\n"
+    " \"nested_struct.c\": {\"iceberg.id\": \"17\", \"iceberg.required\": \"false\"},\n"
+    " \"nested_struct.c.d\": {\"iceberg.id\": \"20\", \"iceberg.required\": \"false\"},\n"
+    " \"nested_struct.c.d._elem\": {\"iceberg.id\": \"21\", \"iceberg.required\": \"false\"},\n"
+    " \"nested_struct.c.d._elem._elem\": {\"iceberg.id\": \"22\", \"iceberg.required\": \"false\"},\n"
+    " \"nested_struct.c.d._elem._elem.e\": {\"iceberg.id\": \"23\", \"iceberg.required\": \"false\"},\n"
+    " \"nested_struct.c.d._elem._elem.f\": {\"iceberg.id\": \"24\", \"iceberg.required\": \"false\"},\n"
+    " \"nested_struct.g\": {\"iceberg.id\": \"18\", \"iceberg.required\": \"false\"},\n"
+    " \"nested_struct.g._key\": {\"iceberg.id\": \"25\", \"iceberg.required\": \"true\"},\n"
+    " \"nested_struct.g._value\": {\"iceberg.id\": \"26\", \"iceberg.required\": \"false\"},\n"
+    " \"nested_struct.g._value.h\": {\"iceberg.id\": \"27\", \"iceberg.required\": \"false\"},\n"
+    " \"nested_struct.g._value.h.i\": {\"iceberg.id\": \"28\", \"iceberg.required\": \"false\"},\n"
+    " \"nested_struct.g._value.h.i._elem\": {\"iceberg.id\": \"29\", \"iceberg.required\": \"false\"}},\n"
+    " \"rows\": 1,\n"
+    " \"stripe count\": 1,\n"
+    " \"format\": \"0.12\", \"writer version\": \"future - 9\",\n"
+    " \"compression\": \"zlib\", \"compression block\": 131072,\n"
+    " \"file length\": 1734,\n"
+    " \"content\": 1006, \"stripe stats\": 167, \"footer\": 535, \"postscript\": 25,\n"
+    " \"row index stride\": 10000,\n"
+    " \"user metadata\": {\n"
+    " },\n"
+    " \"stripes\": [\n"
+    " { \"stripe\": 0, \"rows\": 1,\n"
+    " \"offset\": 3, \"length\": 1003,\n"
+    " \"index\": 679, \"data\": 150, \"footer\": 174\n"
+    " }\n"
+    " ]\n"
+    "}\n";
+
+  // The expected text is not echoed to stdout; on a mismatch the
+  // EXPECT_EQ below reports both strings.
+  std::string output;
+  std::string error;
+  EXPECT_EQ(0, runProgram({pgm, file}, output, error));
+  EXPECT_EQ(expected, output);
+  EXPECT_EQ("", error);
+}