[
https://issues.apache.org/jira/browse/ARROW-1835?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16342623#comment-16342623
]
ASF GitHub Bot commented on ARROW-1835:
---------------------------------------
xhochy closed pull request #1478: ARROW-1835: [C++] Create Arrow schema from
std::tuple types
URL: https://github.com/apache/arrow/pull/1478
This is a PR merged from a forked repository.
Because GitHub hides the original diff of a pull request from a fork once it
is merged, the diff is reproduced below for the sake of provenance:
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index ad86256e0..74674bebb 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -153,6 +153,7 @@ install(FILES
pretty_print.h
record_batch.h
status.h
+ stl.h
table.h
table_builder.h
tensor.h
@@ -183,6 +184,7 @@ ADD_ARROW_TEST(memory_pool-test)
ADD_ARROW_TEST(pretty_print-test)
ADD_ARROW_TEST(public-api-test)
ADD_ARROW_TEST(status-test)
+ADD_ARROW_TEST(stl-test)
ADD_ARROW_TEST(type-test)
ADD_ARROW_TEST(table-test)
ADD_ARROW_TEST(table_builder-test)
diff --git a/cpp/src/arrow/stl-test.cc b/cpp/src/arrow/stl-test.cc
new file mode 100644
index 000000000..c85baa3a1
--- /dev/null
+++ b/cpp/src/arrow/stl-test.cc
@@ -0,0 +1,78 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gtest/gtest.h"
+
+#include "arrow/stl.h"
+
+namespace arrow {
+namespace stl {
+
+TEST(TestSchemaFromTuple, PrimitiveTypesVector) {
+ Schema expected_schema(
+ {field("column1", int8(), false), field("column2", int16(), false),
+ field("column3", int32(), false), field("column4", int64(), false),
+ field("column5", uint8(), false), field("column6", uint16(), false),
+ field("column7", uint32(), false), field("column8", uint64(), false),
+ field("column9", boolean(), false), field("column10", utf8(), false)});
+
+ std::shared_ptr<Schema> schema =
+ SchemaFromTuple<std::tuple<int8_t, int16_t, int32_t, int64_t, uint8_t,
uint16_t,
+ uint32_t, uint64_t, bool, std::string>>::
+ MakeSchema(std::vector<std::string>({"column1", "column2",
"column3", "column4",
+ "column5", "column6",
"column7", "column8",
+ "column9", "column10"}));
+ ASSERT_TRUE(expected_schema.Equals(*schema));
+}
+
+TEST(TestSchemaFromTuple, PrimitiveTypesTuple) {
+ Schema expected_schema(
+ {field("column1", int8(), false), field("column2", int16(), false),
+ field("column3", int32(), false), field("column4", int64(), false),
+ field("column5", uint8(), false), field("column6", uint16(), false),
+ field("column7", uint32(), false), field("column8", uint64(), false),
+ field("column9", boolean(), false), field("column10", utf8(), false)});
+
+ std::shared_ptr<Schema> schema = SchemaFromTuple<
+ std::tuple<int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
uint32_t, uint64_t,
+ bool, std::string>>::MakeSchema(std::make_tuple("column1",
"column2",
+ "column3",
"column4",
+ "column5",
"column6",
+ "column7",
"column8",
+ "column9",
"column10"));
+ ASSERT_TRUE(expected_schema.Equals(*schema));
+}
+
+TEST(TestSchemaFromTuple, SimpleList) {
+ Schema expected_schema({field("column1", list(utf8()), false)});
+ std::shared_ptr<Schema> schema =
+
SchemaFromTuple<std::tuple<std::vector<std::string>>>::MakeSchema({"column1"});
+
+ ASSERT_TRUE(expected_schema.Equals(*schema));
+}
+
+TEST(TestSchemaFromTuple, NestedList) {
+ Schema expected_schema({field("column1", list(list(boolean())), false)});
+ std::shared_ptr<Schema> schema =
+ SchemaFromTuple<std::tuple<std::vector<std::vector<bool>>>>::MakeSchema(
+ {"column1"});
+
+ ASSERT_TRUE(expected_schema.Equals(*schema));
+}
+
+} // namespace stl
+} // namespace arrow
diff --git a/cpp/src/arrow/stl.h b/cpp/src/arrow/stl.h
new file mode 100644
index 000000000..3250b5a32
--- /dev/null
+++ b/cpp/src/arrow/stl.h
@@ -0,0 +1,153 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_STL_H
+#define ARROW_STL_H
+
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "arrow/type.h"
+
+namespace arrow {
+
+class Schema;
+
+namespace stl {
+
+/// Traits meta class to map standard C/C++ types to equivalent Arrow types.
+template <typename T>
+struct ConversionTraits {};
+
+#define ARROW_STL_CONVERSION(c_type, ArrowType_) \
+ template <> \
+ struct ConversionTraits<c_type> { \
+ using ArrowType = ArrowType_; \
+ constexpr static bool nullable = false; \
+ };
+
+ARROW_STL_CONVERSION(bool, BooleanType)
+ARROW_STL_CONVERSION(int8_t, Int8Type)
+ARROW_STL_CONVERSION(int16_t, Int16Type)
+ARROW_STL_CONVERSION(int32_t, Int32Type)
+ARROW_STL_CONVERSION(int64_t, Int64Type)
+ARROW_STL_CONVERSION(uint8_t, UInt8Type)
+ARROW_STL_CONVERSION(uint16_t, UInt16Type)
+ARROW_STL_CONVERSION(uint32_t, UInt32Type)
+ARROW_STL_CONVERSION(uint64_t, UInt64Type)
+ARROW_STL_CONVERSION(float, FloatType)
+ARROW_STL_CONVERSION(double, DoubleType)
+ARROW_STL_CONVERSION(std::string, StringType)
+
+template <typename value_c_type>
+struct ConversionTraits<std::vector<value_c_type>> {
+ using ArrowType = meta::ListType<typename
ConversionTraits<value_c_type>::ArrowType>;
+ constexpr static bool nullable = false;
+};
+
+/// Build an arrow::Schema based upon the types defined in a std::tuple-like
structure.
+///
+/// While the type information is available at compile-time, we still need to
add the
+/// column names at runtime, thus these methods are not constexpr.
+template <typename Tuple, std::size_t N = std::tuple_size<Tuple>::value>
+struct SchemaFromTuple {
+ using Element = typename std::tuple_element<N - 1, Tuple>::type;
+ using ArrowType = typename ConversionTraits<Element>::ArrowType;
+
+ // Implementations that take a vector-like object for the column names.
+
+ /// Recursively build a vector of arrow::Field from the defined types.
+ ///
+ /// In most cases MakeSchema is the better entrypoint for the Schema
creation.
+ static std::vector<std::shared_ptr<Field>> MakeSchemaRecursion(
+ const std::vector<std::string>& names) {
+ std::vector<std::shared_ptr<Field>> ret =
+ SchemaFromTuple<Tuple, N - 1>::MakeSchemaRecursion(names);
+ ret.push_back(field(names[N - 1], std::make_shared<ArrowType>(),
+ ConversionTraits<Element>::nullable));
+ return ret;
+ }
+
+ /// Build a Schema from the types of the tuple-like structure passed in as
template
+ /// parameter and assign the column names at runtime.
+ ///
+ /// An example usage of this API can look like the following:
+ ///
+ /// \code{.cpp}
+ /// using TupleType = std::tuple<int, std::vector<std::string>>;
+ /// std::shared_ptr<Schema> schema =
+ /// SchemaFromTuple<TupleType>::MakeSchema({"int_column",
"list_of_strings_column"});
+ /// \endcode
+ static std::shared_ptr<Schema> MakeSchema(const std::vector<std::string>&
names) {
+ return std::make_shared<Schema>(MakeSchemaRecursion(names));
+ }
+
+ // Implementations that take a tuple-like object for the column names.
+
+ /// Recursively build a vector of arrow::Field from the defined types.
+ ///
+ /// In most cases MakeSchema is the better entrypoint for the Schema
creation.
+ template <typename NamesTuple>
+ static std::vector<std::shared_ptr<Field>> MakeSchemaRecursionT(
+ const NamesTuple& names) {
+ std::vector<std::shared_ptr<Field>> ret =
+ SchemaFromTuple<Tuple, N - 1>::MakeSchemaRecursionT(names);
+ ret.push_back(field(std::get<N - 1>(names), std::make_shared<ArrowType>(),
+ ConversionTraits<Element>::nullable));
+ return ret;
+ }
+
+ /// Build a Schema from the types of the tuple-like structure passed in as
template
+ /// parameter and assign the column names at runtime.
+ ///
+ /// An example usage of this API can look like the following:
+ ///
+ /// \code{.cpp}
+ /// using TupleType = std::tuple<int, std::vector<std::string>>;
+ /// std::shared_ptr<Schema> schema =
+ /// SchemaFromTuple<TupleType>::MakeSchema(std::make_tuple("int_column",
"list_of_strings_column"));
+ /// \endcode
+ template <typename NamesTuple>
+ static std::shared_ptr<Schema> MakeSchema(const NamesTuple& names) {
+ return std::make_shared<Schema>(MakeSchemaRecursionT<NamesTuple>(names));
+ }
+};
+
+template <typename Tuple>
+struct SchemaFromTuple<Tuple, 0> {
+ static std::vector<std::shared_ptr<Field>> MakeSchemaRecursion(
+ const std::vector<std::string>& names) {
+ std::vector<std::shared_ptr<Field>> ret;
+ ret.reserve(names.size());
+ return ret;
+ }
+
+ template <typename NamesTuple>
+ static std::vector<std::shared_ptr<Field>> MakeSchemaRecursionT(
+ const NamesTuple& names) {
+ std::vector<std::shared_ptr<Field>> ret;
+ ret.reserve(std::tuple_size<NamesTuple>::value);
+ return ret;
+ }
+};
+/// @endcond
+
+} // namespace stl
+} // namespace arrow
+
+#endif // ARROW_STL_H
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 009e07db0..cfee6fd0e 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -407,6 +407,19 @@ class ARROW_EXPORT ListType : public NestedType {
std::string name() const override { return "list"; }
};
+namespace meta {
+
+/// Additional ListType class that can be instantiated with only compile-time
arguments.
+template <typename T>
+class ARROW_EXPORT ListType : public ::arrow::ListType {
+ public:
+ using ValueType = T;
+
+ ListType() : ::arrow::ListType(std::make_shared<T>()) {}
+};
+
+} // namespace meta
+
// BinaryType type is represents lists of 1-byte values.
class ARROW_EXPORT BinaryType : public DataType, public NoExtraMeta {
public:
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
> [C++] Create Arrow schema from std::tuple types
> -----------------------------------------------
>
> Key: ARROW-1835
> URL: https://issues.apache.org/jira/browse/ARROW-1835
> Project: Apache Arrow
> Issue Type: Improvement
> Components: C++
> Reporter: Uwe L. Korn
> Assignee: Uwe L. Korn
> Priority: Minor
> Labels: pull-request-available
> Fix For: 0.9.0
>
>
> Given an `std::tuple` type, create an Arrow schema instance.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)