This is an automated email from the ASF dual-hosted git repository. xuanwo pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git
The following commit(s) were added to refs/heads/main by this push: new 4a5fe91 feat: sort field/order json serialize/deserialization (#64) 4a5fe91 is described below commit 4a5fe917c6b99da9fad49f7f556e5e2302c5cf4c Author: Guotao Yu <guotao.y...@gmail.com> AuthorDate: Thu Apr 10 17:19:22 2025 +0800 feat: sort field/order json serialize/deserialization (#64) --- src/iceberg/CMakeLists.txt | 1 + src/iceberg/json_internal.cc | 112 +++++++++++++++++++++++++++++++++++++++++++ src/iceberg/json_internal.h | 73 ++++++++++++++++++++++++++++ src/iceberg/sort_field.cc | 28 +---------- src/iceberg/sort_field.h | 41 ++++++++++++++++ src/iceberg/transform.cc | 10 ++++ src/iceberg/transform.h | 5 +- src/iceberg/type_fwd.h | 1 + test/CMakeLists.txt | 3 +- test/json_internal_test.cc | 97 +++++++++++++++++++++++++++++++++++++ 10 files changed, 342 insertions(+), 29 deletions(-) diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index d25da9b..9e0670e 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -20,6 +20,7 @@ set(ICEBERG_INCLUDES "$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/src>" set(ICEBERG_SOURCES arrow_c_data_internal.cc demo.cc + json_internal.cc schema.cc schema_field.cc schema_internal.cc diff --git a/src/iceberg/json_internal.cc b/src/iceberg/json_internal.cc new file mode 100644 index 0000000..b953d1d --- /dev/null +++ b/src/iceberg/json_internal.cc @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/json_internal.h"
+
+#include <format>
+
+#include <nlohmann/json.hpp>
+
+#include "iceberg/sort_order.h"
+#include "iceberg/transform.h"
+#include "iceberg/util/formatter.h"
+
+namespace iceberg {
+
+namespace {
+
+constexpr std::string_view kTransform = "transform";
+constexpr std::string_view kSourceId = "source-id";
+constexpr std::string_view kDirection = "direction";
+constexpr std::string_view kNullOrder = "null-order";
+
+constexpr std::string_view kOrderId = "order-id";
+constexpr std::string_view kFields = "fields";
+
+// Reads `key` from `json` as a T; a missing key or failed conversion (an nlohmann
+// exception) becomes a kInvalidArgument error. `key` need not be NUL-terminated.
+template <typename T>
+expected<T, Error> GetJsonValue(const nlohmann::json& json, std::string_view key) {
+  if (!json.contains(key)) {
+    return unexpected<Error>({.kind = ErrorKind::kInvalidArgument,
+                              .message = "Missing key: " + std::string(key)});
+  }
+  try {
+    return json.at(key).get<T>();
+  } catch (const std::exception& ex) {
+    return unexpected<Error>({.kind = ErrorKind::kInvalidArgument,
+                              .message = std::string("Failed to parse key: ") +
+                                         std::string(key) + ", " + ex.what()});
+  }
+}
+
+// Binds the value of `expr` to `json_value`, or returns its error from the caller.
+#define TRY_ASSIGN(json_value, expr)                                    \
+  auto _tmp_##json_value = (expr);                                      \
+  if (!_tmp_##json_value) return unexpected(_tmp_##json_value.error()); \
+  auto json_value = std::move(_tmp_##json_value.value());
+}  // namespace
+
+nlohmann::json ToJson(const SortField& sort_field) {
+  nlohmann::json json;
+  json[kTransform] = std::format("{}", *sort_field.transform());
+  json[kSourceId] = sort_field.source_id();
+  json[kDirection] = SortDirectionToString(sort_field.direction());
+  json[kNullOrder] = NullOrderToString(sort_field.null_order());
+  return json;
+}
+
+nlohmann::json ToJson(const SortOrder& sort_order) {
+  nlohmann::json json;
+  json[kOrderId] = sort_order.order_id();
+  nlohmann::json fields_json = nlohmann::json::array();
+  for (const auto& field : sort_order.fields()) {
+    fields_json.push_back(ToJson(field));
+  }
+  json[kFields] = std::move(fields_json);
+  return json;
+}
+
+expected<std::unique_ptr<SortField>, Error> SortFieldFromJson(
+    const nlohmann::json& json) {
+  TRY_ASSIGN(transform_str, GetJsonValue<std::string>(json, kTransform));
+  TRY_ASSIGN(transform, TransformFunctionFromString(transform_str));
+  TRY_ASSIGN(source_id, GetJsonValue<int32_t>(json, kSourceId));
+  TRY_ASSIGN(direction_str, GetJsonValue<std::string>(json, kDirection));
+  TRY_ASSIGN(direction, SortDirectionFromString(direction_str));
+  TRY_ASSIGN(null_order_str, GetJsonValue<std::string>(json, kNullOrder));
+  TRY_ASSIGN(null_order, NullOrderFromString(null_order_str));
+  return std::make_unique<SortField>(source_id, std::move(transform), direction,
+                                     null_order);
+}
+
+expected<std::unique_ptr<SortOrder>, Error> SortOrderFromJson(
+    const nlohmann::json& json) {
+  TRY_ASSIGN(order_id, GetJsonValue<int32_t>(json, kOrderId));
+  // Use the checked helper: a bare json.at(kFields) throws when the key is absent.
+  TRY_ASSIGN(fields_json, GetJsonValue<nlohmann::json>(json, kFields));
+  std::vector<SortField> sort_fields;
+  for (const auto& field_json : fields_json) {
+    TRY_ASSIGN(sort_field, SortFieldFromJson(field_json));
+    sort_fields.push_back(std::move(*sort_field));
+  }
+  return std::make_unique<SortOrder>(order_id, std::move(sort_fields));
+}
+
+}  // namespace iceberg
diff --git a/src/iceberg/json_internal.h b/src/iceberg/json_internal.h
new file mode 100644
index 0000000..a05bf3e
--- /dev/null
+++ b/src/iceberg/json_internal.h
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <memory>
+
+#include <nlohmann/json_fwd.hpp>
+
+#include "iceberg/error.h"
+#include "iceberg/expected.h"
+#include "iceberg/type_fwd.h"
+
+namespace iceberg {
+/// \brief Serializes a `SortField` object to JSON.
+///
+/// This function converts a `SortField` object into a JSON representation.
+/// The resulting object has the keys `transform`, `source-id`, `direction`, and
+/// `null-order`.
+///
+/// \param sort_field The `SortField` object to be serialized.
+/// \return A JSON object representing the `SortField` in the form of key-value pairs.
+nlohmann::json ToJson(const SortField& sort_field);
+
+/// \brief Serializes a `SortOrder` object to JSON.
+///
+/// This function converts a `SortOrder` object into a JSON representation.
+/// The resulting object has the keys `order-id` and `fields` (a JSON array).
+/// Each `SortField` is serialized as described in the `ToJson(SortField)` function.
+///
+/// \param sort_order The `SortOrder` object to be serialized.
+/// \return A JSON object representing the `SortOrder` with its order ID and fields array.
+nlohmann::json ToJson(const SortOrder& sort_order);
+
+/// \brief Deserializes a JSON object into a `SortField` object.
+///
+/// This function parses the provided JSON and creates a `SortField` object.
+/// It expects the keys `transform`, `source-id`, `direction`, and `null-order` to be
+/// present.
+///
+/// \param json The JSON object representing a `SortField`.
+/// \return An `expected` holding a `std::unique_ptr<SortField>` on success, or an
+/// `Error` if a key is missing or its value cannot be parsed.
+expected<std::unique_ptr<SortField>, Error> SortFieldFromJson(const nlohmann::json& json);
+
+/// \brief Deserializes a JSON object into a `SortOrder` object.
+///
+/// This function parses the provided JSON and creates a `SortOrder` object.
+/// It expects the keys `order-id` and `fields` (an array of sort field objects).
+/// Each `SortField` will be parsed using the `SortFieldFromJson` function.
+///
+/// \param json The JSON object representing a `SortOrder`.
+/// \return An `expected` holding a `std::unique_ptr<SortOrder>` on success, or an
+/// `Error` if the order ID or any sort field cannot be parsed.
+expected<std::unique_ptr<SortOrder>, Error> SortOrderFromJson(const nlohmann::json& json);
+
+}  // namespace iceberg
diff --git a/src/iceberg/sort_field.cc b/src/iceberg/sort_field.cc
index 8b9188d..ae5464b 100644
--- a/src/iceberg/sort_field.cc
+++ b/src/iceberg/sort_field.cc
@@ -27,32 +27,6 @@
 
 namespace iceberg {
 
-namespace {
-/// \brief Get the relative sort direction name
-constexpr std::string_view ToString(SortDirection direction) {
-  switch (direction) {
-    case SortDirection::kAscending:
-      return "asc";
-    case SortDirection::kDescending:
-      return "desc";
-    default:
-      return "invalid";
-  }
-}
-
-/// \brief Get the relative null order name
-constexpr std::string_view ToString(NullOrder null_order) {
-  switch (null_order) {
-    case NullOrder::kFirst:
-      return "nulls-first";
-    case NullOrder::kLast:
-      return "nulls-last";
-    default:
-      return "invalid";
-  }
-}
-}  // namespace
-
 SortField::SortField(int32_t source_id, std::shared_ptr<TransformFunction> transform,
                      SortDirection direction, NullOrder null_order)
     : source_id_(source_id),
@@ -73,7 +47,7 @@ NullOrder SortField::null_order() const { return null_order_; }
 std::string SortField::ToString() const {
   return std::format(
       "sort_field(source_id={}, transform={}, direction={}, null_order={})", source_id_,
-      *transform_, iceberg::ToString(direction_), iceberg::ToString(null_order_));
+      *transform_, SortDirectionToString(direction_), NullOrderToString(null_order_));
 }
 
 bool SortField::Equals(const SortField& other) const {
diff --git a/src/iceberg/sort_field.h b/src/iceberg/sort_field.h
index c28b1b6..f02aef4 100644
--- a/src/iceberg/sort_field.h
+++ b/src/iceberg/sort_field.h
@@ -28,6 +28,8 @@
 #include <string_view>
 #include <vector>
 
+#include "iceberg/error.h"
+#include "iceberg/expected.h"
 #include "iceberg/iceberg_export.h"
 #include "iceberg/type_fwd.h"
 #include "iceberg/util/formattable.h"
@@ -41,6 +43,26 @@ enum class SortDirection {
   /// Descending
   kDescending,
 };
+/// \brief Name of a sort direction: "asc", "desc", or "invalid" for any other value.
+ICEBERG_EXPORT constexpr std::string_view SortDirectionToString(SortDirection direction) {
+  switch (direction) {
+    case SortDirection::kAscending:
+      return "asc";
+    case SortDirection::kDescending:
+      return "desc";
+    default:
+      return "invalid";
+  }
+}
+/// \brief Parses "asc" or "desc" into a SortDirection; any other string is an error.
+ICEBERG_EXPORT constexpr expected<SortDirection, Error> SortDirectionFromString(
+    std::string_view str) {
+  if (str == "asc") return SortDirection::kAscending;
+  if (str == "desc") return SortDirection::kDescending;
+  return unexpected<Error>(
+      {.kind = ErrorKind::kInvalidArgument,
+       .message = "Invalid SortDirection string: " + std::string(str)});
+}
 
 enum class NullOrder {
   /// Nulls are sorted first
@@ -48,6 +70,25 @@ enum class NullOrder {
   /// Nulls are sorted last
   kLast,
 };
+/// \brief Name of a null order: "nulls-first", "nulls-last", or "invalid" otherwise.
+ICEBERG_EXPORT constexpr std::string_view NullOrderToString(NullOrder null_order) {
+  switch (null_order) {
+    case NullOrder::kFirst:
+      return "nulls-first";
+    case NullOrder::kLast:
+      return "nulls-last";
+    default:
+      return "invalid";
+  }
+}
+/// \brief Parses "nulls-first" or "nulls-last" into a NullOrder; otherwise an error.
+ICEBERG_EXPORT constexpr expected<NullOrder, Error> NullOrderFromString(
+    std::string_view str) {
+  if (str == "nulls-first") return NullOrder::kFirst;
+  if (str == "nulls-last") return NullOrder::kLast;
+  return unexpected<Error>({.kind = ErrorKind::kInvalidArgument,
+                            .message = "Invalid NullOrder string: " + std::string(str)});
+}
 
 /// \brief a field with its transform.
 class ICEBERG_EXPORT SortField : public util::Formattable {
diff --git a/src/iceberg/transform.cc b/src/iceberg/transform.cc
index 6511ba9..ed37082 100644
--- a/src/iceberg/transform.cc
+++ b/src/iceberg/transform.cc
@@ -72,4 +72,14 @@ expected<ArrowArray, Error> IdentityTransformFunction::Transform(
                             .message = "IdentityTransformFunction::Transform"});
 }
 
+expected<std::unique_ptr<TransformFunction>, Error> TransformFunctionFromString(
+    std::string_view str) {
+  if (str == "identity") {
+    return std::make_unique<IdentityTransformFunction>();
+  }
+  return unexpected<Error>(
+      {.kind = ErrorKind::kInvalidArgument,
+       .message = "Invalid TransformFunction string: " + std::string(str)});
+}
+
 }  // namespace iceberg
diff --git a/src/iceberg/transform.h b/src/iceberg/transform.h
index 1d12a77..f948a45 100644
--- a/src/iceberg/transform.h
+++ b/src/iceberg/transform.h
@@ -82,7 +82,10 @@ class ICEBERG_EXPORT TransformFunction : public util::Formattable {
   TransformType transform_type_;
 };
 
-class IdentityTransformFunction : public TransformFunction {
+ICEBERG_EXPORT expected<std::unique_ptr<TransformFunction>, Error>
+TransformFunctionFromString(std::string_view str);
+
+class ICEBERG_EXPORT IdentityTransformFunction : public TransformFunction {
  public:
   IdentityTransformFunction();
   /// \brief Transform will take an input array and transform it into a new array.
diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h index f16b646..215e8f2 100644 --- a/src/iceberg/type_fwd.h +++ b/src/iceberg/type_fwd.h @@ -96,6 +96,7 @@ class Transaction; class HistoryEntry; class PartitionSpec; class Snapshot; +class SortField; class SortOrder; class StructLike; class TableMetadata; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 36fd75a..8102880 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -25,7 +25,8 @@ fetchcontent_makeavailable(googletest) add_executable(schema_test) target_sources(schema_test - PRIVATE schema_test.cc + PRIVATE json_internal_test.cc + schema_test.cc schema_field_test.cc schema_json_test.cc type_test.cc diff --git a/test/json_internal_test.cc b/test/json_internal_test.cc new file mode 100644 index 0000000..95e4b8b --- /dev/null +++ b/test/json_internal_test.cc @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+#include "iceberg/json_internal.h"
+
+#include <format>
+#include <memory>
+
+#include <gtest/gtest.h>
+#include <nlohmann/json.hpp>
+
+#include "iceberg/schema.h"
+#include "iceberg/sort_field.h"
+#include "iceberg/sort_order.h"
+#include "iceberg/transform.h"
+#include "iceberg/util/formatter.h"
+
+namespace iceberg {
+
+namespace {
+// Maps a type T to its *FromJson function so TestJsonConversion can stay generic.
+template <typename T>
+expected<std::unique_ptr<T>, Error> FromJsonHelper(const nlohmann::json& json);
+
+template <>
+expected<std::unique_ptr<SortField>, Error> FromJsonHelper(const nlohmann::json& json) {
+  return SortFieldFromJson(json);
+}
+
+template <>
+expected<std::unique_ptr<SortOrder>, Error> FromJsonHelper(const nlohmann::json& json) {
+  return SortOrderFromJson(json);
+}
+
+// Checks that `obj` serializes to `expected_json` and that the JSON parses back equal.
+template <typename T>
+void TestJsonConversion(const T& obj, const nlohmann::json& expected_json) {
+  auto json = ToJson(obj);
+  EXPECT_EQ(expected_json, json) << "JSON conversion mismatch.";
+
+  // ASSERT (not EXPECT): the dereference below is invalid if parsing failed.
+  auto obj_ex = FromJsonHelper<T>(expected_json);
+  ASSERT_TRUE(obj_ex.has_value()) << "Failed to deserialize JSON.";
+  EXPECT_EQ(obj, *obj_ex.value()) << "Deserialized object mismatch.";
+}
+
+}  // namespace
+
+TEST(JsonInternalTest, SortField) {
+  auto identity_transform = std::make_shared<IdentityTransformFunction>();
+
+  // Test for SortField with ascending order
+  SortField sort_field_asc(5, identity_transform, SortDirection::kAscending,
+                           NullOrder::kFirst);
+  nlohmann::json expected_asc =
+      R"({"transform":"identity","source-id":5,"direction":"asc","null-order":"nulls-first"})"_json;
+  TestJsonConversion(sort_field_asc, expected_asc);
+
+  // Test for SortField with descending order
+  SortField sort_field_desc(7, identity_transform, SortDirection::kDescending,
+                            NullOrder::kLast);
+  nlohmann::json expected_desc =
+      R"({"transform":"identity","source-id":7,"direction":"desc","null-order":"nulls-last"})"_json;
+  TestJsonConversion(sort_field_desc, expected_desc);
+}
+
+TEST(JsonInternalTest, SortOrder) {
+  auto identity_transform = std::make_shared<IdentityTransformFunction>();
+  SortField st_ts(5, identity_transform, SortDirection::kAscending, NullOrder::kFirst);
+  SortField st_bar(7, identity_transform, SortDirection::kDescending, NullOrder::kLast);
+  SortOrder sort_order(100, {st_ts, st_bar});
+
+  nlohmann::json expected_sort_order =
+      R"({"order-id":100,"fields":[
+          {"transform":"identity","source-id":5,"direction":"asc","null-order":"nulls-first"},
+          {"transform":"identity","source-id":7,"direction":"desc","null-order":"nulls-last"}]})"_json;
+
+  TestJsonConversion(sort_order, expected_sort_order);
+}
+
+}  // namespace iceberg