evindj commented on code in PR #553:
URL: https://github.com/apache/iceberg-cpp/pull/553#discussion_r2853273135
##########
src/iceberg/expression/json_serde.cc:
##########
@@ -123,27 +181,252 @@ nlohmann::json ToJson(Expression::Operation op) {
return json;
}
+nlohmann::json ToJson(const NamedReference& ref) { return ref.name(); }
+
+Result<std::unique_ptr<NamedReference>> NamedReferenceFromJson(
+ const nlohmann::json& json) {
+ if (json.is_object() && json.contains(kType) &&
+ json[kType].get<std::string>() == kTypeReference &&
json.contains(kTerm)) {
+ return NamedReference::Make(json[kTerm].get<std::string>());
+ }
+ if (!json.is_string()) [[unlikely]] {
+ return JsonParseError("Expected string for named reference");
+ }
+ return NamedReference::Make(json.get<std::string>());
+}
+
+nlohmann::json ToJson(const UnboundTransform& transform) {
+ auto& mutable_transform = const_cast<UnboundTransform&>(transform);
+ nlohmann::json json;
+ json[kType] = kTransform;
+ json[kTransform] = transform.transform()->ToString();
+ json[kTerm] = mutable_transform.reference()->name();
+ return json;
+}
+
+Result<std::unique_ptr<UnboundTransform>> UnboundTransformFromJson(
+ const nlohmann::json& json) {
+ if (IsTransformTerm(json)) {
+ ICEBERG_ASSIGN_OR_RAISE(auto transform_str,
+ GetJsonValue<std::string>(json, kTransform));
+ ICEBERG_ASSIGN_OR_RAISE(auto transform,
TransformFromString(transform_str));
+ ICEBERG_ASSIGN_OR_RAISE(auto ref, NamedReferenceFromJson(json[kTerm]));
+ return UnboundTransform::Make(std::move(ref), std::move(transform));
+ }
+ return JsonParseError("Invalid unbound transform json: {}",
SafeDumpJson(json));
+}
+
+nlohmann::json ToJson(const Literal& literal) {
+ if (literal.IsNull()) {
+ return nullptr;
+ }
+
+ const auto type_id = literal.type()->type_id();
+ const auto& value = literal.value();
+
+ switch (type_id) {
+ case TypeId::kBoolean:
+ return std::get<bool>(value);
+ case TypeId::kInt:
+ return std::get<int32_t>(value);
+ case TypeId::kDate:
+ return TransformUtil::HumanDay(std::get<int32_t>(value));
+ case TypeId::kLong:
+ return std::get<int64_t>(value);
+ case TypeId::kTime:
+ return TransformUtil::HumanTime(std::get<int64_t>(value));
+ case TypeId::kTimestamp:
+ return TransformUtil::HumanTimestamp(std::get<int64_t>(value));
+ case TypeId::kTimestampTz:
+ return TransformUtil::HumanTimestampWithZone(std::get<int64_t>(value));
+ case TypeId::kFloat:
+ return std::get<float>(value);
+ case TypeId::kDouble:
+ return std::get<double>(value);
+ case TypeId::kString:
+ return std::get<std::string>(value);
+ case TypeId::kBinary:
+ case TypeId::kFixed: {
+ const auto& bytes = std::get<std::vector<uint8_t>>(value);
+ std::string hex;
+ hex.reserve(bytes.size() * 2);
+ for (uint8_t byte : bytes) {
+ hex += std::format("{:02X}", byte);
+ }
+ return hex;
+ }
+ case TypeId::kDecimal: {
+ return literal.ToString();
+ }
+ case TypeId::kUuid:
+ return std::get<Uuid>(value).ToString();
+ default:
+ nlohmann::json json;
+ return json;
+ }
+}
+
+Result<Literal> LiteralFromJson(const nlohmann::json& json) {
+ // Unwrap {"type": "literal", "value": <actual>} wrapper
+ if (json.is_object() && json.contains(kType) &&
+ json[kType].get<std::string>() == kTypeLiteral && json.contains(kValue))
{
+ return LiteralFromJson(json[kValue]);
+ }
+ if (json.is_null()) {
+ return Literal::Null(nullptr);
+ }
+ if (json.is_boolean()) {
+ return Literal::Boolean(json.get<bool>());
+ }
+ if (json.is_number_integer()) {
+ return Literal::Long(json.get<int64_t>());
+ }
+ if (json.is_number_float()) {
+ return Literal::Double(json.get<double>());
+ }
+ if (json.is_string()) {
+ // All strings are returned as String literals.
+ // Conversion to binary/date/time/etc. happens during binding
+ // when schema type information is available.
+ return Literal::String(json.get<std::string>());
+ }
+ return JsonParseError("Unsupported literal JSON type");
+}
+
+nlohmann::json TermToJson(const Term& term) {
+ switch (term.kind()) {
+ case Term::Kind::kReference:
+ return ToJson(static_cast<const NamedReference&>(term));
+ case Term::Kind::kTransform:
+ return ToJson(static_cast<const UnboundTransform&>(term));
+ default:
+ return nullptr;
+ }
+}
+
+nlohmann::json ToJson(const UnboundPredicate& pred) {
+ nlohmann::json json;
+ json[kType] = ToJson(pred.op());
+
+ // Get term and literals by casting to the appropriate impl type
+ std::span<const Literal> literals;
+
+ if (auto* ref_pred = dynamic_cast<const
UnboundPredicateImpl<BoundReference>*>(&pred)) {
+ json[kTerm] = TermToJson(*ref_pred->term());
+ literals = ref_pred->literals();
+ } else if (auto* transform_pred =
+ dynamic_cast<const
UnboundPredicateImpl<BoundTransform>*>(&pred)) {
+ json[kTerm] = TermToJson(*transform_pred->term());
+ literals = transform_pred->literals();
+ }
+
+ if (!IsUnaryOperation(pred.op())) {
+ if (IsSetOperation(pred.op())) {
+ nlohmann::json values = nlohmann::json::array();
+ for (const auto& lit : literals) {
+ values.push_back(ToJson(lit));
+ }
+ json[kValues] = std::move(values);
+ } else if (!literals.empty()) {
+ json[kValue] = ToJson(literals[0]);
+ }
+ }
+ return json;
+}
+
+Result<std::unique_ptr<UnboundPredicate>> UnboundPredicateFromJson(
+ const nlohmann::json& json) {
+ ICEBERG_ASSIGN_OR_RAISE(auto op, OperationTypeFromJson(json[kType]));
+
+ const auto& term_json = json[kTerm];
+
+ if (IsTransformTerm(term_json)) {
+ ICEBERG_ASSIGN_OR_RAISE(auto term, UnboundTransformFromJson(term_json));
Review Comment:
> Should we add a TermFromJson to wrap the logic of
> UnboundTransformFromJson?
I don't think we should deal with bound terms here, since SerDe does not take
care of binding. I would rather keep this fact explicit instead of using the
generic TermFromJson, which would have made more sense if SerDe were also
taking care of type binding.
##########
src/iceberg/expression/json_serde.cc:
##########
@@ -123,27 +181,252 @@ nlohmann::json ToJson(Expression::Operation op) {
return json;
}
+nlohmann::json ToJson(const NamedReference& ref) { return ref.name(); }
+
+Result<std::unique_ptr<NamedReference>> NamedReferenceFromJson(
+ const nlohmann::json& json) {
+ if (json.is_object() && json.contains(kType) &&
+ json[kType].get<std::string>() == kTypeReference &&
json.contains(kTerm)) {
+ return NamedReference::Make(json[kTerm].get<std::string>());
+ }
+ if (!json.is_string()) [[unlikely]] {
+ return JsonParseError("Expected string for named reference");
+ }
+ return NamedReference::Make(json.get<std::string>());
+}
+
+nlohmann::json ToJson(const UnboundTransform& transform) {
+ auto& mutable_transform = const_cast<UnboundTransform&>(transform);
+ nlohmann::json json;
+ json[kType] = kTransform;
+ json[kTransform] = transform.transform()->ToString();
+ json[kTerm] = mutable_transform.reference()->name();
+ return json;
+}
+
+Result<std::unique_ptr<UnboundTransform>> UnboundTransformFromJson(
+ const nlohmann::json& json) {
+ if (IsTransformTerm(json)) {
+ ICEBERG_ASSIGN_OR_RAISE(auto transform_str,
+ GetJsonValue<std::string>(json, kTransform));
+ ICEBERG_ASSIGN_OR_RAISE(auto transform,
TransformFromString(transform_str));
+ ICEBERG_ASSIGN_OR_RAISE(auto ref, NamedReferenceFromJson(json[kTerm]));
+ return UnboundTransform::Make(std::move(ref), std::move(transform));
+ }
+ return JsonParseError("Invalid unbound transform json: {}",
SafeDumpJson(json));
+}
+
+nlohmann::json ToJson(const Literal& literal) {
+ if (literal.IsNull()) {
+ return nullptr;
+ }
+
+ const auto type_id = literal.type()->type_id();
+ const auto& value = literal.value();
+
+ switch (type_id) {
+ case TypeId::kBoolean:
+ return std::get<bool>(value);
+ case TypeId::kInt:
+ return std::get<int32_t>(value);
+ case TypeId::kDate:
+ return TransformUtil::HumanDay(std::get<int32_t>(value));
+ case TypeId::kLong:
+ return std::get<int64_t>(value);
+ case TypeId::kTime:
+ return TransformUtil::HumanTime(std::get<int64_t>(value));
+ case TypeId::kTimestamp:
+ return TransformUtil::HumanTimestamp(std::get<int64_t>(value));
+ case TypeId::kTimestampTz:
+ return TransformUtil::HumanTimestampWithZone(std::get<int64_t>(value));
+ case TypeId::kFloat:
+ return std::get<float>(value);
+ case TypeId::kDouble:
+ return std::get<double>(value);
+ case TypeId::kString:
+ return std::get<std::string>(value);
+ case TypeId::kBinary:
+ case TypeId::kFixed: {
+ const auto& bytes = std::get<std::vector<uint8_t>>(value);
+ std::string hex;
+ hex.reserve(bytes.size() * 2);
+ for (uint8_t byte : bytes) {
+ hex += std::format("{:02X}", byte);
+ }
+ return hex;
+ }
+ case TypeId::kDecimal: {
+ return literal.ToString();
+ }
+ case TypeId::kUuid:
+ return std::get<Uuid>(value).ToString();
+ default:
+ nlohmann::json json;
+ return json;
+ }
+}
+
+Result<Literal> LiteralFromJson(const nlohmann::json& json) {
+ // Unwrap {"type": "literal", "value": <actual>} wrapper
+ if (json.is_object() && json.contains(kType) &&
+ json[kType].get<std::string>() == kTypeLiteral && json.contains(kValue))
{
+ return LiteralFromJson(json[kValue]);
+ }
+ if (json.is_null()) {
+ return Literal::Null(nullptr);
+ }
+ if (json.is_boolean()) {
+ return Literal::Boolean(json.get<bool>());
+ }
+ if (json.is_number_integer()) {
+ return Literal::Long(json.get<int64_t>());
+ }
+ if (json.is_number_float()) {
+ return Literal::Double(json.get<double>());
+ }
+ if (json.is_string()) {
+ // All strings are returned as String literals.
+ // Conversion to binary/date/time/etc. happens during binding
+ // when schema type information is available.
+ return Literal::String(json.get<std::string>());
+ }
+ return JsonParseError("Unsupported literal JSON type");
+}
+
+nlohmann::json TermToJson(const Term& term) {
+ switch (term.kind()) {
+ case Term::Kind::kReference:
+ return ToJson(static_cast<const NamedReference&>(term));
+ case Term::Kind::kTransform:
+ return ToJson(static_cast<const UnboundTransform&>(term));
+ default:
+ return nullptr;
+ }
+}
+
+nlohmann::json ToJson(const UnboundPredicate& pred) {
+ nlohmann::json json;
+ json[kType] = ToJson(pred.op());
+
+ // Get term and literals by casting to the appropriate impl type
+ std::span<const Literal> literals;
+
+ if (auto* ref_pred = dynamic_cast<const
UnboundPredicateImpl<BoundReference>*>(&pred)) {
+ json[kTerm] = TermToJson(*ref_pred->term());
+ literals = ref_pred->literals();
+ } else if (auto* transform_pred =
+ dynamic_cast<const
UnboundPredicateImpl<BoundTransform>*>(&pred)) {
+ json[kTerm] = TermToJson(*transform_pred->term());
+ literals = transform_pred->literals();
+ }
+
+ if (!IsUnaryOperation(pred.op())) {
+ if (IsSetOperation(pred.op())) {
+ nlohmann::json values = nlohmann::json::array();
+ for (const auto& lit : literals) {
+ values.push_back(ToJson(lit));
+ }
+ json[kValues] = std::move(values);
+ } else if (!literals.empty()) {
+ json[kValue] = ToJson(literals[0]);
+ }
+ }
+ return json;
+}
+
+Result<std::unique_ptr<UnboundPredicate>> UnboundPredicateFromJson(
+ const nlohmann::json& json) {
+ ICEBERG_ASSIGN_OR_RAISE(auto op, OperationTypeFromJson(json[kType]));
+
+ const auto& term_json = json[kTerm];
+
+ if (IsTransformTerm(term_json)) {
+ ICEBERG_ASSIGN_OR_RAISE(auto term, UnboundTransformFromJson(term_json));
Review Comment:
> Should we add a TermFromJson to wrap the logic of
> UnboundTransformFromJson?
I don't think we should deal with bound terms here, since SerDe does not take
care of binding. I would rather keep this fact explicit instead of using the
generic TermFromJson, which would have made more sense if SerDe were also
taking care of type binding.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]