Fokko commented on code in PR #36846:
URL: https://github.com/apache/arrow/pull/36846#discussion_r1295905033


##########
cpp/src/arrow/type.cc:
##########
@@ -316,6 +299,405 @@ std::shared_ptr<Field> Field::WithNullable(const bool 
nullable) const {
   return std::make_shared<Field>(name_, type_, nullable, metadata_);
 }
 
+// Builds the permissive option set: starts from Defaults() and switches on the
+// promotions assigned below, while leaving promote_dictionary_ordered off.
+Field::MergeOptions Field::MergeOptions::Permissive() {
+  MergeOptions permissive = Defaults();
+
+  // Nullability.
+  permissive.promote_nullability = true;
+
+  // Numeric promotions.
+  permissive.promote_numeric_width = true;
+  permissive.promote_integer_sign = true;
+  permissive.promote_integer_to_float = true;
+  permissive.promote_integer_to_decimal = true;
+  permissive.promote_decimal = true;
+  permissive.promote_decimal_to_float = true;
+
+  // Binary, temporal and dictionary promotions.
+  permissive.promote_binary = true;
+  permissive.promote_temporal_unit = true;
+  permissive.promote_dictionary = true;
+  // Explicitly kept off even in permissive mode.
+  permissive.promote_dictionary_ordered = false;
+
+  return permissive;
+}
+
+// Renders the options as "MergeOptions{name=true|false, ...}" for diagnostics.
+//
+// Every promote_* flag is printed, including promote_decimal and
+// promote_dictionary_ordered, so that two option sets which behave differently
+// never produce the same string.
+std::string Field::MergeOptions::ToString() const {
+  // Spell booleans out as words instead of relying on stream formatting flags.
+  const auto flag = [](bool value) { return value ? "true" : "false"; };
+
+  std::stringstream ss;
+  ss << "MergeOptions{";
+  ss << "promote_nullability=" << flag(promote_nullability);
+  ss << ", promote_numeric_width=" << flag(promote_numeric_width);
+  ss << ", promote_integer_to_float=" << flag(promote_integer_to_float);
+  ss << ", promote_integer_to_decimal=" << flag(promote_integer_to_decimal);
+  ss << ", promote_decimal=" << flag(promote_decimal);
+  ss << ", promote_decimal_to_float=" << flag(promote_decimal_to_float);
+  ss << ", promote_temporal_unit=" << flag(promote_temporal_unit);
+  ss << ", promote_dictionary=" << flag(promote_dictionary);
+  ss << ", promote_dictionary_ordered=" << flag(promote_dictionary_ordered);
+  ss << ", promote_integer_sign=" << flag(promote_integer_sign);
+  ss << ", promote_binary=" << flag(promote_binary);
+  ss << '}';
+  return ss.str();
+}
+
+namespace {
+// Utilities for Field::MergeWith
+
+// Maps a binary-like type to the binary type of the same offset width:
+// BINARY/STRING -> binary(), LARGE_BINARY/LARGE_STRING -> large_binary().
+// Any other type id is a caller bug and is reported through Unreachable().
+std::shared_ptr<DataType> MakeBinary(const DataType& type) {
+  const auto id = type.id();
+  if (id == Type::BINARY || id == Type::STRING) {
+    return binary();
+  }
+  if (id == Type::LARGE_BINARY || id == Type::LARGE_STRING) {
+    return large_binary();
+  }
+  Unreachable("Hit an unknown type");
+  return nullptr;
+}
+
+// Computes a decimal type able to hold values of both inputs.
+//
+// Both arguments must be decimal types (they are checked_cast to DecimalType).
+// The result uses the larger of the two scales; each precision is first raised
+// by the number of scale digits that side gains, and the larger one is kept.
+// The result is DECIMAL256 if either input is DECIMAL256 or the precision
+// exceeds BasicDecimal128::kMaxPrecision, otherwise DECIMAL128. Errors from
+// DecimalType::Make are returned unchanged.
+Result<std::shared_ptr<DataType>> WidenDecimals(
+    const std::shared_ptr<DataType>& promoted_type,
+    const std::shared_ptr<DataType>& other_type) {
+  const auto& lhs = checked_cast<const DecimalType&>(*promoted_type);
+  const auto& rhs = checked_cast<const DecimalType&>(*other_type);
+
+  const int32_t scale = std::max<int32_t>(lhs.scale(), rhs.scale());
+  // Precision each side needs once it is rescaled to the common scale.
+  const int32_t lhs_precision = lhs.precision() + scale - lhs.scale();
+  const int32_t rhs_precision = rhs.precision() + scale - rhs.scale();
+  const int32_t precision = std::max<int32_t>(lhs_precision, rhs_precision);
+
+  const bool needs_256 = lhs.id() == Type::DECIMAL256 ||
+                         rhs.id() == Type::DECIMAL256 ||
+                         precision > BasicDecimal128::kMaxPrecision;
+  return DecimalType::Make(needs_256 ? Type::DECIMAL256 : Type::DECIMAL128,
+                           precision, scale);
+}
+
+Result<std::shared_ptr<DataType>> MergeTypes(std::shared_ptr<DataType> 
promoted_type,
+                                             std::shared_ptr<DataType> 
other_type,
+                                             const Field::MergeOptions& 
options);
+
+// Merges two dictionary types into one, or returns an error.
+//
+// Both arguments must be dictionary types (they are checked_cast to
+// DictionaryType). Index types are always merged with integer sign and width
+// promotion enabled; value types are merged with the caller's options. The
+// result is ordered only if both inputs are ordered.
+//
+// Returns TypeError if orderedness differs and promote_dictionary_ordered is
+// off, and Invalid if the index or value types could not be merged.
+Result<std::shared_ptr<DataType>> MergeDictionaryTypes(
+    const std::shared_ptr<DataType>& promoted_type,
+    const std::shared_ptr<DataType>& other_type,
+    const Field::MergeOptions& options) {
+  const auto& lhs = checked_cast<const DictionaryType&>(*promoted_type);
+  const auto& rhs = checked_cast<const DictionaryType&>(*other_type);
+
+  const bool ordering_differs = lhs.ordered() != rhs.ordered();
+  if (ordering_differs && !options.promote_dictionary_ordered) {
+    return Status::TypeError(
+        "Cannot merge ordered and unordered dictionary unless "
+        "promote_dictionary_ordered=true");
+  }
+
+  // Indices may always change sign and width, whatever the caller asked for.
+  Field::MergeOptions index_options = options;
+  index_options.promote_integer_sign = true;
+  index_options.promote_numeric_width = true;
+
+  ARROW_ASSIGN_OR_RAISE(
+      auto merged_indices,
+      MergeTypes(lhs.index_type(), rhs.index_type(), index_options));
+  ARROW_ASSIGN_OR_RAISE(
+      auto merged_values,
+      MergeTypes(lhs.value_type(), rhs.value_type(), options));
+
+  // A null result from MergeTypes means "no merge possible" for that side.
+  if (!merged_values) {
+    return Status::Invalid("Could not merge value types");
+  }
+  if (!merged_indices) {
+    return Status::Invalid("Could not merge index types");
+  }
+  return dictionary(merged_indices, merged_values, lhs.ordered() && rhs.ordered());
+}
+
+// Merge temporal types based on options. Returns nullptr for non-temporal
+// types, and whenever promote_temporal_unit is disabled.
+//
+// Handled pairs:
+//   - date32 with date64 (either order) -> date64
+//   - duration with duration            -> duration of the greater unit
+//   - time with time                    -> time64 if the greater unit is MICRO
+//                                          or NANO, otherwise time32
+//   - timestamp with timestamp          -> timestamp of the greater unit; the
+//     timezones must be identical, otherwise a TypeError is returned
+// "Greater" is in the sense of std::max over the unit enum values.
+Result<std::shared_ptr<DataType>> MaybeMergeTemporalTypes(
+    const std::shared_ptr<DataType>& promoted_type,
+    const std::shared_ptr<DataType>& other_type,
+    const Field::MergeOptions& options) {
+  // Every rule below is gated on this one option.
+  if (!options.promote_temporal_unit) {
+    return nullptr;
+  }
+
+  const auto lhs_id = promoted_type->id();
+  const auto rhs_id = other_type->id();
+
+  if ((lhs_id == Type::DATE32 && rhs_id == Type::DATE64) ||
+      (lhs_id == Type::DATE64 && rhs_id == Type::DATE32)) {
+    return date64();
+  }
+
+  if (lhs_id == Type::DURATION && rhs_id == Type::DURATION) {
+    const auto& lhs = checked_cast<const DurationType&>(*promoted_type);
+    const auto& rhs = checked_cast<const DurationType&>(*other_type);
+    return duration(std::max(lhs.unit(), rhs.unit()));
+  }
+
+  if (is_time(lhs_id) && is_time(rhs_id)) {
+    const auto& lhs = checked_cast<const TimeType&>(*promoted_type);
+    const auto& rhs = checked_cast<const TimeType&>(*other_type);
+    const auto merged_unit = std::max(lhs.unit(), rhs.unit());
+    const bool wants_time64 =
+        merged_unit == TimeUnit::MICRO || merged_unit == TimeUnit::NANO;
+    if (wants_time64) {
+      return time64(merged_unit);
+    }
+    return time32(merged_unit);
+  }
+
+  if (lhs_id == Type::TIMESTAMP && rhs_id == Type::TIMESTAMP) {
+    const auto& lhs = checked_cast<const TimestampType&>(*promoted_type);
+    const auto& rhs = checked_cast<const TimestampType&>(*other_type);
+    // Exactly one side carrying a timezone gets its own error message.
+    if (lhs.timezone().empty() != rhs.timezone().empty()) {
+      return Status::TypeError(
+          "Cannot merge timestamp with timezone and timestamp without "
+          "timezone");
+    }
+    if (lhs.timezone() != rhs.timezone()) {
+      return Status::TypeError(
+          "Cannot merge timestamps with differing timezones");
+    }
+    return timestamp(std::max(lhs.unit(), rhs.unit()), lhs.timezone());
+  }
+
+  return nullptr;
+}
+
+// Merge numeric types based on options. Returns nullptr for non-numeric types,
+// and more generally whenever no enabled rule applies to the pair.
+//
+// Rules are tried in this order; those marked (*) return immediately:
+//   1. decimal + float       -> the float type        promote_decimal_to_float
+//   2. decimal + integer (*) -> widened decimal       promote_integer_to_decimal
+//   3. decimal + decimal (*) -> widened decimal       promote_decimal
+//   4. signed + unsigned (*) -> signed integer        promote_integer_sign
+//   5. integer + float       -> float, by max width   promote_integer_to_float
+//   6. same-family widening  -> by max width          promote_numeric_width
+//
+// Rule 3 returns TypeError when the decimal bit widths differ and
+// promote_numeric_width is off. Errors from the decimal helpers are propagated.
+Result<std::shared_ptr<DataType>> MaybeMergeNumericTypes(
+    std::shared_ptr<DataType> promoted_type,
+    std::shared_ptr<DataType> other_type,
+    const Field::MergeOptions& options) {
+  // Map a bit width to the type of each family. Widths of 64 and above select
+  // the 64-bit type; widths below the smallest threshold select the narrowest.
+  const auto signed_for = [](int width) -> std::shared_ptr<DataType> {
+    if (width >= 64) return int64();
+    if (width >= 32) return int32();
+    if (width >= 16) return int16();
+    return int8();
+  };
+  const auto unsigned_for = [](int width) -> std::shared_ptr<DataType> {
+    if (width >= 64) return uint64();
+    if (width >= 32) return uint32();
+    if (width >= 16) return uint16();
+    return uint8();
+  };
+  const auto float_for = [](int width) -> std::shared_ptr<DataType> {
+    if (width >= 64) return float64();
+    if (width >= 32) return float32();
+    return float16();
+  };
+
+  bool promoted = false;
+
+  // Rule 1: make both sides the float type; rule 6 may still widen it.
+  if (options.promote_decimal_to_float) {
+    if (is_decimal(promoted_type->id()) && is_floating(other_type->id())) {
+      promoted_type = other_type;
+      promoted = true;
+    } else if (is_floating(promoted_type->id()) &&
+               is_decimal(other_type->id())) {
+      other_type = promoted_type;
+      promoted = true;
+    }
+  }
+
+  // Rule 2: treat the integer as a scale-0 decimal wide enough for its digits.
+  if (options.promote_integer_to_decimal &&
+      ((is_decimal(promoted_type->id()) && is_integer(other_type->id())) ||
+       (is_decimal(other_type->id()) && is_integer(promoted_type->id())))) {
+    if (is_integer(promoted_type->id()) && is_decimal(other_type->id())) {
+      // Other type is always the int
+      promoted_type.swap(other_type);
+    }
+    ARROW_ASSIGN_OR_RAISE(const int32_t precision,
+                          MaxDecimalDigitsForInteger(other_type->id()));
+    ARROW_ASSIGN_OR_RAISE(const auto promoted_decimal,
+                          DecimalType::Make(promoted_type->id(), precision, 0));
+    ARROW_ASSIGN_OR_RAISE(promoted_type,
+                          WidenDecimals(promoted_type, promoted_decimal));
+    return promoted_type;
+  }
+
+  // Rule 3: decimal with decimal.
+  if (options.promote_decimal && is_decimal(promoted_type->id()) &&
+      is_decimal(other_type->id())) {
+    const DecimalType& left = checked_cast<const DecimalType&>(*promoted_type);
+    const DecimalType& right = checked_cast<const DecimalType&>(*other_type);
+    if (!options.promote_numeric_width &&
+        left.bit_width() != right.bit_width()) {
+      return Status::TypeError(
+          "Cannot promote decimal128 to decimal256 without "
+          "promote_numeric_width=true");
+    }
+    ARROW_ASSIGN_OR_RAISE(promoted_type,
+                          WidenDecimals(promoted_type, other_type));
+    return promoted_type;
+  }
+
+  // Rule 4: mixed signedness always yields a signed type.
+  const bool left_unsigned = is_unsigned_integer(promoted_type->id());
+  const bool right_unsigned = is_unsigned_integer(other_type->id());
+  const bool left_signed = is_signed_integer(promoted_type->id());
+  const bool right_signed = is_signed_integer(other_type->id());
+  if (options.promote_integer_sign &&
+      ((left_unsigned && right_signed) || (left_signed && right_unsigned))) {
+    const int unsigned_width =
+        bit_width(left_unsigned ? promoted_type->id() : other_type->id());
+    const int signed_width =
+        bit_width(left_unsigned ? other_type->id() : promoted_type->id());
+    // A signed type of N bits cannot hold every unsigned N-bit value, so when
+    // the unsigned side is as wide as (or wider than) the signed side the
+    // result must be one step wider than the unsigned side. There is nothing
+    // wider than int64, so a uint64 operand saturates at int64.
+    const int required_width =
+        unsigned_width >= signed_width ? unsigned_width * 2 : signed_width;
+    return signed_for(required_width);
+  }
+
+  // Rule 5: integer with float; does not return so rule 6 still runs.
+  if (options.promote_integer_to_float &&
+      ((is_floating(promoted_type->id()) && is_integer(other_type->id())) ||
+       (is_integer(promoted_type->id()) && is_floating(other_type->id())))) {
+    const int max_width = std::max<int>(bit_width(promoted_type->id()),
+                                        bit_width(other_type->id()));
+    promoted = true;
+    promoted_type = float_for(max_width);
+  }
+
+  // Rule 6: widen within one family (float, signed int, unsigned int).
+  if (options.promote_numeric_width) {
+    const int max_width = std::max<int>(bit_width(promoted_type->id()),
+                                        bit_width(other_type->id()));
+    if (is_floating(promoted_type->id()) && is_floating(other_type->id())) {
+      promoted = true;
+      promoted_type = float_for(max_width);
+    } else if (is_signed_integer(promoted_type->id()) &&
+               is_signed_integer(other_type->id())) {
+      promoted = true;
+      promoted_type = signed_for(max_width);
+    } else if (is_unsigned_integer(promoted_type->id()) &&
+               is_unsigned_integer(other_type->id())) {
+      promoted = true;
+      promoted_type = unsigned_for(max_width);
+    }
+  }
+
+  return promoted ? promoted_type : nullptr;
+}
+
+// Merge binary types based on options. Returns nullptr for non-binary types.
+Result<std::shared_ptr<DataType>> MaybeMergeBinaryTypes(
+    std::shared_ptr<DataType>& promoted_type, std::shared_ptr<DataType>& 
other_type,
+    const Field::MergeOptions& options) {
+  if (options.promote_binary) {
+    if (other_type->id() == Type::FIXED_SIZE_BINARY &&
+        is_base_binary_like(promoted_type->id())) {
+      return MakeBinary(*promoted_type);
+    } else if (promoted_type->id() == Type::FIXED_SIZE_BINARY &&
+               is_base_binary_like(other_type->id())) {
+      return MakeBinary(*other_type);
+    } else if (promoted_type->id() == Type::FIXED_SIZE_BINARY &&
+               other_type->id() == Type::FIXED_SIZE_BINARY) {
+      return binary();
+    } else if (is_string(promoted_type->id()) && is_binary(other_type->id())) {
+      return MakeBinary(*other_type);

Review Comment:
   Removed them for now



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to