mapleFU commented on code in PR #47190:
URL: https://github.com/apache/arrow/pull/47190#discussion_r2241444421


##########
cpp/src/parquet/types.cc:
##########
@@ -96,40 +98,107 @@ bool PageCanUseChecksum(PageType::type pageType) {
   }
 }
 
-std::string FormatStatValue(Type::type parquet_type, ::std::string_view val) {
+namespace {
+
+template <typename T>
+std::enable_if_t<std::is_arithmetic_v<T>, std::string> FormatNumericValue(
+    ::std::string_view val) {
+  std::stringstream result;
+  T value{};
+  std::memcpy(&value, val.data(), sizeof(T));
+  result << value;
+  return result.str();
+}
+
+std::string FormatDecimalValue(Type::type parquet_type, ::std::string_view val,
+                               const std::shared_ptr<const LogicalType>& 
logical_type) {
+  ARROW_DCHECK(logical_type != nullptr && logical_type->is_decimal());
+
+  const auto& decimal_type =
+      ::arrow::internal::checked_cast<const 
DecimalLogicalType&>(*logical_type);
+  const int32_t scale = decimal_type.scale();
+
+  std::stringstream result;
+  switch (parquet_type) {
+    case Type::INT32: {
+      int32_t int_value{};
+      std::memcpy(&int_value, val.data(), sizeof(int32_t));
+      ::arrow::Decimal128 decimal_value(int_value);
+      result << decimal_value.ToString(scale);
+      break;
+    }
+    case Type::INT64: {
+      int64_t long_value{};
+      std::memcpy(&long_value, val.data(), sizeof(int64_t));
+      ::arrow::Decimal128 decimal_value(long_value);
+      result << decimal_value.ToString(scale);
+      break;
+    }
+    case Type::FIXED_LEN_BYTE_ARRAY:
+    case Type::BYTE_ARRAY: {
+      auto decimal_result = ::arrow::Decimal128::FromBigEndian(
+          reinterpret_cast<const uint8_t*>(val.data()), 
static_cast<int32_t>(val.size()));
+      if (!decimal_result.ok()) {
+        throw ParquetException("Failed to parse decimal value: ",
+                               decimal_result.status().message());
+      }
+      result << decimal_result.ValueUnsafe().ToString(scale);
+      break;
+    }
+    default:
+      throw ParquetException("Unsupported decimal type: ", 
TypeToString(parquet_type));
+  }
+
+  return result.str();
+}
+
+std::string FormatNonUTF8Value(::std::string_view val) {
+  if (val.empty()) {
+    return "";
+  }
+
+  std::stringstream result;
+  result << "0x" << std::hex;
+  for (const auto& c : val) {
+    result << std::setw(2) << std::setfill('0')
+           << static_cast<int>(static_cast<unsigned char>(c));
+  }
+  return result.str();
+}
+
+}  // namespace
+
+std::string FormatStatValue(Type::type parquet_type, ::std::string_view val,
+                            const std::shared_ptr<const LogicalType>& 
logical_type) {
   std::stringstream result;
 
+  if (logical_type != nullptr && logical_type->is_decimal()) {
+    return FormatDecimalValue(parquet_type, val, logical_type);
+  }
+
+  // Default handling for non-decimal types or when decimal processing fails
   const char* bytes = val.data();
   switch (parquet_type) {
     case Type::BOOLEAN: {
-      bool value{};
-      std::memcpy(&value, bytes, sizeof(bool));
-      result << value;
-      break;
+      return FormatNumericValue<bool>(val);

Review Comment:
   What about adding an extra `<< std::boolalpha` here? Printing the bool as a
number (`0`/`1`) feels too C-style...



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to