wgtmac commented on code in PR #36073:
URL: https://github.com/apache/arrow/pull/36073#discussion_r1288809202
##########
cpp/src/parquet/statistics.cc:
##########
@@ -412,9 +492,9 @@ TypedComparatorImpl</*is_signed=*/false,
Int32Type>::GetMinMax(const int32_t* va
return {SafeCopy<int32_t>(min), SafeCopy<int32_t>(max)};
}
-template <bool is_signed, typename DType>
+template <bool is_signed, typename DType, typename Helper>
std::pair<typename DType::c_type, typename DType::c_type>
-TypedComparatorImpl<is_signed, DType>::GetMinMax(const ::arrow::Array& values)
{
+TypedComparatorImpl<is_signed, DType, Helper>::GetMinMax(const ::arrow::Array&
values) {
Review Comment:
We probably need to check that the ColumnIndex is generated as expected,
especially because its boundary order differs from that of the raw FLBA type.
##########
cpp/src/parquet/statistics_test.cc:
##########
@@ -1082,6 +1115,45 @@ void TestStatisticsSortOrder<FLBAType>::SetValues() {
.set_max(std::string(reinterpret_cast<const char*>(&vals[8][0]),
FLBA_LENGTH));
}
+template <>
+void TestStatisticsSortOrder<Float16LogicalType>::AddNodes(std::string name) {
+ auto node =
+ schema::PrimitiveNode::Make(name, Repetition::REQUIRED,
LogicalType::Float16(),
+ Type::FIXED_LEN_BYTE_ARRAY,
sizeof(uint16_t));
+ fields_.push_back(std::move(node));
+}
+
+template <>
+void TestStatisticsSortOrder<Float16LogicalType>::SetValues() {
+ constexpr int kValueLen = 2;
+ constexpr int kNumBytes = NUM_VALUES * kValueLen;
+
+ const uint16_t u16_vals[NUM_VALUES] = {
+ 0b1100010100000000, // -5.0
+ 0b1100010000000000, // -4.0
+ 0b1100001000000000, // -3.0
+ 0b1100000000000000, // -2.0
+ 0b1011110000000000, // -1.0
+ 0b0000000000000000, // +0.0
+ 0b0011110000000000, // +1.0
+ 0b0100000000000000, // +2.0
+ 0b0100001000000000, // +3.0
+ 0b0100010000000000, // +4.0
+ };
+
+ values_buf_.resize(kNumBytes);
+ uint8_t* ptr = values_buf_.data();
+ for (int i = 0; i < NUM_VALUES; ++i) {
+ Float16(u16_vals[i]).ToLittleEndian(ptr);
+ values_[i].ptr = ptr;
+ ptr += kValueLen;
+ }
+
+ stats_[0]
+ .set_min(std::string(reinterpret_cast<const char*>(values_[0].ptr),
kValueLen))
Review Comment:
Quick question: are float16 stats simply encoded as raw FLBA values (with
length 2) in little-endian byte order?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]