Kontinuation commented on code in PR #43977:
URL: https://github.com/apache/arrow/pull/43977#discussion_r1766346381


##########
cpp/src/parquet/statistics.cc:
##########
@@ -47,6 +48,305 @@ using arrow::util::SafeCopy;
 using arrow::util::SafeLoad;
 
 namespace parquet {
+
+class GeometryStatisticsImpl {
+ public:
+  GeometryStatisticsImpl() = default;
+  GeometryStatisticsImpl(const GeometryStatisticsImpl&) = default;
+
+  bool Equals(const GeometryStatisticsImpl& other) const {
+    if (is_valid_ != other.is_valid_) {
+      return false;
+    }
+
+    if (!is_valid_ && !other.is_valid_) {
+      return true;
+    }
+
+    auto geometry_types = bounder_.GeometryTypes();
+    auto other_geometry_types = other.bounder_.GeometryTypes();
+    if (geometry_types.size() != other_geometry_types.size()) {
+      return false;
+    }
+
+    for (size_t i = 0; i < geometry_types.size(); i++) {
+      if (geometry_types[i] != other_geometry_types[i]) {
+        return false;
+      }
+    }
+
+    return bounder_.Bounds() == other.bounder_.Bounds();
+  }
+
+  void Merge(const GeometryStatisticsImpl& other) {
+    if (!is_valid_ || !other.is_valid_) {
+      is_valid_ = false;
+      return;
+    }
+
+    bounder_.ReadBox(other.bounder_.Bounds());
+    bounder_.ReadGeometryTypes(other.bounder_.GeometryTypes());
+  }
+
+  void Update(const ByteArray* values, int64_t num_values, int64_t null_count) {
+    if (!is_valid_) {
+      return;
+    }
+
+    geometry::WKBBuffer buf;
+    try {
+      for (int64_t i = 0; i < num_values; i++) {
+        const ByteArray& item = values[i];
+        buf.Init(item.ptr, item.len);
+        bounder_.ReadGeometry(&buf);
+      }
+
+      bounder_.Flush();
+    } catch (ParquetException&) {
+      is_valid_ = false;
+    }
+  }
+
+  void UpdateSpaced(const ByteArray* values, const uint8_t* valid_bits,
+                    int64_t valid_bits_offset, int64_t num_spaced_values,
+                    int64_t num_values, int64_t null_count) {
+    DCHECK_GT(num_spaced_values, 0);
+
+    geometry::WKBBuffer buf;
+    try {
+      ::arrow::internal::VisitSetBitRunsVoid(
+          valid_bits, valid_bits_offset, num_spaced_values,
+          [&](int64_t position, int64_t length) {
+            for (int64_t i = 0; i < length; i++) {
+              ByteArray item = SafeLoad(values + i + position);
+              buf.Init(item.ptr, item.len);
+              bounder_.ReadGeometry(&buf);
+            }
+          });
+      bounder_.Flush();
+    } catch (ParquetException&) {
+      is_valid_ = false;
+    }
+  }
+
+  void Update(const ::arrow::Array& values, bool update_counts) {
+    ARROW_UNUSED(update_counts);
+
+    const auto& binary_array = static_cast<const ::arrow::BinaryArray&>(values);
+    geometry::WKBBuffer buf;
+    try {
+      for (int64_t i = 0; i < binary_array.length(); ++i) {
+        if (!binary_array.IsNull(i)) {
+          std::string_view byte_array = binary_array.GetView(i);
+          buf.Init(reinterpret_cast<const uint8_t*>(byte_array.data()),
+                   byte_array.length());
+          bounder_.ReadGeometry(&buf);
+          bounder_.Flush();
+        }
+      }
+    } catch (ParquetException&) {
+      is_valid_ = false;
+    }
+  }
+
+  void Reset() {
+    bounder_.Reset();
+    coverings_.clear();
+    is_valid_ = true;
+  }
+
+  EncodedGeometryStatistics Encode() const {
+    const double* mins = bounder_.Bounds().min;
+    const double* maxes = bounder_.Bounds().max;
+
+    EncodedGeometryStatistics out;
+    out.geometry_types = bounder_.GeometryTypes();
+
+    out.xmin = mins[0];
+    out.xmax = maxes[0];
+    out.ymin = mins[1];
+    out.ymax = maxes[1];
+    out.zmin = mins[2];
+    out.zmax = maxes[2];
+    out.mmin = mins[3];
+    out.mmax = maxes[3];
+
+    if (coverings_.empty()) {
+      // Generate coverings from bounding box if coverings is not present

Review Comment:
   This is for generating coverings from the bounding box when assembling the 
encoded representation of the geometry statistics. I've added a member called 
`generate_covering_` to make it more explicit.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to