paleolimbot commented on code in PR #45459:
URL: https://github.com/apache/arrow/pull/45459#discussion_r2007978636


##########
cpp/src/parquet/geospatial_statistics.h:
##########
@@ -0,0 +1,174 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cmath>
+#include <cstdint>
+#include <memory>
+
+#include "parquet/platform.h"
+#include "parquet/types.h"
+
+namespace parquet {
+
+/// \brief Structure representing encoded statistics to be written to and read from Parquet
+/// serialized metadata.
+///
+/// See the Parquet Thrift definition and GeoStatistics for the specific definition
+/// of field values.
+class PARQUET_EXPORT EncodedGeoStatistics {
+ public:
+  static constexpr double kInf = std::numeric_limits<double>::infinity();
+
+  double xmin{kInf};
+  double xmax{-kInf};
+  double ymin{kInf};
+  double ymax{-kInf};
+  double zmin{kInf};
+  double zmax{-kInf};
+  double mmin{kInf};
+  double mmax{-kInf};
+  std::vector<int32_t> geospatial_types;
+
+  bool has_x() const { return !std::isinf(xmin - xmax); }
+  bool has_y() const { return !std::isinf(ymin - ymax); }
+  bool has_z() const { return !std::isinf(zmin - zmax); }
+  bool has_m() const { return !std::isinf(mmin - mmax); }
+
+  bool is_set() const {
+    return !geospatial_types.empty() || has_x() || has_y() || has_z() || 
has_m();
+  }
+};
+
+class GeoStatisticsImpl;
+
+/// \brief Base type for computing geospatial column statistics while writing a file
+/// or representing them when reading a file
+///
+/// EXPERIMENTAL
+class PARQUET_EXPORT GeoStatistics {
+ public:
+  GeoStatistics();
+  explicit GeoStatistics(const EncodedGeoStatistics& encoded);
+
+  ~GeoStatistics();
+
+  /// \brief Return true if bounds, geometry types, and validity are identical
+  bool Equals(const GeoStatistics& other) const;
+
+  /// \brief Update these statistics based on previously calculated or decoded statistics
+  void Merge(const GeoStatistics& other);
+
+  /// \brief Update these statistics based on values
+  void Update(const ByteArray* values, int64_t num_values, int64_t null_count);

Review Comment:
   I removed the `null_count` parameter from all the methods here (it was left over from replicating the statistics interface).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to