wjones127 commented on code in PR #12891:
URL: https://github.com/apache/arrow/pull/12891#discussion_r852193507


##########
cpp/src/arrow/dataset/file_parquet.cc:
##########
@@ -128,17 +128,30 @@ util::optional<compute::Expression> 
ColumnChunkStatisticsAsExpression(
   auto maybe_min = min->CastTo(field->type());
   auto maybe_max = max->CastTo(field->type());
   if (maybe_min.ok() && maybe_max.ok()) {
-    auto col_min = maybe_min.MoveValueUnsafe();
-    auto col_max = maybe_max.MoveValueUnsafe();
-    if (col_min->Equals(col_max)) {
-      return compute::equal(std::move(field_expr), 
compute::literal(std::move(col_min)));
+    min = maybe_min.MoveValueUnsafe();
+    max = maybe_max.MoveValueUnsafe();
+
+    compute::Expression range;
+    if (min->Equals(max)) {
+      auto single_value = compute::equal(field_expr, 
compute::literal(std::move(min)));
+
+      if (statistics->null_count() == 0) {
+        return compute::and_(single_value, compute::is_valid(field_expr));
+      }
+      return compute::or_(std::move(single_value), 
is_null(std::move(field_expr)));
     }
 
     auto lower_bound =
-        compute::greater_equal(field_expr, 
compute::literal(std::move(col_min)));
-    auto upper_bound =
-        compute::less_equal(std::move(field_expr), 
compute::literal(std::move(col_max)));
-    return compute::and_(std::move(lower_bound), std::move(upper_bound));
+        compute::greater_equal(field_expr, compute::literal(std::move(min)));
+    auto upper_bound = compute::less_equal(field_expr, 
compute::literal(std::move(max)));
+
+    if (statistics->null_count() != 0) {
+      lower_bound = compute::or_(std::move(lower_bound), is_null(field_expr));
+      upper_bound = compute::or_(std::move(upper_bound), 
is_null(std::move(field_expr)));
+      return compute::and_(std::move(lower_bound), std::move(upper_bound));
+    }
+    return compute::and_(compute::and_(std::move(lower_bound), 
std::move(upper_bound)),
+                         compute::is_valid(field_expr));
   }

Review Comment:
   Ah, thanks for the pointer on `num_values`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to