bkietz commented on a change in pull request #8894:
URL: https://github.com/apache/arrow/pull/8894#discussion_r542462619



##########
File path: r/src/expression.cpp
##########
@@ -19,93 +19,98 @@
 
 #if defined(ARROW_R_WITH_ARROW)
 
+#include <arrow/compute/api_scalar.h>
 #include <arrow/dataset/api.h>
 namespace ds = ::arrow::dataset;
 
+std::shared_ptr<ds::Expression> Share(ds::Expression expr) {
+  return std::make_shared<ds::Expression>(std::move(expr));
+}
+
 // [[arrow::export]]
 std::shared_ptr<ds::Expression> dataset___expr__field_ref(std::string name) {
-  return ds::field_ref(std::move(name));
+  return Share(ds::field_ref(std::move(name)));
 }
 
 // [[arrow::export]]
 std::shared_ptr<ds::Expression> dataset___expr__equal(
     const std::shared_ptr<ds::Expression>& lhs,
     const std::shared_ptr<ds::Expression>& rhs) {
-  return ds::equal(lhs, rhs);
+  return Share(ds::equal(*lhs, *rhs));
 }
 
 // [[arrow::export]]
 std::shared_ptr<ds::Expression> dataset___expr__not_equal(
     const std::shared_ptr<ds::Expression>& lhs,
     const std::shared_ptr<ds::Expression>& rhs) {
-  return ds::not_equal(lhs, rhs);
+  return Share(ds::not_equal(*lhs, *rhs));
 }
 
 // [[arrow::export]]
 std::shared_ptr<ds::Expression> dataset___expr__greater(
     const std::shared_ptr<ds::Expression>& lhs,
     const std::shared_ptr<ds::Expression>& rhs) {
-  return ds::greater(lhs, rhs);
+  return Share(ds::greater(*lhs, *rhs));
 }
 
 // [[arrow::export]]
 std::shared_ptr<ds::Expression> dataset___expr__greater_equal(
     const std::shared_ptr<ds::Expression>& lhs,
     const std::shared_ptr<ds::Expression>& rhs) {
-  return ds::greater_equal(lhs, rhs);
+  return Share(ds::greater_equal(*lhs, *rhs));
 }
 
 // [[arrow::export]]
 std::shared_ptr<ds::Expression> dataset___expr__less(
     const std::shared_ptr<ds::Expression>& lhs,
     const std::shared_ptr<ds::Expression>& rhs) {
-  return ds::less(lhs, rhs);
+  return Share(ds::less(*lhs, *rhs));
 }
 
 // [[arrow::export]]
 std::shared_ptr<ds::Expression> dataset___expr__less_equal(
     const std::shared_ptr<ds::Expression>& lhs,
     const std::shared_ptr<ds::Expression>& rhs) {
-  return ds::less_equal(lhs, rhs);
+  return Share(ds::less_equal(*lhs, *rhs));
 }
 
 // [[arrow::export]]
 std::shared_ptr<ds::Expression> dataset___expr__in(
     const std::shared_ptr<ds::Expression>& lhs,
     const std::shared_ptr<arrow::Array>& rhs) {
-  return lhs->In(rhs).Copy();
+  return Share(ds::call("is_in", {*lhs}, 
arrow::compute::SetLookupOptions{rhs}));

Review comment:
       The binary version (`is_in_meta_binary`) would be less performant in the 
context of datasets: `is_in` will reuse a single hash table across all batches, 
while `is_in_meta_binary` will recompute the hash table for each batch. The 
added flexibility would enable expressions like `column_1 %in% column_2`, but 
even if we want to support that I think it should wait for a follow up.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to