joosthooz commented on a change in pull request #12089:
URL: https://github.com/apache/arrow/pull/12089#discussion_r784816517



##########
File path: cpp/src/arrow/compute/exec.cc
##########
@@ -110,6 +112,78 @@ int64_t ExecBatch::TotalBufferSize() const {
   return sum;
 }
 
+// Adds every non-null buffer reachable from `array_data` to `seen_buffers`:
+// its own buffers, then (recursively) those of each child and, if present,
+// those of its dictionary.
+//
+// Returns true if at least one buffer was newly inserted, false if every
+// reachable buffer was null or already in the set.
+bool AddBuffersToSet(const ArrayData& array_data,
+                     std::unordered_set<std::shared_ptr<Buffer>>* seen_buffers) {
+  bool insertion_occurred = false;
+  for (const auto& buffer : array_data.buffers) {
+    // Accumulate instead of assigning: a plain `=` here would let a trailing
+    // null or already-seen buffer erase the record of an earlier insertion.
+    if (buffer && seen_buffers->insert(buffer).second) {
+      insertion_occurred = true;
+    }
+  }
+  for (const auto& child : array_data.child_data) {
+    insertion_occurred |= AddBuffersToSet(*child, seen_buffers);
+  }
+  if (array_data.dictionary) {
+    insertion_occurred |= AddBuffersToSet(*array_data.dictionary, seen_buffers);
+  }
+  return insertion_occurred;
+}
+
+// Array overload: forwards to the ArrayData overload using the data object
+// returned by `array.data()` and returns that call's result.
+bool AddBuffersToSet(const Array& array,
+                     std::unordered_set<std::shared_ptr<Buffer>>* seen_buffers) {
+  const auto& data = array.data();
+  return AddBuffersToSet(*data, seen_buffers);
+}
+
+// ChunkedArray overload: calls AddBuffersToSet on every chunk of
+// `chunked_array`. Returns true if any of those calls returned true.
+bool AddBuffersToSet(const ChunkedArray& chunked_array,
+                     std::unordered_set<std::shared_ptr<Buffer>>* seen_buffers) {
+  bool any_new = false;
+  for (const auto& chunk : chunked_array.chunks()) {
+    // Every chunk is visited, even after a hit, so that all of its buffers
+    // end up in the set.
+    if (AddBuffersToSet(*chunk, seen_buffers)) {
+      any_new = true;
+    }
+  }
+  return any_new;
+}
+
+// RecordBatch overload: calls AddBuffersToSet on every column of
+// `record_batch`. Returns true if any of those calls returned true.
+bool AddBuffersToSet(const RecordBatch& record_batch,
+                     std::unordered_set<std::shared_ptr<Buffer>>* seen_buffers) {
+  bool any_new = false;
+  for (const auto& col : record_batch.columns()) {
+    // The call is the left operand, so it always runs regardless of the flag.
+    any_new = AddBuffersToSet(*col, seen_buffers) || any_new;
+  }
+  return any_new;
+}
+
+// Table overload: calls AddBuffersToSet on every column of `table`.
+// Returns true if any of those calls returned true.
+bool AddBuffersToSet(const Table& table,
+                     std::unordered_set<std::shared_ptr<Buffer>>* seen_buffers) {
+  bool any_new = false;
+  for (const auto& col : table.columns()) {
+    // Evaluate the call first so every column is processed unconditionally.
+    const bool added_here = AddBuffersToSet(*col, seen_buffers);
+    any_new = any_new || added_here;
+  }
+  return any_new;
+}
+
+// Add all Buffers to a given set, return true if anything was actually added.
+// If all the buffers in the datum were already in the set, this will return
+// false.
+bool AddBuffersToSet(Datum datum,
+                     std::unordered_set<std::shared_ptr<Buffer>>* 
seen_buffers) {
+  switch (datum.kind()) {
+    case Datum::ARRAY:
+      return 
AddBuffersToSet(*util::get<std::shared_ptr<ArrayData>>(datum.value),
+                             seen_buffers);
+    case Datum::CHUNKED_ARRAY:
+      return 
AddBuffersToSet(*util::get<std::shared_ptr<ChunkedArray>>(datum.value),
+                             seen_buffers);
+    case Datum::RECORD_BATCH:
+      return 
AddBuffersToSet(*util::get<std::shared_ptr<RecordBatch>>(datum.value),
+                             seen_buffers);
+    case Datum::TABLE:
+      return AddBuffersToSet(*util::get<std::shared_ptr<Table>>(datum.value),
+                             seen_buffers);

Review comment:
       Hashing would be nicer, but it would also trigger the error if a kernel 
deletes a buffer it doesn't need anymore. And I don't think we care about that, 
just newly allocated ones. What do you think?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to