save-buffer commented on code in PR #13669:
URL: https://github.com/apache/arrow/pull/13669#discussion_r968870899


##########
cpp/src/arrow/compute/exec/spilling_util.h:
##########
@@ -0,0 +1,96 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <atomic>
+#include <vector>
+#include <functional>
+#include <memory>
+#include "arrow/compute/exec/query_context.h"
+
+namespace arrow
+{
+    namespace compute
+    {
+#ifdef _WIN32
+        using FileHandle = void *;
+        extern const FileHandle kInvalidHandle;
+#else
+        using FileHandle = int;
+        constexpr FileHandle kInvalidHandle = -1;
+#endif
+
+        // A temporary file meant for spilling data to disk. It can spill a batch to
+        // disk and read it back into memory. This class is designed to fully utilize
+        // disk bandwidth and for removing batches from memory as quickly as possible.
+        class SpillFile
+        {
+        public:
+            static constexpr size_t kAlignment = 512;
+
+            ~SpillFile();
+            // To spill a batch the following must be true:
+            // - Row offset for each column must be 0.
+            // - Column buffers must be aligned to 512 bits
+            // - No column can be a scalar
+            // These assumptions aren't as inconvenient as it seems because
+            // typically batches will be partitioned before being spilled,
+            // meaning the batches will come from ExecBatchBuilder, which
+            // ensures these assumptions hold. 
+            // It is a bug to spill a batch after ReadBackBatches.
+            Status SpillBatch(QueryContext *ctx, ExecBatch batch);

Review Comment:
   There shouldn't be any type restrictions; I took care to handle nested data types.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to