westonpace commented on code in PR #13669:
URL: https://github.com/apache/arrow/pull/13669#discussion_r943998671


##########
cpp/src/arrow/compute/exec/spilling_util.cc:
##########
@@ -0,0 +1,253 @@
+#include "spilling_util.h"
+
+namespace arrow
+{
+namespace compute
+{
+
+    struct ArrayInfo  // Bookkeeping for one array of a spilled batch (BatchInfo holds a vector of these).
+    {
+        int64_t num_children;  // Not read in the visible code; presumably the number of child arrays — TODO confirm.
+        std::array<std::shared_ptr<Buffer>, 3> bufs;  // Fixed slots for up to three buffers; the Windows writer tests a slot against 0 before writing it.
+        std::array<size_t, 3> sizes;  // Per-slot amount the file offset advances after a write; may differ from bufs[i]->size() — TODO confirm padding.
+        bool has_dict;  // Not read in the visible code; presumably marks a dictionary-carrying array — TODO confirm.
+    };
+
+    struct SpillFile::BatchInfo  // Describes one batch in the spill file: its starting offset and its arrays.
+    {
+        int64_t start;  // Initial file offset used by WriteBatch_PlatformSpecific when writing this batch.
+        std::vector<ArrayInfo> arrays;  // Arrays of the batch; the writer iterates them in this order.
+    };
+
+#ifdef _WIN32
+#include "windows_compatibility.h"
+
+const FileHandle kInvalidHandle = INVALID_HANDLE_VALUE;  // Windows sentinel for "no valid file"; the same value CreateFile reports on failure.
+
+// Creates and opens a uniquely named temporary spill file (Windows).
+//
+// The file is opened for exclusive read/write access (share mode 0) with
+// unbuffered, overlapped I/O, and is deleted automatically when the returned
+// handle is closed (FILE_FLAG_DELETE_ON_CLOSE).
+//
+// Returns the open handle, or Status::IOError if the temp directory cannot
+// be resolved, a unique name cannot be generated, or the file cannot be
+// opened.
+static Result<FileHandle> OpenTemporaryFile()
+{
+    constexpr DWORD kTempFileNameSize = MAX_PATH + 1;
+    wchar_t tmp_name_buf[kTempFileNameSize];
+    wchar_t tmp_path_buf[kTempFileNameSize];
+
+    // NOTE(review): GetTempPath2W only exists on recent Windows builds;
+    // confirm the minimum supported version or fall back to GetTempPathW.
+    // A return value of 0 means failure; a value larger than the buffer is
+    // the size that would have been required.
+    const DWORD ret = GetTempPath2W(kTempFileNameSize, tmp_path_buf);
+    if(ret > kTempFileNameSize || ret == 0)
+        return Status::IOError("Failed to get temp directory path");
+
+    // With uUnique == 0 this also creates an empty placeholder file.  Only
+    // the first three characters of the prefix end up in the file name.
+    if(GetTempFileNameW(tmp_path_buf, L"ARROW_TMP", 0, tmp_name_buf) == 0)
+        return Status::IOError("Failed to generate temp file name");
+
+    // The name buffer is wide-character, so the W variant must be used;
+    // CreateFileA expects a narrow (char) path.
+    HANDLE file_handle = CreateFileW(
+        tmp_name_buf,
+        GENERIC_READ | GENERIC_WRITE | FILE_APPEND_DATA,
+        0,
+        nullptr,
+        CREATE_ALWAYS,
+        FILE_FLAG_NO_BUFFERING | FILE_FLAG_OVERLAPPED | FILE_FLAG_DELETE_ON_CLOSE,
+        nullptr);
+    if(file_handle == INVALID_HANDLE_VALUE)
+    {
+        // Best effort: do not leave the placeholder file behind.
+        DeleteFileW(tmp_name_buf);
+        return Status::IOError("Failed to create temp file");
+    }
+    return file_handle;
+}
+
+// Closes a spill-file handle obtained from OpenTemporaryFile.
+// Returns Status::OK on success, or Status::IOError if CloseHandle fails.
+static Status CloseTemporaryFile(FileHandle handle)
+{
+    const bool closed = (CloseHandle(handle) != 0);
+    return closed ? Status::OK() : Status::IOError("Failed to close temp file");
+}
+
+// Writes every non-null buffer of every array in `info` to `handle`
+// (Windows).
+//
+// Writing starts at file offset `info.start`.  After each buffer is written
+// the offset advances by the matching entry of ArrayInfo::sizes, not by the
+// buffer's own size().
+//
+// Each write is waited on before the next one is issued, so all data has
+// been handed to the OS by the time this returns.
+//
+// Returns Status::OK on success, or Status::IOError if a buffer is too large
+// for a single WriteFile call or a write fails.
+static Status WriteBatch_PlatformSpecific(FileHandle handle, const SpillFile::BatchInfo &info)
+{
+    int64_t offset = info.start;
+    for(const ArrayInfo &arr : info.arrays)
+    {
+        for(size_t i = 0; i < arr.bufs.size(); i++)
+        {
+            if(arr.bufs[i] == nullptr)
+                continue;
+
+            // WriteFile takes a 32-bit length; refuse to truncate silently.
+            const int64_t num_bytes = arr.bufs[i]->size();
+            if(num_bytes < 0 || num_bytes > static_cast<int64_t>(MAXDWORD))
+                return Status::IOError("Buffer too large to spill in a single write");
+
+            // Unused OVERLAPPED members must be zero before the call.
+            OVERLAPPED overlapped = {};
+            overlapped.Offset = static_cast<DWORD>(offset);
+            overlapped.OffsetHigh = static_cast<DWORD>(offset >> 32);
+            if(!WriteFile(
+                   handle,
+                   arr.bufs[i]->data(),
+                   static_cast<DWORD>(num_bytes),
+                   nullptr,
+                   &overlapped))
+            {
+                // On an overlapped handle FALSE + ERROR_IO_PENDING means the
+                // write was queued, not that it failed.
+                if(GetLastError() != ERROR_IO_PENDING)
+                    return Status::IOError("Failed to spill!");
+
+                // NOTE(review): with hEvent == NULL the wait is on the file
+                // handle itself; this assumes no other overlapped operation
+                // is in flight on `handle` — confirm against callers.
+                DWORD bytes_written = 0;
+                if(!GetOverlappedResult(handle, &overlapped, &bytes_written, TRUE))
+                    return Status::IOError("Failed to spill!");
+            }
+            offset += arr.sizes[i];
+        }
+    }
+    return Status::OK();
+}
+#else
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+Result<FileHandle> OpenTemporaryFile()
+{
+    const char *selectors[] = { "TMPDIR", "TMP", "TEMP", "TEMPDIR" };
+    constexpr size_t kNumSelectors = sizeof(selectors) / sizeof(selectors[0]);
+#ifdef __ANDROID__
+    const char *backup = "/data/local/tmp/";
+#else
+    const char *backup = "/tmp/";
+#endif
+    const char *tmp_dir = backup;
+    for(size_t i = 0; i < kNumSelectors; i++)
+    {
+        const char *env = getenv(selectors[i]);
+        if(env)
+        {
+            tmp_dir = env;
+            break;
+        }
+    }
+    size_t tmp_dir_length = std::strlen(tmp_dir);
+
+    const char *tmp_name_template = "/ARROW_TMP_XXXXXX";
+    size_t tmp_name_length = std::strlen(tmp_name_template);
+
+    constexpr int kFileNameSize = 1024;
+
+    if((tmp_dir_length + tmp_name_length) >= kFileNameSize)
+    {
+        tmp_dir = backup;
+        tmp_dir_length = std::strlen(backup);
+    }
+
+    char name[kFileNameSize + 1] = {};
+    std::strncpy(name, tmp_dir, kFileNameSize);
+    std::strncpy(name + tmp_dir_length, tmp_name_template, kFileNameSize - 
tmp_dir_length);
+
+#ifdef __APPLE__
+    int fd = mkstemp(name);
+    if(fd == kInvalidHandle)
+        return Status::IOError(strerror(errno));
+    if(fcntl(fd, F_NOCACHE, 1) == -1)
+        return Status::IOError(strerror(errno));
+#else    
+    int fd = mkostemp(name, O_DIRECT);

Review Comment:
   > the point of spilling is to get the stuff out of memory as soon as 
possible and avoid memcpy-ing it around into staging buffers.
   
   This is a good point.
   
   > Alignment requirements are not an issue since we're partitioning 
everything anyway. We will just make the partitioned batches aligned.
   
   Sounds good.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to