[
https://issues.apache.org/jira/browse/ARROW-14898?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Chenxi Li updated ARROW-14898:
------------------------------
Description:
The following code and the attached data file [^arrow_14898.arr] reproduce the
crash. You can paste the code at the end of
{{arrow/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc}}
{code:c++}
#include "arrow/ipc/api.h"
#include "arrow/io/file.h"
#include <string>
#include <vector>
TEST(GroupBy, ARROW_14898) {
// Repeat enough times
for (int i = 0; i < 100; i++) {
// Read file
auto file_patch = "arrow_14898.arr";
auto mmap_file =
arrow::io::MemoryMappedFile::Open(file_patch, arrow::io::FileMode::READ)
.ValueOrDie();
auto record_batch_reader =
arrow::ipc::RecordBatchFileReader::Open(mmap_file.get(),
arrow::ipc::IpcReadOptions::Defaults())
.ValueOrDie();
auto record_batch = record_batch_reader->ReadRecordBatch(0).ValueOrDie();
// Create data for GroupBy
// If the size is close to 8192, the crash happens
int size = 8192;
std::vector<std::string> vec(size, "a");
std::shared_ptr<arrow::Array> array;
arrow::StringBuilder builder;
auto _ = builder.AppendValues(vec);
_ = builder.Finish(&array);
// Call GroupBy will crash in Hashing::helper_tail
arrow::compute::CountOptions all(arrow::compute::CountOptions::ALL);
auto res =
arrow::compute::internal::GroupBy({array}, {array}, {{"hash_count",
&all}}, false);
}
}
{code}
was:
The code and data file [^arrow_14898.arr] to reproduce the crash. You can paste
it into the end of
arrow/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
{code:c++}
#include "arrow/ipc/api.h"
#include "arrow/io/file.h"
#include <string>
#include <vector>
TEST(GroupBy, ARROW_14898) {
// Repeat enough times
for (int i = 0; i < 100; i++) {
// Read file
auto file_patch = "arrow_14898.arr";
auto mmap_file =
arrow::io::MemoryMappedFile::Open(file_patch, arrow::io::FileMode::READ)
.ValueOrDie();
auto record_batch_reader =
arrow::ipc::RecordBatchFileReader::Open(mmap_file.get(),
arrow::ipc::IpcReadOptions::Defaults())
.ValueOrDie();
auto record_batch = record_batch_reader->ReadRecordBatch(0).ValueOrDie();
// Create data for GroupBy
// If the size is close to 8192, the crash happens
int size = 8192;
std::vector<std::string> vec(size, "a");
std::shared_ptr<arrow::Array> array;
arrow::StringBuilder builder;
auto _ = builder.AppendValues(vec);
_ = builder.Finish(&array);
// Call GroupBy will crash in Hashing::helper_tail
arrow::compute::CountOptions all(arrow::compute::CountOptions::ALL);
auto res =
arrow::compute::internal::GroupBy({array}, {array}, {{"hash_count",
&all}}, false);
}
}
{code}
> [C++] Out-of-bounds memory accessing in key_hash if a key is smaller than
> int64
> -------------------------------------------------------------------------------
>
> Key: ARROW-14898
> URL: https://issues.apache.org/jira/browse/ARROW-14898
> Project: Apache Arrow
> Issue Type: Bug
> Components: C++
> Reporter: Chenxi Li
> Assignee: Chenxi Li
> Priority: Major
> Attachments: arrow_14898.arr
>
>
>
> The following code and the attached data file [^arrow_14898.arr] reproduce
> the crash. You can paste the code at the end of
> {{arrow/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc}}
> {code:c++}
> #include "arrow/ipc/api.h"
> #include "arrow/io/file.h"
> #include <string>
> #include <vector>
> TEST(GroupBy, ARROW_14898) {
> // Repeat enough times
> for (int i = 0; i < 100; i++) {
> // Read file
> auto file_patch = "arrow_14898.arr";
> auto mmap_file =
> arrow::io::MemoryMappedFile::Open(file_patch,
> arrow::io::FileMode::READ)
> .ValueOrDie();
> auto record_batch_reader =
> arrow::ipc::RecordBatchFileReader::Open(mmap_file.get(),
>
> arrow::ipc::IpcReadOptions::Defaults())
> .ValueOrDie();
> auto record_batch = record_batch_reader->ReadRecordBatch(0).ValueOrDie();
> // Create data for GroupBy
> // If the size is close to 8192, the crash happens
> int size = 8192;
> std::vector<std::string> vec(size, "a");
> std::shared_ptr<arrow::Array> array;
> arrow::StringBuilder builder;
> auto _ = builder.AppendValues(vec);
> _ = builder.Finish(&array);
> // Call GroupBy will crash in Hashing::helper_tail
> arrow::compute::CountOptions all(arrow::compute::CountOptions::ALL);
> auto res =
> arrow::compute::internal::GroupBy({array}, {array}, {{"hash_count",
> &all}}, false);
> }
> }
> {code}
--
This message was sent by Atlassian Jira
(v8.20.1#820001)