rafal-c opened a new issue, #39642:
URL: https://github.com/apache/arrow/issues/39642
### Describe the bug, including details regarding any error messages,
version, and platform.
### Steps to reproduce:
1. Create a sufficiently large `arrow::Table`
2. Save the table to an IPC file.
3. Read the file with memory-mapping.
4. Write the table obtained in step 3 back to the same IPC file - CRASH
("Process finished with exit code 135 (interrupted by signal 7:SIGBUS)")
If you use `arrow::io::ReadableFile` in step 3 instead of
`arrow::io::MemoryMappedFile`, everything works fine.
Tested on Ubuntu 22.04, Arrow 14.0.2.
### Complete code to reproduce:
```cpp
#include <iostream>
#include "arrow/builder.h"
#include "arrow/io/api.h"
#include "arrow/ipc/api.h"
#include "arrow/table.h"
// Builds a single-column table: one utf8 field named "strings" holding
// 1000 copies of the value "xyz". Any builder failure is propagated to the
// caller through the returned arrow::Result.
arrow::Result<std::shared_ptr<arrow::Table>> makeSimpleTable() {
  // Input values for the only column.
  const std::vector<std::string> values(1000, "xyz");

  arrow::StringBuilder builder;
  ARROW_RETURN_NOT_OK(builder.AppendValues(values));
  ARROW_ASSIGN_OR_RAISE(auto column, builder.Finish());

  auto table_schema = arrow::schema({arrow::field("strings", arrow::utf8())});
  return arrow::Table::Make(table_schema, {column});
}
// Writes `table` to `file_path` in the Arrow IPC file format.
// The output stream is opened with `false` as its second argument, then a
// file writer is created for the table's schema, the table is written, and
// the writer is closed. The first failing step's status is returned.
arrow::Status writeIPC(const arrow::Table& table, const std::string& file_path) {
  ARROW_ASSIGN_OR_RAISE(
      auto sink, arrow::io::FileOutputStream::Open(file_path, /*append=*/false));
  ARROW_ASSIGN_OR_RAISE(
      auto ipc_writer, arrow::ipc::MakeFileWriter(sink.get(), table.schema()));

  ARROW_RETURN_NOT_OK(ipc_writer->WriteTable(table));
  return ipc_writer->Close();
}
// Opens `file_path` as a read-only arrow::io::MemoryMappedFile, reads the
// whole IPC file into a table, closes the mapped file, and returns the
// result of the read. A failure to open or to close is returned as an error;
// the outcome of ToTable() is returned as-is once Close() has succeeded.
arrow::Result<std::shared_ptr<arrow::Table>> readMemoryMappedIPC(
    const std::string& file_path) {
  ARROW_ASSIGN_OR_RAISE(
      auto mapped_file,
      arrow::io::MemoryMappedFile::Open(file_path, arrow::io::FileMode::READ));
  ARROW_ASSIGN_OR_RAISE(auto file_reader,
                        arrow::ipc::RecordBatchFileReader::Open(mapped_file));

  // Materialise the table first; the file is closed before returning it.
  auto table_result = file_reader->ToTable();
  ARROW_RETURN_NOT_OK(mapped_file->Close());
  return table_result;
}
// Reproducer driver for the reported SIGBUS.
// Sequence: build a table in memory, write it to /tmp/test_out.arrow, read
// that file back via readMemoryMappedIPC(), then write the re-read table to
// the same path again. The process exit code is the numeric status code of
// the second write.
int main() {
// Make table
auto maybe_table = makeSimpleTable();
// `table` is bound by reference to what ValueOrDie() returns, so it is not an
// independent copy of the shared_ptr.
auto& table = maybe_table.ValueOrDie();
// NOTE: the message says "Read", but at this point the table was built in
// memory by makeSimpleTable(), not read from a file.
std::cout << "Read table with " << table->num_rows() << " rows." <<
std::endl;
// Write table to a file - OK
// The returned status is stored but not inspected before continuing.
auto status = writeIPC(*table, "/tmp/test_out.arrow");
std::cout << "Wrote table once." << std::endl;
// Replace the in-memory table with the one read back through the
// memory-mapped file.
maybe_table = readMemoryMappedIPC("/tmp/test_out.arrow");
table = maybe_table.ValueOrDie();
std::cout << "Read table with " << table->num_rows() << " rows." <<
std::endl;
// Write table to the same file again - CRASH
// NOTE(review): the table passed here came from the memory-mapped read of
// this very file; presumably its buffers still reference the mapping while
// writeIPC() reopens and rewrites the file - confirm against Arrow/mmap docs.
status = writeIPC(*table, "/tmp/test_out.arrow");
std::cout << "Wrote table twice." << std::endl;
// Process finished with exit code 135 (interrupted by signal 7:SIGBUS)
return static_cast<int>(status.code());
}
```
### Component(s)
C++
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]