paleolimbot commented on code in PR #334:
URL: https://github.com/apache/arrow-nanoarrow/pull/334#discussion_r1424291404
##########
extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_files_test.cc:
##########
@@ -70,31 +71,124 @@ class TestFile {
return Err(ENODATA, path, message);
}
- void Test(std::string dir_prefix) {
+ std::string CheckJSONGzFile() {
+ size_t dot_pos = path_.find('.');
+ return path_.substr(0, dot_pos) + std::string(".json.gz");
+ }
+
+ ArrowErrorCode GetArrowArrayStreamIPC(const std::string& dir_prefix,
+ ArrowArrayStream* out, ArrowError*
error) {
std::stringstream path_builder;
path_builder << dir_prefix << "/" << path_;
- // Read the whole file into an ArrowBuffer. We need the whole thing in
memory
- // to avoid requiring Arrow C++ with filesystem.
- std::ifstream infile(path_builder.str(), std::ios::in | std::ios::binary);
+ // Read using nanoarrow_ipc
nanoarrow::UniqueBuffer content;
+ NANOARROW_RETURN_NOT_OK(ReadFileBuffer(path_builder.str(), content.get(),
error));
+
+ struct ArrowIpcInputStream input;
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+ ArrowIpcInputStreamInitBuffer(&input, content.get()), error);
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+ ArrowIpcArrayStreamReaderInit(out, &input, nullptr), error);
+ return NANOARROW_OK;
+ }
+
+ ArrowErrorCode GetArrowArrayStreamCheckJSON(const std::string& dir_prefix,
+ ArrowArrayStream* out,
ArrowError* error) {
+ std::stringstream path_builder;
+ path_builder << dir_prefix << "/" << CheckJSONGzFile();
+
+ // Read .json.gz file into a buffer
+ nanoarrow::UniqueBuffer json_gz_content;
+ NANOARROW_RETURN_NOT_OK(
+ ReadFileBuffer(path_builder.str(), json_gz_content.get(), error));
+
+ // Decompress into a JSON string
+ nanoarrow::UniqueBuffer json_content;
+ NANOARROW_RETURN_NOT_OK(UnGZIP(json_gz_content.get(), json_content.get(),
error));
+
+ std::string json_string(reinterpret_cast<char*>(json_content->data),
+ json_content->size_bytes);
+
+ // Use testing util to populate the array stream
+ nanoarrow::testing::TestingJSONReader reader;
+ NANOARROW_RETURN_NOT_OK(reader.ReadDataFile(json_string, out, error));
+ return NANOARROW_OK;
+ }
+
+ // Read a whole file into an ArrowBuffer
+ static ArrowErrorCode ReadFileBuffer(const std::string& path, ArrowBuffer*
content,
+ ArrowError* error) {
+ std::ifstream infile(path, std::ios::in | std::ios::binary);
do {
content->size_bytes += infile.gcount();
- ASSERT_EQ(ArrowBufferReserve(content.get(), 8096), NANOARROW_OK);
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowBufferReserve(content, 8096),
error);
} while (
infile.read(reinterpret_cast<char*>(content->data +
content->size_bytes), 8096));
content->size_bytes += infile.gcount();
- // Make a copy into another buffer so we can wrap it in something Arrow C++
- // understands
- nanoarrow::UniqueBuffer content_copy;
- ASSERT_EQ(ArrowBufferAppend(content_copy.get(), content->data,
content->size_bytes),
- NANOARROW_OK);
+ return NANOARROW_OK;
+ }
- struct ArrowIpcInputStream input;
+ // Create an arrow::io::InputStream wrapper around an ArrowBuffer
+ static arrow::Result<std::shared_ptr<io::InputStream>> BufferInputStream(
Review Comment:
I tried this, but the functions that the resulting `InputStream` is passed
to seem to require a `std::shared_ptr<InputStream>`!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]