pitrou commented on code in PR #43834:
URL: https://github.com/apache/arrow/pull/43834#discussion_r1736275817


##########
cpp/src/arrow/integration/json_integration_test.cc:
##########
@@ -148,6 +162,48 @@ static Status ValidateFull(const RecordBatch& batch) {
   return Status::OK();
 }
 
+static Status ValidateEmbeddedStream(
+    const std::shared_ptr<io::RandomAccessFile>& arrow_file,
+    const Schema& footer_schema) {
+  // Many validations are skipped here since they will already
+  // have been handled by RecordBatchFileReader.
+  // For example we already know that the magic is in place.
+  ARROW_ASSIGN_OR_RAISE(int64_t file_size, arrow_file->GetSize());
+  ARROW_ASSIGN_OR_RAISE(auto footer_cookie, arrow_file->ReadAt(file_size - 10, 
10));
+  auto footer_size =
+      
bit_util::FromLittleEndian(util::SafeLoadAs<int32_t>(footer_cookie->data()));
+  int64_t footer_offset = 8 + file_size - footer_size - 10;
+
+  // Get a read stream past the padded magic at the start of the file
+  ARROW_ASSIGN_OR_RAISE(auto stream,
+                        io::RandomAccessFile::GetStream(arrow_file, 8, 
file_size - 8));
+  ARROW_ASSIGN_OR_RAISE(auto arrow_reader, 
ipc::RecordBatchStreamReader::Open(stream));
+
+  RETURN_NOT_OK(CompareSchemas("Embedded stream", *arrow_reader->schema(),  //
+                               "Footer", footer_schema));
+
+  int i = 0;
+  for (auto maybe_batch : *arrow_reader) {
+    ARROW_ASSIGN_OR_RAISE(auto batch, maybe_batch);
+    // Theoretically the Footer could fail to include a Block pointing to some
+    // Message in the embedded stream, so we validate batches again here.
+    Status valid_st = ValidateFull(*batch);
+    if (!valid_st.ok()) {
+      return Status::Invalid("Embedded stream record batch ", i, " did not 
validate:\n",
+                             valid_st.ToString());
+    }
+    ++i;
+  }
+  ARROW_ASSIGN_OR_RAISE(int64_t stream_size, stream->Tell());
+
+  if (footer_offset <= stream_size) {

Review Comment:
   Hmm, why not `footer_offset < stream_size`? It should be OK if the stream 
ends exactly where the footer begins (i.e. `footer_offset == stream_size`).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to