Repository: arrow Updated Branches: refs/heads/master 47e289a9a -> ce0bb5338
ARROW-1002: [C++] Fix inconsistency with padding at start of IPC file format cc @TheNeuralBit -- the 64-byte padding in the C++ file writer was incorrect (http://arrow.apache.org/docs/ipc.html indicates padding to an 8-byte boundary), so this fixes that. Author: Wes McKinney <wes.mckin...@twosigma.com> Closes #693 from wesm/ARROW-1002 and squashes the following commits: 35c023f [Wes McKinney] Fix C++ inconsistency with padding at start of IPC file format Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/ce0bb533 Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/ce0bb533 Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/ce0bb533 Branch: refs/heads/master Commit: ce0bb5338496785e6c46d9832b75105883bef5de Parents: 47e289a Author: Wes McKinney <wes.mckin...@twosigma.com> Authored: Mon May 15 21:40:36 2017 -0400 Committer: Wes McKinney <wes.mckin...@twosigma.com> Committed: Mon May 15 21:40:36 2017 -0400 ---------------------------------------------------------------------- cpp/src/arrow/ipc/writer.cc | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/ce0bb533/cpp/src/arrow/ipc/writer.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc index ced0710..4f5edf2 100644 --- a/cpp/src/arrow/ipc/writer.cc +++ b/cpp/src/arrow/ipc/writer.cc @@ -509,7 +509,7 @@ class DictionaryWriter : public RecordBatchSerializer { }; // Adds padding bytes if necessary to ensure all memory blocks are written on -// 8-byte boundaries. +// 64-byte boundaries. 
Status AlignStreamPosition(io::OutputStream* stream) { int64_t position; RETURN_NOT_OK(stream->Tell(&position)); @@ -687,9 +687,9 @@ class RecordBatchStreamWriter::RecordBatchStreamWriterImpl { } // Adds padding bytes if necessary to ensure all memory blocks are written on - // 8-byte boundaries. - Status Align() { - int64_t remainder = PaddedLength(position_) - position_; + // 64-byte (or other alignment) boundaries. + Status Align(int64_t alignment = kArrowAlignment) { + int64_t remainder = PaddedLength(position_, alignment) - position_; if (remainder > 0) { return Write(kPaddingBytes, remainder); } return Status::OK(); } @@ -701,12 +701,6 @@ class RecordBatchStreamWriter::RecordBatchStreamWriterImpl { return Status::OK(); } - // Write and align - Status WriteAligned(const uint8_t* data, int64_t nbytes) { - RETURN_NOT_OK(Write(data, nbytes)); - return Align(); - } - void set_memory_pool(MemoryPool* pool) { pool_ = pool; } protected: @@ -762,8 +756,10 @@ class RecordBatchFileWriter::RecordBatchFileWriterImpl using BASE = RecordBatchStreamWriter::RecordBatchStreamWriterImpl; Status Start() override { - RETURN_NOT_OK(WriteAligned( - reinterpret_cast<const uint8_t*>(kArrowMagicBytes), strlen(kArrowMagicBytes))); + // It is only necessary to align to 8-byte boundary at the start of the file + RETURN_NOT_OK(Write(reinterpret_cast<const uint8_t*>(kArrowMagicBytes), + strlen(kArrowMagicBytes))); + RETURN_NOT_OK(Align(8)); // We write the schema at the start of the file (and the end). This also // writes all the dictionaries at the beginning of the file