This is an automated email from the ASF dual-hosted git repository.

wangdan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pegasus.git


The following commit(s) were added to refs/heads/master by this push:
     new 23f58920b fix: fix binary_writer::write not supporting C-style strings 
(#2349)
23f58920b is described below

commit 23f58920b23cfbc8df1b1160db4971aff28e46c6
Author: Dan Wang <[email protected]>
AuthorDate: Tue Jan 27 16:07:19 2026 +0800

    fix: fix binary_writer::write not supporting C-style strings (#2349)
    
    Fix https://github.com/apache/incubator-pegasus/issues/2355.
    
    The `mutation_log_test.replay_single_file_1000` test was crashing because, 
in the `mutation_log_test::create_test_mutation` function (see the code below), 
the default
    template function `template <typename T> void binary_writer::write(const T 
&val)` was
    invoked instead of one of the non-template `binary_writer::write` overloads.
    
    
https://github.com/apache/incubator-pegasus/blob/58b83260ec31530c4316e1cedceeb84731b66e95/src/replica/test/mutation_log_test.cpp#L294-L315
    
    The default template implementation contains only a single line: 
`assert(false);` (also shown below).
    
    
https://github.com/apache/incubator-pegasus/blob/58b83260ec31530c4316e1cedceeb84731b66e95/src/utils/binary_writer.h#L53-L58
    
    This explains why the issue occurs only in tests built in **debug** mode 
and not in **release** mode, as described
    in the issue: in release builds, `assert` is disabled and therefore does 
nothing.
    
    The fix is to remove the default template function and introduce a `void 
binary_writer::write(std::string_view val)` overload instead. In addition to 
fixing the issue, this PR also improves some code style and removes several 
unused
    member functions from the `binary_writer` class.
---
 src/utils/binary_writer.cpp | 165 ++++++++++++++++++++------------------------
 src/utils/binary_writer.h   |  99 +++++++++++++-------------
 2 files changed, 124 insertions(+), 140 deletions(-)

diff --git a/src/utils/binary_writer.cpp b/src/utils/binary_writer.cpp
index f78d5bd42..94624c77a 100644
--- a/src/utils/binary_writer.cpp
+++ b/src/utils/binary_writer.cpp
@@ -26,6 +26,7 @@
 
 #include "binary_writer.h"
 
+#include <algorithm>
 #include <memory>
 
 #include "utils.h"
@@ -48,16 +49,6 @@ binary_writer::binary_writer(int reserved_buffer_size)
     _buffers.reserve(1);
 }
 
-binary_writer::binary_writer(blob &buffer)
-    : _buffers({buffer}),
-      _current_buffer(const_cast<char *>(buffer.data())),
-      _current_offset(0),
-      _current_buffer_length(static_cast<int>(buffer.length())),
-      _total_size(0),
-      _reserved_size_per_buffer(kReservedSizePerBuffer)
-{
-}
-
 void binary_writer::flush() { commit(); }
 
 void binary_writer::create_buffer(size_t size)
@@ -68,23 +59,26 @@ void binary_writer::create_buffer(size_t size)
     create_new_buffer(size, bb);
     _buffers.push_back(bb);
 
-    _current_buffer = (char *)bb.data();
-    _current_buffer_length = bb.length();
+    _current_buffer = const_cast<char *>(bb.data());
+    _current_buffer_length = static_cast<int>(bb.length());
 }
 
 void binary_writer::create_new_buffer(size_t size, /*out*/ blob &bb)
 {
-    bb.assign(::dsn::utils::make_shared_array<char>(size), 0, (int)size);
+    bb.assign(utils::make_shared_array<char>(size), 0, size);
 }
 
 void binary_writer::commit()
 {
-    if (_current_offset > 0) {
-        *_buffers.rbegin() = _buffers.rbegin()->range(0, _current_offset);
-
-        _current_offset = 0;
-        _current_buffer_length = 0;
+    if (_current_offset <= 0) {
+        return;
     }
+
+    // Commit the last buffer.
+    *_buffers.rbegin() = _buffers.rbegin()->range(0, _current_offset);
+
+    _current_offset = 0;
+    _current_buffer_length = 0;
 }
 
 blob binary_writer::get_buffer()
@@ -99,108 +93,95 @@ blob binary_writer::get_buffer()
         return {};
     }
 
-    std::shared_ptr<char> bptr(utils::make_shared_array<char>(_total_size));
-    blob bb(bptr, _total_size);
-    char *ptr = const_cast<char *>(bb.data());
+    blob bb(utils::make_shared_array<char>(_total_size), _total_size);
+    auto *ptr = const_cast<char *>(bb.data());
 
     for (const auto &buf : _buffers) {
-        memcpy(ptr, buf.data(), buf.length());
+        std::memcpy(ptr, buf.data(), buf.length());
         ptr += buf.length();
     }
 
     return bb;
 }
 
-blob binary_writer::get_current_buffer()
+blob binary_writer::get_current_buffer() const
 {
     if (_buffers.size() == 1) {
         return _current_offset > 0 ? _buffers[0].range(0, _current_offset) : 
_buffers[0];
-    } else {
-        std::shared_ptr<char> 
bptr(::dsn::utils::make_shared_array<char>(_total_size));
-        blob bb(bptr, _total_size);
-        const char *ptr = bb.data();
-
-        for (int i = 0; i < static_cast<int>(_buffers.size()); i++) {
-            size_t len = (size_t)_buffers[i].length();
-            if (_current_offset > 0 && i + 1 == (int)_buffers.size()) {
-                len = _current_offset;
-            }
-
-            memcpy((void *)ptr, (const void *)_buffers[i].data(), len);
-            ptr += _buffers[i].length();
-        }
+    }
+
+    blob bb(utils::make_shared_array<char>(_total_size), _total_size);
+    if (_buffers.empty()) {
+        // TODO(wangdan): just return a default-initialized blob object?
         return bb;
     }
-}
 
-void binary_writer::write_empty(int sz)
-{
-    int sz0 = sz;
-    int rem_size = _current_buffer_length - _current_offset;
-    if (rem_size >= sz) {
-        _current_offset += sz;
-    } else {
-        _current_offset += rem_size;
-        sz -= rem_size;
+    auto *ptr = const_cast<char *>(bb.data());
+    int i = 0;
 
-        int allocSize = _reserved_size_per_buffer;
-        if (sz > allocSize)
-            allocSize = sz;
+    // Now the size of _buffers is at least 2.
+    for (; i < static_cast<int>(_buffers.size()) - 1; ++i) {
+        std::memcpy(ptr, _buffers[i].data(), _buffers[i].length());
+        ptr += _buffers[i].length();
+    }
 
-        create_buffer(allocSize);
-        _current_offset += sz;
+    // Get bytes from the last buffer(namely current buffer).
+    if (_current_offset > 0) {
+        std::memcpy(ptr, _buffers[i].data(), _current_offset);
+    } else {
+        std::memcpy(ptr, _buffers[i].data(), _buffers[i].length());
     }
 
-    _total_size += sz0;
+    return bb;
 }
 
-void binary_writer::write(const char *buffer, int sz)
+void binary_writer::write_empty(int size)
 {
-    int rem_size = _current_buffer_length - _current_offset;
-    if (rem_size >= sz) {
-        memcpy((void *)(_current_buffer + _current_offset), buffer, 
(size_t)sz);
-        _current_offset += sz;
-        _total_size += sz;
-    } else {
-        if (rem_size > 0) {
-            memcpy((void *)(_current_buffer + _current_offset), buffer, 
(size_t)rem_size);
-            _current_offset += rem_size;
-            _total_size += rem_size;
-            sz -= rem_size;
-        }
-
-        int allocSize = _reserved_size_per_buffer;
-        if (sz > allocSize)
-            allocSize = sz;
-
-        create_buffer(allocSize);
-        memcpy((void *)(_current_buffer + _current_offset), buffer + rem_size, 
(size_t)sz);
-        _current_offset += sz;
-        _total_size += sz;
+    const int remaining_size = _current_buffer_length - _current_offset;
+    if (remaining_size >= size) {
+        _current_offset += size;
+        _total_size += size;
+        return;
     }
+
+    _current_offset += remaining_size;
+    _total_size += remaining_size;
+    size -= remaining_size;
+
+    // Because the create_buffer() function will commit the last buffer - that 
is, it reads
+    // `_current_offset` first and then resets it - we need to ensure that 
`_current_offset`
+    // has already been updated to the latest value before create_buffer() is 
called.
+    create_buffer(std::max(size, _reserved_size_per_buffer));
+
+    _current_offset += size;
+    _total_size += size;
 }
 
-bool binary_writer::next(void **data, int *size)
+void binary_writer::write(const char *buffer, int size)
 {
-    int rem_size = _current_buffer_length - _current_offset;
-    if (rem_size == 0) {
-        create_buffer(_reserved_size_per_buffer);
-        rem_size = _current_buffer_length;
+    const int remaining_size = _current_buffer_length - _current_offset;
+    if (remaining_size >= size) {
+        std::memcpy(_current_buffer + _current_offset, buffer, size);
+        _current_offset += size;
+        _total_size += size;
+        return;
     }
 
-    *size = rem_size;
-    *data = (void *)(_current_buffer + _current_offset);
-    _current_offset = _current_buffer_length;
-    _total_size += rem_size;
-    return true;
-}
+    if (remaining_size > 0) {
+        std::memcpy(_current_buffer + _current_offset, buffer, remaining_size);
+        _current_offset += remaining_size;
+        _total_size += remaining_size;
+        size -= remaining_size;
+    }
 
-bool binary_writer::backup(int count)
-{
-    assert(count <= _current_offset);
-    _current_offset -= count;
-    _total_size -= count;
-    return true;
+    // Because the create_buffer() function will commit the last buffer - that 
is, it reads
+    // `_current_offset` first and then resets it - we need to ensure that 
`_current_offset`
+    // has already been updated to the latest value before create_buffer() is 
called.
+    create_buffer(std::max(size, _reserved_size_per_buffer));
+
+    std::memcpy(_current_buffer + _current_offset, buffer + remaining_size, 
size);
+    _current_offset += size;
+    _total_size += size;
 }
 
 } // namespace dsn
diff --git a/src/utils/binary_writer.h b/src/utils/binary_writer.h
index 7bce6139d..579246b65 100644
--- a/src/utils/binary_writer.h
+++ b/src/utils/binary_writer.h
@@ -26,14 +26,12 @@
 
 #pragma once
 
-#include <assert.h>
-#include <stdint.h>
-#include <algorithm>
+#include <cstdint>
 #include <cstring>
-#include <string>
+#include <string_view>
 #include <vector>
 
-#include "blob.h"
+#include "utils/blob.h"
 #include "utils/ports.h"
 
 namespace dsn {
@@ -43,19 +41,16 @@ class binary_writer
 public:
     binary_writer();
     explicit binary_writer(int reserved_buffer_size);
-    explicit binary_writer(blob &buffer);
+
     virtual ~binary_writer() = default;
 
     virtual void flush();
 
-    template <typename T>
-    void write_pod(const T &val);
-    template <typename T>
-    void write(const T &val)
-    {
-        // write of this type is not implemented
-        assert(false);
-    }
+    // Write data of POD types into the buffers.
+    template <typename TVal>
+    void write_pod(const TVal &val);
+
+    // Write data of built-in types into the buffers.
     void write(const int8_t &val) { write_pod(val); }
     void write(const uint8_t &val) { write_pod(val); }
     void write(const int16_t &val) { write_pod(val); }
@@ -66,31 +61,46 @@ public:
     void write(const uint64_t &val) { write_pod(val); }
     void write(const bool &val) { write_pod(val); }
 
-    void write(const std::string &val);
-    void write(const char *buffer, int sz);
+    // Write bytes in string_view into the buffers.
+    void write(std::string_view val);
+
+    // Write bytes in blob into the buffers.
     void write(const blob &val);
-    void write_empty(int sz);
 
-    bool next(void **data, int *size);
-    bool backup(int count);
+    // Write `size` bytes from `buffer` into the buffers.
+    void write(const char *buffer, int size);
+
+    // Just increase the buffers by `size` bytes without writing any data into 
it.
+    void write_empty(int size);
 
-    void get_buffers(/*out*/ std::vector<blob> &buffers);
-    int get_buffer_count() const { return static_cast<int>(_buffers.size()); }
+    // Commit the current buffer and return a blob filled with all bytes over 
all buffers.
     blob get_buffer();
-    blob get_current_buffer(); // without commit, write can be continued on 
the last buffer
-    blob get_first_buffer() const;
 
-    int total_size() const { return _total_size; }
+    // Return a blob filled with all bytes over all buffers without committing 
the current
+    // buffer and thus future written bytes will continue to be put into the 
current buffer.
+    [[nodiscard]] blob get_current_buffer() const;
+
+    // Get the total size in bytes over all buffers.
+    [[nodiscard]] int total_size() const { return _total_size; }
 
 protected:
-    // bb may have large space than size
+    // Commit the current buffer and create a new buffer of at least `size` 
bytes.
     void create_buffer(size_t size);
-    void commit();
+
+    // Allocate space of at least `size` bytes for a new buffer into `bb`.
     virtual void create_new_buffer(size_t size, /*out*/ blob &bb);
 
+    // Commit the current buffer.
+    void commit();
+
 private:
+    // Write data of bytes-like types into buffers.
+    template <typename TBytes>
+    void write_bytes(const TBytes &val);
+
     std::vector<blob> _buffers;
 
+    // The current buffer is just the last buffer of `_buffers`.
     char *_current_buffer;
     int _current_offset;
     int _current_buffer_length;
@@ -104,34 +114,27 @@ private:
 };
 
 //--------------- inline implementation -------------------
-template <typename T>
-inline void binary_writer::write_pod(const T &val)
+template <typename TVal>
+inline void binary_writer::write_pod(const TVal &val)
 {
-    write((char *)&val, static_cast<int>(sizeof(T)));
+    write(reinterpret_cast<const char *>(&val), static_cast<int>(sizeof(val)));
 }
 
-inline void binary_writer::get_buffers(/*out*/ std::vector<blob> &buffers)
+template <typename TBytes>
+inline void binary_writer::write_bytes(const TBytes &val)
 {
-    commit();
-    buffers = _buffers;
+    // Write the length of `val` into the buffers.
+    const auto len = static_cast<int>(val.length());
+    write_pod(len);
+
+    // Write `val` into the buffers if it's not empty.
+    if (len > 0) {
+        write(val.data(), len);
+    }
 }
 
-inline blob binary_writer::get_first_buffer() const { return _buffers[0]; }
+inline void binary_writer::write(std::string_view val) { write_bytes(val); }
 
-inline void binary_writer::write(const std::string &val)
-{
-    int len = static_cast<int>(val.length());
-    write((const char *)&len, sizeof(int));
-    if (len > 0)
-        write((const char *)&val[0], len);
-}
+inline void binary_writer::write(const blob &val) { write_bytes(val); }
 
-inline void binary_writer::write(const blob &val)
-{
-    // TODO: optimization by not memcpy
-    int len = val.length();
-    write((const char *)&len, sizeof(int));
-    if (len > 0)
-        write((const char *)val.data(), len);
-}
 } // namespace dsn


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to