Re: [PR] feat(go): implement binary reader/writer [iggy]

via GitHub Sat, 21 Mar 2026 02:43:19 -0700


hubcio commented on code in PR #2986:
URL: https://github.com/apache/iggy/pull/2986#discussion_r2969387405



##########
foreign/go/internal/codec/writer.go:
##########
@@ -0,0 +1,86 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package codec
+
+import (
+       "encoding/binary"
+       "math"
+)
+
+// Writer appends encoded values to a growing byte slice.
+type Writer struct {
+       p []byte
+}
+
+// NewWriter returns a new Writer with an empty internal buffer.
+// Use NewWriterCap instead if the final size is known.
+func NewWriter() *Writer {
+       return &Writer{}
+}
+
+// NewWriterCap returns a Writer with its internal buffer pre-allocated to n
+// bytes. Use this when the final size is known in advance to avoid
+// reallocations.
+func NewWriterCap(n int) *Writer {
+       return &Writer{p: make([]byte, 0, n)}
+}
+
+func (w *Writer) U8(v uint8) {
+       w.p = append(w.p, v)
+}
+
+func (w *Writer) U16(v uint16) {
+       w.p = binary.LittleEndian.AppendUint16(w.p, v)
+}
+
+func (w *Writer) U32(v uint32) {
+       w.p = binary.LittleEndian.AppendUint32(w.p, v)
+}
+
+func (w *Writer) U64(v uint64) {
+       w.p = binary.LittleEndian.AppendUint64(w.p, v)
+}
+
+func (w *Writer) F32(v float32) {
+       w.p = binary.LittleEndian.AppendUint32(w.p, math.Float32bits(v))
+}
+
+// Str writes a string with no length prefix. Use U8LenStr or U32LenStr
+// instead if the reader expects a length prefix.
+func (w *Writer) Str(v string) {
+       w.p = append(w.p, v...)
+}
+
+// U32LenStr writes a length-prefixed string where the length is a 4-byte
+// little-endian unsigned integer.
+func (w *Writer) U32LenStr(v string) {
+       w.p = binary.LittleEndian.AppendUint32(w.p, uint32(len(v)))
+       w.Str(v)
+}
+
+// U8LenStr writes a length-prefixed string where the length is a single byte.
+func (w *Writer) U8LenStr(v string) {
+       w.p = append(w.p, uint8(len(v)))

Review Comment:
   `uint8(len(v))` silently wraps for strings longer than 255 bytes - a 
256-byte string writes length `0x00` followed by all 256 bytes. the reader 
would then see length=0 and return `""`, silently corrupting the stream. 
consider adding a length check or documenting the max size contract.



##########
foreign/go/internal/codec/reader.go:
##########
@@ -0,0 +1,179 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package codec
+
+import (
+       "encoding/binary"
+       "fmt"
+       "math"
+       "runtime"
+)
+
+// Reader is a cursor over a byte slice. The first out-of-bounds read sets err;
+// all subsequent reads are no-ops. Call Err() once after all reads to check.
+type Reader struct {
+       p   []byte
+       pos int
+       err error
+}
+
+func NewReader(p []byte) *Reader {
+       return &Reader{p: p}
+}
+
+// overrun sets r.err to a descriptive error message including the caller's
+// file and line number.
+func (r *Reader) overrun(need int) {
+       _, file, line, _ := runtime.Caller(2)
+       r.err = fmt.Errorf(
+               "reader: need %d bytes at offset %d, only %d remaining (%s:%d)",
+               need, r.pos, len(r.p)-r.pos, file, line)
+}
+
+func (r *Reader) U8() uint8 {
+       if r.err != nil {
+               return 0
+       }
+       if r.pos+1 > len(r.p) {
+               r.overrun(1)
+               return 0
+       }
+       v := r.p[r.pos]
+       r.pos++
+       return v
+}
+
+func (r *Reader) U16() uint16 {
+       if r.err != nil {
+               return 0
+       }
+       if r.pos+2 > len(r.p) {
+               r.overrun(2)
+               return 0
+       }
+       v := binary.LittleEndian.Uint16(r.p[r.pos : r.pos+2])
+       r.pos += 2
+       return v
+}
+
+func (r *Reader) U32() uint32 {
+       if r.err != nil {
+               return 0
+       }
+       if r.pos+4 > len(r.p) {
+               r.overrun(4)
+               return 0
+       }
+       v := binary.LittleEndian.Uint32(r.p[r.pos : r.pos+4])
+       r.pos += 4
+       return v
+}
+
+func (r *Reader) U64() uint64 {
+       if r.err != nil {
+               return 0
+       }
+       if r.pos+8 > len(r.p) {
+               r.overrun(8)
+               return 0
+       }
+       v := binary.LittleEndian.Uint64(r.p[r.pos : r.pos+8])
+       r.pos += 8
+       return v
+}
+
+func (r *Reader) F32() float32 {
+       if r.err != nil {
+               return 0
+       }
+       if r.pos+4 > len(r.p) {
+               r.overrun(4)
+               return 0
+       }
+       v := math.Float32frombits(binary.LittleEndian.Uint32(r.p[r.pos : 
r.pos+4]))
+       r.pos += 4
+       return v
+}
+
+// strN reads exactly n bytes as a string.
+func (r *Reader) strN(n int) string {
+       v := string(r.p[r.pos : r.pos+n])
+       r.pos += n
+       return v
+}
+
+// Str reads exactly n bytes as a string. Use U8LenStr or U32LenStr instead
+// if the data is length-prefixed:
+//
+//     [length: 1 byte][data: N bytes]   → U8LenStr
+//     [length: 4 bytes][data: N bytes]  → U32LenStr
+func (r *Reader) Str(n int) string {
+       if r.err != nil {
+               return ""
+       }
+       if r.pos+n > len(r.p) {

Review Comment:
   `Str(n int)` with a negative `n` bypasses the bounds check here - `r.pos + 
(-1)` is `-1`, and `-1 > len(r.p)` is false - so it falls through to 
`strN(-1)`, which panics on `r.p[0:-1]` (negative slice bounds). no current 
caller passes negative `n`, but since this is an exported method accepting 
`int`, a defensive guard would be cheap:
   
   ```go
   if n < 0 {
       r.overrun(n)
       return ""
   }
   ```



##########
foreign/go/internal/codec/reader_test.go:
##########
@@ -0,0 +1,275 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package codec
+
+import (
+       "encoding/binary"
+       "fmt"
+       "math"
+       "runtime"
+       "strings"
+       "testing"
+)
+
+// --- byte-construction helpers ---
+
+func u16le(v uint16) []byte {
+       b := make([]byte, 2)
+       binary.LittleEndian.PutUint16(b, v)
+       return b
+}
+
+func u32le(v uint32) []byte {
+       b := make([]byte, 4)
+       binary.LittleEndian.PutUint32(b, v)
+       return b
+}
+
+func u64le(v uint64) []byte {
+       b := make([]byte, 8)
+       binary.LittleEndian.PutUint64(b, v)
+       return b
+}
+
+func cat(slices ...[]byte) []byte {
+       var out []byte
+       for _, s := range slices {
+               out = append(out, s...)
+       }
+       return out
+}
+
+// TestReader_reads exercises every read method in sequence.
+func TestReader_reads(t *testing.T) {
+       const wantU8 uint8 = math.MaxUint8
+       const wantU16 uint16 = math.MaxUint16
+       const wantU32 uint32 = math.MaxUint32
+       const wantU64 uint64 = math.MaxUint64
+       const wantF32 float32 = math.Pi
+       const wantRem = 1
+
+       wantStr := "str"
+       wantU32LenStr := "uint32"
+       wantU8LenStr := "uint8"
+
+       payload := cat(
+               []byte{wantU8},                                           // U8
+               u16le(wantU16),                                           // U16
+               u32le(wantU32),                                           // U32
+               u64le(wantU64),                                           // U64
+               u32le(math.Float32bits(wantF32)),                         // F32
+               []byte(wantStr),                                          // 
Str(len(wantStr))
+               u32le(uint32(len(wantU32LenStr))), []byte(wantU32LenStr), // 
U32LenStr
+               []byte{uint8(len(wantU8LenStr))}, []byte(wantU8LenStr), // 
U8LenStr
+               []byte{0xFF}, // wantRem trailing bytes for Remaining()
+       )
+
+       r := NewReader(payload)
+       u8 := r.U8()
+       u16 := r.U16()
+       u32 := r.U32()
+       u64 := r.U64()
+       f32 := r.F32()
+       str := r.Str(len(wantStr))
+       u32LenStr := r.U32LenStr()
+       u8LenStr := r.U8LenStr()
+       rem := r.Remaining()
+       if err := r.Err(); err != nil {
+               t.Fatalf("unexpected error: %v", err)
+       }
+
+       if u8 != wantU8 {
+               t.Errorf("U8: got %#x, want %#x", u8, wantU8)
+       }
+       if u16 != wantU16 {
+               t.Errorf("U16: got %#x, want %#x", u16, wantU16)
+       }
+       if u32 != wantU32 {
+               t.Errorf("U32: got %#x, want %#x", u32, wantU32)
+       }
+       if u64 != wantU64 {
+               t.Errorf("U64: got %#x, want %#x", u64, wantU64)
+       }
+       if f32 != wantF32 {
+               t.Errorf("F32: got %v, want %v", f32, wantF32)
+       }
+       if str != wantStr {
+               t.Errorf("Str: got %q, want %q", str, wantStr)
+       }
+       if u32LenStr != wantU32LenStr {
+               t.Errorf("U32LenStr: got %q, want %q", u32LenStr, wantU32LenStr)
+       }
+       if u8LenStr != wantU8LenStr {
+               t.Errorf("U8LenStr: got %q, want %q", u8LenStr, wantU8LenStr)
+       }
+       if rem != wantRem {
+               t.Errorf("Remaining: got %d, want %d", rem, wantRem)
+       }
+}
+
+// TestReader_str_copies verifies that Str, U8LenStr, and U32LenStr all copy
+// the source buffer so the returned string is not affected by later mutations.
+func TestReader_str_copies(t *testing.T) {
+       const wantStr = "hello"
+
+       cases := []struct {
+               name    string
+               payload func() []byte
+               read    func(*Reader) string
+       }{
+               {
+                       name:    "Str",
+                       payload: func() []byte { return []byte(wantStr) },
+                       read:    func(r *Reader) string { return 
r.Str(len(wantStr)) },
+               },
+               {
+                       name: "U8LenStr",
+                       payload: func() []byte {
+                               return append([]byte{uint8(len(wantStr))}, 
[]byte(wantStr)...)
+                       },
+                       read: func(r *Reader) string { return r.U8LenStr() },
+               },
+               {
+                       name: "U32LenStr",
+                       payload: func() []byte {
+                               return append(u32le(uint32(len(wantStr))), 
[]byte(wantStr)...)
+                       },
+                       read: func(r *Reader) string { return r.U32LenStr() },
+               },
+       }
+
+       for _, tc := range cases {
+               t.Run(tc.name, func(t *testing.T) {
+                       src := tc.payload()
+                       r := NewReader(src)
+                       s := tc.read(r)
+                       if err := r.Err(); err != nil {
+                               t.Fatalf("unexpected error: %v", err)
+                       }
+                       src[len(src)-1] = 'X' // mutate source buffer after read
+                       if s != wantStr {
+                               t.Errorf("got %q, want %q — string was not 
copied", s, wantStr)
+                       }
+               })
+       }
+}
+
+// TestReader_truncation verifies that every read method returns a descriptive 
error
+// when the buffer is too short, including mid-sequence truncation.
+func TestReader_truncation(t *testing.T) {
+       cases := []struct {
+               name    string
+               payload []byte
+               read    func(*Reader)
+       }{
+               {"U8", []byte{}, func(r *Reader) { r.U8() }},
+               {"U16", []byte{0x01}, func(r *Reader) { r.U16() }},             
                              // 1 byte, need 2
+               {"U32", []byte{0x01, 0x02, 0x03}, func(r *Reader) { r.U32() }}, 
                              // 3 bytes, need 4
+               {"U64", []byte{0x01, 0x02, 0x03, 0x04}, func(r *Reader) { 
r.U64() }},                         // 4 bytes, need 8
+               {"F32", []byte{0x01, 0x02, 0x03}, func(r *Reader) { r.F32() }}, 
                              // 3 bytes, need 4
+               {"Str", []byte("hi"), func(r *Reader) { r.Str(5) }},            
                              // claims 5, has 2
+               {"U32LenStr/short-len-prefix", []byte{0x05, 0x00}, func(r 
*Reader) { r.U32LenStr() }},        // len prefix needs 4 bytes, got 2
+               {"U32LenStr/short-body", cat(u32le(10), []byte("short")), 
func(r *Reader) { r.U32LenStr() }}, // claims 10, has 5
+               {"U8LenStr/short-len-prefix", []byte{}, func(r *Reader) { 
r.U8LenStr() }},                    // len prefix needs 1 byte, got 0
+               {"U8LenStr/short-body", cat([]byte{10}, []byte("short")), 
func(r *Reader) { r.U8LenStr() }},  // claims 10, has 5
+               {"mid-sequence", cat(u32le(1), []byte{0xFF}), func(r *Reader) { 
r.U32(); r.U32() }},
+       }
+
+       for _, tc := range cases {
+               t.Run(tc.name, func(t *testing.T) {
+                       r := NewReader(tc.payload)
+                       tc.read(r)
+                       err := r.Err()
+                       if err == nil || !strings.HasPrefix(err.Error(), 
"reader: need ") {
+                               t.Fatalf("got %v, want overrun error", err)
+                       }
+               })
+       }
+}
+
+// TestReader_overrun_error_location verifies that the error message contains
+// the file and line of the call site that triggered the overrun, for every
+// public read method.
+func TestReader_overrun_error_location(t *testing.T) {
+       cases := []struct {
+               name string
+               // fn triggers the overrun and returns the expected file:line 
of the call.
+               fn func(r *Reader) (wantFile string, wantLine int)
+       }{
+               {"U8", func(r *Reader) (string, int) {
+                       _, file, line, _ := runtime.Caller(0)
+                       r.U8()
+                       return file, line + 1
+               }},
+               {"U16", func(r *Reader) (string, int) {
+                       _, file, line, _ := runtime.Caller(0)
+                       r.U16()
+                       return file, line + 1
+               }},
+               {"U32", func(r *Reader) (string, int) {
+                       _, file, line, _ := runtime.Caller(0)
+                       r.U32()
+                       return file, line + 1
+               }},
+               {"U64", func(r *Reader) (string, int) {
+                       _, file, line, _ := runtime.Caller(0)
+                       r.U64()
+                       return file, line + 1
+               }},
+               {"F32", func(r *Reader) (string, int) {
+                       _, file, line, _ := runtime.Caller(0)
+                       r.F32()
+                       return file, line + 1
+               }},
+               {"Str", func(r *Reader) (string, int) {
+                       _, file, line, _ := runtime.Caller(0)
+                       r.Str(1)
+                       return file, line + 1
+               }},
+               {"U32LenStr", func(r *Reader) (string, int) {

Review Comment:
   this test case (and `U8LenStr` below) passes an empty buffer, so the overrun 
always fires on the length-prefix read. the *body*-phase overrun path (prefix 
succeeds but body is too short) is untested for caller location correctness. 
e.g. `NewReader(cat(u32le(100), []byte("short")))` then `r.U32LenStr()` would 
exercise the second `overrun` call in `U32LenStr`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] feat(go): implement binary reader/writer [iggy]

Reply via email to