This is an automated email from the ASF dual-hosted git repository.

chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fory.git


The following commit(s) were added to refs/heads/main by this push:
     new 4fd66f795 fix(go): prevent panic in readUTF16LE with odd byte counts (#3293)
4fd66f795 is described below

commit 4fd66f795bfb34e012c9aec10caf30772e16148b
Author: Jonathan Yoder <[email protected]>
AuthorDate: Thu Feb 5 22:18:42 2026 -0500

    fix(go): prevent panic in readUTF16LE with odd byte counts (#3293)
    
    ## What does this PR do?
    
    Fixes a panic in `readUTF16LE` when the byte count is odd (malformed
    UTF-16 data).
    
    ## Problem
    
    When `readUTF16LE` receives an odd byte count, the function panics with:
    ```
    panic: runtime error: index out of range [N] with length N
    ```
    
    This occurs because the loop iterates up to `byteCount` but tries to
    access `data[i+1]` on the last iteration when `i = byteCount - 1` (for
    odd `byteCount`).
    
    ### Steps to Reproduce
    
    ```go
    func TestReadUTF16LE_OddByteCount(t *testing.T) {
        // Data: 5 bytes where only first 4 form valid UTF-16
        data := []byte{0x48, 0x00, 0x69, 0x00, 0xFF}
        buf := NewByteBuffer(data)
        err := &Error{}
    
        // This panics without the fix
        result := readUTF16LE(buf, 5, err)
    }
    ```
    
    ### Root Cause
    
    In `go/fory/string.go`, lines 89-90:
    ```go
    for i := 0; i < byteCount; i += 2 {
        u16s[i/2] = uint16(data[i]) | uint16(data[i+1])<<8  // Panics when i+1 >= len(data)
    }
    ```
    
    ## Solution
    
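    Validate the byte count before decoding: when it is odd, `readUTF16LE`
    sets an `ErrKindInvalidUTF16String` error (built by the new
    `InvalidUTF16StringError` helper) and returns an empty string. It also
    returns early when `buf.ReadBinary` reports an error.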
    
    ## Tests Added
    
    - `TestReadUTF16LE_OddByteCount`: Verifies an odd byte count yields an
    `ErrKindInvalidUTF16String` error and an empty string instead of a panic
    - `TestReadUTF16LE_SingleByte`: Verifies a single-byte input yields the
    same error and an empty string
    - `TestReadUTF16LE_EvenByteCount`: Verifies normal case still works
    - `TestReadUTF16LE_EmptyBuffer`: Verifies empty input handling
    - `TestReadUTF16LE_SurrogatePair`: Verifies emoji/surrogate pair
    decoding
    
    ## Checklist
    
    - [x] I have read the [Contributing
    Guidelines](https://github.com/apache/fory/blob/main/CONTRIBUTING.md)
    - [x] All tests pass (`go test -v ./...` in `go/fory`)
    - [x] License headers are correct (hawkeye check passed)
    - [x] PR title follows conventional commits format
    
    ---------
    
    Co-authored-by: Claude Opus 4.5 <[email protected]>
    Co-authored-by: chaokunyang <[email protected]>
---
 go/fory/doc.go         |  1 +
 go/fory/errors.go      | 10 ++++++
 go/fory/string.go      |  8 +++++
 go/fory/string_test.go | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 110 insertions(+)

diff --git a/go/fory/doc.go b/go/fory/doc.go
index c06788c11..b9ebb73da 100644
--- a/go/fory/doc.go
+++ b/go/fory/doc.go
@@ -305,6 +305,7 @@ Common error kinds:
   - ErrKindUnknownType: Unknown type encountered
   - ErrKindMaxDepthExceeded: Recursion depth limit exceeded
   - ErrKindHashMismatch: Struct hash mismatch (schema changed)
+  - ErrKindInvalidUTF16String: Malformed UTF-16 string payload
 
 # Best Practices
 
diff --git a/go/fory/errors.go b/go/fory/errors.go
index bf8d7676f..2e4f19e9a 100644
--- a/go/fory/errors.go
+++ b/go/fory/errors.go
@@ -50,6 +50,8 @@ const (
        ErrKindHashMismatch
        // ErrKindInvalidTag indicates invalid fory struct tag configuration
        ErrKindInvalidTag
+       // ErrKindInvalidUTF16String indicates malformed UTF-16 string data
+       ErrKindInvalidUTF16String
 )
 
 // Error is a lightweight error type optimized for hot path performance.
@@ -257,6 +259,14 @@ func InvalidTagErrorf(format string, args ...any) Error {
        })
 }
 
+// InvalidUTF16StringError creates an invalid UTF-16 string error
+func InvalidUTF16StringError(byteCount int) Error {
+       return panicIfEnabled(Error{
+               kind:    ErrKindInvalidUTF16String,
+               message: fmt.Sprintf("invalid UTF-16 string byte count %d: must be even", byteCount),
+       })
+}
+
 // WrapError wraps a standard error into a fory Error
 func WrapError(err error, kind ErrorKind) Error {
        if err == nil {
diff --git a/go/fory/string.go b/go/fory/string.go
index cde49328b..10586a27c 100644
--- a/go/fory/string.go
+++ b/go/fory/string.go
@@ -81,7 +81,15 @@ func readLatin1(buf *ByteBuffer, size int, err *Error) string {
 }
 
 func readUTF16LE(buf *ByteBuffer, byteCount int, err *Error) string {
+       if byteCount&1 != 0 {
+               err.SetError(InvalidUTF16StringError(byteCount))
+               return ""
+       }
+
        data := buf.ReadBinary(byteCount, err)
+       if err.HasError() {
+               return ""
+       }
 
        // Reconstruct UTF-16 code units
        charCount := byteCount / 2
diff --git a/go/fory/string_test.go b/go/fory/string_test.go
new file mode 100644
index 000000000..cc39ec87a
--- /dev/null
+++ b/go/fory/string_test.go
@@ -0,0 +1,91 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package fory
+
+import (
+       "testing"
+
+       "github.com/stretchr/testify/require"
+)
+
+func TestReadUTF16LE_EvenByteCount(t *testing.T) {
+       // Test normal case: even byte count (valid UTF-16)
+       // "Hi" in UTF-16LE: 'H'=0x0048, 'i'=0x0069
+       // Little-endian bytes: 48 00 69 00
+       data := []byte{0x48, 0x00, 0x69, 0x00}
+       buf := NewByteBuffer(data)
+       err := &Error{}
+
+       result := readUTF16LE(buf, 4, err)
+
+       require.False(t, err.HasError())
+       require.Equal(t, "Hi", result)
+}
+
+func TestReadUTF16LE_OddByteCount(t *testing.T) {
+       // Test edge case: odd byte count (malformed UTF-16 data).
+       // This should return a typed decode error rather than silently truncating.
+       data := []byte{0x48, 0x00, 0x69, 0x00, 0xFF}
+       buf := NewByteBuffer(data)
+       err := &Error{}
+
+       result := readUTF16LE(buf, 5, err)
+
+       require.True(t, err.HasError())
+       require.Equal(t, ErrKindInvalidUTF16String, err.Kind())
+       require.Equal(t, "", result)
+}
+
+func TestReadUTF16LE_SingleByte(t *testing.T) {
+       // Test edge case: single byte (no complete UTF-16 code units)
+       data := []byte{0x48}
+       buf := NewByteBuffer(data)
+       err := &Error{}
+
+       result := readUTF16LE(buf, 1, err)
+
+       require.True(t, err.HasError())
+       require.Equal(t, ErrKindInvalidUTF16String, err.Kind())
+       require.Equal(t, "", result)
+}
+
+func TestReadUTF16LE_EmptyBuffer(t *testing.T) {
+       // Test edge case: zero bytes
+       data := []byte{}
+       buf := NewByteBuffer(data)
+       err := &Error{}
+
+       result := readUTF16LE(buf, 0, err)
+
+       require.False(t, err.HasError())
+       require.Equal(t, "", result)
+}
+
+func TestReadUTF16LE_SurrogatePair(t *testing.T) {
+       // Test UTF-16 surrogate pair for emoji: 🎉 (U+1F389)
+       // UTF-16: D83C DF89 (surrogate pair)
+       // Little-endian bytes: 3C D8 89 DF
+       data := []byte{0x3C, 0xD8, 0x89, 0xDF}
+       buf := NewByteBuffer(data)
+       err := &Error{}
+
+       result := readUTF16LE(buf, 4, err)
+
+       require.False(t, err.HasError())
+       require.Equal(t, "🎉", result)
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to