This is an automated email from the ASF dual-hosted git repository.
chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fory.git
The following commit(s) were added to refs/heads/main by this push:
new 4fd66f795 fix(go): prevent panic in readUTF16LE with odd byte counts
(#3293)
4fd66f795 is described below
commit 4fd66f795bfb34e012c9aec10caf30772e16148b
Author: Jonathan Yoder <[email protected]>
AuthorDate: Thu Feb 5 22:18:42 2026 -0500
fix(go): prevent panic in readUTF16LE with odd byte counts (#3293)
## What does this PR do?
Fixes a panic in `readUTF16LE` when the byte count is odd (malformed
UTF-16 data).
## Problem
When `readUTF16LE` receives an odd byte count, the function panics with:
```
panic: runtime error: index out of range [N] with length N
```
This occurs because the loop iterates up to `byteCount` but tries to
access `data[i+1]` on the last iteration when `i = byteCount - 1` (for
odd `byteCount`).
### Steps to Reproduce
```go
func TestReadUTF16LE_OddByteCount(t *testing.T) {
// Data: 5 bytes where only first 4 form valid UTF-16
data := []byte{0x48, 0x00, 0x69, 0x00, 0xFF}
buf := NewByteBuffer(data)
err := &Error{}
// This panics without the fix
result := readUTF16LE(buf, 5, err)
}
```
### Root Cause
In `go/fory/string.go`, lines 89-90:
```go
for i := 0; i < byteCount; i += 2 {
u16s[i/2] = uint16(data[i]) | uint16(data[i+1])<<8 // Panics when i+1 >= len(data)
}
```
## Solution
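`readUTF16LE` now checks the byte count up front: when it is odd, it sets an `ErrKindInvalidUTF16String` error (created by `InvalidUTF16StringError`) and returns an empty string instead of reading the data and panicking.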
## Tests Added
- `TestReadUTF16LE_OddByteCount`: Verifies an odd byte count yields an `ErrKindInvalidUTF16String` error and an empty result instead of a panic
- `TestReadUTF16LE_SingleByte`: Verifies single-byte input yields an `ErrKindInvalidUTF16String` error and an empty result
- `TestReadUTF16LE_EvenByteCount`: Verifies normal case still works
- `TestReadUTF16LE_EmptyBuffer`: Verifies empty input handling
- `TestReadUTF16LE_SurrogatePair`: Verifies emoji/surrogate pair
decoding
## Checklist
- [x] I have read the [Contributing
Guidelines](https://github.com/apache/fory/blob/main/CONTRIBUTING.md)
- [x] All tests pass (`go test -v ./...` in `go/fory`)
- [x] License headers are correct (hawkeye check passed)
- [x] PR title follows conventional commits format
---------
Co-authored-by: Claude Opus 4.5 <[email protected]>
Co-authored-by: chaokunyang <[email protected]>
---
go/fory/doc.go | 1 +
go/fory/errors.go | 10 ++++++
go/fory/string.go | 8 +++++
go/fory/string_test.go | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 110 insertions(+)
diff --git a/go/fory/doc.go b/go/fory/doc.go
index c06788c11..b9ebb73da 100644
--- a/go/fory/doc.go
+++ b/go/fory/doc.go
@@ -305,6 +305,7 @@ Common error kinds:
- ErrKindUnknownType: Unknown type encountered
- ErrKindMaxDepthExceeded: Recursion depth limit exceeded
- ErrKindHashMismatch: Struct hash mismatch (schema changed)
+ - ErrKindInvalidUTF16String: Malformed UTF-16 string payload
# Best Practices
diff --git a/go/fory/errors.go b/go/fory/errors.go
index bf8d7676f..2e4f19e9a 100644
--- a/go/fory/errors.go
+++ b/go/fory/errors.go
@@ -50,6 +50,8 @@ const (
ErrKindHashMismatch
// ErrKindInvalidTag indicates invalid fory struct tag configuration
ErrKindInvalidTag
+ // ErrKindInvalidUTF16String indicates malformed UTF-16 string data
+ ErrKindInvalidUTF16String
)
// Error is a lightweight error type optimized for hot path performance.
@@ -257,6 +259,14 @@ func InvalidTagErrorf(format string, args ...any) Error {
})
}
+// InvalidUTF16StringError creates an invalid UTF-16 string error
+func InvalidUTF16StringError(byteCount int) Error {
+ return panicIfEnabled(Error{
+ kind: ErrKindInvalidUTF16String,
+ message: fmt.Sprintf("invalid UTF-16 string byte count %d: must be even", byteCount),
+ })
+}
+
// WrapError wraps a standard error into a fory Error
func WrapError(err error, kind ErrorKind) Error {
if err == nil {
diff --git a/go/fory/string.go b/go/fory/string.go
index cde49328b..10586a27c 100644
--- a/go/fory/string.go
+++ b/go/fory/string.go
@@ -81,7 +81,15 @@ func readLatin1(buf *ByteBuffer, size int, err *Error) string {
}
func readUTF16LE(buf *ByteBuffer, byteCount int, err *Error) string {
+ if byteCount&1 != 0 {
+ err.SetError(InvalidUTF16StringError(byteCount))
+ return ""
+ }
+
data := buf.ReadBinary(byteCount, err)
+ if err.HasError() {
+ return ""
+ }
// Reconstruct UTF-16 code units
charCount := byteCount / 2
diff --git a/go/fory/string_test.go b/go/fory/string_test.go
new file mode 100644
index 000000000..cc39ec87a
--- /dev/null
+++ b/go/fory/string_test.go
@@ -0,0 +1,91 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package fory
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+func TestReadUTF16LE_EvenByteCount(t *testing.T) {
+ // Test normal case: even byte count (valid UTF-16)
+ // "Hi" in UTF-16LE: 'H'=0x0048, 'i'=0x0069
+ // Little-endian bytes: 48 00 69 00
+ data := []byte{0x48, 0x00, 0x69, 0x00}
+ buf := NewByteBuffer(data)
+ err := &Error{}
+
+ result := readUTF16LE(buf, 4, err)
+
+ require.False(t, err.HasError())
+ require.Equal(t, "Hi", result)
+}
+
+func TestReadUTF16LE_OddByteCount(t *testing.T) {
+ // Test edge case: odd byte count (malformed UTF-16 data).
+ // This should return a typed decode error rather than silently truncating.
+ data := []byte{0x48, 0x00, 0x69, 0x00, 0xFF}
+ buf := NewByteBuffer(data)
+ err := &Error{}
+
+ result := readUTF16LE(buf, 5, err)
+
+ require.True(t, err.HasError())
+ require.Equal(t, ErrKindInvalidUTF16String, err.Kind())
+ require.Equal(t, "", result)
+}
+
+func TestReadUTF16LE_SingleByte(t *testing.T) {
+ // Test edge case: single byte (no complete UTF-16 code units)
+ data := []byte{0x48}
+ buf := NewByteBuffer(data)
+ err := &Error{}
+
+ result := readUTF16LE(buf, 1, err)
+
+ require.True(t, err.HasError())
+ require.Equal(t, ErrKindInvalidUTF16String, err.Kind())
+ require.Equal(t, "", result)
+}
+
+func TestReadUTF16LE_EmptyBuffer(t *testing.T) {
+ // Test edge case: zero bytes
+ data := []byte{}
+ buf := NewByteBuffer(data)
+ err := &Error{}
+
+ result := readUTF16LE(buf, 0, err)
+
+ require.False(t, err.HasError())
+ require.Equal(t, "", result)
+}
+
+func TestReadUTF16LE_SurrogatePair(t *testing.T) {
+ // Test UTF-16 surrogate pair for emoji: 🎉 (U+1F389)
+ // UTF-16: D83C DF89 (surrogate pair)
+ // Little-endian bytes: 3C D8 89 DF
+ data := []byte{0x3C, 0xD8, 0x89, 0xDF}
+ buf := NewByteBuffer(data)
+ err := &Error{}
+
+ result := readUTF16LE(buf, 4, err)
+
+ require.False(t, err.HasError())
+ require.Equal(t, "🎉", result)
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]