This is an automated email from the ASF dual-hosted git repository.
junrushao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm-ffi.git
The following commit(s) were added to refs/heads/main by this push:
new ae30cd6 fix: out-of-bounds read in `StringViewToDLDataType_` with
non-null-terminated strings (#387)
ae30cd6 is described below
commit ae30cd67763ac7bb36a8fccd322590763bcbeed9
Author: Haejoon Kim <[email protected]>
AuthorDate: Fri Jan 9 04:14:33 2026 +0900
fix: out-of-bounds read in `StringViewToDLDataType_` with
non-null-terminated strings (#387)
## Summary
Fixes a critical bug in `StringViewToDataType_` that causes undefined
behavior when parsing dtype strings from non-null-terminated
`std::string_view`. This bug particularly affects Electron applications
due to aggresive memory reuse in Chromium's allocators.
## Problem
The original implementation used `strtoul()` to parse numeric values
(bits and lanes) directly from `std::string_view::data()`. However,
`strtoul()` requires null-terminated strings and will continue reading
beyond the string_view bounds until it finds a null terminator or
non-digit character.
## Solution
Replace `strtoul()` with manual digit parsing that respects
`string_view` boundaries:
```c++
auto parse_digits = [](const char*& ptr, const char* end) -> uint32_t {
uint32_t value = 0;
while (ptr < end && *ptr >= '0' && *ptr <= '9') {
value = value * 10 + (*ptr - '0');
ptr++;
}
return value;
};
```
- Never reads beyond `str.data() + str.length()`
- Works correctly with non-null-terminated strings
- No memory allocation overhead
- Same performance as `strtoul` but safe
## Testing
New test `TEST(DType, NonNullTerminatedStringView)` validates:
- Parsing with digit garbage after the string (`"float16999888777"`
length 7 -> bits=16, not 16999888777)
- Parsing with lane specifications outside bounds (`"int32x4extradata"`
length 5 -> lanes=1, not 4)
- Correct parsing when lanes are within bounds (`"bfloat16x2"` length 10
-> lanes=2)
- Non-null-terminated buffers (`"float64AAAAA"` length 7 -> float64)
- Edge cases with various dtype types (int, uint, float, bfloat)
---
src/ffi/dtype.cc | 79 ++++++++++++++++++++++++++++++++++-----------
tests/cpp/test_dtype.cc | 86 +++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 147 insertions(+), 18 deletions(-)
diff --git a/src/ffi/dtype.cc b/src/ffi/dtype.cc
index 74c9eeb..14cfa5d 100644
--- a/src/ffi/dtype.cc
+++ b/src/ffi/dtype.cc
@@ -206,17 +206,61 @@ inline DLDataType
StringViewToDLDataType_(std::string_view str) {
dtype.bits = 32;
dtype.lanes = 1;
const char* scan;
+ const char* str_end = str.data() + str.length();
+
+ // Helper lambda to parse decimal digits from a bounded string_view
+ // Returns the parsed value and updates *ptr to point past the last digit
+ auto parse_digits = [](const char** ptr, const char* end) -> uint32_t {
+ uint64_t value = 0;
+ const char* start_ptr = *ptr;
+ while (*ptr < end && **ptr >= '0' && **ptr <= '9') {
+ value = value * 10 + (**ptr - '0');
+ (*ptr)++;
+ }
+ if (value > UINT32_MAX) {
+ TVM_FFI_THROW(ValueError) << "Integer value in dtype string '"
+ << std::string_view(start_ptr, *ptr -
start_ptr)
+ << "' is out of range for uint32_t";
+ }
+ return static_cast<uint32_t>(value);
+ };
+
+ // Helper lambda to parse lanes specification (e.g., "x16" or "xvscalex4")
+ // Returns the parsed lanes value and updates *ptr to point past the lanes
specification
+ // Supports scalable vectors with the "xvscale" prefix (represented as
negative lanes)
+ auto parse_lanes = [&](const char** ptr, const char* end, const
std::string_view& dtype_str,
+ bool allow_scalable = false) -> uint16_t {
+ int multiplier = 1;
+ // Check for "xvscale" prefix for scalable vectors
+ if (allow_scalable && (end - *ptr >= 7) && strncmp(*ptr, "xvscale", 7) ==
0) {
+ multiplier = -1;
+ *ptr += 7;
+ }
+ if (*ptr >= end || **ptr != 'x') {
+ return 1; // No lanes specification, default to 1
+ }
+ (*ptr)++; // Skip 'x'
+ const char* digits_start = *ptr;
+ uint32_t lanes_val = parse_digits(ptr, end);
+ if (*ptr == digits_start || lanes_val == 0) {
+ TVM_FFI_THROW(ValueError) << "Invalid lanes specification in dtype '" <<
dtype_str
+ << "'. Lanes must be a positive integer.";
+ }
+ if (lanes_val > UINT16_MAX) {
+ TVM_FFI_THROW(ValueError) << "Lanes value " << lanes_val
+ << " is out of range for uint16_t in dtype '"
<< dtype_str << "'";
+ }
+ return static_cast<uint16_t>(multiplier * lanes_val);
+ };
auto parse_float = [&](const std::string_view& str, int offset, int code,
int bits) {
dtype.code = static_cast<uint8_t>(code);
dtype.bits = static_cast<uint8_t>(bits);
scan = str.data() + offset;
- char* endpt = nullptr;
- if (*scan == 'x') {
- dtype.lanes = static_cast<uint16_t>(strtoul(scan + 1, &endpt, 10));
- scan = endpt;
- }
- if (scan != str.data() + str.length()) {
+ const char* endpt = scan;
+ dtype.lanes = parse_lanes(&endpt, str_end, str);
+ scan = endpt;
+ if (scan != str_end) {
TVM_FFI_THROW(ValueError) << "unknown dtype `" << str << '`';
}
return dtype;
@@ -293,19 +337,18 @@ inline DLDataType
StringViewToDLDataType_(std::string_view str) {
scan = str.data();
TVM_FFI_THROW(ValueError) << "unknown dtype `" << str << '`';
}
- char* xdelim; // emulate sscanf("%ux%u", bits, lanes)
- uint8_t bits = static_cast<uint8_t>(strtoul(scan, &xdelim, 10));
- if (bits != 0) dtype.bits = bits;
- int scalable_multiplier = 1;
- if (strncmp(xdelim, "xvscale", 7) == 0) {
- scalable_multiplier = -1;
- xdelim += 7;
- }
- char* endpt = xdelim;
- if (*xdelim == 'x') {
- dtype.lanes = static_cast<uint16_t>(scalable_multiplier * strtoul(xdelim +
1, &endpt, 10));
+ // Parse bits manually to handle non-null-terminated string_view
+ const char* xdelim = scan;
+ uint32_t bits_val = parse_digits(&xdelim, str_end);
+ if (bits_val > UINT8_MAX) {
+ TVM_FFI_THROW(ValueError) << "Bits value " << bits_val
+ << " is out of range for uint8_t in dtype '" <<
str << "'";
}
- if (endpt != str.data() + str.length()) {
+ uint8_t bits = static_cast<uint8_t>(bits_val);
+ if (bits != 0) dtype.bits = bits;
+ const char* endpt = xdelim;
+ dtype.lanes = parse_lanes(&endpt, str_end, str, /*allow_scalable=*/true);
+ if (endpt != str_end) {
TVM_FFI_THROW(ValueError) << "unknown dtype `" << str << '`';
}
return dtype;
diff --git a/tests/cpp/test_dtype.cc b/tests/cpp/test_dtype.cc
index 79fc9d7..67b74d8 100644
--- a/tests/cpp/test_dtype.cc
+++ b/tests/cpp/test_dtype.cc
@@ -127,4 +127,90 @@ TEST(DataType, AnyConversionWithString) {
EXPECT_EQ(opt_v1.value().bits, 16);
EXPECT_EQ(opt_v1.value().lanes, 2);
}
+
+TEST(DType, NonNullTerminatedStringView) {
+ // Simulate memory scenario similar to Electron where memory after string
+ // contains garbage data (digits from previous strings)
+ //
+ // We test by calling TVMFFIDataTypeFromString directly with TVMFFIByteArray
+ // to bypass String's automatic null-termination
+
+ // Helper lambda to test with raw byte array (no null terminator)
+ auto test_dtype_from_bytes = [](const char* data, size_t size) -> DLDataType
{
+ TVMFFIByteArray byte_array{data, size};
+ DLDataType dtype;
+ int ret = TVMFFIDataTypeFromString(&byte_array, &dtype);
+ EXPECT_EQ(ret, 0) << "TVMFFIDataTypeFromString failed";
+ return dtype;
+ };
+
+ // Test 1: "float16" followed by digit garbage
+ char buffer1[] = "float16999888777";
+ DLDataType dtype1 = test_dtype_from_bytes(buffer1, 7); // Only "float16"
+ EXPECT_EQ(dtype1.code, kDLFloat);
+ EXPECT_EQ(dtype1.bits, 16); // Should be 16, not 16999888777!
+ EXPECT_EQ(dtype1.lanes, 1);
+
+ // Test 2: "int32" followed by "x4" from previous leftover
+ char buffer2[] = "int32x4extradata";
+ DLDataType dtype2 = test_dtype_from_bytes(buffer2, 5); // Only "int32"
+ EXPECT_EQ(dtype2.code, kDLInt);
+ EXPECT_EQ(dtype2.bits, 32); // Should be 32, not parse the 'x4'
+ EXPECT_EQ(dtype2.lanes, 1); // Should be 1, not 4
+
+ // Test 3: "uint8" followed by more digits
+ char buffer3[] = "uint8192";
+ DLDataType dtype3 = test_dtype_from_bytes(buffer3, 5); // Only "uint8"
+ EXPECT_EQ(dtype3.code, kDLUInt);
+ EXPECT_EQ(dtype3.bits, 8); // Should be 8, not 8192
+ EXPECT_EQ(dtype3.lanes, 1);
+
+ // Test 4: "bfloat16" followed by "x2" garbage
+ char buffer4[] = "bfloat16x2garbage";
+ DLDataType dtype4 = test_dtype_from_bytes(buffer4, 8); // Only "bfloat16"
+ EXPECT_EQ(dtype4.code, kDLBfloat);
+ EXPECT_EQ(dtype4.bits, 16);
+ EXPECT_EQ(dtype4.lanes, 1); // Should be 1, not 2
+
+ // Test 5: "bfloat16x2" - lanes within bounds (should work)
+ DLDataType dtype5 = test_dtype_from_bytes(buffer4, 10); // "bfloat16x2"
+ EXPECT_EQ(dtype5.code, kDLBfloat);
+ EXPECT_EQ(dtype5.bits, 16);
+ EXPECT_EQ(dtype5.lanes, 2); // Should correctly parse x2
+
+ // Test 6: Truly non-null-terminated - overwrite null byte
+ char buffer6[] = "float64AAAAA";
+ buffer6[7] = 'X'; // Ensure no null
terminator at position 7
+ DLDataType dtype6 = test_dtype_from_bytes(buffer6, 7); // "float64"
+ EXPECT_EQ(dtype6.code, kDLFloat);
+ EXPECT_EQ(dtype6.bits, 64);
+ EXPECT_EQ(dtype6.lanes, 1);
+
+ // Test 7: "int8" followed by "x16" pattern
+ char buffer7[] = "int8x16leftovers";
+ DLDataType dtype7 = test_dtype_from_bytes(buffer7, 4); // Only "int8"
+ EXPECT_EQ(dtype7.code, kDLInt);
+ EXPECT_EQ(dtype7.bits, 8);
+ EXPECT_EQ(dtype7.lanes, 1); // Should be 1, not 16
+
+ // Test 8: With actual x specification that should parse
+ DLDataType dtype8 = test_dtype_from_bytes(buffer7, 7); // "int8x16"
+ EXPECT_EQ(dtype8.code, kDLInt);
+ EXPECT_EQ(dtype8.bits, 8);
+ EXPECT_EQ(dtype8.lanes, 16); // Should correctly parse x16
+
+ // Test 9: Scalable vector - "int32xvscalex4"
+ char buffer9[] = "int32xvscalex4extra";
+ DLDataType dtype9 = test_dtype_from_bytes(buffer9, 14); // "int32xvscalex4"
+ EXPECT_EQ(dtype9.code, kDLInt);
+ EXPECT_EQ(dtype9.bits, 32);
+ EXPECT_EQ(dtype9.lanes, static_cast<uint16_t>(-4)); // Scalable: -4
+
+ // Test 10: Scalable vector with garbage after
+ char buffer10[] = "float16xvscalex8999";
+ DLDataType dtype10 = test_dtype_from_bytes(buffer10, 16); //
"float16xvscalex8"
+ EXPECT_EQ(dtype10.code, kDLFloat);
+ EXPECT_EQ(dtype10.bits, 16);
+ EXPECT_EQ(dtype10.lanes, static_cast<uint16_t>(-8)); // Should be -8, not
parse "999"
+}
} // namespace