This is an automated email from the ASF dual-hosted git repository.
tlopex pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new a7463e9b2d [RPC][Tracker] Bound msg_size to MAX_TRACKER_MSG_BYTES to
prevent unbounded buffer growth (#19586)
a7463e9b2d is described below
commit a7463e9b2da4bed070db2938caaed9ca030b49a6
Author: Bl4ckSku11 <[email protected]>
AuthorDate: Sat May 23 23:07:03 2026 -0500
[RPC][Tracker] Bound msg_size to MAX_TRACKER_MSG_BYTES to prevent unbounded
buffer growth (#19586)
Fixes #<issue-number>.
The `_msg_size` value read from the tracker socket is now bounded to
`MAX_TRACKER_MSG_BYTES` (1 MiB), and the 4-byte size header is consumed
as soon as it is read. Without these checks, a peer on a single TCP
connection can grow the tracker's receive buffer until the process runs
out of memory, and a size header of 0 stalls the parser: the header is
never consumed, so the buffered bytes are never freed.
Per the TVM security model, the tracker is deployed on trusted networks,
so this is filed as a robustness defect, not a security advisory.
Apache security team triage (private thread, 2026-05-17) confirmed this
is the right channel.
### Test
Added regression test in tests/python/contrib/test_rpc_tracker.py that
completes the magic handshake, sends an oversized msg_size header
(0x7FFFFFFF), and asserts the tracker closes the connection.
### Changes
- python/tvm/rpc/tracker.py: bound `_msg_size` to
  (0, MAX_TRACKER_MSG_BYTES], consume size header on read.
- tests/python/contrib/test_rpc_tracker.py: regression test.
---
python/tvm/rpc/tracker.py | 31 ++++++++++++++++++----
tests/python/contrib/test_rpc_tracker.py | 44 ++++++++++++++++++++++++++++++++
2 files changed, 70 insertions(+), 5 deletions(-)
diff --git a/python/tvm/rpc/tracker.py b/python/tvm/rpc/tracker.py
index 81fe2feb69..0714c64fc9 100644
--- a/python/tvm/rpc/tracker.py
+++ b/python/tvm/rpc/tracker.py
@@ -77,6 +77,12 @@ logger.addHandler(console_handler)
logger.setLevel(logging.INFO)
logger.propagate = False
+# Maximum size in bytes for a single tracker message. Tracker frames carry
+# small JSON command tuples; 1 MiB is well above any legitimate payload and
+# bounds memory growth when a peer sends an oversized or malformed size
+# header on the wire.
+MAX_TRACKER_MSG_BYTES = 1 << 20
+
class Scheduler:
"""Abstract interface of scheduler."""
@@ -224,14 +230,29 @@ class TCPEventHandler(tornado_util.TCPHandler):
if self._msg_size == 0:
if len(self._data) >= 4:
self._msg_size = struct.unpack("<i", self._data[:4])[0]
+ if self._msg_size <= 0 or self._msg_size > MAX_TRACKER_MSG_BYTES:
+ logger.warning(
+ "Invalid msg_size %d from %s; closing connection",
+ self._msg_size,
+ self.name(),
+ )
+ self.close()
+ return
+ del self._data[:4]
else:
return
- if self._msg_size != 0 and len(self._data) >= self._msg_size + 4:
- msg = py_str(bytes(self._data[4 : 4 + self._msg_size]))
- del self._data[: 4 + self._msg_size]
+ if self._msg_size != 0 and len(self._data) >= self._msg_size:
+ msg = py_str(bytes(self._data[: self._msg_size]))
+ del self._data[: self._msg_size]
self._msg_size = 0
- # pylint: disable=broad-except
- self.call_handler(json.loads(msg))
+ try:
+ self.call_handler(json.loads(msg))
+ except Exception: # pylint: disable=broad-except
+ logger.warning(
+ "Error handling message from %s", self.name(), exc_info=True
+ )
+ self.close()
+ return
else:
return
diff --git a/tests/python/contrib/test_rpc_tracker.py b/tests/python/contrib/test_rpc_tracker.py
index 37db25982b..486d5abce4 100644
--- a/tests/python/contrib/test_rpc_tracker.py
+++ b/tests/python/contrib/test_rpc_tracker.py
@@ -105,6 +105,50 @@ def check_server_drop():
print("Skip because tornado is not available")
+def check_tracker_rejects_oversized_msg_size():
+ """Tracker must reject an oversized msg_size header and close the connection
+ instead of buffering an unbounded amount of data on a single TCP connection.
+
+ Regression test for the unbounded buffer growth defect in
+ TCPEventHandler.on_message. See MAX_TRACKER_MSG_BYTES in tracker.py.
+ """
+ try:
+ # pylint: disable=import-outside-toplevel
+ import socket
+ import struct
+
+ from tvm.rpc import base, tracker
+
+ tserver = tracker.Tracker(port=9180, port_end=9290, silent=True)
+ try:
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sock.settimeout(5)
+ sock.connect(("127.0.0.1", tserver.port))
+ # complete the 4-byte magic handshake
+ sock.sendall(struct.pack("<i", base.RPC_TRACKER_MAGIC))
+ magic_reply = sock.recv(4)
+ assert struct.unpack("<i", magic_reply)[0] == base.RPC_TRACKER_MAGIC
+
+ # send an oversized msg_size header (2 GiB)
+ sock.sendall(struct.pack("<i", 0x7FFFFFFF))
+
+ # server must close the connection (no payload buffering)
+ for _ in range(20):
+ chunk = sock.recv(4096)
+ if chunk == b"":
+ break
+ time.sleep(0.05)
+ else:
+ raise AssertionError(
+ "tracker did not close connection after oversized msg_size"
+ )
+ finally:
+ tserver.terminate()
+ except ImportError:
+ print("Skip because tornado is not available")
+
+
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
check_server_drop()
+ check_tracker_rejects_oversized_msg_size()