https://github.com/kastiglione updated https://github.com/llvm/llvm-project/pull/184242
>From 167abfa39cb5ed9bb6805b6890cd00c9eea2e2ee Mon Sep 17 00:00:00 2001 From: Dave Lee <[email protected]> Date: Mon, 2 Mar 2026 14:03:53 -0800 Subject: [PATCH 1/2] [lldb] Add C source output mode to formatter_bytecode.py --- lldb/examples/python/formatter_bytecode.py | 138 +++++++++++++++++++-- 1 file changed, 126 insertions(+), 12 deletions(-) diff --git a/lldb/examples/python/formatter_bytecode.py b/lldb/examples/python/formatter_bytecode.py index 8f2f09834ff4f..8386785b80d84 100644 --- a/lldb/examples/python/formatter_bytecode.py +++ b/lldb/examples/python/formatter_bytecode.py @@ -17,8 +17,9 @@ import re import io +import textwrap from dataclasses import dataclass -from typing import BinaryIO, TextIO, Tuple, Union +from typing import Any, BinaryIO, TextIO, Tuple, Union BINARY_VERSION = 1 @@ -207,9 +208,7 @@ def validate(self): raise ValueError(f"duplicate signature: {sig}") seen.add(sig) - def write_binary(self, output: BinaryIO) -> None: - self.validate() - + def _to_binary(self) -> bytes: bin = bytearray() bin.extend(_to_uleb(len(self.type_name))) bin.extend(bytes(self.type_name, encoding="utf-8")) @@ -219,9 +218,77 @@ def write_binary(self, output: BinaryIO) -> None: bin.extend(_to_uleb(len(bc))) bin.extend(bc) + return bytes(bin) + + def write_binary(self, output: BinaryIO) -> None: + self.validate() + + bin = self._to_binary() output.write(_to_byte(BINARY_VERSION)) output.write(_to_uleb(len(bin))) - output.write(bin) + output.write(self._to_binary()) + + class _CBuilder: + """Helper class for emitting binary data as a C-string literal.""" + + entries: list[Tuple[Any, str]] + + def __init__(self) -> None: + self.entries = [] + + def add_byte(self, x: int, comment: str) -> None: + self.add_bytes(_to_byte(x), comment) + + def add_uleb(self, x: int, comment: str) -> None: + self.add_bytes(_to_uleb(x), comment) + + def add_bytes(self, x: bytes, comment: str) -> None: + # Construct zero padded hex values with length two. + string = "".join(f"\\x{b:02x}" for b in x) + self.add_string(string, comment) + + def add_string(self, string: str, comment: str) -> None: + self.entries.append((f'"{string}"', comment)) + + def write_source(self, output: TextIO) -> None: + self.validate() + + size = len(self._to_binary()) + + b = self._CBuilder() + b.add_byte(BINARY_VERSION, "version") + b.add_uleb(size, "remaining record size") + b.add_uleb(len(self.type_name), "type name size") + b.add_string(self.type_name, "type name") + b.add_byte(self.flags, "flags") + for sig, bc in self.signatures: + b.add_byte(SIGNATURES[sig], f"sig_{sig}") + b.add_uleb(len(bc), "program size") + b.add_bytes(bc, "program") + + print( + textwrap.dedent( + """ + #ifdef __APPLE__ + #define FORMATTER_SECTION "__DATA_CONST,__lldbformatters" + #else + #define FORMATTER_SECTION ".lldbformatters" + #endif + """ + ), + file=output, + ) + var_name = re.sub(r"\W", "_", self.type_name) + print( + "__attribute__((used, section(FORMATTER_SECTION)))", + file=output, + ) + print(f"unsigned char _{var_name}_synthetic[] =", file=output) + indent = " " + for string, comment in b.entries: + print(f"{indent}// {comment}", file=output) + print(f"{indent}{string}", file=output) + print(";", file=output) def compile_file(type_name: str, input: TextIO) -> BytecodeSection: @@ -601,7 +668,7 @@ def next_byte(): ################################################################################ -def _to_uleb(value: int) -> bytearray: +def _to_uleb(value: int) -> bytes: """Encode an integer to ULEB128 bytes.""" if value < 0: raise ValueError(f"negative number cannot be encoded to ULEB128: {value}") @@ -616,7 +683,7 @@ def _to_uleb(value: int) -> bytearray: if value == 0: break - return result + return bytes(result) def _from_uleb(stream: BinaryIO) -> int: @@ -665,18 +732,27 @@ def _main(): "--output", help="output file (required for --compile)", ) + parser.add_argument( + "-f", + "--format", + choices=("binary", "c"), + default="binary", + help="output file format", + ) parser.add_argument("-t", "--test", action="store_true", help="run unit tests") args = parser.parse_args() if args.compile: if not args.output: parser.error("--output is required with --compile") - with ( - open(args.input) as input, - open(args.output, "wb") as output, - ): + with open(args.input) as input: section = compile_file(args.type_name, input) - section.write_binary(output) + if args.format == "binary": + with open(args.output, "wb") as output: + section.write_binary(output) + else: # args.format == "c" + with open(args.output, "w") as output: + section.write_source(output) elif args.disassemble: if args.output: with ( @@ -758,4 +834,42 @@ def run_disassemble(binary): with self.assertRaises(ValueError): run_compile("MyType", "@summary: 1u return\n@summary: 2u return") + def test_write_source(self): + # Use the Account example from main.cpp as a reference, whose + # exact byte values are known. + section = BytecodeSection( + type_name="Account", + flags=0, + signatures=[ + ("get_num_children", bytes([0x20, 0x01])), + ("get_child_at_index", bytes([0x02, 0x20, 0x00, 0x23, 0x11, 0x60])), + ], + ) + out = io.StringIO() + section.write_source(out) + src = out.getvalue() + + self.assertIn("__attribute__((used, section(FORMATTER_SECTION)))", src) + self.assertIn("unsigned char _Account_synthetic[] =", src) + self.assertIn('"\\x01"', src) # version + self.assertIn('"\\x15"', src) # record size (21) + self.assertIn('"\\x07"', src) # type name size (7) + self.assertIn('"Account"', src) # type name + self.assertIn('"\\x00"', src) # flags + self.assertIn('"\\x02"', src) # sig_get_num_children + self.assertIn('"\\x20\\x01"', src) # program + self.assertIn('"\\x04"', src) # sig_get_child_at_index + self.assertIn('"\\x06"', src) # program size + self.assertIn('"\\x02\\x20\\x00\\x23\\x11\\x60"', src) # program + self.assertIn("// version", src) + self.assertIn("// type name", src) + self.assertIn("// program", src) + # Semicolon terminates the array initializer. + self.assertEqual(src.count(";"), 1) + + # Non-identifier characters in the type name are replaced with '_'. + out2 = io.StringIO() + BytecodeSection("std::vector<int>", 0, []).write_source(out2) + self.assertIn("_std__vector_int__synthetic[] =", out2.getvalue()) + unittest.main(argv=[__file__]) >From 5b82525e489ed6a69a795e87206e46b7d78913a2 Mon Sep 17 00:00:00 2001 From: Dave Lee <[email protected]> Date: Tue, 3 Mar 2026 10:49:12 -0800 Subject: [PATCH 2/2] Declare more precise type for _CBuilder.entries --- lldb/examples/python/formatter_bytecode.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/examples/python/formatter_bytecode.py b/lldb/examples/python/formatter_bytecode.py index 8386785b80d84..1ad10d60fb974 100644 --- a/lldb/examples/python/formatter_bytecode.py +++ b/lldb/examples/python/formatter_bytecode.py @@ -19,7 +19,7 @@ import io import textwrap from dataclasses import dataclass -from typing import Any, BinaryIO, TextIO, Tuple, Union +from typing import BinaryIO, TextIO, Tuple, Union BINARY_VERSION = 1 @@ -231,7 +231,7 @@ def write_binary(self, output: BinaryIO) -> None: class _CBuilder: """Helper class for emitting binary data as a C-string literal.""" - entries: list[Tuple[Any, str]] + entries: list[Tuple[str, str]] def __init__(self) -> None: self.entries = [] _______________________________________________ lldb-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits
