Re: [PR] hrw4u: Allows bulk compilation of many files (faster) [trafficserver]

via GitHub Fri, 06 Feb 2026 15:07:27 -0800


Copilot commented on code in PR #12865:
URL: https://github.com/apache/trafficserver/pull/12865#discussion_r2776378984



##########
tools/hrw4u/src/common.py:
##########
@@ -229,3 +229,108 @@ def generate_output(
         print(error_collector.get_error_summary(), file=sys.stderr)
         if not args.ast and tree is None:
             sys.exit(1)
+
+
+def run_main(
+        description: str, lexer_class: type[LexerProtocol], parser_class: 
type[ParserProtocol],
+        visitor_class: type[VisitorProtocol], error_prefix: str, 
output_flag_name: str, output_flag_help: str) -> None:
+    """
+    Generic main function for hrw4u and u4wrh scripts with bulk compilation 
support.
+
+    Args:
+        description: Description for argument parser
+        lexer_class: ANTLR lexer class to use
+        parser_class: ANTLR parser class to use
+        visitor_class: Visitor class to use
+        error_prefix: Error prefix for error messages
+        output_flag_name: Name of output flag (e.g., "hrw", "hrw4u")
+        output_flag_help: Help text for output flag
+    """
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description=description,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="For bulk compilation to files, use: input1.txt:output1.txt 
input2.txt:output2.txt ...")
+
+    parser.add_argument(
+        "files", help="Input file(s) to parse. Use input:output for bulk file 
output (default: stdin to stdout)", nargs="*")
+
+    output_group = parser.add_mutually_exclusive_group()
+    output_group.add_argument("--ast", action="store_true", help="Produce the 
ANTLR parse tree only")
+    output_group.add_argument(f"--{output_flag_name}", action="store_true", 
help=output_flag_help)
+
+    parser.add_argument("--no-comments", action="store_true", help="Skip 
comment preservation (ignore comments in output)")
+    parser.add_argument("--debug", action="store_true", help="Enable debug 
output")
+    parser.add_argument(
+        "--stop-on-error", action="store_true", help="Stop processing on first 
error (default: collect and report multiple errors)")
+
+    args = parser.parse_args()
+
+    if not hasattr(args, output_flag_name):
+        setattr(args, output_flag_name, False)
+
+    if not (args.ast or getattr(args, output_flag_name)):
+        setattr(args, output_flag_name, True)
+
+    if not args.files:
+        content, filename = process_input(sys.stdin)
+        tree, parser_obj, error_collector = create_parse_tree(
+            content, filename, lexer_class, parser_class, error_prefix, not 
args.stop_on_error)
+        generate_output(tree, parser_obj, visitor_class, filename, args, 
error_collector)
+        return
+
+    if any(':' in f for f in args.files):
+        for pair in args.files:
+            if ':' not in pair:
+                print(
+                    f"Error: Mixed formats not allowed. All files must use 
'input:output' format for bulk compilation.",
+                    file=sys.stderr)
+                sys.exit(1)
+
+            input_path, output_path = pair.split(':', 1)
+
+            try:
+                with open(input_path, 'r', encoding='utf-8') as input_file:
+                    content = input_file.read()
+                    filename = input_path
+            except FileNotFoundError:
+                print(f"Error: Input file '{input_path}' not found", 
file=sys.stderr)
+                sys.exit(1)
+            except Exception as e:
+                print(f"Error reading '{input_path}': {e}", file=sys.stderr)
+                sys.exit(1)
+
+            tree, parser_obj, error_collector = create_parse_tree(
+                content, filename, lexer_class, parser_class, error_prefix, 
not args.stop_on_error)
+
+            try:
+                original_stdout = sys.stdout
+                with open(output_path, 'w', encoding='utf-8') as output_file:
+                    sys.stdout = output_file
+                    generate_output(tree, parser_obj, visitor_class, filename, 
args, error_collector)
+                sys.stdout = original_stdout
+            except Exception as e:
+                sys.stdout = original_stdout

Review Comment:
   In bulk `input:output` mode, `sys.stdout` is reassigned to the output file, 
but restoration isn’t guaranteed for `SystemExit`/`KeyboardInterrupt` (they 
aren’t caught by `except Exception`). Use a `try/finally` (or 
`contextlib.redirect_stdout`) so `sys.stdout` is always restored even if 
`generate_output()` exits early.



##########
tools/hrw4u/src/common.py:
##########
@@ -229,3 +229,108 @@ def generate_output(
         print(error_collector.get_error_summary(), file=sys.stderr)
         if not args.ast and tree is None:
             sys.exit(1)
+
+
+def run_main(
+        description: str, lexer_class: type[LexerProtocol], parser_class: 
type[ParserProtocol],
+        visitor_class: type[VisitorProtocol], error_prefix: str, 
output_flag_name: str, output_flag_help: str) -> None:
+    """
+    Generic main function for hrw4u and u4wrh scripts with bulk compilation 
support.
+
+    Args:
+        description: Description for argument parser
+        lexer_class: ANTLR lexer class to use
+        parser_class: ANTLR parser class to use
+        visitor_class: Visitor class to use
+        error_prefix: Error prefix for error messages
+        output_flag_name: Name of output flag (e.g., "hrw", "hrw4u")
+        output_flag_help: Help text for output flag
+    """
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description=description,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="For bulk compilation to files, use: input1.txt:output1.txt 
input2.txt:output2.txt ...")
+
+    parser.add_argument(
+        "files", help="Input file(s) to parse. Use input:output for bulk file 
output (default: stdin to stdout)", nargs="*")
+
+    output_group = parser.add_mutually_exclusive_group()
+    output_group.add_argument("--ast", action="store_true", help="Produce the 
ANTLR parse tree only")
+    output_group.add_argument(f"--{output_flag_name}", action="store_true", 
help=output_flag_help)
+
+    parser.add_argument("--no-comments", action="store_true", help="Skip 
comment preservation (ignore comments in output)")
+    parser.add_argument("--debug", action="store_true", help="Enable debug 
output")
+    parser.add_argument(
+        "--stop-on-error", action="store_true", help="Stop processing on first 
error (default: collect and report multiple errors)")
+
+    args = parser.parse_args()
+
+    if not hasattr(args, output_flag_name):
+        setattr(args, output_flag_name, False)
+
+    if not (args.ast or getattr(args, output_flag_name)):
+        setattr(args, output_flag_name, True)
+
+    if not args.files:
+        content, filename = process_input(sys.stdin)
+        tree, parser_obj, error_collector = create_parse_tree(
+            content, filename, lexer_class, parser_class, error_prefix, not 
args.stop_on_error)
+        generate_output(tree, parser_obj, visitor_class, filename, args, 
error_collector)
+        return
+
+    if any(':' in f for f in args.files):
+        for pair in args.files:
+            if ':' not in pair:
+                print(

Review Comment:
   This PR adds new CLI behavior (multi-file processing and `input:output` bulk 
compilation), but there’s no test that exercises `run_main()` end-to-end via 
the actual scripts/arg parsing. Consider adding a pytest that runs 
`scripts/hrw4u` (or calls `run_main()` with a patched `sys.argv`) with multiple 
args / `input:output` to prevent regressions.



##########
tools/hrw4u/src/common.py:
##########
@@ -229,3 +229,108 @@ def generate_output(
         print(error_collector.get_error_summary(), file=sys.stderr)
         if not args.ast and tree is None:
             sys.exit(1)
+
+
+def run_main(
+        description: str, lexer_class: type[LexerProtocol], parser_class: 
type[ParserProtocol],
+        visitor_class: type[VisitorProtocol], error_prefix: str, 
output_flag_name: str, output_flag_help: str) -> None:
+    """
+    Generic main function for hrw4u and u4wrh scripts with bulk compilation 
support.
+
+    Args:
+        description: Description for argument parser
+        lexer_class: ANTLR lexer class to use
+        parser_class: ANTLR parser class to use
+        visitor_class: Visitor class to use
+        error_prefix: Error prefix for error messages
+        output_flag_name: Name of output flag (e.g., "hrw", "hrw4u")
+        output_flag_help: Help text for output flag
+    """
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description=description,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="For bulk compilation to files, use: input1.txt:output1.txt 
input2.txt:output2.txt ...")
+
+    parser.add_argument(
+        "files", help="Input file(s) to parse. Use input:output for bulk file 
output (default: stdin to stdout)", nargs="*")
+
+    output_group = parser.add_mutually_exclusive_group()
+    output_group.add_argument("--ast", action="store_true", help="Produce the 
ANTLR parse tree only")
+    output_group.add_argument(f"--{output_flag_name}", action="store_true", 
help=output_flag_help)
+
+    parser.add_argument("--no-comments", action="store_true", help="Skip 
comment preservation (ignore comments in output)")
+    parser.add_argument("--debug", action="store_true", help="Enable debug 
output")
+    parser.add_argument(
+        "--stop-on-error", action="store_true", help="Stop processing on first 
error (default: collect and report multiple errors)")
+
+    args = parser.parse_args()
+
+    if not hasattr(args, output_flag_name):
+        setattr(args, output_flag_name, False)
+
+    if not (args.ast or getattr(args, output_flag_name)):
+        setattr(args, output_flag_name, True)
+
+    if not args.files:
+        content, filename = process_input(sys.stdin)
+        tree, parser_obj, error_collector = create_parse_tree(
+            content, filename, lexer_class, parser_class, error_prefix, not 
args.stop_on_error)
+        generate_output(tree, parser_obj, visitor_class, filename, args, 
error_collector)
+        return
+
+    if any(':' in f for f in args.files):
+        for pair in args.files:
+            if ':' not in pair:
+                print(
+                    f"Error: Mixed formats not allowed. All files must use 
'input:output' format for bulk compilation.",
+                    file=sys.stderr)
+                sys.exit(1)
+
+            input_path, output_path = pair.split(':', 1)
+
+            try:
+                with open(input_path, 'r', encoding='utf-8') as input_file:
+                    content = input_file.read()
+                    filename = input_path
+            except FileNotFoundError:
+                print(f"Error: Input file '{input_path}' not found", 
file=sys.stderr)
+                sys.exit(1)
+            except Exception as e:
+                print(f"Error reading '{input_path}': {e}", file=sys.stderr)
+                sys.exit(1)
+
+            tree, parser_obj, error_collector = create_parse_tree(
+                content, filename, lexer_class, parser_class, error_prefix, 
not args.stop_on_error)
+
+            try:
+                original_stdout = sys.stdout
+                with open(output_path, 'w', encoding='utf-8') as output_file:
+                    sys.stdout = output_file
+                    generate_output(tree, parser_obj, visitor_class, filename, 
args, error_collector)
+                sys.stdout = original_stdout
+            except Exception as e:
+                sys.stdout = original_stdout
+                print(f"Error writing to '{output_path}': {e}", 
file=sys.stderr)
+                sys.exit(1)
+    else:
+        for i, input_path in enumerate(args.files):
+            if i > 0:
+                print("\n# ---")
+
+            try:

Review Comment:
   When processing multiple input files to stdout, the separator is printed as 
`print("\n# ---")`, which introduces an extra blank line before `# ---`. If the 
documented separator is meant to be exactly `# ---` between outputs, consider 
printing just `"# ---"` (or otherwise align the output format with the docs).
   ```suggestion
                   print("# ---")
   ```



##########
tools/hrw4u/tests/utils.py:
##########
@@ -315,3 +318,69 @@ def test_reverse_conversion(input_file: Path, output_file: 
Path) -> None:
         run_reverse_test(input_file, output_file)
 
     return test_reverse_conversion
+
+
+def run_bulk_test(group: str) -> None:
+    """
+    Run bulk compilation test for a specific test group.
+
+    Collects all .input.txt files in the group, runs hrw4u with bulk
+    input:output pairs, and compares each output with expected .output.txt.
+    """
+    base_dir = Path("tests/data") / group
+    exceptions = _read_exceptions(base_dir)
+
+    input_files = []
+    expected_outputs = []
+    file_pairs = []
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmp_path = Path(tmpdir)
+
+        for input_file in sorted(base_dir.glob("*.input.txt")):
+            if ".fail." in input_file.name:
+                continue
+
+            base = input_file.with_suffix('')
+            expected_output_file = base.with_suffix('.output.txt')
+            test_id = base.name
+
+            if test_id in exceptions:
+                test_direction = exceptions[test_id]
+                if test_direction != "hrw4u":
+                    continue
+
+            if not expected_output_file.exists():
+                continue
+
+            input_files.append(input_file)
+            expected_outputs.append(expected_output_file)
+
+            actual_output_file = tmp_path / f"{input_file.stem}.output.txt"
+            
file_pairs.append(f"{input_file.resolve()}:{actual_output_file.resolve()}")
+
+        if not file_pairs:
+            pytest.skip(f"No valid test files found for bulk test in {group}")
+            return
+
+        hrw4u_script = Path("scripts/hrw4u").resolve()
+        cmd = ["python3", str(hrw4u_script)] + file_pairs
+
+        result = subprocess.run(cmd, capture_output=True, text=True, 
cwd=Path.cwd())

Review Comment:
   The bulk test hard-codes `python3` in the command invocation. Using 
`sys.executable` makes the test robust in virtualenvs/uv environments and on 
systems where the interpreter isn’t named `python3`.



##########
tools/hrw4u/src/common.py:
##########
@@ -229,3 +229,108 @@ def generate_output(
         print(error_collector.get_error_summary(), file=sys.stderr)
         if not args.ast and tree is None:
             sys.exit(1)
+
+
+def run_main(
+        description: str, lexer_class: type[LexerProtocol], parser_class: 
type[ParserProtocol],
+        visitor_class: type[VisitorProtocol], error_prefix: str, 
output_flag_name: str, output_flag_help: str) -> None:
+    """
+    Generic main function for hrw4u and u4wrh scripts with bulk compilation 
support.
+
+    Args:
+        description: Description for argument parser
+        lexer_class: ANTLR lexer class to use
+        parser_class: ANTLR parser class to use
+        visitor_class: Visitor class to use
+        error_prefix: Error prefix for error messages
+        output_flag_name: Name of output flag (e.g., "hrw", "hrw4u")
+        output_flag_help: Help text for output flag
+    """
+    import argparse

Review Comment:
   This import of module argparse is redundant, as it was previously imported 
[on line 20](1).
   ```suggestion
   
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] hrw4u: Allows bulk compilation of many files (faster) [trafficserver]

Reply via email to