[PATCH v8 4/4] devtools: add multi-provider AI documentation review script

Stephen Hemminger Wed, 28 Jan 2026 10:25:02 -0800

Add review-doc.py script that reviews DPDK documentation files for
spelling, grammar, technical correctness, and clarity using AI
language models. Supports batch processing of multiple files.


Supported AI providers:
  - Anthropic Claude (default)
  - OpenAI ChatGPT
  - xAI Grok
  - Google Gemini

Output formats (-f/--format):
  - text: plain text with extractable diff/msg markers (default)
  - markdown: formatted review document
  - html: complete HTML document with styling
  - json: structured data with metadata

For each input file, the script produces:
  - <basename>.{txt,md,html,json}: review in selected format
  - <basename>.diff: unified diff (text/json, or with -d flag)
  - <basename>.msg: commit message (text/json, or with -d flag)

The commit message prefix is automatically determined from the
file path (e.g., doc/guides/prog_guide: for programmer's guide).

Features:
  - Multiple file processing with glob support
  - Provider selection via -p/--provider option
  - Custom model selection via -m/--model option
  - Configurable output directory via -o/--output-dir option
  - Output format selection via -f/--format option
  - Force diff/msg generation via -d/--diff option
  - Quiet mode (-q) suppresses stdout output
  - Verbose mode (-v) shows token usage and API details
  - Email integration using git sendemail configuration
  - Prompt caching support for Anthropic to reduce costs

Usage:
  ./devtools/review-doc.py doc/guides/prog_guide/mempool_lib.rst
  ./devtools/review-doc.py doc/guides/nics/*.rst
  ./devtools/review-doc.py -f html -d -o /tmp doc/guides/nics/*.rst
  ./devtools/review-doc.py --send-email --to [email protected] file.rst

Requires the appropriate API key environment variable to be set
for the chosen provider (ANTHROPIC_API_KEY, OPENAI_API_KEY,
XAI_API_KEY, or GOOGLE_API_KEY).

Signed-off-by: Stephen Hemminger <[email protected]>
---
 devtools/review-doc.py | 1098 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1098 insertions(+)
 create mode 100755 devtools/review-doc.py

diff --git a/devtools/review-doc.py b/devtools/review-doc.py
new file mode 100755
index 0000000000..1366aa0f85
--- /dev/null
+++ b/devtools/review-doc.py
@@ -0,0 +1,1098 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2026 Stephen Hemminger
+
+"""
+Review DPDK documentation files using AI providers.
+
+Produces a diff file and commit message compliant with DPDK standards.
+Accepts multiple documentation files and generates output for each.
+Supported providers: Anthropic Claude, OpenAI ChatGPT, xAI Grok, Google Gemini
+"""
+
+import argparse
+import getpass
+import json
+import os
+import re
+import smtplib
+import ssl
+import subprocess
+import sys
+from email.message import EmailMessage
+from pathlib import Path
+from urllib.request import Request, urlopen
+from urllib.error import URLError, HTTPError
+
+# Output formats
+OUTPUT_FORMATS = ["text", "markdown", "html", "json"]
+
+# Map output format to file extension
+FORMAT_EXTENSIONS = {
+    "text": ".txt",
+    "markdown": ".md",
+    "html": ".html",
+    "json": ".json",
+}
+
+# Additional markers for extracting diff/msg (used with --diff flag)
+DIFF_MARKERS_INSTRUCTION = """
+
+ADDITIONALLY, at the end of your response, include these exact markers for automated extraction:
+---COMMIT_MESSAGE_START---
+(same commit message as above)
+---COMMIT_MESSAGE_END---
+
+---UNIFIED_DIFF_START---
+(same unified diff as above)
+---UNIFIED_DIFF_END---
+"""
+
+# Provider configurations
+PROVIDERS = {
+    "anthropic": {
+        "name": "Claude",
+        "endpoint": "https://api.anthropic.com/v1/messages",
+        "default_model": "claude-sonnet-4-5-20250929",
+        "env_var": "ANTHROPIC_API_KEY",
+    },
+    "openai": {
+        "name": "ChatGPT",
+        "endpoint": "https://api.openai.com/v1/chat/completions",
+        "default_model": "gpt-4o",
+        "env_var": "OPENAI_API_KEY",
+    },
+    "xai": {
+        "name": "Grok",
+        "endpoint": "https://api.x.ai/v1/chat/completions",
+        "default_model": "grok-3",
+        "env_var": "XAI_API_KEY",
+    },
+    "google": {
+        "name": "Gemini",
+        "endpoint": "https://generativelanguage.googleapis.com/v1beta/models",
+        "default_model": "gemini-2.0-flash",
+        "env_var": "GOOGLE_API_KEY",
+    },
+}
+
+# Commit prefix mappings based on file path
+COMMIT_PREFIX_MAP = [
+    ("doc/guides/prog_guide/", "doc/guides/prog_guide:"),
+    ("doc/guides/sample_app_ug/", "doc/guides/sample_app:"),
+    ("doc/guides/nics/", "doc/guides/nics:"),
+    ("doc/guides/cryptodevs/", "doc/guides/cryptodevs:"),
+    ("doc/guides/compressdevs/", "doc/guides/compressdevs:"),
+    ("doc/guides/eventdevs/", "doc/guides/eventdevs:"),
+    ("doc/guides/rawdevs/", "doc/guides/rawdevs:"),
+    ("doc/guides/bbdevs/", "doc/guides/bbdevs:"),
+    ("doc/guides/gpus/", "doc/guides/gpus:"),
+    ("doc/guides/dmadevs/", "doc/guides/dmadevs:"),
+    ("doc/guides/regexdevs/", "doc/guides/regexdevs:"),
+    ("doc/guides/mldevs/", "doc/guides/mldevs:"),
+    ("doc/guides/rel_notes/", "doc/guides/rel_notes:"),
+    ("doc/guides/linux_gsg/", "doc/guides/linux_gsg:"),
+    ("doc/guides/freebsd_gsg/", "doc/guides/freebsd_gsg:"),
+    ("doc/guides/windows_gsg/", "doc/guides/windows_gsg:"),
+    ("doc/guides/tools/", "doc/guides/tools:"),
+    ("doc/guides/testpmd_app_ug/", "doc/guides/testpmd:"),
+    ("doc/guides/howto/", "doc/guides/howto:"),
+    ("doc/guides/contributing/", "doc/guides/contributing:"),
+    ("doc/guides/platform/", "doc/guides/platform:"),
+    ("doc/guides/", "doc:"),
+    ("doc/api/", "doc/api:"),
+    ("doc/", "doc:"),
+]
+
+SYSTEM_PROMPT = """\
+You are an expert technical documentation reviewer for DPDK.
+Your task is to review documentation files and suggest improvements for:
+- Spelling errors
+- Grammar issues
+- Technical correctness
+- Clarity and readability
+- Consistency with DPDK terminology
+
+IMPORTANT COMMIT MESSAGE RULES (from check-git-log.sh):
+- Subject line MUST be ≤60 characters
+- Format: "prefix: lowercase description"
+- First word after colon must be lowercase (except acronyms like Rx, Tx, VF, MAC, API)
+- Use imperative mood (e.g., "fix typo" not "fixed typo" or "fixes typo")
+- NO trailing period on subject line
+- NO punctuation marks: , ; ! ? & |
+- NO underscores in subject after colon
+- Body lines wrapped at 75 characters
+- Body must NOT start with "It"
+- Do NOT include Signed-off-by (user adds via git commit --sign)
+- Only use "Fixes:" tag for actual errors in documentation, not style improvements
+
+Case-sensitive terms (must use exact case):
+- Rx, Tx (not RX, TX, rx, tx)
+- VF, PF (not vf, pf)
+- MAC, VLAN, RSS, API
+- Linux, Windows, FreeBSD
+
+For style/clarity improvements, do NOT use Fixes tag.
+For actual errors (wrong information, broken examples), include Fixes tag \
+if you can identify the commit."""
+
+FORMAT_INSTRUCTIONS = {
+    "text": """
+OUTPUT FORMAT:
+You must output exactly two sections:
+
+1. COMMIT_MESSAGE section containing the complete commit message
+2. UNIFIED_DIFF section containing the unified diff
+
+Use these exact markers:
+---COMMIT_MESSAGE_START---
+(commit message here)
+---COMMIT_MESSAGE_END---
+
+---UNIFIED_DIFF_START---
+(unified diff here)
+---UNIFIED_DIFF_END---
+
+The diff should be in unified format that can be applied with "git apply".
+If no changes are needed, output empty sections with a note.""",
+    "markdown": """
+OUTPUT FORMAT:
+Provide your review in Markdown format with:
+
+## Summary
+Brief description of changes
+
+## Commit Message
+```
+(complete commit message here, ready to use)
+```
+
+## Changes
+For each change:
+### Issue N: Brief title
+- **Location**: file path and line
+- **Problem**: description
+- **Fix**: suggested correction
+
+## Unified Diff
+```diff
+(unified diff here)
+```""",
+    "html": """
+OUTPUT FORMAT:
+Provide your review in HTML format with:
+- <h2> for sections (Summary, Commit Message, Changes, Diff)
+- <pre><code> for commit message and diff
+- <ul>/<li> for individual issues
+- Do NOT include <html>, <head>, or <body> tags - just the content
+
+Include sections for: Summary, Commit Message, Changes, Unified Diff""",
+    "json": """
+OUTPUT FORMAT:
+Provide your review as JSON with this structure:
+{
+  "summary": "Brief description of changes",
+  "commit_message": "Complete commit message ready to use",
+  "changes": [
+    {
+      "type": "spelling|grammar|technical|clarity|style",
+      "location": "line number or section",
+      "original": "original text",
+      "suggested": "corrected text",
+      "reason": "why this change"
+    }
+  ],
+  "diff": "unified diff as a string",
+  "stats": {
+    "total_issues": 0,
+    "spelling": 0,
+    "grammar": 0,
+    "technical": 0,
+    "clarity": 0
+  }
+}
+Output ONLY valid JSON, no markdown code fences or other text.""",
+}
+
+USER_PROMPT = """\
+Review the following DPDK documentation file and provide improvements.
+
+File path: {doc_file}
+Commit message prefix to use: {commit_prefix}
+
+{format_instruction}
+
+---DOCUMENT CONTENT---
+"""
+
+
+def error(msg):
+    """Print error message and exit."""
+    print(f"Error: {msg}", file=sys.stderr)
+    sys.exit(1)
+
+
+def get_git_config(key):
+    """Get a value from git config."""
+    try:
+        result = subprocess.run(
+            ["git", "config", "--get", key],
+            capture_output=True,
+            text=True,
+            check=True,
+        )
+        return result.stdout.strip()
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        return None
+
+
+def get_smtp_config():
+    """Get SMTP configuration from git config sendemail settings."""
+    config = {
+        "server": get_git_config("sendemail.smtpserver"),
+        "port": get_git_config("sendemail.smtpserverport"),
+        "user": get_git_config("sendemail.smtpuser"),
+        "encryption": get_git_config("sendemail.smtpencryption"),
+        "password": get_git_config("sendemail.smtppass"),
+    }
+
+    # Set defaults
+    if not config["port"]:
+        if config["encryption"] == "ssl":
+            config["port"] = "465"
+        else:
+            config["port"] = "587"
+
+    # Convert port to int
+    if config["port"]:
+        config["port"] = int(config["port"])
+
+    return config
+
+
+def get_commit_prefix(filepath):
+    """Determine commit message prefix from file path."""
+    for prefix_path, prefix in COMMIT_PREFIX_MAP:
+        if filepath.startswith(prefix_path):
+            return prefix
+    return "doc:"
+
+
+def build_anthropic_request(
+    model,
+    max_tokens,
+    agents_content,
+    doc_content,
+    doc_file,
+    commit_prefix,
+    output_format="text",
+    include_diff_markers=False,
+):
+    """Build request payload for Anthropic API."""
+    format_instruction = FORMAT_INSTRUCTIONS.get(output_format, "")
+    if include_diff_markers and output_format not in ("text", "json"):
+        format_instruction += DIFF_MARKERS_INSTRUCTION
+    user_prompt = USER_PROMPT.format(
+        doc_file=doc_file,
+        commit_prefix=commit_prefix,
+        format_instruction=format_instruction,
+    )
+    return {
+        "model": model,
+        "max_tokens": max_tokens,
+        "system": [
+            {"type": "text", "text": SYSTEM_PROMPT},
+            {
+                "type": "text",
+                "text": agents_content,
+                "cache_control": {"type": "ephemeral"},
+            },
+        ],
+        "messages": [
+            {
+                "role": "user",
+                "content": user_prompt + doc_content,
+            }
+        ],
+    }
+
+
+def build_openai_request(
+    model,
+    max_tokens,
+    agents_content,
+    doc_content,
+    doc_file,
+    commit_prefix,
+    output_format="text",
+    include_diff_markers=False,
+):
+    """Build request payload for OpenAI-compatible APIs."""
+    format_instruction = FORMAT_INSTRUCTIONS.get(output_format, "")
+    if include_diff_markers and output_format not in ("text", "json"):
+        format_instruction += DIFF_MARKERS_INSTRUCTION
+    user_prompt = USER_PROMPT.format(
+        doc_file=doc_file,
+        commit_prefix=commit_prefix,
+        format_instruction=format_instruction,
+    )
+    return {
+        "model": model,
+        "max_tokens": max_tokens,
+        "messages": [
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {"role": "system", "content": agents_content},
+            {
+                "role": "user",
+                "content": user_prompt + doc_content,
+            },
+        ],
+    }
+
+
+def build_google_request(
+    max_tokens,
+    agents_content,
+    doc_content,
+    doc_file,
+    commit_prefix,
+    output_format="text",
+    include_diff_markers=False,
+):
+    """Build request payload for Google Gemini API."""
+    format_instruction = FORMAT_INSTRUCTIONS.get(output_format, "")
+    if include_diff_markers and output_format not in ("text", "json"):
+        format_instruction += DIFF_MARKERS_INSTRUCTION
+    user_prompt = USER_PROMPT.format(
+        doc_file=doc_file,
+        commit_prefix=commit_prefix,
+        format_instruction=format_instruction,
+    )
+    return {
+        "contents": [
+            {"role": "user", "parts": [{"text": SYSTEM_PROMPT}]},
+            {"role": "user", "parts": [{"text": agents_content}]},
+            {
+                "role": "user",
+                "parts": [{"text": user_prompt + doc_content}],
+            },
+        ],
+        "generationConfig": {"maxOutputTokens": max_tokens},
+    }
+
+
+def call_api(
+    provider,
+    api_key,
+    model,
+    max_tokens,
+    agents_content,
+    doc_content,
+    doc_file,
+    commit_prefix,
+    output_format="text",
+    include_diff_markers=False,
+    verbose=False,
+):
+    """Make API request to the specified provider."""
+    config = PROVIDERS[provider]
+
+    # Build request based on provider
+    if provider == "anthropic":
+        request_data = build_anthropic_request(
+            model,
+            max_tokens,
+            agents_content,
+            doc_content,
+            doc_file,
+            commit_prefix,
+            output_format,
+            include_diff_markers,
+        )
+        headers = {
+            "Content-Type": "application/json",
+            "x-api-key": api_key,
+            "anthropic-version": "2023-06-01",
+        }
+        url = config["endpoint"]
+    elif provider == "google":
+        request_data = build_google_request(
+            max_tokens,
+            agents_content,
+            doc_content,
+            doc_file,
+            commit_prefix,
+            output_format,
+            include_diff_markers,
+        )
+        headers = {"Content-Type": "application/json"}
+        url = f"{config['endpoint']}/{model}:generateContent?key={api_key}"
+    else:  # openai, xai
+        request_data = build_openai_request(
+            model,
+            max_tokens,
+            agents_content,
+            doc_content,
+            doc_file,
+            commit_prefix,
+            output_format,
+            include_diff_markers,
+        )
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {api_key}",
+        }
+        url = config["endpoint"]
+
+    # Make request
+    request_body = json.dumps(request_data).encode("utf-8")
+    req = Request(url, data=request_body, headers=headers, method="POST")
+
+    try:
+        with urlopen(req) as response:
+            result = json.loads(response.read().decode("utf-8"))
+    except HTTPError as e:
+        error_body = e.read().decode("utf-8")
+        try:
+            error_data = json.loads(error_body)
+            error(f"API error: {error_data.get('error', error_body)}")
+        except json.JSONDecodeError:
+            error(f"API error ({e.code}): {error_body}")
+    except URLError as e:
+        error(f"Connection error: {e.reason}")
+
+    # Show verbose info
+    if verbose:
+        print("=== Token Usage ===", file=sys.stderr)
+        if provider == "anthropic":
+            usage = result.get("usage", {})
+            print(f"Input tokens: {usage.get('input_tokens', 'N/A')}", file=sys.stderr)
+            print(
+                f"Cache creation: " f"{usage.get('cache_creation_input_tokens', 0)}",
+                file=sys.stderr,
+            )
+            print(
+                f"Cache read: {usage.get('cache_read_input_tokens', 0)}",
+                file=sys.stderr,
+            )
+            print(
+                f"Output tokens: {usage.get('output_tokens', 'N/A')}", file=sys.stderr
+            )
+        elif provider == "google":
+            usage = result.get("usageMetadata", {})
+            print(
+                f"Prompt tokens: {usage.get('promptTokenCount', 'N/A')}",
+                file=sys.stderr,
+            )
+            print(
+                f"Output tokens: {usage.get('candidatesTokenCount', 'N/A')}",
+                file=sys.stderr,
+            )
+        else:  # openai, xai
+            usage = result.get("usage", {})
+            print(
+                f"Prompt tokens: {usage.get('prompt_tokens', 'N/A')}", file=sys.stderr
+            )
+            print(
+                f"Completion tokens: " f"{usage.get('completion_tokens', 'N/A')}",
+                file=sys.stderr,
+            )
+        print("===================", file=sys.stderr)
+
+    # Extract response text
+    if provider == "anthropic":
+        if "error" in result:
+            error(f"API error: {result['error'].get('message', result)}")
+        content = result.get("content", [])
+        return "".join(
+            block.get("text", "") for block in content if block.get("type") == "text"
+        )
+    elif provider == "google":
+        if "error" in result:
+            error(f"API error: {result['error'].get('message', result)}")
+        candidates = result.get("candidates", [])
+        if not candidates:
+            error("No response from Gemini")
+        parts = candidates[0].get("content", {}).get("parts", [])
+        return "".join(part.get("text", "") for part in parts)
+    else:  # openai, xai
+        if "error" in result:
+            error(f"API error: {result['error'].get('message', result)}")
+        choices = result.get("choices", [])
+        if not choices:
+            error("No response from API")
+        return choices[0].get("message", {}).get("content", "")
+
+
+def parse_review_text(review_text):
+    """Extract commit message and diff from text format response."""
+    commit_msg = ""
+    diff = ""
+
+    # Extract commit message
+    msg_match = re.search(
+        r"---COMMIT_MESSAGE_START---\s*\n(.*?)\n---COMMIT_MESSAGE_END---",
+        review_text,
+        re.DOTALL,
+    )
+    if msg_match:
+        commit_msg = msg_match.group(1).strip()
+
+    # Extract unified diff
+    diff_match = re.search(
+        r"---UNIFIED_DIFF_START---\s*\n(.*?)\n---UNIFIED_DIFF_END---",
+        review_text,
+        re.DOTALL,
+    )
+    if diff_match:
+        diff = diff_match.group(1).strip()
+        # Clean up any markdown code fence if present
+        diff = re.sub(r"^```diff\s*\n?", "", diff)
+        diff = re.sub(r"\n?```\s*$", "", diff)
+
+    return commit_msg, diff
+
+
+def strip_diff_markers(text):
+    """Remove the diff/msg extraction markers from text."""
+    # Remove commit message markers and content
+    text = re.sub(
+        r"\n*---COMMIT_MESSAGE_START---\s*\n.*?\n---COMMIT_MESSAGE_END---\s*",
+        "",
+        text,
+        flags=re.DOTALL,
+    )
+    # Remove unified diff markers and content
+    text = re.sub(
+        r"\n*---UNIFIED_DIFF_START---\s*\n.*?\n---UNIFIED_DIFF_END---\s*",
+        "",
+        text,
+        flags=re.DOTALL,
+    )
+    return text.strip()
+
+
+def send_email(
+    to_addrs,
+    cc_addrs,
+    from_addr,
+    subject,
+    in_reply_to,
+    body,
+    dry_run=False,
+    verbose=False,
+):
+    """Send review email via SMTP using git sendemail config."""
+    # Build email message
+    msg = EmailMessage()
+    msg["From"] = from_addr
+    msg["To"] = ", ".join(to_addrs)
+    if cc_addrs:
+        msg["Cc"] = ", ".join(cc_addrs)
+    msg["Subject"] = subject
+    if in_reply_to:
+        msg["In-Reply-To"] = in_reply_to
+        msg["References"] = in_reply_to
+    msg.set_content(body)
+
+    if dry_run:
+        print("=== Email Preview (dry-run) ===", file=sys.stderr)
+        print(msg.as_string(), file=sys.stderr)
+        print("=== End Preview ===", file=sys.stderr)
+        return True
+
+    # Get SMTP configuration from git config
+    smtp_config = get_smtp_config()
+
+    if not smtp_config["server"]:
+        error("No SMTP server configured. Set git config sendemail.smtpserver")
+
+    server = smtp_config["server"]
+    port = smtp_config["port"]
+    user = smtp_config["user"]
+    encryption = smtp_config["encryption"]
+
+    # Get password from environment or git config, or prompt
+    password = os.environ.get("SMTP_PASSWORD") or smtp_config["password"]
+    if user and not password:
+        password = getpass.getpass(f"SMTP password for {user}@{server}: ")
+
+    if verbose:
+        print(f"SMTP server: {server}:{port}", file=sys.stderr)
+        print(f"SMTP user: {user or '(none)'}", file=sys.stderr)
+        print(f"Encryption: {encryption or 'starttls'}", file=sys.stderr)
+
+    # Collect all recipients
+    all_recipients = list(to_addrs)
+    if cc_addrs:
+        all_recipients.extend(cc_addrs)
+
+    try:
+        if encryption == "ssl":
+            # SSL/TLS connection from the start (port 465)
+            context = ssl.create_default_context()
+            with smtplib.SMTP_SSL(server, port, context=context) as smtp:
+                if user and password:
+                    smtp.login(user, password)
+                smtp.send_message(msg, from_addr, all_recipients)
+        else:
+            # STARTTLS (port 587) or plain (port 25)
+            with smtplib.SMTP(server, port) as smtp:
+                smtp.ehlo()
+                if encryption == "tls" or port == 587:
+                    context = ssl.create_default_context()
+                    smtp.starttls(context=context)
+                    smtp.ehlo()
+                if user and password:
+                    smtp.login(user, password)
+                smtp.send_message(msg, from_addr, all_recipients)
+
+        print(f"Email sent via SMTP ({server}:{port})", file=sys.stderr)
+        return True
+
+    except smtplib.SMTPAuthenticationError as e:
+        error(f"SMTP authentication failed: {e}")
+    except smtplib.SMTPException as e:
+        error(f"SMTP error: {e}")
+    except OSError as e:
+        error(f"Connection error to {server}:{port}: {e}")
+
+
+def list_providers():
+    """Print available providers and exit."""
+    print("Available AI Providers:\n")
+    print(f"{'Provider':<12} {'Default Model':<30} {'API Key Variable'}")
+    print(f"{'--------':<12} {'-------------':<30} {'----------------'}")
+    for name, config in PROVIDERS.items():
+        print(f"{name:<12} {config['default_model']:<30} {config['env_var']}")
+    sys.exit(0)
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Review DPDK documentation files using AI providers. "
+        "Accepts multiple files and generates output for each.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+    %(prog)s doc/guides/prog_guide/mempool_lib.rst
+    %(prog)s doc/guides/nics/*.rst              # Review all NIC docs
+    %(prog)s -p openai -o /tmp doc/guides/nics/ixgbe.rst doc/guides/nics/i40e.rst
+    %(prog)s -f html -d -o /tmp/reviews doc/guides/nics/*.rst  # HTML + diff files
+    %(prog)s -f json -o /tmp doc/guides/howto/flow_bifurcation.rst
+    %(prog)s --send-email --to [email protected] doc/guides/nics/ixgbe.rst
+
+Output files (in output-dir):
+    <basename>.txt|.md|.html|.json  Review in selected format
+    <basename>.diff                  Unified diff (text/json, or with --diff)
+    <basename>.msg                   Commit message (text/json, or with --diff)
+
+After review:
+    git apply <basename>.diff
+    git commit -sF <basename>.msg
+
+SMTP Configuration (from git config):
+    sendemail.smtpserver      SMTP server hostname
+    sendemail.smtpserverport  SMTP port (default: 587 for TLS, 465 for SSL)
+    sendemail.smtpuser        SMTP username
+    sendemail.smtpencryption  'tls' for STARTTLS, 'ssl' for SSL/TLS
+    sendemail.smtppass        SMTP password (or set SMTP_PASSWORD env var)
+
+Example git config:
+    git config --global sendemail.smtpserver smtp.gmail.com
+    git config --global sendemail.smtpserverport 587
+    git config --global sendemail.smtpuser [email protected]
+    git config --global sendemail.smtpencryption tls
+        """,
+    )
+
+    parser.add_argument(
+        "doc_files",
+        nargs="+",
+        metavar="doc_file",
+        help="Documentation file(s) to review",
+    )
+    parser.add_argument(
+        "-p",
+        "--provider",
+        choices=PROVIDERS.keys(),
+        default="anthropic",
+        help="AI provider (default: anthropic)",
+    )
+    parser.add_argument(
+        "-a",
+        "--agents",
+        default="AGENTS.md",
+        help="Path to AGENTS.md file (default: AGENTS.md)",
+    )
+    parser.add_argument(
+        "-m",
+        "--model",
+        help="Model to use (default: provider-specific)",
+    )
+    parser.add_argument(
+        "-t",
+        "--tokens",
+        type=int,
+        default=8192,
+        help="Max tokens for response (default: 8192)",
+    )
+    parser.add_argument(
+        "-o",
+        "--output-dir",
+        default=".",
+        help="Output directory for all output files (default: .)",
+    )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        help="Show API request details",
+    )
+    parser.add_argument(
+        "-q",
+        "--quiet",
+        action="store_true",
+        help="Suppress review output to stdout (only write files)",
+    )
+    parser.add_argument(
+        "-f",
+        "--format",
+        choices=OUTPUT_FORMATS,
+        default="text",
+        dest="output_format",
+        help="Output format: text, markdown, html, json (default: text)",
+    )
+    parser.add_argument(
+        "-d",
+        "--diff",
+        action="store_true",
+        help="Always produce .diff and .msg files (automatic for text/json)",
+    )
+    parser.add_argument(
+        "-l",
+        "--list-providers",
+        action="store_true",
+        help="List available providers and exit",
+    )
+
+    # Email options
+    email_group = parser.add_argument_group("Email Options")
+    email_group.add_argument(
+        "--send-email",
+        action="store_true",
+        help="Send review via email",
+    )
+    email_group.add_argument(
+        "--to",
+        action="append",
+        dest="to_addrs",
+        default=[],
+        metavar="ADDRESS",
+        help="Email recipient (can be specified multiple times)",
+    )
+    email_group.add_argument(
+        "--cc",
+        action="append",
+        dest="cc_addrs",
+        default=[],
+        metavar="ADDRESS",
+        help="CC recipient (can be specified multiple times)",
+    )
+    email_group.add_argument(
+        "--from",
+        dest="from_addr",
+        metavar="ADDRESS",
+        help="From address (default: from git config)",
+    )
+    email_group.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Show email without sending",
+    )
+
+    args = parser.parse_args()
+
+    if args.list_providers:
+        list_providers()
+
+    # Get provider config
+    config = PROVIDERS[args.provider]
+    model = args.model or config["default_model"]
+
+    # Get API key
+    api_key = os.environ.get(config["env_var"])
+    if not api_key:
+        error(f"{config['env_var']} environment variable not set")
+
+    # Validate files
+    agents_path = Path(args.agents)
+    if not agents_path.exists():
+        error(f"AGENTS.md not found: {args.agents}")
+
+    # Validate all doc files exist before processing
+    doc_paths = []
+    for doc_file in args.doc_files:
+        doc_path = Path(doc_file)
+        if not doc_path.exists():
+            error(f"Documentation file not found: {doc_file}")
+        doc_paths.append((doc_file, doc_path))
+
+    # Validate email options
+    if args.send_email and not args.to_addrs:
+        error("--send-email requires at least one --to address")
+
+    # Get from address for email
+    from_addr = args.from_addr
+    if args.send_email and not from_addr:
+        git_name = get_git_config("user.name")
+        git_email = get_git_config("user.email")
+        if git_email:
+            from_addr = f"{git_name} <{git_email}>" if git_name else git_email
+        else:
+            error("No --from specified and git user.email not configured")
+
+    # Read AGENTS.md once
+    agents_content = agents_path.read_text()
+    output_dir = Path(args.output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    provider_name = config["name"]
+
+    # Process each file
+    num_files = len(doc_paths)
+    for file_idx, (doc_file, doc_path) in enumerate(doc_paths, 1):
+        if num_files > 1:
+            print(
+                f"\n{'=' * 60}",
+                file=sys.stderr,
+            )
+            print(
+                f"Processing file {file_idx}/{num_files}: {doc_file}",
+                file=sys.stderr,
+            )
+            print(
+                f"{'=' * 60}",
+                file=sys.stderr,
+            )
+
+        # Determine output filenames
+        doc_basename = doc_path.stem
+        diff_file = output_dir / f"{doc_basename}.diff"
+        msg_file = output_dir / f"{doc_basename}.msg"
+
+        # Get commit prefix
+        commit_prefix = get_commit_prefix(doc_file)
+
+        # Read doc content
+        doc_content = doc_path.read_text()
+
+        if args.verbose:
+            print("=== Request ===", file=sys.stderr)
+            print(f"Provider: {args.provider}", file=sys.stderr)
+            print(f"Model: {model}", file=sys.stderr)
+            print(f"Output format: {args.output_format}", file=sys.stderr)
+            print(f"AGENTS file: {args.agents}", file=sys.stderr)
+            print(f"Doc file: {doc_file}", file=sys.stderr)
+            print(f"Commit prefix: {commit_prefix}", file=sys.stderr)
+            print(f"Output dir: {args.output_dir}", file=sys.stderr)
+            if args.send_email:
+                print("Send email: yes", file=sys.stderr)
+                print(f"To: {', '.join(args.to_addrs)}", file=sys.stderr)
+                if args.cc_addrs:
+                    print(f"Cc: {', '.join(args.cc_addrs)}", file=sys.stderr)
+                print(f"From: {from_addr}", file=sys.stderr)
+            print("===============", file=sys.stderr)
+
+        # Call API
+        review_text = call_api(
+            args.provider,
+            api_key,
+            model,
+            args.tokens,
+            agents_content,
+            doc_content,
+            doc_file,
+            commit_prefix,
+            args.output_format,
+            args.diff,
+            args.verbose,
+        )
+
+        if not review_text:
+            print(
+                f"Warning: No response received for {doc_file}",
+                file=sys.stderr,
+            )
+            continue
+
+        # Determine review output file
+        format_ext = FORMAT_EXTENSIONS[args.output_format]
+        review_file = output_dir / f"{doc_basename}{format_ext}"
+
+        # Determine if we should write diff/msg files
+        write_diff_msg = args.diff or args.output_format in ("text", "json")
+
+        # Extract commit message and diff first (before stripping markers)
+        commit_msg, diff = "", ""
+        if write_diff_msg:
+            if args.output_format == "json":
+                # Will extract from JSON below
+                pass
+            else:
+                # Parse from text format markers
+                commit_msg, diff = parse_review_text(review_text)
+
+        # For non-text formats with --diff, strip the markers from display output
+        display_text = review_text
+        if args.diff and args.output_format in ("markdown", "html"):
+            display_text = strip_diff_markers(review_text)
+
+        # Build formatted output text
+        if args.output_format == "text":
+            output_text = review_text
+        elif args.output_format == "json":
+            # Try to parse JSON response
+            try:
+                review_data = json.loads(review_text)
+            except json.JSONDecodeError:
+                print("Warning: Response is not valid JSON", file=sys.stderr)
+                review_data = {"raw_response": review_text}
+
+            # Extract diff/msg from JSON if present
+            if write_diff_msg:
+                if isinstance(review_data, dict) and "raw_response" not in 
review_data:
+                    commit_msg = review_data.get("commit_message", "")
+                    diff = review_data.get("diff", "")
+
+            # Add metadata
+            output_data = {
+                "metadata": {
+                    "doc_file": doc_file,
+                    "provider": args.provider,
+                    "provider_name": provider_name,
+                    "model": model,
+                    "commit_prefix": commit_prefix,
+                },
+                "review": review_data,
+            }
+            output_text = json.dumps(output_data, indent=2)
+        elif args.output_format == "markdown":
+            output_text = f"""# Documentation Review: {doc_path.name}
+
+*Reviewed by {provider_name} ({model})*
+
+{display_text}
+"""
+        elif args.output_format == "html":
+            output_text = f"""<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Review: {doc_path.name}</title>
+<style>
+body {{ font-family: system-ui, sans-serif; max-width: 900px; margin: 2em 
auto; padding: 0 1em; }}
+h1 {{ color: #333; }}
+.review-meta {{ color: #666; font-style: italic; }}
+pre {{ background: #f5f5f5; padding: 1em; overflow-x: auto; }}
+</style>
+</head>
+<body>
+<h1>Documentation Review: {doc_path.name}</h1>
+<p class="review-meta">Reviewed by {provider_name} ({model})</p>
+<div class="review-content">
+{display_text}
+</div>
+</body>
+</html>
+"""
+
+        # Write formatted review to file
+        review_file.write_text(output_text)
+        print(f"Review written to: {review_file}", file=sys.stderr)
+
+        # Write diff/msg files
+        if write_diff_msg:
+            if commit_msg:
+                msg_file.write_text(commit_msg + "\n")
+                print(f"Commit message written to: {msg_file}", 
file=sys.stderr)
+            else:
+                msg_file.write_text("# No commit message generated\n")
+                print("Warning: Could not extract commit message", 
file=sys.stderr)
+
+            if diff:
+                diff_file.write_text(diff + "\n")
+                print(f"Diff written to: {diff_file}", file=sys.stderr)
+            else:
+                diff_file.write_text("# No changes suggested\n")
+                print("Warning: Could not extract diff", file=sys.stderr)
+
+        # Print to stdout unless quiet (or multiple files without verbose)
+        show_stdout = not args.quiet and (num_files == 1 or args.verbose)
+        if show_stdout:
+            print(
+                f"\n=== Documentation Review: {doc_path.name} "
+                f"(via {provider_name}) ==="
+            )
+            print(output_text)
+
+            # Print usage instructions for text format
+            if args.output_format == "text":
+                print("\n=== Output Files ===")
+                print(f"Commit message: {msg_file}")
+                print(f"Diff file:      {diff_file}")
+                print("\nTo apply changes:")
+                print(f"  git apply {diff_file}")
+                print(f"  git commit -sF {msg_file}")
+
+        # Send email if requested
+        if args.send_email:
+            if args.output_format != "text":
+                print(
+                    f"Note: Email will be sent as plain text regardless of "
+                    f"--format={args.output_format}",
+                    file=sys.stderr,
+                )
+
+            review_subject = f"[REVIEW] {commit_prefix} {doc_path.name}"
+
+            # Build email body
+            email_body = f"""AI-generated documentation review of {doc_file}
+Reviewed using {provider_name} ({model})
+
+This is an automated review. Please verify all suggestions.
+
+---
+
+{review_text}
+"""
+
+            if args.verbose:
+                print("", file=sys.stderr)
+                print("=== Email Details ===", file=sys.stderr)
+                print(f"Subject: {review_subject}", file=sys.stderr)
+                print("=====================", file=sys.stderr)
+
+            send_email(
+                args.to_addrs,
+                args.cc_addrs,
+                from_addr,
+                review_subject,
+                None,
+                email_body,
+                args.dry_run,
+                args.verbose,
+            )
+
+            if not args.dry_run:
+                print("", file=sys.stderr)
+                print(f"Review sent to: {', '.join(args.to_addrs)}", 
file=sys.stderr)
+
+    # Print summary for multiple files
+    if num_files > 1:
+        print(f"\n{'=' * 60}", file=sys.stderr)
+        print(f"Processed {num_files} files", file=sys.stderr)
+        print(f"Output directory: {output_dir}", file=sys.stderr)
+
+
+if __name__ == "__main__":  # run main() only when executed as a script
+    main()
-- 
2.51.0

[PATCH v8 4/4] devtools: add multi-provider AI documentation review script

Reply via email to