skrawcz commented on code in PR #1425:
URL: https://github.com/apache/hamilton/pull/1425#discussion_r2649635610


##########
scripts/add_license_headers.py:
##########
@@ -0,0 +1,297 @@
+#!/usr/bin/env python3
+"""Script to add Apache 2 license headers to files in the Hamilton repository."""
+
+import json
+import sys
+from pathlib import Path
+from typing import List
+
+# Apache 2 license header for Python files
+PYTHON_LICENSE_HEADER = """# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+
+# Apache 2 license header for Markdown files (using HTML comments)
+MARKDOWN_LICENSE_HEADER = """<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+"""
+
+# Apache 2 license header text for Jupyter notebooks (as markdown cell content)
+NOTEBOOK_LICENSE_TEXT = """Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License."""
+
+# Apache 2 license header for SQL files (using -- comments)
+SQL_LICENSE_HEADER = """-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--   http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+"""
+
+
+def add_license_to_python(file_path: Path, content: str) -> str:
+    """Add Apache 2 license header to Python file content."""
+    # Handle shebang lines - preserve them at the top
+    lines = content.split("\n", 1)
+    if lines[0].startswith("#!"):
+        # File has shebang, add license after it
+        if len(lines) > 1:
+            return lines[0] + "\n" + PYTHON_LICENSE_HEADER + lines[1]
+        else:
+            return lines[0] + "\n" + PYTHON_LICENSE_HEADER
+    else:
+        # No shebang, add license at the beginning
+        return PYTHON_LICENSE_HEADER + content
+
+
+def add_license_to_markdown(file_path: Path, content: str) -> str:
+    """Add Apache 2 license header to Markdown file content."""
+    return MARKDOWN_LICENSE_HEADER + content
+
+
+def add_license_to_notebook(file_path: Path, content: str) -> str:
+    """Add Apache 2 license header to Jupyter notebook."""
+    try:
+        notebook = json.loads(content)
+    except json.JSONDecodeError as e:
+        raise ValueError(f"Invalid notebook JSON: {e}") from e
+
+    # Create a new markdown cell with the license
+    license_cell = {"cell_type": "markdown", "metadata": {}, "source": [NOTEBOOK_LICENSE_TEXT]}
+
+    # Insert at the beginning
+    if "cells" not in notebook:
+        notebook["cells"] = []
+
+    notebook["cells"].insert(0, license_cell)
+
+    return json.dumps(notebook, indent=1, ensure_ascii=False)
+
+
+def add_license_to_shell(file_path: Path, content: str) -> str:
+    """Add Apache 2 license header to shell script or Dockerfile.
+
+    Uses same logic as Python files (# comments, handle shebang).
+    """
+    # Handle shebang lines - preserve them at the top
+    lines = content.split("\n", 1)
+    if lines[0].startswith("#!"):
+        # File has shebang, add license after it
+        if len(lines) > 1:
+            return lines[0] + "\n" + PYTHON_LICENSE_HEADER + lines[1]
+        else:
+            return lines[0] + "\n" + PYTHON_LICENSE_HEADER
+    else:
+        # No shebang, add license at the beginning
+        return PYTHON_LICENSE_HEADER + content
+
+
+def add_license_to_sql(file_path: Path, content: str) -> str:
+    """Add Apache 2 license header to SQL file content."""
+    return SQL_LICENSE_HEADER + content
+
+
+def add_license_header(file_path: Path, dry_run: bool = False) -> bool:
+    """Add Apache 2 license header to a file.
+
+    Args:
+        file_path: Path to the file
+        dry_run: If True, only print what would be done without modifying files
+
+    Returns:
+        True if header was added (or would be added in dry run), False otherwise
+    """
+    try:
+        with open(file_path, "r", encoding="utf-8") as f:
+            content = f.read()
+    except (UnicodeDecodeError, PermissionError) as e:
+        print(f"  ✗ Error reading {file_path}: {e}")
+        return False
+
+    # Check if file already has a license header
+    if "Licensed to the Apache Software Foundation" in content or "Apache License" in content:
+        print(f"  ↷ Skipping {file_path} (already has license header)")
+        return False

Review Comment:
   Makes sense.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to