shahar1 commented on code in PR #55917:
URL: https://github.com/apache/airflow/pull/55917#discussion_r2365420530


##########
dev/i18n/check_translations_completeness.py:
##########
@@ -21,6 +21,8 @@
 # dependencies = [

Review Comment:
   I think that we should start thinking about a new name for the script, as it 
does more than just "checking completeness" at this point (also, it's quite a 
long one) :)
   Not urgent for now though - if it is acceptable, I'd prefer to do it after the 
upcoming Airflow Summit as I refer to this script in my talk.



##########
dev/i18n/copilot_translations.py:
##########
@@ -0,0 +1,549 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+GitHub Copilot API client for translation services.
+
+This module provides a class-based interface to the GitHub Copilot API
+with on-demand token refresh and translation capabilities.
+"""
+
+from __future__ import annotations
+
+import json
+import sys
+import threading
+import time
+import unicodedata
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import requests
+from jinja2 import Environment, FileSystemLoader, TemplateNotFound
+from requests.exceptions import ConnectionError, HTTPError, RequestException, 
Timeout
+from rich import print
+from rich.console import Console
+
+if TYPE_CHECKING:
+    from jinja2 import Template
+
+
+COPILOT_CLIENT_ID = "Iv1.b507a08c87ecfe98"

Review Comment:
   Where is this client ID taken from? (I've managed to find references to it on 
Google, but no official ones)



##########
dev/i18n/copilot_translations.py:
##########
@@ -0,0 +1,549 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+GitHub Copilot API client for translation services.
+
+This module provides a class-based interface to the GitHub Copilot API
+with on-demand token refresh and translation capabilities.
+"""
+
+from __future__ import annotations
+
+import json
+import sys
+import threading
+import time
+import unicodedata
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import requests
+from jinja2 import Environment, FileSystemLoader, TemplateNotFound
+from requests.exceptions import ConnectionError, HTTPError, RequestException, 
Timeout
+from rich import print
+from rich.console import Console
+
+if TYPE_CHECKING:
+    from jinja2 import Template
+
+
+COPILOT_CLIENT_ID = "Iv1.b507a08c87ecfe98"
+# Create a focused prompt for translation with explicit Unicode handling
+LANGUAGE_NAMES = {
+    "ar": "Arabic (العربية)",
+    "ca": "Catalan (Català)",
+    "de": "German (Deutsch)",
+    "es": "Spanish (Español)",
+    "fr": "French (Français)",
+    "he": "Hebrew (עברית)",
+    "hi": "Hindi (हिन्दी)",
+    "hu": "Hungarian (Magyar)",
+    "it": "Italian (Italiano)",
+    "ja": "Japanese (日本語)",
+    "ko": "Korean (한국어)",
+    "nl": "Dutch (Nederlands)",
+    "pl": "Polish (Polski)",
+    "pt": "Portuguese (Português)",
+    "tr": "Turkish (Türkçe)",
+    "zh-CN": "Simplified Chinese (简体中文)",
+    "zh-TW": "Traditional Chinese (繁體中文)",
+}
+TODO_PREFIX = "TODO: translate:"
+
+
+class CopilotTranslator:
+    """
+    A GitHub Copilot API client with on-demand token refresh and translation 
capabilities.
+
+    This class handles authentication, token management, and provides methods 
for
+    translating JSON translation files using GitHub Copilot. Tokens are 
refreshed
+    automatically when they expire or become invalid.
+    """
+
+    def __init__(self, console: Console | None = None) -> None:
+        """Initialize the CopilotTranslator."""
+        self.access_token_file = Path(".copilot_token")
+        self.access_token: str | None = None
+        self.max_retries = 3
+        self.token: str | None = None
+        self.token_lock = threading.Lock()
+        self.console = console or Console(force_terminal=True, 
color_system="auto")
+
+        # Requests session for connection pooling
+        self.session = requests.Session()
+
+        # Set up Jinja2 environment for prompt templates
+        self.prompts_dir = Path(__file__).parent / "prompts"
+        self.jinja_env = Environment(
+            loader=FileSystemLoader(
+                [
+                    str(self.prompts_dir),  # For global.jinja2
+                    str(self.prompts_dir / "locales"),  # For 
language-specific templates
+                ]
+            ),
+            autoescape=False,
+        )
+        self.template_cache: dict[str, Template] = {}
+
+    def setup_authentication(self) -> None:
+        """Set up GitHub device authentication flow.
+
+        This method initiates the OAuth device flow for GitHub authentication.
+        The user will need to visit the provided URL and enter the user code.
+        """
+        self.console.print("[yellow]Setting up GitHub 
authentication...[/yellow]")
+
+        resp = requests.post(
+            "https://github.com/login/device/code";,
+            headers={
+                "accept": "application/json",
+                "editor-version": "Neovim/0.6.1",
+                "editor-plugin-version": "copilot.vim/1.16.0",
+                "content-type": "application/json",
+                "user-agent": "GithubCopilot/1.155.0",
+                "accept-encoding": "gzip,deflate,br",
+            },
+            json={"client_id": COPILOT_CLIENT_ID, "scope": "read:user"},
+        )
+
+        resp_json = resp.json()
+        device_code = resp_json.get("device_code")
+        user_code = resp_json.get("user_code")
+        verification_uri = resp_json.get("verification_uri")
+
+        self.console.print(
+            f"[bold cyan]Please visit {verification_uri} and enter code 
{user_code} to authenticate.[/bold cyan]"
+        )
+        # Wait until the user completes authentication, so wait until user 
presses Enter
+        # with a 1-minute timeout
+        if not self._wait_for_user_input_with_timeout(
+            "\n\nPress Enter after completing authentication...", timeout=60
+        ):
+            self.console.print("[red]Authentication timed out. Please try 
again.[/red]")
+            sys.exit(1)
+
+        for _ in range(3):
+            time.sleep(5)
+            resp = requests.post(
+                "https://github.com/login/oauth/access_token";,
+                headers={
+                    "accept": "application/json",
+                    "editor-version": "Neovim/0.6.1",
+                    "editor-plugin-version": "copilot.vim/1.16.0",
+                    "content-type": "application/json",
+                    "user-agent": "GithubCopilot/1.155.0",
+                    "accept-encoding": "gzip,deflate,br",
+                },
+                json={
+                    "client_id": COPILOT_CLIENT_ID,
+                    "device_code": device_code,
+                    "grant_type": 
"urn:ietf:params:oauth:grant-type:device_code",
+                },
+            )
+
+            resp_json = resp.json()
+            access_token = resp_json.get("access_token")
+
+            if access_token:
+                break
+
+        if not access_token:
+            self.console.print("[red]Authentication failed or timed 
out.[/red]")
+            return
+
+        # Save the access token to file
+        with open(self.access_token_file, "w") as f:
+            f.write(access_token)
+
+        self.console.print("[green]Authentication successful![/green]")
+
+    def _wait_for_user_input_with_timeout(self, prompt: str, timeout: int = 
60) -> bool:
+        """Wait for user input with a timeout.
+
+        :param prompt: The prompt message to display.
+        :param timeout: Timeout in seconds (default: 60).
+        :return: True if user pressed Enter within timeout, False if timed out.
+        """
+        import select
+
+        self.console.print(f"[yellow]{prompt}[/yellow]")
+
+        # Use different approaches based on platform capabilities
+        try:
+            # For Unix-like systems (macOS, Linux)
+            if hasattr(select, "select"):
+                ready, _, _ = select.select([sys.stdin], [], [], timeout)
+                if ready:
+                    sys.stdin.readline()  # Consume the input
+                    return True
+                self.console.print(
+                    f"[red]Timeout: No input received within {timeout} 
seconds. Continuing...[/red]"
+                )
+                return False
+            # Fallback for systems without select (shouldn't happen on macOS)
+            import signal
+
+            def timeout_handler(signum, frame):
+                raise TimeoutError("Input timeout")
+
+            signal.signal(signal.SIGALRM, timeout_handler)
+            signal.alarm(timeout)
+
+            try:
+                input()
+                signal.alarm(0)  # Cancel the alarm
+                return True
+            except TimeoutError:
+                self.console.print(
+                    f"[red]Timeout: No input received within {timeout} 
seconds. Continuing...[/red]"
+                )
+                return False
+
+        except Exception as e:
+            self.console.print(
+                f"[yellow]Warning: Could not set up timeout, using regular 
input: {e}[/yellow]"
+            )
+            input(prompt)
+            return True
+
+    def _get_access_token(self) -> str:
+        """Get the GitHub access token from file or initiate authentication.
+
+        :return: The GitHub access token.
+        """
+        if not self.access_token_file.exists():
+            self.setup_authentication()
+
+        with open(self.access_token_file) as f:
+            return f.read().strip()
+
+    def _get_token(self) -> None:
+        """Get a fresh Copilot session token using the access token.
+
+        This method exchanges the GitHub access token for a Copilot session 
token.
+        """
+        with self.token_lock:
+            access_token = self._get_access_token()
+
+            resp = requests.get(
+                "https://api.github.com/copilot_internal/v2/token";,
+                headers={
+                    "authorization": f"token {access_token}",
+                    "editor-version": "Neovim/0.6.1",
+                    "editor-plugin-version": "copilot.vim/1.16.0",
+                    "user-agent": "GithubCopilot/1.155.0",
+                },
+            )
+
+            if resp.status_code == 200:
+                resp_json = resp.json()
+                self.token = resp_json.get("token")
+            else:
+                self.console.print(f"[red]Failed to get token: 
{resp.status_code} {resp.text}[/red]")
+
+    def _is_token_invalid(self, token: str | None) -> bool:
+        """Check if the token is invalid or expired.
+
+        :param token: The token to check.
+        :return: True if the token is invalid or expired, False otherwise.
+        """
+        if token is None or "exp" not in token:
+            return True
+
+        exp_value = self._extract_exp_value(token)
+        return exp_value is None or exp_value <= time.time()
+
+    def _extract_exp_value(self, token: str) -> int | None:
+        """Extract the expiration value from the token.
+
+        :param token: The token string.
+        :return: The expiration timestamp or None if not found.
+        """
+        try:
+            pairs = token.split(";")
+            for pair in pairs:
+                if "=" in pair:
+                    key, value = pair.split("=", 1)
+                    if key.strip() == "exp":
+                        return int(value.strip())
+        except (ValueError, AttributeError):
+            pass
+        return None
+
+    def _copilot_complete(self, prompt: str, language: str = "json", retries: 
int = 0) -> str:
+        """Get completion from GitHub Copilot API.
+
+        :param prompt: The prompt to send to Copilot.
+        :param language: The language context for the completion.
+        :param retries: The current number of retries attempted.
+        :return: The completion text from Copilot.
+        """
+        # For retries
+        if retries > self.max_retries:
+            self.console.print("[red]Exceeded maximum retries for Copilot 
completion.[/red]")
+            return ""
+        if retries > 0:
+            self.console.print(
+                f"[yellow]Retrying Copilot completion (attempt 
{retries}/{self.max_retries})...[/yellow]"
+            )
+            time.sleep(2**retries)  # Exponential backoff
+
+        # Ensure we have a valid token
+        if self.token is None or self._is_token_invalid(self.token):
+            self._get_token()
+
+        if self.token is None:
+            return ""
+
+        try:
+            resp = requests.post(
+                
"https://copilot-proxy.githubusercontent.com/v1/engines/copilot-codex/completions";,
+                headers={"authorization": f"Bearer {self.token}"},
+                json={
+                    "prompt": prompt,
+                    "suffix": "",
+                    "max_tokens": 2000,
+                    "temperature": 0.1,

Review Comment:
   We might want to make these parameters configurable



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@airflow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to