Copilot commented on code in PR #48886:
URL: https://github.com/apache/arrow/pull/48886#discussion_r3402312338


##########
dev/archery/archery/crossbow/core.py:
##########
@@ -560,13 +534,14 @@ def github_overwrite_release_assets(self, tag_name, 
target_commitish,
 
         # remove the whole release if it already exists
         try:
-            release = repo.release_from_tag(tag_name)
-        except github3.exceptions.NotFoundError:
-            pass
-        else:
-            release.delete()
-
-        release = repo.create_release(tag_name, target_commitish)
+            release = repo.get_release(tag_name)
+            release.delete_release()
+        except GithubException as e:
+            if e.status != 404:
+                raise

Review Comment:
   Same issue as `github_release`: `repo.get_release(tag_name)` is likely 
treating `tag_name` as a release ID rather than a tag, so existing releases may 
never be deleted (and you may silently proceed to create a duplicate or fail 
later). Use a tag-based release lookup (e.g., the explicit tags endpoint) 
before attempting deletion.



##########
dev/archery/archery/crossbow/core.py:
##########
@@ -448,110 +447,85 @@ def file_contents(self, commit_id, file):
         blob = self.repo[entry.id]
         return blob.data
 
-    def _github_login(self, github_token):
-        """Returns a logged in github3.GitHub instance"""
-        if not _have_github3:
-            raise ImportError('Must install github3.py')
+    def _github_login(self, github_token=None):
+        """Returns a logged in Github instance using PyGithub"""
+        if not _have_github:
+            raise ImportError('Must install PyGithub')
         github_token = github_token or self.github_token
-        session = github3.session.GitHubSession(
-            default_connect_timeout=10,
-            default_read_timeout=30
-        )
-        github = github3.GitHub(session=session)
-        github.login(token=github_token)
-        return github
+        return Github(auth=GithubAuth.Token(github_token), timeout=30)
 
     def as_github_repo(self, github_token=None):
         """Converts it to a repository object which wraps the GitHub API"""
         if self._github_repo is None:
             github = self._github_login(github_token)
             username, reponame = _parse_github_user_repo(self.remote_url)
-            self._github_repo = github.repository(username, reponame)
+            self._github_repo = github.get_repo(f"{username}/{reponame}")
         return self._github_repo
 
     def token_expiration_date(self, github_token=None):
         """Returns the expiration date for the github_token provided"""
         github = self._github_login(github_token)
-        # github3 hides the headers from us. Use the _get method
-        # to access the response headers.
-        resp = github._get(github.session.base_url)
-        # Response in the form '2023-01-23 10:40:28 UTC'
-        date_string = resp.headers.get(
-            'github-authentication-token-expiration')
+        # PyGithub doesn't expose the token expiration header through a
+        # dedicated API, so request it via the public Requester escape hatch.
+        headers, _ = github.requester.requestJsonAndCheck("GET", "/user")
+        # Response header in the form '2023-01-23 10:40:28 UTC'
+        date_string = headers.get('github-authentication-token-expiration')
         if date_string:
             return date.fromisoformat(date_string.split()[0])
+        return None
 
     def github_commit(self, sha):
         repo = self.as_github_repo()
-        return repo.commit(sha)
+        return repo.get_commit(sha)
 
     def github_release(self, tag):
         repo = self.as_github_repo()
         try:
-            return repo.release_from_tag(tag)
-        except github3.exceptions.NotFoundError:
-            return None
-
-    def github_upload_asset_requests(self, release, path, name, mime,
-                                     max_retries=None, retry_backoff=None):
+            return repo.get_release(tag)
+        except GithubException as e:
+            if e.status == 404:
+                return None
+            raise

Review Comment:
   PyGithub’s `Repository.get_release(...)` is (historically) the 'get release 
by numeric release_id' API; passing a tag string here can lead to calling the 
wrong endpoint and always returning 404 even when a tagged release exists. To 
reliably fetch by tag, use the dedicated 'releases/tags/{tag}' API (either via 
a PyGithub-provided helper if available in your supported version, or via the 
Requester escape hatch as you already do in `token_expiration_date`).



##########
dev/archery/archery/crossbow/core.py:
##########
@@ -448,110 +447,85 @@ def file_contents(self, commit_id, file):
         blob = self.repo[entry.id]
         return blob.data
 
-    def _github_login(self, github_token):
-        """Returns a logged in github3.GitHub instance"""
-        if not _have_github3:
-            raise ImportError('Must install github3.py')
+    def _github_login(self, github_token=None):
+        """Returns a logged in Github instance using PyGithub"""
+        if not _have_github:
+            raise ImportError('Must install PyGithub')
         github_token = github_token or self.github_token
-        session = github3.session.GitHubSession(
-            default_connect_timeout=10,
-            default_read_timeout=30
-        )
-        github = github3.GitHub(session=session)
-        github.login(token=github_token)
-        return github
+        return Github(auth=GithubAuth.Token(github_token), timeout=30)
 
     def as_github_repo(self, github_token=None):
         """Converts it to a repository object which wraps the GitHub API"""
         if self._github_repo is None:
             github = self._github_login(github_token)
             username, reponame = _parse_github_user_repo(self.remote_url)
-            self._github_repo = github.repository(username, reponame)
+            self._github_repo = github.get_repo(f"{username}/{reponame}")
         return self._github_repo
 
     def token_expiration_date(self, github_token=None):
         """Returns the expiration date for the github_token provided"""
         github = self._github_login(github_token)
-        # github3 hides the headers from us. Use the _get method
-        # to access the response headers.
-        resp = github._get(github.session.base_url)
-        # Response in the form '2023-01-23 10:40:28 UTC'
-        date_string = resp.headers.get(
-            'github-authentication-token-expiration')
+        # PyGithub doesn't expose the token expiration header through a
+        # dedicated API, so request it via the public Requester escape hatch.
+        headers, _ = github.requester.requestJsonAndCheck("GET", "/user")
+        # Response header in the form '2023-01-23 10:40:28 UTC'
+        date_string = headers.get('github-authentication-token-expiration')
         if date_string:
             return date.fromisoformat(date_string.split()[0])
+        return None
 
     def github_commit(self, sha):
         repo = self.as_github_repo()
-        return repo.commit(sha)
+        return repo.get_commit(sha)
 
     def github_release(self, tag):
         repo = self.as_github_repo()
         try:
-            return repo.release_from_tag(tag)
-        except github3.exceptions.NotFoundError:
-            return None
-
-    def github_upload_asset_requests(self, release, path, name, mime,
-                                     max_retries=None, retry_backoff=None):
+            return repo.get_release(tag)
+        except GithubException as e:
+            if e.status == 404:
+                return None
+            raise
+
+    def github_upload_asset(self, release, path, name, mime,
+                            max_retries=None, retry_backoff=None):
         if max_retries is None:
             max_retries = int(os.environ.get('CROSSBOW_MAX_RETRIES', 8))
         if retry_backoff is None:
             retry_backoff = int(os.environ.get('CROSSBOW_RETRY_BACKOFF', 5))
 
         for i in range(max_retries):
             try:
-                with open(path, 'rb') as fp:
-                    result = release.upload_asset(name=name, asset=fp,
-                                                  content_type=mime)
-            except github3.exceptions.ResponseError as e:
+                result = release.upload_asset(path, name=name,
+                                              content_type=mime)
+                logger.info(f"Attempt {i + 1} has finished.")
+                return result
+            except GithubException as e:
                 logger.error(f"Attempt {i + 1} has failed with message: {e}.")
-                logger.error(f"Error message {e.msg}")
-                logger.error("List of errors provided by GitHub:")
-                for err in e.errors:
-                    logger.error(f" - {err}")
+                if hasattr(e, 'data'):
+                    logger.error(f"Error data: {e.data}")
 
-                if e.code == 422:
+                if e.status == 422:
                     # 422 Validation Failed, probably raised because
                     # ReleaseAsset already exists, so try to remove it before
                     # reattempting the asset upload
-                    for asset in release.assets():
+                    for asset in release.get_assets():
                         if asset.name == name:
                             logger.info(f"Release asset {name} already exists, 
"
                                         "removing it...")
-                            asset.delete()
+                            asset.delete_asset()
                             logger.info(f"Asset {name} removed.")
                             break
-            except github3.exceptions.ConnectionError as e:
+            except IOError as e:
+                # Catch network and file I/O errors (includes requests 
exceptions)
                 logger.error(f"Attempt {i + 1} has failed with message: {e}.")

Review Comment:
   Catching `IOError` will not catch `requests` transport errors (typically 
`requests.exceptions.RequestException`), so transient network failures may 
bypass the retry loop. Update the exception handling to catch the relevant 
request/network exception types (and keep `OSError`/`IOError` for file-related 
problems) so retries behave as intended; also adjust the comment accordingly.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to