Copilot commented on code in PR #48886:
URL: https://github.com/apache/arrow/pull/48886#discussion_r3393494270


##########
dev/archery/archery/crossbow/core.py:
##########
@@ -560,13 +537,14 @@ def github_overwrite_release_assets(self, tag_name, 
target_commitish,
 
         # remove the whole release if it already exists
         try:
-            release = repo.release_from_tag(tag_name)
-        except github3.exceptions.NotFoundError:
-            pass
-        else:
-            release.delete()
-
-        release = repo.create_release(tag_name, target_commitish)
+            release = repo.get_release(tag_name)
+            release.delete_release()
+        except GithubException as e:
+            if e.status != 404:
+                raise
+
+        release = repo.create_git_release(tag_name, tag_name, "",
+                                          target_commitish=target_commitish)

Review Comment:
   `repo.get_release(tag_name)` has the same issue as above: it expects a 
release ID, not a tag. This will prevent overwriting assets for an existing 
tag. Switch to `get_release_by_tag(tag_name)` (and keep the 404 handling) so 
the delete-and-recreate flow works against the real API.



##########
dev/archery/archery/crossbow/core.py:
##########
@@ -448,110 +447,88 @@ def file_contents(self, commit_id, file):
         blob = self.repo[entry.id]
         return blob.data
 
-    def _github_login(self, github_token):
-        """Returns a logged in github3.GitHub instance"""
-        if not _have_github3:
-            raise ImportError('Must install github3.py')
+    def _github_login(self, github_token=None):
+        """Returns a logged in Github instance using PyGithub"""
+        if not _have_github:
+            raise ImportError('Must install PyGithub')
         github_token = github_token or self.github_token
-        session = github3.session.GitHubSession(
-            default_connect_timeout=10,
-            default_read_timeout=30
-        )
-        github = github3.GitHub(session=session)
-        github.login(token=github_token)
-        return github
+        return Github(auth=GithubAuth.Token(github_token), timeout=30)
 
     def as_github_repo(self, github_token=None):
         """Converts it to a repository object which wraps the GitHub API"""
         if self._github_repo is None:
             github = self._github_login(github_token)
             username, reponame = _parse_github_user_repo(self.remote_url)
-            self._github_repo = github.repository(username, reponame)
+            self._github_repo = github.get_repo(f"{username}/{reponame}")
         return self._github_repo
 
     def token_expiration_date(self, github_token=None):
         """Returns the expiration date for the github_token provided"""
         github = self._github_login(github_token)
-        # github3 hides the headers from us. Use the _get method
-        # to access the response headers.
-        resp = github._get(github.session.base_url)
-        # Response in the form '2023-01-23 10:40:28 UTC'
-        date_string = resp.headers.get(
-            'github-authentication-token-expiration')
+        # NOTE: We access the private _Github__requester to get response
+        # headers, as PyGithub doesn't expose the token expiration header
+        # through its public API. This may break with future PyGithub updates.
+        headers, _ = github._Github__requester.requestJsonAndCheck(
+            "GET", "/user"
+        )
+        # Response header in the form '2023-01-23 10:40:28 UTC'
+        date_string = headers.get('github-authentication-token-expiration')
         if date_string:
             return date.fromisoformat(date_string.split()[0])
+        return None
 
     def github_commit(self, sha):
         repo = self.as_github_repo()
-        return repo.commit(sha)
+        return repo.get_commit(sha)
 
     def github_release(self, tag):
         repo = self.as_github_repo()
         try:
-            return repo.release_from_tag(tag)
-        except github3.exceptions.NotFoundError:
-            return None
-
-    def github_upload_asset_requests(self, release, path, name, mime,
-                                     max_retries=None, retry_backoff=None):
+            return repo.get_release(tag)
+        except GithubException as e:
+            if e.status == 404:
+                return None
+            raise

Review Comment:
   `Repository.get_release()` in PyGithub retrieves a release by numeric 
release ID, not by tag name. Passing a tag here will fail at runtime for real 
GitHub calls. Use the PyGithub API that fetches releases by tag (e.g., 
`repo.get_release_by_tag(tag)`), and update callers/tests accordingly.



##########
dev/archery/setup.py:
##########
@@ -29,10 +29,8 @@
 
 extras = {
     'benchmark': ['pandas'],
-    'crossbow': ['github3.py', jinja_req, 'pygit2>=1.14.0', 'requests',
+    'crossbow': ['pygithub', jinja_req, 'pygit2>=1.14.0', 'requests',
                  'ruamel.yaml', 'setuptools_scm>=8.0.0'],

Review Comment:
   The canonical PyPI project name is `PyGithub`. While pip normalizes names 
and will usually resolve `pygithub`, using the canonical name improves clarity 
and reduces ambiguity (especially for lockfiles/constraints tooling). Consider 
changing the requirement string to `PyGithub`.



##########
dev/archery/archery/crossbow/core.py:
##########
@@ -448,110 +447,88 @@ def file_contents(self, commit_id, file):
         blob = self.repo[entry.id]
         return blob.data
 
-    def _github_login(self, github_token):
-        """Returns a logged in github3.GitHub instance"""
-        if not _have_github3:
-            raise ImportError('Must install github3.py')
+    def _github_login(self, github_token=None):
+        """Returns a logged in Github instance using PyGithub"""
+        if not _have_github:
+            raise ImportError('Must install PyGithub')
         github_token = github_token or self.github_token
-        session = github3.session.GitHubSession(
-            default_connect_timeout=10,
-            default_read_timeout=30
-        )
-        github = github3.GitHub(session=session)
-        github.login(token=github_token)
-        return github
+        return Github(auth=GithubAuth.Token(github_token), timeout=30)
 
     def as_github_repo(self, github_token=None):
         """Converts it to a repository object which wraps the GitHub API"""
         if self._github_repo is None:
             github = self._github_login(github_token)
             username, reponame = _parse_github_user_repo(self.remote_url)
-            self._github_repo = github.repository(username, reponame)
+            self._github_repo = github.get_repo(f"{username}/{reponame}")
         return self._github_repo
 
     def token_expiration_date(self, github_token=None):
         """Returns the expiration date for the github_token provided"""
         github = self._github_login(github_token)
-        # github3 hides the headers from us. Use the _get method
-        # to access the response headers.
-        resp = github._get(github.session.base_url)
-        # Response in the form '2023-01-23 10:40:28 UTC'
-        date_string = resp.headers.get(
-            'github-authentication-token-expiration')
+        # NOTE: We access the private _Github__requester to get response
+        # headers, as PyGithub doesn't expose the token expiration header
+        # through its public API. This may break with future PyGithub updates.
+        headers, _ = github._Github__requester.requestJsonAndCheck(
+            "GET", "/user"
+        )
+        # Response header in the form '2023-01-23 10:40:28 UTC'
+        date_string = headers.get('github-authentication-token-expiration')
         if date_string:
             return date.fromisoformat(date_string.split()[0])
+        return None
 
     def github_commit(self, sha):
         repo = self.as_github_repo()
-        return repo.commit(sha)
+        return repo.get_commit(sha)
 
     def github_release(self, tag):
         repo = self.as_github_repo()
         try:
-            return repo.release_from_tag(tag)
-        except github3.exceptions.NotFoundError:
-            return None
-
-    def github_upload_asset_requests(self, release, path, name, mime,
-                                     max_retries=None, retry_backoff=None):
+            return repo.get_release(tag)
+        except GithubException as e:
+            if e.status == 404:
+                return None
+            raise
+
+    def github_upload_asset(self, release, path, name, mime,
+                            max_retries=None, retry_backoff=None):
         if max_retries is None:
             max_retries = int(os.environ.get('CROSSBOW_MAX_RETRIES', 8))
         if retry_backoff is None:
             retry_backoff = int(os.environ.get('CROSSBOW_RETRY_BACKOFF', 5))
 
         for i in range(max_retries):
             try:
-                with open(path, 'rb') as fp:
-                    result = release.upload_asset(name=name, asset=fp,
-                                                  content_type=mime)
-            except github3.exceptions.ResponseError as e:
+                result = release.upload_asset(path, label=name,
+                                              content_type=mime)
+                logger.info(f"Attempt {i + 1} has finished.")
+                return result
+            except GithubException as e:
                 logger.error(f"Attempt {i + 1} has failed with message: {e}.")
-                logger.error(f"Error message {e.msg}")
-                logger.error("List of errors provided by GitHub:")
-                for err in e.errors:
-                    logger.error(f" - {err}")
+                if hasattr(e, 'data'):
+                    logger.error(f"Error data: {e.data}")
 
-                if e.code == 422:
+                if e.status == 422:
                     # 422 Validation Failed, probably raised because
                     # ReleaseAsset already exists, so try to remove it before
                     # reattempting the asset upload
-                    for asset in release.assets():
+                    for asset in release.get_assets():
                         if asset.name == name:
                             logger.info(f"Release asset {name} already exists, 
"
                                         "removing it...")
-                            asset.delete()
+                            asset.delete_asset()
                             logger.info(f"Asset {name} removed.")
                             break
-            except github3.exceptions.ConnectionError as e:
+            except IOError as e:
+                # Catch network and file I/O errors (includes requests 
exceptions)
                 logger.error(f"Attempt {i + 1} has failed with message: {e}.")

Review Comment:
   Catching `IOError` will not reliably catch network failures from 
PyGithub/requests (those are typically `requests.exceptions.RequestException` 
or lower-level connection/timeouts). The comment is also incorrect as written. 
Consider catching the appropriate requests exception types (and potentially 
`OSError` for local file issues) so transient network errors actually trigger 
retries.



##########
dev/archery/archery/crossbow/core.py:
##########
@@ -448,110 +447,88 @@ def file_contents(self, commit_id, file):
         blob = self.repo[entry.id]
         return blob.data
 
-    def _github_login(self, github_token):
-        """Returns a logged in github3.GitHub instance"""
-        if not _have_github3:
-            raise ImportError('Must install github3.py')
+    def _github_login(self, github_token=None):
+        """Returns a logged in Github instance using PyGithub"""
+        if not _have_github:
+            raise ImportError('Must install PyGithub')
         github_token = github_token or self.github_token
-        session = github3.session.GitHubSession(
-            default_connect_timeout=10,
-            default_read_timeout=30
-        )
-        github = github3.GitHub(session=session)
-        github.login(token=github_token)
-        return github
+        return Github(auth=GithubAuth.Token(github_token), timeout=30)
 
     def as_github_repo(self, github_token=None):
         """Converts it to a repository object which wraps the GitHub API"""
         if self._github_repo is None:
             github = self._github_login(github_token)
             username, reponame = _parse_github_user_repo(self.remote_url)
-            self._github_repo = github.repository(username, reponame)
+            self._github_repo = github.get_repo(f"{username}/{reponame}")
         return self._github_repo
 
     def token_expiration_date(self, github_token=None):
         """Returns the expiration date for the github_token provided"""
         github = self._github_login(github_token)
-        # github3 hides the headers from us. Use the _get method
-        # to access the response headers.
-        resp = github._get(github.session.base_url)
-        # Response in the form '2023-01-23 10:40:28 UTC'
-        date_string = resp.headers.get(
-            'github-authentication-token-expiration')
+        # NOTE: We access the private _Github__requester to get response
+        # headers, as PyGithub doesn't expose the token expiration header
+        # through its public API. This may break with future PyGithub updates.
+        headers, _ = github._Github__requester.requestJsonAndCheck(
+            "GET", "/user"
+        )
+        # Response header in the form '2023-01-23 10:40:28 UTC'
+        date_string = headers.get('github-authentication-token-expiration')
         if date_string:
             return date.fromisoformat(date_string.split()[0])
+        return None
 
     def github_commit(self, sha):
         repo = self.as_github_repo()
-        return repo.commit(sha)
+        return repo.get_commit(sha)
 
     def github_release(self, tag):
         repo = self.as_github_repo()
         try:
-            return repo.release_from_tag(tag)
-        except github3.exceptions.NotFoundError:
-            return None
-
-    def github_upload_asset_requests(self, release, path, name, mime,
-                                     max_retries=None, retry_backoff=None):
+            return repo.get_release(tag)
+        except GithubException as e:
+            if e.status == 404:
+                return None
+            raise
+
+    def github_upload_asset(self, release, path, name, mime,
+                            max_retries=None, retry_backoff=None):
         if max_retries is None:
             max_retries = int(os.environ.get('CROSSBOW_MAX_RETRIES', 8))
         if retry_backoff is None:
             retry_backoff = int(os.environ.get('CROSSBOW_RETRY_BACKOFF', 5))
 
         for i in range(max_retries):
             try:
-                with open(path, 'rb') as fp:
-                    result = release.upload_asset(name=name, asset=fp,
-                                                  content_type=mime)
-            except github3.exceptions.ResponseError as e:
+                result = release.upload_asset(path, label=name,
+                                              content_type=mime)
+                logger.info(f"Attempt {i + 1} has finished.")
+                return result

Review Comment:
   In PyGithub, `label` is the asset label/description; it does not control the 
uploaded asset filename. If the intent is to force the asset name to `name`, 
pass it via the `name` parameter (and optionally set `label` separately). As 
written, uploads may end up being named after `os.path.basename(path)` instead 
of the `name` argument.



##########
dev/archery/archery/crossbow/tests/test_core.py:
##########
@@ -93,3 +98,295 @@ def test_latest_for_prefix(request):
             queue.latest_for_prefix("nightly-packaging")
             mocked_get.assert_called_once_with(
                 "nightly-packaging-2022-04-11-0")
+
+
+def test_github_release_not_found():
+    """Test github_release returns None for 404."""
+    with mock.patch.object(Repo, 'as_github_repo') as mock_repo:
+        mock_repo.return_value.get_release.side_effect = GithubException(
+            404, {'message': 'Not Found'}, None
+        )
+        repo = Repo('/tmp/test', github_token='test_token')
+        result = repo.github_release('nonexistent')
+        assert result is None
+
+
+def test_github_pr_create():
+    """Test creating a pull request via PyGithub."""
+    with mock.patch.object(Repo, 'as_github_repo') as mock_repo:
+        with mock.patch.object(Repo, 'default_branch_name', 'main'):
+            mock_pr = mock.MagicMock()

Review Comment:
   `default_branch_name` is a `@property`. Patching it with a plain string 
works by shadowing the descriptor at the class level, but it can be brittle and 
surprising. Prefer patching with `new_callable=mock.PropertyMock` (or patching 
the instance attribute) to make the test intent explicit and avoid side effects 
if the property behavior changes.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to