commit:     c1f105c44cd43f073d892ea06c371b6e0a6fe28d
Author:     Arthur Zamarin <arthurzam <AT> gentoo <DOT> org>
AuthorDate: Fri Oct 28 19:25:34 2022 +0000
Commit:     Arthur Zamarin <arthurzam <AT> gentoo <DOT> org>
CommitDate: Sat Oct 29 15:10:59 2022 +0000
URL:        https://gitweb.gentoo.org/proj/pkgcore/pkgcheck.git/commit/?id=c1f105c4

MissingRemoteIdCheck: check for missing remote-id

Scans HOMEPAGE and SRC_URI for URIs matching known regexes, from which
it extracts a remote-id. Skips remote-id types that are already defined.
Skips URIs that end with ".diff" or ".patch". Prefers to take the
remote-id from newer package versions, in case the URL was updated.

Resolves: https://github.com/pkgcore/pkgcheck/issues/475
Signed-off-by: Arthur Zamarin <arthurzam <AT> gentoo.org>

 src/pkgcheck/checks/metadata_xml.py                | 78 ++++++++++++++++++++++
 .../MissingRemoteId/expected.json                  |  4 ++
 .../MissingRemoteIdCheck/MissingRemoteId/fix.patch | 13 ++++
 .../StableKeywordsOnTestingEapi-0.ebuild           |  2 +-
 .../StableKeywordsOnTestingEapi-1.ebuild           |  2 +-
 .../MissingRemoteIdCheck/MissingRemoteId/Manifest  |  8 +++
 .../MissingRemoteId/MissingRemoteId-0.ebuild       | 16 +++++
 .../MissingRemoteId/MissingRemoteId-1.ebuild       | 10 +++
 .../MissingRemoteId/MissingRemoteId-2.ebuild       |  7 ++
 .../MissingRemoteId/MissingRemoteId-3.ebuild       |  9 +++
 .../MissingRemoteId/MissingRemoteId-4.ebuild       | 11 +++
 .../MissingRemoteId/metadata.xml                   | 10 +++
 .../repos/eapis-testing/profiles/thirdpartymirrors |  2 +
 tests/scripts/test_pkgcheck_scan.py                |  4 +-
 14 files changed, 172 insertions(+), 4 deletions(-)

diff --git a/src/pkgcheck/checks/metadata_xml.py 
b/src/pkgcheck/checks/metadata_xml.py
index 67f0f42c..56dd2c94 100644
--- a/src/pkgcheck/checks/metadata_xml.py
+++ b/src/pkgcheck/checks/metadata_xml.py
@@ -1,11 +1,15 @@
 import os
 import re
 from difflib import SequenceMatcher
+from itertools import chain
 
 from lxml import etree
 from pkgcore import const as pkgcore_const
 from pkgcore.ebuild.atom import MalformedAtom, atom
+from pkgcore.restrictions.packages import Conditional
+from pkgcore.fetch import fetchable
 from snakeoil.osutils import pjoin
+from snakeoil.sequences import iflatten_instance
 from snakeoil.strings import pluralism
 
 from .. import results, sources
@@ -553,3 +557,77 @@ class CategoryMetadataXmlCheck(_XmlBaseCheck):
     def _get_xml_location(self, pkg):
         """Return the metadata.xml location for a given package's category."""
         return pjoin(self.repo_base, pkg.category, 'metadata.xml')
+
+
+class MissingRemoteId(results.PackageResult, results.Info):
+    """Missing remote-id which was inferred from ebuilds.
+
+    Based on URIs found in SRC_URI and HOMEPAGE, a remote-id can be suggested.
+    If a remote-id of same type is already defined in ``metadata.xml``, the
+    suggestion won't be reported. It ignores URIs ending with ``.diff`` or
+    ``.patch``, as they might point to a fork or developer's space. It also
+    ignores URIs that are conditional on USE flags.
+    """
+
+    def __init__(self, remote_type: str, value: str, uri: str, **kwarg):
+        super().__init__(**kwarg)
+        self.remote_type = remote_type
+        self.value = value
+        self.uri = uri
+
+    @property
+    def desc(self):
+        return (f'missing remote-id of type {self.remote_type!r} with '
+            f'value {self.value!r} (inferred from URI {self.uri!r})')
+
+
+class MissingRemoteIdCheck(Check):
+    """Detect missing remote-ids based on SRC_URI and HOMEPAGE."""
+
+    _source = sources.PackageRepoSource
+    known_results = frozenset([MissingRemoteId])
+
+    _gitlab_match = r'(?P<value>(\w[^/]*/)*\w[^/]*/\w[^/]*)'
+
+    remotes_map = (
+        ('bitbucket', r'https://bitbucket.org/(?P<value>[^/]+/[^/]+)'),
+        ('freedesktop-gitlab', 
rf'https://gitlab.freedesktop.org/{_gitlab_match}'),
+        ('github', r'https://github.com/(?P<value>[^/]+/[^/]+)'),
+        ('gitlab', rf'https://gitlab.com/{_gitlab_match}'),
+        ('gnome-gitlab', rf'https://gitlab.gnome.org/{_gitlab_match}'),
+        ('heptapod', rf'https://foss.heptapod.net/{_gitlab_match}'),
+        ('launchpad', r'https://launchpad.net/(?P<value>[^/]+)'),
+        ('pypi', r'https://pypi.org/project/(?P<value>[^/]+)'),
+        ('pypi', 
r'https://files.pythonhosted.org/packages/source/\S/(?P<value>[^/]+)'),
+        ('savannah', r'https://savannah.gnu.org/projects/(?P<value>[^/]+)'),
+        ('savannah-nongnu', 
r'https://savannah.nongnu.org/projects/(?P<value>[^/]+)'),
+        ('sourceforge', r'https://(?P<value>[^/]+).sourceforge.net/'),
+        ('sourceforge', r'https://sourceforge.net/projects/(?P<value>[^/]+)'),
+        ('sourceforge', 
r'https://downloads.sourceforge.net/(?:project/)?(?P<value>[^/]+)'),
+        ('sourcehut', r'https://sr.ht/(?P<value>[^/]+/[^/]+)'),
+    )
+
+    def __init__(self, options, **kwargs):
+        super().__init__(options, **kwargs)
+        self.remotes_map = tuple((remote_type, re.compile(regex)) for 
remote_type, regex in self.remotes_map)
+
+    def feed(self, pkgset):
+        remotes = {u.type: (None, None) for u in pkgset[0].upstreams}
+        for pkg in sorted(pkgset, reverse=True):
+            fetchables = 
iflatten_instance(pkg.generate_fetchables(allow_missing_checksums=True,
+                ignore_unknown_mirrors=True, skip_default_mirrors=True), 
(fetchable, Conditional))
+            all_urls = set(chain.from_iterable(f.uri for f in fetchables if 
isinstance(f, fetchable)))
+            urls = {url for url in all_urls if not url.endswith(('.patch', 
'.diff'))}
+            urls = sorted(urls.union(pkg.homepage), key=len)
+
+            for remote_type, regex in self.remotes_map:
+                if remote_type in remotes:
+                    continue
+                for url in urls:
+                    if mo := regex.match(url):
+                        remotes[remote_type] = (mo.group('value'), url)
+                        break
+
+        for remote_type, (value, url) in remotes.items():
+            if value is not None:
+                yield MissingRemoteId(remote_type, value, url, pkg=pkgset[0])

diff --git 
a/testdata/data/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/expected.json
 
b/testdata/data/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/expected.json
new file mode 100644
index 00000000..ab5ae2d4
--- /dev/null
+++ 
b/testdata/data/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/expected.json
@@ -0,0 +1,4 @@
+{"__class__": "MissingRemoteId", "category": "MissingRemoteIdCheck", 
"package": "MissingRemoteId", "remote_type": "gitlab", "value": 
"pkgcore/pkgcheck/extra/MissingRemoteId", "uri": 
"https://gitlab.com/pkgcore/pkgcheck/extra/MissingRemoteId/-/archive/1/MissingRemoteId-1.tar.bz2"}
+{"__class__": "MissingRemoteId", "category": "MissingRemoteIdCheck", 
"package": "MissingRemoteId", "remote_type": "heptapod", "value": 
"pkgcore/pkgcore", "uri": 
"https://foss.heptapod.net/pkgcore/pkgcore/-/archive/4/MissingRemoteId-4.tar.bz2"}
+{"__class__": "MissingRemoteId", "category": "MissingRemoteIdCheck", 
"package": "MissingRemoteId", "remote_type": "pypi", "value": 
"MissingRemoteId", "uri": 
"https://files.pythonhosted.org/packages/source/M/MissingRemoteId/MissingRemoteId-1.tar.gz"}
+{"__class__": "MissingRemoteId", "category": "MissingRemoteIdCheck", 
"package": "MissingRemoteId", "remote_type": "sourceforge", "value": 
"pkgcheck", "uri": "https://pkgcheck.sourceforge.net/"}

diff --git 
a/testdata/data/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/fix.patch
 
b/testdata/data/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/fix.patch
new file mode 100644
index 00000000..035f9ad4
--- /dev/null
+++ 
b/testdata/data/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/fix.patch
@@ -0,0 +1,13 @@
+--- eapis-testing/MissingRemoteIdCheck/MissingRemoteId/metadata.xml
++++ fixed/MissingRemoteIdCheck/MissingRemoteId/metadata.xml
+@@ -3,6 +3,10 @@
+ <pkgmetadata>
+       <upstream>
+               <remote-id type="bitbucket">pkgcore/pkgcheck</remote-id>
++              <remote-id 
type="gitlab">pkgcore/pkgcheck/extra/MissingRemoteId</remote-id>
++              <remote-id type="heptapod">pkgcore/pkgcheck</remote-id>
++              <remote-id type="pypi">MissingRemoteId</remote-id>
++              <remote-id type="sourceforge">pkgcheck</remote-id>
+       </upstream>
+       <use>
+               <flag name="test">enable tests</flag>

diff --git 
a/testdata/repos/eapis-testing/EapiCheck/StableKeywordsOnTestingEapi/StableKeywordsOnTestingEapi-0.ebuild
 
b/testdata/repos/eapis-testing/EapiCheck/StableKeywordsOnTestingEapi/StableKeywordsOnTestingEapi-0.ebuild
index cd015c21..dddf98ef 100644
--- 
a/testdata/repos/eapis-testing/EapiCheck/StableKeywordsOnTestingEapi/StableKeywordsOnTestingEapi-0.ebuild
+++ 
b/testdata/repos/eapis-testing/EapiCheck/StableKeywordsOnTestingEapi/StableKeywordsOnTestingEapi-0.ebuild
@@ -1,7 +1,7 @@
 EAPI=7
 
 DESCRIPTION="Ebuild with stable keywords on EAPI marked as stable"
-HOMEPAGE="https://github.com/pkgcore/pkgcheck";
+HOMEPAGE="https://pkgcore.github.io/pkgcheck/";
 SRC_URI=""
 
 LICENSE="BSD"

diff --git 
a/testdata/repos/eapis-testing/EapiCheck/StableKeywordsOnTestingEapi/StableKeywordsOnTestingEapi-1.ebuild
 
b/testdata/repos/eapis-testing/EapiCheck/StableKeywordsOnTestingEapi/StableKeywordsOnTestingEapi-1.ebuild
index 5ae1a8d9..40751969 100644
--- 
a/testdata/repos/eapis-testing/EapiCheck/StableKeywordsOnTestingEapi/StableKeywordsOnTestingEapi-1.ebuild
+++ 
b/testdata/repos/eapis-testing/EapiCheck/StableKeywordsOnTestingEapi/StableKeywordsOnTestingEapi-1.ebuild
@@ -1,7 +1,7 @@
 EAPI=8
 
 DESCRIPTION="Ebuild with stable keywords on EAPI marked as testing"
-HOMEPAGE="https://github.com/pkgcore/pkgcheck";
+HOMEPAGE="https://pkgcore.github.io/pkgcheck/";
 SRC_URI=""
 
 LICENSE="BSD"

diff --git 
a/testdata/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/Manifest 
b/testdata/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/Manifest
new file mode 100644
index 00000000..112fbef4
--- /dev/null
+++ b/testdata/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/Manifest
@@ -0,0 +1,8 @@
+DIST MissingRemoteId-0.gh.tar.gz 474194 BLAKE2B 
8abccad85b39a1c3125ba6685f568892e395316138460b7c6d09ce781f4565f66e812a730e1ab5ae7c9bf83bd3bb9e088623036ae12a9f17f54f8fc31b019ecd
 SHA512 
be433a54fa5cf4c421e74af0d8556ce4f666089260aaecc280f53e4cafbdea2b97735e25d22af0c44928b0cba57ce19aaa7d3a25e23ac0d87624e596ddb27e34
+DIST MissingRemoteId-1-extra.tar.bz2 471942 BLAKE2B 
534a32f107067f4237da2cc396d65f42d81f6b8b3bf09abbb84ffd6a33542374d66a15047bb3428c12e4debc771a5661ba3db42c8b7009914815d6c137673b4b
 SHA512 
078310c9cc0154f1ffec6c6c25b3d301a644cc2163cc1dadb24a5f77a4c3d89583dfc7f44972078d59393cdac028bfeb3b481ab03fe9f5a3ebfd6bb0f5ba3a73
+DIST MissingRemoteId-1.tar.gz 497163 BLAKE2B 
600b51c5c034356fcfbbbb741681e7e304ccf14b9390207305d35c48dff7675e808101fa76f6a4ce250691b2fd99deae7d59cc91560d609c1a2c3d7421859849
 SHA512 
3539d877b63e739877f79340ff59c3592f43ac9e00b507ab63d7afc2bb3eaa32f863e7cc8029c5a05bcf7068de70fd3149447054f0d9d304d4e3548a0a25d676
+DIST MissingRemoteId-2.tar.gz 467493 BLAKE2B 
ecfc1e435397f52cc174724b08e806ee92b49438d05f28fb9a698d2a5778b8fae3a5aa8cfa574512725d34a364774722ddb38689d7db13cd2fcfccc7451f69f1
 SHA512 
15da0fb2c3615739a8ebd268e1b7a924e24c43f1c7390a78793b8345480a7a23718f281616dbabc9510ac002005c27ec3c64743368593b9413198ae0718395ff
+DIST MissingRemoteId-3.tar.bz2 469597 BLAKE2B 
6f2234da65c9903dd1c7727e163f05965ebdbb303cfab162bc0070a79507981e2b88917e0d3eaab20d352c6f369d71acba195a8cc0c5f6ae28252b1e369658c7
 SHA512 
a40641bf6dba77b6b98e20d95ff6f92a48a500f737e55ef1d500c4469bb8eb489af83c84264731dac0654caadbd520105ef85d669fe01930f507764750f44f35
+DIST MissingRemoteId-4.tar.bz2 467403 BLAKE2B 
e31bdf219d4aa01c2255f1a6b29e88457b9ea7bb078c5eb797e100662e0635b6eb28d15c411aaa3e676fd0a8fb929f9e50018bbcaa1856999469e2b06ad2b43c
 SHA512 
b5d3978acce3b03f2b46fc2e7bf5e61185bfb745835a375d7d1d76bb210ef7f661e0a938e1510a1f9b815d9fe832f318f2f23d23040a21782a2502a473c561fd
+DIST MissingRemoteId-486.diff 4480 BLAKE2B 
9fd62254dd8a4f0cbbaaf7edde1eea013e6b06cacf90b605cda8fea8afac678a13450854a5a8b0a37e730675e31229edb2be1d30756e87ee0de3af5334ba74b5
 SHA512 
ba45d0eb7bb47eaddf950ff535e1cf448bfcf5528615efc214c71c8489a0578871763e6909327f2d86784905cfe78d15441991615c59c97e887c3a71fce80094
+DIST MissingRemoteId-486.patch 14111416 BLAKE2B 
fd3ac79ffe3ca031d40a6523a4ae900200c1b1eea0369fd1eec3233f852af8edffd81ca6c0bba628132221346d283d2138794e1f9697074fc1044944ad852e5b
 SHA512 
a2cce0b1fd2267cc8dbd3e2ad3ec1f99526de3988443c0937405cc7b2eab9ef517001ef0b4b387e8721d3c33610cd642c6b7cf83217996a83481830de9713de7

diff --git 
a/testdata/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/MissingRemoteId-0.ebuild
 
b/testdata/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/MissingRemoteId-0.ebuild
new file mode 100644
index 00000000..5b605897
--- /dev/null
+++ 
b/testdata/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/MissingRemoteId-0.ebuild
@@ -0,0 +1,16 @@
+EAPI=7
+
+DESCRIPTION="Skip patch and diff urls, and urls behind use flags"
+HOMEPAGE="https://pkgcore.github.io/pkgcheck/";
+SRC_URI="
+       https://github.com/pkgcore/pkgcheck/pull/486.patch -> ${PN}-486.patch
+       https://github.com/pkgcore/pkgcheck/pull/486.diff -> ${PN}-486.diff
+       test? (
+               https://github.com/pkgcore/pkgcheck/archive/v${PV}.tar.gz
+                -> ${P}.gh.tar.gz
+       )
+"
+LICENSE="BSD"
+SLOT="0"
+IUSE="test"
+RESTRICT="!test? ( test )"

diff --git 
a/testdata/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/MissingRemoteId-1.ebuild
 
b/testdata/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/MissingRemoteId-1.ebuild
new file mode 100644
index 00000000..2a49ad82
--- /dev/null
+++ 
b/testdata/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/MissingRemoteId-1.ebuild
@@ -0,0 +1,10 @@
+EAPI=7
+
+DESCRIPTION="Use from mirror://pypi and gitlab archive"
+HOMEPAGE="https://pkgcore.github.io/pkgcheck/";
+SRC_URI="
+       mirror://pypi/${PN::1}/${PN}/${P}.tar.gz
+       
https://gitlab.com/pkgcore/pkgcheck/extra/${PN}/-/archive/${PV}/${P}.tar.bz2 -> 
${P}-extra.tar.bz2
+"
+LICENSE="BSD"
+SLOT="0"

diff --git 
a/testdata/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/MissingRemoteId-2.ebuild
 
b/testdata/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/MissingRemoteId-2.ebuild
new file mode 100644
index 00000000..ed226b8e
--- /dev/null
+++ 
b/testdata/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/MissingRemoteId-2.ebuild
@@ -0,0 +1,7 @@
+EAPI=7
+
+DESCRIPTION="Check homepage"
+HOMEPAGE="https://pkgcheck.sourceforge.net/";
+SRC_URI="mirror://sourceforge/${PN}/${P}.tar.gz"
+LICENSE="BSD"
+SLOT="0"

diff --git 
a/testdata/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/MissingRemoteId-3.ebuild
 
b/testdata/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/MissingRemoteId-3.ebuild
new file mode 100644
index 00000000..e9a20138
--- /dev/null
+++ 
b/testdata/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/MissingRemoteId-3.ebuild
@@ -0,0 +1,9 @@
+EAPI=7
+
+DESCRIPTION="Don't suggest where already value exists"
+HOMEPAGE="https://bitbucket.org/pkgcore/pkgcore";
+SRC_URI="
+       https://bitbucket.org/pkgcore/pkgdev/get/v${PV}.tar.bz2 -> ${P}.tar.bz2
+"
+LICENSE="BSD"
+SLOT="0"

diff --git 
a/testdata/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/MissingRemoteId-4.ebuild
 
b/testdata/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/MissingRemoteId-4.ebuild
new file mode 100644
index 00000000..970d959e
--- /dev/null
+++ 
b/testdata/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/MissingRemoteId-4.ebuild
@@ -0,0 +1,11 @@
+EAPI=7
+
+DESCRIPTION="Don't suggest where already value exists"
+HOMEPAGE="https://pkgcore.github.io/pkgcheck/";
+SRC_URI="
+       https://foss.heptapod.net/pkgcore/pkgcheck/-/archive/${PV}/${P}.tar.bz2
+       
https://foss.heptapod.net/pkgcore/pkgcheck/extra/-/archive/${PV}/${P}.tar.bz2
+       https://foss.heptapod.net/pkgcore/pkgcore/-/archive/${PV}/${P}.tar.bz2
+"
+LICENSE="BSD"
+SLOT="0"

diff --git 
a/testdata/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/metadata.xml
 
b/testdata/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/metadata.xml
new file mode 100644
index 00000000..2048587f
--- /dev/null
+++ 
b/testdata/repos/eapis-testing/MissingRemoteIdCheck/MissingRemoteId/metadata.xml
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE pkgmetadata SYSTEM "https://www.gentoo.org/dtd/metadata.dtd";>
+<pkgmetadata>
+       <upstream>
+               <remote-id type="bitbucket">pkgcore/pkgcheck</remote-id>
+       </upstream>
+       <use>
+               <flag name="test">enable tests</flag>
+       </use>
+</pkgmetadata>

diff --git a/testdata/repos/eapis-testing/profiles/thirdpartymirrors 
b/testdata/repos/eapis-testing/profiles/thirdpartymirrors
new file mode 100644
index 00000000..15399f5c
--- /dev/null
+++ b/testdata/repos/eapis-testing/profiles/thirdpartymirrors
@@ -0,0 +1,2 @@
+pypi            https://files.pythonhosted.org/packages/source
+sourceforge     https://downloads.sourceforge.net

diff --git a/tests/scripts/test_pkgcheck_scan.py 
b/tests/scripts/test_pkgcheck_scan.py
index 8caed9f2..8b8113e7 100644
--- a/tests/scripts/test_pkgcheck_scan.py
+++ b/tests/scripts/test_pkgcheck_scan.py
@@ -406,9 +406,9 @@ class TestPkgcheckScan:
         # create stub profile to suppress ArchesWithoutProfiles result
         repo.create_profiles([Profile('stub', 'amd64')])
         # create ebuild with unknown keywords
-        repo.create_ebuild('cat/pkg-0', keywords=['unknown'])
+        repo.create_ebuild('cat/pkg-0', keywords=['unknown'], 
homepage='https://example.com')
         # and a good ebuild for the latest version
-        repo.create_ebuild('cat/pkg-1', keywords=['amd64'])
+        repo.create_ebuild('cat/pkg-1', keywords=['amd64'], 
homepage='https://example.com')
 
         # results for old pkgs will be shown by default
         args = ['-r', repo.location]

Reply via email to