Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package python-identify for openSUSE:Factory checked in at 2023-11-01 22:10:52
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-identify (Old)
 and /work/SRC/openSUSE:Factory/.python-identify.new.17445 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-identify" Wed Nov 1 22:10:52 2023 rev:10 rq:1121649 version:2.5.31 Changes: -------- --- /work/SRC/openSUSE:Factory/python-identify/python-identify.changes 2021-08-19 10:02:12.427177273 +0200 +++ /work/SRC/openSUSE:Factory/.python-identify.new.17445/python-identify.changes 2023-11-01 22:11:27.042935282 +0100 @@ -1,0 +2,17 @@ +Wed Nov 1 09:17:44 UTC 2023 - Matthias Fehring <buschman...@opensuse.org> + +- update to version 2.5.31 + * no changelog available, alternatively have a look at + https://github.com/pre-commit/identify/compare/v2.2.13...v2.5.31 +- use editdistance instead of ukkonen + * Upstream changed from editdistance_s to ukkonen for performance + reasons. Ukkonen is not now available for openSUSE. So switch + back to editdistance for now. + * Add 0001-use-editdistance-not-ukkonen.patch +- spec file changes + * remove not required python-setuptools from Requires + * upstream URL has changed to https://github.com/pre-commit/identify + * change requirement for python-editdistance from Suggests to + Recommends + +------------------------------------------------------------------- Old: ---- identify-2.2.13.tar.gz New: ---- 0001-use-editdistance-not-ukkonen.patch identify-2.5.31.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-identify.spec ++++++ --- /var/tmp/diff_new_pack.dIAZOO/_old 2023-11-01 22:11:27.526953212 +0100 +++ /var/tmp/diff_new_pack.dIAZOO/_new 2023-11-01 22:11:27.526953212 +0100 @@ -1,7 +1,7 @@ # # spec file for package python-identify # -# Copyright (c) 2021 SUSE LLC +# Copyright (c) 2023 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -16,24 +16,24 @@ # -%{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-identify -Version: 2.2.13 +Version: 2.5.31 Release: 0 Summary: File identification library for 
Python License: MIT Group: Development/Languages/Python -URL: https://github.com/chriskuehl/identify -Source: https://github.com/chriskuehl/identify/archive/v%{version}.tar.gz#/identify-%{version}.tar.gz +URL: https://github.com/pre-commit/identify +Source: https://github.com/pre-commit/identify/archive/v%{version}.tar.gz#/identify-%{version}.tar.gz +# PATCH-FIX-OPENSUSE 0001-use-editdistance-not-ukkonen.patch -- ukkonen not packaged for opensuse now +Patch1: 0001-use-editdistance-not-ukkonen.patch BuildRequires: %{python_module editdistance} BuildRequires: %{python_module pytest} BuildRequires: %{python_module setuptools} BuildRequires: fdupes BuildRequires: python-rpm-macros -Requires: python-setuptools Requires(post): update-alternatives Requires(postun):update-alternatives -Suggests: python-editdistance +Recommends: python-editdistance BuildArch: noarch %python_subpackages @@ -42,9 +42,7 @@ %prep %setup -q -n identify-%{version} -# stick with editdistance as it generally has more functionality -sed -i 's/editdistance_s.distance/editdistance.eval/' identify/identify.py -sed -i 's/editdistance_s/editdistance/' identify/identify.py +%autopatch -p1 %build %python_build @@ -67,5 +65,6 @@ %doc README.md %license LICENSE %python_alternative %{_bindir}/identify-cli -%{python_sitelib}/* +%{python_sitelib}/identify +%{python_sitelib}/identify-%{version}-*-info ++++++ 0001-use-editdistance-not-ukkonen.patch ++++++ From: Matthias Fehring <buschman...@opensuse.org> Date: 2023-11-01 09:44:00 +0100 Subject: Use editdistance instead of ukkonen Upstream: never Upstream switched from editdistance_s to ukkonen that is currently not packaged for openSUSE. --- identify/identify.py | 10 ++++------ setup.cfg | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) --- a/identify/identify.py 2023-10-28 19:19:41.000000000 +0200 +++ b/identify/identify.py 2023-11-01 09:39:16.942021416 +0100 @@ -243,7 +243,7 @@ 3. check exact text match with existing licenses 4. 
failing that use edit distance """ - import ukkonen # `pip install identify[license]` + import editdistance # `pip install identify[license]` with open(filename, encoding='UTF-8') as f: contents = f.read() @@ -253,8 +253,6 @@ min_edit_dist = sys.maxsize min_edit_dist_spdx = '' - cutoff = math.ceil(.05 * len(norm)) - # try exact matches for spdx, text in licenses.LICENSES: norm_license = _norm_license(text) @@ -265,13 +263,13 @@ if norm and abs(len(norm) - len(norm_license)) / len(norm) > .05: continue - edit_dist = ukkonen.distance(norm, norm_license, cutoff) - if edit_dist < cutoff and edit_dist < min_edit_dist: + edit_dist = editdistance.distance(norm, norm_license) + if edit_dist < min_edit_dist: min_edit_dist = edit_dist min_edit_dist_spdx = spdx # if there's less than 5% edited from the license, we found our match - if norm and min_edit_dist < cutoff: + if norm and min_edit_dist / len(norm) < .05: return min_edit_dist_spdx else: # no matches :'( --- a/setup.cfg 2023-10-28 19:19:41.000000000 +0200 +++ b/setup.cfg 2023-11-01 10:13:07.904157653 +0100 @@ -31,7 +31,7 @@ [options.extras_require] license = - ukkonen + editdistance [options.package_data] identify = ++++++ identify-2.2.13.tar.gz -> identify-2.5.31.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/identify-2.2.13/.github/FUNDING.yml new/identify-2.5.31/.github/FUNDING.yml --- old/identify-2.2.13/.github/FUNDING.yml 2021-08-06 18:07:44.000000000 +0200 +++ new/identify-2.5.31/.github/FUNDING.yml 1970-01-01 01:00:00.000000000 +0100 @@ -1,2 +0,0 @@ -github: asottile -open_collective: pre-commit diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/identify-2.2.13/.github/workflows/main.yml new/identify-2.5.31/.github/workflows/main.yml --- old/identify-2.2.13/.github/workflows/main.yml 1970-01-01 01:00:00.000000000 +0100 +++ new/identify-2.5.31/.github/workflows/main.yml 2023-10-28 19:19:41.000000000 +0200 @@ 
-0,0 +1,13 @@ +name: main + +on: + push: + branches: [main, test-me-*] + tags: '*' + pull_request: + +jobs: + main: + uses: asottile/workflows/.github/workflows/tox.yml@v1.5.0 + with: + env: '["py38", "py39", "py310", "py311"]' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/identify-2.2.13/.gitignore new/identify-2.5.31/.gitignore --- old/identify-2.2.13/.gitignore 2021-08-06 18:07:44.000000000 +0200 +++ new/identify-2.5.31/.gitignore 2023-10-28 19:19:41.000000000 +0200 @@ -1,8 +1,5 @@ *.egg-info *.py[co] /.coverage -/.pytest_cache /.tox -/coverage-html /dist -/venv diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/identify-2.2.13/.pre-commit-config.yaml new/identify-2.5.31/.pre-commit-config.yaml --- old/identify-2.2.13/.pre-commit-config.yaml 2021-08-06 18:07:44.000000000 +0200 +++ new/identify-2.5.31/.pre-commit-config.yaml 2023-10-28 19:19:41.000000000 +0200 @@ -1,45 +1,42 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.0.1 + rev: v4.5.0 hooks: - - id: check-docstring-first + - id: trailing-whitespace + - id: end-of-file-fixer - id: check-yaml - id: debug-statements - id: double-quote-string-fixer - - id: end-of-file-fixer - id: name-tests-test - id: requirements-txt-fixer - - id: trailing-whitespace - repo: https://github.com/asottile/setup-cfg-fmt - rev: v1.17.0 + rev: v2.5.0 hooks: - id: setup-cfg-fmt -- repo: https://github.com/PyCQA/flake8 - rev: 3.9.2 - hooks: - - id: flake8 - exclude: ^identify/vendor/licenses\.py$ - additional_dependencies: [flake8-typing-imports==1.10.1] -- repo: https://github.com/pre-commit/mirrors-autopep8 - rev: v1.5.7 - hooks: - - id: autopep8 -- repo: https://github.com/asottile/reorder_python_imports - rev: v2.6.0 +- repo: https://github.com/asottile/reorder-python-imports + rev: v3.12.0 hooks: - id: reorder-python-imports - args: [--py3-plus] + args: [--py38-plus, --add-import, 'from __future__ import 
annotations'] - repo: https://github.com/asottile/add-trailing-comma - rev: v2.1.0 + rev: v3.1.0 hooks: - id: add-trailing-comma - args: [--py36-plus] - repo: https://github.com/asottile/pyupgrade - rev: v2.23.1 + rev: v3.15.0 hooks: - id: pyupgrade - args: [--py36-plus] + args: [--py38-plus] +- repo: https://github.com/hhatto/autopep8 + rev: v2.0.4 + hooks: + - id: autopep8 +- repo: https://github.com/PyCQA/flake8 + rev: 6.1.0 + hooks: + - id: flake8 + exclude: ^identify/vendor/licenses\.py$ - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.910 + rev: v1.6.1 hooks: - id: mypy diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/identify-2.2.13/README.md new/identify-2.5.31/README.md --- old/identify-2.2.13/README.md 2021-08-06 18:07:44.000000000 +0200 +++ new/identify-2.5.31/README.md 2023-10-28 19:19:41.000000000 +0200 @@ -1,11 +1,9 @@ +[](https://github.com/pre-commit/identify/actions/workflows/main.yml) +[](https://results.pre-commit.ci/latest/github/pre-commit/identify/main) + identify ======== -[](https://dev.azure.com/asottile/asottile/_build/latest?definitionId=67&branchName=master) -[](https://dev.azure.com/asottile/asottile/_build/latest?definitionId=67&branchName=master) -[](https://results.pre-commit.ci/latest/github/pre-commit/identify/master) -[](https://pypi.python.org/pypi/identify) - File identification library for Python. Given a file (or some information about a file), return a set of standardized @@ -13,7 +11,9 @@ ## Installation -`pip install identify` +```bash +pip install identify +``` ## Usage ### With a file on disk @@ -80,7 +80,7 @@ $ identify-cli setup.py; echo $? ["file", "non-executable", "python", "text"] 0 -$ identify setup.py --filename-only; echo $? +$ identify-cli setup.py --filename-only; echo $? ["python", "text"] 0 $ identify-cli wat.wat; echo $? 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/identify-2.2.13/azure-pipelines.yml new/identify-2.5.31/azure-pipelines.yml --- old/identify-2.2.13/azure-pipelines.yml 2021-08-06 18:07:44.000000000 +0200 +++ new/identify-2.5.31/azure-pipelines.yml 1970-01-01 01:00:00.000000000 +0100 @@ -1,19 +0,0 @@ -trigger: - branches: - include: [master, test-me-*] - tags: - include: ['*'] - -resources: - repositories: - - repository: asottile - type: github - endpoint: github - name: asottile/azure-pipeline-templates - ref: refs/tags/v2.1.0 - -jobs: -- template: job--python-tox.yml@asottile - parameters: - toxenvs: [pypy3, py36, py37, py38] - os: linux diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/identify-2.2.13/bin/vendor-licenses new/identify-2.5.31/bin/vendor-licenses --- old/identify-2.2.13/bin/vendor-licenses 2021-08-06 18:07:44.000000000 +0200 +++ new/identify-2.5.31/bin/vendor-licenses 2023-10-28 19:19:41.000000000 +0200 @@ -3,6 +3,8 @@ ./bin/vendor-licenses > identify/vendor/licenses.py """ +from __future__ import annotations + import argparse import os.path import subprocess @@ -54,4 +56,4 @@ if __name__ == '__main__': - exit(main()) + raise SystemExit(main()) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/identify-2.2.13/identify/cli.py new/identify-2.5.31/identify/cli.py --- old/identify-2.2.13/identify/cli.py 2021-08-06 18:07:44.000000000 +0200 +++ new/identify-2.5.31/identify/cli.py 2023-10-28 19:19:41.000000000 +0200 @@ -1,12 +1,13 @@ +from __future__ import annotations + import argparse import json -from typing import Optional from typing import Sequence from identify import identify -def main(argv: Optional[Sequence[str]] = None) -> int: +def main(argv: Sequence[str] | None = None) -> int: parser = argparse.ArgumentParser() parser.add_argument('--filename-only', action='store_true') parser.add_argument('path') @@ -31,4 +32,4 
@@ if __name__ == '__main__': - exit(main()) + raise SystemExit(main()) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/identify-2.2.13/identify/extensions.py new/identify-2.5.31/identify/extensions.py --- old/identify-2.2.13/identify/extensions.py 2021-08-06 18:07:44.000000000 +0200 +++ new/identify-2.5.31/identify/extensions.py 2023-10-28 19:19:41.000000000 +0200 @@ -1,6 +1,8 @@ +from __future__ import annotations EXTENSIONS = { 'adoc': {'text', 'asciidoc'}, 'ai': {'binary', 'adobe-illustrator'}, + 'aj': {'text', 'aspectj'}, 'asciidoc': {'text', 'asciidoc'}, 'apinotes': {'text', 'apinotes'}, 'asar': {'binary', 'asar'}, @@ -8,14 +10,20 @@ 'bash': {'text', 'shell', 'bash'}, 'bat': {'text', 'batch'}, 'bats': {'text', 'shell', 'bash', 'bats'}, + 'bazel': {'text', 'bazel'}, + 'beancount': {'text', 'beancount'}, 'bib': {'text', 'bib'}, 'bmp': {'binary', 'image', 'bitmap'}, 'bz2': {'binary', 'bzip2'}, 'bzl': {'text', 'bazel'}, 'c': {'text', 'c'}, + 'c++': {'text', 'c++'}, + 'c++m': {'text', 'c++'}, 'cc': {'text', 'c++'}, + 'ccm': {'text', 'c++'}, 'cfg': {'text'}, 'chs': {'text', 'c2hs'}, + 'cjs': {'text', 'javascript'}, 'clj': {'text', 'clojure'}, 'cljc': {'text', 'clojure'}, 'cljs': {'text', 'clojure', 'clojurescript'}, @@ -24,6 +32,8 @@ 'coffee': {'text', 'coffee'}, 'conf': {'text'}, 'cpp': {'text', 'c++'}, + 'cppm': {'text', 'c++'}, + 'cr': {'text', 'crystal'}, 'crt': {'text', 'pem'}, 'cs': {'text', 'c#'}, 'csproj': {'text', 'xml', 'csproj'}, @@ -32,24 +42,39 @@ 'css': {'text', 'css'}, 'csv': {'text', 'csv'}, 'cu': {'text', 'cuda'}, + 'cue': {'text', 'cue'}, 'cuh': {'text', 'cuda'}, 'cxx': {'text', 'c++'}, + 'cxxm': {'text', 'c++'}, + 'cylc': {'text', 'cylc'}, 'dart': {'text', 'dart'}, + 'dbc': {'text', 'dbc'}, 'def': {'text', 'def'}, 'dll': {'binary'}, 'dtd': {'text', 'dtd'}, 'ear': {'binary', 'zip', 'jar'}, 'edn': {'text', 'clojure', 'edn'}, 'ejs': {'text', 'ejs'}, + 'env': {'text', 'dotenv'}, 'eot': {'binary', 'eot'}, 
'eps': {'binary', 'eps'}, 'erb': {'text', 'erb'}, + 'erl': {'text', 'erlang'}, + 'ex': {'text', 'elixir'}, 'exe': {'binary'}, + 'exs': {'text', 'elixir'}, 'eyaml': {'text', 'yaml'}, + 'f03': {'text', 'fortran'}, + 'f08': {'text', 'fortran'}, + 'f90': {'text', 'fortran'}, + 'f95': {'text', 'fortran'}, 'feature': {'text', 'gherkin'}, 'fish': {'text', 'fish'}, + 'fits': {'binary', 'fits'}, 'gd': {'text', 'gdscript'}, 'gemspec': {'text', 'ruby'}, + 'geojson': {'text', 'geojson', 'json'}, + 'ggb': {'binary', 'zip', 'ggb'}, 'gif': {'binary', 'image', 'gif'}, 'go': {'text', 'go'}, 'gotmpl': {'text', 'gotmpl'}, @@ -62,8 +87,11 @@ 'gypi': {'text', 'gyp', 'python'}, 'gz': {'binary', 'gzip'}, 'h': {'text', 'header', 'c', 'c++'}, + 'hbs': {'text', 'handlebars'}, + 'hcl': {'text', 'hcl'}, 'hh': {'text', 'header', 'c++'}, 'hpp': {'text', 'header', 'c++'}, + 'hrl': {'text', 'erlang'}, 'hs': {'text', 'haskell'}, 'htm': {'text', 'html'}, 'html': {'text', 'html'}, @@ -75,8 +103,11 @@ 'idr': {'text', 'idris'}, 'inc': {'text', 'inc'}, 'ini': {'text', 'ini'}, + 'inl': {'text', 'inl', 'c++'}, + 'ino': {'text', 'ino', 'c++'}, 'inx': {'text', 'xml', 'inx'}, - 'ipynb': {'text', 'jupyter'}, + 'ipynb': {'text', 'jupyter', 'json'}, + 'ixx': {'text', 'c++'}, 'j2': {'text', 'jinja'}, 'jade': {'text', 'jade'}, 'jar': {'binary', 'zip', 'jar'}, @@ -85,11 +116,14 @@ 'jenkinsfile': {'text', 'groovy', 'jenkins'}, 'jinja': {'text', 'jinja'}, 'jinja2': {'text', 'jinja'}, + 'jl': {'text', 'julia'}, 'jpeg': {'binary', 'image', 'jpeg'}, 'jpg': {'binary', 'image', 'jpeg'}, 'js': {'text', 'javascript'}, 'json': {'text', 'json'}, + 'jsonld': {'text', 'json', 'jsonld'}, 'jsonnet': {'text', 'jsonnet'}, + 'json5': {'text', 'json5'}, 'jsx': {'text', 'jsx'}, 'key': {'text', 'pem'}, 'kml': {'text', 'kml', 'xml'}, @@ -98,31 +132,44 @@ 'lean': {'text', 'lean'}, 'lektorproject': {'text', 'ini', 'lektorproject'}, 'less': {'text', 'less'}, + 'lfm': {'text', 'lazarus', 'lazarus-form'}, 'lhs': {'text', 
'literate-haskell'}, 'libsonnet': {'text', 'jsonnet'}, 'lidr': {'text', 'idris'}, + 'liquid': {'text', 'liquid'}, + 'lpi': {'text', 'lazarus', 'xml'}, + 'lpr': {'text', 'lazarus', 'pascal'}, 'lr': {'text', 'lektor'}, 'lua': {'text', 'lua'}, - 'm': {'text', 'c', 'objective-c'}, + 'm': {'text', 'objective-c'}, + 'm4': {'text', 'm4'}, + 'make': {'text', 'makefile'}, 'manifest': {'text', 'manifest'}, 'map': {'text', 'map'}, 'markdown': {'text', 'markdown'}, 'md': {'text', 'markdown'}, 'mdx': {'text', 'mdx'}, + 'meson': {'text', 'meson'}, 'mib': {'text', 'mib'}, + 'mjs': {'text', 'javascript'}, 'mk': {'text', 'makefile'}, 'ml': {'text', 'ocaml'}, 'mli': {'text', 'ocaml'}, 'mm': {'text', 'c++', 'objective-c++'}, 'modulemap': {'text', 'modulemap'}, + 'mscx': {'text', 'xml', 'musescore'}, + 'mscz': {'binary', 'zip', 'musescore'}, + 'mustache': {'text', 'mustache'}, 'myst': {'text', 'myst'}, 'ngdoc': {'text', 'ngdoc'}, 'nim': {'text', 'nim'}, 'nims': {'text', 'nim'}, 'nimble': {'text', 'nimble'}, 'nix': {'text', 'nix'}, + 'njk': {'text', 'nunjucks'}, 'otf': {'binary', 'otf'}, 'p12': {'binary', 'p12'}, + 'pas': {'text', 'pascal'}, 'patch': {'text', 'diff'}, 'pdf': {'binary', 'pdf'}, 'pem': {'text', 'pem'}, @@ -135,9 +182,13 @@ 'pm': {'text', 'perl'}, 'png': {'binary', 'image', 'png'}, 'po': {'text', 'pofile'}, + 'pom': {'pom', 'text', 'xml'}, 'pp': {'text', 'puppet'}, + 'prisma': {'text', 'prisma'}, 'properties': {'text', 'java-properties'}, 'proto': {'text', 'proto'}, + 'ps1': {'text', 'powershell'}, + 'pug': {'text', 'pug'}, 'puml': {'text', 'plantuml'}, 'purs': {'text', 'purescript'}, 'pxd': {'text', 'cython'}, @@ -148,9 +199,12 @@ 'pyx': {'text', 'cython'}, 'pyz': {'binary', 'pyz'}, 'pyzw': {'binary', 'pyz'}, + 'qml': {'text', 'qml'}, 'r': {'text', 'r'}, 'rake': {'text', 'ruby'}, 'rb': {'text', 'ruby'}, + 'resx': {'text', 'resx', 'xml'}, + 'rng': {'text', 'xml', 'relax-ng'}, 'rs': {'text', 'rust'}, 'rst': {'text', 'rst'}, 's': {'text', 'asm'}, @@ -168,8 +222,10 @@ 
'spec': {'text', 'spec'}, 'sql': {'text', 'sql'}, 'ss': {'text', 'scheme'}, + 'sty': {'text', 'tex'}, 'styl': {'text', 'stylus'}, 'sv': {'text', 'system-verilog'}, + 'svelte': {'text', 'svelte'}, 'svg': {'text', 'image', 'svg', 'xml'}, 'svh': {'text', 'system-verilog'}, 'swf': {'binary', 'swf'}, @@ -178,6 +234,7 @@ 'tac': {'text', 'twisted', 'python'}, 'tar': {'binary', 'tar'}, 'tex': {'text', 'tex'}, + 'textproto': {'text', 'textproto'}, 'tf': {'text', 'terraform'}, 'tfvars': {'text', 'terraform'}, 'tgz': {'binary', 'gzip'}, @@ -191,6 +248,7 @@ 'twig': {'text', 'twig'}, 'txsprofile': {'text', 'ini', 'txsprofile'}, 'txt': {'text', 'plain-text'}, + 'txtpb': {'text', 'textproto'}, 'urdf': {'text', 'xml', 'urdf'}, 'v': {'text', 'verilog'}, 'vb': {'text', 'vb'}, @@ -200,6 +258,7 @@ 'vh': {'text', 'verilog'}, 'vhd': {'text', 'vhdl'}, 'vim': {'text', 'vim'}, + 'vtl': {'text', 'vtl'}, 'vue': {'text', 'vue'}, 'war': {'binary', 'zip', 'jar'}, 'wav': {'binary', 'audio', 'wav'}, @@ -210,6 +269,7 @@ 'woff2': {'binary', 'woff2'}, 'wsgi': {'text', 'wsgi', 'python'}, 'xhtml': {'text', 'xml', 'html', 'xhtml'}, + 'xacro': {'text', 'xml', 'urdf', 'xacro'}, 'xml': {'text', 'xml'}, 'xq': {'text', 'xquery'}, 'xql': {'text', 'xquery'}, @@ -220,6 +280,7 @@ 'xsd': {'text', 'xml', 'xsd'}, 'xsl': {'text', 'xml', 'xsl'}, 'yaml': {'text', 'yaml'}, + 'yamlld': {'text', 'yaml', 'yamlld'}, 'yang': {'text', 'yang'}, 'yin': {'text', 'xml', 'yin'}, 'yml': {'text', 'yaml'}, @@ -231,9 +292,11 @@ } EXTENSIONS_NEED_BINARY_CHECK = { 'plist': {'plist'}, + 'ppm': {'image', 'ppm'}, } NAMES = { + '.ansible-lint': EXTENSIONS['yaml'], '.babelrc': EXTENSIONS['json'] | {'babelrc'}, '.bash_aliases': EXTENSIONS['bash'], '.bash_profile': EXTENSIONS['bash'], @@ -256,11 +319,13 @@ '.gitlint': EXTENSIONS['ini'] | {'gitlint'}, '.gitmodules': {'text', 'gitmodules'}, '.hgrc': EXTENSIONS['ini'] | {'hgrc'}, + '.isort.cfg': EXTENSIONS['ini'] | {'isort'}, '.jshintrc': EXTENSIONS['json'] | {'jshintrc'}, '.mailmap': {'text', 
'mailmap'}, '.mention-bot': EXTENSIONS['json'] | {'mention-bot'}, '.npmignore': {'text', 'npmignore'}, '.pdbrc': EXTENSIONS['py'] | {'pdbrc'}, + '.prettierignore': {'text', 'gitignore', 'prettierignore'}, '.pypirc': EXTENSIONS['ini'] | {'pypirc'}, '.rstcheck.cfg': EXTENSIONS['ini'], '.yamllint': EXTENSIONS['yaml'] | {'yamllint'}, @@ -271,19 +336,25 @@ '.zshenv': EXTENSIONS['zsh'], 'AUTHORS': EXTENSIONS['txt'], 'BUILD': EXTENSIONS['bzl'], - 'BUILD.bazel': EXTENSIONS['bzl'], 'CMakeLists.txt': EXTENSIONS['cmake'], 'CHANGELOG': EXTENSIONS['txt'], + 'config.ru': EXTENSIONS['rb'], + 'Containerfile': {'text', 'dockerfile'}, 'CONTRIBUTING': EXTENSIONS['txt'], + 'copy.bara.sky': EXTENSIONS['bzl'], 'COPYING': EXTENSIONS['txt'], 'Dockerfile': {'text', 'dockerfile'}, 'Gemfile': EXTENSIONS['rb'], 'Gemfile.lock': {'text'}, 'GNUmakefile': EXTENSIONS['mk'], + 'go.mod': {'text', 'go-mod'}, + 'go.sum': {'text', 'go-sum'}, 'Jenkinsfile': EXTENSIONS['jenkins'], 'LICENSE': EXTENSIONS['txt'], 'MAINTAINERS': EXTENSIONS['txt'], 'Makefile': EXTENSIONS['mk'], + 'meson.build': EXTENSIONS['meson'], + 'meson_options.txt': EXTENSIONS['meson'], 'makefile': EXTENSIONS['mk'], 'NEWS': EXTENSIONS['txt'], 'NOTICE': EXTENSIONS['txt'], @@ -291,10 +362,15 @@ 'Pipfile': EXTENSIONS['toml'], 'Pipfile.lock': EXTENSIONS['json'], 'PKGBUILD': {'text', 'bash', 'pkgbuild', 'alpm'}, + 'poetry.lock': EXTENSIONS['toml'], + 'pom.xml': EXTENSIONS['pom'], 'pylintrc': EXTENSIONS['ini'] | {'pylintrc'}, 'README': EXTENSIONS['txt'], 'Rakefile': EXTENSIONS['rb'], + 'rebar.config': EXTENSIONS['erl'], 'setup.cfg': EXTENSIONS['ini'], + 'sys.config': EXTENSIONS['erl'], + 'sys.config.src': EXTENSIONS['erl'], 'WORKSPACE': EXTENSIONS['bzl'], 'wscript': EXTENSIONS['py'], } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/identify-2.2.13/identify/identify.py new/identify-2.5.31/identify/identify.py --- old/identify-2.2.13/identify/identify.py 2021-08-06 18:07:44.000000000 +0200 +++ 
new/identify-2.5.31/identify/identify.py 2023-10-28 19:19:41.000000000 +0200 @@ -1,4 +1,7 @@ +from __future__ import annotations + import errno +import math import os.path import re import shlex @@ -6,10 +9,6 @@ import string import sys from typing import IO -from typing import List -from typing import Optional -from typing import Set -from typing import Tuple from identify import extensions from identify import interpreters @@ -38,7 +37,7 @@ ALL_TAGS = frozenset(_ALL_TAGS) -def tags_from_path(path: str) -> Set[str]: +def tags_from_path(path: str) -> set[str]: try: sr = os.lstat(path) except (OSError, ValueError): # same error-handling as `os.lexists()` @@ -84,7 +83,7 @@ return tags -def tags_from_filename(path: str) -> Set[str]: +def tags_from_filename(path: str) -> set[str]: _, filename = os.path.split(path) _, ext = os.path.splitext(filename) @@ -106,7 +105,7 @@ return ret -def tags_from_interpreter(interpreter: str) -> Set[str]: +def tags_from_interpreter(interpreter: str) -> set[str]: _, _, interpreter = interpreter.rpartition('/') # Try "python3.5.2" => "python3.5" => "python3" until one matches. 
@@ -140,7 +139,7 @@ return is_text(f) -def _shebang_split(line: str) -> List[str]: +def _shebang_split(line: str) -> list[str]: try: # shebangs aren't supposed to be quoted, though some tools such as # setuptools will write them with quotes so we'll best-guess parse @@ -154,8 +153,8 @@ def _parse_nix_shebang( bytesio: IO[bytes], - cmd: Tuple[str, ...], -) -> Tuple[str, ...]: + cmd: tuple[str, ...], +) -> tuple[str, ...]: while bytesio.read(2) == b'#!': next_line_b = bytesio.readline() try: @@ -176,7 +175,7 @@ return cmd -def parse_shebang(bytesio: IO[bytes]) -> Tuple[str, ...]: +def parse_shebang(bytesio: IO[bytes]) -> tuple[str, ...]: """Parse the shebang from a file opened for reading binary.""" if bytesio.read(2) != b'#!': return () @@ -193,13 +192,17 @@ cmd = tuple(_shebang_split(first_line.strip())) if cmd and cmd[0] == '/usr/bin/env': - cmd = cmd[1:] + if cmd[1] == '-S': + cmd = cmd[2:] + else: + cmd = cmd[1:] + if cmd == ('nix-shell',): return _parse_nix_shebang(bytesio, cmd) return cmd -def parse_shebang_from_file(path: str) -> Tuple[str, ...]: +def parse_shebang_from_file(path: str) -> tuple[str, ...]: """Parse the shebang given a file path.""" if not os.path.lexists(path): raise ValueError(f'{path} does not exist.') @@ -226,7 +229,7 @@ return s.strip() -def license_id(filename: str) -> Optional[str]: +def license_id(filename: str) -> str | None: """Return the spdx id for the license contained in `filename`. If no license is detected, returns `None`. @@ -240,7 +243,7 @@ 3. check exact text match with existing licenses 4. 
failing that use edit distance """ - import editdistance_s # `pip install identify[license]` + import ukkonen # `pip install identify[license]` with open(filename, encoding='UTF-8') as f: contents = f.read() @@ -250,6 +253,8 @@ min_edit_dist = sys.maxsize min_edit_dist_spdx = '' + cutoff = math.ceil(.05 * len(norm)) + # try exact matches for spdx, text in licenses.LICENSES: norm_license = _norm_license(text) @@ -260,13 +265,13 @@ if norm and abs(len(norm) - len(norm_license)) / len(norm) > .05: continue - edit_dist = editdistance_s.distance(norm, norm_license) - if edit_dist < min_edit_dist: + edit_dist = ukkonen.distance(norm, norm_license, cutoff) + if edit_dist < cutoff and edit_dist < min_edit_dist: min_edit_dist = edit_dist min_edit_dist_spdx = spdx # if there's less than 5% edited from the license, we found our match - if norm and min_edit_dist / len(norm) < .05: + if norm and min_edit_dist < cutoff: return min_edit_dist_spdx else: # no matches :'( diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/identify-2.2.13/identify/interpreters.py new/identify-2.5.31/identify/interpreters.py --- old/identify-2.2.13/identify/interpreters.py 2021-08-06 18:07:44.000000000 +0200 +++ new/identify-2.5.31/identify/interpreters.py 2023-10-28 19:19:41.000000000 +0200 @@ -1,8 +1,10 @@ +from __future__ import annotations INTERPRETERS = { 'ash': {'shell', 'ash'}, 'awk': {'awk'}, 'bash': {'shell', 'bash'}, 'bats': {'shell', 'bash', 'bats'}, + 'cbsd': {'shell', 'cbsd'}, 'csh': {'shell', 'csh'}, 'dash': {'shell', 'dash'}, 'expect': {'expect'}, @@ -10,6 +12,9 @@ 'node': {'javascript'}, 'nodejs': {'javascript'}, 'perl': {'perl'}, + 'php': {'php'}, + 'php7': {'php', 'php7'}, + 'php8': {'php', 'php8'}, 'python': {'python'}, 'python2': {'python', 'python2'}, 'python3': {'python', 'python3'}, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/identify-2.2.13/identify/vendor/licenses.py 
new/identify-2.5.31/identify/vendor/licenses.py --- old/identify-2.2.13/identify/vendor/licenses.py 2021-08-06 18:07:44.000000000 +0200 +++ new/identify-2.5.31/identify/vendor/licenses.py 2023-10-28 19:19:41.000000000 +0200 @@ -1,3 +1,4 @@ +from __future__ import annotations LICENSES = ( ( '0BSD', diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/identify-2.2.13/setup.cfg new/identify-2.5.31/setup.cfg --- old/identify-2.2.13/setup.cfg 2021-08-06 18:07:44.000000000 +0200 +++ new/identify-2.5.31/setup.cfg 2023-10-28 19:19:41.000000000 +0200 @@ -1,6 +1,6 @@ [metadata] name = identify -version = 2.2.13 +version = 2.5.31 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown @@ -8,21 +8,17 @@ author = Chris Kuehl author_email = cku...@ocf.berkeley.edu license = MIT -license_file = LICENSE +license_files = LICENSE classifiers = License :: OSI Approved :: MIT License Programming Language :: Python :: 3 Programming Language :: Python :: 3 :: Only - Programming Language :: Python :: 3.6 - Programming Language :: Python :: 3.7 - Programming Language :: Python :: 3.8 - Programming Language :: Python :: 3.9 Programming Language :: Python :: Implementation :: CPython Programming Language :: Python :: Implementation :: PyPy [options] packages = find: -python_requires = >=3.6.1 +python_requires = >=3.8 [options.packages.find] exclude = @@ -35,7 +31,11 @@ [options.extras_require] license = - editdistance-s + ukkonen + +[options.package_data] +identify = + py.typed [bdist_wheel] universal = True @@ -48,7 +48,6 @@ disallow_any_generics = true disallow_incomplete_defs = true disallow_untyped_defs = true -no_implicit_optional = true warn_redundant_casts = true warn_unused_ignores = true diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/identify-2.2.13/setup.py new/identify-2.5.31/setup.py --- old/identify-2.2.13/setup.py 
2021-08-06 18:07:44.000000000 +0200 +++ new/identify-2.5.31/setup.py 2023-10-28 19:19:41.000000000 +0200 @@ -1,2 +1,4 @@ +from __future__ import annotations + from setuptools import setup setup() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/identify-2.2.13/tests/cli_test.py new/identify-2.5.31/tests/cli_test.py --- old/identify-2.2.13/tests/cli_test.py 2021-08-06 18:07:44.000000000 +0200 +++ new/identify-2.5.31/tests/cli_test.py 2023-10-28 19:19:41.000000000 +0200 @@ -1,3 +1,5 @@ +from __future__ import annotations + from identify import cli diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/identify-2.2.13/tests/extensions_test.py new/identify-2.5.31/tests/extensions_test.py --- old/identify-2.2.13/tests/extensions_test.py 2021-08-06 18:07:44.000000000 +0200 +++ new/identify-2.5.31/tests/extensions_test.py 2023-10-28 19:19:41.000000000 +0200 @@ -1,3 +1,5 @@ +from __future__ import annotations + import pytest from identify import extensions diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/identify-2.2.13/tests/identify_test.py new/identify-2.5.31/tests/identify_test.py --- old/identify-2.2.13/tests/identify_test.py 2021-08-06 18:07:44.000000000 +0200 +++ new/identify-2.5.31/tests/identify_test.py 2023-10-28 19:19:41.000000000 +0200 @@ -1,3 +1,5 @@ +from __future__ import annotations + import builtins import errno import io @@ -37,6 +39,7 @@ assert 'plist' in identify.ALL_TAGS # extension, needs binary check assert 'dockerfile' in identify.ALL_TAGS # by file convention assert 'python3' in identify.ALL_TAGS # by shebang + assert 'php8' in identify.ALL_TAGS # by shebang def test_tags_from_path_does_not_exist(tmpdir): @@ -151,6 +154,7 @@ ('test.py', {'text', 'python'}), ('test.mk', {'text', 'makefile'}), ('Makefile', {'text', 'makefile'}), + ('Containerfile', {'text', 'dockerfile'}), ('Dockerfile', {'text', 'dockerfile'}), 
('Dockerfile.xenial', {'text', 'dockerfile'}), ('xenial.Dockerfile', {'text', 'dockerfile'}), @@ -158,11 +162,14 @@ ('Pipfile.lock', {'text', 'json'}), ('mod/test.py', {'text', 'python'}), ('mod/Dockerfile', {'text', 'dockerfile'}), + ('config.ru', {'text', 'ruby'}), ('Gemfile', {'text', 'ruby'}), ('Gemfile.lock', {'text'}), ('Jenkinsfile', {'text', 'groovy', 'jenkins'}), ('build.jenkins', {'text', 'groovy', 'jenkins'}), ('build.jenkinsfile', {'text', 'groovy', 'jenkins'}), + ('meson.build', {'text', 'meson'}), + ('meson_options.txt', {'text', 'plain-text', 'meson'}), # does not set binary / text ('f.plist', {'plist'}), @@ -314,6 +321,8 @@ (b'\xf9\x93\x01\x42\xcd', ()), (b'#!\xf9\x93\x01\x42\xcd', ()), (b'#!\x00\x00\x00\x00', ()), + # shebang lines with multiple arguments + (b'#!/usr/bin/env -S python -u', ('python', '-u')), ), ) def test_parse_shebang(s, expected): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/identify-2.2.13/tox.ini new/identify-2.5.31/tox.ini --- old/identify-2.2.13/tox.ini 2021-08-06 18:07:44.000000000 +0200 +++ new/identify-2.5.31/tox.ini 2023-10-28 19:19:41.000000000 +0200 @@ -1,5 +1,5 @@ [tox] -envlist = py36,pypy3,pre-commit +envlist = py,pre-commit [testenv] deps = -rrequirements-dev.txt @@ -7,7 +7,7 @@ commands = coverage erase coverage run -m pytest {posargs:tests} - coverage report --fail-under 100 + coverage report [testenv:pre-commit] skip_install = true