Script 'mail_helper' called by obssrc
Hello community,
here is the log from the commit of package python-charset-normalizer for
openSUSE:Factory checked in at 2023-07-17 19:22:47
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-charset-normalizer (Old)
and /work/SRC/openSUSE:Factory/.python-charset-normalizer.new.3193 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-charset-normalizer"
Mon Jul 17 19:22:47 2023 rev:20 rq:1098807 version:3.2.0
Changes:
--------
--- /work/SRC/openSUSE:Factory/python-charset-normalizer/python-charset-normalizer.changes  2023-05-09 13:06:40.988787178 +0200
+++ /work/SRC/openSUSE:Factory/.python-charset-normalizer.new.3193/python-charset-normalizer.changes  2023-07-17 19:22:54.233604473 +0200
@@ -1,0 +2,15 @@
+Tue Jul 11 13:22:52 UTC 2023 - Dirk Müller <[email protected]>
+
+- update to 3.2.0:
+  * Typehint for function `from_path` no longer enforces
+    `PathLike` as its first argument
+  * Minor improvement to the global detection reliability
+  * Introduce function `is_binary`, which builds on the main
+    detection capabilities and is optimized to detect binaries
+  * Propagate the `enable_fallback` argument throughout
+    `from_bytes`, `from_path`, and `from_fp`, allowing deeper
+    control over the detection (default: True)
+  * Fix an edge-case detection failure where a file contains a
+    'very long' camel-cased word (Issue #289)
+
+-------------------------------------------------------------------
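For context, a minimal sketch of how the two headline additions are used (the payload and file name below are hypothetical; `enable_fallback`, `from_bytes`, and `is_binary` come from the api.py diff further down):

    from charset_normalizer import from_bytes, is_binary

    # enable_fallback=False suppresses the ascii/utf_8 fallback guesses,
    # giving stricter results; the default remains True.
    payload = "héllo wörld".encode("utf_8")
    best_guess = from_bytes(payload, enable_fallback=False).best()
    print(best_guess.encoding if best_guess else "no match")

    # is_binary accepts raw bytes, a file pointer, or a path (hypothetical file).
    print(is_binary("./some-archive.bin"))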
Old:
----
charset_normalizer-3.1.0.tar.gz
New:
----
charset_normalizer-3.2.0.tar.gz
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ python-charset-normalizer.spec ++++++
--- /var/tmp/diff_new_pack.SMA7Ge/_old 2023-07-17 19:22:54.997608898 +0200
+++ /var/tmp/diff_new_pack.SMA7Ge/_new 2023-07-17 19:22:55.001608921 +0200
@@ -18,7 +18,7 @@
%{?sle15_python_module_pythons}
Name: python-charset-normalizer
-Version: 3.1.0
+Version: 3.2.0
Release: 0
Summary: Python Universal Charset detector
License: MIT
++++++ charset_normalizer-3.1.0.tar.gz -> charset_normalizer-3.2.0.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/.github/workflows/cd.yml new/charset_normalizer-3.2.0/.github/workflows/cd.yml
--- old/charset_normalizer-3.1.0/.github/workflows/cd.yml  1970-01-01 01:00:00.000000000 +0100
+++ new/charset_normalizer-3.2.0/.github/workflows/cd.yml  2023-07-07 20:01:05.000000000 +0200
@@ -0,0 +1,161 @@
+name: Continuous Delivery
+
+on:
+ workflow_dispatch:
+
+ release:
+ types:
+ - created
+
+permissions:
+ contents: read
+
+jobs:
+ pre_flight_check:
+ name: Preflight Checks
+ uses: ./.github/workflows/ci.yml
+
+ universal-wheel:
+ name: Build Universal Wheel
+ runs-on: ubuntu-latest
+ needs:
+ - pre_flight_check
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.11'
+ - name: Update pip, setuptools, wheel, build and twine
+ run: |
+ python -m pip install --upgrade pip
+ pip install setuptools wheel build
+ - name: Build Wheel
+ env:
+ CHARSET_NORMALIZER_USE_MYPYC: '0'
+ run: python -m build
+ - name: Upload artifacts
+ uses: actions/upload-artifact@v3
+ with:
+ name: dist
+ path: dist
+
+ build-wheels:
+ name: Build wheels on ${{ matrix.os }} ${{ matrix.qemu }}
+ runs-on: ${{ matrix.os }}-latest
+ needs: pre_flight_check
+ strategy:
+ matrix:
+ os: [ ubuntu, windows, macos ]
+ qemu: [ '' ]
+ include:
+ # Split ubuntu job for the sake of speed-up
+ - os: ubuntu
+ qemu: aarch64
+ - os: ubuntu
+ qemu: ppc64le
+ - os: ubuntu
+ qemu: s390x
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v3
+ with:
+ submodules: true
+ - name: Set up QEMU
+ if: ${{ matrix.qemu }}
+ uses: docker/setup-qemu-action@v2
+ with:
+ platforms: all
+ id: qemu
+ - name: Prepare emulation
+ run: |
+ if [[ -n "${{ matrix.qemu }}" ]]; then
+ # Build emulated architectures only if QEMU is set,
+ # use default "auto" otherwise
+ echo "CIBW_ARCHS_LINUX=${{ matrix.qemu }}" >> $GITHUB_ENV
+ fi
+ shell: bash
+ - name: Setup Python
+ uses: actions/setup-python@v4
+ - name: Update pip, wheel, setuptools, build, twine
+ run: |
+ python -m pip install -U pip wheel setuptools build twine
+ - name: Build wheels
+ uses: pypa/[email protected]
+ env:
+ #CIBW_BUILD_FRONTEND: "build"
+ CIBW_ARCHS_MACOS: x86_64 arm64 universal2
+ CIBW_ENVIRONMENT: CHARSET_NORMALIZER_USE_MYPYC='1'
+ CIBW_BEFORE_BUILD: pip install -r build-requirements.txt
+ #CIBW_CONFIG_SETTINGS: "--build-option=--no-isolation"
+ CIBW_TEST_REQUIRES: pytest
+ CIBW_TEST_COMMAND: pytest -c {package} {package}/tests
+ CIBW_SKIP: pp* cp36*
+ - name: Upload artifacts
+ uses: actions/upload-artifact@v3
+ with:
+ name: dist
+ path: ./wheelhouse/*.whl
+
+ checksum:
+ name: Compute hashes
+ runs-on: ubuntu-latest
+ needs:
+ - build-wheels
+ - universal-wheel
+ outputs:
+ hashes: ${{ steps.compute.outputs.hashes }}
+ steps:
+ - uses: actions/checkout@v3
+ - name: Download distributions
+ uses: actions/download-artifact@v3
+ with:
+ name: dist
+ path: dist
+ - name: Collected dists
+ run: |
+ tree dist
+ - name: Generate hashes
+ id: compute # needs.checksum.outputs.hashes
+ working-directory: ./dist
+ run: echo "hashes=$(sha256sum * | base64 -w0)" >> $GITHUB_OUTPUT
+
+ provenance:
+ needs: checksum
+    uses: slsa-framework/slsa-github-generator/.github/workflows/[email protected]
+ permissions:
+ actions: read
+ id-token: write
+ contents: write
+ with:
+ base64-subjects: ${{ needs.checksum.outputs.hashes }}
+ upload-assets: true
+
+ deploy:
+    name: 🚀 Deploy to PyPi
+ runs-on: ubuntu-latest
+ if: startsWith(github.ref, 'refs/tags/')
+ permissions:
+ id-token: write
+ contents: write
+ needs: provenance
+ environment:
+ name: pypi
+ url: https://pypi.org/project/charset-normalizer/
+ steps:
+ - uses: actions/checkout@v3
+ - name: Download distributions
+ uses: actions/download-artifact@v3
+ with:
+ name: dist
+ path: dist
+ - name: Collected dists
+ run: |
+ tree dist
+ - name: Publish package distributions to PyPI
+ uses: pypa/gh-action-pypi-publish@release/v1
+ - name: Upload dists to GitHub Release
+ env:
+ GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
+ run: |
+          gh release upload ${{ github.ref_name }} dist/* --repo ${{ github.repository }}
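The `checksum` job above base64-encodes a `sha256sum` listing of every artifact so the SLSA provenance generator can attest to them. As a rough Python equivalent of that single shell step, for illustration only (assumes a local dist/ directory):

    import base64
    import hashlib
    from pathlib import Path

    # Mirrors `sha256sum * | base64 -w0` executed inside dist/.
    listing = "".join(
        f"{hashlib.sha256(f.read_bytes()).hexdigest()}  {f.name}\n"
        for f in sorted(Path("dist").iterdir())
    )
    print(base64.b64encode(listing.encode()).decode())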
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/.github/workflows/chardet-bc.yml new/charset_normalizer-3.2.0/.github/workflows/chardet-bc.yml
--- old/charset_normalizer-3.1.0/.github/workflows/chardet-bc.yml  2023-03-06 07:46:55.000000000 +0100
+++ new/charset_normalizer-3.2.0/.github/workflows/chardet-bc.yml  1970-01-01 01:00:00.000000000 +0100
@@ -1,35 +0,0 @@
-name: Chardet BC Coverage
-
-on: [push, pull_request]
-
-jobs:
- chardet_bc:
- runs-on: ${{ matrix.os }}
-
- strategy:
- fail-fast: false
- matrix:
- python-version: [3.9]
- os: [ubuntu-latest]
-
- steps:
- - uses: actions/checkout@v3
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
- with:
- python-version: ${{ matrix.python-version }}
- - name: Install dependencies
- run: |
- pip install -U pip setuptools
- pip install -r dev-requirements.txt
- pip uninstall -y charset-normalizer
- - name: Install the package
- run: |
- python -m build
- pip install ./dist/*.whl
- - name: Clone the complete dataset
- run: |
- git clone https://github.com/Ousret/char-dataset.git
- - name: BC Coverage
- run: |
- python ./bin/bc.py --coverage 80
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/.github/workflows/ci.yml new/charset_normalizer-3.2.0/.github/workflows/ci.yml
--- old/charset_normalizer-3.1.0/.github/workflows/ci.yml  1970-01-01 01:00:00.000000000 +0100
+++ new/charset_normalizer-3.2.0/.github/workflows/ci.yml  2023-07-07 20:01:05.000000000 +0200
@@ -0,0 +1,239 @@
+name: Continuous Integration
+
+on:
+ workflow_call:
+ pull_request:
+ push:
+ branches:
+ - master
+
+permissions:
+ contents: read
+
+jobs:
+ lint:
+    name: 🎨 Linters
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.11'
+ - name: Install dependencies
+ run: |
+ pip install -U pip setuptools
+ pip install -r dev-requirements.txt
+ pip uninstall -y charset-normalizer
+ - name: Type checking (Mypy)
+ run: |
+ mypy --strict charset_normalizer
+ - name: Import sorting check (isort)
+ run: |
+ isort --check charset_normalizer
+ - name: Code format (Black)
+ run: |
+ black --check --diff --target-version=py37 charset_normalizer
+ - name: Style guide enforcement (Flake8)
+ run: |
+ flake8 charset_normalizer
+
+ tests:
+    name: ✅ Tests
+ runs-on: ubuntu-latest
+
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: [ "3.7", "3.8", "3.9", "3.10", "3.11", "3.12-dev" ]
+
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install dependencies
+ run: |
+ pip install -U pip setuptools
+ pip install -r dev-requirements.txt
+ pip uninstall -y charset-normalizer
+ - name: Install the package
+ run: |
+ python -m build --no-isolation
+ pip install ./dist/*.whl
+ - name: Run tests
+ run: |
+ pytest
+ - uses: codecov/codecov-action@v3
+
+ detection_coverage:
+
+ needs:
+ - tests
+
+    name: 📈 Detection Coverage
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.11'
+ - name: Install dependencies
+ run: |
+ pip install -U pip setuptools
+ pip install -r dev-requirements.txt
+ pip uninstall -y charset-normalizer
+ - name: Install the package
+ run: |
+ python -m build
+ pip install ./dist/*.whl
+ - name: Clone the complete dataset
+ run: |
+ git clone https://github.com/Ousret/char-dataset.git
+ - name: Coverage WITH preemptive
+ run: |
+ python ./bin/coverage.py --coverage 97 --with-preemptive
+ - name: Coverage WITHOUT preemptive
+ run: |
+ python ./bin/coverage.py --coverage 95
+
+ integration_test:
+
+ needs:
+ - tests
+
+    name: 🔗 Integration Tests
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.11'
+ - name: Install dependencies
+ run: |
+ pip install -U pip setuptools
+ pip install -r dev-requirements.txt
+ - name: Remove Chardet & Charset-Normalizer
+ run: |
+ pip uninstall -y chardet
+ pip uninstall -y charset-normalizer
+ - name: Install the package
+ run: |
+ python -m build
+ pip install ./dist/*.whl
+ - name: Clone the complete dataset
+ run: |
+ git clone https://github.com/Ousret/char-dataset.git
+ - name: Start the Flask server
+ run: |
+ python ./bin/serve.py &
+ - name: Integration Tests with Requests
+ run: |
+ python ./bin/integration.py
+
+ chardet_bc:
+
+    name: ⏪ Chardet Backward-Compatibility Test
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.11'
+ - name: Install dependencies
+ run: |
+ pip install -U pip setuptools
+ pip install -r dev-requirements.txt
+ pip uninstall -y charset-normalizer
+ - name: Install the package
+ run: |
+ python -m build
+ pip install ./dist/*.whl
+ - name: Clone the complete dataset
+ run: |
+ git clone https://github.com/Ousret/char-dataset.git
+ - name: BC Coverage
+ run: |
+ python ./bin/bc.py --coverage 80
+
+ mypyc_test:
+
+    name: ⚡ MypyC Tests
+
+ needs:
+ - tests
+
+ runs-on: ${{ matrix.os }}
+
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: [ "3.7", "3.8", "3.9", "3.10", "3.11" ] # , "3.12-dev"
+ os: [ ubuntu-latest, macos-latest, windows-latest ]
+ env:
+      PYTHONIOENCODING: utf8 # only needed for Windows (console IO output encoding)
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install dependencies
+ run: |
+ pip install -U pip setuptools
+ pip install -r dev-requirements.txt
+ pip uninstall -y charset-normalizer
+ - name: Install the package
+ env:
+ CHARSET_NORMALIZER_USE_MYPYC: '1'
+ run: |
+ pip install .
+ - name: Clone the complete dataset
+ run: |
+ git clone https://github.com/Ousret/char-dataset.git
+ - name: Coverage WITH preemptive
+ run: |
+ python ./bin/coverage.py --coverage 97 --with-preemptive
+ - name: Performance (Normal)
+ run: |
+ python ./bin/performance.py
+
+ performance:
+    name: ⚡ Performance Test (no MypyC)
+ runs-on: ubuntu-latest
+
+ needs:
+ - mypyc_test
+ - chardet_bc
+
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.11'
+ - name: Install dependencies
+ run: |
+ pip install -U pip setuptools
+ pip install -r dev-requirements.txt
+ pip uninstall -y charset-normalizer
+ - name: Install the package
+ run: |
+ python -m build
+ pip install ./dist/*.whl
+ - name: Clone the complete dataset
+ run: |
+ git clone https://github.com/Ousret/char-dataset.git
+ - name: Performance (Normal)
+ run: |
+ python ./bin/performance.py
+ - name: Performance (Medium)
+ run: |
+ python ./bin/performance.py --size-increase 2
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/.github/workflows/codeql.yml new/charset_normalizer-3.2.0/.github/workflows/codeql.yml
--- old/charset_normalizer-3.1.0/.github/workflows/codeql.yml  2023-03-06 07:46:55.000000000 +0100
+++ new/charset_normalizer-3.2.0/.github/workflows/codeql.yml  2023-07-07 20:01:05.000000000 +0200
@@ -11,6 +11,9 @@
#
name: "CodeQL"
+permissions:
+ contents: read
+
on:
push:
branches: [ "master", "2.1.x" ]
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/.github/workflows/detector-coverage.yml new/charset_normalizer-3.2.0/.github/workflows/detector-coverage.yml
--- old/charset_normalizer-3.1.0/.github/workflows/detector-coverage.yml  2023-03-06 07:46:55.000000000 +0100
+++ new/charset_normalizer-3.2.0/.github/workflows/detector-coverage.yml  1970-01-01 01:00:00.000000000 +0100
@@ -1,38 +0,0 @@
-name: Detection Coverage
-
-on: [push, pull_request]
-
-jobs:
- detection_coverage:
- runs-on: ${{ matrix.os }}
-
- strategy:
- fail-fast: false
- matrix:
- python-version: [3.9]
- os: [ubuntu-latest]
-
- steps:
- - uses: actions/checkout@v3
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
- with:
- python-version: ${{ matrix.python-version }}
- - name: Install dependencies
- run: |
- pip install -U pip setuptools
- pip install -r dev-requirements.txt
- pip uninstall -y charset-normalizer
- - name: Install the package
- run: |
- python -m build
- pip install ./dist/*.whl
- - name: Clone the complete dataset
- run: |
- git clone https://github.com/Ousret/char-dataset.git
- - name: Coverage WITH preemptive
- run: |
- python ./bin/coverage.py --coverage 97 --with-preemptive
- - name: Coverage WITHOUT preemptive
- run: |
- python ./bin/coverage.py --coverage 95
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/.github/workflows/integration.yml new/charset_normalizer-3.2.0/.github/workflows/integration.yml
--- old/charset_normalizer-3.1.0/.github/workflows/integration.yml  2023-03-06 07:46:55.000000000 +0100
+++ new/charset_normalizer-3.2.0/.github/workflows/integration.yml  1970-01-01 01:00:00.000000000 +0100
@@ -1,41 +0,0 @@
-name: Integration
-
-on: [push, pull_request]
-
-jobs:
- downstream:
- runs-on: ${{ matrix.os }}
-
- strategy:
- fail-fast: false
- matrix:
- python-version: [3.9]
- os: [ubuntu-latest]
-
- steps:
- - uses: actions/checkout@v3
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
- with:
- python-version: ${{ matrix.python-version }}
- - name: Install dependencies
- run: |
- pip install -U pip setuptools
- pip install -r dev-requirements.txt
- - name: Remove Chardet & Charset-Normalizer
- run: |
- pip uninstall -y chardet
- pip uninstall -y charset-normalizer
- - name: Install the package
- run: |
- python -m build
- pip install ./dist/*.whl
- - name: Clone the complete dataset
- run: |
- git clone https://github.com/Ousret/char-dataset.git
- - name: Start the Flask server
- run: |
- python ./bin/serve.py &
- - name: Integration Tests with Requests
- run: |
- python ./bin/integration.py
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/.github/workflows/lint.yml new/charset_normalizer-3.2.0/.github/workflows/lint.yml
--- old/charset_normalizer-3.1.0/.github/workflows/lint.yml  2023-03-06 07:46:55.000000000 +0100
+++ new/charset_normalizer-3.2.0/.github/workflows/lint.yml  1970-01-01 01:00:00.000000000 +0100
@@ -1,41 +0,0 @@
-name: 🎨 Linters
-
-on: [push, pull_request]
-
-jobs:
- lint:
- runs-on: ${{ matrix.os }}
-
- strategy:
- fail-fast: false
- matrix:
- python-version: [3.8]
- os: [ubuntu-latest]
-
- steps:
- - uses: actions/checkout@v3
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
- with:
- python-version: ${{ matrix.python-version }}
- - name: Install dependencies
- run: |
- pip install -U pip setuptools
- pip install -r dev-requirements.txt
- pip uninstall -y charset-normalizer
- - name: Install the package
- run: |
- python -m build
- pip install ./dist/*.whl
- - name: Type checking (Mypy)
- run: |
- mypy --strict charset_normalizer
- - name: Import sorting check (isort)
- run: |
- isort --check charset_normalizer
- - name: Code format (Black)
- run: |
- black --check --diff --target-version=py36 charset_normalizer
- - name: Style guide enforcement (Flake8)
- run: |
- flake8 charset_normalizer
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/.github/workflows/mypyc-verify.yml new/charset_normalizer-3.2.0/.github/workflows/mypyc-verify.yml
--- old/charset_normalizer-3.1.0/.github/workflows/mypyc-verify.yml  2023-03-06 07:46:55.000000000 +0100
+++ new/charset_normalizer-3.2.0/.github/workflows/mypyc-verify.yml  1970-01-01 01:00:00.000000000 +0100
@@ -1,40 +0,0 @@
-name: MYPYC Run
-
-on: [push, pull_request]
-
-jobs:
- detection_coverage:
- runs-on: ${{ matrix.os }}
-
- strategy:
- fail-fast: false
- matrix:
- python-version: [3.7, 3.8, 3.9, "3.10", "3.11"]
- os: [ubuntu-latest]
-
- steps:
- - uses: actions/checkout@v3
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
- with:
- python-version: ${{ matrix.python-version }}
- - name: Install dependencies
- run: |
- pip install -U pip setuptools
- pip install -r dev-requirements.txt
- pip uninstall -y charset-normalizer
- - name: Install the package
- env:
- CHARSET_NORMALIZER_USE_MYPYC: '1'
- run: |
- python -m build --no-isolation
- pip install ./dist/*.whl
- - name: Clone the complete dataset
- run: |
- git clone https://github.com/Ousret/char-dataset.git
- - name: Coverage WITH preemptive
- run: |
- python ./bin/coverage.py --coverage 97 --with-preemptive
- - name: Coverage WITHOUT preemptive
- run: |
- python ./bin/coverage.py --coverage 95
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/.github/workflows/performance.yml new/charset_normalizer-3.2.0/.github/workflows/performance.yml
--- old/charset_normalizer-3.1.0/.github/workflows/performance.yml  2023-03-06 07:46:55.000000000 +0100
+++ new/charset_normalizer-3.2.0/.github/workflows/performance.yml  1970-01-01 01:00:00.000000000 +0100
@@ -1,41 +0,0 @@
-name: Performance Check
-
-on: [pull_request]
-
-jobs:
- performance:
- runs-on: ${{ matrix.os }}
-
- strategy:
- fail-fast: false
- matrix:
- python-version: [3.9]
- os: [ubuntu-latest]
-
- steps:
- - uses: actions/checkout@v3
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
- with:
- python-version: ${{ matrix.python-version }}
- - name: Install dependencies
- run: |
- pip install -U pip setuptools
- pip install -r dev-requirements.txt
- pip uninstall -y charset-normalizer
- - name: Install the package
- run: |
- python -m build
- pip install ./dist/*.whl
- - name: Clone the complete dataset
- run: |
- git clone https://github.com/Ousret/char-dataset.git
- - name: Performance (Normal)
- run: |
- python ./bin/performance.py
- - name: Performance (Medium)
- run: |
- python ./bin/performance.py --size-increase 2
- - name: Performance (Big)
- run: |
- python ./bin/performance.py --size-increase 4
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/.github/workflows/python-publish.yml new/charset_normalizer-3.2.0/.github/workflows/python-publish.yml
--- old/charset_normalizer-3.1.0/.github/workflows/python-publish.yml  2023-03-06 07:46:55.000000000 +0100
+++ new/charset_normalizer-3.2.0/.github/workflows/python-publish.yml  1970-01-01 01:00:00.000000000 +0100
@@ -1,263 +0,0 @@
-name: Release-Deployment CI
-
-on:
- workflow_dispatch:
- release:
- types: [created]
-
-jobs:
-
- lint:
- runs-on: ${{ matrix.os }}
-
- strategy:
- fail-fast: false
- matrix:
- python-version: [ 3.9 ]
- os: [ ubuntu-latest ]
-
- steps:
- - uses: actions/checkout@v3
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
- with:
- python-version: ${{ matrix.python-version }}
- - name: Install dependencies
- run: |
- pip install -U pip setuptools
- pip install -r dev-requirements.txt
- pip uninstall -y charset-normalizer
- - name: Install the package
- run: |
- python -m build
- pip install ./dist/*.whl
- - name: Type checking (Mypy)
- run: |
- mypy charset_normalizer
- - name: Import sorting check (isort)
- run: |
- isort --check charset_normalizer
- - name: Code format (Black)
- run: |
- black --check --diff --target-version=py35 charset_normalizer
- - name: Style guide enforcement (Flake8)
- run: |
- flake8 charset_normalizer
-
- tests:
- runs-on: ${{ matrix.os }}
- needs:
- - lint
-
- strategy:
- fail-fast: false
- matrix:
- python-version: [ 3.7, 3.8, 3.9, "3.10", "3.11" ]
- os: [ ubuntu-latest ]
-
- steps:
- - uses: actions/checkout@v3
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
- with:
- python-version: ${{ matrix.python-version }}
- - name: Install dependencies
- run: |
- pip install -U pip setuptools
- pip install -r dev-requirements.txt
- pip uninstall -y charset-normalizer
- - name: Install the package
- run: |
- python -m build
- pip install ./dist/*.whl
- - name: Run tests
- run: |
- pytest
-
- detection_coverage:
- runs-on: ${{ matrix.os }}
- needs:
- - tests
-
- strategy:
- fail-fast: false
- matrix:
- python-version: [ 3.9, "3.10" ]
- os: [ ubuntu-latest ]
-
- steps:
- - uses: actions/checkout@v3
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
- with:
- python-version: ${{ matrix.python-version }}
- - name: Install dependencies
- run: |
- pip install -U pip setuptools
- pip install -r dev-requirements.txt
- pip uninstall -y charset-normalizer
- - name: Install the package
- run: |
- python -m build
- pip install ./dist/*.whl
- - name: Clone the complete dataset
- run: |
- git clone https://github.com/Ousret/char-dataset.git
- - name: Coverage WITH preemptive
- run: |
- python ./bin/coverage.py --coverage 97 --with-preemptive
- - name: Coverage WITHOUT preemptive
- run: |
- python ./bin/coverage.py --coverage 95
- - name: BC Coverage (With Chardet)
- run: |
- python ./bin/bc.py --coverage 80
-
- integration:
- runs-on: ${{ matrix.os }}
- needs:
- - detection_coverage
- strategy:
- fail-fast: false
- matrix:
- python-version: [ 3.9, "3.10" ]
- os: [ ubuntu-latest ]
-
- steps:
- - uses: actions/checkout@v3
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
- with:
- python-version: ${{ matrix.python-version }}
- - name: Install dependencies
- run: |
- pip install -U pip setuptools
- pip install -r dev-requirements.txt
- - name: Remove Chardet & Charset-Normalizer
- run: |
- pip uninstall -y chardet
- pip uninstall -y charset-normalizer
- - name: Install the package
- run: |
- python -m build
- pip install ./dist/*.whl
- - name: Clone the complete dataset
- run: |
- git clone https://github.com/Ousret/char-dataset.git
- - name: Start the Flask server
- run: |
- python ./bin/serve.py &
- - name: Integration Tests with Requests
- run: |
- python ./bin/integration.py
- universal-wheel:
- runs-on: ubuntu-latest
- needs:
- - integration
- steps:
- - uses: actions/checkout@v3
- - name: Set up Python
- uses: actions/setup-python@v4
- with:
- python-version: '3.x'
- - name: Update pip, setuptools, wheel, build and twine
- run: |
- python -m pip install --upgrade pip
- pip install setuptools wheel twine build
- - name: Build Wheel
- env:
- CHARSET_NORMALIZER_USE_MYPYC: '0'
- run: python -m build
- - name: Upload artifacts
- uses: actions/upload-artifact@v3
- with:
- name: dist
- path: dist
-
- build-wheels:
- name: Build wheels on ${{ matrix.os }} ${{ matrix.qemu }}
- runs-on: ${{ matrix.os }}-latest
- needs: universal-wheel
- strategy:
- matrix:
- os: [ ubuntu, windows, macos ]
- qemu: [ '' ]
- include:
- # Split ubuntu job for the sake of speed-up
- - os: ubuntu
- qemu: aarch64
- - os: ubuntu
- qemu: ppc64le
- - os: ubuntu
- qemu: s390x
- steps:
- - name: Checkout
- uses: actions/checkout@v3
- with:
- submodules: true
- - name: Set up QEMU
- if: ${{ matrix.qemu }}
- uses: docker/setup-qemu-action@v2
- with:
- platforms: all
- id: qemu
- - name: Prepare emulation
- run: |
- if [[ -n "${{ matrix.qemu }}" ]]; then
- # Build emulated architectures only if QEMU is set,
- # use default "auto" otherwise
- echo "CIBW_ARCHS_LINUX=${{ matrix.qemu }}" >> $GITHUB_ENV
- fi
- shell: bash
- - name: Setup Python
- uses: actions/setup-python@v4
- - name: Update pip, wheel, setuptools, build, twine
- run: |
- python -m pip install -U pip wheel setuptools build twine
- - name: Build wheels
- uses: pypa/[email protected]
- env:
- #CIBW_BUILD_FRONTEND: "build"
- CIBW_ARCHS_MACOS: x86_64 arm64 universal2
- CIBW_ENVIRONMENT: CHARSET_NORMALIZER_USE_MYPYC='1'
- CIBW_BEFORE_BUILD: pip install -r build-requirements.txt
- #CIBW_CONFIG_SETTINGS: "--build-option=--no-isolation"
- CIBW_TEST_REQUIRES: pytest
- CIBW_TEST_COMMAND: pytest -c {package} {package}/tests
- CIBW_SKIP: pp*
- - name: Upload artifacts
- uses: actions/upload-artifact@v3
- with:
- name: dist
- path: ./wheelhouse/*.whl
-
- deploy:
-
- runs-on: ubuntu-latest
- needs:
- - build-wheels
-
- steps:
- - uses: actions/checkout@v3
- - name: Set up Python
- uses: actions/setup-python@v4
- with:
- python-version: '3.x'
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- pip install setuptools wheel twine
-      - name: Download distributions
- uses: actions/download-artifact@v3
- with:
- name: dist
- path: dist
- - name: Collected dists
- run: |
- tree dist
- - name: Publish
- env:
- TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
- TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
- run: |
- twine upload dist/*
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/.github/workflows/run-tests.yml new/charset_normalizer-3.2.0/.github/workflows/run-tests.yml
--- old/charset_normalizer-3.1.0/.github/workflows/run-tests.yml  2023-03-06 07:46:55.000000000 +0100
+++ new/charset_normalizer-3.2.0/.github/workflows/run-tests.yml  1970-01-01 01:00:00.000000000 +0100
@@ -1,33 +0,0 @@
-name: Tests
-
-on: [push, pull_request]
-
-jobs:
- tests:
- runs-on: ${{ matrix.os }}
-
- strategy:
- fail-fast: false
- matrix:
- python-version: [3.7, 3.8, 3.9, "3.10", "3.11", "3.12-dev"]
- os: [ubuntu-latest]
-
- steps:
- - uses: actions/checkout@v3
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
- with:
- python-version: ${{ matrix.python-version }}
- - name: Install dependencies
- run: |
- pip install -U pip setuptools
- pip install -r dev-requirements.txt
- pip uninstall -y charset-normalizer
- - name: Install the package
- run: |
- python -m build --no-isolation
- pip install ./dist/*.whl
- - name: Run tests
- run: |
- pytest
- - uses: codecov/codecov-action@v3
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/CHANGELOG.md new/charset_normalizer-3.2.0/CHANGELOG.md
--- old/charset_normalizer-3.1.0/CHANGELOG.md  2023-03-06 07:46:55.000000000 +0100
+++ new/charset_normalizer-3.2.0/CHANGELOG.md  2023-07-07 20:01:05.000000000 +0200
@@ -2,6 +2,20 @@
All notable changes to charset-normalizer will be documented in this file.
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
+## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)
+
+### Changed
+- Typehint for function `from_path` no longer enforce `PathLike` as its first argument
+- Minor improvement over the global detection reliability
+
+### Added
+- Introduce function `is_binary` that relies on main capabilities, and optimized to detect binaries
+- Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp` that allow a deeper control over the detection (default True)
+- Explicit support for Python 3.12
+
+### Fixed
+- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289)
+
## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)

### Added
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/README.md new/charset_normalizer-3.2.0/README.md
--- old/charset_normalizer-3.1.0/README.md  2023-03-06 07:46:55.000000000 +0100
+++ new/charset_normalizer-3.2.0/README.md  2023-07-07 20:01:05.000000000 +0200
@@ -1,16 +1,16 @@
-<h1 align="center">Charset Detection, for Everyone 👋 <a href="https://twitter.com/intent/tweet?text=The%20Real%20First%20Universal%20Charset%20%26%20Language%20Detector&url=https://www.github.com/Ousret/charset_normalizer&hashtags=python,encoding,chardet,developers"><img src="https://img.shields.io/twitter/url/http/shields.io.svg?style=social"/></a></h1>
+<h1 align="center">Charset Detection, for Everyone 👋</h1>
<p align="center">
  <sup>The Real First Universal Charset Detector</sup><br>
  <a href="https://pypi.org/project/charset-normalizer">
    <img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
  </a>
-  <a href="https://codecov.io/gh/Ousret/charset_normalizer">
-    <img src="https://codecov.io/gh/Ousret/charset_normalizer/branch/master/graph/badge.svg" />
-  </a>
  <a href="https://pepy.tech/project/charset-normalizer/">
    <img alt="Download Count Total" src="https://pepy.tech/badge/charset-normalizer/month" />
  </a>
+  <a href="https://bestpractices.coreinfrastructure.org/projects/7297">
+    <img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
+  </a>
</p>
> A library that helps you read text from an unknown charset encoding.<br />
> Motivated by `chardet`,
@@ -33,8 +33,8 @@
| `Native Python` | ✅ | ✅ | ❌ |
| `Detect spoken language` | ❌ | ✅ | N/A |
| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ |
-| `Whl Size` | 193.6 kB | 39.5 kB | ~200 kB |
-| `Supported Encoding` | 33 | :tada: [90](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |
+| `Whl Size` | 193.6 kB | 40 kB | ~200 kB |
+| `Supported Encoding` | 33 | 🎉 [90](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |

<p align="center">
<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
@@ -42,10 +42,6 @@
*\*\* : They are clearly using specific code for a specific encoding even if covering most of used one*<br>
Did you got there because of the logs? See [https://charset-normalizer.readthedocs.io/en/latest/user/miscellaneous.html](https://charset-normalizer.readthedocs.io/en/latest/user/miscellaneous.html)
-## ⭐ Your support
-
-*Fork, test-it, star-it, submit your ideas! We do listen.*
-
## ⚡ Performance

This package offer better performance than its counterpart Chardet. Here are some numbers.
@@ -70,7 +66,8 @@
## ✨ Installation
-Using PyPi for latest stable
+Using pip:
+
```sh
pip install charset-normalizer -U
```
@@ -115,7 +112,7 @@
normalizer ./data/sample.1.fr.srt
```
-:tada: Since version 1.4.0 the CLI produce easily usable stdout result in JSON format.
+🎉 Since version 1.4.0 the CLI produce easily usable stdout result in JSON format.
```json
{
@@ -229,7 +226,7 @@
## 💼 For Enterprise
Professional support for charset-normalizer is available as part of the
[Tidelift
-Subscription][1]. Tidelift gives software development teams a single source
for
+Subscription][1]. Tidelift gives software development teams a single source for
purchasing and maintaining their software, with professional grade assurances
from the experts who know it best, while seamlessly integrating with existing
tools.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/bin/performance.py new/charset_normalizer-3.2.0/bin/performance.py
--- old/charset_normalizer-3.1.0/bin/performance.py  2023-03-06 07:46:55.000000000 +0100
+++ new/charset_normalizer-3.2.0/bin/performance.py  2023-07-07 20:01:05.000000000 +0200
@@ -57,31 +57,35 @@
        )
        print("   --> Charset-Normalizer: " + str(charset_normalizer_results[-1]) + "s")
-    chardet_avg_delay = mean(chardet_results)
-    chardet_99p = calc_percentile(chardet_results, 99)
-    chardet_95p = calc_percentile(chardet_results, 95)
-    chardet_50p = calc_percentile(chardet_results, 50)
-
-    charset_normalizer_avg_delay = mean(charset_normalizer_results)
-    charset_normalizer_99p = calc_percentile(charset_normalizer_results, 99)
-    charset_normalizer_95p = calc_percentile(charset_normalizer_results, 95)
-    charset_normalizer_50p = calc_percentile(charset_normalizer_results, 50)
+    chardet_avg_delay = round(mean(chardet_results) * 1000)
+    chardet_99p = round(calc_percentile(chardet_results, 99) * 1000)
+    chardet_95p = round(calc_percentile(chardet_results, 95) * 1000)
+    chardet_50p = round(calc_percentile(chardet_results, 50) * 1000)
+
+    charset_normalizer_avg_delay = round(mean(charset_normalizer_results) * 1000)
+    charset_normalizer_99p = round(calc_percentile(charset_normalizer_results, 99) * 1000)
+    charset_normalizer_95p = round(calc_percentile(charset_normalizer_results, 95) * 1000)
+    charset_normalizer_50p = round(calc_percentile(charset_normalizer_results, 50) * 1000)
+
+    # mypyc can offer performance ~1ms in the 50p. When eq to 0 assume 1 due to imprecise nature of this test.
+    if charset_normalizer_50p == 0:
+        charset_normalizer_50p = 1
    print("")
    print("------------------------------")
    print("--> Chardet Conclusions")
-    print("   --> Avg: " + str(chardet_avg_delay) + "s")
-    print("   --> 99th: " + str(chardet_99p) + "s")
-    print("   --> 95th: " + str(chardet_95p) + "s")
-    print("   --> 50th: " + str(chardet_50p) + "s")
+    print("   --> Avg: " + str(chardet_avg_delay) + "ms")
+    print("   --> 99th: " + str(chardet_99p) + "ms")
+    print("   --> 95th: " + str(chardet_95p) + "ms")
+    print("   --> 50th: " + str(chardet_50p) + "ms")
    print("------------------------------")
    print("--> Charset-Normalizer Conclusions")
-    print("   --> Avg: " + str(charset_normalizer_avg_delay) + "s")
-    print("   --> 99th: " + str(charset_normalizer_99p) + "s")
-    print("   --> 95th: " + str(charset_normalizer_95p) + "s")
-    print("   --> 50th: " + str(charset_normalizer_50p) + "s")
+    print("   --> Avg: " + str(charset_normalizer_avg_delay) + "ms")
+    print("   --> 99th: " + str(charset_normalizer_99p) + "ms")
+    print("   --> 95th: " + str(charset_normalizer_95p) + "ms")
+    print("   --> 50th: " + str(charset_normalizer_50p) + "ms")
    print("------------------------------")
    print("--> Charset-Normalizer / Chardet: Performance Сomparison")
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/build-requirements.txt new/charset_normalizer-3.2.0/build-requirements.txt
--- old/charset_normalizer-3.1.0/build-requirements.txt  2023-03-06 07:46:55.000000000 +0100
+++ new/charset_normalizer-3.2.0/build-requirements.txt  2023-07-07 20:01:05.000000000 +0200
@@ -1,5 +1,5 @@
# in the meantime we migrate to pyproject.toml
# this represent the minimum requirement to build (for the optional speedup)
-mypy==1.0.1
+mypy==1.4.1
build==0.10.0
-wheel==0.38.4
+wheel==0.40.0
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/charset_normalizer/__init__.py new/charset_normalizer-3.2.0/charset_normalizer/__init__.py
--- old/charset_normalizer-3.1.0/charset_normalizer/__init__.py  2023-03-06 07:46:55.000000000 +0100
+++ new/charset_normalizer-3.2.0/charset_normalizer/__init__.py  2023-07-07 20:01:05.000000000 +0200
@@ -21,7 +21,7 @@
"""
import logging
-from .api import from_bytes, from_fp, from_path
+from .api import from_bytes, from_fp, from_path, is_binary
from .legacy import detect
from .models import CharsetMatch, CharsetMatches
from .utils import set_logging_handler
@@ -31,6 +31,7 @@
"from_fp",
"from_path",
"from_bytes",
+ "is_binary",
"detect",
"CharsetMatch",
"CharsetMatches",
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/charset_normalizer/api.py new/charset_normalizer-3.2.0/charset_normalizer/api.py
--- old/charset_normalizer-3.1.0/charset_normalizer/api.py  2023-03-06 07:46:55.000000000 +0100
+++ new/charset_normalizer-3.2.0/charset_normalizer/api.py  2023-07-07 20:01:05.000000000 +0200
@@ -1,6 +1,6 @@
import logging
from os import PathLike
-from typing import Any, BinaryIO, List, Optional, Set
+from typing import BinaryIO, List, Optional, Set, Union
from .cd import (
coherence_ratio,
@@ -31,7 +31,7 @@
def from_bytes(
- sequences: bytes,
+ sequences: Union[bytes, bytearray],
steps: int = 5,
chunk_size: int = 512,
threshold: float = 0.2,
@@ -40,6 +40,7 @@
preemptive_behaviour: bool = True,
explain: bool = False,
language_threshold: float = 0.1,
+ enable_fallback: bool = True,
) -> CharsetMatches:
"""
Given a raw bytes sequence, return the best possibles charset usable to render str objects.
@@ -361,7 +362,8 @@
)
# Preparing those fallbacks in case we got nothing.
if (
- encoding_iana in ["ascii", "utf_8", specified_encoding]
+ enable_fallback
+ and encoding_iana in ["ascii", "utf_8", specified_encoding]
and not lazy_str_hard_failure
):
fallback_entry = CharsetMatch(
@@ -507,6 +509,7 @@
preemptive_behaviour: bool = True,
explain: bool = False,
language_threshold: float = 0.1,
+ enable_fallback: bool = True,
) -> CharsetMatches:
"""
Same thing than the function from_bytes but using a file pointer that is already ready.
@@ -522,11 +525,12 @@
preemptive_behaviour,
explain,
language_threshold,
+ enable_fallback,
)
def from_path(
- path: "PathLike[Any]",
+ path: Union[str, bytes, PathLike], # type: ignore[type-arg]
steps: int = 5,
chunk_size: int = 512,
threshold: float = 0.20,
@@ -535,6 +539,7 @@
preemptive_behaviour: bool = True,
explain: bool = False,
language_threshold: float = 0.1,
+ enable_fallback: bool = True,
) -> CharsetMatches:
"""
Same thing than the function from_bytes but with one extra step. Opening and reading given file path in binary mode.
@@ -551,4 +556,71 @@
preemptive_behaviour,
explain,
language_threshold,
+ enable_fallback,
)
+
+
+def is_binary(
+    fp_or_path_or_payload: Union[PathLike, str, BinaryIO, bytes], # type: ignore[type-arg]
+ steps: int = 5,
+ chunk_size: int = 512,
+ threshold: float = 0.20,
+ cp_isolation: Optional[List[str]] = None,
+ cp_exclusion: Optional[List[str]] = None,
+ preemptive_behaviour: bool = True,
+ explain: bool = False,
+ language_threshold: float = 0.1,
+ enable_fallback: bool = False,
+) -> bool:
+ """
+ Detect if the given input (file, bytes, or path) points to a binary file.
aka. not a string.
+ Based on the same main heuristic algorithms and default kwargs at the sole
exception that fallbacks match
+ are disabled to be stricter around ASCII-compatible but unlikely to be a
string.
+ """
+ if isinstance(fp_or_path_or_payload, (str, PathLike)):
+ guesses = from_path(
+ fp_or_path_or_payload,
+ steps=steps,
+ chunk_size=chunk_size,
+ threshold=threshold,
+ cp_isolation=cp_isolation,
+ cp_exclusion=cp_exclusion,
+ preemptive_behaviour=preemptive_behaviour,
+ explain=explain,
+ language_threshold=language_threshold,
+ enable_fallback=enable_fallback,
+ )
+ elif isinstance(
+ fp_or_path_or_payload,
+ (
+ bytes,
+ bytearray,
+ ),
+ ):
+ guesses = from_bytes(
+ fp_or_path_or_payload,
+ steps=steps,
+ chunk_size=chunk_size,
+ threshold=threshold,
+ cp_isolation=cp_isolation,
+ cp_exclusion=cp_exclusion,
+ preemptive_behaviour=preemptive_behaviour,
+ explain=explain,
+ language_threshold=language_threshold,
+ enable_fallback=enable_fallback,
+ )
+ else:
+ guesses = from_fp(
+ fp_or_path_or_payload,
+ steps=steps,
+ chunk_size=chunk_size,
+ threshold=threshold,
+ cp_isolation=cp_isolation,
+ cp_exclusion=cp_exclusion,
+ preemptive_behaviour=preemptive_behaviour,
+ explain=explain,
+ language_threshold=language_threshold,
+ enable_fallback=enable_fallback,
+ )
+
+ return not guesses
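Per the dispatch above, `is_binary` accepts any of the three input kinds; a quick sketch (the inputs mirror the new tests/test_isbinary.py at the end of this diff, and the text-file path is hypothetical):

    from io import BytesIO
    from charset_normalizer import is_binary

    print(is_binary(b"\x00\x5f\x2f\xff" * 50))    # raw bytes -> True
    print(is_binary(BytesIO(b"hello world")))     # file pointer -> False
    print(is_binary("./sample-polish.txt"))       # path to plain text -> False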
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/charset_normalizer/md.py new/charset_normalizer-3.2.0/charset_normalizer/md.py
--- old/charset_normalizer-3.1.0/charset_normalizer/md.py  2023-03-06 07:46:55.000000000 +0100
+++ new/charset_normalizer-3.2.0/charset_normalizer/md.py  2023-07-07 20:01:05.000000000 +0200
@@ -294,14 +294,25 @@
        if buffer_length >= 4:
            if self._buffer_accent_count / buffer_length > 0.34:
                self._is_current_word_bad = True
-            # Word/Buffer ending with a upper case accentuated letter are so rare,
+            # Word/Buffer ending with an upper case accentuated letter are so rare,
            # that we will consider them all as suspicious. Same weight as foreign_long suspicious.
            if is_accentuated(self._buffer[-1]) and self._buffer[-1].isupper():
                self._foreign_long_count += 1
                self._is_current_word_bad = True
            if buffer_length >= 24 and self._foreign_long_watch:
-                self._foreign_long_count += 1
-                self._is_current_word_bad = True
+                camel_case_dst = [
+                    i
+                    for c, i in zip(self._buffer, range(0, buffer_length))
+                    if c.isupper()
+                ]
+                probable_camel_cased: bool = False
+
+                if camel_case_dst and (len(camel_case_dst) / buffer_length <= 0.3):
+                    probable_camel_cased = True
+
+                if not probable_camel_cased:
+                    self._foreign_long_count += 1
+                    self._is_current_word_bad = True
        if self._is_current_word_bad:
            self._bad_word_count += 1
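The replacement block above stops counting long camel-cased identifiers as 'foreign long' words. A standalone sketch of the same ratio test (the buffer value is invented for illustration):

    buffer = "WordPressBackgroundProcessing"  # 29 chars, past the >= 24 threshold
    upper_positions = [i for i, c in enumerate(buffer) if c.isupper()]

    # Camel-cased per the diff: at least one capital, and capitals make up
    # no more than 30% of the buffer.
    probable_camel_cased = bool(upper_positions) and (
        len(upper_positions) / len(buffer) <= 0.3
    )
    print(probable_camel_cased)  # True -> the word is no longer flagged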
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/charset_normalizer/utils.py new/charset_normalizer-3.2.0/charset_normalizer/utils.py
--- old/charset_normalizer-3.1.0/charset_normalizer/utils.py  2023-03-06 07:46:55.000000000 +0100
+++ new/charset_normalizer-3.2.0/charset_normalizer/utils.py  2023-07-07 20:01:05.000000000 +0200
@@ -120,12 +120,12 @@
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_separator(character: str) -> bool:
-    if character.isspace() or character in {"｜", "+", ",", ";", "<", ">"}:
+    if character.isspace() or character in {"｜", "+", "<", ">"}:
        return True
    character_category: str = unicodedata.category(character)
-    return "Z" in character_category
+    return "Z" in character_category or character_category in {"Po", "Pd", "Pc"}
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
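The revised `is_separator` above can drop ',' and ';' from its literal set because the new Unicode category rule already covers them: Po catches most punctuation, Pd dashes, Pc connectors. A quick check of the rule in isolation:

    import unicodedata

    for character in (",", ";", "-", "_", "a"):
        category = unicodedata.category(character)
        separator = "Z" in category or category in {"Po", "Pd", "Pc"}
        print(character, category, separator)
    # ',' and ';' are Po, '-' is Pd, '_' is Pc -> separators; 'a' (Ll) is not.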
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/charset_normalizer/version.py new/charset_normalizer-3.2.0/charset_normalizer/version.py
--- old/charset_normalizer-3.1.0/charset_normalizer/version.py  2023-03-06 07:46:55.000000000 +0100
+++ new/charset_normalizer-3.2.0/charset_normalizer/version.py  2023-07-07 20:01:05.000000000 +0200
@@ -2,5 +2,5 @@
Expose version
"""
-__version__ = "3.1.0"
+__version__ = "3.2.0"
VERSION = __version__.split(".")
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/dev-requirements.txt new/charset_normalizer-3.2.0/dev-requirements.txt
--- old/charset_normalizer-3.1.0/dev-requirements.txt  2023-03-06 07:46:55.000000000 +0100
+++ new/charset_normalizer-3.2.0/dev-requirements.txt  2023-07-07 20:01:05.000000000 +0200
@@ -1,13 +1,13 @@
flake8==5.0.4
chardet==5.1.0
isort==5.11.4
-codecov==2.1.12
-pytest-cov==4.0.0
+codecov==2.1.13
+pytest-cov==4.1.0
build==0.10.0
-wheel==0.38.4
+wheel==0.40.0
-black==23.1.0
-mypy==1.0.1
+black==23.3.0
+mypy==1.4.1
Flask==2.2.3
-pytest==7.2.1
-requests==2.28.2
+pytest==7.4.0
+requests==2.31.0
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/docs/api.rst new/charset_normalizer-3.2.0/docs/api.rst
--- old/charset_normalizer-3.1.0/docs/api.rst  2023-03-06 07:46:55.000000000 +0100
+++ new/charset_normalizer-3.2.0/docs/api.rst  2023-07-07 20:01:05.000000000 +0200
@@ -13,6 +13,7 @@
.. autofunction:: from_bytes
.. autofunction:: from_fp
.. autofunction:: from_path
+.. autofunction:: is_binary
.. autoclass:: charset_normalizer.models.CharsetMatches
:inherited-members:
@@ -100,5 +101,3 @@
.. class:: os.PathLike
-
- Used as a generic way to accept AnyStr for paths.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/docs/index.rst new/charset_normalizer-3.2.0/docs/index.rst
--- old/charset_normalizer-3.1.0/docs/index.rst  2023-03-06 07:46:55.000000000 +0100
+++ new/charset_normalizer-3.2.0/docs/index.rst  2023-07-07 20:01:05.000000000 +0200
@@ -51,6 +51,7 @@
- Transpose any encoded content to Unicode the best we can.
- Detect spoken language in text.
- Ship with a great CLI.
+- Also, detect binaries.
Start Guide
-----------
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/docs/user/miscellaneous.rst new/charset_normalizer-3.2.0/docs/user/miscellaneous.rst
--- old/charset_normalizer-3.1.0/docs/user/miscellaneous.rst  2023-03-06 07:46:55.000000000 +0100
+++ new/charset_normalizer-3.2.0/docs/user/miscellaneous.rst  2023-07-07 20:01:05.000000000 +0200
@@ -44,3 +44,21 @@
Then regarding the others log entries, they will be pushed as `Level 5`. Commonly known as TRACE level, but we do not register it globally.
+
+
+Detect binaries
+---------------
+
+This package offers a neat way to detect files that can be considered as 'binaries'
+meaning that it is not likely to be a text-file.
+
+ ::
+
+ from charset_normalizer import is_binary
+
+ # It can receive both a path or bytes or even a file pointer.
+ result = is_binary("./my-file.ext")
+
+ # This should print 'True' or 'False'
+ print(result)
+
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/setup.cfg new/charset_normalizer-3.2.0/setup.cfg
--- old/charset_normalizer-3.1.0/setup.cfg  2023-03-06 07:46:55.000000000 +0100
+++ new/charset_normalizer-3.2.0/setup.cfg  2023-07-07 20:01:05.000000000 +0200
@@ -8,7 +8,6 @@
license = MIT
author_email = [email protected]
author = Ahmed TAHRI
-python_requires = >=3.7.0
project_urls =
Bug Reports = https://github.com/Ousret/charset_normalizer/issues
Documentation = https://charset-normalizer.readthedocs.io/en/latest
@@ -25,6 +24,7 @@
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11
+ Programming Language :: Python :: 3.12
Programming Language :: Python :: Implementation :: PyPy
Topic :: Text Processing :: Linguistic
Topic :: Utilities
@@ -49,6 +49,7 @@
[options]
packages = find:
include_package_data = True
+python_requires = >=3.7.0
[options.package_data]
charset_normalizer = py.typed
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-3.1.0/tests/test_isbinary.py new/charset_normalizer-3.2.0/tests/test_isbinary.py
--- old/charset_normalizer-3.1.0/tests/test_isbinary.py  1970-01-01 01:00:00.000000000 +0100
+++ new/charset_normalizer-3.2.0/tests/test_isbinary.py  2023-07-07 20:01:05.000000000 +0200
@@ -0,0 +1,28 @@
+import pytest
+import typing
+from io import BytesIO
+from base64 import b64decode
+from charset_normalizer import is_binary
+from os import path, pardir
+
+DIR_PATH = path.join(
+ path.dirname(path.realpath(__file__)),
+ pardir
+)
+
+
[email protected](
+ "raw, expected",
+ [
+ (b'\x00\x5f\x2f\xff'*50, True),
+ (b64decode("R0lGODlhAQABAAAAACw="), True),
+ (BytesIO(b64decode("R0lGODlhAQABAAAAACw=")), True),
+ ('sample-polish.txt', False),
+ ('sample-arabic.txt', False)
+ ]
+)
+def test_isbinary(raw: typing.Union[bytes, typing.BinaryIO, str], expected: bool) -> None:
+ if isinstance(raw, str):
+ raw = DIR_PATH + "/data/{}".format(raw)
+
+ assert is_binary(raw) is expected