Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package re2 for openSUSE:Factory checked in at 2024-03-03 20:19:25 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/re2 (Old) and /work/SRC/openSUSE:Factory/.re2.new.1770 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "re2" Sun Mar 3 20:19:25 2024 rev:58 rq:1154117 version:MACRO Changes: -------- --- /work/SRC/openSUSE:Factory/re2/re2.changes 2024-02-01 18:04:01.684184253 +0100 +++ /work/SRC/openSUSE:Factory/.re2.new.1770/re2.changes 2024-03-03 20:19:51.875009682 +0100 @@ -1,0 +2,6 @@ +Fri Mar 1 16:20:16 UTC 2024 - Andreas Stieger <andreas.stie...@gmx.de> + +- update to 2024-03-01: + * Fix bugs in Latin-1 handling + +------------------------------------------------------------------- Old: ---- re2-2024-02-01.tar.gz New: ---- re2-2024-03-01.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ re2.spec ++++++ --- /var/tmp/diff_new_pack.bVMbJN/_old 2024-03-03 20:19:52.315025600 +0100 +++ /var/tmp/diff_new_pack.bVMbJN/_new 2024-03-03 20:19:52.315025600 +0100 @@ -17,7 +17,7 @@ # -%global longver 2024-02-01 +%global longver 2024-03-01 %global shortver %(echo %{longver}|sed 's|-||g') %define libname libre2-11 Name: re2 ++++++ re2-2024-02-01.tar.gz -> re2-2024-03-01.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2024-02-01/.github/workflows/ci-bazel.yml new/re2-2024-03-01/.github/workflows/ci-bazel.yml --- old/re2-2024-02-01/.github/workflows/ci-bazel.yml 2024-01-30 22:02:44.000000000 +0100 +++ new/re2-2024-03-01/.github/workflows/ci-bazel.yml 2024-02-29 11:59:13.000000000 +0100 @@ -15,7 +15,8 @@ BAZELISK_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - uses: actions/checkout@v4 - # TODO(junyer): Use `v2` whenever a new release is tagged. - - uses: bazelbuild/setup-bazelisk@6244971d4f7ba9aca943c2f3ede2bbd813fcca51 + - uses: p0deje/setup-bazel@0.6.0 + with: + bazelisk-version: '1.x' - run: .github/bazel.sh shell: bash diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2024-02-01/.github/workflows/pages.yml new/re2-2024-03-01/.github/workflows/pages.yml --- old/re2-2024-02-01/.github/workflows/pages.yml 1970-01-01 01:00:00.000000000 +0100 +++ new/re2-2024-03-01/.github/workflows/pages.yml 2024-02-29 11:59:13.000000000 +0100 @@ -0,0 +1,41 @@ +name: Pages +on: + workflow_dispatch: +permissions: + contents: read +jobs: + build: + runs-on: ubuntu-latest + container: + image: emscripten/emsdk + # Don't run as root within the container. + # Neither Git nor Bazel appreciates that. + # 1001 is the GitHub Actions runner user. + options: --init --user 1001 + env: + BAZELISK_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # Bazel fails if the username is unknown. + USER: runner + steps: + - uses: actions/checkout@v4 + - uses: p0deje/setup-bazel@0.6.0 + with: + bazelisk-version: '1.x' + - run: app/build.sh + shell: bash + - uses: actions/upload-pages-artifact@v3 + with: + path: app/deploy + deploy: + needs: + - build + permissions: + contents: read + # Needed for Pages deployment. + id-token: write + pages: write + environment: github-pages + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/deploy-pages@v4 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2024-02-01/.github/workflows/python.yml new/re2-2024-03-01/.github/workflows/python.yml --- old/re2-2024-02-01/.github/workflows/python.yml 2024-01-30 22:02:44.000000000 +0100 +++ new/re2-2024-03-01/.github/workflows/python.yml 2024-02-29 11:59:13.000000000 +0100 @@ -35,8 +35,9 @@ # Stash the timestamp for the commit SHA that triggered the workflow. - run: echo "timestamp=$(git log -1 --pretty=%ct)" >> "${GITHUB_ENV}" shell: bash - # TODO(junyer): Use `v2` whenever a new release is tagged. - - uses: bazelbuild/setup-bazelisk@6244971d4f7ba9aca943c2f3ede2bbd813fcca51 + - uses: p0deje/setup-bazel@0.6.0 + with: + bazelisk-version: '1.x' - name: Prepare Python ${{ matrix.ver }} environment run: | "${PYTHON}" -m pip install --upgrade pip @@ -87,8 +88,9 @@ # Stash the timestamp for the commit SHA that triggered the workflow. - run: echo "timestamp=$(git log -1 --pretty=%ct)" >> "${GITHUB_ENV}" shell: bash - # TODO(junyer): Use `v2` whenever a new release is tagged. - - uses: bazelbuild/setup-bazelisk@6244971d4f7ba9aca943c2f3ede2bbd813fcca51 + - uses: p0deje/setup-bazel@0.6.0 + with: + bazelisk-version: '1.x' - uses: actions/setup-python@v5 with: python-version: ${{ matrix.ver }} @@ -137,12 +139,9 @@ # Stash the timestamp for the commit SHA that triggered the workflow. - run: echo "timestamp=$(git log -1 --pretty=%ct)" >> "${GITHUB_ENV}" shell: bash - # Avoid the Chocolatey install of Bazel getting in the way; - # `bazelbuild/setup-bazelisk` doesn't work for some reason. - - run: | - choco uninstall -y bazel - choco install -y bazelisk - shell: bash + - uses: p0deje/setup-bazel@0.6.0 + with: + bazelisk-version: '1.x' # Lowercase the architecture name for `actions/setup-python`. - run: | ARCHITECTURE=${{ matrix.arch.name }} @@ -223,4 +222,4 @@ uses: pypa/gh-action-pypi-publish@release/v1 with: password: ${{ secrets.PYPI_API_TOKEN }} - packages_dir: python/dist + packages-dir: python/dist diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2024-02-01/BUILD.bazel new/re2-2024-03-01/BUILD.bazel --- old/re2-2024-02-01/BUILD.bazel 2024-01-30 22:02:44.000000000 +0100 +++ new/re2-2024-03-01/BUILD.bazel 2024-02-29 11:59:13.000000000 +0100 @@ -76,17 +76,17 @@ }), visibility = ["//visibility:public"], deps = [ - "@com_google_absl//absl/base", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/container:fixed_array", - "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/container:flat_hash_set", - "@com_google_absl//absl/container:inlined_vector", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/strings:str_format", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/types:optional", - "@com_google_absl//absl/types:span", + "@abseil-cpp//absl/base", + "@abseil-cpp//absl/base:core_headers", + "@abseil-cpp//absl/container:fixed_array", + "@abseil-cpp//absl/container:flat_hash_map", + "@abseil-cpp//absl/container:flat_hash_set", + "@abseil-cpp//absl/container:inlined_vector", + "@abseil-cpp//absl/strings", + "@abseil-cpp//absl/strings:str_format", + "@abseil-cpp//absl/synchronization", + "@abseil-cpp//absl/types:optional", + "@abseil-cpp//absl/types:span", ], ) @@ -130,11 +130,11 @@ visibility = [":__subpackages__"], deps = [ ":re2", - "@com_google_absl//absl/base", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/strings:str_format", + "@abseil-cpp//absl/base", + "@abseil-cpp//absl/base:core_headers", + "@abseil-cpp//absl/flags:flag", + "@abseil-cpp//absl/strings", + "@abseil-cpp//absl/strings:str_format", "@googletest//:gtest", ], ) @@ -145,8 +145,8 @@ srcs = ["re2/testing/charclass_test.cc"], deps = [ ":testing", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/strings:str_format", + "@abseil-cpp//absl/base:core_headers", + "@abseil-cpp//absl/strings:str_format", "@googletest//:gtest", "@googletest//:gtest_main", ], @@ -158,7 +158,7 @@ srcs = ["re2/testing/compile_test.cc"], deps = [ ":testing", - "@com_google_absl//absl/base:core_headers", + "@abseil-cpp//absl/base:core_headers", "@googletest//:gtest", "@googletest//:gtest_main", ], @@ -171,7 +171,7 @@ deps = [ ":re2", ":testing", - "@com_google_absl//absl/base:core_headers", + "@abseil-cpp//absl/base:core_headers", "@googletest//:gtest", "@googletest//:gtest_main", ], @@ -183,7 +183,7 @@ srcs = ["re2/testing/mimics_pcre_test.cc"], deps = [ ":testing", - "@com_google_absl//absl/base:core_headers", + "@abseil-cpp//absl/base:core_headers", "@googletest//:gtest", "@googletest//:gtest_main", ], @@ -195,7 +195,7 @@ srcs = ["re2/testing/parse_test.cc"], deps = [ ":testing", - "@com_google_absl//absl/base:core_headers", + "@abseil-cpp//absl/base:core_headers", "@googletest//:gtest", "@googletest//:gtest_main", ], @@ -208,8 +208,8 @@ deps = [ ":re2", ":testing", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/strings", + "@abseil-cpp//absl/base:core_headers", + "@abseil-cpp//absl/strings", "@googletest//:gtest", "@googletest//:gtest_main", ], @@ -222,7 +222,7 @@ deps = [ ":re2", ":testing", - "@com_google_absl//absl/base:core_headers", + "@abseil-cpp//absl/base:core_headers", "@googletest//:gtest", "@googletest//:gtest_main", ], @@ -235,8 +235,8 @@ deps = [ ":re2", ":testing", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/strings:str_format", + "@abseil-cpp//absl/base:core_headers", + "@abseil-cpp//absl/strings:str_format", "@googletest//:gtest", "@googletest//:gtest_main", ], @@ -259,7 +259,7 @@ srcs = ["re2/testing/required_prefix_test.cc"], deps = [ ":testing", - "@com_google_absl//absl/base:core_headers", + "@abseil-cpp//absl/base:core_headers", "@googletest//:gtest", "@googletest//:gtest_main", ], @@ -271,7 +271,7 @@ srcs = ["re2/testing/search_test.cc"], deps = [ ":testing", - "@com_google_absl//absl/base:core_headers", + "@abseil-cpp//absl/base:core_headers", "@googletest//:gtest", "@googletest//:gtest_main", ], @@ -295,7 +295,7 @@ srcs = ["re2/testing/simplify_test.cc"], deps = [ ":testing", - "@com_google_absl//absl/base:core_headers", + "@abseil-cpp//absl/base:core_headers", "@googletest//:gtest", "@googletest//:gtest_main", ], @@ -319,9 +319,9 @@ deps = [ ":re2", ":testing", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/strings:str_format", + "@abseil-cpp//absl/base:core_headers", + "@abseil-cpp//absl/flags:flag", + "@abseil-cpp//absl/strings:str_format", "@googletest//:gtest", "@googletest//:gtest_main", ], @@ -377,8 +377,8 @@ srcs = ["re2/testing/random_test.cc"], deps = [ ":testing", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/strings:str_format", + "@abseil-cpp//absl/flags:flag", + "@abseil-cpp//absl/strings:str_format", "@googletest//:gtest", "@googletest//:gtest_main", ], @@ -391,10 +391,10 @@ deps = [ ":re2", ":testing", - "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/strings:str_format", - "@com_google_absl//absl/synchronization", + "@abseil-cpp//absl/container:flat_hash_map", + "@abseil-cpp//absl/flags:flag", + "@abseil-cpp//absl/strings:str_format", + "@abseil-cpp//absl/synchronization", "@google_benchmark//:benchmark_main", ], ) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2024-02-01/MODULE.bazel new/re2-2024-03-01/MODULE.bazel --- old/re2-2024-02-01/MODULE.bazel 2024-01-30 22:02:44.000000000 +0100 +++ new/re2-2024-03-01/MODULE.bazel 2024-02-29 11:59:13.000000000 +0100 @@ -6,26 +6,22 @@ module( name = "re2", - version = "2024-02-01", + version = "2024-03-01", compatibility_level = 1, ) bazel_dep(name = "platforms", version = "0.0.8") -bazel_dep(name = "apple_support", version = "1.11.1", repo_name = "build_bazel_apple_support") +bazel_dep(name = "apple_support", version = "1.14.0") bazel_dep(name = "rules_cc", version = "0.0.9") -bazel_dep(name = "abseil-cpp", version = "20240116.0", repo_name = "com_google_absl") -bazel_dep(name = "rules_python", version = "0.29.0") -bazel_dep(name = "pybind11_bazel", version = "2.11.1.bzl.1") +bazel_dep(name = "abseil-cpp", version = "20240116.1") +bazel_dep(name = "rules_python", version = "0.31.0") +bazel_dep(name = "pybind11_bazel", version = "2.11.1.bzl.2") # This is a temporary hack for `x64_x86_windows`. # TODO(junyer): Remove whenever no longer needed. cc_configure = use_extension("@bazel_tools//tools/cpp:cc_configure.bzl", "cc_configure_extension") use_repo(cc_configure, "local_config_cc") -python_configure = use_extension("@pybind11_bazel//:python_configure.bzl", "extension") -python_configure.toolchain(python_version = "3") # ignored when non-root module -use_repo(python_configure, "local_config_python", "pybind11") - # These dependencies will be ignored when the `re2` module is not # the root module (or when `--ignore_dev_dependency` is enabled). bazel_dep(name = "google_benchmark", version = "1.8.3", dev_dependency = True) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2024-02-01/app/build.sh new/re2-2024-03-01/app/build.sh --- old/re2-2024-02-01/app/build.sh 2024-01-30 22:02:44.000000000 +0100 +++ new/re2-2024-03-01/app/build.sh 2024-02-29 11:59:13.000000000 +0100 @@ -4,41 +4,29 @@ SRCDIR=$(readlink --canonicalize $(dirname $0)) DSTDIR=$(mktemp --directory --tmpdir $(basename $0).XXXXXXXXXX) -BAZEL=/tmp/bazel -BAZELISK_RELEASE=v1.17.0 - -if [[ ${UID} -ne 0 ]]; then - if [[ -d deploy ]]; then - echo -e '\033[1;31m' "** The ${PWD}/deploy directory exists! Refusing to clobber it! **" '\033[0m' - exit 1 - fi - mkdir deploy - sudo docker run -i -t --pull always --rm -v ${SRCDIR}/..:/src -v ${PWD}:/dst emscripten/emsdk /src/app/$(basename $0) - ls -l deploy -else - wget -O ${BAZEL} https://github.com/bazelbuild/bazelisk/releases/download/${BAZELISK_RELEASE}/bazelisk-linux-amd64 - chmod +x ${BAZEL} - - cd ${SRCDIR} - # Emscripten doesn't support `-fstack-protector`. - AR=emar CC=emcc \ - ${BAZEL} build --compilation_mode=opt \ - --copt=-fno-stack-protector \ - -- :all - cp ../bazel-bin/app/_re2.js ${DSTDIR} - # Clean up the sundry Bazel output directories. - ${BAZEL} clean --expunge - cp app.ts index.html _re2.d.ts ${DSTDIR} - cp package.json rollup.config.js tsconfig.json ${DSTDIR} - - cd ${DSTDIR} - npm install - npx tsc - npx rollup -c rollup.config.js -d deploy - mv deploy/* /dst/deploy -fi +cd ${SRCDIR} +# Emscripten doesn't support `-fstack-protector`. +AR=emar CC=emcc \ + bazel build --compilation_mode=opt \ + --copt=-fno-stack-protector \ + -- :all +cp ../bazel-bin/app/_re2.js ${DSTDIR} +bazel clean --expunge +cp app.ts index.html _re2.d.ts ${DSTDIR} +cp package.json rollup.config.js tsconfig.json ${DSTDIR} + +cd ${DSTDIR} +npm install +npx tsc +npx rollup -c rollup.config.js -d deploy cd ${SRCDIR} -rm -rf ${DSTDIR} +mkdir deploy +cat >deploy/index.html <<EOF +<html><head><meta http-equiv="refresh" content="0; url=https://github.com/google/re2"></head><body></body></html> +EOF +mkdir deploy/app +cp ${DSTDIR}/deploy/* deploy/app +ls -lR deploy exit 0 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2024-02-01/python/BUILD.bazel new/re2-2024-03-01/python/BUILD.bazel --- old/re2-2024-02-01/python/BUILD.bazel 2024-01-30 22:02:44.000000000 +0100 +++ new/re2-2024-03-01/python/BUILD.bazel 2024-02-29 11:59:13.000000000 +0100 @@ -12,7 +12,7 @@ srcs = ["_re2.cc"], deps = [ "//:re2", - "@com_google_absl//absl/strings", + "@abseil-cpp//absl/strings", ], ) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2024-02-01/python/setup.py new/re2-2024-03-01/python/setup.py --- old/re2-2024-02-01/python/setup.py 2024-01-30 22:02:44.000000000 +0100 +++ new/re2-2024-03-01/python/setup.py 2024-02-29 11:59:13.000000000 +0100 @@ -7,6 +7,7 @@ import setuptools.command.build_ext import shutil import sys +import sysconfig long_description = r"""A drop-in replacement for the re module. @@ -48,9 +49,6 @@ if 'GITHUB_ACTIONS' not in os.environ: return super().build_extension(ext) - # For @pybind11_bazel's `python_configure()`. - os.environ['PYTHON_BIN_PATH'] = sys.executable - cmd = ['bazel', 'build'] try: cpu = os.environ['BAZEL_CPU'] @@ -63,8 +61,9 @@ cmd.append(f'--extra_toolchains=@local_config_cc//:cc-toolchain-{cpu}') except KeyError: pass - # Register the local Python toolchain with highest priority. - cmd.append('--extra_toolchains=@local_config_python//:py_toolchain') + # Register the local Python toolchains with highest priority. + self.generate_python_toolchains() + cmd.append('--extra_toolchains=//python/toolchains:all') # Print debug information during toolchain resolution. cmd.append('--toolchain_resolution_debug=.*') cmd += ['--compilation_mode=opt', '--', ':all'] @@ -78,6 +77,88 @@ cmd = ['bazel', 'clean', '--expunge'] self.spawn(cmd) + def generate_python_toolchains(self): + include = sysconfig.get_path('include') + libs = os.path.join(include, '../libs') + + os.makedirs('toolchains') + shutil.copytree(include, 'toolchains/include') + try: + shutil.copytree(libs, 'toolchains/libs') + except FileNotFoundError: + # We must not be running on Windows. :) + pass + + with open('toolchains/BUILD.bazel', 'x') as file: + file.write( + """\ +load("@rules_python//python/cc:py_cc_toolchain.bzl", "py_cc_toolchain") +load("@rules_python//python:py_runtime.bzl", "py_runtime") +load("@rules_python//python:py_runtime_pair.bzl", "py_runtime_pair") + +package(default_visibility = ["//visibility:public"]) + +toolchain( + name = "py", + toolchain = ":py_toolchain", + toolchain_type = "@rules_python//python:toolchain_type", +) + +py_runtime_pair( + name = "py_toolchain", + py3_runtime = ":interpreter", +) + +py_runtime( + name = "interpreter", + interpreter_path = "{interpreter_path}", + interpreter_version_info = {{ + "major": "{major}", + "minor": "{minor}", + }}, + python_version = "PY3", +) + +toolchain( + name = "py_cc", + toolchain = ":py_cc_toolchain", + toolchain_type = "@rules_python//python/cc:toolchain_type", +) + +py_cc_toolchain( + name = "py_cc_toolchain", + headers = ":headers", + libs = ":libraries", + python_version = "{major}.{minor}", +) + +cc_library( + name = "headers", + hdrs = glob(["include/**/*.h"]), + includes = ["include"], + deps = select({{ + "@platforms//os:windows": [":interface_library"], + "//conditions:default": [], + }}), +) + +cc_import( + name = "interface_library", + interface_library = select({{ + "@platforms//os:windows": "libs/python{major}{minor}.lib", + "//conditions:default": None, + }}), + system_provided = True, +) + +# Not actually necessary for our purposes. :) +cc_library( + name = "libraries", +) +""".format(interpreter_path=sys.executable.replace('\\', '/'), + major=sys.version_info.major, + minor=sys.version_info.minor)) + def options(): bdist_wheel = {} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2024-02-01/re2/parse.cc new/re2-2024-03-01/re2/parse.cc --- old/re2-2024-02-01/re2/parse.cc 2024-01-30 22:02:44.000000000 +0100 +++ new/re2-2024-03-01/re2/parse.cc 2024-02-29 11:59:13.000000000 +0100 @@ -338,6 +338,20 @@ } // Add lo-hi to the class, along with their fold-equivalent characters. +static void AddFoldedRangeLatin1(CharClassBuilder* cc, Rune lo, Rune hi) { + while (lo <= hi) { + cc->AddRange(lo, lo); + if ('A' <= lo && lo <= 'Z') { + cc->AddRange(lo - 'A' + 'a', lo - 'A' + 'a'); + } + if ('a' <= lo && lo <= 'z') { + cc->AddRange(lo - 'a' + 'A', lo - 'a' + 'A'); + } + lo++; + } +} + +// Add lo-hi to the class, along with their fold-equivalent characters. // If lo-hi is already in the class, assume that the fold-equivalent // chars are there too, so there's no work to do. static void AddFoldedRange(CharClassBuilder* cc, Rune lo, Rune hi, int depth) { @@ -394,17 +408,26 @@ // Pushes the literal rune r onto the stack. bool Regexp::ParseState::PushLiteral(Rune r) { // Do case folding if needed. - if ((flags_ & FoldCase) && CycleFoldRune(r) != r) { - Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase); - re->ccb_ = new CharClassBuilder; - Rune r1 = r; - do { - if (!(flags_ & NeverNL) || r != '\n') { - re->ccb_->AddRange(r, r); - } - r = CycleFoldRune(r); - } while (r != r1); - return PushRegexp(re); + if (flags_ & FoldCase) { + if (flags_ & Latin1 && (('A' <= r && r <= 'Z') || + ('a' <= r && r <= 'z'))) { + Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase); + re->ccb_ = new CharClassBuilder; + AddFoldedRangeLatin1(re->ccb_, r, r); + return PushRegexp(re); + } + if (!(flags_ & Latin1) && CycleFoldRune(r) != r) { + Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase); + re->ccb_ = new CharClassBuilder; + Rune r1 = r; + do { + if (!(flags_ & NeverNL) || r != '\n') { + re->ccb_->AddRange(r, r); + } + r = CycleFoldRune(r); + } while (r != r1); + return PushRegexp(re); + } } // Exclude newline if applicable. @@ -776,7 +799,8 @@ while (re->op() == kRegexpConcat && re->nsub() > 0) re = re->sub()[0]; - *flags = static_cast<Regexp::ParseFlags>(re->parse_flags_ & Regexp::FoldCase); + *flags = static_cast<Regexp::ParseFlags>(re->parse_flags_ & + (Regexp::FoldCase | Regexp::Latin1)); if (re->op() == kRegexpLiteral) { *nrune = 1; @@ -1175,7 +1199,7 @@ if (re->op() == kRegexpCharClass) { CharClass* cc = re->cc(); for (CharClass::iterator it = cc->begin(); it != cc->end(); ++it) - ccb.AddRange(it->lo, it->hi); + ccb.AddRangeFlags(it->lo, it->hi, re->parse_flags()); } else if (re->op() == kRegexpLiteral) { if (re->parse_flags() & Regexp::FoldCase) { // AddFoldedRange() can terminate prematurely if the character class @@ -1194,7 +1218,7 @@ } re->Decref(); } - Regexp* re = Regexp::NewCharClass(ccb.GetCharClass(), flags); + Regexp* re = Regexp::NewCharClass(ccb.GetCharClass(), flags & ~Regexp::FoldCase); splices->emplace_back(re, sub + start, i - start); } @@ -1622,10 +1646,15 @@ } // If folding case, add fold-equivalent characters too. - if (parse_flags & Regexp::FoldCase) - AddFoldedRange(this, lo, hi, 0); - else + if (parse_flags & Regexp::FoldCase) { + if (parse_flags & Regexp::Latin1) { + AddFoldedRangeLatin1(this, lo, hi); + } else { + AddFoldedRange(this, lo, hi, 0); + } + } else { AddRange(lo, hi); + } } // Look for a group with the given name. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2024-02-01/re2/re2.h new/re2-2024-03-01/re2/re2.h --- old/re2-2024-02-01/re2/re2.h 2024-01-30 22:02:44.000000000 +0100 +++ new/re2-2024-03-01/re2/re2.h 2024-02-29 11:59:13.000000000 +0100 @@ -972,7 +972,7 @@ } // Silence warnings about missing initializers for members of LazyRE2. -#if !defined(__clang__) && defined(__GNUC__) +#if defined(__GNUC__) #pragma GCC diagnostic ignored "-Wmissing-field-initializers" #endif diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2024-02-01/re2/testing/dump.cc new/re2-2024-03-01/re2/testing/dump.cc --- old/re2-2024-02-01/re2/testing/dump.cc 2024-01-30 22:02:44.000000000 +0100 +++ new/re2-2024-03-01/re2/testing/dump.cc 2024-02-29 11:59:13.000000000 +0100 @@ -96,17 +96,25 @@ break; case kRegexpLiteral: { Rune r = re->rune(); - char buf[UTFmax+1]; - buf[runetochar(buf, &r)] = 0; - s->append(buf); + if (re->parse_flags() & Regexp::Latin1) { + s->push_back(r); + } else { + char buf[UTFmax+1]; + buf[runetochar(buf, &r)] = 0; + s->append(buf); + } break; } case kRegexpLiteralString: for (int i = 0; i < re->nrunes(); i++) { Rune r = re->runes()[i]; - char buf[UTFmax+1]; - buf[runetochar(buf, &r)] = 0; - s->append(buf); + if (re->parse_flags() & Regexp::Latin1) { + s->push_back(r); + } else { + char buf[UTFmax+1]; + buf[runetochar(buf, &r)] = 0; + s->append(buf); + } } break; case kRegexpConcat: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2024-02-01/re2/testing/parse_test.cc new/re2-2024-03-01/re2/testing/parse_test.cc --- old/re2-2024-02-01/re2/testing/parse_test.cc 2024-01-30 22:02:44.000000000 +0100 +++ new/re2-2024-03-01/re2/testing/parse_test.cc 2024-02-29 11:59:13.000000000 +0100 @@ -225,6 +225,29 @@ // Bug in Regexp::ToString() that emitted [^], which // would (obviously) fail to parse when fed back in. { "[\\s\\S]", "cc{0-0x10ffff}" }, + + // As per https://github.com/google/re2/issues/477, + // there were long-standing bugs involving Latin-1. + // Here, we exercise it WITHOUT case folding... + { "\xa5\x64\xd1", "str{\xa5""d\xd1}", Regexp::Latin1 }, + { "\xa5\xd1\x64", "str{\xa5\xd1""d}", Regexp::Latin1 }, + { "\xa5\x64[\xd1\xd2]", "cat{str{\xa5""d}cc{0xd1-0xd2}}", Regexp::Latin1 }, + { "\xa5[\xd1\xd2]\x64", "cat{lit{\xa5}cc{0xd1-0xd2}lit{d}}", Regexp::Latin1 }, + { "\xa5\x64|\xa5\xd1", "cat{lit{\xa5}cc{0x64 0xd1}}", Regexp::Latin1 }, + { "\xa5\xd1|\xa5\x64", "cat{lit{\xa5}cc{0x64 0xd1}}", Regexp::Latin1 }, + { "\xa5\x64|\xa5[\xd1\xd2]", "cat{lit{\xa5}cc{0x64 0xd1-0xd2}}", Regexp::Latin1 }, + { "\xa5[\xd1\xd2]|\xa5\x64", "cat{lit{\xa5}cc{0x64 0xd1-0xd2}}", Regexp::Latin1 }, + // Here, we exercise it WITH case folding... + // 0x64 should fold to 0x44, but neither 0xD1 nor 0xD2 + // should fold to 0xF1 and 0xF2, respectively. + { "\xa5\x64\xd1", "strfold{\xa5""d\xd1}", Regexp::Latin1 | Regexp::FoldCase }, + { "\xa5\xd1\x64", "strfold{\xa5\xd1""d}", Regexp::Latin1 | Regexp::FoldCase }, + { "\xa5\x64[\xd1\xd2]", "cat{strfold{\xa5""d}cc{0xd1-0xd2}}", Regexp::Latin1 | Regexp::FoldCase }, + { "\xa5[\xd1\xd2]\x64", "cat{lit{\xa5}cc{0xd1-0xd2}litfold{d}}", Regexp::Latin1 | Regexp::FoldCase }, + { "\xa5\x64|\xa5\xd1", "cat{lit{\xa5}cc{0x44 0x64 0xd1}}", Regexp::Latin1 | Regexp::FoldCase }, + { "\xa5\xd1|\xa5\x64", "cat{lit{\xa5}cc{0x44 0x64 0xd1}}", Regexp::Latin1 | Regexp::FoldCase }, + { "\xa5\x64|\xa5[\xd1\xd2]", "cat{lit{\xa5}cc{0x44 0x64 0xd1-0xd2}}", Regexp::Latin1 | Regexp::FoldCase }, + { "\xa5[\xd1\xd2]|\xa5\x64", "cat{lit{\xa5}cc{0x44 0x64 0xd1-0xd2}}", Regexp::Latin1 | Regexp::FoldCase }, }; bool RegexpEqualTestingOnly(Regexp* a, Regexp* b) { @@ -492,7 +515,7 @@ // << " t=" << t << " regexp=" << tests[i].regexp; // Test that if we parse the new regexp we get the same structure. - Regexp* nre = Regexp::Parse(t, Regexp::MatchNL | Regexp::PerlX, &status); + Regexp* nre = Regexp::Parse(t, f, &status); ASSERT_TRUE(nre != NULL) << " reparse " << t << " " << status.Text(); std::string ss = nre->Dump(); std::string tt = nre->ToString(); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2024-02-01/re2/testing/re2_test.cc new/re2-2024-03-01/re2/testing/re2_test.cc --- old/re2-2024-02-01/re2/testing/re2_test.cc 2024-01-30 22:02:44.000000000 +0100 +++ new/re2-2024-03-01/re2/testing/re2_test.cc 2024-02-29 11:59:13.000000000 +0100 @@ -1658,4 +1658,23 @@ ASSERT_EQ(m, "") << " got m='" << m << "', want ''"; } +TEST(RE2, Issue477) { + // Regexp::LeadingString didn't output Latin1 into flags. + // In the given pattern, 0xA5 should be factored out, but + // shouldn't lose its Latin1-ness in the process. Because + // that was happening, the prefix for accel was 0xC2 0xA5 + // instead of 0xA5. Note that the former doesn't occur in + // the given input and so replacements weren't occurring. + + const char bytes[] = { + (char)0xa5, (char)0xd1, (char)0xa5, (char)0xd1, + (char)0x61, (char)0x63, (char)0xa5, (char)0x64, + }; + std::string s(bytes, ABSL_ARRAYSIZE(bytes)); + RE2 re("\xa5\xd1|\xa5\x64", RE2::Latin1); + int n = RE2::GlobalReplace(&s, re, ""); + ASSERT_EQ(n, 3); + ASSERT_EQ(s, "\x61\x63"); +} + } // namespace re2 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2024-02-01/util/pcre.cc new/re2-2024-03-01/util/pcre.cc --- old/re2-2024-02-01/util/pcre.cc 2024-01-30 22:02:44.000000000 +0100 +++ new/re2-2024-03-01/util/pcre.cc 2024-02-29 11:59:13.000000000 +0100 @@ -21,7 +21,7 @@ #include "util/pcre.h" // Silence warnings about the wacky formatting in the operator() functions. -#if !defined(__clang__) && defined(__GNUC__) +#if defined(__GNUC__) #pragma GCC diagnostic ignored "-Wmisleading-indentation" #endif