Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package re2 for openSUSE:Factory checked in at 2022-02-06 23:54:12 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/re2 (Old) and /work/SRC/openSUSE:Factory/.re2.new.1898 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "re2" Sun Feb 6 23:54:12 2022 rev:43 rq:951559 version:MACRO Changes: -------- --- /work/SRC/openSUSE:Factory/re2/re2.changes 2021-12-08 22:08:30.922850181 +0100 +++ /work/SRC/openSUSE:Factory/.re2.new.1898/re2.changes 2022-02-06 23:55:22.218380832 +0100 @@ -1,0 +2,8 @@ +Fri Feb 4 11:54:32 UTC 2022 - Callum Farmer <[email protected]> + +- Update to 2022-02-01: + * Address a `-Wunused-but-set-variable' warning from Clang 13.x + * Don't specify the -std flag in Makefile or re2.pc + * Remove a redundant map access + +------------------------------------------------------------------- Old: ---- re2-2021-11-01.tar.gz New: ---- re2-2022-02-01.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ re2.spec ++++++ --- /var/tmp/diff_new_pack.EeLX77/_old 2022-02-06 23:55:22.666377802 +0100 +++ /var/tmp/diff_new_pack.EeLX77/_new 2022-02-06 23:55:22.674377749 +0100 @@ -16,7 +16,7 @@ # -%global longver 2021-11-01 +%global longver 2022-02-01 %global shortver %(echo %{longver}|sed 's|-||g') %define libname libre2-9 Name: re2 ++++++ re2-2021-11-01.tar.gz -> re2-2022-02-01.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2021-11-01/.github/workflows/ci.yml new/re2-2022-02-01/.github/workflows/ci.yml --- old/re2-2021-11-01/.github/workflows/ci.yml 2021-10-28 16:42:34.000000000 +0200 +++ new/re2-2022-02-01/.github/workflows/ci.yml 2022-01-27 21:47:09.000000000 +0100 @@ -3,15 +3,21 @@ push: branches: [main] jobs: - build: - runs-on: ${{ matrix.os }} + build-appleclang: + runs-on: macos-latest strategy: fail-fast: false matrix: - os: [macos-latest, ubuntu-latest] + ver: [11, 14, 17, 20] env: CC: clang CXX: clang++ + # Unlike GCC and upstream Clang, AppleClang still defaults to `-std=c++98` + # for some reason. Also, the macOS image on GitHub Actions provides wildly + # numbered Xcode versions. Thus, rather than varying the compiler version, + # we set the `-std` flag explicitly in order to vary the language version. + # (The other two flags are the default provided for CXXFLAGS in Makefile.) + CXXFLAGS: -O3 -g -std=c++${{ matrix.ver }} steps: - uses: actions/checkout@v2 - run: make && make test @@ -21,27 +27,27 @@ strategy: fail-fast: false matrix: - tag: [9, 10, 11, 12, 13] + ver: [9, 10, 11, 12, 13] env: - CC: clang-${{ matrix.tag }} - CXX: clang++-${{ matrix.tag }} + CC: clang-${{ matrix.ver }} + CXX: clang++-${{ matrix.ver }} steps: - uses: actions/checkout@v2 - - name: Install Clang ${{ matrix.tag }} + - name: Install Clang ${{ matrix.ver }} run: | wget https://apt.llvm.org/llvm.sh chmod +x ./llvm.sh - sudo ./llvm.sh ${{ matrix.tag }} + sudo ./llvm.sh ${{ matrix.ver }} shell: bash - run: make && make test shell: bash build-gcc: runs-on: ubuntu-latest - container: gcc:${{ matrix.tag }} + container: gcc:${{ matrix.ver }} strategy: fail-fast: false matrix: - tag: [4, 5, 6, 7, 8, 9, 10, 11] + ver: [6, 7, 8, 9, 10, 11] env: CC: gcc CXX: g++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2021-11-01/.github/workflows/pr.yml new/re2-2022-02-01/.github/workflows/pr.yml --- old/re2-2021-11-01/.github/workflows/pr.yml 1970-01-01 01:00:00.000000000 +0100 +++ new/re2-2022-02-01/.github/workflows/pr.yml 2022-01-27 21:47:09.000000000 +0100 @@ -0,0 +1,26 @@ +name: PR +on: + pull_request_target: + branches: [main] + types: [opened] +jobs: + close: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/github-script@v5 + with: + script: | + const fs = require('fs'); + console.log(await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: fs.readFileSync('CONTRIBUTING.md', { encoding: 'utf8', }), + })); + console.log(await github.rest.pulls.update({ + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: context.issue.number, + state: 'closed', + })); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2021-11-01/CMakeLists.txt new/re2-2022-02-01/CMakeLists.txt --- old/re2-2021-11-01/CMakeLists.txt 2021-10-28 16:42:34.000000000 +0200 +++ new/re2-2022-02-01/CMakeLists.txt 2022-01-27 21:47:09.000000000 +0100 @@ -6,6 +6,7 @@ cmake_minimum_required(VERSION 3.5.1) project(RE2 CXX) +include(CMakePackageConfigHelpers) include(CTest) include(GNUInstallDirs) @@ -154,10 +155,23 @@ install(FILES ${RE2_HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/re2) -install(TARGETS re2 EXPORT re2Config +install(TARGETS re2 EXPORT re2Targets ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) -install(EXPORT re2Config +install(EXPORT re2Targets DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/re2 NAMESPACE re2::) + +configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/re2Config.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/re2Config.cmake + INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/re2 + ) +write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/re2ConfigVersion.cmake + VERSION ${SONAME}.0.0 + COMPATIBILITY SameMajorVersion + ) + +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/re2Config.cmake + ${CMAKE_CURRENT_BINARY_DIR}/re2ConfigVersion.cmake + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/re2) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2021-11-01/CONTRIBUTING.md new/re2-2022-02-01/CONTRIBUTING.md --- old/re2-2021-11-01/CONTRIBUTING.md 2021-10-28 16:42:34.000000000 +0200 +++ new/re2-2022-02-01/CONTRIBUTING.md 2022-01-27 21:47:09.000000000 +0100 @@ -1,2 +1,2 @@ RE2 uses Gerrit instead of GitHub pull requests. -See the [Contributing](https://github.com/google/re2/wiki/Contribute) wiki page. +See the [Contribute](https://github.com/google/re2/wiki/Contribute) wiki page. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2021-11-01/Makefile new/re2-2022-02-01/Makefile --- old/re2-2021-11-01/Makefile 2021-10-28 16:42:34.000000000 +0200 +++ new/re2-2022-02-01/Makefile 2022-01-27 21:47:09.000000000 +0100 @@ -17,7 +17,7 @@ CXXFLAGS?=-O3 -g LDFLAGS?= # required -RE2_CXXFLAGS?=-std=c++11 -pthread -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCICU) $(CCPCRE) +RE2_CXXFLAGS?=-pthread -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCICU) $(CCPCRE) RE2_LDFLAGS?=-pthread $(LDICU) $(LDPCRE) AR?=ar ARFLAGS?=rsc @@ -327,7 +327,7 @@ @echo .PHONY: static-testinstall -static-testinstall: CXXFLAGS:=-std=c++11 -pthread -I$(DESTDIR)$(includedir) $(CXXFLAGS) +static-testinstall: CXXFLAGS:=-pthread -I$(DESTDIR)$(includedir) $(CXXFLAGS) static-testinstall: LDFLAGS:=-pthread -L$(DESTDIR)$(libdir) -l:libre2.a $(LDICU) $(LDFLAGS) static-testinstall: @mkdir -p obj @@ -337,21 +337,21 @@ else ifeq ($(shell uname),SunOS) @echo Skipping test for libre2.a on SunOS. else - (cd obj && $(CXX) testinstall.cc -o testinstall $(CXXFLAGS) $(LDFLAGS)) - obj/testinstall + (cd obj && $(CXX) testinstall.cc -o static-testinstall $(CXXFLAGS) $(LDFLAGS)) + obj/static-testinstall endif .PHONY: shared-testinstall -shared-testinstall: CXXFLAGS:=-std=c++11 -pthread -I$(DESTDIR)$(includedir) $(CXXFLAGS) +shared-testinstall: CXXFLAGS:=-pthread -I$(DESTDIR)$(includedir) $(CXXFLAGS) shared-testinstall: LDFLAGS:=-pthread -L$(DESTDIR)$(libdir) -lre2 $(LDICU) $(LDFLAGS) shared-testinstall: @mkdir -p obj @cp testinstall.cc obj - (cd obj && $(CXX) testinstall.cc -o testinstall $(CXXFLAGS) $(LDFLAGS)) + (cd obj && $(CXX) testinstall.cc -o shared-testinstall $(CXXFLAGS) $(LDFLAGS)) ifeq ($(shell uname),Darwin) - DYLD_LIBRARY_PATH="$(DESTDIR)$(libdir):$(DYLD_LIBRARY_PATH)" obj/testinstall + DYLD_LIBRARY_PATH="$(DESTDIR)$(libdir):$(DYLD_LIBRARY_PATH)" obj/shared-testinstall else - LD_LIBRARY_PATH="$(DESTDIR)$(libdir):$(LD_LIBRARY_PATH)" obj/testinstall + LD_LIBRARY_PATH="$(DESTDIR)$(libdir):$(LD_LIBRARY_PATH)" obj/shared-testinstall endif .PHONY: benchlog diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2021-11-01/re2/fuzzing/compiler-rt/include/fuzzer/FuzzedDataProvider.h new/re2-2022-02-01/re2/fuzzing/compiler-rt/include/fuzzer/FuzzedDataProvider.h --- old/re2-2021-11-01/re2/fuzzing/compiler-rt/include/fuzzer/FuzzedDataProvider.h 2021-10-28 16:42:34.000000000 +0200 +++ new/re2-2022-02-01/re2/fuzzing/compiler-rt/include/fuzzer/FuzzedDataProvider.h 2022-01-27 21:47:09.000000000 +0100 @@ -14,11 +14,13 @@ #define LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_ #include <algorithm> +#include <array> #include <climits> #include <cstddef> #include <cstdint> #include <cstring> #include <initializer_list> +#include <limits> #include <string> #include <type_traits> #include <utility> @@ -34,208 +36,49 @@ : data_ptr_(data), remaining_bytes_(size) {} ~FuzzedDataProvider() = default; - // Returns a std::vector containing |num_bytes| of input data. If fewer than - // |num_bytes| of data remain, returns a shorter std::vector containing all - // of the data that's left. Can be used with any byte sized type, such as - // char, unsigned char, uint8_t, etc. - template <typename T> std::vector<T> ConsumeBytes(size_t num_bytes) { - num_bytes = std::min(num_bytes, remaining_bytes_); - return ConsumeBytes<T>(num_bytes, num_bytes); - } + // See the implementation below (after the class definition) for more verbose + // comments for each of the methods. - // Similar to |ConsumeBytes|, but also appends the terminator value at the end - // of the resulting vector. Useful, when a mutable null-terminated C-string is - // needed, for example. But that is a rare case. Better avoid it, if possible, - // and prefer using |ConsumeBytes| or |ConsumeBytesAsString| methods. + // Methods returning std::vector of bytes. These are the most popular choice + // when splitting fuzzing input into pieces, as every piece is put into a + // separate buffer (i.e. ASan would catch any under-/overflow) and the memory + // will be released automatically. + template <typename T> std::vector<T> ConsumeBytes(size_t num_bytes); template <typename T> - std::vector<T> ConsumeBytesWithTerminator(size_t num_bytes, - T terminator = 0) { - num_bytes = std::min(num_bytes, remaining_bytes_); - std::vector<T> result = ConsumeBytes<T>(num_bytes + 1, num_bytes); - result.back() = terminator; - return result; - } - - // Returns a std::string containing |num_bytes| of input data. Using this and - // |.c_str()| on the resulting string is the best way to get an immutable - // null-terminated C string. If fewer than |num_bytes| of data remain, returns - // a shorter std::string containing all of the data that's left. - std::string ConsumeBytesAsString(size_t num_bytes) { - static_assert(sizeof(std::string::value_type) == sizeof(uint8_t), - "ConsumeBytesAsString cannot convert the data to a string."); - - num_bytes = std::min(num_bytes, remaining_bytes_); - std::string result( - reinterpret_cast<const std::string::value_type *>(data_ptr_), - num_bytes); - Advance(num_bytes); - return result; - } - - // Returns a number in the range [min, max] by consuming bytes from the - // input data. The value might not be uniformly distributed in the given - // range. If there's no input data left, always returns |min|. |min| must - // be less than or equal to |max|. - template <typename T> T ConsumeIntegralInRange(T min, T max) { - static_assert(std::is_integral<T>::value, "An integral type is required."); - static_assert(sizeof(T) <= sizeof(uint64_t), "Unsupported integral type."); - - if (min > max) - abort(); - - // Use the biggest type possible to hold the range and the result. - uint64_t range = static_cast<uint64_t>(max) - min; - uint64_t result = 0; - size_t offset = 0; - - while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 && - remaining_bytes_ != 0) { - // Pull bytes off the end of the seed data. Experimentally, this seems to - // allow the fuzzer to more easily explore the input space. This makes - // sense, since it works by modifying inputs that caused new code to run, - // and this data is often used to encode length of data read by - // |ConsumeBytes|. Separating out read lengths makes it easier modify the - // contents of the data that is actually read. - --remaining_bytes_; - result = (result << CHAR_BIT) | data_ptr_[remaining_bytes_]; - offset += CHAR_BIT; - } + std::vector<T> ConsumeBytesWithTerminator(size_t num_bytes, T terminator = 0); + template <typename T> std::vector<T> ConsumeRemainingBytes(); - // Avoid division by 0, in case |range + 1| results in overflow. - if (range != std::numeric_limits<decltype(range)>::max()) - result = result % (range + 1); + // Methods returning strings. Use only when you need a std::string or a null + // terminated C-string. Otherwise, prefer the methods returning std::vector. + std::string ConsumeBytesAsString(size_t num_bytes); + std::string ConsumeRandomLengthString(size_t max_length); + std::string ConsumeRandomLengthString(); + std::string ConsumeRemainingBytesAsString(); + + // Methods returning integer values. + template <typename T> T ConsumeIntegral(); + template <typename T> T ConsumeIntegralInRange(T min, T max); + + // Methods returning floating point values. + template <typename T> T ConsumeFloatingPoint(); + template <typename T> T ConsumeFloatingPointInRange(T min, T max); + + // 0 <= return value <= 1. + template <typename T> T ConsumeProbability(); - return static_cast<T>(min + result); - } + bool ConsumeBool(); - // Returns a std::string of length from 0 to |max_length|. When it runs out of - // input data, returns what remains of the input. Designed to be more stable - // with respect to a fuzzer inserting characters than just picking a random - // length and then consuming that many bytes with |ConsumeBytes|. - std::string ConsumeRandomLengthString(size_t max_length) { - // Reads bytes from the start of |data_ptr_|. Maps "\\" to "\", and maps "\" - // followed by anything else to the end of the string. As a result of this - // logic, a fuzzer can insert characters into the string, and the string - // will be lengthened to include those new characters, resulting in a more - // stable fuzzer than picking the length of a string independently from - // picking its contents. - std::string result; - - // Reserve the anticipated capaticity to prevent several reallocations. - result.reserve(std::min(max_length, remaining_bytes_)); - for (size_t i = 0; i < max_length && remaining_bytes_ != 0; ++i) { - char next = ConvertUnsignedToSigned<char>(data_ptr_[0]); - Advance(1); - if (next == '\\' && remaining_bytes_ != 0) { - next = ConvertUnsignedToSigned<char>(data_ptr_[0]); - Advance(1); - if (next != '\\') - break; - } - result += next; - } + // Returns a value chosen from the given enum. + template <typename T> T ConsumeEnum(); - result.shrink_to_fit(); - return result; - } - - // Returns a std::vector containing all remaining bytes of the input data. - template <typename T> std::vector<T> ConsumeRemainingBytes() { - return ConsumeBytes<T>(remaining_bytes_); - } - - // Returns a std::string containing all remaining bytes of the input data. - // Prefer using |ConsumeRemainingBytes| unless you actually need a std::string - // object. - std::string ConsumeRemainingBytesAsString() { - return ConsumeBytesAsString(remaining_bytes_); - } - - // Returns a number in the range [Type's min, Type's max]. The value might - // not be uniformly distributed in the given range. If there's no input data - // left, always returns |min|. - template <typename T> T ConsumeIntegral() { - return ConsumeIntegralInRange(std::numeric_limits<T>::min(), - std::numeric_limits<T>::max()); - } - - // Reads one byte and returns a bool, or false when no data remains. - bool ConsumeBool() { return 1 & ConsumeIntegral<uint8_t>(); } - - // Returns a copy of the value selected from the given fixed-size |array|. + // Returns a value from the given array. + template <typename T, size_t size> T PickValueInArray(const T (&array)[size]); template <typename T, size_t size> - T PickValueInArray(const T (&array)[size]) { - static_assert(size > 0, "The array must be non empty."); - return array[ConsumeIntegralInRange<size_t>(0, size - 1)]; - } - - template <typename T> - T PickValueInArray(std::initializer_list<const T> list) { - // TODO(Dor1s): switch to static_assert once C++14 is allowed. - if (!list.size()) - abort(); - - return *(list.begin() + ConsumeIntegralInRange<size_t>(0, list.size() - 1)); - } - - // Returns an enum value. The enum must start at 0 and be contiguous. It must - // also contain |kMaxValue| aliased to its largest (inclusive) value. Such as: - // enum class Foo { SomeValue, OtherValue, kMaxValue = OtherValue }; - template <typename T> T ConsumeEnum() { - static_assert(std::is_enum<T>::value, "|T| must be an enum type."); - return static_cast<T>(ConsumeIntegralInRange<uint32_t>( - 0, static_cast<uint32_t>(T::kMaxValue))); - } - - // Returns a floating point number in the range [0.0, 1.0]. If there's no - // input data left, always returns 0. - template <typename T> T ConsumeProbability() { - static_assert(std::is_floating_point<T>::value, - "A floating point type is required."); - - // Use different integral types for different floating point types in order - // to provide better density of the resulting values. - using IntegralType = - typename std::conditional<(sizeof(T) <= sizeof(uint32_t)), uint32_t, - uint64_t>::type; - - T result = static_cast<T>(ConsumeIntegral<IntegralType>()); - result /= static_cast<T>(std::numeric_limits<IntegralType>::max()); - return result; - } - - // Returns a floating point value in the range [Type's lowest, Type's max] by - // consuming bytes from the input data. If there's no input data left, always - // returns approximately 0. - template <typename T> T ConsumeFloatingPoint() { - return ConsumeFloatingPointInRange<T>(std::numeric_limits<T>::lowest(), - std::numeric_limits<T>::max()); - } - - // Returns a floating point value in the given range by consuming bytes from - // the input data. If there's no input data left, returns |min|. Note that - // |min| must be less than or equal to |max|. - template <typename T> T ConsumeFloatingPointInRange(T min, T max) { - if (min > max) - abort(); - - T range = .0; - T result = min; - constexpr T zero(.0); - if (max > zero && min < zero && max > min + std::numeric_limits<T>::max()) { - // The diff |max - min| would overflow the given floating point type. Use - // the half of the diff as the range and consume a bool to decide whether - // the result is in the first of the second part of the diff. - range = (max / 2.0) - (min / 2.0); - if (ConsumeBool()) { - result += range; - } - } else { - range = max - min; - } + T PickValueInArray(const std::array<T, size> &array); + template <typename T> T PickValueInArray(std::initializer_list<const T> list); - return result + range * ConsumeProbability<T>(); - } + // Writes data to the given destination and returns number of bytes written. + size_t ConsumeData(void *destination, size_t num_bytes); // Reports the remaining bytes available for fuzzed input. size_t remaining_bytes() { return remaining_bytes_; } @@ -244,62 +87,311 @@ FuzzedDataProvider(const FuzzedDataProvider &) = delete; FuzzedDataProvider &operator=(const FuzzedDataProvider &) = delete; - void Advance(size_t num_bytes) { - if (num_bytes > remaining_bytes_) - abort(); + void CopyAndAdvance(void *destination, size_t num_bytes); - data_ptr_ += num_bytes; - remaining_bytes_ -= num_bytes; - } + void Advance(size_t num_bytes); template <typename T> - std::vector<T> ConsumeBytes(size_t size, size_t num_bytes_to_consume) { - static_assert(sizeof(T) == sizeof(uint8_t), "Incompatible data type."); + std::vector<T> ConsumeBytes(size_t size, size_t num_bytes); - // The point of using the size-based constructor below is to increase the - // odds of having a vector object with capacity being equal to the length. - // That part is always implementation specific, but at least both libc++ and - // libstdc++ allocate the requested number of bytes in that constructor, - // which seems to be a natural choice for other implementations as well. - // To increase the odds even more, we also call |shrink_to_fit| below. - std::vector<T> result(size); - if (size == 0) { - if (num_bytes_to_consume != 0) - abort(); - return result; - } + template <typename TS, typename TU> TS ConvertUnsignedToSigned(TU value); - std::memcpy(result.data(), data_ptr_, num_bytes_to_consume); - Advance(num_bytes_to_consume); + const uint8_t *data_ptr_; + size_t remaining_bytes_; +}; - // Even though |shrink_to_fit| is also implementation specific, we expect it - // to provide an additional assurance in case vector's constructor allocated - // a buffer which is larger than the actual amount of data we put inside it. - result.shrink_to_fit(); - return result; +// Returns a std::vector containing |num_bytes| of input data. If fewer than +// |num_bytes| of data remain, returns a shorter std::vector containing all +// of the data that's left. Can be used with any byte sized type, such as +// char, unsigned char, uint8_t, etc. +template <typename T> +std::vector<T> FuzzedDataProvider::ConsumeBytes(size_t num_bytes) { + num_bytes = std::min(num_bytes, remaining_bytes_); + return ConsumeBytes<T>(num_bytes, num_bytes); +} + +// Similar to |ConsumeBytes|, but also appends the terminator value at the end +// of the resulting vector. Useful, when a mutable null-terminated C-string is +// needed, for example. But that is a rare case. Better avoid it, if possible, +// and prefer using |ConsumeBytes| or |ConsumeBytesAsString| methods. +template <typename T> +std::vector<T> FuzzedDataProvider::ConsumeBytesWithTerminator(size_t num_bytes, + T terminator) { + num_bytes = std::min(num_bytes, remaining_bytes_); + std::vector<T> result = ConsumeBytes<T>(num_bytes + 1, num_bytes); + result.back() = terminator; + return result; +} + +// Returns a std::vector containing all remaining bytes of the input data. +template <typename T> +std::vector<T> FuzzedDataProvider::ConsumeRemainingBytes() { + return ConsumeBytes<T>(remaining_bytes_); +} + +// Returns a std::string containing |num_bytes| of input data. Using this and +// |.c_str()| on the resulting string is the best way to get an immutable +// null-terminated C string. If fewer than |num_bytes| of data remain, returns +// a shorter std::string containing all of the data that's left. +inline std::string FuzzedDataProvider::ConsumeBytesAsString(size_t num_bytes) { + static_assert(sizeof(std::string::value_type) == sizeof(uint8_t), + "ConsumeBytesAsString cannot convert the data to a string."); + + num_bytes = std::min(num_bytes, remaining_bytes_); + std::string result( + reinterpret_cast<const std::string::value_type *>(data_ptr_), num_bytes); + Advance(num_bytes); + return result; +} + +// Returns a std::string of length from 0 to |max_length|. When it runs out of +// input data, returns what remains of the input. Designed to be more stable +// with respect to a fuzzer inserting characters than just picking a random +// length and then consuming that many bytes with |ConsumeBytes|. +inline std::string +FuzzedDataProvider::ConsumeRandomLengthString(size_t max_length) { + // Reads bytes from the start of |data_ptr_|. Maps "\\" to "\", and maps "\" + // followed by anything else to the end of the string. As a result of this + // logic, a fuzzer can insert characters into the string, and the string + // will be lengthened to include those new characters, resulting in a more + // stable fuzzer than picking the length of a string independently from + // picking its contents. + std::string result; + + // Reserve the anticipated capaticity to prevent several reallocations. + result.reserve(std::min(max_length, remaining_bytes_)); + for (size_t i = 0; i < max_length && remaining_bytes_ != 0; ++i) { + char next = ConvertUnsignedToSigned<char>(data_ptr_[0]); + Advance(1); + if (next == '\\' && remaining_bytes_ != 0) { + next = ConvertUnsignedToSigned<char>(data_ptr_[0]); + Advance(1); + if (next != '\\') + break; + } + result += next; } - template <typename TS, typename TU> TS ConvertUnsignedToSigned(TU value) { - static_assert(sizeof(TS) == sizeof(TU), "Incompatible data types."); - static_assert(!std::numeric_limits<TU>::is_signed, - "Source type must be unsigned."); - - // TODO(Dor1s): change to `if constexpr` once C++17 becomes mainstream. - if (std::numeric_limits<TS>::is_modulo) - return static_cast<TS>(value); - - // Avoid using implementation-defined unsigned to signer conversions. - // To learn more, see https://stackoverflow.com/questions/13150449. - if (value <= std::numeric_limits<TS>::max()) { - return static_cast<TS>(value); - } else { - constexpr auto TS_min = std::numeric_limits<TS>::min(); - return TS_min + static_cast<char>(value - TS_min); + result.shrink_to_fit(); + return result; +} + +// Returns a std::string of length from 0 to |remaining_bytes_|. +inline std::string FuzzedDataProvider::ConsumeRandomLengthString() { + return ConsumeRandomLengthString(remaining_bytes_); +} + +// Returns a std::string containing all remaining bytes of the input data. +// Prefer using |ConsumeRemainingBytes| unless you actually need a std::string +// object. +inline std::string FuzzedDataProvider::ConsumeRemainingBytesAsString() { + return ConsumeBytesAsString(remaining_bytes_); +} + +// Returns a number in the range [Type's min, Type's max]. The value might +// not be uniformly distributed in the given range. If there's no input data +// left, always returns |min|. +template <typename T> T FuzzedDataProvider::ConsumeIntegral() { + return ConsumeIntegralInRange(std::numeric_limits<T>::min(), + std::numeric_limits<T>::max()); +} + +// Returns a number in the range [min, max] by consuming bytes from the +// input data. The value might not be uniformly distributed in the given +// range. If there's no input data left, always returns |min|. |min| must +// be less than or equal to |max|. +template <typename T> +T FuzzedDataProvider::ConsumeIntegralInRange(T min, T max) { + static_assert(std::is_integral<T>::value, "An integral type is required."); + static_assert(sizeof(T) <= sizeof(uint64_t), "Unsupported integral type."); + + if (min > max) + abort(); + + // Use the biggest type possible to hold the range and the result. + uint64_t range = static_cast<uint64_t>(max) - min; + uint64_t result = 0; + size_t offset = 0; + + while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 && + remaining_bytes_ != 0) { + // Pull bytes off the end of the seed data. Experimentally, this seems to + // allow the fuzzer to more easily explore the input space. This makes + // sense, since it works by modifying inputs that caused new code to run, + // and this data is often used to encode length of data read by + // |ConsumeBytes|. Separating out read lengths makes it easier modify the + // contents of the data that is actually read. + --remaining_bytes_; + result = (result << CHAR_BIT) | data_ptr_[remaining_bytes_]; + offset += CHAR_BIT; + } + + // Avoid division by 0, in case |range + 1| results in overflow. + if (range != std::numeric_limits<decltype(range)>::max()) + result = result % (range + 1); + + return static_cast<T>(min + result); +} + +// Returns a floating point value in the range [Type's lowest, Type's max] by +// consuming bytes from the input data. If there's no input data left, always +// returns approximately 0. +template <typename T> T FuzzedDataProvider::ConsumeFloatingPoint() { + return ConsumeFloatingPointInRange<T>(std::numeric_limits<T>::lowest(), + std::numeric_limits<T>::max()); +} + +// Returns a floating point value in the given range by consuming bytes from +// the input data. If there's no input data left, returns |min|. Note that +// |min| must be less than or equal to |max|. +template <typename T> +T FuzzedDataProvider::ConsumeFloatingPointInRange(T min, T max) { + if (min > max) + abort(); + + T range = .0; + T result = min; + constexpr T zero(.0); + if (max > zero && min < zero && max > min + std::numeric_limits<T>::max()) { + // The diff |max - min| would overflow the given floating point type. Use + // the half of the diff as the range and consume a bool to decide whether + // the result is in the first of the second part of the diff. + range = (max / 2.0) - (min / 2.0); + if (ConsumeBool()) { + result += range; } + } else { + range = max - min; } - const uint8_t *data_ptr_; - size_t remaining_bytes_; -}; + return result + range * ConsumeProbability<T>(); +} + +// Returns a floating point number in the range [0.0, 1.0]. If there's no +// input data left, always returns 0. +template <typename T> T FuzzedDataProvider::ConsumeProbability() { + static_assert(std::is_floating_point<T>::value, + "A floating point type is required."); + + // Use different integral types for different floating point types in order + // to provide better density of the resulting values. + using IntegralType = + typename std::conditional<(sizeof(T) <= sizeof(uint32_t)), uint32_t, + uint64_t>::type; + + T result = static_cast<T>(ConsumeIntegral<IntegralType>()); + result /= static_cast<T>(std::numeric_limits<IntegralType>::max()); + return result; +} + +// Reads one byte and returns a bool, or false when no data remains. +inline bool FuzzedDataProvider::ConsumeBool() { + return 1 & ConsumeIntegral<uint8_t>(); +} + +// Returns an enum value. The enum must start at 0 and be contiguous. It must +// also contain |kMaxValue| aliased to its largest (inclusive) value. Such as: +// enum class Foo { SomeValue, OtherValue, kMaxValue = OtherValue }; +template <typename T> T FuzzedDataProvider::ConsumeEnum() { + static_assert(std::is_enum<T>::value, "|T| must be an enum type."); + return static_cast<T>( + ConsumeIntegralInRange<uint32_t>(0, static_cast<uint32_t>(T::kMaxValue))); +} + +// Returns a copy of the value selected from the given fixed-size |array|. +template <typename T, size_t size> +T FuzzedDataProvider::PickValueInArray(const T (&array)[size]) { + static_assert(size > 0, "The array must be non empty."); + return array[ConsumeIntegralInRange<size_t>(0, size - 1)]; +} + +template <typename T, size_t size> +T FuzzedDataProvider::PickValueInArray(const std::array<T, size> &array) { + static_assert(size > 0, "The array must be non empty."); + return array[ConsumeIntegralInRange<size_t>(0, size - 1)]; +} + +template <typename T> +T FuzzedDataProvider::PickValueInArray(std::initializer_list<const T> list) { + // TODO(Dor1s): switch to static_assert once C++14 is allowed. + if (!list.size()) + abort(); + + return *(list.begin() + ConsumeIntegralInRange<size_t>(0, list.size() - 1)); +} + +// Writes |num_bytes| of input data to the given destination pointer. If there +// is not enough data left, writes all remaining bytes. Return value is the +// number of bytes written. +// In general, it's better to avoid using this function, but it may be useful +// in cases when it's necessary to fill a certain buffer or object with +// fuzzing data. +inline size_t FuzzedDataProvider::ConsumeData(void *destination, + size_t num_bytes) { + num_bytes = std::min(num_bytes, remaining_bytes_); + CopyAndAdvance(destination, num_bytes); + return num_bytes; +} + +// Private methods. +inline void FuzzedDataProvider::CopyAndAdvance(void *destination, + size_t num_bytes) { + std::memcpy(destination, data_ptr_, num_bytes); + Advance(num_bytes); +} + +inline void FuzzedDataProvider::Advance(size_t num_bytes) { + if (num_bytes > remaining_bytes_) + abort(); + + data_ptr_ += num_bytes; + remaining_bytes_ -= num_bytes; +} + +template <typename T> +std::vector<T> FuzzedDataProvider::ConsumeBytes(size_t size, size_t num_bytes) { + static_assert(sizeof(T) == sizeof(uint8_t), "Incompatible data type."); + + // The point of using the size-based constructor below is to increase the + // odds of having a vector object with capacity being equal to the length. + // That part is always implementation specific, but at least both libc++ and + // libstdc++ allocate the requested number of bytes in that constructor, + // which seems to be a natural choice for other implementations as well. + // To increase the odds even more, we also call |shrink_to_fit| below. + std::vector<T> result(size); + if (size == 0) { + if (num_bytes != 0) + abort(); + return result; + } + + CopyAndAdvance(result.data(), num_bytes); + + // Even though |shrink_to_fit| is also implementation specific, we expect it + // to provide an additional assurance in case vector's constructor allocated + // a buffer which is larger than the actual amount of data we put inside it. + result.shrink_to_fit(); + return result; +} + +template <typename TS, typename TU> +TS FuzzedDataProvider::ConvertUnsignedToSigned(TU value) { + static_assert(sizeof(TS) == sizeof(TU), "Incompatible data types."); + static_assert(!std::numeric_limits<TU>::is_signed, + "Source type must be unsigned."); + + // TODO(Dor1s): change to `if constexpr` once C++17 becomes mainstream. + if (std::numeric_limits<TS>::is_modulo) + return static_cast<TS>(value); + + // Avoid using implementation-defined unsigned to signed conversions. + // To learn more, see https://stackoverflow.com/questions/13150449. + if (value <= std::numeric_limits<TS>::max()) { + return static_cast<TS>(value); + } else { + constexpr auto TS_min = std::numeric_limits<TS>::min(); + return TS_min + static_cast<TS>(value - TS_min); + } +} #endif // LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2021-11-01/re2/make_unicode_casefold.py new/re2-2022-02-01/re2/make_unicode_casefold.py --- old/re2-2021-11-01/re2/make_unicode_casefold.py 2021-10-28 16:42:34.000000000 +0200 +++ new/re2-2022-02-01/re2/make_unicode_casefold.py 2022-01-27 21:47:09.000000000 +0100 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 # coding=utf-8 # # Copyright 2008 The RE2 Authors. All Rights Reserved. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2021-11-01/re2/make_unicode_groups.py new/re2-2022-02-01/re2/make_unicode_groups.py --- old/re2-2021-11-01/re2/make_unicode_groups.py 2021-10-28 16:42:34.000000000 +0200 +++ new/re2-2022-02-01/re2/make_unicode_groups.py 2022-01-27 21:47:09.000000000 +0100 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 # Copyright 2008 The RE2 Authors. All Rights Reserved. # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2021-11-01/re2/prog.cc new/re2-2022-02-01/re2/prog.cc --- old/re2-2021-11-01/re2/prog.cc 2021-10-28 16:42:34.000000000 +0200 +++ new/re2-2022-02-01/re2/prog.cc 2022-01-27 21:47:09.000000000 +0100 @@ -611,10 +611,13 @@ inst_count_[ip->opcode()]++; } - int total = 0; +#if !defined(NDEBUG) + // Address a `-Wunused-but-set-variable' warning from Clang 13.x. + size_t total = 0; for (int i = 0; i < kNumInst; i++) total += inst_count_[i]; - DCHECK_EQ(total, static_cast<int>(flat.size())); + CHECK_EQ(total, flat.size()); +#endif // Remap start_unanchored and start. if (start_unanchored() == 0) { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2021-11-01/re2/re2.h new/re2-2022-02-01/re2/re2.h --- old/re2-2021-11-01/re2/re2.h 2021-10-28 16:42:34.000000000 +0200 +++ new/re2-2022-02-01/re2/re2.h 2022-01-27 21:47:09.000000000 +0100 @@ -971,7 +971,7 @@ // As per https://github.com/google/re2/issues/325, thread_local support in // MinGW seems to be buggy. (FWIW, Abseil folks also avoid it.) #define RE2_HAVE_THREAD_LOCAL -#if (defined(__APPLE__) && !TARGET_OS_OSX) || defined(__MINGW32__) +#if (defined(__APPLE__) && !(defined(TARGET_OS_OSX) && TARGET_OS_OSX)) || defined(__MINGW32__) #undef RE2_HAVE_THREAD_LOCAL #endif diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2021-11-01/re2/regexp.cc new/re2-2022-02-01/re2/regexp.cc --- old/re2-2021-11-01/re2/regexp.cc 2021-10-28 16:42:34.000000000 +0200 +++ new/re2-2022-02-01/re2/regexp.cc 2022-01-27 21:47:09.000000000 +0100 @@ -585,8 +585,7 @@ // Record first occurrence of each name. // (The rule is that if you have the same name // multiple times, only the leftmost one counts.) - if (map_->find(*re->name()) == map_->end()) - (*map_)[*re->name()] = re->cap(); + map_->insert({*re->name(), re->cap()}); } return ignored; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2021-11-01/re2.pc new/re2-2022-02-01/re2.pc --- old/re2-2021-11-01/re2.pc 2021-10-28 16:42:34.000000000 +0200 +++ new/re2-2022-02-01/re2.pc 2022-01-27 21:47:09.000000000 +0100 @@ -4,5 +4,5 @@ Name: re2 Description: RE2 is a fast, safe, thread-friendly regular expression engine. Version: 0.0.0 -Cflags: -std=c++11 -pthread -I${includedir} +Cflags: -pthread -I${includedir} Libs: -pthread -L${libdir} -lre2 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2021-11-01/re2Config.cmake.in new/re2-2022-02-01/re2Config.cmake.in --- old/re2-2021-11-01/re2Config.cmake.in 1970-01-01 01:00:00.000000000 +0100 +++ new/re2-2022-02-01/re2Config.cmake.in 2022-01-27 21:47:09.000000000 +0100 @@ -0,0 +1,22 @@ +# Copyright 2022 The RE2 Authors. All Rights Reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro) + +set_and_check(re2_INCLUDE_DIR ${PACKAGE_PREFIX_DIR}/@CMAKE_INSTALL_INCLUDEDIR@) + +if(UNIX) + set(THREADS_PREFER_PTHREAD_FLAG ON) + find_dependency(Threads REQUIRED) +endif() + +check_required_components(re2) + +if(TARGET re2::re2) + return() +endif() + +include(${CMAKE_CURRENT_LIST_DIR}/re2Targets.cmake)
