Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package ugrep for openSUSE:Factory checked in at 2023-08-21 11:45:49 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/ugrep (Old) and /work/SRC/openSUSE:Factory/.ugrep.new.1766 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "ugrep" Mon Aug 21 11:45:49 2023 rev:50 rq:1104932 version:4.0.1 Changes: -------- --- /work/SRC/openSUSE:Factory/ugrep/ugrep.changes 2023-08-20 00:15:06.699340842 +0200 +++ /work/SRC/openSUSE:Factory/.ugrep.new.1766/ugrep.changes 2023-08-21 11:46:05.823748099 +0200 @@ -1,0 +2,8 @@ +Sun Aug 20 17:52:48 UTC 2023 - Andreas Stieger <andreas.stie...@gmx.de> + +- update to 4.0.1: + * speed up option -l to search compressed files + * enable an important search optimization that was no longer + enabled + +------------------------------------------------------------------- Old: ---- ugrep-4.0.0.tar.gz New: ---- ugrep-4.0.1.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ ugrep.spec ++++++ --- /var/tmp/diff_new_pack.kRl0K0/_old 2023-08-21 11:46:06.743749861 +0200 +++ /var/tmp/diff_new_pack.kRl0K0/_new 2023-08-21 11:46:06.747749869 +0200 @@ -17,7 +17,7 @@ Name: ugrep -Version: 4.0.0 +Version: 4.0.1 Release: 0 Summary: Universal grep: a feature-rich grep implementation with focus on speed License: BSD-3-Clause ++++++ ugrep-4.0.0.tar.gz -> ugrep-4.0.1.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ugrep-4.0.0/README.md new/ugrep-4.0.1/README.md --- old/ugrep-4.0.0/README.md 2023-08-18 15:21:29.000000000 +0200 +++ new/ugrep-4.0.1/README.md 2023-08-20 19:09:32.000000000 +0200 @@ -27,7 +27,7 @@ - listen to users to continue to add new and updated features - improve the interactive TUI with a split screen - make ugrep even faster and report on progress, see [my latest article](https://www.genivia.com/ugrep.html) -- share reproducible performance data with the community +- share [reproducible performance data](https://github.com/Genivia/ugrep-benchmarks) with the community - add file indexing to speed up cold search performance, see [ugrep-indexer](https://github.com/Genivia/ugrep-indexer) Overview @@ -472,6 +472,12 @@ Performance comparisons ----------------------- +For an up-to-date comprehensive performance comparison, please see the +[ugrep performance benchmarks](https://github.com/Genivia/ugrep-benchmarks). + +Below is a two-year old performance comparison when ugrep was first released +with performance enhancements. This old comparison is getting outdated. + Performance comparisons should represent what users can expect the performance to be in practice. There should not be any shenanigans to trick the system to perform more optimally or to degrade an important aspect of the search to make Binary files old/ugrep-4.0.0/bin/win32/ugrep.exe and new/ugrep-4.0.1/bin/win32/ugrep.exe differ Binary files old/ugrep-4.0.0/bin/win64/ugrep.exe and new/ugrep-4.0.1/bin/win64/ugrep.exe differ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ugrep-4.0.0/lib/matcher.cpp new/ugrep-4.0.1/lib/matcher.cpp --- old/ugrep-4.0.0/lib/matcher.cpp 2023-08-18 15:21:29.000000000 +0200 +++ new/ugrep-4.0.1/lib/matcher.cpp 2023-08-20 19:09:32.000000000 +0200 @@ -2566,7 +2566,7 @@ } } #endif - if (min >= 2 && pat_->npy_ < 16) + if (pat_->npy_ < 16) { if (min >= 4) { @@ -2695,6 +2695,40 @@ } } } + const Pattern::Pred *bit = pat_->bit_; + while (true) + { + const char *s = buf_ + loc; + const char *e = buf_ + end_ - 3; + bool f = true; + while (s < e && + (f = ((bit[static_cast<uint8_t>(*s)] & 1) && + (bit[static_cast<uint8_t>(*++s)] & 1) && + (bit[static_cast<uint8_t>(*++s)] & 1) && + (bit[static_cast<uint8_t>(*++s)] & 1)))) + { + ++s; + } + loc = s - buf_; + if (!f) + { + if (s < e && Pattern::predict_match(pma, s)) + { + ++loc; + continue; + } + set_current(loc); + return true; + } + set_current_match(loc - 1); + (void)peek_more(); + loc = cur_ + 1; + if (loc + 3 >= end_) + { + set_current(loc); + return false; + } + } } if (min >= 4) { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ugrep-4.0.0/lib/pattern.cpp new/ugrep-4.0.1/lib/pattern.cpp --- old/ugrep-4.0.0/lib/pattern.cpp 2023-08-18 15:21:29.000000000 +0200 +++ new/ugrep-4.0.1/lib/pattern.cpp 2023-08-20 19:09:32.000000000 +0200 @@ -347,6 +347,12 @@ } } } + // only one position to pin + if (min_ == 1) + { + nlcs = nlcp; + lcs_ = lcp_; + } // number of needles required uint16_t n = nlcp > nlcs ? nlcp : nlcs; // determine if a needle-based search is worthwhile, below or meeting the thresholds diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ugrep-4.0.0/lib/simd_avx2.cpp new/ugrep-4.0.1/lib/simd_avx2.cpp --- old/ugrep-4.0.0/lib/simd_avx2.cpp 2023-08-18 15:21:29.000000000 +0200 +++ new/ugrep-4.0.1/lib/simd_avx2.cpp 2023-08-20 19:09:32.000000000 +0200 @@ -35,6 +35,7 @@ */ #include <reflex/absmatcher.h> +#include <cstddef> namespace reflex { @@ -48,7 +49,7 @@ return 0; size_t n = 0; // align on 32 bytes - while ((reinterpret_cast<ptrdiff_t>(s) & 0x1f) != 0) + while ((reinterpret_cast<std::ptrdiff_t>(s) & 0x1f) != 0) n += (*s++ == '\n'); __m256i vlcn = _mm256_set1_epi8('\n'); while (s <= e) @@ -82,7 +83,7 @@ return 0; size_t n = 0; // align on 16 bytes - while ((reinterpret_cast<ptrdiff_t>(s) & 0x0f) != 0) + while ((reinterpret_cast<std::ptrdiff_t>(s) & 0x0f) != 0) n += (*s++ == '\n'); __m128i vlcn = _mm_set1_epi8('\n'); while (s <= e) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ugrep-4.0.0/lib/simd_avx512bw.cpp new/ugrep-4.0.1/lib/simd_avx512bw.cpp --- old/ugrep-4.0.0/lib/simd_avx512bw.cpp 2023-08-18 15:21:29.000000000 +0200 +++ new/ugrep-4.0.1/lib/simd_avx512bw.cpp 2023-08-20 19:09:32.000000000 +0200 @@ -35,6 +35,7 @@ */ #include <reflex/absmatcher.h> +#include <cstddef> namespace reflex { @@ -48,7 +49,7 @@ return 0; size_t n = 0; // align on 64 bytes - while ((reinterpret_cast<ptrdiff_t>(s) & 0x3f) != 0) + while ((reinterpret_cast<std::ptrdiff_t>(s) & 0x3f) != 0) n += (*s++ == '\n'); __m512i vlcn = _mm512_set1_epi8('\n'); while (s <= e) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ugrep-4.0.0/src/ugrep.cpp new/ugrep-4.0.1/src/ugrep.cpp --- old/ugrep-4.0.0/src/ugrep.cpp 2023-08-18 15:21:29.000000000 +0200 +++ new/ugrep-4.0.1/src/ugrep.cpp 2023-08-20 19:09:32.000000000 +0200 @@ -1066,7 +1066,13 @@ { // write buffer data to the pipe, if the pipe is broken then the receiver is waiting for this thread to join so we drain the rest of the decompressed data if (is_selected && !drain && write(pipe_fd[1], buf, static_cast<size_t>(len)) < len) + { + // if no next decompression thread and decompressing a single file (not zip), then stop immediately + if (ztchain == NULL && zipinfo == NULL) + break; + drain = true; + } // decompress the next block of data into the buffer len = zstream->decompress(buf, maxlen); @@ -7595,10 +7601,15 @@ unsigned int cores = std::thread::hardware_concurrency(); unsigned int concurrency = cores > 2 ? cores : 2; // reduce concurrency by a few for 9+ core CPUs +#if defined(__APPLE__) && defined(HAVE_NEON) + // apple M1 8 or 10 cores should be reduced to 7 if (concurrency >= 10) - concurrency -= concurrency / 5; + concurrency = 7; else - concurrency -= concurrency / 9; + concurrency -= concurrency / 8; +#else + concurrency -= concurrency / 9; +#endif flag_jobs = std::min(concurrency, MAX_JOBS); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ugrep-4.0.0/src/ugrep.hpp new/ugrep-4.0.1/src/ugrep.hpp --- old/ugrep-4.0.0/src/ugrep.hpp 2023-08-18 15:21:29.000000000 +0200 +++ new/ugrep-4.0.1/src/ugrep.hpp 2023-08-20 19:09:32.000000000 +0200 @@ -38,7 +38,7 @@ #define UGREP_HPP // ugrep version -#define UGREP_VERSION "4.0.0" +#define UGREP_VERSION "4.0.1" // disable mmap because mmap is almost always slower than the file reading speed improvements since 3.0.0 #define WITH_NO_MMAP