[PATCH] D27068: Improve string::find
Tangmou added inline comments. Comment at: libcxx/trunk/include/__string:557 + _CharT __f2 = *__first2; + while (true) { +__len1 = __last1 - __first1; Tangmou wrote: > Sorry for the comment after such a long time, but I have a question about > this patch. > > Since we have already calculated `__len1` and ensured that `__len1 < __len2` > before the loop, can we just skip the length calculation and the comparison > in the first loop cycle? And thus we can replace the `while` loop with > `do-while` or keep the `while` loop but delete the length calculation and the > comparison before the loop? Sorry, I made a mistake. Please ignore it! Repository: rL LLVM CHANGES SINCE LAST ACTION https://reviews.llvm.org/D27068/new/ https://reviews.llvm.org/D27068 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27068: Improve string::find
Tangmou added inline comments. Herald added a project: All. Comment at: libcxx/trunk/include/__string:557 + _CharT __f2 = *__first2; + while (true) { +__len1 = __last1 - __first1; Sorry for the comment after such a long time, but I have a question about this patch. Since we have already calculated `__len1` and ensured that `__len1 < __len2` before the loop, can we just skip the length calculation and the comparison in the first loop cycle? And thus we can replace the `while` loop with `do-while` or keep the `while` loop but delete the length calculation and the comparison before the loop? Repository: rL LLVM CHANGES SINCE LAST ACTION https://reviews.llvm.org/D27068/new/ https://reviews.llvm.org/D27068 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27068: Improve string::find
hiraditya added a comment. Just for the record, I ported the patch to gcc/libstdc++ as well: PR66414 optimize std::string::find https://github.com/gcc-mirror/gcc/commit/fc7ebc4b8d9ad7e2891b7f72152e8a2b7543cd65 Repository: rL LLVM https://reviews.llvm.org/D27068 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27068: Improve string::find
sebpop added a comment. In https://reviews.llvm.org/D27068#632685, @EricWF wrote: > Holy crap those improvements are impressive. You're welcome. > This LGTM. I'm assuming @mclow.lists has nothing left to say about this. Thanks for your review and LGTM. I addressed your last comment, added some more comments, tested on x86_64-linux, and committed. Repository: rL LLVM https://reviews.llvm.org/D27068 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27068: Improve string::find
This revision was automatically updated to reflect the committed changes. Closed by commit rL290761: improve performance of string::find (authored by spop). Changed prior to commit: https://reviews.llvm.org/D27068?vs=80134=82731#toc Repository: rL LLVM https://reviews.llvm.org/D27068 Files: libcxx/trunk/benchmarks/string.bench.cpp libcxx/trunk/include/__string Index: libcxx/trunk/include/__string === --- libcxx/trunk/include/__string +++ libcxx/trunk/include/__string @@ -538,19 +538,59 @@ return static_cast<_SizeT>(__r - __p); } +template +inline _LIBCPP_CONSTEXPR_AFTER_CXX11 const _CharT * +__search_substring(const _CharT *__first1, const _CharT *__last1, + const _CharT *__first2, const _CharT *__last2) { + // Take advantage of knowing source and pattern lengths. + // Stop short when source is smaller than pattern. + const ptrdiff_t __len2 = __last2 - __first2; + if (__len2 == 0) +return __first1; + + ptrdiff_t __len1 = __last1 - __first1; + if (__len1 < __len2) +return __last1; + + // First element of __first2 is loop invariant. + _CharT __f2 = *__first2; + while (true) { +__len1 = __last1 - __first1; +// Check whether __first1 still has at least __len2 bytes. +if (__len1 < __len2) + return __last1; + +// Find __f2 the first byte matching in __first1. +__first1 = _Traits::find(__first1, __len1 - __len2 + 1, __f2); +if (__first1 == 0) + return __last1; + +// It is faster to compare from the first byte of __first1 even if we +// already know that it matches the first byte of __first2: this is because +// __first2 is most likely aligned, as it is user's "pattern" string, and +// __first1 + 1 is most likely not aligned, as the match is in the middle of +// the string. +if (_Traits::compare(__first1, __first2, __len2) == 0) + return __first1; + +++__first1; + } +} + template inline _SizeT _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY __str_find(const _CharT *__p, _SizeT __sz, const _CharT* __s, _SizeT __pos, _SizeT __n) _NOEXCEPT { -if (__pos > __sz || __sz - __pos < __n) +if (__pos > __sz) return __npos; -if (__n == 0) + +if (__n == 0) // There is nothing to search, just return __pos. return __pos; -const _CharT* __r = -_VSTD::__search(__p + __pos, __p + __sz, -__s, __s + __n, _Traits::eq, -random_access_iterator_tag(), random_access_iterator_tag()).first; + +const _CharT *__r = __search_substring<_CharT, _Traits>( +__p + __pos, __p + __sz, __s, __s + __n); + if (__r == __p + __sz) return __npos; return static_cast<_SizeT>(__r - __p); Index: libcxx/trunk/benchmarks/string.bench.cpp === --- libcxx/trunk/benchmarks/string.bench.cpp +++ libcxx/trunk/benchmarks/string.bench.cpp @@ -0,0 +1,49 @@ +#include +#include +#include + +#include "benchmark/benchmark_api.h" +#include "GenerateInput.hpp" + +constexpr std::size_t MAX_STRING_LEN = 8 << 14; + +// Benchmark when there is no match. +static void BM_StringFindNoMatch(benchmark::State ) { + std::string s1(state.range(0), '-'); + std::string s2(8, '*'); + while (state.KeepRunning()) +benchmark::DoNotOptimize(s1.find(s2)); +} +BENCHMARK(BM_StringFindNoMatch)->Range(10, MAX_STRING_LEN); + +// Benchmark when the string matches first time. +static void BM_StringFindAllMatch(benchmark::State ) { + std::string s1(MAX_STRING_LEN, '-'); + std::string s2(state.range(0), '-'); + while (state.KeepRunning()) +benchmark::DoNotOptimize(s1.find(s2)); +} +BENCHMARK(BM_StringFindAllMatch)->Range(1, MAX_STRING_LEN); + +// Benchmark when the string matches somewhere in the end. +static void BM_StringFindMatch1(benchmark::State ) { + std::string s1(MAX_STRING_LEN / 2, '*'); + s1 += std::string(state.range(0), '-'); + std::string s2(state.range(0), '-'); + while (state.KeepRunning()) +benchmark::DoNotOptimize(s1.find(s2)); +} +BENCHMARK(BM_StringFindMatch1)->Range(1, MAX_STRING_LEN / 4); + +// Benchmark when the string matches somewhere from middle to the end. +static void BM_StringFindMatch2(benchmark::State ) { + std::string s1(MAX_STRING_LEN / 2, '*'); + s1 += std::string(state.range(0), '-'); + s1 += std::string(state.range(0), '*'); + std::string s2(state.range(0), '-'); + while (state.KeepRunning()) +benchmark::DoNotOptimize(s1.find(s2)); +} +BENCHMARK(BM_StringFindMatch2)->Range(1, MAX_STRING_LEN / 4); + +BENCHMARK_MAIN() ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27068: Improve string::find
EricWF accepted this revision. EricWF added a comment. This revision is now accepted and ready to land. Holy crap those improvements are impressive. This LGTM. I'm assuming @mclow.lists has nothing left to say about this. Comment at: libcxx/include/__string:542 +template +_LIBCPP_CONSTEXPR_AFTER_CXX11 +const _CharT * Please add `inline` to the template so `-fvisibility-inlines-hidden` works with it. https://reviews.llvm.org/D27068 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27068: Improve string::find
hiraditya added a comment. Ping https://reviews.llvm.org/D27068 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27068: Improve string::find
EricWF added a comment. @mclow.lists I'm going to leave this review up to you. I don't see any immediate issues. https://reviews.llvm.org/D27068 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27068: Improve string::find
hiraditya updated this revision to Diff 80134. hiraditya added a comment. Addressed Marshall's comments. https://reviews.llvm.org/D27068 Files: libcxx/benchmarks/string.bench.cpp libcxx/include/__string Index: libcxx/include/__string === --- libcxx/include/__string +++ libcxx/include/__string @@ -538,25 +538,60 @@ return static_cast<_SizeT>(__r - __p); } +template +_LIBCPP_CONSTEXPR_AFTER_CXX11 +const _CharT * +__search_substring(const _CharT *__first1, const _CharT *__last1, + const _CharT * __first2, const _CharT * __last2) +{ +// Take advantage of knowing source and pattern lengths. +// Stop short when source is smaller than pattern. +const ptrdiff_t __len2 = __last2 - __first2; +if (__len2 == 0) +return __first1; +ptrdiff_t __len1 = __last1 - __first1; +if (__len1 < __len2) +return __last1; + +// First element of __first2 is loop invariant. +_CharT __f2 = *__first2; +while (true) +{ + __len1 = __last1 - __first1; + if (__len1 < __len2) + return __last1; + + __first1 = _Traits::find(__first1, __len1 - __len2 + 1, __f2); + if (__first1 == 0) +return __last1; + + // Comparing __first2 as well because the first pointer will be aligned + // and we dont know if __first1+1 is going to be aligned. + if (_Traits::compare(__first1, __first2, __len2) == 0) +return __first1; + +++__first1; +} +} + template inline _SizeT _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY __str_find(const _CharT *__p, _SizeT __sz, const _CharT* __s, _SizeT __pos, _SizeT __n) _NOEXCEPT { -if (__pos > __sz || __sz - __pos < __n) +if (__pos > __sz) return __npos; -if (__n == 0) + +if (__n == 0) // There is nothing to search, just return __pos return __pos; -const _CharT* __r = -_VSTD::__search(__p + __pos, __p + __sz, -__s, __s + __n, _Traits::eq, -random_access_iterator_tag(), random_access_iterator_tag()).first; + +const _CharT* __r = __search_substring<_CharT, _Traits>(__p + __pos, __p + __sz, +__s, __s + __n); if (__r == __p + __sz) return __npos; return static_cast<_SizeT>(__r - __p); } - // __str_rfind template Index: libcxx/benchmarks/string.bench.cpp === --- /dev/null +++ libcxx/benchmarks/string.bench.cpp @@ -0,0 +1,49 @@ +#include +#include +#include + +#include "benchmark/benchmark_api.h" +#include "GenerateInput.hpp" + +constexpr std::size_t MAX_STRING_LEN = 8<<14; + +// Benchmark when there is no match. +static void BM_StringFindNoMatch(benchmark::State& state) { + std::string s1(state.range(0), '-'); + std::string s2(8, '*'); + while (state.KeepRunning()) +benchmark::DoNotOptimize(s1.find(s2)); +} +BENCHMARK(BM_StringFindNoMatch)->Range(10,MAX_STRING_LEN); + +// Benchmark when the string matches first time. +static void BM_StringFindAllMatch(benchmark::State& state) { + std::string s1(MAX_STRING_LEN, '-'); + std::string s2(state.range(0), '-'); + while (state.KeepRunning()) +benchmark::DoNotOptimize(s1.find(s2)); +} +BENCHMARK(BM_StringFindAllMatch)->Range(1,MAX_STRING_LEN); + +// Benchmark when the string matches somewhere in the end. +static void BM_StringFindMatch1(benchmark::State& state) { + std::string s1(MAX_STRING_LEN/2, '*'); + s1 += std::string(state.range(0), '-'); + std::string s2(state.range(0), '-'); + while (state.KeepRunning()) +benchmark::DoNotOptimize(s1.find(s2)); +} +BENCHMARK(BM_StringFindMatch1)->Range(1,MAX_STRING_LEN/4); + +// Benchmark when the string matches somewhere from middle to the end. +static void BM_StringFindMatch2(benchmark::State& state) { + std::string s1(MAX_STRING_LEN/2, '*'); + s1 += std::string(state.range(0), '-'); + s1 += std::string(state.range(0), '*'); + std::string s2(state.range(0), '-'); + while (state.KeepRunning()) +benchmark::DoNotOptimize(s1.find(s2)); +} +BENCHMARK(BM_StringFindMatch2)->Range(1,MAX_STRING_LEN/4); + +BENCHMARK_MAIN() ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27068: Improve string::find
hiraditya added inline comments. Comment at: libcxx/include/__string:549 +// Stop short when source is smaller than pattern. +ptrdiff_t __len2 = __last2 - __first2; +if (__len2 == 0) mclow.lists wrote: > Is there a reason that you calculate the end pointer(s) from `first + len` in > the calling function, then recover the length here? > > Also, `__len1` and `__len2` should be const. Recovering length here is only making the interface uniform, I don't have any specific reason for that. This function is going to be inlined anyways. It started this way because I copied the interface from std::__search in the header file algorithm. len2 can be a const, since len1 changes in the loop, for this to make const I would have to introduce another variable. I'll update the patch. https://reviews.llvm.org/D27068 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27068: Improve string::find
mclow.lists added a comment. This is starting to look good. Comment at: libcxx/include/__string:549 +// Stop short when source is smaller than pattern. +ptrdiff_t __len2 = __last2 - __first2; +if (__len2 == 0) Is there a reason that you calculate the end pointer(s) from `first + len` in the calling function, then recover the length here? Also, `__len1` and `__len2` should be const. https://reviews.llvm.org/D27068 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27068: Improve string::find
hiraditya updated this revision to Diff 80090. hiraditya added a comment. Addressed Tim's comments. https://reviews.llvm.org/D27068 Files: libcxx/benchmarks/string.bench.cpp libcxx/include/__string Index: libcxx/include/__string === --- libcxx/include/__string +++ libcxx/include/__string @@ -538,25 +538,60 @@ return static_cast<_SizeT>(__r - __p); } +template +_LIBCPP_CONSTEXPR_AFTER_CXX11 +const _CharT * +__search_substring(const _CharT *__first1, const _CharT *__last1, + const _CharT * __first2, const _CharT * __last2) +{ +// Take advantage of knowing source and pattern lengths. +// Stop short when source is smaller than pattern. +ptrdiff_t __len2 = __last2 - __first2; +if (__len2 == 0) +return __first1; +ptrdiff_t __len1 = __last1 - __first1; +if (__len1 < __len2) +return __last1; + +// First element of __first2 is loop invariant. +_CharT __f2 = *__first2; +while (true) +{ + __len1 = __last1 - __first1; + if (__len1 < __len2) + return __last1; + + __first1 = _Traits::find(__first1, __len1 - __len2 + 1, __f2); + if (__first1 == 0) +return __last1; + + // Comparing __first2 as well because the first pointer will be aligned + // and we dont know if __first1+1 is going to be aligned. + if (_Traits::compare(__first1, __first2, __len2) == 0) +return __first1; + +++__first1; +} +} + template inline _SizeT _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY __str_find(const _CharT *__p, _SizeT __sz, const _CharT* __s, _SizeT __pos, _SizeT __n) _NOEXCEPT { -if (__pos > __sz || __sz - __pos < __n) +if (__pos > __sz) return __npos; -if (__n == 0) + +if (__n == 0) // There is nothing to search, just return __pos return __pos; -const _CharT* __r = -_VSTD::__search(__p + __pos, __p + __sz, -__s, __s + __n, _Traits::eq, -random_access_iterator_tag(), random_access_iterator_tag()).first; + +const _CharT* __r = __search_substring<_CharT, _Traits>(__p + __pos, __p + __sz, +__s, __s + __n); if (__r == __p + __sz) return __npos; return static_cast<_SizeT>(__r - __p); } - // __str_rfind template Index: libcxx/benchmarks/string.bench.cpp === --- /dev/null +++ libcxx/benchmarks/string.bench.cpp @@ -0,0 +1,49 @@ +#include +#include +#include + +#include "benchmark/benchmark_api.h" +#include "GenerateInput.hpp" + +constexpr std::size_t MAX_STRING_LEN = 8<<14; + +// Benchmark when there is no match. +static void BM_StringFindNoMatch(benchmark::State& state) { + std::string s1(state.range(0), '-'); + std::string s2(8, '*'); + while (state.KeepRunning()) +benchmark::DoNotOptimize(s1.find(s2)); +} +BENCHMARK(BM_StringFindNoMatch)->Range(10,MAX_STRING_LEN); + +// Benchmark when the string matches first time. +static void BM_StringFindAllMatch(benchmark::State& state) { + std::string s1(MAX_STRING_LEN, '-'); + std::string s2(state.range(0), '-'); + while (state.KeepRunning()) +benchmark::DoNotOptimize(s1.find(s2)); +} +BENCHMARK(BM_StringFindAllMatch)->Range(1,MAX_STRING_LEN); + +// Benchmark when the string matches somewhere in the end. +static void BM_StringFindMatch1(benchmark::State& state) { + std::string s1(MAX_STRING_LEN/2, '*'); + s1 += std::string(state.range(0), '-'); + std::string s2(state.range(0), '-'); + while (state.KeepRunning()) +benchmark::DoNotOptimize(s1.find(s2)); +} +BENCHMARK(BM_StringFindMatch1)->Range(1,MAX_STRING_LEN/4); + +// Benchmark when the string matches somewhere from middle to the end. +static void BM_StringFindMatch2(benchmark::State& state) { + std::string s1(MAX_STRING_LEN/2, '*'); + s1 += std::string(state.range(0), '-'); + s1 += std::string(state.range(0), '*'); + std::string s2(state.range(0), '-'); + while (state.KeepRunning()) +benchmark::DoNotOptimize(s1.find(s2)); +} +BENCHMARK(BM_StringFindMatch2)->Range(1,MAX_STRING_LEN/4); + +BENCHMARK_MAIN() ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27068: Improve string::find
tcanens added inline comments. Comment at: include/__string:543 +_LIBCPP_CONSTEXPR_AFTER_CXX11 +_RandomAccessIterator +__search_substring(_RandomAccessIterator __first1, _RandomAccessIterator __last1, A character traits class need only accept pointers, so the name `_RandomAccessIterator` is misleading when you are passing them directly to `_Traits::find`/`_Traits::compare`. Why not just `const _CharT*`? Then you can strip out all the `iterator_traits` circumlocution as well. Comment at: include/__string:548 +using __iterator_traits = iterator_traits<_RandomAccessIterator>; +typedef typename __iterator_traits::difference_type __difference_type; + For example, since `_RandomAccessIterator` must be a pointer type, this can't possibly be anything other than `ptrdiff_t`. Comment at: include/__string:568 + __first1 = _Traits::find(__first1, __len1 - __len2 + 1, __f2); + if (__first1 == _RandomAccessIterator(0)) +return __last1; The function-style cast is redundant. https://reviews.llvm.org/D27068 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27068: Improve string::find
hiraditya updated this revision to Diff 79908. hiraditya added a comment. Updated the benchmark: Without patch: Run on (8 X 3400 MHz CPU s) 2016-12-01 09:20:38 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. Benchmark Time CPU Iterations - BM_StringFindNoMatch/10 4 ns 4 ns 166421326 BM_StringFindNoMatch/6437 ns 37 ns 18754392 BM_StringFindNoMatch/512 268 ns268 ns2586060 BM_StringFindNoMatch/4k 2143 ns 2144 ns 328342 BM_StringFindNoMatch/32k16910 ns 16917 ns 40623 BM_StringFindNoMatch/128k 67577 ns 67602 ns 10138 BM_StringFindAllMatch/1 3 ns 3 ns 265163471 BM_StringFindAllMatch/8 6 ns 6 ns 112582467 BM_StringFindAllMatch/64 36 ns 36 ns 19566457 BM_StringFindAllMatch/512 209 ns209 ns3318893 BM_StringFindAllMatch/4k 1618 ns 1618 ns 432963 BM_StringFindAllMatch/32k 12909 ns 12914 ns 54317 BM_StringFindAllMatch/128k 48342 ns 48361 ns 13922 BM_StringFindMatch1/1 33777 ns 33790 ns 20698 BM_StringFindMatch1/8 33940 ns 33953 ns 20619 BM_StringFindMatch1/64 34038 ns 34051 ns 20571 BM_StringFindMatch1/512 34217 ns 34230 ns 20480 BM_StringFindMatch1/4k 35510 ns 35524 ns 19752 BM_StringFindMatch1/32k 46438 ns 46456 ns 15030 BM_StringFindMatch2/1 33839 ns 33852 ns 20648 BM_StringFindMatch2/8 33950 ns 33963 ns 20594 BM_StringFindMatch2/64 33846 ns 33859 ns 20668 BM_StringFindMatch2/512 34023 ns 34036 ns 20279 BM_StringFindMatch2/4k 35422 ns 35436 ns 19716 BM_StringFindMatch2/32k 46570 ns 46588 ns 15027 With patch: Run on (8 X 3400 MHz CPU s) 2016-12-01 09:15:15 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. Benchmark Time CPU Iterations - BM_StringFindNoMatch/10 5 ns 5 ns 133724346 BM_StringFindNoMatch/64 6 ns 6 ns 119312184 BM_StringFindNoMatch/512 13 ns 13 ns 51539628 BM_StringFindNoMatch/4k77 ns 77 ns8935934 BM_StringFindNoMatch/32k 551 ns551 ns1222808 BM_StringFindNoMatch/128k2684 ns 2685 ns 259957 BM_StringFindAllMatch/1 7 ns 7 ns 98017959 BM_StringFindAllMatch/8 7 ns 7 ns 91466911 BM_StringFindAllMatch/648 ns 8 ns 85707392 BM_StringFindAllMatch/512 20 ns 20 ns 34490895 BM_StringFindAllMatch/4k 93 ns 93 ns7360375 BM_StringFindAllMatch/32k 827 ns828 ns 829944 BM_StringFindAllMatch/128k 3593 ns 3594 ns 195815 BM_StringFindMatch1/11332 ns 1332 ns 516354 BM_StringFindMatch1/81336 ns 1336 ns 495876 BM_StringFindMatch1/64 1338 ns 1339 ns 516656 BM_StringFindMatch1/512 1357 ns 1357 ns 510717 BM_StringFindMatch1/4k 1485 ns 1486 ns 461228 BM_StringFindMatch1/32k 2235 ns 2236 ns 318253 BM_StringFindMatch2/11335 ns 1335 ns 517105 BM_StringFindMatch2/81336 ns 1337 ns 518004 BM_StringFindMatch2/64 1344 ns 1345 ns 511751 BM_StringFindMatch2/512 1361 ns 1361 ns 508150 BM_StringFindMatch2/4k 1611 ns 1611 ns 463388 BM_StringFindMatch2/32k 2187 ns 2187 ns 317532 https://reviews.llvm.org/D27068 Files: benchmarks/string.bench.cpp include/__string Index: include/__string === --- include/__string +++ include/__string @@ -538,25 +538,63 @@ return static_cast<_SizeT>(__r - __p); } +template +_LIBCPP_CONSTEXPR_AFTER_CXX11 +_RandomAccessIterator +__search_substring(_RandomAccessIterator __first1, _RandomAccessIterator __last1, + _RandomAccessIterator __first2, _RandomAccessIterator __last2) +{ +using __iterator_traits = iterator_traits<_RandomAccessIterator>; +typedef typename __iterator_traits::difference_type __difference_type; + +// Take advantage of knowing source and pattern lengths. +// Stop short when source is smaller than pattern. +__difference_type __len2 = __last2 - __first2; +if (__len2 == 0) +return __first1; +__difference_type __len1 = __last1 - __first1; +if (__len1 <
[PATCH] D27068: Improve string::find
sebpop added a comment. The numbers are from an x86_64-linux machine: Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz Overall the patch is a win on the synthetic benchmark. We have also seen this in a proprietary benchmark where it accounted for about 10% speedup. https://reviews.llvm.org/D27068 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27068: Improve string::find
sebpop added a comment. In https://reviews.llvm.org/D27068#606757, @mclow.lists wrote: > Definitely want to see numbers. master: Benchmark Time CPU Iterations --- BM_StringFindPhase1/104 ns 4 ns 161568945 BM_StringFindPhase1/64 28 ns 28 ns 24937005 BM_StringFindPhase1/512 132 ns132 ns5321432 BM_StringFindPhase1/4k 956 ns956 ns 732038 BM_StringFindPhase1/32k7622 ns 7621 ns 92121 BM_StringFindPhase1/128k 30485 ns 30483 ns 22938 BM_StringFindPhase2/1 4 ns 4 ns 191884173 BM_StringFindPhase2/8 7 ns 7 ns 105129099 BM_StringFindPhase2/64 34 ns 34 ns 20582485 BM_StringFindPhase2/512 187 ns187 ns3736654 BM_StringFindPhase2/4k 1415 ns 1414 ns 495342 BM_StringFindPhase2/32k 11221 ns 11220 ns 62393 BM_StringFindPhase2/128k 44952 ns 44949 ns 15595 master + patch: Benchmark Time CPU Iterations --- BM_StringFindPhase1/103 ns 3 ns 204725158 BM_StringFindPhase1/645 ns 5 ns 146268957 BM_StringFindPhase1/512 12 ns 12 ns 60176557 BM_StringFindPhase1/4k 67 ns 67 ns 10508533 BM_StringFindPhase1/32k 503 ns503 ns100 BM_StringFindPhase1/128k 2396 ns 2396 ns 292701 BM_StringFindPhase2/1 6 ns 6 ns 127378641 BM_StringFindPhase2/8 6 ns 6 ns 117407388 BM_StringFindPhase2/647 ns 7 ns 95998016 BM_StringFindPhase2/512 18 ns 18 ns 38135928 BM_StringFindPhase2/4k 83 ns 83 ns8452337 BM_StringFindPhase2/32k 762 ns762 ns 925744 BM_StringFindPhase2/128k 3255 ns 3255 ns 215141 https://reviews.llvm.org/D27068 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27068: Improve string::find
mclow.lists added a comment. Definitely want to see numbers. https://reviews.llvm.org/D27068 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27068: Improve string::find
sebpop added a comment. Please also post the performance numbers from the added benchmarks with and without the change to the lib. Comment at: libcxx/benchmarks/string.bench.cpp:12 +// until the end of s1. +static void BM_StringFindPhase1(benchmark::State& state) { + // Benchmark following the length of s1. Let's call this benchmark "BM_StringFindNoMatch" Comment at: libcxx/benchmarks/string.bench.cpp:21 + +// Benchmark the __phase2 part of string search: we want the strings s1 and s2 +// to match from the first char in s1. Please reword to remove the reference to __phase2. Comment at: libcxx/benchmarks/string.bench.cpp:23 +// to match from the first char in s1. +static void BM_StringFindPhase2(benchmark::State& state) { + std::string s1(MAX_STRING_LEN, '-'); Let's call this benchmark "BM_StringFindAllMatch" https://reviews.llvm.org/D27068 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27068: Improve string::find
hiraditya created this revision. hiraditya added reviewers: mclow.lists, EricWF. hiraditya added subscribers: sebpop, cfe-commits. Improving string::find such that it ultimately gets converted to calls to memchr and memcmp. This is an intermediate patch to see if there is an interest in this optimization. I'm planning to propagate this change to similar functions string::rfind ... etc. Worked in collaboration with Sebastian Pop. https://reviews.llvm.org/D27068 Files: libcxx/benchmarks/string.bench.cpp libcxx/include/__string Index: libcxx/include/__string === --- libcxx/include/__string +++ libcxx/include/__string @@ -538,25 +538,63 @@ return static_cast<_SizeT>(__r - __p); } +template +_LIBCPP_CONSTEXPR_AFTER_CXX11 +_RandomAccessIterator +__search_substring(_RandomAccessIterator __first1, _RandomAccessIterator __last1, + _RandomAccessIterator __first2, _RandomAccessIterator __last2) +{ +using __iterator_traits = iterator_traits<_RandomAccessIterator>; +typedef typename __iterator_traits::difference_type __difference_type; + +// Take advantage of knowing source and pattern lengths. +// Stop short when source is smaller than pattern. +__difference_type __len2 = __last2 - __first2; +if (__len2 == 0) +return __first1; +__difference_type __len1 = __last1 - __first1; +if (__len1 < __len2) +return __last1; + +// First element of __first2 is loop invariant. +typename __iterator_traits::value_type __f2 = *__first2; +while (true) +{ + __len1 = __last1 - __first1; + if (__len1 < __len2) + return __last1; + + __first1 = _Traits::find(__first1, __len1 - __len2 + 1, __f2); + if (__first1 == _RandomAccessIterator(0)) +return __last1; + + // Comparing __first2 as well because the first pointer will be aligned + // and we dont know if __first1+1 is going to be aligned. + if (_Traits::compare(__first1, __first2, __len2) == 0) +return __first1; + +++__first1; +} +} + template inline _SizeT _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY __str_find(const _CharT *__p, _SizeT __sz, const _CharT* __s, _SizeT __pos, _SizeT __n) _NOEXCEPT { -if (__pos > __sz || __sz - __pos < __n) +if (__pos > __sz) return __npos; -if (__n == 0) + +if (__n == 0) // There is nothing to search, just return __pos return __pos; -const _CharT* __r = -_VSTD::__search(__p + __pos, __p + __sz, -__s, __s + __n, _Traits::eq, -random_access_iterator_tag(), random_access_iterator_tag()).first; + +const _CharT* __r = __search_substring(__p + __pos, __p + __sz, + __s, __s + __n); if (__r == __p + __sz) return __npos; return static_cast<_SizeT>(__r - __p); } - // __str_rfind template Index: libcxx/benchmarks/string.bench.cpp === --- /dev/null +++ libcxx/benchmarks/string.bench.cpp @@ -0,0 +1,32 @@ +#include +#include +#include + +#include "benchmark/benchmark_api.h" +#include "GenerateInput.hpp" + +constexpr std::size_t MAX_STRING_LEN = 8<<14; + +// Benchmark the first loop of string search: we do not want the string to match +// until the end of s1. +static void BM_StringFindPhase1(benchmark::State& state) { + // Benchmark following the length of s1. + std::string s1(state.range(0), '-'); + std::string s2(8, '*'); + while (state.KeepRunning()) +benchmark::DoNotOptimize(s1.find(s2)); +} +BENCHMARK(BM_StringFindPhase1)->Range(10,MAX_STRING_LEN); + +// Benchmark the __phase2 part of string search: we want the strings s1 and s2 +// to match from the first char in s1. +static void BM_StringFindPhase2(benchmark::State& state) { + std::string s1(MAX_STRING_LEN, '-'); + // Benchmark following the length of s2. + std::string s2(state.range(0), '-'); + while (state.KeepRunning()) +benchmark::DoNotOptimize(s1.find(s2)); +} +BENCHMARK(BM_StringFindPhase2)->Range(1,MAX_STRING_LEN); + +BENCHMARK_MAIN() ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits