On 24/11/24 01:42 +0100, Jan Hubicka wrote:
Hi,
another common source of unnecesary throw_bad_alloc calls is
basic_string::_M_create.
basic_string<_CharT, _Traits, _Alloc>::
_M_create(size_type& __capacity, size_type __old_capacity)
{
// _GLIBCXX_RESOLVE_LIB_DEFECTS
// 83. String::npos vs. string::max_size()
if (__capacity > max_size())
std::__throw_length_error(__N("basic_string::_M_create"));
// The below implements an exponential growth policy, necessary to
// meet amortized linear time requirements of the library: see
// http://gcc.gnu.org/ml/libstdc++/2001-07/msg00085.html.
if (__capacity > __old_capacity && __capacity < 2 * __old_capacity)
{
__capacity = 2 * __old_capacity;
// Never allocate a string bigger than max_size.
if (__capacity > max_size())
__capacity = max_size();
}
// NB: Need an array of char_type[__capacity], plus a terminating
// null char_type() element.
return _S_allocate(_M_get_allocator(), __capacity + 1);
}
The code makes it visible that string is never bigger then max_size, however + 1
makes it one byte bigger than maximal size allowed by allocator. I believe
this is by miscounting the 0 at the end of string in max_size function:
- { return (_Alloc_traits::max_size(_M_get_allocator()) - 1) / 2; }
+ { return _Alloc_traits::max_size(_M_get_allocator()) / 2 - 1; }
Ah yes, good catch. That code was copied from ext/sso_string_base.h
where it has been present since r0-76546-g3ad7074772808f in 2006!
Path also adds __builtin_unreachable to express value ranges of capacity and
length.
This makes it possible to optimize out throw when copying strings as in the
testcase.
std::string
test (std::string &a)
{
return a;
}
This now yields the following:
struct string test (struct string & a)
{
size_type __siz;
struct string & _2(D);
char[16] * _4;
char * _5;
char * _7;
char * prephitmp_10;
char * pretmp_11;
char _15;
char * _18;
char * _27;
long unsigned int _32;
<bb 2> [local count: 1073741824]:
_4 = &MEM[(struct basic_string *)_2(D)].D.32970._M_local_buf;
MEM[(struct _Alloc_hider *)_2(D)]._M_p = _4;
_5 = MEM[(const struct basic_string *)a_3(D)]._M_dataplus._M_p;
__siz_6 = MEM[(const struct basic_string *)a_3(D)]._M_string_length;
if (__siz_6 > 15)
goto <bb 3>; [33.00%]
else
goto <bb 4>; [67.00%]
<bb 3> [local count: 354334800]:
_32 = __siz_6 + 1;
_27 = operator new (_32);
MEM[(struct basic_string *)_2(D)]._M_dataplus._M_p = _27;
MEM[(struct basic_string *)_2(D)].D.32970._M_allocated_capacity = __siz_6;
goto <bb 8>; [100.00%]
<bb 4> [local count: 719407024]:
if (__siz_6 == 1)
goto <bb 5>; [34.00%]
else
goto <bb 7>; [66.00%]
<bb 5> [local count: 353024841]:
_15 = MEM[(const char_type &)_5];
MEM[(char_type &)_2(D) + 16] = _15;
<bb 6> [local count: 350316931]:
goto <bb 9>; [100.00%]
<bb 7> [local count: 474808633]:
if (__siz_6 == 0)
goto <bb 6>; [73.78%]
else
goto <bb 8>; [26.22%]
<bb 8> [local count: 334966574]:
# _7 = PHI <_4(7), _27(3)>
__builtin_memcpy (_7, _5, __siz_6);
pretmp_11 = MEM[(const struct basic_string *)_2(D)]._M_dataplus._M_p;
<bb 9> [local count: 1038308344]:
# prephitmp_10 = PHI <_4(6), pretmp_11(8)>
MEM[(struct basic_string *)_2(D)]._M_string_length = __siz_6;
_18 = prephitmp_10 + __siz_6;
MEM[(char_type &)_18] = 0;
return _2(D);
}
.text segment is reduced from 213 bytes to 192 (saving 2 conditionals
and 2 calls to throw)
I find it funny that the code special cases a.size () == 1 to copy single byte
and a.size () == 0 to bypass memcpy call. I would say it is job of middle-end
to optimize memcpy reasonably even for small blocks. Was this based on some
data
showing that many strings have size of 0 and 1 which is not visible to
compiler?
That was added as _M_copy in 2004 by r0-62838-gec61e852bc917b and when
I implemented the new std::__cxx11::basic_string I copied it, only
renaming it to _S_copy because that's our convention for static member
functions.
There's no explanation in the ChangeLog (obviously, because ChangeLog
files are useless at telling you *why* something happened), but this
comment was added:
+ // When __n = 1 way faster than the general multichar
+ // traits_type::copy/move/assign.
Back in 2004 char_traits<char>::copy called memcpy (not
__builtin_memcpy) and maybe it was badly optimized.
I think we can probably drop that "optimization" for __n == 1.
regtested x86_64-linux, OK?
Honza
libstdc++-v3/ChangeLog:
* include/bits/basic_string.h:
gcc/testsuite/ChangeLog:
* g++.dg/tree-ssa/string-1.C: New test.
diff --git a/gcc/testsuite/g++.dg/tree-ssa/string-1.C
b/gcc/testsuite/g++.dg/tree-ssa/string-1.C
new file mode 100644
index 00000000000..d38c23a7628
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/string-1.C
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -std=c++20 -fdump-tree-optimized" } */
+#include <string>
+std::string
+test (std::string &a)
+{
+ return a;
+}
+/* { dg-final { scan-tree-dump-not "throw" "optimized" } } */
diff --git a/libstdc++-v3/include/bits/basic_string.h
b/libstdc++-v3/include/bits/basic_string.h
index f5b320099b1..d754deb28ed 100644
--- a/libstdc++-v3/include/bits/basic_string.h
+++ b/libstdc++-v3/include/bits/basic_string.h
@@ -1079,20 +1079,30 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
_GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
size_type
size() const _GLIBCXX_NOEXCEPT
- { return _M_string_length; }
+ {
+ size_type __siz = _M_string_length;
Please use __sz for these variables, as that's consistent with names
used elsewhere.
+ if (__siz > max_size ())
+ __builtin_unreachable ();
+ return __siz;
+ }
/// Returns the number of characters in the string, not including any
/// null-termination.
_GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
size_type
length() const _GLIBCXX_NOEXCEPT
- { return _M_string_length; }
+ {
+ size_type __siz = _M_string_length;
+ if (__siz > max_size ())
+ __builtin_unreachable ();
+ return __siz;
Should this just call size() instead of repeating the definition?
+ }
/// Returns the size() of the largest possible %string.
_GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
size_type
max_size() const _GLIBCXX_NOEXCEPT
- { return (_Alloc_traits::max_size(_M_get_allocator()) - 1) / 2; }
+ { return _Alloc_traits::max_size(_M_get_allocator()) / 2 - 1; }
/**
* @brief Resizes the %string to the specified number of characters.
@@ -1184,8 +1194,11 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
size_type
capacity() const _GLIBCXX_NOEXCEPT
{
- return _M_is_local() ? size_type(_S_local_capacity)
- : _M_allocated_capacity;
+ size_t __siz = _M_is_local() ? size_type(_S_local_capacity)
size_type __sz = ...
+ : _M_allocated_capacity;
+ if (__siz < _S_local_capacity || __siz > max_size () + 1)
Is this +1 correct?
I think it should be simply `__siz > max_size()`
The largest allocation allowed is max_size()+1 but the value stored in
_M_allocated_capacity will be just max_size() without the +1.
+ __builtin_unreachable ();
+ return __siz;
}
/**