https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89118

            Bug ID: 89118
           Summary: Illegal memory access in codecvt::out()
           Product: gcc
           Version: 8.2.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: libstdc++
          Assignee: unassigned at gcc dot gnu.org
          Reporter: vagran.ast at gmail dot com
  Target Milestone: ---

On Ubuntu 18.04.1, I compiled the example from
https://en.cppreference.com/w/cpp/locale/codecvt/out and ran it under Valgrind:

#include <iostream>
#include <string>
#include <locale>

// Reproducer: converts a wide string to the narrow multibyte encoding of the
// en_US.utf8 locale with std::codecvt<wchar_t, char, std::mbstate_t>::out()
// and prints the result.
int main()
{
    // Make en_US.utf8 the global locale, then fetch its codecvt facet.
    std::locale::global(std::locale("en_US.utf8"));
    auto& f = std::use_facet<std::codecvt<wchar_t, char,
std::mbstate_t>>(std::locale());
    // Four wide characters; the \u / \U escapes spell the glyphs shown in the
    // trailing comment.
    std::wstring internal = L"z\u00df\u6c34\U0001f34c"; // L"zß水🍌"

    // note that the following can be done with wstring_convert
    std::mbstate_t mb{}; // initial shift state
    // Output buffer sized as max_length() chars per input wide character, so
    // the conversion is not expected to run out of room.
    std::string external(internal.size() * f.max_length(), '\0'); 
    // Out-parameters of out(): where the conversion stopped in the input and
    // in the output, respectively.
    const wchar_t* from_next;
    char* to_next;
    // Convert the whole input. Both end pointers are one-past-the-end
    // (index == size()), i.e. they address each string's terminating element.
    // The return value (conversion status) is deliberately ignored here.
    f.out(mb, &internal[0], &internal[internal.size()], from_next,
              &external[0], &external[external.size()], to_next);
    // error checking skipped for brevity
    // Trim the buffer down to the number of chars actually written.
    external.resize(to_next - &external[0]);

    std::cout << "The string in narrow multibyte encoding: " << external <<
'\n';
}

g++ test.cpp
valgrind ./a.out
==2924== Memcheck, a memory error detector
==2924== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==2924== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==2924== Command: ./a.out
==2924== 
==2924== Invalid read of size 16
==2924==    at 0x54AD98D: __wcsnlen_sse4_1 (strlen.S:117)
==2924==    by 0x549B458: wcsnrtombs (wcsnrtombs.c:58)
==2924==    by 0x4EEBD9D: std::codecvt<wchar_t, char,
__mbstate_t>::do_out(__mbstate_t&, wchar_t const*, wchar_t const*, wchar_t
const*&, char*, char*, char*&) const (in
/usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.25)
==2924==    by 0x1095FF: std::__codecvt_abstract_base<wchar_t, char,
__mbstate_t>::out(__mbstate_t&, wchar_t const*, wchar_t const*, wchar_t
const*&, char*, char*, char*&) const (in /home/artyom/tmp/a.out)
==2924==    by 0x1093EC: main (in /home/artyom/tmp/a.out)
==2924==  Address 0x5b84530 is 12 bytes after a block of size 20 alloc'd
==2924==    at 0x4C3017F: operator new(unsigned long) (in
/usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==2924==    by 0x4F7757C: void std::__cxx11::basic_string<wchar_t,
std::char_traits<wchar_t>, std::allocator<wchar_t> >::_M_construct<wchar_t
const*>(wchar_t const*, wchar_t const*, std::forward_iterator_tag) (in
/usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.25)
==2924==    by 0x1092DE: main (in /home/artyom/tmp/a.out)
==2924== 
==2924== Invalid read of size 16
==2924==    at 0x54AD992: __wcsnlen_sse4_1 (strlen.S:117)
==2924==    by 0x549B458: wcsnrtombs (wcsnrtombs.c:58)
==2924==    by 0x4EEBD9D: std::codecvt<wchar_t, char,
__mbstate_t>::do_out(__mbstate_t&, wchar_t const*, wchar_t const*, wchar_t
const*&, char*, char*, char*&) const (in
/usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.25)
==2924==    by 0x1095FF: std::__codecvt_abstract_base<wchar_t, char,
__mbstate_t>::out(__mbstate_t&, wchar_t const*, wchar_t const*, wchar_t
const*&, char*, char*, char*&) const (in /home/artyom/tmp/a.out)
==2924==    by 0x1093EC: main (in /home/artyom/tmp/a.out)
==2924==  Address 0x5b84540 is 16 bytes after a block of size 32 in arena
"client"
==2924== 
==2924== Conditional jump or move depends on uninitialised value(s)
==2924==    at 0x54ADA61: __wcsnlen_sse4_1 (strlen.S:161)
==2924==    by 0x549B458: wcsnrtombs (wcsnrtombs.c:58)
==2924==    by 0x4EEBD9D: std::codecvt<wchar_t, char,
__mbstate_t>::do_out(__mbstate_t&, wchar_t const*, wchar_t const*, wchar_t
const*&, char*, char*, char*&) const (in
/usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.25)
==2924==    by 0x1095FF: std::__codecvt_abstract_base<wchar_t, char,
__mbstate_t>::out(__mbstate_t&, wchar_t const*, wchar_t const*, wchar_t
const*&, char*, char*, char*&) const (in /home/artyom/tmp/a.out)
==2924==    by 0x1093EC: main (in /home/artyom/tmp/a.out)
==2924== 
The string in narrow multibyte encoding: zß水🍌
==2924== 
==2924== HEAP SUMMARY:
==2924==     in use at exit: 11,919 bytes in 113 blocks
==2924==   total heap usage: 149 allocs, 36 frees, 90,511 bytes allocated
==2924== 
==2924== LEAK SUMMARY:
==2924==    definitely lost: 0 bytes in 0 blocks
==2924==    indirectly lost: 0 bytes in 0 blocks
==2924==      possibly lost: 0 bytes in 0 blocks
==2924==    still reachable: 11,919 bytes in 113 blocks
==2924==         suppressed: 0 bytes in 0 blocks
==2924== Rerun with --leak-check=full to see details of leaked memory
==2924== 
==2924== For counts of detected and suppressed errors, rerun with: -v
==2924== Use --track-origins=yes to see where uninitialised values come from
==2924== ERROR SUMMARY: 3 errors from 3 contexts (suppressed: 0 from 0)

ldd a.out 
        linux-vdso.so.1 (0x00007ffd63be6000)
        libstdc++.so.6 => /usr/lib/x86_64-linux-gnu/libstdc++.so.6
(0x00007f6494552000)
        libgcc_s.so.1 => /lib/x86_64-linux-gnu/libgcc_s.so.1
(0x00007f649433a000)
        libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f6493f49000)
        libm.so.6 => /lib/x86_64-linux-gnu/libm.so.6 (0x00007f6493bab000)
        /lib64/ld-linux-x86-64.so.2 (0x00007f6494ade000)

g++ --version
g++ (Ubuntu 7.3.0-27ubuntu1~18.04) 7.3.0

dpkg -l libstdc++6
ii  libstdc++6:amd64                                8.2.0-1ubuntu2~18.04       
 amd64                        GNU Standard C++ Library v3
ii  libstdc++6:i386                                 8.2.0-1ubuntu2~18.04       
 i386                         GNU Standard C++ Library v3

There is no such problem when converting UTF-32 to UTF-8.

Reply via email to