On Thu, Dec 14, 2023 at 6:05 PM Jonathan Wakely <jwak...@redhat.com> wrote:

> Tested x86_64-linux. Pushed to trunk.
>
> -- >8 --
>
> This adds the C++23 std::print functions, which use std::format to write
> to a FILE stream or std::ostream (defaulting to stdout).
>
> The new extern symbols are in the libstdc++exp.a archive, so we aren't
> committing to stable symbols in the DSO yet. There's a UTF-8 validating
> and transcoding function added by this change. That can certainly be
> optimized, but it's internal to libstdc++exp.a so can be tweaked later
> at leisure.
>
> Currently the external symbols work for all targets, but are only
> actually used for Windows, where it's necessary to transcode to UTF-16
> to write to the console.  The standard seems to encourage us to also
> diagnose invalid UTF-8 for non-Windows targets when writing to a
> terminal (and only when writing to a terminal), but I'm reliably
> informed that that wasn't the intent of the wording. Checking for
> invalid UTF-8 sequences only needs to happen for Windows, which is good
> as checking for a terminal requires a call to isatty, and on Linux that
> uses an ioctl syscall, which would make std::print ten times slower!
>
> Testing the std::print behaviour is difficult if it depends on whether
> the output stream is connected to a Windows console or not, as we can't
> (as far as I know) do that non-interactively in DejaGNU. One of the new
> tests uses the internal __write_to_terminal function directly. That
> allows us to verify its UTF-8 error handling on POSIX targets, even
> though that's not actually used by std::print. For Windows, that
> __write_to_terminal function transcodes to UTF-16 but then uses
> WriteConsoleW which fails unless it really is writing to the console.
> That means the 27_io/print/2.cc test FAILs on Windows. The UTF-16
> transcoding has been manually tested using mingw-w64 and Wine, and
> appears to work.
>
> libstdc++-v3/ChangeLog:
>
>         PR libstdc++/107760
>         * include/Makefile.am: Add new header.
>         * include/Makefile.in: Regenerate.
>         * include/bits/version.def (__cpp_lib_print): Define.
>         * include/bits/version.h: Regenerate.
>         * include/std/format (__literal_encoding_is_utf8): New function.
>         (_Seq_sink::view()): New member function.
>         * include/std/ostream (vprint_nonunicode, vprint_unicode)
>         (print, println): New functions.
>         * include/std/print: New file.
>         * src/c++23/Makefile.am: Add new source file.
>         * src/c++23/Makefile.in: Regenerate.
>         * src/c++23/print.cc: New file.
>         * testsuite/27_io/basic_ostream/print/1.cc: New test.
>         * testsuite/27_io/print/1.cc: New test.
>         * testsuite/27_io/print/2.cc: New test.
> ---
>  libstdc++-v3/include/Makefile.am              |   1 +
>  libstdc++-v3/include/Makefile.in              |   1 +
>  libstdc++-v3/include/bits/version.def         |   9 +
>  libstdc++-v3/include/bits/version.h           |  29 +-
>  libstdc++-v3/include/std/format               |  53 +++
>  libstdc++-v3/include/std/ostream              | 152 ++++++++
>  libstdc++-v3/include/std/print                | 138 +++++++
>  libstdc++-v3/src/c++23/Makefile.am            |   8 +-
>  libstdc++-v3/src/c++23/Makefile.in            |  10 +-
>  libstdc++-v3/src/c++23/print.cc               | 348 ++++++++++++++++++
>  .../testsuite/27_io/basic_ostream/print/1.cc  | 112 ++++++
>  libstdc++-v3/testsuite/27_io/print/1.cc       |  85 +++++
>  libstdc++-v3/testsuite/27_io/print/2.cc       | 151 ++++++++
>  13 files changed, 1085 insertions(+), 12 deletions(-)
>  create mode 100644 libstdc++-v3/include/std/print
>  create mode 100644 libstdc++-v3/src/c++23/print.cc
>  create mode 100644 libstdc++-v3/testsuite/27_io/basic_ostream/print/1.cc
>  create mode 100644 libstdc++-v3/testsuite/27_io/print/1.cc
>  create mode 100644 libstdc++-v3/testsuite/27_io/print/2.cc
>
> diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am
> index 17d9d9cec31..368b92eafbc 100644
> --- a/libstdc++-v3/include/Makefile.am
> +++ b/libstdc++-v3/include/Makefile.am
> @@ -85,6 +85,7 @@ std_headers = \
>         ${std_srcdir}/memory_resource \
>         ${std_srcdir}/mutex \
>         ${std_srcdir}/ostream \
> +       ${std_srcdir}/print \
>         ${std_srcdir}/queue \
>         ${std_srcdir}/random \
>         ${std_srcdir}/regex \
> diff --git a/libstdc++-v3/include/Makefile.in b/libstdc++-v3/include/Makefile.in
> index f038af709cc..a31588c0100 100644
> --- a/libstdc++-v3/include/Makefile.in
> +++ b/libstdc++-v3/include/Makefile.in
> @@ -441,6 +441,7 @@ std_freestanding = \
>  @GLIBCXX_HOSTED_TRUE@  ${std_srcdir}/memory_resource \
>  @GLIBCXX_HOSTED_TRUE@  ${std_srcdir}/mutex \
>  @GLIBCXX_HOSTED_TRUE@  ${std_srcdir}/ostream \
> +@GLIBCXX_HOSTED_TRUE@  ${std_srcdir}/print \
>  @GLIBCXX_HOSTED_TRUE@  ${std_srcdir}/queue \
>  @GLIBCXX_HOSTED_TRUE@  ${std_srcdir}/random \
>  @GLIBCXX_HOSTED_TRUE@  ${std_srcdir}/regex \
> diff --git a/libstdc++-v3/include/bits/version.def b/libstdc++-v3/include/bits/version.def
> index 38b73ec9b5d..0134a71b3ab 100644
> --- a/libstdc++-v3/include/bits/version.def
> +++ b/libstdc++-v3/include/bits/version.def
> @@ -1645,6 +1645,15 @@ ftms = {
>    };
>  };
>
> +ftms = {
> +  name = print;
> +  values = {
> +    v = 202211;
> +    cxxmin = 23;
> +    hosted = yes;
> +  };
> +};
> +
>  ftms = {
>    name = spanstream;
>    values = {
> diff --git a/libstdc++-v3/include/bits/version.h b/libstdc++-v3/include/bits/version.h
> index a201a444925..c28fbe14c15 100644
> --- a/libstdc++-v3/include/bits/version.h
> +++ b/libstdc++-v3/include/bits/version.h
> @@ -2005,6 +2005,17 @@
>  #undef __glibcxx_want_out_ptr
>
>  // from version.def line 1649
> +#if !defined(__cpp_lib_print)
> +# if (__cplusplus >= 202100L) && _GLIBCXX_HOSTED
> +#  define __glibcxx_print 202211L
> +#  if defined(__glibcxx_want_all) || defined(__glibcxx_want_print)
> +#   define __cpp_lib_print 202211L
> +#  endif
> +# endif
> +#endif /* !defined(__cpp_lib_print) && defined(__glibcxx_want_print) */
> +#undef __glibcxx_want_print
> +
> +// from version.def line 1658
>  #if !defined(__cpp_lib_spanstream)
>  # if (__cplusplus >= 202100L) && _GLIBCXX_HOSTED && (__glibcxx_span)
>  #  define __glibcxx_spanstream 202106L
> diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
> index 6204fd0e3c1..1110ba4ab16 100644
> --- a/libstdc++-v3/include/std/format
> +++ b/libstdc++-v3/include/std/format
> @@ -346,6 +346,44 @@ namespace __format
>      _WP_from_arg // Use a formatting argument for width/prec.
>    };
>
> +  consteval bool
> +  __literal_encoding_is_utf8()
> +  {
> +#ifdef __GNUC_EXECUTION_CHARSET_NAME
> +    const char* __enc = __GNUC_EXECUTION_CHARSET_NAME;
> +    // GNU iconv allows "ISO-10646/" prefix (case-insensitive).
> +    if (__enc[0] == 'I' || __enc[0] == 'i')
> +      {
> +       if ((__enc[1] == 'S' || __enc[1] == 's')
> +             && (__enc[2] == 'O' || __enc[2] == 'o'))
> +         {
> +           __enc += 3;
> +           if (string_view(__enc).starts_with("-10646/"))
> +             __enc += 7;
> +           else
> +             return false;
> +         }
> +       else
> +         return false;
> +      }
> +
> +    if ((__enc[0] == 'U' || __enc[0] == 'u')
> +         && (__enc[1] == 'T' || __enc[1] == 't')
> +         && (__enc[2] == 'F' || __enc[2] == 'f'))
> +      {
> +       __enc += 3;
> +       if (__enc[0] == '-')
> +         ++__enc;
> +       if (__enc[0] == '8')
> +         return __enc[1] == '\0' || string_view(__enc + 1) == "//";
> +      }
> +#elif defined __clang_literal_encoding__
> +    // Clang accepts "-fexec-charset=utf-8" but the macro is still uppercase.
> +    return string_view(__clang_literal_encoding__) == "UTF-8";
> +#endif
> +    return false;
> +  }
> +
>    template<typename _Context>
>      size_t
>      __int_from_arg(const basic_format_arg<_Context>& __arg);
> @@ -2754,6 +2792,21 @@ namespace __format
>           _Seq_sink::_M_overflow();
>         return std::move(_M_seq);
>        }
> +
> +      // A writable span that views everything written to the sink.
> +      // Will be either a view over _M_seq or the used part of _M_buf.
> +      span<_CharT>
> +      view()
> +      {
> +       auto __s = this->_M_used();
> +       if (_M_seq.size())
> +         {
> +           if (__s.size() != 0)
> +             _Seq_sink::_M_overflow();
> +           return _M_seq;
> +         }
> +       return __s;
> +      }
>      };
>
>    template<typename _CharT, typename _Alloc = allocator<_CharT>>
> diff --git a/libstdc++-v3/include/std/ostream b/libstdc++-v3/include/std/ostream
> index 1de1c1bd359..4f1cdc281a3 100644
> --- a/libstdc++-v3/include/std/ostream
> +++ b/libstdc++-v3/include/std/ostream
> @@ -39,6 +39,11 @@
>
>  #include <ios>
>  #include <bits/ostream_insert.h>
> +#if __cplusplus > 202002L
> +# include <format>
> +#endif
> +
> +# define __glibcxx_want_print
>  #include <bits/version.h> // __glibcxx_syncbuf
>
>  namespace std _GLIBCXX_VISIBILITY(default)
> @@ -872,6 +877,153 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>      }
>  #endif // __glibcxx_syncbuf
>
> +#if __cpp_lib_print // C++ >= 23
> +
> +  inline void
> +  vprint_nonunicode(ostream& __os, string_view __fmt, format_args __args)
> +  {
> +    ostream::sentry __cerb(__os);
> +    if (__cerb)
> +      {
> +       __format::_Str_sink<char> __buf;
> +       std::vformat_to(__buf.out(), __os.getloc(), __fmt, __args);
> +       auto __out = __buf.view();
> +
> +       __try
> +         {
> +           const streamsize __w = __os.width();
> +           const streamsize __n = __out.size();
> +           if (__w > __n)
> +             {
> +               const bool __left
> +                 = (__os.flags() & ios_base::adjustfield) == ios_base::left;
> +               if (!__left)
> +                 std::__ostream_fill(__os, __w - __n);
> +               if (__os.good())
> +                 std::__ostream_write(__os, __out.data(), __n);
> +               if (__left && __os.good())
> +                 std::__ostream_fill(__os, __w - __n);
> +             }
> +           else
> +             std::__ostream_write(__os, __out.data(), __n);
> +         }
> +       __catch(const __cxxabiv1::__forced_unwind&)
> +         {
> +           __os._M_setstate(ios_base::badbit);
> +           __throw_exception_again;
> +         }
> +       __catch(...)
> +         { __os._M_setstate(ios_base::badbit); }
> +      }
> +  }
> +
> +  inline void
> +  vprint_unicode(ostream& __os, string_view __fmt, format_args __args)
> +  {
> +    ostream::sentry __cerb(__os);
> +    if (__cerb)
> +      {
> +
> +       const streamsize __w = __os.width();
> +       const bool __left
> +         = (__os.flags() & ios_base::adjustfield) == ios_base::left;
>

I'm pretty sure - at least when I wrote this wording - that the intent was
for this to be just an unformatted write at the end. The wording in
[ostream.formatted.print] doesn't use the "determines padding" words of
power that would invoke [ostream.formatted.reqmts]/3, so the width/fill
padding logic here shouldn't be needed.


> +
> +       __format::_Str_sink<char> __buf;
> +       std::vformat_to(__buf.out(), __os.getloc(), __fmt, __args);
> +       auto __out = __buf.view();
> +
> +#ifdef _WIN32
> +       void* __open_terminal(streambuf*);
> +       error_code __write_to_terminal(void*, span<char>);
> +       // If stream refers to a terminal, write a Unicode string to it.
> +       if (auto __term = __open_terminal(__os.rdbuf()))
> +         {
> +           __format::_Str_sink<char> __buf2;
> +           if (__w != 0)
> +             {
> +               char __fmt[] = "{0:..{1}}";
> +               __fmt[3] = __os.fill();
> +               __fmt[4] = __left ? '<' : '>';
> +               string_view __str(__out);
> +               std::vformat_to(__buf2.out(), // N.B. no need to use getloc()
> +                               __fmt, std::make_format_args(__str, __w));
> +               __out = __buf2.view();
> +             }
> +
> +           ios_base::iostate __err = ios_base::goodbit;
> +           __try
> +             {
> +               if (__os.rdbuf()->pubsync() == -1)
> +                 __err = ios::badbit;
> +               else if (auto __e = __write_to_terminal(__term, __out))
> +                 if (__e != std::make_error_code(errc::illegal_byte_sequence))
> +                   __err = ios::badbit;
> +#ifndef _WIN32
> +               // __open_terminal(streambuf*) opens a new FILE with fdopen,
> +               // so we need to close it here.
> +               std::fclose((FILE*)__term);
> +#endif
> +             }
> +           __catch(const __cxxabiv1::__forced_unwind&)
> +             {
> +               __os._M_setstate(ios_base::badbit);
> +               __throw_exception_again;
> +             }
> +           __catch(...)
> +             { __os._M_setstate(ios_base::badbit); }
> +
> +           if (__err)
> +             __os.setstate(__err);
> +           return;
> +         }
> +#endif
> +
> +       // Otherwise just insert the string as normal.
> +       __try
> +         {
> +           const streamsize __n = __out.size();
> +           if (__w > __n)
> +             {
> +               if (!__left)
> +                 std::__ostream_fill(__os, __w - __n);
> +               if (__os.good())
> +                 std::__ostream_write(__os, __out.data(), __n);
> +               if (__left && __os.good())
> +                 std::__ostream_fill(__os, __w - __n);
> +             }
> +           else
> +             std::__ostream_write(__os, __out.data(), __n);
> +         }
> +       __catch(const __cxxabiv1::__forced_unwind&)
> +         {
> +           __os._M_setstate(ios_base::badbit);
> +           __throw_exception_again;
> +         }
> +       __catch(...)
> +         { __os._M_setstate(ios_base::badbit); }
> +      }
> +  }
> +
> +  template<typename... _Args>
> +    inline void
> +    print(ostream& __os, format_string<_Args...> __fmt, _Args&&... __args)
> +    {
> +      auto __fmtargs = std::make_format_args(std::forward<_Args>(__args)...);
> +      if constexpr (__format::__literal_encoding_is_utf8())
> +       std::vprint_unicode(__os, __fmt.get(), __fmtargs);
> +      else
> +       std::vprint_nonunicode(__os, __fmt.get(), __fmtargs);
> +    }
> +
> +  template<typename... _Args>
> +    inline void
> +    println(ostream& __os, format_string<_Args...> __fmt, _Args&&... __args)
> +    {
> +      std::print(__os, "{}\n",
> +                std::format(__fmt, std::forward<_Args>(__args)...));
> +    }
> +#endif // __cpp_lib_print
> +
>  #endif // C++11
>
>  _GLIBCXX_END_NAMESPACE_VERSION
> diff --git a/libstdc++-v3/include/std/print b/libstdc++-v3/include/std/print
> new file mode 100644
> index 00000000000..e7099ab6fe3
> --- /dev/null
> +++ b/libstdc++-v3/include/std/print
> @@ -0,0 +1,138 @@
> +// <print> Print functions -*- C++ -*-
> +
> +// Copyright The GNU Toolchain Authors.
> +//
> +// This file is part of the GNU ISO C++ Library.  This library is free
> +// software; you can redistribute it and/or modify it under the
> +// terms of the GNU General Public License as published by the
> +// Free Software Foundation; either version 3, or (at your option)
> +// any later version.
> +
> +// This library is distributed in the hope that it will be useful,
> +// but WITHOUT ANY WARRANTY; without even the implied warranty of
> +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +// GNU General Public License for more details.
> +
> +// Under Section 7 of GPL version 3, you are granted additional
> +// permissions described in the GCC Runtime Library Exception, version
> +// 3.1, as published by the Free Software Foundation.
> +
> +// You should have received a copy of the GNU General Public License and
> +// a copy of the GCC Runtime Library Exception along with this program;
> +// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
> +// <http://www.gnu.org/licenses/>.
> +
> +/** @file include/print
> + *  This is a Standard C++ Library header.
> + */
> +
> +#ifndef _GLIBCXX_PRINT
> +#define _GLIBCXX_PRINT 1
> +
> +#pragma GCC system_header
> +
> +#include <bits/requires_hosted.h> // for std::format
> +
> +#define __glibcxx_want_print
> +#include <bits/version.h>
> +
> +#ifdef __cpp_lib_print // C++ >= 23
> +
> +#include <format>
> +#include <cstdio>
> +#include <cerrno>
> +#include <bits/functexcept.h>
> +
> +#ifdef _WIN32
> +# include <system_error>
> +#endif
> +
> +namespace std _GLIBCXX_VISIBILITY(default)
> +{
> +_GLIBCXX_BEGIN_NAMESPACE_VERSION
> +
> +  inline void
> +  vprint_nonunicode(FILE* __stream, string_view __fmt, format_args __args)
> +  {
> +    __format::_Str_sink<char> __buf;
> +    std::vformat_to(__buf.out(), __fmt, __args);
> +    auto __out = __buf.view();
> +    if (std::fwrite(__out.data(), 1, __out.size(), __stream) != __out.size())
> +      __throw_system_error(EIO);
> +  }
> +
> +  inline void
> +  vprint_unicode(FILE* __stream, string_view __fmt, format_args __args)
> +  {
> +    __format::_Str_sink<char> __buf;
> +    std::vformat_to(__buf.out(), __fmt, __args);
> +    auto __out = __buf.view();
> +
> +#ifdef _WIN32
> +    void* __open_terminal(FILE*);
> +    error_code __write_to_terminal(void*, span<char>);
> +    // If stream refers to a terminal, write a native Unicode string to it.
> +    if (auto __term = __open_terminal(__stream))
> +      {
> +       string __out = std::vformat(__fmt, __args);
> +       error_code __e;
> +       if (!std::fflush(__stream))
> +         {
> +           __e = __write_to_terminal(__term, __out);
> +           if (!__e)
> +             return;
> +           if (__e == std::make_error_code(errc::illegal_byte_sequence))
> +             return;
> +         }
> +       else
> +         __e = error_code(errno, generic_category());
> +       _GLIBCXX_THROW_OR_ABORT(system_error(__e, "std::vprint_unicode"));
> +      }
> +#endif
> +
> +    // Otherwise just write the string to the file.
> +    if (std::fwrite(__out.data(), 1, __out.size(), __stream) != __out.size())
> +      __throw_system_error(EIO);
> +  }
> +
> +  template<typename... _Args>
> +    inline void
> +    print(FILE* __stream, format_string<_Args...> __fmt, _Args&&... __args)
> +    {
> +      auto __fmtargs = std::make_format_args(std::forward<_Args>(__args)...);
> +      if constexpr (__format::__literal_encoding_is_utf8())
> +       std::vprint_unicode(__stream, __fmt.get(), __fmtargs);
> +      else
> +       std::vprint_nonunicode(__stream, __fmt.get(), __fmtargs);
> +    }
> +
> +  template<typename... _Args>
> +    inline void
> +    print(format_string<_Args...> __fmt, _Args&&... __args)
> +    { std::print(stdout, __fmt, std::forward<_Args>(__args)...); }
> +
> +  template<typename... _Args>
> +    inline void
> +    println(FILE* __stream, format_string<_Args...> __fmt, _Args&&... __args)
> +    {
> +      std::print(__stream, "{}\n",
> +                std::format(__fmt, std::forward<_Args>(__args)...));
> +    }
> +
> +  template<typename... _Args>
> +    inline void
> +    println(format_string<_Args...> __fmt, _Args&&... __args)
> +    { std::println(stdout, __fmt, std::forward<_Args>(__args)...); }
> +
> +  inline void
> +  vprint_unicode(string_view __fmt, format_args __args)
> +  { std::vprint_unicode(stdout, __fmt, __args); }
> +
> +  inline void
> +  vprint_nonunicode(string_view __fmt, format_args __args)
> +  { std::vprint_nonunicode(stdout, __fmt, __args); }
> +
> +_GLIBCXX_END_NAMESPACE_VERSION
> +} // namespace std
> +#endif // __cpp_lib_print
> +#endif // _GLIBCXX_PRINT
> diff --git a/libstdc++-v3/src/c++23/Makefile.am b/libstdc++-v3/src/c++23/Makefile.am
> index da988c352f8..76938755f58 100644
> --- a/libstdc++-v3/src/c++23/Makefile.am
> +++ b/libstdc++-v3/src/c++23/Makefile.am
> @@ -35,7 +35,7 @@ else
>  inst_sources =
>  endif
>
> -sources = stacktrace.cc
> +sources = stacktrace.cc print.cc
>
>  vpath % $(top_srcdir)/src/c++23
>
> @@ -46,6 +46,12 @@ else
>  libc__23convenience_la_SOURCES =
>  endif
>
> +# Use C++26 so that std::filebuf::native_handle() is available.
> +print.lo: print.cc
> +       $(LTCXXCOMPILE) -std=gnu++26 -c $<
> +print.o: print.cc
> +       $(CXXCOMPILE) -std=gnu++26 -c $<
> +
>  # AM_CXXFLAGS needs to be in each subdirectory so that it can be
>  # modified in a per-library or per-sub-library way.  Need to manually
>  # set this option because CONFIG_CXXFLAGS has to be after
> diff --git a/libstdc++-v3/src/c++23/Makefile.in b/libstdc++-v3/src/c++23/Makefile.in
> index 1121749d84b..ce609688025 100644
> --- a/libstdc++-v3/src/c++23/Makefile.in
> +++ b/libstdc++-v3/src/c++23/Makefile.in
> @@ -121,7 +121,7 @@ CONFIG_CLEAN_FILES =
>  CONFIG_CLEAN_VPATH_FILES =
>  LTLIBRARIES = $(noinst_LTLIBRARIES)
>  libc__23convenience_la_LIBADD =
> -am__objects_1 = stacktrace.lo
> +am__objects_1 = stacktrace.lo print.lo
>  am__objects_2 =
>  @GLIBCXX_HOSTED_TRUE@am_libc__23convenience_la_OBJECTS =  \
>  @GLIBCXX_HOSTED_TRUE@  $(am__objects_1) $(am__objects_2)
> @@ -430,7 +430,7 @@ headers =
>
>  # XTEMPLATE_FLAGS = -fno-implicit-templates
>  @ENABLE_EXTERN_TEMPLATE_TRUE@inst_sources =
> -sources = stacktrace.cc
> +sources = stacktrace.cc print.cc
>  @GLIBCXX_HOSTED_FALSE@libc__23convenience_la_SOURCES =
>  @GLIBCXX_HOSTED_TRUE@libc__23convenience_la_SOURCES = $(sources) $(inst_sources)
>
> @@ -742,6 +742,12 @@ uninstall-am:
>
>  vpath % $(top_srcdir)/src/c++23
>
> +# Use C++26 so that std::filebuf::native_handle() is available.
> +print.lo: print.cc
> +       $(LTCXXCOMPILE) -std=gnu++26 -c $<
> +print.o: print.cc
> +       $(CXXCOMPILE) -std=gnu++26 -c $<
> +
>  # Tell versions [3.59,3.63) of GNU make to not export all variables.
>  # Otherwise a system limit (for SysV at least) may be exceeded.
>  .NOEXPORT:
> diff --git a/libstdc++-v3/src/c++23/print.cc b/libstdc++-v3/src/c++23/print.cc
> new file mode 100644
> index 00000000000..2fe7a2e3565
> --- /dev/null
> +++ b/libstdc++-v3/src/c++23/print.cc
> @@ -0,0 +1,348 @@
> +// std::print -*- C++ -*-
> +
> +// Copyright The GNU Toolchain Authors.
> +//
> +// This file is part of the GNU ISO C++ Library.  This library is free
> +// software; you can redistribute it and/or modify it under the
> +// terms of the GNU General Public License as published by the
> +// Free Software Foundation; either version 3, or (at your option)
> +// any later version.
> +
> +// This library is distributed in the hope that it will be useful,
> +// but WITHOUT ANY WARRANTY; without even the implied warranty of
> +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +// GNU General Public License for more details.
> +
> +// Under Section 7 of GPL version 3, you are granted additional
> +// permissions described in the GCC Runtime Library Exception, version
> +// 3.1, as published by the Free Software Foundation.
> +
> +// You should have received a copy of the GNU General Public License and
> +// a copy of the GCC Runtime Library Exception along with this program;
> +// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
> +// <http://www.gnu.org/licenses/>.
> +
> +#include <span>
> +#include <string>
> +#include <streambuf>
> +#include <system_error>
> +#include <cstdio>
> +#include <cstdint> // uint32_t
> +#include <fstream>
> +#include <ext/stdio_filebuf.h>
> +#include <ext/stdio_sync_filebuf.h>
> +#include <ext/numeric_traits.h>
> +
> +#ifdef _WIN32
> +# include <stdio.h>   // _fileno
> +# include <io.h>      // _get_osfhandle
> +# include <windows.h> // GetLastError, WriteConsoleW
> +#elifdef _GLIBCXX_HAVE_UNISTD_H
> +# include <stdio.h>   // fileno
> +# include <unistd.h>  // isatty
> +#endif
> +
> +namespace std _GLIBCXX_VISIBILITY(default)
> +{
> +_GLIBCXX_BEGIN_NAMESPACE_VERSION
> +
> +#ifdef _WIN32
> +namespace
> +{
> +  void*
> +  check_for_console(void* handle)
> +  {
> +    if (handle != nullptr && handle != INVALID_HANDLE_VALUE)
> +      {
> +       unsigned long mode; // unused
> +       if (::GetConsoleMode(handle, &mode))
> +         return handle;
> +      }
> +    return nullptr;
> +  }
> +} // namespace
> +#endif
> +
> +  // This returns a pointer that is either a Windows console HANDLE
> +  // or a POSIX FILE*. A null return means no terminal could be opened.
> +  void*
> +  __open_terminal(FILE* f)
> +  {
> +#ifndef _GLIBCXX_USE_STDIO_PURE
> +    if (f)
> +      {
> +#ifdef _WIN32
> +       if (int fd = ::_fileno(f); fd >= 0)
> +         return check_for_console((void*)_get_osfhandle(fd));
> +#elifdef _GLIBCXX_HAVE_UNISTD_H
> +       if (int fd = ::fileno(f); fd >= 0 && ::isatty(fd))
> +         return f;
> +#endif
> +      }
> +#endif
> +    return nullptr;
> +  }
> +
> +  void*
> +  __open_terminal(std::streambuf* sb)
> +  {
> +#ifndef _GLIBCXX_USE_STDIO_PURE
> +    using namespace __gnu_cxx;
> +
> +    if (auto fb = dynamic_cast<stdio_sync_filebuf<char>*>(sb))
> +      return __open_terminal(fb->file());
> +
> +    if (auto fb = dynamic_cast<stdio_filebuf<char>*>(sb))
> +      return __open_terminal(fb->file());
> +
> +#ifdef __glibcxx_fstream_native_handle
> +#ifdef _WIN32
> +    if (auto fb = dynamic_cast<filebuf*>(sb))
> +      return check_for_console(fb->native_handle());
> +#elifdef _GLIBCXX_HAVE_UNISTD_H
> +    if (auto fb = dynamic_cast<filebuf*>(sb))
> +      if (int fd = fb->native_handle(); fd >= 0 && ::isatty(fd))
> +       return ::fdopen(::dup(fd), "w"); // Caller must call fclose.
> +#endif
> +#endif
> +#endif // ! _GLIBCXX_USE_STDIO_PURE
> +
> +    return nullptr;
> +  }
> +
> +namespace
> +{
> +  // Validate UTF-8 string, replacing invalid sequences with U+FFFD.
> +  //
> +  // Return true if the input is valid UTF-8, false otherwise.
> +  //
> +  // If sizeof(_CharT) > 1, then transcode a valid string into out,
> +  // using either UTF-16 or UTF-32 as determined by sizeof(_CharT).
> +  //
> +  // If sizeof(_CharT) == 1 and the input is valid UTF-8, both s and out will
> +  // be unchanged. Otherwise, each invalid sequence in s will be overwritten
> +  // with a single 0xFF byte followed by zero or more 0xFE bytes, and then
> +  // a valid UTF-8 string will be produced in out (replacing invalid
> +  // sequences with U+FFFD).
> +  template<typename _CharT>
> +    bool
> +    to_valid_unicode(span<char> s, basic_string<_CharT>& out)
> +    {
> +      constexpr bool transcode = sizeof(_CharT) > 1;
> +
> +      unsigned seen = 0, needed = 0;
> +      unsigned char lo_bound = 0x80, hi_bound = 0xBF;
> +      size_t errors = 0;
> +
> +      [[maybe_unused]] uint32_t code_point{};
> +      if constexpr (transcode)
> +       {
> +         out.clear();
> +         // XXX: count code points in s instead of bytes?
> +         out.reserve(s.size());
> +       }
> +
> +      auto q = s.data(), eoq = q + s.size();
> +      while (q != eoq)
> +       {
> +         unsigned char byte = *q;
> +         if (needed == 0)
> +           {
> +             if (byte <= 0x7F) [[likely]]      // 0x00 to 0x7F
> +               {
> +                 if constexpr (transcode)
> +                   out.push_back(_CharT(byte));
> +
> +                 // Fast forward to the next non-ASCII character.
> +                 while (++q != eoq && (unsigned char)*q <= 0x7F)
> +                   {
> +                     if constexpr (transcode)
> +                       out.push_back(*q);
> +                   }
> +                 continue;
> +               }
> +             else if (byte < 0xC2)
> +               {
> +                 if constexpr (transcode)
> +                   out.push_back(0xFFFD);
> +                 else
> +                   *q = 0xFF;
> +                 ++errors;
> +               }
> +             else if (byte <= 0xDF) // 0xC2 to 0xDF
> +               {
> +                 needed = 1;
> +                 if constexpr (transcode)
> +                   code_point = byte & 0x1F;
> +               }
> +             else if (byte <= 0xEF) // 0xE0 to 0xEF
> +               {
> +                 if (byte == 0xE0)
> +                   lo_bound = 0xA0;
> +                 else if (byte == 0xED)
> +                   hi_bound = 0x9F;
> +
> +                 needed = 2;
> +                 if constexpr (transcode)
> +                   code_point = byte & 0x0F;
> +               }
> +             else if (byte <= 0xF4) // 0xF0 to 0xF4
> +               {
> +                 if (byte == 0xF0)
> +                   lo_bound = 0x90;
> +                 else if (byte == 0xF4)
> +                   hi_bound = 0x8F;
> +
> +                 needed = 3;
> +                 if constexpr (transcode)
> +                   code_point = byte & 0x07;
> +               }
> +             else [[unlikely]]
> +               {
> +                 if constexpr (transcode)
> +                   out.push_back(0xFFFD);
> +                 else
> +                   *q = 0xFF;
> +                 ++errors;
> +               }
> +           }
> +         else
> +           {
> +             if (byte < lo_bound || byte > hi_bound) [[unlikely]]
> +               {
> +                 if constexpr (transcode)
> +                   out.push_back(0xFFFD);
> +                 else
> +                   {
> +                     *(q - seen - 1) = 0xFF;
> +                     __builtin_memset(q - seen, 0xFE, seen);
> +                   }
> +                 ++errors;
> +                 needed = seen = 0;
> +                 lo_bound = 0x80;
> +                 hi_bound = 0xBF;
> +                 continue; // Reprocess the current character.
> +               }
> +
> +             if constexpr (transcode)
> +               code_point = (code_point << 6) | (byte & 0x3f);
> +
> +             lo_bound = 0x80;
> +             hi_bound = 0xBF;
> +             ++seen;
> +             if (seen == needed) [[likely]]
> +               {
> +                 if constexpr (transcode)
> +                   {
> +                     if (code_point <= __gnu_cxx::__int_traits<_CharT>::__max)
> +                       out.push_back(code_point);
> +                     else
> +                       {
> +                         // Algorithm from
> +                         // http://www.unicode.org/faq/utf_bom.html#utf16-4
> +                         const char32_t LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
> +                         char16_t lead = LEAD_OFFSET + (code_point >> 10);
> +                         char16_t trail = 0xDC00 + (code_point & 0x3FF);
> +                         out.push_back(lead);
> +                         out.push_back(trail);
> +                       }
> +                   }
> +                 needed = seen = 0;
> +               }
> +           }
> +         ++q;
> +       }
> +
> +      if (needed) [[unlikely]]
> +       {
> +         // The string ends with an incomplete multibyte sequence.
> +         if constexpr (transcode)
> +           out.push_back(0xFFFD);
> +         else
> +           {
> +             // Truncate the incomplete sequence to a single byte.
> +             if (seen)
> +               s = s.first(s.size() - seen);
> +             s.back() = 0xFF;
> +           }
> +         ++errors;
> +       }
> +
> +      if (errors == 0) [[likely]]
> +       return true;
> +      else if constexpr (!transcode)
> +       {
> +         out.reserve(s.size() + errors * 2);
> +         for (unsigned char byte : s)
> +           {
> +             if (byte < 0xFE) [[likely]]
> +               out += (char)byte;
> +             else if (byte == 0xFF)
> +               out += "\xef\xbf\xbd"; // U+FFFD in UTF-8
> +           }
> +       }
> +      return false;
> +    }
> +
> +  // Validate UTF-8 string.
> +  // Returns true if s is valid UTF-8, otherwise returns false and stores
> +  // a valid UTF-8 string in err.
> +  [[__gnu__::__always_inline__]]
> +  inline bool
> +  to_valid_utf8(span<char> s, string& err)
> +  {
> +    return to_valid_unicode(s, err);
> +  }
> +
> +  // Transcode UTF-8 string to UTF-16.
> +  // Returns true if s is valid UTF-8, otherwise returns false.
> +  // In either case, a valid UTF-16 string is stored in u16.
> +  [[__gnu__::__always_inline__]]
> +  inline bool
> +  to_valid_utf16(span<char> s, u16string& u16)
> +  {
> +    return to_valid_unicode(s, u16);
> +  }
> +} // namespace
> +
> +  // Write a UTF-8 string to a file descriptor/handle.
> +  // Ill-formed sequences in the string will be substituted with U+FFFD.
> +  error_code
> +  __write_to_terminal(void* term, span<char> str)
> +  {
> +    if (term == nullptr) [[unlikely]]
> +      return std::make_error_code(std::errc::invalid_argument);
> +
> +    error_code ec;
> +
> +#ifdef _WIN32
> +    // We could use std::wstring here instead of std::u16string. In
> general
> +    // char_traits<wchar_t> is more optimized than char_traits<char16_t>
> but
> +    // for the purposes of to_valid_unicode only char_traits::copy
> matters,
> +    // and char_traits<char16_t>::copy uses memcpy so is OK.
> +    u16string wstr;
> +    if (!to_valid_utf16(str, wstr))
> +      ec = std::make_error_code(errc::illegal_byte_sequence);
> +
> +    unsigned long nchars = 0;
> +    WriteConsoleW(term, wstr.data(), wstr.size(), &nchars, nullptr);
> +    if (nchars != wstr.size())
> +      return {(int)GetLastError(), system_category()};
> +#elifdef _GLIBCXX_HAVE_UNISTD_H
> +    string out;
> +    if (!to_valid_utf8(str, out))
> +      {
> +       str = out;
> +       ec = std::make_error_code(errc::illegal_byte_sequence);
> +      }
> +
> +    auto n = std::fwrite(str.data(), 1, str.size(), (FILE*)term);
> +    if (n != str.size())
> +      ec = std::make_error_code(errc::io_error);
> +#else
> +    ec = std::make_error_code(std::errc::function_not_supported);
> +#endif
> +    return ec;
> +  }
> +_GLIBCXX_END_NAMESPACE_VERSION
> +} // namespace std
> diff --git a/libstdc++-v3/testsuite/27_io/basic_ostream/print/1.cc
> b/libstdc++-v3/testsuite/27_io/basic_ostream/print/1.cc
> new file mode 100644
> index 00000000000..28dc8af33e6
> --- /dev/null
> +++ b/libstdc++-v3/testsuite/27_io/basic_ostream/print/1.cc
> @@ -0,0 +1,112 @@
> +// { dg-options "-lstdc++exp" }
> +// { dg-do run { target c++23 } }
> +// { dg-require-fileio "" }
> +
> +#include <ostream>
> +#include <spanstream>
> +#include <string_view>
> +#include <iostream>
> +#include <iomanip>
> +#include <testsuite_hooks.h>
> +
> +void
> +test_print_ostream()
> +{
> +  char buf[64];
> +  std::spanstream os(buf);
> +  std::print(os, "File under '{}' for {}", 'O', "OUT OF FILE");
> +  std::string_view txt(os.span());
> +  VERIFY( txt == "File under 'O' for OUT OF FILE" );
> +}
> +
> +void
> +test_println_ostream()
> +{
> +  char buf[64];
> +  std::spanstream os(buf);
> +  std::println(os, "{} Lineman was a song I once heard", "Wichita");
> +  std::string_view txt(os.span());
> +  VERIFY( txt == "Wichita Lineman was a song I once heard\n" );
> +}
> +
> +void
> +test_print_raw()
> +{
> +  char buf[64];
> +  std::spanstream os(buf);
> +  std::print(os, "{}", '\xa3'); // Not a valid UTF-8 string.
> +  std::string_view txt(os.span());
> +  // Invalid UTF-8 should be written out unchanged if the ostream is not
> +  // connected to a tty:
> +  VERIFY( txt == "\xa3" );
> +}
> +
> +void
> +test_print_formatted()
> +{
> +  char buf[64];
> +  std::spanstream os(buf);
> +  os << std::setw(20) << std::setfill('*') << std::right;
> +  std::print(os, "{} Luftballons", 99);
> +  std::string_view txt(os.span());
> +  VERIFY( txt == "******99 Luftballons" );
> +}
> +
> +void
> +test_vprint_nonunicode()
> +{
> +  std::ostream out(std::cout.rdbuf());
> +  std::vprint_nonunicode(out, "{0} in \xc0 {0} out\n",
> +      std::make_format_args("garbage"));
> +  // { dg-output "garbage in . garbage out" }
> +}
> +
> +struct brit_punc : std::numpunct<char>
> +{
> +  std::string do_grouping() const override { return "\3\3"; }
> +  char do_thousands_sep() const override { return ','; }
> +  std::string do_truename() const override { return "yes mate"; }
> +  std::string do_falsename() const override { return "nah bruv"; }
> +};
> +
> +void
> +test_locale()
> +{
> +  struct stream_punc : std::numpunct<char>
> +  {
> +    std::string do_grouping() const override { return "\2\2"; }
> +    char do_thousands_sep() const override { return '~'; }
> +  };
> +
> +  // The default C locale.
> +  std::locale cloc = std::locale::classic();
> +  // A custom locale using '~' digit separators in groups of two.
> +  std::locale bloc(cloc, new stream_punc);
> +
> +  {
> +    std::ostringstream os;
> +    std::print(os, "{:L} {}", 12345, 6789);
> +    VERIFY(os.str() == "12345 6789");
> +  }
> +  {
> +    std::ostringstream os;
> +    std::print(os, "{}", 42);
> +    VERIFY(os.str() == "42");
> +  }
> +  {
> +    std::ostringstream os;
> +    os.imbue(bloc);
> +    std::print(os, "{:L} {}", 12345, 6789);
> +    VERIFY(os.str() == "1~23~45 6789");
> +  }
> +}
> +
> +int main()
> +{
> +  test_print_ostream();
> +  test_println_ostream();
> +  test_print_raw();
> +  test_print_formatted();
> +  test_vprint_nonunicode();
> +  test_locale();
> +}
> diff --git a/libstdc++-v3/testsuite/27_io/print/1.cc
> b/libstdc++-v3/testsuite/27_io/print/1.cc
> new file mode 100644
> index 00000000000..3cfdac1bb74
> --- /dev/null
> +++ b/libstdc++-v3/testsuite/27_io/print/1.cc
> @@ -0,0 +1,85 @@
> +// { dg-options "-lstdc++exp" }
> +// { dg-do run { target c++23 } }
> +// { dg-require-fileio "" }
> +
> +#include <print>
> +#include <cstdio>
> +#include <spanstream>
> +#include <testsuite_hooks.h>
> +#include <testsuite_fs.h>
> +
> +void
> +test_print_default()
> +{
> +  std::print("H{}ll{}, {}!", 3, 0, "world");
> +  // { dg-output "H3ll0, world!" }
> +}
> +
> +void
> +test_println_default()
> +{
> +  std::println("I walk the line");
> +  // { dg-output "I walk the line\n" }
> +}
> +
> +void
> +test_print_file()
> +{
> +  __gnu_test::scoped_file f;
> +  FILE* strm = std::fopen(f.path.string().c_str(), "w");
> +  VERIFY( strm );
> +  std::print(strm, "File under '{}' for {}", 'O', "OUT OF FILE");
> +  std::fclose(strm);
> +
> +  std::ifstream in(f.path);
> +  std::string txt(std::istreambuf_iterator<char>(in), {});
> +  VERIFY( txt == "File under 'O' for OUT OF FILE" );
> +}
> +
> +void
> +test_println_file()
> +{
> +  __gnu_test::scoped_file f;
> +  FILE* strm = std::fopen(f.path.string().c_str(), "w");
> +  VERIFY( strm );
> +  std::println(strm, "{} Lineman was a song I once heard", "Wichita");
> +  std::fclose(strm);
> +
> +  std::ifstream in(f.path);
> +  std::string txt(std::istreambuf_iterator<char>(in), {});
> +  VERIFY( txt == "Wichita Lineman was a song I once heard\n" );
> +}
> +
> +void
> +test_print_raw()
> +{
> +  __gnu_test::scoped_file f;
> +  FILE* strm = std::fopen(f.path.string().c_str(), "w");
> +  VERIFY( strm );
> +  std::print(strm, "{}", '\xa3'); // Not a valid UTF-8 string.
> +  std::fclose(strm);
> +
> +  std::ifstream in(f.path);
> +  std::string txt(std::istreambuf_iterator<char>(in), {});
> +  // Invalid UTF-8 should be written out unchanged if the stream is not
> +  // connected to a tty:
> +  VERIFY( txt == "\xa3" );
> +}
> +
> +void
> +test_vprint_nonunicode()
> +{
> +  std::vprint_nonunicode("{0} in \xc0 {0} out\n",
> +      std::make_format_args("garbage"));
> +  // { dg-output "garbage in . garbage out" }
> +}
> +
> +int main()
> +{
> +  test_print_default();
> +  test_println_default();
> +  test_print_file();
> +  test_println_file();
> +  test_print_raw();
> +  test_vprint_nonunicode();
> +}
> diff --git a/libstdc++-v3/testsuite/27_io/print/2.cc
> b/libstdc++-v3/testsuite/27_io/print/2.cc
> new file mode 100644
> index 00000000000..e101201f109
> --- /dev/null
> +++ b/libstdc++-v3/testsuite/27_io/print/2.cc
> @@ -0,0 +1,151 @@
> +// { dg-options "-lstdc++exp" }
> +// { dg-do run { target c++23 } }
> +// { dg-require-fileio "" }
> +
> +#include <print>
> +#include <system_error>
> +#include <climits>
> +#include <cstdio>
> +#include <cstring>
> +#include <testsuite_hooks.h>
> +#include <testsuite_fs.h>
> +
> +#ifdef _WIN32
> +#include <io.h>
> +#endif
> +
> +namespace std
> +{
> +_GLIBCXX_BEGIN_NAMESPACE_VERSION
> +  // This is an internal implementation detail that must not be used
> directly.
> +  // We need to use it here to test the behaviour
> +  error_code __write_to_terminal(void*, span<char>);
> +_GLIBCXX_END_NAMESPACE_VERSION
> +}
> +
> +// Test the internal __write_to_terminal function that vprint_unicode
> uses.
> +// The string parameter will be written to a file, then the bytes of the
> file
> +// will be read back again. On Windows those bytes will be a UTF-16
> string.
> +// Returns true if the string was valid UTF-8.
> +bool
> +as_printed_to_terminal(std::string& s)
> +{
> +  __gnu_test::scoped_file f;
> +  FILE* strm = std::fopen(f.path.string().c_str(), "w");
> +  VERIFY( strm );
> +#ifdef _WIN32
> +  void* handle = (void*)_get_osfhandle(_fileno(strm));
> +  const auto ec = std::__write_to_terminal(handle, s);
> +#else
> +  const auto ec = std::__write_to_terminal(strm, s);
> +#endif
> +  VERIFY( !ec || ec ==
> std::make_error_code(std::errc::illegal_byte_sequence) );
> +  std::fclose(strm);
> +  std::ifstream in(f.path);
> +  s.assign(std::istreambuf_iterator<char>(in), {});
> +  return !ec;
> +}
> +
> +void
> +test_utf8_validation()
> +{
> +#ifndef _WIN32
> +  std::string s = (const char*)u8"£🇬🇧 €🇪🇺";
> +  const std::string s2 = s;
> +  VERIFY( as_printed_to_terminal(s) );
> +  VERIFY( s == s2 );
> +
> +  s += " \xa3 10.99 \xee \xdd";
> +  const std::string s3 = s;
> +  VERIFY( ! as_printed_to_terminal(s) );
> +  VERIFY( s != s3 );
> +  std::string repl = (const char*)u8"\uFFFD";
> +  const std::string s4 = s2 + " " + repl + " 10.99 " + repl + " " + repl;
> +  VERIFY( s == s4 );
> +
> +  s = "\xc0\x80";
> +  VERIFY( ! as_printed_to_terminal(s) );
> +  VERIFY( s == repl + repl );
> +  s = "\xc0\xae";
> +  VERIFY( ! as_printed_to_terminal(s) );
> +  VERIFY( s == repl + repl );
> +
> +  // Examples of U+FFFD substitution from Unicode standard.
> +  std::string r4 = repl + repl + repl + repl;
> +  s = "\xc0\xaf\xe0\x80\xbf\xf0\x81\x82\x41"; // Table 3-8
> +  VERIFY( ! as_printed_to_terminal(s) );
> +  VERIFY( s == r4 + r4 + "\x41" );
> +  s = "\xed\xa0\x80\xed\xbf\xbf\xed\xaf\x41"; // Table 3-9
> +  VERIFY( ! as_printed_to_terminal(s) );
> +  VERIFY( s == r4 + r4 + "\x41" );
> +  s = "\xf4\x91\x92\x93\xff\x41\x80\xbf\x42"; // Table 3-10
> +  VERIFY( ! as_printed_to_terminal(s) );
> +  VERIFY( s == r4 + repl + "\x41" + repl + repl + "\x42" );
> +  s = "\xe1\x80\xe2\xf0\x91\x92\xf1\xbf\x41"; // Table 3-11
> +  VERIFY( ! as_printed_to_terminal(s) );
> +  VERIFY( s == r4 + "\x41" );
> +#endif
> +}
> +
> +// Create a std::u16string from the bytes in a std::string.
> +std::u16string
> +utf16_from_bytes(const std::string& s)
> +{
> +  std::u16string u16;
> +  // s should have an even number of bytes. If it doesn't, we'll copy its
> +  // null terminator into the result, which will not match the expected
> value.
> +  const auto len = (s.size() + 1) / 2;
> +  u16.resize_and_overwrite(len, [&s](char16_t* p, size_t n) {
> +    std::memcpy(p, s.data(), n * sizeof(char16_t));
> +    return n;
> +  });
> +  return u16;
> +}
> +
> +void
> +test_utf16_transcoding()
> +{
> +#ifdef _WIN32
> +  // FIXME: We can't test __write_to_terminal for Windows, because it
> +  // returns an INVALID_HANDLE Windows error when writing to a normal
> file.
> +
> +  std::string s = (const char*)u8"£🇬🇧 €🇪🇺";
> +  const std::u16string s2 = u"£🇬🇧 €🇪🇺";
> +  VERIFY( as_printed_to_terminal(s) );
> +  VERIFY( utf16_from_bytes(s) == s2 );
> +
> +  s += " \xa3 10.99 \xee\xdd";
> +  VERIFY( ! as_printed_to_terminal(s) );
> +  std::u16string repl = u"\uFFFD";
> +  const std::u16string s3 = s2 + u" " + repl + u" 10.99 " + repl + repl;
> +  VERIFY( utf16_from_bytes(s) == s3 );
> +
> +  s = "\xc0\x80";
> +  VERIFY( ! as_printed_to_terminal(s) );
> +  VERIFY( utf16_from_bytes(s) == repl + repl );
> +  s = "\xc0\xae";
> +  VERIFY( ! as_printed_to_terminal(s) );
> +  VERIFY( utf16_from_bytes(s) == repl + repl );
> +
> +  // Examples of U+FFFD substitution from Unicode standard.
> +  std::u16string r4 = repl + repl + repl + repl;
> +  s = "\xc0\xaf\xe0\x80\xbf\xf0\x81\x82\x41"; // Table 3-8
> +  VERIFY( ! as_printed_to_terminal(s) );
> +  VERIFY( utf16_from_bytes(s) == r4 + r4 + u"\x41" );
> +  s = "\xed\xa0\x80\xed\xbf\xbf\xed\xaf\x41"; // Table 3-9
> +  VERIFY( ! as_printed_to_terminal(s) );
> +  VERIFY( utf16_from_bytes(s) == r4 + r4 + u"\x41" );
> +  s = "\xf4\x91\x92\x93\xff\x41\x80\xbf\x42"; // Table 3-10
> +  VERIFY( ! as_printed_to_terminal(s) );
> +  VERIFY( utf16_from_bytes(s) == r4 + repl + u"\x41" + repl + repl +
> u"\x42" );
> +  s = "\xe1\x80\xe2\xf0\x91\x92\xf1\xbf\x41"; // Table 3-11
> +  VERIFY( ! as_printed_to_terminal(s) );
> +  VERIFY( utf16_from_bytes(s) == r4 + u"\x41" );
> +#endif
> +}
> +
> +int main()
> +{
> +  test_utf8_validation();
> +  test_utf16_transcoding();
> +}
> --
> 2.43.0
>
>

Reply via email to