Dear GCC,

     This patch adds macros to the general preprocessor that allow
users to understand what the execution and wide execution charsets
are, which are used for "bark" and L"meow" literals in C-family
languages.

     The goal of this is to enable individuals in capable languages
like C and C++ to determine the encoding of string literals and,
hopefully, transcode those literals to other encodings. For example,
data stored in `char[]` on an IBM machine with EBCDIC can be
(losslessly, or with a compile-time warning) transcoded to UTF-8 at
constant-expression time in C++ with this patch, without having to use
a series of complicated architecture and platform definitions to
figure out what the encoding of string literals and wide character
literals might be.

     The names are meaningful because they are tied directly to iconv,
which means there is a strong reference between the name and the code
that encodes/decodes a proper sequence. Therefore, we just present the
names.

     Does this sound useful?

Sincerely,
JeanHeyd

Patch notes: the strings passed to the charset creation routines are
all static and/or allocated far at the beginning of the program and
never deallocated until the end of the invocation, so it seems safe to
just store a normal pointer to it.

2020-10-08  JeanHeyd "ThePhD" Meneide  <phdoftheho...@gmail.com>

        * gcc/c-family/c-cppbuiltin.c: Add predefined macro
definitions for charsets
        * gcc/doc/cpp.texi: Document new predefined macro.
        * gcc/testsuite/c-c++-common/cpp/wide-narrow-predef-macros.c (new):
          New test for macro definitions to always exist.
        * libcpp/include/cpplib.h: Add functions declarations for
          retrieving charset names
        * libcpp/directives.c: Add function definitions to retrieve charset
          names.
        * libcpp/internal.h: Add to/from name preservations
diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c
index 74ecca8de8e..8de25786592 100644
--- a/gcc/c-family/c-cppbuiltin.c
+++ b/gcc/c-family/c-cppbuiltin.c
@@ -866,6 +866,13 @@ c_cpp_builtins (cpp_reader *pfile)
 
   define_language_independent_builtin_macros (pfile);
 
+  /* encoding definitions used by users and libraries  */
+  builtin_define_with_value ("__GNUC_EXECUTION_CHARSET_NAME",
+    cpp_get_narrow_charset_name (pfile), 1);
+  builtin_define_with_value ("__GNUC_WIDE_EXECUTION_CHARSET_NAME",
+    cpp_get_wide_charset_name (pfile), 1);
+
+
   if (c_dialect_cxx ())
   {
     int major;
diff --git a/gcc/doc/cpp.texi b/gcc/doc/cpp.texi
index 33f876ab706..55fa5739812 100644
--- a/gcc/doc/cpp.texi
+++ b/gcc/doc/cpp.texi
@@ -2451,6 +2451,13 @@ features are supported by GCC.
 @item __NO_MATH_ERRNO__
 This macro is defined if @option{-fno-math-errno} is used, or enabled
 by another option such as @option{-ffast-math} or by default.
+
+@item __GNUC_EXECUTION_CHARSET_NAME
+@itemx __GNUC_WIDE_EXECUTION_CHARSET_NAME
+These macros are defined to the name of the narrow and wide compile-time
+execution character set used.  It directly reflects the name passed to
+the options @option{-fexec-charset} and @option{-fwide-exec-charset},
+or the defaults documented for those options.  @xref{Invocation}.
 @end table
 
 @node System-specific Predefined Macros
diff --git a/gcc/testsuite/c-c++-common/cpp/wide-narrow-predef-macros.c 
b/gcc/testsuite/c-c++-common/cpp/wide-narrow-predef-macros.c
new file mode 100644
index 00000000000..0f55e6915f4
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cpp/wide-narrow-predef-macros.c
@@ -0,0 +1,10 @@
+/*
+   { dg-do compile }
+ */
+
+#if !defined(__GNUC_EXECUTION_CHARSET_NAME)
+#error "Required implementation macro for compile-time charset name is not 
present"
+#endif
+#if !defined(__GNUC_WIDE_EXECUTION_CHARSET_NAME)
+#error "Required implementation macro for wide compile-time charset name is 
not present"
+#endif
diff --git a/libcpp/charset.c b/libcpp/charset.c
index 28b81c9c864..3e5578b1390 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -638,6 +638,9 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const 
char *from)
   char *pair;
   size_t i;
 
+  ret.to = to;
+  ret.from = from;
+
   if (!strcasecmp (to, from))
     {
       ret.func = convert_no_conversion;
diff --git a/libcpp/directives.c b/libcpp/directives.c
index f59718708e4..ad540872581 100644
--- a/libcpp/directives.c
+++ b/libcpp/directives.c
@@ -2571,6 +2571,20 @@ cpp_set_callbacks (cpp_reader *pfile, cpp_callbacks *cb)
   pfile->cb = *cb;
 }
 
+/* The narrow character set identifier.  */
+const char *
+cpp_get_narrow_charset_name (cpp_reader *pfile)
+{
+  return pfile->narrow_cset_desc.to;
+}
+
+/* The wide character set identifier.  */
+const char *
+cpp_get_wide_charset_name (cpp_reader *pfile)
+{
+  return pfile->wide_cset_desc.to;
+}
+
 /* The dependencies structure.  (Creates one if it hasn't already been.)  */
 class mkdeps *
 cpp_get_deps (cpp_reader *pfile)
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index 8e398863cf6..69a5042d0bf 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -971,6 +971,11 @@ extern cpp_callbacks *cpp_get_callbacks (cpp_reader *) 
ATTRIBUTE_PURE;
 extern void cpp_set_callbacks (cpp_reader *, cpp_callbacks *);
 extern class mkdeps *cpp_get_deps (cpp_reader *) ATTRIBUTE_PURE;
 
+/* Call these to get name data about the various compile-time
+   charsets.  */
+extern const char *cpp_get_narrow_charset_name (cpp_reader *) ATTRIBUTE_PURE;
+extern const char *cpp_get_wide_charset_name (cpp_reader *) ATTRIBUTE_PURE;
+
 /* This function reads the file, but does not start preprocessing.  It
    returns the name of the original file; this is the same as the
    input file, except for preprocessed input.  This will generate at
diff --git a/libcpp/internal.h b/libcpp/internal.h
index 4bafe1cf353..0a85766ff41 100644
--- a/libcpp/internal.h
+++ b/libcpp/internal.h
@@ -48,6 +48,8 @@ struct cset_converter
   convert_f func;
   iconv_t cd;
   int width;
+  const char* from;
+  const char* to;
 };
 
 #define BITS_PER_CPPCHAR_T (CHAR_BIT * sizeof (cppchar_t))

Reply via email to