abhina.sreeskantharajan created this revision.
Herald added a subscriber: hiraditya.
Herald added a project: All.
abhina.sreeskantharajan requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, MaskRay.
Herald added projects: clang, LLVM.
This patch enables the -fexec-charset option to control the execution charset of
string literals. It sets the default internal charset, system charset, and
execution charset for z/OS, and defaults to UTF-8 on all other platforms.
This patch depends on the patch that adds the CharSetConverter class:
https://reviews.llvm.org/D153417
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D153419
Files:
clang/docs/LanguageExtensions.rst
clang/include/clang/Basic/LangOptions.h
clang/include/clang/Basic/TokenKinds.h
clang/include/clang/Driver/Options.td
clang/include/clang/Lex/LiteralConverter.h
clang/include/clang/Lex/LiteralSupport.h
clang/include/clang/Lex/Preprocessor.h
clang/lib/Driver/ToolChains/Clang.cpp
clang/lib/Frontend/CompilerInstance.cpp
clang/lib/Frontend/InitPreprocessor.cpp
clang/lib/Lex/CMakeLists.txt
clang/lib/Lex/LiteralConverter.cpp
clang/lib/Lex/LiteralSupport.cpp
clang/test/CodeGen/systemz-charset.c
clang/test/CodeGen/systemz-charset.cpp
clang/test/Driver/cl-options.c
clang/test/Driver/clang_f_opts.c
clang/test/Preprocessor/init-s390x.c
clang/test/Preprocessor/init-x86.c
llvm/include/llvm/TargetParser/Triple.h
llvm/lib/TargetParser/Triple.cpp
Index: llvm/lib/TargetParser/Triple.cpp
===
--- llvm/lib/TargetParser/Triple.cpp
+++ llvm/lib/TargetParser/Triple.cpp
@@ -1193,6 +1193,13 @@
return Tmp.split('-').second; // Strip second component
}
+// System charset on z/OS is IBM-1047 and UTF-8 otherwise
+StringRef Triple::getSystemCharset() const {
+ if (getOS() == llvm::Triple::ZOS)
+return "IBM-1047";
+ return "UTF-8";
+}
+
static VersionTuple parseVersionFromName(StringRef Name) {
VersionTuple Version;
Version.tryParse(Name);
Index: llvm/include/llvm/TargetParser/Triple.h
===
--- llvm/include/llvm/TargetParser/Triple.h
+++ llvm/include/llvm/TargetParser/Triple.h
@@ -436,6 +436,9 @@
/// string (separated by a '-' if the environment component is present).
StringRef getOSAndEnvironmentName() const;
+ /// getSystemCharset - Get the system charset of the triple.
+ StringRef getSystemCharset() const;
+
/// @}
/// @name Convenience Predicates
/// @{
Index: clang/test/Preprocessor/init-x86.c
===
--- clang/test/Preprocessor/init-x86.c
+++ clang/test/Preprocessor/init-x86.c
@@ -1297,7 +1297,7 @@
// X86_64-CLOUDABI:#define __amd64 1
// X86_64-CLOUDABI:#define __amd64__ 1
// X86_64-CLOUDABI:#define __clang__ 1
-// X86_64-CLOUDABI:#define __clang_literal_encoding__ {{.*}}
+// X86_64-CLOUDABI:#define __clang_literal_encoding__ UTF-8
// X86_64-CLOUDABI:#define __clang_major__ {{.*}}
// X86_64-CLOUDABI:#define __clang_minor__ {{.*}}
// X86_64-CLOUDABI:#define __clang_patchlevel__ {{.*}}
Index: clang/test/Preprocessor/init-s390x.c
===
--- clang/test/Preprocessor/init-s390x.c
+++ clang/test/Preprocessor/init-s390x.c
@@ -201,4 +201,5 @@
// S390X-ZOS: #define __TOS_390__ 1
// S390X-ZOS: #define __TOS_MVS__ 1
// S390X-ZOS: #define __XPLINK__ 1
+// S390X-ZOS: #define __clang_literal_encoding__ IBM-1047
// S390X-ZOS-GNUXX: #define __wchar_t 1
Index: clang/test/Driver/clang_f_opts.c
===
--- clang/test/Driver/clang_f_opts.c
+++ clang/test/Driver/clang_f_opts.c
@@ -226,8 +226,14 @@
// RUN: %clang -### -S -finput-charset=iso-8859-1 -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-CHARSET %s
// CHECK-INVALID-CHARSET: error: invalid value 'iso-8859-1' in '-finput-charset=iso-8859-1'
-// RUN: %clang -### -S -fexec-charset=iso-8859-1 -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-INPUT-CHARSET %s
-// CHECK-INVALID-INPUT-CHARSET: error: invalid value 'iso-8859-1' in '-fexec-charset=iso-8859-1'
+// RUN: %clang -### -S -fexec-charset=invalid-charset -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-INPUT-CHARSET %s
+// CHECK-INVALID-INPUT-CHARSET: error: invalid value 'invalid-charset' in '-fexec-charset=invalid-charset'
+
+// Test that we support the following exec charsets.
+// RUN: %clang -### -S -fexec-charset=UTF-8 -o /dev/null %s 2>&1 | FileCheck --check-prefix=INVALID %s
+// RUN: %clang -### -S -fexec-charset=ISO8859-1 -o /dev/null %s 2>&1 | FileCheck --check-prefix=INVALID %s
+// RUN: %clang -### -S -fexec-charset=IBM-1047 -o /dev/null %s 2>&1 | FileCheck --check-prefix=INVALID %s
+// INVALID-NOT: error: invalid value
// Test that we don't error on these.
// RUN: %clang