And I attached the wrong llvm diff. Here is the correct one.
On Sat, Jul 16, 2011 at 8:21 PM, Scott Conger <[email protected]> wrote:
> Attached patch adds support for -finput-charset and automatic text
> conversion when there are multibyte characters or a byte-order-mark is
> present. The net effect is that all internal text should now be in
> UTF-8.
>
> I have the exec charset options mostly working, but I trimmed it down
> to this for now, as it's a decently sized patch as-is.
>
>
> Performance impact:
>
> At a minimum, we have to scan through the input text to see if there
> are any multi-byte characters. There are usually none as portable code
> won't have any. The cost of this is lower if you have SSE2 support as
> I added an optimized version using intrinsics:
>
> For 1000 calls against a 16 MB ASCII buffer, on an AMD Athlon 7850
> (2.81 GHz), rough costs with GCC were:
> Default checkAscii - 13050 ms
> SSE2 checkAscii - 4025 ms
>
> If you do use -finput-charset, or if the input contains multi-byte text
> or a byte-order-mark, the cost to convert the text to UTF-8 is
> somewhere between 10 and 20 times higher than the default checkAscii
> implementation. It varies considerably depending on the input and
> character set.
>
> As a special case, UTF-8 input avoids most of this cost and it just
> checks that it's valid UTF-8.
>
> GCC differences:
>
> * Didn't add GCC's support for IBM character encodings, although
> -finput-charset should work if iconv supports it.
> * Didn't add GCC's special handling of a few character sets like
> Shift-JIS when no iconv is present.
> * GCC only seems to do byte-order-mark detection if the underlying
> iconv does, which apparently varies.
>
> Issues:
>
> * It turned out to be quite ugly to get iconv working on Windows. See
> comment in NativeIconv.cpp. If what's there is objectionable, I'd
> prefer to rip out Windows support of iconv for now.
> * Difficult to automatically test as iconv implementations support
> very different sets of encodings.
> * It looks like I picked up some changes that were never checked in
> (relating to a bug report URL) when I regenerated configure.
>
> Testing:
>
> Did Linux GCC, Windows Visual Studio 10 and Cygwin GCC builds. Ran all
> tests on Linux.
>
> You can run a simple input conversion test like so:
>
> sconger@scott-ubuntu:~/dev/llvmpatch/build$ iconv -f ASCII -t UTF-16BE
> test.c > test_utf16be.c
> sconger@scott-ubuntu:~/dev/llvmpatch/build$ ./bin/clang
> -finput-charset=UTF-16BE test_utf16be.c
> sconger@scott-ubuntu:~/dev/llvmpatch/build$ ./a.out
> Hello World
>
> -Scott
>
Index: cmake/config-ix.cmake
===================================================================
--- cmake/config-ix.cmake (revision 135359)
+++ cmake/config-ix.cmake (working copy)
@@ -44,7 +44,9 @@
check_include_file(errno.h HAVE_ERRNO_H)
check_include_file(execinfo.h HAVE_EXECINFO_H)
check_include_file(fcntl.h HAVE_FCNTL_H)
+check_include_file(iconv.h HAVE_ICONV_H)
check_include_file(inttypes.h HAVE_INTTYPES_H)
+check_include_file(langinfo.h HAVE_LANGINFO_H)
check_include_file(limits.h HAVE_LIMITS_H)
check_include_file(link.h HAVE_LINK_H)
check_include_file(malloc.h HAVE_MALLOC_H)
Index: configure
===================================================================
--- configure (revision 135359)
+++ configure (working copy)
@@ -1471,6 +1471,8 @@
64 bit multilib directory.
--with-binutils-include Specify path to binutils/include/ containing
plugin-api.h file for gold plugin.
+ --with-bug-report-url Specify the URL where bug reports should be
+ submitted (default=http://llvm.org)
--with-tclinclude directory where tcl headers are
--with-llvmcc=<name> Choose the LLVM capable compiler to use (llvm-gcc,
clang, or none; default=check)
@@ -5520,6 +5522,20 @@
fi
fi
+
+# Check whether --with-bug-report-url was given.
+if test "${with_bug_report_url+set}" = set; then
+ withval=$with_bug_report_url;
+else
+ withval="http://llvm.org"
+fi
+
+
+cat >>confdefs.h <<_ACEOF
+#define BUG_REPORT_URL "$withval"
+_ACEOF
+
+
# Check whether --enable-libffi was given.
if test "${enable_libffi+set}" = set; then
enableval=$enable_libffi; case "$enableval" in
@@ -11596,7 +11612,7 @@
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 11591 "configure"
+#line 11615 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -16976,6 +16992,176 @@
done
+
+
+for ac_header in langinfo.h iconv.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ { echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+else
+ # Is the header compilable?
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <$ac_header>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
+ ( cat <<\_ASBOX
+## ----------------------------------- ##
+## Report this to [email protected] ##
+## ----------------------------------- ##
+_ASBOX
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ eval "$as_ac_Header=\$ac_header_preproc"
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
if test "$ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
for ac_header in pthread.h
Index: include/llvm/Config/config.h.in
===================================================================
--- include/llvm/Config/config.h.in (revision 135359)
+++ include/llvm/Config/config.h.in (working copy)
@@ -3,6 +3,9 @@
#ifndef CONFIG_H
#define CONFIG_H
+/* Bug report URL. */
+#undef BUG_REPORT_URL
+
/* Relative directory for resource files */
#undef CLANG_RESOURCE_DIR
@@ -170,6 +173,9 @@
/* Define if the gv program is available */
#undef HAVE_GV
+/* Define to 1 if you have the <iconv.h> header file. */
+#undef HAVE_ICONV_H
+
/* Define to 1 if you have the `index' function. */
#undef HAVE_INDEX
@@ -194,6 +200,9 @@
/* Set to 1 if the isnan function is found in <math.h> */
#undef HAVE_ISNAN_IN_MATH_H
+/* Define to 1 if you have the <langinfo.h> header file. */
+#undef HAVE_LANGINFO_H
+
/* Define if you have the libdl library or equivalent. */
#undef HAVE_LIBDL
Index: include/llvm/Config/config.h.cmake
===================================================================
--- include/llvm/Config/config.h.cmake (revision 135359)
+++ include/llvm/Config/config.h.cmake (working copy)
@@ -154,6 +154,9 @@
/* Define if the gv program is available */
#cmakedefine HAVE_GV ${HAVE_GV}
+/* Define to 1 if you have the <iconv.h> header file. */
+#cmakedefine HAVE_ICONV_H ${HAVE_ICONV_H}
+
/* Define to 1 if you have the `index' function. */
#cmakedefine HAVE_INDEX ${HAVE_INDEX}
@@ -178,6 +181,9 @@
/* Set to 1 if the isnan function is found in <math.h> */
#cmakedefine HAVE_ISNAN_IN_MATH_H ${HAVE_ISNAN_IN_MATH_H}
+/* Define to 1 if you have the <langinfo.h> header file. */
+#cmakedefine HAVE_LANGINFO_H ${HAVE_LANGINFO_H}
+
/* Define if you have the libdl library or equivalent. */
#cmakedefine HAVE_LIBDL ${HAVE_LIBDL}
Index: autoconf/configure.ac
===================================================================
--- autoconf/configure.ac (revision 135359)
+++ autoconf/configure.ac (working copy)
@@ -1363,6 +1363,7 @@
AC_CHECK_HEADERS([sys/types.h sys/ioctl.h malloc/malloc.h mach/mach.h])
AC_CHECK_HEADERS([valgrind/valgrind.h])
AC_CHECK_HEADERS([fenv.h])
+AC_CHECK_HEADERS([langinfo.h iconv.h])
if test "$ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
AC_CHECK_HEADERS(pthread.h,
AC_SUBST(HAVE_PTHREAD, 1),
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits