On Sat, 2013-04-27 at 00:20 +0200, Andres Freund wrote:
> CFLAGS_VECTORIZATION? EXTRA sounds too generic to me.
I went with CFLAGS_VECTOR to be a little shorter while still keeping some meaning.

> I think it would be better to have a PGAC_PROG_CC_VAR_OPT or so which
> assigns the flag to some passed variable name. Then we can reuse it for
> different vars and I have the feeling those will come. And having a
> CFLAGS_VECTOR_OPT would just be stupid ;)

Good suggestion; done.

Thank you for the review. New renamed patch attached for the build options only (the other patch for the FNV checksum algorithm is unchanged).

Regards,
Jeff Davis
*** a/config/c-compiler.m4 --- b/config/c-compiler.m4 *************** *** 242,247 **** undefine([Ac_cachevar])dnl --- 242,272 ---- + # PGAC_PROG_CC_VAR_OPT + # ----------------------- + # Given a variable name and a string, check if the compiler supports + # the string as a command-line option. If it does, add the string to + # the given variable. + AC_DEFUN([PGAC_PROG_CC_VAR_OPT], + [define([Ac_cachevar], [AS_TR_SH([pgac_cv_prog_cc_cflags_$2])])dnl + AC_CACHE_CHECK([whether $CC supports $2], [Ac_cachevar], + [pgac_save_CFLAGS=$CFLAGS + CFLAGS="$pgac_save_CFLAGS $2" + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + _AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], + [Ac_cachevar=yes], + [Ac_cachevar=no]) + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="$pgac_save_CFLAGS"]) + if test x"$Ac_cachevar" = x"yes"; then + $1="${$1} $2" + fi + undefine([Ac_cachevar])dnl + ])# PGAC_PROG_CC_CFLAGS_OPT + + + # PGAC_PROG_CC_LDFLAGS_OPT # ------------------------ # Given a string, check if the compiler supports the string as a *** a/configure --- b/configure *************** *** 731,736 **** autodepend --- 731,737 ---- TAS GCC CPP + CFLAGS_VECTOR SUN_STUDIO_CC OBJEXT EXEEXT *************** *** 3944,3949 **** else --- 3945,3955 ---- fi fi + # set CFLAGS_VECTOR from the environment, if available + if test "$ac_env_CFLAGS_VECTOR_set" = set; then + CFLAGS_VECTOR=$ac_env_CFLAGS_VECTOR_value + fi + # Some versions of GCC support some additional useful warning flags. # Check whether they are supported, and add them to CFLAGS if so. 
# ICC pretends to be GCC but it's lying; it doesn't support these flags, *************** *** 4376,4381 **** if test x"$pgac_cv_prog_cc_cflags__fexcess_precision_standard" = x"yes"; then --- 4382,4508 ---- CFLAGS="$CFLAGS -fexcess-precision=standard" fi + # Optimization flags for specific files that benefit from vectorization + { $as_echo "$as_me:$LINENO: checking whether $CC supports -funroll-loops" >&5 + $as_echo_n "checking whether $CC supports -funroll-loops... " >&6; } + if test "${pgac_cv_prog_cc_cflags__funroll_loops+set}" = set; then + $as_echo_n "(cached) " >&6 + else + pgac_save_CFLAGS=$CFLAGS + CFLAGS="$pgac_save_CFLAGS -funroll-loops" + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + cat >conftest.$ac_ext <<_ACEOF + /* confdefs.h. */ + _ACEOF + cat confdefs.h >>conftest.$ac_ext + cat >>conftest.$ac_ext <<_ACEOF + /* end confdefs.h. */ + + int + main () + { + + ; + return 0; + } + _ACEOF + rm -f conftest.$ac_objext + if { (ac_try="$ac_compile" + case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; + esac + eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" + $as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + pgac_cv_prog_cc_cflags__funroll_loops=yes + else + $as_echo "$as_me: failed program was:" >&5 + sed 's/^/| /' conftest.$ac_ext >&5 + + pgac_cv_prog_cc_cflags__funroll_loops=no + fi + + rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="$pgac_save_CFLAGS" + fi + { $as_echo "$as_me:$LINENO: result: $pgac_cv_prog_cc_cflags__funroll_loops" >&5 + $as_echo "$pgac_cv_prog_cc_cflags__funroll_loops" >&6; } + if test x"$pgac_cv_prog_cc_cflags__funroll_loops" = x"yes"; then + CFLAGS_VECTOR="${CFLAGS_VECTOR} -funroll-loops" + fi + + { $as_echo "$as_me:$LINENO: checking whether $CC supports -ftree-vectorize" >&5 + $as_echo_n "checking whether $CC supports -ftree-vectorize... " >&6; } + if test "${pgac_cv_prog_cc_cflags__ftree_vectorize+set}" = set; then + $as_echo_n "(cached) " >&6 + else + pgac_save_CFLAGS=$CFLAGS + CFLAGS="$pgac_save_CFLAGS -ftree-vectorize" + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + cat >conftest.$ac_ext <<_ACEOF + /* confdefs.h. */ + _ACEOF + cat confdefs.h >>conftest.$ac_ext + cat >>conftest.$ac_ext <<_ACEOF + /* end confdefs.h. */ + + int + main () + { + + ; + return 0; + } + _ACEOF + rm -f conftest.$ac_objext + if { (ac_try="$ac_compile" + case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; + esac + eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" + $as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + pgac_cv_prog_cc_cflags__ftree_vectorize=yes + else + $as_echo "$as_me: failed program was:" >&5 + sed 's/^/| /' conftest.$ac_ext >&5 + + pgac_cv_prog_cc_cflags__ftree_vectorize=no + fi + + rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="$pgac_save_CFLAGS" + fi + { $as_echo "$as_me:$LINENO: result: $pgac_cv_prog_cc_cflags__ftree_vectorize" >&5 + $as_echo "$pgac_cv_prog_cc_cflags__ftree_vectorize" >&6; } + if test x"$pgac_cv_prog_cc_cflags__ftree_vectorize" = x"yes"; then + CFLAGS_VECTOR="${CFLAGS_VECTOR} -ftree-vectorize" + fi + elif test "$ICC" = yes; then # Intel's compiler has a bug/misoptimization in checking for # division by NAN (NaN == 0), -mp1 fixes it, so add it to the CFLAGS. *************** *** 4627,4632 **** fi --- 4754,4762 ---- fi + CFLAGS_VECTOR=$CFLAGS_VECTOR + + # supply -g if --enable-debug if test "$enable_debug" = yes && test "$ac_cv_prog_cc_g" = yes; then CFLAGS="$CFLAGS -g" *** a/configure.in --- b/configure.in *************** *** 400,405 **** else --- 400,410 ---- fi fi + # set CFLAGS_VECTOR from the environment, if available + if test "$ac_env_CFLAGS_VECTOR_set" = set; then + CFLAGS_VECTOR=$ac_env_CFLAGS_VECTOR_value + fi + # Some versions of GCC support some additional useful warning flags. # Check whether they are supported, and add them to CFLAGS if so. 
# ICC pretends to be GCC but it's lying; it doesn't support these flags, *************** *** 419,424 **** if test "$GCC" = yes -a "$ICC" = no; then --- 424,432 ---- PGAC_PROG_CC_CFLAGS_OPT([-fwrapv]) # Disable FP optimizations that cause various errors on gcc 4.5+ or maybe 4.6+ PGAC_PROG_CC_CFLAGS_OPT([-fexcess-precision=standard]) + # Optimization flags for specific files that benefit from vectorization + PGAC_PROG_CC_VAR_OPT(CFLAGS_VECTOR, [-funroll-loops]) + PGAC_PROG_CC_VAR_OPT(CFLAGS_VECTOR, [-ftree-vectorize]) elif test "$ICC" = yes; then # Intel's compiler has a bug/misoptimization in checking for # division by NAN (NaN == 0), -mp1 fixes it, so add it to the CFLAGS. *************** *** 434,439 **** elif test "$PORTNAME" = "hpux"; then --- 442,449 ---- PGAC_PROG_CC_CFLAGS_OPT([+Olibmerrno]) fi + AC_SUBST(CFLAGS_VECTOR, $CFLAGS_VECTOR) + # supply -g if --enable-debug if test "$enable_debug" = yes && test "$ac_cv_prog_cc_g" = yes; then CFLAGS="$CFLAGS -g" *** a/src/Makefile.global.in --- b/src/Makefile.global.in *************** *** 219,224 **** CC = @CC@ --- 219,225 ---- GCC = @GCC@ SUN_STUDIO_CC = @SUN_STUDIO_CC@ CFLAGS = @CFLAGS@ + CFLAGS_VECTOR = @CFLAGS_VECTOR@ # Kind-of compilers *** a/src/backend/storage/page/Makefile --- b/src/backend/storage/page/Makefile *************** *** 15,17 **** include $(top_builddir)/src/Makefile.global --- 15,20 ---- OBJS = bufpage.o checksum.o itemptr.o include $(top_srcdir)/src/backend/common.mk + + # important optimizations flags for checksum.c + checksum.o: CFLAGS += ${CFLAGS_VECTOR}
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers