On Wed, Oct 09, 2019 at 01:15:29PM -0400, Tom Lane wrote:
> Noah Misch <n...@leadboat.com> writes:
> > On Mon, Oct 07, 2019 at 03:06:35PM -0400, Tom Lane wrote:
> >> This still fails on Apple's compilers. ...
> 
> > Thanks for testing.  That error boils down to "need to use some other
> > register".  The second operand of addi is one of the ppc instruction 
> > operands
> > that can hold a constant zero or a register number[1], so the proper
> > constraint is "b".  I've made it so and added a comment.
> 
> Ah-hah.  This version does compile and pass check-world for me.
> 
> > I should probably
> > update s_lock.h, too, in a later patch.  I don't know how it has
> > mostly-avoided this failure mode, but its choice of constraint could explain
> > https://postgr.es/m/flat/36E70B06-2C5C-11D8-A096-0005024EF27F%40ifrance.com
> 
> Indeed.  It's a bit astonishing that more people haven't hit that.
> This should be back-patched.

I may as well do that first, so there's no time when s_lock.h disagrees with
arch-ppc.h about the constraint to use.  I'm attaching that patch, too.

> * I still think that the added configure test is a waste of build cycles.
> It'd be sufficient to test "#ifdef HAVE__BUILTIN_CONSTANT_P" where you
> are testing HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P, because our previous
> buildfarm go-round with this showed that all supported compilers
> interpret "i" this way.

xlc does not interpret "i" that way:
https://buildfarm.postgresql.org/cgi-bin/show_stage_log.pl?nm=hornet&dt=2019-09-14%2016%3A42%3A32&stg=config

> * I really dislike building the asm calls with macros as you've done
> here.  The macros violate project style, and are not remotely general-
> purpose, because they have hard-wired references to variables that are
> not in their argument lists.  While that could be fixed with more
> arguments, I don't think that the approach is readable or maintainable
> --- it's impossible for example to understand the register constraints
> without looking simultaneously at the calls and the macro definition.
> And, as we've seen in this "b" issue, the interactions between the chosen
> instruction types and the constraints are subtle enough to make me wonder
> whether you won't need even more arguments to allow some of the other
> constraints to be variable.  I think it'd be far better just to write out
> the asm in-line and accept the not-very-large amount of code duplication
> you'd get.

For a macro local to one C file, I think readability is the relevant metric.
In particular, it would be wrong to add arguments to make these more like
header file macros.  I think the macros make the code somewhat more readable,
and you think they make the code less readable.  I have removed the macros.

> * src/tools/pginclude/headerscheck needs the same adjustment as you
> made in cpluspluscheck.

Done.
diff --git a/configure b/configure
index 74627a7..02a905b 100755
--- a/configure
+++ b/configure
@@ -14518,6 +14518,46 @@ $as_echo "$pgac_cv_have_ppc_mutex_hint" >&6; }
 $as_echo "#define HAVE_PPC_LWARX_MUTEX_HINT 1" >>confdefs.h
 
     fi
+    # Check if compiler accepts "i"(x) when __builtin_constant_p(x).
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether 
__builtin_constant_p(x) implies \"i\"(x) acceptance" >&5
+$as_echo_n "checking whether __builtin_constant_p(x) implies \"i\"(x) 
acceptance... " >&6; }
+if ${pgac_cv_have_i_constraint__builtin_constant_p+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+static inline int
+     addi(int ra, int si)
+     {
+         int res = 0;
+         if (__builtin_constant_p(si))
+             __asm__ __volatile__(
+                 " addi %0,%1,%2\n" : "=r"(res) : "b"(ra), "i"(si));
+         return res;
+     }
+     int test_adds(int x) { return addi(3, x) + addi(x, 5); }
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  pgac_cv_have_i_constraint__builtin_constant_p=yes
+else
+  pgac_cv_have_i_constraint__builtin_constant_p=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 
$pgac_cv_have_i_constraint__builtin_constant_p" >&5
+$as_echo "$pgac_cv_have_i_constraint__builtin_constant_p" >&6; }
+    if test x"$pgac_cv_have_i_constraint__builtin_constant_p" = xyes ; then
+
+$as_echo "#define HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P 1" >>confdefs.h
+
+    fi
   ;;
 esac
 
diff --git a/configure.in b/configure.in
index 0d16c1a..88d3a59 100644
--- a/configure.in
+++ b/configure.in
@@ -1539,6 +1539,26 @@ case $host_cpu in
     if test x"$pgac_cv_have_ppc_mutex_hint" = xyes ; then
        AC_DEFINE(HAVE_PPC_LWARX_MUTEX_HINT, 1, [Define to 1 if the assembler 
supports PPC's LWARX mutex hint bit.])
     fi
+    # Check if compiler accepts "i"(x) when __builtin_constant_p(x).
+    AC_CACHE_CHECK([whether __builtin_constant_p(x) implies "i"(x) acceptance],
+                   [pgac_cv_have_i_constraint__builtin_constant_p],
+    [AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
+    [static inline int
+     addi(int ra, int si)
+     {
+         int res = 0;
+         if (__builtin_constant_p(si))
+             __asm__ __volatile__(
+                 " addi %0,%1,%2\n" : "=r"(res) : "b"(ra), "i"(si));
+         return res;
+     }
+     int test_adds(int x) { return addi(3, x) + addi(x, 5); }], [])],
+    [pgac_cv_have_i_constraint__builtin_constant_p=yes],
+    [pgac_cv_have_i_constraint__builtin_constant_p=no])])
+    if test x"$pgac_cv_have_i_constraint__builtin_constant_p" = xyes ; then
+      AC_DEFINE(HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P, 1,
+                [Define to 1 if __builtin_constant_p(x) implies "i"(x) 
acceptance.])
+    fi
   ;;
 esac
 
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 53e6fe3..939245d 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -329,6 +329,9 @@
 /* Define to 1 if you have isinf(). */
 #undef HAVE_ISINF
 
+/* Define to 1 if __builtin_constant_p(x) implies "i"(x) acceptance. */
+#undef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+
 /* Define to 1 if you have the <langinfo.h> header file. */
 #undef HAVE_LANGINFO_H
 
diff --git a/src/include/port/atomics.h b/src/include/port/atomics.h
index fab66f8..363e1df 100644
--- a/src/include/port/atomics.h
+++ b/src/include/port/atomics.h
@@ -87,14 +87,11 @@
  * using compiler intrinsics are a good idea.
  */
 /*
- * Given a gcc-compatible xlc compiler, prefer the xlc implementation.  The
- * ppc64le "IBM XL C/C++ for Linux, V13.1.2" implements both interfaces, but
- * __sync_lock_test_and_set() of one-byte types elicits SIGSEGV.
+ * gcc or compatible, including clang and icc.  Exclude xlc.  The ppc64le "IBM
+ * XL C/C++ for Linux, V13.1.2" emulates gcc, but __sync_lock_test_and_set()
+ * of one-byte types elicits SIGSEGV.
  */
-#if defined(__IBMC__) || defined(__IBMCPP__)
-#include "port/atomics/generic-xlc.h"
-/* gcc or compatible, including clang and icc */
-#elif defined(__GNUC__) || defined(__INTEL_COMPILER)
+#if (defined(__GNUC__) || defined(__INTEL_COMPILER)) && !(defined(__IBMC__) || 
defined(__IBMCPP__))
 #include "port/atomics/generic-gcc.h"
 #elif defined(_MSC_VER)
 #include "port/atomics/generic-msvc.h"
diff --git a/src/include/port/atomics/arch-ppc.h 
b/src/include/port/atomics/arch-ppc.h
index 344b394..568292d 100644
--- a/src/include/port/atomics/arch-ppc.h
+++ b/src/include/port/atomics/arch-ppc.h
@@ -25,5 +25,236 @@
 #define pg_write_barrier_impl()                __asm__ __volatile__ ("lwsync" 
: : : "memory")
 #endif
 
+#define PG_HAVE_ATOMIC_U32_SUPPORT
+typedef struct pg_atomic_uint32
+{
+       volatile uint32 value;
+} pg_atomic_uint32;
+
+/* 64bit atomics are only supported in 64bit mode */
+#ifdef __64BIT__
+#define PG_HAVE_ATOMIC_U64_SUPPORT
+typedef struct pg_atomic_uint64
+{
+       volatile uint64 value pg_attribute_aligned(8);
+} pg_atomic_uint64;
+
+#endif /* __64BIT__ */
+
+/*
+ * This mimics gcc __atomic_compare_exchange_n(..., __ATOMIC_SEQ_CST), but
+ * code generation differs at the end.  __atomic_compare_exchange_n():
+ *  100:       isync
+ *  104:       mfcr    r3
+ *  108:       rlwinm  r3,r3,3,31,31
+ *  10c:       bne     120 <.eb+0x10>
+ *  110:       clrldi  r3,r3,63
+ *  114:       addi    r1,r1,112
+ *  118:       blr
+ *  11c:       nop
+ *  120:       clrldi  r3,r3,63
+ *  124:       stw     r9,0(r4)
+ *  128:       addi    r1,r1,112
+ *  12c:       blr
+ *
+ * This:
+ *   f0:       isync
+ *   f4:       mfcr    r9
+ *   f8:       rldicl. r3,r9,35,63
+ *   fc:       bne     104 <.eb>
+ *  100:       stw     r10,0(r4)
+ *  104:       addi    r1,r1,112
+ *  108:       blr
+ *
+ * This implementation may or may not have materially different performance.
+ * It's not exploiting the fact that cr0 still holds the relevant comparison
+ * bits, set during the __asm__.  One could fix that by moving more code into
+ * the __asm__.  (That would remove the freedom to eliminate dead stores when
+ * the caller ignores "expected", but few callers do.)
+ *
+ * The cmpwi variant may be dead code.  In gcc 7.2.0,
+ * __builtin_constant_p(*expected) always reports false.
+ * __atomic_compare_exchange_n() does use cmpwi when its second argument
+ * points to a constant.  Hence, using this instead of
+ * __atomic_compare_exchange_n() nominally penalizes the generic.h
+ * pg_atomic_test_set_flag_impl().  Modern GCC will use the generic-gcc.h
+ * version, making the penalty theoretical only.
+ *
+ * Recognizing constant "newval" would be superfluous, because there's no
+ * immediate-operand version of stwcx.
+ */
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+static inline bool
+pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
+                                                                       uint32 
*expected, uint32 newval)
+{
+       uint32 found;
+       uint32 condition_register;
+       bool ret;
+
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+       if (__builtin_constant_p(*expected) &&
+               *expected <= PG_INT16_MAX && *expected >= PG_INT16_MIN)
+               __asm__ __volatile__(
+                       "       sync                            \n"
+                       "       lwarx   %0,0,%5         \n"
+                       "       cmpwi   %0,%3           \n"
+                       "       bne     $+12            \n"             /* 
branch to isync */
+                       "       stwcx.  %4,0,%5         \n"
+                       "       bne     $-16            \n"             /* 
branch to lwarx */
+                       "       isync                           \n"
+                       "       mfcr    %1          \n"
+:                      "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
+:                      "i"(*expected), "r"(newval), "r"(&ptr->value)
+:                      "memory", "cc");
+       else
+#endif
+               __asm__ __volatile__(
+                       "       sync                            \n"
+                       "       lwarx   %0,0,%5         \n"
+                       "       cmpw    %0,%3           \n"
+                       "       bne     $+12            \n"             /* 
branch to isync */
+                       "       stwcx.  %4,0,%5         \n"
+                       "       bne     $-16            \n"             /* 
branch to lwarx */
+                       "       isync                           \n"
+                       "       mfcr    %1          \n"
+:                      "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
+:                      "r"(*expected), "r"(newval), "r"(&ptr->value)
+:                      "memory", "cc");
+
+       ret = (condition_register >> 29) & 1;   /* test eq bit of cr0 */
+       if (!ret)
+               *expected = found;
+       return ret;
+}
+
+/*
+ * This mirrors gcc __sync_fetch_and_add().
+ *
+ * Like tas(), use constraint "=&b" to avoid allocating r0.
+ */
+#define PG_HAVE_ATOMIC_FETCH_ADD_U32
+static inline uint32
+pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
+{
+       uint32 _t;
+       uint32 res;
+
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+       if (__builtin_constant_p(add_) &&
+               add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
+               __asm__ __volatile__(
+                       "       sync                            \n"
+                       "       lwarx   %1,0,%4         \n"
+                       "       addi    %0,%1,%3        \n"
+                       "       stwcx.  %0,0,%4         \n"
+                       "       bne     $-12            \n"             /* 
branch to lwarx */
+                       "       isync                           \n"
+:                      "=&r"(_t), "=&b"(res), "+m"(ptr->value)
+:                      "i"(add_), "r"(&ptr->value)
+:                      "memory", "cc");
+       else
+#endif
+               __asm__ __volatile__(
+                       "       sync                            \n"
+                       "       lwarx   %1,0,%4         \n"
+                       "       add     %0,%1,%3        \n"
+                       "       stwcx.  %0,0,%4         \n"
+                       "       bne     $-12            \n"             /* 
branch to lwarx */
+                       "       isync                           \n"
+:                      "=&r"(_t), "=&r"(res), "+m"(ptr->value)
+:                      "r"(add_), "r"(&ptr->value)
+:                      "memory", "cc");
+
+       return res;
+}
+
+#ifdef PG_HAVE_ATOMIC_U64_SUPPORT
+
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+static inline bool
+pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+                                                                       uint64 
*expected, uint64 newval)
+{
+       uint64 found;
+       uint32 condition_register;
+       bool ret;
+
+       /* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/; s/cmpw/cmpd/ */
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+       if (__builtin_constant_p(*expected) &&
+               *expected <= PG_INT16_MAX && *expected >= PG_INT16_MIN)
+               __asm__ __volatile__(
+                       "       sync                            \n"
+                       "       ldarx   %0,0,%5         \n"
+                       "       cmpdi   %0,%3           \n"
+                       "       bne     $+12            \n"             /* 
branch to isync */
+                       "       stdcx.  %4,0,%5         \n"
+                       "       bne     $-16            \n"             /* 
branch to ldarx */
+                       "       isync                           \n"
+                       "       mfcr    %1          \n"
+:                      "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
+:                      "i"(*expected), "r"(newval), "r"(&ptr->value)
+:                      "memory", "cc");
+       else
+#endif
+               __asm__ __volatile__(
+                       "       sync                            \n"
+                       "       ldarx   %0,0,%5         \n"
+                       "       cmpd    %0,%3           \n"
+                       "       bne     $+12            \n"             /* 
branch to isync */
+                       "       stdcx.  %4,0,%5         \n"
+                       "       bne     $-16            \n"             /* 
branch to ldarx */
+                       "       isync                           \n"
+                       "       mfcr    %1          \n"
+:                      "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
+:                      "r"(*expected), "r"(newval), "r"(&ptr->value)
+:                      "memory", "cc");
+
+       ret = (condition_register >> 29) & 1;   /* test eq bit of cr0 */
+       if (!ret)
+               *expected = found;
+       return ret;
+}
+
+#define PG_HAVE_ATOMIC_FETCH_ADD_U64
+static inline uint64
+pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
+{
+       uint64 _t;
+       uint64 res;
+
+       /* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+       if (__builtin_constant_p(add_) &&
+               add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
+               __asm__ __volatile__(
+                       "       sync                            \n"
+                       "       ldarx   %1,0,%4         \n"
+                       "       addi    %0,%1,%3        \n"
+                       "       stdcx.  %0,0,%4         \n"
+                       "       bne     $-12            \n"             /* 
branch to ldarx */
+                       "       isync                           \n"
+:                      "=&r"(_t), "=&b"(res), "+m"(ptr->value)
+:                      "i"(add_), "r"(&ptr->value)
+:                      "memory", "cc");
+       else
+#endif
+               __asm__ __volatile__(
+                       "       sync                            \n"
+                       "       ldarx   %1,0,%4         \n"
+                       "       add     %0,%1,%3        \n"
+                       "       stdcx.  %0,0,%4         \n"
+                       "       bne     $-12            \n"             /* 
branch to ldarx */
+                       "       isync                           \n"
+:                      "=&r"(_t), "=&r"(res), "+m"(ptr->value)
+:                      "r"(add_), "r"(&ptr->value)
+:                      "memory", "cc");
+
+       return res;
+}
+
+#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */
+
 /* per architecture manual doubleword accesses have single copy atomicity */
 #define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
diff --git a/src/include/port/atomics/generic-xlc.h 
b/src/include/port/atomics/generic-xlc.h
deleted file mode 100644
index 8b5c732..0000000
--- a/src/include/port/atomics/generic-xlc.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * generic-xlc.h
- *       Atomic operations for IBM's CC
- *
- * Portions Copyright (c) 2013-2019, PostgreSQL Global Development Group
- *
- * NOTES:
- *
- * Documentation:
- * * Synchronization and atomic built-in functions
- *   
http://www-01.ibm.com/support/knowledgecenter/SSGH3R_13.1.2/com.ibm.xlcpp131.aix.doc/compiler_ref/bifs_sync_atomic.html
- *
- * src/include/port/atomics/generic-xlc.h
- *
- * -------------------------------------------------------------------------
- */
-
-#if defined(HAVE_ATOMICS)
-
-#define PG_HAVE_ATOMIC_U32_SUPPORT
-typedef struct pg_atomic_uint32
-{
-       volatile uint32 value;
-} pg_atomic_uint32;
-
-
-/* 64bit atomics are only supported in 64bit mode */
-#ifdef __64BIT__
-#define PG_HAVE_ATOMIC_U64_SUPPORT
-typedef struct pg_atomic_uint64
-{
-       volatile uint64 value pg_attribute_aligned(8);
-} pg_atomic_uint64;
-
-#endif /* __64BIT__ */
-
-#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
-static inline bool
-pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
-                                                                       uint32 
*expected, uint32 newval)
-{
-       bool            ret;
-
-       /*
-        * atomics.h specifies sequential consistency ("full barrier semantics")
-        * for this interface.  Since "lwsync" provides acquire/release
-        * consistency only, do not use it here.  GCC atomics observe the same
-        * restriction; see its rs6000_pre_atomic_barrier().
-        */
-       __asm__ __volatile__ (" sync \n" ::: "memory");
-
-       /*
-        * XXX: __compare_and_swap is defined to take signed parameters, but 
that
-        * shouldn't matter since we don't perform any arithmetic operations.
-        */
-       ret = __compare_and_swap((volatile int*)&ptr->value,
-                                                        (int *)expected, 
(int)newval);
-
-       /*
-        * xlc's documentation tells us:
-        * "If __compare_and_swap is used as a locking primitive, insert a call 
to
-        * the __isync built-in function at the start of any critical sections."
-        *
-        * The critical section begins immediately after __compare_and_swap().
-        */
-       __isync();
-
-       return ret;
-}
-
-#define PG_HAVE_ATOMIC_FETCH_ADD_U32
-static inline uint32
-pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
-{
-       uint32 _t;
-       uint32 res;
-
-       /*
-        * xlc has a no-longer-documented __fetch_and_add() intrinsic.  In xlc
-        * 12.01.0000.0000, it emits a leading "sync" and trailing "isync".  In
-        * xlc 13.01.0003.0004, it emits neither.  Hence, using the intrinsic
-        * would add redundant syncs on xlc 12.
-        */
-       __asm__ __volatile__(
-               "       sync                            \n"
-               "       lwarx   %1,0,%4         \n"
-               "       add     %0,%1,%3        \n"
-               "       stwcx.  %0,0,%4         \n"
-               "       bne     $-12            \n"             /* branch to 
lwarx */
-               "       isync                           \n"
-:              "=&r"(_t), "=&r"(res), "+m"(ptr->value)
-:              "r"(add_), "r"(&ptr->value)
-:              "memory", "cc");
-
-       return res;
-}
-
-#ifdef PG_HAVE_ATOMIC_U64_SUPPORT
-
-#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
-static inline bool
-pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
-                                                                       uint64 
*expected, uint64 newval)
-{
-       bool            ret;
-
-       __asm__ __volatile__ (" sync \n" ::: "memory");
-
-       ret = __compare_and_swaplp((volatile long*)&ptr->value,
-                                                          (long *)expected, 
(long)newval);
-
-       __isync();
-
-       return ret;
-}
-
-#define PG_HAVE_ATOMIC_FETCH_ADD_U64
-static inline uint64
-pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
-{
-       uint64 _t;
-       uint64 res;
-
-       /* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */
-       __asm__ __volatile__(
-               "       sync                            \n"
-               "       ldarx   %1,0,%4         \n"
-               "       add     %0,%1,%3        \n"
-               "       stdcx.  %0,0,%4         \n"
-               "       bne     $-12            \n"             /* branch to 
ldarx */
-               "       isync                           \n"
-:              "=&r"(_t), "=&r"(res), "+m"(ptr->value)
-:              "r"(add_), "r"(&ptr->value)
-:              "memory", "cc");
-
-       return res;
-}
-
-#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */
-
-#endif /* defined(HAVE_ATOMICS) */
diff --git a/src/tools/pginclude/cpluspluscheck 
b/src/tools/pginclude/cpluspluscheck
index 843d391..3f315bc 100755
--- a/src/tools/pginclude/cpluspluscheck
+++ b/src/tools/pginclude/cpluspluscheck
@@ -89,7 +89,6 @@ do
        test "$f" = src/include/port/atomics/generic-gcc.h && continue
        test "$f" = src/include/port/atomics/generic-msvc.h && continue
        test "$f" = src/include/port/atomics/generic-sunpro.h && continue
-       test "$f" = src/include/port/atomics/generic-xlc.h && continue
 
        # rusagestub.h is also platform-specific, and will be included
        # by utils/pg_rusage.h if necessary.
diff --git a/src/tools/pginclude/headerscheck b/src/tools/pginclude/headerscheck
index 8dbec90..7c0e504 100755
--- a/src/tools/pginclude/headerscheck
+++ b/src/tools/pginclude/headerscheck
@@ -85,7 +85,6 @@ do
        test "$f" = src/include/port/atomics/generic-gcc.h && continue
        test "$f" = src/include/port/atomics/generic-msvc.h && continue
        test "$f" = src/include/port/atomics/generic-sunpro.h && continue
-       test "$f" = src/include/port/atomics/generic-xlc.h && continue
 
        # rusagestub.h is also platform-specific, and will be included
        # by utils/pg_rusage.h if necessary.
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index d0ab247..e0984ef 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -452,6 +452,11 @@ typedef unsigned int slock_t;
 #define TAS_SPIN(lock) (*(lock) ? 1 : TAS(lock))
 
 /*
+ * The second operand of addi can hold a constant zero or a register number,
+ * hence constraint "=&b" to avoid allocating r0.  "b" stands for "address
+ * base register"; most operands having this register-or-zero property are
+ * address bases, e.g. the second operand of lwax.
+ *
  * NOTE: per the Enhanced PowerPC Architecture manual, v1.0 dated 7-May-2002,
  * an isync is a sufficient synchronization barrier after a lwarx/stwcx loop.
  * On newer machines, we can use lwsync instead for better performance.
@@ -488,7 +493,7 @@ tas(volatile slock_t *lock)
 #endif
 "      li      %1,0            \n"
 
-:      "=&r"(_t), "=r"(_res), "+m"(*lock)
+:      "=&b"(_t), "=r"(_res), "+m"(*lock)
 :      "r"(lock)
 :      "memory", "cc");
        return _res;

Reply via email to