Re: [Qemu-devel] [PATCH v4 14/35] tcg: Add atomic128 helpers

2016-10-05 Thread Alex Bennée

Richard Henderson  writes:

> Force the use of cmpxchg16b on x86_64.
>
> Wikipedia suggests that only very old AMD64 (circa 2004) did not have
> this instruction.  Further, it's required by Windows 8 so no new cpus
> will ever omit it.
>
> If we truly care about these, then we could check this at startup time
> and then avoid executing paths that use it.
>
> Signed-off-by: Richard Henderson 

> diff --git a/tcg-runtime.c b/tcg-runtime.c
> index aa55d12..0c97cdf 100644
> --- a/tcg-runtime.c
> +++ b/tcg-runtime.c
> @@ -118,8 +118,8 @@ static void *atomic_mmu_lookup(CPUArchState *env, 
> target_ulong addr,
>  /* Macro to call the above, with local variables from the use context.  */
>  #define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, DATA_SIZE, GETPC())
>
> -#define ATOMIC_NAME(X)   HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
>  #define EXTRA_ARGS
> +#define ATOMIC_NAME(X)   HELPER(glue(glue(atomic_ ## X, SUFFIX), END))

This seems a null change. I think it was also in the last series.

--
Alex Bennée



[Qemu-devel] [PATCH v4 14/35] tcg: Add atomic128 helpers

2016-09-16 Thread Richard Henderson
Force the use of cmpxchg16b on x86_64.

Wikipedia suggests that only very old AMD64 (circa 2004) did not have
this instruction.  Further, it's required by Windows 8 so no new cpus
will ever omit it.

If we truly care about these, then we could check this at startup time
and then avoid executing paths that use it.

Signed-off-by: Richard Henderson 
---
 atomic_template.h | 40 +++-
 configure | 29 -
 cputlb.c  |  5 +
 include/qemu/int128.h |  6 ++
 tcg-runtime.c | 20 +++-
 tcg/tcg.h | 24 +++-
 6 files changed, 120 insertions(+), 4 deletions(-)

diff --git a/atomic_template.h b/atomic_template.h
index d2c8a08..4fdf722 100644
--- a/atomic_template.h
+++ b/atomic_template.h
@@ -18,7 +18,11 @@
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#if DATA_SIZE == 8
+#if DATA_SIZE == 16
+# define SUFFIX o
+# define DATA_TYPE  Int128
+# define BSWAP  bswap128
+#elif DATA_SIZE == 8
 # define SUFFIX q
 # define DATA_TYPE  uint64_t
 # define BSWAP  bswap64
@@ -59,6 +63,21 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, 
target_ulong addr,
 return atomic_cmpxchg__nocheck(haddr, cmpv, newv);
 }
 
+#if DATA_SIZE >= 16
+ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
+{
+DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP;
+__atomic_load(haddr, &val, __ATOMIC_RELAXED);
+return val;
+}
+
+void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
+ ABI_TYPE val EXTRA_ARGS)
+{
+DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+__atomic_store(haddr, &val, __ATOMIC_RELAXED);
+}
+#else
 ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
ABI_TYPE val EXTRA_ARGS)
 {
@@ -84,6 +103,8 @@ GEN_ATOMIC_HELPER(or_fetch)
 GEN_ATOMIC_HELPER(xor_fetch)
 
 #undef GEN_ATOMIC_HELPER
+#endif /* DATA_SIZE >= 16 */
+
 #undef END
 
 #if DATA_SIZE > 1
@@ -101,6 +122,22 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, 
target_ulong addr,
 return BSWAP(atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv)));
 }
 
+#if DATA_SIZE >= 16
+ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
+{
+DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP;
+__atomic_load(haddr, &val, __ATOMIC_RELAXED);
+return BSWAP(val);
+}
+
+void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
+ ABI_TYPE val EXTRA_ARGS)
+{
+DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+val = BSWAP(val);
+__atomic_store(haddr, &val, __ATOMIC_RELAXED);
+}
+#else
 ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
ABI_TYPE val EXTRA_ARGS)
 {
@@ -162,6 +199,7 @@ ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, 
target_ulong addr,
 ldo = ldn;
 }
 }
+#endif /* DATA_SIZE >= 16 */
 
 #undef END
 #endif /* DATA_SIZE > 1 */
diff --git a/configure b/configure
index 7d083bd..b4d3f90 100755
--- a/configure
+++ b/configure
@@ -1217,7 +1217,10 @@ case "$cpu" in
cc_i386='$(CC) -m32'
;;
 x86_64)
-   CPU_CFLAGS="-m64"
+   # ??? Only extremely old AMD cpus do not have cmpxchg16b.
+   # If we truly care, we should simply detect this case at
+   # runtime and generate the fallback to serial emulation.
+   CPU_CFLAGS="-m64 -mcx16"
LDFLAGS="-m64 $LDFLAGS"
cc_i386='$(CC) -m32'
;;
@@ -4456,6 +4459,26 @@ if compile_prog "" "" ; then
 int128=yes
 fi
 
+#
+# See if 128-bit atomic operations are supported.
+
+atomic128=no
+if test "$int128" = "yes"; then
+  cat > $TMPC << EOF
+int main(void)
+{
+  unsigned __int128 x = 0, y = 0;
+  y = __atomic_load_16(&x, 0);
+  __atomic_store_16(&x, y, 0);
+  __atomic_compare_exchange_16(&x, &y, x, 0, 0, 0);
+  return 0;
+}
+EOF
+  if compile_prog "" "" ; then
+atomic128=yes
+  fi
+fi
+
 
 # check if getauxval is available.
 
@@ -5410,6 +5433,10 @@ if test "$int128" = "yes" ; then
   echo "CONFIG_INT128=y" >> $config_host_mak
 fi
 
+if test "$atomic128" = "yes" ; then
+  echo "CONFIG_ATOMIC128=y" >> $config_host_mak
+fi
+
 if test "$getauxval" = "yes" ; then
   echo "CONFIG_GETAUXVAL=y" >> $config_host_mak
 fi
diff --git a/cputlb.c b/cputlb.c
index 4f2c500..845b2a7 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -690,6 +690,11 @@ static void *atomic_mmu_lookup(CPUArchState *env, 
target_ulong addr,
 #define DATA_SIZE 8
 #include "atomic_template.h"
 
+#ifdef CONFIG_ATOMIC128
+#define DATA_SIZE 16
+#include "atomic_template.h"
+#endif
+
 /* Second set of helpers are directly callable from TCG as helpers.  */
 
 #undef EXTRA_ARGS
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index 67440fa..261b55f 100644
--- a/include/qemu/int128.h
+++ b/include/qemu/int128.h
@@ -2,6 +2,7 @@
 #define