On 12/01/16 17:16, Richard Earnshaw (lists) wrote:
> On 12/01/16 16:53, Richard Henderson wrote:
>> The problem in this PR is that we never got around to fleshing out the vector
>> support for transactions for anything but x86.  My goal here is to make this
>> as generic as possible, so that it should Just Work with existing vector
>> support in the backend.
>>
>> In addition, if I encounter other unexpected register types, I will now copy
>> them to memory and use memcpy, rather than crash.
>>
>> The one piece of this that requires a tiny bit of extra work is enabling the
>> vector entry points in libitm.
>>
>> For x86, we make sure to build the files with SSE or AVX support enabled.
>> For s390x, I do the same thing, enabling z13 support.  I suppose we might
>> need to check for binutils support, but I'd rather do this only if necessary.
>>
>> For arm I'm less sure what to do, since I seem to recall that use of Neon
>> sets a bit in the ELF header, which presumably means that the binary could
>> no longer be run without Neon, even though the entry points wouldn't be used.
> 
> No, we don't use bits in the elf headers: there wouldn't be enough of
> them!  Instead we use build attributes to record user intentions.  These
> are (normally) derived from .arch and .fpu directives.
> 
> For normal core attributes you can use .object_arch to force the .arch
> entry recorded in the attributes to a specific value, but I'm not sure
> if you can override the .fpu directive in this way.  You might have to
> experiment a bit.  Alternatively you might be able to force out the
> relevant build attributes using .eabi_attribute to record some explicit
> values (which then override the values that would be normally detected).
> 

BTW, the above only applies to AArch32 (traditional ARM).  AArch64
doesn't emit any such marking -- we assume that Neon is available.

R.

> R.
> 
> 
>>
>> For powerpc, I don't know how to select Altivec if VSX isn't already enabled,
>> or indeed if that's the best thing to do.
>>
>>
>> Thanks for the review,
>>
>>
>> r~
>>
>>
>> d-68964
>>
>>
>>      PR tree-opt/68964
>>      * target.def (builtin_tm_load, builtin_tm_store): Remove.
>>      * config/i386/i386.c (ix86_builtin_tm_load): Remove.
>>      (ix86_builtin_tm_store): Remove.
>>      (TARGET_VECTORIZE_BUILTIN_TM_LOAD): Remove.
>>      (TARGET_VECTORIZE_BUILTIN_TM_STORE): Remove.
>>      * doc/tm.texi.in (TARGET_VECTORIZE_BUILTIN_TM_LOAD): Remove.
>>      (TARGET_VECTORIZE_BUILTIN_TM_STORE): Remove.
>>      * doc/tm.texi: Rebuild.
>>
>>      * gtm-builtins.def (BUILT_IN_TM_MEMCPY_RNWT): New.
>>      (BUILT_IN_TM_MEMCPY_RTWN): New.
>>      * trans-mem.c (tm_log_emit_stmt): Rearrange code for better
>>      fallback from vector to integer helpers.
>>      (build_tm_load): Handle vector types directly, instead of
>>      via target hook.
>>      (build_tm_store): Likewise.
>>      (expand_assign_tm): Prepare for register types not handled by
>>      the above.  Copy them to memory and use memcpy.
>>      * tree.c (tm_define_builtin): New.
>>      (find_tm_vector_type): New.
>>      (build_tm_vector_builtins): New.
>>      (build_common_builtin_nodes): Call it.
>>
>> gcc/testsuite/
>>      * gcc.dg/tm/memopt-13.c: Update expected function.
>>      * gcc.dg/tm/memopt-6.c: Likewise.
>>
>> libitm/
>>      * Makefile.am (libitm_la_SOURCES) [ARCH_AARCH64]: Add neon.cc
>>      (libitm_la_SOURCES) [ARCH_ARM]: Add neon.cc
>>      (libitm_la_SOURCES) [ARCH_PPC]: Add vect.cc
>>      (libitm_la_SOURCES) [ARCH_S390]: Add vx.cc
>>      * configure.ac (ARCH_AARCH64): New conditional.
>>      (ARCH_PPC, ARCH_S390): Likewise.
>>      * Makefile.in, configure: Rebuild.
>>
>>      * libitm.h (_ITM_TYPE_M128): Always define.
>>      * config/generic/dispatch-m64.cc: Split ...
>>      * config/generic/dispatch-m128.cc: ... out of...
>>      * config/x86/x86_sse.cc: ... here.
>>      * config/aarch64/neon.cc: New file.
>>      * config/arm/neon.cc: New file.
>>      * config/powerpc/vect.cc: New file.
>>
>>
>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> index ed91e5d..0b31ccd 100644
>> --- a/gcc/config/i386/i386.c
>> +++ b/gcc/config/i386/i386.c
>> @@ -35214,48 +35214,6 @@ static const struct builtin_description bdesc_tm[] =
>>    { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum 
>> ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
>>  };
>>  
>> -/* TM callbacks.  */
>> -
>> -/* Return the builtin decl needed to load a vector of TYPE.  */
>> -
>> -static tree
>> -ix86_builtin_tm_load (tree type)
>> -{
>> -  if (TREE_CODE (type) == VECTOR_TYPE)
>> -    {
>> -      switch (tree_to_uhwi (TYPE_SIZE (type)))
>> -    {
>> -    case 64:
>> -      return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
>> -    case 128:
>> -      return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
>> -    case 256:
>> -      return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
>> -    }
>> -    }
>> -  return NULL_TREE;
>> -}
>> -
>> -/* Return the builtin decl needed to store a vector of TYPE.  */
>> -
>> -static tree
>> -ix86_builtin_tm_store (tree type)
>> -{
>> -  if (TREE_CODE (type) == VECTOR_TYPE)
>> -    {
>> -      switch (tree_to_uhwi (TYPE_SIZE (type)))
>> -    {
>> -    case 64:
>> -      return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
>> -    case 128:
>> -      return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
>> -    case 256:
>> -      return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
>> -    }
>> -    }
>> -  return NULL_TREE;
>> -}
>> -
>>  /* Initialize the transactional memory vector load/store builtins.  */
>>  
>>  static void
>> @@ -54341,12 +54299,6 @@ ix86_addr_space_zero_address_valid (addr_space_t as)
>>  #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
>>    ix86_builtin_vectorized_function
>>  
>> -#undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
>> -#define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
>> -
>> -#undef TARGET_VECTORIZE_BUILTIN_TM_STORE
>> -#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
>> -
>>  #undef TARGET_VECTORIZE_BUILTIN_GATHER
>>  #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
>>  
>> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
>> index 195ddf8..7a11552 100644
>> --- a/gcc/doc/tm.texi
>> +++ b/gcc/doc/tm.texi
>> @@ -5728,14 +5728,6 @@ This hook should complete calculations of the cost of 
>> vectorizing a loop or basi
>>  This hook should release @var{data} and any related data structures 
>> allocated by TARGET_VECTORIZE_INIT_COST.  The default releases the 
>> accumulator.
>>  @end deftypefn
>>  
>> -@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_TM_LOAD (tree)
>> -This hook should return the built-in decl needed to load a vector of the 
>> given type within a transaction.
>> -@end deftypefn
>> -
>> -@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_TM_STORE (tree)
>> -This hook should return the built-in decl needed to store a vector of the 
>> given type within a transaction.
>> -@end deftypefn
>> -
>>  @deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_GATHER (const_tree 
>> @var{mem_vectype}, const_tree @var{index_type}, int @var{scale})
>>  Target builtin that implements vector gather operation.  @var{mem_vectype}
>>  is the vector type of the load and @var{index_type} is scalar type of
>> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
>> index dedc798..f31c763 100644
>> --- a/gcc/doc/tm.texi.in
>> +++ b/gcc/doc/tm.texi.in
>> @@ -4253,10 +4253,6 @@ address;  but often a machine-dependent strategy can 
>> generate better code.
>>  
>>  @hook TARGET_VECTORIZE_DESTROY_COST_DATA
>>  
>> -@hook TARGET_VECTORIZE_BUILTIN_TM_LOAD
>> -
>> -@hook TARGET_VECTORIZE_BUILTIN_TM_STORE
>> -
>>  @hook TARGET_VECTORIZE_BUILTIN_GATHER
>>  
>>  @hook TARGET_VECTORIZE_BUILTIN_SCATTER
>> diff --git a/gcc/gtm-builtins.def b/gcc/gtm-builtins.def
>> index e2bc081..6d5cfb9 100644
>> --- a/gcc/gtm-builtins.def
>> +++ b/gcc/gtm-builtins.def
>> @@ -12,6 +12,10 @@ DEF_TM_BUILTIN (BUILT_IN_TM_IRREVOCABLE, 
>> "_ITM_changeTransactionMode",
>>  
>>  DEF_TM_BUILTIN (BUILT_IN_TM_MEMCPY, "_ITM_memcpyRtWt",
>>              BT_FN_VOID_PTR_CONST_PTR_SIZE, ATTR_TM_TMPURE_NOTHROW_LIST)
>> +DEF_TM_BUILTIN (BUILT_IN_TM_MEMCPY_RNWT, "_ITM_memcpyRnWt",
>> +            BT_FN_VOID_PTR_CONST_PTR_SIZE, ATTR_TM_TMPURE_NOTHROW_LIST)
>> +DEF_TM_BUILTIN (BUILT_IN_TM_MEMCPY_RTWN, "_ITM_memcpyRtWn",
>> +            BT_FN_VOID_PTR_CONST_PTR_SIZE, ATTR_TM_TMPURE_NOTHROW_LIST)
>>  DEF_TM_BUILTIN (BUILT_IN_TM_MEMMOVE, "_ITM_memmoveRtWt",
>>              BT_FN_VOID_PTR_CONST_PTR_SIZE, ATTR_TM_TMPURE_NOTHROW_LIST)
>>  DEF_TM_BUILTIN (BUILT_IN_TM_MEMSET, "_ITM_memsetW",
>> diff --git a/gcc/target.def b/gcc/target.def
>> index c21b122..946d2e5 100644
>> --- a/gcc/target.def
>> +++ b/gcc/target.def
>> @@ -1808,24 +1808,6 @@ parameter is true if the memory access is defined in 
>> a packed struct.",
>>   (machine_mode mode, const_tree type, int misalignment, bool is_packed),
>>   default_builtin_support_vector_misalignment)
>>  
>> -/* Return the builtin decl needed to load a vector of TYPE.  */
>> -DEFHOOK
>> -(builtin_tm_load,
>> - "This hook should return the built-in decl needed to load a vector of the "
>> - "given type within a transaction.",
>> - tree,
>> - (tree),
>> - default_builtin_tm_load_store)
>> -
>> -/* Return the builtin decl needed to store a vector of TYPE.  */
>> -DEFHOOK
>> -(builtin_tm_store,
>> - "This hook should return the built-in decl needed to store a vector of the 
>> "
>> - "given type within a transaction.",
>> - tree,
>> - (tree),
>> - default_builtin_tm_load_store)
>> -
>>  /* Returns the preferred mode for SIMD operations for the specified
>>     scalar mode.  */
>>  DEFHOOK
>> diff --git a/gcc/testsuite/gcc.dg/tm/memopt-13.c 
>> b/gcc/testsuite/gcc.dg/tm/memopt-13.c
>> index 6e93b7f..9db096a 100644
>> --- a/gcc/testsuite/gcc.dg/tm/memopt-13.c
>> +++ b/gcc/testsuite/gcc.dg/tm/memopt-13.c
>> @@ -12,4 +12,4 @@ void f()
>>    }
>>  }
>>  
>> -/* { dg-final { scan-tree-dump-times "memmoveRtWt \\\(&large_global," 1 
>> "tmmark" } } */
>> +/* { dg-final { scan-tree-dump-times "memcpyRnWt \\\(&large_global," 1 
>> "tmmark" } } */
>> diff --git a/gcc/testsuite/gcc.dg/tm/memopt-6.c 
>> b/gcc/testsuite/gcc.dg/tm/memopt-6.c
>> index 4f1a41d..d1a2f1d 100644
>> --- a/gcc/testsuite/gcc.dg/tm/memopt-6.c
>> +++ b/gcc/testsuite/gcc.dg/tm/memopt-6.c
>> @@ -18,4 +18,4 @@ int f()
>>    return lala.x[i];
>>  }
>>  
>> -/* { dg-final { scan-tree-dump-times "memmoveRtWt \\\(.*, &lacopy" 1 
>> "tmedge" } } */
>> +/* { dg-final { scan-tree-dump-times "memcpyRtWn \\\(.*, &lacopy" 1 
>> "tmedge" } } */
>> diff --git a/gcc/trans-mem.c b/gcc/trans-mem.c
>> index a04e244..c462307 100644
>> --- a/gcc/trans-mem.c
>> +++ b/gcc/trans-mem.c
>> @@ -1190,7 +1190,6 @@ static void
>>  tm_log_emit_stmt (tree addr, gimple *stmt)
>>  {
>>    tree type = TREE_TYPE (addr);
>> -  tree size = TYPE_SIZE_UNIT (type);
>>    gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
>>    gimple *log;
>>    enum built_in_function code = BUILT_IN_TM_LOG;
>> @@ -1201,43 +1200,60 @@ tm_log_emit_stmt (tree addr, gimple *stmt)
>>      code = BUILT_IN_TM_LOG_DOUBLE;
>>    else if (type == long_double_type_node)
>>      code = BUILT_IN_TM_LOG_LDOUBLE;
>> -  else if (tree_fits_uhwi_p (size))
>> +  else if (TYPE_SIZE (type) != NULL
>> +       && tree_fits_uhwi_p (TYPE_SIZE (type)))
>>      {
>> -      unsigned int n = tree_to_uhwi (size);
>> -      switch (n)
>> +      unsigned HOST_WIDE_INT type_size = tree_to_uhwi (TYPE_SIZE (type));
>> +
>> +      if (TREE_CODE (type) == VECTOR_TYPE)
>>      {
>> -    case 1:
>> -      code = BUILT_IN_TM_LOG_1;
>> -      break;
>> -    case 2:
>> -      code = BUILT_IN_TM_LOG_2;
>> -      break;
>> -    case 4:
>> -      code = BUILT_IN_TM_LOG_4;
>> -      break;
>> -    case 8:
>> -      code = BUILT_IN_TM_LOG_8;
>> -      break;
>> -    default:
>> -      code = BUILT_IN_TM_LOG;
>> -      if (TREE_CODE (type) == VECTOR_TYPE)
>> +      switch (type_size)
>>          {
>> -          if (n == 8 && builtin_decl_explicit (BUILT_IN_TM_LOG_M64))
>> -            code = BUILT_IN_TM_LOG_M64;
>> -          else if (n == 16 && builtin_decl_explicit (BUILT_IN_TM_LOG_M128))
>> -            code = BUILT_IN_TM_LOG_M128;
>> -          else if (n == 32 && builtin_decl_explicit (BUILT_IN_TM_LOG_M256))
>> -            code = BUILT_IN_TM_LOG_M256;
>> +        case 64:
>> +          code = BUILT_IN_TM_LOG_M64;
>> +          break;
>> +        case 128:
>> +          code = BUILT_IN_TM_LOG_M128;
>> +          break;
>> +        case 256:
>> +          code = BUILT_IN_TM_LOG_M256;
>> +          break;
>> +        default:
>> +          goto unhandled_vec;
>> +        }
>> +      if (!builtin_decl_explicit_p (code))
>> +        goto unhandled_vec;
>> +    }
>> +      else
>> +    {
>> +    unhandled_vec:
>> +      switch (type_size)
>> +        {
>> +        case 1:
>> +          code = BUILT_IN_TM_LOG_1;
>> +          break;
>> +        case 2:
>> +          code = BUILT_IN_TM_LOG_2;
>> +          break;
>> +        case 4:
>> +          code = BUILT_IN_TM_LOG_4;
>> +          break;
>> +        case 8:
>> +          code = BUILT_IN_TM_LOG_8;
>> +          break;
>>          }
>> -      break;
>>      }
>>      }
>>  
>> +  if (code != BUILT_IN_TM_LOG && !builtin_decl_explicit_p (code))
>> +    code = BUILT_IN_TM_LOG;
>> +  tree decl = builtin_decl_explicit (code);
>> +
>>    addr = gimplify_addr (&gsi, addr);
>>    if (code == BUILT_IN_TM_LOG)
>> -    log = gimple_build_call (builtin_decl_explicit (code), 2, addr,  size);
>> +    log = gimple_build_call (decl, 2, addr, TYPE_SIZE_UNIT (type));
>>    else
>> -    log = gimple_build_call (builtin_decl_explicit (code), 1, addr);
>> +    log = gimple_build_call (decl, 1, addr);
>>    gsi_insert_before (&gsi, log, GSI_SAME_STMT);
>>  }
>>  
>> @@ -2171,44 +2187,66 @@ transaction_subcode_ior (struct tm_region *region, 
>> unsigned flags)
>>  static gcall *
>>  build_tm_load (location_t loc, tree lhs, tree rhs, gimple_stmt_iterator 
>> *gsi)
>>  {
>> -  enum built_in_function code = END_BUILTINS;
>> -  tree t, type = TREE_TYPE (rhs), decl;
>> +  tree t, type = TREE_TYPE (rhs);
>>    gcall *gcall;
>>  
>> +  built_in_function code;
>>    if (type == float_type_node)
>>      code = BUILT_IN_TM_LOAD_FLOAT;
>>    else if (type == double_type_node)
>>      code = BUILT_IN_TM_LOAD_DOUBLE;
>>    else if (type == long_double_type_node)
>>      code = BUILT_IN_TM_LOAD_LDOUBLE;
>> -  else if (TYPE_SIZE_UNIT (type) != NULL
>> -       && tree_fits_uhwi_p (TYPE_SIZE_UNIT (type)))
>> +  else
>>      {
>> -      switch (tree_to_uhwi (TYPE_SIZE_UNIT (type)))
>> +      if (TYPE_SIZE (type) == NULL || !tree_fits_uhwi_p (TYPE_SIZE (type)))
>> +    return NULL;
>> +      unsigned HOST_WIDE_INT type_size = tree_to_uhwi (TYPE_SIZE (type));
>> +
>> +      if (TREE_CODE (type) == VECTOR_TYPE)
>>      {
>> -    case 1:
>> -      code = BUILT_IN_TM_LOAD_1;
>> -      break;
>> -    case 2:
>> -      code = BUILT_IN_TM_LOAD_2;
>> -      break;
>> -    case 4:
>> -      code = BUILT_IN_TM_LOAD_4;
>> -      break;
>> -    case 8:
>> -      code = BUILT_IN_TM_LOAD_8;
>> -      break;
>> +      switch (type_size)
>> +        {
>> +        case 64:
>> +          code = BUILT_IN_TM_LOAD_M64;
>> +          break;
>> +        case 128:
>> +          code = BUILT_IN_TM_LOAD_M128;
>> +          break;
>> +        case 256:
>> +          code = BUILT_IN_TM_LOAD_M256;
>> +          break;
>> +        default:
>> +          goto unhandled_vec;
>> +        }
>> +      if (!builtin_decl_explicit_p (code))
>> +        goto unhandled_vec;
>> +    }
>> +      else
>> +    {
>> +    unhandled_vec:
>> +      switch (type_size)
>> +        {
>> +        case 8:
>> +          code = BUILT_IN_TM_LOAD_1;
>> +          break;
>> +        case 16:
>> +          code = BUILT_IN_TM_LOAD_2;
>> +          break;
>> +        case 32:
>> +          code = BUILT_IN_TM_LOAD_4;
>> +          break;
>> +        case 64:
>> +          code = BUILT_IN_TM_LOAD_8;
>> +          break;
>> +        default:
>> +          return NULL;
>> +        }
>>      }
>>      }
>>  
>> -  if (code == END_BUILTINS)
>> -    {
>> -      decl = targetm.vectorize.builtin_tm_load (type);
>> -      if (!decl)
>> -    return NULL;
>> -    }
>> -  else
>> -    decl = builtin_decl_explicit (code);
>> +  tree decl = builtin_decl_explicit (code);
>> +  gcc_assert (decl);
>>  
>>    t = gimplify_addr (gsi, rhs);
>>    gcall = gimple_build_call (decl, 1, t);
>> @@ -2243,44 +2281,66 @@ build_tm_load (location_t loc, tree lhs, tree rhs, 
>> gimple_stmt_iterator *gsi)
>>  static gcall *
>>  build_tm_store (location_t loc, tree lhs, tree rhs, gimple_stmt_iterator 
>> *gsi)
>>  {
>> -  enum built_in_function code = END_BUILTINS;
>>    tree t, fn, type = TREE_TYPE (rhs), simple_type;
>>    gcall *gcall;
>>  
>> +  built_in_function code;
>>    if (type == float_type_node)
>>      code = BUILT_IN_TM_STORE_FLOAT;
>>    else if (type == double_type_node)
>>      code = BUILT_IN_TM_STORE_DOUBLE;
>>    else if (type == long_double_type_node)
>>      code = BUILT_IN_TM_STORE_LDOUBLE;
>> -  else if (TYPE_SIZE_UNIT (type) != NULL
>> -       && tree_fits_uhwi_p (TYPE_SIZE_UNIT (type)))
>> +  else
>>      {
>> -      switch (tree_to_uhwi (TYPE_SIZE_UNIT (type)))
>> +      if (TYPE_SIZE (type) == NULL || !tree_fits_uhwi_p (TYPE_SIZE (type)))
>> +    return NULL;
>> +      unsigned HOST_WIDE_INT type_size = tree_to_uhwi (TYPE_SIZE (type));
>> +
>> +      if (TREE_CODE (type) == VECTOR_TYPE)
>>      {
>> -    case 1:
>> -      code = BUILT_IN_TM_STORE_1;
>> -      break;
>> -    case 2:
>> -      code = BUILT_IN_TM_STORE_2;
>> -      break;
>> -    case 4:
>> -      code = BUILT_IN_TM_STORE_4;
>> -      break;
>> -    case 8:
>> -      code = BUILT_IN_TM_STORE_8;
>> -      break;
>> +      switch (type_size)
>> +        {
>> +        case 64:
>> +          code = BUILT_IN_TM_STORE_M64;
>> +          break;
>> +        case 128:
>> +          code = BUILT_IN_TM_STORE_M128;
>> +          break;
>> +        case 256:
>> +          code = BUILT_IN_TM_STORE_M256;
>> +          break;
>> +        default:
>> +          goto unhandled_vec;
>> +        }
>> +      if (!builtin_decl_explicit_p (code))
>> +        goto unhandled_vec;
>> +    }
>> +      else
>> +    {
>> +    unhandled_vec:
>> +      switch (type_size)
>> +        {
>> +        case 8:
>> +          code = BUILT_IN_TM_STORE_1;
>> +          break;
>> +        case 16:
>> +          code = BUILT_IN_TM_STORE_2;
>> +          break;
>> +        case 32:
>> +          code = BUILT_IN_TM_STORE_4;
>> +          break;
>> +        case 64:
>> +          code = BUILT_IN_TM_STORE_8;
>> +          break;
>> +        default:
>> +          return NULL;
>> +        }
>>      }
>>      }
>>  
>> -  if (code == END_BUILTINS)
>> -    {
>> -      fn = targetm.vectorize.builtin_tm_store (type);
>> -      if (!fn)
>> -    return NULL;
>> -    }
>> -  else
>> -    fn = builtin_decl_explicit (code);
>> +  fn = builtin_decl_explicit (code);
>> +  gcc_assert (fn);
>>  
>>    simple_type = TREE_VALUE (TREE_CHAIN (TYPE_ARG_TYPES (TREE_TYPE (fn))));
>>  
>> @@ -2342,63 +2402,80 @@ expand_assign_tm (struct tm_region *region, 
>> gimple_stmt_iterator *gsi)
>>        return;
>>      }
>>  
>> +  if (load_p)
>> +    transaction_subcode_ior (region, GTMA_HAVE_LOAD);
>> +  if (store_p)
>> +    transaction_subcode_ior (region, GTMA_HAVE_STORE);
>> +
>>    // Remove original load/store statement.
>>    gsi_remove (gsi, true);
>>  
>> +  // Attempt to use a simple load/store helper function.
>>    if (load_p && !store_p)
>> -    {
>> -      transaction_subcode_ior (region, GTMA_HAVE_LOAD);
>> -      gcall = build_tm_load (loc, lhs, rhs, gsi);
>> -    }
>> +    gcall = build_tm_load (loc, lhs, rhs, gsi);
>>    else if (store_p && !load_p)
>> -    {
>> -      transaction_subcode_ior (region, GTMA_HAVE_STORE);
>> -      gcall = build_tm_store (loc, lhs, rhs, gsi);
>> -    }
>> +    gcall = build_tm_store (loc, lhs, rhs, gsi);
>> +
>> +  // If gcall has not been set, then we do not have a simple helper
>> +  // function available for the type.  This may be true of larger
>> +  // structures, vectors, and non-standard float types.
>>    if (!gcall)
>>      {
>> -      tree lhs_addr, rhs_addr, tmp;
>> -
>> -      if (load_p)
>> -    transaction_subcode_ior (region, GTMA_HAVE_LOAD);
>> -      if (store_p)
>> -    transaction_subcode_ior (region, GTMA_HAVE_STORE);
>> +      tree lhs_addr, rhs_addr, ltmp = NULL, copy_fn;
>>  
>> -      /* ??? Figure out if there's any possible overlap between the LHS
>> -     and the RHS and if not, use MEMCPY.  */
>> +      // If this is a type that we couldn't handle above, but it's
>> +      // in a register, we must spill it to memory for the copy.
>> +      if (is_gimple_reg (lhs))
>> +    {
>> +      ltmp = create_tmp_var (TREE_TYPE (lhs));
>> +      lhs_addr = build_fold_addr_expr (ltmp);
>> +    }
>> +      else
>> +    lhs_addr = gimplify_addr (gsi, lhs);
>> +      if (is_gimple_reg (rhs))
>> +    {
>> +      tree rtmp = create_tmp_var (TREE_TYPE (rhs));
>> +      rhs_addr = build_fold_addr_expr (rtmp);
>> +      gcall = gimple_build_assign (rtmp, rhs);
>> +      gsi_insert_before (gsi, gcall, GSI_SAME_STMT);
>> +    }
>> +      else
>> +    rhs_addr = gimplify_addr (gsi, rhs);
>>  
>> -      if (load_p && is_gimple_reg (lhs))
>> +      // Choose the appropriate memory transfer function.
>> +      if (load_p && store_p)
>> +    {
>> +      // ??? Figure out if there's any possible overlap between
>> +      // the LHS and the RHS and if not, use MEMCPY.
>> +      copy_fn = builtin_decl_explicit (BUILT_IN_TM_MEMMOVE);
>> +    }
>> +      else if (load_p)
>>      {
>> -      tmp = create_tmp_var (TREE_TYPE (lhs));
>> -      lhs_addr = build_fold_addr_expr (tmp);
>> +      // Note that the store is non-transactional and cannot overlap.
>> +      copy_fn = builtin_decl_explicit (BUILT_IN_TM_MEMCPY_RTWN);
>>      }
>>        else
>>      {
>> -      tmp = NULL_TREE;
>> -      lhs_addr = gimplify_addr (gsi, lhs);
>> +      // Note that the load is non-transactional and cannot overlap.
>> +      copy_fn = builtin_decl_explicit (BUILT_IN_TM_MEMCPY_RNWT);
>>      }
>> -      rhs_addr = gimplify_addr (gsi, rhs);
>> -      gcall = gimple_build_call (builtin_decl_explicit 
>> (BUILT_IN_TM_MEMMOVE),
>> -                             3, lhs_addr, rhs_addr,
>> +
>> +      gcall = gimple_build_call (copy_fn, 3, lhs_addr, rhs_addr,
>>                               TYPE_SIZE_UNIT (TREE_TYPE (lhs)));
>>        gimple_set_location (gcall, loc);
>>        gsi_insert_before (gsi, gcall, GSI_SAME_STMT);
>>  
>> -      if (tmp)
>> +      if (ltmp)
>>      {
>> -      gcall = gimple_build_assign (lhs, tmp);
>> +      gcall = gimple_build_assign (lhs, ltmp);
>>        gsi_insert_before (gsi, gcall, GSI_SAME_STMT);
>>      }
>>      }
>>  
>> -  /* Now that we have the load/store in its instrumented form, add
>> -     thread private addresses to the log if applicable.  */
>> +  // Now that we have the load/store in its instrumented form, add
>> +  // thread private addresses to the log if applicable.
>>    if (!store_p)
>>      requires_barrier (region->entry_block, lhs, gcall);
>> -
>> -  // The calls to build_tm_{store,load} above inserted the instrumented
>> -  // call into the stream.
>> -  // gsi_insert_before (gsi, gcall, GSI_SAME_STMT);
>>  }
>>  
>>  
>> diff --git a/gcc/tree.c b/gcc/tree.c
>> index 20470c5..e6880f0 100644
>> --- a/gcc/tree.c
>> +++ b/gcc/tree.c
>> @@ -10330,6 +10330,143 @@ local_define_builtin (const char *name, tree type, 
>> enum built_in_function code,
>>    set_builtin_decl (code, decl, true);
>>  }
>>  
>> +/* A subroutine of build_tm_vector_builtins.  Define a builtin with
>> +   all of the appropriate attributes.  */
>> +static void
>> +tm_define_builtin (const char *name, tree type, built_in_function code,
>> +               tree decl_attrs, tree type_attrs)
>> +{
>> +  tree decl = add_builtin_function (name, type, code, BUILT_IN_NORMAL,
>> +                                name + strlen ("__builtin_"), decl_attrs);
>> +  decl_attributes (&TREE_TYPE (decl), type_attrs, ATTR_FLAG_BUILT_IN);
>> +  set_builtin_decl (code, decl, true);
>> +}
>> +
>> +/* A subroutine of build_tm_vector_builtins.  Find a supported vector
>> +   type VECTOR_BITS wide with inner mode ELEM_MODE.  */
>> +static tree
>> +find_tm_vector_type (unsigned vector_bits, machine_mode elem_mode)
>> +{
>> +  unsigned elem_bits = GET_MODE_BITSIZE (elem_mode);
>> +  unsigned nunits = vector_bits / elem_bits;
>> +
>> +  gcc_assert (elem_bits * nunits == vector_bits);
>> +
>> +  machine_mode vector_mode = mode_for_vector (elem_mode, nunits);
>> +  if (!VECTOR_MODE_P (vector_mode)
>> +      || !targetm.vector_mode_supported_p (vector_mode))
>> +    return NULL_TREE;
>> +
>> +  tree innertype = lang_hooks.types.type_for_mode (elem_mode, 0);
>> +  return build_vector_type_for_mode (innertype, vector_mode);
>> +}
>> +
>> +/* A subroutine of build_common_builtin_nodes.  Define TM builtins for
>> +   vector types.  This is done after the target hook, so that the target
>> +   has a chance to override these.  */
>> +static void
>> +build_tm_vector_builtins (void)
>> +{
>> +  tree vtype, pvtype, ftype, decl;
>> +  tree attrs_load, attrs_type_load;
>> +  tree attrs_store, attrs_type_store;
>> +  tree attrs_log, attrs_type_log;
>> +
>> +  /* Do nothing if TM is turned off, either with switch or
>> +     not enabled in the language.  */
>> +  if (!flag_tm || !builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
>> +    return;
>> +
>> +  /* Use whatever attributes a normal TM load has.  */
>> +  decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
>> +  attrs_load = DECL_ATTRIBUTES (decl);
>> +  attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
>> +  /* Use whatever attributes a normal TM store has.  */
>> +  decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
>> +  attrs_store = DECL_ATTRIBUTES (decl);
>> +  attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
>> +  /* Use whatever attributes a normal TM log has.  */
>> +  decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
>> +  attrs_log = DECL_ATTRIBUTES (decl);
>> +  attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
>> +
>> +  /* By default, 64 bit vectors go through the long long helpers.  */
>> +
>> +  /* If a 128-bit vector is supported, declare those builtins.  */
>> +  if (!builtin_decl_declared_p (BUILT_IN_TM_STORE_M128)
>> +      && ((vtype = find_tm_vector_type (128, SImode))
>> +      || (vtype = find_tm_vector_type (128, SFmode))))
>> +    {
>> +      pvtype = build_pointer_type (vtype);
>> +
>> +      ftype = build_function_type_list (void_type_node, pvtype, vtype, 
>> NULL);
>> +      tm_define_builtin ("__builtin__ITM_WM128", ftype,
>> +                     BUILT_IN_TM_STORE_M128,
>> +                     attrs_store, attrs_type_store);
>> +      tm_define_builtin ("__builtin__ITM_WaRM128", ftype,
>> +                     BUILT_IN_TM_STORE_WAR_M128,
>> +                     attrs_store, attrs_type_store);
>> +      tm_define_builtin ("__builtin__ITM_WaWM128", ftype,
>> +                     BUILT_IN_TM_STORE_WAW_M128,
>> +                     attrs_store, attrs_type_store);
>> +
>> +      ftype = build_function_type_list (vtype, pvtype, NULL);
>> +      tm_define_builtin ("__builtin__ITM_RM128", ftype,
>> +                     BUILT_IN_TM_LOAD_M128,
>> +                     attrs_load, attrs_type_load);
>> +      tm_define_builtin ("__builtin__ITM_RaRM128", ftype,
>> +                     BUILT_IN_TM_LOAD_RAR_M128,
>> +                     attrs_load, attrs_type_load);
>> +      tm_define_builtin ("__builtin__ITM_RaWM128", ftype,
>> +                     BUILT_IN_TM_LOAD_RAW_M128,
>> +                     attrs_load, attrs_type_load);
>> +      tm_define_builtin ("__builtin__ITM_RfWM128", ftype,
>> +                     BUILT_IN_TM_LOAD_RFW_M128,
>> +                     attrs_load, attrs_type_load);
>> +
>> +      ftype = build_function_type_list (void_type_node, pvtype, NULL);
>> +      tm_define_builtin ("__builtin__ITM_LM128", ftype,
>> +                     BUILT_IN_TM_LOG_M128, attrs_log, attrs_type_log);
>> +    }
>> +
>> +  /* If a 256-bit vector is supported, declare those builtins.  */
>> +  if (!builtin_decl_declared_p (BUILT_IN_TM_STORE_M256)
>> +      && ((vtype = find_tm_vector_type (256, SImode))
>> +      || (vtype = find_tm_vector_type (256, SFmode))))
>> +    {
>> +      pvtype = build_pointer_type (vtype);
>> +
>> +      ftype = build_function_type_list (void_type_node, pvtype, vtype, 
>> NULL);
>> +      tm_define_builtin ("__builtin__ITM_WM256", ftype,
>> +                     BUILT_IN_TM_STORE_M256,
>> +                     attrs_store, attrs_type_store);
>> +      tm_define_builtin ("__builtin__ITM_WaRM256", ftype,
>> +                     BUILT_IN_TM_STORE_WAR_M256,
>> +                     attrs_store, attrs_type_store);
>> +      tm_define_builtin ("__builtin__ITM_WaWM256", ftype,
>> +                     BUILT_IN_TM_STORE_WAW_M256,
>> +                     attrs_store, attrs_type_store);
>> +
>> +      ftype = build_function_type_list (vtype, pvtype, NULL);
>> +      tm_define_builtin ("__builtin__ITM_RM256", ftype,
>> +                     BUILT_IN_TM_LOAD_M256,
>> +                     attrs_load, attrs_type_load);
>> +      tm_define_builtin ("__builtin__ITM_RaRM256", ftype,
>> +                     BUILT_IN_TM_LOAD_RAR_M256,
>> +                     attrs_load, attrs_type_load);
>> +      tm_define_builtin ("__builtin__ITM_RaWM256", ftype,
>> +                     BUILT_IN_TM_LOAD_RAW_M256,
>> +                     attrs_load, attrs_type_load);
>> +      tm_define_builtin ("__builtin__ITM_RfWM256", ftype,
>> +                     BUILT_IN_TM_LOAD_RFW_M256,
>> +                     attrs_load, attrs_type_load);
>> +
>> +      ftype = build_function_type_list (void_type_node, pvtype, NULL);
>> +      tm_define_builtin ("__builtin__ITM_LM256", ftype,
>> +                     BUILT_IN_TM_LOG_M256, attrs_log, attrs_type_log);
>> +    }
>> +}
>> +
>>  /* Call this function after instantiating all builtins that the language
>>     front end cares about.  This will build the rest of the builtins
>>     and internal functions that are relied upon by the tree optimizers and
>> @@ -10568,6 +10705,7 @@ build_common_builtin_nodes (void)
>>        }
>>    }
>>  
>> +  build_tm_vector_builtins ();
>>    init_internal_fns ();
>>  }
>>  
>> diff --git a/libitm/Makefile.am b/libitm/Makefile.am
>> index 1dce82d..c9f7a16 100644
>> --- a/libitm/Makefile.am
>> +++ b/libitm/Makefile.am
>> @@ -62,8 +62,18 @@ libitm_la_SOURCES = \
>>      query.cc retry.cc rwlock.cc useraction.cc util.cc \
>>      sjlj.S tls.cc method-serial.cc method-gl.cc method-ml.cc
>>  
>> +if ARCH_AARCH64
>> +libitm_la_SOURCES += neon.cc
>> +endif
>>  if ARCH_ARM
>> -libitm_la_SOURCES += hwcap.cc
>> +libitm_la_SOURCES += hwcap.cc neon.cc
>> +endif
>> +if ARCH_PPC
>> +libitm_la_SOURCES += vect.cc
>> +endif
>> +if ARCH_S390
>> +libitm_la_SOURCES += vx.cc
>> +vx.lo : override CXXFLAGS += -march=z13
>>  endif
>>  if ARCH_X86
>>  libitm_la_SOURCES += x86_sse.cc x86_avx.cc
>> diff --git a/libitm/Makefile.in b/libitm/Makefile.in
>> index 138eeb1..edd65dc 100644
>> --- a/libitm/Makefile.in
>> +++ b/libitm/Makefile.in
>> @@ -53,9 +53,12 @@ POST_UNINSTALL = :
>>  build_triplet = @build@
>>  host_triplet = @host@
>>  target_triplet = @target@
>> -@ARCH_ARM_TRUE@am__append_1 = hwcap.cc
>> -@ARCH_X86_TRUE@am__append_2 = x86_sse.cc x86_avx.cc
>> -@ARCH_FUTEX_TRUE@am__append_3 = futex.cc
>> +@ARCH_AARCH64_TRUE@am__append_1 = neon.cc
>> +@ARCH_ARM_TRUE@am__append_2 = hwcap.cc neon.cc
>> +@ARCH_PPC_TRUE@am__append_3 = vect.cc
>> +@ARCH_S390_TRUE@am__append_4 = vx.cc
>> +@ARCH_X86_TRUE@am__append_5 = x86_sse.cc x86_avx.cc
>> +@ARCH_FUTEX_TRUE@am__append_6 = futex.cc
>>  subdir = .
>>  DIST_COMMON = ChangeLog $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
>>      $(top_srcdir)/configure $(am__configure_deps) \
>> @@ -117,14 +120,18 @@ am__installdirs = "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(infodir)" \
>>      "$(DESTDIR)$(toolexeclibdir)"
>>  LTLIBRARIES = $(toolexeclib_LTLIBRARIES)
>>  libitm_la_LIBADD =
>> -@ARCH_ARM_TRUE@am__objects_1 = hwcap.lo
>> -@ARCH_X86_TRUE@am__objects_2 = x86_sse.lo x86_avx.lo
>> -@ARCH_FUTEX_TRUE@am__objects_3 = futex.lo
>> +@ARCH_AARCH64_TRUE@am__objects_1 = neon.lo
>> +@ARCH_ARM_TRUE@am__objects_2 = hwcap.lo neon.lo
>> +@ARCH_PPC_TRUE@am__objects_3 = vect.lo
>> +@ARCH_S390_TRUE@am__objects_4 = vx.lo
>> +@ARCH_X86_TRUE@am__objects_5 = x86_sse.lo x86_avx.lo
>> +@ARCH_FUTEX_TRUE@am__objects_6 = futex.lo
>>  am_libitm_la_OBJECTS = aatree.lo alloc.lo alloc_c.lo alloc_cpp.lo \
>>      barrier.lo beginend.lo clone.lo eh_cpp.lo local.lo query.lo \
>>      retry.lo rwlock.lo useraction.lo util.lo sjlj.lo tls.lo \
>>      method-serial.lo method-gl.lo method-ml.lo $(am__objects_1) \
>> -    $(am__objects_2) $(am__objects_3)
>> +    $(am__objects_2) $(am__objects_3) $(am__objects_4) \
>> +    $(am__objects_5) $(am__objects_6)
>>  libitm_la_OBJECTS = $(am_libitm_la_OBJECTS)
>>  DEFAULT_INCLUDES = -I.@am__isrc@
>>  depcomp = $(SHELL) $(top_srcdir)/../depcomp
>> @@ -362,7 +369,8 @@ libitm_la_SOURCES = aatree.cc alloc.cc alloc_c.cc alloc_cpp.cc \
>>      barrier.cc beginend.cc clone.cc eh_cpp.cc local.cc query.cc \
>>      retry.cc rwlock.cc useraction.cc util.cc sjlj.S tls.cc \
>>      method-serial.cc method-gl.cc method-ml.cc $(am__append_1) \
>> -    $(am__append_2) $(am__append_3)
>> +    $(am__append_2) $(am__append_3) $(am__append_4) \
>> +    $(am__append_5) $(am__append_6)
>>  
>>  # Automake Documentation:
>>  # If your package has Texinfo files in many directories, you can use the
>> @@ -495,6 +503,7 @@ distclean-compile:
>>  @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/method-gl.Plo@am__quote@
>>  @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/method-ml.Plo@am__quote@
>>  @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/method-serial.Plo@am__quote@
>> +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/neon.Plo@am__quote@
>>  @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/query.Plo@am__quote@
>>  @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/retry.Plo@am__quote@
>>  @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rwlock.Plo@am__quote@
>> @@ -502,6 +511,8 @@ distclean-compile:
>>  @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tls.Plo@am__quote@
>>  @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/useraction.Plo@am__quote@
>>  @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/util.Plo@am__quote@
>> +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vect.Plo@am__quote@
>> +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vx.Plo@am__quote@
>>  @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/x86_avx.Plo@am__quote@
>>  @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/x86_sse.Plo@am__quote@
>>  
>> @@ -1096,6 +1107,7 @@ vpath % $(strip $(search_path))
>>  @LIBITM_BUILD_VERSIONED_SHLIB_SUN_TRUE@@LIBITM_BUILD_VERSIONED_SHLIB_TRUE@   `echo $(libitm_la_LIBADD) | \
>>  @LIBITM_BUILD_VERSIONED_SHLIB_SUN_TRUE@@LIBITM_BUILD_VERSIONED_SHLIB_TRUE@      sed 's,/\([^/.]*\)\.la,/.libs/\1.a,g'` \
>>  @LIBITM_BUILD_VERSIONED_SHLIB_SUN_TRUE@@LIBITM_BUILD_VERSIONED_SHLIB_TRUE@   > $@ || (rm -f $@ ; exit 1)
>> +@ARCH_S390_TRUE@vx.lo : override CXXFLAGS += -march=z13
>>  # Make sure -msse is appended at the end.
>>  @ARCH_X86_TRUE@x86_sse.lo : override CXXFLAGS += -msse
>>  # Make sure -mavx is appended at the end.
>> diff --git a/libitm/config/aarch64/neon.cc b/libitm/config/aarch64/neon.cc
>> new file mode 100644
>> index 0000000..c2dda20
>> --- /dev/null
>> +++ b/libitm/config/aarch64/neon.cc
>> @@ -0,0 +1,3 @@
>> +#ifdef __ARM_NEON
>> +#include <config/generic/dispatch-m128.cc>
>> +#endif
>> diff --git a/libitm/config/arm/neon.cc b/libitm/config/arm/neon.cc
>> new file mode 100644
>> index 0000000..c2dda20
>> --- /dev/null
>> +++ b/libitm/config/arm/neon.cc
>> @@ -0,0 +1,3 @@
>> +#ifdef __ARM_NEON
>> +#include <config/generic/dispatch-m128.cc>
>> +#endif
>> diff --git a/libitm/config/generic/dispatch-m128.cc b/libitm/config/generic/dispatch-m128.cc
>> new file mode 100644
>> index 0000000..d4fdb5e
>> --- /dev/null
>> +++ b/libitm/config/generic/dispatch-m128.cc
>> @@ -0,0 +1,36 @@
>> +/* Copyright (C) 2009-2015 Free Software Foundation, Inc.
>> +   Contributed by Richard Henderson <r...@redhat.com>.
>> +
>> +   This file is part of the GNU Transactional Memory Library (libitm).
>> +
>> +   Libitm is free software; you can redistribute it and/or modify it
>> +   under the terms of the GNU General Public License as published by
>> +   the Free Software Foundation; either version 3 of the License, or
>> +   (at your option) any later version.
>> +
>> +   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
>> +   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
>> +   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
>> +   more details.
>> +
>> +   Under Section 7 of GPL version 3, you are granted additional
>> +   permissions described in the GCC Runtime Library Exception, version
>> +   3.1, as published by the Free Software Foundation.
>> +
>> +   You should have received a copy of the GNU General Public License and
>> +   a copy of the GCC Runtime Library Exception along with this program;
>> +   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
>> +   <http://www.gnu.org/licenses/>.  */
>> +
>> +#include "libitm_i.h"
>> +#include "dispatch.h"
>> +
>> +// ??? Use memcpy for now, until we have figured out how to best instantiate
>> +// these loads/stores.
>> +CREATE_DISPATCH_FUNCTIONS_T_MEMCPY(M128, GTM::abi_disp()->, )
>> +
>> +void ITM_REGPARM
>> +_ITM_LM128 (const _ITM_TYPE_M128 *ptr)
>> +{
>> +  GTM::GTM_LB (ptr, sizeof (*ptr));
>> +}
>> diff --git a/libitm/config/generic/dispatch-m64.cc b/libitm/config/generic/dispatch-m64.cc
>> new file mode 100644
>> index 0000000..34e9465
>> --- /dev/null
>> +++ b/libitm/config/generic/dispatch-m64.cc
>> @@ -0,0 +1,36 @@
>> +/* Copyright (C) 2009-2015 Free Software Foundation, Inc.
>> +   Contributed by Richard Henderson <r...@redhat.com>.
>> +
>> +   This file is part of the GNU Transactional Memory Library (libitm).
>> +
>> +   Libitm is free software; you can redistribute it and/or modify it
>> +   under the terms of the GNU General Public License as published by
>> +   the Free Software Foundation; either version 3 of the License, or
>> +   (at your option) any later version.
>> +
>> +   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
>> +   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
>> +   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
>> +   more details.
>> +
>> +   Under Section 7 of GPL version 3, you are granted additional
>> +   permissions described in the GCC Runtime Library Exception, version
>> +   3.1, as published by the Free Software Foundation.
>> +
>> +   You should have received a copy of the GNU General Public License and
>> +   a copy of the GCC Runtime Library Exception along with this program;
>> +   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
>> +   <http://www.gnu.org/licenses/>.  */
>> +
>> +#include "libitm_i.h"
>> +#include "dispatch.h"
>> +
>> +// ??? Use memcpy for now, until we have figured out how to best instantiate
>> +// these loads/stores.
>> +CREATE_DISPATCH_FUNCTIONS_T_MEMCPY(M64, GTM::abi_disp()->, )
>> +
>> +void ITM_REGPARM
>> +_ITM_LM64 (const _ITM_TYPE_M64 *ptr)
>> +{
>> +  GTM::GTM_LB (ptr, sizeof (*ptr));
>> +}
>> diff --git a/libitm/config/powerpc/vect.cc b/libitm/config/powerpc/vect.cc
>> new file mode 100644
>> index 0000000..43fcdfe
>> --- /dev/null
>> +++ b/libitm/config/powerpc/vect.cc
>> @@ -0,0 +1,3 @@
>> +#if defined(__ALTIVEC__) || defined(__VSX__)
>> +#include <config/generic/dispatch-m128.cc>
>> +#endif
>> diff --git a/libitm/config/s390/vx.cc b/libitm/config/s390/vx.cc
>> new file mode 100644
>> index 0000000..1f7268c
>> --- /dev/null
>> +++ b/libitm/config/s390/vx.cc
>> @@ -0,0 +1 @@
>> +#include <config/generic/dispatch-m128.cc>
>> diff --git a/libitm/config/x86/x86_sse.cc b/libitm/config/x86/x86_sse.cc
>> index c3b7237..266ea09 100644
>> --- a/libitm/config/x86/x86_sse.cc
>> +++ b/libitm/config/x86/x86_sse.cc
>> @@ -1,43 +1,2 @@
>> -/* Copyright (C) 2009-2016 Free Software Foundation, Inc.
>> -   Contributed by Richard Henderson <r...@redhat.com>.
>> -
>> -   This file is part of the GNU Transactional Memory Library (libitm).
>> -
>> -   Libitm is free software; you can redistribute it and/or modify it
>> -   under the terms of the GNU General Public License as published by
>> -   the Free Software Foundation; either version 3 of the License, or
>> -   (at your option) any later version.
>> -
>> -   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
>> -   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
>> -   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
>> -   more details.
>> -
>> -   Under Section 7 of GPL version 3, you are granted additional
>> -   permissions described in the GCC Runtime Library Exception, version
>> -   3.1, as published by the Free Software Foundation.
>> -
>> -   You should have received a copy of the GNU General Public License and
>> -   a copy of the GCC Runtime Library Exception along with this program;
>> -   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
>> -   <http://www.gnu.org/licenses/>.  */
>> -
>> -#include "libitm_i.h"
>> -#include "dispatch.h"
>> -
>> -// ??? Use memcpy for now, until we have figured out how to best instantiate
>> -// these loads/stores.
>> -CREATE_DISPATCH_FUNCTIONS_T_MEMCPY(M64, GTM::abi_disp()->, )
>> -CREATE_DISPATCH_FUNCTIONS_T_MEMCPY(M128, GTM::abi_disp()->, )
>> -
>> -void ITM_REGPARM
>> -_ITM_LM64 (const _ITM_TYPE_M64 *ptr)
>> -{
>> -  GTM::GTM_LB (ptr, sizeof (*ptr));
>> -}
>> -
>> -void ITM_REGPARM
>> -_ITM_LM128 (const _ITM_TYPE_M128 *ptr)
>> -{
>> -  GTM::GTM_LB (ptr, sizeof (*ptr));
>> -}
>> +#include <config/generic/dispatch-m64.cc>
>> +#include <config/generic/dispatch-m128.cc>
>> diff --git a/libitm/configure b/libitm/configure
>> index 55332bb..637fcd1 100644
>> --- a/libitm/configure
>> +++ b/libitm/configure
>> @@ -607,8 +607,14 @@ ARCH_X86_AVX_FALSE
>>  ARCH_X86_AVX_TRUE
>>  ARCH_X86_FALSE
>>  ARCH_X86_TRUE
>> +ARCH_S390_FALSE
>> +ARCH_S390_TRUE
>> +ARCH_PPC_FALSE
>> +ARCH_PPC_TRUE
>>  ARCH_ARM_FALSE
>>  ARCH_ARM_TRUE
>> +ARCH_AARCH64_FALSE
>> +ARCH_AARCH64_TRUE
>>  link_itm
>>  XLDFLAGS
>>  XCFLAGS
>> @@ -9762,7 +9768,7 @@ _LT_EOF
>>      if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then
>>        export_symbols_cmds='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
>>      else
>> -      export_symbols_cmds='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
>> +      export_symbols_cmds='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
>>      fi
>>      aix_use_runtimelinking=no
>>  
>> @@ -11790,7 +11796,7 @@ else
>>    lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
>>    lt_status=$lt_dlunknown
>>    cat > conftest.$ac_ext <<_LT_EOF
>> -#line 11793 "configure"
>> +#line 11799 "configure"
>>  #include "confdefs.h"
>>  
>>  #if HAVE_DLFCN_H
>> @@ -11896,7 +11902,7 @@ else
>>    lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
>>    lt_status=$lt_dlunknown
>>    cat > conftest.$ac_ext <<_LT_EOF
>> -#line 11899 "configure"
>> +#line 11905 "configure"
>>  #include "confdefs.h"
>>  
>>  #if HAVE_DLFCN_H
>> @@ -14248,7 +14254,7 @@ $as_echo_n "checking whether the $compiler linker ($LD) supports shared librarie
>>      if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then
>>        export_symbols_cmds_CXX='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
>>      else
>> -      export_symbols_cmds_CXX='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
>> +      export_symbols_cmds_CXX='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
>>      fi
>>      ;;
>>    pw32*)
>> @@ -17603,6 +17609,14 @@ else
>>  fi
>>  
>>  
>> + if test "$ARCH" = aarch64; then
>> +  ARCH_AARCH64_TRUE=
>> +  ARCH_AARCH64_FALSE='#'
>> +else
>> +  ARCH_AARCH64_TRUE='#'
>> +  ARCH_AARCH64_FALSE=
>> +fi
>> +
>>   if test "$ARCH" = arm; then
>>    ARCH_ARM_TRUE=
>>    ARCH_ARM_FALSE='#'
>> @@ -17611,6 +17625,22 @@ else
>>    ARCH_ARM_FALSE=
>>  fi
>>  
>> + if test "$ARCH" = powerpc -o "$ARCH" = powerpc64; then
>> +  ARCH_PPC_TRUE=
>> +  ARCH_PPC_FALSE='#'
>> +else
>> +  ARCH_PPC_TRUE='#'
>> +  ARCH_PPC_FALSE=
>> +fi
>> +
>> + if test "$ARCH" = s390 -o "$ARCH" = s390x; then
>> +  ARCH_S390_TRUE=
>> +  ARCH_S390_FALSE='#'
>> +else
>> +  ARCH_S390_TRUE='#'
>> +  ARCH_S390_FALSE=
>> +fi
>> +
>>   if test "$ARCH" = x86; then
>>    ARCH_X86_TRUE=
>>    ARCH_X86_FALSE='#'
>> @@ -17788,10 +17818,22 @@ if test -z "${LIBITM_BUILD_VERSIONED_SHLIB_SUN_TRUE}" && test -z "${LIBITM_BUILD
>>    as_fn_error "conditional \"LIBITM_BUILD_VERSIONED_SHLIB_SUN\" was never defined.
>>  Usually this means the macro was only invoked conditionally." "$LINENO" 5
>>  fi
>> +if test -z "${ARCH_AARCH64_TRUE}" && test -z "${ARCH_AARCH64_FALSE}"; then
>> +  as_fn_error "conditional \"ARCH_AARCH64\" was never defined.
>> +Usually this means the macro was only invoked conditionally." "$LINENO" 5
>> +fi
>>  if test -z "${ARCH_ARM_TRUE}" && test -z "${ARCH_ARM_FALSE}"; then
>>    as_fn_error "conditional \"ARCH_ARM\" was never defined.
>>  Usually this means the macro was only invoked conditionally." "$LINENO" 5
>>  fi
>> +if test -z "${ARCH_PPC_TRUE}" && test -z "${ARCH_PPC_FALSE}"; then
>> +  as_fn_error "conditional \"ARCH_PPC\" was never defined.
>> +Usually this means the macro was only invoked conditionally." "$LINENO" 5
>> +fi
>> +if test -z "${ARCH_S390_TRUE}" && test -z "${ARCH_S390_FALSE}"; then
>> +  as_fn_error "conditional \"ARCH_S390\" was never defined.
>> +Usually this means the macro was only invoked conditionally." "$LINENO" 5
>> +fi
>>  if test -z "${ARCH_X86_TRUE}" && test -z "${ARCH_X86_FALSE}"; then
>>    as_fn_error "conditional \"ARCH_X86\" was never defined.
>>  Usually this means the macro was only invoked conditionally." "$LINENO" 5
>> diff --git a/libitm/configure.ac b/libitm/configure.ac
>> index 3875aa0..36e0651 100644
>> --- a/libitm/configure.ac
>> +++ b/libitm/configure.ac
>> @@ -281,7 +281,10 @@ else
>>  fi
>>  AC_SUBST(link_itm)
>>  
>> +AM_CONDITIONAL([ARCH_AARCH64], [test "$ARCH" = aarch64])
>>  AM_CONDITIONAL([ARCH_ARM], [test "$ARCH" = arm])
>> +AM_CONDITIONAL([ARCH_PPC], [test "$ARCH" = powerpc -o "$ARCH" = powerpc64])
>> +AM_CONDITIONAL([ARCH_S390], [test "$ARCH" = s390 -o "$ARCH" = s390x])
>>  AM_CONDITIONAL([ARCH_X86], [test "$ARCH" = x86])
>>  AM_CONDITIONAL([ARCH_X86_AVX], [test "$libitm_cv_as_avx" = yes])
>>  AM_CONDITIONAL([ARCH_FUTEX], [test $enable_linux_futex = yes])
>> diff --git a/libitm/libitm.h b/libitm/libitm.h
>> index d7bc166..1a6738b 100644
>> --- a/libitm/libitm.h
>> +++ b/libitm/libitm.h
>> @@ -232,7 +232,11 @@ ITM_LOG(CE)
>>    ITM_BARRIERS(M256)
>>    ITM_LOG(M256)
>>  # endif
>> -#endif /* i386 */
>> +#else
>> +  typedef int _ITM_TYPE_M128 __attribute__((vector_size(16), may_alias));
>> +  ITM_BARRIERS(M128)
>> +  ITM_LOG(M128)
>> +#endif
>>  
>>  #undef ITM_BARRIERS
>>  #undef ITM_LOG
>>
> 

Reply via email to