On 12/01/16 17:16, Richard Earnshaw (lists) wrote: > On 12/01/16 16:53, Richard Henderson wrote: >> The problem in this PR is that we never got around to fleshing out the vector >> support for transactions for anything but x86. My goal here is to make this >> as >> generic as possible, so that it should Just Work with existing vector support >> in the backend. >> >> In addition, if I encounter other unexpected register types, I will now copy >> them to memory and use memcpy, rather than crash. >> >> The one piece of this that requires a tiny bit of extra work is enabling the >> vector entry points in libitm. >> >> For x86, we make sure to build the files with SSE or AVX support enabled. >> For >> s390x, I do the same thing, enabling z13 support. I suppose we might need to >> check for binutils support, but I'd rather do this only if necessary. >> >> For arm I'm less sure what to do, since I seem to recall that use of Neon >> sets >> a bit in the ELF header. Which presumably means that the binary could no >> longer be run without neon, even though the entry points wouldn't be used. > > No, we don't use bits in the elf headers: there wouldn't be enough of > them! Instead we use build attributes to record user intentions. These > are (normally) derived from .arch and .fpu directives. > > For normal core attributes you can use .object_arch to force the .arch > entry recorded in the attributes to a specific value, but I'm not sure > if you can override the .fpu directive in this way. You might have to > experiment a bit. Alternatively you might be able to force out the > relevant build attributes using .eabi_attribute to record some explicit > values (which then override the values that would be normally detected). > 
BTW, the above only applies to AArch32 (traditional ARM), AArch64 doesn't put any marking out -- we assume that Neon is available. R. > R. > > >> >> For powerpc, I don't know how to select Altivec if VSX isn't already enabled, >> or indeed if that's the best thing to do. >> >> >> Thanks for the review, >> >> >> r~ >> >> >> d-68964 >> >> >> PR tree-opt/68964 >> * target.def (builtin_tm_load, builtin_tm_store): Remove. >> * config/i386/i386.c (ix86_builtin_tm_load): Remove. >> (ix86_builtin_tm_store): Remove. >> (TARGET_VECTORIZE_BUILTIN_TM_LOAD): Remove. >> (TARGET_VECTORIZE_BUILTIN_TM_STORE): Remove. >> * doc/tm.texi.in (TARGET_VECTORIZE_BUILTIN_TM_LOAD): Remove. >> (TARGET_VECTORIZE_BUILTIN_TM_STORE): Remove. >> * doc/tm.texi: Rebuild. >> >> * gtm-builtins.def (BUILT_IN_TM_MEMCPY_RNWT): New. >> (BUILT_IN_TM_MEMCPY_RTWN): New. >> * trans-mem.c (tm_log_emit_stmt): Rearrange code for better >> fallback from vector to integer helpers. >> (build_tm_load): Handle vector types directly, instead of >> via target hook. >> (build_tm_store): Likewise. >> (expand_assign_tm): Prepare for register types not handled by >> the above. Copy them to memory and use memcpy. >> * tree.c (tm_define_builtin): New. >> (find_tm_vector_type): New. >> (build_tm_vector_builtins): New. >> (build_common_builtin_nodes): Call it. >> >> gcc/testsuite/ >> * gcc.dg/tm/memopt-13.c: Update expected function. >> * gcc.dg/tm/memopt-6.c: Likewise. >> >> libitm/ >> * Makefile.am (libitm_la_SOURCES) [ARCH_AARCH64]: Add neon.cc >> (libitm_la_SOURCES) [ARCH_ARM]: Add neon.cc >> (libitm_la_SOURCES) [ARCH_PPC]: Add vect.cc >> (libitm_la_SOURCES) [ARCH_S390]: Add vx.cc >> * configure.ac (ARCH_AARCH64): New conditional. >> (ARCH_PPC, ARCH_S390): Likewise. >> * Makefile.in, configure: Rebuild. >> >> * libitm.h (_ITM_TYPE_M128): Always define. >> * config/generic/dispatch-m64.cc: Split ... >> * config/generic/dispatch-m128.cc: ... out of... >> * config/x86/x86_sse.cc: ... here. 
>> * config/aarch64/neon.cc: New file. >> * config/arm/neon.cc: New file. >> * config/powerpc/vect.cc: New file. >> >> >> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c >> index ed91e5d..0b31ccd 100644 >> --- a/gcc/config/i386/i386.c >> +++ b/gcc/config/i386/i386.c >> @@ -35214,48 +35214,6 @@ static const struct builtin_description bdesc_tm[] = >> { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum >> ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID }, >> }; >> >> -/* TM callbacks. */ >> - >> -/* Return the builtin decl needed to load a vector of TYPE. */ >> - >> -static tree >> -ix86_builtin_tm_load (tree type) >> -{ >> - if (TREE_CODE (type) == VECTOR_TYPE) >> - { >> - switch (tree_to_uhwi (TYPE_SIZE (type))) >> - { >> - case 64: >> - return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64); >> - case 128: >> - return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128); >> - case 256: >> - return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256); >> - } >> - } >> - return NULL_TREE; >> -} >> - >> -/* Return the builtin decl needed to store a vector of TYPE. */ >> - >> -static tree >> -ix86_builtin_tm_store (tree type) >> -{ >> - if (TREE_CODE (type) == VECTOR_TYPE) >> - { >> - switch (tree_to_uhwi (TYPE_SIZE (type))) >> - { >> - case 64: >> - return builtin_decl_explicit (BUILT_IN_TM_STORE_M64); >> - case 128: >> - return builtin_decl_explicit (BUILT_IN_TM_STORE_M128); >> - case 256: >> - return builtin_decl_explicit (BUILT_IN_TM_STORE_M256); >> - } >> - } >> - return NULL_TREE; >> -} >> - >> /* Initialize the transactional memory vector load/store builtins. 
*/ >> >> static void >> @@ -54341,12 +54299,6 @@ ix86_addr_space_zero_address_valid (addr_space_t as) >> #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ >> ix86_builtin_vectorized_function >> >> -#undef TARGET_VECTORIZE_BUILTIN_TM_LOAD >> -#define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load >> - >> -#undef TARGET_VECTORIZE_BUILTIN_TM_STORE >> -#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store >> - >> #undef TARGET_VECTORIZE_BUILTIN_GATHER >> #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather >> >> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi >> index 195ddf8..7a11552 100644 >> --- a/gcc/doc/tm.texi >> +++ b/gcc/doc/tm.texi >> @@ -5728,14 +5728,6 @@ This hook should complete calculations of the cost of >> vectorizing a loop or basi >> This hook should release @var{data} and any related data structures >> allocated by TARGET_VECTORIZE_INIT_COST. The default releases the >> accumulator. >> @end deftypefn >> >> -@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_TM_LOAD (tree) >> -This hook should return the built-in decl needed to load a vector of the >> given type within a transaction. >> -@end deftypefn >> - >> -@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_TM_STORE (tree) >> -This hook should return the built-in decl needed to store a vector of the >> given type within a transaction. >> -@end deftypefn >> - >> @deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_GATHER (const_tree >> @var{mem_vectype}, const_tree @var{index_type}, int @var{scale}) >> Target builtin that implements vector gather operation. @var{mem_vectype} >> is the vector type of the load and @var{index_type} is scalar type of >> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in >> index dedc798..f31c763 100644 >> --- a/gcc/doc/tm.texi.in >> +++ b/gcc/doc/tm.texi.in >> @@ -4253,10 +4253,6 @@ address; but often a machine-dependent strategy can >> generate better code. 
>> >> @hook TARGET_VECTORIZE_DESTROY_COST_DATA >> >> -@hook TARGET_VECTORIZE_BUILTIN_TM_LOAD >> - >> -@hook TARGET_VECTORIZE_BUILTIN_TM_STORE >> - >> @hook TARGET_VECTORIZE_BUILTIN_GATHER >> >> @hook TARGET_VECTORIZE_BUILTIN_SCATTER >> diff --git a/gcc/gtm-builtins.def b/gcc/gtm-builtins.def >> index e2bc081..6d5cfb9 100644 >> --- a/gcc/gtm-builtins.def >> +++ b/gcc/gtm-builtins.def >> @@ -12,6 +12,10 @@ DEF_TM_BUILTIN (BUILT_IN_TM_IRREVOCABLE, >> "_ITM_changeTransactionMode", >> >> DEF_TM_BUILTIN (BUILT_IN_TM_MEMCPY, "_ITM_memcpyRtWt", >> BT_FN_VOID_PTR_CONST_PTR_SIZE, ATTR_TM_TMPURE_NOTHROW_LIST) >> +DEF_TM_BUILTIN (BUILT_IN_TM_MEMCPY_RNWT, "_ITM_memcpyRnWt", >> + BT_FN_VOID_PTR_CONST_PTR_SIZE, ATTR_TM_TMPURE_NOTHROW_LIST) >> +DEF_TM_BUILTIN (BUILT_IN_TM_MEMCPY_RTWN, "_ITM_memcpyRtWn", >> + BT_FN_VOID_PTR_CONST_PTR_SIZE, ATTR_TM_TMPURE_NOTHROW_LIST) >> DEF_TM_BUILTIN (BUILT_IN_TM_MEMMOVE, "_ITM_memmoveRtWt", >> BT_FN_VOID_PTR_CONST_PTR_SIZE, ATTR_TM_TMPURE_NOTHROW_LIST) >> DEF_TM_BUILTIN (BUILT_IN_TM_MEMSET, "_ITM_memsetW", >> diff --git a/gcc/target.def b/gcc/target.def >> index c21b122..946d2e5 100644 >> --- a/gcc/target.def >> +++ b/gcc/target.def >> @@ -1808,24 +1808,6 @@ parameter is true if the memory access is defined in >> a packed struct.", >> (machine_mode mode, const_tree type, int misalignment, bool is_packed), >> default_builtin_support_vector_misalignment) >> >> -/* Return the builtin decl needed to load a vector of TYPE. */ >> -DEFHOOK >> -(builtin_tm_load, >> - "This hook should return the built-in decl needed to load a vector of the " >> - "given type within a transaction.", >> - tree, >> - (tree), >> - default_builtin_tm_load_store) >> - >> -/* Return the builtin decl needed to store a vector of TYPE. 
*/ >> -DEFHOOK >> -(builtin_tm_store, >> - "This hook should return the built-in decl needed to store a vector of the >> " >> - "given type within a transaction.", >> - tree, >> - (tree), >> - default_builtin_tm_load_store) >> - >> /* Returns the preferred mode for SIMD operations for the specified >> scalar mode. */ >> DEFHOOK >> diff --git a/gcc/testsuite/gcc.dg/tm/memopt-13.c >> b/gcc/testsuite/gcc.dg/tm/memopt-13.c >> index 6e93b7f..9db096a 100644 >> --- a/gcc/testsuite/gcc.dg/tm/memopt-13.c >> +++ b/gcc/testsuite/gcc.dg/tm/memopt-13.c >> @@ -12,4 +12,4 @@ void f() >> } >> } >> >> -/* { dg-final { scan-tree-dump-times "memmoveRtWt \\\(&large_global," 1 >> "tmmark" } } */ >> +/* { dg-final { scan-tree-dump-times "memcpyRnWt \\\(&large_global," 1 >> "tmmark" } } */ >> diff --git a/gcc/testsuite/gcc.dg/tm/memopt-6.c >> b/gcc/testsuite/gcc.dg/tm/memopt-6.c >> index 4f1a41d..d1a2f1d 100644 >> --- a/gcc/testsuite/gcc.dg/tm/memopt-6.c >> +++ b/gcc/testsuite/gcc.dg/tm/memopt-6.c >> @@ -18,4 +18,4 @@ int f() >> return lala.x[i]; >> } >> >> -/* { dg-final { scan-tree-dump-times "memmoveRtWt \\\(.*, &lacopy" 1 >> "tmedge" } } */ >> +/* { dg-final { scan-tree-dump-times "memcpyRtWn \\\(.*, &lacopy" 1 >> "tmedge" } } */ >> diff --git a/gcc/trans-mem.c b/gcc/trans-mem.c >> index a04e244..c462307 100644 >> --- a/gcc/trans-mem.c >> +++ b/gcc/trans-mem.c >> @@ -1190,7 +1190,6 @@ static void >> tm_log_emit_stmt (tree addr, gimple *stmt) >> { >> tree type = TREE_TYPE (addr); >> - tree size = TYPE_SIZE_UNIT (type); >> gimple_stmt_iterator gsi = gsi_for_stmt (stmt); >> gimple *log; >> enum built_in_function code = BUILT_IN_TM_LOG; >> @@ -1201,43 +1200,60 @@ tm_log_emit_stmt (tree addr, gimple *stmt) >> code = BUILT_IN_TM_LOG_DOUBLE; >> else if (type == long_double_type_node) >> code = BUILT_IN_TM_LOG_LDOUBLE; >> - else if (tree_fits_uhwi_p (size)) >> + else if (TYPE_SIZE (type) != NULL >> + && tree_fits_uhwi_p (TYPE_SIZE (type))) >> { >> - unsigned int n = tree_to_uhwi (size); >> - 
switch (n) >> + unsigned HOST_WIDE_INT type_size = tree_to_uhwi (TYPE_SIZE (type)); >> + >> + if (TREE_CODE (type) == VECTOR_TYPE) >> { >> - case 1: >> - code = BUILT_IN_TM_LOG_1; >> - break; >> - case 2: >> - code = BUILT_IN_TM_LOG_2; >> - break; >> - case 4: >> - code = BUILT_IN_TM_LOG_4; >> - break; >> - case 8: >> - code = BUILT_IN_TM_LOG_8; >> - break; >> - default: >> - code = BUILT_IN_TM_LOG; >> - if (TREE_CODE (type) == VECTOR_TYPE) >> + switch (type_size) >> { >> - if (n == 8 && builtin_decl_explicit (BUILT_IN_TM_LOG_M64)) >> - code = BUILT_IN_TM_LOG_M64; >> - else if (n == 16 && builtin_decl_explicit (BUILT_IN_TM_LOG_M128)) >> - code = BUILT_IN_TM_LOG_M128; >> - else if (n == 32 && builtin_decl_explicit (BUILT_IN_TM_LOG_M256)) >> - code = BUILT_IN_TM_LOG_M256; >> + case 64: >> + code = BUILT_IN_TM_LOG_M64; >> + break; >> + case 128: >> + code = BUILT_IN_TM_LOG_M128; >> + break; >> + case 256: >> + code = BUILT_IN_TM_LOG_M256; >> + break; >> + default: >> + goto unhandled_vec; >> + } >> + if (!builtin_decl_explicit_p (code)) >> + goto unhandled_vec; >> + } >> + else >> + { >> + unhandled_vec: >> + switch (type_size) >> + { >> + case 1: >> + code = BUILT_IN_TM_LOG_1; >> + break; >> + case 2: >> + code = BUILT_IN_TM_LOG_2; >> + break; >> + case 4: >> + code = BUILT_IN_TM_LOG_4; >> + break; >> + case 8: >> + code = BUILT_IN_TM_LOG_8; >> + break; >> } >> - break; >> } >> } >> >> + if (code != BUILT_IN_TM_LOG && !builtin_decl_explicit_p (code)) >> + code = BUILT_IN_TM_LOG; >> + tree decl = builtin_decl_explicit (code); >> + >> addr = gimplify_addr (&gsi, addr); >> if (code == BUILT_IN_TM_LOG) >> - log = gimple_build_call (builtin_decl_explicit (code), 2, addr, size); >> + log = gimple_build_call (decl, 2, addr, TYPE_SIZE_UNIT (type)); >> else >> - log = gimple_build_call (builtin_decl_explicit (code), 1, addr); >> + log = gimple_build_call (decl, 1, addr); >> gsi_insert_before (&gsi, log, GSI_SAME_STMT); >> } >> >> @@ -2171,44 +2187,66 @@ 
transaction_subcode_ior (struct tm_region *region, >> unsigned flags) >> static gcall * >> build_tm_load (location_t loc, tree lhs, tree rhs, gimple_stmt_iterator >> *gsi) >> { >> - enum built_in_function code = END_BUILTINS; >> - tree t, type = TREE_TYPE (rhs), decl; >> + tree t, type = TREE_TYPE (rhs); >> gcall *gcall; >> >> + built_in_function code; >> if (type == float_type_node) >> code = BUILT_IN_TM_LOAD_FLOAT; >> else if (type == double_type_node) >> code = BUILT_IN_TM_LOAD_DOUBLE; >> else if (type == long_double_type_node) >> code = BUILT_IN_TM_LOAD_LDOUBLE; >> - else if (TYPE_SIZE_UNIT (type) != NULL >> - && tree_fits_uhwi_p (TYPE_SIZE_UNIT (type))) >> + else >> { >> - switch (tree_to_uhwi (TYPE_SIZE_UNIT (type))) >> + if (TYPE_SIZE (type) == NULL || !tree_fits_uhwi_p (TYPE_SIZE (type))) >> + return NULL; >> + unsigned HOST_WIDE_INT type_size = tree_to_uhwi (TYPE_SIZE (type)); >> + >> + if (TREE_CODE (type) == VECTOR_TYPE) >> { >> - case 1: >> - code = BUILT_IN_TM_LOAD_1; >> - break; >> - case 2: >> - code = BUILT_IN_TM_LOAD_2; >> - break; >> - case 4: >> - code = BUILT_IN_TM_LOAD_4; >> - break; >> - case 8: >> - code = BUILT_IN_TM_LOAD_8; >> - break; >> + switch (type_size) >> + { >> + case 64: >> + code = BUILT_IN_TM_LOAD_M64; >> + break; >> + case 128: >> + code = BUILT_IN_TM_LOAD_M128; >> + break; >> + case 256: >> + code = BUILT_IN_TM_LOAD_M256; >> + break; >> + default: >> + goto unhandled_vec; >> + } >> + if (!builtin_decl_explicit_p (code)) >> + goto unhandled_vec; >> + } >> + else >> + { >> + unhandled_vec: >> + switch (type_size) >> + { >> + case 8: >> + code = BUILT_IN_TM_LOAD_1; >> + break; >> + case 16: >> + code = BUILT_IN_TM_LOAD_2; >> + break; >> + case 32: >> + code = BUILT_IN_TM_LOAD_4; >> + break; >> + case 64: >> + code = BUILT_IN_TM_LOAD_8; >> + break; >> + default: >> + return NULL; >> + } >> } >> } >> >> - if (code == END_BUILTINS) >> - { >> - decl = targetm.vectorize.builtin_tm_load (type); >> - if (!decl) >> - return NULL; >> - } 
>> - else >> - decl = builtin_decl_explicit (code); >> + tree decl = builtin_decl_explicit (code); >> + gcc_assert (decl); >> >> t = gimplify_addr (gsi, rhs); >> gcall = gimple_build_call (decl, 1, t); >> @@ -2243,44 +2281,66 @@ build_tm_load (location_t loc, tree lhs, tree rhs, >> gimple_stmt_iterator *gsi) >> static gcall * >> build_tm_store (location_t loc, tree lhs, tree rhs, gimple_stmt_iterator >> *gsi) >> { >> - enum built_in_function code = END_BUILTINS; >> tree t, fn, type = TREE_TYPE (rhs), simple_type; >> gcall *gcall; >> >> + built_in_function code; >> if (type == float_type_node) >> code = BUILT_IN_TM_STORE_FLOAT; >> else if (type == double_type_node) >> code = BUILT_IN_TM_STORE_DOUBLE; >> else if (type == long_double_type_node) >> code = BUILT_IN_TM_STORE_LDOUBLE; >> - else if (TYPE_SIZE_UNIT (type) != NULL >> - && tree_fits_uhwi_p (TYPE_SIZE_UNIT (type))) >> + else >> { >> - switch (tree_to_uhwi (TYPE_SIZE_UNIT (type))) >> + if (TYPE_SIZE (type) == NULL || !tree_fits_uhwi_p (TYPE_SIZE (type))) >> + return NULL; >> + unsigned HOST_WIDE_INT type_size = tree_to_uhwi (TYPE_SIZE (type)); >> + >> + if (TREE_CODE (type) == VECTOR_TYPE) >> { >> - case 1: >> - code = BUILT_IN_TM_STORE_1; >> - break; >> - case 2: >> - code = BUILT_IN_TM_STORE_2; >> - break; >> - case 4: >> - code = BUILT_IN_TM_STORE_4; >> - break; >> - case 8: >> - code = BUILT_IN_TM_STORE_8; >> - break; >> + switch (type_size) >> + { >> + case 64: >> + code = BUILT_IN_TM_STORE_M64; >> + break; >> + case 128: >> + code = BUILT_IN_TM_STORE_M128; >> + break; >> + case 256: >> + code = BUILT_IN_TM_STORE_M256; >> + break; >> + default: >> + goto unhandled_vec; >> + } >> + if (!builtin_decl_explicit_p (code)) >> + goto unhandled_vec; >> + } >> + else >> + { >> + unhandled_vec: >> + switch (type_size) >> + { >> + case 8: >> + code = BUILT_IN_TM_STORE_1; >> + break; >> + case 16: >> + code = BUILT_IN_TM_STORE_2; >> + break; >> + case 32: >> + code = BUILT_IN_TM_STORE_4; >> + break; >> + case 64: >> + 
code = BUILT_IN_TM_STORE_8; >> + break; >> + default: >> + return NULL; >> + } >> } >> } >> >> - if (code == END_BUILTINS) >> - { >> - fn = targetm.vectorize.builtin_tm_store (type); >> - if (!fn) >> - return NULL; >> - } >> - else >> - fn = builtin_decl_explicit (code); >> + fn = builtin_decl_explicit (code); >> + gcc_assert (fn); >> >> simple_type = TREE_VALUE (TREE_CHAIN (TYPE_ARG_TYPES (TREE_TYPE (fn)))); >> >> @@ -2342,63 +2402,80 @@ expand_assign_tm (struct tm_region *region, >> gimple_stmt_iterator *gsi) >> return; >> } >> >> + if (load_p) >> + transaction_subcode_ior (region, GTMA_HAVE_LOAD); >> + if (store_p) >> + transaction_subcode_ior (region, GTMA_HAVE_STORE); >> + >> // Remove original load/store statement. >> gsi_remove (gsi, true); >> >> + // Attempt to use a simple load/store helper function. >> if (load_p && !store_p) >> - { >> - transaction_subcode_ior (region, GTMA_HAVE_LOAD); >> - gcall = build_tm_load (loc, lhs, rhs, gsi); >> - } >> + gcall = build_tm_load (loc, lhs, rhs, gsi); >> else if (store_p && !load_p) >> - { >> - transaction_subcode_ior (region, GTMA_HAVE_STORE); >> - gcall = build_tm_store (loc, lhs, rhs, gsi); >> - } >> + gcall = build_tm_store (loc, lhs, rhs, gsi); >> + >> + // If gcall has not been set, then we do not have a simple helper >> + // function available for the type. This may be true of larger >> + // structures, vectors, and non-standard float types. >> if (!gcall) >> { >> - tree lhs_addr, rhs_addr, tmp; >> - >> - if (load_p) >> - transaction_subcode_ior (region, GTMA_HAVE_LOAD); >> - if (store_p) >> - transaction_subcode_ior (region, GTMA_HAVE_STORE); >> + tree lhs_addr, rhs_addr, ltmp = NULL, copy_fn; >> >> - /* ??? Figure out if there's any possible overlap between the LHS >> - and the RHS and if not, use MEMCPY. */ >> + // If this is a type that we couldn't handle above, but it's >> + // in a register, we must spill it to memory for the copy. 
>> + if (is_gimple_reg (lhs)) >> + { >> + ltmp = create_tmp_var (TREE_TYPE (lhs)); >> + lhs_addr = build_fold_addr_expr (ltmp); >> + } >> + else >> + lhs_addr = gimplify_addr (gsi, lhs); >> + if (is_gimple_reg (rhs)) >> + { >> + tree rtmp = create_tmp_var (TREE_TYPE (rhs)); >> + rhs_addr = build_fold_addr_expr (rtmp); >> + gcall = gimple_build_assign (rtmp, rhs); >> + gsi_insert_before (gsi, gcall, GSI_SAME_STMT); >> + } >> + else >> + rhs_addr = gimplify_addr (gsi, rhs); >> >> - if (load_p && is_gimple_reg (lhs)) >> + // Choose the appropriate memory transfer function. >> + if (load_p && store_p) >> + { >> + // ??? Figure out if there's any possible overlap between >> + // the LHS and the RHS and if not, use MEMCPY. >> + copy_fn = builtin_decl_explicit (BUILT_IN_TM_MEMMOVE); >> + } >> + else if (load_p) >> { >> - tmp = create_tmp_var (TREE_TYPE (lhs)); >> - lhs_addr = build_fold_addr_expr (tmp); >> + // Note that the store is non-transactional and cannot overlap. >> + copy_fn = builtin_decl_explicit (BUILT_IN_TM_MEMCPY_RTWN); >> } >> else >> { >> - tmp = NULL_TREE; >> - lhs_addr = gimplify_addr (gsi, lhs); >> + // Note that the load is non-transactional and cannot overlap. >> + copy_fn = builtin_decl_explicit (BUILT_IN_TM_MEMCPY_RNWT); >> } >> - rhs_addr = gimplify_addr (gsi, rhs); >> - gcall = gimple_build_call (builtin_decl_explicit >> (BUILT_IN_TM_MEMMOVE), >> - 3, lhs_addr, rhs_addr, >> + >> + gcall = gimple_build_call (copy_fn, 3, lhs_addr, rhs_addr, >> TYPE_SIZE_UNIT (TREE_TYPE (lhs))); >> gimple_set_location (gcall, loc); >> gsi_insert_before (gsi, gcall, GSI_SAME_STMT); >> >> - if (tmp) >> + if (ltmp) >> { >> - gcall = gimple_build_assign (lhs, tmp); >> + gcall = gimple_build_assign (lhs, ltmp); >> gsi_insert_before (gsi, gcall, GSI_SAME_STMT); >> } >> } >> >> - /* Now that we have the load/store in its instrumented form, add >> - thread private addresses to the log if applicable. 
*/ >> + // Now that we have the load/store in its instrumented form, add >> + // thread private addresses to the log if applicable. >> if (!store_p) >> requires_barrier (region->entry_block, lhs, gcall); >> - >> - // The calls to build_tm_{store,load} above inserted the instrumented >> - // call into the stream. >> - // gsi_insert_before (gsi, gcall, GSI_SAME_STMT); >> } >> >> >> diff --git a/gcc/tree.c b/gcc/tree.c >> index 20470c5..e6880f0 100644 >> --- a/gcc/tree.c >> +++ b/gcc/tree.c >> @@ -10330,6 +10330,143 @@ local_define_builtin (const char *name, tree type, >> enum built_in_function code, >> set_builtin_decl (code, decl, true); >> } >> >> +/* A subroutine of build_tm_vector_builtins. Define a builtin with >> + all of the appropriate attributes. */ >> +static void >> +tm_define_builtin (const char *name, tree type, built_in_function code, >> + tree decl_attrs, tree type_attrs) >> +{ >> + tree decl = add_builtin_function (name, type, code, BUILT_IN_NORMAL, >> + name + strlen ("__builtin_"), decl_attrs); >> + decl_attributes (&TREE_TYPE (decl), type_attrs, ATTR_FLAG_BUILT_IN); >> + set_builtin_decl (code, decl, true); >> +} >> + >> +/* A subroutine of build_tm_vector_builtins. Find a supported vector >> + type VECTOR_BITS wide with inner mode ELEM_MODE. */ >> +static tree >> +find_tm_vector_type (unsigned vector_bits, machine_mode elem_mode) >> +{ >> + unsigned elem_bits = GET_MODE_BITSIZE (elem_mode); >> + unsigned nunits = vector_bits / elem_bits; >> + >> + gcc_assert (elem_bits * nunits == vector_bits); >> + >> + machine_mode vector_mode = mode_for_vector (elem_mode, nunits); >> + if (!VECTOR_MODE_P (vector_mode) >> + || !targetm.vector_mode_supported_p (vector_mode)) >> + return NULL_TREE; >> + >> + tree innertype = lang_hooks.types.type_for_mode (elem_mode, 0); >> + return build_vector_type_for_mode (innertype, vector_mode); >> +} >> + >> +/* A subroutine of build_common_builtin_nodes. Define TM builtins for >> + vector types. 
This is done after the target hook, so that the target >> + has a chance to override these. */ >> +static void >> +build_tm_vector_builtins (void) >> +{ >> + tree vtype, pvtype, ftype, decl; >> + tree attrs_load, attrs_type_load; >> + tree attrs_store, attrs_type_store; >> + tree attrs_log, attrs_type_log; >> + >> + /* Do nothing if TM is turned off, either with switch or >> + not enabled in the language. */ >> + if (!flag_tm || !builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1)) >> + return; >> + >> + /* Use whatever attributes a normal TM load has. */ >> + decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1); >> + attrs_load = DECL_ATTRIBUTES (decl); >> + attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl)); >> + /* Use whatever attributes a normal TM store has. */ >> + decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1); >> + attrs_store = DECL_ATTRIBUTES (decl); >> + attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl)); >> + /* Use whatever attributes a normal TM log has. */ >> + decl = builtin_decl_explicit (BUILT_IN_TM_LOG); >> + attrs_log = DECL_ATTRIBUTES (decl); >> + attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl)); >> + >> + /* By default, 64 bit vectors go through the long long helpers. */ >> + >> + /* If a 128-bit vector is supported, declare those builtins. 
*/ >> + if (!builtin_decl_declared_p (BUILT_IN_TM_STORE_M128) >> + && ((vtype = find_tm_vector_type (128, SImode)) >> + || (vtype = find_tm_vector_type (128, SFmode)))) >> + { >> + pvtype = build_pointer_type (vtype); >> + >> + ftype = build_function_type_list (void_type_node, pvtype, vtype, >> NULL); >> + tm_define_builtin ("__builtin__ITM_WM128", ftype, >> + BUILT_IN_TM_STORE_M128, >> + attrs_store, attrs_type_store); >> + tm_define_builtin ("__builtin__ITM_WaRM128", ftype, >> + BUILT_IN_TM_STORE_WAR_M128, >> + attrs_store, attrs_type_store); >> + tm_define_builtin ("__builtin__ITM_WaWM128", ftype, >> + BUILT_IN_TM_STORE_WAW_M128, >> + attrs_store, attrs_type_store); >> + >> + ftype = build_function_type_list (vtype, pvtype, NULL); >> + tm_define_builtin ("__builtin__ITM_RM128", ftype, >> + BUILT_IN_TM_LOAD_M128, >> + attrs_load, attrs_type_load); >> + tm_define_builtin ("__builtin__ITM_RaRM128", ftype, >> + BUILT_IN_TM_LOAD_RAR_M128, >> + attrs_load, attrs_type_load); >> + tm_define_builtin ("__builtin__ITM_RaWM128", ftype, >> + BUILT_IN_TM_LOAD_RAW_M128, >> + attrs_load, attrs_type_load); >> + tm_define_builtin ("__builtin__ITM_RfWM128", ftype, >> + BUILT_IN_TM_LOAD_RFW_M128, >> + attrs_load, attrs_type_load); >> + >> + ftype = build_function_type_list (void_type_node, pvtype, NULL); >> + tm_define_builtin ("__builtin__ITM_LM128", ftype, >> + BUILT_IN_TM_LOG_M128, attrs_log, attrs_type_log); >> + } >> + >> + /* If a 256-bit vector is supported, declare those builtins. 
*/ >> + if (!builtin_decl_declared_p (BUILT_IN_TM_STORE_M256) >> + && ((vtype = find_tm_vector_type (256, SImode)) >> + || (vtype = find_tm_vector_type (256, SFmode)))) >> + { >> + pvtype = build_pointer_type (vtype); >> + >> + ftype = build_function_type_list (void_type_node, pvtype, vtype, >> NULL); >> + tm_define_builtin ("__builtin__ITM_WM256", ftype, >> + BUILT_IN_TM_STORE_M256, >> + attrs_store, attrs_type_store); >> + tm_define_builtin ("__builtin__ITM_WaRM256", ftype, >> + BUILT_IN_TM_STORE_WAR_M256, >> + attrs_store, attrs_type_store); >> + tm_define_builtin ("__builtin__ITM_WaWM256", ftype, >> + BUILT_IN_TM_STORE_WAW_M256, >> + attrs_store, attrs_type_store); >> + >> + ftype = build_function_type_list (vtype, pvtype, NULL); >> + tm_define_builtin ("__builtin__ITM_RM256", ftype, >> + BUILT_IN_TM_LOAD_M256, >> + attrs_load, attrs_type_load); >> + tm_define_builtin ("__builtin__ITM_RaRM256", ftype, >> + BUILT_IN_TM_LOAD_RAR_M256, >> + attrs_load, attrs_type_load); >> + tm_define_builtin ("__builtin__ITM_RaWM256", ftype, >> + BUILT_IN_TM_LOAD_RAW_M256, >> + attrs_load, attrs_type_load); >> + tm_define_builtin ("__builtin__ITM_RfWM256", ftype, >> + BUILT_IN_TM_LOAD_RFW_M256, >> + attrs_load, attrs_type_load); >> + >> + ftype = build_function_type_list (void_type_node, pvtype, NULL); >> + tm_define_builtin ("__builtin__ITM_LM256", ftype, >> + BUILT_IN_TM_LOG_M256, attrs_log, attrs_type_log); >> + } >> +} >> + >> /* Call this function after instantiating all builtins that the language >> front end cares about. 
This will build the rest of the builtins >> and internal functions that are relied upon by the tree optimizers and >> @@ -10568,6 +10705,7 @@ build_common_builtin_nodes (void) >> } >> } >> >> + build_tm_vector_builtins (); >> init_internal_fns (); >> } >> >> diff --git a/libitm/Makefile.am b/libitm/Makefile.am >> index 1dce82d..c9f7a16 100644 >> --- a/libitm/Makefile.am >> +++ b/libitm/Makefile.am >> @@ -62,8 +62,18 @@ libitm_la_SOURCES = \ >> query.cc retry.cc rwlock.cc useraction.cc util.cc \ >> sjlj.S tls.cc method-serial.cc method-gl.cc method-ml.cc >> >> +if ARCH_AARCH64 >> +libitm_la_SOURCES += neon.cc >> +endif >> if ARCH_ARM >> -libitm_la_SOURCES += hwcap.cc >> +libitm_la_SOURCES += hwcap.cc neon.cc >> +endif >> +if ARCH_PPC >> +libitm_la_SOURCES += vect.cc >> +endif >> +if ARCH_S390 >> +libitm_la_SOURCES += vx.cc >> +vx.lo : override CXXFLAGS += -march=z13 >> endif >> if ARCH_X86 >> libitm_la_SOURCES += x86_sse.cc x86_avx.cc >> diff --git a/libitm/Makefile.in b/libitm/Makefile.in >> index 138eeb1..edd65dc 100644 >> --- a/libitm/Makefile.in >> +++ b/libitm/Makefile.in >> @@ -53,9 +53,12 @@ POST_UNINSTALL = : >> build_triplet = @build@ >> host_triplet = @host@ >> target_triplet = @target@ >> -@ARCH_ARM_TRUE@am__append_1 = hwcap.cc >> -@ARCH_X86_TRUE@am__append_2 = x86_sse.cc x86_avx.cc >> -@ARCH_FUTEX_TRUE@am__append_3 = futex.cc >> +@ARCH_AARCH64_TRUE@am__append_1 = neon.cc >> +@ARCH_ARM_TRUE@am__append_2 = hwcap.cc neon.cc >> +@ARCH_PPC_TRUE@am__append_3 = vect.cc >> +@ARCH_S390_TRUE@am__append_4 = vx.cc >> +@ARCH_X86_TRUE@am__append_5 = x86_sse.cc x86_avx.cc >> +@ARCH_FUTEX_TRUE@am__append_6 = futex.cc >> subdir = . 
>> DIST_COMMON = ChangeLog $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ >> $(top_srcdir)/configure $(am__configure_deps) \ >> @@ -117,14 +120,18 @@ am__installdirs = "$(DESTDIR)$(toolexeclibdir)" >> "$(DESTDIR)$(infodir)" \ >> "$(DESTDIR)$(toolexeclibdir)" >> LTLIBRARIES = $(toolexeclib_LTLIBRARIES) >> libitm_la_LIBADD = >> -@ARCH_ARM_TRUE@am__objects_1 = hwcap.lo >> -@ARCH_X86_TRUE@am__objects_2 = x86_sse.lo x86_avx.lo >> -@ARCH_FUTEX_TRUE@am__objects_3 = futex.lo >> +@ARCH_AARCH64_TRUE@am__objects_1 = neon.lo >> +@ARCH_ARM_TRUE@am__objects_2 = hwcap.lo neon.lo >> +@ARCH_PPC_TRUE@am__objects_3 = vect.lo >> +@ARCH_S390_TRUE@am__objects_4 = vx.lo >> +@ARCH_X86_TRUE@am__objects_5 = x86_sse.lo x86_avx.lo >> +@ARCH_FUTEX_TRUE@am__objects_6 = futex.lo >> am_libitm_la_OBJECTS = aatree.lo alloc.lo alloc_c.lo alloc_cpp.lo \ >> barrier.lo beginend.lo clone.lo eh_cpp.lo local.lo query.lo \ >> retry.lo rwlock.lo useraction.lo util.lo sjlj.lo tls.lo \ >> method-serial.lo method-gl.lo method-ml.lo $(am__objects_1) \ >> - $(am__objects_2) $(am__objects_3) >> + $(am__objects_2) $(am__objects_3) $(am__objects_4) \ >> + $(am__objects_5) $(am__objects_6) >> libitm_la_OBJECTS = $(am_libitm_la_OBJECTS) >> DEFAULT_INCLUDES = -I.@am__isrc@ >> depcomp = $(SHELL) $(top_srcdir)/../depcomp >> @@ -362,7 +369,8 @@ libitm_la_SOURCES = aatree.cc alloc.cc alloc_c.cc >> alloc_cpp.cc \ >> barrier.cc beginend.cc clone.cc eh_cpp.cc local.cc query.cc \ >> retry.cc rwlock.cc useraction.cc util.cc sjlj.S tls.cc \ >> method-serial.cc method-gl.cc method-ml.cc $(am__append_1) \ >> - $(am__append_2) $(am__append_3) >> + $(am__append_2) $(am__append_3) $(am__append_4) \ >> + $(am__append_5) $(am__append_6) >> >> # Automake Documentation: >> # If your package has Texinfo files in many directories, you can use the >> @@ -495,6 +503,7 @@ distclean-compile: >> @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/method-gl.Plo@am__quote@ >> @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/method-ml.Plo@am__quote@ 
>> @AMDEP_TRUE@@am__include@ >> @am__quote@./$(DEPDIR)/method-serial.Plo@am__quote@ >> +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/neon.Plo@am__quote@ >> @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/query.Plo@am__quote@ >> @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/retry.Plo@am__quote@ >> @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rwlock.Plo@am__quote@ >> @@ -502,6 +511,8 @@ distclean-compile: >> @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tls.Plo@am__quote@ >> @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/useraction.Plo@am__quote@ >> @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/util.Plo@am__quote@ >> +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vect.Plo@am__quote@ >> +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vx.Plo@am__quote@ >> @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/x86_avx.Plo@am__quote@ >> @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/x86_sse.Plo@am__quote@ >> >> @@ -1096,6 +1107,7 @@ vpath % $(strip $(search_path)) >> @LIBITM_BUILD_VERSIONED_SHLIB_SUN_TRUE@@LIBITM_BUILD_VERSIONED_SHLIB_TRUE@ >> `echo $(libitm_la_LIBADD) | \ >> @LIBITM_BUILD_VERSIONED_SHLIB_SUN_TRUE@@LIBITM_BUILD_VERSIONED_SHLIB_TRUE@ >> sed 's,/\([^/.]*\)\.la,/.libs/\1.a,g'` \ >> @LIBITM_BUILD_VERSIONED_SHLIB_SUN_TRUE@@LIBITM_BUILD_VERSIONED_SHLIB_TRUE@ >> > $@ || (rm -f $@ ; exit 1) >> +@ARCH_S390_TRUE@vx.lo : override CXXFLAGS += -march=z13 >> # Make sure -msse is appended at the end. >> @ARCH_X86_TRUE@x86_sse.lo : override CXXFLAGS += -msse >> # Make sure -mavx is appended at the end. 
>> diff --git a/libitm/config/aarch64/neon.cc b/libitm/config/aarch64/neon.cc >> new file mode 100644 >> index 0000000..c2dda20 >> --- /dev/null >> +++ b/libitm/config/aarch64/neon.cc >> @@ -0,0 +1,3 @@ >> +#ifdef __ARM_NEON >> +#include <config/generic/dispatch-m128.cc> >> +#endif >> diff --git a/libitm/config/arm/neon.cc b/libitm/config/arm/neon.cc >> new file mode 100644 >> index 0000000..c2dda20 >> --- /dev/null >> +++ b/libitm/config/arm/neon.cc >> @@ -0,0 +1,3 @@ >> +#ifdef __ARM_NEON >> +#include <config/generic/dispatch-m128.cc> >> +#endif >> diff --git a/libitm/config/generic/dispatch-m128.cc >> b/libitm/config/generic/dispatch-m128.cc >> new file mode 100644 >> index 0000000..d4fdb5e >> --- /dev/null >> +++ b/libitm/config/generic/dispatch-m128.cc >> @@ -0,0 +1,36 @@ >> +/* Copyright (C) 2009-2015 Free Software Foundation, Inc. >> + Contributed by Richard Henderson <r...@redhat.com>. >> + >> + This file is part of the GNU Transactional Memory Library (libitm). >> + >> + Libitm is free software; you can redistribute it and/or modify it >> + under the terms of the GNU General Public License as published by >> + the Free Software Foundation; either version 3 of the License, or >> + (at your option) any later version. >> + >> + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY >> + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS >> + FOR A PARTICULAR PURPOSE. See the GNU General Public License for >> + more details. >> + >> + Under Section 7 of GPL version 3, you are granted additional >> + permissions described in the GCC Runtime Library Exception, version >> + 3.1, as published by the Free Software Foundation. >> + >> + You should have received a copy of the GNU General Public License and >> + a copy of the GCC Runtime Library Exception along with this program; >> + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see >> + <http://www.gnu.org/licenses/>. 
*/ >> + >> +#include "libitm_i.h" >> +#include "dispatch.h" >> + >> +// ??? Use memcpy for now, until we have figured out how to best instantiate >> +// these loads/stores. >> +CREATE_DISPATCH_FUNCTIONS_T_MEMCPY(M128, GTM::abi_disp()->, ) >> + >> +void ITM_REGPARM >> +_ITM_LM128 (const _ITM_TYPE_M128 *ptr) >> +{ >> + GTM::GTM_LB (ptr, sizeof (*ptr)); >> +} >> diff --git a/libitm/config/generic/dispatch-m64.cc >> b/libitm/config/generic/dispatch-m64.cc >> new file mode 100644 >> index 0000000..34e9465 >> --- /dev/null >> +++ b/libitm/config/generic/dispatch-m64.cc >> @@ -0,0 +1,36 @@ >> +/* Copyright (C) 2009-2015 Free Software Foundation, Inc. >> + Contributed by Richard Henderson <r...@redhat.com>. >> + >> + This file is part of the GNU Transactional Memory Library (libitm). >> + >> + Libitm is free software; you can redistribute it and/or modify it >> + under the terms of the GNU General Public License as published by >> + the Free Software Foundation; either version 3 of the License, or >> + (at your option) any later version. >> + >> + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY >> + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS >> + FOR A PARTICULAR PURPOSE. See the GNU General Public License for >> + more details. >> + >> + Under Section 7 of GPL version 3, you are granted additional >> + permissions described in the GCC Runtime Library Exception, version >> + 3.1, as published by the Free Software Foundation. >> + >> + You should have received a copy of the GNU General Public License and >> + a copy of the GCC Runtime Library Exception along with this program; >> + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see >> + <http://www.gnu.org/licenses/>. */ >> + >> +#include "libitm_i.h" >> +#include "dispatch.h" >> + >> +// ??? Use memcpy for now, until we have figured out how to best instantiate >> +// these loads/stores. 
>> +CREATE_DISPATCH_FUNCTIONS_T_MEMCPY(M64, GTM::abi_disp()->, ) >> + >> +void ITM_REGPARM >> +_ITM_LM64 (const _ITM_TYPE_M64 *ptr) >> +{ >> + GTM::GTM_LB (ptr, sizeof (*ptr)); >> +} >> diff --git a/libitm/config/powerpc/vect.cc b/libitm/config/powerpc/vect.cc >> new file mode 100644 >> index 0000000..43fcdfe >> --- /dev/null >> +++ b/libitm/config/powerpc/vect.cc >> @@ -0,0 +1,3 @@ >> +#if defined(__ALTIVEC__) || defined(__VSX__) >> +#include <config/generic/dispatch-m128.cc> >> +#endif >> diff --git a/libitm/config/s390/vx.cc b/libitm/config/s390/vx.cc >> new file mode 100644 >> index 0000000..1f7268c >> --- /dev/null >> +++ b/libitm/config/s390/vx.cc >> @@ -0,0 +1 @@ >> +#include <config/generic/dispatch-m128.cc> >> diff --git a/libitm/config/x86/x86_sse.cc b/libitm/config/x86/x86_sse.cc >> index c3b7237..266ea09 100644 >> --- a/libitm/config/x86/x86_sse.cc >> +++ b/libitm/config/x86/x86_sse.cc >> @@ -1,43 +1,2 @@ >> -/* Copyright (C) 2009-2016 Free Software Foundation, Inc. >> - Contributed by Richard Henderson <r...@redhat.com>. >> - >> - This file is part of the GNU Transactional Memory Library (libitm). >> - >> - Libitm is free software; you can redistribute it and/or modify it >> - under the terms of the GNU General Public License as published by >> - the Free Software Foundation; either version 3 of the License, or >> - (at your option) any later version. >> - >> - Libitm is distributed in the hope that it will be useful, but WITHOUT ANY >> - WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS >> - FOR A PARTICULAR PURPOSE. See the GNU General Public License for >> - more details. >> - >> - Under Section 7 of GPL version 3, you are granted additional >> - permissions described in the GCC Runtime Library Exception, version >> - 3.1, as published by the Free Software Foundation. 
>> - >> - You should have received a copy of the GNU General Public License and >> - a copy of the GCC Runtime Library Exception along with this program; >> - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see >> - <http://www.gnu.org/licenses/>. */ >> - >> -#include "libitm_i.h" >> -#include "dispatch.h" >> - >> -// ??? Use memcpy for now, until we have figured out how to best instantiate >> -// these loads/stores. >> -CREATE_DISPATCH_FUNCTIONS_T_MEMCPY(M64, GTM::abi_disp()->, ) >> -CREATE_DISPATCH_FUNCTIONS_T_MEMCPY(M128, GTM::abi_disp()->, ) >> - >> -void ITM_REGPARM >> -_ITM_LM64 (const _ITM_TYPE_M64 *ptr) >> -{ >> - GTM::GTM_LB (ptr, sizeof (*ptr)); >> -} >> - >> -void ITM_REGPARM >> -_ITM_LM128 (const _ITM_TYPE_M128 *ptr) >> -{ >> - GTM::GTM_LB (ptr, sizeof (*ptr)); >> -} >> +#include <config/generic/dispatch-m64.cc> >> +#include <config/generic/dispatch-m128.cc> >> diff --git a/libitm/configure b/libitm/configure >> index 55332bb..637fcd1 100644 >> --- a/libitm/configure >> +++ b/libitm/configure >> @@ -607,8 +607,14 @@ ARCH_X86_AVX_FALSE >> ARCH_X86_AVX_TRUE >> ARCH_X86_FALSE >> ARCH_X86_TRUE >> +ARCH_S390_FALSE >> +ARCH_S390_TRUE >> +ARCH_PPC_FALSE >> +ARCH_PPC_TRUE >> ARCH_ARM_FALSE >> ARCH_ARM_TRUE >> +ARCH_AARCH64_FALSE >> +ARCH_AARCH64_TRUE >> link_itm >> XLDFLAGS >> XCFLAGS >> @@ -9762,7 +9768,7 @@ _LT_EOF >> if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then >> export_symbols_cmds='$NM -Bpg $libobjs $convenience | awk '\''{ if >> (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && >> (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' >> else >> - export_symbols_cmds='$NM -BCpg $libobjs $convenience | awk '\''{ if >> (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && (substr(\$ 3,1,1) != >> ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' >> + export_symbols_cmds='$NM -BCpg $libobjs $convenience | awk '\''{ if >> (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L")) && 
>> (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' >> fi >> aix_use_runtimelinking=no >> >> @@ -11790,7 +11796,7 @@ else >> lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 >> lt_status=$lt_dlunknown >> cat > conftest.$ac_ext <<_LT_EOF >> -#line 11793 "configure" >> +#line 11799 "configure" >> #include "confdefs.h" >> >> #if HAVE_DLFCN_H >> @@ -11896,7 +11902,7 @@ else >> lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 >> lt_status=$lt_dlunknown >> cat > conftest.$ac_ext <<_LT_EOF >> -#line 11899 "configure" >> +#line 11905 "configure" >> #include "confdefs.h" >> >> #if HAVE_DLFCN_H >> @@ -14248,7 +14254,7 @@ $as_echo_n "checking whether the $compiler linker >> ($LD) supports shared librarie >> if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then >> export_symbols_cmds_CXX='$NM -Bpg $libobjs $convenience | awk '\''{ >> if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && >> (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' >> else >> - export_symbols_cmds_CXX='$NM -BCpg $libobjs $convenience | awk '\''{ >> if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && (substr(\$ 3,1,1) >> != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' >> + export_symbols_cmds_CXX='$NM -BCpg $libobjs $convenience | awk '\''{ >> if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L")) && >> (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' >> fi >> ;; >> pw32*) >> @@ -17603,6 +17609,14 @@ else >> fi >> >> >> + if test "$ARCH" = aarch64; then >> + ARCH_AARCH64_TRUE= >> + ARCH_AARCH64_FALSE='#' >> +else >> + ARCH_AARCH64_TRUE='#' >> + ARCH_AARCH64_FALSE= >> +fi >> + >> if test "$ARCH" = arm; then >> ARCH_ARM_TRUE= >> ARCH_ARM_FALSE='#' >> @@ -17611,6 +17625,22 @@ else >> ARCH_ARM_FALSE= >> fi >> >> + if test "$ARCH" = powerpc -o "$ARCH" = powerpc64; then >> + ARCH_PPC_TRUE= >> + ARCH_PPC_FALSE='#' >> +else >> + ARCH_PPC_TRUE='#' >> + ARCH_PPC_FALSE= >> +fi >> + 
>> + if test "$ARCH" = s390 -o "$ARCH" = s390x; then >> + ARCH_S390_TRUE= >> + ARCH_S390_FALSE='#' >> +else >> + ARCH_S390_TRUE='#' >> + ARCH_S390_FALSE= >> +fi >> + >> if test "$ARCH" = x86; then >> ARCH_X86_TRUE= >> ARCH_X86_FALSE='#' >> @@ -17788,10 +17818,22 @@ if test -z >> "${LIBITM_BUILD_VERSIONED_SHLIB_SUN_TRUE}" && test -z "${LIBITM_BUILD >> as_fn_error "conditional \"LIBITM_BUILD_VERSIONED_SHLIB_SUN\" was never >> defined. >> Usually this means the macro was only invoked conditionally." "$LINENO" 5 >> fi >> +if test -z "${ARCH_AARCH64_TRUE}" && test -z "${ARCH_AARCH64_FALSE}"; then >> + as_fn_error "conditional \"ARCH_AARCH64\" was never defined. >> +Usually this means the macro was only invoked conditionally." "$LINENO" 5 >> +fi >> if test -z "${ARCH_ARM_TRUE}" && test -z "${ARCH_ARM_FALSE}"; then >> as_fn_error "conditional \"ARCH_ARM\" was never defined. >> Usually this means the macro was only invoked conditionally." "$LINENO" 5 >> fi >> +if test -z "${ARCH_PPC_TRUE}" && test -z "${ARCH_PPC_FALSE}"; then >> + as_fn_error "conditional \"ARCH_PPC\" was never defined. >> +Usually this means the macro was only invoked conditionally." "$LINENO" 5 >> +fi >> +if test -z "${ARCH_S390_TRUE}" && test -z "${ARCH_S390_FALSE}"; then >> + as_fn_error "conditional \"ARCH_S390\" was never defined. >> +Usually this means the macro was only invoked conditionally." "$LINENO" 5 >> +fi >> if test -z "${ARCH_X86_TRUE}" && test -z "${ARCH_X86_FALSE}"; then >> as_fn_error "conditional \"ARCH_X86\" was never defined. >> Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 >> diff --git a/libitm/configure.ac b/libitm/configure.ac >> index 3875aa0..36e0651 100644 >> --- a/libitm/configure.ac >> +++ b/libitm/configure.ac >> @@ -281,7 +281,10 @@ else >> fi >> AC_SUBST(link_itm) >> >> +AM_CONDITIONAL([ARCH_AARCH64], [test "$ARCH" = aarch64]) >> AM_CONDITIONAL([ARCH_ARM], [test "$ARCH" = arm]) >> +AM_CONDITIONAL([ARCH_PPC], [test "$ARCH" = powerpc -o "$ARCH" = powerpc64]) >> +AM_CONDITIONAL([ARCH_S390], [test "$ARCH" = s390 -o "$ARCH" = s390x]) >> AM_CONDITIONAL([ARCH_X86], [test "$ARCH" = x86]) >> AM_CONDITIONAL([ARCH_X86_AVX], [test "$libitm_cv_as_avx" = yes]) >> AM_CONDITIONAL([ARCH_FUTEX], [test $enable_linux_futex = yes]) >> diff --git a/libitm/libitm.h b/libitm/libitm.h >> index d7bc166..1a6738b 100644 >> --- a/libitm/libitm.h >> +++ b/libitm/libitm.h >> @@ -232,7 +232,11 @@ ITM_LOG(CE) >> ITM_BARRIERS(M256) >> ITM_LOG(M256) >> # endif >> -#endif /* i386 */ >> +#else >> + typedef int _ITM_TYPE_M128 __attribute__((vector_size(16), may_alias)); >> + ITM_BARRIERS(M128) >> + ITM_LOG(M128) >> +#endif >> >> #undef ITM_BARRIERS >> #undef ITM_LOG >> >