> Eric, the testsuite target tests for vis2 and vis3 capable hardware > work well in my own testing but if you find some problem with how > it's done just let me know and I'll try to fix it up.
There are many failures in 64-bit mode with VIS1 because of the use of the high part to expand vec_init, both in vector_init_move_words: case V2SImode: emit_move_insn (gen_highpart (SImode, target), XVECEXP (vals, 0, 0)); emit_move_insn (gen_lowpart (SImode, target), XVECEXP (vals, 0, 1)); return true; and in sparc_expand_vector_init_vis1: if (tmp != target) emit_move_insn (target, gen_highpart (mode, tmp)); Taking the high part is valid only if it is at least as large as a word (in the middle-end sense). Otherwise, the compiler stops. So, in 64-bit mode, this breaks for V2SImode in vector_init_move_words and for V2HImode and V4QImode in sparc_expand_vector_init_vis1. I tried to think about some solutions, for example using a paradoxical subreg in sparc_expand_vector_init_vis1, but this pessimizes. > Support for the short floating point loads starts to show up here as > well, and I intend to flesh these out, support the short float stores, > and add VIS intrinsic access to them. There isn't an equivalent for 32-bit, is there? That is, you can load 8, 16 and 64 bits in the upper FP regs, but not 32 bits? While trying to debug the failures, I've made some cosmetic changes left and right. Tested on SPARC/Solaris, applied on the mainline. 2011-11-09 Eric Botcazou <ebotca...@adacore.com> * config/sparc/sparc.c (output_v8plus_shift): Take INSN parameter first and adjust head comment. (output_v8plus_mult): Change NAME into OPCODE and adjust throughout. (vector_init_bshuffle): Add head comment. (vector_init_move_words): Likewise. (vector_init_prepare_elts): Likewise. Take LOCS parameter first. (sparc_expand_vector_init): Likewise. Adjust call to above function. (sparc_expand_vector_init_vis2): Likewise. (sparc_expand_vector_init_vis1): Likewise. (sparc_expand_conditional_move): Likewise. (sparc_expand_vcond): Likewise. * config/sparc/sparc-protos.h (output_v8plus_shift): Adjust. * config/sparc/sparc.md (ashldi3_v8plus): Adjust call to output_v8plus_shift. 
(ashrdi3_v8plus): Likewise. (lshrdi3_v8plus): Likewise. -- Eric Botcazou
Index: config/sparc/sparc.md =================================================================== --- config/sparc/sparc.md (revision 181149) +++ config/sparc/sparc.md (working copy) @@ -5649,7 +5649,7 @@ (define_insn "ashldi3_v8plus" (match_operand:SI 2 "arith_operand" "rI,rI,rI"))) (clobber (match_scratch:SI 3 "=X,X,&h"))] "TARGET_V8PLUS" - "* return output_v8plus_shift (operands, insn, \"sllx\");" + "* return output_v8plus_shift (insn ,operands, \"sllx\");" [(set_attr "type" "multi") (set_attr "length" "5,5,6")]) @@ -5759,7 +5759,7 @@ (define_insn "ashrdi3_v8plus" (match_operand:SI 2 "arith_operand" "rI,rI,rI"))) (clobber (match_scratch:SI 3 "=X,X,&h"))] "TARGET_V8PLUS" - "* return output_v8plus_shift (operands, insn, \"srax\");" + "* return output_v8plus_shift (insn, operands, \"srax\");" [(set_attr "type" "multi") (set_attr "length" "5,5,6")]) @@ -5849,7 +5849,7 @@ (define_insn "lshrdi3_v8plus" (match_operand:SI 2 "arith_operand" "rI,rI,rI"))) (clobber (match_scratch:SI 3 "=X,X,&h"))] "TARGET_V8PLUS" - "* return output_v8plus_shift (operands, insn, \"srlx\");" + "* return output_v8plus_shift (insn, operands, \"srlx\");" [(set_attr "type" "multi") (set_attr "length" "5,5,6")]) Index: config/sparc/sparc-protos.h =================================================================== --- config/sparc/sparc-protos.h (revision 181149) +++ config/sparc/sparc-protos.h (working copy) @@ -74,7 +74,8 @@ extern const char *output_ubranch (rtx, extern const char *output_cbranch (rtx, rtx, int, int, int, rtx); extern const char *output_return (rtx); extern const char *output_sibcall (rtx, rtx); -extern const char *output_v8plus_shift (rtx *, rtx, const char *); +extern const char *output_v8plus_shift (rtx, rtx *, const char *); +extern const char *output_v8plus_mult (rtx, rtx *, const char *); extern const char *output_v9branch (rtx, rtx, int, int, int, int, rtx); extern const char *output_probe_stack_range (rtx, rtx); extern bool emit_scc_insn (rtx []); @@ -104,7 +105,6 @@ 
extern int v9_regcmp_p (enum rtx_code); extern int sparc_check_64 (rtx, rtx); extern rtx gen_df_reg (rtx, int); extern void sparc_expand_compare_and_swap_12 (rtx, rtx, rtx, rtx); -extern const char *output_v8plus_mult (rtx, rtx *, const char *); extern void sparc_expand_vector_init (rtx, rtx); extern void sparc_expand_vec_perm_bmask(enum machine_mode, rtx); extern bool sparc_expand_conditional_move (enum machine_mode, rtx *); Index: config/sparc/sparc.c =================================================================== --- config/sparc/sparc.c (revision 181149) +++ config/sparc/sparc.c (working copy) @@ -576,7 +576,8 @@ static bool sparc_print_operand_punct_va static void sparc_print_operand (FILE *, rtx, int); static void sparc_print_operand_address (FILE *, rtx); static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t, - enum machine_mode, secondary_reload_info *); + enum machine_mode, + secondary_reload_info *); #ifdef SUBTARGET_ATTRIBUTE_TABLE /* Table of valid machine attributes. */ @@ -9120,10 +9121,11 @@ sparc_check_64 (rtx x, rtx insn) return 0; } -/* Returns assembly code to perform a DImode shift using - a 64-bit global or out register on SPARC-V8+. */ +/* Output a wide shift instruction in V8+ mode. INSN is the instruction, + OPERANDS are its operands and OPCODE is the mnemonic to be used. 
*/ + const char * -output_v8plus_shift (rtx *operands, rtx insn, const char *opcode) +output_v8plus_shift (rtx insn, rtx *operands, const char *opcode) { static char asm_code[60]; @@ -9148,12 +9150,13 @@ output_v8plus_shift (rtx *operands, rtx output_asm_insn ("or\t%L1, %3, %3", operands); } - strcpy(asm_code, opcode); + strcpy (asm_code, opcode); if (which_alternative != 2) return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0"); else - return strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0"); + return + strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0"); } /* Output rtl to increment the profiler label LABELNO @@ -11156,7 +11159,7 @@ sparc_conditional_register_usage (void) global_regs[SPARC_GSR_REG] = 1; } -/* Implement TARGET_PREFERRED_RELOAD_CLASS +/* Implement TARGET_PREFERRED_RELOAD_CLASS: - We can't load constants into FP registers. - We can't load FP constants into integer registers when soft-float, @@ -11206,8 +11209,11 @@ sparc_preferred_reload_class (rtx x, reg return rclass; } +/* Output a wide multiply instruction in V8+ mode. INSN is the instruction, + OPERANDS are its operands and OPCODE is the mnemonic to be used. 
*/ + const char * -output_v8plus_mult (rtx insn, rtx *operands, const char *name) +output_v8plus_mult (rtx insn, rtx *operands, const char *opcode) { char mulstr[32]; @@ -11222,7 +11228,7 @@ output_v8plus_mult (rtx insn, rtx *opera if (which_alternative == 1) { output_asm_insn ("or\t%L1, %H1, %H1", operands); - sprintf (mulstr, "%s\t%%H1, %%2, %%L0", name); + sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode); output_asm_insn (mulstr, operands); return "srlx\t%L0, 32, %H0"; } @@ -11230,7 +11236,7 @@ output_v8plus_mult (rtx insn, rtx *opera { output_asm_insn ("sllx\t%H1, 32, %3", operands); output_asm_insn ("or\t%L1, %3, %3", operands); - sprintf (mulstr, "%s\t%%3, %%2, %%3", name); + sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode); output_asm_insn (mulstr, operands); output_asm_insn ("srlx\t%3, 32, %H0", operands); return "mov\t%3, %L0"; @@ -11241,7 +11247,7 @@ output_v8plus_mult (rtx insn, rtx *opera if (which_alternative == 1) { output_asm_insn ("or\t%L1, %H1, %H1", operands); - sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", name); + sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode); output_asm_insn (mulstr, operands); return "srlx\t%L0, 32, %H0"; } @@ -11249,7 +11255,7 @@ output_v8plus_mult (rtx insn, rtx *opera { output_asm_insn ("sllx\t%H1, 32, %3", operands); output_asm_insn ("or\t%L1, %3, %3", operands); - sprintf (mulstr, "%s\t%%3, %%3, %%3", name); + sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode); output_asm_insn (mulstr, operands); output_asm_insn ("srlx\t%3, 32, %H0", operands); return "mov\t%3, %L0"; @@ -11262,7 +11268,7 @@ output_v8plus_mult (rtx insn, rtx *opera output_asm_insn ("or\t%L1, %H1, %H1", operands); output_asm_insn ("sllx\t%H2, 32, %L1", operands); output_asm_insn ("or\t%L2, %L1, %L1", operands); - sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", name); + sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode); output_asm_insn (mulstr, operands); return "srlx\t%L0, 32, %H0"; } @@ -11272,15 +11278,20 @@ output_v8plus_mult (rtx insn, rtx *opera output_asm_insn 
("sllx\t%H2, 32, %4", operands); output_asm_insn ("or\t%L1, %3, %3", operands); output_asm_insn ("or\t%L2, %4, %4", operands); - sprintf (mulstr, "%s\t%%3, %%4, %%3", name); + sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode); output_asm_insn (mulstr, operands); output_asm_insn ("srlx\t%3, 32, %H0", operands); return "mov\t%3, %L0"; } } +/* Subroutine of sparc_expand_vector_init. Emit code to initialize TARGET to + the N_ELTS values for individual fields contained in LOCS by means of VIS2 + BSHUFFLE insn. MODE and INNER_MODE are the modes describing TARGET. */ + static void -vector_init_bshuffle (rtx target, rtx *locs, int n_elts, enum machine_mode mode, +vector_init_bshuffle (rtx target, rtx *locs, int n_elts, + enum machine_mode mode, enum machine_mode inner_mode) { rtx mid_target, r0_high, r0_low, r1_high, r1_low; @@ -11394,6 +11405,11 @@ vector_init_bshuffle (rtx target, rtx *l emit_move_insn (target, gen_lowpart (partial_mode, mid_target)); } +/* Subroutine of sparc_expand_vector_init. Emit code to initialize TARGET to + values for individual fields VALS by means of simple word moves if this is + possible. MODE and INNER_MODE are the modes describing TARGET. Return true + on success. */ + static bool vector_init_move_words (rtx target, rtx vals, enum machine_mode mode, enum machine_mode inner_mode) @@ -11417,11 +11433,13 @@ vector_init_move_words (rtx target, rtx return false; } -/* Move the elements in rtvec VALS into registers compatible with MODE. - Store the rtx for these regs into the corresponding array entry of - LOCS. */ +/* Subroutine of sparc_expand_vector_init. Move the N_ELTS elements in VALS + into registers compatible with MODE and INNER_MODE. Store the RTX for + these regs into the corresponding array entry of LOCS. 
*/ + static void -vector_init_prepare_elts (rtx vals, int n_elts, rtx *locs, enum machine_mode mode, +vector_init_prepare_elts (rtx *locs, rtx vals, int n_elts, + enum machine_mode mode, enum machine_mode inner_mode) { enum machine_mode loc_mode; @@ -11494,7 +11512,9 @@ vector_init_prepare_elts (rtx vals, int } else if (code != MEM) { - rtx stk = assign_stack_temp (inner_mode, GET_MODE_SIZE(inner_mode), 0); + rtx stk + = assign_stack_temp (inner_mode, GET_MODE_SIZE(inner_mode), + 0); emit_move_insn (stk, elt); m = stk; } @@ -11516,6 +11536,11 @@ vector_init_prepare_elts (rtx vals, int } } +/* Subroutine of sparc_expand_vector_init. Emit code to initialize TARGET to + the N_ELTS values for individual fields contained in LOCS by means of VIS2 + instructions, among which N_UNIQUE are unique. MODE and INNER_MODE are the + modes describing TARGET. */ + static void sparc_expand_vector_init_vis2 (rtx target, rtx *locs, int n_elts, int n_unique, enum machine_mode mode, @@ -11541,6 +11566,10 @@ sparc_expand_vector_init_vis2 (rtx targe } } +/* Subroutine of sparc_expand_vector_init. Emit code to initialize TARGET to + the N_ELTS values for individual fields contained in LOCS by means of VIS1 + instructions, among which N_UNIQUE are unique. MODE is TARGET's mode. */ + static void sparc_expand_vector_init_vis1 (rtx target, rtx *locs, int n_elts, int n_unique, enum machine_mode mode) @@ -11609,6 +11638,8 @@ sparc_expand_vector_init_vis1 (rtx targe emit_move_insn (target, gen_highpart (mode, tmp)); } +/* Emit code to initialize TARGET to values for individual fields VALS. 
*/ + void sparc_expand_vector_init (rtx target, rtx vals) { @@ -11650,7 +11681,7 @@ sparc_expand_vector_init (rtx target, rt if (vector_init_move_words (target, vals, mode, inner_mode)) return; - vector_init_prepare_elts (vals, n_elts, locs, mode, inner_mode); + vector_init_prepare_elts (locs, vals, n_elts, mode, inner_mode); if (TARGET_VIS2) sparc_expand_vector_init_vis2 (target, locs, n_elts, n_unique, @@ -11659,6 +11690,8 @@ sparc_expand_vector_init (rtx target, rt sparc_expand_vector_init_vis1 (target, locs, n_elts, n_unique, mode); } +/* Implement TARGET_SECONDARY_RELOAD. */ + static reg_class_t sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, enum machine_mode mode, secondary_reload_info *sri) @@ -11722,6 +11755,9 @@ sparc_secondary_reload (bool in_p, rtx x return NO_REGS; } +/* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into + OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */ + bool sparc_expand_conditional_move (enum machine_mode mode, rtx *operands) { @@ -11777,6 +11813,12 @@ sparc_expand_conditional_move (enum mach return true; } +/* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2] + into OPERANDS[0] in MODE, depending on the outcome of the comparison of + OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition. + FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine + code to be used for the condition mask. */ + void sparc_expand_vcond (enum machine_mode mode, rtx *operands, int ccode, int fcode) {