Georg-Johann Lay schrieb: > To come back to the original topic, here is a tentative patch for > better popcount and parity: > > * config/avr/t-avr (LIB1ASMFUNCS): Rename _loop_ffsqi2 to > _ffsqi2_nz. > * config/avr/libgcc.S: Ditto. Rename __loop_ffsqi2 to __ffsqi2_nz. > (__ctzsi2, __ctzhi2): Map zero to 255. > (__popcounthi2): Use r27 instead of r30. > (__popcountdi2): Use r30 instead of r27. > * config/avr/avr.md (parityhi2): New expander. > (popcounthi2): New expander. > (popcountsi2): New expander. > (*parityhi2.libgcc): New insn. > (*parityqihi2.libgcc): New insn. > (*popcounthi2.libgcc): New insn. > (*popcountsi2.libgcc): New insn. > (*popcountqi2.libgcc): New insn. > (*popcountqihi2.libgcc): New insn_and_split. > > Johann
Oops, picked the wrong file.
Index: config/avr/libgcc.S =================================================================== --- config/avr/libgcc.S (revision 175149) +++ config/avr/libgcc.S (working copy) @@ -935,7 +935,7 @@ DEFUN __ffssi2 brne 1f ret 1: mov r24, r22 - XJMP __loop_ffsqi2 + XJMP __ffsqi2_nz ENDF __ffssi2 #endif /* defined (L_ffssi2) */ @@ -946,7 +946,7 @@ ENDF __ffssi2 DEFUN __ffshi2 clr r26 cpse r24, __zero_reg__ -1: XJMP __loop_ffsqi2 +1: XJMP __ffsqi2_nz ldi r26, 8 or r24, r25 brne 1b @@ -954,20 +954,20 @@ DEFUN __ffshi2 ENDF __ffshi2 #endif /* defined (L_ffshi2) */ -#if defined (L_loop_ffsqi2) +#if defined (L_ffsqi2_nz) ;; Helper for ffshi2, ffssi2 ;; r25:r24 = r26 + zero_extend16 (ffs8(r24)) ;; r24 must be != 0 ;; clobbers: r26 -DEFUN __loop_ffsqi2 +DEFUN __ffsqi2_nz inc r26 lsr r24 - brcc __loop_ffsqi2 + brcc __ffsqi2_nz mov r24, r26 clr r25 ret -ENDF __loop_ffsqi2 -#endif /* defined (L_loop_ffsqi2) */ +ENDF __ffsqi2_nz +#endif /* defined (L_ffsqi2_nz) */ /********************************** @@ -977,12 +977,11 @@ ENDF __loop_ffsqi2 #if defined (L_ctzsi2) ;; count trailing zeros ;; r25:r24 = ctz32 (r25:r22) -;; ctz(0) = 32 +;; ctz(0) = 255 +;; Note that ctz(0) is undefined for GCC. DEFUN __ctzsi2 XCALL __ffssi2 dec r24 - sbrc r24, 7 - ldi r24, 32 ret ENDF __ctzsi2 #endif /* defined (L_ctzsi2) */ @@ -990,12 +989,11 @@ ENDF __ctzsi2 #if defined (L_ctzhi2) ;; count trailing zeros ;; r25:r24 = ctz16 (r25:r24) -;; ctz(0) = 16 +;; ctz(0) = 255 +;; Note that ctz(0) is undefined for GCC. 
DEFUN __ctzhi2 XCALL __ffshi2 dec r24 - sbrc r24, 7 - ldi r24, 16 ret ENDF __ctzhi2 #endif /* defined (L_ctzhi2) */ @@ -1129,13 +1127,13 @@ ENDF __parityqi2 #if defined (L_popcounthi2) ;; population count ;; r25:r24 = popcount16 (r25:r24) -;; clobbers: r30, __tmp_reg__ +;; clobbers: r27, __tmp_reg__ DEFUN __popcounthi2 XCALL __popcountqi2 - mov r30, r24 + mov r27, r24 mov r24, r25 XCALL __popcountqi2 - add r24, r30 + add r24, r27 clr r25 ret ENDF __popcounthi2 @@ -1144,7 +1142,7 @@ ENDF __popcounthi2 #if defined (L_popcountsi2) ;; population count ;; r25:r24 = popcount32 (r25:r22) -;; clobbers: r26, r30, __tmp_reg__ +;; clobbers: r26, r27, __tmp_reg__ DEFUN __popcountsi2 XCALL __popcounthi2 mov r26, r24 @@ -1162,13 +1160,13 @@ ENDF __popcountsi2 ;; clobbers: r22, r23, r26, r27, r30, __tmp_reg__ DEFUN __popcountdi2 XCALL __popcountsi2 - mov r27, r24 + mov r30, r24 mov_l r22, r18 mov_h r23, r19 mov_l r24, r20 mov_h r25, r21 XCALL __popcountsi2 - add r24, r27 + add r24, r30 ret ENDF __popcountdi2 #endif /* defined (L_popcountdi2) */ Index: config/avr/avr.md =================================================================== --- config/avr/avr.md (revision 175149) +++ config/avr/avr.md (working copy) @@ -3321,6 +3321,92 @@ (define_insn "delay_cycles_4" [(set_attr "length" "9") (set_attr "cc" "clobber")]) +(define_expand "parityhi2" + [(set (reg:HI 24) + (match_operand:HI 1 "register_operand" "")) + (set (reg:HI 24) + (parity:HI (reg:HI 24))) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))] + "" + "") + +(define_insn "*parityhi2.libgcc" + [(set (reg:HI 24) + (parity:HI (reg:HI 24)))] + "" + "%~call __parityhi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*parityqihi2.libgcc" + [(set (reg:HI 24) + (parity:HI (reg:QI 24)))] + "" + "%~call __parityqi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_expand "popcounthi2" + [(set (reg:HI 24) + (match_operand:HI 1 "register_operand" "")) + (parallel[(set 
(reg:HI 24) + (popcount:HI (reg:HI 24))) + (clobber (reg:QI 27))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))] + "" + "") + +(define_expand "popcountsi2" + [(set (reg:SI 22) + (match_operand:SI 1 "register_operand" "")) + (parallel[(set (reg:HI 24) + (popcount:HI (reg:SI 22))) + (clobber (reg:HI 26))]) + (set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (reg:HI 24)))] + "" + "") + +(define_insn "*popcounthi2.libgcc" + [(set (reg:HI 24) + (popcount:HI (reg:HI 24))) + (clobber (reg:QI 27))] + "" + "%~call __popcounthi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*popcountsi2.libgcc" + [(set (reg:HI 24) + (popcount:HI (reg:SI 22))) + (clobber (reg:HI 26))] + "" + "%~call __popcountsi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*popcountqi2.libgcc" + [(set (reg:QI 24) + (popcount:QI (reg:QI 24)))] + "" + "%~call __popcountqi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "*popcountqihi2.libgcc" + [(set (reg:HI 24) + (popcount:HI (reg:QI 24)))] + "" + "#" + "" + [(set (reg:QI 24) + (popcount:QI (reg:QI 24))) + (set (reg:QI 25) + (const_int 0))] + "") + ;; CPU instructions ;; NOP taking 1 or 2 Ticks Index: config/avr/t-avr =================================================================== --- config/avr/t-avr (revision 175149) +++ config/avr/t-avr (working copy) @@ -53,7 +53,7 @@ LIB1ASMFUNCS = \ _dtors \ _ffssi2 \ _ffshi2 \ - _loop_ffsqi2 \ + _ffsqi2_nz \ _ctzsi2 \ _ctzhi2 \ _clzdi2 \