----- Am 28. Dez 2025 um 14:34 schrieb Sebastian Huber 
[email protected]:

> ----- Am 28. Dez 2025 um 14:26 schrieb Sebastian Huber
> [email protected]:
> 
>> ----- Am 27. Dez 2025 um 0:43 schrieb Jeff Law [email protected]:
>> 
>>> On 12/7/2025 5:11 AM, Sebastian Huber wrote:
>> [...]
>>>> +
>>>> +    /* Get the high 32-bit of the counter */
>>>> +    tree shift_32 = build_int_cst (integer_type_node, 32);
>>>> +    tree counter_high_64 = make_temp_ssa_name (gcov_type_node, NULL,
>>>> +                                         "PROF_decision");
>>>> +    gassign *assign3 = gimple_build_assign (counter_high_64, LSHIFT_EXPR,
>>>> +                                      counter, shift_32);
>>> 
>>> Doesn't the type of shift_32 need to match the type of the object being
>>> shifted?  Or do we have loose requirements around type checking operands
>>> for this case (where the shift count is often in a smaller precision
>>> than the object being shifted).
>> 
>> This is my attempt to write something like this:
>> 
>> int shift_32 = 32;
>> gcov_type_node counter_high_64 = counter >> shift_32;
> 
> Oh, it looks like I confused left and right. This should be an RSHIFT_EXPR:
> 
> gassign *assign3 = gimple_build_assign (counter_high_64, RSHIFT_EXPR,
>                                        counter, shift_32);

I used this test case to double-check that the shifting is now correct:

/* Helper functions; only declared here, no definitions are part of this
   test case.  */
int a(void);
int b(void);
int c(int);

/* Test case with a single decision consisting of 40 short-circuit
   conditions, c(i[0]) through c(i[39]), which is more than fit into a
   32-bit word.  Returns a() if at least one condition evaluates to
   non-zero, otherwise b().  The caller must provide at least 40 elements
   in I.  */
int f(int *i)
{
  if (c(i[0]) || c(i[1]) || c(i[2]) || c(i[3]) || c(i[4]) ||
      c(i[5]) || c(i[6]) || c(i[7]) || c(i[8]) || c(i[9]) ||
      c(i[10]) || c(i[11]) || c(i[12]) || c(i[13]) || c(i[14]) ||
      c(i[15]) || c(i[16]) || c(i[17]) || c(i[18]) || c(i[19]) ||
      c(i[20]) || c(i[21]) || c(i[22]) || c(i[23]) || c(i[24]) ||
      c(i[25]) || c(i[26]) || c(i[27]) || c(i[28]) || c(i[29]) ||
      c(i[30]) || c(i[31]) || c(i[32]) || c(i[33]) || c(i[34]) ||
      c(i[35]) || c(i[36]) || c(i[37]) || c(i[38]) || c(i[39])) {
    return a();
  } else {
    return b();
  }
}

Interestingly, GCC now reuses the "amoor.w zero,zero" operations (see "j .L46").

        .type   f, @function
f:
        addi    sp,sp,-16
        sw      s0,8(sp)
        mv      s0,a0
        lw      a0,0(a0)
        sw      ra,12(sp)
        call    c
        bne     a0,zero,.L49
        lw      a0,4(s0)
        call    c
        beq     a0,zero,.L4
        lui     a5,%hi(.LANCHOR0)
        addi    a5,a5,%lo(.LANCHOR0)
        li      a4,2
.L44:
        amoor.w zero,a4,0(a5)
        addi    a4,a5,4
.L46:
        amoor.w zero,zero,0(a4)
        addi    a4,a5,8
        amoor.w zero,zero,0(a4)
        addi    a5,a5,12
        amoor.w zero,zero,0(a5)
.L3:
        lw      s0,8(sp)
        lw      ra,12(sp)
        addi    sp,sp,16
        tail    a
.L4:
        lw      a0,8(s0)
        call    c
        beq     a0,zero,.L5
        lui     a5,%hi(.LANCHOR0)
        addi    a5,a5,%lo(.LANCHOR0)
        li      a4,4
        amoor.w zero,a4,0(a5)
        add     a4,a5,a4
        j       .L46

GCC reloads the address of .LANCHOR0 about 40 times. It probably should do this
only once and keep it in a non-volatile (callee-saved) register.

Once the condition index reaches 32, that is, the bit to set lies in the high 32 bits of the 64-bit counter, we get this code on the 32-bit target:

.L34:
        lw      a0,128(s0)
        call    c
        beq     a0,zero,.L35
        lui     a5,%hi(.LANCHOR0)
        addi    a5,a5,%lo(.LANCHOR0)
        amoor.w zero,zero,0(a5)
        li      a4,1
.L45:
        addi    a3,a5,4
.L47:
        amoor.w zero,a4,0(a3)
        addi    a4,a5,8
        amoor.w zero,zero,0(a4)
        addi    a5,a5,12
        amoor.w zero,zero,0(a5)
        j       .L3

This is the corresponding code for a 64-bit target:

.L34:
        lw      a0,128(s0)
        call    c
        beq     a0,zero,.L35
        lui     a5,%hi(.LANCHOR0)
        li      a4,1
        addi    a5,a5,%lo(.LANCHOR0)
        slli    a4,a4,32
        amoor.d zero,a4,0(a5)
        addi    a5,a5,8
        amoor.d zero,zero,0(a5)
        j       .L3

-- 
embedded brains GmbH & Co. KG
Herr Sebastian HUBER
Dornierstr. 4
82178 Puchheim
Germany
email: [email protected]
phone: +49-89-18 94 741 - 16
fax:   +49-89-18 94 741 - 08

Registergericht: Amtsgericht München
Registernummer: HRB 157899
Vertretungsberechtigte Geschäftsführer: Peter Rasmussen, Thomas Dörfler
Unsere Datenschutzerklärung finden Sie hier:
https://embedded-brains.de/datenschutzerklaerung/

Reply via email to