https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100855

--- Comment #5 from Nadav Halahmi <nadavhalahmi560 at gmail dot com> ---
(In reply to Richard Biener from comment #3)
> Might be interesting to see whether ifort does any expression simplification
> here.  Can you share the produced assembly?

ifort pow.f90 -O3 -no-vec -S -o intel.s:

# mark_description "Intel(R) Fortran Intel(R) 64 Compiler for applications running on Intel(R) 64, Version 19.1.3.304 Build 2020";
# mark_description "0925_000000";
# mark_description "-O3 -no-vec -S -o intel.s";
        .file "pow.f90"
        .text
..TXTST0:
.L_2__routine_start_MAIN___0:
# -- Begin  MAIN__
        .text
# mark_begin;
       .align    16,0x90
        .globl MAIN__
# --- POWER
#-----------------------------------------------------------------------
# MAIN__ : compiler-generated (ifort -O3 -no-vec) body of the main
#          program in pow.f90.  AT&T/GAS syntax, x86-64.
# In:    no argument register is read.
# Out:   eax = 0.
# Calls: __intel_new_feature_proc_init, for_set_reentrancy,
#        for_cpusec (twice), powf (once per inner-loop iteration),
#        for_write_seq_lis, for_write_seq_fmt.
# Regs:  ebx  = outer loop counter, 1..10000
#        r15d = inner loop counter, 1..10000
#        (the incoming rbx/r15 values are parked on the stack while
#        the loops run and reloaded afterwards)
# Frame: rsp is aligned down to 128 bytes, then 128 bytes are reserved.
#          0(%rsp)  MXCSR scratch; then saved r15; later the 8-byte
#                   zeroed block whose address is passed in rdi to the
#                   two write routines
#          8(%rsp)  saved rbx
#         16(%rsp)  float, starts at 2.0 and is multiplied by 0.5 on
#                   every inner iteration (the outer loop never
#                   resets it)
#         64(%rsp)  float copy of the sum, passed by address (r8) to
#                   for_write_seq_lis
#         72(%rsp)  float running sum of the powf results; reused for
#                   the time difference passed (r8) to
#                   for_write_seq_fmt
#         80(%rsp)  slot whose address is given to the 1st for_cpusec
#         84(%rsp)  slot whose address is given to the 2nd for_cpusec
# Note:  powf is called inside the innermost loop, i.e. 10000 * 10000
#        times; no algebraic simplification of the power expression is
#        visible in this output.
#        The trailing "#L.C" tags look like pow.f90 line.column
#        positions -- NOTE(review): pow.f90 is not shown here.
#-----------------------------------------------------------------------
MAIN__:
..B1.1:                         # Preds ..B1.0
                                # Execution count [1.00e+00]
        .cfi_startproc
..___tag_value_MAIN__.1:
..L2:
                                                          #1.9
# Prologue: rbp-based frame; rsp forced to a 128-byte boundary, so
# locals are addressed off rsp and the old rsp is recovered from rbp.
        pushq     %rbp                                          #1.9
        .cfi_def_cfa_offset 16
        movq      %rsp, %rbp                                    #1.9
        .cfi_def_cfa 6, 16
        .cfi_offset 6, -16
        andq      $-128, %rsp                                   #1.9
        subq      $128, %rsp                                    #1.9
# Runtime start-up hook, called with edi = 3, esi = 0.
        movl      $3, %edi                                      #1.9
        xorl      %esi, %esi                                    #1.9
        call      __intel_new_feature_proc_init                 #1.9
                                # LOE rbx r12 r13 r14 r15
..B1.13:                        # Preds ..B1.1
                                # Execution count [1.00e+00]
# Read-modify-write of MXCSR through 0(%rsp): 32832 = 0x8040 sets
# bits 15 and 6 (FTZ and DAZ in the Intel SDM's MXCSR layout).
        stmxcsr   (%rsp)                                        #1.9
        movl      $__NLITPACK_0.0.1, %edi                       #1.9
        orl       $32832, (%rsp)                                #1.9
        ldmxcsr   (%rsp)                                        #1.9
        call      for_set_reentrancy                            #1.9
                                # LOE rbx r12 r13 r14 r15
..B1.2:                         # Preds ..B1.13
                                # Execution count [1.00e+00]
# Initialise the two float locals relative to rdi = rsp + 80:
#   -64(%rdi) = 16(%rsp) <- 2.0,   -8(%rdi) = 72(%rsp) <- 0.0
# then call for_cpusec with rdi -> 80(%rsp).
        movss     .L_2il0floatpacket.0(%rip), %xmm1             #11.5
        lea       80(%rsp), %rdi                                #13.10
        movss     %xmm1, -64(%rdi)                              #11.5[spill]
        pxor      %xmm0, %xmm0                                  #9.5
        movss     %xmm0, -8(%rdi)                               #9.5[spill]
        call      for_cpusec                                    #13.10
                                # LOE rbx r12 r13 r14 r15
..B1.3:                         # Preds ..B1.2
                                # Execution count [8.67e-01]
# Park callee-saved r15/rbx so they can serve as loop counters.
        movl      $1, %eax                                      #14.5
        movq      %r15, (%rsp)                                  #12.5[spill]
        movq      %rbx, 8(%rsp)                                 #12.5[spill]
# Unwind info for the two spills.  Each directive is a DW_CFA_expression
# (0x10) whose 14-byte DWARF expression evaluates
# ((CFA - 8) & -128) + k, with k = -120 for DWARF reg 3 (rbx, i.e.
# 8(%rsp)) and k = -128 for DWARF reg 15 (r15, i.e. 0(%rsp)).
# A GAS statement ends at end-of-line, so each byte list must stay on
# a single line.
        .cfi_escape 0x10, 0x03, 0x0e, 0x38, 0x1c, 0x0d, 0x80, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x88, 0xff, 0xff, 0xff, 0x22
        .cfi_escape 0x10, 0x0f, 0x0e, 0x38, 0x1c, 0x0d, 0x80, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x80, 0xff, 0xff, 0xff, 0x22
        movl      %eax, %ebx                                    #12.5
                                # LOE r12 r13 r14 ebx
..B1.4:                         # Preds ..B1.6 ..B1.3
                                # Execution count [5.33e+00]
# Outer-loop body: restart the inner counter at 1.
        movl      $1, %r15d                                     #15.9
                                # LOE r12 r13 r14 ebx r15d
..B1.5:                         # Preds ..B1.14 ..B1.4
                                # Execution count [2.96e+01]
# Inner-loop body:
#   t = 16(%rsp) * 0.5;  16(%rsp) = t
#   xmm0 = (float) r15d          (1st powf argument)
#   xmm1 = 0.05 + t              (2nd powf argument)
# pxor clears xmm0 first so cvtsi2ss has no dependency on its old value.
        movss     16(%rsp), %xmm2                               #16.13[spill]
        pxor      %xmm0, %xmm0                                  #17.28
        mulss     .L_2il0floatpacket.1(%rip), %xmm2             #16.13
        cvtsi2ss  %r15d, %xmm0                                  #17.28
        movss     .L_2il0floatpacket.2(%rip), %xmm1             #17.28
        movss     %xmm2, 16(%rsp)                               #16.13[spill]
        addss     %xmm2, %xmm1                                  #17.28
        call      powf                                          #17.28
                                # LOE r12 r13 r14 ebx r15d xmm0
..B1.14:                        # Preds ..B1.5
                                # Execution count [2.96e+01]
# 72(%rsp) += powf result; advance the inner counter and repeat while
# r15d <= 10000 (signed compare).
        movss     72(%rsp), %xmm1                               #17.13[spill]
        incl      %r15d                                         #18.9
        addss     %xmm0, %xmm1                                  #17.13
        movss     %xmm1, 72(%rsp)                               #17.13[spill]
        cmpl      $10000, %r15d                                 #18.9
        jle       ..B1.5        # Prob 82%                      #18.9
                                # LOE r12 r13 r14 ebx r15d
..B1.6:                         # Preds ..B1.14
                                # Execution count [5.44e+00]
# Advance the outer counter and repeat while ebx <= 10000.
        incl      %ebx                                          #19.5
        cmpl      $10000, %ebx                                  #19.5
        jle       ..B1.4        # Prob 82%                      #19.5
                                # LOE r12 r13 r14 ebx
..B1.7:                         # Preds ..B1.6
                                # Execution count [1.00e+00]
# Loops done: reload the parked r15/rbx, then call for_cpusec again
# with rdi -> 84(%rsp).
        movq      (%rsp), %r15                                  #[spill]
        .cfi_restore 15
        lea       84(%rsp), %rdi                                #20.10
        movq      8(%rsp), %rbx                                 #[spill]
        .cfi_restore 3
        call      for_cpusec                                    #20.10
# Same two DW_CFA_expression records as in ..B1.3, each kept on a
# single line.
        .cfi_escape 0x10, 0x03, 0x0e, 0x38, 0x1c, 0x0d, 0x80, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x88, 0xff, 0xff, 0xff, 0x22
        .cfi_escape 0x10, 0x0f, 0x0e, 0x38, 0x1c, 0x0d, 0x80, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x80, 0xff, 0xff, 0xff, 0x22
                                # LOE rbx r12 r13 r14 r15
..B1.8:                         # Preds ..B1.7
                                # Execution count [1.00e+00]
# First write call: the sum from 72(%rsp) is copied to 64(%rsp) and
# its address passed in r8.  rdi -> 0(%rsp), whose first 8 bytes are
# zeroed; esi = -1; ecx -> __STRLITPACK_3.0.1; eax = 0.
# NOTE(review): the meaning of the rdx constant and of the descriptor
# is internal to the Intel Fortran runtime.
        movss     72(%rsp), %xmm0                               #21.5[spill]
        lea       (%rsp), %rdi                                  #21.5
        movl      $-1, %esi                                     #21.5
        movq      $0x1208384ff00, %rdx                          #21.5
        movl      $__STRLITPACK_3.0.1, %ecx                     #21.5
        lea       64(%rsp), %r8                                 #21.5
        xorl      %eax, %eax                                    #21.5
        movq      $0, (%rdi)                                    #21.5
        movss     %xmm0, 64(%rdi)                               #21.5
        call      for_write_seq_lis                             #21.5
                                # LOE rbx r12 r13 r14 r15
..B1.9:                         # Preds ..B1.8
                                # Execution count [1.00e+00]
# Second write call: xmm0 = 84(%rsp) - 80(%rsp) (rdi == rsp here, so
# 80(%rdi)/72(%rdi) are the same slots as 80(%rsp)/72(%rsp)).  The
# difference is stored to 72(%rsp) and its address passed in r8;
# r9d -> the format descriptor containing "Time = " / " seconds.".
        movss     84(%rsp), %xmm0                               #22.5
        lea       (%rsp), %rdi                                  #22.5
        movl      $-1, %esi                                     #22.5
        movq      $0x1208384ff00, %rdx                          #22.5
        movl      $__STRLITPACK_4.0.1, %ecx                     #22.5
        lea       72(%rsp), %r8                                 #22.5
        movl      $power_$format_pack.0.1, %r9d                 #22.5
        xorl      %eax, %eax                                    #22.5
        movq      $0, (%rdi)                                    #22.5
        subss     80(%rdi), %xmm0                               #22.5
        movss     %xmm0, 72(%rdi)                               #22.5
        call      for_write_seq_fmt                             #22.5
                                # LOE rbx r12 r13 r14 r15
..B1.10:                        # Preds ..B1.9
                                # Execution count [1.00e+00]
# Epilogue: return 0; rsp is restored from rbp because the prologue
# realigned it.
        xorl      %eax, %eax                                    #23.1
        movq      %rbp, %rsp                                    #23.1
        popq      %rbp                                          #23.1
        .cfi_def_cfa 7, 8
        .cfi_restore 6
        ret                                                     #23.1
        .align    16,0x90
                                # LOE
        .cfi_endproc
# mark_end;
        .type   MAIN__,@function
        .size   MAIN__,.-MAIN__
..LNMAIN__.0:
# Read-only literals referenced by MAIN__.
        .section .rodata, "a"
        .align 4
        .align 4
# 32-bit literal 2; MAIN__ loads its address into %edi before calling
# for_set_reentrancy.
__NLITPACK_0.0.1:
        .long   2
        .align 4
# 5 bytes of data plus 3 pad bytes; MAIN__ passes this address in %ecx
# to for_write_seq_lis.  65818 = 0x1011a.
# NOTE(review): the encoding is internal to the Intel Fortran runtime.
__STRLITPACK_3.0.1:
        .long   65818
        .byte   0
        .space 3, 0x00  # pad
        .align 4
# Same contents as __STRLITPACK_3.0.1; MAIN__ passes this address in
# %ecx to for_write_seq_fmt.
__STRLITPACK_4.0.1:
        .long   65818
        .byte   0
        .space 3, 0x00  # pad
        .align 4
# 48-byte format descriptor; MAIN__ passes its address in %r9d to
# for_write_seq_fmt.  It embeds two ASCII strings, marked below.
# NOTE(review): the surrounding fields are runtime-internal; the 7 and
# 9 that precede the strings match the string lengths, so they are
# presumably length fields.
power_$format_pack.0.1:
        .byte   54
        .byte   0
        .byte   0
        .byte   0
        .byte   28
        .byte   0
        .byte   7
        .byte   0
# The next 7 bytes are ASCII "Time = ", followed by one zero byte.
        .byte   84
        .byte   105
        .byte   109
        .byte   101
        .byte   32
        .byte   61
        .byte   32
        .byte   0
        .byte   33
        .byte   0
        .byte   0
        .byte   3
        .byte   1
        .byte   0
        .byte   0
        .byte   0
        .byte   6
        .byte   0
        .byte   0
        .byte   0
        .byte   28
        .byte   0
        .byte   9
        .byte   0
# The next 9 bytes are ASCII " seconds.", followed by three zero bytes.
        .byte   32
        .byte   115
        .byte   101
        .byte   99
        .byte   111
        .byte   110
        .byte   100
        .byte   115
        .byte   46
        .byte   0
        .byte   0
        .byte   0
        .byte   55
        .byte   0
        .byte   0
        .byte   0
        .data
# -- End  MAIN__
# Single-precision constants loaded RIP-relative by MAIN__.
        .section .rodata, "a"
        .align 4
# 0x40000000 = 2.0f: initial value stored to 16(%rsp).
.L_2il0floatpacket.0:
        .long   0x40000000
        .type   .L_2il0floatpacket.0,@object
        .size   .L_2il0floatpacket.0,4
        .align 4
# 0x3f000000 = 0.5f: multiplier applied to 16(%rsp) in the inner loop.
.L_2il0floatpacket.1:
        .long   0x3f000000
        .type   .L_2il0floatpacket.1,@object
        .size   .L_2il0floatpacket.1,4
        .align 4
# 0x3d4ccccd ~= 0.05f: added to the halved value to form the second
# powf argument.
.L_2il0floatpacket.2:
        .long   0x3d4ccccd
        .type   .L_2il0floatpacket.2,@object
        .size   .L_2il0floatpacket.2,4
        .data
# Empty .note.GNU-stack section.
        .section .note.GNU-stack, ""
# End

Reply via email to