Can anyone skilled in the art tell me why a compiler that probably
dates back to the late 1970s or early 1980s generates the
following short and sweet code for a PL/I "BY NAME" assignment, while
the not completely new (but still fairly recent) version of Enterprise
PL/I (V3R9) generates the very, very, very long-winded code below it?
Or is this V3R9 code (which predates the out-of-order (OOO) z196
architecture) really faster?
OS PL/I V2.3.0 - OPT(2)
343 1 2 REPT_LINE = REPT_LIST, BY NAME;
* STATEMENT NUMBER 343
002664 58 70 8 268 L 7,REPT_WORK.LINE_PTR
002668 58 60 8 030 L 6,REPT_WORK.REPT_PTR
00266C 58 F0 3 600 L 15,1536(0,3)
002670 D2 03 7 003 F B54 MVC REPT_LINE.TR(4),2900(15)
002676 DE 03 7 003 6 00C ED REPT_LINE.TR(4),REPT_LIST.TR
00267C D2 03 7 00A F B54 MVC REPT_LINE.RE(4),2900(15)
002682 DE 03 7 00A 6 00E ED REPT_LINE.RI(4),REPT_LIST.RI
002688 D2 02 7 011 6 010 MVC REPT_LINE.DA(3),REPT_LIST.DA
00268E 58 E0 3 608 L 14,1544(0,3)
002692 D2 06 4 158 E 5D4 MVC 344(7,4),1492(14)
002698 DE 06 4 158 6 014 ED 344(7,4),REPT_LIST.K+1
00269E D2 05 7 017 4 159 MVC REPT_LINE.K(6),345(4)
0026A4 D2 06 4 158 E 5D4 MVC 344(7,4),1492(14)
0026AA DE 06 4 158 6 01B ED 344(7,4),REPT_LIST.V
0026B0 D2 04 7 028 4 15A MVC REPT_LINE.V(5),346(4)
0026B6 D2 03 7 030 6 026 MVC REPT_LINE.NA(4),REPT_LIST.NA
0026BC D2 03 7 036 6 02A MVC REPT_LINE.TY(4),REPT_LIST.TY
0026C2 D2 03 7 03D 6 02E MVC REPT_LINE.CO(4),REPT_LIST.CO
0026C8 D2 00 7 04B 6 036 MVC REPT_LINE.SP(1),REPT_LIST.SP
0026CE D2 03 7 05F 6 043 MVC REPT_LINE.DATE.YEAR(4),REPT_LIST.DATE.YEAR
0026D4 D2 01 7 064 6 047 MVC REPT_LINE.DATE.MONTH(2),REPT_LIST.DATE.MONTH
0026DA D2 01 7 067 6 049 MVC REPT_LINE.DATE.DAY(2),REPT_LIST.DATE.DAY
Enterprise PL/I for z/OS V3.R9.M0 (Built:20100923) - OPT(3)
3120.0 368 1 2 rept_line = rept_list, by name;
003E40 E350 D340 0624 003120 | STG r5,#SPILL33(,r13,25408)
003E46 E320 D270 0624 003120 | STG r2,#SPILL7(,r13,25200)
003E4C E350 D8FD 0571 003120 | LAY r5,_temp9(,r13,22781)
003E52 E300 D368 0604 003120 | LG r0,#SPILL38(,r13,25448)
003E58 E340 D308 0624 003120 | STG r4,#SPILL26(,r13,25352)
003E5E E310 D4B4 0271 003119 | LAY r1,LINE(,r13,9396)
003E64 E300 D8FC 0550 003120 | STY r0,_temp9(,r13,22780)
003E6A E300 D148 0214 003120 | LGF r0,<a1:d8520:l4>(,r13,8520)
003E70 D278 1000 4D33 003119 | MVC LINE(121,r1,0),REPT_INIT(r4,3379)
003E76 4110 E00C 003120 | LA r1,_shadow21(,r14,12)
003E7A E3E0 D8FC 0571 003120 | LAY r14,_temp9(,r13,22780)
003E80 DE03 E000 1000 003120 | ED _temp9(4,r14,0),_shadow21(r1,0)
003E86 B914 00E0 003120 | LGFR r14,r0
003E8A E300 D368 0604 003120 | LG r0,#SPILL38(,r13,25448)
003E90 4110 E003 003120 | LA r1,#AddressShadow(,r14,3)
003E94 41F0 E00A 003120 | LA r15,#AddressShadow(,r14,10)
003E98 D202 1001 5000 003120 | MVC _shadow21(3,r1,1),_temp9(r5,0)
003E9E 9240 E003 003120 | MVI _shadow21(r14,3),64
003EA2 E310 DF10 0158 003120 | LY r1,<a1:d7952:l4>(,r13,7952)
003EA8 E300 D984 0550 003120 | STY r0,_temp8(,r13,22916)
003EAE E350 D984 0571 003120 | LAY r5,_temp8(,r13,22916)
003EB4 4120 E017 003120 | LA r2,#AddressShadow(,r14,23)
003EB8 4110 100E 003120 | LA r1,_shadow21(,r1,14)
003EBC DE03 5000 1000 003120 | ED _temp8(4,r5,0),_shadow21(r1,0)
003EC2 E310 D985 0571 003120 | LAY r1,_temp8(,r13,22917)
003EC8 4140 E028 003120 | LA r4,#AddressShadow(,r14,40)
003ECC D202 F001 1000 003120 | MVC _shadow21(3,r15,1),_temp8(r1,0)
003ED2 9240 E00A 003120 | MVI _shadow21(r14,10),64
003ED6 E310 DF10 0158 003120 | LY r1,<a1:d7952:l4>(,r13,7952)
003EDC E3F0 D974 0571 003120 | LAY r15,_temp19(,r13,22900)
003EE2 D202 E011 1010 003120 | MVC _shadow21(3,r14,17),_shadow21(r1,16)
003EE8 E310 D238 0604 003120 | LG r1,#SPILL0(,r13,25144)
003EEE D206 F000 14A4 003120 | MVC _temp19(7,r15,0),' ......'(r1,1188)
003EF4 E310 DF10 0158 003120 | LY r1,<a1:d7952:l4>(,r13,7952)
003EFA D203 B95C 1013 003120 | MVC _temp15(4,r11,2396),_shadow18(r1,19)
003F00 E310 D90C 0571 003120 | LAY r1,_temp15(,r13,22796)
003F06 D202 B93C 1001 003120 | MVC _temp11(3,r11,2364),_shadow12(r1,1)
003F0C E310 D8EC 0571 003120 | LAY r1,_temp11(,r13,22764)
003F12 DE06 F000 1000 003120 | ED _temp19(7,r15,0),_temp11(r1,0)
003F18 E310 D975 0571 003120 | LAY r1,_temp19(,r13,22901)
003F1E D205 2000 1000 003120 | MVC _shadow21(6,r2,0),_temp19(r1,0)
003F24 E310 D238 0604 003120 | LG r1,#SPILL0(,r13,25144)
003F2A E320 D96C 0571 003120 | LAY r2,_temp21(,r13,22892)
003F30 D206 2000 14A4 003120 | MVC _temp21(7,r2,0),' ......'(r1,1188)
003F36 E310 DF10 0158 003120 | LY r1,<a1:d7952:l4>(,r13,7952)
003F3C D202 B939 101B 003120 | MVC _temp18(3,r11,2361),_shadow12(r1,27)
003F42 D202 B936 B939 003120 | MVC _temp20(3,r11,2358),_temp18(r11,2361)
003F48 E300 D8E6 0590 003120 | LLGC r0,<a1:d22758:l1>(,r13,22758)
003F4E E300 30EE 0080 003120 | NG r0,=X'00000000 0000000F'
003F54 E310 D8E6 0571 003120 | LAY r1,_temp20(,r13,22758)
003F5A E300 D8E6 0572 003120 | STCY r0,<a1:d22758:l1>(,r13,22758)
003F60 DE06 2000 1000 003120 | ED _temp21(7,r2,0),_temp20(r1,0)
003F66 E320 D96E 0571 003120 | LAY r2,_temp21(,r13,22894)
003F6C D204 4000 2000 003120 | MVC _shadow21(5,r4,0),_temp21(r2,0)
003F72 E310 DF10 0158 003120 | LY r1,<a1:d7952:l4>(,r13,7952)
003F78 E300 1026 0014 003120 | LGF r0,_shadow19(,r1,38)
003F7E 5000 E030 003120 | ST r0,_shadow19(,r14,48)
003F82 E310 DF10 0158 003120 | LY r1,<a1:d7952:l4>(,r13,7952)
003F88 E300 102A 0014 003120 | LGF r0,_shadow19(,r1,42)
003F8E 5000 E036 003120 | ST r0,_shadow19(,r14,54)
003F92 E310 DF10 0158 003120 | LY r1,<a1:d7952:l4>(,r13,7952)
003F98 E300 102E 0014 003120 | LGF r0,_shadow19(,r1,46)
003F9E 5000 E03D 003120 | ST r0,_shadow19(,r14,61)
003FA2 E310 DF10 0158 003120 | LY r1,<a1:d7952:l4>(,r13,7952)
003FA8 4300 1036 003120 | IC r0,_shadow21(,r1,54)
003FAC 4200 E04B 003120 | STC r0,_shadow21(,r14,75)
003FB0 E310 DF10 0158 003120 | LY r1,<a1:d7952:l4>(,r13,7952)
003FB6 E300 1043 0014 003120 | LGF r0,_shadow19(,r1,67)
003FBC 5000 E05F 003120 | ST r0,_shadow19(,r14,95)
003FC0 E310 DF10 0158 003120 | LY r1,<a1:d7952:l4>(,r13,7952)
003FC6 E300 1047 0015 003120 | LGH r0,_shadow20(,r1,71)
003FCC 4000 E064 003120 | STH r0,_shadow20(,r14,100)
003FD0 E310 DF10 0158 003120 | LY r1,<a1:d7952:l4>(,r13,7952)
003FD6 E340 D9A8 0571 003121 | LAY r4,_temp12(,r13,22952)
003FDC E320 D270 0604 000000 | LG r2,#SPILL7(,r13,25200)
003FE2 E300 1049 0015 003120 | LGH r0,_shadow20(,r1,73)
003FE8 4000 E067 003120 | STH r0,_shadow20(,r14,103)
TEN superfluous reloads of R1? AD 2012? How the fluffing H can you
call this an optimizing compiler? How can someone from IBM tell you
(i.e. me, two years ago!) that "we are at least five years ahead of
the competition"?
Oh, maybe it's because Enterprise PL/I is a direct descendant of
Visual Age PL/I for OS/2, a compiler that had to work on a CPU with
just a dozen available registers? Let's see what PL/I for Windows
generates:
IBM(R) PL/I for Windows 8.0 (Built:20110825)
; 3132 rept_line = rept_list, by name;
mov ecx,[ebp-03680h]; REPT_WORK
mov [ebp-05938h],ecx; _temp67
push offset FLAT:@CBE273
add ecx,03h
mov edi,offset FLAT:@CBE213
mov edx,edi
mov [ebp-05a38h],edi; @CBE390
add eax,0ch
sub esp,0ch
mov edi,dword ptr __imp__IBMPCODP
call edi
mov edx,[ebp-05a38h]; @CBE390
push offset FLAT:@CBE273
mov eax,[ebp-05938h]; _temp67
lea ecx,[eax+0ah]
mov eax,[ebp-038b8h]; REPT_WORK
add eax,0eh
sub esp,0ch
call edi
mov eax,[ebp-05938h]; _temp67
mov edx,[ebp-038b8h]; REPT_WORK
add edx,010h
mov cx,[edx]
mov dl,[edx+02h]
mov [eax+013h],dl
mov [eax+011h],cx
push offset FLAT:@CBE58
lea ecx,[eax+017h]
mov edx,offset FLAT:@CBE224
mov eax,[ebp-038b8h]; REPT_WORK
add eax,013h
sub esp,0ch
call edi
mov eax,[ebp-05938h]; _temp67
push offset FLAT:@CBE27
lea ecx,[eax+028h]
mov edx,offset FLAT:@CBE218
mov eax,[ebp-038b8h]; REPT_WORK
add eax,01bh
sub esp,0ch
call edi
mov eax,[ebp-05938h]; _temp67
mov ecx,[ebp-038b8h]; REPT_WORK
mov ecx,[ecx+026h]
mov [eax+030h],ecx
mov ecx,[ebp-038b8h]; REPT_WORK
mov ecx,[ecx+02ah]
mov [eax+036h],ecx
mov ecx,[ebp-038b8h]; REPT_WORK
mov ecx,[ecx+02eh]
mov [eax+03dh],ecx
mov ecx,[ebp-038b8h]; REPT_WORK
mov cl,[ecx+036h]
mov [eax+04bh],cl
mov ecx,[ebp-038b8h]; REPT_WORK
mov ecx,[ecx+043h]
mov [eax+05fh],ecx
mov ecx,[ebp-038b8h]; REPT_WORK
mov cx,[ecx+047h]
mov [eax+064h],cx
mov ecx,[ebp-038b8h]; REPT_WORK
mov cx,[ecx+049h]
mov [eax+067h],cx
Wow! The code ends with the same six superfluous reloads, as ECX is
needlessly overwritten - why not use EDX?
Again, I'm only the observer; it's you and your companies that are
paying for the extra(?) CPU usage, and maybe a 16-byte
three-instruction sequence like
003FC0 E310 DF10 0158 003120 | LY r1,<a1:d7952:l4>(,r13,7952)
003FC6 E300 1047 0015 003120 | LGH r0,_shadow20(,r1,71)
003FCC 4000 E064 003120 | STH r0,_shadow20(,r14,100)
is really faster than the simple 6-byte one-instruction sequence
0026D4 D2 01 7 064 6 047 MVC REPT_LINE.DATE.MONTH(2),REPT_LIST.DATE.MONTH
Then again, I always thought that the fastest instructions are those
that are never executed...
Robert