[
https://issues.apache.org/jira/browse/ORC-186?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Gopal V updated ORC-186:
------------------------
Description:
{code}
/**
 * Reads eight bytes via {@code readFully} into {@code readBuffer} and assembles
 * them into a {@code long}, with {@code readBuffer[0]} as the least significant
 * byte and {@code readBuffer[7]} as the most significant (little-endian order).
 *
 * @param in the stream handed to {@code readFully} as the source of the bytes
 * @return the 64-bit value built from {@code readBuffer[0..7]}
 * @throws IOException declared here; presumably propagated from {@code readFully},
 *         which is defined outside this snippet
 */
public long readLongLE(InputStream in) throws IOException {
// Presumably fills readBuffer[0..7] from the stream; readFully is not shown here -- confirm.
readFully(in, readBuffer, 0, 8);
// "& 0xff" turns each (signed) byte into an unsigned 0..255 int before shifting.
// Bytes 0-2 are combined in int arithmetic (their sum fits in 24 bits).
// From byte 3 on, the value is cast to long first so the shift is a 64-bit shift:
// an int shift by 24 could set the sign bit, and int shifts by 32+ wrap the shift count.
return (((readBuffer[0] & 0xff) << 0)
+ ((readBuffer[1] & 0xff) << 8)
+ ((readBuffer[2] & 0xff) << 16)
+ ((long) (readBuffer[3] & 0xff) << 24)
+ ((long) (readBuffer[4] & 0xff) << 32)
+ ((long) (readBuffer[5] & 0xff) << 40)
+ ((long) (readBuffer[6] & 0xff) << 48)
+ ((long) (readBuffer[7] & 0xff) << 56));
}
{code}
Compiles into
{code}
7fc99381f393: movslq %r10d,%r10
7fc99381f396: movzbq 0x1f(%rbx,%r10,1),%r8
7fc99381f39c: movzbq 0x18(%rbx,%r10,1),%r9
7fc99381f3a2: movzbq 0x19(%rbx,%r10,1),%rdi
7fc99381f3a8: movzbq 0x1e(%rbx,%r10,1),%rdx
7fc99381f3ae: movzbq 0x1d(%rbx,%r10,1),%rsi
7fc99381f3b4: movzbq 0x1c(%rbx,%r10,1),%rax
7fc99381f3ba: movzbq 0x1b(%rbx,%r10,1),%r13
7fc99381f3c0: movzbq 0x1a(%rbx,%r10,1),%r10
7fc99381f3c6: shl $0x38,%r9
7fc99381f3ca: shl $0x28,%r10
7fc99381f3ce: shl $0x20,%r13
7fc99381f3d2: shl $0x18,%rax
7fc99381f3d6: shl $0x10,%rsi
7fc99381f3da: shl $0x8,%rdx
7fc99381f3de: shl $0x30,%rdi
7fc99381f3e2: add %r9,%rdi
7fc99381f3e5: add %rdi,%r10
7fc99381f3e8: add %r10,%r13
7fc99381f3eb: add %r13,%rax
7fc99381f3ee: add %rax,%rsi
7fc99381f3f1: add %rsi,%rdx
7fc99381f3f4: add %rdx,%r8
7fc99381f3f7: vmovq %r8,%xmm1
7fc99381f3fc: mov (%rsp),%r10
7fc99381f400: vmovsd %xmm1,0x18(%r10)
{code}
The eight separate single-byte loads (the movzbq instructions above) are slow and
show up as a significant memory bottleneck when reading doubles from a cached memory buffer.
!perf-top-readDouble.png!
was:
{code}
/**
 * Reads eight bytes via {@code readFully} into {@code readBuffer} and assembles
 * them into a {@code long}, with {@code readBuffer[0]} as the least significant
 * byte and {@code readBuffer[7]} as the most significant (little-endian order).
 *
 * @param in the stream handed to {@code readFully} as the source of the bytes
 * @return the 64-bit value built from {@code readBuffer[0..7]}
 * @throws IOException declared here; presumably propagated from {@code readFully},
 *         which is defined outside this snippet
 */
public long readLongLE(InputStream in) throws IOException {
// Presumably fills readBuffer[0..7] from the stream; readFully is not shown here -- confirm.
readFully(in, readBuffer, 0, 8);
// "& 0xff" turns each (signed) byte into an unsigned 0..255 int before shifting.
// Bytes 0-2 are combined in int arithmetic (their sum fits in 24 bits).
// From byte 3 on, the value is cast to long first so the shift is a 64-bit shift:
// an int shift by 24 could set the sign bit, and int shifts by 32+ wrap the shift count.
return (((readBuffer[0] & 0xff) << 0)
+ ((readBuffer[1] & 0xff) << 8)
+ ((readBuffer[2] & 0xff) << 16)
+ ((long) (readBuffer[3] & 0xff) << 24)
+ ((long) (readBuffer[4] & 0xff) << 32)
+ ((long) (readBuffer[5] & 0xff) << 40)
+ ((long) (readBuffer[6] & 0xff) << 48)
+ ((long) (readBuffer[7] & 0xff) << 56));
}
{code}
Compiles into
{code}
7fc99381f393: movslq %r10d,%r10
7fc99381f396: movzbq 0x1f(%rbx,%r10,1),%r8
7fc99381f39c: movzbq 0x18(%rbx,%r10,1),%r9
7fc99381f3a2: movzbq 0x19(%rbx,%r10,1),%rdi
7fc99381f3a8: movzbq 0x1e(%rbx,%r10,1),%rdx
7fc99381f3ae: movzbq 0x1d(%rbx,%r10,1),%rsi
7fc99381f3b4: movzbq 0x1c(%rbx,%r10,1),%rax
7fc99381f3ba: movzbq 0x1b(%rbx,%r10,1),%r13
7fc99381f3c0: movzbq 0x1a(%rbx,%r10,1),%r10
7fc99381f3c6: shl $0x38,%r9
7fc99381f3ca: shl $0x28,%r10
7fc99381f3ce: shl $0x20,%r13
7fc99381f3d2: shl $0x18,%rax
7fc99381f3d6: shl $0x10,%rsi
7fc99381f3da: shl $0x8,%rdx
7fc99381f3de: shl $0x30,%rdi
7fc99381f3e2: add %r9,%rdi
7fc99381f3e5: add %rdi,%r10
7fc99381f3e8: add %r10,%r13
7fc99381f3eb: add %r13,%rax
7fc99381f3ee: add %rax,%rsi
7fc99381f3f1: add %rsi,%rdx
7fc99381f3f4: add %rdx,%r8
7fc99381f3f7: vmovq %r8,%xmm1
7fc99381f3fc: mov (%rsp),%r10
7fc99381f400: vmovsd %xmm1,0x18(%r10)
{code}
The eight separate single-byte loads (the movzbq instructions above) are slow and
show up as a significant memory bottleneck when reading doubles from a cached memory buffer.
> SerializationUtils::readDouble() is memory bandwidth bound
> ----------------------------------------------------------
>
> Key: ORC-186
> URL: https://issues.apache.org/jira/browse/ORC-186
> Project: ORC
> Issue Type: Bug
> Components: Java
> Reporter: Gopal V
> Attachments: perf-top-readDouble.png
>
>
> {code}
> public long readLongLE(InputStream in) throws IOException { // reads 8 bytes and builds a little-endian long (readBuffer[0] is the low byte)
> readFully(in, readBuffer, 0, 8); // presumably fills readBuffer[0..7]; readFully is defined outside this snippet
> return (((readBuffer[0] & 0xff) << 0) // "& 0xff" makes each byte an unsigned 0..255 int; bytes 0-2 are combined in int arithmetic
> + ((readBuffer[1] & 0xff) << 8)
> + ((readBuffer[2] & 0xff) << 16)
> + ((long) (readBuffer[3] & 0xff) << 24) // from byte 3 on, cast to long first so the shift is done in 64 bits
> + ((long) (readBuffer[4] & 0xff) << 32)
> + ((long) (readBuffer[5] & 0xff) << 40)
> + ((long) (readBuffer[6] & 0xff) << 48)
> + ((long) (readBuffer[7] & 0xff) << 56)); // readBuffer[7] supplies the most significant byte
> }
> {code}
> Compiles into
> {code}
> 7fc99381f393: movslq %r10d,%r10
> 7fc99381f396: movzbq 0x1f(%rbx,%r10,1),%r8
> 7fc99381f39c: movzbq 0x18(%rbx,%r10,1),%r9
> 7fc99381f3a2: movzbq 0x19(%rbx,%r10,1),%rdi
> 7fc99381f3a8: movzbq 0x1e(%rbx,%r10,1),%rdx
> 7fc99381f3ae: movzbq 0x1d(%rbx,%r10,1),%rsi
> 7fc99381f3b4: movzbq 0x1c(%rbx,%r10,1),%rax
> 7fc99381f3ba: movzbq 0x1b(%rbx,%r10,1),%r13
> 7fc99381f3c0: movzbq 0x1a(%rbx,%r10,1),%r10
> 7fc99381f3c6: shl $0x38,%r9
> 7fc99381f3ca: shl $0x28,%r10
> 7fc99381f3ce: shl $0x20,%r13
> 7fc99381f3d2: shl $0x18,%rax
> 7fc99381f3d6: shl $0x10,%rsi
> 7fc99381f3da: shl $0x8,%rdx
> 7fc99381f3de: shl $0x30,%rdi
> 7fc99381f3e2: add %r9,%rdi
> 7fc99381f3e5: add %rdi,%r10
> 7fc99381f3e8: add %r10,%r13
> 7fc99381f3eb: add %r13,%rax
> 7fc99381f3ee: add %rax,%rsi
> 7fc99381f3f1: add %rsi,%rdx
> 7fc99381f3f4: add %rdx,%r8
> 7fc99381f3f7: vmovq %r8,%xmm1
> 7fc99381f3fc: mov (%rsp),%r10
> 7fc99381f400: vmovsd %xmm1,0x18(%r10)
> {code}
> The eight separate single-byte loads (the movzbq instructions above) are slow and
> show up as a significant memory bottleneck when reading doubles from a cached memory buffer.
> !perf-top-readDouble.png!
--
This message was sent by Atlassian JIRA
(v6.3.15#6346)