[ 
https://issues.apache.org/jira/browse/TEZ-3752?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16102046#comment-16102046
 ] 

Muhammad Samir Khan commented on TEZ-3752:
------------------------------------------

JOL dump:
Before:
-internals:
{code}
# Running 64-bit HotSpot VM.
# Using compressed oop with 3-bit shift.
# Using compressed klass with 3-bit shift.
# Objects are 8 bytes aligned.
# Field sizes by type: 4, 1, 1, 2, 2, 4, 4, 8, 8 [bytes]
# Array element sizes: 4, 1, 1, 2, 2, 4, 4, 8, 8 [bytes]

Instantiated the sample instance via 
org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MapOutput$InMemoryMapOutput(org.apache.tez.runtime.library.common.InputAttemptIdentifier,org.apache.tez.runtime.library.common.shuffle.orderedgrouped.FetchedInputAllocatorOrderedGrouped,long,boolean,org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MapOutput$1)

org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MapOutput$InMemoryMapOutput object internals:
 OFFSET  SIZE     TYPE DESCRIPTION                               VALUE
      0     4          (object header)                           01 00 00 00 (00000001 00000000 00000000 00000000) (1)
      4     4          (object header)                           00 00 00 00 (00000000 00000000 00000000 00000000) (0)
      8     4          (object header)                           78 12 01 f8 (01111000 00010010 00000001 11111000) (-134147464)
     12     4      int MapOutput.id                              1
     16     1  boolean MapOutput.primaryMapOutput                false
     17     3          (alignment/padding gap)
     20     4  org.apache.tez.runtime.library.common.InputAttemptIdentifier MapOutput.attemptIdentifier               null
     24     4  org.apache.tez.runtime.library.common.shuffle.orderedgrouped.FetchedInputAllocatorOrderedGrouped MapOutput.callback                        null
     28     4  org.apache.hadoop.io.BoundedByteArrayOutputStream InMemoryMapOutput.byteStream              (object)
Instance size: 32 bytes
Space losses: 3 bytes internal + 0 bytes external = 3 bytes total
{code}

-footprint:
{code}
# Running 64-bit HotSpot VM.
# Using compressed oop with 3-bit shift.
# Using compressed klass with 3-bit shift.
# Objects are 8 bytes aligned.
# Field sizes by type: 4, 1, 1, 2, 2, 4, 4, 8, 8 [bytes]
# Array element sizes: 4, 1, 1, 2, 2, 4, 4, 8, 8 [bytes]

Instantiated the sample instance via 
org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MapOutput$InMemoryMapOutput(org.apache.tez.runtime.library.common.InputAttemptIdentifier,org.apache.tez.runtime.library.common.shuffle.orderedgrouped.FetchedInputAllocatorOrderedGrouped,long,boolean,org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MapOutput$1)

org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MapOutput$InMemoryMapOutput@10bdf5e5d footprint:
     COUNT       AVG       SUM   DESCRIPTION
         1        16        16   [B
         1        32        32   org.apache.hadoop.io.BoundedByteArrayOutputStream
         1        32        32   org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MapOutput$InMemoryMapOutput
         3                  80   (total)
{code}

After:
-internals:
{code}
# Running 64-bit HotSpot VM.
# Using compressed oop with 3-bit shift.
# Using compressed klass with 3-bit shift.
# Objects are 8 bytes aligned.
# Field sizes by type: 4, 1, 1, 2, 2, 4, 4, 8, 8 [bytes]
# Array element sizes: 4, 1, 1, 2, 2, 4, 4, 8, 8 [bytes]

Instantiated the sample instance via 
org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MapOutput$InMemoryMapOutput(org.apache.tez.runtime.library.common.InputAttemptIdentifier,org.apache.tez.runtime.library.common.shuffle.orderedgrouped.FetchedInputAllocatorOrderedGrouped,long,boolean,org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MapOutput$1)

org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MapOutput$InMemoryMapOutput object internals:
 OFFSET  SIZE     TYPE DESCRIPTION                               VALUE
      0     4          (object header)                           01 00 00 00 (00000001 00000000 00000000 00000000) (1)
      4     4          (object header)                           00 00 00 00 (00000000 00000000 00000000 00000000) (0)
      8     4          (object header)                           77 12 01 f8 (01110111 00010010 00000001 11111000) (-134147465)
     12     4      int MapOutput.id                              1
     16     1  boolean MapOutput.primaryMapOutput                false
     17     3          (alignment/padding gap)
     20     4  org.apache.tez.runtime.library.common.InputAttemptIdentifier MapOutput.attemptIdentifier               null
     24     4  org.apache.tez.runtime.library.common.shuffle.orderedgrouped.FetchedInputAllocatorOrderedGrouped MapOutput.callback                        null
     28     4   byte[] InMemoryMapOutput.byteArray               []
Instance size: 32 bytes
Space losses: 3 bytes internal + 0 bytes external = 3 bytes total
{code}

-footprint:
{code}
# Running 64-bit HotSpot VM.
# Using compressed oop with 3-bit shift.
# Using compressed klass with 3-bit shift.
# Objects are 8 bytes aligned.
# Field sizes by type: 4, 1, 1, 2, 2, 4, 4, 8, 8 [bytes]
# Array element sizes: 4, 1, 1, 2, 2, 4, 4, 8, 8 [bytes]

Instantiated the sample instance via 
org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MapOutput$InMemoryMapOutput(org.apache.tez.runtime.library.common.InputAttemptIdentifier,org.apache.tez.runtime.library.common.shuffle.orderedgrouped.FetchedInputAllocatorOrderedGrouped,long,boolean,org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MapOutput$1)

org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MapOutput$InMemoryMapOutput@148080bbd footprint:
     COUNT       AVG       SUM   DESCRIPTION
         1        16        16   [B
         1        32        32   org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MapOutput$InMemoryMapOutput
         2                  48   (total)


{code}


> Reduce Object size of InMemoryMapOutput for large jobs
> ------------------------------------------------------
>
>                 Key: TEZ-3752
>                 URL: https://issues.apache.org/jira/browse/TEZ-3752
>             Project: Apache Tez
>          Issue Type: Bug
>            Reporter: Jonathan Eagles
>            Assignee: Jonathan Eagles
>         Attachments: TEZ-3752.001.patch
>
>
> Follow-on jira from TEZ-3732. The InMemoryMapOutput has a 
> BoundedByteArrayOutputStream that is only used in the Merged MapOutput case. 



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to