Code0x58 opened a new issue #3670:
URL: https://github.com/apache/incubator-heron/issues/3670


   The current install is ~816M as seen in the Docker images, and roughly 20% 
of that is duplicate files. It may be worth cleaning up the install (preferably by 
reworking the install script) if anyone wants to reduce container sizes:
   
   ```shell
   (
       # Run in a subshell so the directory change does not leak to the caller;
       # bail out rather than scanning the wrong tree if the cd fails.
       cd /usr/local || exit 1

       # Hash every file exactly once. "-exec ... {} +" batches like xargs but
       # copes with whitespace in file names. Sorting puts identical hashes on
       # adjacent lines so awk can group them in a single pass.
       find . -type f -exec md5sum --binary {} + \
       | sort \
       | awk '
           # Emit "<path> <path> ... : <wasted size>" for the group collected so
           # far, but only when it holds more than one file, and add the wasted
           # bytes (every copy except the first) to the running total.
           function report_group(    cmd, size, wasted) {
               if (count < 2) {
                   return
               }
               # Paths containing a single quote are not supported by this quoting.
               cmd = "stat -c %s -- \047" first "\047"
               cmd | getline size
               close(cmd)   # one pipe per group; close it so descriptors do not pile up
               wasted = size * (count - 1)
               total += wasted
               printf "%s: ", names
               fflush()     # keep our text ahead of the output of the child process
               system("echo " sprintf("%.0f", wasted) " | numfmt --to=iec")
           }

           {
               # md5sum --binary prints "<hash> *<path>"; skip the hash, the
               # space and the "*" marker to recover the path, spaces included.
               path = substr($0, length($1) + 3)
               if ($1 != last) {
                   report_group()
                   last = $1
                   first = path
                   names = ""
                   count = 0
               }
               count++
               names = names path " "
           }

           END {
               # Flush the final group, then print the real grand total.
               report_group()
               printf "total duplicated size: "
               fflush()
               system("echo " sprintf("%.0f", total) " | numfmt --to=iec")
           }
           '
   ) | sort -t: -h -k2
   ```
   This gives a total of 190M of duplicated data, most of which (182.9M) is shared between 
`/usr/local/heron/lib/` and `/usr/local/heron/dist/heron-core/lib/`:
   ```
   : 0
   ./heron/conf/examples/roundrobin_packing.yaml ./heron/conf/mesos/client.yaml 
: 800
   ./heron/conf/local/client.yaml ./heron/conf/sandbox/client.yaml : 984
   ./heron/conf/aurora/client.yaml ./heron/conf/marathon/client.yaml : 1.1K
   ./heron/conf/kubernetes/uploader.yaml ./heron/conf/nomad/uploader.yaml : 1.2K
   ./heron/conf/slurm/statemgr.yaml ./heron/conf/yarn/statemgr.yaml : 1.2K
   ./heron/include/spout/irich-spout.h ./heron/include/topology/irich-spout.h : 
1.3K
   ./heron/include/spout/base-rich-spout.h 
./heron/include/topology/base-rich-spout.h : 1.5K
   ./heron/bin/heron-apiserver ./heron/bin/heron-apiserver.sh : 1.6K
   ./heron/conf/kubernetes/stateful.yaml ./heron/conf/nomad/stateful.yaml : 1.6K
   ./heron/conf/localzk/client.yaml ./heron/conf/slurm/client.yaml 
./heron/conf/yarn/client.yaml : 1.8K
   ./heron/include/spout/ispout-output-collector.h 
./heron/include/topology/ispout-output-collector.h : 2.5K
   ./heron/conf/local/scheduler.yaml ./heron/conf/localzk/scheduler.yaml 
./heron/conf/sandbox/scheduler.yaml : 2.7K
   ./heron/bin/heron-downloader-config ./heron/bin/heron-downloader-config.sh 
./heron/dist/heron-core/bin/heron-downloader-config 
./heron/dist/heron-core/bin/heron-downloader-config.sh : 2.8K
   ./heron/conf/local/stateful.yaml ./heron/conf/localzk/stateful.yaml 
./heron/conf/sandbox/stateful.yaml : 3.3K
   ./heron/bin/heron-downloader ./heron/bin/heron-downloader.sh 
./heron/dist/heron-core/bin/heron-downloader 
./heron/dist/heron-core/bin/heron-downloader.sh : 3.6K
   ./heron/conf/local/statemgr.yaml ./heron/conf/marathon/statemgr.yaml 
./heron/conf/mesos/statemgr.yaml ./heron/conf/sandbox/statemgr.yaml : 4.0K
   ./heron/conf/local/healthmgr.yaml ./heron/conf/sandbox/healthmgr.yaml 
./heron/conf/yarn/healthmgr.yaml : 4.4K
   ./heron/conf/local/uploader.yaml ./heron/conf/localzk/uploader.yaml 
./heron/conf/mesos/uploader.yaml ./heron/conf/sandbox/uploader.yaml 
./heron/conf/slurm/uploader.yaml : 4.7K
   ./heron/conf/marathon/stateful.yaml ./heron/conf/mesos/stateful.yaml 
./heron/conf/slurm/stateful.yaml ./heron/conf/yarn/stateful.yaml : 4.9K
   ./heron/include/spout/ispout.h ./heron/include/topology/ispout.h : 5.6K
   ./heron/conf/aurora/downloader.yaml ./heron/conf/examples/downloader.yaml 
./heron/conf/kubernetes/downloader.yaml ./heron/conf/local/downloader.yaml 
./heron/conf/localzk/downloader.yaml ./heron/conf/nomad/downloader.yaml 
./heron/conf/sandbox/downloader.yaml ./heron/conf/yarn/downloader.yaml total 
duplicated size: 7.7K
   ./heron/conf/aurora/packing.yaml ./heron/conf/kubernetes/packing.yaml 
./heron/conf/local/packing.yaml ./heron/conf/localzk/packing.yaml 
./heron/conf/marathon/packing.yaml ./heron/conf/mesos/packing.yaml 
./heron/conf/nomad/packing.yaml ./heron/conf/sandbox/packing.yaml 
./heron/conf/slurm/packing.yaml ./heron/conf/yarn/packing.yaml : 9.1K
   ./heron/conf/local/heron_internals.yaml 
./heron/conf/sandbox/heron_internals.yaml : 13K
   ./heron/conf/aurora/heron_internals.yaml 
./heron/conf/examples/heron_internals.yaml 
./heron/conf/localzk/heron_internals.yaml 
./heron/conf/marathon/heron_internals.yaml 
./heron/conf/mesos/heron_internals.yaml ./heron/conf/slurm/heron_internals.yaml 
./heron/conf/yarn/heron_internals.yaml : 74K
   ./heron/conf/aurora/metrics_sinks.yaml ./heron/conf/local/metrics_sinks.yaml 
./heron/conf/localzk/metrics_sinks.yaml 
./heron/conf/marathon/metrics_sinks.yaml ./heron/conf/mesos/metrics_sinks.yaml 
./heron/conf/sandbox/metrics_sinks.yaml ./heron/conf/slurm/metrics_sinks.yaml 
./heron/conf/yarn/metrics_sinks.yaml : 76K
   ./heron/dist/heron-core/lib/statemgr/heron-localfs-statemgr.jar 
./heron/lib/statemgr/heron-localfs-statemgr.jar : 5.7M
   ./heron/dist/heron-core/lib/scheduler/heron-scheduler.jar 
./heron/lib/scheduler/heron-scheduler.jar : 7.8M
   ./heron/dist/heron-core/lib/scheduler/heron-local-scheduler.jar 
./heron/lib/scheduler/heron-local-scheduler.jar : 8.0M
   ./heron/dist/heron-core/lib/scheduler/heron-marathon-scheduler.jar 
./heron/lib/scheduler/heron-marathon-scheduler.jar : 8.0M
   ./heron/dist/heron-core/lib/scheduler/heron-slurm-scheduler.jar 
./heron/lib/scheduler/heron-slurm-scheduler.jar : 8.0M
   ./heron/dist/heron-core/lib/statemgr/heron-zookeeper-statemgr.jar 
./heron/lib/statemgr/heron-zookeeper-statemgr.jar : 11M
   ./heron/dist/heron-core/lib/packing/heron-binpacking-packing.jar 
./heron/lib/packing/heron-binpacking-packing.jar 
./heron/lib/scheduler/heron-binpacking-packing.jar : 12M
   ./heron/dist/heron-core/lib/packing/heron-roundrobin-packing.jar 
./heron/lib/packing/heron-roundrobin-packing.jar 
./heron/lib/scheduler/heron-roundrobin-packing.jar : 12M
   ./heron/dist/heron-core/lib/metricscachemgr/heron-metricscachemgr.jar 
./heron/lib/metricscachemgr/heron-metricscachemgr.jar : 13M
   ./heron/dist/heron-core/lib/scheduler/heron-mesos-scheduler.jar 
./heron/lib/scheduler/heron-mesos-scheduler.jar : 14M
   ./heron/dist/heron-core/lib/scheduler/heron-nomad-scheduler.jar 
./heron/lib/scheduler/heron-nomad-scheduler.jar : 15M
   ./heron/dist/heron-core/lib/scheduler/heron-kubernetes-scheduler.jar 
./heron/lib/scheduler/heron-kubernetes-scheduler.jar : 36M
   ./heron/dist/heron-core/lib/downloaders/heron-downloader.jar 
./heron/lib/downloaders/heron-downloader.jar : 45M
   ```


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to