Code0x58 opened a new issue #3670:
URL: https://github.com/apache/incubator-heron/issues/3670
The current install is ~816M as seen in the docker images, with ~20%
duplicate files. It may be worth cleaning up the install (preferably by
reworking the install script) if anyone wants to reduce container sizes:
```shell
(
# Report groups of byte-identical files under /usr/local and how much space
# the redundant copies waste. Requires bash (process substitution) and the
# GNU versions of find, xargs, md5sum, sort and numfmt.
#
# Output: one line per duplicate group, "PATH PATH ... : WASTED", followed by
# a "total duplicated size: TOTAL" line; everything is then ordered by the
# human-readable size after the colon.
#
# Abort instead of scanning the wrong directory if the prefix is missing.
cd /usr/local || exit 1
awk '
# Emit the group collected so far (only if it really has duplicates), add its
# wasted bytes to the running total, and reset the per-group state.
function flush_group(    wasted) {
    if (count > 1) {
        # Every copy beyond the first one is redundant.
        wasted = size * (count - 1)
        total += wasted
        printf "%s: ", files
        # Make sure the paths are written before numfmt prints the size.
        fflush()
        system("echo " sprintf("%.0f", wasted) " | numfmt --to=iec")
    }
    files = ""
    count = 0
}

# First input: "SIZE<TAB>PATH" for every regular file. Build a size table so
# no stat process has to be spawned (and no path has to be shell-quoted).
FNR == NR {
    tab = index($0, "\t")
    bytes[substr($0, tab + 1)] = substr($0, 1, tab - 1)
    next
}

# Second input: md5sum lines sorted by hash. md5sum starts a line with a
# backslash when it had to escape the file name (names containing a backslash
# or a newline); those cannot be matched against the size table, so say so
# instead of silently mis-reporting them.
/^\\/ {
    print "skipping escaped path: " $0 > "/dev/stderr"
    next
}

{
    # Line layout is 32 hex digits, a space, a mode marker, then the path.
    # Slicing by position keeps paths that contain spaces intact.
    hash = substr($0, 1, 32)
    path = substr($0, 35)
    if (hash != last) {
        flush_group()
        last = hash
        size = bytes[path]
    }
    count++
    files = files path " "
}

END {
    flush_group()
    # A real grand total over all groups, not just the last group.
    printf "total duplicated size: "
    fflush()
    system("echo " sprintf("%.0f", total) " | numfmt --to=iec")
}
' <(find . -type f -printf '%s\t%p\n') \
  <(find . -type f -print0 | xargs -0 -r md5sum --binary | sort)
) | sort -t: -h -k2
```
This gives a total of 190M duplicated, most of which (182.9M) is duplicated
between `/usr/local/heron/lib/` and `/usr/local/heron/dist/heron-core/lib/`:
```
: 0
./heron/conf/examples/roundrobin_packing.yaml ./heron/conf/mesos/client.yaml
: 800
./heron/conf/local/client.yaml ./heron/conf/sandbox/client.yaml : 984
./heron/conf/aurora/client.yaml ./heron/conf/marathon/client.yaml : 1.1K
./heron/conf/kubernetes/uploader.yaml ./heron/conf/nomad/uploader.yaml : 1.2K
./heron/conf/slurm/statemgr.yaml ./heron/conf/yarn/statemgr.yaml : 1.2K
./heron/include/spout/irich-spout.h ./heron/include/topology/irich-spout.h :
1.3K
./heron/include/spout/base-rich-spout.h
./heron/include/topology/base-rich-spout.h : 1.5K
./heron/bin/heron-apiserver ./heron/bin/heron-apiserver.sh : 1.6K
./heron/conf/kubernetes/stateful.yaml ./heron/conf/nomad/stateful.yaml : 1.6K
./heron/conf/localzk/client.yaml ./heron/conf/slurm/client.yaml
./heron/conf/yarn/client.yaml : 1.8K
./heron/include/spout/ispout-output-collector.h
./heron/include/topology/ispout-output-collector.h : 2.5K
./heron/conf/local/scheduler.yaml ./heron/conf/localzk/scheduler.yaml
./heron/conf/sandbox/scheduler.yaml : 2.7K
./heron/bin/heron-downloader-config ./heron/bin/heron-downloader-config.sh
./heron/dist/heron-core/bin/heron-downloader-config
./heron/dist/heron-core/bin/heron-downloader-config.sh : 2.8K
./heron/conf/local/stateful.yaml ./heron/conf/localzk/stateful.yaml
./heron/conf/sandbox/stateful.yaml : 3.3K
./heron/bin/heron-downloader ./heron/bin/heron-downloader.sh
./heron/dist/heron-core/bin/heron-downloader
./heron/dist/heron-core/bin/heron-downloader.sh : 3.6K
./heron/conf/local/statemgr.yaml ./heron/conf/marathon/statemgr.yaml
./heron/conf/mesos/statemgr.yaml ./heron/conf/sandbox/statemgr.yaml : 4.0K
./heron/conf/local/healthmgr.yaml ./heron/conf/sandbox/healthmgr.yaml
./heron/conf/yarn/healthmgr.yaml : 4.4K
./heron/conf/local/uploader.yaml ./heron/conf/localzk/uploader.yaml
./heron/conf/mesos/uploader.yaml ./heron/conf/sandbox/uploader.yaml
./heron/conf/slurm/uploader.yaml : 4.7K
./heron/conf/marathon/stateful.yaml ./heron/conf/mesos/stateful.yaml
./heron/conf/slurm/stateful.yaml ./heron/conf/yarn/stateful.yaml : 4.9K
./heron/include/spout/ispout.h ./heron/include/topology/ispout.h : 5.6K
./heron/conf/aurora/downloader.yaml ./heron/conf/examples/downloader.yaml
./heron/conf/kubernetes/downloader.yaml ./heron/conf/local/downloader.yaml
./heron/conf/localzk/downloader.yaml ./heron/conf/nomad/downloader.yaml
./heron/conf/sandbox/downloader.yaml ./heron/conf/yarn/downloader.yaml total
duplicated size: 7.7K
./heron/conf/aurora/packing.yaml ./heron/conf/kubernetes/packing.yaml
./heron/conf/local/packing.yaml ./heron/conf/localzk/packing.yaml
./heron/conf/marathon/packing.yaml ./heron/conf/mesos/packing.yaml
./heron/conf/nomad/packing.yaml ./heron/conf/sandbox/packing.yaml
./heron/conf/slurm/packing.yaml ./heron/conf/yarn/packing.yaml : 9.1K
./heron/conf/local/heron_internals.yaml
./heron/conf/sandbox/heron_internals.yaml : 13K
./heron/conf/aurora/heron_internals.yaml
./heron/conf/examples/heron_internals.yaml
./heron/conf/localzk/heron_internals.yaml
./heron/conf/marathon/heron_internals.yaml
./heron/conf/mesos/heron_internals.yaml ./heron/conf/slurm/heron_internals.yaml
./heron/conf/yarn/heron_internals.yaml : 74K
./heron/conf/aurora/metrics_sinks.yaml ./heron/conf/local/metrics_sinks.yaml
./heron/conf/localzk/metrics_sinks.yaml
./heron/conf/marathon/metrics_sinks.yaml ./heron/conf/mesos/metrics_sinks.yaml
./heron/conf/sandbox/metrics_sinks.yaml ./heron/conf/slurm/metrics_sinks.yaml
./heron/conf/yarn/metrics_sinks.yaml : 76K
./heron/dist/heron-core/lib/statemgr/heron-localfs-statemgr.jar
./heron/lib/statemgr/heron-localfs-statemgr.jar : 5.7M
./heron/dist/heron-core/lib/scheduler/heron-scheduler.jar
./heron/lib/scheduler/heron-scheduler.jar : 7.8M
./heron/dist/heron-core/lib/scheduler/heron-local-scheduler.jar
./heron/lib/scheduler/heron-local-scheduler.jar : 8.0M
./heron/dist/heron-core/lib/scheduler/heron-marathon-scheduler.jar
./heron/lib/scheduler/heron-marathon-scheduler.jar : 8.0M
./heron/dist/heron-core/lib/scheduler/heron-slurm-scheduler.jar
./heron/lib/scheduler/heron-slurm-scheduler.jar : 8.0M
./heron/dist/heron-core/lib/statemgr/heron-zookeeper-statemgr.jar
./heron/lib/statemgr/heron-zookeeper-statemgr.jar : 11M
./heron/dist/heron-core/lib/packing/heron-binpacking-packing.jar
./heron/lib/packing/heron-binpacking-packing.jar
./heron/lib/scheduler/heron-binpacking-packing.jar : 12M
./heron/dist/heron-core/lib/packing/heron-roundrobin-packing.jar
./heron/lib/packing/heron-roundrobin-packing.jar
./heron/lib/scheduler/heron-roundrobin-packing.jar : 12M
./heron/dist/heron-core/lib/metricscachemgr/heron-metricscachemgr.jar
./heron/lib/metricscachemgr/heron-metricscachemgr.jar : 13M
./heron/dist/heron-core/lib/scheduler/heron-mesos-scheduler.jar
./heron/lib/scheduler/heron-mesos-scheduler.jar : 14M
./heron/dist/heron-core/lib/scheduler/heron-nomad-scheduler.jar
./heron/lib/scheduler/heron-nomad-scheduler.jar : 15M
./heron/dist/heron-core/lib/scheduler/heron-kubernetes-scheduler.jar
./heron/lib/scheduler/heron-kubernetes-scheduler.jar : 36M
./heron/dist/heron-core/lib/downloaders/heron-downloader.jar
./heron/lib/downloaders/heron-downloader.jar : 45M
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]