Repository: chukwa
Updated Branches:
  refs/heads/master 971641927 -> 337a0a974
Chukwa JIRA-819 Project: http://git-wip-us.apache.org/repos/asf/chukwa/repo Commit: http://git-wip-us.apache.org/repos/asf/chukwa/commit/d1b7ef94 Tree: http://git-wip-us.apache.org/repos/asf/chukwa/tree/d1b7ef94 Diff: http://git-wip-us.apache.org/repos/asf/chukwa/diff/d1b7ef94 Branch: refs/heads/master Commit: d1b7ef949c7db832ae3476310a2eeab2b5103b22 Parents: 9716419 Author: faywang <[email protected]> Authored: Mon May 15 10:41:23 2017 -0700 Committer: faywang <[email protected]> Committed: Mon May 15 10:41:23 2017 -0700 ---------------------------------------------------------------------- contrib/docker/Dockerfile | 67 +++++- contrib/docker/Makefile.config | 118 +++++++++++ contrib/docker/MetricsCollector.java | 332 ++++++++++++++++++++++++++++++ contrib/docker/caffe-testdata.tar.gz | Bin 0 -> 5946368 bytes contrib/docker/config-caffe.sh | 59 ++++++ contrib/docker/makeImage.sh | 6 + contrib/docker/setup-image.sh | 4 +- contrib/docker/start-all.sh | 3 +- contrib/docker/tera.sh | 9 + contrib/docker/test_solver.prototxt | 24 +++ contrib/docker/train.sh | 32 +++ contrib/docker/train_test.prototxt | 178 ++++++++++++++++ 12 files changed, 821 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/chukwa/blob/d1b7ef94/contrib/docker/Dockerfile ---------------------------------------------------------------------- diff --git a/contrib/docker/Dockerfile b/contrib/docker/Dockerfile index 0afad77..42b3718 100644 --- a/contrib/docker/Dockerfile +++ b/contrib/docker/Dockerfile @@ -12,19 +12,67 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -FROM centos:6 +FROM centos:7 MAINTAINER Apache -RUN yum install -y tar wget bind-utils ntpd java-1.7.0-openjdk which openssh-server openssh-clients lsof +ENV container docker + +RUN yum -y update && yum clean all + +RUN (cd /lib/systemd/system/sysinit.target.wants/; for i in *; do [ $i == systemd-tmpfiles-setup.service ] || rm -f $i; done); \ +rm -f /lib/systemd/system/multi-user.target.wants/*;\ +rm -f /etc/systemd/system/*.wants/*;\ +rm -f /lib/systemd/system/local-fs.target.wants/*; \ +rm -f /lib/systemd/system/sockets.target.wants/*udev*; \ +rm -f /lib/systemd/system/sockets.target.wants/*initctl*; \ +rm -f /lib/systemd/system/basic.target.wants/*;\ +rm -f /lib/systemd/system/anaconda.target.wants/*; \ +rm -f /run/nologin + + +RUN yum install -y net-tools tar wget bind-utils ntpd java-1.8.0-openjdk which openssh-server openssh-clients lsof +RUN yum -y install epel-release java-1.8.0-openjdk-devel.x86_64 +RUN yum groupinstall -y 'Development Tools' +RUN yum install -y protobuf-devel leveldb-devel snappy-devel opencv-devel boost-devel hdf5-devel +RUN yum install -y gflags-devel glog-devel lmdb-devel +RUN yum install -y gcc gcc-c++ numpy scipy cmake git python-devel +RUN yum install -y openblas openblas-devel atlas-devel + RUN mkdir -p /opt/apache +RUN wget https://archive.apache.org/dist/spark/spark-1.6.0/spark-1.6.0-bin-hadoop2.6.tgz +RUN tar xf spark-1.6.0-bin-hadoop2.6.tgz -C /opt/apache +RUN ln -s /opt/apache/spark-* /opt/apache/spark + +ADD Makefile.config /tmp/Makefile.config +ADD config-caffe.sh /tmp/config-caffe.sh +RUN mkdir -p /caffe-test/train +RUN mkdir -p /caffe-test/train/data +RUN mkdir -p /caffe-test/chukwa +RUN mkdir -p /caffe-test/tera + +ADD tera.sh /caffe-test/tera/tera.sh +ADD MetricsCollector.java 
/caffe-test/chukwa/MetricsCollector.java +ADD caffe-testdata.tar.gz /caffe-test/train/data/ + +ADD makeImage.sh /caffe-test/train/makeImage.sh +ADD test_solver.prototxt /caffe-test/train/test_solver.prototxt +ADD train_test.prototxt /caffe-test/train/train_test.prototxt +ADD train.sh /caffe-test/train/train.sh +RUN wget https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/google-glog/glog-0.3.3.tar.gz +RUN wget https://github.com/schuhschuh/gflags/archive/master.zip +RUN git clone https://github.com/LMDB/lmdb +RUN wget http://www-eu.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz +RUN git clone https://github.com/yahoo/CaffeOnSpark.git --recursive +RUN bash /tmp/config-caffe.sh + RUN wget https://www.apache.org/dist/zookeeper/zookeeper-3.4.6/zookeeper-3.4.6.tar.gz RUN wget https://www.apache.org/dist/hadoop/common/hadoop-2.7.2/hadoop-2.7.2.tar.gz -RUN wget https://www.apache.org/dist/hbase/1.2.4/hbase-1.2.4-bin.tar.gz -RUN wget https://www.apache.org/dist/lucene/solr/5.5.3/solr-5.5.3.tgz +RUN wget https://www.apache.org/dist/hbase/1.2.5/hbase-1.2.5-bin.tar.gz +RUN wget https://www.apache.org/dist/lucene/solr/5.5.4/solr-5.5.4.tgz ADD chukwa-0.8.0.tar.gz /opt/apache/ RUN tar xf zookeeper-3.4.6.tar.gz -C /opt/apache RUN tar xf hadoop-2.7.2.tar.gz -C /opt/apache -RUN tar xf hbase-1.2.4-bin.tar.gz -C /opt/apache -RUN tar xf solr-5.5.3.tgz -C /opt/apache +RUN tar xf hbase-1.2.5-bin.tar.gz -C /opt/apache +RUN tar xf solr-5.5.4.tgz -C /opt/apache RUN rm -f zookeeper-*.tar.gz hadoop-*.tar.gz hbase-*.tar.gz solr-*.tgz RUN ln -s /opt/apache/zookeeper-* /opt/apache/zookeeper RUN ln -s /opt/apache/hadoop-* /opt/apache/hadoop @@ -39,7 +87,8 @@ ADD hadoop/* /opt/apache/hadoop/etc/hadoop/ ADD hbase/* /opt/apache/hbase/conf/ ADD start-all.sh /etc/start-all.sh ADD setup-image.sh / -RUN bash setup-image.sh -RUN rm -f /setup-image.sh + EXPOSE 4080 50070 8088 16010 7574 -CMD ["/etc/start-all.sh"] + +CMD [ "/usr/lib/systemd/systemd" ] + http://git-wip-us.apache.org/repos/asf/chukwa/blob/d1b7ef94/contrib/docker/Makefile.config ---------------------------------------------------------------------- diff --git a/contrib/docker/Makefile.config b/contrib/docker/Makefile.config new file mode 100644 index 0000000..9a1fd19 --- /dev/null +++ b/contrib/docker/Makefile.config @@ -0,0 +1,118 @@ +## Refer to http://caffe.berkeleyvision.org/installation.html +# Contributions simplifying and improving our build system are welcome! + +# cuDNN acceleration switch (uncomment to build with cuDNN). +# USE_CUDNN := 1 + +# CPU-only switch (uncomment to build without GPU support). +CPU_ONLY := 1 + +# uncomment to disable IO dependencies and corresponding data layers +# USE_OPENCV := 0 +# USE_LEVELDB := 0 +# USE_LMDB := 0 + +# uncomment to allow MDB_NOLOCK when reading LMDB files (only if necessary) +# You should not set this flag if you will be reading LMDBs with any +# possibility of simultaneous read and write +# ALLOW_LMDB_NOLOCK := 1 + +# Uncomment if you're using OpenCV 3 +# OPENCV_VERSION := 3 + +# To customize your choice of compiler, uncomment and set the following. +# N.B. the default for Linux is g++ and the default for OSX is clang++ +# CUSTOM_CXX := g++ + +# CUDA directory contains bin/ and lib/ directories that we need. +CUDA_DIR := /usr/local/cuda +# On Ubuntu 14.04, if cuda tools are installed via +# "sudo apt-get install nvidia-cuda-toolkit" then use this instead: +# CUDA_DIR := /usr + +# CUDA architecture setting: going with all of them. 
+# For CUDA < 6.0, comment the *_50 lines for compatibility. +CUDA_ARCH := -gencode arch=compute_20,code=sm_20 \ + -gencode arch=compute_20,code=sm_21 \ + -gencode arch=compute_30,code=sm_30 \ + -gencode arch=compute_35,code=sm_35 \ + -gencode arch=compute_50,code=sm_50 \ + -gencode arch=compute_50,code=compute_50 + +# BLAS choice: +# atlas for ATLAS (default) +# mkl for MKL +# open for OpenBlas +BLAS := open +# Custom (MKL/ATLAS/OpenBLAS) include and lib directories. +# Leave commented to accept the defaults for your choice of BLAS +# (which should work)! + BLAS_INCLUDE := /usr/include/openblas +# BLAS_LIB := /path/to/your/blas + +# Homebrew puts openblas in a directory that is not on the standard search path +# BLAS_INCLUDE := $(shell brew --prefix openblas)/include +# BLAS_LIB := $(shell brew --prefix openblas)/lib + +# This is required only if you will compile the matlab interface. +# MATLAB directory should contain the mex binary in /bin. +# MATLAB_DIR := /usr/local +# MATLAB_DIR := /Applications/MATLAB_R2012b.app + +# NOTE: this is required only if you will compile the python interface. +# We need to be able to find Python.h and numpy/arrayobject.h. +PYTHON_INCLUDE := /usr/include/python2.7 \ + /usr/lib64/python2.7/site-packages/numpy/core/include + +# Anaconda Python distribution is quite popular. Include path: +# Verify anaconda location, sometimes it's in root. +# ANACONDA_HOME := $(HOME)/anaconda +# PYTHON_INCLUDE := $(ANACONDA_HOME)/include \ + # $(ANACONDA_HOME)/include/python2.7 \ + # $(ANACONDA_HOME)/lib/python2.7/site-packages/numpy/core/include \ + +# Uncomment to use Python 3 (default is Python 2) +# PYTHON_LIBRARIES := boost_python3 python3.5m +# PYTHON_INCLUDE := /usr/include/python3.5m \ +# /usr/lib/python3.5/dist-packages/numpy/core/include + +# We need to be able to find libpythonX.X.so or .dylib. +PYTHON_LIB := /usr/lib +# PYTHON_LIB := $(ANACONDA_HOME)/lib + +# Homebrew installs numpy in a non standard path (keg only) +# PYTHON_INCLUDE += $(dir $(shell python -c 'import numpy.core; print(numpy.core.__file__)'))/include +# PYTHON_LIB += $(shell brew --prefix numpy)/lib + +# Uncomment to support layers written in Python (will link against Python libs) +# WITH_PYTHON_LAYER := 1 + +# Whatever else you find you need goes here. +INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include +LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib + +# If Homebrew is installed at a non standard location (for example your home directory) and you use it for general dependencies +# INCLUDE_DIRS += $(shell brew --prefix)/include +# LIBRARY_DIRS += $(shell brew --prefix)/lib + +# Uncomment to use `pkg-config` to specify OpenCV library paths. +# (Usually not necessary -- OpenCV libraries are normally installed in one of the above $LIBRARY_DIRS.) +# USE_PKG_CONFIG := 1 + +BUILD_DIR := build +DISTRIBUTE_DIR := distribute + +# Uncomment for debugging. Does not work on OSX due to https://github.com/BVLC/caffe/issues/171 +# DEBUG := 1 + +# The ID of the GPU that 'make runtest' will use to run unit tests. 
+TEST_GPUID := 0 + +# enable pretty build (comment to see full commands) +Q ?= @ + +#INCLUDE_DIRS += /usr/jdk64/java-1.8.0-openjdk-1.8.0.77-0.b03.el7_2.x86_64/include + +INCLUDE_DIRS += /usr/lib/jvm/java-1.7.0-openjdk-1.7.0.131-2.6.9.0.el7_3.x86_64/include + + http://git-wip-us.apache.org/repos/asf/chukwa/blob/d1b7ef94/contrib/docker/MetricsCollector.java ---------------------------------------------------------------------- diff --git a/contrib/docker/MetricsCollector.java b/contrib/docker/MetricsCollector.java new file mode 100644 index 0000000..84f7f40 --- /dev/null +++ b/contrib/docker/MetricsCollector.java @@ -0,0 +1,332 @@ +package chukwa; + +import java.awt.BasicStroke; +import java.awt.Color; +import java.awt.Graphics2D; +import java.awt.image.BufferedImage; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.Iterator; +import java.util.List; +import java.util.Set; +import java.util.Timer; +import java.util.TimerTask; +import java.util.Calendar; +import java.util.*; + +import javax.imageio.ImageIO; + +import org.json.simple.JSONObject; +import org.apache.hadoop.chukwa.datastore.ChukwaHBaseStore; +import org.apache.hadoop.chukwa.hicc.bean.Series; + +// http://10.177.68.181:8080/job/Reinstall%20Cluster/api/json +// curl -X GET -u root:password -i http://10.177.68.181:8080/job/Create%20Cluster/147/parameters/ + +// hbase(main):011:0> get 'chukwa_meta', "HBaseMetrics" + + +//yum -y install java-1.8.0-openjdk-devel.x86_64 + +//export CLASSPATH=/opt/apache/hadoop/etc/hadoop:/opt/apache/hbase/conf:/opt/apache/chukwa-0.8.0/share/chukwa/*:/opt/apache/chukwa-0.8.0/share/chukwa/lib/*:$CLASSPATH + +//export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/apache/hadoop/bin:/opt/apache/hbase/bin +//su hdfs -c "hadoop dfs -mkdir -p /user/hdfs" +//while : +//do +// su hdfs -c "hadoop jar /opt/apache/hadoop-2.7.2/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.2.jar teragen 100 /user/hdfs/terasort-input" +// su hdfs -c "hadoop jar /opt/apache/hadoop-2.7.2/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.2.jar terasort /user/hdfs/terasort-input /user/hdfs/terasort-output" +// su hdfs -c "hadoop dfs -rmr -skipTrash /user/hdfs/terasort-input/" +// su hdfs -c "hadoop dfs -rmr -skipTrash /user/hdfs/terasort-output/" +//done + +public class MetricsCollector +{ + final static String[] system_metrics = { + "SystemMetrics.LoadAverage.1", + "SystemMetrics.cpu.combined", + "SystemMetrics.cpu.sys", + "SystemMetrics.cpu.user", + "SystemMetrics.memory.FreePercent", + "SystemMetrics.memory.UsedPercent", + "SystemMetrics.disk.ReadBytes", + "SystemMetrics.disk.WriteBytes", + "SystemMetrics.network.TxBytes", + "SystemMetrics.network.RxBytes", + "SystemMetrics.swap.Total", + "SystemMetrics.swap.Used", + "SystemMetrics.swap.Free" + }; + + final static String [] hadoop_metrics = { + "HadoopMetrics.jvm.JvmMetrics.MemHeapUsedM", + //"HadoopMetrics.jvm.JvmMetrics.MemHeapMaxM", + //"HadoopMetrics.dfs.FSNamesystem.CapacityRemainingGB", + //"HadoopMetrics.dfs.FSNamesystem.CapacityTotalGB", + //"HadoopMetrics.yarn.ClusterMetrics.NumActiveNMs", + //"HadoopMetrics.yarn.ClusterMetrics.NumLostNMs", + //"HadoopMetrics.dfs.FSNamesystem.HAState" , + //"HadoopMetrics.dfs.FSNamesystem.TotalLoad", + //"HadoopMetrics.rpc.rpc.RpcProcessingTimeAvgTime", + //"HadoopMetrics.dfs.FSNamesystem.StaleDataNodes" + }; + //final static String [] hadoop_processes = {"NameNode", "DataNode", "NodeManager", 
"ResourceManager"}; + final static String [] hadoop_processes = {"NodeManager"}; + + final static String [] hbase_metrics = { + "HBaseMetrics.jvm.JvmMetrics.MemHeapUsedM", + //"HBaseMetrics.jvm.JvmMetrics.MemHeapMaxM" + }; + final static String [] hbase_processes = {"Master", "RegionServer"}; + + private Timer getMetricSnapshotTimer = null; + private String hostname = null; + private int intervalInMin; + private long intervalInMilli; + + MetricsCollector (String hostname, int intervalInMin) + { + this.hostname = hostname; + this.intervalInMin = intervalInMin; + this.intervalInMilli = intervalInMin * 60 * 1000; + } + + public void startGetMetricSnapshotTimer (MetricsCollector tester) + { + getMetricSnapshotTimer = new Timer ("GetMetricSnapshot", true); + getMetricSnapshotTimer.schedule (new GetMetricSnapshotTimerTask (tester), 0, intervalInMilli); + } + + public void cancelFetMetricSnapshotTimer () + { + if (getMetricSnapshotTimer!= null) + getMetricSnapshotTimer.cancel (); + } + + class GetMetricSnapshotTimerTask extends TimerTask + { + MetricsCollector tester = null; + String hostname = null; + BufferedWriter bufferedWriter = null; + + GetMetricSnapshotTimerTask (MetricsCollector tester) + { + this.tester = tester; + hostname = tester.hostname; + String outputFileName = "labels.txt"; + + try { + FileWriter fileWriter = new FileWriter(outputFileName); + bufferedWriter = new BufferedWriter(fileWriter); + } catch (IOException e) { + e.printStackTrace (); + } + + } + + public void run () + { + // one hour + TimeZone tz = TimeZone.getTimeZone("UTC"); + Calendar now = Calendar.getInstance(tz); + long currTime=now.getTimeInMillis(); + + System.out.println ("currTime in UTC: " + currTime); + System.out.println ("currTime in current time zone" + System.currentTimeMillis ()); + + long startTime = currTime - intervalInMilli; + long endTime = currTime; + try { + System.out.println ("About to run"); + //tester.getSystemMetrics (hostname, startTime, endTime); + //tester.getHbaseMetrics (hostname, startTime, endTime); + tester.getHadoopMetrics (hostname, startTime, endTime, bufferedWriter); + System.out.println ("Done run"); + } catch (Exception e) { + e.printStackTrace (); + } + } + } + + + private void getHadoopMetrics (String hostname, long startTime, long endTime, BufferedWriter bufferedWriter) throws Exception + { + + for (int j = 0; j < hadoop_metrics.length; j++) { + System.out.println ("--------------------------------"); + System.out.println ("metrics: " + hadoop_metrics [j]); + for (int i = 0; i < hadoop_processes.length; i++) { + String source = hostname + ":" + hadoop_processes [i]; + System.out.println ("source: " + source); + System.out.println ("startTime: " + startTime); + System.out.println ("endTime: " + endTime); + Series series = ChukwaHBaseStore.getSeries (hadoop_metrics [j], source, startTime, endTime); + String value = series.toString (); + System.out.println ("value: " + value); + + JSONObject jsonObj = (JSONObject) series.toJSONObject (); + Set set = jsonObj.keySet (); + Iterator iter = set.iterator (); + List list = (List) jsonObj.get ("data"); + if (list == null) + continue; + int size = list.size (); + System.out.println ("size: " + size); + + if (size > 0 ) { + String name = hadoop_metrics [j] + "_" + hadoop_processes [i] + "_" + hostname; + drawImage (list, name, startTime, endTime, bufferedWriter); + } + } + } + } + + private void getHbaseMetrics (String hostname, long startTime, long endTime, BufferedWriter bufferedWriter) throws Exception + { + for (int j = 0; j < 
hbase_metrics.length; j++) { + System.out.println ("--------------------------------"); + System.out.println ("metrics: " + hbase_metrics [j]); + for (int i = 0; i < hbase_processes.length; i++) { + String source = hostname + ":" + hbase_processes [i]; + System.out.println ("source: " + source); + System.out.println ("startTime: " + startTime); + System.out.println ("endTime: " + endTime); + Series series = ChukwaHBaseStore.getSeries (hbase_metrics [j], source, startTime, endTime); + String value = series.toString (); + System.out.println ("value: " + value); + + JSONObject jsonObj = (JSONObject) series.toJSONObject (); + Set set = jsonObj.keySet (); + Iterator iter = set.iterator (); + List list = (List) jsonObj.get ("data"); + if (list == null) + continue; + int size = list.size (); + System.out.println ("size: " + size); + + if (size > 0 ) { + String name = hbase_metrics [j] + "_" + hbase_processes [i] + "_" + hostname; + drawImage (list, name, startTime, endTime, bufferedWriter); + } + } + } + } + + private void getSystemMetrics (String source, long startTime, long endTime, BufferedWriter bufferedWriter) throws Exception + { + for (int j = 0; j < system_metrics.length; j++) { + System.out.println ("--------------------------------"); + + System.out.println ("metrics: " + system_metrics [j]); + System.out.println ("source: " + source); + System.out.println ("startTime: " + startTime); + System.out.println ("endTime: " + endTime); + Series series = ChukwaHBaseStore.getSeries (system_metrics [j], source, startTime, endTime); + String value = series.toString (); + System.out.println ("value: " + value); + + JSONObject jsonObj = (JSONObject) series.toJSONObject (); + Set set = jsonObj.keySet (); + Iterator iter = set.iterator (); + List list = (List) jsonObj.get ("data"); + if (list == null) + continue; + int size = list.size (); + System.out.println ("size: " + size); + + if (size > 0 ) { + String name = system_metrics [j] + "_" + hostname; + drawImage (list, name, startTime, endTime, bufferedWriter); + } + } + } + + public void drawImage (List list, String name, long startTime, long endTime, BufferedWriter bufferedWriter) throws Exception + { + String fileName = name + "_" + startTime; + PrintWriter writer = new PrintWriter(fileName + ".csv", "UTF-8"); + int size = list.size (); + long startTimeX = (Long) ((List) list.get (0)).get (0) / MyPoint.XSCALE; + long endTimeX = (Long) ((List) list.get (size - 1 )).get (0) / MyPoint.XSCALE; + int x_size = (int) (endTimeX - startTimeX); + int y_size = 1000; + System.out.println ("x_size: " + x_size); + BufferedImage img = new BufferedImage(x_size, y_size, BufferedImage.TYPE_INT_ARGB); + + Graphics2D ig2 = img.createGraphics(); + ig2.setBackground(Color.WHITE); + + ig2.setColor (Color.BLUE); + ig2.setStroke(new BasicStroke(2)); + + MyPoint prevPoint = null; + MyPoint currPoint = null; + for (int i = 0; i < size; i++) { + List point = (List) list.get (i); + long time = (Long) point.get (0); + double val = (Double) point.get (1); + currPoint = new MyPoint (time, val); + System.out.println ("time:" + time + ", value:" + val); + + if (prevPoint != null) { + int x1 = (int) (prevPoint.time - startTimeX); + int x2 = (int) (currPoint.time - startTimeX); + + int y1 = (int) (y_size - prevPoint.data); + int y2 = (int) (y_size - currPoint.data); + + System.out.println ("point 1: " + x1 + ", " + y1); + System.out.println ("point 2: " + x2 + ", " + y2); + + ig2.drawLine ((int) (prevPoint.time - startTimeX), (int) (y_size - prevPoint.data), (int) (currPoint.time - 
startTimeX), (int) (y_size - currPoint.data)); + } + prevPoint = currPoint; + writer.println(currPoint.time + "," + currPoint.data ); + + } + String imageFileName = fileName + ".png"; + File f = new File(imageFileName); + ImageIO.write(img, "PNG", f); + bufferedWriter.write (imageFileName + " 0\n"); + bufferedWriter.flush (); + writer.close(); + } + + + static class MyPoint + { + public long time; + public double data; + static int XSCALE = 1000; + static int YSCALE = 100; + + public MyPoint (long time, double data) { + this.time = time/XSCALE; + this.data = data; + } + } + + public static void main (String [] args) throws Exception { + String hostname = args [0]; + String interval = args [1]; // in min + int intervalInMin = Integer.parseInt (interval); + + MetricsCollector tester = new MetricsCollector (hostname, intervalInMin); + tester.startGetMetricSnapshotTimer (tester); + + try { + Object lock = new Object(); + synchronized (lock) { + while (true) { + lock.wait(); + } + } + } catch (InterruptedException ex) { + } + } + + +} http://git-wip-us.apache.org/repos/asf/chukwa/blob/d1b7ef94/contrib/docker/caffe-testdata.tar.gz ---------------------------------------------------------------------- diff --git a/contrib/docker/caffe-testdata.tar.gz b/contrib/docker/caffe-testdata.tar.gz new file mode 100644 index 0000000..3403719 Binary files /dev/null and b/contrib/docker/caffe-testdata.tar.gz differ http://git-wip-us.apache.org/repos/asf/chukwa/blob/d1b7ef94/contrib/docker/config-caffe.sh ---------------------------------------------------------------------- diff --git a/contrib/docker/config-caffe.sh b/contrib/docker/config-caffe.sh new file mode 100644 index 0000000..4a98ce7 --- /dev/null +++ b/contrib/docker/config-caffe.sh @@ -0,0 +1,59 @@ +cd / +tar zxvf glog-0.3.3.tar.gz +cd glog-0.3.3 +./configure +make && make install + +cd / +unzip master.zip +cd gflags-master +mkdir build && cd build +export CXXFLAGS="-fPIC" && cmake .. && make VERBOSE=1 +make && make install + +cd / +cd lmdb/libraries/liblmdb +make && make install + +cd /opt +mv /apache-maven-3.3.9-bin.tar.gz . +tar xzf apache-maven-3.3.9-bin.tar.gz +ln -s apache-maven-3.3.9 maven + +echo "export M2_HOME=/opt/maven" > /etc/profile.d/maven.sh +echo "PATH=/opt/maven/bin:${PATH}" >> /etc/profile.d/maven.sh +echo "export SPARK_HOME=/opt/apache/spark-2.1.0-bin-hadoop2.7" >> /etc/profile.d/maven.sh +echo "export HADOOP_HOME=/opt/apache/hadoop-2.7.2" >> /etc/profile.d/maven.sh + +source /etc/profile.d/maven.sh + +cp /tmp/Makefile.config /CaffeOnSpark/caffe-public/ + +#export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.121-0.b13.el7_3.x86_64 + +JAVA_HOME=$(ls /usr/lib/jvm | grep java-1.8.0-openjdk-1.8.0 | grep -v jre) +JAVA_HOME=/usr/lib/jvm/${JAVA_HOME} +cp $JAVA_HOME/include/linux/* $JAVA_HOME/include/ + +echo "INCLUDE_DIRS += $JAVA_HOME/include" >> /CaffeOnSpark/caffe-public/Makefile.config + +cd /CaffeOnSpark/caffe-public/ +make all + +export SPARK_HOME=/opt/apache/spark-1.6.0-bin-hadoop2.6 + +cd .. 
+make build + + +#export SPARK_HOME=/opt/apache/spark-1.6.0-bin-hadoop2.6 +#${SPARK_HOME}/sbin/start-master.sh +#export MASTER_URL=spark://1dafed1ac7bf:7077 +#export SPARK_WORKER_INSTANCES=1 +#export CORES_PER_WORKER=1 +#export TOTAL_CORES=$((${CORES_PER_WORKER}*${SPARK_WORKER_INSTANCES})) +#${SPARK_HOME}/sbin/start-slave.sh -c $CORES_PER_WORKER -m 3G ${MASTER_URL} + + + + http://git-wip-us.apache.org/repos/asf/chukwa/blob/d1b7ef94/contrib/docker/makeImage.sh ---------------------------------------------------------------------- diff --git a/contrib/docker/makeImage.sh b/contrib/docker/makeImage.sh new file mode 100644 index 0000000..0d31f15 --- /dev/null +++ b/contrib/docker/makeImage.sh @@ -0,0 +1,6 @@ +GLOG_logtostderr=1 /CaffeOnSpark/caffe-public/.build_release/tools/convert_imageset \ + --resize_height=200 --resize_width=200 --shuffle --encoded \ + /caffe-test/train/data/ \ + /caffe-test/train/data/labels.txt \ + /caffe-test/train/lmdb + http://git-wip-us.apache.org/repos/asf/chukwa/blob/d1b7ef94/contrib/docker/setup-image.sh ---------------------------------------------------------------------- diff --git a/contrib/docker/setup-image.sh b/contrib/docker/setup-image.sh index fcaffcc..1e90fca 100755 --- a/contrib/docker/setup-image.sh +++ b/contrib/docker/setup-image.sh @@ -14,6 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +rm -f /run/nologin + # Initialize Users groupadd -g 123 hadoop groupadd -g 201 hdfs @@ -106,7 +108,7 @@ export JAVA_HOME=/usr/lib/jvm/jre export HADOOP_CONF_DIR=/opt/apache/hadoop/etc/hadoop export HBASE_CONF_DIR=/opt/apache/hbase/conf export CHUKWA_CONF_DIR=/opt/apache/chukwa/etc/chukwa -service sshd start +systemctl status sshd su - hdfs -c '/opt/apache/hadoop/bin/hadoop namenode -format' su - hdfs -c '/opt/apache/hadoop/sbin/start-all.sh' su - zookeeper -c '/opt/apache/zookeeper/bin/zkServer.sh start' http://git-wip-us.apache.org/repos/asf/chukwa/blob/d1b7ef94/contrib/docker/start-all.sh ---------------------------------------------------------------------- diff --git a/contrib/docker/start-all.sh b/contrib/docker/start-all.sh index d1a397f..bfb214a 100755 --- a/contrib/docker/start-all.sh +++ b/contrib/docker/start-all.sh @@ -14,12 +14,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+rm -f /run/nologin export PATH=${PATH}:/opt/apache/hadoop/bin:/opt/apache/hbase/bin export JAVA_HOME=/usr/lib/jvm/jre export HADOOP_CONF_DIR=/opt/apache/hadoop/etc/hadoop export HBASE_CONF_DIR=/opt/apache/hbase/conf export CHUKWA_CONF_DIR=/opt/apache/chukwa/etc/chukwa -service sshd start +systemctl status sshd su - zookeeper -c '/opt/apache/zookeeper/bin/zkServer.sh start' su - solr -c 'cd /opt/apache/solr; ./bin/solr start -cloud -z localhost:2181' su - solr -c 'cd /opt/apache/solr; ./bin/solr create_collection -c chukwa -n chukwa' http://git-wip-us.apache.org/repos/asf/chukwa/blob/d1b7ef94/contrib/docker/tera.sh ---------------------------------------------------------------------- diff --git a/contrib/docker/tera.sh b/contrib/docker/tera.sh new file mode 100644 index 0000000..3d613ba --- /dev/null +++ b/contrib/docker/tera.sh @@ -0,0 +1,9 @@ +export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/apache/hadoop/bin:/opt/apache/hbase/bin +su hdfs -c "hadoop dfs -mkdir -p /user/hdfs" +while : +do + su hdfs -c "hadoop jar /opt/apache/hadoop-2.7.2/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.2.jar teragen 100 /user/hdfs/terasort-input" + su hdfs -c "hadoop jar /opt/apache/hadoop-2.7.2/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.2.jar terasort /user/hdfs/terasort-input /user/hdfs/terasort-output" + su hdfs -c "hadoop dfs -rmr -skipTrash /user/hdfs/terasort-input/" + su hdfs -c "hadoop dfs -rmr -skipTrash /user/hdfs/terasort-output/" +done http://git-wip-us.apache.org/repos/asf/chukwa/blob/d1b7ef94/contrib/docker/test_solver.prototxt ---------------------------------------------------------------------- diff --git a/contrib/docker/test_solver.prototxt b/contrib/docker/test_solver.prototxt new file mode 100644 index 0000000..dd75ae8 --- /dev/null +++ b/contrib/docker/test_solver.prototxt @@ -0,0 +1,24 @@ +# The train/test net protocol buffer definition +net: "/caffe-test/train/train_test.prototxt" +# test_iter specifies how many forward passes the test should carry out. +test_iter: 2 +# Carry out testing every 500 training iterations. +test_interval: 10 +# The base learning rate, momentum and the weight decay of the network. 
+base_lr: 0.01 +momentum: 0.9 +weight_decay: 0.0005 +# The learning rate policy +lr_policy: "inv" +gamma: 0.0001 +power: 0.75 +# Display every 100 iterations +display: 20 +# The maximum number of iterations +max_iter: 81 +# snapshot intermediate results +snapshot: 5000 +snapshot_prefix: "mnist_lenet" +# solver mode: CPU or GPU +solver_mode: CPU + http://git-wip-us.apache.org/repos/asf/chukwa/blob/d1b7ef94/contrib/docker/train.sh ---------------------------------------------------------------------- diff --git a/contrib/docker/train.sh b/contrib/docker/train.sh new file mode 100644 index 0000000..a9f3d3b --- /dev/null +++ b/contrib/docker/train.sh @@ -0,0 +1,32 @@ +export SPARK_HOME=/opt/apache/spark-1.6.0-bin-hadoop2.6 +${SPARK_HOME}/sbin/start-master.sh +export MASTER_URL=spark://localhost:7077 +export SPARK_WORKER_INSTANCES=1 +export CORES_PER_WORKER=1 +export TOTAL_CORES=$((${CORES_PER_WORKER}*${SPARK_WORKER_INSTANCES})) +${SPARK_HOME}/sbin/start-slave.sh -c $CORES_PER_WORKER -m 3G ${MASTER_URL} + + +export LD_LIBRARY_PATH=/CaffeOnSpark/caffe-public/distribute/lib:/CaffeOnSpark/caffe-distri/distribute/lib +export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda-7.0/lib64:/usr/local/mkl/lib/intel64/ + +sh ./makeImage.sh + + +/opt/apache/spark-1.6.0-bin-hadoop2.6/bin/spark-submit \ + --files /caffe-test/train/test_solver.prototxt,/caffe-test/train/train_test.prototxt \ + --conf spark.cores.max=1 \ + --conf spark.task.cpus=1 \ + --conf spark.driver.extraLibraryPath="${LD_LIBRARY_PATH}" \ + --conf spark.executorEnv.LD_LIBRARY_PATH="${LD_LIBRARY_PATH}" \ + --class com.yahoo.ml.caffe.CaffeOnSpark \ + /CaffeOnSpark/caffe-grid/target/caffe-grid-0.1-SNAPSHOT-jar-with-dependencies.jar \ + -train \ + -features accuracy,loss -label label \ + -conf /caffe-test/train/test_solver.prototxt \ + -clusterSize 1 \ + -devices 1 \ + -connection ethernet \ + -model file:/caffe-test/train/test.model \ + -output file:/caffe-test/train/test_result + http://git-wip-us.apache.org/repos/asf/chukwa/blob/d1b7ef94/contrib/docker/train_test.prototxt ---------------------------------------------------------------------- diff --git a/contrib/docker/train_test.prototxt b/contrib/docker/train_test.prototxt new file mode 100644 index 0000000..f35b02c --- /dev/null +++ b/contrib/docker/train_test.prototxt @@ -0,0 +1,178 @@ +name: "LeNet" +layer { + name: "data" + type: "MemoryData" + top: "data" + top: "label" + include { + phase: TRAIN + } + source_class: "com.yahoo.ml.caffe.LMDB" + memory_data_param { + source: "file:/caffe-test/train/lmdb/" + batch_size: 1 + channels: 1 + height: 200 + width: 200 + share_in_parallel: false + } + transform_param { + scale: 0.00390625 + force_color: true + } +} +layer { + name: "data" + type: "MemoryData" + top: "data" + top: "label" + include { + phase: TEST + } + source_class: "com.yahoo.ml.caffe.LMDB" + memory_data_param { + source: "file:/caffe-test/train/lmdb/" + batch_size: 1 + channels: 1 + height: 200 + width: 200 + share_in_parallel: false + } + transform_param { + scale: 0.00390625 + force_color: true + } +} +layer { + name: "conv1" + type: "Convolution" + bottom: "data" + top: "conv1" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 20 + kernel_size: 5 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "pool1" + type: "Pooling" + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv2" + type: "Convolution" + 
bottom: "pool1" + top: "conv2" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 50 + kernel_size: 5 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "pool2" + type: "Pooling" + bottom: "conv2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "ip1" + type: "InnerProduct" + bottom: "pool2" + top: "ip1" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + inner_product_param { + num_output: 500 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu1" + type: "ReLU" + bottom: "ip1" + top: "ip1" +} +layer { + name: "ip2" + type: "InnerProduct" + bottom: "ip1" + top: "ip2" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + inner_product_param { + num_output: 10 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "accuracy" + type: "Accuracy" + bottom: "ip2" + bottom: "label" + top: "accuracy" + include { + phase: TEST + } +} +layer { + name: "loss" + type: "SoftmaxWithLoss" + bottom: "ip2" + bottom: "label" + top: "loss" +}