This is an automated email from the ASF dual-hosted git repository. xxyu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kylin.git
commit 0fdf7764a2dc3c5f6d025580a16d397b32fffc7c Author: Temple Zhou <dba...@gmail.com> AuthorDate: Sun Apr 26 13:04:32 2020 +0800 KYLIN-4181 Schedule Kylin using Kubernetes --- .gitignore | 4 +- kubernetes/Dockerfile | 78 +++++++++++++++++++++++ kubernetes/README.md | 109 ++++++++++++++++++++++++++++++++ kubernetes/kylin-configmap.sh | 17 +++++ kubernetes/kylin-job-statefulset.yaml | 95 ++++++++++++++++++++++++++++ kubernetes/kylin-query-statefulset.yaml | 95 ++++++++++++++++++++++++++++ kubernetes/kylin-secret.sh | 3 + kubernetes/kylin-service.yaml | 44 +++++++++++++ 8 files changed, 444 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 98da29c..69d61d0 100644 --- a/.gitignore +++ b/.gitignore @@ -93,4 +93,6 @@ dependency-reduced-pom.xml webapp/package-lock.json # stream_index -stream-receiver/stream_index \ No newline at end of file +stream-receiver/stream_index +# configuration files +kubernetes/conf/* diff --git a/kubernetes/Dockerfile b/kubernetes/Dockerfile new file mode 100644 index 0000000..6454417 --- /dev/null +++ b/kubernetes/Dockerfile @@ -0,0 +1,78 @@ +FROM centos:6.9 + +ARG APACHE_MIRRORS=http://mirrors.aliyun.com +ENV APACHE_MIRRORS ${APACHE_MIRRORS} + +ENV JAVA_VERSION 1.8.0 +ENV SPARK_VERSION 2.3.4 +ENV KAFKA_VERSION 2.1.1 +ENV KYLIN_VERSION 3.0.0 + +ENV JAVA_HOME /usr/lib/jvm/java-${JAVA_VERSION} +ENV HADOOP_HOME /usr/lib/hadoop +ENV HIVE_HOME /usr/lib/hive +ENV HCAT_HOME /usr/lib/hive-hcatalog +ENV HBASE_HOME /usr/lib/hbase +ENV SPARK_HOME /opt/spark-${SPARK_VERSION}-bin-hadoop2.6 +ENV KAFKA_HOME /opt/kafka_2.11-${KAFKA_VERSION} +ENV KYLIN_HOME /opt/apache-kylin-${KYLIN_VERSION}-bin-cdh57 + +ENV PATH $PATH:\ +$SPARK_HOME/bin:\ +$KAFKA_HOME/bin:\ +$KYLIN_HOME/bin + +ENV HADOOP_CONF_DIR /etc/hadoop/conf +ENV HIVE_CONF_DIR /etc/hive/conf +ENV HBASE_CONF_DIR /etc/hbase/conf +ENV HIVE_CONF ${HIVE_CONF_DIR} +ENV HIVE_LIB ${HIVE_HOME}/lib + +RUN echo $'[cloudera-cdh5] \n\ +# Packages for Cloudera\'s Distribution for Hadoop, Version 
5, on RedHat or CentOS 6 x86_64 \n\ +name=Cloudera\'s Distribution for Hadoop, Version 5 \n\ +baseurl=https://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/5.7.6/ \n\ +gpgkey =https://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/RPM-GPG-KEY-cloudera \n\ +gpgcheck = 1' > /etc/yum.repos.d/cloudera-cdh5.repo + +WORKDIR /opt + +# Download Kafka from APACHE_MIRRORS +RUN set -xeu && \ + curl -o kafka_2.11-${KAFKA_VERSION}.tgz \ + ${APACHE_MIRRORS}/apache/kafka/${KAFKA_VERSION}/kafka_2.11-${KAFKA_VERSION}.tgz && \ + tar -zxf kafka_2.11-${KAFKA_VERSION}.tgz && rm kafka_2.11-${KAFKA_VERSION}.tgz + +# Download Spark from APACHE_MIRRORS +RUN set -xeu && \ + curl -o spark-${SPARK_VERSION}-bin-hadoop2.6.tgz \ + ${APACHE_MIRRORS}/apache/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop2.6.tgz && \ + tar -zxf spark-${SPARK_VERSION}-bin-hadoop2.6.tgz && rm spark-${SPARK_VERSION}-bin-hadoop2.6.tgz + +# Download Kylin from APACHE_MIRRORS +RUN set -xeu && \ + curl -o apache-kylin-${KYLIN_VERSION}-bin-cdh57.tar.gz \ + ${APACHE_MIRRORS}/apache/kylin/apache-kylin-${KYLIN_VERSION}/apache-kylin-${KYLIN_VERSION}-bin-cdh57.tar.gz && \ + tar -zxf apache-kylin-${KYLIN_VERSION}-bin-cdh57.tar.gz && rm apache-kylin-${KYLIN_VERSION}-bin-cdh57.tar.gz + +# Setup Hadoop & Hive & HBase using CDH Repository. 
PS: The libhadoop.so provided by CDH is compiled with snappy +RUN set -xeu && \ + yum -y -q install java-1.8.0-openjdk-devel && \ + yum -y -q install krb5-workstation && \ + yum -y -q install hadoop-client && \ + yum -y -q install hive hive-hcatalog && \ + yum -y -q install hbase && \ + curl -o ${HIVE_HOME}/lib/hadoop-lzo-0.4.15.jar \ + https://clojars.org/repo/hadoop-lzo/hadoop-lzo/0.4.15/hadoop-lzo-0.4.15.jar && \ + curl -o ${HIVE_HOME}/lib/mysql-connector-java-5.1.24.jar \ + https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.24/mysql-connector-java-5.1.24.jar && \ + yum -q clean all && \ + rm -rf /var/cache/yum && \ + rm -rf /tmp/* /var/tmp/* && \ + groupadd kylin --gid 1000 && \ + useradd kylin --uid 1000 --gid 1000 && \ + chown -R "kylin:kylin" ${KYLIN_HOME} + +EXPOSE 7070 +USER kylin:kylin +CMD ${KYLIN_HOME}/bin/kylin.sh run \ No newline at end of file diff --git a/kubernetes/README.md b/kubernetes/README.md new file mode 100644 index 0000000..205e3f3 --- /dev/null +++ b/kubernetes/README.md @@ -0,0 +1,109 @@ +# Kubernetes QuickStart + +This guide shows how to run a Kylin cluster using the Kubernetes StatefulSet Controller. The following figure depicts a typical scenario for Kylin cluster mode deployment: + +![image_name](http://kylin.apache.org/images/install/kylin_server_modes.png) + +## Build or Pull Docker Image + +You can pull the image from Docker Hub directly if you do not want to build the image locally: + +```bash +docker pull apachekylin/apache-kylin:3.0.0-cdh57 +``` + +TIPS: If you are working with an air-gapped network or slow internet speeds, we suggest you prepare the binary packages by yourself and execute this: + +```bash +docker build -t "apache-kylin:${KYLIN_VERSION}-cdh57" --build-arg APACHE_MIRRORS=http://127.0.0.1:8000 . +``` + +## Prepare your Hadoop Configuration + +Put all of the configuration files under the "conf" directory. 
+ +```bash +kylin.properties +applicationContext.xml # If you need to set cacheManager to Memcached +hbase-site.xml +hive-site.xml +hdfs-site.xml +core-site.xml +mapred-site.xml +yarn-site.xml +``` + +If you work with a Kerberized Hadoop cluster, do not forget to prepare the following files: + +```bash +krb5.conf +kylin.keytab +``` + +## Create ConfigMaps and Secret + +We recommend that you create a separate Kubernetes namespace for Kylin. + +```bash +kubectl create namespace kylin +``` + +Execute the following shell scripts to create the required ConfigMaps and Secret: + +```bash +./kylin-configmap.sh +./kylin-secret.sh +``` + +## Create Service and StatefulSet + +Make sure the following resources exist in your namespace: + +```bash +kubectl get configmaps,secret -n kylin + +NAME DATA AGE +configmap/hadoop-config 4 89d +configmap/hbase-config 1 89d +configmap/hive-config 1 89d +configmap/krb5-config 1 89d +configmap/kylin-config 1 89d +configmap/kylin-context 1 45d + +NAME TYPE DATA AGE +secret/kylin-keytab Opaque 1 89d + +``` + +Then, you need to create a headless service for stable DNS entries (kylin-0.kylin, kylin-1.kylin, kylin-2.kylin...) of StatefulSet members. 
+ +```bash +kubectl apply -f kylin-service.yaml +``` + +Finally, create the StatefulSet and try to use it: + +```bash +kubectl apply -f kylin-job-statefulset.yaml +kubectl apply -f kylin-query-statefulset.yaml +``` + +If everything goes smoothly, you should see all 3 Pods become Running: + +```bash +kubectl get statefulset,pod,service -n kylin + +NAME READY AGE +statefulset.apps/kylin-job 1/1 36d +statefulset.apps/kylin-query 3/3 36d + +NAME READY STATUS RESTARTS AGE +pod/kylin-job-0 1/1 Running 0 13m +pod/kylin-query-0 1/1 Running 0 40h +pod/kylin-query-1 1/1 Running 0 40h + +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +service/kylin ClusterIP None <none> 7070/TCP 58d +service/kylin-job ClusterIP xx.xxx.xx.xx <none> 7070/TCP 89d +service/kylin-query ClusterIP xx.xxx.xxx.xxx <none> 7070/TCP 89d +``` diff --git a/kubernetes/kylin-configmap.sh b/kubernetes/kylin-configmap.sh new file mode 100755 index 0000000..b8ec1b9 --- /dev/null +++ b/kubernetes/kylin-configmap.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +kubectl create configmap -n kylin hadoop-config --from-file=conf/core-site.xml \ + --from-file=conf/hdfs-site.xml \ + --from-file=conf/yarn-site.xml \ + --from-file=conf/mapred-site.xml \ + --dry-run -o yaml | kubectl apply -f - +kubectl create configmap -n kylin hive-config --from-file=conf/hive-site.xml \ + --dry-run -o yaml | kubectl apply -f - +kubectl create configmap -n kylin hbase-config --from-file=conf/hbase-site.xml \ + --dry-run -o yaml | kubectl apply -f - +kubectl create configmap -n kylin kylin-config --from-file=conf/kylin.properties \ + --dry-run -o yaml | kubectl apply -f - +kubectl create configmap -n kylin krb5-config --from-file=conf/krb5.conf \ + --dry-run -o yaml | kubectl apply -f - +kubectl create configmap -n kylin kylin-context --from-file=conf/applicationContext.xml \ + --dry-run -o yaml | kubectl apply -f - diff --git a/kubernetes/kylin-job-statefulset.yaml b/kubernetes/kylin-job-statefulset.yaml new file mode 100644 index 
0000000..2a0f9fe --- /dev/null +++ b/kubernetes/kylin-job-statefulset.yaml @@ -0,0 +1,95 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + annotations: {} + name: kylin-job + namespace: kylin +spec: + replicas: 1 + selector: + matchLabels: + app: kylin + type: job + serviceName: kylin + template: + metadata: + labels: + app: kylin + type: job + spec: + containers: + - image: 'apachekylin/apache-kylin:3.0.0-cdh57' + imagePullPolicy: Always + lifecycle: + postStart: + exec: + command: + - bash + - '-c' + - | + set -ex + # initialize the keytab + kinit -kt /home/kylin/kylin.keytab kylin + # set the kylin.server.mode + sed "s/kylin\.server\.mode.*/kylin\.server\.mode=all/g" /mnt/kylin-config/kylin.properties > ${KYLIN_HOME}/conf/kylin.properties + sed -i "s/kylin\.server\.host-address.*/kylin\.server\.host-address=`hostname`\.kylin:7070/g" ${KYLIN_HOME}/conf/kylin.properties + sed -i "s/export KYLIN_JVM_SETTINGS.*/export KYLIN_JVM_SETTINGS=\"-Xms40g -Xmx40g -XX:NewSize=10g -XX:MaxNewSize=10g -XX:SurvivorRatio=3 -XX:+CMSClassUnloadingEnabled -XX:+CMSParallelRemarkEnabled -XX:+UseConcMarkSweepGC -XX:+CMSIncrementalMode -XX:CMSInitiatingOccupancyFraction=70 -XX:+DisableExplicitGC -XX:+HeapDumpOnOutOfMemoryError\"/g" ${KYLIN_HOME}/conf/setenv.sh + # unarchive the war file and replace the applicationContext if needed + mkdir ${KYLIN_HOME}/tomcat/webapps/kylin + cd ${KYLIN_HOME}/tomcat/webapps/kylin + jar -xvf ${KYLIN_HOME}/tomcat/webapps/kylin.war + cp /mnt/kylin-context/applicationContext.xml ${KYLIN_HOME}/tomcat/webapps/kylin/WEB-INF/classes + name: kylin + ports: + - containerPort: 7070 + readinessProbe: + httpGet: + path: /kylin + port: 7070 + resources: + limits: + cpu: 16 + memory: 50G + requests: + cpu: 8 + memory: 50G + volumeMounts: + - mountPath: /etc/hadoop/conf + name: hadoop-config + - mountPath: /etc/hive/conf + name: hive-config + - mountPath: /etc/hbase/conf + name: hbase-config + - mountPath: /home/kylin + name: kylin-keytab + - mountPath: 
/etc/krb5.conf + name: krb5-config + subPath: krb5.conf + - mountPath: /mnt/kylin-context + name: kylin-context + - mountPath: /mnt/kylin-config + name: kylin-config + volumes: + - configMap: + name: hadoop-config + name: hadoop-config + - configMap: + name: hive-config + name: hive-config + - configMap: + name: hbase-config + name: hbase-config + - configMap: + name: kylin-config + name: kylin-config + - configMap: + name: krb5-config + name: krb5-config + - configMap: + name: kylin-context + name: kylin-context + - name: kylin-keytab + secret: + secretName: kylin-keytab + updateStrategy: + type: RollingUpdate diff --git a/kubernetes/kylin-query-statefulset.yaml b/kubernetes/kylin-query-statefulset.yaml new file mode 100644 index 0000000..f504a58 --- /dev/null +++ b/kubernetes/kylin-query-statefulset.yaml @@ -0,0 +1,95 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + annotations: {} + name: kylin-query + namespace: kylin +spec: + replicas: 3 + selector: + matchLabels: + app: kylin + type: query + serviceName: kylin + template: + metadata: + labels: + app: kylin + type: query + spec: + containers: + - image: 'apachekylin/apache-kylin:3.0.0-cdh57' + imagePullPolicy: Always + lifecycle: + postStart: + exec: + command: + - bash + - '-c' + - | + set -ex + # initialize the keytab + kinit -kt /home/kylin/kylin.keytab kylin + # set the kylin.server.mode + sed "s/kylin\.server\.mode.*/kylin\.server\.mode=query/g" /mnt/kylin-config/kylin.properties > ${KYLIN_HOME}/conf/kylin.properties + sed -i "s/kylin\.server\.host-address.*/kylin\.server\.host-address=`hostname`\.kylin:7070/g" ${KYLIN_HOME}/conf/kylin.properties + sed -i "s/export KYLIN_JVM_SETTINGS.*/export KYLIN_JVM_SETTINGS=\"-Xms16g -Xmx16g -XX:NewSize=3g -XX:MaxNewSize=3g -XX:SurvivorRatio=4 -XX:+CMSClassUnloadingEnabled -XX:+CMSParallelRemarkEnabled -XX:+UseConcMarkSweepGC -XX:+CMSIncrementalMode -XX:CMSInitiatingOccupancyFraction=70 -XX:+DisableExplicitGC -XX:+HeapDumpOnOutOfMemoryError\"/g" 
${KYLIN_HOME}/conf/setenv.sh + # unarchive the war file and replace the applicationContext if needed + mkdir ${KYLIN_HOME}/tomcat/webapps/kylin + cd ${KYLIN_HOME}/tomcat/webapps/kylin + jar -xvf ${KYLIN_HOME}/tomcat/webapps/kylin.war + cp /mnt/kylin-context/applicationContext.xml ${KYLIN_HOME}/tomcat/webapps/kylin/WEB-INF/classes + name: kylin + ports: + - containerPort: 7070 + readinessProbe: + httpGet: + path: /kylin + port: 7070 + resources: + limits: + cpu: 8 + memory: 20G + requests: + cpu: 8 + memory: 20G + volumeMounts: + - mountPath: /etc/hadoop/conf + name: hadoop-config + - mountPath: /etc/hive/conf + name: hive-config + - mountPath: /etc/hbase/conf + name: hbase-config + - mountPath: /home/kylin + name: kylin-keytab + - mountPath: /etc/krb5.conf + name: krb5-config + subPath: krb5.conf + - mountPath: /mnt/kylin-context + name: kylin-context + - mountPath: /mnt/kylin-config + name: kylin-config + volumes: + - configMap: + name: hadoop-config + name: hadoop-config + - configMap: + name: hive-config + name: hive-config + - configMap: + name: hbase-config + name: hbase-config + - configMap: + name: kylin-config + name: kylin-config + - configMap: + name: krb5-config + name: krb5-config + - configMap: + name: kylin-context + name: kylin-context + - name: kylin-keytab + secret: + secretName: kylin-keytab + updateStrategy: + type: RollingUpdate diff --git a/kubernetes/kylin-secret.sh b/kubernetes/kylin-secret.sh new file mode 100755 index 0000000..87ab71e --- /dev/null +++ b/kubernetes/kylin-secret.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +kubectl create secret -n kylin generic kylin-keytab --from-file=conf/kylin.keytab \ No newline at end of file diff --git a/kubernetes/kylin-service.yaml b/kubernetes/kylin-service.yaml new file mode 100644 index 0000000..50c3206 --- /dev/null +++ b/kubernetes/kylin-service.yaml @@ -0,0 +1,44 @@ +# Headless service for stable DNS entries of StatefulSet members. 
+apiVersion: v1 +kind: Service +metadata: + name: kylin + labels: + app: kylin +spec: + ports: + - name: kylin + port: 7070 + clusterIP: None + selector: + app: kylin +--- +# For job instances. +apiVersion: v1 +kind: Service +metadata: + name: kylin-job +spec: + type: ClusterIP + selector: + app: kylin + type: job + ports: + - protocol: TCP + port: 7070 + targetPort: 7070 +--- +# For query instances. +apiVersion: v1 +kind: Service +metadata: + name: kylin-query +spec: + type: ClusterIP + selector: + app: kylin + type: query + ports: + - protocol: TCP + port: 7070 + targetPort: 7070 \ No newline at end of file