This is an automated email from the ASF dual-hosted git repository.
yuqi4733 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 3c4d9067b [#3751] feat(CI): Enable Apache Ranger plugin in Hive Docker image (#3912)
3c4d9067b is described below
commit 3c4d9067b0532b085ef74420df9a3e150e23d5b8
Author: Eric Chang <[email protected]>
AuthorDate: Fri Jul 19 15:13:34 2024 +0800
[#3751] feat(CI): Enable Apache Ranger plugin in Hive Docker image (#3912)
### What changes were proposed in this pull request?
1. Enable Apache Ranger plugin in Hive Docker image.
2. Add Ranger integration Hive permission test.
3. Support both Hive2 and Hive3.
* To start the Hive container, you can specify these environment variables:
  - HIVE_RUNTIME_VERSION: Hive version, currently supports `hive2` (default) and `hive3`
  - DOCKER_ENV_RANGER_SERVER_URL: Ranger server URL
  - DOCKER_ENV_RANGER_HIVE_REPOSITORY_NAME: Ranger Hive repository name
  - DOCKER_ENV_RANGER_HDFS_REPOSITORY_NAME: Ranger HDFS repository name
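For example (a sketch mirroring the docs/docker-image-details.md example below;
the Ranger server address and repository names are illustrative):

    docker run -e HIVE_RUNTIME_VERSION='hive3' \
      -e RANGER_SERVER_URL='http://ranger-server:6080' \
      -e RANGER_HIVE_REPOSITORY_NAME='hiveDev' \
      -e RANGER_HDFS_REPOSITORY_NAME='hdfsDev' \
      datastrato/gravitino-ci-hive:0.1.13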
Fix: #3751
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Local test.
TODO:
- [x] @xunliu needs to publish hive docker image
- [x] @unknowntpo needs to update docker-image-details.md
---------
Co-authored-by: xunliu <[email protected]>
Co-authored-by: xunliu <[email protected]>
---
.github/workflows/docker-image.yml | 5 +-
build.gradle.kts | 5 +-
catalogs/catalog-hadoop/build.gradle.kts | 2 +-
catalogs/catalog-hive/build.gradle.kts | 2 +-
.../catalog-lakehouse-iceberg/build.gradle.kts | 2 +-
catalogs/catalog-lakehouse-paimon/build.gradle.kts | 2 +-
clients/client-python/build.gradle.kts | 2 +-
dev/docker/build-docker.sh | 14 +-
dev/docker/hive/Dockerfile | 103 +++++---
dev/docker/hive/core-site.xml | 12 +-
dev/docker/hive/hadoop-env.sh | 4 +-
dev/docker/hive/hdfs-site.xml | 2 +-
dev/docker/hive/hive-dependency.sh | 64 ++++-
dev/docker/hive/hive-site.xml | 2 +-
dev/docker/hive/start.sh | 88 +++++++
docs/docker-image-details.md | 20 ++
flink-connector/build.gradle.kts | 2 +-
gradle/libs.versions.toml | 1 +
.../integration/test/container/ContainerSuite.java | 16 +-
.../integration/test/container/HiveContainer.java | 27 +-
.../test/container/RangerContainer.java | 19 +-
integration-test/build.gradle.kts | 7 +-
.../test/authorization/ranger/RangerDefines.java | 51 ++++
.../test/authorization/ranger/RangerHiveIT.java | 192 ++++++++++++++
.../test/authorization/ranger/RangerIT.java | 281 +++++++++++++++++----
spark-connector/spark-common/build.gradle.kts | 2 +-
spark-connector/v3.3/spark/build.gradle.kts | 2 +-
spark-connector/v3.4/spark/build.gradle.kts | 2 +-
spark-connector/v3.5/spark/build.gradle.kts | 2 +-
29 files changed, 793 insertions(+), 140 deletions(-)
diff --git a/.github/workflows/docker-image.yml
b/.github/workflows/docker-image.yml
index 243a3478a..1993a209a 100644
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@@ -93,4 +93,7 @@ jobs:
distribution: 'temurin'
- name: Build and Push the Docker image
- run: sudo rm -rf /usr/local/lib/android && sudo rm -rf
/opt/hostedtoolcache/CodeQL && ./dev/docker/build-docker.sh --platform all
--type ${image_type} --image ${image_name} --tag ${{ github.event.inputs.tag }}
--latest
+ run: |
+ sudo rm -rf /usr/local/lib/android
+ sudo rm -rf /opt/hostedtoolcache/CodeQL
+ ./dev/docker/build-docker.sh --platform all --type ${image_type}
--image ${image_name} --tag ${{ github.event.inputs.tag }} --latest
diff --git a/build.gradle.kts b/build.gradle.kts
index 7db69dcd5..feb62f828 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -468,7 +468,8 @@ tasks.rat {
"**/*.log",
"**/licenses/*.txt",
"**/licenses/*.md",
- "integration-test/**",
+ "integration-test/**/*.sql",
+ "integration-test/**/*.txt",
"docs/**/*.md",
"web/.**",
"web/next-env.d.ts",
@@ -710,7 +711,7 @@ fun printDockerCheckInfo() {
val dockerTest = project.extra["dockerTest"] as? Boolean ?: false
if (dockerTest) {
- println("Using Docker container to run all tests. [$testMode test]")
+ println("Using Docker container to run all tests .........................
[$testMode test]")
} else {
println("Run test cases without `gravitino-docker-test` tag ..............
[$testMode test]")
}
diff --git a/catalogs/catalog-hadoop/build.gradle.kts
b/catalogs/catalog-hadoop/build.gradle.kts
index 0dfa23154..4547da920 100644
--- a/catalogs/catalog-hadoop/build.gradle.kts
+++ b/catalogs/catalog-hadoop/build.gradle.kts
@@ -125,7 +125,7 @@ tasks.test {
dependsOn(tasks.jar)
doFirst {
- environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE",
"datastrato/gravitino-ci-hive:0.1.12")
+ environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE",
"datastrato/gravitino-ci-hive:0.1.13")
environment("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE",
"datastrato/gravitino-ci-kerberos-hive:0.1.2")
}
diff --git a/catalogs/catalog-hive/build.gradle.kts
b/catalogs/catalog-hive/build.gradle.kts
index d0cad3909..9930be542 100644
--- a/catalogs/catalog-hive/build.gradle.kts
+++ b/catalogs/catalog-hive/build.gradle.kts
@@ -178,7 +178,7 @@ tasks.test {
dependsOn(tasks.jar)
doFirst {
- environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE",
"datastrato/gravitino-ci-hive:0.1.12")
+ environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE",
"datastrato/gravitino-ci-hive:0.1.13")
environment("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE",
"datastrato/gravitino-ci-kerberos-hive:0.1.2")
}
diff --git a/catalogs/catalog-lakehouse-iceberg/build.gradle.kts
b/catalogs/catalog-lakehouse-iceberg/build.gradle.kts
index eadd43f68..4f2a606c8 100644
--- a/catalogs/catalog-lakehouse-iceberg/build.gradle.kts
+++ b/catalogs/catalog-lakehouse-iceberg/build.gradle.kts
@@ -179,7 +179,7 @@ tasks.test {
dependsOn(tasks.jar)
doFirst {
- environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE",
"datastrato/gravitino-ci-hive:0.1.12")
+ environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE",
"datastrato/gravitino-ci-hive:0.1.13")
environment("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE",
"datastrato/gravitino-ci-kerberos-hive:0.1.2")
}
diff --git a/catalogs/catalog-lakehouse-paimon/build.gradle.kts
b/catalogs/catalog-lakehouse-paimon/build.gradle.kts
index e7ef41b23..757fe7060 100644
--- a/catalogs/catalog-lakehouse-paimon/build.gradle.kts
+++ b/catalogs/catalog-lakehouse-paimon/build.gradle.kts
@@ -138,7 +138,7 @@ tasks.test {
dependsOn(tasks.jar)
doFirst {
- environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE",
"datastrato/gravitino-ci-hive:0.1.12")
+ environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE",
"datastrato/gravitino-ci-hive:0.1.13")
environment("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE",
"datastrato/gravitino-ci-kerberos-hive:0.1.3")
}
diff --git a/clients/client-python/build.gradle.kts
b/clients/client-python/build.gradle.kts
index b4bfc12cb..f8ec9ea6c 100644
--- a/clients/client-python/build.gradle.kts
+++ b/clients/client-python/build.gradle.kts
@@ -222,7 +222,7 @@ tasks {
"GRAVITINO_HOME" to project.rootDir.path + "/distribution/package",
"START_EXTERNAL_GRAVITINO" to "true",
"DOCKER_TEST" to dockerTest.toString(),
- "GRAVITINO_CI_HIVE_DOCKER_IMAGE" to
"datastrato/gravitino-ci-hive:0.1.12",
+ "GRAVITINO_CI_HIVE_DOCKER_IMAGE" to
"datastrato/gravitino-ci-hive:0.1.13",
))
environment = envMap
diff --git a/dev/docker/build-docker.sh b/dev/docker/build-docker.sh
index 527981537..c604f6b37 100755
--- a/dev/docker/build-docker.sh
+++ b/dev/docker/build-docker.sh
@@ -87,7 +87,19 @@ fi
if [[ "${component_type}" == "hive" ]]; then
. ${script_dir}/hive/hive-dependency.sh
- build_args="--build-arg HADOOP_PACKAGE_NAME=${HADOOP_PACKAGE_NAME}
--build-arg HIVE_PACKAGE_NAME=${HIVE_PACKAGE_NAME} --build-arg
JDBC_DIVER_PACKAGE_NAME=${JDBC_DIVER_PACKAGE_NAME} --build-arg
HADOOP_VERSION=${HADOOP_VERSION} --build-arg HIVE_VERSION=${HIVE_VERSION}
--build-arg MYSQL_JDBC_DRIVER_VERSION=${MYSQL_JDBC_DRIVER_VERSION}"
+ build_args="
+ --build-arg HADOOP_PACKAGE_NAME=${HADOOP_PACKAGE_NAME} \
+ --build-arg HIVE_PACKAGE_NAME=${HIVE_PACKAGE_NAME} \
+ --build-arg HADOOP_VERSION=${HADOOP_VERSION} \
+ --build-arg HIVE_VERSION=${HIVE_VERSION} \
+ --build-arg MYSQL_JDBC_DRIVER_VERSION=${MYSQL_JDBC_DRIVER_VERSION} \
+ --build-arg RANGER_VERSION=${RANGER_VERSION} \
+ --build-arg ZOOKEEPER_VERSION=${ZOOKEEPER_VERSION} \
+ --build-arg HIVE2_VERSION=${HIVE2_VERSION} \
+ --build-arg HIVE3_VERSION=${HIVE3_VERSION} \
+ --build-arg HADOOP2_VERSION=${HADOOP2_VERSION} \
+ --build-arg HADOOP3_VERSION=${HADOOP3_VERSION}
+"
elif [[ "${component_type}" == "kerberos-hive" ]]; then
. ${script_dir}/kerberos-hive/hive-dependency.sh
build_args="--build-arg HADOOP_PACKAGE_NAME=${HADOOP_PACKAGE_NAME}
--build-arg HIVE_PACKAGE_NAME=${HIVE_PACKAGE_NAME} --build-arg
JDBC_DIVER_PACKAGE_NAME=${JDBC_DIVER_PACKAGE_NAME}"
diff --git a/dev/docker/hive/Dockerfile b/dev/docker/hive/Dockerfile
index 10bab1604..1b4d4dd7f 100644
--- a/dev/docker/hive/Dockerfile
+++ b/dev/docker/hive/Dockerfile
@@ -20,15 +20,24 @@
FROM ubuntu:16.04
LABEL maintainer="[email protected]"
-ARG HADOOP_VERSION
-ARG HIVE_VERSION
-ARG JDBC_DIVER_PACKAGE_NAME
+ARG HADOOP2_VERSION
+ARG HADOOP3_VERSION
+ARG HIVE2_VERSION
+ARG HIVE3_VERSION
+ARG ZOOKEEPER_VERSION
+ARG RANGER_VERSION
ARG MYSQL_JDBC_DRIVER_VERSION
ARG DEBIAN_FRONTEND=noninteractive
WORKDIR /
################################################################################
+# prepare MySQL installation
+ENV MYSQL_PWD=ds123
+RUN echo "mysql-server mysql-server/root_password password ${MYSQL_PWD}" |
debconf-set-selections
+RUN echo "mysql-server mysql-server/root_password_again password ${MYSQL_PWD}"
| debconf-set-selections
+
+###############################################################################
# update and install basic tools
RUN apt-get update && apt-get upgrade -y && apt-get install --fix-missing -yq \
git \
@@ -53,7 +62,13 @@ RUN apt-get update && apt-get upgrade -y && apt-get install
--fix-missing -yq \
openssh-server \
wget \
sudo \
- openjdk-8-jdk
+ openjdk-8-jdk \
+ mysql-server
+
+# setup mysql configuration
+RUN chown -R mysql:mysql /var/lib/mysql
+RUN usermod -d /var/lib/mysql/ mysql
+RUN sed -i "s/.*bind-address.*/bind-address = 0.0.0.0/"
/etc/mysql/mysql.conf.d/mysqld.cnf
#################################################################################
# setup ssh
@@ -64,18 +79,28 @@ RUN cat /dev/zero | ssh-keygen -q -N "" > /dev/null && cat
/root/.ssh/id_rsa.pub
# set environment variables
ENV JAVA_HOME=/usr/local/jdk
ENV HIVE_HOME=/usr/local/hive
+ENV HIVE2_HOME=/usr/local/hive2
+ENV HIVE3_HOME=/usr/local/hive3
+ENV HIVE_CONF_DIR=${HIVE_HOME}/conf
+ENV HIVE_TMP_CONF_DIR=/tmp/hive-conf
ENV HADOOP_HOME=/usr/local/hadoop
-ENV HADOOP_HEAPSIZE=128
+ENV HADOOP2_HOME=/usr/local/hadoop2
+ENV HADOOP3_HOME=/usr/local/hadoop3
+ENV HADOOP_HEAPSIZE=512
ENV HADOOP_INSTALL=${HADOOP_HOME}
ENV HADOOP_MAPRED_HOME=${HADOOP_INSTALL}
ENV HADOOP_COMMON_HOME=${HADOOP_INSTALL}
ENV HADOOP_HDFS_HOME=${HADOOP_INSTALL}
ENV HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
+ENV HADOOP_TMP_CONF_DIR=/tmp/hadoop-conf
ENV YARN_HOME=${HADOOP_INSTALL}
-
+ENV ZK_HOME=/usr/local/zookeeper
+ENV RANGER_HDFS_PLUGIN_HOME=/usr/local/ranger-hdfs-plugin
+ENV RANGER_HIVE_PLUGIN_HOME=/usr/local/ranger-hive-plugin
ENV
PATH=${JAVA_HOME}/bin:${HADOOP_HOME}/bin:${HADOOP_INSTALL}/sbin:${HIVE_HOME}/bin:${PATH}
ENV CLASSPATH=${HADOOP_HOME}/lib/*:HIVE_HOME/lib/*:.
ENV LD_LIBRARY_PATH=${HADOOP_HOME}/lib/native
+ENV MYSQL_JDBC_DRIVER_VERSION=${MYSQL_JDBC_DRIVER_VERSION}
################################################################################
# add the above env for all users
@@ -97,54 +122,54 @@ RUN echo "HADOOP_CONF_DIR=${HADOOP_CONF_DIR}" >>
/etc/environment
RUN echo "HADOOP_CLASSPATH=${JAVA_HOME}/lib/tools.jar" >> /etc/environment
RUN echo "YARN_HOME=${YARN_HOME}" >> /etc/environment
RUN echo "HIVE_HOME=${HIVE_HOME}" >> /etc/environment
+RUN echo "ZK_HOME=${ZK_HOME}" >> /etc/environment
RUN echo "PATH=${PATH}" >> /etc/environment
RUN echo "CLASSPATH=${CLASSPATH}" >> /etc/environment
RUN echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" >> /etc/environment
################################################################################
-# install hadoop
-RUN mkdir ${HADOOP_HOME}
-ADD packages/hadoop-${HADOOP_VERSION}.tar.gz /opt/
-RUN ln -s /opt/hadoop-${HADOOP_VERSION}/* ${HADOOP_HOME}
-
-# replace configuration templates
-RUN rm -f ${HADOOP_CONF_DIR}/core-site.xml
-RUN rm -f ${HADOOP_CONF_DIR}/hadoop-env.sh
-RUN rm -f ${HADOOP_CONF_DIR}/hdfs-site.xml
-RUN rm -f ${HADOOP_CONF_DIR}/mapred-site.xml
-
-ADD core-site.xml ${HADOOP_CONF_DIR}/core-site.xml
-ADD hadoop-env.sh ${HADOOP_CONF_DIR}/hadoop-env.sh
-ADD hdfs-site.xml ${HADOOP_CONF_DIR}/hdfs-site.xml
-ADD mapred-site.xml ${HADOOP_CONF_DIR}/mapred-site.xml
-ADD check-status.sh /tmp/check-status.sh
+# install zookeeper
+ADD packages/zookeeper-${ZOOKEEPER_VERSION}.tar.gz /opt/
+RUN ln -s /opt/zookeeper-${ZOOKEEPER_VERSION} ${ZK_HOME}
+RUN mkdir ${ZK_HOME}/data
-# format HFS
-RUN ${HADOOP_HOME}/bin/hdfs namenode -format -nonInteractive
+################################################################################
+# install ranger hadoop plugin
+ADD packages/ranger-${RANGER_VERSION}-hdfs-plugin.tar.gz /opt/
+RUN ln -s /opt/ranger-${RANGER_VERSION}-hdfs-plugin ${RANGER_HDFS_PLUGIN_HOME}
################################################################################
-# install hive
-RUN mkdir ${HIVE_HOME}
-ADD packages/apache-hive-${HIVE_VERSION}-bin.tar.gz /opt/
-RUN ln -s /opt/apache-hive-${HIVE_VERSION}-bin/* ${HIVE_HOME}
+# install ranger hive plugin
+ADD packages/ranger-${RANGER_VERSION}-hive-plugin.tar.gz /opt/
+RUN ln -s /opt/ranger-${RANGER_VERSION}-hive-plugin ${RANGER_HIVE_PLUGIN_HOME}
-ADD hive-site.xml ${HIVE_HOME}/conf/hive-site.xml
+################################################################################
+# install hadoop2 and hadoop3
+ADD packages/hadoop-${HADOOP2_VERSION}.tar.gz /opt/
+RUN ln -s /opt/hadoop-${HADOOP2_VERSION} ${HADOOP2_HOME}
+ADD packages/hadoop-${HADOOP3_VERSION}.tar.gz /opt/
+RUN ln -s /opt/hadoop-${HADOOP3_VERSION} ${HADOOP3_HOME}
+
+# Add hadoop configuration to temporary directory
+ADD core-site.xml ${HADOOP_TMP_CONF_DIR}/core-site.xml
+ADD hadoop-env.sh ${HADOOP_TMP_CONF_DIR}/hadoop-env.sh
+ADD hdfs-site.xml ${HADOOP_TMP_CONF_DIR}/hdfs-site.xml
+ADD mapred-site.xml ${HADOOP_TMP_CONF_DIR}/mapred-site.xml
+ADD check-status.sh /tmp/check-status.sh
################################################################################
-# install MySQL
-ENV MYSQL_PWD=ds123
-RUN echo "mysql-server mysql-server/root_password password ${MYSQL_PWD}" |
debconf-set-selections
-RUN echo "mysql-server mysql-server/root_password_again password ${MYSQL_PWD}"
| debconf-set-selections
-RUN apt-get install -y mysql-server
+# install hive2 and hive3
+ADD packages/apache-hive-${HIVE2_VERSION}-bin.tar.gz /opt/
+RUN ln -s /opt/apache-hive-${HIVE2_VERSION}-bin ${HIVE2_HOME}
+ADD packages/apache-hive-${HIVE3_VERSION}-bin.tar.gz /opt/
+RUN ln -s /opt/apache-hive-${HIVE3_VERSION}-bin ${HIVE3_HOME}
-RUN chown -R mysql:mysql /var/lib/mysql
-RUN usermod -d /var/lib/mysql/ mysql
-RUN sed -i "s/.*bind-address.*/bind-address = 0.0.0.0/"
/etc/mysql/mysql.conf.d/mysqld.cnf
+# Add hive configuration to temporary directory
+ADD hive-site.xml ${HIVE_TMP_CONF_DIR}/hive-site.xml
################################################################################
# add mysql jdbc driver
ADD packages/mysql-connector-java-${MYSQL_JDBC_DRIVER_VERSION}.tar.gz /opt/
-RUN ln -s /opt/mysql-connector-java-${MYSQL_JDBC_DRIVER_VERSION}/*
${HIVE_HOME}/lib
################################################################################
# add users and groups
@@ -168,7 +193,7 @@ RUN rm -rf /var/lib/apt/lists/*
################################################################################
# expose port
-EXPOSE 3306 9000 9083 10000 10002 50070 50075 50010
+EXPOSE 2181 3306 9000 9083 10000 10002 50070 50075 50010
################################################################################
# create startup script and set ENTRYPOINT
diff --git a/dev/docker/hive/core-site.xml b/dev/docker/hive/core-site.xml
index b5e8be775..060db854d 100644
--- a/dev/docker/hive/core-site.xml
+++ b/dev/docker/hive/core-site.xml
@@ -1,7 +1,7 @@
<configuration>
<property>
<name>fs.defaultFS</name>
- <value>hdfs://0.0.0.0:9000</value>
+ <value>hdfs://__REPLACE__HOST_NAME:9000</value>
</property>
<property>
@@ -33,4 +33,14 @@
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
+
+ <property>
+ <name>hadoop.proxyuser.gravitino.groups</name>
+ <value>*</value>
+ </property>
+
+ <property>
+ <name>hadoop.proxyuser.gravitino.hosts</name>
+ <value>*</value>
+ </property>
</configuration>
diff --git a/dev/docker/hive/hadoop-env.sh b/dev/docker/hive/hadoop-env.sh
index f9d9cf0a9..2609b51ce 100644
--- a/dev/docker/hive/hadoop-env.sh
+++ b/dev/docker/hive/hadoop-env.sh
@@ -42,10 +42,10 @@ for f in ${HADOOP_HOME}/contrib/capacity-scheduler/*.jar; do
done
# The maximum amount of heap to use, in MB. Default is 1000.
-export HADOOP_HEAPSIZE=128
+export HADOOP_HEAPSIZE=512
# Extra Java runtime options. Empty by default.
-export HADOOP_OPTS="${HADOOP_OPTS} -Djava.net.preferIPv4Stack=true
-XX:MaxPermSize=128m"
+export HADOOP_OPTS="${HADOOP_OPTS} -Djava.net.preferIPv4Stack=true
-XX:MaxPermSize=256m"
# Command specific options appended to HADOOP_OPTS when specified
export
HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS}
-Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender}
${HADOOP_NAMENODE_OPTS}"
diff --git a/dev/docker/hive/hdfs-site.xml b/dev/docker/hive/hdfs-site.xml
index ae15b096f..fefa16db5 100644
--- a/dev/docker/hive/hdfs-site.xml
+++ b/dev/docker/hive/hdfs-site.xml
@@ -11,7 +11,7 @@
<property>
<name>dfs.datanode.address</name>
- <value>0.0.0.0:50010</value>
+ <value>__REPLACE__HOST_NAME:50010</value>
</property>
<property>
diff --git a/dev/docker/hive/hive-dependency.sh
b/dev/docker/hive/hive-dependency.sh
index 657e171a9..5ec228003 100755
--- a/dev/docker/hive/hive-dependency.sh
+++ b/dev/docker/hive/hive-dependency.sh
@@ -22,32 +22,72 @@ hive_dir="$(dirname "${BASH_SOURCE-$0}")"
hive_dir="$(cd "${hive_dir}">/dev/null; pwd)"
# Environment variables definition
-HADOOP_VERSION="2.7.3"
-HIVE_VERSION="2.3.9"
-MYSQL_JDBC_DRIVER_VERSION="8.0.15"
+HADOOP2_VERSION="2.7.3"
+HADOOP3_VERSION="3.1.0"
-HADOOP_PACKAGE_NAME="hadoop-${HADOOP_VERSION}.tar.gz" # Must export this
variable for Dockerfile
-HADOOP_DOWNLOAD_URL="https://archive.apache.org/dist/hadoop/core/hadoop-${HADOOP_VERSION}/${HADOOP_PACKAGE_NAME}"
+HIVE2_VERSION="2.3.9"
+HIVE3_VERSION="3.1.3"
+MYSQL_JDBC_DRIVER_VERSION=${MYSQL_VERSION:-"8.0.15"}
+ZOOKEEPER_VERSION=${ZOOKEEPER_VERSION:-"3.4.13"}
+RANGER_VERSION="2.4.0" # Notice: Ranger plugin 2.4.0 is currently only tested with Hadoop 3.1.0 and Hive 3.1.3
-HIVE_PACKAGE_NAME="apache-hive-${HIVE_VERSION}-bin.tar.gz" # Must export this
variable for Dockerfile
-HIVE_DOWNLOAD_URL="https://archive.apache.org/dist/hive/hive-${HIVE_VERSION}/${HIVE_PACKAGE_NAME}"
+HADOOP2_PACKAGE_NAME="hadoop-${HADOOP2_VERSION}.tar.gz"
+HADOOP2_DOWNLOAD_URL="https://archive.apache.org/dist/hadoop/core/hadoop-${HADOOP2_VERSION}/${HADOOP2_PACKAGE_NAME}"
-JDBC_DIVER_PACKAGE_NAME="mysql-connector-java-${MYSQL_JDBC_DRIVER_VERSION}.tar.gz"
# Must export this variable for Dockerfile
+HADOOP3_PACKAGE_NAME="hadoop-${HADOOP3_VERSION}.tar.gz"
+HADOOP3_DOWNLOAD_URL="https://archive.apache.org/dist/hadoop/core/hadoop-${HADOOP3_VERSION}/${HADOOP3_PACKAGE_NAME}"
+
+HIVE2_PACKAGE_NAME="apache-hive-${HIVE2_VERSION}-bin.tar.gz"
+HIVE2_DOWNLOAD_URL="https://archive.apache.org/dist/hive/hive-${HIVE2_VERSION}/${HIVE2_PACKAGE_NAME}"
+
+HIVE3_PACKAGE_NAME="apache-hive-${HIVE3_VERSION}-bin.tar.gz"
+HIVE3_DOWNLOAD_URL="https://archive.apache.org/dist/hive/hive-${HIVE3_VERSION}/${HIVE3_PACKAGE_NAME}"
+
+JDBC_DIVER_PACKAGE_NAME="mysql-connector-java-${MYSQL_JDBC_DRIVER_VERSION}.tar.gz"
JDBC_DIVER_DOWNLOAD_URL="https://downloads.mysql.com/archives/get/p/3/file/${JDBC_DIVER_PACKAGE_NAME}"
+ZOOKEEPER_PACKAGE_NAME="zookeeper-${ZOOKEEPER_VERSION}.tar.gz"
+ZOOKEEPER_DOWNLOAD_URL="https://archive.apache.org/dist/zookeeper/zookeeper-${ZOOKEEPER_VERSION}/${ZOOKEEPER_PACKAGE_NAME}"
+
+RANGER_HIVE_PACKAGE_NAME="ranger-${RANGER_VERSION}-hive-plugin.tar.gz"
+RANGER_HIVE_DOWNLOAD_URL=https://github.com/datastrato/apache-ranger/releases/download/release-ranger-${RANGER_VERSION}/ranger-${RANGER_VERSION}-hive-plugin.tar.gz
+
+RANGER_HDFS_PACKAGE_NAME="ranger-${RANGER_VERSION}-hdfs-plugin.tar.gz"
+RANGER_HDFS_DOWNLOAD_URL=https://github.com/datastrato/apache-ranger/releases/download/release-ranger-${RANGER_VERSION}/ranger-${RANGER_VERSION}-hdfs-plugin.tar.gz
+
# Prepare download packages
if [[ ! -d "${hive_dir}/packages" ]]; then
mkdir -p "${hive_dir}/packages"
fi
-if [ ! -f "${hive_dir}/packages/${HADOOP_PACKAGE_NAME}" ]; then
- curl -L -s -o "${hive_dir}/packages/${HADOOP_PACKAGE_NAME}"
${HADOOP_DOWNLOAD_URL}
+if [ ! -f "${hive_dir}/packages/${HADOOP2_PACKAGE_NAME}" ]; then
+ curl -L -s -o "${hive_dir}/packages/${HADOOP2_PACKAGE_NAME}"
${HADOOP2_DOWNLOAD_URL}
+fi
+
+if [ ! -f "${hive_dir}/packages/${HADOOP3_PACKAGE_NAME}" ]; then
+ curl -L -s -o "${hive_dir}/packages/${HADOOP3_PACKAGE_NAME}"
${HADOOP3_DOWNLOAD_URL}
fi
-if [ ! -f "${hive_dir}/packages/${HIVE_PACKAGE_NAME}" ]; then
- curl -L -s -o "${hive_dir}/packages/${HIVE_PACKAGE_NAME}"
${HIVE_DOWNLOAD_URL}
+if [ ! -f "${hive_dir}/packages/${HIVE2_PACKAGE_NAME}" ]; then
+ curl -L -s -o "${hive_dir}/packages/${HIVE2_PACKAGE_NAME}"
${HIVE2_DOWNLOAD_URL}
+fi
+
+if [ ! -f "${hive_dir}/packages/${HIVE3_PACKAGE_NAME}" ]; then
+ curl -L -s -o "${hive_dir}/packages/${HIVE3_PACKAGE_NAME}"
${HIVE3_DOWNLOAD_URL}
fi
if [ ! -f "${hive_dir}/packages/${JDBC_DIVER_PACKAGE_NAME}" ]; then
curl -L -s -o "${hive_dir}/packages/${JDBC_DIVER_PACKAGE_NAME}"
${JDBC_DIVER_DOWNLOAD_URL}
fi
+
+if [ ! -f "${hive_dir}/packages/${ZOOKEEPER_PACKAGE_NAME}" ]; then
+ curl -L -s -o "${hive_dir}/packages/${ZOOKEEPER_PACKAGE_NAME}"
${ZOOKEEPER_DOWNLOAD_URL}
+fi
+
+if [ ! -f "${hive_dir}/packages/${RANGER_HDFS_PACKAGE_NAME}" ]; then
+ curl -L -s -o "${hive_dir}/packages/${RANGER_HDFS_PACKAGE_NAME}"
${RANGER_HDFS_DOWNLOAD_URL}
+fi
+
+if [ ! -f "${hive_dir}/packages/${RANGER_HIVE_PACKAGE_NAME}" ]; then
+ curl -L -s -o "${hive_dir}/packages/${RANGER_HIVE_PACKAGE_NAME}"
${RANGER_HIVE_DOWNLOAD_URL}
+fi
diff --git a/dev/docker/hive/hive-site.xml b/dev/docker/hive/hive-site.xml
index 3b8327b7c..3346d6be6 100644
--- a/dev/docker/hive/hive-site.xml
+++ b/dev/docker/hive/hive-site.xml
@@ -39,7 +39,7 @@
<property>
<name>hive.metastore.warehouse.dir</name>
- <value>hdfs://localhost:9000/user/hive/warehouse</value>
+ <value>hdfs://__REPLACE__HOST_NAME:9000/user/hive/warehouse</value>
<description>location of default database for the warehouse</description>
</property>
</configuration>
diff --git a/dev/docker/hive/start.sh b/dev/docker/hive/start.sh
index f42ce4d5d..8bf1f12b9 100644
--- a/dev/docker/hive/start.sh
+++ b/dev/docker/hive/start.sh
@@ -18,8 +18,96 @@
# under the License.
#
+# The special HIVE_RUNTIME_VERSION environment variable decides which version of Hive to run
+if [[ "${HIVE_RUNTIME_VERSION}" == "hive3" ]]; then
+ ln -s ${HIVE3_HOME} ${HIVE_HOME}
+ ln -s ${HADOOP3_HOME} ${HADOOP_HOME}
+else
+ ln -s ${HIVE2_HOME} ${HIVE_HOME}
+ ln -s ${HADOOP2_HOME} ${HADOOP_HOME}
+fi
+
+# Copy the Hadoop and Hive configuration files and update the hostname
+cp -f ${HADOOP_TMP_CONF_DIR}/* ${HADOOP_CONF_DIR}
+cp -f ${HIVE_TMP_CONF_DIR}/* ${HIVE_CONF_DIR}
+sed -i "s/__REPLACE__HOST_NAME/$(hostname)/g" ${HADOOP_CONF_DIR}/core-site.xml
+sed -i "s/__REPLACE__HOST_NAME/$(hostname)/g" ${HADOOP_CONF_DIR}/hdfs-site.xml
+sed -i "s/__REPLACE__HOST_NAME/$(hostname)/g" ${HIVE_CONF_DIR}/hive-site.xml
+
+# Link mysql-connector-java after deciding where HIVE_HOME symbolic link
points to.
+ln -s
/opt/mysql-connector-java-${MYSQL_JDBC_DRIVER_VERSION}/mysql-connector-java-${MYSQL_JDBC_DRIVER_VERSION}.jar
${HIVE_HOME}/lib
+
+# install Ranger hive plugin
+if [[ -n "${RANGER_HIVE_REPOSITORY_NAME}" && -n "${RANGER_SERVER_URL}" ]]; then
+  # Enabling the Ranger plugin for Hive requires ZooKeeper
+ echo "Starting zookeeper..."
+ mv ${ZK_HOME}/conf/zoo_sample.cfg ${ZK_HOME}/conf/zoo.cfg
+ sed -i "s|/tmp/zookeeper|${ZK_HOME}/data|g" ${ZK_HOME}/conf/zoo.cfg
+ ${ZK_HOME}/bin/zkServer.sh start-foreground > /dev/null 2>&1 &
+
+ cd ${RANGER_HIVE_PLUGIN_HOME}
+ sed -i "s|POLICY_MGR_URL=|POLICY_MGR_URL=${RANGER_SERVER_URL}|g"
install.properties
+ sed -i "s|REPOSITORY_NAME=|REPOSITORY_NAME=${RANGER_HIVE_REPOSITORY_NAME}|g"
install.properties
+ echo "XAAUDIT.SUMMARY.ENABLE=true" >> install.properties
+ sed -i
"s|COMPONENT_INSTALL_DIR_NAME=|COMPONENT_INSTALL_DIR_NAME=${HIVE_HOME}|g"
install.properties
+ ${RANGER_HIVE_PLUGIN_HOME}/enable-hive-plugin.sh
+
+ # Reduce poll policy interval in the ranger plugin configuration
+ sed -i
'/<name>ranger.plugin.hive.policy.pollIntervalMs<\/name>/{n;s/<value>30000<\/value>/<value>500<\/value>/}'
${HIVE_HOME}/conf/ranger-hive-security.xml
+
+ # Enable audit log in hive
+ cp ${HIVE_HOME}/conf/hive-log4j2.properties.template
${HIVE_HOME}/conf/hive-log4j2.properties
+ sed -i "s/appenders = console, DRFA/appenders = console, DRFA,
RANGERAUDIT/g" ${HIVE_HOME}/conf/hive-log4j2.properties
+ sed -i "s/loggers = NIOServerCnxn, ClientCnxnSocketNIO, DataNucleus,
Datastore, JPOX, PerfLogger, AmazonAws, ApacheHttp/loggers = NIOServerCnxn,
ClientCnxnSocketNIO, DataNucleus, Datastore, JPOX, PerfLogger, AmazonAws,
ApacheHttp, Ranger/g" ${HIVE_HOME}/conf/hive-log4j2.properties
+
+ # Enable Ranger audit log in hive
+cat <<'EOF' >> ${HIVE_HOME}/conf/hive-log4j2.properties
+
+# RANGERAUDIT appender
+logger.Ranger.name = xaaudit
+logger.Ranger.level = INFO
+logger.Ranger.appenderRefs = RANGERAUDIT
+logger.Ranger.appenderRef.RANGERAUDIT.ref = RANGERAUDIT
+appender.RANGERAUDIT.type=file
+appender.RANGERAUDIT.name=RANGERAUDIT
+appender.RANGERAUDIT.fileName=${sys:hive.log.dir}/ranger-hive-audit.log
+appender.RANGERAUDIT.filePermissions=rwxrwxrwx
+appender.RANGERAUDIT.layout.type=PatternLayout
+appender.RANGERAUDIT.layout.pattern=%d{ISO8601} %q %5p [%t] %c{2} (%F:%M(%L))
- %m%n
+EOF
+fi
+
+# install Ranger hdfs plugin
+if [[ -n "${RANGER_HDFS_REPOSITORY_NAME}" && -n "${RANGER_SERVER_URL}" ]]; then
+ cd ${RANGER_HDFS_PLUGIN_HOME}
+ sed -i "s|POLICY_MGR_URL=|POLICY_MGR_URL=${RANGER_SERVER_URL}|g"
install.properties
+ sed -i "s|REPOSITORY_NAME=|REPOSITORY_NAME=${RANGER_HDFS_REPOSITORY_NAME}|g"
install.properties
+ echo "XAAUDIT.SUMMARY.ENABLE=true" >> install.properties
+ sed -i
"s|COMPONENT_INSTALL_DIR_NAME=|COMPONENT_INSTALL_DIR_NAME=${HADOOP_HOME}|g"
install.properties
+ ${RANGER_HDFS_PLUGIN_HOME}/enable-hdfs-plugin.sh
+
+ # Reduce poll policy interval in the ranger plugin configuration
+ sed -i
'/<name>ranger.plugin.hdfs.policy.pollIntervalMs<\/name>/{n;s/<value>30000<\/value>/<value>500<\/value>/}'
${HADOOP_HOME}/etc/hadoop/ranger-hdfs-security.xml
+
+ # Enable Ranger audit log in hdfs
+cat <<'EOF' >> ${HADOOP_HOME}/etc/hadoop/log4j.properties
+
+# RANGERAUDIT appender
+ranger.logger=INFO,console,RANGERAUDIT
+log4j.logger.xaaudit=${ranger.logger}
+log4j.appender.RANGERAUDIT=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.RANGERAUDIT.File=${hadoop.log.dir}/ranger-hdfs-audit.log
+log4j.appender.RANGERAUDIT.layout=org.apache.log4j.PatternLayout
+log4j.appender.RANGERAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %L
%m%n
+log4j.appender.RANGERAUDIT.DatePattern=.yyyy-MM-dd
+EOF
+fi
+
# start hdfs
echo "Starting HDFS..."
+echo "Format NameNode..."
+${HADOOP_HOME}/bin/hdfs namenode -format -nonInteractive
+
echo "Starting NameNode..."
${HADOOP_HOME}/sbin/hadoop-daemon.sh start namenode
diff --git a/docs/docker-image-details.md b/docs/docker-image-details.md
index bd902707a..db526c3d1 100644
--- a/docs/docker-image-details.md
+++ b/docs/docker-image-details.md
@@ -118,6 +118,20 @@ You can use this kind of image to test the catalog of
Apache Hive.
Changelog
+- gravitino-ci-hive:0.1.13
+ - Support Hive 2.3.9 and HDFS 2.7.3
+ - Docker environment variables:
+ - `HIVE_RUNTIME_VERSION`: `hive2` (default)
+ - Support Hive 3.1.3, HDFS 3.1.0 and Ranger plugin version 2.4.0
+ - Docker environment variables:
+ - `HIVE_RUNTIME_VERSION`: `hive3`
+ - `RANGER_SERVER_URL`: Ranger admin URL
+ - `RANGER_HIVE_REPOSITORY_NAME`: Hive repository name in Ranger admin
+ - `RANGER_HDFS_REPOSITORY_NAME`: HDFS repository name in Ranger admin
+  - If you want to enable the Hive Ranger plugin, you need to set both the
`RANGER_SERVER_URL` and `RANGER_HIVE_REPOSITORY_NAME` environment variables.
Hive Ranger audit logs are stored in `/tmp/root/ranger-hive-audit.log`.
+  - If you want to enable the HDFS Ranger plugin, you need to set both the
`RANGER_SERVER_URL` and `RANGER_HDFS_REPOSITORY_NAME` environment variables.
HDFS Ranger audit logs are stored in
`/usr/local/hadoop/logs/ranger-hdfs-audit.log`
+ - Example: docker run -e HIVE_RUNTIME_VERSION='hive3' -e
RANGER_SERVER_URL='http://ranger-server:6080' -e
RANGER_HIVE_REPOSITORY_NAME='hiveDev' -e RANGER_HDFS_REPOSITORY_NAME='hdfsDev'
... datastrato/gravitino-ci-hive:0.1.13
+
- gravitino-ci-hive:0.1.12
- Shrink hive Docker image size by 420MB
@@ -251,6 +265,12 @@ Changelog
- Use `ranger-admin` release from `datastrato/apache-ranger:2.4.0` to build
docker image.
- Remove unnecessary hack in `start-ranger-service.sh`.
- Reduce docker image build time from `~1h` to `~5min`.
+  - How to debug the Ranger admin service (see the command sketch below):
+    - Use `docker exec -it <container_id> bash` to enter the docker container.
+    - Add the line `export
JAVA_OPTS=-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5001`
to `/opt/ranger-admin/ews/webapp/WEB-INF/classes/conf/ranger-admin-env-debug.sh`
in the docker container.
+    - Execute `/opt/ranger-admin/stop-ranger-admin.sh` and
`/opt/ranger-admin/start-ranger-admin.sh` to restart the Ranger admin.
+    - Clone the `Apache Ranger` project from GitHub and check out the `2.4.0`
release.
+    - Create a remote debug configuration (`Use module classpath` =
`EmbeddedServer`) in your IDE and connect to the Ranger admin container.
- gravitino-ci-ranger:0.1.0
- Docker image `datastrato/gravitino-ci-ranger:0.1.0`
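The debug steps above, condensed into a command sketch (the container id is a
placeholder, and appending the JAVA_OPTS line with echo is just one way to add
it):

    docker exec -it <container_id> bash
    echo 'export JAVA_OPTS=-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5001' \
      >> /opt/ranger-admin/ews/webapp/WEB-INF/classes/conf/ranger-admin-env-debug.sh
    /opt/ranger-admin/stop-ranger-admin.sh
    /opt/ranger-admin/start-ranger-admin.sh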
diff --git a/flink-connector/build.gradle.kts b/flink-connector/build.gradle.kts
index ff28a72c4..456be48ef 100644
--- a/flink-connector/build.gradle.kts
+++ b/flink-connector/build.gradle.kts
@@ -152,7 +152,7 @@ tasks.test {
dependsOn(tasks.jar)
doFirst {
- environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE",
"datastrato/gravitino-ci-hive:0.1.12")
+ environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE",
"datastrato/gravitino-ci-hive:0.1.13")
}
val init = project.extra.get("initIntegrationTest") as (Test) -> Unit
diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
index 7fa802ef6..ad8709126 100644
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
@@ -128,6 +128,7 @@ mockito-inline = { group = "org.mockito", name =
"mockito-inline", version.ref =
hive2-metastore = { group = "org.apache.hive", name = "hive-metastore",
version.ref = "hive2"}
hive2-exec = { group = "org.apache.hive", name = "hive-exec", version.ref =
"hive2"}
hive2-common = { group = "org.apache.hive", name = "hive-common", version.ref
= "hive2"}
+hive2-jdbc = { group = "org.apache.hive", name = "hive-jdbc", version.ref =
"hive2"}
hadoop2-hdfs = { group = "org.apache.hadoop", name = "hadoop-hdfs",
version.ref = "hadoop2" }
hadoop2-common = { group = "org.apache.hadoop", name = "hadoop-common",
version.ref = "hadoop2"}
hadoop2-mapreduce-client-core = { group = "org.apache.hadoop", name =
"hadoop-mapreduce-client-core", version.ref = "hadoop2"}
diff --git
a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/ContainerSuite.java
b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/ContainerSuite.java
index 15336298d..4b3b5fdc4 100644
---
a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/ContainerSuite.java
+++
b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/ContainerSuite.java
@@ -98,6 +98,17 @@ public class ContainerSuite implements Closeable {
}
public void startHiveContainer() {
+ startHiveContainer(
+ ImmutableMap.<String, String>builder().put("HADOOP_USER_NAME",
"datastrato").build());
+ }
+
+  /**
+   * To start the Hive container, you can specify these environment variables:
+   * HIVE_RUNTIME_VERSION: Hive version, currently supports `hive2` (default) and `hive3`.
+   * DOCKER_ENV_RANGER_SERVER_URL: Ranger server URL.
+   * DOCKER_ENV_RANGER_HIVE_REPOSITORY_NAME: Ranger Hive repository name.
+   * DOCKER_ENV_RANGER_HDFS_REPOSITORY_NAME: Ranger HDFS repository name.
+   */
+ public void startHiveContainer(Map<String, String> envVars) {
if (hiveContainer == null) {
synchronized (ContainerSuite.class) {
if (hiveContainer == null) {
@@ -105,10 +116,7 @@ public class ContainerSuite implements Closeable {
HiveContainer.Builder hiveBuilder =
HiveContainer.builder()
.withHostName("gravitino-ci-hive")
- .withEnvVars(
- ImmutableMap.<String, String>builder()
- .put("HADOOP_USER_NAME", "datastrato")
- .build())
+ .withEnvVars(envVars)
.withNetwork(network);
HiveContainer container = closer.register(hiveBuilder.build());
container.start();
diff --git
a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/HiveContainer.java
b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/HiveContainer.java
index 08e8dfe07..6fbcfb16e 100644
---
a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/HiveContainer.java
+++
b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/HiveContainer.java
@@ -41,10 +41,18 @@ public class HiveContainer extends BaseContainer {
public static final String DEFAULT_IMAGE =
System.getenv("GRAVITINO_CI_HIVE_DOCKER_IMAGE");
public static final String KERBEROS_IMAGE =
System.getenv("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE");
+
public static final String HOST_NAME = "gravitino-ci-hive";
+ public static final String HADOOP_USER_NAME = "HADOOP_USER_NAME";
+  // Specify the Hive version used to start the Hive container; currently supports `hive2`
+  // (default) and `hive3`
+ public static final String HIVE_RUNTIME_VERSION = "HIVE_RUNTIME_VERSION";
+ public static final String HIVE2 = "hive2"; // The Hive container default
version
+ public static final String HIVE3 = "hive3";
private static final int MYSQL_PORT = 3306;
public static final int HDFS_DEFAULTFS_PORT = 9000;
public static final int HIVE_METASTORE_PORT = 9083;
+ public static final int HIVE_SERVICE_PORT = 10000;
private static final String HIVE_LOG_PATH = "/tmp/root/";
private static final String HDFS_LOG_PATH = "/usr/local/hadoop/logs/";
@@ -204,19 +212,22 @@ public class HiveContainer extends BaseContainer {
private Builder() {
this.image = DEFAULT_IMAGE;
this.hostName = HOST_NAME;
- this.exposePorts = ImmutableSet.of(MYSQL_PORT, HDFS_DEFAULTFS_PORT,
HIVE_METASTORE_PORT);
+ this.exposePorts =
+ ImmutableSet.of(MYSQL_PORT, HDFS_DEFAULTFS_PORT,
HIVE_METASTORE_PORT, HIVE_SERVICE_PORT);
+ }
+
+ private String generateImageName() {
+ String hiveDockerImageName = image;
+ if (kerberosEnabled) {
+ hiveDockerImageName = KERBEROS_IMAGE;
+ }
+ return hiveDockerImageName;
}
@Override
public HiveContainer build() {
return new HiveContainer(
- kerberosEnabled ? KERBEROS_IMAGE : image,
- kerberosEnabled ? "kerberos-" + hostName : hostName,
- exposePorts,
- extraHosts,
- filesToMount,
- envVars,
- network);
+ generateImageName(), hostName, exposePorts, extraHosts,
filesToMount, envVars, network);
}
}
}
diff --git
a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/RangerContainer.java
b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/RangerContainer.java
index e703fcdfc..3892c7feb 100644
---
a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/RangerContainer.java
+++
b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/RangerContainer.java
@@ -37,17 +37,22 @@ public class RangerContainer extends BaseContainer {
public static final String DEFAULT_IMAGE =
System.getenv("GRAVITINO_CI_RANGER_DOCKER_IMAGE");
public static final String HOST_NAME = "gravitino-ci-ranger";
- public static final int RANGER_PORT = 6080;
+ public static final int RANGER_SERVER_PORT = 6080;
public RangerClient rangerClient;
private String rangerUrl;
private static final String username = "admin";
// Apache Ranger Password should be minimum 8 characters with min one
alphabet and one numeric.
private static final String password = "rangerR0cks!";
- /* for kerberos authentication:
- authType = "kerberos"
- username = principal
- password = path of the keytab file */
+
+  /**
+   * For kerberos authentication: authType = "kerberos", username = principal, password = path of
+   * the keytab file
+   */
private static final String authType = "simple";
+ // Ranger hive/hdfs Docker startup environment variable name
+ public static final String DOCKER_ENV_RANGER_SERVER_URL =
"RANGER_SERVER_URL";
+ public static final String DOCKER_ENV_RANGER_HDFS_REPOSITORY_NAME =
"RANGER_HDFS_REPOSITORY_NAME";
+ public static final String DOCKER_ENV_RANGER_HIVE_REPOSITORY_NAME =
"RANGER_HIVE_REPOSITORY_NAME";
public static Builder builder() {
return new Builder();
@@ -74,7 +79,7 @@ public class RangerContainer extends BaseContainer {
public void start() {
super.start();
- rangerUrl = String.format("http://localhost:%s", this.getMappedPort(6080));
+ rangerUrl = String.format("http://localhost:%s",
this.getMappedPort(RANGER_SERVER_PORT));
rangerClient = new RangerClient(rangerUrl, authType, username, password,
null);
Preconditions.check("Ranger container startup failed!",
checkContainerStatus(10));
@@ -118,7 +123,7 @@ public class RangerContainer extends BaseContainer {
private Builder() {
this.image = DEFAULT_IMAGE;
this.hostName = HOST_NAME;
- this.exposePorts = ImmutableSet.of(RANGER_PORT);
+ this.exposePorts = ImmutableSet.of(RANGER_SERVER_PORT);
this.envVars =
ImmutableMap.<String, String>builder().put("RANGER_PASSWORD",
password).build();
}
diff --git a/integration-test/build.gradle.kts
b/integration-test/build.gradle.kts
index c64494b21..630f16c68 100644
--- a/integration-test/build.gradle.kts
+++ b/integration-test/build.gradle.kts
@@ -76,6 +76,9 @@ dependencies {
exclude("org.pentaho")
exclude("org.slf4j")
}
+ testImplementation(libs.hive2.jdbc) {
+ exclude("org.slf4j")
+ }
testImplementation(libs.hive2.metastore) {
exclude("co.cask.tephra")
exclude("com.github.joshelser")
@@ -157,7 +160,7 @@ tasks.test {
doFirst {
// Gravitino CI Docker image
- environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE",
"datastrato/gravitino-ci-hive:0.1.12")
+ environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE",
"datastrato/gravitino-ci-hive:0.1.13")
environment("GRAVITINO_CI_TRINO_DOCKER_IMAGE",
"datastrato/gravitino-ci-trino:0.1.5")
environment("GRAVITINO_CI_KAFKA_DOCKER_IMAGE", "apache/kafka:3.7.0")
environment("GRAVITINO_CI_DORIS_DOCKER_IMAGE",
"datastrato/gravitino-ci-doris:0.1.5")
@@ -176,7 +179,7 @@ tasks.test {
// Check whether this module has already built
val trinoConnectorBuildDir = project(":trino-connector").buildDir
if (trinoConnectorBuildDir.exists()) {
- // Check the version gravitino related jars in build equal to the
current project version
+    // Check that the versions of Gravitino-related jars in the build equal the current project version
val invalidGravitinoJars =
trinoConnectorBuildDir.resolve("libs").listFiles { _, name ->
name.startsWith("gravitino") }?.filter {
val name = it.name
!name.endsWith(version + ".jar")
diff --git
a/integration-test/src/test/java/org/apache/gravitino/integration/test/authorization/ranger/RangerDefines.java
b/integration-test/src/test/java/org/apache/gravitino/integration/test/authorization/ranger/RangerDefines.java
new file mode 100644
index 000000000..9245f0a8c
--- /dev/null
+++
b/integration-test/src/test/java/org/apache/gravitino/integration/test/authorization/ranger/RangerDefines.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.gravitino.integration.test.authorization.ranger;
+
+import org.apache.ranger.plugin.util.SearchFilter;
+
+public class RangerDefines {
+ // In the Ranger 2.4.0
+ //
security-admin/src/main/java/org/apache/ranger/service/RangerServiceDefService.java:L43
+ public static final String IMPLICIT_CONDITION_EXPRESSION_NAME =
"_expression";
+
+ // In the Ranger 2.4.0
+ //
security-admin/src/main/java/org/apache/ranger/common/RangerSearchUtil.java:L159
+ public static final String SEARCH_FILTER_SERVICE_NAME =
SearchFilter.SERVICE_NAME;
+ public static final String RESOURCE_DATABASE = "database"; // Hive resource
database name
+ public static final String RESOURCE_TABLE = "table"; // Hive resource table
name
+ public static final String RESOURCE_COLUMN = "column"; // Hive resource
column name
+ public static final String RESOURCE_PATH = "path"; // HDFS resource path name
+ public static final String SEARCH_FILTER_DATABASE =
+ SearchFilter.RESOURCE_PREFIX + RESOURCE_DATABASE;
+ public static final String SEARCH_FILTER_TABLE =
SearchFilter.RESOURCE_PREFIX + RESOURCE_TABLE;
+ public static final String SEARCH_FILTER_COLUMN =
SearchFilter.RESOURCE_PREFIX + RESOURCE_COLUMN;
+ public static final String SEARCH_FILTER_PATH = SearchFilter.RESOURCE_PREFIX
+ RESOURCE_PATH;
+ public static final String SERVICE_TYPE_HDFS = "hdfs"; // HDFS service type
+ public static final String SERVICE_TYPE_HIVE = "hive"; // Hive service type
+ public static final String OWNER_USER = "{OWNER}"; // {OWNER}: resource
owner user variable
+ public static final String CURRENT_USER = "{USER}"; // {USER}: current user
variable
+ public static final String PUBLIC_GROUP = "public"; // public group
+ public static final String ACCESS_TYPE_HDFS_READ = "read"; // Read access
type in the HDFS
+ public static final String ACCESS_TYPE_HDFS_WRITE = "write"; // Write access
type in the HDFS
+ public static final String ACCESS_TYPE_HDFS_EXECUTE =
+ "execute"; // execute access type in the HDFS
+ public static final String ACCESS_TYPE_HIVE_ALL = "all"; // All access type
in the Hive
+ public static final String ACCESS_TYPE_HIVE_SELECT = "select"; // Select
access type in the Hive
+}
diff --git
a/integration-test/src/test/java/org/apache/gravitino/integration/test/authorization/ranger/RangerHiveIT.java
b/integration-test/src/test/java/org/apache/gravitino/integration/test/authorization/ranger/RangerHiveIT.java
new file mode 100644
index 000000000..19e84f9d8
--- /dev/null
+++
b/integration-test/src/test/java/org/apache/gravitino/integration/test/authorization/ranger/RangerHiveIT.java
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.gravitino.integration.test.authorization.ranger;
+
+import com.google.common.collect.ImmutableMap;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.gravitino.integration.test.container.ContainerSuite;
+import org.apache.gravitino.integration.test.container.HiveContainer;
+import org.apache.gravitino.integration.test.container.RangerContainer;
+import org.apache.ranger.plugin.model.RangerPolicy;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+
+@Tag("gravitino-docker-test")
+public class RangerHiveIT extends RangerIT {
+ private static final ContainerSuite containerSuite =
ContainerSuite.getInstance();
+ private static Connection adminConnection;
+ private static Connection anonymousConnection;
+ private static final String adminUser = "gravitino";
+ private static final String anonymousUser = "anonymous";
+
+ @BeforeAll
+ public static void setup() {
+ RangerIT.setup();
+
+ containerSuite.startHiveContainer(
+ new HashMap<>(
+ ImmutableMap.of(
+ HiveContainer.HIVE_RUNTIME_VERSION,
+ HiveContainer.HIVE3,
+ RangerContainer.DOCKER_ENV_RANGER_SERVER_URL,
+ String.format(
+ "http://%s:%d",
+
containerSuite.getRangerContainer().getContainerIpAddress(),
+ RangerContainer.RANGER_SERVER_PORT),
+ RangerContainer.DOCKER_ENV_RANGER_HIVE_REPOSITORY_NAME,
+ RangerIT.RANGER_HIVE_REPO_NAME,
+ RangerContainer.DOCKER_ENV_RANGER_HDFS_REPOSITORY_NAME,
+ RangerIT.RANGER_HDFS_REPO_NAME,
+ HiveContainer.HADOOP_USER_NAME,
+ adminUser)));
+
+
createRangerHdfsRepository(containerSuite.getHiveContainer().getContainerIpAddress(),
true);
+
createRangerHiveRepository(containerSuite.getHiveContainer().getContainerIpAddress(),
true);
+ allowAnyoneAccessHDFS();
+ allowAnyoneAccessInformationSchema();
+
+ // Create hive connection
+ String url =
+ String.format(
+ "jdbc:hive2://%s:%d/default",
+ containerSuite.getHiveContainer().getContainerIpAddress(),
+ HiveContainer.HIVE_SERVICE_PORT);
+ try {
+ Class.forName("org.apache.hive.jdbc.HiveDriver");
+ adminConnection = DriverManager.getConnection(url, adminUser, "");
+ anonymousConnection = DriverManager.getConnection(url, anonymousUser,
"");
+ } catch (ClassNotFoundException | SQLException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+  /** Currently we only test Ranger Hive, so we allow anyone to access HDFS */
+ static void allowAnyoneAccessHDFS() {
+ Map<String, RangerPolicy.RangerPolicyResource> policyResourceMap =
+ ImmutableMap.of(RangerDefines.RESOURCE_PATH, new
RangerPolicy.RangerPolicyResource("/*"));
+ RangerPolicy.RangerPolicyItem policyItem = new
RangerPolicy.RangerPolicyItem();
+ policyItem.setUsers(Arrays.asList(RangerDefines.CURRENT_USER));
+ policyItem.setAccesses(
+ Arrays.asList(
+ new
RangerPolicy.RangerPolicyItemAccess(RangerDefines.ACCESS_TYPE_HDFS_READ),
+ new
RangerPolicy.RangerPolicyItemAccess(RangerDefines.ACCESS_TYPE_HDFS_WRITE),
+ new
RangerPolicy.RangerPolicyItemAccess(RangerDefines.ACCESS_TYPE_HDFS_EXECUTE)));
+ updateOrCreateRangerPolicy(
+ RangerDefines.SERVICE_TYPE_HDFS,
+ RANGER_HDFS_REPO_NAME,
+ "allowAnyoneAccessHDFS",
+ policyResourceMap,
+ Collections.singletonList(policyItem));
+ }
+
+  /**
+   * Hive must have this policy to allow anyone to access the information schema, so that
+   * `database`, `tables` and `columns` can be shown
+   */
+ static void allowAnyoneAccessInformationSchema() {
+ Map<String, RangerPolicy.RangerPolicyResource> policyResourceMap =
+ ImmutableMap.of(
+ RangerDefines.RESOURCE_DATABASE,
+ new RangerPolicy.RangerPolicyResource("information_schema"),
+ RangerDefines.RESOURCE_TABLE,
+ new RangerPolicy.RangerPolicyResource("*"),
+ RangerDefines.RESOURCE_COLUMN,
+ new RangerPolicy.RangerPolicyResource("*"));
+ RangerPolicy.RangerPolicyItem policyItem = new
RangerPolicy.RangerPolicyItem();
+ policyItem.setGroups(Arrays.asList(RangerDefines.PUBLIC_GROUP));
+ policyItem.setAccesses(
+ Arrays.asList(
+ new
RangerPolicy.RangerPolicyItemAccess(RangerDefines.ACCESS_TYPE_HIVE_SELECT)));
+ updateOrCreateRangerPolicy(
+ RangerDefines.SERVICE_TYPE_HIVE,
+ RANGER_HIVE_REPO_NAME,
+ "allowAnyoneAccessInformationSchema",
+ policyResourceMap,
+ Collections.singletonList(policyItem));
+ }
+
+ @Test
+ public void testCreateDatabase() throws Exception {
+ String dbName = "db1";
+
+    // Only allow the admin user to operate on database `db1`
+ // Other users can't see the database `db1`
+ Map<String, RangerPolicy.RangerPolicyResource> policyResourceMap =
+ ImmutableMap.of(
+ RangerDefines.RESOURCE_DATABASE, new
RangerPolicy.RangerPolicyResource(dbName));
+ RangerPolicy.RangerPolicyItem policyItem = new
RangerPolicy.RangerPolicyItem();
+ policyItem.setUsers(Arrays.asList(adminUser));
+ policyItem.setAccesses(
+ Arrays.asList(new
RangerPolicy.RangerPolicyItemAccess(RangerDefines.ACCESS_TYPE_HIVE_ALL)));
+ updateOrCreateRangerPolicy(
+ RangerDefines.SERVICE_TYPE_HIVE,
+ RANGER_HIVE_REPO_NAME,
+ "testAllowShowDatabase",
+ policyResourceMap,
+ Collections.singletonList(policyItem));
+
+ Statement adminStmt = adminConnection.createStatement();
+ adminStmt.execute(String.format("CREATE DATABASE %s", dbName));
+ String sql = "show databases";
+ ResultSet adminRS = adminStmt.executeQuery(sql);
+ List<String> adminDbs = new ArrayList<>();
+ while (adminRS.next()) {
+ adminDbs.add(adminRS.getString(1));
+ }
+ Assertions.assertTrue(adminDbs.contains(dbName));
+
+ // Anonymous user can't see the database `db1`
+ Statement anonymousStmt = anonymousConnection.createStatement();
+ ResultSet anonymousRS = anonymousStmt.executeQuery(sql);
+ List<String> anonymousDbs = new ArrayList<>();
+ while (anonymousRS.next()) {
+ anonymousDbs.add(anonymousRS.getString(1));
+ }
+ Assertions.assertFalse(anonymousDbs.contains(dbName));
+
+ // Allow anonymous user to see the database `db1`
+ policyItem.setUsers(Arrays.asList(adminUser, anonymousUser));
+ policyItem.setAccesses(
+ Arrays.asList(new
RangerPolicy.RangerPolicyItemAccess(RangerDefines.ACCESS_TYPE_HIVE_ALL)));
+ updateOrCreateRangerPolicy(
+ RangerDefines.SERVICE_TYPE_HIVE,
+ RANGER_HIVE_REPO_NAME,
+ "testAllowShowDatabase",
+ policyResourceMap,
+ Collections.singletonList(policyItem));
+ anonymousRS = anonymousStmt.executeQuery(sql);
+ anonymousDbs.clear();
+ while (anonymousRS.next()) {
+ anonymousDbs.add(anonymousRS.getString(1));
+ }
+ Assertions.assertTrue(anonymousDbs.contains(dbName));
+ }
+}
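As a manual counterpart to this test, the same policy behavior can be
spot-checked with Beeline; a sketch assuming HiveServer2 is reachable on port
10000 (the host is a placeholder):

    # The admin user holds `all` on db1 via the policy above, so db1 is listed
    beeline -u jdbc:hive2://<hive-host>:10000/default -n gravitino -e 'SHOW DATABASES;'
    # The anonymous user does not see db1 until the policy includes that user
    beeline -u jdbc:hive2://<hive-host>:10000/default -n anonymous -e 'SHOW DATABASES;'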
diff --git
a/integration-test/src/test/java/org/apache/gravitino/integration/test/authorization/ranger/RangerIT.java
b/integration-test/src/test/java/org/apache/gravitino/integration/test/authorization/ranger/RangerIT.java
index 0548ceb48..8b560ba2d 100644
---
a/integration-test/src/test/java/org/apache/gravitino/integration/test/authorization/ranger/RangerIT.java
+++
b/integration-test/src/test/java/org/apache/gravitino/integration/test/authorization/ranger/RangerIT.java
@@ -18,29 +18,32 @@
*/
package org.apache.gravitino.integration.test.authorization.ranger;
-import static org.apache.ranger.plugin.util.SearchFilter.SERVICE_NAME;
-import static org.apache.ranger.plugin.util.SearchFilter.SERVICE_TYPE;
-
import com.google.common.collect.ImmutableMap;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.stream.Collectors;
import org.apache.gravitino.integration.test.container.ContainerSuite;
+import org.apache.gravitino.integration.test.container.HiveContainer;
+import org.apache.gravitino.integration.test.container.TrinoContainer;
import org.apache.ranger.RangerClient;
import org.apache.ranger.RangerServiceException;
+import org.apache.ranger.plugin.model.RangerPolicy;
import org.apache.ranger.plugin.model.RangerService;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.Tag;
-import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
-@Tag("gravitino-docker-test")
public class RangerIT {
- private static final String trinoServiceName = "trinodev";
- private static final String trinoType = "trino";
- private static final String hiveServiceName = "hivedev";
- private static final String hiveType = "hive";
+ private static final Logger LOG = LoggerFactory.getLogger(RangerIT.class);
+ protected static final String RANGER_TRINO_REPO_NAME = "trinoDev";
+ private static final String RANGER_TRINO_TYPE = "trino";
+ protected static final String RANGER_HIVE_REPO_NAME = "hiveDev";
+ private static final String RANGER_HIVE_TYPE = "hive";
+ protected static final String RANGER_HDFS_REPO_NAME = "hdfsDev";
+ private static final String RANGER_HDFS_TYPE = "hdfs";
private static RangerClient rangerClient;
private static final ContainerSuite containerSuite =
ContainerSuite.getInstance();
@@ -48,30 +51,36 @@ public class RangerIT {
@BeforeAll
public static void setup() {
containerSuite.startRangerContainer();
-
rangerClient = containerSuite.getRangerContainer().rangerClient;
}
@AfterAll
- public static void cleanup() throws RangerServiceException {
- if (rangerClient != null) {
- rangerClient.deleteService(trinoServiceName);
- rangerClient.deleteService(hiveServiceName);
+ public static void cleanup() {
+ try {
+ if (rangerClient != null) {
+ if (rangerClient.getService(RANGER_TRINO_REPO_NAME) != null) {
+ rangerClient.deleteService(RANGER_TRINO_REPO_NAME);
+ }
+ if (rangerClient.getService(RANGER_HIVE_REPO_NAME) != null) {
+ rangerClient.deleteService(RANGER_HIVE_REPO_NAME);
+ }
+ }
+ } catch (RangerServiceException e) {
+ // ignore
}
}
- @Test
- public void testCreateTrinoService() throws RangerServiceException {
+  public void createRangerTrinoRepository(String trinoIp) {
String usernameKey = "username";
String usernameVal = "admin";
String jdbcKey = "jdbc.driverClassName";
String jdbcVal = "io.trino.jdbc.TrinoDriver";
String jdbcUrlKey = "jdbc.url";
- String jdbcUrlVal = "http://localhost:8080";
+    String jdbcUrlVal = String.format("jdbc:trino://%s:%d", trinoIp, TrinoContainer.TRINO_PORT);
RangerService service = new RangerService();
- service.setType(trinoType);
- service.setName(trinoServiceName);
+ service.setType(RANGER_TRINO_TYPE);
+ service.setName(RANGER_TRINO_REPO_NAME);
service.setConfigs(
ImmutableMap.<String, String>builder()
.put(usernameKey, usernameVal)
@@ -79,22 +88,32 @@ public class RangerIT {
.put(jdbcUrlKey, jdbcUrlVal)
.build());
- RangerService createdService = rangerClient.createService(service);
- Assertions.assertNotNull(createdService);
-
- Map<String, String> filter = new HashMap<>();
- filter.put(SERVICE_TYPE, trinoType);
- filter.put(SERVICE_NAME, trinoServiceName);
- List<RangerService> services = rangerClient.findServices(filter);
- Assertions.assertEquals(services.get(0).getName(), trinoServiceName);
- Assertions.assertEquals(services.get(0).getType(), trinoType);
- Assertions.assertEquals(services.get(0).getConfigs().get(usernameKey),
usernameVal);
- Assertions.assertEquals(services.get(0).getConfigs().get(jdbcKey),
jdbcVal);
- Assertions.assertEquals(services.get(0).getConfigs().get(jdbcUrlKey),
jdbcUrlVal);
+ try {
+ RangerService createdService = rangerClient.createService(service);
+ Assertions.assertNotNull(createdService);
+
+ Map<String, String> filter =
+ ImmutableMap.of(RangerDefines.SEARCH_FILTER_SERVICE_NAME,
RANGER_TRINO_REPO_NAME);
+ List<RangerService> services = rangerClient.findServices(filter);
+ Assertions.assertEquals(RANGER_TRINO_TYPE, services.get(0).getType());
+ Assertions.assertEquals(RANGER_TRINO_REPO_NAME,
services.get(0).getName());
+ Assertions.assertEquals(usernameVal,
services.get(0).getConfigs().get(usernameKey));
+ Assertions.assertEquals(jdbcVal,
services.get(0).getConfigs().get(jdbcKey));
+ Assertions.assertEquals(jdbcUrlVal,
services.get(0).getConfigs().get(jdbcUrlKey));
+ } catch (RangerServiceException e) {
+ throw new RuntimeException(e);
+ }
}
- @Test
- public void createHiveService() throws RangerServiceException {
+ public static void createRangerHiveRepository(String hiveIp, boolean
cleanAllPolicy) {
+ try {
+ if (null != rangerClient.getService(RANGER_HIVE_REPO_NAME)) {
+ return;
+ }
+ } catch (RangerServiceException e) {
+ LOG.error("Error while fetching service: {}", e.getMessage());
+ }
+
String usernameKey = "username";
String usernameVal = "admin";
String passwordKey = "password";
@@ -102,11 +121,12 @@ public class RangerIT {
String jdbcKey = "jdbc.driverClassName";
String jdbcVal = "org.apache.hive.jdbc.HiveDriver";
String jdbcUrlKey = "jdbc.url";
- String jdbcUrlVal = "jdbc:hive2://172.17.0.2:10000";
+ String jdbcUrlVal =
+ String.format("jdbc:hive2://%s:%d", hiveIp,
HiveContainer.HIVE_SERVICE_PORT);
RangerService service = new RangerService();
- service.setType(hiveType);
- service.setName(hiveServiceName);
+ service.setType(RANGER_HIVE_TYPE);
+ service.setName(RANGER_HIVE_REPO_NAME);
service.setConfigs(
ImmutableMap.<String, String>builder()
.put(usernameKey, usernameVal)
@@ -115,17 +135,180 @@ public class RangerIT {
.put(jdbcUrlKey, jdbcUrlVal)
.build());
- RangerService createdService = rangerClient.createService(service);
- Assertions.assertNotNull(createdService);
-
- Map<String, String> filter = new HashMap<>();
- filter.put(SERVICE_TYPE, hiveType);
- filter.put(SERVICE_NAME, hiveServiceName);
- List<RangerService> services = rangerClient.findServices(filter);
- Assertions.assertEquals(services.get(0).getName(), hiveServiceName);
- Assertions.assertEquals(services.get(0).getType(), hiveType);
- Assertions.assertEquals(services.get(0).getConfigs().get(usernameKey), usernameVal);
- Assertions.assertEquals(services.get(0).getConfigs().get(jdbcKey), jdbcVal);
- Assertions.assertEquals(services.get(0).getConfigs().get(jdbcUrlKey), jdbcUrlVal);
+ try {
+ RangerService createdService = rangerClient.createService(service);
+ Assertions.assertNotNull(createdService);
+
+ Map<String, String> filter =
+          ImmutableMap.of(RangerDefines.SEARCH_FILTER_SERVICE_NAME, RANGER_HIVE_REPO_NAME);
+      List<RangerService> services = rangerClient.findServices(filter);
+      Assertions.assertEquals(RANGER_HIVE_TYPE, services.get(0).getType());
+      Assertions.assertEquals(RANGER_HIVE_REPO_NAME, services.get(0).getName());
+      Assertions.assertEquals(usernameVal, services.get(0).getConfigs().get(usernameKey));
+      Assertions.assertEquals(jdbcVal, services.get(0).getConfigs().get(jdbcKey));
+      Assertions.assertEquals(jdbcUrlVal, services.get(0).getConfigs().get(jdbcUrlKey));
+
+ if (cleanAllPolicy) {
+ cleanAllPolicy(RANGER_HIVE_REPO_NAME);
+ }
+ } catch (RangerServiceException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+  public static void createRangerHdfsRepository(String hdfsIp, boolean cleanAllPolicy) {
+ try {
+ if (null != rangerClient.getService(RANGER_HDFS_REPO_NAME)) {
+ return;
+ }
+ } catch (RangerServiceException e) {
+ LOG.error("Error while fetching service: {}", e.getMessage());
+ }
+
+ String usernameKey = "username";
+ String usernameVal = "admin";
+ String passwordKey = "password";
+ String passwordVal = "admin";
+ String authenticationKey = "hadoop.security.authentication";
+ String authenticationVal = "simple";
+ String protectionKey = "hadoop.rpc.protection";
+ String protectionVal = "authentication";
+ String authorizationKey = "hadoop.security.authorization";
+ String authorizationVal = "false";
+ String fsDefaultNameKey = "fs.default.name";
+ String fsDefaultNameVal =
+ String.format("hdfs://%s:%d", hdfsIp,
HiveContainer.HDFS_DEFAULTFS_PORT);
+
+ RangerService service = new RangerService();
+ service.setType(RANGER_HDFS_TYPE);
+ service.setName(RANGER_HDFS_REPO_NAME);
+ service.setConfigs(
+ ImmutableMap.<String, String>builder()
+ .put(usernameKey, usernameVal)
+ .put(passwordKey, passwordVal)
+ .put(authenticationKey, authenticationVal)
+ .put(protectionKey, protectionVal)
+ .put(authorizationKey, authorizationVal)
+ .put(fsDefaultNameKey, fsDefaultNameVal)
+ .build());
+
+ try {
+ RangerService createdService = rangerClient.createService(service);
+ Assertions.assertNotNull(createdService);
+
+ Map<String, String> filter =
+          ImmutableMap.of(RangerDefines.SEARCH_FILTER_SERVICE_NAME, RANGER_HDFS_REPO_NAME);
+      List<RangerService> services = rangerClient.findServices(filter);
+      Assertions.assertEquals(RANGER_HDFS_TYPE, services.get(0).getType());
+      Assertions.assertEquals(RANGER_HDFS_REPO_NAME, services.get(0).getName());
+      Assertions.assertEquals(usernameVal, services.get(0).getConfigs().get(usernameKey));
+      Assertions.assertEquals(authenticationVal, services.get(0).getConfigs().get(authenticationKey));
+      Assertions.assertEquals(protectionVal, services.get(0).getConfigs().get(protectionKey));
+      Assertions.assertEquals(authorizationVal, services.get(0).getConfigs().get(authorizationKey));
+      Assertions.assertEquals(fsDefaultNameVal, services.get(0).getConfigs().get(fsDefaultNameKey));
+
+ if (cleanAllPolicy) {
+ cleanAllPolicy(RANGER_HDFS_REPO_NAME);
+ }
+ } catch (RangerServiceException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ protected static String updateOrCreateRangerPolicy(
+ String type,
+ String serviceName,
+ String policyName,
+ Map<String, RangerPolicy.RangerPolicyResource> policyResourceMap,
+ List<RangerPolicy.RangerPolicyItem> policyItems) {
+ String retPolicyName = policyName;
+
+ Map<String, String> resourceFilter = new HashMap<>(); // use to match the
precise policy
+ Map<String, String> policyFilter = new HashMap<>();
+ policyFilter.put(RangerDefines.SEARCH_FILTER_SERVICE_NAME, serviceName);
+ final int[] index = {0};
+ policyResourceMap.forEach(
+ (k, v) -> {
+ if (type.equals(RANGER_HIVE_TYPE)) {
+ if (index[0] == 0) {
+              policyFilter.put(RangerDefines.SEARCH_FILTER_DATABASE, v.getValues().get(0));
+              resourceFilter.put(RangerDefines.RESOURCE_DATABASE, v.getValues().get(0));
+            } else if (index[0] == 1) {
+              policyFilter.put(RangerDefines.SEARCH_FILTER_TABLE, v.getValues().get(0));
+              resourceFilter.put(RangerDefines.RESOURCE_TABLE, v.getValues().get(0));
+            } else if (index[0] == 2) {
+              policyFilter.put(RangerDefines.SEARCH_FILTER_COLUMN, v.getValues().get(0));
+              resourceFilter.put(RangerDefines.RESOURCE_COLUMN, v.getValues().get(0));
+ }
+ index[0]++;
+ } else if (type.equals(RANGER_HDFS_TYPE)) {
+            policyFilter.put(RangerDefines.SEARCH_FILTER_PATH, v.getValues().get(0));
+            resourceFilter.put(RangerDefines.RESOURCE_PATH, v.getValues().get(0));
+ }
+ });
+ try {
+ List<RangerPolicy> policies = rangerClient.findPolicies(policyFilter);
+ if (!policies.isEmpty()) {
+        // Because Ranger uses wildcard matching, it returns every policy that meets
+        // the wildcard (*, ?) conditions; e.g. a `*.*.*` policy matches `db1.table1.column1`.
+        // So we must filter the policies precisely by hand.
+ policies =
+ policies.stream()
+ .filter(
+ policy ->
+ policy.getResources().entrySet().stream()
+ .allMatch(
+ entry ->
+ resourceFilter.containsKey(entry.getKey())
+ && entry.getValue().getValues().size()
== 1
+ && entry
+ .getValue()
+ .getValues()
+
.contains(resourceFilter.get(entry.getKey()))))
+ .collect(Collectors.toList());
+ }
+
+ Assertions.assertTrue(policies.size() <= 1);
+ if (!policies.isEmpty()) {
+ RangerPolicy policy = policies.get(0);
+ policy.getPolicyItems().addAll(policyItems);
+ rangerClient.updatePolicy(policy.getId(), policy);
+ retPolicyName = policy.getName();
+ } else {
+ RangerPolicy policy = new RangerPolicy();
+ policy.setServiceType(type);
+ policy.setService(serviceName);
+ policy.setName(policyName);
+ policy.setResources(policyResourceMap);
+ policy.setPolicyItems(policyItems);
+ rangerClient.createPolicy(policy);
+ }
+ } catch (RangerServiceException e) {
+ throw new RuntimeException(e);
+ }
+
+ try {
+      Thread.sleep(1000); // Give the Hive/HDFS Ranger plugins a moment to pick up the updated policy.
+ } catch (InterruptedException e) {
+ throw new RuntimeException(e);
+ }
+
+ return retPolicyName;
+ }
+
+  /** Clean up all policies in the given Ranger service. */
+ protected static void cleanAllPolicy(String serviceName) {
+ try {
+ List<RangerPolicy> policies =
+ rangerClient.findPolicies(
+          ImmutableMap.of(RangerDefines.SEARCH_FILTER_SERVICE_NAME, serviceName));
+ for (RangerPolicy policy : policies) {
+ rangerClient.deletePolicy(policy.getId());
+ }
+ } catch (RangerServiceException e) {
+ throw new RuntimeException(e);
+ }
}
}
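For readers skimming the diff, here is a minimal, hedged sketch of how a test could drive the helpers above. The actual consumer in this commit is RangerHiveIT; the class name, repository name, database/table/user values, and the resource keys ("database", "table", "column" are Ranger's standard Hive resource names) below are illustrative assumptions, not code from the commit.

// A hypothetical usage sketch; not part of the commit.
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import java.util.Map;
import org.apache.ranger.plugin.model.RangerPolicy;

public class RangerHiveUsageSketch extends RangerIT {
  static void grantSelectOnTable(String hiveIp) {
    // Create (or reuse) the Hive repository and wipe any leftover policies.
    createRangerHiveRepository(hiveIp, true /* cleanAllPolicy */);

    // Resources in database -> table -> column order, matching the
    // index-based handling inside updateOrCreateRangerPolicy.
    Map<String, RangerPolicy.RangerPolicyResource> resources =
        ImmutableMap.of(
            "database", new RangerPolicy.RangerPolicyResource("db1"),
            "table", new RangerPolicy.RangerPolicyResource("table1"),
            "column", new RangerPolicy.RangerPolicyResource("*"));

    // One policy item granting SELECT to a single user.
    RangerPolicy.RangerPolicyItem item = new RangerPolicy.RangerPolicyItem();
    item.setUsers(ImmutableList.of("test-user"));
    item.setAccesses(
        ImmutableList.of(new RangerPolicy.RangerPolicyItemAccess("select")));

    // "hive" and "hiveRepo" stand in for RANGER_HIVE_TYPE / RANGER_HIVE_REPO_NAME.
    updateOrCreateRangerPolicy(
        "hive", "hiveRepo", "db1-table1-select", resources, ImmutableList.of(item));
  }
}

Because updateOrCreateRangerPolicy appends policy items to an existing matching policy instead of failing, calling this twice simply extends the same policy rather than creating a duplicate.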
diff --git a/spark-connector/spark-common/build.gradle.kts b/spark-connector/spark-common/build.gradle.kts
index b97e72458..65ba1f641 100644
--- a/spark-connector/spark-common/build.gradle.kts
+++ b/spark-connector/spark-common/build.gradle.kts
@@ -143,7 +143,7 @@ tasks.test {
dependsOn(tasks.jar)
doFirst {
- environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE",
"datastrato/gravitino-ci-hive:0.1.12")
+ environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE",
"datastrato/gravitino-ci-hive:0.1.13")
}
val init = project.extra.get("initIntegrationTest") as (Test) -> Unit
diff --git a/spark-connector/v3.3/spark/build.gradle.kts b/spark-connector/v3.3/spark/build.gradle.kts
index 98c05f359..e9e25d34e 100644
--- a/spark-connector/v3.3/spark/build.gradle.kts
+++ b/spark-connector/v3.3/spark/build.gradle.kts
@@ -152,7 +152,7 @@ tasks.test {
dependsOn(":catalogs:catalog-lakehouse-iceberg:jar")
doFirst {
- environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE",
"datastrato/gravitino-ci-hive:0.1.12")
+ environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE",
"datastrato/gravitino-ci-hive:0.1.13")
}
val init = project.extra.get("initIntegrationTest") as (Test) -> Unit
diff --git a/spark-connector/v3.4/spark/build.gradle.kts b/spark-connector/v3.4/spark/build.gradle.kts
index 449e3c76b..fac5b44c7 100644
--- a/spark-connector/v3.4/spark/build.gradle.kts
+++ b/spark-connector/v3.4/spark/build.gradle.kts
@@ -152,7 +152,7 @@ tasks.test {
dependsOn(":catalogs:catalog-lakehouse-iceberg:jar")
doFirst {
- environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE",
"datastrato/gravitino-ci-hive:0.1.12")
+ environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE",
"datastrato/gravitino-ci-hive:0.1.13")
}
val init = project.extra.get("initIntegrationTest") as (Test) -> Unit
diff --git a/spark-connector/v3.5/spark/build.gradle.kts b/spark-connector/v3.5/spark/build.gradle.kts
index 125ff5fa6..579fe5b46 100644
--- a/spark-connector/v3.5/spark/build.gradle.kts
+++ b/spark-connector/v3.5/spark/build.gradle.kts
@@ -154,7 +154,7 @@ tasks.test {
dependsOn(":catalogs:catalog-lakehouse-iceberg:jar")
doFirst {
- environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE",
"datastrato/gravitino-ci-hive:0.1.12")
+ environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE",
"datastrato/gravitino-ci-hive:0.1.13")
}
val init = project.extra.get("initIntegrationTest") as (Test) -> Unit