This is an automated email from the ASF dual-hosted git repository. forwardxu pushed a commit to branch release-0.12.1 in repository https://gitbox.apache.org/repos/asf/hudi.git
commit ac0d1d81a48d1ce558294757a5812487cd9b2cf0 Author: XuQianJin-Stars <[email protected]> AuthorDate: Tue Aug 23 11:47:37 2022 +0800 [MINOR] Adapt to tianqiong spark --- dev/settings.xml | 266 +++++++++++++++++++++ dev/tencent-install.sh | 157 ++++++++++++ dev/tencent-release.sh | 154 ++++++++++++ hudi-cli/pom.xml | 4 +- hudi-client/hudi-spark-client/pom.xml | 4 +- hudi-examples/hudi-examples-spark/pom.xml | 4 +- hudi-integ-test/pom.xml | 4 +- hudi-spark-datasource/hudi-spark-common/pom.xml | 12 +- hudi-spark-datasource/hudi-spark/pom.xml | 12 +- hudi-spark-datasource/hudi-spark2/pom.xml | 12 +- hudi-spark-datasource/hudi-spark3-common/pom.xml | 2 +- hudi-spark-datasource/hudi-spark3.1.x/pom.xml | 2 +- hudi-spark-datasource/hudi-spark3.2.x/pom.xml | 6 +- hudi-spark-datasource/hudi-spark3.3.x/pom.xml | 6 +- hudi-sync/hudi-hive-sync/pom.xml | 4 +- hudi-utilities/pom.xml | 10 +- .../org/apache/hudi/utilities/UtilHelpers.java | 38 ++- packaging/hudi-integ-test-bundle/pom.xml | 8 +- pom.xml | 94 +++++--- 19 files changed, 715 insertions(+), 84 deletions(-) diff --git a/dev/settings.xml b/dev/settings.xml new file mode 100644 index 0000000000..5f5dfd4fa6 --- /dev/null +++ b/dev/settings.xml @@ -0,0 +1,266 @@ +<settings> + <proxies> + <proxy> + <id>dev http</id> + <active>true</active> + <protocol>http</protocol> + <host>web-proxy.oa.com</host> + <port>8080</port> + <nonProxyHosts>mirrors.tencent.com|qq.com|localhost|127.0.0.1|*.oa.com|repo.maven.apache.org|packages.confluent.io</nonProxyHosts> + </proxy> + <proxy> + <id>dev https</id> + <active>true</active> + <protocol>https</protocol> + <host>web-proxy.oa.com</host> + <port>8080</port> + <nonProxyHosts>mirrors.tencent.com|qq.com|localhost|127.0.0.1|*.oa.com|repo.maven.apache.org|packages.confluent.io</nonProxyHosts> + </proxy> + </proxies> + + <offline>false</offline> + + <profiles> + <profile> + <id>nexus</id> + <repositories> + <repository> + <id>maven_public</id> + <url>https://mirrors.tencent.com/nexus/repository/maven-public/</url> + <releases> + <enabled>true</enabled> + </releases> + <snapshots> + <enabled>false</enabled> + </snapshots> + </repository> + <repository> + <id>tencent_public</id> + <url>https://mirrors.tencent.com/repository/maven/tencent_public/</url> + <releases> + <enabled>true</enabled> + </releases> + <snapshots> + <enabled>false</enabled> + </snapshots> + </repository> + + <repository> + <id>thirdparty</id> + <url>https://mirrors.tencent.com/repository/maven/thirdparty/</url> + <releases> + <enabled>true</enabled> + </releases> + <snapshots> + <enabled>false</enabled> + </snapshots> + </repository> + + <repository> + <id>mqq</id> + <url>https://mirrors.tencent.com/repository/maven/mqq/</url> + <releases> + <enabled>false</enabled> + </releases> + <snapshots> + <enabled>true</enabled> + </snapshots> + </repository> + + <repository> + <id>thirdparty-snapshots</id> + <url>https://mirrors.tencent.com/repository/maven/thirdparty-snapshots/</url> + <releases> + <enabled>false</enabled> + </releases> + <snapshots> + <enabled>true</enabled> + </snapshots> + </repository> + </repositories> + + <pluginRepositories> + <pluginRepository> + <id>maven-public-plugin</id> + <url>https://mirrors.tencent.com/nexus/repository/maven-public/</url> + <releases> + <enabled>true</enabled> + </releases> + <snapshots> + <enabled>false</enabled> + </snapshots> + </pluginRepository> + <pluginRepository> + <id>public-plugin</id> + <url>https://mirrors.tencent.com/repository/maven/tencent_public/</url> + <releases> + <enabled>true</enabled> + </releases> + <snapshots> + <enabled>false</enabled> + </snapshots> + </pluginRepository> + <pluginRepository> + <id>thirdparty-plugin</id> + <url>https://mirrors.tencent.com/repository/maven/thirdparty/</url> + <releases> + <enabled>true</enabled> + </releases> + <snapshots> + <enabled>false</enabled> + </snapshots> + </pluginRepository> + </pluginRepositories> + </profile> + + <profile> + <id>tbds</id> + <repositories> + <repository> + <id>tbds-maven-public</id> + <url>http://tbdsrepo.oa.com/repository/maven-public/</url> + <releases> + <enabled>true</enabled> + <updatePolicy>never</updatePolicy> + </releases> + <snapshots> + <enabled>true</enabled> + <updatePolicy>never</updatePolicy> + </snapshots> + </repository> + <repository> + <id>tbds</id> + <url>http://tbdsrepo.oa.com/repository/tbds/</url> + <releases> + <enabled>true</enabled> + <updatePolicy>never</updatePolicy> + <checksumPolicy>ignore</checksumPolicy> + </releases> + <snapshots> + <enabled>true</enabled> + <updatePolicy>never</updatePolicy> + <checksumPolicy>ignore</checksumPolicy> + </snapshots> + </repository> + </repositories> + <pluginRepositories> + <pluginRepository> + <id>tbds</id> + <url>http://tbdsrepo.oa.com/repository/tbds/</url> + <releases> + <enabled>true</enabled> + <updatePolicy>never</updatePolicy> + <checksumPolicy>ignore</checksumPolicy> + </releases> + <snapshots> + <enabled>true</enabled> + <updatePolicy>never</updatePolicy> + <checksumPolicy>ignore</checksumPolicy> + </snapshots> + </pluginRepository> + <pluginRepository> + <id>tbds-maven-public</id> + <url>http://tbdsrepo.oa.com/repository/maven-public/</url> + <releases> + <enabled>true</enabled> + <updatePolicy>never</updatePolicy> + <checksumPolicy>warn</checksumPolicy> + </releases> + <snapshots> + <enabled>true</enabled> + <updatePolicy>never</updatePolicy> + <checksumPolicy>ignore</checksumPolicy> + </snapshots> + </pluginRepository> + </pluginRepositories> + </profile> + + <profile> + <id>confluent_repo</id> + <repositories> + <repository> + <id>tencent-repo</id> + <url>https://mirrors.tencent.com/repository/maven/CSIG_TWINS</url> + <releases> + <enabled>true</enabled> + </releases> + <snapshots> + <enabled>false</enabled> + </snapshots> + </repository> + <repository> + <id>confluent</id> + <url>https://packages.confluent.io/maven/</url> + <releases> + <enabled>true</enabled> + </releases> + <snapshots> + <enabled>false</enabled> + </snapshots> + </repository> + </repositories> + </profile> + + <profile> + <id>tianqiong_releases</id> + <repositories> + <repository> + <id>tianqiong-releases</id> + <url>https://mirrors.tencent.com/repository/maven/tianqiong-releases</url> + <releases> + <enabled>true</enabled> + </releases> + <snapshots> + <enabled>false</enabled> + </snapshots> + </repository> + </repositories> + </profile> + + <profile> + <id>tianqiong_snapshots</id> + <repositories> + <repository> + <id>tianqiong-snapshots</id> + <url>https://mirrors.tencent.com/repository/maven/tianqiong-snapshots</url> + <releases> + <enabled>false</enabled> + </releases> + <snapshots> + <enabled>true</enabled> + <updatePolicy>always</updatePolicy> + </snapshots> + </repository> + </repositories> + </profile> + </profiles> + + <activeProfiles> + <activeProfile>confluent_repo</activeProfile> + <activeProfile>tianqiong_releases</activeProfile> + <activeProfile>tianqiong_snapshots</activeProfile> + <activeProfile>nexus</activeProfile> + </activeProfiles> + <servers> + <server> + <id>thirdparty-snapshots</id> + <username>ethansu</username> + <password>664a1eeceee211e9b3cf6c92bf47000d</password> + </server> + <server> + <id>tbds</id> + <username>tbds</username> + <password>[email protected]</password> + </server> + <server> + <id>tianqiong-releases</id> + <username>g_datalake</username> + <password>be3c75f8fc9a11e9b2a36c92bf3acd2c</password> + </server> + <server> + <id>tianqiong-snapshots</id> + <username>g_datalake</username> + <password>be3c75f8fc9a11e9b2a36c92bf3acd2c</password> + </server> + </servers> +</settings> diff --git a/dev/tencent-install.sh b/dev/tencent-install.sh new file mode 100644 index 0000000000..1e34f40440 --- /dev/null +++ b/dev/tencent-install.sh @@ -0,0 +1,157 @@ +#!/bin/bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e # Exit immediately if a command exits with a non-zero status + +if [ $# -ne 7 ]; then + echo "Usage: $0 <apache-version> <tencent-version> <rc-num> <release-repo-not-snapshot?> <scala_version> <spark_version> <flink_version>" + echo "example: $0 0.12.0 1 1 N 2.11 2 1.13" + exit +fi + +version=$1-$2-tencent # <apache-version>-<tencent-version>-tencent, e.g. 0.10.0-1-tencent +if [ $4 = "N" ]; then + version=$version-SNAPSHOT +fi +rc=$3 +release_repo=$4 # Y for release repo, others for snapshot repo + +tag=apache-hudi-$version +tagrc=${tag}-rc${rc} + +echo "Preparing source for $tagrc" + +# change version +echo "Change version for ${version}" +mvn versions:set -DnewVersion=${version} -DgenerateBackupPom=false -s dev/settings.xml -U +mvn versions:commit -s dev/settings.xml -U + +function git_push() { + # create version.txt for this release + if [ ${release_repo} = "Y" ]; then + git add . + + if [ $# -eq 7 ]; then + git commit -m "Add version tag for release ${version} $5 $6" + else + git commit -m "Add version tag for release ${version}" + fi + else + git add . + + if [ $# -eq 7 ]; then + git commit -m"Add snapshot tag ${version} $5 $6" + else + git commit -m"Add snapshot tag ${version}" + fi + fi + + set_version_hash=$(git rev-list HEAD 2>/dev/null | head -n 1) + + # delete remote tag + git fetch --tags --all + tag_exist=`git tag -l ${tagrc} | wc -l` + if [ ${tag_exist} -gt 0 ]; then + git tag -l ${tagrc} | xargs git tag -d + git push origin :refs/tags/${tagrc} + fi + + # add remote tag + git tag -am "Apache Hudi $version" ${tagrc} ${set_version_hash} + remote=$(git remote -v | grep data-lake-technology/hudi.git | head -n 1 | awk '{print $1}') + git push ${remote} ${tagrc} + + release_hash=$(git rev-list ${tagrc} 2>/dev/null | head -n 1) + + if [ -z "$release_hash" ]; then + echo "Cannot continue: unknown git tag: $tag" + exit + fi + + echo -e "Using commit ${release_hash}\n" + + #echo "git push origin" + #git push origin + + echo -e "begin archive ${release_hash}\n" + rm -rf ${tag}* + tarball=$tag.tar.gz + + # be conservative and use the release hash, even though git produces the same + # archive (identical hashes) using the scm tag + git archive $release_hash --worktree-attributes --prefix $tag/ -o $tarball + + # checksum + sha512sum $tarball >${tarball}.sha512 + + # extract source tarball + tar xzf ${tarball} + + cd ${tag} + if [ ${release_repo} = "N" ]; then + echo $version >version.txt + fi + + echo -e "end archive ${release_hash}\n" +} + +function deploy_spark() { + echo ------------------------------------------------------- + SCALA_VERSION=$1 + SPARK_VERSION=$2 + FLINK_VERSION=$3 + + if [ ${release_repo} = "Y" ]; then + COMMON_OPTIONS="-Dscala-${SCALA_VERSION} -Dspark${SPARK_VERSION} -Dflink${FLINK_VERSION} -DskipTests -s dev/settings.xml -DretryFailedDeploymentCount=30 -T 2.5C" + else + COMMON_OPTIONS="-Dscala-${SCALA_VERSION} -Dspark${SPARK_VERSION} -Dflink${FLINK_VERSION} -DskipTests -s dev/settings.xml -DretryFailedDeploymentCount=30 -T 2.5C" + fi + + # INSTALL_OPTIONS="-U -Drat.skip=true -Djacoco.skip=true -Dscala-${SCALA_VERSION} -Dspark${SPARK_VERSION} -DskipTests -s dev/settings.xml -T 2.5C" + # + # echo "INSTALL_OPTIONS: mvn clean package ${INSTALL_OPTIONS}" + # mvn clean package ${INSTALL_OPTIONS} + + echo "DEPLOY_OPTIONS: mvn clean install $COMMON_OPTIONS" + # mvn clean package install $COMMON_OPTIONS + mvn clean package install $COMMON_OPTIONS -Drat.skip=true + + if [ ${release_repo} = "Y" ]; then + echo -e "Published to release repo\n" + else + echo -e "Published to snapshot repo\n" + fi + echo ------------------------------------------------------- +} + +echo "SCALA_VERSION: $5 SPARK_VERSION: $6" +deploy_spark $5 $6 $7 + +## spark 2.4.6 +#deploy_spark 2.11 2 +## spark 3.0.1 +#deploy_spark 2.12 3.0.x +## spark 3.1.2 +#deploy_spark 2.12 3 + +# clean +#rm -rf ../${tag}* + +echo "Success! The release candidate [${tagrc}] is available" +echo "Commit SHA1: ${release_hash}" diff --git a/dev/tencent-release.sh b/dev/tencent-release.sh new file mode 100644 index 0000000000..944f497070 --- /dev/null +++ b/dev/tencent-release.sh @@ -0,0 +1,154 @@ +#!/bin/bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e # Exit immediately if a command exits with a non-zero status + +if [ $# -ne 7 ]; then + echo "Usage: $0 <apache-version> <tencent-version> <rc-num> <release-repo-not-snapshot?> <scala_version> <spark_version> <flink_version>" + echo "example: $0 0.12.0 1 1 N 2.11 2 1.13" + exit +fi + +version=$1-$2-tencent # <apache-version>-<tencent-version>-tencent, e.g. 0.10.0-1-tencent +if [ $4 = "N" ]; then + version=$version-SNAPSHOT +fi +rc=$3 +release_repo=$4 # Y for release repo, others for snapshot repo + +tag=apache-hudi-$version +tagrc=${tag}-rc${rc} + +echo "Preparing source for $tagrc" + +# change version +echo "Change version for ${version}" +mvn versions:set -DnewVersion=${version} -DgenerateBackupPom=false -s dev/settings.xml -U +mvn versions:commit -s dev/settings.xml -U + +# create version.txt for this release +if [ ${release_repo} = "Y" ]; then + git add . + + if [ $# -eq 7 ]; then + git commit -m "Add version tag for release ${version} $5 $6" + else + git commit -m "Add version tag for release ${version}" + fi +else + git add . + + if [ $# -eq 7 ]; then + git commit -m"Add snapshot tag ${version} $5 $6" + else + git commit -m"Add snapshot tag ${version}" + fi +fi + +set_version_hash=`git rev-list HEAD 2> /dev/null | head -n 1 ` + +# delete remote tag +git fetch --tags --all +tag_exist=`git tag -l ${tagrc} | wc -l` +if [ ${tag_exist} -gt 0 ]; then + git tag -l ${tagrc} | xargs git tag -d + git push origin :refs/tags/${tagrc} +fi + +# add remote tag +git tag -am "Apache Hudi $version" ${tagrc} ${set_version_hash} +remote=$(git remote -v | grep data-lake-technology/hudi.git | head -n 1 | awk '{print $1}') +git push ${remote} ${tagrc} + +release_hash=`git rev-list ${tagrc} 2> /dev/null | head -n 1 ` + +if [ -z "$release_hash" ]; then + echo "Cannot continue: unknown git tag: $tag" + exit +fi + +echo -e "Using commit ${release_hash}\n" + +#echo "git push origin" +#git push origin + +echo -e "begin archive ${release_hash}\n" +rm -rf ${tag}* +tarball=$tag.tar.gz + +# be conservative and use the release hash, even though git produces the same +# archive (identical hashes) using the scm tag +git archive $release_hash --worktree-attributes --prefix $tag/ -o $tarball + +# checksum +sha512sum $tarball > ${tarball}.sha512 + +# extract source tarball +tar xzf ${tarball} + +cd ${tag} +if [ ${release_repo} = "N" ]; then + echo $version > version.txt +fi + +echo -e "end archive ${release_hash}\n" + +function deploy_spark(){ + echo ------------------------------------------------------- + SCALA_VERSION=$1 + SPARK_VERSION=$2 + FLINK_VERSION=$3 + + if [ ${release_repo} = "Y" ]; then + COMMON_OPTIONS="-Dscala-${SCALA_VERSION} -Dspark${SPARK_VERSION} -Dflink${FLINK_VERSION} -DskipTests -s dev/settings.xml -DretryFailedDeploymentCount=30" + else + COMMON_OPTIONS="-Dscala-${SCALA_VERSION} -Dspark${SPARK_VERSION} -Dflink${FLINK_VERSION} -DskipTests -s dev/settings.xml -DretryFailedDeploymentCount=30" + fi + +# INSTALL_OPTIONS="-U -Drat.skip=true -Djacoco.skip=true -Dscala-${SCALA_VERSION} -Dspark${SPARK_VERSION} -DskipTests -s dev/settings.xml -T 2.5C" +# +# echo "INSTALL_OPTIONS: mvn clean package ${INSTALL_OPTIONS}" +# mvn clean package ${INSTALL_OPTIONS} + + echo "DEPLOY_OPTIONS: mvn clean deploy $COMMON_OPTIONS" + mvn deploy $COMMON_OPTIONS + + if [ ${release_repo} = "Y" ]; then + echo -e "Published to release repo\n" + else + echo -e "Published to snapshot repo\n" + fi + echo ------------------------------------------------------- +} + +echo "SCALA_VERSION: $5 SPARK_VERSION: $6" +deploy_spark $5 $6 $7 + +## spark 2.4.6 +#deploy_spark 2.11 2 +## spark 3.0.1 +#deploy_spark 2.12 3.0.x +## spark 3.1.2 +#deploy_spark 2.12 3 + +# clean +rm -rf ../${tag}* + +echo "Success! The release candidate [${tagrc}] is available" +echo "Commit SHA1: ${release_hash}" diff --git a/hudi-cli/pom.xml b/hudi-cli/pom.xml index ee78bf24b0..27596e779f 100644 --- a/hudi-cli/pom.xml +++ b/hudi-cli/pom.xml @@ -250,11 +250,11 @@ <!-- Spark --> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-core_${scala.binary.version}</artifactId> </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-sql_${scala.binary.version}</artifactId> </dependency> diff --git a/hudi-client/hudi-spark-client/pom.xml b/hudi-client/hudi-spark-client/pom.xml index a7ae3a7049..da1ad6cb9f 100644 --- a/hudi-client/hudi-spark-client/pom.xml +++ b/hudi-client/hudi-spark-client/pom.xml @@ -57,11 +57,11 @@ <!-- Spark --> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-core_${scala.binary.version}</artifactId> </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-sql_${scala.binary.version}</artifactId> </dependency> diff --git a/hudi-examples/hudi-examples-spark/pom.xml b/hudi-examples/hudi-examples-spark/pom.xml index 4eeb11ecb1..d0611c6752 100644 --- a/hudi-examples/hudi-examples-spark/pom.xml +++ b/hudi-examples/hudi-examples-spark/pom.xml @@ -189,11 +189,11 @@ <!-- Spark --> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-core_${scala.binary.version}</artifactId> </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-sql_${scala.binary.version}</artifactId> </dependency> diff --git a/hudi-integ-test/pom.xml b/hudi-integ-test/pom.xml index 2134f80bb0..703cbb067f 100644 --- a/hudi-integ-test/pom.xml +++ b/hudi-integ-test/pom.xml @@ -62,7 +62,7 @@ </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-sql_${scala.binary.version}</artifactId> <exclusions> <exclusion> @@ -89,7 +89,7 @@ </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-avro_${scala.binary.version}</artifactId> <version>${spark.version}</version> <scope>test</scope> diff --git a/hudi-spark-datasource/hudi-spark-common/pom.xml b/hudi-spark-datasource/hudi-spark-common/pom.xml index a1016299ba..6fd1d7d458 100644 --- a/hudi-spark-datasource/hudi-spark-common/pom.xml +++ b/hudi-spark-datasource/hudi-spark-common/pom.xml @@ -184,7 +184,7 @@ <!-- Spark --> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-core_${scala.binary.version}</artifactId> <exclusions> <exclusion> @@ -194,29 +194,29 @@ </exclusions> </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-sql_${scala.binary.version}</artifactId> </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-hive_${scala.binary.version}</artifactId> </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-sql_${scala.binary.version}</artifactId> <classifier>tests</classifier> <scope>test</scope> </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-core_${scala.binary.version}</artifactId> <classifier>tests</classifier> <scope>test</scope> </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-catalyst_${scala.binary.version}</artifactId> <classifier>tests</classifier> <scope>test</scope> diff --git a/hudi-spark-datasource/hudi-spark/pom.xml b/hudi-spark-datasource/hudi-spark/pom.xml index f55cb3359c..f4ad09bb57 100644 --- a/hudi-spark-datasource/hudi-spark/pom.xml +++ b/hudi-spark-datasource/hudi-spark/pom.xml @@ -245,7 +245,7 @@ <!-- Spark --> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-core_${scala.binary.version}</artifactId> <exclusions> <exclusion> @@ -255,31 +255,31 @@ </exclusions> </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-sql_${scala.binary.version}</artifactId> </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-hive_${scala.binary.version}</artifactId> </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-sql_${scala.binary.version}</artifactId> <classifier>tests</classifier> <scope>test</scope> </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-core_${scala.binary.version}</artifactId> <classifier>tests</classifier> <scope>test</scope> </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-catalyst_${scala.binary.version}</artifactId> <classifier>tests</classifier> <scope>test</scope> diff --git a/hudi-spark-datasource/hudi-spark2/pom.xml b/hudi-spark-datasource/hudi-spark2/pom.xml index f74dd96a5b..63cc6f3a4f 100644 --- a/hudi-spark-datasource/hudi-spark2/pom.xml +++ b/hudi-spark-datasource/hudi-spark2/pom.xml @@ -21,10 +21,10 @@ </parent> <modelVersion>4.0.0</modelVersion> - <artifactId>hudi-spark2_${scala.binary.version}</artifactId> + <artifactId>hudi-spark2_2.11</artifactId> <version>0.12.1</version> - <name>hudi-spark2_${scala.binary.version}</name> + <name>hudi-spark2_2.11</name> <packaging>jar</packaging> <properties> @@ -185,13 +185,13 @@ </dependency> <dependency> <groupId>org.apache.hudi</groupId> - <artifactId>hudi-spark-common_${scala.binary.version}</artifactId> + <artifactId>hudi-spark-common_2.11</artifactId> <version>${project.version}</version> </dependency> <dependency> - <groupId>org.apache.spark</groupId> - <artifactId>spark-sql_${scala.binary.version}</artifactId> + <groupId>${spark.groupId}</groupId> + <artifactId>spark-sql_2.11</artifactId> <version>${spark2.version}</version> <scope>provided</scope> <optional>true</optional> @@ -230,7 +230,7 @@ </dependency> <dependency> <groupId>org.apache.hudi</groupId> - <artifactId>hudi-spark-common_${scala.binary.version}</artifactId> + <artifactId>hudi-spark-common_2.11</artifactId> <version>${project.version}</version> <classifier>tests</classifier> <type>test-jar</type> diff --git a/hudi-spark-datasource/hudi-spark3-common/pom.xml b/hudi-spark-datasource/hudi-spark3-common/pom.xml index 75957d6d4c..6bbb4e42b4 100644 --- a/hudi-spark-datasource/hudi-spark3-common/pom.xml +++ b/hudi-spark-datasource/hudi-spark3-common/pom.xml @@ -160,7 +160,7 @@ <dependencies> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-sql_2.12</artifactId> <version>${spark3.version}</version> <scope>provided</scope> diff --git a/hudi-spark-datasource/hudi-spark3.1.x/pom.xml b/hudi-spark-datasource/hudi-spark3.1.x/pom.xml index 6768e0ce03..fb43cd2855 100644 --- a/hudi-spark-datasource/hudi-spark3.1.x/pom.xml +++ b/hudi-spark-datasource/hudi-spark3.1.x/pom.xml @@ -151,7 +151,7 @@ <dependencies> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-sql_2.12</artifactId> <version>${spark31.version}</version> <optional>true</optional> diff --git a/hudi-spark-datasource/hudi-spark3.2.x/pom.xml b/hudi-spark-datasource/hudi-spark3.2.x/pom.xml index cd6ba3a4b5..51f986e069 100644 --- a/hudi-spark-datasource/hudi-spark3.2.x/pom.xml +++ b/hudi-spark-datasource/hudi-spark3.2.x/pom.xml @@ -174,7 +174,7 @@ <dependencies> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-sql_2.12</artifactId> <version>${spark32.version}</version> <scope>provided</scope> @@ -182,7 +182,7 @@ </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-catalyst_2.12</artifactId> <version>${spark32.version}</version> <scope>provided</scope> @@ -190,7 +190,7 @@ </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-core_2.12</artifactId> <version>${spark32.version}</version> <scope>provided</scope> diff --git a/hudi-spark-datasource/hudi-spark3.3.x/pom.xml b/hudi-spark-datasource/hudi-spark3.3.x/pom.xml index 9ab65dca2e..65ce18d2d3 100644 --- a/hudi-spark-datasource/hudi-spark3.3.x/pom.xml +++ b/hudi-spark-datasource/hudi-spark3.3.x/pom.xml @@ -174,7 +174,7 @@ <dependencies> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-sql_2.12</artifactId> <version>${spark33.version}</version> <scope>provided</scope> @@ -182,7 +182,7 @@ </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-catalyst_2.12</artifactId> <version>${spark33.version}</version> <scope>provided</scope> @@ -190,7 +190,7 @@ </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-core_2.12</artifactId> <version>${spark33.version}</version> <scope>provided</scope> diff --git a/hudi-sync/hudi-hive-sync/pom.xml b/hudi-sync/hudi-hive-sync/pom.xml index 7cf31550b6..9785d71c9e 100644 --- a/hudi-sync/hudi-hive-sync/pom.xml +++ b/hudi-sync/hudi-hive-sync/pom.xml @@ -139,13 +139,13 @@ </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-sql_${scala.binary.version}</artifactId> <scope>test</scope> </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-core_${scala.binary.version}</artifactId> <scope>test</scope> </dependency> diff --git a/hudi-utilities/pom.xml b/hudi-utilities/pom.xml index 0c2a612d78..93cb94b320 100644 --- a/hudi-utilities/pom.xml +++ b/hudi-utilities/pom.xml @@ -184,7 +184,7 @@ <!-- Spark --> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-core_${scala.binary.version}</artifactId> <exclusions> <exclusion> @@ -199,7 +199,7 @@ </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-sql_${scala.binary.version}</artifactId> <exclusions> <exclusion> @@ -210,17 +210,17 @@ </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-streaming_${scala.binary.version}</artifactId> <version>${spark.version}</version> </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-streaming-kafka-0-10_${scala.binary.version}</artifactId> <version>${spark.version}</version> </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-streaming-kafka-0-10_${scala.binary.version}</artifactId> <version>${spark.version}</version> <classifier>tests</classifier> diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java index 523546c9ef..4a38da6528 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java @@ -83,6 +83,7 @@ import org.apache.spark.util.LongAccumulator; import java.io.BufferedReader; import java.io.IOException; import java.io.StringReader; +import java.lang.reflect.Method; import java.nio.ByteBuffer; import java.sql.Connection; import java.sql.Driver; @@ -421,7 +422,7 @@ public class UtilHelpers { statement.setQueryTimeout(Integer.parseInt(options.get(JDBCOptions.JDBC_QUERY_TIMEOUT()))); statement.executeQuery(); } catch (SQLException e) { - throw new HoodieException(e); + return false; } return true; } @@ -445,12 +446,23 @@ public class UtilHelpers { statement.setQueryTimeout(Integer.parseInt(options.get("queryTimeout"))); try (ResultSet rs = statement.executeQuery()) { StructType structType; + Object[] methodParas; + Method method = getMethodByName(JdbcUtils.class, "getSchema"); + int parasCount = getMethodParasCount(method); + if (Boolean.parseBoolean(options.get("nullable"))) { - structType = JdbcUtils.getSchema(rs, dialect, true); + methodParas = parasCount == 3 ? new Object[] {rs, dialect, true} : new Object[] {method, rs, dialect, url, true}; } else { - structType = JdbcUtils.getSchema(rs, dialect, false); + methodParas = parasCount == 3 ? new Object[] {rs, dialect, false} : new Object[] {method, rs, dialect, url, false}; + } + + structType = getStructTypeReflection(method, methodParas); + + if (structType != null) { + return AvroConversionUtils.convertStructTypeToAvroSchema(structType, table, "hoodie." + table); + } else { + throw new HoodieException(String.format("%s structType can not null!", table)); } - return AvroConversionUtils.convertStructTypeToAvroSchema(structType, table, "hoodie." + table); } } } else { @@ -572,4 +584,22 @@ public class UtilHelpers { Schema schema = schemaResolver.getTableAvroSchema(false); return schema.toString(); } + + public static Method getMethodByName(Class clazz, String methodName) { + return Arrays.stream(clazz.getDeclaredMethods()) + .filter(m -> m.getName().equalsIgnoreCase(methodName)) + .findFirst().orElse(null); + } + + public static int getMethodParasCount(Method method) { + return method.getParameterCount(); + } + + public static StructType getStructTypeReflection(Method method, Object... objs) throws Exception { + if (method != null) { + return (StructType) method.invoke(null, objs); + } else { + return null; + } + } } diff --git a/packaging/hudi-integ-test-bundle/pom.xml b/packaging/hudi-integ-test-bundle/pom.xml index d1789b863a..8323703622 100644 --- a/packaging/hudi-integ-test-bundle/pom.xml +++ b/packaging/hudi-integ-test-bundle/pom.xml @@ -646,12 +646,12 @@ </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-core_${scala.binary.version}</artifactId> </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-sql_${scala.binary.version}</artifactId> </dependency> @@ -662,14 +662,14 @@ </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-streaming_${scala.binary.version}</artifactId> <version>${spark.version}</version> <scope>provided</scope> </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-streaming-kafka-0-10_${scala.binary.version}</artifactId> <version>${spark.version}</version> </dependency> diff --git a/pom.xml b/pom.xml index 47e53fed97..159ae2a841 100644 --- a/pom.xml +++ b/pom.xml @@ -125,6 +125,7 @@ <spark2.version>2.4.4</spark2.version> <spark3.version>3.3.0</spark3.version> <sparkbundle.version></sparkbundle.version> + <spark.groupId>com.tencent.spark</spark.groupId> <flink1.15.version>1.15.1</flink1.15.version> <flink1.14.version>1.14.5</flink1.14.version> <flink1.13.version>1.13.6</flink1.13.version> @@ -142,7 +143,7 @@ <flink.clients.artifactId>flink-clients</flink.clients.artifactId> <flink.connector.kafka.artifactId>flink-connector-kafka</flink.connector.kafka.artifactId> <flink.hadoop.compatibility.artifactId>flink-hadoop-compatibility_2.12</flink.hadoop.compatibility.artifactId> - <spark31.version>3.1.3</spark31.version> + <spark31.version>3.1.2</spark31.version> <spark32.version>3.2.2</spark32.version> <spark33.version>3.3.0</spark33.version> <hudi.spark.module>hudi-spark2</hudi.spark.module> @@ -724,7 +725,7 @@ <!-- Spark --> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-core_${scala.binary.version}</artifactId> <version>${spark.version}</version> <scope>provided</scope> @@ -740,26 +741,26 @@ </exclusions> </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-sql_${scala.binary.version}</artifactId> <version>${spark.version}</version> <scope>provided</scope> </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-hive_${scala.binary.version}</artifactId> <version>${spark.version}</version> <scope>provided</scope> </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-sql_${scala.binary.version}</artifactId> <classifier>tests</classifier> <version>${spark.version}</version> <scope>test</scope> </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-core_${scala.binary.version}</artifactId> <classifier>tests</classifier> <version>${spark.version}</version> @@ -776,7 +777,7 @@ </exclusions> </dependency> <dependency> - <groupId>org.apache.spark</groupId> + <groupId>${spark.groupId}</groupId> <artifactId>spark-catalyst_${scala.binary.version}</artifactId> <classifier>tests</classifier> <version>${spark.version}</version> @@ -1472,33 +1473,6 @@ </dependency> </dependencies> </dependencyManagement> - <repositories> - <repository> - <id>Maven Central</id> - <name>Maven Repository</name> - <url>https://repo.maven.apache.org/maven2</url> - <releases> - <enabled>true</enabled> - </releases> - <snapshots> - <enabled>false</enabled> - </snapshots> - </repository> - <repository> - <id>cloudera-repo-releases</id> - <url>https://repository.cloudera.com/artifactory/public/</url> - <releases> - <enabled>true</enabled> - </releases> - <snapshots> - <enabled>false</enabled> - </snapshots> - </repository> - <repository> - <id>confluent</id> - <url>https://packages.confluent.io/maven/</url> - </repository> - </repositories> <profiles> <profile> @@ -1985,7 +1959,7 @@ <profile> <id>spark3.1</id> <properties> - <spark3.version>3.1.3</spark3.version> + <spark3.version>3.1.2</spark3.version> <spark.version>${spark3.version}</spark.version> <sparkbundle.version>3.1</sparkbundle.version> <scala.version>${scala12.version}</scala.version> @@ -2137,6 +2111,8 @@ <flink.clients.artifactId>flink-clients_${scala.binary.version}</flink.clients.artifactId> <flink.connector.kafka.artifactId>flink-connector-kafka_${scala.binary.version}</flink.connector.kafka.artifactId> <flink.hadoop.compatibility.artifactId>flink-hadoop-compatibility_${scala.binary.version}</flink.hadoop.compatibility.artifactId> + <hudi.flink.module>hudi-flink1.13.x</hudi.flink.module> + <flink.bundle.version>1.13</flink.bundle.version> <skipITs>true</skipITs> </properties> <activation> @@ -2157,6 +2133,54 @@ </property> </activation> </profile> + + <profile> + <id>community</id> + <properties> + <spark.groupId>org.apache.spark</spark.groupId> + </properties> + <repositories> + <repository> + <id>Maven Central</id> + <name>Maven Repository</name> + <url>https://repo.maven.apache.org/maven2</url> + <releases> + <enabled>true</enabled> + </releases> + <snapshots> + <enabled>false</enabled> + </snapshots> + </repository> + <repository> + <id>cloudera-repo-releases</id> + <url>https://repository.cloudera.com/artifactory/public/</url> + <releases> + <enabled>true</enabled> + </releases> + <snapshots> + <enabled>false</enabled> + </snapshots> + </repository> + <repository> + <id>confluent</id> + <url>https://packages.confluent.io/maven/</url> + </repository> + </repositories> + </profile> </profiles> + <distributionManagement> + <repository> + <id>tianqiong-releases</id> + <name>Tianqiong Release Repository</name> + <url>https://mirrors.tencent.com/repository/maven/tianqiong-releases</url> + </repository> + + <snapshotRepository> + <id>tianqiong-snapshots</id> + <name>Tianqiong Snapshots Repository</name> + <url>https://mirrors.tencent.com/repository/maven/tianqiong-snapshots</url> + </snapshotRepository> + </distributionManagement> + </project>
