This is an automated email from the ASF dual-hosted git repository. stoty pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/phoenix-connectors.git
commit bfbfb8d14e63f39adb2bce3e9ed281dccf4dd875 Author: Istvan Toth <st...@apache.org> AuthorDate: Wed Nov 22 15:54:46 2023 +0100 PHOENIX-7118 Fix Shading Regressions in Spark Connector --- phoenix5-spark-shaded/pom.xml | 271 ++++++++++++++++++++++++++++++++++++++--- phoenix5-spark3-shaded/pom.xml | 38 +++--- pom.xml | 7 +- 3 files changed, 282 insertions(+), 34 deletions(-) diff --git a/phoenix5-spark-shaded/pom.xml b/phoenix5-spark-shaded/pom.xml index 7cab58c..1b9ff72 100644 --- a/phoenix5-spark-shaded/pom.xml +++ b/phoenix5-spark-shaded/pom.xml @@ -30,13 +30,15 @@ <modelVersion>4.0.0</modelVersion> <artifactId>phoenix5-spark-shaded</artifactId> - <name>Shaded Phoenix Spark Connector for Phoenix 5</name> + <name>Shaded Phoenix Spark 2 Connector for Phoenix 5</name> <properties> <top.dir>${project.basedir}/..</top.dir> </properties> <dependencies> + + <!-- Phoenix comes first, as we shade most dependencies anyway --> <dependency> <groupId>org.apache.phoenix</groupId> <artifactId>phoenix5-spark</artifactId> @@ -44,18 +46,199 @@ <dependency> <groupId>org.apache.phoenix</groupId> <artifactId>phoenix-hbase-compat-${hbase.compat.version}</artifactId> - <scope>runtime</scope> + </dependency> + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-api</artifactId> + <scope>provided</scope> + </dependency> + + <!-- Phoenix excludes commons-beanutils from the Omid dependency, but that's basically a bug + We need to add it back, so that we don't depend on hadoop's commons-beanutils, which may or + may not be shaded. 
+ This can be removed once we use a Phoenix version that doesn't have this problem --> + <dependency> + <groupId>commons-beanutils</groupId> + <artifactId>commons-beanutils</artifactId> + <scope>compile</scope> + </dependency> + + <!-- Mark every Hadoop jar as provided --> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-mapreduce-client-core</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-annotations</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-auth</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-api</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-hdfs</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-hdfs-client</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-distcp</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-client</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-mapreduce-client-jobclient</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-mapreduce-client-common</artifactId> + <scope>provided</scope> </dependency> <!-- We want to take the implementation from Spark --> + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-log4j12</artifactId> + 
<scope>provided</scope> + </dependency> <dependency> <groupId>log4j</groupId> <artifactId>log4j</artifactId> <scope>provided</scope> </dependency> + + <!-- Mark HBase as provided, too --> + <dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase-client</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase-common</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase-mapreduce</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase-replication</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase-endpoint</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase-endpoint</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase-metrics-api</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase-metrics</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase-protocol</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase-protocol-shaded</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase-server</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase-hadoop-compat</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase-hadoop2-compat</artifactId> + <scope>provided</scope> + </dependency> + 
<dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase-zookeeper</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hbase.thirdparty</groupId> + <artifactId>hbase-shaded-netty</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hbase.thirdparty</groupId> + <artifactId>hbase-shaded-miscellaneous</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hbase.thirdparty</groupId> + <artifactId>hbase-shaded-protobuf</artifactId> + <scope>provided</scope> + </dependency> + <!-- Other dependencies we don't want to shade in, but are not transitively excluded by the + above for some reason --> + <dependency> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + <!-- random version, for exclusion only --> + <version>11.0.2</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>com.github.stephenc.findbugs</groupId> + <artifactId>findbugs-annotations</artifactId> + <!-- random version, for exclusion only --> + <version>1.3.9-1</version> + <scope>provided</scope> + </dependency> </dependencies> <build> <plugins> + <!-- Taken from phoenix-client-parent this should be kept in sync with + Phoenix as much as possible --> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-shade-plugin</artifactId> @@ -75,6 +258,7 @@ <exclude>NOTICE.*</exclude> <exclude>NOTICE</exclude> <exclude>README*</exclude> + <exclude>META-INF/versions/11/org/glassfish/jersey/internal/jsr166/*.class</exclude> <!-- Coming from Omid, should be fixed there --> <exclude>log4j.properties</exclude> </excludes> @@ -107,31 +291,32 @@ </filters> <transformers> <transformer - implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" /> + implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" /> <transformer - 
implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer"> + implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer"> <resource>csv-bulk-load-config.properties</resource> <file> ${project.basedir}/../config/csv-bulk-load-config.properties </file> </transformer> <transformer - implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer"> + implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer"> <resource>README.md</resource> <file>${project.basedir}/../README.md</file> </transformer> <transformer - implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer"> + implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer"> <resource>LICENSE.txt</resource> <file>${project.basedir}/../LICENSE</file> </transformer> <transformer - implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer"> + implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer"> <resource>NOTICE</resource> <file>${project.basedir}/../NOTICE</file> </transformer> </transformers> <relocations> + <!-- Keep phoenix-client-byo-shaded-hadoop in sync with this --> <relocation> <pattern>org/</pattern> <shadedPattern>${shaded.package}.org.</shadedPattern> @@ -156,20 +341,68 @@ <exclude>org/apache/phoenix/**</exclude> <exclude>org/apache/omid/**</exclude> <!-- Do want/need to expose Tephra as well ? --> + <!-- See PHOENIX-7118 + Depending on the Spark classpath we may need to leave this unshaded, relocate + it under org/apache/hadoop/shaded/ or under org/apache/hadoop/hbase/shaded/. 
+ The only thing that is guaranteed not to work is relocating it under + ${shaded.package} --> + <exclude>org/apache/commons/configuration2/**</exclude> </excludes> </relocation> + <!-- We cannot use the more elegant shading rules in -client + and -server for com packages, but it SHOULD be equivalent, except for the + protobuf change for hbase-shaded-client compatibility --> <relocation> - <pattern>com/</pattern> - <shadedPattern>${shaded.package}.com.</shadedPattern> - <excludes> - <!-- Not the com/ packages that are a part of particular jdk implementations --> - <exclude>com/sun/tools/**</exclude> - <exclude>com/sun/javadoc/**</exclude> - <exclude>com/sun/security/**</exclude> - <exclude>com/sun/jndi/**</exclude> - <exclude>com/sun/management/**</exclude> - </excludes> + <pattern>com/beust/</pattern> + <shadedPattern>${shaded.package}.com.beust.</shadedPattern> + </relocation> + <relocation> + <pattern>com/clearspring/</pattern> + <shadedPattern>${shaded.package}.com.clearspring.</shadedPattern> + </relocation> + <relocation> + <pattern>com/codahale/</pattern> + <shadedPattern>${shaded.package}.com.codahale.</shadedPattern> + </relocation> + <relocation> + <pattern>com/fasterxml/</pattern> + <shadedPattern>${shaded.package}.com.fasterxml.</shadedPattern> + </relocation> + <relocation> + <pattern>com/force/</pattern> + <shadedPattern>${shaded.package}.com.force.</shadedPattern> </relocation> + <relocation> + <pattern>com/google/gson/</pattern> + <shadedPattern>${shaded.package}.com.google.gson.</shadedPattern> + </relocation> + <relocation> + <pattern>com/google/inject/</pattern> + <shadedPattern>${shaded.package}.com.google.inject.</shadedPattern> + </relocation> + <!-- This is protobuf 2.5.0 which is shaded to this package in hbase-shaded-client, + not the modified protobuf 3.x from hbase-thirdparty --> + <relocation> + <pattern>com/google/protobuf/</pattern> + <shadedPattern>${hbase.shaded.package}.com.google.protobuf.</shadedPattern> + </relocation> + 
<relocation> + <pattern>com/ibm/</pattern> + <shadedPattern>${shaded.package}.com.ibm.</shadedPattern> + </relocation> + <relocation> + <pattern>com/lmax/</pattern> + <shadedPattern>${shaded.package}.com.lmax.</shadedPattern> + </relocation> + <relocation> + <pattern>com/sun/jna/</pattern> + <shadedPattern>${shaded.package}.com.sun.jna.</shadedPattern> + </relocation> + <relocation> + <pattern>com/squareup/</pattern> + <shadedPattern>${shaded.package}.com.squareup.</shadedPattern> + </relocation> + <relocation> <pattern>io/</pattern> <shadedPattern>${shaded.package}.io.</shadedPattern> @@ -275,8 +508,8 @@ <artifactId>maven-compiler-plugin</artifactId> <executions> <execution> - <id>default-compile</id> - <phase>none</phase> + <id>default-compile</id> + <phase>none</phase> </execution> </executions> </plugin> diff --git a/phoenix5-spark3-shaded/pom.xml b/phoenix5-spark3-shaded/pom.xml index d10e9f2..cfc90a0 100644 --- a/phoenix5-spark3-shaded/pom.xml +++ b/phoenix5-spark3-shaded/pom.xml @@ -23,8 +23,8 @@ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <parent> - <artifactId>phoenix-connectors</artifactId> <groupId>org.apache.phoenix</groupId> + <artifactId>phoenix-connectors</artifactId> <version>6.0.0-SNAPSHOT</version> </parent> <modelVersion>4.0.0</modelVersion> @@ -53,6 +53,16 @@ <scope>provided</scope> </dependency> + <!-- Phoenix excludes commons-beanutils from the Omid dependency, but that's basically a bug + We need to add it back, so that we don't depend on hadoop's commons-beanutils, which may or + may not be shaded. 
+ This can be removed once we use a Phoenix version that doesn't have this problem --> + <dependency> + <groupId>commons-beanutils</groupId> + <artifactId>commons-beanutils</artifactId> + <scope>compile</scope> + </dependency> + <!-- Mark every Hadoop jar as provided --> <dependency> <groupId>org.apache.hadoop</groupId> @@ -227,7 +237,7 @@ <build> <plugins> - <!-- Taken from phoenix-client-parent this should be kept in sync with + <!-- Taken from phoenix-client-parent this should be kept in sync with Phoenix as much as possible --> <plugin> <groupId>org.apache.maven.plugins</groupId> @@ -323,24 +333,25 @@ <exclude>org/w3c/dom/**</exclude> <exclude>org/xml/sax/**</exclude> <!-- Extras compared to Hadoop --> - <!-- Hbase classes - Maybe these could be shaded as well - ? --> + <!-- Hbase classes - Maybe these could be shaded as well ? --> <exclude>org/apache/hbase/**</exclude> <!-- We use the spark classpath directly --> <exclude>org/apache/spark/**</exclude> <!-- Phoenix classes --> <exclude>org/apache/phoenix/**</exclude> <exclude>org/apache/omid/**</exclude> - <!-- We must not shade this, as this is provided by HBase. - This is specific to --> - <!-- the cases where we rely on external HBase / Hadoop --> - <exclude>org.apache.commons.beanutils/**</exclude> <!-- Do want/need to expose Tephra as well ? --> + <!-- See PHOENIX-7118 + Depending on the Spark classpath we may need to leave this unshaded, relocate + it under org/apache/hadoop/shaded/ or under org/apache/hadoop/hbase/shaded/. 
+ The only thing that is guaranteed not to work is relocating it under + ${shaded.package} --> + <exclude>org/apache/commons/configuration2/**</exclude> </excludes> </relocation> <!-- We cannot use the more elegant shading rules in -client and -server for com packages, but it SHOULD be equivalent, except for the - changes for hbase-shaded-client compatibility --> + protobuf change for hbase-shaded-client compatibility --> <relocation> <pattern>com/beust/</pattern> <shadedPattern>${shaded.package}.com.beust.</shadedPattern> @@ -369,7 +380,8 @@ <pattern>com/google/inject/</pattern> <shadedPattern>${shaded.package}.com.google.inject.</shadedPattern> </relocation> - <!-- HBase shaded ! --> + <!-- This is protobuf 2.5.0 which is shaded to this package in hbase-shaded-client, + not the modified protobuf 3.x from hbase-thirdparty --> <relocation> <pattern>com/google/protobuf/</pattern> <shadedPattern>${hbase.shaded.package}.com.google.protobuf.</shadedPattern> @@ -395,8 +407,7 @@ <pattern>io/</pattern> <shadedPattern>${shaded.package}.io.</shadedPattern> <excludes> - <!-- Exclude config keys for Hadoop that look like package - names --> + <!-- Exclude config keys for Hadoop that look like package names --> <exclude>io/compression/**</exclude> <exclude>io/mapfile/**</exclude> <exclude>io/map/index/*</exclude> @@ -440,8 +451,7 @@ <pattern>net/</pattern> <shadedPattern>${shaded.package}.net.</shadedPattern> <excludes> - <!-- Exclude config keys for Hadoop that look like package - names --> + <!-- Exclude config keys for Hadoop that look like package names --> <exclude>net/topology/**</exclude> </excludes> </relocation> diff --git a/pom.xml b/pom.xml index 5dbfdec..1f01f94 100644 --- a/pom.xml +++ b/pom.xml @@ -57,6 +57,7 @@ <!-- Phoenix Version --> <phoenix.version>5.1.3</phoenix.version> <omid.version>1.0.2</omid.version> + <commons-beanutils.version>1.9.4</commons-beanutils.version> <phoenix.thirdparty.version>2.0.0</phoenix.thirdparty.version> <!-- The should match the 
versions used to build HBase and Hadoop --> <hbase.version>2.4.16</hbase.version> @@ -549,7 +550,11 @@ <artifactId>phoenix5-hive-shaded</artifactId> <version>${project.version}</version> </dependency> - + <dependency> + <groupId>commons-beanutils</groupId> + <artifactId>commons-beanutils</artifactId> + <version>${commons-beanutils.version}</version> + </dependency> <!-- HBase dependencies --> <!-- These are only needed so that we can set them provided and exclude from the shaded jars -->