PHOENIX-2503: Multiple Java NoClass/Method Errors with Spark and Phoenix. The Calcite dependency in the regular client JAR is pulling in a version of com.fasterxml.jackson which is incompatible with the Spark runtime.
This patch creates a new assembly artifact, client-spark, which attempts to include only the client JARs necessary for the Spark integration to work. Also made sure the Phoenix driver is explicitly registered in the PhoenixRDD Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/7c76bbab Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/7c76bbab Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/7c76bbab Branch: refs/heads/4.x-HBase-1.0 Commit: 7c76bbabdf308e67d548d613987dcc024d65bf39 Parents: b35cb98 Author: Josh Mahonin <[email protected]> Authored: Mon Dec 21 10:30:36 2015 -0500 Committer: Josh Mahonin <[email protected]> Committed: Mon Dec 21 10:38:00 2015 -0500 ---------------------------------------------------------------------- phoenix-assembly/pom.xml | 17 ++++ phoenix-assembly/src/build/client-spark.xml | 87 ++++++++++++++++++++ .../org/apache/phoenix/spark/PhoenixRDD.scala | 6 ++ 3 files changed, 110 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/phoenix/blob/7c76bbab/phoenix-assembly/pom.xml ---------------------------------------------------------------------- diff --git a/phoenix-assembly/pom.xml b/phoenix-assembly/pom.xml index 34a53dc..1945cac 100644 --- a/phoenix-assembly/pom.xml +++ b/phoenix-assembly/pom.xml @@ -64,6 +64,23 @@ </descriptors> </configuration> </execution> + <!-- Due to SPARK-8332 and Calcite's fasterxml dependency, we need a custom spark client --> + <execution> + <id>client-spark</id> + <phase>package</phase> + <goals> + <goal>single</goal> + </goals> + <configuration> + <finalName>phoenix-${project.version}</finalName> + <attach>false</attach> + <appendAssemblyId>true</appendAssemblyId> + <descriptors> + <!-- build the phoenix spark client jar --> + <descriptor>src/build/client-spark.xml</descriptor> + </descriptors> + </configuration> + </execution> <execution> 
<id>client-minimal</id> <phase>package</phase> http://git-wip-us.apache.org/repos/asf/phoenix/blob/7c76bbab/phoenix-assembly/src/build/client-spark.xml ---------------------------------------------------------------------- diff --git a/phoenix-assembly/src/build/client-spark.xml b/phoenix-assembly/src/build/client-spark.xml new file mode 100644 index 0000000..748f1d8 --- /dev/null +++ b/phoenix-assembly/src/build/client-spark.xml @@ -0,0 +1,87 @@ +<?xml version='1.0'?> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+ +--> + +<!-- Due to SPARK-8332 and Calcite's fasterxml dependency, we need a custom spark client --> +<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd"> + <id>client-spark</id> + <!-- All the dependencies (unpacked) necessary to run phoenix from a single, stand-alone jar --> + <formats> + <format>jar</format> + </formats> + <includeBaseDirectory>false</includeBaseDirectory> + + <containerDescriptorHandlers> + <containerDescriptorHandler> + <!-- + aggregate SPI's so that things like HDFS FileSystem works in uberjar + http://docs.oracle.com/javase/tutorial/sound/SPI-intro.html + --> + <handlerName>metaInf-services</handlerName> + </containerDescriptorHandler> + </containerDescriptorHandlers> + + <componentDescriptors> + <componentDescriptor>src/build/components-minimal.xml</componentDescriptor> + </componentDescriptors> + + <dependencySets> + <dependencySet> + <!-- Unpack all the dependencies to class files, since java doesn't support + jar of jars for running --> + <unpack>true</unpack> + <!-- save these dependencies to the top-level --> + <outputDirectory>/</outputDirectory> + <includes> + <include>org.apache.hbase:hbase*</include> + <include>org.apache.htrace:htrace-core</include> + <include>io.netty:netty-all</include> + <include>commons-codec:commons-codec</include> + <include>co.cask.tephra:tephra*</include> + <include>org.apache.twill:twill*</include> + <include>org.apache.thrift:*</include> + <include>com.google.code.gson:gson*</include> + <!-- We use a newer version of guava than HBase - this might be an issue? --> + <include>com.google.guava:guava</include> + <!-- HBase also pulls in these dependencies on its own, should we include-them? 
--> + <include>com.google.protobuf:protobuf-java</include> + <include>org.slf4j:slf4j-api</include> + <include>org.apache.zookeeper:zookeeper</include> + <include>log4j:log4j</include> + <include>org.apache.hadoop:hadoop*</include> + <include>commons-configuration:commons-configuration</include> + <include>commons-io:commons-io</include> + <include>commons-logging:commons-logging</include> + <include>commons-lang:commons-lang</include> + <include>commons-cli:commons-cli</include> + <include>org.apache.commons:commons-csv</include> + <include>org.codehaus.jackson:jackson-mapper-asl</include> + <include>org.codehaus.jackson:jackson-core-asl</include> + <include>commons-collections:commons-collections</include> + <include>joda-time:joda-time</include> + <include>org.jruby.joni:joni</include> + <include>org.jruby.jcodings:jcodings</include> + </includes> + </dependencySet> + </dependencySets> +</assembly> http://git-wip-us.apache.org/repos/asf/phoenix/blob/7c76bbab/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRDD.scala ---------------------------------------------------------------------- diff --git a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRDD.scala b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRDD.scala index fa36a1f..d79189b 100644 --- a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRDD.scala +++ b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRDD.scala @@ -13,9 +13,12 @@ */ package org.apache.phoenix.spark +import java.sql.DriverManager + import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hbase.{HBaseConfiguration, HConstants} import org.apache.hadoop.io.NullWritable +import org.apache.phoenix.jdbc.PhoenixDriver import org.apache.phoenix.mapreduce.PhoenixInputFormat import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil import org.apache.phoenix.schema.types._ @@ -32,6 +35,9 @@ class PhoenixRDD(sc: SparkContext, table: String, columns: Seq[String], 
@transient conf: Configuration) extends RDD[PhoenixRecordWritable](sc, Nil) with Logging { + // Make sure to register the Phoenix driver + DriverManager.registerDriver(new PhoenixDriver) + @transient lazy val phoenixConf = { getPhoenixConfiguration }
