Repository: spark Updated Branches: refs/heads/master 664367781 -> 16ba71aba
[SPARK-15416][SQL] Display a better message for not finding classes removed in Spark 2.0 ## What changes were proposed in this pull request? When a `NoClassDefFoundError` or `ClassNotFoundException` is caught, check whether the missing class is one that was removed in Spark 2.0. If so, the user must be using an incompatible library and we can provide a better message. ## How was this patch tested? 1. Run `bin/pyspark --packages com.databricks:spark-avro_2.10:2.0.1` 2. Type `sqlContext.read.format("com.databricks.spark.avro").load("src/test/resources/episodes.avro")`. It will show `java.lang.ClassNotFoundException: org.apache.spark.sql.sources.HadoopFsRelationProvider is removed in Spark 2.0. Please check if your library is compatible with Spark 2.0` Author: Shixiong Zhu <shixi...@databricks.com> Closes #13201 from zsxwing/better-message. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/16ba71ab Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/16ba71ab Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/16ba71ab Branch: refs/heads/master Commit: 16ba71aba4e68bbb892d4ceb38d6d1d135d63fd3 Parents: 6643677 Author: Shixiong Zhu <shixi...@databricks.com> Authored: Thu May 19 18:31:05 2016 -0700 Committer: Michael Armbrust <mich...@databricks.com> Committed: Thu May 19 18:31:05 2016 -0700 ---------------------------------------------------------------------- .../sql/execution/datasources/DataSource.scala | 61 ++++++++++++++------ 1 file changed, 44 insertions(+), 17 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/16ba71ab/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index ce45168..ccad9b3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -83,6 +83,14 @@ case class DataSource( "com.databricks.spark.csv" -> classOf[csv.DefaultSource].getCanonicalName ) + /** + * Class that were removed in Spark 2.0. Used to detect incompatibility libraries for Spark 2.0. + */ + private val spark2RemovedClasses = Set( + "org.apache.spark.sql.DataFrame", + "org.apache.spark.sql.sources.HadoopFsRelationProvider", + "org.apache.spark.Logging") + /** Given a provider name, look up the data source class definition. */ private def lookupDataSource(provider0: String): Class[_] = { val provider = backwardCompatibilityMap.getOrElse(provider0, provider0) @@ -93,26 +101,45 @@ case class DataSource( serviceLoader.asScala.filter(_.shortName().equalsIgnoreCase(provider)).toList match { // the provider format did not match any given registered aliases case Nil => - Try(loader.loadClass(provider)).orElse(Try(loader.loadClass(provider2))) match { - case Success(dataSource) => - // Found the data source using fully qualified path - dataSource - case Failure(error) => - if (provider.startsWith("org.apache.spark.sql.hive.orc")) { - throw new ClassNotFoundException( - "The ORC data source must be used with Hive support enabled.", error) - } else { - if (provider == "avro" || provider == "com.databricks.spark.avro") { + try { + Try(loader.loadClass(provider)).orElse(Try(loader.loadClass(provider2))) match { + case Success(dataSource) => + // Found the data source using fully qualified path + dataSource + case Failure(error) => + if (error.isInstanceOf[ClassNotFoundException]) { + val className = error.getMessage + if (spark2RemovedClasses.contains(className)) { + throw new ClassNotFoundException(s"$className is removed in Spark 2.0. 
" + + "Please check if your library is compatible with Spark 2.0") + } + } + if (provider.startsWith("org.apache.spark.sql.hive.orc")) { throw new ClassNotFoundException( - s"Failed to find data source: $provider. Please use Spark package " + - "http://spark-packages.org/package/databricks/spark-avro", - error) + "The ORC data source must be used with Hive support enabled.", error) } else { - throw new ClassNotFoundException( - s"Failed to find data source: $provider. Please find packages at " + - "http://spark-packages.org", - error) + if (provider == "avro" || provider == "com.databricks.spark.avro") { + throw new ClassNotFoundException( + s"Failed to find data source: $provider. Please use Spark package " + + "http://spark-packages.org/package/databricks/spark-avro", + error) + } else { + throw new ClassNotFoundException( + s"Failed to find data source: $provider. Please find packages at " + + "http://spark-packages.org", + error) + } } + } + } catch { + case e: NoClassDefFoundError => // This one won't be caught by Scala NonFatal + // NoClassDefFoundError's class name uses "/" rather than "." for packages + val className = e.getMessage.replaceAll("/", ".") + if (spark2RemovedClasses.contains(className)) { + throw new ClassNotFoundException(s"$className was removed in Spark 2.0. " + + "Please check if your library is compatible with Spark 2.0", e) + } else { + throw e } } case head :: Nil => --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org