Repository: spark Updated Branches: refs/heads/master 41b60125b -> e3f67a97f
[SPARK-22416][SQL] Move OrcOptions from `sql/hive` to `sql/core` ## What changes were proposed in this pull request? According to the [discussion](https://github.com/apache/spark/pull/19571#issuecomment-339472976) on SPARK-15474, we will add a new OrcFileFormat in the `sql/core` module and allow users to use both the old and the new OrcFileFormat. To do that, `OrcOptions` should be visible in the `sql/core` module, too. Previously, it was `private[orc]` in `sql/hive`. This PR removes `private[orc]` because we don't use `private[sql]` in the `sql/execution` package after [SPARK-16964](https://github.com/apache/spark/pull/14554). ## How was this patch tested? Passes Jenkins with the existing tests. Author: Dongjoon Hyun <dongj...@apache.org> Closes #19636 from dongjoon-hyun/SPARK-22416. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e3f67a97 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e3f67a97 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e3f67a97 Branch: refs/heads/master Commit: e3f67a97f126abfb7eeb864f657bfc9221bb195e Parents: 41b6012 Author: Dongjoon Hyun <dongj...@apache.org> Authored: Thu Nov 2 18:28:56 2017 +0100 Committer: Wenchen Fan <wenc...@databricks.com> Committed: Thu Nov 2 18:28:56 2017 +0100 ---------------------------------------------------------------------- .../execution/datasources/orc/OrcOptions.scala | 70 ++++++++++++++++++++ .../spark/sql/hive/orc/OrcFileFormat.scala | 1 + .../apache/spark/sql/hive/orc/OrcOptions.scala | 70 -------------------- .../spark/sql/hive/orc/OrcSourceSuite.scala | 1 + 4 files changed, 72 insertions(+), 70 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/e3f67a97/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala ---------------------------------------------------------------------- diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala new file mode 100644 index 0000000..c866dd8 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.orc + +import java.util.Locale + +import org.apache.orc.OrcConf.COMPRESS + +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.internal.SQLConf + +/** + * Options for the ORC data source. + */ +class OrcOptions( + @transient private val parameters: CaseInsensitiveMap[String], + @transient private val sqlConf: SQLConf) + extends Serializable { + + import OrcOptions._ + + def this(parameters: Map[String, String], sqlConf: SQLConf) = + this(CaseInsensitiveMap(parameters), sqlConf) + + /** + * Compression codec to use. + * Acceptable values are defined in [[shortOrcCompressionCodecNames]]. 
+ */ + val compressionCodec: String = { + // `compression`, `orc.compress`(i.e., OrcConf.COMPRESS), and `spark.sql.orc.compression.codec` + // are in order of precedence from highest to lowest. + val orcCompressionConf = parameters.get(COMPRESS.getAttribute) + val codecName = parameters + .get("compression") + .orElse(orcCompressionConf) + .getOrElse(sqlConf.orcCompressionCodec) + .toLowerCase(Locale.ROOT) + if (!shortOrcCompressionCodecNames.contains(codecName)) { + val availableCodecs = shortOrcCompressionCodecNames.keys.map(_.toLowerCase(Locale.ROOT)) + throw new IllegalArgumentException(s"Codec [$codecName] " + + s"is not available. Available codecs are ${availableCodecs.mkString(", ")}.") + } + shortOrcCompressionCodecNames(codecName) + } +} + +object OrcOptions { + // The ORC compression short names + private val shortOrcCompressionCodecNames = Map( + "none" -> "NONE", + "uncompressed" -> "NONE", + "snappy" -> "SNAPPY", + "zlib" -> "ZLIB", + "lzo" -> "LZO") +} http://git-wip-us.apache.org/repos/asf/spark/blob/e3f67a97/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala index d26ec15..3b33a9f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala @@ -39,6 +39,7 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.execution.datasources._ +import org.apache.spark.sql.execution.datasources.orc.OrcOptions import org.apache.spark.sql.hive.{HiveInspectors, HiveShim} import org.apache.spark.sql.sources.{Filter, _} import org.apache.spark.sql.types.StructType 
http://git-wip-us.apache.org/repos/asf/spark/blob/e3f67a97/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcOptions.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcOptions.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcOptions.scala deleted file mode 100644 index 6ce90c0..0000000 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcOptions.scala +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.hive.orc - -import java.util.Locale - -import org.apache.orc.OrcConf.COMPRESS - -import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap -import org.apache.spark.sql.internal.SQLConf - -/** - * Options for the ORC data source. - */ -private[orc] class OrcOptions( - @transient private val parameters: CaseInsensitiveMap[String], - @transient private val sqlConf: SQLConf) - extends Serializable { - - import OrcOptions._ - - def this(parameters: Map[String, String], sqlConf: SQLConf) = - this(CaseInsensitiveMap(parameters), sqlConf) - - /** - * Compression codec to use. 
- * Acceptable values are defined in [[shortOrcCompressionCodecNames]]. - */ - val compressionCodec: String = { - // `compression`, `orc.compress`(i.e., OrcConf.COMPRESS), and `spark.sql.orc.compression.codec` - // are in order of precedence from highest to lowest. - val orcCompressionConf = parameters.get(COMPRESS.getAttribute) - val codecName = parameters - .get("compression") - .orElse(orcCompressionConf) - .getOrElse(sqlConf.orcCompressionCodec) - .toLowerCase(Locale.ROOT) - if (!shortOrcCompressionCodecNames.contains(codecName)) { - val availableCodecs = shortOrcCompressionCodecNames.keys.map(_.toLowerCase(Locale.ROOT)) - throw new IllegalArgumentException(s"Codec [$codecName] " + - s"is not available. Available codecs are ${availableCodecs.mkString(", ")}.") - } - shortOrcCompressionCodecNames(codecName) - } -} - -private[orc] object OrcOptions { - // The ORC compression short names - private val shortOrcCompressionCodecNames = Map( - "none" -> "NONE", - "uncompressed" -> "NONE", - "snappy" -> "SNAPPY", - "zlib" -> "ZLIB", - "lzo" -> "LZO") -} http://git-wip-us.apache.org/repos/asf/spark/blob/e3f67a97/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala index ef9e67c..2a086be 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala @@ -24,6 +24,7 @@ import org.apache.orc.OrcConf.COMPRESS import org.scalatest.BeforeAndAfterAll import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.execution.datasources.orc.OrcOptions import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources._ 
--------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org