core`

wenchen Thu, 02 Nov 2017 10:29:20 -0700

Repository: spark
Updated Branches:
  refs/heads/master 41b60125b -> e3f67a97f



[SPARK-22416][SQL] Move OrcOptions from `sql/hive` to `sql/core`

## What changes were proposed in this pull request?

According to the 
[discussion](https://github.com/apache/spark/pull/19571#issuecomment-339472976) 
on SPARK-15474, we will add a new OrcFileFormat in the `sql/core` module and allow 
users to use both the old and the new OrcFileFormat.

To do that, `OrcOptions` should be visible in the `sql/core` module, too. 
Previously, it was `private[orc]` in `sql/hive`. This PR removes `private[orc]` 
because we don't use `private[sql]` in the `sql/execution` package after 
[SPARK-16964](https://github.com/apache/spark/pull/14554).

## How was this patch tested?

Passes Jenkins with the existing tests.

Author: Dongjoon Hyun <dongj...@apache.org>

Closes #19636 from dongjoon-hyun/SPARK-22416.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e3f67a97
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e3f67a97
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e3f67a97

Branch: refs/heads/master
Commit: e3f67a97f126abfb7eeb864f657bfc9221bb195e
Parents: 41b6012
Author: Dongjoon Hyun <dongj...@apache.org>
Authored: Thu Nov 2 18:28:56 2017 +0100
Committer: Wenchen Fan <wenc...@databricks.com>
Committed: Thu Nov 2 18:28:56 2017 +0100

----------------------------------------------------------------------
 .../execution/datasources/orc/OrcOptions.scala  | 70 ++++++++++++++++++++
 .../spark/sql/hive/orc/OrcFileFormat.scala      |  1 +
 .../apache/spark/sql/hive/orc/OrcOptions.scala  | 70 --------------------
 .../spark/sql/hive/orc/OrcSourceSuite.scala     |  1 +
 4 files changed, 72 insertions(+), 70 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/e3f67a97/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala
new file mode 100644
index 0000000..c866dd8
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.orc
+
+import java.util.Locale
+
+import org.apache.orc.OrcConf.COMPRESS
+
+import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
+import org.apache.spark.sql.internal.SQLConf
+
+/**
+ * Options for the ORC data source.
+ */
+class OrcOptions(
+    @transient private val parameters: CaseInsensitiveMap[String],
+    @transient private val sqlConf: SQLConf)
+  extends Serializable {
+
+  import OrcOptions._
+
+  def this(parameters: Map[String, String], sqlConf: SQLConf) =
+    this(CaseInsensitiveMap(parameters), sqlConf)
+
+  /**
+   * Compression codec to use.
+   * Acceptable values are defined in [[shortOrcCompressionCodecNames]].
+   */
+  val compressionCodec: String = {
+    // `compression`, `orc.compress`(i.e., OrcConf.COMPRESS), and `spark.sql.orc.compression.codec`
+    // are in order of precedence from highest to lowest.
+    val orcCompressionConf = parameters.get(COMPRESS.getAttribute)
+    val codecName = parameters
+      .get("compression")
+      .orElse(orcCompressionConf)
+      .getOrElse(sqlConf.orcCompressionCodec)
+      .toLowerCase(Locale.ROOT)
+    if (!shortOrcCompressionCodecNames.contains(codecName)) {
+      val availableCodecs = shortOrcCompressionCodecNames.keys.map(_.toLowerCase(Locale.ROOT))
+      throw new IllegalArgumentException(s"Codec [$codecName] " +
+        s"is not available. Available codecs are ${availableCodecs.mkString(", ")}.")
+    }
+    shortOrcCompressionCodecNames(codecName)
+  }
+}
+
+object OrcOptions {
+  // The ORC compression short names
+  private val shortOrcCompressionCodecNames = Map(
+    "none" -> "NONE",
+    "uncompressed" -> "NONE",
+    "snappy" -> "SNAPPY",
+    "zlib" -> "ZLIB",
+    "lzo" -> "LZO")
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/e3f67a97/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
index d26ec15..3b33a9f 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
@@ -39,6 +39,7 @@ import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.execution.datasources._
+import org.apache.spark.sql.execution.datasources.orc.OrcOptions
 import org.apache.spark.sql.hive.{HiveInspectors, HiveShim}
 import org.apache.spark.sql.sources.{Filter, _}
 import org.apache.spark.sql.types.StructType

http://git-wip-us.apache.org/repos/asf/spark/blob/e3f67a97/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcOptions.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcOptions.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcOptions.scala
deleted file mode 100644
index 6ce90c0..0000000
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcOptions.scala
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.hive.orc
-
-import java.util.Locale
-
-import org.apache.orc.OrcConf.COMPRESS
-
-import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
-import org.apache.spark.sql.internal.SQLConf
-
-/**
- * Options for the ORC data source.
- */
-private[orc] class OrcOptions(
-    @transient private val parameters: CaseInsensitiveMap[String],
-    @transient private val sqlConf: SQLConf)
-  extends Serializable {
-
-  import OrcOptions._
-
-  def this(parameters: Map[String, String], sqlConf: SQLConf) =
-    this(CaseInsensitiveMap(parameters), sqlConf)
-
-  /**
-   * Compression codec to use.
-   * Acceptable values are defined in [[shortOrcCompressionCodecNames]].
-   */
-  val compressionCodec: String = {
-    // `compression`, `orc.compress`(i.e., OrcConf.COMPRESS), and `spark.sql.orc.compression.codec`
-    // are in order of precedence from highest to lowest.
-    val orcCompressionConf = parameters.get(COMPRESS.getAttribute)
-    val codecName = parameters
-      .get("compression")
-      .orElse(orcCompressionConf)
-      .getOrElse(sqlConf.orcCompressionCodec)
-      .toLowerCase(Locale.ROOT)
-    if (!shortOrcCompressionCodecNames.contains(codecName)) {
-      val availableCodecs = shortOrcCompressionCodecNames.keys.map(_.toLowerCase(Locale.ROOT))
-      throw new IllegalArgumentException(s"Codec [$codecName] " +
-        s"is not available. Available codecs are ${availableCodecs.mkString(", ")}.")
-    }
-    shortOrcCompressionCodecNames(codecName)
-  }
-}
-
-private[orc] object OrcOptions {
-  // The ORC compression short names
-  private val shortOrcCompressionCodecNames = Map(
-    "none" -> "NONE",
-    "uncompressed" -> "NONE",
-    "snappy" -> "SNAPPY",
-    "zlib" -> "ZLIB",
-    "lzo" -> "LZO")
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/e3f67a97/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala
index ef9e67c..2a086be 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala
@@ -24,6 +24,7 @@ import org.apache.orc.OrcConf.COMPRESS
 import org.scalatest.BeforeAndAfterAll
 
 import org.apache.spark.sql.{QueryTest, Row}
+import org.apache.spark.sql.execution.datasources.orc.OrcOptions
 import org.apache.spark.sql.hive.test.TestHiveSingleton
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.sources._


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-22416][SQL] Move OrcOptions from `sql/hive` to `sql/core`

Reply via email to