git commit: [SPARK-3559][SQL] Remove unnecessary columns from List of needed Column Ids in Hive Conf

marmbrus Mon, 13 Oct 2014 13:46:26 -0700

Repository: spark
Updated Branches:
  refs/heads/master 73da9c26b -> e10d71e7e



[SPARK-3559][SQL] Remove unnecessary columns from List of needed Column Ids in 
Hive Conf

Author: Venkata Ramana G <ramana.gollamudihuawei.com>

Author: Venkata Ramana Gollamudi <[email protected]>

Closes #2713 from gvramana/remove_unnecessary_columns and squashes the 
following commits:

b7ba768 [Venkata Ramana Gollamudi] Added comment and checkstyle fix
6a93459 [Venkata Ramana Gollamudi] cloned hiveconf for each TableScanOperators 
so that only required columns are added


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e10d71e7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e10d71e7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e10d71e7

Branch: refs/heads/master
Commit: e10d71e7e58bf2ec0f1942cb2f0602396ab866b4
Parents: 73da9c2
Author: Venkata Ramana Gollamudi <[email protected]>
Authored: Mon Oct 13 13:45:34 2014 -0700
Committer: Michael Armbrust <[email protected]>
Committed: Mon Oct 13 13:45:34 2014 -0700

----------------------------------------------------------------------
 .../scala/org/apache/spark/sql/hive/TableReader.scala     |  6 ++++--
 .../apache/spark/sql/hive/execution/HiveTableScan.scala   | 10 ++++++++--
 2 files changed, 12 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/e10d71e7/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
index 84fafcd..0de29d5 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.hive
 
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{Path, PathFilter}
+import org.apache.hadoop.hive.conf.HiveConf
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants._
 import org.apache.hadoop.hive.ql.exec.Utilities
 import org.apache.hadoop.hive.ql.metadata.{Partition => HivePartition, Table 
=> HiveTable}
@@ -52,7 +53,8 @@ private[hive]
 class HadoopTableReader(
     @transient attributes: Seq[Attribute],
     @transient relation: MetastoreRelation,
-    @transient sc: HiveContext)
+    @transient sc: HiveContext,
+    @transient hiveExtraConf: HiveConf)
   extends TableReader {
 
   // Choose the minimum number of splits. If mapred.map.tasks is set, then use 
that unless
@@ -63,7 +65,7 @@ class HadoopTableReader(
   // TODO: set aws s3 credentials.
 
   private val _broadcastedHiveConf =
-    sc.sparkContext.broadcast(new SerializableWritable(sc.hiveconf))
+    sc.sparkContext.broadcast(new SerializableWritable(hiveExtraConf))
 
   def broadcastedHiveConf = _broadcastedHiveConf
 

http://git-wip-us.apache.org/repos/asf/spark/blob/e10d71e7/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
index 577ca92..a321475 100644
--- 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
+++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
@@ -64,8 +64,14 @@ case class HiveTableScan(
     BindReferences.bindReference(pred, relation.partitionKeys)
   }
 
+  // Create a local copy of hiveconf,so that scan specific modifications 
should not impact 
+  // other queries
   @transient
-  private[this] val hadoopReader = new HadoopTableReader(attributes, relation, 
context)
+  private[this] val hiveExtraConf = new HiveConf(context.hiveconf)
+
+  @transient
+  private[this] val hadoopReader = 
+    new HadoopTableReader(attributes, relation, context, hiveExtraConf)
 
   private[this] def castFromString(value: String, dataType: DataType) = {
     Cast(Literal(value), dataType).eval(null)
@@ -97,7 +103,7 @@ case class HiveTableScan(
     hiveConf.set(serdeConstants.LIST_COLUMNS, 
relation.attributes.map(_.name).mkString(","))
   }
 
-  addColumnMetadataToConf(context.hiveconf)
+  addColumnMetadataToConf(hiveExtraConf)
 
   /**
    * Prunes partitions not involve the query plan.


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

git commit: [SPARK-3559][SQL] Remove unnecessary columns from List of needed Column Ids in Hive Conf

Reply via email to