Repository: spark Updated Branches: refs/heads/master 08887369c -> 21bdbe9fe
[SPARK-9627] [SQL] Stops using Scala runtime reflection in DictionaryEncoding `DictionaryEncoding` uses Scala runtime reflection to avoid boxing costs while building the dictionary array. However, this code path may hit [SI-6240] [1] and throw an exception. [1]: https://issues.scala-lang.org/browse/SI-6240 Author: Cheng Lian <[email protected]> Closes #8306 from liancheng/spark-9627/in-memory-cache-scala-reflection. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/21bdbe9f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/21bdbe9f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/21bdbe9f Branch: refs/heads/master Commit: 21bdbe9fe69be47be562de24216a469e5ee64c7b Parents: 0888736 Author: Cheng Lian <[email protected]> Authored: Wed Aug 19 13:57:52 2015 -0700 Committer: Michael Armbrust <[email protected]> Committed: Wed Aug 19 13:57:52 2015 -0700 ---------------------------------------------------------------------- .../sql/columnar/InMemoryColumnarTableScan.scala | 1 - .../columnar/compression/compressionSchemes.scala | 15 ++++----------- 2 files changed, 4 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/21bdbe9f/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala index 45f15fd..66d429b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala @@ -120,7 +120,6 @@ private[sql] case class InMemoryRelation( new Iterator[CachedBatch] { def next(): CachedBatch = { val 
columnBuilders = output.map { attribute => - val columnType = ColumnType(attribute.dataType) ColumnBuilder(attribute.dataType, batchSize, attribute.name, useCompression) }.toArray http://git-wip-us.apache.org/repos/asf/spark/blob/21bdbe9f/sql/core/src/main/scala/org/apache/spark/sql/columnar/compression/compressionSchemes.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/compression/compressionSchemes.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/compression/compressionSchemes.scala index c91d960..ca910a9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/compression/compressionSchemes.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/compression/compressionSchemes.scala @@ -270,20 +270,13 @@ private[sql] case object DictionaryEncoding extends CompressionScheme { class Decoder[T <: AtomicType](buffer: ByteBuffer, columnType: NativeColumnType[T]) extends compression.Decoder[T] { - private val dictionary = { - // TODO Can we clean up this mess? Maybe move this to `DataType`? - implicit val classTag = { - val mirror = runtimeMirror(Utils.getSparkClassLoader) - ClassTag[T#InternalType](mirror.runtimeClass(columnType.scalaTag.tpe)) - } - - Array.fill(buffer.getInt()) { - columnType.extract(buffer) - } + private val dictionary: Array[Any] = { + val elementNum = buffer.getInt() + Array.fill[Any](elementNum)(columnType.extract(buffer).asInstanceOf[Any]) } override def next(row: MutableRow, ordinal: Int): Unit = { - columnType.setField(row, ordinal, dictionary(buffer.getShort())) + columnType.setField(row, ordinal, dictionary(buffer.getShort()).asInstanceOf[T#InternalType]) } override def hasNext: Boolean = buffer.hasRemaining --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
