Repository: spark Updated Branches: refs/heads/master 8e491af52 -> f327e1686
[SPARK-17738] [SQL] fix ARRAY/MAP in columnar cache ## What changes were proposed in this pull request? The actualSize() of array and map is different from the actual size, the header is Int, rather than Long. ## How was this patch tested? The flaky test should be fixed. Author: Davies Liu <[email protected]> Closes #15305 from davies/fix_MAP. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f327e168 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f327e168 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f327e168 Branch: refs/heads/master Commit: f327e16863371076dbd2a7f22c8895ae07f8274b Parents: 8e491af Author: Davies Liu <[email protected]> Authored: Fri Sep 30 09:59:12 2016 -0700 Committer: Davies Liu <[email protected]> Committed: Fri Sep 30 09:59:12 2016 -0700 ---------------------------------------------------------------------- .../org/apache/spark/sql/execution/columnar/ColumnType.scala | 8 ++++---- .../spark/sql/execution/columnar/ColumnTypeSuite.scala | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/f327e168/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala index fa9619e..d27d8c3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala @@ -589,7 +589,7 @@ private[columnar] case class STRUCT(dataType: StructType) private[columnar] case class ARRAY(dataType: ArrayType) extends ColumnType[UnsafeArrayData] with DirectCopyColumnType[UnsafeArrayData] { - override def defaultSize: Int = 16 + override def defaultSize: Int = 28 override def setField(row: MutableRow, ordinal: Int, value: UnsafeArrayData): Unit = { row.update(ordinal, value) @@ -601,7 +601,7 @@ private[columnar] case class ARRAY(dataType: ArrayType) override def actualSize(row: InternalRow, ordinal: Int): Int = { val unsafeArray = getField(row, ordinal) - 8 + unsafeArray.getSizeInBytes + 4 + unsafeArray.getSizeInBytes } override def append(value: UnsafeArrayData, buffer: ByteBuffer): Unit = { @@ -628,7 +628,7 @@ private[columnar] case class ARRAY(dataType: ArrayType) private[columnar] case class MAP(dataType: MapType) extends ColumnType[UnsafeMapData] with DirectCopyColumnType[UnsafeMapData] { - override def defaultSize: Int = 32 + override def defaultSize: Int = 68 override def setField(row: MutableRow, ordinal: Int, value: UnsafeMapData): Unit = { row.update(ordinal, value) @@ -640,7 +640,7 @@ private[columnar] case class MAP(dataType: MapType) override def actualSize(row: InternalRow, ordinal: Int): Int = { val unsafeMap = getField(row, ordinal) - 8 + unsafeMap.getSizeInBytes + 4 + unsafeMap.getSizeInBytes } override def append(value: UnsafeMapData, buffer: ByteBuffer): Unit = { http://git-wip-us.apache.org/repos/asf/spark/blob/f327e168/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala index 0b93c63..805b566 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala @@ -38,7 +38,7 @@ class ColumnTypeSuite extends SparkFunSuite with Logging { val checks = Map( NULL -> 0, BOOLEAN -> 1, BYTE -> 1, SHORT -> 2, INT -> 4, LONG -> 8, FLOAT -> 4, DOUBLE -> 8, COMPACT_DECIMAL(15, 10) -> 8, LARGE_DECIMAL(20, 10) -> 12, - STRING -> 8, BINARY -> 16, STRUCT_TYPE -> 20, ARRAY_TYPE -> 16, MAP_TYPE -> 32) + STRING -> 8, BINARY -> 16, STRUCT_TYPE -> 20, ARRAY_TYPE -> 28, MAP_TYPE -> 68) checks.foreach { case (columnType, expectedSize) => assertResult(expectedSize, s"Wrong defaultSize for $columnType") { @@ -73,8 +73,8 @@ class ColumnTypeSuite extends SparkFunSuite with Logging { checkActualSize(BINARY, Array.fill[Byte](4)(0.toByte), 4 + 4) checkActualSize(COMPACT_DECIMAL(15, 10), Decimal(0, 15, 10), 8) checkActualSize(LARGE_DECIMAL(20, 10), Decimal(0, 20, 10), 5) - checkActualSize(ARRAY_TYPE, Array[Any](1), 8 + 8 + 8 + 8) - checkActualSize(MAP_TYPE, Map(1 -> "a"), 8 + (8 + 8 + 8 + 8) + (8 + 8 + 8 + 8)) + checkActualSize(ARRAY_TYPE, Array[Any](1), 4 + 8 + 8 + 8) + checkActualSize(MAP_TYPE, Map(1 -> "a"), 4 + (8 + 8 + 8 + 8) + (8 + 8 + 8 + 8)) checkActualSize(STRUCT_TYPE, Row("hello"), 28) } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
