[
https://issues.apache.org/jira/browse/SPARK-17827?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15571348#comment-15571348
]
Pete Robbins commented on SPARK-17827:
--------------------------------------
In Statistics.scala:
case class StringColumnStat(statRow: InternalRow) {
println("StringColumnStat: " + statRow)
// The indices here must be consistent with `ColumnStatStruct.stringColumnStat`.
val numNulls: Long = statRow.getLong(0)
val avgColLen: Double = statRow.getDouble(1)
val maxColLen: Long = statRow.getLong(2) <<<<<< Actual type in statRow is Int
val ndv: Long = statRow.getLong(3)
}
case class BinaryColumnStat(statRow: InternalRow) {
// The indices here must be consistent with `ColumnStatStruct.binaryColumnStat`.
val numNulls: Long = statRow.getLong(0)
val avgColLen: Double = statRow.getDouble(1)
val maxColLen: Long = statRow.getLong(2) <<<<<< Actual type in statRow is Int
}
So either the code above should be using getInt for maxColLen, or the code
generating the row should be creating a Long.
> StatisticsColumnSuite failures on big endian platforms
> ------------------------------------------------------
>
> Key: SPARK-17827
> URL: https://issues.apache.org/jira/browse/SPARK-17827
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 2.1.0
> Environment: big endian
> Reporter: Pete Robbins
> Labels: big-endian
>
> https://issues.apache.org/jira/browse/SPARK-17073
> introduces new tests/functions that fail on big-endian platforms.
> Failing tests:
> org.apache.spark.sql.StatisticsColumnSuite.column-level statistics for
> string column
> org.apache.spark.sql.StatisticsColumnSuite.column-level statistics for
> binary column
> org.apache.spark.sql.StatisticsColumnSuite.column-level statistics for
> columns with different types
> org.apache.spark.sql.hive.StatisticsSuite.generate column-level statistics
> and load them from hive metastore
> All of them fail in checkColStat, e.g.:
> java.lang.AssertionError: assertion failed
> at scala.Predef$.assert(Predef.scala:156)
> at
> org.apache.spark.sql.StatisticsTest$.checkColStat(StatisticsTest.scala:92)
> at
> org.apache.spark.sql.StatisticsTest$$anonfun$checkColStats$1$$anonfun$apply$mcV$sp$1.apply(StatisticsTest.scala:43)
> at
> org.apache.spark.sql.StatisticsTest$$anonfun$checkColStats$1$$anonfun$apply$mcV$sp$1.apply(StatisticsTest.scala:40)
> at scala.collection.immutable.List.foreach(List.scala:381)
> at
> org.apache.spark.sql.StatisticsTest$$anonfun$checkColStats$1.apply$mcV$sp(StatisticsTest.scala:40)
> at
> org.apache.spark.sql.test.SQLTestUtils$class.withTable(SQLTestUtils.scala:168)
> at
> org.apache.spark.sql.StatisticsColumnSuite.withTable(StatisticsColumnSuite.scala:30)
> at
> org.apache.spark.sql.StatisticsTest$class.checkColStats(StatisticsTest.scala:33)
> at
> org.apache.spark.sql.StatisticsColumnSuite.checkColStats(StatisticsColumnSuite.scala:30)
> at
> org.apache.spark.sql.StatisticsColumnSuite$$anonfun$7.apply$mcV$sp(StatisticsColumnSuite.scala:171)
> at
> org.apache.spark.sql.StatisticsColumnSuite$$anonfun$7.apply(StatisticsColumnSuite.scala:160)
> at
> org.apache.spark.sql.StatisticsColumnSuite$$anonfun$7.apply(StatisticsColumnSuite.scala:160)
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]