[jira] [Commented] (KYLIN-2799) Building cube with percentile measure encounter with NullPointerException
[ https://issues.apache.org/jira/browse/KYLIN-2799?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16136756#comment-16136756 ]

Shaofeng SHI commented on KYLIN-2799:
--------------------------------------

Thanks Zhengdong; all measure types are now supported with the Spark engine.

> Building cube with percentile measure encounter with NullPointerException
> --------------------------------------------------------------------------
>
>              Key: KYLIN-2799
>              URL: https://issues.apache.org/jira/browse/KYLIN-2799
>          Project: Kylin
>       Issue Type: Bug
> Affects Versions: v2.0.0, v2.1.0
>         Reporter: zhengdong
>         Assignee: zhengdong
>          Fix For: v2.2.0
>      Attachments: 0001-KYLIN-2799-Building-cube-with-percentile-measure-enc.patch
>
> The error trace log is as follows:
> {code:java}
>     at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1435)
>     at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1423)
>     at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1422)
>     at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>     at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
>     at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1422)
>     at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
>     at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
>     at scala.Option.foreach(Option.scala:257)
>     at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:802)
>     at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1650)
>     at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1605)
>     at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1594)
>     at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
>     at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:628)
>     at org.apache.spark.SparkContext.runJob(SparkContext.scala:1925)
>     at org.apache.spark.SparkContext.runJob(SparkContext.scala:1938)
>     at org.apache.spark.SparkContext.runJob(SparkContext.scala:1958)
>     at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1158)
>     at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1085)
>     at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1085)
>     at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>     at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
>     at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
>     at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopDataset(PairRDDFunctions.scala:1085)
>     at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopFile$2.apply$mcV$sp(PairRDDFunctions.scala:1005)
>     at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopFile$2.apply(PairRDDFunctions.scala:996)
>     at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopFile$2.apply(PairRDDFunctions.scala:996)
>     at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>     at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
>     at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
>     at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopFile(PairRDDFunctions.scala:996)
>     at org.apache.spark.api.java.JavaPairRDD.saveAsNewAPIHadoopFile(JavaPairRDD.scala:823)
>     at org.apache.kylin.engine.spark.SparkCubingByLayer.saveToHDFS(SparkCubingByLayer.java:298)
>     at org.apache.kylin.engine.spark.SparkCubingByLayer.execute(SparkCubingByLayer.java:257)
>     at org.apache.kylin.common.util.AbstractApplication.execute(AbstractApplication.java:37)
>     ... 6 more
> Caused by: java.lang.NullPointerException
>     at org.apache.kylin.job.shaded.com.tdunning.math.stats.AbstractTDigest.add(AbstractTDigest.java:160)
>     at org.apache.kylin.measure.percentile.PercentileCounter.merge(PercentileCounter.java:63)
>     at org.apache.kylin.measure.percentile.PercentileCounter.<init>(PercentileCounter.java:44)
>     at org.apache.kylin.measure.percentile.PercentileAggregator.aggregate(PercentileAggregator.java:46)
> {code}
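The failure occurs inside the Spark cubing step (SparkCubingByLayer), where measure objects such as PercentileCounter cross executor boundaries and therefore pass through Spark's configured serializer. As context for the discussion below, here is a minimal sketch of how a Spark job enables Kryo and plugs in a class registrator; the property names are standard Spark settings, and the registrator class path is an assumption based on the KylinKryoRegistrator mentioned later in this thread, not a quote of Kylin's actual job setup code.

{code:java}
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class KryoConfExample {
    public static void main(String[] args) {
        // Standard Spark settings: use Kryo for shuffle serialization and
        // point it at a registrator that declares per-class serializers.
        SparkConf conf = new SparkConf()
                .setAppName("cubing-with-percentile-measure")
                .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
                // Assumed package path for the registrator discussed in this thread.
                .set("spark.kryo.registrator", "org.apache.kylin.engine.spark.KylinKryoRegistrator");

        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            // ... build cuboid RDDs here; measure objects such as PercentileCounter
            // are serialized by Kryo whenever they are shuffled between executors.
        }
    }
}
{code}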
[jira] [Commented] (KYLIN-2799) Building cube with percentile measure encounter with NullPointerException
[ https://issues.apache.org/jira/browse/KYLIN-2799?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16136203#comment-16136203 ]

zhengdong commented on KYLIN-2799:
-----------------------------------

The com.tdunning.math.stats classes are only referenced by PercentileCounter. Since we use a custom serializer for PercentileCounter instead of the default FieldSerializer, we don't have to register the com.tdunning.math.stats library at all.
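To illustrate what a custom serializer instead of the default FieldSerializer means in practice, here is a minimal, hypothetical sketch. The counter class and its toBytes/fromBytes methods are invented for the example (Kylin's real PercentileCounter has its own codec, whose method names are not shown in this thread); the point is that Kryo delegates to the object's own byte encoding, so the t-digest's internal fields are never reconstructed by reflection. The read/write signatures follow Kryo 3/4, which is what Spark 2.x ships.

{code:java}
import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.Serializer;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;

// Hypothetical stand-in for a measure counter that already knows how to
// encode itself to bytes (e.g. the compressed t-digest state).
class HypotheticalPercentileCounter {
    byte[] toBytes() { /* encode internal t-digest state */ return new byte[0]; }
    static HypotheticalPercentileCounter fromBytes(byte[] data) { /* decode */ return new HypotheticalPercentileCounter(); }
}

// Custom Kryo serializer: write the counter's own byte encoding instead of
// letting FieldSerializer walk the t-digest's internal fields.
class PercentileCounterKryoSerializer extends Serializer<HypotheticalPercentileCounter> {
    @Override
    public void write(Kryo kryo, Output output, HypotheticalPercentileCounter counter) {
        byte[] bytes = counter.toBytes();
        output.writeInt(bytes.length);
        output.writeBytes(bytes);
    }

    @Override
    public HypotheticalPercentileCounter read(Kryo kryo, Input input, Class<HypotheticalPercentileCounter> type) {
        int length = input.readInt();
        return HypotheticalPercentileCounter.fromBytes(input.readBytes(length));
    }
}
{code}

Registered via kryo.register(HypotheticalPercentileCounter.class, new PercentileCounterKryoSerializer()), such a counter never requires the com.tdunning.math.stats types to be registered separately.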
[jira] [Commented] (KYLIN-2799) Building cube with percentile measure encounter with NullPointerException
[ https://issues.apache.org/jira/browse/KYLIN-2799?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16136160#comment-16136160 ]

Billy Liu commented on KYLIN-2799:
-----------------------------------

Hi [~zhengd], why was the com.tdunning.math.stats library removed from the Kryo registration?
[jira] [Commented] (KYLIN-2799) Building cube with percentile measure encounter with NullPointerException
[ https://issues.apache.org/jira/browse/KYLIN-2799?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16135025#comment-16135025 ]

zhengdong commented on KYLIN-2799:
-----------------------------------

Hi Shaofeng SHI, yes, I verified this in my test environment, and the Spark engine now seems to work fine for all kinds of measure types.
[jira] [Commented] (KYLIN-2799) Building cube with percentile measure encounter with NullPointerException
[ https://issues.apache.org/jira/browse/KYLIN-2799?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16134890#comment-16134890 ]

Shaofeng SHI commented on KYLIN-2799:
--------------------------------------

Hi Zhengdong, I can't recall the details. At the time, it seemed PercentileCounter couldn't be serialized automatically due to a bug in Kryo. Since no external user was building percentile measures with Spark back then, I disabled it as a workaround. Thanks for the patch, it looks good. Have you tested it and verified that the Spark engine can now build the percentile measure?
[jira] [Commented] (KYLIN-2799) Building cube with percentile measure encounter with NullPointerException
[ https://issues.apache.org/jira/browse/KYLIN-2799?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16134814#comment-16134814 ]

zhengdong commented on KYLIN-2799:
-----------------------------------

Hi Shaofeng SHI, I noticed the TODO comment in KylinKryoRegistrator saying that the PercentileCounter class should use a JavaSerializer. Could you elaborate on that, or help review this patch?
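For reference, the TODO mentioned above points at Kryo's built-in JavaSerializer, which falls back to plain Java serialization for a single class. Assuming PercentileCounter implements java.io.Serializable, the registration inside a Spark KryoRegistrator might look roughly like the sketch below; ExampleKryoRegistrator is an invented name for illustration, not the actual KylinKryoRegistrator code.

{code:java}
import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.serializers.JavaSerializer;
import org.apache.kylin.measure.percentile.PercentileCounter;
import org.apache.spark.serializer.KryoRegistrator;

public class ExampleKryoRegistrator implements KryoRegistrator {
    @Override
    public void registerClasses(Kryo kryo) {
        // Delegate this one class to Java serialization so Kryo's default
        // FieldSerializer never touches the t-digest's internal state.
        kryo.register(PercentileCounter.class, new JavaSerializer());
        // ... other Kylin, Hadoop, and measure classes would be registered here.
    }
}
{code}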