[ https://issues.apache.org/jira/browse/KYLIN-2799?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Billy Liu resolved KYLIN-2799.
------------------------------
    Resolution: Fixed
    Fix Version/s: v2.2.0

Thanks [~zhengd], LGTM. The patch is merged: https://git1-us-west.apache.org/repos/asf?p=kylin.git;a=commit;h=aaf94169

> Building a cube with a percentile measure encounters a NullPointerException
> ----------------------------------------------------------------------------
>
>                 Key: KYLIN-2799
>                 URL: https://issues.apache.org/jira/browse/KYLIN-2799
>             Project: Kylin
>          Issue Type: Bug
>    Affects Versions: v2.0.0, v2.1.0
>            Reporter: zhengdong
>            Assignee: zhengdong
>             Fix For: v2.2.0
>
>         Attachments: 0001-KYLIN-2799-Building-cube-with-percentile-measure-enc.patch
>
>
> The error stack trace is as follows:
> {code:java}
> at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1435)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1423)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1422)
> at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
> at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1422)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
> at scala.Option.foreach(Option.scala:257)
> at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:802)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1650)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1605)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1594)
> at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
> at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:628)
> at org.apache.spark.SparkContext.runJob(SparkContext.scala:1925)
> at org.apache.spark.SparkContext.runJob(SparkContext.scala:1938)
> at org.apache.spark.SparkContext.runJob(SparkContext.scala:1958)
> at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1158)
> at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1085)
> at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1085)
> at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
> at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
> at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
> at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopDataset(PairRDDFunctions.scala:1085)
> at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopFile$2.apply$mcV$sp(PairRDDFunctions.scala:1005)
> at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopFile$2.apply(PairRDDFunctions.scala:996)
> at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopFile$2.apply(PairRDDFunctions.scala:996)
> at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
> at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
> at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
> at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopFile(PairRDDFunctions.scala:996)
> at org.apache.spark.api.java.JavaPairRDD.saveAsNewAPIHadoopFile(JavaPairRDD.scala:823)
> at org.apache.kylin.engine.spark.SparkCubingByLayer.saveToHDFS(SparkCubingByLayer.java:298)
> at org.apache.kylin.engine.spark.SparkCubingByLayer.execute(SparkCubingByLayer.java:257)
> at org.apache.kylin.common.util.AbstractApplication.execute(AbstractApplication.java:37)
> ... 6 more
> Caused by: java.lang.NullPointerException
> at org.apache.kylin.job.shaded.com.tdunning.math.stats.AbstractTDigest.add(AbstractTDigest.java:160)
> at org.apache.kylin.measure.percentile.PercentileCounter.merge(PercentileCounter.java:63)
> at org.apache.kylin.measure.percentile.PercentileCounter.<init>(PercentileCounter.java:44)
> at org.apache.kylin.measure.percentile.PercentileAggregator.aggregate(PercentileAggregator.java:46)
> at org.apache.kylin.measure.percentile.PercentileAggregator.aggregate(PercentileAggregator.java:23)
> at org.apache.kylin.measure.MeasureAggregators.aggregate(MeasureAggregators.java:91)
> at org.apache.kylin.engine.spark.SparkCubingByLayer$BaseCuboidReducerFunction2.call(SparkCubingByLayer.java:316)
> at org.apache.kylin.engine.spark.SparkCubingByLayer$BaseCuboidReducerFunction2.call(SparkCubingByLayer.java:302)
> at org.apache.spark.api.java.JavaPairRDD$$anonfun$toScalaFunction2$1.apply(JavaPairRDD.scala:1037)
> at org.apache.spark.util.collection.ExternalAppendOnlyMap$$anonfun$3.apply(ExternalAppendOnlyMap.scala:151)
> at org.apache.spark.util.collection.ExternalAppendOnlyMap$$anonfun$3.apply(ExternalAppendOnlyMap.scala:150)
> at org.apache.spark.util.collection.AppendOnlyMap.changeValue(AppendOnlyMap.scala:150)
> at org.apache.spark.util.collection.SizeTrackingAppendOnlyMap.changeValue(SizeTrackingAppendOnlyMap.scala:32)
> at org.apache.spark.util.collection.ExternalAppendOnlyMap.insertAll(ExternalAppendOnlyMap.scala:163)
> at org.apache.spark.Aggregator.combineCombinersByKey(Aggregator.scala:50)
> at org.apache.spark.shuffle.BlockStoreShuffleReader.read(BlockStoreShuffleReader.scala:85)
> at org.apache.spark.rdd.ShuffledRDD.compute(ShuffledRDD.scala:109)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
> at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:336)
> at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:334)
> at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1005)
> at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:996)
> at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:936)
> at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:996)
> at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:700)
> at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:334)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:285)
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
> at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
> at org.apache.spark.scheduler.Task.run(Task.scala:99)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:322)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> at java.lang.Thread.run(Thread.java:745)
> {code}
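
The innermost frames show where things go wrong: PercentileAggregator.aggregate constructs a new PercentileCounter from an existing one, the copy constructor immediately calls merge, and merge reaches AbstractTDigest.add with a null reference. The self-contained Java sketch below reproduces that copy-then-merge pattern and adds a defensive null guard. Note the Digest and Counter classes here are simplified stand-ins, not Kylin's actual implementation, and the guard only illustrates one possible mitigation, not necessarily the change merged in commit aaf94169.

{code:java}
// Illustrative only: a minimal model of the copy-then-merge path seen in the
// stack trace. Class and method names are hypothetical stand-ins for Kylin's
// PercentileCounter / t-digest internals.
import java.util.ArrayList;
import java.util.List;

public class PercentileMergeSketch {

    /** Stand-in for a t-digest style structure holding sampled centroids. */
    static final class Digest {
        final List<Double> centroids = new ArrayList<>();
        void add(double value) { centroids.add(value); }
    }

    /** Stand-in for a percentile counter whose digest may never be initialized. */
    static final class Counter {
        Digest digest;               // stays null if no value was ever added
        double compression;

        Counter(double compression) {
            this.compression = compression;
        }

        /** Copy constructor mirroring the copy-then-merge pattern in the trace. */
        Counter(Counter another) {
            this(another.compression);
            this.digest = new Digest();
            merge(another);          // would NPE here if another.digest were null
        }

        void merge(Counter another) {
            // Defensive guard: skip counters that carry no data instead of
            // dereferencing a null digest (one possible mitigation, for illustration).
            if (another == null || another.digest == null) {
                return;
            }
            for (double c : another.digest.centroids) {
                digest.add(c);
            }
        }
    }

    public static void main(String[] args) {
        Counter empty = new Counter(100);    // digest left null, like a measure with no rows
        Counter filled = new Counter(100);
        filled.digest = new Digest();
        filled.digest.add(42.0);

        // With the guard in place both operations complete; without it, copying
        // or merging `empty` hits the same NullPointerException pattern as the
        // trace above.
        Counter copyOfEmpty = new Counter(empty);
        Counter aggregated = new Counter(filled);
        aggregated.merge(empty);

        System.out.println("copy of empty has " + copyOfEmpty.digest.centroids.size() + " centroids");
        System.out.println("merged centroids: " + aggregated.digest.centroids);
    }
}
{code}

Removing the null guard from merge and re-running main reproduces the failure mode at the copy-constructor step, which is consistent with the NPE surfacing only when some aggregated percentile values are empty.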