The Spark version you are using is 3.1.1. However, TernaryLike comes from Spark 3.2; it was introduced by https://issues.apache.org/jira/browse/SPARK-34989. So if you want to complete the build with the community version of Spark, it is recommended to use at least Spark 3.2, and preferably Spark 3.3.
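
For context on the NullPointerException in the quoted trace: TernaryLike derives the node's children from first/second/third, and TreeNode.containsChild (the frame at TreeNode.scala:121) is computed lazily from children, so when those three accessors are not genuine overrides the child list presumably comes back null and containsChild$lzycompute blows up during transform. Below is a minimal sketch of what such an expression can look like when compiled against community Spark 3.2+, where TernaryExpression already mixes in org.apache.spark.sql.catalyst.trees.TernaryLike[Expression]. The class name DictEncodeSketch, the all-string signature, and the pass-through nullSafeEval are illustrative placeholders, not Kylin's actual DictEncode:

import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, TernaryExpression}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.types.{AbstractDataType, DataType, StringType}

// Hypothetical stand-in for DictEncode, targeting Spark 3.2+ APIs.
case class DictEncodeSketch(left: Expression, mid: Expression, right: Expression)
  extends TernaryExpression with ExpectsInputTypes with CodegenFallback {

  // TernaryLike builds `children` (and hence containsChild) from these
  // three accessors, so they must be real overrides.
  override def first: Expression = left
  override def second: Expression = mid
  override def third: Expression = right

  override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType, StringType)
  override def dataType: DataType = StringType

  // Placeholder: the real expression would encode the value against the
  // global dictionary instead of passing it through.
  override protected def nullSafeEval(v1: Any, v2: Any, v3: Any): Any = v1

  // Required by the tree-copy machinery introduced together with SPARK-34989.
  override protected def withNewChildrenInternal(
      newFirst: Expression, newSecond: Expression, newThird: Expression): Expression =
    copy(left = newFirst, mid = newSecond, right = newThird)
}

On Spark 3.2+ this compiles without naming TernaryLike at all, which is why moving to 3.2 or 3.3 removes the dependency on the interfaces that exist only in the custom 3.1.1 build.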
On Tue, Dec 26, 2023 at 10:31 AM Li, Can <c...@ebay.com.invalid> wrote:

> When building the dictionary, the DictEncode method is used; the expression
> taking its three Expression arguments inherits, at instantiation, from the
> custom trait TernaryLike defined in TreeNode.
>
> case class DictEncode(left: Expression, mid: Expression, right: Expression)
>   extends TernaryExpression with ExpectsInputTypes with TernaryLike {
>
>   def maxFields: Int = SQLConf.get.maxToStringFields
>
>   override def first: Expression = left
>
>   override def second: Expression = mid
>
>   override def third: Expression = right
>
> If the override modifiers are removed, compilation succeeds, but a
> NullPointerException is then thrown while building the dictionary:
>
> 2023-12-06T03:50:58,896 ERROR [logger-thread-0] application.JobMonitor : handleResourceLack --> java.lang.NullPointerException
> java.lang.RuntimeException: Error execute org.apache.kylin.engine.spark.job.SegmentBuildJob
>   at org.apache.kylin.engine.spark.application.SparkApplication.execute(SparkApplication.java:135) ~[newten-job.jar:?]
>   at org.apache.spark.application.JobWorker$$anon$2.run(JobWorker.scala:56) ~[newten-job.jar:?]
>   at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_202]
>   at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_202]
>   at java.lang.Thread.run(Thread.java:748) [?:1.8.0_202]
> Caused by: java.lang.NullPointerException
>   at org.apache.spark.sql.catalyst.trees.TreeNode.containsChild$lzycompute(TreeNode.scala:121) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
>   at org.apache.spark.sql.catalyst.trees.TreeNode.containsChild(TreeNode.scala:121) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:357) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:322) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDown$3(TreeNode.scala:322) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$mapChildren$1(TreeNode.scala:407) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:243) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:405) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:358) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:322) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:306) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
>   at org.apache.spark.sql.Column.normalizedExpr(Column.scala:161) ~[spark-sql_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
>   at org.apache.spark.sql.Column.hashCode(Column.scala:159) ~[spark-sql_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
>   at scala.runtime.Statics.anyHash(Statics.java:122) ~[scala-library-2.12.17.jar:?]
>   at scala.util.hashing.MurmurHash3.productHash(MurmurHash3.scala:76) ~[scala-library-2.12.17.jar:?]
>   at scala.util.hashing.MurmurHash3$.productHash(MurmurHash3.scala:246) ~[scala-library-2.12.17.jar:?]
>   at scala.runtime.ScalaRunTime$._hashCode(ScalaRunTime.scala:167) ~[scala-library-2.12.17.jar:?]
>   at scala.Tuple5.hashCode(Tuple5.scala:27) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.mutable.FlatHashTable.addEntry(FlatHashTable.scala:153) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.mutable.FlatHashTable.addEntry$(FlatHashTable.scala:152) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.mutable.HashSet.addEntry(HashSet.scala:41) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.mutable.FlatHashTable.addElem(FlatHashTable.scala:144) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.mutable.FlatHashTable.addElem$(FlatHashTable.scala:143) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.mutable.HashSet.addElem(HashSet.scala:41) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.mutable.HashSet.$plus$eq(HashSet.scala:60) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.mutable.HashSet.$plus$eq(HashSet.scala:41) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.mutable.GrowingBuilder.$plus$eq(GrowingBuilder.scala:32) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.mutable.GrowingBuilder.$plus$eq(GrowingBuilder.scala:30) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.Iterator.foreach(Iterator.scala:943) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.Iterator.foreach$(Iterator.scala:943) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.AbstractIterator.foreach(Iterator.scala:1431) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.IterableLike.foreach(IterableLike.scala:74) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.IterableLike.foreach$(IterableLike.scala:73) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.AbstractIterable.foreach(Iterable.scala:56) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.TraversableLike.map(TraversableLike.scala:286) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.TraversableLike.map$(TraversableLike.scala:279) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.mutable.AbstractSet.scala$collection$SetLike$$super$map(Set.scala:50) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.SetLike.map(SetLike.scala:105) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.SetLike.map$(SetLike.scala:105) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.mutable.AbstractSet.map(Set.scala:50) ~[scala-library-2.12.17.jar:?]
>   at org.apache.kylin.engine.spark.builder.DFTableEncoder$.encodeTable(DFTableEncoder.scala:62) ~[newten-job.jar:?]
>   at org.apache.kylin.engine.spark.job.stage.build.FlatTableAndDictBase.encodeColumn(FlatTableAndDictBase.scala:569) ~[newten-job.jar:?]
>   at org.apache.kylin.engine.spark.job.stage.build.FlatTableAndDictBase.buildDictIfNeed(FlatTableAndDictBase.scala:545) ~[newten-job.jar:?]
>   at org.apache.kylin.engine.spark.job.stage.build.FlatTableAndDictBase.buildDictIfNeed(FlatTableAndDictBase.scala:201) ~[newten-job.jar:?]
>   at org.apache.kylin.engine.spark.job.stage.build.BuildDict.execute(BuildDict.scala:31) ~[newten-job.jar:?]
>   at org.apache.kylin.engine.spark.job.stage.StageExec.toWork(StageExec.scala:116) ~[newten-job.jar:?]
>   at org.apache.kylin.engine.spark.job.stage.StageExec.toWork$(StageExec.scala:112) ~[newten-job.jar:?]
>   at org.apache.kylin.engine.spark.job.stage.build.BuildStage.toWork(BuildStage.scala:48) ~[newten-job.jar:?]
>   at org.apache.kylin.engine.spark.job.exec.BuildExec.$anonfun$buildSegment$1(BuildExec.scala:38) ~[newten-job.jar:?]
>   at org.apache.kylin.engine.spark.job.exec.BuildExec.$anonfun$buildSegment$1$adapted(BuildExec.scala:37) ~[newten-job.jar:?]
>   at scala.collection.Iterator.foreach(Iterator.scala:943) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.Iterator.foreach$(Iterator.scala:943) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.AbstractIterator.foreach(Iterator.scala:1431) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.IterableLike.foreach(IterableLike.scala:74) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.IterableLike.foreach$(IterableLike.scala:73) ~[scala-library-2.12.17.jar:?]
>   at scala.collection.AbstractIterable.foreach(Iterable.scala:56) ~[scala-library-2.12.17.jar:?]
>   at org.apache.kylin.engine.spark.job.exec.BuildExec.buildSegment(BuildExec.scala:37) ~[newten-job.jar:?]
>   at org.apache.kylin.engine.spark.job.SegmentBuildJob.buildSegment(SegmentBuildJob.java:181) ~[newten-job.jar:?]
>   at org.apache.kylin.engine.spark.job.SegmentBuildJob.lambda$build$1(SegmentBuildJob.java:166) ~[newten-job.jar:?]
>   at java.util.Iterator.forEachRemaining(Iterator.java:116) ~[?:1.8.0_202]
>   at java.util.Spliterators$IteratorSpliterator.forEachRemaining(Spliterators.java:1801) ~[?:1.8.0_202]
>   at java.util.stream.ReferencePipeline$Head.forEach(ReferencePipeline.java:580) ~[?:1.8.0_202]
>   at org.apache.kylin.engine.spark.job.SegmentBuildJob.build(SegmentBuildJob.java:146) ~[newten-job.jar:?]
>   at org.apache.kylin.engine.spark.job.SegmentBuildJob.doExecute(SegmentBuildJob.java:108) ~[newten-job.jar:?]
>   at org.apache.kylin.engine.spark.application.SparkApplication.execute(SparkApplication.java:319) ~[newten-job.jar:?]
>   at org.apache.kylin.engine.spark.application.SparkApplication.execute(SparkApplication.java:133) ~[newten-job.jar:?]
>   ... 4 more
>
> From: MINGMING GE <7mmi...@gmail.com>
> Date: Monday, December 25, 2023 19:17
> To: dev@kylin.apache.org <dev@kylin.apache.org>
> Subject: Re: Question about the custom-built Spark version
>
> Can you explain in detail the problem you encountered?
>
> On Mon, Dec 25, 2023 at 4:50 PM Li, Can <c...@ebay.com.invalid> wrote:
>
> > After we replaced the Spark version with the community version, we found
> > that some build jobs fail. We compared the community Spark with the
> > Kylin-specific Spark version and found that, where the global dictionary
> > needs to be built, Kylin's version defines some custom interface methods
> > inside its jar. Can these be implemented at the application layer?
> >