[ 
https://issues.apache.org/jira/browse/KYLIN-4595?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

zhaotao updated KYLIN-4595:
---------------------------
    Component/s: Job Engine

> build with global dictionary error at step:Build N-Dimension Cuboid
> -------------------------------------------------------------------
>
>                 Key: KYLIN-4595
>                 URL: https://issues.apache.org/jira/browse/KYLIN-4595
>             Project: Kylin
>          Issue Type: Bug
>          Components: Job Engine
>    Affects Versions: v2.6.0
>         Environment: hadoop2.6.0-cdh5.13.2
> hive1.1.0-cdh5.13.2
> hbase1.2.0-cdh5.13.2
>            Reporter: zhaotao
>            Priority: Major
>
> When I build the cube, the cube build fails and here is the error message:
> {code:java}
> 2020-06-22 07:30:23,172 DEBUG [Scheduler 1723420609 Job 
> ba1beb03-8bd1-76f6-4460-0607dbf9d6f7-5150] util.ZookeeperDistributedLock:226 
> : 5150-10831@ZZZZ trying to unlock 
> /kylin/kylin_metadata/dict/KYLIN.XXXX_YYYY/lock
> 2020-06-22 07:30:23,175 INFO  [Scheduler 1723420609 Job 
> ba1beb03-8bd1-76f6-4460-0607dbf9d6f7-5150] util.ZookeeperDistributedLock:237 
> : 5150-10831@pZZZZ released lock at 
> /kylin/kylin_metadata/dict/KYLIN.XXXX_YYYY/lock
> 2020-06-22 07:30:23,176 ERROR [Scheduler 1723420609 Job 
> ba1beb03-8bd1-76f6-4460-0607dbf9d6f7-5150] common.HadoopShellExecutable:65 : 
> error execute 
> HadoopShellExecutable{id=ba1beb03-8bd1-76f6-4460-0607dbf9d6f7-03, name=Build 
> Dimension Dictionary, state=SUCCEED}
> java.lang.RuntimeException: Failed to create dictionary on 
> KYLIN.DW_TELE_SALES_ORDER_V2_D.TRADE_NO
>         at 
> org.apache.kylin.dict.DictionaryManager.buildDictFromReadableTable(DictionaryManager.java:304)
>         at 
> org.apache.kylin.dict.DictionaryManager.buildDictionary(DictionaryManager.java:288)
>         at 
> org.apache.kylin.cube.CubeManager$DictionaryAssist.buildDictionary(CubeManager.java:1105)
>         at 
> org.apache.kylin.cube.CubeManager.buildDictionary(CubeManager.java:1074)
>         at 
> org.apache.kylin.cube.cli.DictionaryGeneratorCLI.processSegment(DictionaryGeneratorCLI.java:77)
>         at 
> org.apache.kylin.cube.cli.DictionaryGeneratorCLI.processSegment(DictionaryGeneratorCLI.java:55)
>         at 
> org.apache.kylin.engine.mr.steps.CreateDictionaryJob.run(CreateDictionaryJob.java:73)
>         at org.apache.kylin.engine.mr.MRUtil.runMRJob(MRUtil.java:93)
>         at 
> org.apache.kylin.engine.mr.common.HadoopShellExecutable.doWork(HadoopShellExecutable.java:63)
>         at 
> org.apache.kylin.job.execution.AbstractExecutable.execute(AbstractExecutable.java:165)
>         at 
> org.apache.kylin.job.execution.DefaultChainedExecutable.doWork(DefaultChainedExecutable.java:70)
>         at 
> org.apache.kylin.job.execution.AbstractExecutable.execute(AbstractExecutable.java:165)
>         at 
> org.apache.kylin.job.impl.threadpool.DistributedScheduler$JobRunner.run(DistributedScheduler.java:110)
>         at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>         at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>         at java.lang.Thread.run(Thread.java:745)
> Caused by: java.lang.RuntimeException: Failed to create global dictionary on 
> KYLIN.XXXX.YYYY
>         at 
> org.apache.kylin.dict.GlobalDictionaryBuilder.addValue(GlobalDictionaryBuilder.java:89)
>         at 
> org.apache.kylin.dict.DictionaryGenerator.buildDictionary(DictionaryGenerator.java:82)
>         at 
> org.apache.kylin.dict.DictionaryManager.buildDictFromReadableTable(DictionaryManager.java:301)
>         ... 15 more
> Caused by: java.io.FileNotFoundException: File does not exist: 
> /kylin/kylin_metadata/resources/GlobalDict/dict/KYLIN.XXXX/YYYY/working/cached_1592695454554_1396573148
>         at 
> org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:66)
>         at 
> org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:56)
>         at 
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocationsInt(FSNamesystem.java:2094)
>         at 
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:2064)
>         at 
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:1977)
>         at 
> org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getBlockLocations(NameNodeRpcServer.java:575)
>         at 
> org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.getBlockLocations(AuthorizationProviderProxyClientProtocol.java:92)
>         at 
> org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getBlockLocations(ClientNamenodeProtocolServerSideTranslatorPB.java:376)
>         at 
> org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
>         at 
> org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617)
>         at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073)
>         at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2226)
>         at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2222)
>         at java.security.AccessController.doPrivileged(Native Method)
>         at javax.security.auth.Subject.doAs(Subject.java:422)
>         at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1920)
>         at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2220)
>         at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native 
> Method)
>         at 
> sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
>         at 
> sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
>         at java.lang.reflect.Constructor.newInstance(Constructor.java:422)
>         at 
> org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106)
>         at 
> org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:73)
>         at 
> org.apache.hadoop.hdfs.DFSClient.callGetBlockLocations(DFSClient.java:1303)
>         at 
> org.apache.hadoop.hdfs.DFSClient.getLocatedBlocks(DFSClient.java:1288)
>         at 
> org.apache.hadoop.hdfs.DFSClient.getLocatedBlocks(DFSClient.java:1276)
>         at 
> org.apache.hadoop.hdfs.DFSInputStream.fetchLocatedBlocksAndGetLastBlockLength(DFSInputStream.java:307)
>         at 
> org.apache.hadoop.hdfs.DFSInputStream.openInfo(DFSInputStream.java:273)
>         at 
> org.apache.hadoop.hdfs.DFSInputStream.<init>(DFSInputStream.java:265)
>         at org.apache.hadoop.hdfs.DFSClient.open(DFSClient.java:1607)
>         at 
> org.apache.hadoop.hdfs.DistributedFileSystem$4.doCall(DistributedFileSystem.java:338)
>         at 
> org.apache.hadoop.hdfs.DistributedFileSystem$4.doCall(DistributedFileSystem.java:334)
>         at 
> org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
>         at 
> org.apache.hadoop.hdfs.DistributedFileSystem.open(DistributedFileSystem.java:334)
>         at 
> org.apache.kylin.dict.global.GlobalDictHDFSStore.readSlice(GlobalDictHDFSStore.java:187)
>         at 
> org.apache.kylin.dict.global.AppendTrieDictionaryBuilder.addValue(AppendTrieDictionaryBuilder.java:101)
>         at 
> org.apache.kylin.dict.GlobalDictionaryBuilder.addValue(GlobalDictionaryBuilder.java:85)
>         ... 17 more
> {code}
> After a failed build, an attempt to rebuild succeeds.
> This happens occasionally. I tried to work around it by setting 
> kylin.job.retry to 3, hoping the retry would take effect in the sub-job. The 
> retry mechanism is triggered on failure, but judging from the logs, it does 
> not seem to work.
>  
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to