Colin Luo created KYLIN-3990: -------------------------------- Summary: java.lang.ClassCastException: org.apache.hadoop.io.IntWritable cannot be cast to org.apache.hadoop.io.Text Key: KYLIN-3990 URL: https://issues.apache.org/jira/browse/KYLIN-3990 Project: Kylin Issue Type: Bug Reporter: Colin Luo
Environment: HDP 3.1, Kylin 2.6.1. The error occurs during cube build at step 4 ("Build Dimension Dictionary"). Log excerpt: 2019-04-28 19:32:31,762 INFO [Scheduler 2100016947 Job 16f753e8-5444-1f03-178a-5cca4f3cd10f-202] orc.OrcInputFormat:1737 : ORC pushdown predicate: null 2019-04-28 19:32:32,139 INFO [ORC_GET_SPLITS #1] impl.OrcCodecPool:56 : Got brand-new codec ZLIB 2019-04-28 19:32:32,261 INFO [Scheduler 2100016947 Job 16f753e8-5444-1f03-178a-5cca4f3cd10f-202] orc.OrcInputFormat:1855 : FooterCacheHitRatio: 0/1 2019-04-28 19:32:32,321 INFO [Scheduler 2100016947 Job 16f753e8-5444-1f03-178a-5cca4f3cd10f-202] orc.ReaderImpl:82 : Reading ORC rows from hdfs://master1:8020/warehouse/tablespace/managed/hive/wmdp_etl_dm.db/dm_customer_type/base_0000011/bucket_00000 with \{include: null, offset: 3, length: 725, schema: struct<customer_type_cd:string,customer_type_desc:string,customer_cate_cd:string,customer_cate_desc:string>, includeAcidColumns: true} 2019-04-28 19:32:32,391 INFO [Scheduler 2100016947 Job 16f753e8-5444-1f03-178a-5cca4f3cd10f-202] mapreduce.InternalUtil:156 : Initializing org.apache.hadoop.hive.ql.io.orc.OrcSerde with properties \{columns.types=string,string,string,string, comment=客户类型维度, columns=customer_type_cd,customer_type_desc,customer_cate_cd,customer_cate_desc, transactional_properties=default, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true","COLUMN_STATS":{"customer_cate_cd":"true","customer_cate_desc":"true","customer_type_cd":"true","customer_type_desc":"true"}}, serialization.format=1, numRows=11, numFiles=1, transient_lastDdlTime=1556442134, rawDataSize=0, columns.comments=客户类型代码 客户类型描述 客户大类代码 客户大类描述, totalSize=1306, serialization.lib=org.apache.hadoop.hive.ql.io.orc.OrcSerde, serialization.null.format=\N, bucketing_version=2, transactional=true, column.name.delimiter=,, name=WMDP_ETL_DM.DM_CUSTOMER_TYPE} 2019-04-28 19:32:32,402 ERROR [Scheduler 2100016947 Job 16f753e8-5444-1f03-178a-5cca4f3cd10f-202] common.HadoopShellExecutable:65 : error execute HadoopShellExecutable\{id=16f753e8-5444-1f03-178a-5cca4f3cd10f-03, 
name=Build Dimension Dictionary, state=RUNNING} java.lang.ClassCastException: org.apache.hadoop.io.IntWritable cannot be cast to org.apache.hadoop.io.Text at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector.getPrimitiveJavaObject(WritableStringObjectInspector.java:46) at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector.getPrimitiveJavaObject(WritableStringObjectInspector.java:26) at org.apache.hive.hcatalog.data.HCatRecordSerDe.serializePrimitiveField(HCatRecordSerDe.java:278) at org.apache.hive.hcatalog.data.HCatRecordSerDe.serializeField(HCatRecordSerDe.java:199) at org.apache.hive.hcatalog.data.LazyHCatRecord.get(LazyHCatRecord.java:53) at org.apache.hive.hcatalog.data.LazyHCatRecord.get(LazyHCatRecord.java:97) at org.apache.hive.hcatalog.mapreduce.HCatRecordReader.nextKeyValue(HCatRecordReader.java:204) at org.apache.hive.hcatalog.data.transfer.impl.HCatInputFormatReader$HCatRecordItr.hasNext(HCatInputFormatReader.java:107) at org.apache.kylin.source.hive.HiveTableReader.next(HiveTableReader.java:90) at org.apache.kylin.dict.lookup.SnapshotTable.takeSnapshot(SnapshotTable.java:98) at org.apache.kylin.dict.lookup.SnapshotManager.buildSnapshot(SnapshotManager.java:133) at org.apache.kylin.cube.CubeManager$DictionaryAssist.buildSnapshotTable(CubeManager.java:1174) at org.apache.kylin.cube.CubeManager.buildSnapshotTable(CubeManager.java:1090) at org.apache.kylin.cube.cli.DictionaryGeneratorCLI.processSegment(DictionaryGeneratorCLI.java:97) at org.apache.kylin.cube.cli.DictionaryGeneratorCLI.processSegment(DictionaryGeneratorCLI.java:55) at org.apache.kylin.engine.mr.steps.CreateDictionaryJob.run(CreateDictionaryJob.java:73) at org.apache.kylin.engine.mr.MRUtil.runMRJob(MRUtil.java:93) at org.apache.kylin.engine.mr.common.HadoopShellExecutable.doWork(HadoopShellExecutable.java:63) at org.apache.kylin.job.execution.AbstractExecutable.execute(AbstractExecutable.java:166) at 
org.apache.kylin.job.execution.DefaultChainedExecutable.doWork(DefaultChainedExecutable.java:71) at org.apache.kylin.job.execution.AbstractExecutable.execute(AbstractExecutable.java:166) at org.apache.kylin.job.impl.threadpool.DefaultScheduler$JobRunner.run(DefaultScheduler.java:114) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) 2019-04-28 19:32:32,407 INFO [Scheduler 2100016947 Job 16f753e8-5444-1f03-178a-5cca4f3cd10f-202] execution.ExecutableManager:453 : job id:16f753e8-5444-1f03-178a-5cca4f3cd10f-03 from RUNNING to ERROR 2019-04-28 19:32:32,408 ERROR [Scheduler 2100016947 Job 16f753e8-5444-1f03-178a-5cca4f3cd10f-202] execution.AbstractExecutable:168 : error running Executable: CubingJob\{id=16f753e8-5444-1f03-178a-5cca4f3cd10f, name=BUILD CUBE - FT_DEALER_QTY_STAT_M_cube - FULL_BUILD - CST 2019-04-28 19:31:02, state=RUNNING} 2019-04-28 19:32:32,410 DEBUG [pool-7-thread-1] cachesync.Broadcaster:116 : Servers in the cluster: [localhost:7070] 2019-04-28 19:32:32,411 DEBUG [pool-7-thread-1] cachesync.Broadcaster:126 : Announcing new broadcast to all: BroadcastEvent\{entity=execute_output, event=update, cacheKey=16f753e8-5444-1f03-178a-5cca4f3cd10f} 2019-04-28 19:32:32,412 INFO [Scheduler 2100016947 Job 16f753e8-5444-1f03-178a-5cca4f3cd10f-202] execution.ExecutableManager:453 : job id:16f753e8-5444-1f03-178a-5cca4f3cd10f from RUNNING to ERROR 2019-04-28 19:32:32,412 DEBUG [Scheduler 2100016947 Job 16f753e8-5444-1f03-178a-5cca4f3cd10f-202] execution.AbstractExecutable:319 : no need to send email, user list is empty 2019-04-28 19:32:32,413 DEBUG [pool-7-thread-1] cachesync.Broadcaster:116 : Servers in the cluster: [localhost:7070] 2019-04-28 19:32:32,413 DEBUG [pool-7-thread-1] cachesync.Broadcaster:126 : Announcing new broadcast to all: BroadcastEvent\{entity=execute_output, event=update, 
cacheKey=16f753e8-5444-1f03-178a-5cca4f3cd10f} 2019-04-28 19:32:32,414 ERROR [pool-11-thread-4] threadpool.DefaultScheduler:116 : ExecuteException job:16f753e8-5444-1f03-178a-5cca4f3cd10f org.apache.kylin.job.exception.ExecuteException: org.apache.kylin.job.exception.ExecuteException: org.apache.kylin.engine.mr.exception.HadoopShellException: java.lang.ClassCastException: org.apache.hadoop.io.IntWritable cannot be cast to org.apache.hadoop.io.Text at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector.getPrimitiveJavaObject(WritableStringObjectInspector.java:46) at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector.getPrimitiveJavaObject(WritableStringObjectInspector.java:26) at org.apache.hive.hcatalog.data.HCatRecordSerDe.serializePrimitiveField(HCatRecordSerDe.java:278) at org.apache.hive.hcatalog.data.HCatRecordSerDe.serializeField(HCatRecordSerDe.java:199) at org.apache.hive.hcatalog.data.LazyHCatRecord.get(LazyHCatRecord.java:53) at org.apache.hive.hcatalog.data.LazyHCatRecord.get(LazyHCatRecord.java:97) at org.apache.hive.hcatalog.mapreduce.HCatRecordReader.nextKeyValue(HCatRecordReader.java:204) at org.apache.hive.hcatalog.data.transfer.impl.HCatInputFormatReader$HCatRecordItr.hasNext(HCatInputFormatReader.java:107) at org.apache.kylin.source.hive.HiveTableReader.next(HiveTableReader.java:90) at org.apache.kylin.dict.lookup.SnapshotTable.takeSnapshot(SnapshotTable.java:98) at org.apache.kylin.dict.lookup.SnapshotManager.buildSnapshot(SnapshotManager.java:133) at org.apache.kylin.cube.CubeManager$DictionaryAssist.buildSnapshotTable(CubeManager.java:1174) at org.apache.kylin.cube.CubeManager.buildSnapshotTable(CubeManager.java:1090) at org.apache.kylin.cube.cli.DictionaryGeneratorCLI.processSegment(DictionaryGeneratorCLI.java:97) at org.apache.kylin.cube.cli.DictionaryGeneratorCLI.processSegment(DictionaryGeneratorCLI.java:55) at 
org.apache.kylin.engine.mr.steps.CreateDictionaryJob.run(CreateDictionaryJob.java:73) at org.apache.kylin.engine.mr.MRUtil.runMRJob(MRUtil.java:93) at org.apache.kylin.engine.mr.common.HadoopShellExecutable.doWork(HadoopShellExecutable.java:63) at org.apache.kylin.job.execution.AbstractExecutable.execute(AbstractExecutable.java:166) at org.apache.kylin.job.execution.DefaultChainedExecutable.doWork(DefaultChainedExecutable.java:71) at org.apache.kylin.job.execution.AbstractExecutable.execute(AbstractExecutable.java:166) at org.apache.kylin.job.impl.threadpool.DefaultScheduler$JobRunner.run(DefaultScheduler.java:114) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) result code:2 at org.apache.kylin.job.execution.AbstractExecutable.execute(AbstractExecutable.java:181) at org.apache.kylin.job.impl.threadpool.DefaultScheduler$JobRunner.run(DefaultScheduler.java:114) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Caused by: org.apache.kylin.job.exception.ExecuteException: org.apache.kylin.engine.mr.exception.HadoopShellException: java.lang.ClassCastException: org.apache.hadoop.io.IntWritable cannot be cast to org.apache.hadoop.io.Text at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector.getPrimitiveJavaObject(WritableStringObjectInspector.java:46) at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector.getPrimitiveJavaObject(WritableStringObjectInspector.java:26) at org.apache.hive.hcatalog.data.HCatRecordSerDe.serializePrimitiveField(HCatRecordSerDe.java:278) at org.apache.hive.hcatalog.data.HCatRecordSerDe.serializeField(HCatRecordSerDe.java:199) at 
org.apache.hive.hcatalog.data.LazyHCatRecord.get(LazyHCatRecord.java:53) at org.apache.hive.hcatalog.data.LazyHCatRecord.get(LazyHCatRecord.java:97) at org.apache.hive.hcatalog.mapreduce.HCatRecordReader.nextKeyValue(HCatRecordReader.java:204) at org.apache.hive.hcatalog.data.transfer.impl.HCatInputFormatReader$HCatRecordItr.hasNext(HCatInputFormatReader.java:107) at org.apache.kylin.source.hive.HiveTableReader.next(HiveTableReader.java:90) at org.apache.kylin.dict.lookup.SnapshotTable.takeSnapshot(SnapshotTable.java:98) at org.apache.kylin.dict.lookup.SnapshotManager.buildSnapshot(SnapshotManager.java:133) at org.apache.kylin.cube.CubeManager$DictionaryAssist.buildSnapshotTable(CubeManager.java:1174) at org.apache.kylin.cube.CubeManager.buildSnapshotTable(CubeManager.java:1090) at org.apache.kylin.cube.cli.DictionaryGeneratorCLI.processSegment(DictionaryGeneratorCLI.java:97) at org.apache.kylin.cube.cli.DictionaryGeneratorCLI.processSegment(DictionaryGeneratorCLI.java:55) at org.apache.kylin.engine.mr.steps.CreateDictionaryJob.run(CreateDictionaryJob.java:73) at org.apache.kylin.engine.mr.MRUtil.runMRJob(MRUtil.java:93) at org.apache.kylin.engine.mr.common.HadoopShellExecutable.doWork(HadoopShellExecutable.java:63) at org.apache.kylin.job.execution.AbstractExecutable.execute(AbstractExecutable.java:166) at org.apache.kylin.job.execution.DefaultChainedExecutable.doWork(DefaultChainedExecutable.java:71) at org.apache.kylin.job.execution.AbstractExecutable.execute(AbstractExecutable.java:166) at org.apache.kylin.job.impl.threadpool.DefaultScheduler$JobRunner.run(DefaultScheduler.java:114) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) result code:2 at org.apache.kylin.job.execution.AbstractExecutable.execute(AbstractExecutable.java:181) at 
org.apache.kylin.job.execution.DefaultChainedExecutable.doWork(DefaultChainedExecutable.java:71) at org.apache.kylin.job.execution.AbstractExecutable.execute(AbstractExecutable.java:166) ... 4 more Caused by: org.apache.kylin.engine.mr.exception.HadoopShellException: java.lang.ClassCastException: org.apache.hadoop.io.IntWritable cannot be cast to org.apache.hadoop.io.Text at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector.getPrimitiveJavaObject(WritableStringObjectInspector.java:46) at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector.getPrimitiveJavaObject(WritableStringObjectInspector.java:26) at org.apache.hive.hcatalog.data.HCatRecordSerDe.serializePrimitiveField(HCatRecordSerDe.java:278) at org.apache.hive.hcatalog.data.HCatRecordSerDe.serializeField(HCatRecordSerDe.java:199) at org.apache.hive.hcatalog.data.LazyHCatRecord.get(LazyHCatRecord.java:53) at org.apache.hive.hcatalog.data.LazyHCatRecord.get(LazyHCatRecord.java:97) at org.apache.hive.hcatalog.mapreduce.HCatRecordReader.nextKeyValue(HCatRecordReader.java:204) at org.apache.hive.hcatalog.data.transfer.impl.HCatInputFormatReader$HCatRecordItr.hasNext(HCatInputFormatReader.java:107) at org.apache.kylin.source.hive.HiveTableReader.next(HiveTableReader.java:90) at org.apache.kylin.dict.lookup.SnapshotTable.takeSnapshot(SnapshotTable.java:98) at org.apache.kylin.dict.lookup.SnapshotManager.buildSnapshot(SnapshotManager.java:133) at org.apache.kylin.cube.CubeManager$DictionaryAssist.buildSnapshotTable(CubeManager.java:1174) at org.apache.kylin.cube.CubeManager.buildSnapshotTable(CubeManager.java:1090) at org.apache.kylin.cube.cli.DictionaryGeneratorCLI.processSegment(DictionaryGeneratorCLI.java:97) at org.apache.kylin.cube.cli.DictionaryGeneratorCLI.processSegment(DictionaryGeneratorCLI.java:55) at org.apache.kylin.engine.mr.steps.CreateDictionaryJob.run(CreateDictionaryJob.java:73) at 
org.apache.kylin.engine.mr.MRUtil.runMRJob(MRUtil.java:93) at org.apache.kylin.engine.mr.common.HadoopShellExecutable.doWork(HadoopShellExecutable.java:63) at org.apache.kylin.job.execution.AbstractExecutable.execute(AbstractExecutable.java:166) at org.apache.kylin.job.execution.DefaultChainedExecutable.doWork(DefaultChainedExecutable.java:71) at org.apache.kylin.job.execution.AbstractExecutable.execute(AbstractExecutable.java:166) at org.apache.kylin.job.impl.threadpool.DefaultScheduler$JobRunner.run(DefaultScheduler.java:114) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) result code:2 at org.apache.kylin.engine.mr.common.HadoopShellExecutable.doWork(HadoopShellExecutable.java:73) at org.apache.kylin.job.execution.AbstractExecutable.execute(AbstractExecutable.java:166) ... 6 more -- This message was sent by Atlassian JIRA (v7.6.3#76005)