anchalkataria commented on issue #588: Has anyone used hudi with AWS EMR and EMRFS on s3? URL: https://github.com/apache/incubator-hudi/issues/588#issuecomment-513093255 Hi, I am also facing an issue while using S3 as storage for a Hudi dataset. I am using HoodieDeltaStreamer for pushing records from Kafka and then syncing to Hive. Below is the stack trace: Exception in thread "main" com.uber.hoodie.hive.HoodieHiveSyncException: Failed in executing SQL CREATE EXTERNAL TABLE IF NOT EXISTS default.impressions_s3tbl( `_hoodie_commit_time` string, `_hoodie_commit_seqno` string, `_hoodie_record_key` string, `_hoodie_partition_path` string, `_hoodie_file_name` string, `impresssiontime` bigint, `impressionid` string, `adid` string) PARTITIONED BY (`userid` string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'com.uber.hoodie.hadoop.HoodieInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' LOCATION 's3a://srev-emr-test/hudi_test' at com.uber.hoodie.hive.HoodieHiveClient.updateHiveSQL(HoodieHiveClient.java:464) at com.uber.hoodie.hive.HoodieHiveClient.createTable(HoodieHiveClient.java:267) at com.uber.hoodie.hive.HiveSyncTool.syncSchema(HiveSyncTool.java:132) at com.uber.hoodie.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:96) at com.uber.hoodie.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:72) at com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer.syncHive(HoodieDeltaStreamer.java:312) at com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer.sync(HoodieDeltaStreamer.java:296) at com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer.main(HoodieDeltaStreamer.java:445) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at 
java.lang.reflect.Method.invoke(Method.java:497) at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:738) at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:187) at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:212) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:126) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) Caused by: com.uber.hoodie.org.apache.hive.service.cli.HiveSQLException: Error running query: java.lang.NoSuchMethodError: org.apache.http.impl.auth.HttpAuthenticator.<init>(Lcom/uber/hoodie/org/apache/commons/logging/Log;)V at com.uber.hoodie.org.apache.hive.jdbc.Utils.verifySuccess(Utils.java:256) at com.uber.hoodie.org.apache.hive.jdbc.Utils.verifySuccessWithInfo(Utils.java:242) at com.uber.hoodie.org.apache.hive.jdbc.HiveStatement.execute(HiveStatement.java:254) at com.uber.hoodie.org.apache.commons.dbcp.DelegatingStatement.execute(DelegatingStatement.java:264) at com.uber.hoodie.org.apache.commons.dbcp.DelegatingStatement.execute(DelegatingStatement.java:264) at com.uber.hoodie.hive.HoodieHiveClient.updateHiveSQL(HoodieHiveClient.java:462) ... 
16 more Caused by: org.apache.hive.service.cli.HiveSQLException: Error running query: java.lang.NoSuchMethodError: org.apache.http.impl.auth.HttpAuthenticator.<init>(Lcom/uber/hoodie/org/apache/commons/logging/Log;)V at org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:161) at org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:202) at org.apache.hive.service.cli.operation.Operation.run(Operation.java:309) at org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:464) at org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:440) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:497) at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:78) at org.apache.hive.service.cli.session.HiveSessionProxy.access$000(HiveSessionProxy.java:36) at org.apache.hive.service.cli.session.HiveSessionProxy$1.run(HiveSessionProxy.java:63) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1692) at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:59) at com.sun.proxy.$Proxy33.executeStatementAsync(Unknown Source) at org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:260) at org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:507) at org.apache.hive.service.cli.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1317) at org.apache.hive.service.cli.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1302) at 
org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39) at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39) at org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56) at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) Caused by: java.lang.NoSuchMethodError: org.apache.http.impl.auth.HttpAuthenticator.<init>(Lcom/uber/hoodie/org/apache/commons/logging/Log;)V at org.apache.http.impl.client.HttpAuthenticator.<init>(HttpAuthenticator.java:45) at org.apache.http.impl.client.DefaultRequestDirector.<init>(DefaultRequestDirector.java:284) at org.apache.http.impl.client.AbstractHttpClient.createClientRequestDirector(AbstractHttpClient.java:953) at org.apache.http.impl.client.AbstractHttpClient.doExecute(AbstractHttpClient.java:810) at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82) at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55) at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:384) at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:232) at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3528) at com.amazonaws.services.s3.AmazonS3Client.headBucket(AmazonS3Client.java:1031) at com.amazonaws.services.s3.AmazonS3Client.doesBucketExist(AmazonS3Client.java:994) at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:154) at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2598) at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:91) at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2632) at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2614) at 
org.apache.hadoop.fs.FileSystem.get(FileSystem.java:370) at org.apache.hadoop.fs.Path.getFileSystem(Path.java:296) at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tryQualifyPath(BaseSemanticAnalyzer.java:1383) at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.toReadEntity(BaseSemanticAnalyzer.java:1374) at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.toReadEntity(BaseSemanticAnalyzer.java:1369) at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeCreateTable(SemanticAnalyzer.java:10846) at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genResolvedParseTree(SemanticAnalyzer.java:9989) at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:10093) at org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:229) at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:239) at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:479) at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:319) at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1255) at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1245) at org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:127) ... 27 more
---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org With regards, Apache Git Services
