[ https://issues.apache.org/jira/browse/HIVE-20803?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

t oo updated HIVE-20803:
------------------------
    Summary: Hive external table can't read S3 file containing timestamp partition  (was: Hive can't read S3 parquet file with timestamp partition)

> Hive external table can't read S3 file containing timestamp partition
> ---------------------------------------------------------------------
>
>                 Key: HIVE-20803
>                 URL: https://issues.apache.org/jira/browse/HIVE-20803
>             Project: Hive
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.3.2
>            Reporter: t oo
>            Priority: Major
>
> Spark Thrift Server can run "select *" against this table and returns data fine, but HiveServer2 throws the error below on the same "select *".
>
> Setup: hive.msck.path.validation=ignore was set in hive-site.xml, and then MSCK REPAIR TABLE my_sch.h_l was run to register the partitions.
> aws s3 ls s3://priv1/priv2/H_L/ --recursive
> 2018-10-18 03:00:56 2474983 priv1/priv2/H_L/part_dt=20180309/part_src=xyz/part_src_file=MY_LOC/part_ldts=2018-10-18 02:59:46/part-00000-2536ca01-243c-4220-8e55-6869a045fba2.snappy.parquet
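>
> The key discovered by MSCK embeds the part_ldts timestamp value in its plain "yyyy-MM-dd HH:mm:ss" form, so that path component contains a literal space and two colons. A small standalone illustration of the components (hypothetical class name, not Hive code):
>
>   // Split the S3 key listed above into its path components; the part_ldts
>   // component carries the raw timestamp text, including ' ' and ':'.
>   public class PartitionKeyComponents {
>       public static void main(String[] args) {
>           String key = "priv1/priv2/H_L/part_dt=20180309/part_src=xyz/"
>                   + "part_src_file=MY_LOC/part_ldts=2018-10-18 02:59:46/"
>                   + "part-00000-2536ca01-243c-4220-8e55-6869a045fba2.snappy.parquet";
>           for (String component : key.split("/")) {
>               System.out.println(component);
>           }
>           // ...last two lines printed:
>           // part_ldts=2018-10-18 02:59:46
>           // part-00000-2536ca01-243c-4220-8e55-6869a045fba2.snappy.parquet
>       }
>   }
>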
> show create table my_sch.h_l;
> +----------------------------------------------------+
> | createtab_stmt |
> +----------------------------------------------------+
> | CREATE EXTERNAL TABLE `my_sch.h_l`( |
> | `xy_hkey_h_l` binary, |
> | `xy_rtts` timestamp, |
> | `xy_rsrc` string, |
> | `xy_bkcc` string, |
> | `xy_mltid` string, |
> | `location_id` bigint) |
> | PARTITIONED BY ( |
> | `part_dt` string, |
> | `part_src` string, |
> | `part_src_file` string, |
> | `part_ldts` timestamp) |
> | ROW FORMAT SERDE |
> | 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' |
> | STORED AS INPUTFORMAT |
> | 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' |
> | OUTPUTFORMAT |
> | 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' |
> | LOCATION |
> | 's3a://priv1/priv2/H_L' |
> | TBLPROPERTIES ( |
> | 'spark.sql.partitionProvider'='catalog', |
> | 'spark.sql.sources.schema.numPartCols'='4', |
> | 'spark.sql.sources.schema.numParts'='1', |
> | 'spark.sql.sources.schema.part.0'='{"type":"struct","fields":[{"name":"xy_hkey_h_l","type":"binary","nullable":true,"metadata":{}},{"name":"xy_rtts","type":"timestamp","nullable":true,"metadata":{}},{"name":"xy_rsrc","type":"string","nullable":true,"metadata":{}},{"name":"xy_bkcc","type":"string","nullable":true,"metadata":{}},{"name":"xy_mltid","type":"string","nullable":true,"metadata":{}},{"name":"location_id","type":"long","nullable":true,"metadata":{}},{"name":"part_dt","type":"string","nullable":true,"metadata":{}},{"name":"part_src","type":"string","nullable":true,"metadata":{}},{"name":"part_src_file","type":"string","nullable":true,"metadata":{}},{"name":"part_ldts","type":"timestamp","nullable":true,"metadata":{}}]}', |
> | 'spark.sql.sources.schema.partCol.0'='part_dt', |
> | 'spark.sql.sources.schema.partCol.1'='part_src', |
> | 'spark.sql.sources.schema.partCol.2'='part_src_file', |
> | 'spark.sql.sources.schema.partCol.3'='part_ldts', |
> | 'transient_lastDdlTime'='1540421484') |
> +----------------------------------------------------+
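>
> The failing read below goes through HiveServer2 (beeline/JDBC); the same statement returns rows through the Spark Thrift Server. For reference, a minimal JDBC sketch of the HiveServer2 path (host, port and credentials are placeholders):
>
>   import java.sql.Connection;
>   import java.sql.DriverManager;
>   import java.sql.ResultSet;
>   import java.sql.Statement;
>
>   public class HiveServer2Select {
>       public static void main(String[] args) throws Exception {
>           try (Connection conn = DriverManager.getConnection(
>                       "jdbc:hive2://hs2-host:10000/my_sch", "user", "");
>                Statement stmt = conn.createStatement();
>                ResultSet rs = stmt.executeQuery("select * from my_sch.h_l limit 5")) {
>               // Fetching rows is where the exception below surfaces.
>               while (rs.next()) {
>                   System.out.println(rs.getString("location_id"));
>               }
>           }
>       }
>   }
>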
>  select * from my_sch.h_l limit 5;
> Error: java.io.IOException: java.lang.IllegalArgumentException: java.net.URISyntaxException: Relative path in absolute URI: part_ldts=2018-10-18 02:59:46 (state=,code=0)
> org.apache.hive.service.cli.HiveSQLException: java.io.IOException: java.lang.IllegalArgumentException: java.net.URISyntaxException: Relative path in absolute URI: part_ldts=2018-10-18 02:59:46
>  at org.apache.hive.jdbc.Utils.verifySuccess(Utils.java:267)
>  at org.apache.hive.jdbc.Utils.verifySuccessWithInfo(Utils.java:253)
>  at org.apache.hive.jdbc.HiveQueryResultSet.next(HiveQueryResultSet.java:374)
>  at org.apache.hive.beeline.BufferedRows.<init>(BufferedRows.java:53)
>  at org.apache.hive.beeline.IncrementalRowsWithNormalization.<init>(IncrementalRowsWithNormalization.java:50)
>  at org.apache.hive.beeline.BeeLine.print(BeeLine.java:2192)
>  at org.apache.hive.beeline.Commands.executeInternal(Commands.java:1009)
>  at org.apache.hive.beeline.Commands.execute(Commands.java:1205)
>  at org.apache.hive.beeline.Commands.sql(Commands.java:1134)
>  at org.apache.hive.beeline.BeeLine.dispatch(BeeLine.java:1314)
>  at org.apache.hive.beeline.BeeLine.execute(BeeLine.java:1178)
>  at org.apache.hive.beeline.BeeLine.begin(BeeLine.java:1033)
>  at org.apache.hive.beeline.BeeLine.mainWithInputRedirection(BeeLine.java:519)
>  at org.apache.hive.beeline.BeeLine.main(BeeLine.java:501)
>  at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>  at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>  at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>  at java.lang.reflect.Method.invoke(Method.java:498)
>  at org.apache.hadoop.util.RunJar.run(RunJar.java:239)
>  at org.apache.hadoop.util.RunJar.main(RunJar.java:153)
> Caused by: org.apache.hive.service.cli.HiveSQLException: java.io.IOException: java.lang.IllegalArgumentException: java.net.URISyntaxException: Relative path in absolute URI: part_ldts=2018-10-18 02:59:46
>  at org.apache.hive.service.cli.operation.SQLOperation.getNextRowSet(SQLOperation.java:499)
>  at org.apache.hive.service.cli.operation.OperationManager.getOperationNextRowSet(OperationManager.java:307)
>  at org.apache.hive.service.cli.session.HiveSessionImpl.fetchResults(HiveSessionImpl.java:878)
>  at sun.reflect.GeneratedMethodAccessor51.invoke(Unknown Source)
>  at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>  at java.lang.reflect.Method.invoke(Method.java:498)
>  at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:78)
>  at org.apache.hive.service.cli.session.HiveSessionProxy.access$000(HiveSessionProxy.java:36)
>  at org.apache.hive.service.cli.session.HiveSessionProxy$1.run(HiveSessionProxy.java:63)
>  at java.security.AccessController.doPrivileged(Native Method)
>  at javax.security.auth.Subject.doAs(Subject.java:422)
>  at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1844)
>  at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:59)
>  at com.sun.proxy.$Proxy42.fetchResults(Unknown Source)
>  at org.apache.hive.service.cli.CLIService.fetchResults(CLIService.java:559)
>  at org.apache.hive.service.cli.thrift.ThriftCLIService.FetchResults(ThriftCLIService.java:751)
>  at org.apache.hive.service.rpc.thrift.TCLIService$Processor$FetchResults.getResult(TCLIService.java:1717)
>  at org.apache.hive.service.rpc.thrift.TCLIService$Processor$FetchResults.getResult(TCLIService.java:1702)
>  at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
>  at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
>  at org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56)
>  at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286)
>  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>  at java.lang.Thread.run(Thread.java:748)
> Caused by: java.io.IOException: java.lang.IllegalArgumentException: java.net.URISyntaxException: Relative path in absolute URI: part_ldts=2018-10-18 02:59:46
>  at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:521)
>  at org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:428)
>  at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:147)
>  at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:2208)
>  at org.apache.hive.service.cli.operation.SQLOperation.getNextRowSet(SQLOperation.java:494)
>  ... 24 more
> Caused by: java.lang.IllegalArgumentException: java.net.URISyntaxException: Relative path in absolute URI: part_ldts=2018-10-18 02:59:46
>  at org.apache.hadoop.fs.Path.initialize(Path.java:254)
>  at org.apache.hadoop.fs.Path.<init>(Path.java:212)
>  at org.apache.hadoop.fs.Path.<init>(Path.java:120)
>  at org.apache.hadoop.fs.Globber.doGlob(Globber.java:269)
>  at org.apache.hadoop.fs.Globber.glob(Globber.java:148)
>  at org.apache.hadoop.fs.FileSystem.globStatus(FileSystem.java:1705)
>  at org.apache.hadoop.fs.s3a.S3AFileSystem.globStatus(S3AFileSystem.java:2031)
>  at org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:266)
>  at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:236)
>  at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:322)
>  at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextSplits(FetchOperator.java:372)
>  at org.apache.hadoop.hive.ql.exec.FetchOperator.getRecordReader(FetchOperator.java:304)
>  at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:459)
>  ... 28 more
> Caused by: java.net.URISyntaxException: Relative path in absolute URI: part_ldts=2018-10-18 02:59:46
>  at java.net.URI.checkPath(URI.java:1823)
>  at java.net.URI.<init>(URI.java:745)
>  at org.apache.hadoop.fs.Path.initialize(Path.java:251)
>  ... 40 more
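>
> The innermost frames (Globber -> Path -> URI) point at the likely mechanism: while expanding the glob over the partition directories, the single path component "part_ldts=2018-10-18 02:59:46" is turned into a Path on its own. Because the first ':' (from the time-of-day portion) appears before any '/', the string is parsed as if everything up to that colon were a URI scheme, leaving "59:46" as a relative path in a scheme-qualified URI, which java.net.URI rejects. A minimal pure-JDK sketch of that step (the scheme-splitting is paraphrased from how org.apache.hadoop.fs.Path parses a path string, not copied from it):
>
>   import java.net.URI;
>   import java.net.URISyntaxException;
>
>   public class TimestampPartitionRepro {
>       public static void main(String[] args) {
>           String component = "part_ldts=2018-10-18 02:59:46";
>
>           // Treat everything before the first ':' as a URI scheme when that
>           // colon precedes any '/' (mirroring Path's string parsing).
>           int colon = component.indexOf(':');
>           int slash = component.indexOf('/');
>           String scheme = null;
>           String path = component;
>           if (colon != -1 && (slash == -1 || colon < slash)) {
>               scheme = component.substring(0, colon);   // "part_ldts=2018-10-18 02"
>               path = component.substring(colon + 1);    // "59:46"
>           }
>
>           try {
>               // With a scheme present and a path not starting with '/',
>               // URI.checkPath() rejects the combination.
>               new URI(scheme, null, path, null, null);
>           } catch (URISyntaxException e) {
>               // Prints: Relative path in absolute URI: part_ldts=2018-10-18 02:59:46
>               System.out.println(e.getMessage());
>           }
>       }
>   }
>
> Path.initialize then wraps that URISyntaxException in the IllegalArgumentException seen in the traces above.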



