[ https://issues.apache.org/jira/browse/HIVE-2150?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Vaibhav Aggarwal reassigned HIVE-2150: -------------------------------------- Assignee: Vaibhav Aggarwal > Sampling fails after dynamic-partition insert into a bucketed s3n table > ----------------------------------------------------------------------- > > Key: HIVE-2150 > URL: https://issues.apache.org/jira/browse/HIVE-2150 > Project: Hive > Issue Type: Bug > Affects Versions: 0.7.0 > Reporter: Steven Wong > Assignee: Vaibhav Aggarwal > > When using dynamic-partition insert and bucketing together on an s3n table, > the insert does not create files for empty buckets. This will result in the > following exception when running a sampling query that includes the empty > buckets. > {noformat} > FAILED: Hive Internal Error: java.lang.RuntimeException(Cannot get bucket > path for bucket 1) > java.lang.RuntimeException: Cannot get bucket path for bucket 1 > at > org.apache.hadoop.hive.ql.metadata.Partition.getBucketPath(Partition.java:367) > at > org.apache.hadoop.hive.ql.optimizer.SamplePruner.prune(SamplePruner.java:186) > at > org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils.setTaskPlan(GenMapRedUtils.java:603) > at > org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils.setTaskPlan(GenMapRedUtils.java:514) > at > org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1.processFS(GenMRFileSink1.java:586) > at > org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1.process(GenMRFileSink1.java:145) > at > org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:89) > at > org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:88) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:55) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67) > at > org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:102) > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genMapRedTasks(SemanticAnalyzer.java:6336) > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:6615) > at > org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:238) > at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:332) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:686) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:149) > at > org.apache.hadoop.hive.cli.CliDriver.processLineInternal(CliDriver.java:228) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:209) > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:355) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) > at java.lang.reflect.Method.invoke(Method.java:597) > at org.apache.hadoop.util.RunJar.main(RunJar.java:156) > Caused by: java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hadoop.hive.ql.metadata.Partition.getBucketPath(Partition.java:365) > ... 27 more > {noformat} > Here is a repro case: > {noformat} > CREATE TABLE tab > (x string) > PARTITIONED BY (p1 string, p2 string) > CLUSTERED BY (x) INTO 4 BUCKETS > LOCATION 's3n://some/path'; > SET hive.exec.dynamic.partition=true; > SET hive.enforce.bucketing=true; > INSERT OVERWRITE TABLE tab > PARTITION (p1='p', p2) > SELECT 'v1', 'v2' > FROM dual; > SELECT * > FROM tab TABLESAMPLE (BUCKET 2 OUT OF 4); > {noformat} -- This message is automatically generated by JIRA. For more information on JIRA, see: http://www.atlassian.com/software/jira