[ https://issues.apache.org/jira/browse/HIVE-3374?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Fabian Alenius updated HIVE-3374: --------------------------------- Description: When running a query on an external bucketed table, TABLESAMPLE does not ignore _logs and _SUCCESS. This means that if you run the select on bucket 1 or 2, the query will fail. hive> select count(*) from table TABLESAMPLE(BUCKET 1 OUT OF 100 on username) where var = value; Total MapReduce jobs = 1 Launching Job 1 out of 1 Number of reduce tasks determined at compile time: 1 In order to change the average load for a reducer (in bytes): set hive.exec.reducers.bytes.per.reducer=<number> In order to limit the maximum number of reducers: set hive.exec.reducers.max=<number> In order to set a constant number of reducers: set mapred.reduce.tasks=<number> org.apache.hadoop.mapred.InvalidInputException: Input path does not exist: hdfs://namenode/table/var/_SUCCESS at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:194) at org.apache.hadoop.mapred.lib.CombineFileInputFormat.getSplits(CombineFileInputFormat.java:180) at org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileInputFormatShim.getSplits(HadoopShimsSecure.java:387) at org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileInputFormatShim.getSplits(HadoopShimsSecure.java:353) at org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getSplits(CombineHiveInputFormat.java:387) at org.apache.hadoop.mapred.JobClient.writeOldSplits(JobClient.java:989) at org.apache.hadoop.mapred.JobClient.writeSplits(JobClient.java:981) at org.apache.hadoop.mapred.JobClient.access$500(JobClient.java:170) at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:891) at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:844) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:396) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1232) at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:844) at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:818) at org.apache.hadoop.hive.ql.exec.ExecDriver.execute(ExecDriver.java:452) at org.apache.hadoop.hive.ql.exec.MapRedTask.execute(MapRedTask.java:136) at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:133) at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:57) at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1332) at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1123) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:931) at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:255) at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:212) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403) at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:671) at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:554) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) at java.lang.reflect.Method.invoke(Method.java:597) at org.apache.hadoop.util.RunJar.main(RunJar.java:208) Job Submission failed with exception 'org.apache.hadoop.mapred.InvalidInputException(Input path does not exist: hdfs://namenode/table/var/_SUCCESS)' FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.MapRedTask was: When running a query on an external bucketed table, TABLESAMPLE does not ignore _logs and _SUCCESS. This means that if you run the select bucket 1 or 2, the query will fail. hive> select count(*) from table TABLESAMPLE(BUCKET 1 OUT OF 100 on username) where var = value; Total MapReduce jobs = 1 Launching Job 1 out of 1 Number of reduce tasks determined at compile time: 1 In order to change the average load for a reducer (in bytes): set hive.exec.reducers.bytes.per.reducer=<number> In order to limit the maximum number of reducers: set hive.exec.reducers.max=<number> In order to set a constant number of reducers: set mapred.reduce.tasks=<number> org.apache.hadoop.mapred.InvalidInputException: Input path does not exist: hdfs://namenode/table/var/_SUCCESS at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:194) at org.apache.hadoop.mapred.lib.CombineFileInputFormat.getSplits(CombineFileInputFormat.java:180) at org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileInputFormatShim.getSplits(HadoopShimsSecure.java:387) at org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileInputFormatShim.getSplits(HadoopShimsSecure.java:353) at org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getSplits(CombineHiveInputFormat.java:387) at org.apache.hadoop.mapred.JobClient.writeOldSplits(JobClient.java:989) at org.apache.hadoop.mapred.JobClient.writeSplits(JobClient.java:981) at org.apache.hadoop.mapred.JobClient.access$500(JobClient.java:170) at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:891) at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:844) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:396) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1232) at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:844) at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:818) at org.apache.hadoop.hive.ql.exec.ExecDriver.execute(ExecDriver.java:452) at org.apache.hadoop.hive.ql.exec.MapRedTask.execute(MapRedTask.java:136) at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:133) at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:57) at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1332) at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1123) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:931) at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:255) at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:212) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403) at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:671) at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:554) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) at java.lang.reflect.Method.invoke(Method.java:597) at org.apache.hadoop.util.RunJar.main(RunJar.java:208) Job Submission failed with exception 'org.apache.hadoop.mapred.InvalidInputException(Input path does not exist: hdfs://namenode/table/var/_SUCCESS)' FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.MapRedTask > TABLESAMPLE does not ignore _SUCCESS and _logs > ---------------------------------------------- > > Key: HIVE-3374 > URL: https://issues.apache.org/jira/browse/HIVE-3374 > Project: Hive > Issue Type: Bug > Affects Versions: 0.8.1 > Reporter: Fabian Alenius > > When running a query on an external bucketed table, TABLESAMPLE does not > ignore _logs and _SUCCESS. This means that if you run the select on bucket 1 > or 2, the query will fail. > hive> select count(*) from table TABLESAMPLE(BUCKET 1 OUT OF 100 on username) > where var = value; > Total MapReduce jobs = 1 > Launching Job 1 out of 1 > Number of reduce tasks determined at compile time: 1 > In order to change the average load for a reducer (in bytes): > set hive.exec.reducers.bytes.per.reducer=<number> > In order to limit the maximum number of reducers: > set hive.exec.reducers.max=<number> > In order to set a constant number of reducers: > set mapred.reduce.tasks=<number> > org.apache.hadoop.mapred.InvalidInputException: Input path does not exist: > hdfs://namenode/table/var/_SUCCESS > at > org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:194) > at > org.apache.hadoop.mapred.lib.CombineFileInputFormat.getSplits(CombineFileInputFormat.java:180) > at > org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileInputFormatShim.getSplits(HadoopShimsSecure.java:387) > at > org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileInputFormatShim.getSplits(HadoopShimsSecure.java:353) > at > org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getSplits(CombineHiveInputFormat.java:387) > at > org.apache.hadoop.mapred.JobClient.writeOldSplits(JobClient.java:989) > at org.apache.hadoop.mapred.JobClient.writeSplits(JobClient.java:981) > at org.apache.hadoop.mapred.JobClient.access$500(JobClient.java:170) > at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:891) > at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:844) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:396) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1232) > at > org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:844) > at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:818) > at > org.apache.hadoop.hive.ql.exec.ExecDriver.execute(ExecDriver.java:452) > at > org.apache.hadoop.hive.ql.exec.MapRedTask.execute(MapRedTask.java:136) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:133) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:57) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1332) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1123) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:931) > at > org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:255) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:212) > at > org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403) > at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:671) > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:554) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) > at java.lang.reflect.Method.invoke(Method.java:597) > at org.apache.hadoop.util.RunJar.main(RunJar.java:208) > Job Submission failed with exception > 'org.apache.hadoop.mapred.InvalidInputException(Input path does not exist: > hdfs://namenode/table/var/_SUCCESS)' > FAILED: Execution Error, return code 1 from > org.apache.hadoop.hive.ql.exec.MapRedTask -- This message is automatically generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa For more information on JIRA, see: http://www.atlassian.com/software/jira