[jira] [Commented] (HIVE-15367) CTAS with LOCATION should write temp data under location directory rather than database location
[ https://issues.apache.org/jira/browse/HIVE-15367?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15724727#comment-15724727 ] Hive QA commented on HIVE-15367: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12841872/HIVE-15367.1.patch {color:red}ERROR:{color} -1 due to no test(s) being added or modified. {color:red}ERROR:{color} -1 due to 38 failed/errored test(s), 10768 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[crtseltbl_serdeprops] (batchId=73) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[materialized_view_authorization_sqlstd] (batchId=43) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[materialized_view_create] (batchId=64) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[materialized_view_describe] (batchId=62) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[materialized_view_drop] (batchId=9) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample2] (batchId=5) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample4] (batchId=15) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample6] (batchId=61) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample7] (batchId=60) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample9] (batchId=38) org.apache.hadoop.hive.cli.TestEncryptedHDFSCliDriver.testCliDriver[encryption_ctas] (batchId=155) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[transform_ppr2] (batchId=134) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[stats_based_fetch_decision] (batchId=150) org.apache.hadoop.hive.cli.TestNegativeCliDriver.testCliDriver[ctas_noemptyfolder] (batchId=84) org.apache.hadoop.hive.cli.TestNegativeCliDriver.testCliDriver[materialized_view_authorization_create_no_grant] (batchId=84) 
org.apache.hadoop.hive.cli.TestNegativeCliDriver.testCliDriver[materialized_view_authorization_create_no_select_perm] (batchId=84) org.apache.hadoop.hive.cli.TestNegativeCliDriver.testCliDriver[materialized_view_authorization_drop_other] (batchId=84) org.apache.hadoop.hive.cli.TestNegativeCliDriver.testCliDriver[materialized_view_authorization_no_select_perm] (batchId=84) org.apache.hadoop.hive.cli.TestNegativeCliDriver.testCliDriver[materialized_view_delete] (batchId=84) org.apache.hadoop.hive.cli.TestNegativeCliDriver.testCliDriver[materialized_view_drop2] (batchId=84) org.apache.hadoop.hive.cli.TestNegativeCliDriver.testCliDriver[materialized_view_drop] (batchId=84) org.apache.hadoop.hive.cli.TestNegativeCliDriver.testCliDriver[materialized_view_insert] (batchId=84) org.apache.hadoop.hive.cli.TestNegativeCliDriver.testCliDriver[materialized_view_load] (batchId=84) org.apache.hadoop.hive.cli.TestNegativeCliDriver.testCliDriver[materialized_view_replace_with_view] (batchId=84) org.apache.hadoop.hive.cli.TestNegativeCliDriver.testCliDriver[materialized_view_update] (batchId=84) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[auto_join22] (batchId=116) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[auto_join29] (batchId=116) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[groupby5_map] (batchId=116) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[groupby6] (batchId=116) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[join_array] (batchId=116) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[mapjoin_distinct] (batchId=116) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[merge1] (batchId=116) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[multi_insert_move_tasks_share_dependencies] (batchId=116) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[runtime_skewjoin_mapjoin_spark] (batchId=116) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[sample5] 
(batchId=116) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[stats0] (batchId=116) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union8] (batchId=116) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[vector_decimal_mapjoin] (batchId=116) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/2434/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/2434/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-2434/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 38 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12841872 - PreCommit-HIVE-Build > CTAS with LOCATION should write temp data under location directory rather > than database location > ---
[jira] [Commented] (HIVE-15333) Add a FetchTask to REPL DUMP plan for reading dump uri, last repl id as ResultSet
[ https://issues.apache.org/jira/browse/HIVE-15333?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15724642#comment-15724642 ] Hive QA commented on HIVE-15333: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12841854/HIVE-15333.3.patch {color:green}SUCCESS:{color} +1 due to 2 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 10 failed/errored test(s), 10769 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample2] (batchId=5) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample4] (batchId=15) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample6] (batchId=61) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample7] (batchId=60) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample9] (batchId=38) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[orc_ppd_schema_evol_3a] (batchId=134) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[transform_ppr2] (batchId=134) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[stats_based_fetch_decision] (batchId=150) org.apache.hadoop.hive.cli.TestNegativeCliDriver.testCliDriver[authorization_import] (batchId=84) org.apache.hadoop.hive.cli.TestNegativeCliDriver.testCliDriver[exim_00_unsupported_schema] (batchId=84) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/2433/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/2433/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-2433/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 10 tests failed {noformat} This message is automatically generated. 
ATTACHMENT ID: 12841854 - PreCommit-HIVE-Build > Add a FetchTask to REPL DUMP plan for reading dump uri, last repl id as > ResultSet > - > > Key: HIVE-15333 > URL: https://issues.apache.org/jira/browse/HIVE-15333 > Project: Hive > Issue Type: Sub-task > Components: repl >Reporter: Vaibhav Gumashta >Assignee: Vaibhav Gumashta > Attachments: HIVE-15333.1.patch, HIVE-15333.2.patch, > HIVE-15333.3.patch > > > We're writing the return values to a file, but we don't add FetchTask while > planning. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15149) Add additional information to ATSHook for Tez UI
[ https://issues.apache.org/jira/browse/HIVE-15149?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jason Dere updated HIVE-15149: -- Attachment: HIVE-15149.3.patch > Add additional information to ATSHook for Tez UI > > > Key: HIVE-15149 > URL: https://issues.apache.org/jira/browse/HIVE-15149 > Project: Hive > Issue Type: Improvement > Components: Hooks >Reporter: Jason Dere >Assignee: Jason Dere > Attachments: HIVE-15149.1.patch, HIVE-15149.2.patch, > HIVE-15149.3.patch > > > Additional query details wanted for TEZ-3530. The additional details > discussed include the following: > Publish the following info ( in addition to existing bits published today): > Application Id to which the query was submitted (primary filter) > DAG Id (primary filter) > Hive query name (primary filter) > Hive Configs (everything a set command would provide except for sensitive > credential info) > Potentially publish source of config i.e. set in hive query script vs > hive-site.xml, etc. > Which HiveServer2 the query was submitted to > *Which IP/host the query was submitted from - not sure what filter support > will be available. > Which execution mode the query is running in (primary filter) > What submission mode was used (cli/beeline/jdbc, etc) > User info ( running as, actual end user, etc) - not sure if already present > Perf logger events. The data published should be able to create a timeline > view of the query i.e. actual submission time, query compile timestamps, > execution timestamps, post-exec data moves, etc. > Explain plan with enough details for visualizing. > Databases and tables being queried (primary filter) > Yarn queue info (primary filter) > Caller context (primary filter) > Original source i.e. submitter > Thread info in HS2 if needed ( I believe Vikram may have added this earlier ) > Query time taken (with filter support ) > Additional context info e.g. llap instance name and appId if required. 
-- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15149) Add additional information to ATSHook for Tez UI
[ https://issues.apache.org/jira/browse/HIVE-15149?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jason Dere updated HIVE-15149: -- Status: Patch Available (was: Open) > Add additional information to ATSHook for Tez UI > > > Key: HIVE-15149 > URL: https://issues.apache.org/jira/browse/HIVE-15149 > Project: Hive > Issue Type: Improvement > Components: Hooks >Reporter: Jason Dere >Assignee: Jason Dere > Attachments: HIVE-15149.1.patch, HIVE-15149.2.patch, > HIVE-15149.3.patch > > > Additional query details wanted for TEZ-3530. The additional details > discussed include the following: > Publish the following info ( in addition to existing bits published today): > Application Id to which the query was submitted (primary filter) > DAG Id (primary filter) > Hive query name (primary filter) > Hive Configs (everything a set command would provide except for sensitive > credential info) > Potentially publish source of config i.e. set in hive query script vs > hive-site.xml, etc. > Which HiveServer2 the query was submitted to > *Which IP/host the query was submitted from - not sure what filter support > will be available. > Which execution mode the query is running in (primary filter) > What submission mode was used (cli/beeline/jdbc, etc) > User info ( running as, actual end user, etc) - not sure if already present > Perf logger events. The data published should be able to create a timeline > view of the query i.e. actual submission time, query compile timestamps, > execution timestamps, post-exec data moves, etc. > Explain plan with enough details for visualizing. > Databases and tables being queried (primary filter) > Yarn queue info (primary filter) > Caller context (primary filter) > Original source i.e. submitter > Thread info in HS2 if needed ( I believe Vikram may have added this earlier ) > Query time taken (with filter support ) > Additional context info e.g. llap instance name and appId if required. 
-- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15363) Execute hive-blobstore tests using ProxyLocalFileSystem
[ https://issues.apache.org/jira/browse/HIVE-15363?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15724550#comment-15724550 ] Hive QA commented on HIVE-15363: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12841853/HIVE-15363.1.patch {color:green}SUCCESS:{color} +1 due to 1 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 10 failed/errored test(s), 10773 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample2] (batchId=5) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample4] (batchId=15) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample6] (batchId=61) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample7] (batchId=60) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample9] (batchId=38) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[orc_ppd_schema_evol_3a] (batchId=134) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[transform_ppr2] (batchId=134) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[stats_based_fetch_decision] (batchId=150) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver[explainanalyze_2] (batchId=92) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver[explainanalyze_4] (batchId=92) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/2432/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/2432/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-2432/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 10 tests failed {noformat} This message is automatically generated. 
ATTACHMENT ID: 12841853 - PreCommit-HIVE-Build > Execute hive-blobstore tests using ProxyLocalFileSystem > --- > > Key: HIVE-15363 > URL: https://issues.apache.org/jira/browse/HIVE-15363 > Project: Hive > Issue Type: Test > Components: Hive >Reporter: Sergio Peña >Assignee: Sergio Peña > Attachments: HIVE-15363.1.patch > > > The {{hive-blobstore}} directory contains tests that an only be executed on > blobstorage systems currently. These test are run manually by committers. > To automate these tests on HiveQA, we should allow hive-blobstore to use the > ProxyLocalFileSystem to run more test coverage on the pre-commit jenkins jobs. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15362) Add the missing fields for 2.2.0 upgrade scripts
[ https://issues.apache.org/jira/browse/HIVE-15362?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15724433#comment-15724433 ] Hive QA commented on HIVE-15362: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12841850/HIVE-15362.1.patch {color:red}ERROR:{color} -1 due to no test(s) being added or modified. {color:red}ERROR:{color} -1 due to 8 failed/errored test(s), 10754 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample2] (batchId=5) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample4] (batchId=15) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample6] (batchId=61) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample7] (batchId=60) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample9] (batchId=38) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[transform_ppr2] (batchId=134) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[stats_based_fetch_decision] (batchId=150) org.apache.hadoop.hive.cli.TestSparkCliDriver.org.apache.hadoop.hive.cli.TestSparkCliDriver (batchId=93) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/2431/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/2431/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-2431/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 8 tests failed {noformat} This message is automatically generated. 
ATTACHMENT ID: 12841850 - PreCommit-HIVE-Build > Add the missing fields for 2.2.0 upgrade scripts > > > Key: HIVE-15362 > URL: https://issues.apache.org/jira/browse/HIVE-15362 > Project: Hive > Issue Type: Bug > Components: Transactions >Affects Versions: 2.2.0 >Reporter: Wei Zheng >Assignee: Wei Zheng > Attachments: HIVE-15362.1.patch > > > The 2.2.0 upgrade scripts were cut on 05/25/16, while HIVE-13354 (which added > some fields to upgrade scripts) was committed to master on 05/27/16, and > there's no conflict. So we accidentally missed those fields for 2.2.0. > cc [~ekoifman] -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15142) CompactorMR fails with FileNotFoundException
[ https://issues.apache.org/jira/browse/HIVE-15142?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15724383#comment-15724383 ] Eugene Koifman commented on HIVE-15142: --- The idea is that if the table is not owned by "hive", then it would do a doAs to the user that owns the table. Did you set up the doAs parameters? > CompactorMR fails with FileNotFoundException > > > Key: HIVE-15142 > URL: https://issues.apache.org/jira/browse/HIVE-15142 > Project: Hive > Issue Type: Bug > Components: Transactions >Affects Versions: 1.0.0 >Reporter: Eugene Koifman >Assignee: Wei Zheng >Priority: Critical > > {noformat} > No of maps and reduces are 0 job_1478131263487_0009 > Job commit failed: java.io.FileNotFoundException: File > hdfs://../_tmp_80e50691-c9fe-485a-8e1e-ba20331c7d97 does not exist. > at > org.apache.hadoop.hdfs.DistributedFileSystem.listStatusInternal(DistributedFileSystem.java:904) > at > org.apache.hadoop.hdfs.DistributedFileSystem.access$600(DistributedFileSystem.java:113) > at > org.apache.hadoop.hdfs.DistributedFileSystem$21.doCall(DistributedFileSystem.java:966) > at > org.apache.hadoop.hdfs.DistributedFileSystem$21.doCall(DistributedFileSystem.java:962) > at > org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81) > at > org.apache.hadoop.hdfs.DistributedFileSystem.listStatus(DistributedFileSystem.java:962) > at > org.apache.hadoop.hive.ql.txn.compactor.CompactorMR$CompactorOutputCommitter.commitJob(CompactorMR.java:776) > at > org.apache.hadoop.mapred.OutputCommitter.commitJob(OutputCommitter.java:291) > at > org.apache.hadoop.mapreduce.v2.app.commit.CommitterEventHandler$EventProcessor.handleJobCommit(CommitterEventHandler.java:285) > at > org.apache.hadoop.mapreduce.v2.app.commit.CommitterEventHandler$EventProcessor.run(CommitterEventHandler.java:237) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > {noformat} > when Compactor doesn't have permissions to write to the table dir. > Evidently we loose the exception when creating CompactorMR.TMP_LOCATION and > only see that the file is not there on commit of the job -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15337) Need to specify starttime when putting Compaction job into "attempted" state
[ https://issues.apache.org/jira/browse/HIVE-15337?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Eugene Koifman updated HIVE-15337: -- Attachment: HIVE-15337.03.patch > Need to specify starttime when putting Compaction job into "attempted" state > > > Key: HIVE-15337 > URL: https://issues.apache.org/jira/browse/HIVE-15337 > Project: Hive > Issue Type: Improvement > Components: Transactions >Affects Versions: 1.0.0 >Reporter: Eugene Koifman >Assignee: Eugene Koifman > Attachments: HIVE-15337.01.patch, HIVE-15337.02.patch, > HIVE-15337.03.patch > > > W/o this SHOW COMPACTIONS output is not as useful > Also, add Hadoop Job ID to SHOW COMPACTIONS output -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15352) MVCC (Multi Versioned Concurrency Control) in Hive
[ https://issues.apache.org/jira/browse/HIVE-15352?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15724364#comment-15724364 ] Garima Dosi commented on HIVE-15352: ACID wouldn't serve our purpose. So we implemented the MVCC design with Zookeeper. Moreover, proper MVCC feature would help a lot. Thanks !! > MVCC (Multi Versioned Concurrency Control) in Hive > -- > > Key: HIVE-15352 > URL: https://issues.apache.org/jira/browse/HIVE-15352 > Project: Hive > Issue Type: New Feature >Reporter: Garima Dosi > Attachments: Hive MVCC - Requirement & Design.pdf > > > Use Case > While working with providing solutions for various applications, we see that > there is at times, a need to provide multi version concurrency support for > certain datasets. The requirement of multi versioned concurrency is mainly > due to two reasons – > • Simultaneous querying and loading from tables or datasets, which requires > maintaining versions for reading and writing (Locking is not the right option > here) > • Maintaining historical load of tables/datasets upto some extent > Both of these requirements are seen in data management systems (warehouses > etc). > What happens without MVCC in Hive? > In cases, where MVCC had to be done, design similar to this - > https://dzone.com/articles/zookeeper-a-real-world-example-of-how-to-use-it > was followed to make it work. Zookeeper was used to maintain versions and > provide MVCC support. However, this design poses a limitation if a normal > user would like to query a hive table because he will not be aware of the > current version to be queried. The additional layer to match versions in > zookeeper with the dataset to be queried introduces a bit of an overhead for > normal users and hence, the request to make this feature available in Hive. > Hive Design for Support of MVCC > The hive design for MVCC support can be as described below (It would somewhat > follow the article mentioned in the previous section) – > 1. 
The first thing should be the ability for the user to specify that this is > a MVCC table. So, a DDL something like this – > create table ( ) MULTI_VERSIONED ON [sequence, > time] > Internally this DDL can be translated to a partitioned table either on a > sequence number (auto-generated by Hive) or a timestamp. The metastore would > keep this information. > 2. DMLs related to inserting or loading data to the table would remain the > same for an end user. However, internally Hive would automatically detect > that a table is a multi-versioned table and write the new data to a new > partition with a new version of the dataset. The Hive Metastore would also be > updated with the current version. > 3. DMLs related to querying data from the table would remain the same for a > user. However, internally Hive would use the latest version for queries. > Latest version is always stored in the metastore. > Management of obsolete versions > The obsolete versions can be deleted based on the following – > 1.Either a setting which simply says delete the version which is older than a > threshold and is not active, OR > 2.By tracking the count of queries running on older versions and deleting the > ones which are not the latest and are not being used by any query. This would > require some sort of a background thread monitoring the table for obsolete > versions. As shown in the article mentioned above, this would also require > incrementing version count whenever a version is queried and decrement it > once the query is done. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15362) Add the missing fields for 2.2.0 upgrade scripts
[ https://issues.apache.org/jira/browse/HIVE-15362?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15724359#comment-15724359 ] Eugene Koifman commented on HIVE-15362: --- +1 > Add the missing fields for 2.2.0 upgrade scripts > > > Key: HIVE-15362 > URL: https://issues.apache.org/jira/browse/HIVE-15362 > Project: Hive > Issue Type: Bug > Components: Transactions >Affects Versions: 2.2.0 >Reporter: Wei Zheng >Assignee: Wei Zheng > Attachments: HIVE-15362.1.patch > > > The 2.2.0 upgrade scripts were cut on 05/25/16, while HIVE-13354 (which added > some fields to upgrade scripts) was committed to master on 05/27/16, and > there's no conflict. So we accidentally missed those fields for 2.2.0. > cc [~ekoifman] -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15355) Concurrency issues during parallel moveFile due to HDFSUtils.setFullFileStatus
[ https://issues.apache.org/jira/browse/HIVE-15355?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Vihang Karajgaonkar updated HIVE-15355: --- Attachment: HIVE-15355.02.patch Updating the patch with a simple test case to check that AclEntries within HadoopFileStatus is unmodifiable. This will prevent inadvertent future code changes which directly try to modify List from setFullFileStatus. Also, checked the test failures above. They seem be to be unrelated since they are failing without the patch on the master too. Also, some of the other recent JIRAs also show the same test failures. [~spena] [~rajesh.balamohan] Can you please review the patch? Thanks. > Concurrency issues during parallel moveFile due to HDFSUtils.setFullFileStatus > -- > > Key: HIVE-15355 > URL: https://issues.apache.org/jira/browse/HIVE-15355 > Project: Hive > Issue Type: Bug >Affects Versions: 2.1.0, 2.2.0 >Reporter: Vihang Karajgaonkar >Assignee: Vihang Karajgaonkar > Attachments: HIVE-15355.01.patch, HIVE-15355.02.patch > > > It is possible to run into concurrency issues during multi-threaded moveFile > issued when processing queries like {{INSERT OVERWRITE TABLE ... SELECT ..}} > when there are multiple files in the staging directory which is a > subdirectory of the target directory. 
The issue is hard to reproduce but > following stacktrace is one such example: > {noformat} > INFO : Loading data to table > functional_text_gzip.alltypesaggmultifilesnopart from > hdfs://localhost:20500/test-warehouse/alltypesaggmultifilesnopart_text_gzip/.hive-staging_hive_2016-12-01_19-58-21_712_8968735301422943318-1/-ext-1 > ERROR : Failed with exception java.lang.ArrayIndexOutOfBoundsException > org.apache.hadoop.hive.ql.metadata.HiveException: > java.lang.ArrayIndexOutOfBoundsException > at org.apache.hadoop.hive.ql.metadata.Hive.moveFile(Hive.java:2858) > at > org.apache.hadoop.hive.ql.metadata.Hive.replaceFiles(Hive.java:3124) > at org.apache.hadoop.hive.ql.metadata.Hive.loadTable(Hive.java:1701) > at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:313) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:214) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1976) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1689) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1421) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1205) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1200) > at > org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:237) > at > org.apache.hive.service.cli.operation.SQLOperation.access$300(SQLOperation.java:88) > at > org.apache.hive.service.cli.operation.SQLOperation$3$1.run(SQLOperation.java:293) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1796) > Getting log thread is interrupted, since query is done! 
> at > org.apache.hive.service.cli.operation.SQLOperation$3.run(SQLOperation.java:306) > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) > at java.util.concurrent.FutureTask.run(FutureTask.java:262) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:745) > Caused by: java.lang.ArrayIndexOutOfBoundsException > at java.lang.System.arraycopy(Native Method) > at java.util.ArrayList.removeRange(ArrayList.java:616) > at java.util.ArrayList$SubList.removeRange(ArrayList.java:1021) > at java.util.AbstractList.clear(AbstractList.java:234) > at > com.google.common.collect.Iterables.removeIfFromRandomAccessList(Iterables.java:213) > at com.google.common.collect.Iterables.removeIf(Iterables.java:184) > at > org.apache.hadoop.hive.shims.Hadoop23Shims.removeBaseAclEntries(Hadoop23Shims.java:865) > at > org.apache.hadoop.hive.shims.Hadoop23Shims.setFullFileStatus(Hadoop23Shims.java:757) > at org.apache.hadoop.hive.ql.metadata.Hive$3.call(Hive.java:2835) > at org.apache.hadoop.hive.ql.metadata.Hive$3.call(Hive.java:2828) > ... 4 more > ERROR : FAILED: Execution Error, return code 1 from > org.apache.hadoop.hive.ql.exec.MoveTask > {noformat} > Quick online search also shows some other instanc
[jira] [Commented] (HIVE-15337) Need to specify starttime when putting Compaction job into "attempted" state
[ https://issues.apache.org/jira/browse/HIVE-15337?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15724348#comment-15724348 ] Hive QA commented on HIVE-15337: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12841837/HIVE-15337.02.patch {color:green}SUCCESS:{color} +1 due to 1 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 11 failed/errored test(s), 10753 tests executed *Failed tests:* {noformat} TestSparkCliDriver - did not produce a TEST-*.xml file (likely timed out) (batchId=124) [ptf_seqfile.q,union_remove_23.q,parallel_join0.q,union_remove_9.q,join_thrift.q,skewjoinopt14.q,vectorized_mapjoin.q,union4.q,auto_join5.q,vectorized_shufflejoin.q,smb_mapjoin_20.q,groupby8_noskew.q,auto_sortmerge_join_10.q,groupby11.q,union_remove_16.q] org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample2] (batchId=5) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample4] (batchId=15) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample6] (batchId=61) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample7] (batchId=60) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample9] (batchId=38) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[transform_ppr2] (batchId=134) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[stats_based_fetch_decision] (batchId=150) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver[explainanalyze_2] (batchId=92) org.apache.hadoop.hive.thrift.TestHadoopAuthBridge23.testDelegationTokenSharedStore (batchId=218) org.apache.hive.jdbc.TestJdbcWithLocalClusterSpark.testTempTable (batchId=215) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/2429/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/2429/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-2429/ Messages: {noformat} Executing 
org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 11 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12841837 - PreCommit-HIVE-Build > Need to specify starttime when putting Compaction job into "attempted" state > > > Key: HIVE-15337 > URL: https://issues.apache.org/jira/browse/HIVE-15337 > Project: Hive > Issue Type: Improvement > Components: Transactions >Affects Versions: 1.0.0 >Reporter: Eugene Koifman >Assignee: Eugene Koifman > Attachments: HIVE-15337.01.patch, HIVE-15337.02.patch > > > W/o this SHOW COMPACTIONS output is not as useful > Also, add Hadoop Job ID to SHOW COMPACTIONS output -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15368) consider optimizing Utilities::handleMmTableFinalPath
[ https://issues.apache.org/jira/browse/HIVE-15368?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Gopal V updated HIVE-15368: --- Priority: Major (was: Minor) > consider optimizing Utilities::handleMmTableFinalPath > - > > Key: HIVE-15368 > URL: https://issues.apache.org/jira/browse/HIVE-15368 > Project: Hive > Issue Type: Sub-task >Affects Versions: hive-14535 >Reporter: Rajesh Balamohan > > Branch: hive-14535 > https://github.com/apache/hive/blob/hive-14535/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java#L4049 > When running "insert overwrite...on partitioned table" with 2000+ partitions, > good amount of time (~245 seconds) was spent in iterating every mmDirectory > entry and checking its file listings in S3. Creating this jira to consider > optimizing this codepath, as information from {{getMmDirectoryCandidates}} > could be used in terms of reducing the number of times S3 needs to be > contacted. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15368) consider optimizing Utilities::handleMmTableFinalPath
[ https://issues.apache.org/jira/browse/HIVE-15368?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Gopal V updated HIVE-15368: --- Affects Version/s: hive-14535 > consider optimizing Utilities::handleMmTableFinalPath > - > > Key: HIVE-15368 > URL: https://issues.apache.org/jira/browse/HIVE-15368 > Project: Hive > Issue Type: Sub-task >Affects Versions: hive-14535 >Reporter: Rajesh Balamohan > > Branch: hive-14535 > https://github.com/apache/hive/blob/hive-14535/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java#L4049 > When running "insert overwrite...on partitioned table" with 2000+ partitions, > good amount of time (~245 seconds) was spent in iterating every mmDirectory > entry and checking its file listings in S3. Creating this jira to consider > optimizing this codepath, as information from {{getMmDirectoryCandidates}} > could be used in terms of reducing the number of times S3 needs to be > contacted. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15322) Skipping "hbase mapredcp" in hive script for certain services
[ https://issues.apache.org/jira/browse/HIVE-15322?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15724230#comment-15724230 ] Hive QA commented on HIVE-15322: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12841833/HIVE-15322.3.patch {color:red}ERROR:{color} -1 due to no test(s) being added or modified. {color:red}ERROR:{color} -1 due to 9 failed/errored test(s), 10738 tests executed *Failed tests:* {noformat} TestMiniLlapLocalCliDriver - did not produce a TEST-*.xml file (likely timed out) (batchId=143) [vectorized_rcfile_columnar.q,vector_elt.q,explainuser_1.q,multi_insert.q,tez_dml.q,vector_bround.q,schema_evol_orc_acid_table.q,vector_when_case_null.q,orc_ppd_schema_evol_1b.q,vector_join30.q,vectorization_11.q,cte_3.q,update_tmp_table.q,vector_decimal_cast.q,groupby_grouping_id2.q,vector_decimal_round.q,tez_smb_empty.q,orc_merge6.q,vector_decimal_trailing.q,cte_5.q,tez_union.q,cbo_rp_subq_not_in.q,vector_decimal_2.q,columnStatsUpdateForStatsOptimizer_1.q,vector_outer_join3.q,schema_evol_text_vec_part_all_complex.q,tez_dynpart_hashjoin_2.q,auto_sortmerge_join_12.q,offset_limit.q,tez_union_multiinsert.q] org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample2] (batchId=5) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample4] (batchId=15) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample6] (batchId=61) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample7] (batchId=60) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample9] (batchId=38) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[transform_ppr2] (batchId=134) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[stats_based_fetch_decision] (batchId=150) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver[explainanalyze_2] (batchId=92) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/2428/testReport Console 
output: https://builds.apache.org/job/PreCommit-HIVE-Build/2428/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-2428/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 9 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12841833 - PreCommit-HIVE-Build > Skipping "hbase mapredcp" in hive script for certain services > - > > Key: HIVE-15322 > URL: https://issues.apache.org/jira/browse/HIVE-15322 > Project: Hive > Issue Type: Improvement >Reporter: Daniel Dai >Assignee: Daniel Dai > Attachments: HIVE-15322.1.patch, HIVE-15322.2.patch, > HIVE-15322.3.patch > > > "hbase mapredcp" is intended to append hbase classpath to hive. However, the > command can take some time when the system is heavy loaded. In some extreme > cases, we saw ~20s delay due to it. For certain commands, such as > "schemaTool", hbase classpath is certainly useless, and we can safely skip > invoking it. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15361) INSERT dynamic partition on S3 fails with a MoveTask failure
[ https://issues.apache.org/jira/browse/HIVE-15361?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15724112#comment-15724112 ] Hive QA commented on HIVE-15361: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12841820/HIVE-15361.2.patch {color:green}SUCCESS:{color} +1 due to 5 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 9 failed/errored test(s), 10768 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample2] (batchId=5) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample4] (batchId=15) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample6] (batchId=61) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample7] (batchId=60) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample9] (batchId=38) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[orc_ppd_schema_evol_3a] (batchId=134) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[transform_ppr2] (batchId=134) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[stats_based_fetch_decision] (batchId=150) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver[explainanalyze_2] (batchId=92) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/2427/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/2427/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-2427/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 9 tests failed {noformat} This message is automatically generated. 
ATTACHMENT ID: 12841820 - PreCommit-HIVE-Build > INSERT dynamic partition on S3 fails with a MoveTask failure > > > Key: HIVE-15361 > URL: https://issues.apache.org/jira/browse/HIVE-15361 > Project: Hive > Issue Type: Bug > Components: Hive >Affects Versions: 2.2.0 >Reporter: Sergio Peña >Assignee: Sergio Peña >Priority: Critical > Attachments: HIVE-15361.1.patch, HIVE-15361.2.patch > > > The following failure is due to the patch that merges two MoveTask found on > the ConditionalTask (See HIVE-15114) > {panel:title=Repro steps} > CREATE EXTERNAL TABLE external_1k0jU (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING); > ALTER TABLE external_1k0jU ADD PARTITION (COUNTRY='USA', STATE='CA'); > INSERT INTO external_1k0jU PARTITION (country='USA', state='CA') values > ('John Doe', 23), ('Jane Doe', 22); > CREATE EXTERNAL TABLE external_P3kiT (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING) location 's3a://hive-on-s3/foo/bar/'; > set hive.exec.dynamic.partition.mode=nonstrict; > INSERT INTO TABLE external_P3kiT PARTITION (country, state) SELECT * FROM > external_1k0jU; > {panel} > {panel:title=Error & stack trace} > ERROR : FAILED: Execution Error, return code 1 from > org.apache.hadoop.hive.ql.exec.MoveTask > INFO : MapReduce Jobs Launched: > INFO : Stage-Stage-1: Map: 1 Cumulative CPU: 3.64 sec HDFS Read: 3656 > HDFS Write: 99 SUCCESS > INFO : Total MapReduce CPU Time Spent: 3 seconds 640 msec > INFO : Completed executing > command(queryId=hive_20161201113939_d64df5d7-a4c4-4885-846f-10f0223fcf4c); > Time taken: 23.227 seconds > Error: Error while processing statement: FAILED: Execution Error, return code > 1 from org.apache.hadoop.hive.ql.exec.MoveTask (state=08S01,code=1) > INFO : Loading data to table default.external_p3kit partition (country=null, > state=null) from > s3a://hive-on-s3/foo/bar/.hive-staging_hive_2016-12-01_11-39-48_741_6724911837889341086-13/-ext-10002 > {code} > ERROR : Failed with exception 
MetaException(message:Invalid partition key & > values; keys [country, state, ], values []) > org.apache.hadoop.hive.ql.metadata.HiveException: > MetaException(message:Invalid partition key & values; keys [country, state, > ], values []) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1902) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1834) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1428) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1388) > at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:453) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:214) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1976)
[jira] [Work started] (HIVE-15367) CTAS with LOCATION should write temp data under location directory rather than database location
[ https://issues.apache.org/jira/browse/HIVE-15367?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Work on HIVE-15367 started by Sahil Takiar. --- > CTAS with LOCATION should write temp data under location directory rather > than database location > > > Key: HIVE-15367 > URL: https://issues.apache.org/jira/browse/HIVE-15367 > Project: Hive > Issue Type: Bug > Components: Hive >Reporter: Sahil Takiar >Assignee: Sahil Takiar > Attachments: HIVE-15367.1.patch > > > For regular CTAS queries, temp data from a SELECT query will be written to > a staging directory under the database location. The code to control this is > in {{SemanticAnalyzer.java}} > {code} > // allocate a temporary output dir on the location of the table > String tableName = getUnescapedName((ASTNode) ast.getChild(0)); > String[] names = Utilities.getDbTableName(tableName); > Path location; > try { > Warehouse wh = new Warehouse(conf); > //Use destination table's db location. > String destTableDb = qb.getTableDesc() != null? > qb.getTableDesc().getDatabaseName(): null; > if (destTableDb == null) { > destTableDb = names[0]; > } > location = wh.getDatabasePath(db.getDatabase(destTableDb)); > } catch (MetaException e) { > throw new SemanticException(e); > } > {code} > However, CTAS queries allow specifying a {{LOCATION}} for the new table. It's > possible for this location to be on a different filesystem than the database > location. If this happens temp data will be written to the database > filesystem and will be copied to the table filesystem in {{MoveTask}}. > This extra copying of data can drastically affect performance. Rather than > always use the database location as the staging dir for CTAS queries, Hive > should first check if there is an explicit {{LOCATION}} specified in the CTAS > query. If there is, staging data should be stored under the {{LOCATION}} > directory. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15367) CTAS with LOCATION should write temp data under location directory rather than database location
[ https://issues.apache.org/jira/browse/HIVE-15367?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sahil Takiar updated HIVE-15367: Status: Patch Available (was: In Progress) > CTAS with LOCATION should write temp data under location directory rather > than database location > > > Key: HIVE-15367 > URL: https://issues.apache.org/jira/browse/HIVE-15367 > Project: Hive > Issue Type: Bug > Components: Hive >Reporter: Sahil Takiar >Assignee: Sahil Takiar > Attachments: HIVE-15367.1.patch > > > For regular CTAS queries, temp data from a SELECT query will be written to > a staging directory under the database location. The code to control this is > in {{SemanticAnalyzer.java}} > {code} > // allocate a temporary output dir on the location of the table > String tableName = getUnescapedName((ASTNode) ast.getChild(0)); > String[] names = Utilities.getDbTableName(tableName); > Path location; > try { > Warehouse wh = new Warehouse(conf); > //Use destination table's db location. > String destTableDb = qb.getTableDesc() != null? > qb.getTableDesc().getDatabaseName(): null; > if (destTableDb == null) { > destTableDb = names[0]; > } > location = wh.getDatabasePath(db.getDatabase(destTableDb)); > } catch (MetaException e) { > throw new SemanticException(e); > } > {code} > However, CTAS queries allow specifying a {{LOCATION}} for the new table. It's > possible for this location to be on a different filesystem than the database > location. If this happens temp data will be written to the database > filesystem and will be copied to the table filesystem in {{MoveTask}}. > This extra copying of data can drastically affect performance. Rather than > always use the database location as the staging dir for CTAS queries, Hive > should first check if there is an explicit {{LOCATION}} specified in the CTAS > query. If there is, staging data should be stored under the {{LOCATION}} > directory. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15367) CTAS with LOCATION should write temp data under location directory rather than database location
[ https://issues.apache.org/jira/browse/HIVE-15367?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sahil Takiar updated HIVE-15367: Attachment: HIVE-15367.1.patch Attaching pre-lim patch. I suspect there will be some qtest failures, but want to see why they fail, and for the ones that do fail, if it is sufficient to just re-generate the q.out file. > CTAS with LOCATION should write temp data under location directory rather > than database location > > > Key: HIVE-15367 > URL: https://issues.apache.org/jira/browse/HIVE-15367 > Project: Hive > Issue Type: Bug > Components: Hive >Reporter: Sahil Takiar >Assignee: Sahil Takiar > Attachments: HIVE-15367.1.patch > > > For regular CTAS queries, temp data from a SELECT query will be written to > a staging directory under the database location. The code to control this is > in {{SemanticAnalyzer.java}} > {code} > // allocate a temporary output dir on the location of the table > String tableName = getUnescapedName((ASTNode) ast.getChild(0)); > String[] names = Utilities.getDbTableName(tableName); > Path location; > try { > Warehouse wh = new Warehouse(conf); > //Use destination table's db location. > String destTableDb = qb.getTableDesc() != null? > qb.getTableDesc().getDatabaseName(): null; > if (destTableDb == null) { > destTableDb = names[0]; > } > location = wh.getDatabasePath(db.getDatabase(destTableDb)); > } catch (MetaException e) { > throw new SemanticException(e); > } > {code} > However, CTAS queries allow specifying a {{LOCATION}} for the new table. It's > possible for this location to be on a different filesystem than the database > location. If this happens temp data will be written to the database > filesystem and will be copied to the table filesystem in {{MoveTask}}. > This extra copying of data can drastically affect performance. 
Rather than > always use the database location as the staging dir for CTAS queries, Hive > should first check if there is an explicit {{LOCATION}} specified in the CTAS > query. If there is, staging data should be stored under the {{LOCATION}} > directory. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15367) CTAS with LOCATION should write temp data under location directory rather than database location
[ https://issues.apache.org/jira/browse/HIVE-15367?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15724008#comment-15724008 ] Sahil Takiar commented on HIVE-15367: - [~spena], [~ychena] you were looking at this logic as part of HIVE-11427, any chance you could comment on whether my logic sounds correct? > CTAS with LOCATION should write temp data under location directory rather > than database location > > > Key: HIVE-15367 > URL: https://issues.apache.org/jira/browse/HIVE-15367 > Project: Hive > Issue Type: Bug > Components: Hive >Reporter: Sahil Takiar >Assignee: Sahil Takiar > > For regular CTAS queries, temp data from a SELECT query will be written to > a staging directory under the database location. The code to control this is > in {{SemanticAnalyzer.java}} > {code} > // allocate a temporary output dir on the location of the table > String tableName = getUnescapedName((ASTNode) ast.getChild(0)); > String[] names = Utilities.getDbTableName(tableName); > Path location; > try { > Warehouse wh = new Warehouse(conf); > //Use destination table's db location. > String destTableDb = qb.getTableDesc() != null? > qb.getTableDesc().getDatabaseName(): null; > if (destTableDb == null) { > destTableDb = names[0]; > } > location = wh.getDatabasePath(db.getDatabase(destTableDb)); > } catch (MetaException e) { > throw new SemanticException(e); > } > {code} > However, CTAS queries allow specifying a {{LOCATION}} for the new table. It's > possible for this location to be on a different filesystem than the database > location. If this happens temp data will be written to the database > filesystem and will be copied to the table filesystem in {{MoveTask}}. > This extra copying of data can drastically affect performance. Rather than > always use the database location as the staging dir for CTAS queries, Hive > should first check if there is an explicit {{LOCATION}} specified in the CTAS > query. 
If there is, staging data should be stored under the {{LOCATION}} > directory. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15355) Concurrency issues during parallel moveFile due to HDFSUtils.setFullFileStatus
[ https://issues.apache.org/jira/browse/HIVE-15355?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15724006#comment-15724006 ] Hive QA commented on HIVE-15355: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12841819/HIVE-15355.01.patch {color:red}ERROR:{color} -1 due to no test(s) being added or modified. {color:red}ERROR:{color} -1 due to 8 failed/errored test(s), 10766 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample2] (batchId=5) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample4] (batchId=15) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample6] (batchId=61) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample7] (batchId=60) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample9] (batchId=38) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[transform_ppr2] (batchId=134) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[stats_based_fetch_decision] (batchId=150) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver[explainanalyze_2] (batchId=92) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/2426/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/2426/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-2426/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 8 tests failed {noformat} This message is automatically generated. 
ATTACHMENT ID: 12841819 - PreCommit-HIVE-Build > Concurrency issues during parallel moveFile due to HDFSUtils.setFullFileStatus > -- > > Key: HIVE-15355 > URL: https://issues.apache.org/jira/browse/HIVE-15355 > Project: Hive > Issue Type: Bug >Affects Versions: 2.1.0, 2.2.0 >Reporter: Vihang Karajgaonkar >Assignee: Vihang Karajgaonkar > Attachments: HIVE-15355.01.patch > > > It is possible to run into concurrency issues during multi-threaded moveFile > issued when processing queries like {{INSERT OVERWRITE TABLE ... SELECT ..}} > when there are multiple files in the staging directory which is a > subdirectory of the target directory. The issue is hard to reproduce but > following stacktrace is one such example: > {noformat} > INFO : Loading data to table > functional_text_gzip.alltypesaggmultifilesnopart from > hdfs://localhost:20500/test-warehouse/alltypesaggmultifilesnopart_text_gzip/.hive-staging_hive_2016-12-01_19-58-21_712_8968735301422943318-1/-ext-1 > ERROR : Failed with exception java.lang.ArrayIndexOutOfBoundsException > org.apache.hadoop.hive.ql.metadata.HiveException: > java.lang.ArrayIndexOutOfBoundsException > at org.apache.hadoop.hive.ql.metadata.Hive.moveFile(Hive.java:2858) > at > org.apache.hadoop.hive.ql.metadata.Hive.replaceFiles(Hive.java:3124) > at org.apache.hadoop.hive.ql.metadata.Hive.loadTable(Hive.java:1701) > at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:313) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:214) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1976) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1689) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1421) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1205) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1200) > at > org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:237) > at > 
org.apache.hive.service.cli.operation.SQLOperation.access$300(SQLOperation.java:88) > at > org.apache.hive.service.cli.operation.SQLOperation$3$1.run(SQLOperation.java:293) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1796) > Getting log thread is interrupted, since query is done! > at > org.apache.hive.service.cli.operation.SQLOperation$3.run(SQLOperation.java:306) > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) > at java.util.concurrent.FutureTask.run(FutureTask.java:262) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecut
[jira] [Updated] (HIVE-15351) Disable vectorized VectorUDFAdaptor usage with non-column or constant parameters
[ https://issues.apache.org/jira/browse/HIVE-15351?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Matt McCline updated HIVE-15351: Resolution: Fixed Status: Resolved (was: Patch Available) > Disable vectorized VectorUDFAdaptor usage with non-column or constant > parameters > > > Key: HIVE-15351 > URL: https://issues.apache.org/jira/browse/HIVE-15351 > Project: Hive > Issue Type: Bug > Components: Hive >Reporter: Matt McCline >Assignee: Matt McCline >Priority: Blocker > Attachments: HIVE-15351.01.patch, HIVE-15351.02.patch > > > Vectorization using VectorUDFAdaptor is broken and produces wrong results > when the parameter(s) have vectorized expressions that allocate scratch > columns. So, for now, we restrict VectorUDFAdaptor usage to columns or > constant expressions. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15351) Disable vectorized VectorUDFAdaptor usage with non-column or constant parameters
[ https://issues.apache.org/jira/browse/HIVE-15351?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15724003#comment-15724003 ] Matt McCline commented on HIVE-15351: - Committed to master. > Disable vectorized VectorUDFAdaptor usage with non-column or constant > parameters > > > Key: HIVE-15351 > URL: https://issues.apache.org/jira/browse/HIVE-15351 > Project: Hive > Issue Type: Bug > Components: Hive >Reporter: Matt McCline >Assignee: Matt McCline >Priority: Blocker > Attachments: HIVE-15351.01.patch, HIVE-15351.02.patch > > > Vectorization using VectorUDFAdaptor is broken and produces wrong results > when the parameter(s) have vectorized expressions that allocate scratch > columns. So, for now, we restrict VectorUDFAdaptor usage to columns or > constant expressions. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15351) Disable vectorized VectorUDFAdaptor usage with non-column or constant parameters
[ https://issues.apache.org/jira/browse/HIVE-15351?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723925#comment-15723925 ] Hive QA commented on HIVE-15351: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12841817/HIVE-15351.02.patch {color:green}SUCCESS:{color} +1 due to 1 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 7 failed/errored test(s), 10764 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=50) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample2] (batchId=5) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample4] (batchId=15) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample6] (batchId=61) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample7] (batchId=60) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample9] (batchId=38) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[transform_ppr2] (batchId=134) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/2425/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/2425/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-2425/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 7 tests failed {noformat} This message is automatically generated. 
ATTACHMENT ID: 12841817 - PreCommit-HIVE-Build > Disable vectorized VectorUDFAdaptor usage with non-column or constant > parameters > > > Key: HIVE-15351 > URL: https://issues.apache.org/jira/browse/HIVE-15351 > Project: Hive > Issue Type: Bug > Components: Hive >Reporter: Matt McCline >Assignee: Matt McCline >Priority: Blocker > Attachments: HIVE-15351.01.patch, HIVE-15351.02.patch > > > Vectorization using VectorUDFAdaptor is broken and produces wrong results > when the parameter(s) have vectorized expressions that allocate scratch > columns. So, for now, we restrict VectorUDFAdaptor usage to columns or > constant expressions. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15142) CompactorMR fails with FileNotFoundException
[ https://issues.apache.org/jira/browse/HIVE-15142?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723869#comment-15723869 ] Wei Zheng commented on HIVE-15142: -- [~ekoifman] Is the backtrace you posted from Hive 1? I tried to reproduce on master, and got the following backtrace from hivemetastore.log. It looks we're doing the right thing, which is to error out on file creation during compaction when there's not enough permission. {code} java.lang.Exception: java.io.IOException: Mkdirs failed to create file:/Users/wzheng/hivetmp/warehouse/acid.db/t1/_tmp_4711984b-2a04-4d7c-aa63-7065726587d4/base_001 (exists=false, cwd=file:/Users/wzheng) at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:462) ~[hadoop-mapreduce-client-common-2.6.0.jar:?] at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:522) [hadoop-mapreduce-client-common-2.6.0.jar:?] Caused by: java.io.IOException: Mkdirs failed to create file:/Users/wzheng/hivetmp/warehouse/acid.db/t1/_tmp_4711984b-2a04-4d7c-aa63-7065726587d4/base_001 (exists=false, cwd=file:/Users/wzheng) at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:442) ~[hadoop-common-2.6.0.jar:?] at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:428) ~[hadoop-common-2.6.0.jar:?] at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:908) ~[hadoop-common-2.6.0.jar:?] at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:889) ~[hadoop-common-2.6.0.jar:?] 
at org.apache.orc.impl.WriterImpl.getStream(WriterImpl.java:2468) ~[hive-exec-2.2.0-SNAPSHOT.jar:2.2.0-SNAPSHOT] at org.apache.orc.impl.WriterImpl.flushStripe(WriterImpl.java:2485) ~[hive-exec-2.2.0-SNAPSHOT.jar:2.2.0-SNAPSHOT] at org.apache.orc.impl.WriterImpl.close(WriterImpl.java:2787) ~[hive-exec-2.2.0-SNAPSHOT.jar:2.2.0-SNAPSHOT] at org.apache.hadoop.hive.ql.io.orc.WriterImpl.close(WriterImpl.java:313) ~[hive-exec-2.2.0-SNAPSHOT.jar:2.2.0-SNAPSHOT] at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$1.close(OrcOutputFormat.java:315) ~[hive-exec-2.2.0-SNAPSHOT.jar:2.2.0-SNAPSHOT] at org.apache.hadoop.hive.ql.txn.compactor.CompactorMR$CompactorMap.close(CompactorMR.java:692) ~[hive-exec-2.2.0-SNAPSHOT.jar:2.2.0-SNAPSHOT] at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:61) ~[hadoop-mapreduce-client-core-2.6.0.jar:?] at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:450) ~[hadoop-mapreduce-client-core-2.6.0.jar:?] at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) ~[hadoop-mapreduce-client-core-2.6.0.jar:?] at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:243) ~[hadoop-mapreduce-client-common-2.6.0.jar:?] 
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[?:1.8.0_112] at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_112] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) ~[?:1.8.0_112] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) ~[?:1.8.0_112] at java.lang.Thread.run(Thread.java:745) ~[?:1.8.0_112] 2016-12-05T16:09:37,101 INFO [hw11217.local-26] mapreduce.Job: Job job_local1901112071_0001 running in uber mode : false 2016-12-05T16:09:37,103 INFO [hw11217.local-26] mapreduce.Job: map 0% reduce 0% 2016-12-05T16:09:37,106 INFO [hw11217.local-26] mapreduce.Job: Job job_local1901112071_0001 failed with state FAILED due to: NA 2016-12-05T16:09:37,111 INFO [hw11217.local-26] mapreduce.Job: Counters: 0 2016-12-05T16:09:37,111 ERROR [hw11217.local-26] compactor.Worker: Caught exception while trying to compact id:1,dbname:acid,tableName:t1,partName:null,state:^@,type:MAJOR,properties:null,runAs:null,tooManyAborts:false,highestTxnId:0. Marking failed to avoid repeated failures, java.io.IOException: Job failed ! at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:836) at org.apache.hadoop.hive.ql.txn.compactor.CompactorMR.launchCompactionJob(CompactorMR.java:310) at org.apache.hadoop.hive.ql.txn.compactor.CompactorMR.run(CompactorMR.java:272) at org.apache.hadoop.hive.ql.txn.compactor.Worker$1.run(Worker.java:173) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628) at org.apache.hadoop.hive.ql.txn.compactor.Worker.run(Worker.java:170) {code} > CompactorMR fails with FileNotFoundException > > > Key: HIVE-15142 > URL: https://issues.apache.org/jira/browse/HIVE-15142 > Project: Hive > Issue Type: Bug > Components: Transactions >Affects Versions: 1.0.0 >Reporter: Eugene Koifman >Assignee:
[jira] [Commented] (HIVE-15332) REPL LOAD & DUMP support for incremental CREATE_TABLE/ADD_PTN
[ https://issues.apache.org/jira/browse/HIVE-15332?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723867#comment-15723867 ] Vaibhav Gumashta commented on HIVE-15332: - Created HIVE-15364 for 2 and HIVE-15365 for 3. I'll include changes for 1 in HIVE-15294. I'll commit this as the minor comments can be incorporated in one of the upcoming patches. > REPL LOAD & DUMP support for incremental CREATE_TABLE/ADD_PTN > - > > Key: HIVE-15332 > URL: https://issues.apache.org/jira/browse/HIVE-15332 > Project: Hive > Issue Type: Sub-task > Components: repl >Reporter: Sushanth Sowmyan >Assignee: Sushanth Sowmyan > Attachments: HIVE-15332.patch > > > We need to add in support for REPL LOAD and REPL DUMP of incremental events, > and we need to be able to replicate creates, for a start. This jira tracks > the inclusion of CREATE_TABLE/ADD_PARTITION event support to REPL DUMP & LOAD. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15251) Provide support for complex expressions in ON clauses for OUTER joins
[ https://issues.apache.org/jira/browse/HIVE-15251?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jesus Camacho Rodriguez updated HIVE-15251: --- Resolution: Fixed Fix Version/s: 2.2.0 Status: Resolved (was: Patch Available) Fails are unrelated, except negative join45.q (complex conditions for outer joins not supported) that needed to be removed. Pushed to master, thanks for reviewing [~ashutoshc]! > Provide support for complex expressions in ON clauses for OUTER joins > - > > Key: HIVE-15251 > URL: https://issues.apache.org/jira/browse/HIVE-15251 > Project: Hive > Issue Type: Bug > Components: CBO, Parser >Affects Versions: 2.2.0 >Reporter: Jesus Camacho Rodriguez >Assignee: Jesus Camacho Rodriguez > Fix For: 2.2.0 > > Attachments: HIVE-15251.01.patch, HIVE-15251.patch > > > Follow-up of HIVE-15211. > Currently, we have some restrictions on the predicates that we can use in ON > clauses for outer joins. > This patch is an extension to overcome these restrictions. Follow-up work > will focus on identifying some cases, such as disjunctive predicates, that > can be run more efficiently than with a cartesian product. > It will allow to write queries that currently fail in Hive such as: > {code:sql} > -- Disjunctions > SELECT * > FROM src1 LEFT OUTER JOIN src > ON (src1.key=src.key > OR src1.value between 100 and 102 > OR src.value between 100 and 102) > LIMIT 10; > -- Conjunction with multiple inputs references in one side > SELECT * > FROM src1 RIGHT OUTER JOIN src > ON (src1.key+src.key >= 100 > AND src1.key+src.key <= 102) > LIMIT 10; > -- Conjunct with no references > SELECT * > FROM src1 FULL OUTER JOIN src > ON (src1.value between 100 and 102 > AND src.value between 100 and 102 > AND true) > LIMIT 10; > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15346) "values temp table" should not be an input source for the query
[ https://issues.apache.org/jira/browse/HIVE-15346?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723831#comment-15723831 ] Hive QA commented on HIVE-15346: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12841796/HIVE-15346.3.patch {color:red}ERROR:{color} -1 due to no test(s) being added or modified. {color:red}ERROR:{color} -1 due to 8 failed/errored test(s), 10762 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=50) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_sortmerge_join_2] (batchId=44) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample2] (batchId=5) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample4] (batchId=15) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample6] (batchId=61) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample7] (batchId=60) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample9] (batchId=38) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[transform_ppr2] (batchId=134) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/2424/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/2424/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-2424/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 8 tests failed {noformat} This message is automatically generated. 
ATTACHMENT ID: 12841796 - PreCommit-HIVE-Build > "values temp table" should not be an input source for the query > --- > > Key: HIVE-15346 > URL: https://issues.apache.org/jira/browse/HIVE-15346 > Project: Hive > Issue Type: Sub-task > Components: Query Planning >Affects Versions: 2.2.0 >Reporter: Aihua Xu >Assignee: Aihua Xu > Attachments: HIVE-15346.1.patch, HIVE-15346.2.patch, > HIVE-15346.3.patch, HIVE-15346.codeonly.patch > > > For "insert values()" query, a values tmp table is created. Such table is > treated as input source. That seems to be incorrect since that should be > treated internal and temporary. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-14731) Use Tez cartesian product edge in Hive (unpartitioned case only)
[ https://issues.apache.org/jira/browse/HIVE-14731?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723819#comment-15723819 ] Zhiyuan Yang commented on HIVE-14731: - Test failures are irrelevant. > Use Tez cartesian product edge in Hive (unpartitioned case only) > > > Key: HIVE-14731 > URL: https://issues.apache.org/jira/browse/HIVE-14731 > Project: Hive > Issue Type: Bug >Reporter: Zhiyuan Yang >Assignee: Zhiyuan Yang > Attachments: HIVE-14731.1.patch, HIVE-14731.2.patch, > HIVE-14731.3.patch, HIVE-14731.4.patch, HIVE-14731.5.patch, > HIVE-14731.6.patch, HIVE-14731.7.patch, HIVE-14731.8.patch, HIVE-14731.9.patch > > > Given cartesian product edge is available in Tez now (see TEZ-3230), let's > integrate it into Hive on Tez. This allows us to have more than one reducer > in cross product queries. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-11812) datediff sometimes returns incorrect results when called with dates
[ https://issues.apache.org/jira/browse/HIVE-11812?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723775#comment-15723775 ] Sergey Shelukhin commented on HIVE-11812: - See TestDateWritable::testDaylightSavingsTime for an example of how to make a test that covers all timezones (supported on a given system, at least). > datediff sometimes returns incorrect results when called with dates > --- > > Key: HIVE-11812 > URL: https://issues.apache.org/jira/browse/HIVE-11812 > Project: Hive > Issue Type: Bug > Components: UDF >Affects Versions: 2.0.0 >Reporter: Nicholas Brenwald >Assignee: Chetna Chaudhari >Priority: Minor > Attachments: HIVE-11812.1.patch > > > DATEDIFF returns an incorrect result when one of the arguments is a date > type. > The Hive Language Manual provides the following signature for datediff: > {code} > int datediff(string enddate, string startdate) > {code} > I think datediff should either throw an error (if date types are not > supported), or return the correct result. > To reproduce, create a table: > {code} > create table t (c1 string, c2 date); > {code} > Assuming you have a table x containing some data, populate table t with 1 row: > {code} > insert into t select '2015-09-15', '2015-09-15' from x limit 1; > {code} > Then run the following 12 test queries: > {code} > select datediff(c1, '2015-09-14') from t; > select datediff(c1, '2015-09-15') from t; > select datediff(c1, '2015-09-16') from t; > select datediff('2015-09-14', c1) from t; > select datediff('2015-09-15', c1) from t; > select datediff('2015-09-16', c1) from t; > select datediff(c2, '2015-09-14') from t; > select datediff(c2, '2015-09-15') from t; > select datediff(c2, '2015-09-16') from t; > select datediff('2015-09-14', c2) from t; > select datediff('2015-09-15', c2) from t; > select datediff('2015-09-16', c2) from t; > {code} > The below table summarises the result. 
All results for column c1 (which is a > string) are correct, but when using c2 (which is a date), two of the results > are incorrect. > || Test || Expected Result || Actual Result || Passed / Failed || > |datediff(c1, '2015-09-14')| 1 | 1| Passed | > |datediff(c1, '2015-09-15')| 0 | 0| Passed | > |datediff(c1, '2015-09-16') | -1 | -1| Passed | > |datediff('2015-09-14', c1) | -1 | -1| Passed | > |datediff('2015-09-15', c1)| 0 | 0| Passed | > |datediff('2015-09-16', c1)| 1 | 1| Passed | > |datediff(c2, '2015-09-14')| 1 | 0| {color:red}Failed{color} | > |datediff(c2, '2015-09-15')| 0 | 0| Passed | > |datediff(c2, '2015-09-16') | -1 | -1| Passed | > |datediff('2015-09-14', c2) | -1 | 0 | {color:red}Failed{color} | > |datediff('2015-09-15', c2)| 0 | 0| Passed | > |datediff('2015-09-16', c2)| 1 | 1| Passed | -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15363) Execute hive-blobstore tests using ProxyLocalFileSystem
[ https://issues.apache.org/jira/browse/HIVE-15363?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723748#comment-15723748 ] Sergio Peña commented on HIVE-15363: With this patch, you can now execute all hive-blobstore tests without the requirement to create a {{blobstore-conf.xml}} file with the S3 credentials. All tests will run on the local filesystem, but Hive will execute code related to blobstore, such as blobstore optimizations. This will allow HiveQA to execute all tests during pre-commits. If you want to execute on real blobstorage, then just create the {{blobstore-conf.xml}} as usual. The property values here will override the ones from the hive-site.xml. [~mohitsabharwal] Can you help me review and +1 this patch? [~stakiar] [~poeppt] [~yalovyyi] FYI, this might be interesting for you. > Execute hive-blobstore tests using ProxyLocalFileSystem > --- > > Key: HIVE-15363 > URL: https://issues.apache.org/jira/browse/HIVE-15363 > Project: Hive > Issue Type: Test > Components: Hive >Reporter: Sergio Peña >Assignee: Sergio Peña > Attachments: HIVE-15363.1.patch > > > The {{hive-blobstore}} directory contains tests that can only be executed on > blobstorage systems currently. These tests are run manually by committers. > To automate these tests on HiveQA, we should allow hive-blobstore to use the > ProxyLocalFileSystem to run more test coverage on the pre-commit jenkins jobs. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15333) Add a FetchTask to REPL DUMP plan for reading dump uri, last repl id as ResultSet
[ https://issues.apache.org/jira/browse/HIVE-15333?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Vaibhav Gumashta updated HIVE-15333: Attachment: HIVE-15333.3.patch Fixing related test failures > Add a FetchTask to REPL DUMP plan for reading dump uri, last repl id as > ResultSet > - > > Key: HIVE-15333 > URL: https://issues.apache.org/jira/browse/HIVE-15333 > Project: Hive > Issue Type: Sub-task > Components: repl >Reporter: Vaibhav Gumashta >Assignee: Vaibhav Gumashta > Attachments: HIVE-15333.1.patch, HIVE-15333.2.patch, > HIVE-15333.3.patch > > > We're writing the return values to a file, but we don't add FetchTask while > planning. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Comment Edited] (HIVE-11812) datediff sometimes returns incorrect results when called with dates
[ https://issues.apache.org/jira/browse/HIVE-11812?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723733#comment-15723733 ] Shyam Sunder Rai edited comment on HIVE-11812 at 12/5/16 11:34 PM: --- To all the Gurus here, I made a change locally to my code and the datediff function with the test cases given above (in PST) seems to be working correctly for me with all the anticipated results. Can someone help me out on how we are testing it with different timezones ? I performed the test in the following way and am getting the results as expected after my changes. {code} SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c1, "-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-14', "-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c1, "-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-15', "-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c1, "-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-16', "-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-14', "-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c1, "-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-15', "-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c1, "-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-16', "-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c1, "-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c2, "-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-14', "-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c2, "-MM-dd") * 1000, 'EST')), 
TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-15', "-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c2, "-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-16', "-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-14', "-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c2, "-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-15', "-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c2, "-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-16', "-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c2, "-MM-dd") * 1000, 'EST'))) FROM t; {code} Tried these changes with multiple timezones to try variations, including IST. The server itself is on PST. was (Author: shyamsunderrai): To all the Gurus here, I made a change locally to my code and the datediff function with the test cases given above (in PST) seems to be working correctly for me with all the anticipated results. Can someone help me out on how we are testing it with different timezones ? I performed the test in the following way and am getting the results as expected after my changes. 
{code} SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c1, "-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-14', "-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c1, "-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-15', "-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c1, "-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-16', "-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-14', "-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c1, "-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-15', "-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c1, "-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-16', "-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c1, "-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c2, "-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-14', "-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c2, "-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIME
[jira] [Updated] (HIVE-15363) Execute hive-blobstore tests using ProxyLocalFileSystem
[ https://issues.apache.org/jira/browse/HIVE-15363?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sergio Peña updated HIVE-15363: --- Attachment: HIVE-15363.1.patch > Execute hive-blobstore tests using ProxyLocalFileSystem > --- > > Key: HIVE-15363 > URL: https://issues.apache.org/jira/browse/HIVE-15363 > Project: Hive > Issue Type: Test > Components: Hive >Reporter: Sergio Peña >Assignee: Sergio Peña > Attachments: HIVE-15363.1.patch > > > The {{hive-blobstore}} directory contains tests that can only be executed on > blobstorage systems currently. These tests are run manually by committers. > To automate these tests on HiveQA, we should allow hive-blobstore to use the > ProxyLocalFileSystem to run more test coverage on the pre-commit jenkins jobs. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15363) Execute hive-blobstore tests using ProxyLocalFileSystem
[ https://issues.apache.org/jira/browse/HIVE-15363?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sergio Peña updated HIVE-15363: --- Status: Patch Available (was: Open) > Execute hive-blobstore tests using ProxyLocalFileSystem > --- > > Key: HIVE-15363 > URL: https://issues.apache.org/jira/browse/HIVE-15363 > Project: Hive > Issue Type: Test > Components: Hive >Reporter: Sergio Peña >Assignee: Sergio Peña > Attachments: HIVE-15363.1.patch > > > The {{hive-blobstore}} directory contains tests that can only be executed on > blobstorage systems currently. These tests are run manually by committers. > To automate these tests on HiveQA, we should allow hive-blobstore to use the > ProxyLocalFileSystem to run more test coverage on the pre-commit jenkins jobs. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-11812) datediff sometimes returns incorrect results when called with dates
[ https://issues.apache.org/jira/browse/HIVE-11812?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723733#comment-15723733 ] Shyam Sunder Rai commented on HIVE-11812: - To all the Gurus here, I made a change locally to my code and the datediff function with the test cases given above (in PST) seems to be working correctly for me with all the anticipated results. Can someone help me out on how we are testing it with different timezones ? I performed the test in the following way and am getting the results as expected after my changes. {code} SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c1, "yyyy-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-14', "yyyy-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c1, "yyyy-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-15', "yyyy-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c1, "yyyy-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-16', "yyyy-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-14', "yyyy-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c1, "yyyy-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-15', "yyyy-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c1, "yyyy-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-16', "yyyy-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c1, "yyyy-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c2, "yyyy-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-14', "yyyy-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c2, "yyyy-MM-dd") * 1000, 'EST')), 
TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-15', "yyyy-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c2, "yyyy-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-16', "yyyy-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-14', "yyyy-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c2, "yyyy-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-15', "yyyy-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c2, "yyyy-MM-dd") * 1000, 'EST'))) FROM t; SELECT datediff(TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP('2015-09-16', "yyyy-MM-dd") * 1000, 'EST')), TO_DATE(FROM_UTC_TIMESTAMP(UNIX_TIMESTAMP(t.c2, "yyyy-MM-dd") * 1000, 'EST'))) FROM t; {code} > datediff sometimes returns incorrect results when called with dates > --- > > Key: HIVE-11812 > URL: https://issues.apache.org/jira/browse/HIVE-11812 > Project: Hive > Issue Type: Bug > Components: UDF >Affects Versions: 2.0.0 >Reporter: Nicholas Brenwald >Assignee: Chetna Chaudhari >Priority: Minor > Attachments: HIVE-11812.1.patch > > > DATEDIFF returns an incorrect result when one of the arguments is a date > type. > The Hive Language Manual provides the following signature for datediff: > {code} > int datediff(string enddate, string startdate) > {code} > I think datediff should either throw an error (if date types are not > supported), or return the correct result. 
> To reproduce, create a table: > {code} > create table t (c1 string, c2 date); > {code} > Assuming you have a table x containing some data, populate table t with 1 row: > {code} > insert into t select '2015-09-15', '2015-09-15' from x limit 1; > {code} > Then run the following 12 test queries: > {code} > select datediff(c1, '2015-09-14') from t; > select datediff(c1, '2015-09-15') from t; > select datediff(c1, '2015-09-16') from t; > select datediff('2015-09-14', c1) from t; > select datediff('2015-09-15', c1) from t; > select datediff('2015-09-16', c1) from t; > select datediff(c2, '2015-09-14') from t; > select datediff(c2, '2015-09-15') from t; > select datediff(c2, '2015-09-16') from t; > select datediff('2015-09-14', c2) from t; > select datediff('2015-09-15', c2) from t; > select datediff('2015-09-16', c2) from t; > {code} > The below table summarises the result. All results for column c1 (which is a > string) are correct, but when using c2 (which is a date), two of the results > are incorrect. > || Test || Expected Result || Actual Result || Passed / Failed || > |datediff(c1, '2015-09-14')| 1 | 1| Passed | > |datediff(c1, '2015-09-15')| 0 | 0| Passed
[jira] [Commented] (HIVE-15251) Provide support for complex expressions in ON clauses for OUTER joins
[ https://issues.apache.org/jira/browse/HIVE-15251?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723729#comment-15723729 ] Hive QA commented on HIVE-15251: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12841793/HIVE-15251.01.patch {color:green}SUCCESS:{color} +1 due to 4 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 10 failed/errored test(s), 10767 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_auto_mult_tables] (batchId=77) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample2] (batchId=5) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample4] (batchId=15) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample6] (batchId=61) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample7] (batchId=60) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample9] (batchId=38) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[orc_ppd_schema_evol_3a] (batchId=134) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[transform_ppr2] (batchId=134) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[stats_based_fetch_decision] (batchId=150) org.apache.hadoop.hive.cli.TestNegativeCliDriver.testCliDriver[join45] (batchId=84) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/2423/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/2423/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-2423/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 10 tests failed {noformat} This message is automatically generated. 
ATTACHMENT ID: 12841793 - PreCommit-HIVE-Build > Provide support for complex expressions in ON clauses for OUTER joins > - > > Key: HIVE-15251 > URL: https://issues.apache.org/jira/browse/HIVE-15251 > Project: Hive > Issue Type: Bug > Components: CBO, Parser >Affects Versions: 2.2.0 >Reporter: Jesus Camacho Rodriguez >Assignee: Jesus Camacho Rodriguez > Attachments: HIVE-15251.01.patch, HIVE-15251.patch > > > Follow-up of HIVE-15211. > Currently, we have some restrictions on the predicates that we can use in ON > clauses for outer joins. > This patch is an extension to overcome these restrictions. Follow-up work > will focus on identifying some cases, such as disjunctive predicates, that > can be run more efficiently than with a cartesian product. > It will allow to write queries that currently fail in Hive such as: > {code:sql} > -- Disjunctions > SELECT * > FROM src1 LEFT OUTER JOIN src > ON (src1.key=src.key > OR src1.value between 100 and 102 > OR src.value between 100 and 102) > LIMIT 10; > -- Conjunction with multiple inputs references in one side > SELECT * > FROM src1 RIGHT OUTER JOIN src > ON (src1.key+src.key >= 100 > AND src1.key+src.key <= 102) > LIMIT 10; > -- Conjunct with no references > SELECT * > FROM src1 FULL OUTER JOIN src > ON (src1.value between 100 and 102 > AND src.value between 100 and 102 > AND true) > LIMIT 10; > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15362) Add the missing fields for 2.2.0 upgrade scripts
[ https://issues.apache.org/jira/browse/HIVE-15362?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Wei Zheng updated HIVE-15362: - Attachment: HIVE-15362.1.patch [~ekoifman] Can you review please? > Add the missing fields for 2.2.0 upgrade scripts > > > Key: HIVE-15362 > URL: https://issues.apache.org/jira/browse/HIVE-15362 > Project: Hive > Issue Type: Bug > Components: Transactions >Affects Versions: 2.2.0 >Reporter: Wei Zheng >Assignee: Wei Zheng > Attachments: HIVE-15362.1.patch > > > The 2.2.0 upgrade scripts were cut on 05/25/16, while HIVE-13354 (which added > some fields to upgrade scripts) was committed to master on 05/27/16, and > there's no conflict. So we accidentally missed those fields for 2.2.0. > cc [~ekoifman] -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15362) Add the missing fields for 2.2.0 upgrade scripts
[ https://issues.apache.org/jira/browse/HIVE-15362?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Wei Zheng updated HIVE-15362: - Status: Patch Available (was: Open) > Add the missing fields for 2.2.0 upgrade scripts > > > Key: HIVE-15362 > URL: https://issues.apache.org/jira/browse/HIVE-15362 > Project: Hive > Issue Type: Bug > Components: Transactions >Affects Versions: 2.2.0 >Reporter: Wei Zheng >Assignee: Wei Zheng > Attachments: HIVE-15362.1.patch > > > The 2.2.0 upgrade scripts were cut on 05/25/16, while HIVE-13354 (which added > some fields to upgrade scripts) was committed to master on 05/27/16, and > there's no conflict. So we accidentally missed those fields for 2.2.0. > cc [~ekoifman] -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15337) Need to specify starttime when putting Compaction job into "attempted" state
[ https://issues.apache.org/jira/browse/HIVE-15337?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Eugene Koifman updated HIVE-15337: -- Attachment: HIVE-15337.02.patch > Need to specify starttime when putting Compaction job into "attempted" state > > > Key: HIVE-15337 > URL: https://issues.apache.org/jira/browse/HIVE-15337 > Project: Hive > Issue Type: Improvement > Components: Transactions >Affects Versions: 1.0.0 >Reporter: Eugene Koifman >Assignee: Eugene Koifman > Attachments: HIVE-15337.01.patch, HIVE-15337.02.patch > > > W/o this SHOW COMPACTIONS output is not as useful > Also, add Hadoop Job ID to SHOW COMPACTIONS output -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Comment Edited] (HIVE-15351) Disable vectorized VectorUDFAdaptor usage with non-column or constant parameters
[ https://issues.apache.org/jira/browse/HIVE-15351?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723396#comment-15723396 ] Matt McCline edited comment on HIVE-15351 at 12/5/16 11:04 PM: --- orc_ppd_schema_evol_3a.q is a flakey test. The others are from queries that no longer vectorize due to the change. Updating patch. was (Author: mmccline): orc_ppd_schema_evol_3a.q is a flakey test. The others are from queries that no long vectorize due to the change. Updating patch. > Disable vectorized VectorUDFAdaptor usage with non-column or constant > parameters > > > Key: HIVE-15351 > URL: https://issues.apache.org/jira/browse/HIVE-15351 > Project: Hive > Issue Type: Bug > Components: Hive >Reporter: Matt McCline >Assignee: Matt McCline >Priority: Blocker > Attachments: HIVE-15351.01.patch, HIVE-15351.02.patch > > > Vectorization using VectorUDFAdaptor is broken and produces wrong results > when the parameter(s) have vectorized expressions that allocate scratch > columns. So, for now, we restrict VectorUDFAdaptor usage to columns or > constant expressions. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15359) skip.footer.line.count doesnt work properly for certain situations
[ https://issues.apache.org/jira/browse/HIVE-15359?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Yongzhi Chen updated HIVE-15359: Attachment: HIVE-15359.1.patch > skip.footer.line.count doesnt work properly for certain situations > -- > > Key: HIVE-15359 > URL: https://issues.apache.org/jira/browse/HIVE-15359 > Project: Hive > Issue Type: Bug > Components: Reader >Reporter: Yongzhi Chen >Assignee: Yongzhi Chen > Attachments: HIVE-15359.1.patch > > > This issue's reproduce is very like HIVE-12718 , but the data file is larger > than 128M . In this case, even make sure only one mapper is used, the footer > is still wrongly skipped. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15359) skip.footer.line.count doesnt work properly for certain situations
[ https://issues.apache.org/jira/browse/HIVE-15359?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Yongzhi Chen updated HIVE-15359: Status: Patch Available (was: Open) Need code review. > skip.footer.line.count doesnt work properly for certain situations > -- > > Key: HIVE-15359 > URL: https://issues.apache.org/jira/browse/HIVE-15359 > Project: Hive > Issue Type: Bug > Components: Reader >Reporter: Yongzhi Chen >Assignee: Yongzhi Chen > Attachments: HIVE-15359.1.patch > > > This issue's reproduce is very like HIVE-12718 , but the data file is larger > than 128M . In this case, even make sure only one mapper is used, the footer > is still wrongly skipped. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15359) skip.footer.line.count doesn't work properly for certain situations
[ https://issues.apache.org/jira/browse/HIVE-15359?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723657#comment-15723657 ] Yongzhi Chen commented on HIVE-15359: - The current skip-footer feature needs one file to map to one split to work properly. The split needs to be not only logically one but also physically one, which means the related file is unsplittable. Reproduce the issue with a data file that has a size of 140M. In Hadoop, it is put into two blocks with lengths of 128M and 12M (128M is dfs.block.size). For this query, Hive uses CombineHiveInputSplit to handle the split; although logically there is only one CombineHiveInputSplit (so one mapper), the split has two paths (the same path with different start positions and lengths: 128M, 12M). When CombineHiveRecordReader uses the split, it generates two FileSplits for the two blocks. The code in HiveContextAwareRecordReader that handles the skip-footer logic assumes each FileSplit is a physically independent file: it skips the footer in the first block and does nothing in the second block. So some records in the middle of the file are wrongly skipped as the footer, while the real footer is still included in the result. Fix the issue by transferring the footer buffer across FileSplits for the same file, which makes the one-mapper case skip the footer correctly. > skip.footer.line.count doesnt work properly for certain situations > -- > > Key: HIVE-15359 > URL: https://issues.apache.org/jira/browse/HIVE-15359 > Project: Hive > Issue Type: Bug > Components: Reader >Reporter: Yongzhi Chen >Assignee: Yongzhi Chen > > This issue's reproduce is very like HIVE-12718 , but the data file is larger > than 128M . In this case, even make sure only one mapper is used, the footer > is still wrongly skipped. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Comment Edited] (HIVE-15279) map join dummy operators are not set up correctly in certain cases with merge join
[ https://issues.apache.org/jira/browse/HIVE-15279?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723650#comment-15723650 ] Sergey Shelukhin edited comment on HIVE-15279 at 12/5/16 11:00 PM: --- 1) As suggested above, we now take dummy ops from all the works, not just the main work. However, I am not sure if it's possible for the same dummy to be added to multiple works given the code (someone might have a picture in their head of how they are propagated, but to me it looks like we just put them in all potentially affected works and hope for the best; hence all the bugs with them). Therefore, to handle this possibility, we use hashset to make sure we only init each dummy operator once. 2) The main goal of the log line is to tell whether work has any dummy operators at all. IDs can be used in conjunction with operator tree that is output elsewhere. 3) Will remove. was (Author: sershe): 1) As suggested above, we now take dummy ops from all the works, not just the main work. However, I am not sure if it's possible for the same dummy to be added to multiple works given the code (someone might have a picture in their head of how they are propagated, but to me it looks like it just puts them in all potentially affected works and hopes for the best, hence all the bugs with them). So, hashset it used to make sure we only init each dummy operator once. 2) The main goal of the log line is to tell whether work has any dummy operators at all. IDs can be used in conjunction with operator tree that is output elsewhere. 3) Will remove. > map join dummy operators are not set up correctly in certain cases with merge > join > -- > > Key: HIVE-15279 > URL: https://issues.apache.org/jira/browse/HIVE-15279 > Project: Hive > Issue Type: Bug >Reporter: Sergey Shelukhin >Assignee: Sergey Shelukhin > Attachments: HIVE-15279.01.patch, HIVE-15279.02.patch, > HIVE-15279.patch > > > As a result, MapJoin is not initialized and there's NPE later. 
> Tez-specific. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Comment Edited] (HIVE-15279) map join dummy operators are not set up correctly in certain cases with merge join
[ https://issues.apache.org/jira/browse/HIVE-15279?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723650#comment-15723650 ] Sergey Shelukhin edited comment on HIVE-15279 at 12/5/16 11:00 PM: --- 1) As suggested above, we now take dummy ops from all the works, not just the main work. However, I am not sure if it's possible for the same dummy to be added to multiple works given the code (someone might have a picture in their head of how they are propagated, but to me it looks like we just put them in all potentially affected works and hope for the best; hence all the bugs with them). Therefore, to handle this possibility, we use hashset to make sure we only init each dummy operator once. 2) The main goal of the log line is to tell whether work has any dummy operators at all. IDs can be used in conjunction with operator tree that is output elsewhere. 3) Will remove the todo was (Author: sershe): 1) As suggested above, we now take dummy ops from all the works, not just the main work. However, I am not sure if it's possible for the same dummy to be added to multiple works given the code (someone might have a picture in their head of how they are propagated, but to me it looks like we just put them in all potentially affected works and hope for the best; hence all the bugs with them). Therefore, to handle this possibility, we use hashset to make sure we only init each dummy operator once. 2) The main goal of the log line is to tell whether work has any dummy operators at all. IDs can be used in conjunction with operator tree that is output elsewhere. 3) Will remove. 
> map join dummy operators are not set up correctly in certain cases with merge > join > -- > > Key: HIVE-15279 > URL: https://issues.apache.org/jira/browse/HIVE-15279 > Project: Hive > Issue Type: Bug >Reporter: Sergey Shelukhin >Assignee: Sergey Shelukhin > Attachments: HIVE-15279.01.patch, HIVE-15279.02.patch, > HIVE-15279.patch > > > As a result, MapJoin is not initialized and there's NPE later. > Tez-specific. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15279) map join dummy operators are not set up correctly in certain cases with merge join
[ https://issues.apache.org/jira/browse/HIVE-15279?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723650#comment-15723650 ] Sergey Shelukhin commented on HIVE-15279: - 1) As suggested above, we now take dummy ops from all the works, not just the main work. However, I am not sure if it's possible for the same dummy to be added to multiple works given the code (someone might have a picture in their head of how they are propagated, but to me it looks like it just puts them in all potentially affected works and hopes for the best, hence all the bugs with them). So, hashset it used to make sure we only init each dummy operator once. 2) The main goal of the log line is to tell whether work has any dummy operators at all. IDs can be used in conjunction with operator tree that is output elsewhere. 3) Will remove. > map join dummy operators are not set up correctly in certain cases with merge > join > -- > > Key: HIVE-15279 > URL: https://issues.apache.org/jira/browse/HIVE-15279 > Project: Hive > Issue Type: Bug >Reporter: Sergey Shelukhin >Assignee: Sergey Shelukhin > Attachments: HIVE-15279.01.patch, HIVE-15279.02.patch, > HIVE-15279.patch > > > As a result, MapJoin is not initialized and there's NPE later. > Tez-specific. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15322) Skipping "hbase mapredcp" in hive script for certain services
[ https://issues.apache.org/jira/browse/HIVE-15322?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Daniel Dai updated HIVE-15322: -- Attachment: HIVE-15322.3.patch Further skip "hadoop version" for applicable services. > Skipping "hbase mapredcp" in hive script for certain services > - > > Key: HIVE-15322 > URL: https://issues.apache.org/jira/browse/HIVE-15322 > Project: Hive > Issue Type: Improvement >Reporter: Daniel Dai >Assignee: Daniel Dai > Attachments: HIVE-15322.1.patch, HIVE-15322.2.patch, > HIVE-15322.3.patch > > > "hbase mapredcp" is intended to append hbase classpath to hive. However, the > command can take some time when the system is heavy loaded. In some extreme > cases, we saw ~20s delay due to it. For certain commands, such as > "schemaTool", hbase classpath is certainly useless, and we can safely skip > invoking it. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15355) Concurrency issues during parallel moveFile due to HDFSUtils.setFullFileStatus
[ https://issues.apache.org/jira/browse/HIVE-15355?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723636#comment-15723636 ] Vihang Karajgaonkar commented on HIVE-15355: {{Collections.unmodifiableList(...)}} is not null safe but {{aclStatus.getEntries()}} should never return null. AclStatus objects can only be created using {{AclStatus.Builder}} class and it initializes entries to a newArrayList(). I can add the check if you think its still a good idea but seems like a redundant check to me. I will look into the testing ideas you gave above. Thanks a lot. > Concurrency issues during parallel moveFile due to HDFSUtils.setFullFileStatus > -- > > Key: HIVE-15355 > URL: https://issues.apache.org/jira/browse/HIVE-15355 > Project: Hive > Issue Type: Bug >Affects Versions: 2.1.0, 2.2.0 >Reporter: Vihang Karajgaonkar >Assignee: Vihang Karajgaonkar > Attachments: HIVE-15355.01.patch > > > It is possible to run into concurrency issues during multi-threaded moveFile > issued when processing queries like {{INSERT OVERWRITE TABLE ... SELECT ..}} > when there are multiple files in the staging directory which is a > subdirectory of the target directory. 
The issue is hard to reproduce but > following stacktrace is one such example: > {noformat} > INFO : Loading data to table > functional_text_gzip.alltypesaggmultifilesnopart from > hdfs://localhost:20500/test-warehouse/alltypesaggmultifilesnopart_text_gzip/.hive-staging_hive_2016-12-01_19-58-21_712_8968735301422943318-1/-ext-1 > ERROR : Failed with exception java.lang.ArrayIndexOutOfBoundsException > org.apache.hadoop.hive.ql.metadata.HiveException: > java.lang.ArrayIndexOutOfBoundsException > at org.apache.hadoop.hive.ql.metadata.Hive.moveFile(Hive.java:2858) > at > org.apache.hadoop.hive.ql.metadata.Hive.replaceFiles(Hive.java:3124) > at org.apache.hadoop.hive.ql.metadata.Hive.loadTable(Hive.java:1701) > at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:313) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:214) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1976) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1689) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1421) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1205) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1200) > at > org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:237) > at > org.apache.hive.service.cli.operation.SQLOperation.access$300(SQLOperation.java:88) > at > org.apache.hive.service.cli.operation.SQLOperation$3$1.run(SQLOperation.java:293) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1796) > Getting log thread is interrupted, since query is done! 
> at > org.apache.hive.service.cli.operation.SQLOperation$3.run(SQLOperation.java:306) > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) > at java.util.concurrent.FutureTask.run(FutureTask.java:262) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:745) > Caused by: java.lang.ArrayIndexOutOfBoundsException > at java.lang.System.arraycopy(Native Method) > at java.util.ArrayList.removeRange(ArrayList.java:616) > at java.util.ArrayList$SubList.removeRange(ArrayList.java:1021) > at java.util.AbstractList.clear(AbstractList.java:234) > at > com.google.common.collect.Iterables.removeIfFromRandomAccessList(Iterables.java:213) > at com.google.common.collect.Iterables.removeIf(Iterables.java:184) > at > org.apache.hadoop.hive.shims.Hadoop23Shims.removeBaseAclEntries(Hadoop23Shims.java:865) > at > org.apache.hadoop.hive.shims.Hadoop23Shims.setFullFileStatus(Hadoop23Shims.java:757) > at org.apache.hadoop.hive.ql.metadata.Hive$3.call(Hive.java:2835) > at org.apache.hadoop.hive.ql.metadata.Hive$3.call(Hive.java:2828) > ... 4 more > ERROR : FAILED: Execution Error, return code 1 from > org.apache.hadoop.hive.ql.exec.MoveTask > {noformat} > Quick online search also shows some other instances like the one mentioned in > http://stackoverflow.com/questions/38900333/get-concurrentmodifi
[jira] [Commented] (HIVE-14731) Use Tez cartesian product edge in Hive (unpartitioned case only)
[ https://issues.apache.org/jira/browse/HIVE-14731?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723617#comment-15723617 ] Hive QA commented on HIVE-14731: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12841395/HIVE-14731.9.patch {color:green}SUCCESS:{color} +1 due to 2 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 10 failed/errored test(s), 10749 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=50) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample2] (batchId=5) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample4] (batchId=15) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample6] (batchId=61) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample7] (batchId=60) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample9] (batchId=38) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[orc_ppd_schema_evol_3a] (batchId=134) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[transform_ppr2] (batchId=134) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver[explainanalyze_4] (batchId=92) org.apache.hadoop.hive.cli.TestSparkCliDriver.org.apache.hadoop.hive.cli.TestSparkCliDriver (batchId=96) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/2421/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/2421/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-2421/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 10 tests failed {noformat} This message is automatically generated. 
ATTACHMENT ID: 12841395 - PreCommit-HIVE-Build > Use Tez cartesian product edge in Hive (unpartitioned case only) > > > Key: HIVE-14731 > URL: https://issues.apache.org/jira/browse/HIVE-14731 > Project: Hive > Issue Type: Bug >Reporter: Zhiyuan Yang >Assignee: Zhiyuan Yang > Attachments: HIVE-14731.1.patch, HIVE-14731.2.patch, > HIVE-14731.3.patch, HIVE-14731.4.patch, HIVE-14731.5.patch, > HIVE-14731.6.patch, HIVE-14731.7.patch, HIVE-14731.8.patch, HIVE-14731.9.patch > > > Given cartesian product edge is available in Tez now (see TEZ-3230), let's > integrate it into Hive on Tez. This allows us to have more than one reducer > in cross product queries. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-12504) TxnHandler.abortTxn() should check if already aborted to improve message
[ https://issues.apache.org/jira/browse/HIVE-12504?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723597#comment-15723597 ] Wei Zheng commented on HIVE-12504: -- OK, thanks for explanation. +1 > TxnHandler.abortTxn() should check if already aborted to improve message > > > Key: HIVE-12504 > URL: https://issues.apache.org/jira/browse/HIVE-12504 > Project: Hive > Issue Type: Bug > Components: Transactions >Affects Versions: 1.0.0 >Reporter: Eugene Koifman >Assignee: Eugene Koifman > Attachments: HIVE-12504.01.patch, HIVE-12504.patch > > > If the update statement misses, it would be useful for abortTxn() to > run another query to see if the requested txnid is in Aborted state to > improve error msg. > Note: get a separate read_committed connection for this. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15329) NullPointerException might occur when creating a table
[ https://issues.apache.org/jira/browse/HIVE-15329?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723590#comment-15723590 ] Prasanth Jayachandran commented on HIVE-15329: -- +1, pending tests > NullPointerException might occur when create table > -- > > Key: HIVE-15329 > URL: https://issues.apache.org/jira/browse/HIVE-15329 > Project: Hive > Issue Type: Bug > Components: Metastore >Affects Versions: 2.0.0, 2.1.0 >Reporter: Meilong Huang >Assignee: Meilong Huang > Labels: metastore > Attachments: HIVE-15329.1.patch > > > NullPointerException might occur if table.getParameters() returns null when > method isNonNativeTable is invoked in class MetaStoreUtils. > {code} > public static boolean isNonNativeTable(Table table) { > if (table == null) { > return false; > } > return > (table.getParameters().get(hive_metastoreConstants.META_TABLE_STORAGE) != > null); > } > {code} > This will cause a stack trace without any suggestive information at client: > {code} > org.apache.hadoop.hive.metastore.api.MetaException: > java.lang.NullPointerException > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$create_table_with_environment_context_result$create_table_with_environment_context_resultStandardScheme.read... > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15355) Concurrency issues during parallel moveFile due to HDFSUtils.setFullFileStatus
[ https://issues.apache.org/jira/browse/HIVE-15355?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723583#comment-15723583 ] Sahil Takiar commented on HIVE-15355: - Some comments: * Is {{Collections.unmodifiableList(...)}} null safe, what happens if {{aclStatus.getEntries()}} returns null * Test {{HdfsUtils.HadoopStatus.getAclEntries()}} could be as simple as invoking the method, trying the modify it, and then expecting an {{UnsupportedOperationException}} * Tests for {{HdfsUtils.setFullFileStatus}} may be trickier, one idea is to pass in a mock {{HadoopFileStatus}} and ensure that {{getAclEntries}} is invoked, but not {{getAclStatus().getEntries()}} > Concurrency issues during parallel moveFile due to HDFSUtils.setFullFileStatus > -- > > Key: HIVE-15355 > URL: https://issues.apache.org/jira/browse/HIVE-15355 > Project: Hive > Issue Type: Bug >Affects Versions: 2.1.0, 2.2.0 >Reporter: Vihang Karajgaonkar >Assignee: Vihang Karajgaonkar > Attachments: HIVE-15355.01.patch > > > It is possible to run into concurrency issues during multi-threaded moveFile > issued when processing queries like {{INSERT OVERWRITE TABLE ... SELECT ..}} > when there are multiple files in the staging directory which is a > subdirectory of the target directory. 
The issue is hard to reproduce but > following stacktrace is one such example: > {noformat} > INFO : Loading data to table > functional_text_gzip.alltypesaggmultifilesnopart from > hdfs://localhost:20500/test-warehouse/alltypesaggmultifilesnopart_text_gzip/.hive-staging_hive_2016-12-01_19-58-21_712_8968735301422943318-1/-ext-1 > ERROR : Failed with exception java.lang.ArrayIndexOutOfBoundsException > org.apache.hadoop.hive.ql.metadata.HiveException: > java.lang.ArrayIndexOutOfBoundsException > at org.apache.hadoop.hive.ql.metadata.Hive.moveFile(Hive.java:2858) > at > org.apache.hadoop.hive.ql.metadata.Hive.replaceFiles(Hive.java:3124) > at org.apache.hadoop.hive.ql.metadata.Hive.loadTable(Hive.java:1701) > at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:313) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:214) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1976) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1689) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1421) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1205) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1200) > at > org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:237) > at > org.apache.hive.service.cli.operation.SQLOperation.access$300(SQLOperation.java:88) > at > org.apache.hive.service.cli.operation.SQLOperation$3$1.run(SQLOperation.java:293) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1796) > Getting log thread is interrupted, since query is done! 
> at > org.apache.hive.service.cli.operation.SQLOperation$3.run(SQLOperation.java:306) > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) > at java.util.concurrent.FutureTask.run(FutureTask.java:262) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:745) > Caused by: java.lang.ArrayIndexOutOfBoundsException > at java.lang.System.arraycopy(Native Method) > at java.util.ArrayList.removeRange(ArrayList.java:616) > at java.util.ArrayList$SubList.removeRange(ArrayList.java:1021) > at java.util.AbstractList.clear(AbstractList.java:234) > at > com.google.common.collect.Iterables.removeIfFromRandomAccessList(Iterables.java:213) > at com.google.common.collect.Iterables.removeIf(Iterables.java:184) > at > org.apache.hadoop.hive.shims.Hadoop23Shims.removeBaseAclEntries(Hadoop23Shims.java:865) > at > org.apache.hadoop.hive.shims.Hadoop23Shims.setFullFileStatus(Hadoop23Shims.java:757) > at org.apache.hadoop.hive.ql.metadata.Hive$3.call(Hive.java:2835) > at org.apache.hadoop.hive.ql.metadata.Hive$3.call(Hive.java:2828) > ... 4 more > ERROR : FAILED: Execution Error, return code 1 from > org.apache.hadoop.hive.ql.exec.MoveTask > {noformat} > Quick online search also shows some other instances like
[jira] [Commented] (HIVE-15279) map join dummy operators are not set up correctly in certain cases with merge join
[ https://issues.apache.org/jira/browse/HIVE-15279?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723582#comment-15723582 ] Gunther Hagleitner commented on HIVE-15279: --- Hm. Can you explain how this works now/ why you had to change the patch (equals/hash, etc)? {noformat} 125 l4j.info("Main work is " + reduceWork.getName()); 126 List workOps = reduceWork.getDummyOps(); 127 HashSet dummyOps = workOps == null ? null : new HashSet<>(workOps); 126 tagToReducerMap.put(redWork.getTag(), redWork); 128 tagToReducerMap.put(redWork.getTag(), redWork); 127 if (mergeWorkList != null) {129 if (mergeWorkList != null) { 128 for (BaseWork mergeWork : mergeWorkList) {130 for (BaseWork mergeWork : mergeWorkList) { 131 if (l4j.isDebugEnabled()) { 132 l4j.debug("Additional work " + mergeWork.getName()); 133 } 134 workOps = mergeWork.getDummyOps(); 135 if (workOps != null) { 136 if (dummyOps == null) { 137 dummyOps = new HashSet<>(workOps); 138 } else { 139 dummyOps.addAll(workOps); 140 } {noformat} This seems unnecessarily confusing to me. Can't we just always have empty lists/hash instead of distinguishing between empty and null? Also, before commit can you either resolve or remove the TODO? {noformat} LOG.debug("Setting dummy ops for work " + work.getName() + ": " + dummyOps); {noformat} Isn't really that helpful to just know the id of the list of dummy ops, is it? > map join dummy operators are not set up correctly in certain cases with merge > join > -- > > Key: HIVE-15279 > URL: https://issues.apache.org/jira/browse/HIVE-15279 > Project: Hive > Issue Type: Bug >Reporter: Sergey Shelukhin >Assignee: Sergey Shelukhin > Attachments: HIVE-15279.01.patch, HIVE-15279.02.patch, > HIVE-15279.patch > > > As a result, MapJoin is not initialized and there's NPE later. > Tez-specific. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15361) INSERT dynamic partition on S3 fails with a MoveTask failure
[ https://issues.apache.org/jira/browse/HIVE-15361?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723546#comment-15723546 ] Illya Yalovyy commented on HIVE-15361: -- Thank you! I'll take a look. > INSERT dynamic partition on S3 fails with a MoveTask failure > > > Key: HIVE-15361 > URL: https://issues.apache.org/jira/browse/HIVE-15361 > Project: Hive > Issue Type: Bug > Components: Hive >Affects Versions: 2.2.0 >Reporter: Sergio Peña >Assignee: Sergio Peña >Priority: Critical > Attachments: HIVE-15361.1.patch, HIVE-15361.2.patch > > > The following failure is due to the patch that merges two MoveTask found on > the ConditionalTask (See HIVE-15114) > {panel:title=Repro steps} > CREATE EXTERNAL TABLE external_1k0jU (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING); > ALTER TABLE external_1k0jU ADD PARTITION (COUNTRY='USA', STATE='CA'); > INSERT INTO external_1k0jU PARTITION (country='USA', state='CA') values > ('John Doe', 23), ('Jane Doe', 22); > CREATE EXTERNAL TABLE external_P3kiT (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING) location 's3a://hive-on-s3/foo/bar/'; > set hive.exec.dynamic.partition.mode=nonstrict; > INSERT INTO TABLE external_P3kiT PARTITION (country, state) SELECT * FROM > external_1k0jU; > {panel} > {panel:title=Error & stack trace} > ERROR : FAILED: Execution Error, return code 1 from > org.apache.hadoop.hive.ql.exec.MoveTask > INFO : MapReduce Jobs Launched: > INFO : Stage-Stage-1: Map: 1 Cumulative CPU: 3.64 sec HDFS Read: 3656 > HDFS Write: 99 SUCCESS > INFO : Total MapReduce CPU Time Spent: 3 seconds 640 msec > INFO : Completed executing > command(queryId=hive_20161201113939_d64df5d7-a4c4-4885-846f-10f0223fcf4c); > Time taken: 23.227 seconds > Error: Error while processing statement: FAILED: Execution Error, return code > 1 from org.apache.hadoop.hive.ql.exec.MoveTask (state=08S01,code=1) > INFO : Loading data to table default.external_p3kit partition 
(country=null, > state=null) from > s3a://hive-on-s3/foo/bar/.hive-staging_hive_2016-12-01_11-39-48_741_6724911837889341086-13/-ext-10002 > {code} > ERROR : Failed with exception MetaException(message:Invalid partition key & > values; keys [country, state, ], values []) > org.apache.hadoop.hive.ql.metadata.HiveException: > MetaException(message:Invalid partition key & values; keys [country, state, > ], values []) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1902) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1834) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1428) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1388) > at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:453) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:214) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1976) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1689) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1421) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1205) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1200) > at > org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:237) > at > org.apache.hive.service.cli.operation.SQLOperation.access$300(SQLOperation.java:88) > at > org.apache.hive.service.cli.operation.SQLOperation$3$1.run(SQLOperation.java:293) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1796) > at > org.apache.hive.service.cli.operation.SQLOperation$3.run(SQLOperation.java:306) > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) > at java.util.concurrent.FutureTask.run(FutureTask.java:262) > at > 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:745) > Caused by: MetaException(message:Invalid partition key & values; keys > [country, state, ], values []) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_with_auth_result$get_partition_with_auth_resultStandardScheme.read(ThriftHiveMetastore.java:65142) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_with_auth_result$get_partition_with_auth_result
[jira] [Commented] (HIVE-15346) "values temp table" should not be an input source for the query
[ https://issues.apache.org/jira/browse/HIVE-15346?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723502#comment-15723502 ] Hive QA commented on HIVE-15346: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12841796/HIVE-15346.3.patch {color:red}ERROR:{color} -1 due to no test(s) being added or modified. {color:red}ERROR:{color} -1 due to 11 failed/errored test(s), 10739 tests executed *Failed tests:* {noformat} TestMiniSparkOnYarnCliDriver - did not produce a TEST-*.xml file (likely timed out) (batchId=159) [scriptfile1.q,vector_outer_join5.q,file_with_header_footer.q,bucket4.q,input16_cc.q,bucket5.q,infer_bucket_sort_merge.q,constprog_partitioner.q,orc_merge2.q,reduce_deduplicate.q,schemeAuthority2.q,load_fs2.q,orc_merge8.q,orc_merge_incompat2.q,infer_bucket_sort_bucketed_table.q,vector_outer_join4.q,disable_merge_for_bucketing.q,vector_inner_join.q,orc_merge7.q] org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=50) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_sortmerge_join_2] (batchId=44) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample2] (batchId=5) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample4] (batchId=15) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample6] (batchId=61) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample7] (batchId=60) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample9] (batchId=38) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[transform_ppr2] (batchId=134) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver[explainanalyze_4] (batchId=92) org.apache.hadoop.hive.cli.TestSparkNegativeCliDriver.org.apache.hadoop.hive.cli.TestSparkNegativeCliDriver (batchId=227) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/2420/testReport Console output: 
https://builds.apache.org/job/PreCommit-HIVE-Build/2420/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-2420/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 11 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12841796 - PreCommit-HIVE-Build > "values temp table" should not be an input source for the query > --- > > Key: HIVE-15346 > URL: https://issues.apache.org/jira/browse/HIVE-15346 > Project: Hive > Issue Type: Sub-task > Components: Query Planning >Affects Versions: 2.2.0 >Reporter: Aihua Xu >Assignee: Aihua Xu > Attachments: HIVE-15346.1.patch, HIVE-15346.2.patch, > HIVE-15346.3.patch, HIVE-15346.codeonly.patch > > > For "insert values()" query, a values tmp table is created. Such table is > treated as input source. That seems to be incorrect since that should be > treated internal and temporary. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15361) INSERT dynamic partition on S3 fails with a MoveTask failure
[ https://issues.apache.org/jira/browse/HIVE-15361?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723494#comment-15723494 ] Sergio Peña commented on HIVE-15361: What results do you need? I attached the RB link, and mentioned all tests run successfully. The patch contains some updates on them btw. I also added a couple of new tests: - insert_into_dynamic_partitions.q - insert_overwrite_dynamic_partitions.q > INSERT dynamic partition on S3 fails with a MoveTask failure > > > Key: HIVE-15361 > URL: https://issues.apache.org/jira/browse/HIVE-15361 > Project: Hive > Issue Type: Bug > Components: Hive >Affects Versions: 2.2.0 >Reporter: Sergio Peña >Assignee: Sergio Peña >Priority: Critical > Attachments: HIVE-15361.1.patch, HIVE-15361.2.patch > > > The following failure is due to the patch that merges two MoveTask found on > the ConditionalTask (See HIVE-15114) > {panel:title=Repro steps} > CREATE EXTERNAL TABLE external_1k0jU (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING); > ALTER TABLE external_1k0jU ADD PARTITION (COUNTRY='USA', STATE='CA'); > INSERT INTO external_1k0jU PARTITION (country='USA', state='CA') values > ('John Doe', 23), ('Jane Doe', 22); > CREATE EXTERNAL TABLE external_P3kiT (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING) location 's3a://hive-on-s3/foo/bar/'; > set hive.exec.dynamic.partition.mode=nonstrict; > INSERT INTO TABLE external_P3kiT PARTITION (country, state) SELECT * FROM > external_1k0jU; > {panel} > {panel:title=Error & stack trace} > ERROR : FAILED: Execution Error, return code 1 from > org.apache.hadoop.hive.ql.exec.MoveTask > INFO : MapReduce Jobs Launched: > INFO : Stage-Stage-1: Map: 1 Cumulative CPU: 3.64 sec HDFS Read: 3656 > HDFS Write: 99 SUCCESS > INFO : Total MapReduce CPU Time Spent: 3 seconds 640 msec > INFO : Completed executing > command(queryId=hive_20161201113939_d64df5d7-a4c4-4885-846f-10f0223fcf4c); > Time taken: 23.227 seconds > 
Error: Error while processing statement: FAILED: Execution Error, return code > 1 from org.apache.hadoop.hive.ql.exec.MoveTask (state=08S01,code=1) > INFO : Loading data to table default.external_p3kit partition (country=null, > state=null) from > s3a://hive-on-s3/foo/bar/.hive-staging_hive_2016-12-01_11-39-48_741_6724911837889341086-13/-ext-10002 > {code} > ERROR : Failed with exception MetaException(message:Invalid partition key & > values; keys [country, state, ], values []) > org.apache.hadoop.hive.ql.metadata.HiveException: > MetaException(message:Invalid partition key & values; keys [country, state, > ], values []) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1902) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1834) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1428) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1388) > at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:453) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:214) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1976) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1689) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1421) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1205) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1200) > at > org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:237) > at > org.apache.hive.service.cli.operation.SQLOperation.access$300(SQLOperation.java:88) > at > org.apache.hive.service.cli.operation.SQLOperation$3$1.run(SQLOperation.java:293) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1796) > at > 
org.apache.hive.service.cli.operation.SQLOperation$3.run(SQLOperation.java:306) > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) > at java.util.concurrent.FutureTask.run(FutureTask.java:262) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:745) > Caused by: MetaException(message:Invalid partition key & values; keys > [country, state, ], values []) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_with_auth_res
[jira] [Updated] (HIVE-15361) INSERT dynamic partition on S3 fails with a MoveTask failure
[ https://issues.apache.org/jira/browse/HIVE-15361?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sergio Peña updated HIVE-15361: --- Attachment: HIVE-15361.2.patch > INSERT dynamic partition on S3 fails with a MoveTask failure > > > Key: HIVE-15361 > URL: https://issues.apache.org/jira/browse/HIVE-15361 > Project: Hive > Issue Type: Bug > Components: Hive >Affects Versions: 2.2.0 >Reporter: Sergio Peña >Assignee: Sergio Peña >Priority: Critical > Attachments: HIVE-15361.1.patch, HIVE-15361.2.patch > > > The following failure is due to the patch that merges two MoveTask found on > the ConditionalTask (See HIVE-15114) > {panel:title=Repro steps} > CREATE EXTERNAL TABLE external_1k0jU (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING); > ALTER TABLE external_1k0jU ADD PARTITION (COUNTRY='USA', STATE='CA'); > INSERT INTO external_1k0jU PARTITION (country='USA', state='CA') values > ('John Doe', 23), ('Jane Doe', 22); > CREATE EXTERNAL TABLE external_P3kiT (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING) location 's3a://hive-on-s3/foo/bar/'; > set hive.exec.dynamic.partition.mode=nonstrict; > INSERT INTO TABLE external_P3kiT PARTITION (country, state) SELECT * FROM > external_1k0jU; > {panel} > {panel:title=Error & stack trace} > ERROR : FAILED: Execution Error, return code 1 from > org.apache.hadoop.hive.ql.exec.MoveTask > INFO : MapReduce Jobs Launched: > INFO : Stage-Stage-1: Map: 1 Cumulative CPU: 3.64 sec HDFS Read: 3656 > HDFS Write: 99 SUCCESS > INFO : Total MapReduce CPU Time Spent: 3 seconds 640 msec > INFO : Completed executing > command(queryId=hive_20161201113939_d64df5d7-a4c4-4885-846f-10f0223fcf4c); > Time taken: 23.227 seconds > Error: Error while processing statement: FAILED: Execution Error, return code > 1 from org.apache.hadoop.hive.ql.exec.MoveTask (state=08S01,code=1) > INFO : Loading data to table default.external_p3kit partition (country=null, > state=null) from > 
s3a://hive-on-s3/foo/bar/.hive-staging_hive_2016-12-01_11-39-48_741_6724911837889341086-13/-ext-10002 > {code} > ERROR : Failed with exception MetaException(message:Invalid partition key & > values; keys [country, state, ], values []) > org.apache.hadoop.hive.ql.metadata.HiveException: > MetaException(message:Invalid partition key & values; keys [country, state, > ], values []) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1902) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1834) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1428) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1388) > at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:453) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:214) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1976) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1689) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1421) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1205) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1200) > at > org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:237) > at > org.apache.hive.service.cli.operation.SQLOperation.access$300(SQLOperation.java:88) > at > org.apache.hive.service.cli.operation.SQLOperation$3$1.run(SQLOperation.java:293) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1796) > at > org.apache.hive.service.cli.operation.SQLOperation$3.run(SQLOperation.java:306) > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) > at java.util.concurrent.FutureTask.run(FutureTask.java:262) > at > 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:745) > Caused by: MetaException(message:Invalid partition key & values; keys > [country, state, ], values []) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_with_auth_result$get_partition_with_auth_resultStandardScheme.read(ThriftHiveMetastore.java:65142) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_with_auth_result$get_partition_with_auth_resultStandardScheme.read(ThriftHiveMetastore.java:65119) >
[jira] [Updated] (HIVE-15355) Concurrency issues during parallel moveFile due to HDFSUtils.setFullFileStatus
[ https://issues.apache.org/jira/browse/HIVE-15355?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Vihang Karajgaonkar updated HIVE-15355: --- Attachment: HIVE-15355.01.patch Attaching the patch for the issue. The patch uses an idea similar to what [~rajesh.balamohan] suggested but instead of passing a copy of List to {{setFullFileStatus}}, the list is derived from within the method itself. Passing the List might not be very helpful considering that it is possible to still access it from {{sourceStatus.getAclStatus().getEntries()}}. Also, {{sourceStatus}} argument cannot be removed since it is required for obtaining other information from within the method. > Concurrency issues during parallel moveFile due to HDFSUtils.setFullFileStatus > -- > > Key: HIVE-15355 > URL: https://issues.apache.org/jira/browse/HIVE-15355 > Project: Hive > Issue Type: Bug >Affects Versions: 2.1.0, 2.2.0 >Reporter: Vihang Karajgaonkar >Assignee: Vihang Karajgaonkar > Attachments: HIVE-15355.01.patch > > > It is possible to run into concurrency issues during multi-threaded moveFile > issued when processing queries like {{INSERT OVERWRITE TABLE ... SELECT ..}} > when there are multiple files in the staging directory which is a > subdirectory of the target directory. 
The issue is hard to reproduce but > following stacktrace is one such example: > {noformat} > INFO : Loading data to table > functional_text_gzip.alltypesaggmultifilesnopart from > hdfs://localhost:20500/test-warehouse/alltypesaggmultifilesnopart_text_gzip/.hive-staging_hive_2016-12-01_19-58-21_712_8968735301422943318-1/-ext-1 > ERROR : Failed with exception java.lang.ArrayIndexOutOfBoundsException > org.apache.hadoop.hive.ql.metadata.HiveException: > java.lang.ArrayIndexOutOfBoundsException > at org.apache.hadoop.hive.ql.metadata.Hive.moveFile(Hive.java:2858) > at > org.apache.hadoop.hive.ql.metadata.Hive.replaceFiles(Hive.java:3124) > at org.apache.hadoop.hive.ql.metadata.Hive.loadTable(Hive.java:1701) > at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:313) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:214) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1976) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1689) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1421) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1205) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1200) > at > org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:237) > at > org.apache.hive.service.cli.operation.SQLOperation.access$300(SQLOperation.java:88) > at > org.apache.hive.service.cli.operation.SQLOperation$3$1.run(SQLOperation.java:293) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1796) > Getting log thread is interrupted, since query is done! 
> at > org.apache.hive.service.cli.operation.SQLOperation$3.run(SQLOperation.java:306) > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) > at java.util.concurrent.FutureTask.run(FutureTask.java:262) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:745) > Caused by: java.lang.ArrayIndexOutOfBoundsException > at java.lang.System.arraycopy(Native Method) > at java.util.ArrayList.removeRange(ArrayList.java:616) > at java.util.ArrayList$SubList.removeRange(ArrayList.java:1021) > at java.util.AbstractList.clear(AbstractList.java:234) > at > com.google.common.collect.Iterables.removeIfFromRandomAccessList(Iterables.java:213) > at com.google.common.collect.Iterables.removeIf(Iterables.java:184) > at > org.apache.hadoop.hive.shims.Hadoop23Shims.removeBaseAclEntries(Hadoop23Shims.java:865) > at > org.apache.hadoop.hive.shims.Hadoop23Shims.setFullFileStatus(Hadoop23Shims.java:757) > at org.apache.hadoop.hive.ql.metadata.Hive$3.call(Hive.java:2835) > at org.apache.hadoop.hive.ql.metadata.Hive$3.call(Hive.java:2828) > ... 4 more > ERROR : FAILED: Execution Error, return code 1 from > org.apache.hadoop.hive.ql.exec.MoveTask > {noformat} > Quick online search also shows some other instances like the one m
[jira] [Updated] (HIVE-15355) Concurrency issues during parallel moveFile due to HDFSUtils.setFullFileStatus
[ https://issues.apache.org/jira/browse/HIVE-15355?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Vihang Karajgaonkar updated HIVE-15355: --- Status: Patch Available (was: Open) > Concurrency issues during parallel moveFile due to HDFSUtils.setFullFileStatus > -- > > Key: HIVE-15355 > URL: https://issues.apache.org/jira/browse/HIVE-15355 > Project: Hive > Issue Type: Bug >Affects Versions: 2.1.0, 2.2.0 >Reporter: Vihang Karajgaonkar >Assignee: Vihang Karajgaonkar > Attachments: HIVE-15355.01.patch > > > It is possible to run into concurrency issues during multi-threaded moveFile > issued when processing queries like {{INSERT OVERWRITE TABLE ... SELECT ..}} > when there are multiple files in the staging directory which is a > subdirectory of the target directory. The issue is hard to reproduce but > following stacktrace is one such example: > {noformat} > INFO : Loading data to table > functional_text_gzip.alltypesaggmultifilesnopart from > hdfs://localhost:20500/test-warehouse/alltypesaggmultifilesnopart_text_gzip/.hive-staging_hive_2016-12-01_19-58-21_712_8968735301422943318-1/-ext-1 > ERROR : Failed with exception java.lang.ArrayIndexOutOfBoundsException > org.apache.hadoop.hive.ql.metadata.HiveException: > java.lang.ArrayIndexOutOfBoundsException > at org.apache.hadoop.hive.ql.metadata.Hive.moveFile(Hive.java:2858) > at > org.apache.hadoop.hive.ql.metadata.Hive.replaceFiles(Hive.java:3124) > at org.apache.hadoop.hive.ql.metadata.Hive.loadTable(Hive.java:1701) > at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:313) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:214) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1976) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1689) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1421) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1205) > at 
org.apache.hadoop.hive.ql.Driver.run(Driver.java:1200) > at > org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:237) > at > org.apache.hive.service.cli.operation.SQLOperation.access$300(SQLOperation.java:88) > at > org.apache.hive.service.cli.operation.SQLOperation$3$1.run(SQLOperation.java:293) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1796) > Getting log thread is interrupted, since query is done! > at > org.apache.hive.service.cli.operation.SQLOperation$3.run(SQLOperation.java:306) > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) > at java.util.concurrent.FutureTask.run(FutureTask.java:262) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:745) > Caused by: java.lang.ArrayIndexOutOfBoundsException > at java.lang.System.arraycopy(Native Method) > at java.util.ArrayList.removeRange(ArrayList.java:616) > at java.util.ArrayList$SubList.removeRange(ArrayList.java:1021) > at java.util.AbstractList.clear(AbstractList.java:234) > at > com.google.common.collect.Iterables.removeIfFromRandomAccessList(Iterables.java:213) > at com.google.common.collect.Iterables.removeIf(Iterables.java:184) > at > org.apache.hadoop.hive.shims.Hadoop23Shims.removeBaseAclEntries(Hadoop23Shims.java:865) > at > org.apache.hadoop.hive.shims.Hadoop23Shims.setFullFileStatus(Hadoop23Shims.java:757) > at org.apache.hadoop.hive.ql.metadata.Hive$3.call(Hive.java:2835) > at org.apache.hadoop.hive.ql.metadata.Hive$3.call(Hive.java:2828) > ... 
4 more > ERROR : FAILED: Execution Error, return code 1 from > org.apache.hadoop.hive.ql.exec.MoveTask > {noformat} > Quick online search also shows some other instances like the one mentioned in > http://stackoverflow.com/questions/38900333/get-concurrentmodificationexception-in-step-2-create-intermediate-flat-hive-tab > The issue seems to be coming from the below code : > {code} > if (aclEnabled) { > aclStatus = sourceStatus.getAclStatus(); > if (aclStatus != null) { > LOG.trace(aclStatus.toString()); > aclEntries = aclStatus.getEntries(); > removeBaseAclEntries(aclEntries); > //the ACL api's also expect the tradition use
[jira] [Commented] (HIVE-15361) INSERT dynamic partition on S3 fails with a MoveTask failure
[ https://issues.apache.org/jira/browse/HIVE-15361?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723455#comment-15723455 ] Illya Yalovyy commented on HIVE-15361: -- Could you please post results of tests from itests/hive-blobstore package? > INSERT dynamic partition on S3 fails with a MoveTask failure > > > Key: HIVE-15361 > URL: https://issues.apache.org/jira/browse/HIVE-15361 > Project: Hive > Issue Type: Bug > Components: Hive >Affects Versions: 2.2.0 >Reporter: Sergio Peña >Assignee: Sergio Peña >Priority: Critical > Attachments: HIVE-15361.1.patch > > > The following failure is due to the patch that merges two MoveTask found on > the ConditionalTask (See HIVE-15114) > {panel:title=Repro steps} > CREATE EXTERNAL TABLE external_1k0jU (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING); > ALTER TABLE external_1k0jU ADD PARTITION (COUNTRY='USA', STATE='CA'); > INSERT INTO external_1k0jU PARTITION (country='USA', state='CA') values > ('John Doe', 23), ('Jane Doe', 22); > CREATE EXTERNAL TABLE external_P3kiT (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING) location 's3a://hive-on-s3/foo/bar/'; > set hive.exec.dynamic.partition.mode=nonstrict; > INSERT INTO TABLE external_P3kiT PARTITION (country, state) SELECT * FROM > external_1k0jU; > {panel} > {panel:title=Error & stack trace} > ERROR : FAILED: Execution Error, return code 1 from > org.apache.hadoop.hive.ql.exec.MoveTask > INFO : MapReduce Jobs Launched: > INFO : Stage-Stage-1: Map: 1 Cumulative CPU: 3.64 sec HDFS Read: 3656 > HDFS Write: 99 SUCCESS > INFO : Total MapReduce CPU Time Spent: 3 seconds 640 msec > INFO : Completed executing > command(queryId=hive_20161201113939_d64df5d7-a4c4-4885-846f-10f0223fcf4c); > Time taken: 23.227 seconds > Error: Error while processing statement: FAILED: Execution Error, return code > 1 from org.apache.hadoop.hive.ql.exec.MoveTask (state=08S01,code=1) > INFO : Loading data to table 
default.external_p3kit partition (country=null, > state=null) from > s3a://hive-on-s3/foo/bar/.hive-staging_hive_2016-12-01_11-39-48_741_6724911837889341086-13/-ext-10002 > {code} > ERROR : Failed with exception MetaException(message:Invalid partition key & > values; keys [country, state, ], values []) > org.apache.hadoop.hive.ql.metadata.HiveException: > MetaException(message:Invalid partition key & values; keys [country, state, > ], values []) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1902) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1834) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1428) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1388) > at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:453) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:214) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1976) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1689) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1421) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1205) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1200) > at > org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:237) > at > org.apache.hive.service.cli.operation.SQLOperation.access$300(SQLOperation.java:88) > at > org.apache.hive.service.cli.operation.SQLOperation$3$1.run(SQLOperation.java:293) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1796) > at > org.apache.hive.service.cli.operation.SQLOperation$3.run(SQLOperation.java:306) > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) > at 
java.util.concurrent.FutureTask.run(FutureTask.java:262) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:745) > Caused by: MetaException(message:Invalid partition key & values; keys > [country, state, ], values []) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_with_auth_result$get_partition_with_auth_resultStandardScheme.read(ThriftHiveMetastore.java:65142) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_with_auth_result$get_
[jira] [Updated] (HIVE-15351) Disable vectorized VectorUDFAdaptor usage with non-column or constant parameters
[ https://issues.apache.org/jira/browse/HIVE-15351?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Matt McCline updated HIVE-15351: Status: Patch Available (was: In Progress) > Disable vectorized VectorUDFAdaptor usage with non-column or constant > parameters > > > Key: HIVE-15351 > URL: https://issues.apache.org/jira/browse/HIVE-15351 > Project: Hive > Issue Type: Bug > Components: Hive >Reporter: Matt McCline >Assignee: Matt McCline >Priority: Blocker > Attachments: HIVE-15351.01.patch, HIVE-15351.02.patch > > > Vectorization using VectorUDFAdaptor is broken and produces wrong results > when the parameter(s) have vectorized expressions that allocate scratch > columns. So, for now, we restrict VectorUDFAdaptor usage to columns or > constant expressions. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15351) Disable vectorized VectorUDFAdaptor usage with non-column or constant parameters
[ https://issues.apache.org/jira/browse/HIVE-15351?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Matt McCline updated HIVE-15351: Status: In Progress (was: Patch Available) > Disable vectorized VectorUDFAdaptor usage with non-column or constant > parameters > > > Key: HIVE-15351 > URL: https://issues.apache.org/jira/browse/HIVE-15351 > Project: Hive > Issue Type: Bug > Components: Hive >Reporter: Matt McCline >Assignee: Matt McCline >Priority: Blocker > Attachments: HIVE-15351.01.patch, HIVE-15351.02.patch > > > Vectorization using VectorUDFAdaptor is broken and produces wrong results > when the parameter(s) have vectorized expressions that allocate scratch > columns. So, for now, we restrict VectorUDFAdaptor usage to columns or > constant expressions. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15351) Disable vectorized VectorUDFAdaptor usage with non-column or constant parameters
[ https://issues.apache.org/jira/browse/HIVE-15351?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Matt McCline updated HIVE-15351: Attachment: HIVE-15351.02.patch > Disable vectorized VectorUDFAdaptor usage with non-column or constant > parameters > > > Key: HIVE-15351 > URL: https://issues.apache.org/jira/browse/HIVE-15351 > Project: Hive > Issue Type: Bug > Components: Hive >Reporter: Matt McCline >Assignee: Matt McCline >Priority: Blocker > Attachments: HIVE-15351.01.patch, HIVE-15351.02.patch > > > Vectorization using VectorUDFAdaptor is broken and produces wrong results > when the parameter(s) have vectorized expressions that allocate scratch > columns. So, for now, we restrict VectorUDFAdaptor usage to columns or > constant expressions. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15361) INSERT dynamic partition on S3 fails with a MoveTask failure
[ https://issues.apache.org/jira/browse/HIVE-15361?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sergio Peña updated HIVE-15361: --- Attachment: (was: HIVE-15361.2.patch) > INSERT dynamic partition on S3 fails with a MoveTask failure > > > Key: HIVE-15361 > URL: https://issues.apache.org/jira/browse/HIVE-15361 > Project: Hive > Issue Type: Bug > Components: Hive >Affects Versions: 2.2.0 >Reporter: Sergio Peña >Assignee: Sergio Peña >Priority: Critical > Attachments: HIVE-15361.1.patch > > > The following failure is due to the patch that merges two MoveTask found on > the ConditionalTask (See HIVE-15114) > {panel:title=Repro steps} > CREATE EXTERNAL TABLE external_1k0jU (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING); > ALTER TABLE external_1k0jU ADD PARTITION (COUNTRY='USA', STATE='CA'); > INSERT INTO external_1k0jU PARTITION (country='USA', state='CA') values > ('John Doe', 23), ('Jane Doe', 22); > CREATE EXTERNAL TABLE external_P3kiT (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING) location 's3a://hive-on-s3/foo/bar/'; > set hive.exec.dynamic.partition.mode=nonstrict; > INSERT INTO TABLE external_P3kiT PARTITION (country, state) SELECT * FROM > external_1k0jU; > {panel} > {panel:title=Error & stack trace} > ERROR : FAILED: Execution Error, return code 1 from > org.apache.hadoop.hive.ql.exec.MoveTask > INFO : MapReduce Jobs Launched: > INFO : Stage-Stage-1: Map: 1 Cumulative CPU: 3.64 sec HDFS Read: 3656 > HDFS Write: 99 SUCCESS > INFO : Total MapReduce CPU Time Spent: 3 seconds 640 msec > INFO : Completed executing > command(queryId=hive_20161201113939_d64df5d7-a4c4-4885-846f-10f0223fcf4c); > Time taken: 23.227 seconds > Error: Error while processing statement: FAILED: Execution Error, return code > 1 from org.apache.hadoop.hive.ql.exec.MoveTask (state=08S01,code=1) > INFO : Loading data to table default.external_p3kit partition (country=null, > state=null) from > 
s3a://hive-on-s3/foo/bar/.hive-staging_hive_2016-12-01_11-39-48_741_6724911837889341086-13/-ext-10002 > {code} > ERROR : Failed with exception MetaException(message:Invalid partition key & > values; keys [country, state, ], values []) > org.apache.hadoop.hive.ql.metadata.HiveException: > MetaException(message:Invalid partition key & values; keys [country, state, > ], values []) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1902) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1834) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1428) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1388) > at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:453) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:214) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1976) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1689) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1421) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1205) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1200) > at > org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:237) > at > org.apache.hive.service.cli.operation.SQLOperation.access$300(SQLOperation.java:88) > at > org.apache.hive.service.cli.operation.SQLOperation$3$1.run(SQLOperation.java:293) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1796) > at > org.apache.hive.service.cli.operation.SQLOperation$3.run(SQLOperation.java:306) > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) > at java.util.concurrent.FutureTask.run(FutureTask.java:262) > at > 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:745) > Caused by: MetaException(message:Invalid partition key & values; keys > [country, state, ], values []) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_with_auth_result$get_partition_with_auth_resultStandardScheme.read(ThriftHiveMetastore.java:65142) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_with_auth_result$get_partition_with_auth_resultStandardScheme.read(ThriftHiveMetastore.java:65119) > at >
[jira] [Commented] (HIVE-15251) Provide support for complex expressions in ON clauses for OUTER joins
[ https://issues.apache.org/jira/browse/HIVE-15251?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723407#comment-15723407 ] Ashutosh Chauhan commented on HIVE-15251: - +1 pending tests > Provide support for complex expressions in ON clauses for OUTER joins > - > > Key: HIVE-15251 > URL: https://issues.apache.org/jira/browse/HIVE-15251 > Project: Hive > Issue Type: Bug > Components: CBO, Parser >Affects Versions: 2.2.0 >Reporter: Jesus Camacho Rodriguez >Assignee: Jesus Camacho Rodriguez > Attachments: HIVE-15251.01.patch, HIVE-15251.patch > > > Follow-up of HIVE-15211. > Currently, we have some restrictions on the predicates that we can use in ON > clauses for outer joins. > This patch is an extension to overcome these restrictions. Follow-up work > will focus on identifying some cases, such as disjunctive predicates, that > can be run more efficiently than with a cartesian product. > It will allow to write queries that currently fail in Hive such as: > {code:sql} > -- Disjunctions > SELECT * > FROM src1 LEFT OUTER JOIN src > ON (src1.key=src.key > OR src1.value between 100 and 102 > OR src.value between 100 and 102) > LIMIT 10; > -- Conjunction with multiple inputs references in one side > SELECT * > FROM src1 RIGHT OUTER JOIN src > ON (src1.key+src.key >= 100 > AND src1.key+src.key <= 102) > LIMIT 10; > -- Conjunct with no references > SELECT * > FROM src1 FULL OUTER JOIN src > ON (src1.value between 100 and 102 > AND src.value between 100 and 102 > AND true) > LIMIT 10; > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15361) INSERT dynamic partition on S3 fails with a MoveTask failure
[ https://issues.apache.org/jira/browse/HIVE-15361?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sergio Peña updated HIVE-15361: --- Attachment: HIVE-15361.2.patch > INSERT dynamic partition on S3 fails with a MoveTask failure > > > Key: HIVE-15361 > URL: https://issues.apache.org/jira/browse/HIVE-15361 > Project: Hive > Issue Type: Bug > Components: Hive >Affects Versions: 2.2.0 >Reporter: Sergio Peña >Assignee: Sergio Peña >Priority: Critical > Attachments: HIVE-15361.1.patch, HIVE-15361.2.patch > > > The following failure is due to the patch that merges two MoveTask found on > the ConditionalTask (See HIVE-15114) > {panel:title=Repro steps} > CREATE EXTERNAL TABLE external_1k0jU (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING); > ALTER TABLE external_1k0jU ADD PARTITION (COUNTRY='USA', STATE='CA'); > INSERT INTO external_1k0jU PARTITION (country='USA', state='CA') values > ('John Doe', 23), ('Jane Doe', 22); > CREATE EXTERNAL TABLE external_P3kiT (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING) location 's3a://hive-on-s3/foo/bar/'; > set hive.exec.dynamic.partition.mode=nonstrict; > INSERT INTO TABLE external_P3kiT PARTITION (country, state) SELECT * FROM > external_1k0jU; > {panel} > {panel:title=Error & stack trace} > ERROR : FAILED: Execution Error, return code 1 from > org.apache.hadoop.hive.ql.exec.MoveTask > INFO : MapReduce Jobs Launched: > INFO : Stage-Stage-1: Map: 1 Cumulative CPU: 3.64 sec HDFS Read: 3656 > HDFS Write: 99 SUCCESS > INFO : Total MapReduce CPU Time Spent: 3 seconds 640 msec > INFO : Completed executing > command(queryId=hive_20161201113939_d64df5d7-a4c4-4885-846f-10f0223fcf4c); > Time taken: 23.227 seconds > Error: Error while processing statement: FAILED: Execution Error, return code > 1 from org.apache.hadoop.hive.ql.exec.MoveTask (state=08S01,code=1) > INFO : Loading data to table default.external_p3kit partition (country=null, > state=null) from > 
s3a://hive-on-s3/foo/bar/.hive-staging_hive_2016-12-01_11-39-48_741_6724911837889341086-13/-ext-10002 > {code} > ERROR : Failed with exception MetaException(message:Invalid partition key & > values; keys [country, state, ], values []) > org.apache.hadoop.hive.ql.metadata.HiveException: > MetaException(message:Invalid partition key & values; keys [country, state, > ], values []) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1902) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1834) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1428) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1388) > at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:453) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:214) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1976) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1689) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1421) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1205) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1200) > at > org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:237) > at > org.apache.hive.service.cli.operation.SQLOperation.access$300(SQLOperation.java:88) > at > org.apache.hive.service.cli.operation.SQLOperation$3$1.run(SQLOperation.java:293) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1796) > at > org.apache.hive.service.cli.operation.SQLOperation$3.run(SQLOperation.java:306) > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) > at java.util.concurrent.FutureTask.run(FutureTask.java:262) > at > 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:745) > Caused by: MetaException(message:Invalid partition key & values; keys > [country, state, ], values []) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_with_auth_result$get_partition_with_auth_resultStandardScheme.read(ThriftHiveMetastore.java:65142) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_with_auth_result$get_partition_with_auth_resultStandardScheme.read(ThriftHiveMetastore.java:65119) >
[jira] [Commented] (HIVE-15351) Disable vectorized VectorUDFAdaptor usage with non-column or constant parameters
[ https://issues.apache.org/jira/browse/HIVE-15351?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723396#comment-15723396 ] Matt McCline commented on HIVE-15351: - orc_ppd_schema_evol_3a.q is a flaky test. The others are from queries that no longer vectorize due to the change. Updating patch. > Disable vectorized VectorUDFAdaptor usage with non-column or constant > parameters > > > Key: HIVE-15351 > URL: https://issues.apache.org/jira/browse/HIVE-15351 > Project: Hive > Issue Type: Bug > Components: Hive >Reporter: Matt McCline >Assignee: Matt McCline >Priority: Blocker > Attachments: HIVE-15351.01.patch > > > Vectorization using VectorUDFAdaptor is broken and produces wrong results > when the parameter(s) have vectorized expressions that allocate scratch > columns. So, for now, we restrict VectorUDFAdaptor usage to columns or > constant expressions. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15330) Bump JClouds version to 2.0.0 on Hive/Ptest
[ https://issues.apache.org/jira/browse/HIVE-15330?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723380#comment-15723380 ] Sergio Peña commented on HIVE-15330: [~szehon] Could you review this patch? > Bump JClouds version to 2.0.0 on Hive/Ptest > --- > > Key: HIVE-15330 > URL: https://issues.apache.org/jira/browse/HIVE-15330 > Project: Hive > Issue Type: Task > Components: Hive, Testing Infrastructure >Reporter: Sergio Peña >Assignee: Sergio Peña > Attachments: HIVE-15330.1.patch > > > NO PRECOMMIT TESTS > JClouds 2.0.0 fixes several issues with Google Compute Engine API. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15361) INSERT dynamic partition on S3 fails with a MoveTask failure
[ https://issues.apache.org/jira/browse/HIVE-15361?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723341#comment-15723341 ] Hive QA commented on HIVE-15361: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12841786/HIVE-15361.1.patch {color:green}SUCCESS:{color} +1 due to 5 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 177 failed/errored test(s), 10762 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestAccumuloCliDriver.testCliDriver[accumulo_single_sourced_multi_insert] (batchId=216) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=50) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[annotate_stats_table] (batchId=19) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[autoColumnStats_5] (batchId=37) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[autoColumnStats_6] (batchId=59) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[binary_output_format] (batchId=78) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[bucketmapjoin5] (batchId=76) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[bucketmapjoin_negative2] (batchId=62) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[bucketmapjoin_negative] (batchId=21) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[case_sensitivity] (batchId=61) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[cast1] (batchId=68) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[constprog_dp] (batchId=17) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[constprog_type] (batchId=1) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[ctas_colname] (batchId=53) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[ctas_uses_database_location] (batchId=31) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[cte_3] (batchId=31) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[cte_mat_3] 
(batchId=22) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[cte_mat_4] (batchId=5) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[cte_mat_5] (batchId=2) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[dynamic_rdd_cache] (batchId=49) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[explain_ddl] (batchId=43) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_sort_1_23] (batchId=71) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_sort_3] (batchId=73) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_sort_5] (batchId=41) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_sort_7] (batchId=69) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_sort_skew_1_23] (batchId=8) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_sort_test_1] (batchId=7) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_auto] (batchId=41) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_auto_file_format] (batchId=50) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_auto_mult_tables_compact] (batchId=32) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_auto_multiple] (batchId=31) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_auto_partitioned] (batchId=10) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_auto_update] (batchId=67) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_compression] (batchId=9) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_serde] (batchId=15) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_skewtable] (batchId=73) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input11] (batchId=49) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input12] (batchId=68) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input13] (batchId=68) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input34] (batchId=17) 
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input35] (batchId=53) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input36] (batchId=13) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input38] (batchId=12) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input6] (batchId=80) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input7] (batchId=67) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input8] (batchId=8) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input9] (batchId=54) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input_dynamicserde] (batchId=78) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input_part1] (batchId=7) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input_part2] (batchId=43) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input_part5] (batchId=35) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input_testsequencefile] (batchId=78) org.apache.hado
[jira] [Issue Comment Deleted] (HIVE-15361) INSERT dynamic partition on S3 fails with a MoveTask failure
[ https://issues.apache.org/jira/browse/HIVE-15361?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sergio Peña updated HIVE-15361: --- Comment: was deleted (was: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12841783/HIVE-15361.1.patch {color:green}SUCCESS:{color} +1 due to 4 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 179 failed/errored test(s), 10762 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestAccumuloCliDriver.testCliDriver[accumulo_single_sourced_multi_insert] (batchId=216) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=50) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[annotate_stats_table] (batchId=19) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[autoColumnStats_5] (batchId=37) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[autoColumnStats_6] (batchId=59) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[binary_output_format] (batchId=78) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[bucketmapjoin5] (batchId=76) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[bucketmapjoin_negative2] (batchId=62) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[bucketmapjoin_negative] (batchId=21) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[case_sensitivity] (batchId=61) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[cast1] (batchId=68) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[constprog_dp] (batchId=17) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[constprog_type] (batchId=1) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[ctas_colname] (batchId=53) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[ctas_uses_database_location] (batchId=31) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[cte_3] (batchId=31) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[cte_mat_3] (batchId=22) 
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[cte_mat_4] (batchId=5) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[cte_mat_5] (batchId=2) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[dynamic_rdd_cache] (batchId=49) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[explain_ddl] (batchId=43) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_sort_1_23] (batchId=71) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_sort_3] (batchId=73) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_sort_5] (batchId=41) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_sort_7] (batchId=69) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_sort_skew_1_23] (batchId=8) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_sort_test_1] (batchId=7) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_auto] (batchId=41) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_auto_file_format] (batchId=50) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_auto_mult_tables_compact] (batchId=32) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_auto_multiple] (batchId=31) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_auto_partitioned] (batchId=10) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_auto_update] (batchId=67) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_compression] (batchId=9) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_serde] (batchId=15) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_skewtable] (batchId=73) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input11] (batchId=49) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input12] (batchId=68) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input13] (batchId=68) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input34] (batchId=17) 
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input35] (batchId=53) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input36] (batchId=13) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input38] (batchId=12) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input6] (batchId=80) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input7] (batchId=67) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input8] (batchId=8) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input9] (batchId=54) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input_dynamicserde] (batchId=78) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input_part1] (batchId=7) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input_part2] (batchId=43) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input_part5] (batchId=35) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input_testsequencefile] (batchId=78) org.apache.hadoop.hive.cli.TestCl
[jira] [Commented] (HIVE-15361) INSERT dynamic partition on S3 fails with a MoveTask failure
[ https://issues.apache.org/jira/browse/HIVE-15361?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723304#comment-15723304 ] Sergio Peña commented on HIVE-15361: Sorry. The issue is happening only on S3 due to HIVE-15114. I will update the description and add a comment about the solution. > INSERT dynamic partition on S3 fails with a MoveTask failure > > > Key: HIVE-15361 > URL: https://issues.apache.org/jira/browse/HIVE-15361 > Project: Hive > Issue Type: Bug > Components: Hive >Affects Versions: 2.2.0 >Reporter: Sergio Peña >Assignee: Sergio Peña >Priority: Critical > Attachments: HIVE-15361.1.patch > > > The following failure is due to the patch that merges two MoveTask found on > the ConditionalTask (See HIVE-15114) > {panel:title=Repro steps} > CREATE EXTERNAL TABLE external_1k0jU (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING); > ALTER TABLE external_1k0jU ADD PARTITION (COUNTRY='USA', STATE='CA'); > INSERT INTO external_1k0jU PARTITION (country='USA', state='CA') values > ('John Doe', 23), ('Jane Doe', 22); > CREATE EXTERNAL TABLE external_P3kiT (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING) location 's3a://hive-on-s3/foo/bar/'; > set hive.exec.dynamic.partition.mode=nonstrict; > INSERT INTO TABLE external_P3kiT PARTITION (country, state) SELECT * FROM > external_1k0jU; > {panel} > {panel:title=Error & stack trace} > ERROR : FAILED: Execution Error, return code 1 from > org.apache.hadoop.hive.ql.exec.MoveTask > INFO : MapReduce Jobs Launched: > INFO : Stage-Stage-1: Map: 1 Cumulative CPU: 3.64 sec HDFS Read: 3656 > HDFS Write: 99 SUCCESS > INFO : Total MapReduce CPU Time Spent: 3 seconds 640 msec > INFO : Completed executing > command(queryId=hive_20161201113939_d64df5d7-a4c4-4885-846f-10f0223fcf4c); > Time taken: 23.227 seconds > Error: Error while processing statement: FAILED: Execution Error, return code > 1 from org.apache.hadoop.hive.ql.exec.MoveTask 
(state=08S01,code=1) > INFO : Loading data to table default.external_p3kit partition (country=null, > state=null) from > s3a://hive-on-s3/foo/bar/.hive-staging_hive_2016-12-01_11-39-48_741_6724911837889341086-13/-ext-10002 > {code} > ERROR : Failed with exception MetaException(message:Invalid partition key & > values; keys [country, state, ], values []) > org.apache.hadoop.hive.ql.metadata.HiveException: > MetaException(message:Invalid partition key & values; keys [country, state, > ], values []) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1902) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1834) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1428) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1388) > at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:453) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:214) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1976) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1689) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1421) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1205) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1200) > at > org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:237) > at > org.apache.hive.service.cli.operation.SQLOperation.access$300(SQLOperation.java:88) > at > org.apache.hive.service.cli.operation.SQLOperation$3$1.run(SQLOperation.java:293) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1796) > at > org.apache.hive.service.cli.operation.SQLOperation$3.run(SQLOperation.java:306) > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) > at 
java.util.concurrent.FutureTask.run(FutureTask.java:262) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:745) > Caused by: MetaException(message:Invalid partition key & values; keys > [country, state, ], values []) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_with_auth_result$get_partition_with_auth_resultStandardScheme.read(ThriftHiveMetastore.java:65142) > at > org.apache.hadoop.hive.metastore.api.Thrif
[jira] [Commented] (HIVE-15332) REPL LOAD & DUMP support for incremental CREATE_TABLE/ADD_PTN
[ https://issues.apache.org/jira/browse/HIVE-15332?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723300#comment-15723300 ] Vaibhav Gumashta commented on HIVE-15332: - +1, but some minor comments below (can be taken up in new jiras): bq. 206 batchSize = 15; Make this default batch size configurable? bq. 515 analyzeTableLoad(dbNameOrPattern, tblNameOrPattern, dir.getPath().toUri().toString(), null); Did you intend to implement this as part of analyzeEventLoad? General comment: should we have a replRoot/events/evId1 etc type of directory hierarchy to make it easy to filter? Also, creating follow-up jiras for: # Enhancing DBNotif listener to capture files added (if any) on create table/partition events (line 273 FIXME) # Implementing consolidateEvent (bootstrap rubberbanding) # Add new methods to MessageFactory API (corresponding to JSONMessageFactory) > REPL LOAD & DUMP support for incremental CREATE_TABLE/ADD_PTN > - > > Key: HIVE-15332 > URL: https://issues.apache.org/jira/browse/HIVE-15332 > Project: Hive > Issue Type: Sub-task > Components: repl >Reporter: Sushanth Sowmyan >Assignee: Sushanth Sowmyan > Attachments: HIVE-15332.patch > > > We need to add in support for REPL LOAD and REPL DUMP of incremental events, > and we need to be able to replicate creates, for a start. This jira tracks > the inclusion of CREATE_TABLE/ADD_PARTITION event support to REPL DUMP & LOAD. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15361) INSERT dynamic partition on S3 fails with a MoveTask failure
[ https://issues.apache.org/jira/browse/HIVE-15361?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sergio Peña updated HIVE-15361: --- Description: The following failure is due to the patch that merges two MoveTask found on the ConditionalTask (See HIVE-15114) {panel:title=Repro steps} CREATE EXTERNAL TABLE external_1k0jU (name STRING, age INT) PARTITIONED BY (country STRING, state STRING); ALTER TABLE external_1k0jU ADD PARTITION (COUNTRY='USA', STATE='CA'); INSERT INTO external_1k0jU PARTITION (country='USA', state='CA') values ('John Doe', 23), ('Jane Doe', 22); CREATE EXTERNAL TABLE external_P3kiT (name STRING, age INT) PARTITIONED BY (country STRING, state STRING) location 's3a://hive-on-s3/foo/bar/'; set hive.exec.dynamic.partition.mode=nonstrict; INSERT INTO TABLE external_P3kiT PARTITION (country, state) SELECT * FROM external_1k0jU; {panel} {panel:title=Error & stack trace} ERROR : FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.MoveTask INFO : MapReduce Jobs Launched: INFO : Stage-Stage-1: Map: 1 Cumulative CPU: 3.64 sec HDFS Read: 3656 HDFS Write: 99 SUCCESS INFO : Total MapReduce CPU Time Spent: 3 seconds 640 msec INFO : Completed executing command(queryId=hive_20161201113939_d64df5d7-a4c4-4885-846f-10f0223fcf4c); Time taken: 23.227 seconds Error: Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.MoveTask (state=08S01,code=1) INFO : Loading data to table default.external_p3kit partition (country=null, state=null) from s3a://hive-on-s3/foo/bar/.hive-staging_hive_2016-12-01_11-39-48_741_6724911837889341086-13/-ext-10002 {code} ERROR : Failed with exception MetaException(message:Invalid partition key & values; keys [country, state, ], values []) org.apache.hadoop.hive.ql.metadata.HiveException: MetaException(message:Invalid partition key & values; keys [country, state, ], values []) at 
org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1902) at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1834) at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1428) at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1388) at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:453) at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:214) at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1976) at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1689) at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1421) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1205) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1200) at org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:237) at org.apache.hive.service.cli.operation.SQLOperation.access$300(SQLOperation.java:88) at org.apache.hive.service.cli.operation.SQLOperation$3$1.run(SQLOperation.java:293) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1796) at org.apache.hive.service.cli.operation.SQLOperation$3.run(SQLOperation.java:306) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) at java.util.concurrent.FutureTask.run(FutureTask.java:262) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) Caused by: MetaException(message:Invalid partition key & values; keys [country, state, ], values []) at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_with_auth_result$get_partition_with_auth_resultStandardScheme.read(ThriftHiveMetastore.java:65142) at 
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_with_auth_result$get_partition_with_auth_resultStandardScheme.read(ThriftHiveMetastore.java:65119) at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_with_auth_result.read(ThriftHiveMetastore.java:65050) at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:86) at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_get_partition_with_auth(ThriftHiveMetastore.java:2007) at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.get_partition_with_auth(ThriftHiveMetastore.java:1990) at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.getPartitionWithAuthInfo(HiveMetaStoreClient.java:1210) at sun.reflect.G
[jira] [Updated] (HIVE-15361) INSERT dynamic partition on S3 fails with a MoveTask failure
[ https://issues.apache.org/jira/browse/HIVE-15361?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sergio Peña updated HIVE-15361: --- Summary: INSERT dynamic partition on S3 fails with a MoveTask failure (was: Dynamic partition INSERT fails with a MoveTask failure) > INSERT dynamic partition on S3 fails with a MoveTask failure > > > Key: HIVE-15361 > URL: https://issues.apache.org/jira/browse/HIVE-15361 > Project: Hive > Issue Type: Bug > Components: Hive >Affects Versions: 2.2.0 >Reporter: Sergio Peña >Assignee: Sergio Peña >Priority: Critical > Attachments: HIVE-15361.1.patch > > > {panel:title=Repro steps} > CREATE EXTERNAL TABLE external_1k0jU (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING); > ALTER TABLE external_1k0jU ADD PARTITION (COUNTRY='USA', STATE='CA'); > INSERT INTO external_1k0jU PARTITION (country='USA', state='CA') values > ('John Doe', 23), ('Jane Doe', 22); > CREATE EXTERNAL TABLE external_P3kiT (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING) location 's3a://hive-on-s3/foo/bar/'; > set hive.exec.dynamic.partition.mode=nonstrict; > INSERT INTO TABLE external_P3kiT PARTITION (country, state) SELECT * FROM > external_1k0jU; > {panel} > {panel:title=Error & stack trace} > ERROR : FAILED: Execution Error, return code 1 from > org.apache.hadoop.hive.ql.exec.MoveTask > INFO : MapReduce Jobs Launched: > INFO : Stage-Stage-1: Map: 1 Cumulative CPU: 3.64 sec HDFS Read: 3656 > HDFS Write: 99 SUCCESS > INFO : Total MapReduce CPU Time Spent: 3 seconds 640 msec > INFO : Completed executing > command(queryId=hive_20161201113939_d64df5d7-a4c4-4885-846f-10f0223fcf4c); > Time taken: 23.227 seconds > Error: Error while processing statement: FAILED: Execution Error, return code > 1 from org.apache.hadoop.hive.ql.exec.MoveTask (state=08S01,code=1) > INFO : Loading data to table default.external_p3kit partition (country=null, > state=null) from > 
s3a://hive-on-s3/foo/bar/.hive-staging_hive_2016-12-01_11-39-48_741_6724911837889341086-13/-ext-10002 > {code} > ERROR : Failed with exception MetaException(message:Invalid partition key & > values; keys [country, state, ], values []) > org.apache.hadoop.hive.ql.metadata.HiveException: > MetaException(message:Invalid partition key & values; keys [country, state, > ], values []) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1902) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1834) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1428) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1388) > at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:453) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:214) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1976) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1689) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1421) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1205) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1200) > at > org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:237) > at > org.apache.hive.service.cli.operation.SQLOperation.access$300(SQLOperation.java:88) > at > org.apache.hive.service.cli.operation.SQLOperation$3$1.run(SQLOperation.java:293) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1796) > at > org.apache.hive.service.cli.operation.SQLOperation$3.run(SQLOperation.java:306) > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) > at java.util.concurrent.FutureTask.run(FutureTask.java:262) > at > 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:745) > Caused by: MetaException(message:Invalid partition key & values; keys > [country, state, ], values []) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_with_auth_result$get_partition_with_auth_resultStandardScheme.read(ThriftHiveMetastore.java:65142) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_with_auth_result$get_partition_with_auth_resultStandardScheme.read(ThriftHiveMetastore.java:65119) > at > org.apache.hadoop.hive.me
[jira] [Commented] (HIVE-15346) "values temp table" should not be an input source for the query
[ https://issues.apache.org/jira/browse/HIVE-15346?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723267#comment-15723267 ] Yongzhi Chen commented on HIVE-15346: - The source code looks good to me +1 Make sure all the unit tests are properly fixed. > "values temp table" should not be an input source for the query > --- > > Key: HIVE-15346 > URL: https://issues.apache.org/jira/browse/HIVE-15346 > Project: Hive > Issue Type: Sub-task > Components: Query Planning >Affects Versions: 2.2.0 >Reporter: Aihua Xu >Assignee: Aihua Xu > Attachments: HIVE-15346.1.patch, HIVE-15346.2.patch, > HIVE-15346.3.patch, HIVE-15346.codeonly.patch > > > For "insert values()" query, a values tmp table is created. Such table is > treated as input source. That seems to be incorrect since that should be > treated internal and temporary. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Comment Edited] (HIVE-15361) Dynamic partition INSERT fails with a MoveTask failure
[ https://issues.apache.org/jira/browse/HIVE-15361?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723206#comment-15723206 ] Sahil Takiar edited comment on HIVE-15361 at 12/5/16 8:05 PM: -- When doing the merge of the two {{MoveTasks}} we should probably assert that the destination of the first {{MoveTask}} = the source of the second {{MoveTask}}. If not then the optimization should be dropped. Looking at the code, we should never hit a case where this condition returns false, but it would make the code safer. was (Author: stakiar): When doing the merge of the two {{MoveTask}}s we should probably assert that the destination of the first {{MoveTask}} = the source of the second {{MoveTask}}. If not then the optimization should be dropped. Looking at the code, we should never hit a case where this condition returns false, but it would make the code safer. > Dynamic partition INSERT fails with a MoveTask failure > -- > > Key: HIVE-15361 > URL: https://issues.apache.org/jira/browse/HIVE-15361 > Project: Hive > Issue Type: Bug > Components: Hive >Affects Versions: 2.2.0 >Reporter: Sergio Peña >Assignee: Sergio Peña >Priority: Critical > Attachments: HIVE-15361.1.patch > > > {panel:title=Repro steps} > CREATE EXTERNAL TABLE external_1k0jU (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING); > ALTER TABLE external_1k0jU ADD PARTITION (COUNTRY='USA', STATE='CA'); > INSERT INTO external_1k0jU PARTITION (country='USA', state='CA') values > ('John Doe', 23), ('Jane Doe', 22); > CREATE EXTERNAL TABLE external_P3kiT (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING) location 's3a://hive-on-s3/foo/bar/'; > set hive.exec.dynamic.partition.mode=nonstrict; > INSERT INTO TABLE external_P3kiT PARTITION (country, state) SELECT * FROM > external_1k0jU; > {panel} > {panel:title=Error & stack trace} > ERROR : FAILED: Execution Error, return code 1 from > org.apache.hadoop.hive.ql.exec.MoveTask > INFO : 
MapReduce Jobs Launched: > INFO : Stage-Stage-1: Map: 1 Cumulative CPU: 3.64 sec HDFS Read: 3656 > HDFS Write: 99 SUCCESS > INFO : Total MapReduce CPU Time Spent: 3 seconds 640 msec > INFO : Completed executing > command(queryId=hive_20161201113939_d64df5d7-a4c4-4885-846f-10f0223fcf4c); > Time taken: 23.227 seconds > Error: Error while processing statement: FAILED: Execution Error, return code > 1 from org.apache.hadoop.hive.ql.exec.MoveTask (state=08S01,code=1) > INFO : Loading data to table default.external_p3kit partition (country=null, > state=null) from > s3a://hive-on-s3/foo/bar/.hive-staging_hive_2016-12-01_11-39-48_741_6724911837889341086-13/-ext-10002 > {code} > ERROR : Failed with exception MetaException(message:Invalid partition key & > values; keys [country, state, ], values []) > org.apache.hadoop.hive.ql.metadata.HiveException: > MetaException(message:Invalid partition key & values; keys [country, state, > ], values []) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1902) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1834) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1428) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1388) > at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:453) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:214) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1976) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1689) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1421) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1205) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1200) > at > org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:237) > at > org.apache.hive.service.cli.operation.SQLOperation.access$300(SQLOperation.java:88) > at > 
org.apache.hive.service.cli.operation.SQLOperation$3$1.run(SQLOperation.java:293) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1796) > at > org.apache.hive.service.cli.operation.SQLOperation$3.run(SQLOperation.java:306) > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) > at java.util.concurrent.FutureTask.run(FutureTask.java:262) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at
[jira] [Updated] (HIVE-14895) CompactorMR.CompactorOutputCommitter race condition
[ https://issues.apache.org/jira/browse/HIVE-14895?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Eugene Koifman updated HIVE-14895: -- Attachment: HIVE-14895.03.patch This bug was a red herring. Patch 3 has no code changes but adds some comments to make the logic clear. > CompactorMR.CompactorOutputCommitter race condition > --- > > Key: HIVE-14895 > URL: https://issues.apache.org/jira/browse/HIVE-14895 > Project: Hive > Issue Type: Bug >Affects Versions: 0.14.0 >Reporter: Eugene Koifman >Assignee: Eugene Koifman > Attachments: HIVE-14895.01.patch, HIVE-14895.02.patch, > HIVE-14895.03.patch > > > commitJob() method renames each file in TMP_LOCATION separately. so someone > may read FINAL_LOCATION and see only some of the files that should be there. > Use FileSystem.getFileStatus(TMP_LOCATION) and just rename the dir. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-14895) CompactorMR.CompactorOutputCommitter race condition
[ https://issues.apache.org/jira/browse/HIVE-14895?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Eugene Koifman updated HIVE-14895: -- Status: Open (was: Patch Available) > CompactorMR.CompactorOutputCommitter race condition > --- > > Key: HIVE-14895 > URL: https://issues.apache.org/jira/browse/HIVE-14895 > Project: Hive > Issue Type: Bug >Affects Versions: 0.14.0 >Reporter: Eugene Koifman >Assignee: Eugene Koifman > Attachments: HIVE-14895.01.patch, HIVE-14895.02.patch, > HIVE-14895.03.patch > > > commitJob() method renames each file in TMP_LOCATION separately. so someone > may read FINAL_LOCATION and see only some of the files that should be there. > Use FileSystem.getFileStatus(TMP_LOCATION) and just rename the dir. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15360) Nested column pruning: add pruned column paths to explain output
[ https://issues.apache.org/jira/browse/HIVE-15360?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723225#comment-15723225 ] Hive QA commented on HIVE-15360: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12841787/HIVE-15360.1.patch {color:red}ERROR:{color} -1 due to no test(s) being added or modified. {color:red}ERROR:{color} -1 due to 13 failed/errored test(s), 10762 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=50) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[case_sensitivity] (batchId=61) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input_testxpath3] (batchId=20) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input_testxpath] (batchId=28) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample2] (batchId=5) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample4] (batchId=15) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample6] (batchId=61) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample7] (batchId=60) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[sample9] (batchId=38) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[udf_coalesce] (batchId=74) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[orc_ppd_schema_evol_3a] (batchId=134) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[transform_ppr2] (batchId=134) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver[explainanalyze_3] (batchId=91) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/2418/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/2418/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-2418/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing 
org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 13 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12841787 - PreCommit-HIVE-Build > Nested column pruning: add pruned column paths to explain output > > > Key: HIVE-15360 > URL: https://issues.apache.org/jira/browse/HIVE-15360 > Project: Hive > Issue Type: Sub-task > Components: Query Planning >Reporter: Chao Sun >Assignee: Chao Sun >Priority: Minor > Attachments: HIVE-15360.1.patch > > > We should add the pruned nested column paths to the explain output for easier > tracing and debugging. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15361) Dynamic partition INSERT fails with a MoveTask failure
[ https://issues.apache.org/jira/browse/HIVE-15361?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723206#comment-15723206 ] Sahil Takiar commented on HIVE-15361: - When doing the merge of the two {{MoveTask}}s we should probably assert that the destination of the first {{MoveTask}} = the source of the second {{MoveTask}}. If not then the optimization should be dropped. Looking at the code, we should never hit a case where this condition returns false, but it would make the code safer. > Dynamic partition INSERT fails with a MoveTask failure > -- > > Key: HIVE-15361 > URL: https://issues.apache.org/jira/browse/HIVE-15361 > Project: Hive > Issue Type: Bug > Components: Hive >Affects Versions: 2.2.0 >Reporter: Sergio Peña >Assignee: Sergio Peña >Priority: Critical > Attachments: HIVE-15361.1.patch > > > {panel:title=Repro steps} > CREATE EXTERNAL TABLE external_1k0jU (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING); > ALTER TABLE external_1k0jU ADD PARTITION (COUNTRY='USA', STATE='CA'); > INSERT INTO external_1k0jU PARTITION (country='USA', state='CA') values > ('John Doe', 23), ('Jane Doe', 22); > CREATE EXTERNAL TABLE external_P3kiT (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING) location 's3a://hive-on-s3/foo/bar/'; > set hive.exec.dynamic.partition.mode=nonstrict; > INSERT INTO TABLE external_P3kiT PARTITION (country, state) SELECT * FROM > external_1k0jU; > {panel} > {panel:title=Error & stack trace} > ERROR : FAILED: Execution Error, return code 1 from > org.apache.hadoop.hive.ql.exec.MoveTask > INFO : MapReduce Jobs Launched: > INFO : Stage-Stage-1: Map: 1 Cumulative CPU: 3.64 sec HDFS Read: 3656 > HDFS Write: 99 SUCCESS > INFO : Total MapReduce CPU Time Spent: 3 seconds 640 msec > INFO : Completed executing > command(queryId=hive_20161201113939_d64df5d7-a4c4-4885-846f-10f0223fcf4c); > Time taken: 23.227 seconds > Error: Error while processing statement: FAILED: Execution 
Error, return code > 1 from org.apache.hadoop.hive.ql.exec.MoveTask (state=08S01,code=1) > INFO : Loading data to table default.external_p3kit partition (country=null, > state=null) from > s3a://hive-on-s3/foo/bar/.hive-staging_hive_2016-12-01_11-39-48_741_6724911837889341086-13/-ext-10002 > {code} > ERROR : Failed with exception MetaException(message:Invalid partition key & > values; keys [country, state, ], values []) > org.apache.hadoop.hive.ql.metadata.HiveException: > MetaException(message:Invalid partition key & values; keys [country, state, > ], values []) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1902) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1834) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1428) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1388) > at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:453) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:214) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1976) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1689) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1421) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1205) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1200) > at > org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:237) > at > org.apache.hive.service.cli.operation.SQLOperation.access$300(SQLOperation.java:88) > at > org.apache.hive.service.cli.operation.SQLOperation$3$1.run(SQLOperation.java:293) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1796) > at > org.apache.hive.service.cli.operation.SQLOperation$3.run(SQLOperation.java:306) > at > 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) > at java.util.concurrent.FutureTask.run(FutureTask.java:262) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:745) > Caused by: MetaException(message:Invalid partition key & values; keys > [country, state, ], values []) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_with_auth_result$get_partition_with_auth_resultStandardScheme.read(ThriftHiveMetas
[jira] [Updated] (HIVE-15346) "values temp table" should not be an input source for the query
[ https://issues.apache.org/jira/browse/HIVE-15346?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Aihua Xu updated HIVE-15346: Attachment: HIVE-15346.3.patch Patch-3: fix a test failure which is caused by the newly added test. > "values temp table" should not be an input source for the query > --- > > Key: HIVE-15346 > URL: https://issues.apache.org/jira/browse/HIVE-15346 > Project: Hive > Issue Type: Sub-task > Components: Query Planning >Affects Versions: 2.2.0 >Reporter: Aihua Xu >Assignee: Aihua Xu > Attachments: HIVE-15346.1.patch, HIVE-15346.2.patch, > HIVE-15346.3.patch, HIVE-15346.codeonly.patch > > > For "insert values()" query, a values tmp table is created. Such table is > treated as input source. That seems to be incorrect since that should be > treated internal and temporary. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15346) "values temp table" should not be an input source for the query
[ https://issues.apache.org/jira/browse/HIVE-15346?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Aihua Xu updated HIVE-15346: Attachment: HIVE-15346.codeonly.patch > "values temp table" should not be an input source for the query > --- > > Key: HIVE-15346 > URL: https://issues.apache.org/jira/browse/HIVE-15346 > Project: Hive > Issue Type: Sub-task > Components: Query Planning >Affects Versions: 2.2.0 >Reporter: Aihua Xu >Assignee: Aihua Xu > Attachments: HIVE-15346.1.patch, HIVE-15346.2.patch, > HIVE-15346.codeonly.patch > > > For "insert values()" query, a values tmp table is created. Such table is > treated as input source. That seems to be incorrect since that should be > treated internal and temporary. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15346) "values temp table" should not be an input source for the query
[ https://issues.apache.org/jira/browse/HIVE-15346?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Aihua Xu updated HIVE-15346: Attachment: (was: HIVE-15346.2.text) > "values temp table" should not be an input source for the query > --- > > Key: HIVE-15346 > URL: https://issues.apache.org/jira/browse/HIVE-15346 > Project: Hive > Issue Type: Sub-task > Components: Query Planning >Affects Versions: 2.2.0 >Reporter: Aihua Xu >Assignee: Aihua Xu > Attachments: HIVE-15346.1.patch, HIVE-15346.2.patch > > > For "insert values()" query, a values tmp table is created. Such table is > treated as input source. That seems to be incorrect since that should be > treated internal and temporary. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15251) Provide support for complex expressions in ON clauses for OUTER joins
[ https://issues.apache.org/jira/browse/HIVE-15251?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jesus Camacho Rodriguez updated HIVE-15251: --- Attachment: HIVE-15251.01.patch New patch addressing [~ashutoshc] comments. > Provide support for complex expressions in ON clauses for OUTER joins > - > > Key: HIVE-15251 > URL: https://issues.apache.org/jira/browse/HIVE-15251 > Project: Hive > Issue Type: Bug > Components: CBO, Parser >Affects Versions: 2.2.0 >Reporter: Jesus Camacho Rodriguez >Assignee: Jesus Camacho Rodriguez > Attachments: HIVE-15251.01.patch, HIVE-15251.patch > > > Follow-up of HIVE-15211. > Currently, we have some restrictions on the predicates that we can use in ON > clauses for outer joins. > This patch is an extension to overcome these restrictions. Follow-up work > will focus on identifying some cases, such as disjunctive predicates, that > can be run more efficiently than with a cartesian product. > It will allow to write queries that currently fail in Hive such as: > {code:sql} > -- Disjunctions > SELECT * > FROM src1 LEFT OUTER JOIN src > ON (src1.key=src.key > OR src1.value between 100 and 102 > OR src.value between 100 and 102) > LIMIT 10; > -- Conjunction with multiple inputs references in one side > SELECT * > FROM src1 RIGHT OUTER JOIN src > ON (src1.key+src.key >= 100 > AND src1.key+src.key <= 102) > LIMIT 10; > -- Conjunct with no references > SELECT * > FROM src1 FULL OUTER JOIN src > ON (src1.value between 100 and 102 > AND src.value between 100 and 102 > AND true) > LIMIT 10; > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15301) Expose SparkStatistics information in SparkTask
[ https://issues.apache.org/jira/browse/HIVE-15301?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Chao Sun updated HIVE-15301: Affects Version/s: 2.2.0 > Expose SparkStatistics information in SparkTask > --- > > Key: HIVE-15301 > URL: https://issues.apache.org/jira/browse/HIVE-15301 > Project: Hive > Issue Type: Improvement > Components: Spark >Affects Versions: 2.2.0 >Reporter: zhihai xu >Assignee: zhihai xu >Priority: Minor > Attachments: HIVE-15301.000.patch > > > Expose SparkStatistics information in SparkTask. So we can get > SparkStatistics in Hook. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15361) Dynamic partition INSERT fails with a MoveTask failure
[ https://issues.apache.org/jira/browse/HIVE-15361?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723096#comment-15723096 ] Xuefu Zhang commented on HIVE-15361: [~spena], thanks for working on this. I'm wondering if you mind giving some description of the problem and your proposed solution. It helps others understand w/o going to the code change. Thanks. > Dynamic partition INSERT fails with a MoveTask failure > -- > > Key: HIVE-15361 > URL: https://issues.apache.org/jira/browse/HIVE-15361 > Project: Hive > Issue Type: Bug > Components: Hive >Affects Versions: 2.2.0 >Reporter: Sergio Peña >Assignee: Sergio Peña >Priority: Critical > Attachments: HIVE-15361.1.patch > > > {panel:title=Repro steps} > CREATE EXTERNAL TABLE external_1k0jU (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING); > ALTER TABLE external_1k0jU ADD PARTITION (COUNTRY='USA', STATE='CA'); > INSERT INTO external_1k0jU PARTITION (country='USA', state='CA') values > ('John Doe', 23), ('Jane Doe', 22); > CREATE EXTERNAL TABLE external_P3kiT (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING) location 's3a://hive-on-s3/foo/bar/'; > set hive.exec.dynamic.partition.mode=nonstrict; > INSERT INTO TABLE external_P3kiT PARTITION (country, state) SELECT * FROM > external_1k0jU; > {panel} > {panel:title=Error & stack trace} > ERROR : FAILED: Execution Error, return code 1 from > org.apache.hadoop.hive.ql.exec.MoveTask > INFO : MapReduce Jobs Launched: > INFO : Stage-Stage-1: Map: 1 Cumulative CPU: 3.64 sec HDFS Read: 3656 > HDFS Write: 99 SUCCESS > INFO : Total MapReduce CPU Time Spent: 3 seconds 640 msec > INFO : Completed executing > command(queryId=hive_20161201113939_d64df5d7-a4c4-4885-846f-10f0223fcf4c); > Time taken: 23.227 seconds > Error: Error while processing statement: FAILED: Execution Error, return code > 1 from org.apache.hadoop.hive.ql.exec.MoveTask (state=08S01,code=1) > INFO : Loading data to table 
default.external_p3kit partition (country=null, > state=null) from > s3a://hive-on-s3/foo/bar/.hive-staging_hive_2016-12-01_11-39-48_741_6724911837889341086-13/-ext-10002 > {code} > ERROR : Failed with exception MetaException(message:Invalid partition key & > values; keys [country, state, ], values []) > org.apache.hadoop.hive.ql.metadata.HiveException: > MetaException(message:Invalid partition key & values; keys [country, state, > ], values []) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1902) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1834) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1428) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1388) > at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:453) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:214) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1976) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1689) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1421) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1205) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1200) > at > org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:237) > at > org.apache.hive.service.cli.operation.SQLOperation.access$300(SQLOperation.java:88) > at > org.apache.hive.service.cli.operation.SQLOperation$3$1.run(SQLOperation.java:293) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1796) > at > org.apache.hive.service.cli.operation.SQLOperation$3.run(SQLOperation.java:306) > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) > at 
java.util.concurrent.FutureTask.run(FutureTask.java:262) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:745) > Caused by: MetaException(message:Invalid partition key & values; keys > [country, state, ], values []) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_with_auth_result$get_partition_with_auth_resultStandardScheme.read(ThriftHiveMetastore.java:65142) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_with_auth_result$get_partition_w
[jira] [Updated] (HIVE-14535) add micromanaged tables to Hive (metastore keeps track of the files)
[ https://issues.apache.org/jira/browse/HIVE-14535?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sergey Shelukhin updated HIVE-14535: Description: Design doc: https://docs.google.com/document/d/1b3t1RywfyRb73-cdvkEzJUyOiekWwkMHdiQ-42zCllY Feel free to comment. Update: we ended up going with sequence number based implementation was: Design doc: https://docs.google.com/document/d/1b3t1RywfyRb73-cdvkEzJUyOiekWwkMHdiQ-42zCllY Feel free to comment. > add micromanaged tables to Hive (metastore keeps track of the files) > > > Key: HIVE-14535 > URL: https://issues.apache.org/jira/browse/HIVE-14535 > Project: Hive > Issue Type: Improvement >Reporter: Sergey Shelukhin >Assignee: Sergey Shelukhin > > Design doc: > https://docs.google.com/document/d/1b3t1RywfyRb73-cdvkEzJUyOiekWwkMHdiQ-42zCllY > Feel free to comment. > Update: we ended up going with sequence number based implementation -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15331) Decimal multiplication with high precision/scale often returns NULL
[ https://issues.apache.org/jira/browse/HIVE-15331?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jason Dere updated HIVE-15331: -- Attachment: HIVE-15331.1.patch Initial patch. Let's see what changes in the golden files. > Decimal multiplication with high precision/scale often returns NULL > --- > > Key: HIVE-15331 > URL: https://issues.apache.org/jira/browse/HIVE-15331 > Project: Hive > Issue Type: Bug > Components: Types >Reporter: Jason Dere >Assignee: Jason Dere > Attachments: HIVE-15331.1.patch > > > {noformat} > create temporary table dec (a decimal(38,18)); > insert into dec values(100.0); > hive> select a*a from dec; > OK > NULL > Time taken: 0.165 seconds, Fetched: 1 row(s) > {noformat} > Looks like the reason is because the result of decimal(38,18) * > decimal(38,18) only has 2 digits of precision for integers: > {noformat} > hive> set hive.explain.user=false; > hive> explain select a*a from dec; > OK > STAGE DEPENDENCIES: > Stage-0 is a root stage > STAGE PLANS: > Stage: Stage-0 > Fetch Operator > limit: -1 > Processor Tree: > TableScan > alias: dec > Select Operator > expressions: (a * a) (type: decimal(38,36)) > outputColumnNames: _col0 > ListSink > Time taken: 0.039 seconds, Fetched: 15 row(s) > {noformat} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15331) Decimal multiplication with high precision/scale often returns NULL
[ https://issues.apache.org/jira/browse/HIVE-15331?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jason Dere updated HIVE-15331: -- Status: Patch Available (was: Open) > Decimal multiplication with high precision/scale often returns NULL > --- > > Key: HIVE-15331 > URL: https://issues.apache.org/jira/browse/HIVE-15331 > Project: Hive > Issue Type: Bug > Components: Types >Reporter: Jason Dere >Assignee: Jason Dere > Attachments: HIVE-15331.1.patch > > > {noformat} > create temporary table dec (a decimal(38,18)); > insert into dec values(100.0); > hive> select a*a from dec; > OK > NULL > Time taken: 0.165 seconds, Fetched: 1 row(s) > {noformat} > Looks like the reason is because the result of decimal(38,18) * > decimal(38,18) only has 2 digits of precision for integers: > {noformat} > hive> set hive.explain.user=false; > hive> explain select a*a from dec; > OK > STAGE DEPENDENCIES: > Stage-0 is a root stage > STAGE PLANS: > Stage: Stage-0 > Fetch Operator > limit: -1 > Processor Tree: > TableScan > alias: dec > Select Operator > expressions: (a * a) (type: decimal(38,36)) > outputColumnNames: _col0 > ListSink > Time taken: 0.039 seconds, Fetched: 15 row(s) > {noformat} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15352) MVCC (Multi Versioned Concurrency Control) in Hive
[ https://issues.apache.org/jira/browse/HIVE-15352?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723103#comment-15723103 ] Sergey Shelukhin commented on HIVE-15352: - Something similar to sequence-based partitioning is pursued in HIVE-14535 (although for different reasons). We were thinking of MVCC being one of the next logical steps there. Also you might want to take a look at Hive ACID implementation. > MVCC (Multi Versioned Concurrency Control) in Hive > -- > > Key: HIVE-15352 > URL: https://issues.apache.org/jira/browse/HIVE-15352 > Project: Hive > Issue Type: New Feature >Reporter: Garima Dosi > Attachments: Hive MVCC - Requirement & Design.pdf > > > Use Case > While working with providing solutions for various applications, we see that > there is at times, a need to provide multi version concurrency support for > certain datasets. The requirement of multi versioned concurrency is mainly > due to two reasons – > • Simultaneous querying and loading from tables or datasets, which requires > maintaining versions for reading and writing (Locking is not the right option > here) > • Maintaining historical load of tables/datasets upto some extent > Both of these requirements are seen in data management systems (warehouses > etc). > What happens without MVCC in Hive? > In cases, where MVCC had to be done, design similar to this - > https://dzone.com/articles/zookeeper-a-real-world-example-of-how-to-use-it > was followed to make it work. Zookeeper was used to maintain versions and > provide MVCC support. However, this design poses a limitation if a normal > user would like to query a hive table because he will not be aware of the > current version to be queried. The additional layer to match versions in > zookeeper with the dataset to be queried introduces a bit of an overhead for > normal users and hence, the request to make this feature available in Hive. 
> Hive Design for Support of MVCC > The hive design for MVCC support can be as described below (It would somewhat > follow the article mentioned in the previous section) – > 1. The first thing should be the ability for the user to specify that this is > an MVCC table. So, a DDL something like this – > create table <table_name> (<columns>) MULTI_VERSIONED ON [sequence, > time] > Internally this DDL can be translated to a partitioned table either on a > sequence number (auto-generated by Hive) or a timestamp. The metastore would > keep this information. > 2. DMLs related to inserting or loading data to the table would remain the > same for an end user. However, internally Hive would automatically detect > that a table is a multi-versioned table and write the new data to a new > partition with a new version of the dataset. The Hive Metastore would also be > updated with the current version. > 3. DMLs related to querying data from the table would remain the same for a > user. However, internally Hive would use the latest version for queries. > Latest version is always stored in the metastore. > Management of obsolete versions > The obsolete versions can be deleted based on the following – > 1. Either a setting which simply says delete the version which is older than a > threshold and is not active, OR > 2. By tracking the count of queries running on older versions and deleting the > ones which are not the latest and are not being used by any query. This would > require some sort of a background thread monitoring the table for obsolete > versions. As shown in the article mentioned above, this would also require > incrementing version count whenever a version is queried and decrement it > once the query is done. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15361) Dynamic partition INSERT fails with a MoveTask failure
[ https://issues.apache.org/jira/browse/HIVE-15361?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723093#comment-15723093 ] Hive QA commented on HIVE-15361: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12841783/HIVE-15361.1.patch {color:green}SUCCESS:{color} +1 due to 4 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 179 failed/errored test(s), 10762 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestAccumuloCliDriver.testCliDriver[accumulo_single_sourced_multi_insert] (batchId=216) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=50) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[annotate_stats_table] (batchId=19) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[autoColumnStats_5] (batchId=37) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[autoColumnStats_6] (batchId=59) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[binary_output_format] (batchId=78) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[bucketmapjoin5] (batchId=76) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[bucketmapjoin_negative2] (batchId=62) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[bucketmapjoin_negative] (batchId=21) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[case_sensitivity] (batchId=61) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[cast1] (batchId=68) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[constprog_dp] (batchId=17) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[constprog_type] (batchId=1) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[ctas_colname] (batchId=53) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[ctas_uses_database_location] (batchId=31) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[cte_3] (batchId=31) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[cte_mat_3] 
(batchId=22) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[cte_mat_4] (batchId=5) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[cte_mat_5] (batchId=2) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[dynamic_rdd_cache] (batchId=49) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[explain_ddl] (batchId=43) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_sort_1_23] (batchId=71) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_sort_3] (batchId=73) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_sort_5] (batchId=41) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_sort_7] (batchId=69) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_sort_skew_1_23] (batchId=8) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_sort_test_1] (batchId=7) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_auto] (batchId=41) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_auto_file_format] (batchId=50) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_auto_mult_tables_compact] (batchId=32) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_auto_multiple] (batchId=31) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_auto_partitioned] (batchId=10) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_auto_update] (batchId=67) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_compression] (batchId=9) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_serde] (batchId=15) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_skewtable] (batchId=73) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input11] (batchId=49) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input12] (batchId=68) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input13] (batchId=68) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input34] (batchId=17) 
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input35] (batchId=53) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input36] (batchId=13) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input38] (batchId=12) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input6] (batchId=80) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input7] (batchId=67) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input8] (batchId=8) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input9] (batchId=54) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input_dynamicserde] (batchId=78) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input_part1] (batchId=7) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input_part2] (batchId=43) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input_part5] (batchId=35) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input_testsequencefile] (batchId=78) org.apache.hado
[jira] [Updated] (HIVE-15301) Expose SparkStatistics information in SparkTask
[ https://issues.apache.org/jira/browse/HIVE-15301?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Chao Sun updated HIVE-15301: Fix Version/s: 2.2.0 > Expose SparkStatistics information in SparkTask > --- > > Key: HIVE-15301 > URL: https://issues.apache.org/jira/browse/HIVE-15301 > Project: Hive > Issue Type: Improvement > Components: Spark >Affects Versions: 2.2.0 >Reporter: zhihai xu >Assignee: zhihai xu >Priority: Minor > Fix For: 2.2.0 > > Attachments: HIVE-15301.000.patch > > > Expose SparkStatistics information in SparkTask. So we can get > SparkStatistics in Hook. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15301) Expose SparkStatistics information in SparkTask
[ https://issues.apache.org/jira/browse/HIVE-15301?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Chao Sun updated HIVE-15301: Resolution: Fixed Status: Resolved (was: Patch Available) Committed to master. Thanks [~zxu] for the patch and [~xuefuz] for the review! > Expose SparkStatistics information in SparkTask > --- > > Key: HIVE-15301 > URL: https://issues.apache.org/jira/browse/HIVE-15301 > Project: Hive > Issue Type: Improvement > Components: Spark >Affects Versions: 2.2.0 >Reporter: zhihai xu >Assignee: zhihai xu >Priority: Minor > Attachments: HIVE-15301.000.patch > > > Expose SparkStatistics information in SparkTask. So we can get > SparkStatistics in Hook. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15337) Need to specify starttime when putting Compaction job into "attempted" state
[ https://issues.apache.org/jira/browse/HIVE-15337?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723072#comment-15723072 ] Wei Zheng commented on HIVE-15337: -- +1 > Need to specify starttime when putting Compaction job into "attempted" state > > > Key: HIVE-15337 > URL: https://issues.apache.org/jira/browse/HIVE-15337 > Project: Hive > Issue Type: Improvement > Components: Transactions >Affects Versions: 1.0.0 >Reporter: Eugene Koifman >Assignee: Eugene Koifman > Attachments: HIVE-15337.01.patch > > > W/o this SHOW COMPACTIONS output is not as useful > Also, add Hadoop Job ID to SHOW COMPACTIONS output -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-12504) TxnHandler.abortTxn() should check if already aborted to improve message
[ https://issues.apache.org/jira/browse/HIVE-12504?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15723035#comment-15723035 ] Eugene Koifman commented on HIVE-12504: --- ensureValidTxn() is a misnomer. What it really does is produce a more informative error msg. It's not practical to call it from abortTxns() since you won't know which txn id to call it on. > TxnHandler.abortTxn() should check if already aborted to improve message > > > Key: HIVE-12504 > URL: https://issues.apache.org/jira/browse/HIVE-12504 > Project: Hive > Issue Type: Bug > Components: Transactions >Affects Versions: 1.0.0 >Reporter: Eugene Koifman >Assignee: Eugene Koifman > Attachments: HIVE-12504.01.patch, HIVE-12504.patch > > > If the update statement misses, it would be useful for abortTxn() to > run another query to see if the requested txnid is in Aborted state to > improve error msg. > Note: get a separate read_committed connection for this. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15346) "values temp table" should not be an input source for the query
[ https://issues.apache.org/jira/browse/HIVE-15346?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Aihua Xu updated HIVE-15346: Attachment: HIVE-15346.2.text > "values temp table" should not be an input source for the query > --- > > Key: HIVE-15346 > URL: https://issues.apache.org/jira/browse/HIVE-15346 > Project: Hive > Issue Type: Sub-task > Components: Query Planning >Affects Versions: 2.2.0 >Reporter: Aihua Xu >Assignee: Aihua Xu > Attachments: HIVE-15346.1.patch, HIVE-15346.2.patch, HIVE-15346.2.text > > > For "insert values()" query, a values tmp table is created. Such table is > treated as input source. That seems to be incorrect since that should be > treated internal and temporary. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15360) Nested column pruning: add pruned column paths to explain output
[ https://issues.apache.org/jira/browse/HIVE-15360?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Chao Sun updated HIVE-15360: Attachment: HIVE-15360.1.patch Attaching initial patch for testing. > Nested column pruning: add pruned column paths to explain output > > > Key: HIVE-15360 > URL: https://issues.apache.org/jira/browse/HIVE-15360 > Project: Hive > Issue Type: Sub-task > Components: Query Planning >Reporter: Chao Sun >Assignee: Chao Sun >Priority: Minor > Attachments: HIVE-15360.1.patch > > > We should add the pruned nested column paths to the explain output for easier > tracing and debugging. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15360) Nested column pruning: add pruned column paths to explain output
[ https://issues.apache.org/jira/browse/HIVE-15360?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Chao Sun updated HIVE-15360: Status: Patch Available (was: Open) > Nested column pruning: add pruned column paths to explain output > > > Key: HIVE-15360 > URL: https://issues.apache.org/jira/browse/HIVE-15360 > Project: Hive > Issue Type: Sub-task > Components: Query Planning >Reporter: Chao Sun >Assignee: Chao Sun >Priority: Minor > Attachments: HIVE-15360.1.patch > > > We should add the pruned nested column paths to the explain output for easier > tracing and debugging. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15361) Dynamic partition INSERT fails with a MoveTask failure
[ https://issues.apache.org/jira/browse/HIVE-15361?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sergio Peña updated HIVE-15361: --- Status: Patch Available (was: In Progress) > Dynamic partition INSERT fails with a MoveTask failure > -- > > Key: HIVE-15361 > URL: https://issues.apache.org/jira/browse/HIVE-15361 > Project: Hive > Issue Type: Bug > Components: Hive >Affects Versions: 2.2.0 >Reporter: Sergio Peña >Assignee: Sergio Peña >Priority: Critical > Attachments: HIVE-15361.1.patch > > > {panel:title=Repro steps} > CREATE EXTERNAL TABLE external_1k0jU (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING); > ALTER TABLE external_1k0jU ADD PARTITION (COUNTRY='USA', STATE='CA'); > INSERT INTO external_1k0jU PARTITION (country='USA', state='CA') values > ('John Doe', 23), ('Jane Doe', 22); > CREATE EXTERNAL TABLE external_P3kiT (name STRING, age INT) PARTITIONED BY > (country STRING, state STRING) location 's3a://hive-on-s3/foo/bar/'; > set hive.exec.dynamic.partition.mode=nonstrict; > INSERT INTO TABLE external_P3kiT PARTITION (country, state) SELECT * FROM > external_1k0jU; > {panel} > {panel:title=Error & stack trace} > ERROR : FAILED: Execution Error, return code 1 from > org.apache.hadoop.hive.ql.exec.MoveTask > INFO : MapReduce Jobs Launched: > INFO : Stage-Stage-1: Map: 1 Cumulative CPU: 3.64 sec HDFS Read: 3656 > HDFS Write: 99 SUCCESS > INFO : Total MapReduce CPU Time Spent: 3 seconds 640 msec > INFO : Completed executing > command(queryId=hive_20161201113939_d64df5d7-a4c4-4885-846f-10f0223fcf4c); > Time taken: 23.227 seconds > Error: Error while processing statement: FAILED: Execution Error, return code > 1 from org.apache.hadoop.hive.ql.exec.MoveTask (state=08S01,code=1) > INFO : Loading data to table default.external_p3kit partition (country=null, > state=null) from > s3a://hive-on-s3/foo/bar/.hive-staging_hive_2016-12-01_11-39-48_741_6724911837889341086-13/-ext-10002 > {code} > ERROR : Failed with exception 
MetaException(message:Invalid partition key & > values; keys [country, state, ], values []) > org.apache.hadoop.hive.ql.metadata.HiveException: > MetaException(message:Invalid partition key & values; keys [country, state, > ], values []) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1902) > at org.apache.hadoop.hive.ql.metadata.Hive.getPartition(Hive.java:1834) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1428) > at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1388) > at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:453) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:214) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1976) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1689) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1421) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1205) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1200) > at > org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:237) > at > org.apache.hive.service.cli.operation.SQLOperation.access$300(SQLOperation.java:88) > at > org.apache.hive.service.cli.operation.SQLOperation$3$1.run(SQLOperation.java:293) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1796) > at > org.apache.hive.service.cli.operation.SQLOperation$3.run(SQLOperation.java:306) > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) > at java.util.concurrent.FutureTask.run(FutureTask.java:262) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at 
java.lang.Thread.run(Thread.java:745) > Caused by: MetaException(message:Invalid partition key & values; keys > [country, state, ], values []) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_with_auth_result$get_partition_with_auth_resultStandardScheme.read(ThriftHiveMetastore.java:65142) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_with_auth_result$get_partition_with_auth_resultStandardScheme.read(ThriftHiveMetastore.java:65119) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_with_auth_result.read(ThriftHiveMetastore.java:65050) >