[jira] [Updated] (HIVE-24771) Fix hang of TransactionalKafkaWriterTest
[ https://issues.apache.org/jira/browse/HIVE-24771?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Zoltan Haindrich updated HIVE-24771: Attachment: jstack.1 jstack.2 jstack.3 hive.log.gz > Fix hang of TransactionalKafkaWriterTest > - > > Key: HIVE-24771 > URL: https://issues.apache.org/jira/browse/HIVE-24771 > Project: Hive > Issue Type: Bug >Reporter: Zoltan Haindrich >Priority: Major > Attachments: hive.log.gz, jstack.1, jstack.2, jstack.3 > > > this test seems to hang randomly - I've launched 3 checks against it - all of > which started to hang after some time > http://ci.hive.apache.org/job/hive-flaky-check/187/ > http://ci.hive.apache.org/job/hive-flaky-check/188/ > http://ci.hive.apache.org/job/hive-flaky-check/189/ > {code} > "main" #1 prio=5 os_prio=0 tid=0x7f1d5400a800 nid=0x31e waiting on > condition [0x7f1d59381000] >java.lang.Thread.State: WAITING (parking) > at sun.misc.Unsafe.park(Native Method) > - parking to wait for <0x894b3ed8> (a > java.util.concurrent.CountDownLatch$Sync) > at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:837) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedInterruptibly(AbstractQueuedSynchronizer.java:999) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireSharedInterruptibly(AbstractQueuedSynchronizer.java:1308) > at java.util.concurrent.CountDownLatch.await(CountDownLatch.java:231) > at > org.apache.kafka.clients.producer.internals.TransactionalRequestResult.await(TransactionalRequestResult.java:56) > at > org.apache.hadoop.hive.kafka.HiveKafkaProducer.flushNewPartitions(HiveKafkaProducer.java:187) > at > org.apache.hadoop.hive.kafka.HiveKafkaProducer.flush(HiveKafkaProducer.java:123) > at > org.apache.hadoop.hive.kafka.TransactionalKafkaWriter.close(TransactionalKafkaWriter.java:189) > at > 
org.apache.hadoop.hive.kafka.TransactionalKafkaWriterTest.writeAndCommit(TransactionalKafkaWriterTest.java:182) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26) > at > org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27) > at > org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:54) > at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306) > at > org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100) > at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:366) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63) > at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331) > at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79) > at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329) > at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66) > at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293) > at > org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26) > at > org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27) > at 
org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306) > at org.junit.runners.ParentRunner.run(ParentRunner.java:413) > at > org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:365) > at > org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:273) > at > org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:238) > at >
[jira] [Commented] (HIVE-24771) Fix hang of TransactionalKafkaWriterTest
[ https://issues.apache.org/jira/browse/HIVE-24771?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17282899#comment-17282899 ] Zoltan Haindrich commented on HIVE-24771: - attached jstack-s from the 3 containers; and a hive.log there is an interesting exception in the logs: {code} 2021-02-10T03:26:01,179 DEBUG [data-plane-kafka-network-thread-0-ListenerName(PLAINTEXT)-PLAINTEXT-0] network.Processor: Processor 0 listening to new connection from /127.0.0.1:38676 2021-02-10T03:26:01,178 WARN [main] utils.AppInfoParser: Error registering AppInfo mbean javax.management.InstanceAlreadyExistsException: kafka.producer:type=app-info,id=attempt_m_0001_0 at com.sun.jmx.mbeanserver.Repository.addMBean(Repository.java:437) ~[?:1.8.0_262] at com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.registerWithRepository(DefaultMBeanServerInterceptor.java:1898) ~[?:1.8.0_262] at com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.registerDynamicMBean(DefaultMBeanServerInterceptor.java:966) ~[?:1.8.0_262] at com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.registerObject(DefaultMBeanServerInterceptor.java:900) ~[?:1.8.0_262] at com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.registerMBean(DefaultMBeanServerInterceptor.java:324) ~[?:1.8.0_262] at com.sun.jmx.mbeanserver.JmxMBeanServer.registerMBean(JmxMBeanServer.java:522) ~[?:1.8.0_262] at org.apache.kafka.common.utils.AppInfoParser.registerAppInfo(AppInfoParser.java:64) [kafka-clients-2.5.0.jar:?] at org.apache.kafka.clients.producer.KafkaProducer.(KafkaProducer.java:428) [kafka-clients-2.5.0.jar:?] at org.apache.kafka.clients.producer.KafkaProducer.(KafkaProducer.java:298) [kafka-clients-2.5.0.jar:?] at org.apache.hadoop.hive.kafka.HiveKafkaProducer.(HiveKafkaProducer.java:67) [classes/:?] at org.apache.hadoop.hive.kafka.TransactionalKafkaWriter.(TransactionalKafkaWriter.java:103) [classes/:?] 
{code} > Fix hang of TransactionalKafkaWriterTest > - > > Key: HIVE-24771 > URL: https://issues.apache.org/jira/browse/HIVE-24771 > Project: Hive > Issue Type: Bug >Reporter: Zoltan Haindrich >Priority: Major > > this test seems to hang randomly - I've launched 3 checks against it - all of > which started to hang after some time > http://ci.hive.apache.org/job/hive-flaky-check/187/ > http://ci.hive.apache.org/job/hive-flaky-check/188/ > http://ci.hive.apache.org/job/hive-flaky-check/189/ > {code} > "main" #1 prio=5 os_prio=0 tid=0x7f1d5400a800 nid=0x31e waiting on > condition [0x7f1d59381000] >java.lang.Thread.State: WAITING (parking) > at sun.misc.Unsafe.park(Native Method) > - parking to wait for <0x894b3ed8> (a > java.util.concurrent.CountDownLatch$Sync) > at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:837) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedInterruptibly(AbstractQueuedSynchronizer.java:999) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireSharedInterruptibly(AbstractQueuedSynchronizer.java:1308) > at java.util.concurrent.CountDownLatch.await(CountDownLatch.java:231) > at > org.apache.kafka.clients.producer.internals.TransactionalRequestResult.await(TransactionalRequestResult.java:56) > at > org.apache.hadoop.hive.kafka.HiveKafkaProducer.flushNewPartitions(HiveKafkaProducer.java:187) > at > org.apache.hadoop.hive.kafka.HiveKafkaProducer.flush(HiveKafkaProducer.java:123) > at > org.apache.hadoop.hive.kafka.TransactionalKafkaWriter.close(TransactionalKafkaWriter.java:189) > at > org.apache.hadoop.hive.kafka.TransactionalKafkaWriterTest.writeAndCommit(TransactionalKafkaWriterTest.java:182) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26) > at >
[jira] [Assigned] (HIVE-24770) Upgrade should update changed FQN in HMS DB.
[ https://issues.apache.org/jira/browse/HIVE-24770?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Naveen Gangam reassigned HIVE-24770: > Upgrade should update changed FQN in HMS DB. > > > Key: HIVE-24770 > URL: https://issues.apache.org/jira/browse/HIVE-24770 > Project: Hive > Issue Type: Sub-task > Components: Hive >Affects Versions: 4.0.0 >Reporter: Naveen Gangam >Assignee: Naveen Gangam >Priority: Major > > While the parent change has does not cause this directly, but post upgrade > the existing tables that use MultiDelimiterSerDe will be broken as the > hive-contrib jar would no longer exist. Instead if the Hive schema upgrade > script can update the SERDES table to alter the classname to the new > classname, the old tables would work automatically. Much better user > experience. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Resolved] (HIVE-24625) CTAS with TBLPROPERTIES ('transactional'='false') loads data into incorrect directory
[ https://issues.apache.org/jira/browse/HIVE-24625?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Naveen Gangam resolved HIVE-24625. -- Fix Version/s: 4.0.0 Resolution: Fixed Fix has been merged into master. Thank you for the contribution [~amagyar]. Closing the jira. > CTAS with TBLPROPERTIES ('transactional'='false') loads data into incorrect > directory > - > > Key: HIVE-24625 > URL: https://issues.apache.org/jira/browse/HIVE-24625 > Project: Hive > Issue Type: Bug > Components: HiveServer2, Metastore >Reporter: Attila Magyar >Assignee: Attila Magyar >Priority: Major > Labels: pull-request-available > Fix For: 4.0.0 > > Time Spent: 1h 10m > Remaining Estimate: 0h > > MetastoreDefaultTransformer in HMS converts a managed non transactional table > to external table. MoveTask still uses the managed path when loading the > data, resulting an always empty table. > {code:java} > create table tbl1 TBLPROPERTIES ('transactional'='false') as select * from > other;{code} > After the conversion the table location points to an external directory: > Location: | > hdfs://c670-node2.coelab.cloudera.com:8020/warehouse/tablespace/external/hive/tbl1 > Move task uses the managed location" > {code:java} > INFO : Moving data to directory > hdfs://...:8020/warehouse/tablespace/managed/hive/tbl1 from > hdfs://...:8020/warehouse/tablespace/managed/hive/.hive-staging_hive_2021-01-05_16-10-39_973_41005081081760609-4/-ext-1000 > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24625) CTAS with TBLPROPERTIES ('transactional'='false') loads data into incorrect directory
[ https://issues.apache.org/jira/browse/HIVE-24625?focusedWorklogId=551139=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-551139 ] ASF GitHub Bot logged work on HIVE-24625: - Author: ASF GitHub Bot Created on: 11/Feb/21 03:44 Start Date: 11/Feb/21 03:44 Worklog Time Spent: 10m Work Description: nrg4878 commented on pull request #1856: URL: https://github.com/apache/hive/pull/1856#issuecomment-777185671 Fix has been merged to master. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 551139) Time Spent: 1h 10m (was: 1h) > CTAS with TBLPROPERTIES ('transactional'='false') loads data into incorrect > directory > - > > Key: HIVE-24625 > URL: https://issues.apache.org/jira/browse/HIVE-24625 > Project: Hive > Issue Type: Bug > Components: HiveServer2, Metastore >Reporter: Attila Magyar >Assignee: Attila Magyar >Priority: Major > Labels: pull-request-available > Time Spent: 1h 10m > Remaining Estimate: 0h > > MetastoreDefaultTransformer in HMS converts a managed non transactional table > to external table. MoveTask still uses the managed path when loading the > data, resulting an always empty table. > {code:java} > create table tbl1 TBLPROPERTIES ('transactional'='false') as select * from > other;{code} > After the conversion the table location points to an external directory: > Location: | > hdfs://c670-node2.coelab.cloudera.com:8020/warehouse/tablespace/external/hive/tbl1 > Move task uses the managed location" > {code:java} > INFO : Moving data to directory > hdfs://...:8020/warehouse/tablespace/managed/hive/tbl1 from > hdfs://...:8020/warehouse/tablespace/managed/hive/.hive-staging_hive_2021-01-05_16-10-39_973_41005081081760609-4/-ext-1000 > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Commented] (HIVE-22717) Exception in thread "main" java.lang.NoSuchMethodError: com.google.common.base.Preconditions.checkArgument(ZLjava/lang/String;Ljava/lang/Object;)V
[ https://issues.apache.org/jira/browse/HIVE-22717?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17282849#comment-17282849 ] Dhirendra Pandit commented on HIVE-22717: - Is there any workaround available for this solution. I am using Hadoop 3.2.1, Hive 3.1.2 and Tez 0.10.1 version and facing the same problem during tez job submission. You can find the logs below. 2021-02-11 04:01:33,573 [INFO] [main] |common.TezClassLoader|: Created TezClassLoader with parent classloader: sun.misc.Launcher$AppClassLoader@4b67cf4d, thread: 1, system classloader: sun.misc.Launcher$AppClassLoader@4b67cf4d 2021-02-11 04:01:34,097 [INFO] [main] |app.DAGAppMaster|: Creating DAGAppMaster for applicationId=application_1613010819541_0003, attemptNum=2, AMContainerId=container_1613010819541_0003_02_01, jvmPid=12590, userFromEnv=init, cliSessionOption=true, pwd=/tmp/hadoop-init/nm-local-dir/usercache/init/appcache/application_1613010819541_0003/container_1613010819541_0003_02_01, localDirs=/tmp/hadoop-init/nm-local-dir/usercache/init/appcache/application_1613010819541_0003, logDirs=/home/init/bigdata/hadoop-3.2.1/logs/userlogs/application_1613010819541_0003/container_1613010819541_0003_02_01 2021-02-11 04:01:34,164 [ERROR] [main] |app.DAGAppMaster|: Error starting DAGAppMaster java.lang.NoSuchMethodError: com.google.common.base.Preconditions.checkArgument(ZLjava/lang/String;Ljava/lang/Object;)V at org.apache.hadoop.conf.Configuration.set(Configuration.java:1357) at org.apache.hadoop.conf.Configuration.set(Configuration.java:1338) at org.apache.tez.common.TezUtilsInternal.addUserSpecifiedTezConfiguration(TezUtilsInternal.java:84) at org.apache.tez.dag.app.DAGAppMaster.main(DAGAppMaster.java:2377) > Exception in thread "main" java.lang.NoSuchMethodError: > com.google.common.base.Preconditions.checkArgument(ZLjava/lang/String;Ljava/lang/Object;)V > -- > > Key: HIVE-22717 > URL: https://issues.apache.org/jira/browse/HIVE-22717 > Project: Hive > Issue Type: Bug >Reporter: 
ZHangTianLong >Priority: Major > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24516) Txnhandler onrename might ignore exceptions
[ https://issues.apache.org/jira/browse/HIVE-24516?focusedWorklogId=551077&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-551077 ] ASF GitHub Bot logged work on HIVE-24516: - Author: ASF GitHub Bot Created on: 11/Feb/21 00:46 Start Date: 11/Feb/21 00:46 Worklog Time Spent: 10m Work Description: github-actions[bot] commented on pull request #1762: URL: https://github.com/apache/hive/pull/1762#issuecomment-777138529 This pull request has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Feel free to reach out on the d...@hive.apache.org list if the patch is in need of reviews. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 551077) Time Spent: 20m (was: 10m) > Txnhandler onrename might ignore exceptions > --- > > Key: HIVE-24516 > URL: https://issues.apache.org/jira/browse/HIVE-24516 > Project: Hive > Issue Type: Bug > Components: Hive >Reporter: Peter Varga >Assignee: Peter Varga >Priority: Major > Labels: pull-request-available > Time Spent: 20m > Remaining Estimate: 0h > > This is a followup on HIVE-24193. Table not exists errors shouldn't be > ignored in the first place. > {code} > } catch (SQLException e) { > LOG.debug("Going to rollback: " + callSig); > rollbackDBConn(dbConn); > checkRetryable(dbConn, e, callSig); > if (e.getMessage().contains("does not exist")) { > LOG.warn("Cannot perform " + callSig + " since metastore table does > not exist"); > } else { > throw new MetaException("Unable to " + callSig + ":" + > StringUtils.stringifyException(e)); > } > } > {code} > This error handling might have been put there for backward compatibility for > missing acid metadata tables, but this is not needed anymore. 
-- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24397) Add the projection specification to the table request object and add placeholders in ObjectStore.java
[ https://issues.apache.org/jira/browse/HIVE-24397?focusedWorklogId=551076&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-551076 ] ASF GitHub Bot logged work on HIVE-24397: - Author: ASF GitHub Bot Created on: 11/Feb/21 00:45 Start Date: 11/Feb/21 00:45 Worklog Time Spent: 10m Work Description: github-actions[bot] closed pull request #1681: URL: https://github.com/apache/hive/pull/1681 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 551076) Time Spent: 2.5h (was: 2h 20m) > Add the projection specification to the table request object and add > placeholders in ObjectStore.java > - > > Key: HIVE-24397 > URL: https://issues.apache.org/jira/browse/HIVE-24397 > Project: Hive > Issue Type: Sub-task > Components: Hive >Reporter: Narayanan Venkateswaran >Assignee: Narayanan Venkateswaran >Priority: Minor > Labels: pull-request-available > Fix For: 4.0.0 > > Time Spent: 2.5h > Remaining Estimate: 0h > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-23458) Introduce unified thread pool for scheduled jobs
[ https://issues.apache.org/jira/browse/HIVE-23458?focusedWorklogId=551017=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-551017 ] ASF GitHub Bot logged work on HIVE-23458: - Author: ASF GitHub Bot Created on: 10/Feb/21 22:13 Start Date: 10/Feb/21 22:13 Worklog Time Spent: 10m Work Description: EugeneChung commented on pull request #1919: URL: https://github.com/apache/hive/pull/1919#issuecomment-777071053 It's just failure of downloading mariadb docker image. ``` [2021-02-03T17:47:35.784Z] Unable to find image 'mariadb:latest' locally [2021-02-03T17:47:36.736Z] latest: Pulling from library/mariadb [2021-02-03T17:47:36.737Z] docker: unauthorized: authentication required. [2021-02-03T17:47:36.737Z] See 'docker run --help'. script returned exit code 125 ``` This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 551017) Time Spent: 1h 10m (was: 1h) > Introduce unified thread pool for scheduled jobs > > > Key: HIVE-23458 > URL: https://issues.apache.org/jira/browse/HIVE-23458 > Project: Hive > Issue Type: Improvement > Components: HiveServer2 >Reporter: Eugene Chung >Assignee: Eugene Chung >Priority: Major > Labels: pull-request-available, todoc4.0 > Fix For: 4.0.0 > > Attachments: HIVE-23458.01.patch, HIVE-23458.02.patch, > HIVE-23458.03.patch > > Time Spent: 1h 10m > Remaining Estimate: 0h > > As I mentioned in [the comment of > HIVE-23164|https://issues.apache.org/jira/browse/HIVE-23164?focusedCommentId=17089506=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-17089506], > I've made the unified scheduled executor service like > org.apache.hadoop.hive.metastore.ThreadPool. > I think it could help > 1. 
to minimize the possibility of making non-daemon threads when developers > need ScheduledExecutorService > 2. to achieve the utilization of server resources because the current > situation is all of the modules make its own ScheduledExecutorService and all > of the threads are just using for one job. > 3. administrators of Hive servers by providing > hive.exec.scheduler.num.threads configuration so that they can predict and > set how many threads are used and needed. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24769) HiveMetaStore getTables() doesn't have Owner information to filter on it
[ https://issues.apache.org/jira/browse/HIVE-24769?focusedWorklogId=551004=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-551004 ] ASF GitHub Bot logged work on HIVE-24769: - Author: ASF GitHub Bot Created on: 10/Feb/21 21:37 Start Date: 10/Feb/21 21:37 Worklog Time Spent: 10m Work Description: saihemanth-cloudera opened a new pull request #1970: URL: https://github.com/apache/hive/pull/1970 …veMetaStoreClient#getTables() ### What changes were proposed in this pull request? Table object has table owner information so that authorization can be done on these objects. ### Why are the changes needed? Otherwise, the "show tables" command in beeline shows all the tables in db instead of the user-owned tables. By having this info, we can filter out tables that are not owned by the user. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Local Machine, Remote cluster. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 551004) Remaining Estimate: 0h Time Spent: 10m > HiveMetaStore getTables() doesn't have Owner information to filter on it > > > Key: HIVE-24769 > URL: https://issues.apache.org/jira/browse/HIVE-24769 > Project: Hive > Issue Type: Improvement >Reporter: Sai Hemanth Gantasala >Assignee: Sai Hemanth Gantasala >Priority: Major > Time Spent: 10m > Remaining Estimate: 0h > > HiveMetaStoreClient#getTables() api should have table owner information so > that they can be used while authorizing in Apache Ranger/Sentry. > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-24769) HiveMetaStore getTables() doesn't have Owner information to filter on it
[ https://issues.apache.org/jira/browse/HIVE-24769?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] ASF GitHub Bot updated HIVE-24769: -- Labels: pull-request-available (was: ) > HiveMetaStore getTables() doesn't have Owner information to filter on it > > > Key: HIVE-24769 > URL: https://issues.apache.org/jira/browse/HIVE-24769 > Project: Hive > Issue Type: Improvement >Reporter: Sai Hemanth Gantasala >Assignee: Sai Hemanth Gantasala >Priority: Major > Labels: pull-request-available > Time Spent: 10m > Remaining Estimate: 0h > > HiveMetaStoreClient#getTables() api should have table owner information so > that they can be used while authorizing in Apache Ranger/Sentry. > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Commented] (HIVE-24734) Sanity check in HiveSplitGenerator available slot calculation
[ https://issues.apache.org/jira/browse/HIVE-24734?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17282660#comment-17282660 ] Zoltan Matyus commented on HIVE-24734: -- [~abstractdog]: could you comment on this? This is probably an edge case, but I see no harm in making the change... > Sanity check in HiveSplitGenerator available slot calculation > - > > Key: HIVE-24734 > URL: https://issues.apache.org/jira/browse/HIVE-24734 > Project: Hive > Issue Type: Bug > Components: Tez >Affects Versions: 4.0.0 >Reporter: Zoltan Matyus >Priority: Minor > > HiveSplitGenerator calculates the number of available slots from available > memory like this: > {code:java} > if (getContext() != null) { > totalResource = getContext().getTotalAvailableResource().getMemory(); > taskResource = getContext().getVertexTaskResource().getMemory(); > availableSlots = totalResource / taskResource; > } > {code} > I had a scenario where the total memory was calculated correctly, but the > task memory returned -1. This led to error like these: > {noformat} > tez.HiveSplitGenerator: Number of input splits: 1. -3641 available slots, 1.7 > waves. Input format is: org.apache.hadoop.hive.ql.io.HiveInputFormat > Estimated number of tasks: -6189 for bucket 1 > java.lang.IllegalArgumentException: Illegal Capacity: -6189 > {noformat} > Admittedly, this happened during development, and hopefully will not occur on > a properly configured cluster. (Although I'm not sure what the issue was on > my setup, possibly XMX set higher than physical memory.) > In any case, it feels like setting availableSlots < 1 will never lead to > desired behavior, so in such cases we could emit a warning and correct the > value to 1. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24743) [HS2] Send tableId to get_partitions_by_names_req HMS API from HS2
[ https://issues.apache.org/jira/browse/HIVE-24743?focusedWorklogId=550857=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550857 ] ASF GitHub Bot logged work on HIVE-24743: - Author: ASF GitHub Bot Created on: 10/Feb/21 19:12 Start Date: 10/Feb/21 19:12 Worklog Time Spent: 10m Work Description: yongzhi commented on a change in pull request #1956: URL: https://github.com/apache/hive/pull/1956#discussion_r574002879 ## File path: standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java ## @@ -2223,6 +2224,29 @@ public PartitionsResponse getPartitionsRequest(PartitionsRequest req) return deepCopyPartitions(FilterUtils.filterPartitionsIfEnabled(isClientFilterEnabled, filterHook, parts)); } + @Override + public GetPartitionsByNamesResult getPartitionsByNames(GetPartitionsByNamesRequest req) + throws NoSuchObjectException, MetaException, TException { +checkDbAndTableFilters(req.getCatName(), req.getDb_name(), req.getTbl_name()); +req.setDb_name(prependCatalogToDbName(req.getCatName(), req.getDb_name(), conf)); +if (req.getValidWriteIdList() == null) { + req.setValidWriteIdList(getValidWriteIdList(prependCatalogToDbName(req.getCatName(), req.getDb_name(), + conf), req.getTbl_name())); +} +if (req.getId() <= 0) { + req.setId(getTable(prependCatalogToDbName(req.getCatName(), req.getDb_name(), conf), req.getTbl_name()).getId()); Review comment: getTable put here will add one more calls to HMS if cache is not used. So it should be at cachedHMS client. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550857) Time Spent: 0.5h (was: 20m) > [HS2] Send tableId to get_partitions_by_names_req HMS API from HS2 > -- > > Key: HIVE-24743 > URL: https://issues.apache.org/jira/browse/HIVE-24743 > Project: Hive > Issue Type: Sub-task >Reporter: Kishen Das >Assignee: Kishen Das >Priority: Major > Labels: pull-request-available > Time Spent: 0.5h > Remaining Estimate: 0h > > As part of ( HIVE-23821: Send tableId in request for all the new HMS > get_partition APIs ) we added logic to send tableId in the request for > several get_partition APIs, but looks like it was missed out for > getPartitionsByNames. TableId and validWriteIdList are used to maintain > consistency, when HMS API response is being served from a remote cache. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24758) Log Tez Task DAG ID, DAG Session ID, HS2 Hostname
[ https://issues.apache.org/jira/browse/HIVE-24758?focusedWorklogId=550836=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550836 ] ASF GitHub Bot logged work on HIVE-24758: - Author: ASF GitHub Bot Created on: 10/Feb/21 18:41 Start Date: 10/Feb/21 18:41 Worklog Time Spent: 10m Work Description: belugabehr commented on a change in pull request #1963: URL: https://github.com/apache/hive/pull/1963#discussion_r573980532 ## File path: ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java ## @@ -137,6 +139,8 @@ public int execute() { Context ctx = null; Ref sessionRef = Ref.from(null); +final String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID); Review comment: In working with this code, I think the Hive query ID is passed around in a few different ways. There can be follow up work to unify/simplify this, but I would not want to change it too much in this unrelated ticket. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550836) Time Spent: 1h 20m (was: 1h 10m) > Log Tez Task DAG ID, DAG Session ID, HS2 Hostname > - > > Key: HIVE-24758 > URL: https://issues.apache.org/jira/browse/HIVE-24758 > Project: Hive > Issue Type: Improvement >Reporter: David Mollitor >Assignee: David Mollitor >Priority: Major > Labels: pull-request-available > Time Spent: 1h 20m > Remaining Estimate: 0h > > In order to get the logs for a particular query, submitted to Tez on YARN, > the following pieces of information are required: > * YARN Application ID > * TEZ DAG ID > * HS2 Host that ran the job > Include this information in TezTask output. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24758) Log Tez Task DAG ID, DAG Session ID, HS2 Hostname
[ https://issues.apache.org/jira/browse/HIVE-24758?focusedWorklogId=550835=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550835 ] ASF GitHub Bot logged work on HIVE-24758: - Author: ASF GitHub Bot Created on: 10/Feb/21 18:40 Start Date: 10/Feb/21 18:40 Worklog Time Spent: 10m Work Description: belugabehr commented on a change in pull request #1963: URL: https://github.com/apache/hive/pull/1963#discussion_r573979578 ## File path: ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java ## @@ -236,6 +239,10 @@ public int execute() { throw new HiveException("Operation cancelled"); } +// Log all the info required to find the various logs for this query +LOG.info("HS2 Host: [{}], Query ID: [{}], Dag ID: [{}], DAG Session ID: [{}]", getHostNameIP(), queryId, Review comment: Hey @pgaref, thanks for the valuable input. So, on one hand it may seem confusing as to the need to log the host name because if you are looking at this log file then of course the hostname is already known. However, as I understand the code, these log messages are sent (redirected) to the client via Thrift RPC APIs. This is helpful information for debugging for the client as it's not otherwise clear which instance of HS2 is processing the query, if for example, a load balancer is between the client and HS2. I had thought about including some sort of HS2 unique ID as well, but I didn't find such a capability in the project already and did not want to introduce here. However, these 4 pieces of information allow a client to report a problem to the admin and allow the admin to grab all the log files: HS2 and YARN TEZ DAG logs. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550835) Time Spent: 1h 10m (was: 1h) > Log Tez Task DAG ID, DAG Session ID, HS2 Hostname > - > > Key: HIVE-24758 > URL: https://issues.apache.org/jira/browse/HIVE-24758 > Project: Hive > Issue Type: Improvement >Reporter: David Mollitor >Assignee: David Mollitor >Priority: Major > Labels: pull-request-available > Time Spent: 1h 10m > Remaining Estimate: 0h > > In order to get the logs for a particular query, submitted to Tez on YARN, > the following pieces of information are required: > * YARN Application ID > * TEZ DAG ID > * HS2 Host that ran the job > Include this information in TezTask output. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Assigned] (HIVE-24769) HiveMetaStore getTables() doesn't have Owner information to filter on it
[ https://issues.apache.org/jira/browse/HIVE-24769?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sai Hemanth Gantasala reassigned HIVE-24769: > HiveMetaStore getTables() doesn't have Owner information to filter on it > > > Key: HIVE-24769 > URL: https://issues.apache.org/jira/browse/HIVE-24769 > Project: Hive > Issue Type: Improvement >Reporter: Sai Hemanth Gantasala >Assignee: Sai Hemanth Gantasala >Priority: Major > > HiveMetaStoreClient#getTables() api should have table owner information so > that they can be used while authorizing in Apache Ranger/Sentry. > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24768) Use jackson-bom everywhere for version replacement
[ https://issues.apache.org/jira/browse/HIVE-24768?focusedWorklogId=550830=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550830 ] ASF GitHub Bot logged work on HIVE-24768: - Author: ASF GitHub Bot Created on: 10/Feb/21 18:30 Start Date: 10/Feb/21 18:30 Worklog Time Spent: 10m Work Description: saihemanth-cloudera opened a new pull request #1969: URL: https://github.com/apache/hive/pull/1969 …intain versions ### What changes were proposed in this pull request? Changes in pom files to maintain Jackson dependency version. ### Why are the changes needed? It is an optimization so that it'll be easy to replace the dependency version. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Local Machine. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550830) Remaining Estimate: 0h Time Spent: 10m > Use jackson-bom everywhere for version replacement > -- > > Key: HIVE-24768 > URL: https://issues.apache.org/jira/browse/HIVE-24768 > Project: Hive > Issue Type: Improvement >Reporter: Sai Hemanth Gantasala >Assignee: Sai Hemanth Gantasala >Priority: Major > Time Spent: 10m > Remaining Estimate: 0h > > It's more of an optimization but makes it easier to replace the versions > where ever necessary for Jackson dependency. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-24768) Use jackson-bom everywhere for version replacement
[ https://issues.apache.org/jira/browse/HIVE-24768?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] ASF GitHub Bot updated HIVE-24768: -- Labels: pull-request-available (was: ) > Use jackson-bom everywhere for version replacement > -- > > Key: HIVE-24768 > URL: https://issues.apache.org/jira/browse/HIVE-24768 > Project: Hive > Issue Type: Improvement >Reporter: Sai Hemanth Gantasala >Assignee: Sai Hemanth Gantasala >Priority: Major > Labels: pull-request-available > Time Spent: 10m > Remaining Estimate: 0h > > It's more of an optimization but makes it easier to replace the versions > where ever necessary for Jackson dependency. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24743) [HS2] Send tableId to get_partitions_by_names_req HMS API from HS2
[ https://issues.apache.org/jira/browse/HIVE-24743?focusedWorklogId=550829=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550829 ] ASF GitHub Bot logged work on HIVE-24743: - Author: ASF GitHub Bot Created on: 10/Feb/21 18:27 Start Date: 10/Feb/21 18:27 Worklog Time Spent: 10m Work Description: kishendas commented on a change in pull request #1956: URL: https://github.com/apache/hive/pull/1956#discussion_r573970499 ## File path: standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java ## @@ -2223,6 +2224,29 @@ public PartitionsResponse getPartitionsRequest(PartitionsRequest req) return deepCopyPartitions(FilterUtils.filterPartitionsIfEnabled(isClientFilterEnabled, filterHook, parts)); } + @Override + public GetPartitionsByNamesResult getPartitionsByNames(GetPartitionsByNamesRequest req) + throws NoSuchObjectException, MetaException, TException { +checkDbAndTableFilters(req.getCatName(), req.getDb_name(), req.getTbl_name()); +req.setDb_name(prependCatalogToDbName(req.getCatName(), req.getDb_name(), conf)); +if (req.getValidWriteIdList() == null) { + req.setValidWriteIdList(getValidWriteIdList(prependCatalogToDbName(req.getCatName(), req.getDb_name(), + conf), req.getTbl_name())); +} +if (req.getId() <= 0) { + req.setId(getTable(prependCatalogToDbName(req.getCatName(), req.getDb_name(), conf), req.getTbl_name()).getId()); Review comment: That's right. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550829) Time Spent: 20m (was: 10m) > [HS2] Send tableId to get_partitions_by_names_req HMS API from HS2 > -- > > Key: HIVE-24743 > URL: https://issues.apache.org/jira/browse/HIVE-24743 > Project: Hive > Issue Type: Sub-task >Reporter: Kishen Das >Assignee: Kishen Das >Priority: Major > Labels: pull-request-available > Time Spent: 20m > Remaining Estimate: 0h > > As part of ( HIVE-23821: Send tableId in request for all the new HMS > get_partition APIs ) we added logic to send tableId in the request for > several get_partition APIs, but looks like it was missed out for > getPartitionsByNames. TableId and validWriteIdList are used to maintain > consistency, when HMS API response is being served from a remote cache. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24758) Log Tez Task DAG ID, DAG Session ID, HS2 Hostname
[ https://issues.apache.org/jira/browse/HIVE-24758?focusedWorklogId=550816&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550816 ] ASF GitHub Bot logged work on HIVE-24758: - Author: ASF GitHub Bot Created on: 10/Feb/21 17:43 Start Date: 10/Feb/21 17:43 Worklog Time Spent: 10m Work Description: pgaref commented on a change in pull request #1963: URL: https://github.com/apache/hive/pull/1963#discussion_r573937582 ## File path: ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java ## @@ -236,6 +239,10 @@ public int execute() { throw new HiveException("Operation cancelled"); } +// Log all the info required to find the various logs for this query +LOG.info("HS2 Host: [{}], Query ID: [{}], Dag ID: [{}], DAG Session ID: [{}]", getHostNameIP(), queryId, Review comment: I would change that to something like "Executing query with Id {} from dag {} by session {} " Not sure I would add the hostName IP here as this can be quite confusing, especially when running multiple containers on the same node (like kubernetes). Maybe we already have better info for that on a Task level ? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550816) Time Spent: 1h (was: 50m) > Log Tez Task DAG ID, DAG Session ID, HS2 Hostname > - > > Key: HIVE-24758 > URL: https://issues.apache.org/jira/browse/HIVE-24758 > Project: Hive > Issue Type: Improvement >Reporter: David Mollitor >Assignee: David Mollitor >Priority: Major > Labels: pull-request-available > Time Spent: 1h > Remaining Estimate: 0h > > In order to get the logs for a particular query, submitted to Tez on YARN, > the following pieces of information are required: > * YARN Application ID > * TEZ DAG ID > * HS2 Host that ran the job > Include this information in TezTask output. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24758) Log Tez Task DAG ID, DAG Session ID, HS2 Hostname
[ https://issues.apache.org/jira/browse/HIVE-24758?focusedWorklogId=550815&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550815 ] ASF GitHub Bot logged work on HIVE-24758: - Author: ASF GitHub Bot Created on: 10/Feb/21 17:41 Start Date: 10/Feb/21 17:41 Worklog Time Spent: 10m Work Description: pgaref commented on a change in pull request #1963: URL: https://github.com/apache/hive/pull/1963#discussion_r573796890 ## File path: ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java ## @@ -137,6 +139,8 @@ public int execute() { Context ctx = null; Ref sessionRef = Ref.from(null); +final String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID); Review comment: I would probably move this to Task initialize method and reuse across method -- e.g., build, execute etc ## File path: ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java ## @@ -236,6 +239,10 @@ public int execute() { throw new HiveException("Operation cancelled"); } +// Log all the info required to find the various logs for this query +LOG.info("HS2 Host: [{}], Query ID: [{}], Dag ID: [{}], DAG Session ID: [{}]", getHostNameIP(), queryId, Review comment: I would change that to something like "Executing query with Id {} from dag {} by session {} " Not sure I would add the hostName IP here as this can be quite confusing, especially when running multiple containers on the same node (like kubernetes). Not sure if we already have better info for that on a Task level. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550815) Time Spent: 50m (was: 40m) > Log Tez Task DAG ID, DAG Session ID, HS2 Hostname > - > > Key: HIVE-24758 > URL: https://issues.apache.org/jira/browse/HIVE-24758 > Project: Hive > Issue Type: Improvement >Reporter: David Mollitor >Assignee: David Mollitor >Priority: Major > Labels: pull-request-available > Time Spent: 50m > Remaining Estimate: 0h > > In order to get the logs for a particular query, submitted to Tez on YARN, > the following pieces of information are required: > * YARN Application ID > * TEZ DAG ID > * HS2 Host that ran the job > Include this information in TezTask output. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Assigned] (HIVE-24768) Use jackson-bom everywhere for version replacement
[ https://issues.apache.org/jira/browse/HIVE-24768?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sai Hemanth Gantasala reassigned HIVE-24768: > Use jackson-bom everywhere for version replacement > -- > > Key: HIVE-24768 > URL: https://issues.apache.org/jira/browse/HIVE-24768 > Project: Hive > Issue Type: Improvement >Reporter: Sai Hemanth Gantasala >Assignee: Sai Hemanth Gantasala >Priority: Major > > It's more of an optimization but makes it easier to replace the versions > where ever necessary for Jackson dependency. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24682) Collect dynamic partition info in FileSink for direct insert and reuse it in Movetask
[ https://issues.apache.org/jira/browse/HIVE-24682?focusedWorklogId=550802=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550802 ] ASF GitHub Bot logged work on HIVE-24682: - Author: ASF GitHub Bot Created on: 10/Feb/21 17:09 Start Date: 10/Feb/21 17:09 Worklog Time Spent: 10m Work Description: pvargacl commented on a change in pull request #1915: URL: https://github.com/apache/hive/pull/1915#discussion_r573913730 ## File path: ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java ## @@ -1470,16 +1470,15 @@ private static ValidTxnList getValidTxnList(Configuration conf) { return dirToSnapshots; } - private static boolean isChildOfDelta(Path childDir, Path rootPath) { + public static boolean isChildOfDelta(Path childDir, Path rootPath) { if (childDir.toUri().toString().length() <= rootPath.toUri().toString().length()) { return false; } // We do not want to look outside the original directory String fullName = childDir.toUri().toString().substring(rootPath.toUri().toString().length() + 1); String dirName = childDir.getName(); -return (fullName.startsWith(BASE_PREFIX) && !dirName.startsWith(BASE_PREFIX)) || -(fullName.startsWith(DELTA_PREFIX) && !dirName.startsWith(DELTA_PREFIX)) || -(fullName.startsWith(DELETE_DELTA_PREFIX) && !dirName.startsWith(DELETE_DELTA_PREFIX)); +return (!dirName.startsWith(BASE_PREFIX)) && !dirName.startsWith(DELTA_PREFIX) && !dirName.startsWith(DELETE_DELTA_PREFIX) Review comment: fixed This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550802) Time Spent: 2h (was: 1h 50m) > Collect dynamic partition info in FileSink for direct insert and reuse it in > Movetask > - > > Key: HIVE-24682 > URL: https://issues.apache.org/jira/browse/HIVE-24682 > Project: Hive > Issue Type: Sub-task >Reporter: Peter Varga >Assignee: Peter Varga >Priority: Major > Labels: pull-request-available > Time Spent: 2h > Remaining Estimate: 0h > > The dynamic partition infos can be collected from the manifest files, no need > to do a costly file listing later -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24682) Collect dynamic partition info in FileSink for direct insert and reuse it in Movetask
[ https://issues.apache.org/jira/browse/HIVE-24682?focusedWorklogId=550801=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550801 ] ASF GitHub Bot logged work on HIVE-24682: - Author: ASF GitHub Bot Created on: 10/Feb/21 17:06 Start Date: 10/Feb/21 17:06 Worklog Time Spent: 10m Work Description: pvargacl commented on a change in pull request #1915: URL: https://github.com/apache/hive/pull/1915#discussion_r573910911 ## File path: ql/src/test/results/clientpositive/llap/acid_multiinsert_dyn_part.q.out ## @@ -649,16 +649,10 @@ POSTHOOK: Output: default@multiinsert_test_mm@c= POSTHOOK: Output: default@multiinsert_test_mm@c=__HIVE_DEFAULT_PARTITION__ POSTHOOK: Lineage: multiinsert_test_mm PARTITION(c=).a SIMPLE [(multiinsert_test_text)a.FieldSchema(name:a, type:int, comment:null), ] POSTHOOK: Lineage: multiinsert_test_mm PARTITION(c=).b SIMPLE [(multiinsert_test_text)a.FieldSchema(name:b, type:int, comment:null), ] -POSTHOOK: Lineage: multiinsert_test_mm PARTITION(c=).a SIMPLE [(multiinsert_test_text)a.FieldSchema(name:a, type:int, comment:null), ] -POSTHOOK: Lineage: multiinsert_test_mm PARTITION(c=).b SIMPLE [(multiinsert_test_text)a.FieldSchema(name:b, type:int, comment:null), ] -POSTHOOK: Lineage: multiinsert_test_mm PARTITION(c=).a SIMPLE [(multiinsert_test_text)a.FieldSchema(name:a, type:int, comment:null), ] -POSTHOOK: Lineage: multiinsert_test_mm PARTITION(c=).b SIMPLE [(multiinsert_test_text)a.FieldSchema(name:b, type:int, comment:null), ] POSTHOOK: Lineage: multiinsert_test_mm PARTITION(c=).a SIMPLE [(multiinsert_test_text)a.FieldSchema(name:a, type:int, comment:null), ] POSTHOOK: Lineage: multiinsert_test_mm PARTITION(c=).b SIMPLE [(multiinsert_test_text)a.FieldSchema(name:b, type:int, comment:null), ] POSTHOOK: Lineage: multiinsert_test_mm PARTITION(c=__HIVE_DEFAULT_PARTITION__).a SIMPLE [(multiinsert_test_text)a.FieldSchema(name:a, type:int, comment:null), ] POSTHOOK: Lineage: multiinsert_test_mm 
PARTITION(c=__HIVE_DEFAULT_PARTITION__).b SIMPLE [(multiinsert_test_text)a.FieldSchema(name:b, type:int, comment:null), ] -POSTHOOK: Lineage: multiinsert_test_mm PARTITION(c=__HIVE_DEFAULT_PARTITION__).a SIMPLE [(multiinsert_test_text)a.FieldSchema(name:a, type:int, comment:null), ] Review comment: yes these were duplicates This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550801) Time Spent: 1h 50m (was: 1h 40m) > Collect dynamic partition info in FileSink for direct insert and reuse it in > Movetask > - > > Key: HIVE-24682 > URL: https://issues.apache.org/jira/browse/HIVE-24682 > Project: Hive > Issue Type: Sub-task >Reporter: Peter Varga >Assignee: Peter Varga >Priority: Major > Labels: pull-request-available > Time Spent: 1h 50m > Remaining Estimate: 0h > > The dynamic partition infos can be collected from the manifest files, no need > to do a costly file listing later -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24682) Collect dynamic partition info in FileSink for direct insert and reuse it in Movetask
[ https://issues.apache.org/jira/browse/HIVE-24682?focusedWorklogId=550798=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550798 ] ASF GitHub Bot logged work on HIVE-24682: - Author: ASF GitHub Bot Created on: 10/Feb/21 17:04 Start Date: 10/Feb/21 17:04 Worklog Time Spent: 10m Work Description: pvargacl commented on a change in pull request #1915: URL: https://github.com/apache/hive/pull/1915#discussion_r573909577 ## File path: ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java ## @@ -1470,16 +1470,15 @@ private static ValidTxnList getValidTxnList(Configuration conf) { return dirToSnapshots; } - private static boolean isChildOfDelta(Path childDir, Path rootPath) { + public static boolean isChildOfDelta(Path childDir, Path rootPath) { if (childDir.toUri().toString().length() <= rootPath.toUri().toString().length()) { return false; } // We do not want to look outside the original directory String fullName = childDir.toUri().toString().substring(rootPath.toUri().toString().length() + 1); String dirName = childDir.getName(); -return (fullName.startsWith(BASE_PREFIX) && !dirName.startsWith(BASE_PREFIX)) || -(fullName.startsWith(DELTA_PREFIX) && !dirName.startsWith(DELTA_PREFIX)) || -(fullName.startsWith(DELETE_DELTA_PREFIX) && !dirName.startsWith(DELETE_DELTA_PREFIX)); +return (!dirName.startsWith(BASE_PREFIX)) && !dirName.startsWith(DELTA_PREFIX) && !dirName.startsWith(DELETE_DELTA_PREFIX) + && (fullName.contains(BASE_PREFIX) || fullName.contains(DELTA_PREFIX) || fullName.contains(DELETE_DELTA_PREFIX)); Review comment: Yes. Previously it was working only for rootpath like delta_1/subdir/, now it works for table/partition=1/delta_1/subdir The reordering was needed because fullName.contains(delta_prefix) && !dirname.startWith(delta_prefix) is true for delete_delta_01, and it would yield wrong result This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550798) Time Spent: 1h 40m (was: 1.5h) > Collect dynamic partition info in FileSink for direct insert and reuse it in > Movetask > - > > Key: HIVE-24682 > URL: https://issues.apache.org/jira/browse/HIVE-24682 > Project: Hive > Issue Type: Sub-task >Reporter: Peter Varga >Assignee: Peter Varga >Priority: Major > Labels: pull-request-available > Time Spent: 1h 40m > Remaining Estimate: 0h > > The dynamic partition infos can be collected from the manifest files, no need > to do a costly file listing later -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24682) Collect dynamic partition info in FileSink for direct insert and reuse it in Movetask
[ https://issues.apache.org/jira/browse/HIVE-24682?focusedWorklogId=550785=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550785 ] ASF GitHub Bot logged work on HIVE-24682: - Author: ASF GitHub Bot Created on: 10/Feb/21 16:32 Start Date: 10/Feb/21 16:32 Worklog Time Spent: 10m Work Description: pvary commented on a change in pull request #1915: URL: https://github.com/apache/hive/pull/1915#discussion_r573880385 ## File path: ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java ## @@ -1470,16 +1470,15 @@ private static ValidTxnList getValidTxnList(Configuration conf) { return dirToSnapshots; } - private static boolean isChildOfDelta(Path childDir, Path rootPath) { + public static boolean isChildOfDelta(Path childDir, Path rootPath) { if (childDir.toUri().toString().length() <= rootPath.toUri().toString().length()) { return false; } // We do not want to look outside the original directory String fullName = childDir.toUri().toString().substring(rootPath.toUri().toString().length() + 1); String dirName = childDir.getName(); -return (fullName.startsWith(BASE_PREFIX) && !dirName.startsWith(BASE_PREFIX)) || -(fullName.startsWith(DELTA_PREFIX) && !dirName.startsWith(DELTA_PREFIX)) || -(fullName.startsWith(DELETE_DELTA_PREFIX) && !dirName.startsWith(DELETE_DELTA_PREFIX)); +return (!dirName.startsWith(BASE_PREFIX)) && !dirName.startsWith(DELTA_PREFIX) && !dirName.startsWith(DELETE_DELTA_PREFIX) + && (fullName.contains(BASE_PREFIX) || fullName.contains(DELTA_PREFIX) || fullName.contains(DELETE_DELTA_PREFIX)); Review comment: If I understand correctly this is changing the logic. Is it intentional? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550785) Time Spent: 1.5h (was: 1h 20m) > Collect dynamic partition info in FileSink for direct insert and reuse it in > Movetask > - > > Key: HIVE-24682 > URL: https://issues.apache.org/jira/browse/HIVE-24682 > Project: Hive > Issue Type: Sub-task >Reporter: Peter Varga >Assignee: Peter Varga >Priority: Major > Labels: pull-request-available > Time Spent: 1.5h > Remaining Estimate: 0h > > The dynamic partition infos can be collected from the manifest files, no need > to do a costly file listing later -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24682) Collect dynamic partition info in FileSink for direct insert and reuse it in Movetask
[ https://issues.apache.org/jira/browse/HIVE-24682?focusedWorklogId=550777=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550777 ] ASF GitHub Bot logged work on HIVE-24682: - Author: ASF GitHub Bot Created on: 10/Feb/21 16:19 Start Date: 10/Feb/21 16:19 Worklog Time Spent: 10m Work Description: pvary commented on a change in pull request #1915: URL: https://github.com/apache/hive/pull/1915#discussion_r573869599 ## File path: ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java ## @@ -1470,16 +1470,15 @@ private static ValidTxnList getValidTxnList(Configuration conf) { return dirToSnapshots; } - private static boolean isChildOfDelta(Path childDir, Path rootPath) { + public static boolean isChildOfDelta(Path childDir, Path rootPath) { if (childDir.toUri().toString().length() <= rootPath.toUri().toString().length()) { return false; } // We do not want to look outside the original directory String fullName = childDir.toUri().toString().substring(rootPath.toUri().toString().length() + 1); String dirName = childDir.getName(); -return (fullName.startsWith(BASE_PREFIX) && !dirName.startsWith(BASE_PREFIX)) || -(fullName.startsWith(DELTA_PREFIX) && !dirName.startsWith(DELTA_PREFIX)) || -(fullName.startsWith(DELETE_DELTA_PREFIX) && !dirName.startsWith(DELETE_DELTA_PREFIX)); +return (!dirName.startsWith(BASE_PREFIX)) && !dirName.startsWith(DELTA_PREFIX) && !dirName.startsWith(DELETE_DELTA_PREFIX) Review comment: Is this an extra `()` on the first one? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550777) Time Spent: 1h 20m (was: 1h 10m) > Collect dynamic partition info in FileSink for direct insert and reuse it in > Movetask > - > > Key: HIVE-24682 > URL: https://issues.apache.org/jira/browse/HIVE-24682 > Project: Hive > Issue Type: Sub-task >Reporter: Peter Varga >Assignee: Peter Varga >Priority: Major > Labels: pull-request-available > Time Spent: 1h 20m > Remaining Estimate: 0h > > The dynamic partition infos can be collected from the manifest files, no need > to do a costly file listing later -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24682) Collect dynamic partition info in FileSink for direct insert and reuse it in Movetask
[ https://issues.apache.org/jira/browse/HIVE-24682?focusedWorklogId=550775=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550775 ] ASF GitHub Bot logged work on HIVE-24682: - Author: ASF GitHub Bot Created on: 10/Feb/21 16:18 Start Date: 10/Feb/21 16:18 Worklog Time Spent: 10m Work Description: pvary commented on a change in pull request #1915: URL: https://github.com/apache/hive/pull/1915#discussion_r573868139 ## File path: ql/src/test/results/clientpositive/llap/acid_multiinsert_dyn_part.q.out ## @@ -649,16 +649,10 @@ POSTHOOK: Output: default@multiinsert_test_mm@c= POSTHOOK: Output: default@multiinsert_test_mm@c=__HIVE_DEFAULT_PARTITION__ POSTHOOK: Lineage: multiinsert_test_mm PARTITION(c=).a SIMPLE [(multiinsert_test_text)a.FieldSchema(name:a, type:int, comment:null), ] POSTHOOK: Lineage: multiinsert_test_mm PARTITION(c=).b SIMPLE [(multiinsert_test_text)a.FieldSchema(name:b, type:int, comment:null), ] -POSTHOOK: Lineage: multiinsert_test_mm PARTITION(c=).a SIMPLE [(multiinsert_test_text)a.FieldSchema(name:a, type:int, comment:null), ] -POSTHOOK: Lineage: multiinsert_test_mm PARTITION(c=).b SIMPLE [(multiinsert_test_text)a.FieldSchema(name:b, type:int, comment:null), ] -POSTHOOK: Lineage: multiinsert_test_mm PARTITION(c=).a SIMPLE [(multiinsert_test_text)a.FieldSchema(name:a, type:int, comment:null), ] -POSTHOOK: Lineage: multiinsert_test_mm PARTITION(c=).b SIMPLE [(multiinsert_test_text)a.FieldSchema(name:b, type:int, comment:null), ] POSTHOOK: Lineage: multiinsert_test_mm PARTITION(c=).a SIMPLE [(multiinsert_test_text)a.FieldSchema(name:a, type:int, comment:null), ] POSTHOOK: Lineage: multiinsert_test_mm PARTITION(c=).b SIMPLE [(multiinsert_test_text)a.FieldSchema(name:b, type:int, comment:null), ] POSTHOOK: Lineage: multiinsert_test_mm PARTITION(c=__HIVE_DEFAULT_PARTITION__).a SIMPLE [(multiinsert_test_text)a.FieldSchema(name:a, type:int, comment:null), ] POSTHOOK: Lineage: multiinsert_test_mm 
PARTITION(c=__HIVE_DEFAULT_PARTITION__).b SIMPLE [(multiinsert_test_text)a.FieldSchema(name:b, type:int, comment:null), ] -POSTHOOK: Lineage: multiinsert_test_mm PARTITION(c=__HIVE_DEFAULT_PARTITION__).a SIMPLE [(multiinsert_test_text)a.FieldSchema(name:a, type:int, comment:null), ] Review comment: are these duplicate rows in the lineage info? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550775) Time Spent: 1h (was: 50m) > Collect dynamic partition info in FileSink for direct insert and reuse it in > Movetask > - > > Key: HIVE-24682 > URL: https://issues.apache.org/jira/browse/HIVE-24682 > Project: Hive > Issue Type: Sub-task >Reporter: Peter Varga >Assignee: Peter Varga >Priority: Major > Labels: pull-request-available > Time Spent: 1h > Remaining Estimate: 0h > > The dynamic partition infos can be collected from the manifest files, no need > to do a costly file listing later -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24682) Collect dynamic partition info in FileSink for direct insert and reuse it in Movetask
[ https://issues.apache.org/jira/browse/HIVE-24682?focusedWorklogId=550776=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550776 ] ASF GitHub Bot logged work on HIVE-24682: - Author: ASF GitHub Bot Created on: 10/Feb/21 16:18 Start Date: 10/Feb/21 16:18 Worklog Time Spent: 10m Work Description: pvary commented on a change in pull request #1915: URL: https://github.com/apache/hive/pull/1915#discussion_r573868615 ## File path: ql/src/test/results/clientpositive/llap/mm_all.q.out ## @@ -1649,8 +1649,6 @@ POSTHOOK: Lineage: ###Masked### POSTHOOK: Lineage: ###Masked### POSTHOOK: Lineage: ###Masked### POSTHOOK: Lineage: ###Masked### -POSTHOOK: Lineage: ###Masked### Review comment: Were these also duplicated rows? Could you please check the q.out.orig files before the masking? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550776) Time Spent: 1h 10m (was: 1h) > Collect dynamic partition info in FileSink for direct insert and reuse it in > Movetask > - > > Key: HIVE-24682 > URL: https://issues.apache.org/jira/browse/HIVE-24682 > Project: Hive > Issue Type: Sub-task >Reporter: Peter Varga >Assignee: Peter Varga >Priority: Major > Labels: pull-request-available > Time Spent: 1h 10m > Remaining Estimate: 0h > > The dynamic partition infos can be collected from the manifest files, no need > to do a costly file listing later -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24741) get_partitions_ps_with_auth performance can be improved when requesting all the partitions
[ https://issues.apache.org/jira/browse/HIVE-24741?focusedWorklogId=550762&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550762 ] ASF GitHub Bot logged work on HIVE-24741: - Author: ASF GitHub Bot Created on: 10/Feb/21 15:39 Start Date: 10/Feb/21 15:39 Worklog Time Spent: 10m Work Description: vihangk1 commented on pull request #1948: URL: https://github.com/apache/hive/pull/1948#issuecomment-776799287 Thanks for the review @nrg4878! Merged into master. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550762) Time Spent: 40m (was: 0.5h) > get_partitions_ps_with_auth performance can be improved when requesting all > the partitions > -- > > Key: HIVE-24741 > URL: https://issues.apache.org/jira/browse/HIVE-24741 > Project: Hive > Issue Type: Improvement >Reporter: Vihang Karajgaonkar >Assignee: Vihang Karajgaonkar >Priority: Major > Labels: pull-request-available > Time Spent: 40m > Remaining Estimate: 0h > > {{get_partitions_ps_with_auth}} API does not support DirectSQL. I have seen > some large production use-cases where this API (specifically from Spark > applications) is used heavily to request for all the partitions of a table. > The performance of this API when requesting all the partitions of the table > can be significantly improved (~4 times from a real-world large workload > use case) if we forward this API call to a directSQL enabled API. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24741) get_partitions_ps_with_auth performance can be improved when requesting all the partitions
[ https://issues.apache.org/jira/browse/HIVE-24741?focusedWorklogId=550761=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550761 ] ASF GitHub Bot logged work on HIVE-24741: - Author: ASF GitHub Bot Created on: 10/Feb/21 15:38 Start Date: 10/Feb/21 15:38 Worklog Time Spent: 10m Work Description: vihangk1 merged pull request #1948: URL: https://github.com/apache/hive/pull/1948 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550761) Time Spent: 0.5h (was: 20m) > get_partitions_ps_with_auth performance can be improved when requesting all > the partitions > -- > > Key: HIVE-24741 > URL: https://issues.apache.org/jira/browse/HIVE-24741 > Project: Hive > Issue Type: Improvement >Reporter: Vihang Karajgaonkar >Assignee: Vihang Karajgaonkar >Priority: Major > Labels: pull-request-available > Time Spent: 0.5h > Remaining Estimate: 0h > > {{get_partitions_ps_with_auth}} API does not support DirectSQL. I have seen > some large production use-cases where this API (specifically from Spark > applications) is used heavily to request for all the partitions of a table. > This performance of this API when requesting all the partitions of the table > can be signficantly improved (~4 times from a realworld large workload > usecase) if we forward this API call to a directSQL enabled API. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24715) Increase bucketId range
[ https://issues.apache.org/jira/browse/HIVE-24715?focusedWorklogId=550751=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550751 ] ASF GitHub Bot logged work on HIVE-24715: - Author: ASF GitHub Bot Created on: 10/Feb/21 15:18 Start Date: 10/Feb/21 15:18 Worklog Time Spent: 10m Work Description: zeroflag opened a new pull request #1968: URL: https://github.com/apache/hive/pull/1968 POC This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550751) Remaining Estimate: 0h Time Spent: 10m > Increase bucketId range > --- > > Key: HIVE-24715 > URL: https://issues.apache.org/jira/browse/HIVE-24715 > Project: Hive > Issue Type: Bug > Components: HiveServer2 >Reporter: Attila Magyar >Assignee: Attila Magyar >Priority: Major > Fix For: 4.0.0 > > Time Spent: 10m > Remaining Estimate: 0h > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-24715) Increase bucketId range
[ https://issues.apache.org/jira/browse/HIVE-24715?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] ASF GitHub Bot updated HIVE-24715: -- Labels: pull-request-available (was: ) > Increase bucketId range > --- > > Key: HIVE-24715 > URL: https://issues.apache.org/jira/browse/HIVE-24715 > Project: Hive > Issue Type: Bug > Components: HiveServer2 >Reporter: Attila Magyar >Assignee: Attila Magyar >Priority: Major > Labels: pull-request-available > Fix For: 4.0.0 > > Time Spent: 10m > Remaining Estimate: 0h > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24758) Log Tez Task DAG ID, DAG Session ID, HS2 Hostname
[ https://issues.apache.org/jira/browse/HIVE-24758?focusedWorklogId=550743=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550743 ] ASF GitHub Bot logged work on HIVE-24758: - Author: ASF GitHub Bot Created on: 10/Feb/21 14:41 Start Date: 10/Feb/21 14:41 Worklog Time Spent: 10m Work Description: belugabehr commented on pull request #1963: URL: https://github.com/apache/hive/pull/1963#issuecomment-776753469 @pgaref Can you take a peek at this? This is related to my Tez troubleshooting work. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550743) Time Spent: 40m (was: 0.5h) > Log Tez Task DAG ID, DAG Session ID, HS2 Hostname > - > > Key: HIVE-24758 > URL: https://issues.apache.org/jira/browse/HIVE-24758 > Project: Hive > Issue Type: Improvement >Reporter: David Mollitor >Assignee: David Mollitor >Priority: Major > Labels: pull-request-available > Time Spent: 40m > Remaining Estimate: 0h > > In order to get the logs for a particular query, submitted to Tez on YARN, > the following pieces of information are required: > * YARN Application ID > * TEZ DAG ID > * HS2 Host that ran the job > Include this information in TezTask output. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-23882) Compiler should skip MJ keyExpr for probe optimization
[ https://issues.apache.org/jira/browse/HIVE-23882?focusedWorklogId=550702=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550702 ] ASF GitHub Bot logged work on HIVE-23882: - Author: ASF GitHub Bot Created on: 10/Feb/21 12:50 Start Date: 10/Feb/21 12:50 Worklog Time Spent: 10m Work Description: pgaref commented on pull request #1286: URL: https://github.com/apache/hive/pull/1286#issuecomment-776684576 Hey @kgyrtkirk can you please take another look here? Tests passed, had to retriger. Extended the backtracking logic to use the non-RS brach for the probe-MJ logic -- this actually revealed several missed or wrong optimizations (updated q outs). Also extended the optimization logic to check if there a type Cast between src and destination (as the types have to match in the probe case) and only use it on the LLAP mode (probedecode's target). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550702) Time Spent: 2h 20m (was: 2h 10m) > Compiler should skip MJ keyExpr for probe optimization > -- > > Key: HIVE-23882 > URL: https://issues.apache.org/jira/browse/HIVE-23882 > Project: Hive > Issue Type: Sub-task >Reporter: Panagiotis Garefalakis >Assignee: Panagiotis Garefalakis >Priority: Major > Labels: pull-request-available > Time Spent: 2h 20m > Remaining Estimate: 0h > > In probe we cannot currently support Key expressions (on the big table Side) > as ORC CVs Probe directly the smalltable HT (there is no expr evaluation at > that level). > TezCompiler should take this into account when picking MJs to push probe > details -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24723) Use ExecutorService in TezSessionPool
[ https://issues.apache.org/jira/browse/HIVE-24723?focusedWorklogId=550700=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550700 ] ASF GitHub Bot logged work on HIVE-24723: - Author: ASF GitHub Bot Created on: 10/Feb/21 12:43 Start Date: 10/Feb/21 12:43 Worklog Time Spent: 10m Work Description: pgaref edited a comment on pull request #1939: URL: https://github.com/apache/hive/pull/1939#issuecomment-776680783 > @pgaref OK. I got the current implementation paired down quite a bit. Please review. Thanks @belugabehr changes LGTM. @abstractdog can you also take a look please to these TezSession related changes? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550700) Time Spent: 1h 50m (was: 1h 40m) > Use ExecutorService in TezSessionPool > - > > Key: HIVE-24723 > URL: https://issues.apache.org/jira/browse/HIVE-24723 > Project: Hive > Issue Type: Improvement > Components: Tez >Reporter: David Mollitor >Assignee: David Mollitor >Priority: Minor > Labels: pull-request-available > Time Spent: 1h 50m > Remaining Estimate: 0h > > Currently there are some wonky home-made thread pooling action going on in > {{TezSessionPool}}. Replace it with some JDK/Guava goodness. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24723) Use ExecutorService in TezSessionPool
[ https://issues.apache.org/jira/browse/HIVE-24723?focusedWorklogId=550699=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550699 ] ASF GitHub Bot logged work on HIVE-24723: - Author: ASF GitHub Bot Created on: 10/Feb/21 12:42 Start Date: 10/Feb/21 12:42 Worklog Time Spent: 10m Work Description: pgaref commented on pull request #1939: URL: https://github.com/apache/hive/pull/1939#issuecomment-776680783 > @pgaref OK. I got the current implementation paired down quite a bit. Please review. Thanks @belugabehr changes LGTM. @abstractdog can you also take a look please? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550699) Time Spent: 1h 40m (was: 1.5h) > Use ExecutorService in TezSessionPool > - > > Key: HIVE-24723 > URL: https://issues.apache.org/jira/browse/HIVE-24723 > Project: Hive > Issue Type: Improvement > Components: Tez >Reporter: David Mollitor >Assignee: David Mollitor >Priority: Minor > Labels: pull-request-available > Time Spent: 1h 40m > Remaining Estimate: 0h > > Currently there are some wonky home-made thread pooling action going on in > {{TezSessionPool}}. Replace it with some JDK/Guava goodness. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24693) Parquet Timestamp Values Read/Write Very Slow
[ https://issues.apache.org/jira/browse/HIVE-24693?focusedWorklogId=550695&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550695 ] ASF GitHub Bot logged work on HIVE-24693: - Author: ASF GitHub Bot Created on: 10/Feb/21 12:35 Start Date: 10/Feb/21 12:35 Worklog Time Spent: 10m Work Description: pgaref commented on a change in pull request #1938: URL: https://github.com/apache/hive/pull/1938#discussion_r573691868 ## File path: common/src/java/org/apache/hadoop/hive/common/type/Date.java ## @@ -51,7 +51,7 @@ .appendValue(DAY_OF_MONTH, 1, 2, SignStyle.NORMAL); PARSE_FORMATTER = builder.toFormatter().withResolverStyle(ResolverStyle.LENIENT); builder = new DateTimeFormatterBuilder(); -builder.append(DateTimeFormatter.ofPattern("yyyy-MM-dd")); +builder.append(DateTimeFormatter.ofPattern("yyyy-MM-dd")); Review comment: update javadoc above? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550695) Time Spent: 3.5h (was: 3h 20m) > Parquet Timestamp Values Read/Write Very Slow > - > > Key: HIVE-24693 > URL: https://issues.apache.org/jira/browse/HIVE-24693 > Project: Hive > Issue Type: Improvement >Reporter: David Mollitor >Assignee: David Mollitor >Priority: Critical > Labels: pull-request-available > Time Spent: 3.5h > Remaining Estimate: 0h > > Parquet {{DataWriteableWriter}} relies on {{NanoTimeUtils}} to convert a > timestamp object into a binary value. The way in which it does this,... it > calls {{toString()}} on the timestamp object, and then parses the String. 
> This particular timestamp do not carry a timezone, so the string is something > like: > {{2021-21-03 12:32:23....}} > The parse code tries to parse the string assuming there is a time zone, and > if not, falls-back and applies the provided "default time zone". As was > noted in [HIVE-24353], if something fails to parse, it is very expensive to > try to parse again. So, for each timestamp in the Parquet file, it: > * Builds a string from the time stamp > * Parses it (throws an exception, parses again) > There is no need to do this kind of string manipulations/parsing, it should > just be using the epoch millis/seconds/time stored internal to the Timestamp > object. > {code:java} > // Converts Timestamp to TimestampTZ. > public static TimestampTZ convert(Timestamp ts, ZoneId defaultTimeZone) { > return parse(ts.toString(), defaultTimeZone); > } > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24693) Parquet Timestamp Values Read/Write Very Slow
[ https://issues.apache.org/jira/browse/HIVE-24693?focusedWorklogId=550696&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550696 ] ASF GitHub Bot logged work on HIVE-24693: - Author: ASF GitHub Bot Created on: 10/Feb/21 12:35 Start Date: 10/Feb/21 12:35 Worklog Time Spent: 10m Work Description: pgaref commented on a change in pull request #1938: URL: https://github.com/apache/hive/pull/1938#discussion_r573691956 ## File path: common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java ## @@ -72,7 +72,7 @@ PARSE_FORMATTER = builder.toFormatter().withResolverStyle(ResolverStyle.LENIENT); builder = new DateTimeFormatterBuilder(); // Date and time parts -builder.append(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")); +builder.append(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")); Review comment: update doc with details? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550696) Time Spent: 3h 40m (was: 3.5h) > Parquet Timestamp Values Read/Write Very Slow > - > > Key: HIVE-24693 > URL: https://issues.apache.org/jira/browse/HIVE-24693 > Project: Hive > Issue Type: Improvement >Reporter: David Mollitor >Assignee: David Mollitor >Priority: Critical > Labels: pull-request-available > Time Spent: 3h 40m > Remaining Estimate: 0h > > Parquet {{DataWriteableWriter}} relies on {{NanoTimeUtils}} to convert a > timestamp object into a binary value. The way in which it does this,... it > calls {{toString()}} on the timestamp object, and then parses the String. 
> This particular timestamp do not carry a timezone, so the string is something > like: > {{2021-21-03 12:32:23....}} > The parse code tries to parse the string assuming there is a time zone, and > if not, falls-back and applies the provided "default time zone". As was > noted in [HIVE-24353], if something fails to parse, it is very expensive to > try to parse again. So, for each timestamp in the Parquet file, it: > * Builds a string from the time stamp > * Parses it (throws an exception, parses again) > There is no need to do this kind of string manipulations/parsing, it should > just be using the epoch millis/seconds/time stored internal to the Timestamp > object. > {code:java} > // Converts Timestamp to TimestampTZ. > public static TimestampTZ convert(Timestamp ts, ZoneId defaultTimeZone) { > return parse(ts.toString(), defaultTimeZone); > } > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24693) Parquet Timestamp Values Read/Write Very Slow
[ https://issues.apache.org/jira/browse/HIVE-24693?focusedWorklogId=550694=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550694 ] ASF GitHub Bot logged work on HIVE-24693: - Author: ASF GitHub Bot Created on: 10/Feb/21 12:35 Start Date: 10/Feb/21 12:35 Worklog Time Spent: 10m Work Description: pgaref commented on a change in pull request #1938: URL: https://github.com/apache/hive/pull/1938#discussion_r573691709 ## File path: common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java ## @@ -127,12 +127,15 @@ public static TimestampTZ parseOrNull(String s, ZoneId defaultTimeZone) { // Converts Date to TimestampTZ. public static TimestampTZ convert(Date date, ZoneId defaultTimeZone) { -return parse(date.toString(), defaultTimeZone); +return new TimestampTZ(ZonedDateTime.ofInstant(Instant.ofEpochMilli(date.toEpochMilli()), ZoneOffset.UTC) +.withZoneSameLocal(defaultTimeZone)); } // Converts Timestamp to TimestampTZ. public static TimestampTZ convert(Timestamp ts, ZoneId defaultTimeZone) { -return parse(ts.toString(), defaultTimeZone); +return new TimestampTZ( +ZonedDateTime.ofInstant(Instant.ofEpochSecond(ts.toEpochSecond(), ts.getNanos()), ZoneOffset.UTC) Review comment: use same style one both? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550694) Time Spent: 3h 20m (was: 3h 10m) > Parquet Timestamp Values Read/Write Very Slow > - > > Key: HIVE-24693 > URL: https://issues.apache.org/jira/browse/HIVE-24693 > Project: Hive > Issue Type: Improvement >Reporter: David Mollitor >Assignee: David Mollitor >Priority: Critical > Labels: pull-request-available > Time Spent: 3h 20m > Remaining Estimate: 0h > > Parquet {{DataWriteableWriter}} relias on {{NanoTimeUtils}} to convert a > timestamp object into a binary value. The way in which it does this,... it > calls {{toString()}} on the timestamp object, and then parses the String. > This particular timestamp do not carry a timezone, so the string is something > like: > {{2021-21-03 12:32:23....}} > The parse code tries to parse the string assuming there is a time zone, and > if not, falls-back and applies the provided "default time zone". As was > noted in [HIVE-24353], if something fails to parse, it is very expensive to > try to parse again. So, for each timestamp in the Parquet file, it: > * Builds a string from the time stamp > * Parses it (throws an exception, parses again) > There is no need to do this kind of string manipulations/parsing, it should > just be using the epoch millis/seconds/time stored internal to the Timestamp > object. > {code:java} > // Converts Timestamp to TimestampTZ. > public static TimestampTZ convert(Timestamp ts, ZoneId defaultTimeZone) { > return parse(ts.toString(), defaultTimeZone); > } > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-24766) Fix TestScheduledReplication
[ https://issues.apache.org/jira/browse/HIVE-24766?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Zoltan Haindrich updated HIVE-24766: Description: test seems to be unstable http://ci.hive.apache.org/job/hive-flaky-check/184/ > Fix TestScheduledReplication > > > Key: HIVE-24766 > URL: https://issues.apache.org/jira/browse/HIVE-24766 > Project: Hive > Issue Type: Bug >Reporter: Zoltan Haindrich >Priority: Major > > test seems to be unstable > http://ci.hive.apache.org/job/hive-flaky-check/184/ -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-24766) Fix TestScheduledReplication
[ https://issues.apache.org/jira/browse/HIVE-24766?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Zoltan Haindrich updated HIVE-24766: Environment: (was: test seems to be unstable http://ci.hive.apache.org/job/hive-flaky-check/184/ ) > Fix TestScheduledReplication > > > Key: HIVE-24766 > URL: https://issues.apache.org/jira/browse/HIVE-24766 > Project: Hive > Issue Type: Bug >Reporter: Zoltan Haindrich >Priority: Major > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Commented] (HIVE-24765) ClassCastException with AND or OR condition
[ https://issues.apache.org/jira/browse/HIVE-24765?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17282380#comment-17282380 ] Ryu Kobayashi commented on HIVE-24765: -- [~kgyrtkirk] It's 3.1.2. However, it backported the patch described in the description. And I have CBO enabled. Currently I am creating a PR. Is there anything else you care about? > ClassCastException with AND or OR condition > --- > > Key: HIVE-24765 > URL: https://issues.apache.org/jira/browse/HIVE-24765 > Project: Hive > Issue Type: Bug >Reporter: Ryu Kobayashi >Assignee: Ryu Kobayashi >Priority: Major > > I previously created the following ticket: > https://issues.apache.org/jira/browse/HIVE-11708 > However, it turns out that an error occurs under the following conditions: > {code:java} > CREATE TABLE tbl( > id int, > flg int > ); > set hive.cbo.enable=true; > SELECT * FROM tbl > WHERE id >= 100 AND flg = TRUE; > java.io.IOException: java.lang.ClassCastException: > org.apache.hadoop.hive.serde2.lazy.LazyInteger cannot be cast to > org.apache.hadoop.io.BooleanWritable > at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:165) > at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:2204) > at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:253) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:184) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403) > at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:820) > at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:760) > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:687) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at 
org.apache.hadoop.util.RunJar.run(RunJar.java:244) > at org.apache.hadoop.util.RunJar.main(RunJar.java:158) > Caused by: java.lang.ClassCastException: > org.apache.hadoop.hive.serde2.lazy.LazyInteger cannot be cast to > org.apache.hadoop.io.BooleanWritable > at > org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector.get(WritableBooleanObjectInspector.java:36) > at > org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd.evaluate(GenericUDFOPAnd.java:69) > at > org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187) > at > org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80) > at > org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68) > at > org.apache.hadoop.hive.ql.exec.FilterOperator.process(FilterOperator.java:112) > at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897) > at > org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:434) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:426) > at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:147) > ... 13 more > {code} > I know this is a cast issue as well as the previous issue. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-24765) ClassCastException with AND or OR condition
[ https://issues.apache.org/jira/browse/HIVE-24765?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Ryu Kobayashi updated HIVE-24765: - Description: I previously created the following ticket: https://issues.apache.org/jira/browse/HIVE-11708 However, it turns out that an error occurs under the following conditions: {code:java} CREATE TABLE tbl( id int, flg int ); set hive.cbo.enable=true; SELECT * FROM tbl WHERE id >= 100 AND flg = TRUE; java.io.IOException: java.lang.ClassCastException: org.apache.hadoop.hive.serde2.lazy.LazyInteger cannot be cast to org.apache.hadoop.io.BooleanWritable at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:165) at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:2204) at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:253) at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:184) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403) at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:820) at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:760) at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:687) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.hadoop.util.RunJar.run(RunJar.java:244) at org.apache.hadoop.util.RunJar.main(RunJar.java:158) Caused by: java.lang.ClassCastException: org.apache.hadoop.hive.serde2.lazy.LazyInteger cannot be cast to org.apache.hadoop.io.BooleanWritable at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector.get(WritableBooleanObjectInspector.java:36) at org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd.evaluate(GenericUDFOPAnd.java:69) at 
org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187) at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80) at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68) at org.apache.hadoop.hive.ql.exec.FilterOperator.process(FilterOperator.java:112) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897) at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130) at org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:434) at org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:426) at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:147) ... 13 more {code} I know this is a cast issue as well as the previous issue. was: I previously created the following ticket: https://issues.apache.org/jira/browse/HIVE-11708 However, it turns out that an error occurs under the following conditions: {code:java} CREATE TABLE tbl( id int, flg int ); SELECT * FROM tbl WHERE id >= 100 AND flg = TRUE; java.io.IOException: java.lang.ClassCastException: org.apache.hadoop.hive.serde2.lazy.LazyInteger cannot be cast to org.apache.hadoop.io.BooleanWritable at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:165) at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:2204) at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:253) at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:184) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403) at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:820) at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:760) at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:687) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.hadoop.util.RunJar.run(RunJar.java:244) at org.apache.hadoop.util.RunJar.main(RunJar.java:158) Caused by: java.lang.ClassCastException: org.apache.hadoop.hive.serde2.lazy.LazyInteger cannot be cast to org.apache.hadoop.io.BooleanWritable at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector.get(WritableBooleanObjectInspector.java:36) at org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd.evaluate(GenericUDFOPAnd.java:69) at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187) at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80) at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68) at
[jira] [Work started] (HIVE-24765) ClassCastException with AND or OR condition
[ https://issues.apache.org/jira/browse/HIVE-24765?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Work on HIVE-24765 started by Ryu Kobayashi. > ClassCastException with AND or OR condition > --- > > Key: HIVE-24765 > URL: https://issues.apache.org/jira/browse/HIVE-24765 > Project: Hive > Issue Type: Bug >Reporter: Ryu Kobayashi >Assignee: Ryu Kobayashi >Priority: Major > > I previously created the following ticket: > https://issues.apache.org/jira/browse/HIVE-11708 > However, it turns out that an error occurs under the following conditions: > {code:java} > CREATE TABLE tbl( > id int, > flg int > ); > SELECT * FROM tbl > WHERE id >= 100 AND flg = TRUE; > java.io.IOException: java.lang.ClassCastException: > org.apache.hadoop.hive.serde2.lazy.LazyInteger cannot be cast to > org.apache.hadoop.io.BooleanWritable > at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:165) > at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:2204) > at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:253) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:184) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403) > at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:820) > at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:760) > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:687) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at org.apache.hadoop.util.RunJar.run(RunJar.java:244) > at org.apache.hadoop.util.RunJar.main(RunJar.java:158) > Caused by: java.lang.ClassCastException: > org.apache.hadoop.hive.serde2.lazy.LazyInteger cannot be cast to > org.apache.hadoop.io.BooleanWritable > at > 
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector.get(WritableBooleanObjectInspector.java:36) > at > org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd.evaluate(GenericUDFOPAnd.java:69) > at > org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187) > at > org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80) > at > org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68) > at > org.apache.hadoop.hive.ql.exec.FilterOperator.process(FilterOperator.java:112) > at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897) > at > org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:434) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:426) > at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:147) > ... 13 more > {code} > I know this is a cast issue as well as the previous issue. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Resolved] (HIVE-22861) Ptest TestExecutor thread doesn't handle InterruptedException properly
[ https://issues.apache.org/jira/browse/HIVE-22861?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Ádám Szita resolved HIVE-22861. --- Resolution: Won't Fix Precommit testing is not using the implementation this error is prone for anymore. Closing as Won't Fix. > Ptest TestExecutor thread doesn't handle InterruptedException properly > -- > > Key: HIVE-22861 > URL: https://issues.apache.org/jira/browse/HIVE-22861 > Project: Hive > Issue Type: Bug >Reporter: Ádám Szita >Assignee: Ádám Szita >Priority: Major > > Ptest was seen failing with: > {code:java} > 2020-02-06 22:05:23 INFO [Thread-4] ExecutionController:110 - Shutdown hook > called > 2020-02-06 22:05:23 INFO [Thread-4] CloudExecutionContextProvider:268 - > Shutting down TerminationExecutor > 2020-02-06 22:05:23 INFO [Thread-4] CloudExecutionContextProvider:270 - > Closing CloudComputeService > 2020-02-06 22:05:23 ERROR [TestExecutor] TestExecutor:149 - Unxpected Error > java.lang.InterruptedException: null > at > java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.reportInterruptAfterWait(AbstractQueuedSynchronizer.java:2017) > ~[?:1.7.0_221] > at > java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2095) > ~[?:1.7.0_221] > at > java.util.concurrent.ArrayBlockingQueue.poll(ArrayBlockingQueue.java:389) > ~[?:1.7.0_221] > at > org.apache.hive.ptest.api.server.TestExecutor.run(TestExecutor.java:85) > [classes/:?] 
> 2020-02-06 22:05:24 INFO [localhost-startStop-2] > XmlWebApplicationContext:1048 - Closing WebApplicationContext for namespace > 'Spring MVC Dispatcher Servlet-servlet': startup date [Wed Dec 18 13:27:40 > UTC 2019]; parent: Root WebApplicationContext > 2020-02-06 22:05:24 INFO [localhost-startStop-2] > DefaultListableBeanFactory:444 - Destroying singletons in > org.springframework.beans.factory.support.DefaultListableBeanFactory@19724e3f: > defining beans []; parent: > org.springframework.beans.factory.support.DefaultListableBeanFactory@69705cd8 > 2020-02-06 22:05:24 INFO [localhost-startStop-2] > XmlWebApplicationContext:1048 - Closing Root WebApplicationContext: startup > date [Wed Dec 18 13:27:31 UTC 2019]; root of context hierarchy > {code} > ..indicating that whenever there's an interrupt on the TestExecutor thread > Ptest shuts down and doesn't try to recover. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Commented] (HIVE-24765) ClassCastException with AND or OR condition
[ https://issues.apache.org/jira/browse/HIVE-24765?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17282374#comment-17282374 ] Zoltan Haindrich commented on HIVE-24765: - I was not able to reproduce this issue neither on master or 3.1.2 [~ryu_kobayashi] which version are you using? (`hive --version` ) > ClassCastException with AND or OR condition > --- > > Key: HIVE-24765 > URL: https://issues.apache.org/jira/browse/HIVE-24765 > Project: Hive > Issue Type: Bug >Reporter: Ryu Kobayashi >Assignee: Ryu Kobayashi >Priority: Major > > I previously created the following ticket: > https://issues.apache.org/jira/browse/HIVE-11708 > However, it turns out that an error occurs under the following conditions: > {code:java} > CREATE TABLE tbl( > id int, > flg int > ); > SELECT * FROM tbl > WHERE id >= 100 AND flg = TRUE; > java.io.IOException: java.lang.ClassCastException: > org.apache.hadoop.hive.serde2.lazy.LazyInteger cannot be cast to > org.apache.hadoop.io.BooleanWritable > at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:165) > at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:2204) > at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:253) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:184) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403) > at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:820) > at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:760) > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:687) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at org.apache.hadoop.util.RunJar.run(RunJar.java:244) > at org.apache.hadoop.util.RunJar.main(RunJar.java:158) > Caused by: 
java.lang.ClassCastException: > org.apache.hadoop.hive.serde2.lazy.LazyInteger cannot be cast to > org.apache.hadoop.io.BooleanWritable > at > org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector.get(WritableBooleanObjectInspector.java:36) > at > org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd.evaluate(GenericUDFOPAnd.java:69) > at > org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187) > at > org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80) > at > org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68) > at > org.apache.hadoop.hive.ql.exec.FilterOperator.process(FilterOperator.java:112) > at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897) > at > org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:434) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:426) > at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:147) > ... 13 more > {code} > I know this is a cast issue as well as the previous issue. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24726) Track required data for cache hydration
[ https://issues.apache.org/jira/browse/HIVE-24726?focusedWorklogId=550669=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550669 ] ASF GitHub Bot logged work on HIVE-24726: - Author: ASF GitHub Bot Created on: 10/Feb/21 10:55 Start Date: 10/Feb/21 10:55 Worklog Time Spent: 10m Work Description: szlta commented on a change in pull request #1961: URL: https://github.com/apache/hive/pull/1961#discussion_r573618773 ## File path: llap-server/src/java/org/apache/hadoop/hive/llap/cache/MemoryLimitedPathCache.java ## @@ -0,0 +1,60 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.llap.cache; + +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.Weigher; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Memory limited version of the path cache. 
+ */ +public class MemoryLimitedPathCache implements PathCache { + + private static final Logger LOG = LoggerFactory.getLogger(MemoryLimitedPathCache.class); + private Cache internalCache; + + public MemoryLimitedPathCache(Configuration conf) { +internalCache = CacheBuilder.newBuilder() +.maximumWeight(HiveConf.getSizeVar(conf, HiveConf.ConfVars.LLAP_IO_PATH_CACHE_SIZE)) +.weigher(new PathWeigher()) +.build(); + } + + @Override + public void touch(Object key, String val) { +if (key != null) { + internalCache.put(key, val); +} + } + + @Override + public String resolve(Object key) { +return key != null ? internalCache.getIfPresent(key) : null; + } + + private static class PathWeigher implements Weigher { + +@Override +public int weigh(Object key, String value) { + // String memory footprint Review comment: Please add more comments on why this is calculated like this. ## File path: llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java ## @@ -112,6 +115,8 @@ private final BufferUsageManager bufferManager; private final Configuration daemonConf; private final LowLevelCacheMemoryManager memoryManager; + private LowLevelCachePolicy realCachePolicy; Review comment: Why was this refactor necessary? (similarly why for tracePool) This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550669) Time Spent: 20m (was: 10m) > Track required data for cache hydration > --- > > Key: HIVE-24726 > URL: https://issues.apache.org/jira/browse/HIVE-24726 > Project: Hive > Issue Type: Sub-task >Reporter: Antal Sinkovits >Assignee: Antal Sinkovits >Priority: Major > Labels: pull-request-available > Time Spent: 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24704) Ensure that all Operator column expressions refer to a column in the RowSchema
[ https://issues.apache.org/jira/browse/HIVE-24704?focusedWorklogId=550667=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550667 ] ASF GitHub Bot logged work on HIVE-24704: - Author: ASF GitHub Bot Created on: 10/Feb/21 10:52 Start Date: 10/Feb/21 10:52 Worklog Time Spent: 10m Work Description: kgyrtkirk commented on pull request #1929: URL: https://github.com/apache/hive/pull/1929#issuecomment-776623568 @jcamachor I've cleaned up the changes a bit - could you please take a look? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550667) Time Spent: 20m (was: 10m) > Ensure that all Operator column expressions refer to a column in the RowSchema > -- > > Key: HIVE-24704 > URL: https://issues.apache.org/jira/browse/HIVE-24704 > Project: Hive > Issue Type: Improvement >Reporter: Zoltan Haindrich >Assignee: Zoltan Haindrich >Priority: Major > Labels: pull-request-available > Time Spent: 20m > Remaining Estimate: 0h > > Hive Operators should satisfy that all keys of the columnExprMap must be > present in the schema -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Assigned] (HIVE-24765) ClassCastException with AND or OR condition
[ https://issues.apache.org/jira/browse/HIVE-24765?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Ryu Kobayashi reassigned HIVE-24765: > ClassCastException with AND or OR condition > --- > > Key: HIVE-24765 > URL: https://issues.apache.org/jira/browse/HIVE-24765 > Project: Hive > Issue Type: Bug >Reporter: Ryu Kobayashi >Assignee: Ryu Kobayashi >Priority: Major > > I previously created the following ticket: > https://issues.apache.org/jira/browse/HIVE-11708 > However, it turns out that an error occurs under the following conditions: > {code:java} > CREATE TABLE tbl( > id int, > flg int > ); > SELECT * FROM tbl > WHERE id >= 100 AND flg = TRUE; > java.io.IOException: java.lang.ClassCastException: > org.apache.hadoop.hive.serde2.lazy.LazyInteger cannot be cast to > org.apache.hadoop.io.BooleanWritable > at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:165) > at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:2204) > at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:253) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:184) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403) > at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:820) > at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:760) > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:687) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at org.apache.hadoop.util.RunJar.run(RunJar.java:244) > at org.apache.hadoop.util.RunJar.main(RunJar.java:158) > Caused by: java.lang.ClassCastException: > org.apache.hadoop.hive.serde2.lazy.LazyInteger cannot be cast to > org.apache.hadoop.io.BooleanWritable > at > 
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector.get(WritableBooleanObjectInspector.java:36) > at > org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd.evaluate(GenericUDFOPAnd.java:69) > at > org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187) > at > org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80) > at > org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68) > at > org.apache.hadoop.hive.ql.exec.FilterOperator.process(FilterOperator.java:112) > at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897) > at > org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:434) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:426) > at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:147) > ... 13 more > {code} > I know this is a cast issue as well as the previous issue. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24764) insert overwrite on a partition resets row count stats in other partitions
[ https://issues.apache.org/jira/browse/HIVE-24764?focusedWorklogId=550653=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550653 ] ASF GitHub Bot logged work on HIVE-24764: - Author: ASF GitHub Bot Created on: 10/Feb/21 10:00 Start Date: 10/Feb/21 10:00 Worklog Time Spent: 10m Work Description: rbalamohan opened a new pull request #1967: URL: https://github.com/apache/hive/pull/1967 https://issues.apache.org/jira/browse/HIVE-24764 insert overwrite on a partition resets row count stats in other partitions ### What changes were proposed in this pull request? FSStatsAggregator::aggregateStats should return the value when the partitions are present in its statslist. Otherwise, it should return empty or null value. This would prevent stats from being overwritten in BasicStatsTask::updateStats for other partitions during 'insert overwrite' operation. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550653) Remaining Estimate: 0h Time Spent: 10m > insert overwrite on a partition resets row count stats in other partitions > -- > > Key: HIVE-24764 > URL: https://issues.apache.org/jira/browse/HIVE-24764 > Project: Hive > Issue Type: Bug >Reporter: Rajesh Balamohan >Priority: Major > Time Spent: 10m > Remaining Estimate: 0h > > After insert overwrite on a partition, stats on other partitions are messed > up. Subsequent queries end up with plans with PARTIAL stats. In certain > cases, this leads to suboptimal query plans. 
> {noformat} > drop table if exists test_stats; > drop table if exists test_stats_2; > create table test_stats(i int, j bigint); > create table test_stats_2(i int) partitioned by (j bigint); > insert into test_stats values (1, 1), (2, 2), (3, 3), (4, 4), (5, NULL); > -- select * from test_stats; > 1 1 > 2 2 > 3 3 > 4 4 > 5 > insert overwrite table test_stats_2 partition(j) select i, j from test_stats > where j is not null; > -- After executing this statement, stat gets messed up. > insert overwrite table test_stats_2 partition(j) select i, j from test_stats > where j is null; > -- select * from test_stats_2; > 1 1 > 2 2 > 3 3 > 4 4 > 5 > -- This would return "PARTIAL" stats instead of "COMPLETE" > explain select i, count(*) as c from test_stats_2 group by i order by c desc > limit 10; > STAGE DEPENDENCIES: > Stage-1 is a root stage > Stage-0 depends on stages: Stage-1 > STAGE PLANS: > Stage: Stage-1 > Tez > DagId: hive_20210208093110_62ced99e-f068-42d4-9ba8-d45fccd6c0a2:68 > Edges: > Reducer 2 <- Map 1 (SIMPLE_EDGE) > Reducer 3 <- Reducer 2 (SIMPLE_EDGE) > DagName: hive_20210208093110_62ced99e-f068-42d4-9ba8-d45fccd6c0a2:68 > Vertices: > Map 1 > Map Operator Tree: > TableScan > alias: test_stats_2 > Statistics: Num rows: 125 Data size: 500 Basic stats: > PARTIAL Column stats: COMPLETE > Select Operator > expressions: i (type: int) > outputColumnNames: i > Statistics: Num rows: 125 Data size: 500 Basic stats: > PARTIAL Column stats: COMPLETE > Group By Operator > aggregations: count() > keys: i (type: int) > minReductionHashAggr: 0.99 > mode: hash > outputColumnNames: _col0, _col1 > Statistics: Num rows: 125 Data size: 1500 Basic stats: > PARTIAL Column stats: COMPLETE > Reduce Output Operator > key expressions: _col0 (type: int) > null sort order: a > sort order: + > Map-reduce partition columns: _col0 (type: int) > Statistics: Num rows: 125 Data size: 1500 Basic > stats: PARTIAL Column stats: COMPLETE > value expressions: _col1 (type: bigint) > Execution mode: 
vectorized, llap > LLAP IO: may be used (ACID table) > Reducer 2 > Execution mode: vectorized, llap > Reduce Operator Tree: > Group By Operator > aggregations: count(VALUE._col0) > keys: KEY._col0 (type: int) > mode:
[jira] [Updated] (HIVE-24764) insert overwrite on a partition resets row count stats in other partitions
[ https://issues.apache.org/jira/browse/HIVE-24764?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] ASF GitHub Bot updated HIVE-24764: -- Labels: pull-request-available (was: ) > insert overwrite on a partition resets row count stats in other partitions > -- > > Key: HIVE-24764 > URL: https://issues.apache.org/jira/browse/HIVE-24764 > Project: Hive > Issue Type: Bug >Reporter: Rajesh Balamohan >Priority: Major > Labels: pull-request-available > Time Spent: 10m > Remaining Estimate: 0h > > After insert overwrite on a partition, stats on other partitions are messed > up. Subsequent queries end up with plans with PARTIAL stats. In certain > cases, this leads to suboptimal query plans. > {noformat} > drop table if exists test_stats; > drop table if exists test_stats_2; > create table test_stats(i int, j bigint); > create table test_stats_2(i int) partitioned by (j bigint); > insert into test_stats values (1, 1), (2, 2), (3, 3), (4, 4), (5, NULL); > -- select * from test_stats; > 1 1 > 2 2 > 3 3 > 4 4 > 5 > insert overwrite table test_stats_2 partition(j) select i, j from test_stats > where j is not null; > -- After executing this statement, stat gets messed up. 
> insert overwrite table test_stats_2 partition(j) select i, j from test_stats > where j is null; > -- select * from test_stats_2; > 1 1 > 2 2 > 3 3 > 4 4 > 5 > -- This would return "PARTIAL" stats instead of "COMPLETE" > explain select i, count(*) as c from test_stats_2 group by i order by c desc > limit 10; > STAGE DEPENDENCIES: > Stage-1 is a root stage > Stage-0 depends on stages: Stage-1 > STAGE PLANS: > Stage: Stage-1 > Tez > DagId: hive_20210208093110_62ced99e-f068-42d4-9ba8-d45fccd6c0a2:68 > Edges: > Reducer 2 <- Map 1 (SIMPLE_EDGE) > Reducer 3 <- Reducer 2 (SIMPLE_EDGE) > DagName: hive_20210208093110_62ced99e-f068-42d4-9ba8-d45fccd6c0a2:68 > Vertices: > Map 1 > Map Operator Tree: > TableScan > alias: test_stats_2 > Statistics: Num rows: 125 Data size: 500 Basic stats: > PARTIAL Column stats: COMPLETE > Select Operator > expressions: i (type: int) > outputColumnNames: i > Statistics: Num rows: 125 Data size: 500 Basic stats: > PARTIAL Column stats: COMPLETE > Group By Operator > aggregations: count() > keys: i (type: int) > minReductionHashAggr: 0.99 > mode: hash > outputColumnNames: _col0, _col1 > Statistics: Num rows: 125 Data size: 1500 Basic stats: > PARTIAL Column stats: COMPLETE > Reduce Output Operator > key expressions: _col0 (type: int) > null sort order: a > sort order: + > Map-reduce partition columns: _col0 (type: int) > Statistics: Num rows: 125 Data size: 1500 Basic > stats: PARTIAL Column stats: COMPLETE > value expressions: _col1 (type: bigint) > Execution mode: vectorized, llap > LLAP IO: may be used (ACID table) > Reducer 2 > Execution mode: vectorized, llap > Reduce Operator Tree: > Group By Operator > aggregations: count(VALUE._col0) > keys: KEY._col0 (type: int) > mode: mergepartial > outputColumnNames: _col0, _col1 > Statistics: Num rows: 62 Data size: 744 Basic stats: PARTIAL > Column stats: COMPLETE > Top N Key Operator > sort order: - > keys: _col1 (type: bigint) > null sort order: a > Statistics: Num rows: 62 Data size: 744 Basic 
stats: > PARTIAL Column stats: COMPLETE > top n: 10 > Reduce Output Operator > key expressions: _col1 (type: bigint) > null sort order: a > sort order: - > Statistics: Num rows: 62 Data size: 744 Basic stats: > PARTIAL Column stats: COMPLETE > TopN Hash Memory Usage: 0.04 > value expressions: _col0 (type: int) > Reducer 3 > Execution mode: vectorized, llap > Reduce Operator Tree: > Select Operator > expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 > (type: bigint) > outputColumnNames: _col0, _col1 > Statistics: Num rows: 62
[jira] [Commented] (HIVE-24463) Add special case for Derby and MySQL in Get Next ID DbNotificationListener
[ https://issues.apache.org/jira/browse/HIVE-24463?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17282293#comment-17282293 ] Janos Kovacs commented on HIVE-24463: - [~belugabehr] I'm not sure if LAST_INSERT_ID() usage here is a good idea. It's not only non-deterministic in some cases but also does not return the highest value for next_val value in other cases: * for batched value generation it returns the first value of the batch which is the smallest value not the highest one which would be needed for next_val * for rolled back sessions it does not return the highest value related to the last successful value generated While it's more elegant to use the DB Engine's solution, this function - last_insert_id() - does not fit the logic here. > Add special case for Derby and MySQL in Get Next ID DbNotificationListener > -- > > Key: HIVE-24463 > URL: https://issues.apache.org/jira/browse/HIVE-24463 > Project: Hive > Issue Type: Improvement >Reporter: David Mollitor >Assignee: David Mollitor >Priority: Major > Labels: pull-request-available > Time Spent: 0.5h > Remaining Estimate: 0h > > * Derby does not support {{SELECT FOR UPDATE}} statements > * MySQL can be optimized to use {{LAST_INSERT_ID()}} > > Derby tables are locked in other parts of the code already, but not in this > path. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (HIVE-24763) Incremental rebuild of Materialized view fails
[ https://issues.apache.org/jira/browse/HIVE-24763?focusedWorklogId=550618=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-550618 ] ASF GitHub Bot logged work on HIVE-24763: - Author: ASF GitHub Bot Created on: 10/Feb/21 08:00 Start Date: 10/Feb/21 08:00 Worklog Time Spent: 10m Work Description: kasakrisz opened a new pull request #1966: URL: https://github.com/apache/hive/pull/1966 ### What changes were proposed in this pull request? During incremental materialized view rebuild the CBO plan is transformed from an insert overwrite plan to a merge into plan if the MV definition contains an aggregate. During this transformation the union operator is replaced with a right outer join of the union branches and the join keys are coming from the aggregate keys. When building the join expressions the key equality checks are concatenated by `AND` operators. Building of `AND` expressions throws an AssertionError when only one operand/aggregate key/join key exists. ### Why are the changes needed? Check the number of expressions before creating the `AND` expression and return the expression itself as the root of the join expression if only one expression exists. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? ``` mvn test -Dtest.output.overwrite -DskipSparkTests -Dtest=TestMiniLlapLocalCliDriver -Dqfile=materialized_view_create_rewrite_one_key_gby.q,materialized_view_create_rewrite_4.q -pl itests/qtest -Pitests ``` This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 550618) Remaining Estimate: 0h Time Spent: 10m > Incremental rebuild of Materialized view fails > -- > > Key: HIVE-24763 > URL: https://issues.apache.org/jira/browse/HIVE-24763 > Project: Hive > Issue Type: Bug > Components: Materialized views >Reporter: Krisztian Kasa >Assignee: Krisztian Kasa >Priority: Major > Attachments: repro.q > > Time Spent: 10m > Remaining Estimate: 0h > > AssertionError is thrown when Materialized view definition contains aggregate > operator with only one key: > {code} > CREATE MATERIALIZED VIEW cmv_mat_view_n5 TBLPROPERTIES > ('transactional'='true') AS > SELECT cmv_basetable_n5.a, sum(cmv_basetable_2_n2.d) > FROM cmv_basetable_n5 JOIN cmv_basetable_2_n2 ON (cmv_basetable_n5.a = > cmv_basetable_2_n2.a) > WHERE cmv_basetable_2_n2.c > 10.0 > GROUP BY cmv_basetable_n5.a; > ... > ALTER MATERIALIZED VIEW cmv_mat_view_n5 REBUILD; > {code} > {code} > java.lang.AssertionError: wrong operand count 1 for AND > at org.apache.calcite.util.Litmus$1.fail(Litmus.java:31) > at > org.apache.calcite.sql.SqlBinaryOperator.validRexOperands(SqlBinaryOperator.java:219) > at org.apache.calcite.rex.RexCall.(RexCall.java:86) > at org.apache.calcite.rex.RexBuilder.makeCall(RexBuilder.java:251) > at > org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveAggregateIncrementalRewritingRule.onMatch(HiveAggregateIncrementalRewritingRule.java:124) > at > org.apache.calcite.plan.AbstractRelOptPlanner.fireRule(AbstractRelOptPlanner.java:319) > at org.apache.calcite.plan.hep.HepPlanner.applyRule(HepPlanner.java:560) > at > org.apache.calcite.plan.hep.HepPlanner.applyRules(HepPlanner.java:419) > at > org.apache.calcite.plan.hep.HepPlanner.executeInstruction(HepPlanner.java:256) > at > org.apache.calcite.plan.hep.HepInstruction$RuleInstance.execute(HepInstruction.java:127) > at > 
org.apache.calcite.plan.hep.HepPlanner.executeProgram(HepPlanner.java:215) > at > org.apache.calcite.plan.hep.HepPlanner.findBestExp(HepPlanner.java:202) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.executeProgram(CalcitePlanner.java:2715) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.executeProgram(CalcitePlanner.java:2681) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.applyMaterializedViewRewriting(CalcitePlanner.java:2318) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.apply(CalcitePlanner.java:1934) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.apply(CalcitePlanner.java:1810) > at > org.apache.calcite.tools.Frameworks.lambda$withPlanner$0(Frameworks.java:130) > at >
[jira] [Updated] (HIVE-24763) Incremental rebuild of Materialized view fails
[ https://issues.apache.org/jira/browse/HIVE-24763?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] ASF GitHub Bot updated HIVE-24763: -- Labels: pull-request-available (was: ) > Incremental rebuild of Materialized view fails > -- > > Key: HIVE-24763 > URL: https://issues.apache.org/jira/browse/HIVE-24763 > Project: Hive > Issue Type: Bug > Components: Materialized views >Reporter: Krisztian Kasa >Assignee: Krisztian Kasa >Priority: Major > Labels: pull-request-available > Attachments: repro.q > > Time Spent: 10m > Remaining Estimate: 0h > > AssertionError is thrown when Materialized view definition contains aggregate > operator with only one key: > {code} > CREATE MATERIALIZED VIEW cmv_mat_view_n5 TBLPROPERTIES > ('transactional'='true') AS > SELECT cmv_basetable_n5.a, sum(cmv_basetable_2_n2.d) > FROM cmv_basetable_n5 JOIN cmv_basetable_2_n2 ON (cmv_basetable_n5.a = > cmv_basetable_2_n2.a) > WHERE cmv_basetable_2_n2.c > 10.0 > GROUP BY cmv_basetable_n5.a; > ... > ALTER MATERIALIZED VIEW cmv_mat_view_n5 REBUILD; > {code} > {code} > java.lang.AssertionError: wrong operand count 1 for AND > at org.apache.calcite.util.Litmus$1.fail(Litmus.java:31) > at > org.apache.calcite.sql.SqlBinaryOperator.validRexOperands(SqlBinaryOperator.java:219) > at org.apache.calcite.rex.RexCall.(RexCall.java:86) > at org.apache.calcite.rex.RexBuilder.makeCall(RexBuilder.java:251) > at > org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveAggregateIncrementalRewritingRule.onMatch(HiveAggregateIncrementalRewritingRule.java:124) > at > org.apache.calcite.plan.AbstractRelOptPlanner.fireRule(AbstractRelOptPlanner.java:319) > at org.apache.calcite.plan.hep.HepPlanner.applyRule(HepPlanner.java:560) > at > org.apache.calcite.plan.hep.HepPlanner.applyRules(HepPlanner.java:419) > at > org.apache.calcite.plan.hep.HepPlanner.executeInstruction(HepPlanner.java:256) > at > org.apache.calcite.plan.hep.HepInstruction$RuleInstance.execute(HepInstruction.java:127) > at > 
org.apache.calcite.plan.hep.HepPlanner.executeProgram(HepPlanner.java:215) > at > org.apache.calcite.plan.hep.HepPlanner.findBestExp(HepPlanner.java:202) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.executeProgram(CalcitePlanner.java:2715) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.executeProgram(CalcitePlanner.java:2681) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.applyMaterializedViewRewriting(CalcitePlanner.java:2318) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.apply(CalcitePlanner.java:1934) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.apply(CalcitePlanner.java:1810) > at > org.apache.calcite.tools.Frameworks.lambda$withPlanner$0(Frameworks.java:130) > at > org.apache.calcite.prepare.CalcitePrepareImpl.perform(CalcitePrepareImpl.java:915) > at org.apache.calcite.tools.Frameworks.withPrepare(Frameworks.java:179) > at org.apache.calcite.tools.Frameworks.withPlanner(Frameworks.java:125) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner.logicalPlan(CalcitePlanner.java:1571) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner.genOPTree(CalcitePlanner.java:562) > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12538) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:456) > at > org.apache.hadoop.hive.ql.ddl.view.materialized.alter.rebuild.AlterMaterializedViewRebuildAnalyzer.analyzeInternal(AlterMaterializedViewRebuildAnalyzer.java:89) > at > org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:315) > at > org.apache.hadoop.hive.ql.parse.ExplainSemanticAnalyzer.analyzeInternal(ExplainSemanticAnalyzer.java:171) > at > org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:315) > at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:223) > at 
org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:104) > at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:492) > at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:445) > at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:409) > at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:403) > at > org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:125) > at > org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:229) > at >