[GitHub] ashutoshc commented on a change in pull request #552: Hive 21279
ashutoshc commented on a change in pull request #552: Hive 21279 URL: https://github.com/apache/hive/pull/552#discussion_r260595328 ## File path: ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java ## @@ -228,7 +229,7 @@ public static InputFormat getInputFormatFromCache( inputFormats.put(inputFormatClass.getName(), format); } catch (Exception e) { throw new IOException("Cannot create an instance of InputFormat class " -+ inputFormatClass.getName() + " as specified in mapredWork!", e); + + inputFormatClass.getName() + " as specified in mapredWork!", e); Review comment: unnecessary ws change. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] ashutoshc commented on a change in pull request #552: Hive 21279
ashutoshc commented on a change in pull request #552: Hive 21279 URL: https://github.com/apache/hive/pull/552#discussion_r260597741 ## File path: ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ## @@ -1476,42 +1500,31 @@ private static String replaceTaskIdFromFilename(String filename, String oldTaskI } public static void mvFileToFinalPath(Path specPath, Configuration hconf, - boolean success, Logger log, DynamicPartitionCtx dpCtx, FileSinkDesc conf, - Reporter reporter) throws IOException, + boolean success, Logger log, DynamicPartitionCtx dpCtx, FileSinkDesc conf, + Reporter reporter) throws IOException, HiveException { -// -// Runaway task attempts (which are unable to be killed by MR/YARN) can cause HIVE-17113, -// where they can write duplicate output files to tmpPath after de-duplicating the files, -// but before tmpPath is moved to specPath. -// Fixing this issue will be done differently for blobstore (e.g. S3) -// vs non-blobstore (local filesystem, HDFS) filesystems due to differences in -// implementation - a directory move in a blobstore effectively results in file-by-file -// moves for every file in a directory, while in HDFS/localFS a directory move is just a -// single filesystem operation. -// - For non-blobstore FS, do the following: -// 1) Rename tmpPath to a new directory name to prevent additional files -// from being added by runaway processes. -// 2) Remove duplicates from the temp directory -// 3) Rename/move the temp directory to specPath -// -// - For blobstore FS, do the following: -// 1) Remove duplicates from tmpPath -// 2) Use moveSpecifiedFiles() to perform a file-by-file move of the de-duped files -// to specPath. 
On blobstore FS, assuming n files in the directory, this results -// in n file moves, compared to 2*n file moves with the previous solution -// (each directory move would result in a file-by-file move of the files in the directory) -// +// There are following two paths this could could take based on the value of shouldAvoidRename Review comment: Rest of the earlier comment still applies for true. we can retain that. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] ashutoshc commented on a change in pull request #552: Hive 21279
ashutoshc commented on a change in pull request #552: Hive 21279 URL: https://github.com/apache/hive/pull/552#discussion_r260598413 ## File path: ql/src/java/org/apache/hadoop/hive/ql/io/HiveSequenceFileInputFormat.java ## @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.SequenceFileInputFormat; + +/** + * HiveSequenceFileInputFormat. + * This input format is used by Fetch Operator. This input format does list status + *on list of files (kept in listsToFetch) instead of doing list on whole directory + *as done by previously used SequenceFileFormat. 
+ *To use this FileFormat make sure to provide the list of files + * @param + * @param + */ +public class HiveSequenceFileInputFormat +extends SequenceFileInputFormat { + + public HiveSequenceFileInputFormat() { +setMinSplitSize(SequenceFile.SYNC_INTERVAL); + } + + Set listsToFetch = null; + + public void setListsToFetch(Set listsToFetch) { +this.listsToFetch = listsToFetch; + } + + @Override + protected FileStatus[] listStatus(JobConf job) throws IOException { +if(listsToFetch == null || listsToFetch.isEmpty()) { Review comment: Comment : this input format may also be used in fetch task (select without job) in which case make it behave like SequenceFileIF. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] ashutoshc commented on a change in pull request #552: Hive 21279
ashutoshc commented on a change in pull request #552: Hive 21279 URL: https://github.com/apache/hive/pull/552#discussion_r260595146 ## File path: hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java ## @@ -436,6 +437,7 @@ public static void runWorker(HiveConf hiveConf) throws Exception { // stream data into streaming table with N buckets, then copy the data into another bucketed table // check if bucketing in both was done in the same way @Test + @Ignore Review comment: Can you please create a follow-up jira to re-enable this test? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] ashutoshc commented on a change in pull request #552: Hive 21279
ashutoshc commented on a change in pull request #552: Hive 21279 URL: https://github.com/apache/hive/pull/552#discussion_r260595310 ## File path: ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java ## @@ -215,7 +216,7 @@ public void setWork(FetchWork work) { @SuppressWarnings("unchecked") public static InputFormat getInputFormatFromCache( -Class inputFormatClass, Configuration conf) throws IOException { + Class inputFormatClass, Configuration conf) throws IOException { Review comment: unnecessary ws change. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] ashutoshc commented on a change in pull request #552: Hive 21279
ashutoshc commented on a change in pull request #552: Hive 21279 URL: https://github.com/apache/hive/pull/552#discussion_r260598999 ## File path: ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ## @@ -1538,18 +1551,14 @@ public static void mvFileToFinalPath(Path specPath, Configuration hconf, // move to the file destination Utilities.FILE_OP_LOGGER.trace("Moving tmp dir: {} to: {}", tmpPath, specPath); - -perfLogger.PerfLogBegin("FileSinkOperator", "RenameOrMoveFiles"); -if (isBlobStorage) { - // HIVE-17113 - avoid copying files that may have been written to the temp dir by runaway tasks, - // by moving just the files we've tracked from removeTempOrDuplicateFiles(). - Utilities.moveSpecifiedFiles(fs, tmpPath, specPath, filesKept); +if(shouldAvoidRename(conf, hconf)){ + LOG.debug("Skipping rename/move files. Files to be kept are: " + filesKept.toString()); + conf.getFilesToFetch().addAll(filesKept); Review comment: We already have statusList. I am wondering using that instead of filesKept will let us avoid doing listStatus() call later. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] ashutoshc commented on a change in pull request #552: Hive 21279
ashutoshc commented on a change in pull request #552: Hive 21279 URL: https://github.com/apache/hive/pull/552#discussion_r260600921 ## File path: ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java ## @@ -204,6 +219,16 @@ public void compile(final ParseContext pCtx, fetch.setIsUsingThriftJDBCBinarySerDe(false); } + Collection> tableScanOps = + Lists.>newArrayList(pCtx.getTopOps().values()); + Set fsOps = OperatorUtils.findOperators(tableScanOps, FileSinkOperator.class); + if(fsOps != null && fsOps.size() == 1) { +FileSinkOperator op = fsOps.iterator().next(); +Set filesToFetch = new HashSet<>(); +op.getConf().setFilesToFetch(filesToFetch); Review comment: I am not sure how this works. filesToFetch field is populated in tasks. Having this same reference won't help, because plan (and FilesinkDesc) is sent to cluster from HS2, but any changes in plan won't reflect here after job is finished on cluster. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] ashutoshc commented on a change in pull request #552: Hive 21279
ashutoshc commented on a change in pull request #552: Hive 21279 URL: https://github.com/apache/hive/pull/552#discussion_r260599097 ## File path: ql/src/java/org/apache/hadoop/hive/ql/io/HiveSequenceFileInputFormat.java ## @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.SequenceFileInputFormat; + +/** + * HiveSequenceFileInputFormat. + * This input format is used by Fetch Operator. This input format does list status + *on list of files (kept in listsToFetch) instead of doing list on whole directory + *as done by previously used SequenceFileFormat. 
+ *To use this FileFormat make sure to provide the list of files + * @param + * @param + */ +public class HiveSequenceFileInputFormat +extends SequenceFileInputFormat { + + public HiveSequenceFileInputFormat() { +setMinSplitSize(SequenceFile.SYNC_INTERVAL); + } + + Set listsToFetch = null; + + public void setListsToFetch(Set listsToFetch) { +this.listsToFetch = listsToFetch; + } + + @Override + protected FileStatus[] listStatus(JobConf job) throws IOException { +if(listsToFetch == null || listsToFetch.isEmpty()) { + return super.listStatus(job); +} +List fsStatusList = new ArrayList<>(); +for(Path path:listsToFetch) { + FileSystem fs = path.getFileSystem(job); + FileStatus fsStatus = fs.getFileStatus(path); Review comment: I think we can avoid this call, if we can carry over statuslist from FS operator. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] ashutoshc commented on a change in pull request #552: Hive 21279
ashutoshc commented on a change in pull request #552: Hive 21279 URL: https://github.com/apache/hive/pull/552#discussion_r260600632 ## File path: ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java ## @@ -204,6 +219,16 @@ public void compile(final ParseContext pCtx, fetch.setIsUsingThriftJDBCBinarySerDe(false); } + Collection> tableScanOps = Review comment: Can you please add comments for this? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] ashutoshc commented on a change in pull request #552: Hive 21279
ashutoshc commented on a change in pull request #552: Hive 21279 URL: https://github.com/apache/hive/pull/552#discussion_r260598077 ## File path: ql/src/java/org/apache/hadoop/hive/ql/io/HiveSequenceFileInputFormat.java ## @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.SequenceFileInputFormat; + +/** + * HiveSequenceFileInputFormat. + * This input format is used by Fetch Operator. This input format does list status + *on list of files (kept in listsToFetch) instead of doing list on whole directory + *as done by previously used SequenceFileFormat. 
+ *To use this FileFormat make sure to provide the list of files + * @param + * @param + */ +public class HiveSequenceFileInputFormat +extends SequenceFileInputFormat { + + public HiveSequenceFileInputFormat() { +setMinSplitSize(SequenceFile.SYNC_INTERVAL); + } + + Set listsToFetch = null; + + public void setListsToFetch(Set listsToFetch) { Review comment: Better name: setFiles This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] ashutoshc commented on a change in pull request #552: Hive 21279
ashutoshc commented on a change in pull request #552: Hive 21279 URL: https://github.com/apache/hive/pull/552#discussion_r260599748 ## File path: ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java ## @@ -204,6 +219,16 @@ public void compile(final ParseContext pCtx, fetch.setIsUsingThriftJDBCBinarySerDe(false); } + Collection> tableScanOps = Review comment: Please add comments for this. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] ashutoshc commented on a change in pull request #552: Hive 21279
ashutoshc commented on a change in pull request #552: Hive 21279 URL: https://github.com/apache/hive/pull/552#discussion_r260597344 ## File path: ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ## @@ -1204,6 +1203,18 @@ private static void moveSpecifiedFiles(FileSystem fs, Path src, Path dst, Set filesToMove) + throws IOException, HiveException { +if (!fs.exists(dst)) { + fs.mkdirs(dst); Review comment: Can you check mkdirs() contract. I think if dst exists, it will return silently. i.e., no need to check for exists() first. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] ashutoshc commented on a change in pull request #552: Hive 21279
ashutoshc commented on a change in pull request #552: Hive 21279 URL: https://github.com/apache/hive/pull/552#discussion_r260597516 ## File path: ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ## @@ -1463,6 +1474,19 @@ private static String replaceTaskIdFromFilename(String filename, String oldTaskI return snew.toString(); } + + public static boolean shouldAvoidRename(FileSinkDesc conf, Configuration hConf) { +// we are avoiding rename/move only if following conditions are met +// * execution engine is tez +// * query cache is disabled +// * if it is select query +if (conf != null && conf.getIsQuery() && conf.getFilesToFetch() != null Review comment: conf will never be null This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] ashutoshc commented on a change in pull request #552: Hive 21279
ashutoshc commented on a change in pull request #552: Hive 21279 URL: https://github.com/apache/hive/pull/552#discussion_r260595720 ## File path: ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java ## @@ -379,6 +387,11 @@ public boolean doNext(WritableComparable key, Writable value) throws IOException Class formatter = currDesc.getInputFileFormatClass(); Utilities.copyTableJobPropertiesToConf(currDesc.getTableDesc(), job); InputFormat inputFormat = getInputFormatFromCache(formatter, job); + if(inputFormat instanceof HiveSequenceFileInputFormat) { +// input format could be cached, in which case we need to reset the list of files to fetch Review comment: lets also add in comment that format could be cached because of resultset cache This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] rmsmani commented on issue #540: HIVE-21283 Synonyms for the existing functions
rmsmani commented on issue #540: HIVE-21283 Synonyms for the existing functions URL: https://github.com/apache/hive/pull/540#issuecomment-467731239 Hi @sankarh Can you please review the code and merge it This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[jira] [Created] (HIVE-21330) Bucketing id varies b/w data loaded through streaming apis and regular query
Vineet Garg created HIVE-21330: -- Summary: Bucketing id varies b/w data loaded through streaming apis and regular query Key: HIVE-21330 URL: https://issues.apache.org/jira/browse/HIVE-21330 Project: Hive Issue Type: Bug Reporter: Vineet Garg The test at [https://github.com/apache/hive/blob/master/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java#L439] tests for this case. It currently passes but for the wrong reason. This test checks for empty result set. Result sets are empty due to prior INSERT failing to load data, not because the bucketing scheme is different. This error with INSERT is fixed in https://github.com/apache/hive/pull/552. Test with this patch fails because the underlying bucketing ids generated are different. These tests are run on MR instead of TEZ which could explain the different bucketing ids. I don't really know what the repercussions of having different bucketing ids are, or why they are expected to be the same, but since there is a test to test this logic it is worth investigating the case. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Created] (HIVE-21329) Custom Tez runtime unordered output buffer size depending on operator pipeline
Jesus Camacho Rodriguez created HIVE-21329: -- Summary: Custom Tez runtime unordered output buffer size depending on operator pipeline Key: HIVE-21329 URL: https://issues.apache.org/jira/browse/HIVE-21329 Project: Hive Issue Type: Improvement Components: Tez Reporter: Jesus Camacho Rodriguez Assignee: Jesus Camacho Rodriguez For instance, if we have a reduce sink operator with no keys followed by a Group By (merge partial), we can decrease the output buffer size since we will only produce a single row. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[GitHub] vineetgarg02 opened a new pull request #552: Hive 21279
vineetgarg02 opened a new pull request #552: Hive 21279 URL: https://github.com/apache/hive/pull/552 This patch avoids rename/move (to tmpPath) during the File Sink operation and creates a list of files to pass over to the Fetch operator to fetch from. In the context of cloud file systems, file I/Os are expensive, so avoiding even a single operation provides a significant boost. Internal experiments show more than 50% boost in fetch result performance. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] rmsmani commented on issue #388: HIVE-20057: Fix Hive table conversion DESCRIBE table bug
rmsmani commented on issue #388: HIVE-20057: Fix Hive table conversion DESCRIBE table bug URL: https://github.com/apache/hive/pull/388#issuecomment-467627229 Hi @ashutosh-bapat, @sankarh Can you merge the code This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] rmsmani commented on issue #534: HIVE-21270: A UDTF to show schema (column names and types) of given q…
rmsmani commented on issue #534: HIVE-21270: A UDTF to show schema (column names and types) of given q… URL: https://github.com/apache/hive/pull/534#issuecomment-467626893 Hi @ashutosh-bapat, @sankarh Can you merge the code This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] rmsmani edited a comment on issue #540: HIVE-21283 Synonyms for the existing functions
rmsmani edited a comment on issue #540: HIVE-21283 Synonyms for the existing functions URL: https://github.com/apache/hive/pull/540#issuecomment-467625879 Hi @jcamachor, @ashutosh-bapat, @sankarh Can you please review the code and merge it This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] rmsmani edited a comment on issue #540: HIVE-21283 Synonyms for the existing functions
rmsmani edited a comment on issue #540: HIVE-21283 Synonyms for the existing functions URL: https://github.com/apache/hive/pull/540#issuecomment-467625879 Hi @jcamachor Can you please review the code and merge it This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] rmsmani commented on issue #540: HIVE-21283 Synonyms for the existing functions
rmsmani commented on issue #540: HIVE-21283 Synonyms for the existing functions URL: https://github.com/apache/hive/pull/540#issuecomment-467625879 Hi @jcamachor, @vgarg Can you please review the code and merge it This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[jira] [Created] (HIVE-21328) Call To Hadoop Text getBytes() Without Call to getLength()
BELUGA BEHR created HIVE-21328: -- Summary: Call To Hadoop Text getBytes() Without Call to getLength() Key: HIVE-21328 URL: https://issues.apache.org/jira/browse/HIVE-21328 Project: Hive Issue Type: Bug Components: Query Planning Affects Versions: 4.0.0, 3.2.0 Reporter: BELUGA BEHR I'm not sure if there is actually a bug, but this looks highly suspect: {code:java} public Object set(final Object o, final Text text) { return new BytesWritable(text == null ? null : text.getBytes()); } {code} https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetStringInspector.java#L104-L106 There are two components to a Text object. There are the internal bytes and the length of the bytes. The two are independent. I.e., a quick "reset" on the Text object simply sets the internal length counter to zero. This code is potentially looking at obsolete data that it shouldn't be seeing because it is not considering the length of the Text. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[GitHub] sankarh opened a new pull request #551: HIVE-21286: Hive should support clean-up of previously bootstrapped tables when retry from different dump.
sankarh opened a new pull request #551: HIVE-21286: Hive should support clean-up of previously bootstrapped tables when retry from different dump. URL: https://github.com/apache/hive/pull/551 This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] miklosgergely commented on a change in pull request #544: HIVE-16924 Support distinct in presence of Group By
miklosgergely commented on a change in pull request #544: HIVE-16924 Support distinct in presence of Group By URL: https://github.com/apache/hive/pull/544#discussion_r260387274 ## File path: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ## @@ -4194,27 +4191,29 @@ public static long unsetBit(long bitmap, int bitIdx) { } /** - * This function is a wrapper of parseInfo.getGroupByForClause which - * automatically translates SELECT DISTINCT a,b,c to SELECT a,b,c GROUP BY - * a,b,c. + * Returns the GBY, if present; + * DISTINCT, if present, will be handled when generating the SELECT. */ List getGroupByForClause(QBParseInfo parseInfo, String dest) throws SemanticException { -if (parseInfo.getSelForClause(dest).getToken().getType() == HiveParser.TOK_SELECTDI) { - ASTNode selectExprs = parseInfo.getSelForClause(dest); - List result = new ArrayList(selectExprs == null ? 0 - : selectExprs.getChildCount()); - if (selectExprs != null) { -for (int i = 0; i < selectExprs.getChildCount(); ++i) { - if (((ASTNode) selectExprs.getChild(i)).getToken().getType() == HiveParser.QUERY_HINT) { +// When *not* invoked by CalcitePlanner, return the DISTINCT as a GBY +// CBO will handle the DISTINCT in CalcitePlannerAction.genSelectLogicalPlan +ASTNode selectExpr = parseInfo.getSelForClause(dest); +Collection aggregateFunction = parseInfo.getDestToAggregationExprs().get(dest).values(); +if (isSelectDistinct(selectExpr) && !isGroupBy(selectExpr) && !isAggregateInSelect(selectExpr, aggregateFunction)) { Review comment: as we agreed, distinct with aggregate function and with group by will be supported only if cbo is enabled. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[jira] [Created] (HIVE-21327) Predicate is not pushed to Parquet if hive.parquet.timestamp.skip.conversion=true
Marta Kuczora created HIVE-21327: Summary: Predicate is not pushed to Parquet if hive.parquet.timestamp.skip.conversion=true Key: HIVE-21327 URL: https://issues.apache.org/jira/browse/HIVE-21327 Project: Hive Issue Type: Bug Affects Versions: 4.0.0 Reporter: Marta Kuczora Assignee: Marta Kuczora -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[GitHub] pvary commented on issue #540: HIVE-21283 Synonyms for the existing functions
pvary commented on issue #540: HIVE-21283 Synonyms for the existing functions URL: https://github.com/apache/hive/pull/540#issuecomment-467430263 > Hi @pvary > Please review and merge the code Sorry, but my schedule is tight nowadays. You might be better off trying to find someone else to review your code than waiting for me to have time to review :( Sorry :( This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] pvary commented on issue #540: HIVE-21283 Synonyms for the existing functions
pvary commented on issue #540: HIVE-21283 Synonyms for the existing functions URL: https://github.com/apache/hive/pull/540#issuecomment-467430263 > Hi @pvary > Please review and merge the code Sorry, but my schedule is tight nowadays. You might be better off trying to find someone else to review your code than waiting for me to have time to review :( Sorry :( This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] kgyrtkirk commented on a change in pull request #550: HIVE-21198 Introduce a database object reference class
kgyrtkirk commented on a change in pull request #550: HIVE-21198 Introduce a database object reference class URL: https://github.com/apache/hive/pull/550#discussion_r260256192 ## File path: ql/src/java/org/apache/hadoop/hive/ql/exec/repl/incremental/IncrementalLoadTasksBuilder.java ## @@ -266,16 +271,15 @@ private boolean shouldReplayEvent(FileStatus dir, DumpType dumpType, String dbNa return updateReplIdTxnTask; } - private Task tableUpdateReplStateTask(String dbName, String tableName, -Map partSpec, String replState, -Task preCursor) throws SemanticException { + private Task tableUpdateReplStateTask(TableName tableName, Map partSpec, Review comment: now that I see this...I think that it would probably make sense to also include `partSpec` into this object (`PartishRef`: `catalog,database,tablename,partitonspec` ) ? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] kgyrtkirk commented on a change in pull request #550: HIVE-21198 Introduce a database object reference class
kgyrtkirk commented on a change in pull request #550: HIVE-21198 Introduce a database object reference class URL: https://github.com/apache/hive/pull/550#discussion_r260253361 ## File path: hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java ## @@ -348,8 +348,8 @@ protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext cntxt, Hive hive AlterTableDesc alterTable = work.getAlterTblDesc(); if (alterTable != null) { - Table table = hive.getTable(SessionState.get().getCurrentDatabase(), - Utilities.getDbTableName(alterTable.getOldName())[1], false); + final String tableName = Utilities.getTableName(alterTable.getOldName()).getTable(); Review comment: I guess we can't add `of` methods to `TableName`; but I feel like putting this into "Utilities" is kinda unfortunate. How about: `HiveTableName.of(...)` If you need any utilities you can probably make HiveTableName extend TableName - not sure if that will be needed or not This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] kgyrtkirk commented on a change in pull request #550: HIVE-21198 Introduce a database object reference class
kgyrtkirk commented on a change in pull request #550: HIVE-21198 Introduce a database object reference class URL: https://github.com/apache/hive/pull/550#discussion_r260253836 ## File path: hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java ## @@ -348,8 +348,8 @@ protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext cntxt, Hive hive AlterTableDesc alterTable = work.getAlterTblDesc(); if (alterTable != null) { - Table table = hive.getTable(SessionState.get().getCurrentDatabase(), - Utilities.getDbTableName(alterTable.getOldName())[1], false); + final String tableName = Utilities.getTableName(alterTable.getOldName()).getTable(); + Table table = hive.getTable(SessionState.get().getCurrentDatabase(), tableName, false); Review comment: `TableName` contains the database name; `SessionState.get().getCurrentDatabase()` should appear only at the time of creating the `TableName` - so as a sideeffect this "getTable()" will expect `TableName` argument This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] kgyrtkirk commented on a change in pull request #550: HIVE-21198 Introduce a database object reference class
kgyrtkirk commented on a change in pull request #550: HIVE-21198 Introduce a database object reference class URL: https://github.com/apache/hive/pull/550#discussion_r260253836 ## File path: hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java ## @@ -348,8 +348,8 @@ protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext cntxt, Hive hive AlterTableDesc alterTable = work.getAlterTblDesc(); if (alterTable != null) { - Table table = hive.getTable(SessionState.get().getCurrentDatabase(), - Utilities.getDbTableName(alterTable.getOldName())[1], false); + final String tableName = Utilities.getTableName(alterTable.getOldName()).getTable(); + Table table = hive.getTable(SessionState.get().getCurrentDatabase(), tableName, false); Review comment: `TableName` contains the database name; `SessionState.get().getCurrentDatabase()` should appear only at the time of creating the `TableName` - so as a sideeffect this "getTable()" will expect `TableName` argument This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] kgyrtkirk commented on a change in pull request #550: HIVE-21198 Introduce a database object reference class
kgyrtkirk commented on a change in pull request #550: HIVE-21198 Introduce a database object reference class URL: https://github.com/apache/hive/pull/550#discussion_r260254455 ## File path: ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java ## @@ -86,8 +86,8 @@ private ColumnStatistics constructColumnStatsFromInput() colStats.getStatsDesc().getTableName()); return colStats; } -String dbName = work.dbName(); -String tableName = work.getTableName(); +final String dbName = work.dbName(); Review comment: the "work" should have method(and corresponding field) `getTableName` instead of these 2 getters This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] kgyrtkirk commented on a change in pull request #550: HIVE-21198 Introduce a database object reference class
kgyrtkirk commented on a change in pull request #550: HIVE-21198 Introduce a database object reference class URL: https://github.com/apache/hive/pull/550#discussion_r260254184 ## File path: ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java ## @@ -651,7 +652,7 @@ public void notifyTableChanged(String dbName, String tableName, long updateTime) List entriesToInvalidate = null; rwLock.writeLock().lock(); try { - String key = (dbName.toLowerCase() + "." + tableName.toLowerCase()); + String key = TableName.getDbTable(dbName, tableName).toLowerCase(); Review comment: I would think that at this point we should already have a tablename from somewhere; constructing it here doesn't really make sense This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] kgyrtkirk commented on a change in pull request #550: HIVE-21198 Introduce a database object reference class
kgyrtkirk commented on a change in pull request #550: HIVE-21198 Introduce a database object reference class URL: https://github.com/apache/hive/pull/550#discussion_r260254184 ## File path: ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java ## @@ -651,7 +652,7 @@ public void notifyTableChanged(String dbName, String tableName, long updateTime) List entriesToInvalidate = null; rwLock.writeLock().lock(); try { - String key = (dbName.toLowerCase() + "." + tableName.toLowerCase()); + String key = TableName.getDbTable(dbName, tableName).toLowerCase(); Review comment: I would think that at this point we should already have a tablename from somewhere; constructing it here doesn't really make sense This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] kgyrtkirk commented on a change in pull request #550: HIVE-21198 Introduce a database object reference class
kgyrtkirk commented on a change in pull request #550: HIVE-21198 Introduce a database object reference class URL: https://github.com/apache/hive/pull/550#discussion_r260256192 ## File path: ql/src/java/org/apache/hadoop/hive/ql/exec/repl/incremental/IncrementalLoadTasksBuilder.java ## @@ -266,16 +271,15 @@ private boolean shouldReplayEvent(FileStatus dir, DumpType dumpType, String dbNa return updateReplIdTxnTask; } - private Task tableUpdateReplStateTask(String dbName, String tableName, -Map partSpec, String replState, -Task preCursor) throws SemanticException { + private Task tableUpdateReplStateTask(TableName tableName, Map partSpec, Review comment: now that I see this...I think that it would probably make sense to also include `partSpec` into this object (`PartishRef`: `catalog,database,tablename,partitonspec` ) ? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] kgyrtkirk commented on a change in pull request #550: HIVE-21198 Introduce a database object reference class
kgyrtkirk commented on a change in pull request #550: HIVE-21198 Introduce a database object reference class URL: https://github.com/apache/hive/pull/550#discussion_r260253361 ## File path: hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java ## @@ -348,8 +348,8 @@ protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext cntxt, Hive hive AlterTableDesc alterTable = work.getAlterTblDesc(); if (alterTable != null) { - Table table = hive.getTable(SessionState.get().getCurrentDatabase(), - Utilities.getDbTableName(alterTable.getOldName())[1], false); + final String tableName = Utilities.getTableName(alterTable.getOldName()).getTable(); Review comment: I guess we can't add `of` methods to `TableName`; but I feel like putting this into "Utilities" is kinda unfortunate. How about: `HiveTableName.of(...)` If you need any utilities you can probably make HiveTableName extend TableName - not sure if that will be needed or not This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] kgyrtkirk commented on a change in pull request #550: HIVE-21198 Introduce a database object reference class
kgyrtkirk commented on a change in pull request #550: HIVE-21198 Introduce a database object reference class URL: https://github.com/apache/hive/pull/550#discussion_r260254455 ## File path: ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java ## @@ -86,8 +86,8 @@ private ColumnStatistics constructColumnStatsFromInput() colStats.getStatsDesc().getTableName()); return colStats; } -String dbName = work.dbName(); -String tableName = work.getTableName(); +final String dbName = work.dbName(); Review comment: the "work" should have method(and corresponding field) `getTableName` instead of these 2 getters This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[jira] [Created] (HIVE-21326) Resurrect counter based stats collector
Zoltan Haindrich created HIVE-21326: --- Summary: Resurrect counter based stats collector Key: HIVE-21326 URL: https://issues.apache.org/jira/browse/HIVE-21326 Project: Hive Issue Type: Improvement Reporter: Zoltan Haindrich HIVE-6500 added it and HIVE-12411 removed this feature. From the comments/etc I think around that time M/R also had this kind of feature - but it was less mature than it is right now. As right now we are already utilizing tez counters a lot with no noticeable backstab - I think it would make sense to "resurrect" this feature. https://github.com/apache/hive/blob/2daaed73de09b4b7987fd2d682140d7ee5637640/ql/src/test/results/clientpositive/llap/tez_input_counters.q.out#L2750 -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Created] (HIVE-21325) Hive external table replication failed with Permission denied issue.
mahesh kumar behera created HIVE-21325: -- Summary: Hive external table replication failed with Permission denied issue. Key: HIVE-21325 URL: https://issues.apache.org/jira/browse/HIVE-21325 Project: Hive Issue Type: Bug Affects Versions: 4.0.0 Reporter: mahesh kumar behera Assignee: mahesh kumar behera Fix For: 4.0.0 During external table replication the file copy is done in parallel to the meta data replication. If the file copy task creates the directory with do as set to true, it will create the directory with permission set to the user running the repl command. In that case the meta data task while creating the table may fail as hive user might not have access to the created directory. The fix should be # While creating directory, if sql based authentication is enabled, then disable storage based authentication for hive user. # Currently the created directory has the login user access, it should retain the source clusters owner, group and permission. # For external table replication don't create the directory during create table and add partition. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[GitHub] maheshk114 commented on a change in pull request #541: HIVE-21197 : Hive Replication can add duplicate data during migration to a target with hive.strict.managed.tables enabled
maheshk114 commented on a change in pull request #541: HIVE-21197 : Hive Replication can add duplicate data during migration to a target with hive.strict.managed.tables enabled URL: https://github.com/apache/hive/pull/541#discussion_r260172472 ## File path: ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ## @@ -5225,7 +5226,8 @@ private int updateFirstIncPendingFlag(Hive hive, ReplSetFirstIncLoadFlagDesc des for (String dbName : Utils.matchesDb(hive, dbNameOrPattern)) { Database database = hive.getMSC().getDatabase(dbName); parameters = database.getParameters(); -if (ReplUtils.isFirstIncPending(parameters)) { +String incPendPara = parameters != null ? parameters.get(ReplUtils.REPL_FIRST_INC_PENDING_FLAG) : null; +if (incPendPara != null && (!flag.equalsIgnoreCase(incPendPara))) { Review comment: changed to ReplRemoveFirstIncLoadPendFlagDesc ..now just remove if the property is set This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[jira] [Created] (HIVE-21324) Sometimes TestTriggersTezSessionPoolManager generates a lot of Exceptions
Zoltan Haindrich created HIVE-21324: --- Summary: Sometimes TestTriggersTezSessionPoolManager generates a lot of Exceptions Key: HIVE-21324 URL: https://issues.apache.org/jira/browse/HIVE-21324 Project: Hive Issue Type: Bug Reporter: Zoltan Haindrich in a recent ptest run I've noticed that the TestTriggersTezSessionPoolManager's junit xml is 2.5G! this is most probably also the reason behind why some ptest executions doesn't have test report in jenkins the standard error is flooded with the following exception; logged every ~2ms {code} 2019-02-25T09:52:22,115 WARN [HiveServer2-Background-Pool: Thread-6217] ipc.Client: interrupted waiting to send rpc request to server java.lang.InterruptedException: null at java.util.concurrent.FutureTask.awaitDone(FutureTask.java:404) ~[?:1.8.0_102] at java.util.concurrent.FutureTask.get(FutureTask.java:191) ~[?:1.8.0_102] at org.apache.hadoop.ipc.Client$Connection.sendRpcRequest(Client.java:1140) ~[hadoop-common-3.1.0.jar:?] at org.apache.hadoop.ipc.Client.call(Client.java:1389) [hadoop-common-3.1.0.jar:?] at org.apache.hadoop.ipc.Client.call(Client.java:1347) [hadoop-common-3.1.0.jar:?] at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:228) [hadoop-common-3.1.0.jar:?] at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:116) [hadoop-common-3.1.0.jar:?] at com.sun.proxy.$Proxy134.getApplicationReport(Unknown Source) [?:?] at org.apache.hadoop.yarn.api.impl.pb.client.ApplicationClientProtocolPBClientImpl.getApplicationReport(ApplicationClientProtocolPBClientImpl.java:244) [hadoop-yarn-common-3.1.0.jar:?] at sun.reflect.GeneratedMethodAccessor60.invoke(Unknown Source) ~[?:?] at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_102] at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_102] at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:422) [hadoop-common-3.1.0.jar:?] 
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:165) [hadoop-common-3.1.0.jar:?] at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:157) [hadoop-common-3.1.0.jar:?] at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:95) [hadoop-common-3.1.0.jar:?] at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:359) [hadoop-common-3.1.0.jar:?] at com.sun.proxy.$Proxy135.getApplicationReport(Unknown Source) [?:?] at org.apache.hadoop.yarn.client.api.impl.YarnClientImpl.getApplicationReport(YarnClientImpl.java:512) [hadoop-yarn-client-3.1.0.jar:?] at org.apache.tez.client.TezYarnClient.getApplicationReport(TezYarnClient.java:94) [tez-api-0.9.1.jar:0.9.1] at org.apache.tez.dag.api.client.rpc.DAGClientRPCImpl.getAppReport(DAGClientRPCImpl.java:245) [tez-api-0.9.1.jar:0.9.1] at org.apache.tez.dag.api.client.rpc.DAGClientRPCImpl.createAMProxyIfNeeded(DAGClientRPCImpl.java:270) [tez-api-0.9.1.jar:0.9.1] at org.apache.tez.dag.api.client.rpc.DAGClientRPCImpl.getDAGStatus(DAGClientRPCImpl.java:95) [tez-api-0.9.1.jar:0.9.1] at org.apache.tez.dag.api.client.DAGClientImpl.getDAGStatusViaAM(DAGClientImpl.java:371) [tez-api-0.9.1.jar:0.9.1] at org.apache.tez.dag.api.client.DAGClientImpl.getDAGStatusInternal(DAGClientImpl.java:221) [tez-api-0.9.1.jar:0.9.1] at org.apache.tez.dag.api.client.DAGClientImpl.getDAGStatus(DAGClientImpl.java:208) [tez-api-0.9.1.jar:0.9.1] at org.apache.tez.dag.api.client.DAGClientImpl._waitForCompletionWithStatusUpdates(DAGClientImpl.java:540) [tez-api-0.9.1.jar:0.9.1] at org.apache.tez.dag.api.client.DAGClientImpl.waitForCompletion(DAGClientImpl.java:342) [tez-api-0.9.1.jar:0.9.1] at org.apache.hadoop.hive.ql.exec.tez.TezTask$SyncDagClient.waitForCompletion(TezTask.java:757) [hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT] at 
org.apache.hadoop.hive.ql.exec.tez.TezTask.closeDagClientOnCancellation(TezTask.java:339) [hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT] at org.apache.hadoop.hive.ql.exec.tez.TezTask.execute(TezTask.java:231) [hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT] at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:212) [hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT] at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:97) [hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT] at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:2709) [hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT] at