This is an automated email from the ASF dual-hosted git repository. sankarh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new c104e8b HIVE-21924: Split text files even if header/footer exists (Mustafa Iman, reviewed by Sankar Hariappan) c104e8b is described below commit c104e8bfab957702dc012a8b4ac6a944669f8a3a Author: Mustafa Iman <must...@cloudera.com> AuthorDate: Wed Oct 9 22:54:10 2019 +0530 HIVE-21924: Split text files even if header/footer exists (Mustafa Iman, reviewed by Sankar Hariappan) Signed-off-by: Sankar Hariappan <sank...@apache.org> --- data/files/header_footer_table_4/0001.txt | 7 + data/files/header_footer_table_4/0002.txt | 7 + .../test/resources/testconfiguration.properties | 2 + .../hive/ql/io/HiveContextAwareRecordReader.java | 3 +- .../apache/hadoop/hive/ql/io/HiveInputFormat.java | 58 +- .../hadoop/hive/ql/io/SkippingTextInputFormat.java | 224 ++++++ .../apache/hadoop/hive/ql/io/LineBufferTest.java | 66 ++ .../file_with_header_footer_aggregation.q | 98 +++ ql/src/test/queries/clientpositive/skiphf_aggr2.q | 25 + .../llap/file_with_header_footer_aggregation.q.out | 849 +++++++++++++++++++++ .../results/clientpositive/llap/skiphf_aggr2.q.out | 123 +++ 11 files changed, 1436 insertions(+), 26 deletions(-) diff --git a/data/files/header_footer_table_4/0001.txt b/data/files/header_footer_table_4/0001.txt new file mode 100644 index 0000000..878e3cc --- /dev/null +++ b/data/files/header_footer_table_4/0001.txt @@ -0,0 +1,7 @@ +header_int,header_name,header_choice +12,alex,daily +3,barry,yearly +5,chelsea,monthly +8,xavier,monthly +footer_int,footer_name,footer_choice +footer2_int,footer2_name,footer2_choice \ No newline at end of file diff --git a/data/files/header_footer_table_4/0002.txt b/data/files/header_footer_table_4/0002.txt new file mode 100644 index 0000000..dd27f0e --- /dev/null +++ b/data/files/header_footer_table_4/0002.txt @@ -0,0 +1,7 @@ +header_int,header_name,header_choice +9,derek,yearly +11,ethan,monthly +1,faith,yearly +21,yves,daily +footer_int,footer_name,footer_choice +footer2_int,footer2_name,footer2_choice \ No newline at end of file diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index eb7bcab..0d5ef33 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -398,6 +398,7 @@ minillap.query.files=acid_bucket_pruning.q,\ reduce_deduplicate_distinct.q, \ remote_script.q,\ file_with_header_footer.q,\ + file_with_header_footer_aggregation.q,\ external_table_purge.q,\ external_table_with_space_in_location_path.q,\ import_exported_table.q,\ @@ -956,6 +957,7 @@ minillaplocal.query.files=\ smb_mapjoin_15.q,\ vectorized_nested_mapjoin.q,\ skiphf_aggr.q,\ + skiphf_aggr2.q,\ multi_insert_lateral_view.q,\ smb_mapjoin_4.q,\ cbo_udf_udaf.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java index 7f3ef37..38b226f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java @@ -23,6 +23,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import org.apache.hadoop.mapred.TextInputFormat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FileSystem; @@ -340,7 +341,7 @@ public abstract class HiveContextAwareRecordReader<K extends WritableComparable, part = null; } TableDesc table = (part == null) ? null : part.getTableDesc(); - if (table != null) { + if (table != null && !TextInputFormat.class.isAssignableFrom(part.getInputFileFormatClass())) { headerCount = Utilities.getHeaderCount(table); footerCount = Utilities.getFooterCount(table, jobConf); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java index 7e71c77..c97c961 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java @@ -18,40 +18,22 @@ package org.apache.hadoop.hive.ql.io; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; -import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.common.StringInternUtils; -import org.apache.hadoop.hive.common.ValidTxnWriteIdList; -import org.apache.hadoop.hive.common.ValidWriteIdList; -import org.apache.hadoop.hive.ql.exec.SerializationUtilities; -import org.apache.hive.common.util.HiveStringUtils; -import org.apache.hive.common.util.Ref; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.common.StringInternUtils; +import org.apache.hadoop.hive.common.ValidTxnWriteIdList; +import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil; import org.apache.hadoop.hive.llap.io.api.LlapIo; import org.apache.hadoop.hive.llap.io.api.LlapProxy; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.SerializationUtilities; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -77,8 +59,27 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.JobConfigurable; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.TextInputFormat; import org.apache.hadoop.util.StringUtils; +import org.apache.hive.common.util.HiveStringUtils; +import org.apache.hive.common.util.Ref; import org.apache.hive.common.util.ReflectionUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; /** * HiveInputFormat is a parameterized InputFormat which looks at the path name @@ -497,8 +498,15 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable> headerCount = Utilities.getHeaderCount(table); footerCount = Utilities.getFooterCount(table, conf); if (headerCount != 0 || footerCount != 0) { - // Input file has header or footer, cannot be splitted. - HiveConf.setLongVar(conf, ConfVars.MAPREDMINSPLITSIZE, Long.MAX_VALUE); + if (TextInputFormat.class.isAssignableFrom(inputFormatClass)) { + SkippingTextInputFormat skippingTextInputFormat = new SkippingTextInputFormat(); + skippingTextInputFormat.configure(conf, headerCount, footerCount); + inputFormat = skippingTextInputFormat; + } else { + // if the input is not text and contains header/footer we have no way of + // splitting them. + HiveConf.setLongVar(conf, ConfVars.MAPREDMINSPLITSIZE, Long.MAX_VALUE); + } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/SkippingTextInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/SkippingTextInputFormat.java new file mode 100644 index 0000000..7e936d5 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/SkippingTextInputFormat.java @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.TextInputFormat; + +import java.io.IOException; +import java.util.ArrayDeque; +import java.util.Map; +import java.util.Queue; +import java.util.concurrent.ConcurrentHashMap; + +/** + * SkippingInputFormat is a header/footer aware input format. It truncates + * splits identified by TextInputFormat. Header and footers are removed + * from the splits. + */ +public class SkippingTextInputFormat extends TextInputFormat { + + private final Map<Path, Long> startIndexMap = new ConcurrentHashMap<Path, Long>(); + private final Map<Path, Long> endIndexMap = new ConcurrentHashMap<Path, Long>(); + private JobConf conf; + private int headerCount; + private int footerCount; + + @Override + public void configure(JobConf conf) { + this.conf = conf; + super.configure(conf); + } + + public void configure(JobConf conf, int headerCount, int footerCount) { + configure(conf); + this.headerCount = headerCount; + this.footerCount = footerCount; + } + + @Override + protected FileSplit makeSplit(Path file, long start, long length, String[] hosts) { + return makeSplitInternal(file, start, length, hosts, null); + } + + @Override + protected FileSplit makeSplit(Path file, long start, long length, String[] hosts, String[] inMemoryHosts) { + return makeSplitInternal(file, start, length, hosts, inMemoryHosts); + } + + private FileSplit makeSplitInternal(Path file, long start, long length, String[] hosts, String[] inMemoryHosts) { + long cachedStart; + long cachedEnd; + try { + cachedStart = getCachedStartIndex(file); + cachedEnd = getCachedEndIndex(file); + } catch (IOException e) { + LOG.warn("Could not detect header/footer", e); + return new NullRowsInputFormat.DummyInputSplit(file); + } + if (cachedStart > start + length) { + return new NullRowsInputFormat.DummyInputSplit(file); + } + if (cachedStart > start) { + length = length - (cachedStart - start); + start = cachedStart; + } + if (cachedEnd < start) { + return new NullRowsInputFormat.DummyInputSplit(file); + } + if (cachedEnd < start + length) { + length = cachedEnd - start; + } + if (inMemoryHosts == null) { + return super.makeSplit(file, start, length, hosts); + } else { + return super.makeSplit(file, start, length, hosts, inMemoryHosts); + } + } + + private long getCachedStartIndex(Path path) throws IOException { + if (headerCount == 0) { + return 0; + } + Long startIndexForFile = startIndexMap.get(path); + if (startIndexForFile == null) { + FileSystem fileSystem; + FSDataInputStream fis = null; + fileSystem = path.getFileSystem(conf); + try { + fis = fileSystem.open(path); + for (int j = 0; j < headerCount; j++) { + if (fis.readLine() == null) { + startIndexMap.put(path, Long.MAX_VALUE); + return Long.MAX_VALUE; + } + } + // Readers skip the entire first row if the start index of the + // split is not zero. We are setting the start of the index as + // the last byte of the previous row so the last line of header + // is discarded instead of the first valid input row. + startIndexForFile = fis.getPos() - 1; + } finally { + if (fis != null) { + fis.close(); + } + } + startIndexMap.put(path, startIndexForFile); + } + return startIndexForFile; + } + + private long getCachedEndIndex(Path path) throws IOException { + Long endIndexForFile = endIndexMap.get(path); + if (endIndexForFile == null) { + final long bufferSectionSize = 5 * 1024; + FileSystem fileSystem = path.getFileSystem(conf); + long endOfFile = fileSystem.getFileStatus(path).getLen(); + if (footerCount == 0) { + endIndexForFile = endOfFile; + } else { + long bufferSectionEnd = endOfFile; // first byte that is not included in the section + long bufferSectionStart = Math.max(0, bufferSectionEnd - bufferSectionSize); + + // we need 'footer count' lines and one space for EOF + LineBuffer buffer = new LineBuffer(footerCount + 1); + FSDataInputStream fis = null; + try { + fis = fileSystem.open(path); + while (bufferSectionEnd > bufferSectionStart) { + fis.seek(bufferSectionStart); + long pos = fis.getPos(); + while (pos < bufferSectionEnd) { + if (fis.readLine() == null) { + // if there is not enough lines in this section, check the previous + // section. If this is the beginning section, there are simply not + // enough lines in the file. + break; + } + pos = fis.getPos(); + buffer.consume(pos, bufferSectionEnd); + } + if (buffer.getRemainingLineCount() == 0) { + // if we consumed all the required line ends, that means the buffer now + // contains the index of the first byte of the footer. + break; + } else { + bufferSectionEnd = bufferSectionStart; + bufferSectionStart = Math.max(0, bufferSectionEnd - bufferSectionSize); + } + } + if (buffer.getRemainingLineCount() == 0) { + // buffer.getFirstLineStart() is the first byte of the footer. So the split + // must end before this. + endIndexForFile = buffer.getFirstLineStart() - 1; + } else { + // there were not enough lines in the file to consume all footer rows. + endIndexForFile = Long.MIN_VALUE; + } + } finally { + if (fis != null) { + fis.close(); + } + } + } + endIndexMap.put(path, endIndexForFile); + } + return endIndexForFile; + } + + static class LineBuffer { + private final Queue<Long> queue = new ArrayDeque<Long>(); + private int remainingLineEnds; + private long lowPosition = Long.MAX_VALUE; + + LineBuffer(int requiredLines) { + this.remainingLineEnds = requiredLines; + } + + public void consume(long position, long sectionEnd) { + if (position > sectionEnd) { + return; + } + if (position < lowPosition) { + remainingLineEnds -= queue.size(); + queue.clear(); + queue.add(position); + lowPosition = position; + } else if (position > lowPosition) { + if (queue.size() == remainingLineEnds) { + queue.poll(); + } + queue.add(position); + lowPosition = queue.peek(); + } + } + + public int getRemainingLineCount() { + return remainingLineEnds - queue.size(); + } + + public long getFirstLineStart() { + return lowPosition; + } + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/LineBufferTest.java b/ql/src/test/org/apache/hadoop/hive/ql/io/LineBufferTest.java new file mode 100644 index 0000000..9cbdeef --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/LineBufferTest.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io; + +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +/** + * LineEndBuffer simple unit test. + */ +public class LineBufferTest { + + @Test + public void testLineEndBuffer() { + SkippingTextInputFormat.LineBuffer buffer; + buffer = new SkippingTextInputFormat.LineBuffer(3); + buffer.consume(200, 200); + buffer.consume(100, 200); + buffer.consume(100, 100); + buffer.consume(50, 100); + assertEquals(0, buffer.getRemainingLineCount()); + assertEquals(50, buffer.getFirstLineStart()); + + buffer = new SkippingTextInputFormat.LineBuffer(3); + buffer.consume(200, 200); + buffer.consume(150, 200); + buffer.consume(100, 200); + assertEquals(0, buffer.getRemainingLineCount()); + assertEquals(100, buffer.getFirstLineStart()); + + buffer = new SkippingTextInputFormat.LineBuffer(3); + buffer.consume(200, 200); + assertEquals(2, buffer.getRemainingLineCount()); + buffer.consume(100, 100); + assertEquals(1, buffer.getRemainingLineCount()); + buffer.consume(50, 100); + assertEquals(0, buffer.getRemainingLineCount()); + assertEquals(50, buffer.getFirstLineStart()); + + buffer = new SkippingTextInputFormat.LineBuffer(3); + buffer.consume(200, 200); + assertEquals(2, buffer.getRemainingLineCount()); + buffer.consume(50, 100); + assertEquals(1, buffer.getRemainingLineCount()); + buffer.consume(25, 100); + assertEquals(0, buffer.getRemainingLineCount()); + assertEquals(25, buffer.getFirstLineStart()); + } +} diff --git a/ql/src/test/queries/clientpositive/file_with_header_footer_aggregation.q b/ql/src/test/queries/clientpositive/file_with_header_footer_aggregation.q new file mode 100644 index 0000000..f66e1dc --- /dev/null +++ b/ql/src/test/queries/clientpositive/file_with_header_footer_aggregation.q @@ -0,0 +1,98 @@ +set hive.mapred.mode=nonstrict; + +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}; +dfs -copyFromLocal ../../data/files/header_footer_table_4 ${system:test.tmp.dir}/header_footer_table_4; + +CREATE TABLE numbrs (numbr int); +INSERT INTO numbrs VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10), (11), (12), (NULL); +CREATE EXTERNAL TABLE header_footer_table_4 (header_int int, header_name string, header_choice varchar(10)) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LOCATION '${system:test.tmp.dir}/header_footer_table_4' tblproperties ("skip.header.line.count"="1", "skip.footer.line.count"="2"); + +SELECT * FROM header_footer_table_4; + +SELECT * FROM header_footer_table_4 ORDER BY header_int LIMIT 8; + +-- should return nothing as title is correctly skipped +SELECT * FROM header_footer_table_4 WHERE header_choice = 'header_choice'; +SELECT * FROM header_footer_table_4 WHERE header_choice = 'monthly'; +SELECT COUNT(*) FROM header_footer_table_4; + +-- shpuld return nothing +SELECT * FROM header_footer_table_4 WHERE header_choice is NULL; + + +SELECT AVG(header_int) FROM header_footer_table_4 GROUP BY header_choice; + +-- should not include any header and footer +SELECT * FROM header_footer_table_4 A, header_footer_table_4 B ORDER BY A.header_int, B.header_int; + +-- no join variant should include header or footer +SELECT header_name, header_int FROM header_footer_table_4 LEFT JOIN numbrs ON numbr = header_int; +SELECT header_name, header_int FROM header_footer_table_4 RIGHT JOIN numbrs ON numbr = header_int; +SELECT header_name, header_int FROM header_footer_table_4 INNER JOIN numbrs ON numbr = header_int; +SELECT header_name, header_int FROM header_footer_table_4 FULL JOIN numbrs ON numbr = header_int; + + +SELECT header_choice, SUM(header_int) FROM header_footer_table_4 GROUP BY header_choice; + + +SELECT DISTINCT header_choice, SUM(header_int) FROM header_footer_table_4 GROUP BY header_choice; + + +SELECT header_name, header_choice FROM header_footer_table_4 ORDER BY header_int LIMIT 8; + +-- create table from existing text file should not feed header or footer to the new table +CREATE TABLE transition (title VARCHAR(10), name VARCHAR(10)) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' STORED AS TEXTFILE LOCATION '${system:test.tmp.dir}/header_footer_table_transition'; +INSERT INTO transition SELECT header_choice, header_name FROM header_footer_table_4; +SELECT * FROM transition A, transition B ORDER BY A.title, A.name, B.title, B.name; + +CREATE TABLE transition2 (header_choice VARCHAR(10), sum_header_int int) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' STORED AS TEXTFILE LOCATION '${system:test.tmp.dir}/test/header_footer_table_transition2'; +INSERT INTO transition2 SELECT header_choice, SUM(header_int) FROM header_footer_table_4 GROUP BY header_choice; +SELECT * FROM transition2 A, transition2 B ORDER BY A.sum_header_int, A.header_choice, B.sum_header_int, B.header_choice; + +DROP TABLE transition; +DROP TABLE transition2; + +-- turn off fetch conversion. This disables the additional header/footer handling logic in fetch operator. +set hive.fetch.task.conversion=none; + +SELECT * FROM header_footer_table_4; + +SELECT * FROM header_footer_table_4 ORDER BY header_int LIMIT 8; + +-- should return nothing as title is correctly skipped +SELECT * FROM header_footer_table_4 WHERE header_choice = 'header_choice'; +SELECT * FROM header_footer_table_4 WHERE header_choice = 'monthly'; +SELECT COUNT(*) FROM header_footer_table_4; + +-- shpuld return nothing +SELECT * FROM header_footer_table_4 WHERE header_choice is NULL; + + +SELECT AVG(header_int) FROM header_footer_table_4 GROUP BY header_choice; + +-- should not include any header and footer +SELECT * FROM header_footer_table_4 A, header_footer_table_4 B ORDER BY A.header_int, B.header_int; + +-- no join variant should include header or footer +SELECT header_name, header_int FROM header_footer_table_4 LEFT JOIN numbrs ON numbr = header_int; +SELECT header_name, header_int FROM header_footer_table_4 RIGHT JOIN numbrs ON numbr = header_int; +SELECT header_name, header_int FROM header_footer_table_4 INNER JOIN numbrs ON numbr = header_int; +SELECT header_name, header_int FROM header_footer_table_4 FULL JOIN numbrs ON numbr = header_int; + + +SELECT header_choice, SUM(header_int) FROM header_footer_table_4 GROUP BY header_choice; + + +SELECT DISTINCT header_choice, SUM(header_int) FROM header_footer_table_4 GROUP BY header_choice; + + +SELECT header_name, header_choice FROM header_footer_table_4 ORDER BY header_int LIMIT 8; + +-- create table from existing text file should not feed header or footer to the new table +CREATE TABLE transition (title VARCHAR(10), name VARCHAR(10)) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' STORED AS TEXTFILE LOCATION '${system:test.tmp.dir}/header_footer_table_transition'; +INSERT INTO transition SELECT header_choice, header_name FROM header_footer_table_4; +SELECT * FROM transition A, transition B ORDER BY A.title, A.name, B.title, B.name; + +CREATE TABLE transition2 (header_choice VARCHAR(10), sum_header_int int) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' STORED AS TEXTFILE LOCATION '${system:test.tmp.dir}/test/header_footer_table_transition2'; +INSERT INTO transition2 SELECT header_choice, SUM(header_int) FROM header_footer_table_4 GROUP BY header_choice; +SELECT * FROM transition2 A, transition2 B ORDER BY A.sum_header_int, A.header_choice, B.sum_header_int, B.header_choice; diff --git a/ql/src/test/queries/clientpositive/skiphf_aggr2.q b/ql/src/test/queries/clientpositive/skiphf_aggr2.q new file mode 100644 index 0000000..478baef --- /dev/null +++ b/ql/src/test/queries/clientpositive/skiphf_aggr2.q @@ -0,0 +1,25 @@ +SET hive.fetch.task.conversion=none; +-- no footer +DROP TABLE IF EXISTS hf1; +CREATE TABLE hf1 (a string, b string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' TBLPROPERTIES('skip.header.line.count'='1'); +INSERT OVERWRITE TABLE hf1 VALUES ('x','y'),('a','b'),('c','d'); +SELECT * FROM hf1; + +-- no header +DROP TABLE IF EXISTS hf2; +CREATE TABLE hf2 (a string, b string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' TBLPROPERTIES('skip.footer.line.count'='2'); +INSERT OVERWRITE TABLE hf2 VALUES ('x','y'),('a','b'),('c','d'); +SELECT * FROM hf2; + +-- only header, no data +DROP TABLE IF EXISTS hf3; +CREATE TABLE hf3 (a string, b string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' TBLPROPERTIES('skip.header.line.count'='3'); +INSERT OVERWRITE TABLE hf3 VALUES ('x','y'),('a','b'),('c','d'); +SELECT * FROM hf3; + +-- header and footer, no data +DROP TABLE IF EXISTS hf4; +CREATE TABLE hf4 (a string, b string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' TBLPROPERTIES('skip.header.line.count'='1','skip.footer.line.count'='2'); +INSERT OVERWRITE TABLE hf4 VALUES ('x','y'),('a','b'),('c','d'); +SELECT * FROM hf4; + diff --git a/ql/src/test/results/clientpositive/llap/file_with_header_footer_aggregation.q.out b/ql/src/test/results/clientpositive/llap/file_with_header_footer_aggregation.q.out new file mode 100644 index 0000000..0dc4892 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/file_with_header_footer_aggregation.q.out @@ -0,0 +1,849 @@ +PREHOOK: query: CREATE TABLE numbrs (numbr int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@numbrs +POSTHOOK: query: CREATE TABLE numbrs (numbr int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@numbrs +PREHOOK: query: INSERT INTO numbrs VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10), (11), (12), (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@numbrs +POSTHOOK: query: INSERT INTO numbrs VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10), (11), (12), (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@numbrs +POSTHOOK: Lineage: numbrs.numbr SCRIPT [] +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +PREHOOK: Input: hdfs://### HDFS PATH ### +PREHOOK: Output: database:default +PREHOOK: Output: default@header_footer_table_4 +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: hdfs://### HDFS PATH ### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@header_footer_table_4 +PREHOOK: query: SELECT * FROM header_footer_table_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT * FROM header_footer_table_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +12 alex daily +3 barry yearly +5 chelsea monthly +8 xavier monthly +9 derek yearly +11 ethan monthly +1 faith yearly +21 yves daily +PREHOOK: query: SELECT * FROM header_footer_table_4 ORDER BY header_int LIMIT 8 +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT * FROM header_footer_table_4 ORDER BY header_int LIMIT 8 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +1 faith yearly +3 barry yearly +5 chelsea monthly +8 xavier monthly +9 derek yearly +11 ethan monthly +12 alex daily +21 yves daily +PREHOOK: query: SELECT * FROM header_footer_table_4 WHERE header_choice = 'header_choice' +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT * FROM header_footer_table_4 WHERE header_choice = 'header_choice' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +PREHOOK: query: SELECT * FROM header_footer_table_4 WHERE header_choice = 'monthly' +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT * FROM header_footer_table_4 WHERE header_choice = 'monthly' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +5 chelsea monthly +8 xavier monthly +11 ethan monthly +PREHOOK: query: SELECT COUNT(*) FROM header_footer_table_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT COUNT(*) FROM header_footer_table_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +8 +PREHOOK: query: SELECT * FROM header_footer_table_4 WHERE header_choice is NULL +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT * FROM header_footer_table_4 WHERE header_choice is NULL +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +PREHOOK: query: SELECT AVG(header_int) FROM header_footer_table_4 GROUP BY header_choice +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT AVG(header_int) FROM header_footer_table_4 GROUP BY header_choice +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +8.0 +16.5 +4.333333333333333 +Warning: Shuffle Join MERGEJOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT * FROM header_footer_table_4 A, header_footer_table_4 B ORDER BY A.header_int, B.header_int +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT * FROM header_footer_table_4 A, header_footer_table_4 B ORDER BY A.header_int, B.header_int +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +1 faith yearly 1 faith yearly +1 faith yearly 3 barry yearly +1 faith yearly 5 chelsea monthly +1 faith yearly 8 xavier monthly +1 faith yearly 9 derek yearly +1 faith yearly 11 ethan monthly +1 faith yearly 12 alex daily +1 faith yearly 21 yves daily +3 barry yearly 1 faith yearly +3 barry yearly 3 barry yearly +3 barry yearly 5 chelsea monthly +3 barry yearly 8 xavier monthly +3 barry yearly 9 derek yearly +3 barry yearly 11 ethan monthly +3 barry yearly 12 alex daily +3 barry yearly 21 yves daily +5 chelsea monthly 1 faith yearly +5 chelsea monthly 3 barry yearly +5 chelsea monthly 5 chelsea monthly +5 chelsea monthly 8 xavier monthly +5 chelsea monthly 9 derek yearly +5 chelsea monthly 11 ethan monthly +5 chelsea monthly 12 alex daily +5 chelsea monthly 21 yves daily +8 xavier monthly 1 faith yearly +8 xavier monthly 3 barry yearly +8 xavier monthly 5 chelsea monthly +8 xavier monthly 8 xavier monthly +8 xavier monthly 9 derek yearly +8 xavier monthly 11 ethan monthly +8 xavier monthly 12 alex daily +8 xavier monthly 21 yves daily +9 derek yearly 1 faith yearly +9 derek yearly 3 barry yearly +9 derek yearly 5 chelsea monthly +9 derek yearly 8 xavier monthly +9 derek yearly 9 derek yearly +9 derek yearly 11 ethan monthly +9 derek yearly 12 alex daily +9 derek yearly 21 yves daily +11 ethan monthly 1 faith yearly +11 ethan monthly 3 barry yearly +11 ethan monthly 5 chelsea monthly +11 ethan monthly 8 xavier monthly +11 ethan monthly 9 derek yearly +11 ethan monthly 11 ethan monthly +11 ethan monthly 12 alex daily +11 ethan monthly 21 yves daily +12 alex daily 1 faith yearly +12 alex daily 3 barry yearly +12 alex daily 5 chelsea monthly +12 alex daily 8 xavier monthly +12 alex daily 9 derek yearly +12 alex daily 11 ethan monthly +12 alex daily 12 alex daily +12 alex daily 21 yves daily +21 yves daily 1 faith yearly +21 yves daily 3 barry yearly +21 yves daily 5 chelsea monthly +21 yves daily 8 xavier monthly +21 yves daily 9 derek yearly +21 yves daily 11 ethan monthly +21 yves daily 12 alex daily +21 yves daily 21 yves daily +PREHOOK: query: SELECT header_name, header_int FROM header_footer_table_4 LEFT JOIN numbrs ON numbr = header_int +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Input: default@numbrs +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT header_name, header_int FROM header_footer_table_4 LEFT JOIN numbrs ON numbr = header_int +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Input: default@numbrs +POSTHOOK: Output: hdfs://### HDFS PATH ### +faith 1 +barry 3 +chelsea 5 +xavier 8 +derek 9 +ethan 11 +alex 12 +yves 21 +PREHOOK: query: SELECT header_name, header_int FROM header_footer_table_4 RIGHT JOIN numbrs ON numbr = header_int +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Input: default@numbrs +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT header_name, header_int FROM header_footer_table_4 RIGHT JOIN numbrs ON numbr = header_int +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Input: default@numbrs +POSTHOOK: Output: hdfs://### HDFS PATH ### +NULL NULL +faith 1 +NULL NULL +barry 3 +NULL NULL +chelsea 5 +NULL NULL +NULL NULL +xavier 8 +derek 9 +NULL NULL +ethan 11 +alex 12 +PREHOOK: query: SELECT header_name, header_int FROM header_footer_table_4 INNER JOIN numbrs ON numbr = header_int +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Input: default@numbrs +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT header_name, header_int FROM header_footer_table_4 INNER JOIN numbrs ON numbr = header_int +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Input: default@numbrs +POSTHOOK: Output: hdfs://### HDFS PATH ### +faith 1 +barry 3 +chelsea 5 +xavier 8 +derek 9 +ethan 11 +alex 12 +PREHOOK: query: SELECT header_name, header_int FROM header_footer_table_4 FULL JOIN numbrs ON numbr = header_int +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Input: default@numbrs +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT header_name, header_int FROM header_footer_table_4 FULL JOIN numbrs ON numbr = header_int +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Input: default@numbrs +POSTHOOK: Output: hdfs://### HDFS PATH ### +NULL NULL +faith 1 +NULL NULL +barry 3 +NULL NULL +chelsea 5 +NULL NULL +NULL NULL +xavier 8 +derek 9 +NULL NULL +ethan 11 +alex 12 +yves 21 +PREHOOK: query: SELECT header_choice, SUM(header_int) FROM header_footer_table_4 GROUP BY header_choice +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT header_choice, SUM(header_int) FROM header_footer_table_4 GROUP BY header_choice +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +monthly 24 +daily 33 +yearly 13 +PREHOOK: query: SELECT DISTINCT header_choice, SUM(header_int) FROM header_footer_table_4 GROUP BY header_choice +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT DISTINCT header_choice, SUM(header_int) FROM header_footer_table_4 GROUP BY header_choice +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +monthly 24 +daily 33 +yearly 13 +PREHOOK: query: SELECT header_name, header_choice FROM header_footer_table_4 ORDER BY header_int LIMIT 8 +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT header_name, header_choice FROM header_footer_table_4 ORDER BY header_int LIMIT 8 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +faith yearly +barry yearly +chelsea monthly +xavier monthly +derek yearly +ethan monthly +alex daily +yves daily +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +PREHOOK: Input: hdfs://### HDFS PATH ### +PREHOOK: Output: database:default +PREHOOK: Output: default@transition +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: hdfs://### HDFS PATH ### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@transition +PREHOOK: query: INSERT INTO transition SELECT header_choice, header_name FROM header_footer_table_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: default@transition +POSTHOOK: query: INSERT INTO transition SELECT header_choice, header_name FROM header_footer_table_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: default@transition +POSTHOOK: Lineage: transition.name EXPRESSION [(header_footer_table_4)header_footer_table_4.FieldSchema(name:header_name, type:string, comment:null), ] +POSTHOOK: Lineage: transition.title SIMPLE [(header_footer_table_4)header_footer_table_4.FieldSchema(name:header_choice, type:varchar(10), comment:null), ] +Warning: Shuffle Join MERGEJOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT * FROM transition A, transition B ORDER BY A.title, A.name, B.title, B.name +PREHOOK: type: QUERY +PREHOOK: Input: default@transition +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT * FROM transition A, transition B ORDER BY A.title, A.name, B.title, B.name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@transition +POSTHOOK: Output: hdfs://### HDFS PATH ### +daily alex daily alex +daily alex daily yves +daily alex monthly chelsea +daily alex monthly ethan +daily alex monthly xavier +daily alex yearly barry +daily alex yearly derek +daily alex yearly faith +daily yves daily alex +daily yves daily yves +daily yves monthly chelsea +daily yves monthly ethan +daily yves monthly xavier +daily yves yearly barry +daily yves yearly derek +daily yves yearly faith +monthly chelsea daily alex +monthly chelsea daily yves +monthly chelsea monthly chelsea +monthly chelsea monthly ethan +monthly chelsea monthly xavier +monthly chelsea yearly barry +monthly chelsea yearly derek +monthly chelsea yearly faith +monthly ethan daily alex +monthly ethan daily yves +monthly ethan monthly chelsea +monthly ethan monthly ethan +monthly ethan monthly xavier +monthly ethan yearly barry +monthly ethan yearly derek +monthly ethan yearly faith +monthly xavier daily alex +monthly xavier daily yves +monthly xavier monthly chelsea +monthly xavier monthly ethan +monthly xavier monthly xavier +monthly xavier yearly barry +monthly xavier yearly derek +monthly xavier yearly faith +yearly barry daily alex +yearly barry daily yves +yearly barry monthly chelsea +yearly barry monthly ethan +yearly barry monthly xavier +yearly barry yearly barry +yearly barry yearly derek +yearly barry yearly faith +yearly derek daily alex +yearly derek daily yves +yearly derek monthly chelsea +yearly derek monthly ethan +yearly derek monthly xavier +yearly derek yearly barry +yearly derek yearly derek +yearly derek yearly faith +yearly faith daily alex +yearly faith daily yves +yearly faith monthly chelsea +yearly faith monthly ethan +yearly faith monthly xavier +yearly faith yearly barry +yearly faith yearly derek +yearly faith yearly faith +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +PREHOOK: Input: hdfs://### HDFS PATH ### +PREHOOK: Output: database:default +PREHOOK: Output: default@transition2 +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: hdfs://### HDFS PATH ### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@transition2 +PREHOOK: query: INSERT INTO transition2 SELECT header_choice, SUM(header_int) FROM header_footer_table_4 GROUP BY header_choice +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: default@transition2 +POSTHOOK: query: INSERT INTO transition2 SELECT header_choice, SUM(header_int) FROM header_footer_table_4 GROUP BY header_choice +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: default@transition2 +POSTHOOK: Lineage: transition2.header_choice SIMPLE [(header_footer_table_4)header_footer_table_4.FieldSchema(name:header_choice, type:varchar(10), comment:null), ] +POSTHOOK: Lineage: transition2.sum_header_int EXPRESSION [(header_footer_table_4)header_footer_table_4.FieldSchema(name:header_int, type:int, comment:null), ] +Warning: Shuffle Join MERGEJOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT * FROM transition2 A, transition2 B ORDER BY A.sum_header_int, A.header_choice, B.sum_header_int, B.header_choice +PREHOOK: type: QUERY +PREHOOK: Input: default@transition2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT * FROM transition2 A, transition2 B ORDER BY A.sum_header_int, A.header_choice, B.sum_header_int, B.header_choice +POSTHOOK: type: QUERY +POSTHOOK: Input: default@transition2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +yearly 13 yearly 13 +yearly 13 monthly 24 +yearly 13 daily 33 +monthly 24 yearly 13 +monthly 24 monthly 24 +monthly 24 daily 33 +daily 33 yearly 13 +daily 33 monthly 24 +daily 33 daily 33 +PREHOOK: query: DROP TABLE transition +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@transition +PREHOOK: Output: default@transition +POSTHOOK: query: DROP TABLE transition +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@transition +POSTHOOK: Output: default@transition +PREHOOK: query: DROP TABLE transition2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@transition2 +PREHOOK: Output: default@transition2 +POSTHOOK: query: DROP TABLE transition2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@transition2 +POSTHOOK: Output: default@transition2 +PREHOOK: query: SELECT * FROM header_footer_table_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT * FROM header_footer_table_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +12 alex daily +3 barry yearly +5 chelsea monthly +8 xavier monthly +9 derek yearly +11 ethan monthly +1 faith yearly +21 yves daily +PREHOOK: query: SELECT * FROM header_footer_table_4 ORDER BY header_int LIMIT 8 +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT * FROM header_footer_table_4 ORDER BY header_int LIMIT 8 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +1 faith yearly +3 barry yearly +5 chelsea monthly +8 xavier monthly +9 derek yearly +11 ethan monthly +12 alex daily +21 yves daily +PREHOOK: query: SELECT * FROM header_footer_table_4 WHERE header_choice = 'header_choice' +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT * FROM header_footer_table_4 WHERE header_choice = 'header_choice' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +PREHOOK: query: SELECT * FROM header_footer_table_4 WHERE header_choice = 'monthly' +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT * FROM header_footer_table_4 WHERE header_choice = 'monthly' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +5 chelsea monthly +8 xavier monthly +11 ethan monthly +PREHOOK: query: SELECT COUNT(*) FROM header_footer_table_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT COUNT(*) FROM header_footer_table_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +8 +PREHOOK: query: SELECT * FROM header_footer_table_4 WHERE header_choice is NULL +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT * FROM header_footer_table_4 WHERE header_choice is NULL +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +PREHOOK: query: SELECT AVG(header_int) FROM header_footer_table_4 GROUP BY header_choice +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT AVG(header_int) FROM header_footer_table_4 GROUP BY header_choice +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +8.0 +16.5 +4.333333333333333 +Warning: Shuffle Join MERGEJOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT * FROM header_footer_table_4 A, header_footer_table_4 B ORDER BY A.header_int, B.header_int +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT * FROM header_footer_table_4 A, header_footer_table_4 B ORDER BY A.header_int, B.header_int +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +1 faith yearly 1 faith yearly +1 faith yearly 3 barry yearly +1 faith yearly 5 chelsea monthly +1 faith yearly 8 xavier monthly +1 faith yearly 9 derek yearly +1 faith yearly 11 ethan monthly +1 faith yearly 12 alex daily +1 faith yearly 21 yves daily +3 barry yearly 1 faith yearly +3 barry yearly 3 barry yearly +3 barry yearly 5 chelsea monthly +3 barry yearly 8 xavier monthly +3 barry yearly 9 derek yearly +3 barry yearly 11 ethan monthly +3 barry yearly 12 alex daily +3 barry yearly 21 yves daily +5 chelsea monthly 1 faith yearly +5 chelsea monthly 3 barry yearly +5 chelsea monthly 5 chelsea monthly +5 chelsea monthly 8 xavier monthly +5 chelsea monthly 9 derek yearly +5 chelsea monthly 11 ethan monthly +5 chelsea monthly 12 alex daily +5 chelsea monthly 21 yves daily +8 xavier monthly 1 faith yearly +8 xavier monthly 3 barry yearly +8 xavier monthly 5 chelsea monthly +8 xavier monthly 8 xavier monthly +8 xavier monthly 9 derek yearly +8 xavier monthly 11 ethan monthly +8 xavier monthly 12 alex daily +8 xavier monthly 21 yves daily +9 derek yearly 1 faith yearly +9 derek yearly 3 barry yearly +9 derek yearly 5 chelsea monthly +9 derek yearly 8 xavier monthly +9 derek yearly 9 derek yearly +9 derek yearly 11 ethan monthly +9 derek yearly 12 alex daily +9 derek yearly 21 yves daily +11 ethan monthly 1 faith yearly +11 ethan monthly 3 barry yearly +11 ethan monthly 5 chelsea monthly +11 ethan monthly 8 xavier monthly +11 ethan monthly 9 derek yearly +11 ethan monthly 11 ethan monthly +11 ethan monthly 12 alex daily +11 ethan monthly 21 yves daily +12 alex daily 1 faith yearly +12 alex daily 3 barry yearly +12 alex daily 5 chelsea monthly +12 alex daily 8 xavier monthly +12 alex daily 9 derek yearly +12 alex daily 11 ethan monthly +12 alex daily 12 alex daily +12 alex daily 21 yves daily +21 yves daily 1 faith yearly +21 yves daily 3 barry yearly +21 yves daily 5 chelsea monthly +21 yves daily 8 xavier monthly +21 yves daily 9 derek yearly +21 yves daily 11 ethan monthly +21 yves daily 12 alex daily +21 yves daily 21 yves daily +PREHOOK: query: SELECT header_name, header_int FROM header_footer_table_4 LEFT JOIN numbrs ON numbr = header_int +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Input: default@numbrs +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT header_name, header_int FROM header_footer_table_4 LEFT JOIN numbrs ON numbr = header_int +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Input: default@numbrs +POSTHOOK: Output: hdfs://### HDFS PATH ### +faith 1 +barry 3 +chelsea 5 +xavier 8 +derek 9 +ethan 11 +alex 12 +yves 21 +PREHOOK: query: SELECT header_name, header_int FROM header_footer_table_4 RIGHT JOIN numbrs ON numbr = header_int +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Input: default@numbrs +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT header_name, header_int FROM header_footer_table_4 RIGHT JOIN numbrs ON numbr = header_int +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Input: default@numbrs +POSTHOOK: Output: hdfs://### HDFS PATH ### +NULL NULL +faith 1 +NULL NULL +barry 3 +NULL NULL +chelsea 5 +NULL NULL +NULL NULL +xavier 8 +derek 9 +NULL NULL +ethan 11 +alex 12 +PREHOOK: query: SELECT header_name, header_int FROM header_footer_table_4 INNER JOIN numbrs ON numbr = header_int +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Input: default@numbrs +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT header_name, header_int FROM header_footer_table_4 INNER JOIN numbrs ON numbr = header_int +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Input: default@numbrs +POSTHOOK: Output: hdfs://### HDFS PATH ### +faith 1 +barry 3 +chelsea 5 +xavier 8 +derek 9 +ethan 11 +alex 12 +PREHOOK: query: SELECT header_name, header_int FROM header_footer_table_4 FULL JOIN numbrs ON numbr = header_int +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Input: default@numbrs +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT header_name, header_int FROM header_footer_table_4 FULL JOIN numbrs ON numbr = header_int +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Input: default@numbrs +POSTHOOK: Output: hdfs://### HDFS PATH ### +NULL NULL +faith 1 +NULL NULL +barry 3 +NULL NULL +chelsea 5 +NULL NULL +NULL NULL +xavier 8 +derek 9 +NULL NULL +ethan 11 +alex 12 +yves 21 +PREHOOK: query: SELECT header_choice, SUM(header_int) FROM header_footer_table_4 GROUP BY header_choice +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT header_choice, SUM(header_int) FROM header_footer_table_4 GROUP BY header_choice +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +monthly 24 +daily 33 +yearly 13 +PREHOOK: query: SELECT DISTINCT header_choice, SUM(header_int) FROM header_footer_table_4 GROUP BY header_choice +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT DISTINCT header_choice, SUM(header_int) FROM header_footer_table_4 GROUP BY header_choice +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +monthly 24 +daily 33 +yearly 13 +PREHOOK: query: SELECT header_name, header_choice FROM header_footer_table_4 ORDER BY header_int LIMIT 8 +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT header_name, header_choice FROM header_footer_table_4 ORDER BY header_int LIMIT 8 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +faith yearly +barry yearly +chelsea monthly +xavier monthly +derek yearly +ethan monthly +alex daily +yves daily +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +PREHOOK: Input: hdfs://### HDFS PATH ### +PREHOOK: Output: database:default +PREHOOK: Output: default@transition +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: hdfs://### HDFS PATH ### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@transition +PREHOOK: query: INSERT INTO transition SELECT header_choice, header_name FROM header_footer_table_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: default@transition +POSTHOOK: query: INSERT INTO transition SELECT header_choice, header_name FROM header_footer_table_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: default@transition +POSTHOOK: Lineage: transition.name EXPRESSION [(header_footer_table_4)header_footer_table_4.FieldSchema(name:header_name, type:string, comment:null), ] +POSTHOOK: Lineage: transition.title SIMPLE [(header_footer_table_4)header_footer_table_4.FieldSchema(name:header_choice, type:varchar(10), comment:null), ] +Warning: Shuffle Join MERGEJOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT * FROM transition A, transition B ORDER BY A.title, A.name, B.title, B.name +PREHOOK: type: QUERY +PREHOOK: Input: default@transition +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT * FROM transition A, transition B ORDER BY A.title, A.name, B.title, B.name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@transition +POSTHOOK: Output: hdfs://### HDFS PATH ### +daily alex daily alex +daily alex daily yves +daily alex monthly chelsea +daily alex monthly ethan +daily alex monthly xavier +daily alex yearly barry +daily alex yearly derek +daily alex yearly faith +daily yves daily alex +daily yves daily yves +daily yves monthly chelsea +daily yves monthly ethan +daily yves monthly xavier +daily yves yearly barry +daily yves yearly derek +daily yves yearly faith +monthly chelsea daily alex +monthly chelsea daily yves +monthly chelsea monthly chelsea +monthly chelsea monthly ethan +monthly chelsea monthly xavier +monthly chelsea yearly barry +monthly chelsea yearly derek +monthly chelsea yearly faith +monthly ethan daily alex +monthly ethan daily yves +monthly ethan monthly chelsea +monthly ethan monthly ethan +monthly ethan monthly xavier +monthly ethan yearly barry +monthly ethan yearly derek +monthly ethan yearly faith +monthly xavier daily alex +monthly xavier daily yves +monthly xavier monthly chelsea +monthly xavier monthly ethan +monthly xavier monthly xavier +monthly xavier yearly barry +monthly xavier yearly derek +monthly xavier yearly faith +yearly barry daily alex +yearly barry daily yves +yearly barry monthly chelsea +yearly barry monthly ethan +yearly barry monthly xavier +yearly barry yearly barry +yearly barry yearly derek +yearly barry yearly faith +yearly derek daily alex +yearly derek daily yves +yearly derek monthly chelsea +yearly derek monthly ethan +yearly derek monthly xavier +yearly derek yearly barry +yearly derek yearly derek +yearly derek yearly faith +yearly faith daily alex +yearly faith daily yves +yearly faith monthly chelsea +yearly faith monthly ethan +yearly faith monthly xavier +yearly faith yearly barry +yearly faith yearly derek +yearly faith yearly faith +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +PREHOOK: Input: hdfs://### HDFS PATH ### +PREHOOK: Output: database:default +PREHOOK: Output: default@transition2 +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: hdfs://### HDFS PATH ### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@transition2 +PREHOOK: query: INSERT INTO transition2 SELECT header_choice, SUM(header_int) FROM header_footer_table_4 GROUP BY header_choice +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_4 +PREHOOK: Output: default@transition2 +POSTHOOK: query: INSERT INTO transition2 SELECT header_choice, SUM(header_int) FROM header_footer_table_4 GROUP BY header_choice +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_4 +POSTHOOK: Output: default@transition2 +POSTHOOK: Lineage: transition2.header_choice SIMPLE [(header_footer_table_4)header_footer_table_4.FieldSchema(name:header_choice, type:varchar(10), comment:null), ] +POSTHOOK: Lineage: transition2.sum_header_int EXPRESSION [(header_footer_table_4)header_footer_table_4.FieldSchema(name:header_int, type:int, comment:null), ] +Warning: Shuffle Join MERGEJOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT * FROM transition2 A, transition2 B ORDER BY A.sum_header_int, A.header_choice, B.sum_header_int, B.header_choice +PREHOOK: type: QUERY +PREHOOK: Input: default@transition2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT * FROM transition2 A, transition2 B ORDER BY A.sum_header_int, A.header_choice, B.sum_header_int, B.header_choice +POSTHOOK: type: QUERY +POSTHOOK: Input: default@transition2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +yearly 13 yearly 13 +yearly 13 monthly 24 +yearly 13 daily 33 +monthly 24 yearly 13 +monthly 24 monthly 24 +monthly 24 daily 33 +daily 33 yearly 13 +daily 33 monthly 24 +daily 33 daily 33 diff --git a/ql/src/test/results/clientpositive/llap/skiphf_aggr2.q.out b/ql/src/test/results/clientpositive/llap/skiphf_aggr2.q.out new file mode 100644 index 0000000..ba33a10 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/skiphf_aggr2.q.out @@ -0,0 +1,123 @@ +PREHOOK: query: DROP TABLE IF EXISTS hf1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS hf1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE hf1 (a string, b string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' TBLPROPERTIES('skip.header.line.count'='1') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hf1 +POSTHOOK: query: CREATE TABLE hf1 (a string, b string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' TBLPROPERTIES('skip.header.line.count'='1') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hf1 +PREHOOK: query: INSERT OVERWRITE TABLE hf1 VALUES ('x','y'),('a','b'),('c','d') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hf1 +POSTHOOK: query: INSERT OVERWRITE TABLE hf1 VALUES ('x','y'),('a','b'),('c','d') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hf1 +POSTHOOK: Lineage: hf1.a SCRIPT [] +POSTHOOK: Lineage: hf1.b SCRIPT [] +PREHOOK: query: SELECT * FROM hf1 +PREHOOK: type: QUERY +PREHOOK: Input: default@hf1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM hf1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hf1 +#### A masked pattern was here #### +a b +c d +PREHOOK: query: DROP TABLE IF EXISTS hf2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS hf2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE hf2 (a string, b string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' TBLPROPERTIES('skip.footer.line.count'='2') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hf2 +POSTHOOK: query: CREATE TABLE hf2 (a string, b string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' TBLPROPERTIES('skip.footer.line.count'='2') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hf2 +PREHOOK: query: INSERT OVERWRITE TABLE hf2 VALUES ('x','y'),('a','b'),('c','d') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hf2 +POSTHOOK: query: INSERT OVERWRITE TABLE hf2 VALUES ('x','y'),('a','b'),('c','d') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hf2 +POSTHOOK: Lineage: hf2.a SCRIPT [] +POSTHOOK: Lineage: hf2.b SCRIPT [] +PREHOOK: query: SELECT * FROM hf2 +PREHOOK: type: QUERY +PREHOOK: Input: default@hf2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM hf2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hf2 +#### A masked pattern was here #### +x y +PREHOOK: query: DROP TABLE IF EXISTS hf3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS hf3 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE hf3 (a string, b string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' TBLPROPERTIES('skip.header.line.count'='3') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hf3 +POSTHOOK: query: CREATE TABLE hf3 (a string, b string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' TBLPROPERTIES('skip.header.line.count'='3') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hf3 +PREHOOK: query: INSERT OVERWRITE TABLE hf3 VALUES ('x','y'),('a','b'),('c','d') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hf3 +POSTHOOK: query: INSERT OVERWRITE TABLE hf3 VALUES ('x','y'),('a','b'),('c','d') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hf3 +POSTHOOK: Lineage: hf3.a SCRIPT [] +POSTHOOK: Lineage: hf3.b SCRIPT [] +PREHOOK: query: SELECT * FROM hf3 +PREHOOK: type: QUERY +PREHOOK: Input: default@hf3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM hf3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hf3 +#### A masked pattern was here #### +PREHOOK: query: DROP TABLE IF EXISTS hf4 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS hf4 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE hf4 (a string, b string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' TBLPROPERTIES('skip.header.line.count'='1','skip.footer.line.count'='2') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hf4 +POSTHOOK: query: CREATE TABLE hf4 (a string, b string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' TBLPROPERTIES('skip.header.line.count'='1','skip.footer.line.count'='2') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hf4 +PREHOOK: query: INSERT OVERWRITE TABLE hf4 VALUES ('x','y'),('a','b'),('c','d') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hf4 +POSTHOOK: query: INSERT OVERWRITE TABLE hf4 VALUES ('x','y'),('a','b'),('c','d') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hf4 +POSTHOOK: Lineage: hf4.a SCRIPT [] +POSTHOOK: Lineage: hf4.b SCRIPT [] +PREHOOK: query: SELECT * FROM hf4 +PREHOOK: type: QUERY +PREHOOK: Input: default@hf4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM hf4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hf4 +#### A masked pattern was here ####