HIVE-14355: Schema evolution for ORC in LLAP is broken for int-to-string conversion (Prasanth Jayachandran, reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/950b11fd Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/950b11fd Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/950b11fd Branch: refs/heads/branch-2.1 Commit: 950b11fd82de256a079cadc83fe2f06fd6a1fabb Parents: f2bf99c Author: Prasanth Jayachandran <[email protected]> Authored: Sat Jul 30 13:13:34 2016 -0700 Committer: Prasanth Jayachandran <[email protected]> Committed: Wed Aug 3 17:30:19 2016 -0700 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 32 + .../hive/llap/io/api/impl/LlapInputFormat.java | 77 +- .../llap/io/decode/ColumnVectorProducer.java | 5 +- .../llap/io/decode/OrcColumnVectorProducer.java | 3 +- .../llap/io/decode/OrcEncodedDataConsumer.java | 17 + .../hive/llap/io/decode/ReadPipeline.java | 3 + .../llap/io/encoded/OrcEncodedDataReader.java | 28 +- .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 7 +- .../llap/orc_ppd_schema_evol_1a.q.out | 70 + .../llap/orc_ppd_schema_evol_1b.q.out | 124 + .../llap/orc_ppd_schema_evol_2a.q.out | 70 + .../llap/orc_ppd_schema_evol_2b.q.out | 124 + .../llap/orc_ppd_schema_evol_3a.q.out | 832 ++++ .../schema_evol_orc_acid_mapwork_part.q.out | 3662 ++++++++++++++++ .../schema_evol_orc_acid_mapwork_table.q.out | 3331 +++++++++++++++ .../schema_evol_orc_acidvec_mapwork_part.q.out | 3662 ++++++++++++++++ .../schema_evol_orc_acidvec_mapwork_table.q.out | 3331 +++++++++++++++ .../schema_evol_orc_nonvec_fetchwork_part.q.out | 3995 +++++++++++++++++ ...schema_evol_orc_nonvec_fetchwork_table.q.out | 3747 ++++++++++++++++ .../schema_evol_orc_nonvec_mapwork_part.q.out | 3995 +++++++++++++++++ ...ol_orc_nonvec_mapwork_part_all_complex.q.out | 669 +++ ..._orc_nonvec_mapwork_part_all_primitive.q.out | 2899 +++++++++++++ .../schema_evol_orc_nonvec_mapwork_table.q.out | 3747 ++++++++++++++++ 
.../llap/schema_evol_orc_vec_mapwork_part.q.out | 3995 +++++++++++++++++ ..._evol_orc_vec_mapwork_part_all_complex.q.out | 669 +++ ...vol_orc_vec_mapwork_part_all_primitive.q.out | 2899 +++++++++++++ .../schema_evol_orc_vec_mapwork_table.q.out | 3747 ++++++++++++++++ .../clientpositive/llap/schema_evol_stats.q.out | 392 ++ .../schema_evol_text_nonvec_mapwork_part.q.out | 3995 +++++++++++++++++ ...l_text_nonvec_mapwork_part_all_complex.q.out | 669 +++ ...text_nonvec_mapwork_part_all_primitive.q.out | 2899 +++++++++++++ .../schema_evol_text_nonvec_mapwork_table.q.out | 3747 ++++++++++++++++ .../schema_evol_text_vec_mapwork_part.q.out | 3999 ++++++++++++++++++ ...evol_text_vec_mapwork_part_all_complex.q.out | 673 +++ ...ol_text_vec_mapwork_part_all_primitive.q.out | 2903 +++++++++++++ .../schema_evol_text_vec_mapwork_table.q.out | 3751 ++++++++++++++++ .../schema_evol_text_vecrow_mapwork_part.q.out | 3999 ++++++++++++++++++ ...l_text_vecrow_mapwork_part_all_complex.q.out | 675 +++ ...text_vecrow_mapwork_part_all_primitive.q.out | 2903 +++++++++++++ .../schema_evol_text_vecrow_mapwork_table.q.out | 3751 ++++++++++++++++ 40 files changed, 80056 insertions(+), 40 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/950b11fd/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index dd799bd..7005b4f 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -498,6 +498,38 @@ minillap.shared.query.files=bucket_map_join_tez1.q,\ llap_nullscan.q,\ mrr.q,\ orc_ppd_basic.q,\ + orc_ppd_schema_evol_1a.q,\ + orc_ppd_schema_evol_1b.q,\ + orc_ppd_schema_evol_2a.q,\ + orc_ppd_schema_evol_2b.q,\ + orc_ppd_schema_evol_3a.q,\ + 
schema_evol_stats.q,\ + schema_evol_orc_acid_mapwork_part.q,\ + schema_evol_orc_acid_mapwork_table.q,\ + schema_evol_orc_acidvec_mapwork_part.q,\ + schema_evol_orc_acidvec_mapwork_table.q,\ + schema_evol_orc_nonvec_fetchwork_part.q,\ + schema_evol_orc_nonvec_fetchwork_table.q,\ + schema_evol_orc_nonvec_mapwork_part.q,\ + schema_evol_orc_nonvec_mapwork_part_all_complex.q,\ + schema_evol_orc_nonvec_mapwork_part_all_primitive.q,\ + schema_evol_orc_nonvec_mapwork_table.q,\ + schema_evol_orc_vec_mapwork_part.q,\ + schema_evol_orc_vec_mapwork_part_all_complex.q,\ + schema_evol_orc_vec_mapwork_part_all_primitive.q,\ + schema_evol_orc_vec_mapwork_table.q,\ + schema_evol_text_nonvec_mapwork_part.q,\ + schema_evol_text_nonvec_mapwork_part_all_complex.q,\ + schema_evol_text_nonvec_mapwork_part_all_primitive.q,\ + schema_evol_text_nonvec_mapwork_table.q,\ + schema_evol_text_vec_mapwork_part.q,\ + schema_evol_text_vec_mapwork_part_all_complex.q,\ + schema_evol_text_vec_mapwork_part_all_primitive.q,\ + schema_evol_text_vec_mapwork_table.q,\ + schema_evol_text_vecrow_mapwork_part.q,\ + schema_evol_text_vecrow_mapwork_part_all_complex.q,\ + schema_evol_text_vecrow_mapwork_part_all_primitive.q,\ + schema_evol_text_vecrow_mapwork_table.q,\ tez_bmj_schema_evolution.q,\ tez_dml.q,\ tez_fsstat.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/950b11fd/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java index 57b98b6..0577647 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java @@ -20,11 +20,17 @@ package org.apache.hadoop.hive.llap.io.api.impl; import java.io.IOException; +import 
java.util.ArrayList; +import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; +import java.util.concurrent.ExecutorService; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.llap.ConsumerFeedback; import org.apache.hadoop.hive.llap.DebugUtils; import org.apache.hadoop.hive.llap.counters.FragmentCountersMap; @@ -43,7 +49,10 @@ import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.io.orc.encoded.Consumer; import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.plan.MapWork; +import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapred.FileSplit; @@ -53,6 +62,8 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; import org.apache.hive.common.util.HiveStringUtils; +import org.apache.orc.TypeDescription; +import org.apache.orc.impl.SchemaEvolution; import org.apache.tez.common.counters.TezCounters; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -93,7 +104,7 @@ public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowB InputSplit split, JobConf job, Reporter reporter) throws IOException { boolean useLlapIo = true; if (split instanceof LlapAwareSplit) { - useLlapIo = ((LlapAwareSplit)split).canUseLlapIo(); + useLlapIo = ((LlapAwareSplit) split).canUseLlapIo(); } // validate for supported types. Until we fix HIVE-14089 we need this check. 
@@ -103,10 +114,7 @@ public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowB if (!useLlapIo) { LlapIoImpl.LOG.warn("Not using LLAP IO for an unsupported split: " + split); - @SuppressWarnings("unchecked") - RecordReader<NullWritable, VectorizedRowBatch> rr = - sourceInputFormat.getRecordReader(split, job, reporter); - return rr; + return sourceInputFormat.getRecordReader(split, job, reporter); } boolean isVectorMode = Utilities.getUseVectorizedInputFileFormat(job); if (!isVectorMode) { @@ -118,7 +126,13 @@ public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowB try { List<Integer> includedCols = ColumnProjectionUtils.isReadAllColumns(job) ? null : ColumnProjectionUtils.getReadColumnIDs(job); - return new LlapRecordReader(job, fileSplit, includedCols, hostName); + LlapRecordReader rr = new LlapRecordReader(job, fileSplit, includedCols, hostName); + + if (!rr.init()) { + return sourceInputFormat.getRecordReader(split, job, reporter); + } + + return rr; } catch (Exception ex) { throw new IOException(ex); } @@ -148,13 +162,18 @@ public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowB /** Vector that is currently being processed by our user. 
*/ private boolean isDone = false; private final boolean isClosed = false; - private ConsumerFeedback<ColumnVectorBatch> feedback; + private final ConsumerFeedback<ColumnVectorBatch> feedback; private final QueryFragmentCounters counters; private long firstReturnTime; - public LlapRecordReader( - JobConf job, FileSplit split, List<Integer> includedCols, String hostName) - throws IOException { + private final JobConf jobConf; + private final TypeDescription fileSchema; + private final boolean[] includedColumns; + private final ReadPipeline rp; + + public LlapRecordReader(JobConf job, FileSplit split, List<Integer> includedCols, + String hostName) throws IOException, HiveException { + this.jobConf = job; this.split = split; this.columnIds = includedCols; this.sarg = ConvertAstToSearchArg.createFromConf(job); @@ -189,7 +208,33 @@ public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowB partitionValues = null; } - startRead(); + // Create the consumer of encoded data; it will coordinate decoding to CVBs. + rp = cvp.createReadPipeline(this, split, columnIds, sarg, columnNames, counters); + feedback = rp; + fileSchema = rp.getFileSchema(); + includedColumns = rp.getIncludedColumns(); + } + + /** + * Starts the data read pipeline + */ + public boolean init() { + boolean isAcidScan = HiveConf.getBoolVar(jobConf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN); + TypeDescription readerSchema = OrcInputFormat.getDesiredRowTypeDescr(jobConf, isAcidScan, + Integer.MAX_VALUE); + SchemaEvolution schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, + includedColumns); + for (Integer colId : columnIds) { + if (!schemaEvolution.isPPDSafeConversion(colId)) { + LlapIoImpl.LOG.warn("Unsupported schema evolution! Disabling Llap IO for {}", split); + return false; + } + } + + ListenableFuture<Void> future = executor.submit(rp.getReadCallable()); + // TODO: we should NOT do this thing with handler. Reader needs to do cleanup in most cases. 
+ Futures.addCallback(future, new UncaughtErrorHandler()); + return true; } @Override @@ -253,16 +298,6 @@ public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowB } } - private void startRead() { - // Create the consumer of encoded data; it will coordinate decoding to CVBs. - ReadPipeline rp = cvp.createReadPipeline( - this, split, columnIds, sarg, columnNames, counters); - feedback = rp; - ListenableFuture<Void> future = executor.submit(rp.getReadCallable()); - // TODO: we should NOT do this thing with handler. Reader needs to do cleanup in most cases. - Futures.addCallback(future, new UncaughtErrorHandler()); - } - ColumnVectorBatch nextCvb() throws InterruptedException, IOException { boolean isFirst = (lastCvb == null); if (!isFirst) { http://git-wip-us.apache.org/repos/asf/hive/blob/950b11fd/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java index b3b571d..b77dfbb 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.llap.io.decode; +import java.io.IOException; import java.util.List; import org.apache.hadoop.hive.llap.counters.QueryFragmentCounters; @@ -32,5 +33,5 @@ import org.apache.hadoop.mapred.FileSplit; public interface ColumnVectorProducer { ReadPipeline createReadPipeline(Consumer<ColumnVectorBatch> consumer, FileSplit split, List<Integer> columnIds, SearchArgument sarg, String[] columnNames, - QueryFragmentCounters counters); -} \ No newline at end of file + QueryFragmentCounters counters) throws IOException; +} 
http://git-wip-us.apache.org/repos/asf/hive/blob/950b11fd/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java index 7db519c..0a8e3df 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.llap.io.decode; +import java.io.IOException; import java.util.List; import org.apache.hadoop.conf.Configuration; @@ -64,7 +65,7 @@ public class OrcColumnVectorProducer implements ColumnVectorProducer { public ReadPipeline createReadPipeline( Consumer<ColumnVectorBatch> consumer, FileSplit split, List<Integer> columnIds, SearchArgument sarg, String[] columnNames, - QueryFragmentCounters counters) { + QueryFragmentCounters counters) throws IOException { cacheMetrics.incrCacheReadRequests(); OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(consumer, columnIds.size(), _skipCorrupt, counters, ioMetrics); http://git-wip-us.apache.org/repos/asf/hive/blob/950b11fd/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java index 3dfab63..94e4750 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java @@ -46,6 +46,8 @@ import 
org.apache.hadoop.hive.ql.io.orc.encoded.EncodedTreeReaderFactory.Settabl import org.apache.hadoop.hive.ql.io.orc.encoded.OrcBatchKey; import org.apache.hadoop.hive.ql.io.orc.encoded.Reader.OrcEncodedColumnBatch; import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl; +import org.apache.orc.OrcUtils; +import org.apache.orc.TypeDescription; import org.apache.orc.impl.TreeReaderFactory; import org.apache.hadoop.hive.ql.io.orc.WriterImpl; import org.apache.orc.OrcProto; @@ -59,6 +61,7 @@ public class OrcEncodedDataConsumer private OrcStripeMetadata[] stripes; private final boolean skipCorrupt; // TODO: get rid of this private final QueryFragmentCounters counters; + private boolean[] includedColumns; public OrcEncodedDataConsumer( Consumer<ColumnVectorBatch> consumer, int colCount, boolean skipCorrupt, @@ -228,4 +231,18 @@ public class OrcEncodedDataConsumer private long getRowCount(OrcProto.RowIndexEntry rowIndexEntry) { return rowIndexEntry.getStatistics().getNumberOfValues(); } + + @Override + public TypeDescription getFileSchema() { + return OrcUtils.convertTypeFromProtobuf(fileMetadata.getTypes(), 0); + } + + @Override + public boolean[] getIncludedColumns() { + return includedColumns; + } + + public void setIncludedColumns(final boolean[] includedColumns) { + this.includedColumns = includedColumns; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/950b11fd/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ReadPipeline.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ReadPipeline.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ReadPipeline.java index 21b1772..1987451 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ReadPipeline.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ReadPipeline.java @@ -21,7 +21,10 @@ import java.util.concurrent.Callable; import 
org.apache.hadoop.hive.llap.ConsumerFeedback; import org.apache.hadoop.hive.llap.io.api.impl.ColumnVectorBatch; +import org.apache.orc.TypeDescription; public interface ReadPipeline extends ConsumerFeedback<ColumnVectorBatch> { public Callable<Void> getReadCallable(); + TypeDescription getFileSchema(); + boolean[] getIncludedColumns(); } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/950b11fd/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java index e9794bd..fc2b615 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java @@ -162,10 +162,12 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void> @SuppressWarnings("unused") private volatile boolean isPaused = false; + boolean[] globalIncludes = null; + public OrcEncodedDataReader(LowLevelCache lowLevelCache, BufferUsageManager bufferManager, OrcMetadataCache metadataCache, Configuration conf, FileSplit split, List<Integer> columnIds, SearchArgument sarg, String[] columnNames, OrcEncodedDataConsumer consumer, - QueryFragmentCounters counters) { + QueryFragmentCounters counters) throws IOException { this.lowLevelCache = lowLevelCache; this.metadataCache = metadataCache; this.bufferManager = bufferManager; @@ -184,6 +186,19 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void> } catch (IOException e) { throw new RuntimeException(e); } + + // moved this part of code from performDataRead as LlapInputFormat need to know the file schema + // to decide if schema evolution is supported or not + orcReader = null; + // 1. 
Get file metadata from cache, or create the reader and read it. + // Don't cache the filesystem object for now; Tez closes it and FS cache will fix all that + fs = split.getPath().getFileSystem(conf); + fileKey = determineFileId(fs, split, + HiveConf.getBoolVar(conf, ConfVars.LLAP_CACHE_ALLOW_SYNTHETIC_FILEID)); + fileMetadata = getOrReadFileMetadata(); + globalIncludes = OrcInputFormat.genIncludedColumns(fileMetadata.getTypes(), columnIds, true); + consumer.setFileMetadata(fileMetadata); + consumer.setIncludedColumns(globalIncludes); } @Override @@ -222,18 +237,9 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void> return null; } counters.setDesc(QueryFragmentCounters.Desc.TABLE, getDbAndTableName(split.getPath())); - orcReader = null; - // 1. Get file metadata from cache, or create the reader and read it. - // Don't cache the filesystem object for now; Tez closes it and FS cache will fix all that - fs = split.getPath().getFileSystem(conf); - fileKey = determineFileId(fs, split, - HiveConf.getBoolVar(conf, ConfVars.LLAP_CACHE_ALLOW_SYNTHETIC_FILEID)); counters.setDesc(QueryFragmentCounters.Desc.FILE, split.getPath() + (fileKey == null ? "" : " (" + fileKey + ")")); - try { - fileMetadata = getOrReadFileMetadata(); - consumer.setFileMetadata(fileMetadata); validateFileMetadata(); if (columnIds == null) { columnIds = createColumnIds(fileMetadata); @@ -257,10 +263,8 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void> // 3. Apply SARG if needed, and otherwise determine what RGs to read. 
int stride = fileMetadata.getRowIndexStride(); ArrayList<OrcStripeMetadata> stripeMetadatas = null; - boolean[] globalIncludes = null; boolean[] sargColumns = null; try { - globalIncludes = OrcInputFormat.genIncludedColumns(fileMetadata.getTypes(), columnIds, true); if (sarg != null && stride != 0) { // TODO: move this to a common method int[] filterColumns = RecordReaderImpl.mapSargColumnsToOrcInternalColIdx( http://git-wip-us.apache.org/repos/asf/hive/blob/950b11fd/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index c053bf2..717d28c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -2164,12 +2164,11 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>, * @param isAcidRead is this an acid format? 
* @param dataColumns the desired number of data columns for vectorized read * @return the desired schema or null if schema evolution isn't enabled - * @throws IOException + * @throws IllegalArgumentException */ public static TypeDescription getDesiredRowTypeDescr(Configuration conf, boolean isAcidRead, - int dataColumns - ) throws IOException { + int dataColumns) { String columnNameProperty = null; String columnTypeProperty = null; @@ -2200,7 +2199,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>, } } } else if (isAcidRead) { - throw new IOException(ErrorMsg.SCHEMA_REQUIRED_TO_READ_ACID_TABLES.getErrorCodedMsg()); + throw new IllegalArgumentException(ErrorMsg.SCHEMA_REQUIRED_TO_READ_ACID_TABLES.getErrorCodedMsg()); } } http://git-wip-us.apache.org/repos/asf/hive/blob/950b11fd/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_1a.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_1a.q.out b/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_1a.q.out new file mode 100644 index 0000000..5cefb3f --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_1a.q.out @@ -0,0 +1,70 @@ +PREHOOK: query: create table unique_1( +i int, +d double, +s string) +row format delimited +fields terminated by '|' +stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@unique_1 +POSTHOOK: query: create table unique_1( +i int, +d double, +s string) +row format delimited +fields terminated by '|' +stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@unique_1 +PREHOOK: query: load data local inpath '../../data/files/unique_1.txt' into table unique_1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@unique_1 +POSTHOOK: query: load data local inpath '../../data/files/unique_1.txt' 
into table unique_1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@unique_1 +PREHOOK: query: create table test1 stored as orc as select * from unique_1 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@unique_1 +PREHOOK: Output: database:default +PREHOOK: Output: default@test1 +POSTHOOK: query: create table test1 stored as orc as select * from unique_1 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@unique_1 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test1 +POSTHOOK: Lineage: test1.d SIMPLE [(unique_1)unique_1.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: test1.i SIMPLE [(unique_1)unique_1.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: test1.s SIMPLE [(unique_1)unique_1.FieldSchema(name:s, type:string, comment:null), ] +unique_1.i unique_1.d unique_1.s +PREHOOK: query: alter table test1 change column i i string +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@test1 +PREHOOK: Output: default@test1 +PREHOOK: query: select s from test1 where i = '-1591211872' +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +#### A masked pattern was here #### +s +fred king +PREHOOK: query: select s from test1 where i = -1591211872 +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +#### A masked pattern was here #### +s +fred king +PREHOOK: query: select s from test1 where i = '-1591211872' +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +#### A masked pattern was here #### +s +fred king +PREHOOK: query: select s from test1 where i = -1591211872 +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +#### A masked pattern was here #### +s +fred king http://git-wip-us.apache.org/repos/asf/hive/blob/950b11fd/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_1b.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_1b.q.out 
b/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_1b.q.out new file mode 100644 index 0000000..7fa580c --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_1b.q.out @@ -0,0 +1,124 @@ +PREHOOK: query: create table unique_1( +i int, +d double, +s string) +row format delimited +fields terminated by '|' +stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@unique_1 +POSTHOOK: query: create table unique_1( +i int, +d double, +s string) +row format delimited +fields terminated by '|' +stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@unique_1 +PREHOOK: query: load data local inpath '../../data/files/unique_1.txt' into table unique_1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@unique_1 +POSTHOOK: query: load data local inpath '../../data/files/unique_1.txt' into table unique_1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@unique_1 +PREHOOK: query: create table unique_2( +i int, +d double, +s string) +row format delimited +fields terminated by '|' +stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@unique_2 +POSTHOOK: query: create table unique_2( +i int, +d double, +s string) +row format delimited +fields terminated by '|' +stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@unique_2 +PREHOOK: query: load data local inpath '../../data/files/unique_2.txt' into table unique_2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@unique_2 +POSTHOOK: query: load data local inpath '../../data/files/unique_2.txt' into table unique_2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@unique_2 +PREHOOK: query: create table test_two_files( +i int, +d double, +s string) 
+stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_two_files +POSTHOOK: query: create table test_two_files( +i int, +d double, +s string) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_two_files +PREHOOK: query: insert into table test_two_files select * from unique_1 where i <= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@unique_1 +PREHOOK: Output: default@test_two_files +POSTHOOK: query: insert into table test_two_files select * from unique_1 where i <= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@unique_1 +POSTHOOK: Output: default@test_two_files +POSTHOOK: Lineage: test_two_files.d SIMPLE [(unique_1)unique_1.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: test_two_files.i SIMPLE [(unique_1)unique_1.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: test_two_files.s SIMPLE [(unique_1)unique_1.FieldSchema(name:s, type:string, comment:null), ] +unique_1.i unique_1.d unique_1.s +PREHOOK: query: insert into table test_two_files select * from unique_2 where i > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@unique_2 +PREHOOK: Output: default@test_two_files +POSTHOOK: query: insert into table test_two_files select * from unique_2 where i > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@unique_2 +POSTHOOK: Output: default@test_two_files +POSTHOOK: Lineage: test_two_files.d SIMPLE [(unique_2)unique_2.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: test_two_files.i SIMPLE [(unique_2)unique_2.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: test_two_files.s SIMPLE [(unique_2)unique_2.FieldSchema(name:s, type:string, comment:null), ] +unique_2.i unique_2.d unique_2.s +PREHOOK: query: alter table test_two_files change column i i string +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@test_two_files +PREHOOK: Output: default@test_two_files +PREHOOK: 
query: select s from test_two_files where i = -1591211872 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_two_files +#### A masked pattern was here #### +s +fred king +PREHOOK: query: select s from test_two_files where i = -1591211872 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_two_files +#### A masked pattern was here #### +s +fred king +PREHOOK: query: select s from test_two_files where i = '-1591211872' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_two_files +#### A masked pattern was here #### +s +fred king +PREHOOK: query: select s from test_two_files where i = -1591211872 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_two_files +#### A masked pattern was here #### +s +fred king http://git-wip-us.apache.org/repos/asf/hive/blob/950b11fd/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_2a.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_2a.q.out b/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_2a.q.out new file mode 100644 index 0000000..5dead1c --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_2a.q.out @@ -0,0 +1,70 @@ +PREHOOK: query: create table unique_1( +i int, +d string, +s string) +row format delimited +fields terminated by '|' +stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@unique_1 +POSTHOOK: query: create table unique_1( +i int, +d string, +s string) +row format delimited +fields terminated by '|' +stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@unique_1 +PREHOOK: query: load data local inpath '../../data/files/unique_1.txt' into table unique_1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@unique_1 +POSTHOOK: query: load data local inpath '../../data/files/unique_1.txt' into table unique_1 +POSTHOOK: type: LOAD +#### 
A masked pattern was here #### +POSTHOOK: Output: default@unique_1 +PREHOOK: query: create table test1 stored as orc as select * from unique_1 order by d +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@unique_1 +PREHOOK: Output: database:default +PREHOOK: Output: default@test1 +POSTHOOK: query: create table test1 stored as orc as select * from unique_1 order by d +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@unique_1 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test1 +POSTHOOK: Lineage: test1.d SIMPLE [(unique_1)unique_1.FieldSchema(name:d, type:string, comment:null), ] +POSTHOOK: Lineage: test1.i SIMPLE [(unique_1)unique_1.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: test1.s SIMPLE [(unique_1)unique_1.FieldSchema(name:s, type:string, comment:null), ] +unique_1.i unique_1.d unique_1.s +PREHOOK: query: alter table test1 change column d d double +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@test1 +PREHOOK: Output: default@test1 +PREHOOK: query: select s from test1 where d = -4996703.42 +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +#### A masked pattern was here #### +s +luke quirinius +PREHOOK: query: select s from test1 where d = -4996703.42 +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +#### A masked pattern was here #### +s +luke quirinius +PREHOOK: query: select s from test1 where d = -4996703.42 +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +#### A masked pattern was here #### +s +luke quirinius +PREHOOK: query: select s from test1 where d = -4996703.42 +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +#### A masked pattern was here #### +s +luke quirinius http://git-wip-us.apache.org/repos/asf/hive/blob/950b11fd/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_2b.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_2b.q.out 
b/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_2b.q.out new file mode 100644 index 0000000..b85ec98 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_2b.q.out @@ -0,0 +1,124 @@ +PREHOOK: query: create table unique_1( +i int, +d string, +s string) +row format delimited +fields terminated by '|' +stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@unique_1 +POSTHOOK: query: create table unique_1( +i int, +d string, +s string) +row format delimited +fields terminated by '|' +stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@unique_1 +PREHOOK: query: load data local inpath '../../data/files/unique_1.txt' into table unique_1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@unique_1 +POSTHOOK: query: load data local inpath '../../data/files/unique_1.txt' into table unique_1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@unique_1 +PREHOOK: query: create table unique_2( +i int, +d string, +s string) +row format delimited +fields terminated by '|' +stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@unique_2 +POSTHOOK: query: create table unique_2( +i int, +d string, +s string) +row format delimited +fields terminated by '|' +stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@unique_2 +PREHOOK: query: load data local inpath '../../data/files/unique_2.txt' into table unique_2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@unique_2 +POSTHOOK: query: load data local inpath '../../data/files/unique_2.txt' into table unique_2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@unique_2 +PREHOOK: query: create table test_two_files( +i int, +d string, +s string) 
+stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_two_files +POSTHOOK: query: create table test_two_files( +i int, +d string, +s string) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_two_files +PREHOOK: query: insert into table test_two_files select * from unique_1 where cast(d as double) <= 0 order by cast(d as double) +PREHOOK: type: QUERY +PREHOOK: Input: default@unique_1 +PREHOOK: Output: default@test_two_files +POSTHOOK: query: insert into table test_two_files select * from unique_1 where cast(d as double) <= 0 order by cast(d as double) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@unique_1 +POSTHOOK: Output: default@test_two_files +POSTHOOK: Lineage: test_two_files.d SIMPLE [(unique_1)unique_1.FieldSchema(name:d, type:string, comment:null), ] +POSTHOOK: Lineage: test_two_files.i SIMPLE [(unique_1)unique_1.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: test_two_files.s SIMPLE [(unique_1)unique_1.FieldSchema(name:s, type:string, comment:null), ] +unique_1.i unique_1.d unique_1.s +PREHOOK: query: insert into table test_two_files select * from unique_2 where cast(d as double) > 0 order by cast(d as double) +PREHOOK: type: QUERY +PREHOOK: Input: default@unique_2 +PREHOOK: Output: default@test_two_files +POSTHOOK: query: insert into table test_two_files select * from unique_2 where cast(d as double) > 0 order by cast(d as double) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@unique_2 +POSTHOOK: Output: default@test_two_files +POSTHOOK: Lineage: test_two_files.d SIMPLE [(unique_2)unique_2.FieldSchema(name:d, type:string, comment:null), ] +POSTHOOK: Lineage: test_two_files.i SIMPLE [(unique_2)unique_2.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: test_two_files.s SIMPLE [(unique_2)unique_2.FieldSchema(name:s, type:string, comment:null), ] +unique_2.i unique_2.d unique_2.s +PREHOOK: query: alter 
table test_two_files change column d d double +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@test_two_files +PREHOOK: Output: default@test_two_files +PREHOOK: query: select s from test_two_files where d = -4996703.42 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_two_files +#### A masked pattern was here #### +s +luke quirinius +PREHOOK: query: select s from test_two_files where d = -4996703.42 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_two_files +#### A masked pattern was here #### +s +luke quirinius +PREHOOK: query: select s from test_two_files where d = -4996703.42 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_two_files +#### A masked pattern was here #### +s +luke quirinius +PREHOOK: query: select s from test_two_files where d = -4996703.42 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_two_files +#### A masked pattern was here #### +s +luke quirinius http://git-wip-us.apache.org/repos/asf/hive/blob/950b11fd/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_3a.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_3a.q.out b/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_3a.q.out new file mode 100644 index 0000000..76bb9d1 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_3a.q.out @@ -0,0 +1,832 @@ +PREHOOK: query: CREATE TABLE staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@staging +POSTHOOK: query: CREATE TABLE staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED 
AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' INTO TABLE staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' INTO TABLE staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@staging +PREHOOK: query: CREATE TABLE orc_ppd_staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_ppd_staging +POSTHOOK: query: CREATE TABLE orc_ppd_staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_ppd_staging +PREHOOK: query: insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s +PREHOOK: type: QUERY +PREHOOK: Input: default@staging +PREHOOK: Output: 
default@orc_ppd_staging +POSTHOOK: query: insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@staging +POSTHOOK: Output: default@orc_ppd_staging +POSTHOOK: Lineage: orc_ppd_staging.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] +PREHOOK: query: -- just to introduce a gap in min/max range 
for bloom filters. The dataset has contiguous values +-- which makes it hard to test bloom filters +insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@staging +PREHOOK: Output: default@orc_ppd_staging +POSTHOOK: query: -- just to introduce a gap in min/max range for bloom filters. The dataset has contiguous values +-- which makes it hard to test bloom filters +insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@staging +POSTHOOK: Output: default@orc_ppd_staging +POSTHOOK: Lineage: orc_ppd_staging.b EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.bin EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.d EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.dec EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.f EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.i EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.si EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.t EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.ts EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [] +PREHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@staging +PREHOOK: Output: default@orc_ppd_staging +POSTHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from 
staging limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@staging +POSTHOOK: Output: default@orc_ppd_staging +POSTHOOK: Lineage: orc_ppd_staging.b SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.bin EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.d SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.dec EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.f EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.i SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.si EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.t EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.ts EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [] +PREHOOK: query: CREATE TABLE orc_ppd(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_ppd +POSTHOOK: query: CREATE TABLE orc_ppd(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_ppd +PREHOOK: query: insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd_staging +PREHOOK: Output: default@orc_ppd +POSTHOOK: query: insert overwrite table 
orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_ppd_staging +POSTHOOK: Output: default@orc_ppd +POSTHOOK: Lineage: orc_ppd.b SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_ppd.bin SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: orc_ppd.bo SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: orc_ppd.c EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd.d SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: orc_ppd.da EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_ppd.dec SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +POSTHOOK: Lineage: orc_ppd.f SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: orc_ppd.i SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_ppd.s SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd.si SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: orc_ppd.t SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: orc_ppd.ts SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_ppd.v EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ] +PREHOOK: 
query: -- Row group statistics for column t: +-- Entry 0: count: 994 hasNull: true min: -10 max: 54 sum: 26014 positions: 0,0,0,0,0,0,0 +-- Entry 1: count: 1000 hasNull: false min: 54 max: 118 sum: 86812 positions: 0,2,124,0,0,116,11 +-- Entry 2: count: 100 hasNull: false min: 118 max: 127 sum: 12151 positions: 0,4,119,0,0,244,19 + +-- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +0 +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + RECORDS_OUT_0: 1 +0 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +8 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 1000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +8 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = 54 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +18 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select 
count(*) from orc_ppd where t = 54 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +18 +PREHOOK: query: alter table orc_ppd change column t t smallint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +0 +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + RECORDS_OUT_0: 1 +0 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +8 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 1000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +8 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = 54 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 
+ RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +18 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = 54 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +18 +PREHOOK: query: alter table orc_ppd change column t t int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +0 +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + RECORDS_OUT_0: 1 +0 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +8 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 1000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +8 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = 54 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A 
masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +18 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = 54 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +18 +PREHOOK: query: alter table orc_ppd change column t t bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +0 +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + RECORDS_OUT_0: 1 +0 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +8 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 1000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +8 +PREHOOK: query: -- INPUT_RECORDS: 2000 
(2 row groups) +select count(*) from orc_ppd where t = 54 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +18 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = 54 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +18 +PREHOOK: query: alter table orc_ppd change column t t string +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > '127' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +1566 +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > '127' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +1566 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = '55' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +8 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = '55' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A 
masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +8 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = '54' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +18 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = '54' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +18 +PREHOOK: query: -- float tests +select count(*) from orc_ppd where f = 74.72 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +2 +PREHOOK: query: select count(*) from orc_ppd where f = 74.72 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +2 +PREHOOK: query: alter table orc_ppd change column f f double +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: select count(*) from orc_ppd where f = 74.72 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +2 +PREHOOK: query: select count(*) from 
orc_ppd where f = 74.72 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +2 +PREHOOK: query: alter table orc_ppd change column f f string +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: select count(*) from orc_ppd where f = '74.72' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +2 +PREHOOK: query: select count(*) from orc_ppd where f = '74.72' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +2 +PREHOOK: query: -- string tests +select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +6 +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +6 +PREHOOK: query: alter table orc_ppd change column s s char(50) +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked 
pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +6 +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +6 +PREHOOK: query: alter table orc_ppd change column s s varchar(50) +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +6 +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +6 +PREHOOK: query: alter table orc_ppd change column s s char(50) +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +6 +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + 
RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +6 +PREHOOK: query: alter table orc_ppd change column s s string +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +6 +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +6 +PREHOOK: query: alter table orc_ppd add columns (boo boolean) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: -- ppd on newly added column +select count(*) from orc_ppd where si = 442 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +4 +PREHOOK: query: select count(*) from orc_ppd where si = 442 or boo is not null or boo = false +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +4 +PREHOOK: query: select count(*) from orc_ppd where si = 442 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 1000 + RECORDS_OUT_0: 1 + 
RECORDS_OUT_INTERMEDIATE_Map_1: 1 +4 +PREHOOK: query: select count(*) from orc_ppd where si = 442 or boo is not null or boo = false +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +4
