Repository: hive
Updated Branches:
  refs/heads/master 835d718d2 -> aaa34539a


HIVE-11453: Create PostExecutionHook for ORC file dump (Prasanth Jayachandran 
reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/aaa34539
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/aaa34539
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/aaa34539

Branch: refs/heads/master
Commit: aaa34539af3d087625fbfcb31e566124f0670fc5
Parents: 835d718
Author: Prasanth Jayachandran <[email protected]>
Authored: Sun Aug 9 13:01:56 2015 -0700
Committer: Prasanth Jayachandran <[email protected]>
Committed: Sun Aug 9 13:01:56 2015 -0700

----------------------------------------------------------------------
 .../hive/ql/hooks/PostExecOrcFileDump.java      | 120 +++++
 .../test/queries/clientpositive/orc_file_dump.q |  57 +++
 .../results/clientpositive/orc_file_dump.q.out  | 447 +++++++++++++++++++
 3 files changed, 624 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/aaa34539/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecOrcFileDump.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecOrcFileDump.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecOrcFileDump.java
new file mode 100644
index 0000000..b0b4a36
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecOrcFileDump.java
@@ -0,0 +1,120 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.hooks;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.QueryPlan;
+import org.apache.hadoop.hive.ql.exec.FetchTask;
+import org.apache.hadoop.hive.ql.io.FileFormatException;
+import org.apache.hadoop.hive.ql.io.orc.FileDump;
+import org.apache.hadoop.hive.ql.io.orc.OrcFile;
+import org.apache.hadoop.hive.ql.plan.FetchWork;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.shims.ShimLoader;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Post exec hook to print orc file dump for files that will be read by fetch 
task. The file dump
+ * output will be printed before fetch task output. It also prints the row 
index for the 1st column
+ * in the file just to verify the impact of bloom filter fpp.
+ */
+public class PostExecOrcFileDump implements ExecuteWithHookContext {
+  private static final Log LOG = 
LogFactory.getLog(PostExecOrcFileDump.class.getName());
+
+  private static final PathFilter hiddenFileFilter = new PathFilter() {
+    public boolean accept(Path p) {
+      String name = p.getName();
+      return !name.startsWith("_") && !name.startsWith(".");
+    }
+  };
+
+  @Override
+  public void run(HookContext hookContext) throws Exception {
+    assert (hookContext.getHookType() == HookContext.HookType.POST_EXEC_HOOK);
+    HiveConf conf = hookContext.getConf();
+
+    LOG.info("Executing post execution hook to print orc file dump..");
+    QueryPlan plan = hookContext.getQueryPlan();
+    if (plan == null) {
+      return;
+    }
+
+    FetchTask fetchTask = plan.getFetchTask();
+    if (fetchTask != null) {
+      SessionState ss = SessionState.get();
+      SessionState.LogHelper console = ss.getConsole();
+
+      // file dump should write to session state console's error stream
+      PrintStream old = System.out;
+      System.setOut(console.getErrStream());
+
+      FetchWork fetchWork = fetchTask.getWork();
+      boolean partitionedTable = fetchWork.isPartitioned();
+      List<Path> directories;
+      if (partitionedTable) {
+        LOG.info("Printing orc file dump for files from partitioned 
directory..");
+        directories = fetchWork.getPartDir();
+      } else {
+        LOG.info("Printing orc file dump for files from table directory..");
+        directories = Lists.newArrayList();
+        directories.add(fetchWork.getTblDir());
+      }
+
+      for (Path dir : directories) {
+        FileSystem fs = dir.getFileSystem(conf);
+        List<FileStatus> fileList = 
ShimLoader.getHadoopShims().listLocatedStatus(fs, dir,
+            hiddenFileFilter);
+
+        for (FileStatus fileStatus : fileList) {
+          LOG.info("Printing orc file dump for " + fileStatus.getPath());
+          if (fileStatus.getLen() > 0) {
+            try {
+              // just creating orc reader is going to do sanity checks to make 
sure its valid ORC file
+              OrcFile.createReader(fs, fileStatus.getPath());
+              console.printError("-- BEGIN ORC FILE DUMP --");
+              FileDump.main(new String[]{fileStatus.getPath().toString(), 
"--rowindex=1"});
+              console.printError("-- END ORC FILE DUMP --");
+            } catch (FileFormatException e) {
+              LOG.warn("File " + fileStatus.getPath() + " is not ORC. Skip 
printing orc file dump");
+            } catch (IOException e) {
+              LOG.warn("Skip printing orc file dump. Exception: " + 
e.getMessage());
+            }
+          } else {
+            LOG.warn("Zero length file encountered. Skip printing orc file 
dump.");
+          }
+        }
+      }
+
+      // restore the old out stream
+      System.out.flush();
+      System.setOut(old);
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/aaa34539/ql/src/test/queries/clientpositive/orc_file_dump.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/orc_file_dump.q b/ql/src/test/queries/clientpositive/orc_file_dump.q
new file mode 100644
index 0000000..ed0da75
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/orc_file_dump.q
@@ -0,0 +1,57 @@
+CREATE TABLE staging(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging;
+
+CREATE TABLE orc_ppd(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+STORED AS ORC tblproperties("orc.row.index.stride" = "1000", 
"orc.bloom.filter.columns"="*");
+
+insert overwrite table orc_ppd select * from staging;
+
+SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecOrcFileDump;
+
+select * from orc_ppd limit 1;
+
+alter table orc_ppd set tblproperties("orc.bloom.filter.fpp"="0.01");
+
+insert overwrite table orc_ppd select * from staging;
+
+select * from orc_ppd limit 1;
+
+CREATE TABLE orc_ppd_part(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+PARTITIONED BY (ds string, hr int) STORED AS ORC 
tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*");
+
+insert overwrite table orc_ppd_part partition(ds = "2015", hr = 10) select * 
from staging;
+
+select * from orc_ppd_part limit 1;

http://git-wip-us.apache.org/repos/asf/hive/blob/aaa34539/ql/src/test/results/clientpositive/orc_file_dump.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/orc_file_dump.q.out b/ql/src/test/results/clientpositive/orc_file_dump.q.out
new file mode 100644
index 0000000..67aa189
--- /dev/null
+++ b/ql/src/test/results/clientpositive/orc_file_dump.q.out
@@ -0,0 +1,447 @@
+PREHOOK: query: CREATE TABLE staging(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@staging
+POSTHOOK: query: CREATE TABLE staging(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@staging
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE 
INTO TABLE staging
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@staging
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE 
INTO TABLE staging
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@staging
+PREHOOK: query: CREATE TABLE orc_ppd(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+STORED AS ORC tblproperties("orc.row.index.stride" = "1000", 
"orc.bloom.filter.columns"="*")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_ppd
+POSTHOOK: query: CREATE TABLE orc_ppd(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+STORED AS ORC tblproperties("orc.row.index.stride" = "1000", 
"orc.bloom.filter.columns"="*")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_ppd
+PREHOOK: query: insert overwrite table orc_ppd select * from staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@staging
+PREHOOK: Output: default@orc_ppd
+POSTHOOK: query: insert overwrite table orc_ppd select * from staging
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@orc_ppd
+POSTHOOK: Lineage: orc_ppd.b SIMPLE [(staging)staging.FieldSchema(name:b, 
type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.bin SIMPLE [(staging)staging.FieldSchema(name:bin, 
type:binary, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.bo SIMPLE [(staging)staging.FieldSchema(name:bo, 
type:boolean, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.d SIMPLE [(staging)staging.FieldSchema(name:d, 
type:double, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.dec SIMPLE [(staging)staging.FieldSchema(name:dec, 
type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: orc_ppd.f SIMPLE [(staging)staging.FieldSchema(name:f, 
type:float, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.i SIMPLE [(staging)staging.FieldSchema(name:i, 
type:int, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.s SIMPLE [(staging)staging.FieldSchema(name:s, 
type:string, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.si SIMPLE [(staging)staging.FieldSchema(name:si, 
type:smallint, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.t SIMPLE [(staging)staging.FieldSchema(name:t, 
type:tinyint, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.ts SIMPLE [(staging)staging.FieldSchema(name:ts, 
type:timestamp, comment:null), ]
+PREHOOK: query: select * from orc_ppd limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+-- BEGIN ORC FILE DUMP --
+#### A masked pattern was here ####
+File Version: 0.12 with HIVE_8732
+Rows: 1049
+Compression: ZLIB
+Compression size: 262144
+Type: 
struct<_col0:tinyint,_col1:smallint,_col2:int,_col3:bigint,_col4:float,_col5:double,_col6:boolean,_col7:string,_col8:timestamp,_col9:decimal(4,2),_col10:binary>
+
+Stripe Statistics:
+  Stripe 1:
+    Column 0: count: 1049 hasNull: false
+    Column 1: count: 1046 hasNull: true min: -3 max: 124 sum: 62430
+    Column 2: count: 1046 hasNull: true min: 256 max: 511 sum: 398889
+    Column 3: count: 1049 hasNull: false min: 65536 max: 65791 sum: 68881051
+    Column 4: count: 1049 hasNull: false min: 4294967296 max: 4294967551 sum: 
4505420825953
+    Column 5: count: 1049 hasNull: false min: 0.07999999821186066 max: 
99.91999816894531 sum: 52744.70002820343
+    Column 6: count: 1049 hasNull: false min: 0.02 max: 49.85 sum: 
26286.349999999977
+    Column 7: count: 1049 hasNull: false true: 526
+    Column 8: count: 1049 hasNull: false min:  max: zach zipper sum: 13443
+    Column 9: count: 1049 hasNull: false min: 2013-03-01 09:11:58.703 max: 
2013-03-01 09:11:58.703
+    Column 10: count: 1049 hasNull: false min: 0.08 max: 99.94 sum: 53646.16
+    Column 11: count: 1049 hasNull: false sum: 13278
+
+File Statistics:
+  Column 0: count: 1049 hasNull: false
+  Column 1: count: 1046 hasNull: true min: -3 max: 124 sum: 62430
+  Column 2: count: 1046 hasNull: true min: 256 max: 511 sum: 398889
+  Column 3: count: 1049 hasNull: false min: 65536 max: 65791 sum: 68881051
+  Column 4: count: 1049 hasNull: false min: 4294967296 max: 4294967551 sum: 
4505420825953
+  Column 5: count: 1049 hasNull: false min: 0.07999999821186066 max: 
99.91999816894531 sum: 52744.70002820343
+  Column 6: count: 1049 hasNull: false min: 0.02 max: 49.85 sum: 
26286.349999999977
+  Column 7: count: 1049 hasNull: false true: 526
+  Column 8: count: 1049 hasNull: false min:  max: zach zipper sum: 13443
+  Column 9: count: 1049 hasNull: false min: 2013-03-01 09:11:58.703 max: 
2013-03-01 09:11:58.703
+  Column 10: count: 1049 hasNull: false min: 0.08 max: 99.94 sum: 53646.16
+  Column 11: count: 1049 hasNull: false sum: 13278
+
+Stripes:
+  Stripe: offset: 3 data: 22636 rows: 1049 tail: 249 index: 9944
+    Stream: column 0 section ROW_INDEX start: 3 length 20
+    Stream: column 0 section BLOOM_FILTER start: 23 length 45
+    Stream: column 1 section ROW_INDEX start: 68 length 58
+    Stream: column 1 section BLOOM_FILTER start: 126 length 799
+    Stream: column 2 section ROW_INDEX start: 925 length 58
+    Stream: column 2 section BLOOM_FILTER start: 983 length 978
+    Stream: column 3 section ROW_INDEX start: 1961 length 61
+    Stream: column 3 section BLOOM_FILTER start: 2022 length 983
+    Stream: column 4 section ROW_INDEX start: 3005 length 69
+    Stream: column 4 section BLOOM_FILTER start: 3074 length 963
+    Stream: column 5 section ROW_INDEX start: 4037 length 78
+    Stream: column 5 section BLOOM_FILTER start: 4115 length 1291
+    Stream: column 6 section ROW_INDEX start: 5406 length 85
+    Stream: column 6 section BLOOM_FILTER start: 5491 length 1280
+    Stream: column 7 section ROW_INDEX start: 6771 length 41
+    Stream: column 7 section BLOOM_FILTER start: 6812 length 45
+    Stream: column 8 section ROW_INDEX start: 6857 length 86
+    Stream: column 8 section BLOOM_FILTER start: 6943 length 1157
+    Stream: column 9 section ROW_INDEX start: 8100 length 51
+    Stream: column 9 section BLOOM_FILTER start: 8151 length 62
+    Stream: column 10 section ROW_INDEX start: 8213 length 82
+    Stream: column 10 section BLOOM_FILTER start: 8295 length 1297
+    Stream: column 11 section ROW_INDEX start: 9592 length 47
+    Stream: column 11 section BLOOM_FILTER start: 9639 length 308
+    Stream: column 1 section PRESENT start: 9947 length 17
+    Stream: column 1 section DATA start: 9964 length 962
+    Stream: column 2 section PRESENT start: 10926 length 17
+    Stream: column 2 section DATA start: 10943 length 1441
+    Stream: column 3 section DATA start: 12384 length 1704
+    Stream: column 4 section DATA start: 14088 length 1998
+    Stream: column 5 section DATA start: 16086 length 2925
+    Stream: column 6 section DATA start: 19011 length 3323
+    Stream: column 7 section DATA start: 22334 length 137
+    Stream: column 8 section DATA start: 22471 length 1572
+    Stream: column 8 section LENGTH start: 24043 length 310
+    Stream: column 8 section DICTIONARY_DATA start: 24353 length 1548
+    Stream: column 9 section DATA start: 25901 length 62
+    Stream: column 9 section SECONDARY start: 25963 length 1783
+    Stream: column 10 section DATA start: 27746 length 2138
+    Stream: column 10 section SECONDARY start: 29884 length 231
+    Stream: column 11 section DATA start: 30115 length 1877
+    Stream: column 11 section LENGTH start: 31992 length 591
+    Encoding column 0: DIRECT
+    Encoding column 1: DIRECT
+    Encoding column 2: DIRECT_V2
+    Encoding column 3: DIRECT_V2
+    Encoding column 4: DIRECT_V2
+    Encoding column 5: DIRECT
+    Encoding column 6: DIRECT
+    Encoding column 7: DIRECT
+    Encoding column 8: DICTIONARY_V2[516]
+    Encoding column 9: DIRECT_V2
+    Encoding column 10: DIRECT_V2
+    Encoding column 11: DIRECT_V2
+    Row group indices for column 1:
+      Entry 0: count: 997 hasNull: true min: -3 max: 124 sum: 59325 positions: 
0,0,0,0,0,0,0
+      Entry 1: count: 49 hasNull: false min: 2 max: 123 sum: 3105 positions: 
0,10,113,0,0,903,101
+    Bloom filters for column 1:
+      Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 492 loadFactor: 
0.0784 expectedFpp: 3.7864847E-5
+      Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 168 loadFactor: 
0.0268 expectedFpp: 5.147697E-7
+      Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 492 
loadFactor: 0.0784 expectedFpp: 3.7864847E-5
+
+File length: 33456 bytes
+Padding length: 0 bytes
+Padding ratio: 0%
+-- END ORC FILE DUMP --
+124    336     65664   4294967435      74.72   42.47   true    bob davidson    
2013-03-01 09:11:58.703302      45.4    yard duty
+PREHOOK: query: alter table orc_ppd set 
tblproperties("orc.bloom.filter.fpp"="0.01")
+PREHOOK: type: ALTERTABLE_PROPERTIES
+PREHOOK: Input: default@orc_ppd
+PREHOOK: Output: default@orc_ppd
+PREHOOK: query: insert overwrite table orc_ppd select * from staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@staging
+PREHOOK: Output: default@orc_ppd
+PREHOOK: query: select * from orc_ppd limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+-- BEGIN ORC FILE DUMP --
+#### A masked pattern was here ####
+File Version: 0.12 with HIVE_8732
+Rows: 1049
+Compression: ZLIB
+Compression size: 262144
+Type: 
struct<_col0:tinyint,_col1:smallint,_col2:int,_col3:bigint,_col4:float,_col5:double,_col6:boolean,_col7:string,_col8:timestamp,_col9:decimal(4,2),_col10:binary>
+
+Stripe Statistics:
+  Stripe 1:
+    Column 0: count: 1049 hasNull: false
+    Column 1: count: 1046 hasNull: true min: -3 max: 124 sum: 62430
+    Column 2: count: 1046 hasNull: true min: 256 max: 511 sum: 398889
+    Column 3: count: 1049 hasNull: false min: 65536 max: 65791 sum: 68881051
+    Column 4: count: 1049 hasNull: false min: 4294967296 max: 4294967551 sum: 
4505420825953
+    Column 5: count: 1049 hasNull: false min: 0.07999999821186066 max: 
99.91999816894531 sum: 52744.70002820343
+    Column 6: count: 1049 hasNull: false min: 0.02 max: 49.85 sum: 
26286.349999999977
+    Column 7: count: 1049 hasNull: false true: 526
+    Column 8: count: 1049 hasNull: false min:  max: zach zipper sum: 13443
+    Column 9: count: 1049 hasNull: false min: 2013-03-01 09:11:58.703 max: 
2013-03-01 09:11:58.703
+    Column 10: count: 1049 hasNull: false min: 0.08 max: 99.94 sum: 53646.16
+    Column 11: count: 1049 hasNull: false sum: 13278
+
+File Statistics:
+  Column 0: count: 1049 hasNull: false
+  Column 1: count: 1046 hasNull: true min: -3 max: 124 sum: 62430
+  Column 2: count: 1046 hasNull: true min: 256 max: 511 sum: 398889
+  Column 3: count: 1049 hasNull: false min: 65536 max: 65791 sum: 68881051
+  Column 4: count: 1049 hasNull: false min: 4294967296 max: 4294967551 sum: 
4505420825953
+  Column 5: count: 1049 hasNull: false min: 0.07999999821186066 max: 
99.91999816894531 sum: 52744.70002820343
+  Column 6: count: 1049 hasNull: false min: 0.02 max: 49.85 sum: 
26286.349999999977
+  Column 7: count: 1049 hasNull: false true: 526
+  Column 8: count: 1049 hasNull: false min:  max: zach zipper sum: 13443
+  Column 9: count: 1049 hasNull: false min: 2013-03-01 09:11:58.703 max: 
2013-03-01 09:11:58.703
+  Column 10: count: 1049 hasNull: false min: 0.08 max: 99.94 sum: 53646.16
+  Column 11: count: 1049 hasNull: false sum: 13278
+
+Stripes:
+  Stripe: offset: 3 data: 22636 rows: 1049 tail: 251 index: 15096
+    Stream: column 0 section ROW_INDEX start: 3 length 20
+    Stream: column 0 section BLOOM_FILTER start: 23 length 56
+    Stream: column 1 section ROW_INDEX start: 79 length 58
+    Stream: column 1 section BLOOM_FILTER start: 137 length 1258
+    Stream: column 2 section ROW_INDEX start: 1395 length 58
+    Stream: column 2 section BLOOM_FILTER start: 1453 length 1544
+    Stream: column 3 section ROW_INDEX start: 2997 length 61
+    Stream: column 3 section BLOOM_FILTER start: 3058 length 1543
+    Stream: column 4 section ROW_INDEX start: 4601 length 69
+    Stream: column 4 section BLOOM_FILTER start: 4670 length 1556
+    Stream: column 5 section ROW_INDEX start: 6226 length 78
+    Stream: column 5 section BLOOM_FILTER start: 6304 length 1991
+    Stream: column 6 section ROW_INDEX start: 8295 length 85
+    Stream: column 6 section BLOOM_FILTER start: 8380 length 1964
+    Stream: column 7 section ROW_INDEX start: 10344 length 41
+    Stream: column 7 section BLOOM_FILTER start: 10385 length 56
+    Stream: column 8 section ROW_INDEX start: 10441 length 86
+    Stream: column 8 section BLOOM_FILTER start: 10527 length 1829
+    Stream: column 9 section ROW_INDEX start: 12356 length 51
+    Stream: column 9 section BLOOM_FILTER start: 12407 length 95
+    Stream: column 10 section ROW_INDEX start: 12502 length 82
+    Stream: column 10 section BLOOM_FILTER start: 12584 length 1994
+    Stream: column 11 section ROW_INDEX start: 14578 length 47
+    Stream: column 11 section BLOOM_FILTER start: 14625 length 474
+    Stream: column 1 section PRESENT start: 15099 length 17
+    Stream: column 1 section DATA start: 15116 length 962
+    Stream: column 2 section PRESENT start: 16078 length 17
+    Stream: column 2 section DATA start: 16095 length 1441
+    Stream: column 3 section DATA start: 17536 length 1704
+    Stream: column 4 section DATA start: 19240 length 1998
+    Stream: column 5 section DATA start: 21238 length 2925
+    Stream: column 6 section DATA start: 24163 length 3323
+    Stream: column 7 section DATA start: 27486 length 137
+    Stream: column 8 section DATA start: 27623 length 1572
+    Stream: column 8 section LENGTH start: 29195 length 310
+    Stream: column 8 section DICTIONARY_DATA start: 29505 length 1548
+    Stream: column 9 section DATA start: 31053 length 62
+    Stream: column 9 section SECONDARY start: 31115 length 1783
+    Stream: column 10 section DATA start: 32898 length 2138
+    Stream: column 10 section SECONDARY start: 35036 length 231
+    Stream: column 11 section DATA start: 35267 length 1877
+    Stream: column 11 section LENGTH start: 37144 length 591
+    Encoding column 0: DIRECT
+    Encoding column 1: DIRECT
+    Encoding column 2: DIRECT_V2
+    Encoding column 3: DIRECT_V2
+    Encoding column 4: DIRECT_V2
+    Encoding column 5: DIRECT
+    Encoding column 6: DIRECT
+    Encoding column 7: DIRECT
+    Encoding column 8: DICTIONARY_V2[516]
+    Encoding column 9: DIRECT_V2
+    Encoding column 10: DIRECT_V2
+    Encoding column 11: DIRECT_V2
+    Row group indices for column 1:
+      Entry 0: count: 997 hasNull: true min: -3 max: 124 sum: 59325 positions: 
0,0,0,0,0,0,0
+      Entry 1: count: 49 hasNull: false min: 2 max: 123 sum: 3105 positions: 
0,10,113,0,0,903,101
+    Bloom filters for column 1:
+      Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 849 loadFactor: 
0.0884 expectedFpp: 4.231118E-8
+      Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 285 loadFactor: 
0.0297 expectedFpp: 2.0324289E-11
+      Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 849 
loadFactor: 0.0884 expectedFpp: 4.231118E-8
+
+File length: 38610 bytes
+Padding length: 0 bytes
+Padding ratio: 0%
+-- END ORC FILE DUMP --
+124    336     65664   4294967435      74.72   42.47   true    bob davidson    
2013-03-01 09:11:58.703302      45.4    yard duty
+PREHOOK: query: CREATE TABLE orc_ppd_part(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+PARTITIONED BY (ds string, hr int) STORED AS ORC 
tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_ppd_part
+PREHOOK: query: insert overwrite table orc_ppd_part partition(ds = "2015", hr 
= 10) select * from staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@staging
+PREHOOK: Output: default@orc_ppd_part@ds=2015/hr=10
+PREHOOK: query: select * from orc_ppd_part limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd_part
+PREHOOK: Input: default@orc_ppd_part@ds=2015/hr=10
+#### A masked pattern was here ####
+-- BEGIN ORC FILE DUMP --
+#### A masked pattern was here ####
+File Version: 0.12 with HIVE_8732
+Rows: 1049
+Compression: ZLIB
+Compression size: 262144
+Type: 
struct<_col0:tinyint,_col1:smallint,_col2:int,_col3:bigint,_col4:float,_col5:double,_col6:boolean,_col7:string,_col8:timestamp,_col9:decimal(4,2),_col10:binary>
+
+Stripe Statistics:
+  Stripe 1:
+    Column 0: count: 1049 hasNull: false
+    Column 1: count: 1046 hasNull: true min: -3 max: 124 sum: 62430
+    Column 2: count: 1046 hasNull: true min: 256 max: 511 sum: 398889
+    Column 3: count: 1049 hasNull: false min: 65536 max: 65791 sum: 68881051
+    Column 4: count: 1049 hasNull: false min: 4294967296 max: 4294967551 sum: 
4505420825953
+    Column 5: count: 1049 hasNull: false min: 0.07999999821186066 max: 
99.91999816894531 sum: 52744.70002820343
+    Column 6: count: 1049 hasNull: false min: 0.02 max: 49.85 sum: 
26286.349999999977
+    Column 7: count: 1049 hasNull: false true: 526
+    Column 8: count: 1049 hasNull: false min:  max: zach zipper sum: 13443
+    Column 9: count: 1049 hasNull: false min: 2013-03-01 09:11:58.703 max: 
2013-03-01 09:11:58.703
+    Column 10: count: 1049 hasNull: false min: 0.08 max: 99.94 sum: 53646.16
+    Column 11: count: 1049 hasNull: false sum: 13278
+
+File Statistics:
+  Column 0: count: 1049 hasNull: false
+  Column 1: count: 1046 hasNull: true min: -3 max: 124 sum: 62430
+  Column 2: count: 1046 hasNull: true min: 256 max: 511 sum: 398889
+  Column 3: count: 1049 hasNull: false min: 65536 max: 65791 sum: 68881051
+  Column 4: count: 1049 hasNull: false min: 4294967296 max: 4294967551 sum: 
4505420825953
+  Column 5: count: 1049 hasNull: false min: 0.07999999821186066 max: 
99.91999816894531 sum: 52744.70002820343
+  Column 6: count: 1049 hasNull: false min: 0.02 max: 49.85 sum: 
26286.349999999977
+  Column 7: count: 1049 hasNull: false true: 526
+  Column 8: count: 1049 hasNull: false min:  max: zach zipper sum: 13443
+  Column 9: count: 1049 hasNull: false min: 2013-03-01 09:11:58.703 max: 
2013-03-01 09:11:58.703
+  Column 10: count: 1049 hasNull: false min: 0.08 max: 99.94 sum: 53646.16
+  Column 11: count: 1049 hasNull: false sum: 13278
+
+Stripes:
+  Stripe: offset: 3 data: 22636 rows: 1049 tail: 249 index: 9944
+    Stream: column 0 section ROW_INDEX start: 3 length 20
+    Stream: column 0 section BLOOM_FILTER start: 23 length 45
+    Stream: column 1 section ROW_INDEX start: 68 length 58
+    Stream: column 1 section BLOOM_FILTER start: 126 length 799
+    Stream: column 2 section ROW_INDEX start: 925 length 58
+    Stream: column 2 section BLOOM_FILTER start: 983 length 978
+    Stream: column 3 section ROW_INDEX start: 1961 length 61
+    Stream: column 3 section BLOOM_FILTER start: 2022 length 983
+    Stream: column 4 section ROW_INDEX start: 3005 length 69
+    Stream: column 4 section BLOOM_FILTER start: 3074 length 963
+    Stream: column 5 section ROW_INDEX start: 4037 length 78
+    Stream: column 5 section BLOOM_FILTER start: 4115 length 1291
+    Stream: column 6 section ROW_INDEX start: 5406 length 85
+    Stream: column 6 section BLOOM_FILTER start: 5491 length 1280
+    Stream: column 7 section ROW_INDEX start: 6771 length 41
+    Stream: column 7 section BLOOM_FILTER start: 6812 length 45
+    Stream: column 8 section ROW_INDEX start: 6857 length 86
+    Stream: column 8 section BLOOM_FILTER start: 6943 length 1157
+    Stream: column 9 section ROW_INDEX start: 8100 length 51
+    Stream: column 9 section BLOOM_FILTER start: 8151 length 62
+    Stream: column 10 section ROW_INDEX start: 8213 length 82
+    Stream: column 10 section BLOOM_FILTER start: 8295 length 1297
+    Stream: column 11 section ROW_INDEX start: 9592 length 47
+    Stream: column 11 section BLOOM_FILTER start: 9639 length 308
+    Stream: column 1 section PRESENT start: 9947 length 17
+    Stream: column 1 section DATA start: 9964 length 962
+    Stream: column 2 section PRESENT start: 10926 length 17
+    Stream: column 2 section DATA start: 10943 length 1441
+    Stream: column 3 section DATA start: 12384 length 1704
+    Stream: column 4 section DATA start: 14088 length 1998
+    Stream: column 5 section DATA start: 16086 length 2925
+    Stream: column 6 section DATA start: 19011 length 3323
+    Stream: column 7 section DATA start: 22334 length 137
+    Stream: column 8 section DATA start: 22471 length 1572
+    Stream: column 8 section LENGTH start: 24043 length 310
+    Stream: column 8 section DICTIONARY_DATA start: 24353 length 1548
+    Stream: column 9 section DATA start: 25901 length 62
+    Stream: column 9 section SECONDARY start: 25963 length 1783
+    Stream: column 10 section DATA start: 27746 length 2138
+    Stream: column 10 section SECONDARY start: 29884 length 231
+    Stream: column 11 section DATA start: 30115 length 1877
+    Stream: column 11 section LENGTH start: 31992 length 591
+    Encoding column 0: DIRECT
+    Encoding column 1: DIRECT
+    Encoding column 2: DIRECT_V2
+    Encoding column 3: DIRECT_V2
+    Encoding column 4: DIRECT_V2
+    Encoding column 5: DIRECT
+    Encoding column 6: DIRECT
+    Encoding column 7: DIRECT
+    Encoding column 8: DICTIONARY_V2[516]
+    Encoding column 9: DIRECT_V2
+    Encoding column 10: DIRECT_V2
+    Encoding column 11: DIRECT_V2
+    Row group indices for column 1:
+      Entry 0: count: 997 hasNull: true min: -3 max: 124 sum: 59325 positions: 
0,0,0,0,0,0,0
+      Entry 1: count: 49 hasNull: false min: 2 max: 123 sum: 3105 positions: 
0,10,113,0,0,903,101
+    Bloom filters for column 1:
+      Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 492 loadFactor: 
0.0784 expectedFpp: 3.7864847E-5
+      Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 168 loadFactor: 
0.0268 expectedFpp: 5.147697E-7
+      Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 492 
loadFactor: 0.0784 expectedFpp: 3.7864847E-5
+
+File length: 33456 bytes
+Padding length: 0 bytes
+Padding ratio: 0%
+-- END ORC FILE DUMP --
+124    336     65664   4294967435      74.72   42.47   true    bob davidson    
2013-03-01 09:11:58.703302      45.4    yard duty     2015    10

Reply via email to