hive git commit: HIVE-12887 Handle ORC schema on read with fewer columns than file schema (after Schema Evolution changes) (Matt McCline, reviewed by Sergey Shelukhin)

2016-04-26 Thread mmccline
Repository: hive
Updated Branches:
  refs/heads/branch-2.0 8b3e7aa51 -> a3502d05c


HIVE-12887 Handle ORC schema on read with fewer columns than file schema (after Schema Evolution changes) (Matt McCline, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a3502d05
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a3502d05
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a3502d05

Branch: refs/heads/branch-2.0
Commit: a3502d05ce4349cfb8836c37311018d49872ca93
Parents: 8b3e7aa
Author: Matt McCline 
Authored: Wed Jan 20 14:08:41 2016 -0800
Committer: Matt McCline 
Committed: Tue Apr 26 16:26:58 2016 -0700

--
 .../hadoop/hive/ql/io/orc/RecordReaderImpl.java |  8 +-
 .../hive/ql/io/orc/TreeReaderFactory.java   | 29 +++---
 .../queries/clientpositive/orc_remove_cols.q| 17 
 .../clientpositive/orc_remove_cols.q.out| 94 
 4 files changed, 132 insertions(+), 16 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/a3502d05/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index 567899a..acfe1a2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -242,6 +242,9 @@ public class RecordReaderImpl implements RecordReader {
 this.types = builder.types;
 TreeReaderFactory.TreeReaderSchema treeReaderSchema;
 if (options.getSchema() == null) {
+  if (LOG.isInfoEnabled()) {
+LOG.info("Schema on read not provided -- using file schema " + 
types.toString());
+  }
   treeReaderSchema = new 
TreeReaderFactory.TreeReaderSchema().fileTypes(types).schemaTypes(types);
 } else {
 
@@ -999,7 +1002,7 @@ public class RecordReaderImpl implements RecordReader {
   // since stream kind is optional, first check if it exists
   if (stream.hasKind() &&
   (StreamName.getArea(streamKind) == StreamName.Area.DATA) &&
-  includedColumns[column]) {
+  (column < includedColumns.length && includedColumns[column])) {
 // if we aren't filtering or it is a dictionary, load it.
 if (includedRowGroups == null
 || RecordReaderUtils.isDictionary(streamKind, 
encodings.get(column))) {
@@ -1024,7 +1027,8 @@ public class RecordReaderImpl implements RecordReader {
 long streamOffset = 0;
 for (OrcProto.Stream streamDesc : streamDescriptions) {
   int column = streamDesc.getColumn();
-  if ((includeColumn != null && !includeColumn[column]) ||
+  if ((includeColumn != null &&
+  (column < included.length && !includeColumn[column])) ||
   streamDesc.hasKind() &&
   (StreamName.getArea(streamDesc.getKind()) != 
StreamName.Area.DATA)) {
 streamOffset += streamDesc.getLength();

http://git-wip-us.apache.org/repos/asf/hive/blob/a3502d05/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
index d8a134b..8bb32ea 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
@@ -2050,7 +2050,7 @@ public class TreeReaderFactory {
   }
 
   protected static class StructTreeReader extends TreeReader {
-private final int fileColumnCount;
+private final int readColumnCount;
 private final int resultColumnCount;
 protected final TreeReader[] fields;
 private final String[] fieldNames;
@@ -2063,30 +2063,31 @@ public class TreeReaderFactory {
   super(columnId);
 
   OrcProto.Type fileStructType = 
treeReaderSchema.getFileTypes().get(columnId);
-  fileColumnCount = fileStructType.getFieldNamesCount();
 
   OrcProto.Type schemaStructType = 
treeReaderSchema.getSchemaTypes().get(columnId);
 
+  readColumnCount = Math.min(fileStructType.getFieldNamesCount(), 
schemaStructType.getFieldNamesCount());
+
   if (columnId == treeReaderSchema.getInnerStructSubtype()) {
 // If there are more result columns than reader columns, we will 
default those additional
 // columns to NULL.
 resultColumnCount = schemaStructType.getFieldNamesCount();
   } else {
-resultColumnCount = fileColumnCount;
+resultColumnCount = readColumnCount;
   }
 
-  

hive git commit: HIVE-12887 Handle ORC schema on read with fewer columns than file schema (after Schema Evolution changes) (Matt McCline, reviewed by Sergey Shelukhin)

2016-01-20 Thread mmccline
Repository: hive
Updated Branches:
  refs/heads/master 43c7f3a8b -> 64ab5aba9


HIVE-12887 Handle ORC schema on read with fewer columns than file schema (after Schema Evolution changes) (Matt McCline, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/64ab5aba
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/64ab5aba
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/64ab5aba

Branch: refs/heads/master
Commit: 64ab5aba9768a710efeb8d0f337c48f571d55f8b
Parents: 43c7f3a
Author: Matt McCline 
Authored: Wed Jan 20 14:08:41 2016 -0800
Committer: Matt McCline 
Committed: Wed Jan 20 14:08:41 2016 -0800

--
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java   | 43 ++---
 .../hadoop/hive/ql/io/orc/RecordReaderImpl.java |  8 +-
 .../hive/ql/io/orc/TreeReaderFactory.java   | 29 +++---
 .../queries/clientpositive/orc_remove_cols.q| 17 
 .../clientpositive/orc_remove_cols.q.out| 94 
 5 files changed, 164 insertions(+), 27 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/64ab5aba/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index eae281c..8119449 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -87,6 +87,7 @@ import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.hive.serde2.SerDeStats;
@@ -2090,12 +2091,13 @@ public class OrcInputFormat implements 
InputFormat,
 }
 
 if (haveSchemaEvolutionProperties) {
-  LOG.info("Using schema evolution configuration variables 
schema.evolution.columns " +
-  schemaEvolutionColumnNames.toString() +
-  " / schema.evolution.columns.types " +
-  schemaEvolutionTypeDescrs.toString() +
-  " (isAcid " + isAcid + ")");
-
+  if (LOG.isInfoEnabled()) {
+LOG.info("Using schema evolution configuration variables 
schema.evolution.columns " +
+schemaEvolutionColumnNames.toString() +
+" / schema.evolution.columns.types " +
+schemaEvolutionTypeDescrs.toString() +
+" (isAcid " + isAcid + ")");
+  }
 } else {
 
   // Try regular properties;
@@ -2114,11 +2116,30 @@ public class OrcInputFormat implements 
InputFormat,
   if (schemaEvolutionTypeDescrs.size() != 
schemaEvolutionColumnNames.size()) {
 return null;
   }
-  LOG.info("Using column configuration variables columns " +
-  schemaEvolutionColumnNames.toString() +
-  " / columns.types " +
-  schemaEvolutionTypeDescrs.toString() +
-  " (isAcid " + isAcid + ")");
+
+  // Find first virtual column and clip them off.
+  int virtualColumnClipNum = -1;
+  int columnNum = 0;
+  for (String columnName : schemaEvolutionColumnNames) {
+if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(columnName)) {
+  virtualColumnClipNum = columnNum;
+  break;
+}
+columnNum++;
+  }
+  if (virtualColumnClipNum != -1) {
+schemaEvolutionColumnNames =
+Lists.newArrayList(schemaEvolutionColumnNames.subList(0, 
virtualColumnClipNum));
+schemaEvolutionTypeDescrs = 
Lists.newArrayList(schemaEvolutionTypeDescrs.subList(0, virtualColumnClipNum));
+  }
+
+  if (LOG.isInfoEnabled()) {
+LOG.info("Using column configuration variables columns " +
+schemaEvolutionColumnNames.toString() +
+" / columns.types " +
+schemaEvolutionTypeDescrs.toString() +
+" (isAcid " + isAcid + ")");
+  }
 }
 
 // Desired schema does not include virtual columns or partition columns.

http://git-wip-us.apache.org/repos/asf/hive/blob/64ab5aba/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index 

hive git commit: HIVE-12887 Handle ORC schema on read with fewer columns than file schema (after Schema Evolution changes) (Matt McCline, reviewed by Sergey Shelukhin)

2016-01-20 Thread mmccline
Repository: hive
Updated Branches:
  refs/heads/branch-1 d36207ec9 -> 743585890


HIVE-12887 Handle ORC schema on read with fewer columns than file schema (after Schema Evolution changes) (Matt McCline, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/74358589
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/74358589
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/74358589

Branch: refs/heads/branch-1
Commit: 743585890950d8360b84ae044d50cc1fb8151a61
Parents: d36207e
Author: Matt McCline 
Authored: Wed Jan 20 14:25:21 2016 -0800
Committer: Matt McCline 
Committed: Wed Jan 20 14:25:21 2016 -0800

--
 .../apache/hadoop/hive/ql/io/orc/OrcUtils.java  | 49 ++
 .../hadoop/hive/ql/io/orc/RecordReaderImpl.java |  8 +-
 .../hive/ql/io/orc/TreeReaderFactory.java   | 29 +++---
 .../queries/clientpositive/orc_remove_cols.q| 17 
 .../clientpositive/orc_remove_cols.q.out| 94 
 5 files changed, 164 insertions(+), 33 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/74358589/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
index ad4a9e8..84fd3c3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.ql.io.IOConstants;
 import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
@@ -721,15 +722,13 @@ public class OrcUtils {
 }
 
 if (haveSchemaEvolutionProperties) {
-  LOG.info("Using schema evolution configuration variables " +
-  "schema.evolution.columns " +
-  schemaEvolutionColumnNames.toString() +
-  " / schema.evolution.columns.types " +
-  schemaEvolutionTypeDescrs.toString() +
-  " (isAcid " +
-  isAcid +
-  ")");
-
+  if (LOG.isInfoEnabled()) {
+LOG.info("Using schema evolution configuration variables 
schema.evolution.columns " +
+schemaEvolutionColumnNames.toString() +
+" / schema.evolution.columns.types " +
+schemaEvolutionTypeDescrs.toString() +
+" (isAcid " + isAcid + ")");
+  }
 } else {
 
   // Try regular properties;
@@ -748,14 +747,30 @@ public class OrcUtils {
   if (schemaEvolutionTypeDescrs.size() != 
schemaEvolutionColumnNames.size()) {
 return null;
   }
-  LOG.info("Using column configuration variables " +
-  "columns " +
-  schemaEvolutionColumnNames.toString() +
-  " / columns.types " +
-  schemaEvolutionTypeDescrs.toString() +
-  " (isAcid " +
-  isAcid +
-  ")");
+
+  // Find first virtual column and clip them off.
+  int virtualColumnClipNum = -1;
+  int columnNum = 0;
+  for (String columnName : schemaEvolutionColumnNames) {
+if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(columnName)) {
+  virtualColumnClipNum = columnNum;
+  break;
+}
+columnNum++;
+  }
+  if (virtualColumnClipNum != -1) {
+schemaEvolutionColumnNames =
+Lists.newArrayList(schemaEvolutionColumnNames.subList(0, 
virtualColumnClipNum));
+schemaEvolutionTypeDescrs = 
Lists.newArrayList(schemaEvolutionTypeDescrs.subList(0, virtualColumnClipNum));
+  }
+
+  if (LOG.isInfoEnabled()) {
+LOG.info("Using column configuration variables columns " +
+schemaEvolutionColumnNames.toString() +
+" / columns.types " +
+schemaEvolutionTypeDescrs.toString() +
+" (isAcid " + isAcid + ")");
+  }
 }
 
 // Desired schema does not include virtual columns or partition columns.

http://git-wip-us.apache.org/repos/asf/hive/blob/74358589/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index 24834a5..44cac68 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++