http://git-wip-us.apache.org/repos/asf/hive/blob/0b62e6f3/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
----------------------------------------------------------------------
diff --git 
a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
 
b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
index 472ace7..ee945d4 100644
--- 
a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
+++ 
b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
@@ -39,7 +39,7 @@ import org.apache.hadoop.io.WritableUtils;
  * Directly deserialize with the caller reading field-by-field the LazyBinary 
serialization format.
  *
  * The caller is responsible for calling the read method for the right type of 
each field
- * (after calling readCheckNull).
+ * (after calling readNextField).
  *
  * Reading some fields require a results object to receive value information.  
A separate
  * results object is created by the caller at initialization per different 
field even for the same
@@ -65,17 +65,12 @@ public final class LazyBinaryDeserializeRead extends 
DeserializeRead {
   private VInt tempVInt;
   private VLong tempVLong;
 
-  private boolean readBeyondConfiguredFieldsWarned;
-  private boolean bufferRangeHasExtraDataWarned;
-
   public LazyBinaryDeserializeRead(TypeInfo[] typeInfos, boolean 
useExternalBuffer) {
     super(typeInfos, useExternalBuffer);
     fieldCount = typeInfos.length;
     tempVInt = new VInt();
     tempVLong = new VLong();
     currentExternalBufferNeeded = false;
-    readBeyondConfiguredFieldsWarned = false;
-    bufferRangeHasExtraDataWarned = false;
   }
 
   // Not public since we must have the field count so every 8 fields NULL 
bytes can be navigated.
@@ -122,22 +117,19 @@ public final class LazyBinaryDeserializeRead extends 
DeserializeRead {
   }
 
   /*
-   * Reads the NULL information for a field.
+   * Reads the next field.
+   *
+   * Afterwards, reading is positioned to the next field.
+   *
+   * @return  Return true when the field was not null and data is put in the 
appropriate
+   *          current* member.
+   *          Otherwise, false when the field is null.
    *
-   * @return Returns true when the field is NULL; reading is positioned to the 
next field.
-   *         Otherwise, false when the field is NOT NULL; reading is 
positioned to the field data.
    */
   @Override
-  public boolean readCheckNull() throws IOException {
+  public boolean readNextField() throws IOException {
     if (fieldIndex >= fieldCount) {
-      // Reading beyond the specified field count produces NULL.
-      if (!readBeyondConfiguredFieldsWarned) {
-        // Warn only once.
-        LOG.info("Reading beyond configured fields! Configured " + fieldCount 
+ " fields but "
-            + " reading more (NULLs returned).  Ignoring similar problems.");
-        readBeyondConfiguredFieldsWarned = true;
-      }
-      return true;
+      return false;
     }
 
     fieldStart = offset;
@@ -151,12 +143,24 @@ public final class LazyBinaryDeserializeRead extends 
DeserializeRead {
       nullByte = bytes[offset++];
     }
 
-    // NOTE: The bit is set to 1 if a field is NOT NULL.
-    boolean isNull;
+    // NOTE: The bit is set to 1 if a field is NOT NULL.
     if ((nullByte & (1 << (fieldIndex % 8))) == 0) {
-      isNull = true;
+
+      // Logically move past this field.
+      fieldIndex++;
+
+      // Every 8 fields we read a new NULL byte.
+      if (fieldIndex < fieldCount) {
+        if ((fieldIndex % 8) == 0) {
+          // Get next null byte.
+          if (offset >= end) {
+            throw new EOFException();
+          }
+          nullByte = bytes[offset++];
+        }
+      }
+      return false;
     } else {
-      isNull = false;    // Assume.
 
       // Make sure there is at least one byte that can be read for a value.
       if (offset >= end) {
@@ -336,24 +340,30 @@ public final class LazyBinaryDeserializeRead extends 
DeserializeRead {
 
           HiveDecimal decimal = 
currentHiveDecimalWritable.getHiveDecimal(precision, scale);
           if (decimal == null) {
-            isNull = true;
-          } else {
-            // Put value back into writable.
-            currentHiveDecimalWritable.set(decimal);
+
+            // Logically move past this field.
+            fieldIndex++;
+
+            // Every 8 fields we read a new NULL byte.
+            if (fieldIndex < fieldCount) {
+              if ((fieldIndex % 8) == 0) {
+                // Get next null byte.
+                if (offset >= end) {
+                  throw new EOFException();
+                }
+                nullByte = bytes[offset++];
+              }
+            }
+            return false;
           }
+          // Put value back into writable.
+          currentHiveDecimalWritable.set(decimal);
         }
         break;
 
       default:
         throw new Error("Unexpected primitive category " + 
primitiveCategories[fieldIndex].name());
       }
-
-      /*
-       * Now that we have read through the field -- did we really want it?
-       */
-      if (columnsToInclude != null && !columnsToInclude[fieldIndex]) {
-        isNull = true;
-      }
     }
 
     // Logically move past this field.
@@ -370,37 +380,32 @@ public final class LazyBinaryDeserializeRead extends 
DeserializeRead {
       }
     }
 
-    return isNull;
+    return true;
   }
 
   /*
-   * Call this method after all fields have been read to check for extra 
fields.
+   * Reads through an undesired field.
+   *
+   * No data values are valid after this call.
+   * Designed for skipping columns that are not included.
    */
-  public void extraFieldsCheck() {
-    if (offset < end) {
-      // We did not consume all of the byte range.
-      if (!bufferRangeHasExtraDataWarned) {
-        // Warn only once.
-        int length = end - start;
-        int remaining = end - offset;
-        LOG.info("Not all fields were read in the buffer range! Buffer range " 
+  start
-            + " for length " + length + " but " + remaining + " bytes remain. "
-            + "(total buffer length " + bytes.length + ")"
-            + "  Ignoring similar problems.");
-        bufferRangeHasExtraDataWarned = true;
-      }
-    }
+  public void skipNextField() throws IOException {
+    // Not a known use case for LazyBinary -- so don't optimize.
+    readNextField();
   }
 
   /*
-   * Read integrity warning flags.
+   * This method may be called after all the fields have been read to 
check
+   * for unread fields.
+   *
+   * Note that when optimizing reading to stop reading unneeded include 
columns, worrying
+   * about whether all data is consumed is not appropriate (often we aren't 
reading it all by
+   * design).
+   *
+   * Since LazyBinaryDeserializeRead reads fields in order through the last desired 
column it does
+   * support this function.
    */
-  @Override
-  public boolean readBeyondConfiguredFieldsWarned() {
-    return readBeyondConfiguredFieldsWarned;
-  }
-  @Override
-  public boolean bufferRangeHasExtraDataWarned() {
-    return bufferRangeHasExtraDataWarned;
+  public boolean isEndOfInputReached() {
+    return (offset == end);
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/0b62e6f3/serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java 
b/serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java
index 52dd5a3..3ac339d 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java
@@ -65,7 +65,7 @@ public class VerifyFast {
 
     boolean isNull;
 
-    isNull = deserializeRead.readCheckNull();
+    isNull = !deserializeRead.readNextField();
     if (isNull) {
       if (writable != null) {
         TestCase.fail("Field reports null but object is not null");

http://git-wip-us.apache.org/repos/asf/hive/blob/0b62e6f3/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
----------------------------------------------------------------------
diff --git 
a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
 
b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
index 49ee9c6..f1eeb2d 100644
--- 
a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
+++ 
b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
@@ -120,17 +120,14 @@ public class TestBinarySortableFast extends TestCase {
                   /* useExternalBuffer */ false,
                   columnSortOrderIsDesc);
 
-      if (useIncludeColumns) {
-        binarySortableDeserializeRead.setColumnsToInclude(columnsToInclude);
-      }
-
       BytesWritable bytesWritable = serializeWriteBytes[i];
       binarySortableDeserializeRead.set(
           bytesWritable.getBytes(), 0, bytesWritable.getLength());
 
       for (int index = 0; index < columnCount; index++) {
-        if (index >= writeColumnCount ||
-            (useIncludeColumns && !columnsToInclude[index])) {
+        if (useIncludeColumns && !columnsToInclude[index]) {
+          binarySortableDeserializeRead.skipNextField();
+        } else if (index >= writeColumnCount) {
           // Should come back a null.
           VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, 
primitiveTypeInfos[index], null);
         } else {
@@ -138,9 +135,9 @@ public class TestBinarySortableFast extends TestCase {
           VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, 
primitiveTypeInfos[index], writable);
         }
       }
-      binarySortableDeserializeRead.extraFieldsCheck();
-      
TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondConfiguredFieldsWarned());
-      
TestCase.assertTrue(!binarySortableDeserializeRead.bufferRangeHasExtraDataWarned());
+      if (writeColumnCount == columnCount) {
+        
TestCase.assertTrue(binarySortableDeserializeRead.isEndOfInputReached());
+      }
 
       /*
        * Clip off one byte and expect to get an EOFException on the write 
field.
@@ -151,10 +148,6 @@ public class TestBinarySortableFast extends TestCase {
               /* useExternalBuffer */ false,
               columnSortOrderIsDesc);
 
-      if (useIncludeColumns) {
-        binarySortableDeserializeRead2.setColumnsToInclude(columnsToInclude);
-      }
-
       binarySortableDeserializeRead2.set(
           bytesWritable.getBytes(), 0, bytesWritable.getLength() - 1);  // One 
fewer byte.
 
@@ -172,7 +165,7 @@ public class TestBinarySortableFast extends TestCase {
           TestCase.assertTrue(threw);
         } else {
           if (useIncludeColumns && !columnsToInclude[index]) {
-            VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead2, 
primitiveTypeInfos[index], null);
+            binarySortableDeserializeRead2.skipNextField();
           } else {
             VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead2, 
primitiveTypeInfos[index], writable);
           }
@@ -258,16 +251,14 @@ public class TestBinarySortableFast extends TestCase {
                   /* useExternalBuffer */ false,
                   columnSortOrderIsDesc);
 
-      if (useIncludeColumns) {
-        binarySortableDeserializeRead.setColumnsToInclude(columnsToInclude);
-      }
 
       BytesWritable bytesWritable = serdeBytes[i];
       binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, 
bytesWritable.getLength());
 
       for (int index = 0; index < columnCount; index++) {
-        if (index >= writeColumnCount ||
-            (useIncludeColumns && !columnsToInclude[index])) {
+        if (useIncludeColumns && !columnsToInclude[index]) {
+          binarySortableDeserializeRead.skipNextField();
+        } else if (index >= writeColumnCount) {
           // Should come back a null.
           VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, 
primitiveTypeInfos[index], null);
         } else {
@@ -275,9 +266,9 @@ public class TestBinarySortableFast extends TestCase {
           VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, 
primitiveTypeInfos[index], writable);
         }
       }
-      binarySortableDeserializeRead.extraFieldsCheck();
-      
TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondConfiguredFieldsWarned());
-      
TestCase.assertTrue(!binarySortableDeserializeRead.bufferRangeHasExtraDataWarned());
+      if (writeColumnCount == columnCount) {
+        
TestCase.assertTrue(binarySortableDeserializeRead.isEndOfInputReached());
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/0b62e6f3/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java
----------------------------------------------------------------------
diff --git 
a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java 
b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java
index 8285c06..c857b42 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java
@@ -101,10 +101,6 @@ public class TestLazySimpleFast extends TestCase {
                   /* useExternalBuffer */ false,
                   separator, serdeParams);
 
-      if (useIncludeColumns) {
-        lazySimpleDeserializeRead.setColumnsToInclude(columnsToInclude);
-      }
-
       BytesWritable bytesWritable = serializeWriteBytes[i];
       byte[] bytes = bytesWritable.getBytes();
       int length = bytesWritable.getLength();
@@ -116,8 +112,9 @@ public class TestLazySimpleFast extends TestCase {
       }
 
       for (int index = 0; index < columnCount; index++) {
-        if (index >= writeColumnCount ||
-            (useIncludeColumns && !columnsToInclude[index])) {
+        if (useIncludeColumns && !columnsToInclude[index]) {
+          lazySimpleDeserializeRead.skipNextField();
+        } else if (index >= writeColumnCount) {
           // Should come back a null.
           VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, 
primitiveTypeInfos[index], null);
         } else {
@@ -125,10 +122,9 @@ public class TestLazySimpleFast extends TestCase {
           VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, 
primitiveTypeInfos[index], writable);
         }
       }
-      lazySimpleDeserializeRead.extraFieldsCheck();
-      
TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondConfiguredFieldsWarned());
-      
TestCase.assertTrue(!lazySimpleDeserializeRead.bufferRangeHasExtraDataWarned());
-
+      if (writeColumnCount == columnCount) {
+        TestCase.assertTrue(lazySimpleDeserializeRead.isEndOfInputReached());
+      }
     }
 
     // Try to deserialize using SerDe class our Writable row objects created 
by SerializeWrite.
@@ -193,16 +189,13 @@ public class TestLazySimpleFast extends TestCase {
                   /* useExternalBuffer */ false,
                   separator, serdeParams);
 
-      if (useIncludeColumns) {
-        lazySimpleDeserializeRead.setColumnsToInclude(columnsToInclude);
-      }
-
       byte[] bytes = serdeBytes[i];
       lazySimpleDeserializeRead.set(bytes, 0, bytes.length);
 
       for (int index = 0; index < columnCount; index++) {
-        if (index >= writeColumnCount ||
-            (useIncludeColumns && !columnsToInclude[index])) {
+        if (useIncludeColumns && !columnsToInclude[index]) {
+          lazySimpleDeserializeRead.skipNextField();
+        } else if (index >= writeColumnCount) {
           // Should come back a null.
           VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, 
primitiveTypeInfos[index], null);
         } else {
@@ -210,9 +203,9 @@ public class TestLazySimpleFast extends TestCase {
           VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, 
primitiveTypeInfos[index], writable);
         }
       }
-      lazySimpleDeserializeRead.extraFieldsCheck();
-      
TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondConfiguredFieldsWarned());
-      
TestCase.assertTrue(!lazySimpleDeserializeRead.bufferRangeHasExtraDataWarned());
+      if (writeColumnCount == columnCount) {
+        TestCase.assertTrue(lazySimpleDeserializeRead.isEndOfInputReached());
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/0b62e6f3/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
----------------------------------------------------------------------
diff --git 
a/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
 
b/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
index e64d67d..a1828c9 100644
--- 
a/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
+++ 
b/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
@@ -95,16 +95,13 @@ public class TestLazyBinaryFast extends TestCase {
                   writePrimitiveTypeInfos,
                   /* useExternalBuffer */ false);
 
-      if (useIncludeColumns) {
-        lazyBinaryDeserializeRead.setColumnsToInclude(columnsToInclude);
-      }
-
       BytesWritable bytesWritable = serializeWriteBytes[i];
       lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, 
bytesWritable.getLength());
 
       for (int index = 0; index < columnCount; index++) {
-        if (index >= writeColumnCount ||
-            (useIncludeColumns && !columnsToInclude[index])) {
+        if (useIncludeColumns && !columnsToInclude[index]) {
+          lazyBinaryDeserializeRead.skipNextField();
+        } else if (index >= writeColumnCount) {
           // Should come back a null.
           VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, 
primitiveTypeInfos[index], null);
         } else {
@@ -112,13 +109,9 @@ public class TestLazyBinaryFast extends TestCase {
           VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, 
primitiveTypeInfos[index], writable);
         }
       }
-      lazyBinaryDeserializeRead.extraFieldsCheck();
-      if (doWriteFewerColumns) {
-        
TestCase.assertTrue(lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned());
-      } else {
-        
TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned());
+      if (writeColumnCount == columnCount) {
+        TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
       }
-      
TestCase.assertTrue(!lazyBinaryDeserializeRead.bufferRangeHasExtraDataWarned());
     }
 
     // Try to deserialize using SerDe class our Writable row objects created 
by SerializeWrite.
@@ -197,16 +190,13 @@ public class TestLazyBinaryFast extends TestCase {
                   primitiveTypeInfos,
                   /* useExternalBuffer */ false);
 
-      if (useIncludeColumns) {
-        lazyBinaryDeserializeRead.setColumnsToInclude(columnsToInclude);
-      }
-
       BytesWritable bytesWritable = serdeBytes[i];
       lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, 
bytesWritable.getLength());
 
       for (int index = 0; index < columnCount; index++) {
-        if (index >= writeColumnCount ||
-            (useIncludeColumns && !columnsToInclude[index])) {
+        if (useIncludeColumns && !columnsToInclude[index]) {
+          lazyBinaryDeserializeRead.skipNextField();
+        } else if (index >= writeColumnCount) {
           // Should come back a null.
           VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, 
primitiveTypeInfos[index], null);
         } else {
@@ -214,9 +204,9 @@ public class TestLazyBinaryFast extends TestCase {
           VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, 
primitiveTypeInfos[index], writable);
         }
       }
-      lazyBinaryDeserializeRead.extraFieldsCheck();
-      
TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned());
-      
TestCase.assertTrue(!lazyBinaryDeserializeRead.bufferRangeHasExtraDataWarned());
+      if (writeColumnCount == columnCount) {
+        TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
+      }
     }
   }
 

Reply via email to