Repository: drill
Updated Branches:
  refs/heads/master ee399317a -> 34969583b


DRILL-4996: Parquet Date auto-correction is not working in auto-partitioned 
parquet files generated by drill-1.6

- Changed the detection approach for corrupted date values in the case when 
parquet files are generated by Drill:
  the corruption status is determined by looking at the min/max values in the 
metadata;
- Appropriate refactoring of TestCorruptParquetDateCorrection.

This closes #687


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/eef3b3fb
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/eef3b3fb
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/eef3b3fb

Branch: refs/heads/master
Commit: eef3b3fb6f4e76e95510253d155d0659e387fc99
Parents: ee39931
Author: Vitalii Diravka <[email protected]>
Authored: Mon Dec 12 04:41:49 2016 +0000
Committer: Parth Chandra <[email protected]>
Committed: Fri Jan 13 17:44:29 2017 -0800

----------------------------------------------------------------------
 .../store/parquet/ParquetReaderUtility.java     |  18 +-
 .../TestCorruptParquetDateCorrection.java       | 284 +++++++++----------
 ...t_dates_and_old_drill_parquet_writer.parquet | Bin 0 -> 4241 bytes
 3 files changed, 140 insertions(+), 162 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/eef3b3fb/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java
index b22e666..a94e220 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java
@@ -195,26 +195,26 @@ public class ParquetReaderUtility {
 
     String createdBy = footer.getFileMetaData().getCreatedBy();
     String drillVersion = 
footer.getFileMetaData().getKeyValueMetaData().get(ParquetRecordWriter.DRILL_VERSION_PROPERTY);
-    String stringWriterVersion = 
footer.getFileMetaData().getKeyValueMetaData().get(ParquetRecordWriter.WRITER_VERSION_PROPERTY);
+    String writerVersionValue = 
footer.getFileMetaData().getKeyValueMetaData().get(ParquetRecordWriter.WRITER_VERSION_PROPERTY);
     // This flag can be present in parquet files which were generated with 
1.9.0-SNAPSHOT and 1.9.0 drill versions.
     // If this flag is present it means that the version of the drill parquet 
writer is 2
     final String isDateCorrectFlag = "is.date.correct";
     String isDateCorrect = 
footer.getFileMetaData().getKeyValueMetaData().get(isDateCorrectFlag);
     if (drillVersion != null) {
       int writerVersion = 1;
-      if (stringWriterVersion != null) {
-        writerVersion = Integer.parseInt(stringWriterVersion);
+      if (writerVersionValue != null) {
+        writerVersion = Integer.parseInt(writerVersionValue);
       }
       else if (Boolean.valueOf(isDateCorrect)) {
         writerVersion = DRILL_WRITER_VERSION_STD_DATE_FORMAT;
       }
       return writerVersion >= DRILL_WRITER_VERSION_STD_DATE_FORMAT ? 
DateCorruptionStatus.META_SHOWS_NO_CORRUPTION
-          : DateCorruptionStatus.META_SHOWS_CORRUPTION;
+          // loop through parquet column metadata to find date columns, check 
for corrupt values
+          : checkForCorruptDateValuesInStatistics(footer, columns, 
autoCorrectCorruptDates);
     } else {
       // Possibly an old, un-migrated Drill file, check the column statistics 
to see if min/max values look corrupt
       // only applies if there is a date column selected
       if (createdBy == null || createdBy.equals("parquet-mr")) {
-        // loop through parquet column metadata to find date columns, check 
for corrupt values
         return checkForCorruptDateValuesInStatistics(footer, columns, 
autoCorrectCorruptDates);
       } else {
         // check the created by to see if it is a migrated Drill file
@@ -226,7 +226,7 @@ public class ParquetReaderUtility {
             SemanticVersion semVer = 
parsedCreatedByVersion.getSemanticVersion();
             String pre = semVer.pre + "";
             if (semVer.major == 1 && semVer.minor == 8 && semVer.patch == 1 && 
pre.contains("drill")) {
-              return DateCorruptionStatus.META_SHOWS_CORRUPTION;
+              return checkForCorruptDateValuesInStatistics(footer, columns, 
autoCorrectCorruptDates);
             }
           }
           // written by a tool that wasn't Drill, the dates are not corrupted
@@ -244,9 +244,9 @@ public class ParquetReaderUtility {
    * Detect corrupt date values by looking at the min/max values in the 
metadata.
    *
    * This should only be used when a file does not have enough metadata to 
determine if
-   * the data was written with an older version of Drill, or an external tool. 
Drill
-   * versions 1.3 and beyond should have enough metadata to confirm that the 
data was written
-   * by Drill.
+   * the data was written with an external tool or an older version of Drill
+   * ({@link 
org.apache.drill.exec.store.parquet.ParquetRecordWriter#WRITER_VERSION_PROPERTY}
 <
+   * {@link 
org.apache.drill.exec.store.parquet.ParquetReaderUtility#DRILL_WRITER_VERSION_STD_DATE_FORMAT})
    *
    * This method only checks the first Row Group, because Drill has only ever 
written
    * a single Row Group per file.

http://git-wip-us.apache.org/repos/asf/drill/blob/eef3b3fb/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestCorruptParquetDateCorrection.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestCorruptParquetDateCorrection.java
 
b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestCorruptParquetDateCorrection.java
index 0ab247d..8cd1a85 100644
--- 
a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestCorruptParquetDateCorrection.java
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestCorruptParquetDateCorrection.java
@@ -17,6 +17,8 @@
  */
 package org.apache.drill.exec.physical.impl.writer;
 
+import static java.lang.String.format;
+
 import org.apache.drill.PlanTestBase;
 import org.apache.drill.TestBuilder;
 import org.apache.drill.common.util.TestTools;
@@ -37,10 +39,11 @@ import java.util.regex.Pattern;
  * Tests for compatibility reading old parquet files after date corruption
  * issue was fixed in DRILL-4203.
  *
- * Drill was writing non-standard dates into parquet files for all releases
- * before 1.9.0. The values have been read by Drill correctly by Drill, but
- * external tools like Spark reading the files will see corrupted values for
- * all dates that have been written by Drill.
+ * Drill could write non-standard dates into parquet files. This issue is 
related to
+ * all drill releases where {@link 
org.apache.drill.exec.store.parquet.ParquetRecordWriter#WRITER_VERSION_PROPERTY}
 <
+ * {@link 
org.apache.drill.exec.store.parquet.ParquetReaderUtility#DRILL_WRITER_VERSION_STD_DATE_FORMAT}
+ * The values have been read correctly by Drill, but external tools like Spark 
reading the files will see
+ * corrupted values for all dates that have been written by Drill.
  *
  * This change corrects the behavior of the Drill parquet writer to correctly
  * store dates in the format given in the parquet specification.
@@ -59,8 +62,7 @@ import java.util.regex.Pattern;
  * While the old behavior was a consistent shift into an unlikely range
  * to be used in a modern database (over 10,000 years in the future), these 
are still
  * valid date values. In the case where these may have been written into
- * files intentionally, and we cannot be certain from the metadata if Drill
- * produced the files, an option is included to turn off the auto-correction.
+ * files intentionally, an option is included to turn off the auto-correction.
  * Use of this option is assumed to be extremely unlikely, but it is included
  * for completeness.
  */
@@ -76,11 +78,8 @@ public class TestCorruptParquetDateCorrection extends 
PlanTestBase {
   //    - one from the 0.6 version of Drill, before files had min/max 
statistics
   //        - detecting corrupt values must be deferred to actual data page 
reading
   //    - one from 1.4, where there is a proper created-by, but the corruption 
is present
-  private static final String MIXED_CORRUPTED_AND_CORRECTED_DATES_PATH =
+  private static final String MIXED_CORRUPTED_AND_CORRECT_DATES_PATH =
       
"[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/mixed_drill_versions";
-  // partitioned with 1.4.0, date values are known to be corrupt
-  private static final String CORRUPTED_PARTITIONED_DATES_1_4_0_PATH =
-      
"[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/partitioned_with_corruption_4203";
   // partitioned with 1.2.0, no certain metadata that these were written with 
Drill
   // the value will be checked to see that they look corrupt and they will be 
corrected
   // by default. Users can use the format plugin option 
autoCorrectCorruptDates to disable
@@ -88,9 +87,13 @@ public class TestCorruptParquetDateCorrection extends 
PlanTestBase {
   // in the similar range as Drill's corrupt values
   private static final String CORRUPTED_PARTITIONED_DATES_1_2_PATH =
       
"[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/partitioned_with_corruption_4203_1_2";
+  // partitioned with 1.4.0, no certain metadata regarding the date corruption 
status.
+  // The same detection approach of the corrupt date values as for the files 
partitioned with 1.2.0
+  private static final String CORRUPTED_PARTITIONED_DATES_1_4_0_PATH =
+      
"[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/partitioned_with_corruption_4203";
   private static final String PARQUET_DATE_FILE_WITH_NULL_FILLED_COLS =
       
"[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/null_date_cols_with_corruption_4203.parquet";
-  private static final String CORRECTED_PARTITIONED_DATES_1_9_PATH =
+  private static final String CORRECT_PARTITIONED_DATES_1_9_PATH =
       
"[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/1_9_0_partitioned_no_corruption";
   private static final String VARCHAR_PARTITIONED =
       
"[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/fewtypes_varcharpartition";
@@ -98,11 +101,13 @@ public class TestCorruptParquetDateCorrection extends 
PlanTestBase {
       
"[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/fewtypes_datepartition";
   private static final String EXCEPTION_WHILE_PARSING_CREATED_BY_META =
       
"[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/hive1dot2_fewtypes_null";
+  private static final String CORRECT_DATES_1_6_0_PATH =
+      
"[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/correct_dates_and_old_drill_parquet_writer.parquet";
+  private static final String PARTITIONED_1_2_FOLDER = 
"partitioned_with_corruption_4203_1_2";
+  private static final String MIXED_CORRUPTED_AND_CORRECT_PARTITIONED_FOLDER = 
"mixed_partitioned";
 
   private static FileSystem fs;
   private static Path path;
-  static String PARTITIONED_1_2_FOLDER = 
"partitioned_with_corruption_4203_1_2";
-  static String MIXED_CORRUPTED_AND_CORRECTED_PARTITIONED_FOLDER = 
"mixed_partitioned";
 
   @BeforeClass
   public static void initFs() throws Exception {
@@ -116,9 +121,9 @@ public class TestCorruptParquetDateCorrection extends 
PlanTestBase {
     copyDirectoryIntoTempSpace(CORRUPTED_PARTITIONED_DATES_1_2_PATH);
     
copyMetaDataCacheToTempReplacingInternalPaths("parquet/4203_corrupt_dates/drill.parquet.metadata_1_2.requires_replace.txt",
         PARTITIONED_1_2_FOLDER);
-    copyDirectoryIntoTempSpace(CORRUPTED_PARTITIONED_DATES_1_2_PATH, 
MIXED_CORRUPTED_AND_CORRECTED_PARTITIONED_FOLDER);
-    copyDirectoryIntoTempSpace(CORRECTED_PARTITIONED_DATES_1_9_PATH, 
MIXED_CORRUPTED_AND_CORRECTED_PARTITIONED_FOLDER);
-    copyDirectoryIntoTempSpace(CORRUPTED_PARTITIONED_DATES_1_4_0_PATH, 
MIXED_CORRUPTED_AND_CORRECTED_PARTITIONED_FOLDER);
+    copyDirectoryIntoTempSpace(CORRUPTED_PARTITIONED_DATES_1_2_PATH, 
MIXED_CORRUPTED_AND_CORRECT_PARTITIONED_FOLDER);
+    copyDirectoryIntoTempSpace(CORRECT_PARTITIONED_DATES_1_9_PATH, 
MIXED_CORRUPTED_AND_CORRECT_PARTITIONED_FOLDER);
+    copyDirectoryIntoTempSpace(CORRUPTED_PARTITIONED_DATES_1_4_0_PATH, 
MIXED_CORRUPTED_AND_CORRECT_PARTITIONED_FOLDER);
   }
 
   /**
@@ -128,20 +133,20 @@ public class TestCorruptParquetDateCorrection extends 
PlanTestBase {
    * in the case where we are certain correction is NOT needed. For more info 
see DRILL-4203.
    */
   @Test
-  public void testReadPartitionedOnCorrectedDates() throws Exception {
+  public void testReadPartitionedOnCorrectDates() throws Exception {
     try {
       for (String selection : new String[]{"*", "date_col"}) {
         // for sanity, try reading all partitions without a filter
         TestBuilder builder = testBuilder()
-            .sqlQuery("select " + selection + " from table(dfs.`" + 
CORRECTED_PARTITIONED_DATES_1_9_PATH + "`" +
-                "(type => 'parquet', autoCorrectCorruptDates => false))")
+            .sqlQuery("select %s from table(dfs.`%s` (type => 'parquet', 
autoCorrectCorruptDates => false))",
+                selection, CORRECT_PARTITIONED_DATES_1_9_PATH)
             .unOrdered()
             .baselineColumns("date_col");
-        addDateBaselineVals(builder);
+        addDateBaselineValues(builder);
         builder.go();
 
-        String query = "select " + selection + " from table(dfs.`" + 
CORRECTED_PARTITIONED_DATES_1_9_PATH + "` " +
-            "(type => 'parquet', autoCorrectCorruptDates => false))" + " where 
date_col = date '1970-01-01'";
+        String query = format("select %s from table(dfs.`%s` (type => 
'parquet', autoCorrectCorruptDates => false))" +
+            " where date_col = date '1970-01-01'", selection, 
CORRECT_PARTITIONED_DATES_1_9_PATH);
         // verify that pruning is actually taking place
         testPlanMatchingPatterns(query, new String[]{"numFiles=1"}, null);
 
@@ -161,9 +166,7 @@ public class TestCorruptParquetDateCorrection extends 
PlanTestBase {
   @Test
   public void testVarcharPartitionedReadWithCorruption() throws Exception {
     testBuilder()
-        .sqlQuery("select date_col from " +
-            "dfs.`" + VARCHAR_PARTITIONED + "`" +
-            "where length(varchar_col) = 12")
+        .sqlQuery("select date_col from dfs.`%s` where length(varchar_col) = 
12", VARCHAR_PARTITIONED)
         .baselineColumns("date_col")
         .unOrdered()
         .baselineValues(new DateTime(2039, 4, 9, 0, 0))
@@ -174,24 +177,21 @@ public class TestCorruptParquetDateCorrection extends 
PlanTestBase {
   @Test
   public void testDatePartitionedReadWithCorruption() throws Exception {
     testBuilder()
-        .sqlQuery("select date_col from " +
-            "dfs.`" + DATE_PARTITIONED + "`" +
-            "where date_col = '1999-04-08'")
+        .sqlQuery("select date_col from dfs.`%s` where date_col = 
'1999-04-08'", DATE_PARTITIONED)
         .baselineColumns("date_col")
         .unOrdered()
         .baselineValues(new DateTime(1999, 4, 8, 0, 0))
         .go();
 
-    String sql = "select date_col from dfs.`" + DATE_PARTITIONED + "` where 
date_col > '1999-04-08'";
-    testPlanMatchingPatterns(sql, new String[]{"numFiles=6"}, null);
+    String query = format("select date_col from dfs.`%s` where date_col > 
'1999-04-08'", DATE_PARTITIONED);
+    testPlanMatchingPatterns(query, new String[]{"numFiles=6"}, null);
   }
 
   @Test
   public void testCorrectDatesAndExceptionWhileParsingCreatedBy() throws 
Exception {
     testBuilder()
-        .sqlQuery("select date_col from " +
-            "dfs.`" + EXCEPTION_WHILE_PARSING_CREATED_BY_META +
-            "` where to_date(date_col, 'yyyy-mm-dd') < '1997-01-02'")
+        .sqlQuery("select date_col from dfs.`%s` where to_date(date_col, 
'yyyy-mm-dd') < '1997-01-02'",
+            EXCEPTION_WHILE_PARSING_CREATED_BY_META)
         .baselineColumns("date_col")
         .unOrdered()
         .baselineValues(new DateTime(1996, 1, 29, 0, 0))
@@ -201,68 +201,34 @@ public class TestCorruptParquetDateCorrection extends 
PlanTestBase {
         .go();
   }
 
-  /**
-   * Test reading a directory full of partitioned parquet files with dates, 
these files have a drill version
-   * number of 1.4.0 in their footers, so we can be certain they are corrupt. 
The option to disable the
-   * correction is passed, but it will not change the result in the case where 
we are certain correction
-   * is needed. For more info see DRILL-4203.
-   */
-  @Test
-  public void testReadPartitionedOnCorruptedDates() throws Exception {
-    try {
-      for (String selection : new String[]{"*", "date_col"}) {
-        // for sanity, try reading all partitions without a filter
-        TestBuilder builder = testBuilder()
-            .sqlQuery("select " + selection + " from table(dfs.`" + 
CORRUPTED_PARTITIONED_DATES_1_4_0_PATH + "`" +
-                "(type => 'parquet', autoCorrectCorruptDates => false))")
-            .unOrdered()
-            .baselineColumns("date_col");
-        addDateBaselineVals(builder);
-        builder.go();
-
-        String query = "select " + selection + " from table(dfs.`" + 
CORRUPTED_PARTITIONED_DATES_1_4_0_PATH + "` " +
-            "(type => 'parquet', autoCorrectCorruptDates => false))" + " where 
date_col = date '1970-01-01'";
-        // verify that pruning is actually taking place
-        testPlanMatchingPatterns(query, new String[]{"numFiles=1"}, null);
-
-        // read with a filter on the partition column
-        testBuilder()
-            .sqlQuery(query)
-            .unOrdered()
-            .baselineColumns("date_col")
-            .baselineValues(new DateTime(1970, 1, 1, 0, 0))
-            .go();
-      }
-    } finally {
-      test("alter session reset all");
-    }
-  }
 
   @Test
   public void testReadPartitionedOnCorruptedDates_UserDisabledCorrection() 
throws Exception {
     try {
       for (String selection : new String[]{"*", "date_col"}) {
-        // for sanity, try reading all partitions without a filter
-        TestBuilder builder = testBuilder()
-            .sqlQuery("select " + selection + " from table(dfs.`" + 
CORRUPTED_PARTITIONED_DATES_1_2_PATH + "`" +
-                "(type => 'parquet', autoCorrectCorruptDates => false))")
-            .unOrdered()
-            .baselineColumns("date_col");
-        addCorruptedDateBaselineVals(builder);
-        builder.go();
-
-        String query = "select " + selection + " from table(dfs.`" + 
CORRUPTED_PARTITIONED_DATES_1_2_PATH + "` " +
-            "(type => 'parquet', autoCorrectCorruptDates => false))" + " where 
date_col = cast('15334-03-17' as date)";
-        // verify that pruning is actually taking place
-        testPlanMatchingPatterns(query, new String[]{"numFiles=1"}, null);
-
-        // read with a filter on the partition column
-        testBuilder()
-            .sqlQuery(query)
-            .unOrdered()
-            .baselineColumns("date_col")
-            .baselineValues(new DateTime(15334, 03, 17, 0, 0))
-            .go();
+        for (String table : new String[]{CORRUPTED_PARTITIONED_DATES_1_2_PATH, 
CORRUPTED_PARTITIONED_DATES_1_4_0_PATH}) {
+          // for sanity, try reading all partitions without a filter
+          TestBuilder builder = testBuilder()
+              .sqlQuery("select %s from table(dfs.`%s` (type => 'parquet', 
autoCorrectCorruptDates => false))",
+                  selection, table)
+              .unOrdered()
+              .baselineColumns("date_col");
+          addCorruptedDateBaselineValues(builder);
+          builder.go();
+
+          String query = format("select %s from table(dfs.`%s` (type => 
'parquet', " +
+              "autoCorrectCorruptDates => false)) where date_col = 
cast('15334-03-17' as date)", selection, table);
+          // verify that pruning is actually taking place
+          testPlanMatchingPatterns(query, new String[]{"numFiles=1"}, null);
+
+          // read with a filter on the partition column
+          testBuilder()
+              .sqlQuery(query)
+              .unOrdered()
+              .baselineColumns("date_col")
+              .baselineValues(new DateTime(15334, 3, 17, 0, 0))
+              .go();
+        }
       }
     } finally {
       test("alter session reset all");
@@ -270,29 +236,31 @@ public class TestCorruptParquetDateCorrection extends 
PlanTestBase {
   }
 
   @Test
-  public void testCorruptValDetectionDuringPruning() throws Exception {
+  public void testCorruptValueDetectionDuringPruning() throws Exception {
     try {
       for (String selection : new String[]{"*", "date_col"}) {
-        // for sanity, try reading all partitions without a filter
-        TestBuilder builder = testBuilder()
-            .sqlQuery("select " + selection + " from dfs.`" + 
CORRUPTED_PARTITIONED_DATES_1_2_PATH + "`")
-            .unOrdered()
-            .baselineColumns("date_col");
-        addDateBaselineVals(builder);
-        builder.go();
-
-        String query = "select " + selection + " from dfs.`" + 
CORRUPTED_PARTITIONED_DATES_1_2_PATH + "`" +
-            " where date_col = date '1970-01-01'";
-        // verify that pruning is actually taking place
-        testPlanMatchingPatterns(query, new String[]{"numFiles=1"}, null);
-
-        // read with a filter on the partition column
-        testBuilder()
-            .sqlQuery(query)
-            .unOrdered()
-            .baselineColumns("date_col")
-            .baselineValues(new DateTime(1970, 1, 1, 0, 0))
-            .go();
+        for (String table : new String[]{CORRUPTED_PARTITIONED_DATES_1_2_PATH, 
CORRUPTED_PARTITIONED_DATES_1_4_0_PATH}) {
+          // for sanity, try reading all partitions without a filter
+          TestBuilder builder = testBuilder()
+              .sqlQuery("select %s from dfs.`%s`", selection, table)
+              .unOrdered()
+              .baselineColumns("date_col");
+          addDateBaselineValues(builder);
+          builder.go();
+
+          String query = format("select %s from dfs.`%s`" +
+              " where date_col = date '1970-01-01'", selection, table);
+          // verify that pruning is actually taking place
+          testPlanMatchingPatterns(query, new String[]{"numFiles=1"}, null);
+
+          // read with a filter on the partition column
+          testBuilder()
+              .sqlQuery(query)
+              .unOrdered()
+              .baselineColumns("date_col")
+              .baselineValues(new DateTime(1970, 1, 1, 0, 0))
+              .go();
+        }
       }
     } finally {
       test("alter session reset all");
@@ -313,8 +281,8 @@ public class TestCorruptParquetDateCorrection extends 
PlanTestBase {
   @Test
   public void testReadCorruptDatesWithNullFilledColumns() throws Exception {
     testBuilder()
-        .sqlQuery("select null_dates_1, null_dates_2, non_existent_field, 
date_col from dfs.`" +
-            PARQUET_DATE_FILE_WITH_NULL_FILLED_COLS + "`")
+        .sqlQuery("select null_dates_1, null_dates_2, non_existent_field, 
date_col from dfs.`%s`",
+            PARQUET_DATE_FILE_WITH_NULL_FILLED_COLS)
         .unOrdered()
         .baselineColumns("null_dates_1", "null_dates_2", "non_existent_field", 
"date_col")
         .baselineValues(null, null, null, new DateTime(1970, 1, 1, 0, 0))
@@ -332,7 +300,7 @@ public class TestCorruptParquetDateCorrection extends 
PlanTestBase {
     readFilesWithUserDisabledAutoCorrection();
 
     try {
-      test(String.format("alter session set %s = true", 
ExecConstants.PARQUET_NEW_RECORD_READER));
+      test("alter session set %s = true", 
ExecConstants.PARQUET_NEW_RECORD_READER);
       // read all of the types with the complex reader
       readFilesWithUserDisabledAutoCorrection();
     } finally {
@@ -352,34 +320,34 @@ public class TestCorruptParquetDateCorrection extends 
PlanTestBase {
   @Test
   public void testReadMixedOldAndNewBothReaders() throws Exception {
     /// read once with the flat reader
-    readMixedCorruptedAndCorrectedDates();
+    readMixedCorruptedAndCorrectDates();
 
     try {
       // read all of the types with the complex reader
-      test(String.format("alter session set %s = true", 
ExecConstants.PARQUET_NEW_RECORD_READER));
-      readMixedCorruptedAndCorrectedDates();
+      test("alter session set %s = true", 
ExecConstants.PARQUET_NEW_RECORD_READER);
+      readMixedCorruptedAndCorrectDates();
     } finally {
-      test(String.format("alter session set %s = false", 
ExecConstants.PARQUET_NEW_RECORD_READER));
+      test("alter session set %s = false", 
ExecConstants.PARQUET_NEW_RECORD_READER);
     }
   }
 
   @Test
   public void testReadOldMetadataCacheFile() throws Exception {
     // for sanity, try reading all partitions without a filter
-    String query = "select date_col from dfs.`" + new Path(path, 
PARTITIONED_1_2_FOLDER) + "`";
+    String query = format("select date_col from dfs.`%s`", new Path(path, 
PARTITIONED_1_2_FOLDER));
     TestBuilder builder = testBuilder()
         .sqlQuery(query)
         .unOrdered()
         .baselineColumns("date_col");
-    addDateBaselineVals(builder);
+    addDateBaselineValues(builder);
     builder.go();
     testPlanMatchingPatterns(query, new String[]{"usedMetadataFile=true"}, 
null);
   }
 
   @Test
   public void testReadOldMetadataCacheFileWithPruning() throws Exception {
-    String query = "select date_col from dfs.`" + new Path(path, 
PARTITIONED_1_2_FOLDER) + "`" +
-        " where date_col = date '1970-01-01'";
+    String query = format("select date_col from dfs.`%s` where date_col = date 
'1970-01-01'",
+        new Path(path, PARTITIONED_1_2_FOLDER));
     // verify that pruning is actually taking place
     testPlanMatchingPatterns(query, new String[]{"numFiles=1", 
"usedMetadataFile=true"}, null);
 
@@ -396,15 +364,16 @@ public class TestCorruptParquetDateCorrection extends 
PlanTestBase {
   public void testReadOldMetadataCacheFileOverrideCorrection() throws 
Exception {
     // for sanity, try reading all partitions without a filter
     TestBuilder builder = testBuilder()
-        .sqlQuery("select date_col from table(dfs.`" + new Path(path, 
PARTITIONED_1_2_FOLDER) + "`" +
-            "(type => 'parquet', autoCorrectCorruptDates => false))")
+        .sqlQuery("select date_col from table(dfs.`%s` (type => 'parquet', 
autoCorrectCorruptDates => false))",
+            new Path(path, PARTITIONED_1_2_FOLDER))
         .unOrdered()
         .baselineColumns("date_col");
-    addCorruptedDateBaselineVals(builder);
+    addCorruptedDateBaselineValues(builder);
     builder.go();
 
-    String query = "select date_col from table(dfs.`" + new Path(path, 
PARTITIONED_1_2_FOLDER) + "` " +
-        "(type => 'parquet', autoCorrectCorruptDates => false))" + " where 
date_col = cast('15334-03-17' as date)";
+    String query = format("select date_col from table(dfs.`%s` (type => 
'parquet', " +
+        "autoCorrectCorruptDates => false)) where date_col = 
cast('15334-03-17' as date)",
+        new Path(path, PARTITIONED_1_2_FOLDER));
     // verify that pruning is actually taking place
     testPlanMatchingPatterns(query, new String[]{"numFiles=1", 
"usedMetadataFile=true"}, null);
 
@@ -413,27 +382,26 @@ public class TestCorruptParquetDateCorrection extends 
PlanTestBase {
         .sqlQuery(query)
         .unOrdered()
         .baselineColumns("date_col")
-        .baselineValues(new DateTime(15334, 03, 17, 0, 0))
+        .baselineValues(new DateTime(15334, 3, 17, 0, 0))
         .go();
   }
 
   @Test
   public void testReadNewMetadataCacheFileOverOldAndNewFiles() throws 
Exception {
-    String table = "dfs.`" + new Path(path, 
MIXED_CORRUPTED_AND_CORRECTED_PARTITIONED_FOLDER) + "`";
+    String table = format("dfs.`%s`", new Path(path, 
MIXED_CORRUPTED_AND_CORRECT_PARTITIONED_FOLDER));
     
copyMetaDataCacheToTempReplacingInternalPaths("parquet/4203_corrupt_dates/" +
-        "mixed_version_partitioned_metadata.requires_replace.txt", 
MIXED_CORRUPTED_AND_CORRECTED_PARTITIONED_FOLDER);
+        "mixed_version_partitioned_metadata.requires_replace.txt", 
MIXED_CORRUPTED_AND_CORRECT_PARTITIONED_FOLDER);
     // for sanity, try reading all partitions without a filter
     TestBuilder builder = testBuilder()
         .sqlQuery("select date_col from " + table)
         .unOrdered()
         .baselineColumns("date_col");
-    addDateBaselineVals(builder);
-    addDateBaselineVals(builder);
-    addDateBaselineVals(builder);
+    addDateBaselineValues(builder);
+    addDateBaselineValues(builder);
+    addDateBaselineValues(builder);
     builder.go();
 
-    String query = "select date_col from " + table +
-        " where date_col = date '1970-01-01'";
+    String query = format("select date_col from %s where date_col = date 
'1970-01-01'", table);
     // verify that pruning is actually taking place
     testPlanMatchingPatterns(query, new String[]{"numFiles=3", 
"usedMetadataFile=true"}, null);
 
@@ -448,28 +416,38 @@ public class TestCorruptParquetDateCorrection extends 
PlanTestBase {
         .go();
   }
 
+  @Test
+  public void testCorrectDateValuesGeneratedByOldVersionOfDrill() throws 
Exception {
+    testBuilder()
+        .sqlQuery("select i_rec_end_date from dfs.`%s` limit 1", 
CORRECT_DATES_1_6_0_PATH)
+        .baselineColumns("i_rec_end_date")
+        .unOrdered()
+        .baselineValues(new DateTime(2000, 10, 26, 0, 0))
+        .go();
+  }
+
   /**
    * Read a directory with parquet files where some have corrupted dates, see 
DRILL-4203.
    * @throws Exception
    */
-  private void readMixedCorruptedAndCorrectedDates() throws Exception {
+  private void readMixedCorruptedAndCorrectDates() throws Exception {
     // ensure that selecting the date column explicitly or as part of a star 
still results
     // in checking the file metadata for date columns (when we need to check 
the statistics
     // for bad values) to set the flag that the values are corrupt
     for (String selection : new String[] {"*", "date_col"}) {
       TestBuilder builder = testBuilder()
-          .sqlQuery("select " + selection + " from dfs.`" + 
MIXED_CORRUPTED_AND_CORRECTED_DATES_PATH + "`")
+          .sqlQuery("select %s from dfs.`%s`", selection, 
MIXED_CORRUPTED_AND_CORRECT_DATES_PATH)
           .unOrdered()
           .baselineColumns("date_col");
       for (int i = 0; i < 4; i++) {
-        addDateBaselineVals(builder);
+        addDateBaselineValues(builder);
       }
       builder.go();
     }
   }
 
 
-  private void addDateBaselineVals(TestBuilder builder) {
+  private void addDateBaselineValues(TestBuilder builder) {
     builder
         .baselineValues(new DateTime(1970, 1, 1, 0, 0))
         .baselineValues(new DateTime(1970, 1, 2, 0, 0))
@@ -480,16 +458,16 @@ public class TestCorruptParquetDateCorrection extends 
PlanTestBase {
   }
 
   /**
-   * These are the same values added in the addDateBaselineVals, shifted as 
corrupt values
+   * These are the same values added in the addDateBaselineValues, shifted as 
corrupt values
    */
-  private void addCorruptedDateBaselineVals(TestBuilder builder) {
+  private void addCorruptedDateBaselineValues(TestBuilder builder) {
     builder
-        .baselineValues(new DateTime(15334, 03, 17, 0, 0))
-        .baselineValues(new DateTime(15334, 03, 18, 0, 0))
-        .baselineValues(new DateTime(15334, 03, 15, 0, 0))
-        .baselineValues(new DateTime(15334, 03, 16, 0, 0))
-        .baselineValues(new DateTime(15264, 03, 16, 0, 0))
-        .baselineValues(new DateTime(15379, 03, 17, 0, 0));
+        .baselineValues(new DateTime(15334, 3, 17, 0, 0))
+        .baselineValues(new DateTime(15334, 3, 18, 0, 0))
+        .baselineValues(new DateTime(15334, 3, 15, 0, 0))
+        .baselineValues(new DateTime(15334, 3, 16, 0, 0))
+        .baselineValues(new DateTime(15264, 3, 16, 0, 0))
+        .baselineValues(new DateTime(15379, 3, 17, 0, 0));
   }
 
   private void readFilesWithUserDisabledAutoCorrection() throws Exception {
@@ -498,14 +476,14 @@ public class TestCorruptParquetDateCorrection extends 
PlanTestBase {
     // for bad values) to set the flag that the values are corrupt
     for (String selection : new String[] {"*", "date_col"}) {
       TestBuilder builder = testBuilder()
-          .sqlQuery("select " + selection + " from table(dfs.`" + 
MIXED_CORRUPTED_AND_CORRECTED_DATES_PATH + "`" +
-              "(type => 'parquet', autoCorrectCorruptDates => false))")
+          .sqlQuery("select %s from table(dfs.`%s` (type => 'parquet', 
autoCorrectCorruptDates => false))",
+              selection, MIXED_CORRUPTED_AND_CORRECT_DATES_PATH)
           .unOrdered()
           .baselineColumns("date_col");
-      addDateBaselineVals(builder);
-      addDateBaselineVals(builder);
-      addCorruptedDateBaselineVals(builder);
-      addCorruptedDateBaselineVals(builder);
+      addDateBaselineValues(builder);
+      addCorruptedDateBaselineValues(builder);
+      addCorruptedDateBaselineValues(builder);
+      addCorruptedDateBaselineValues(builder);
       builder.go();
     }
   }

http://git-wip-us.apache.org/repos/asf/drill/blob/eef3b3fb/exec/java-exec/src/test/resources/parquet/4203_corrupt_dates/correct_dates_and_old_drill_parquet_writer.parquet
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/test/resources/parquet/4203_corrupt_dates/correct_dates_and_old_drill_parquet_writer.parquet
 
b/exec/java-exec/src/test/resources/parquet/4203_corrupt_dates/correct_dates_and_old_drill_parquet_writer.parquet
new file mode 100644
index 0000000..6d81db0
Binary files /dev/null and 
b/exec/java-exec/src/test/resources/parquet/4203_corrupt_dates/correct_dates_and_old_drill_parquet_writer.parquet
 differ

Reply via email to