akashrn5 commented on a change in pull request #3896:
URL: https://github.com/apache/carbondata/pull/3896#discussion_r474608789



##########
File path: core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
##########
@@ -435,19 +436,48 @@ public static Object 
getDataDataTypeForNoDictionaryColumn(String dimensionValue,
 
   private static Object parseTimestamp(String dimensionValue, String 
dateFormat) {
     Date dateToStr;
-    DateFormat dateFormatter;
+    DateFormat dateFormatter = null;
     try {
       if (null != dateFormat && !dateFormat.trim().isEmpty()) {
         dateFormatter = new SimpleDateFormat(dateFormat);
-        dateFormatter.setLenient(false);
       } else {
         dateFormatter = timestampFormatter.get();
       }
+      dateFormatter.setLenient(false);
       dateToStr = dateFormatter.parse(dimensionValue);
-      return dateToStr.getTime();
+      return validateTimeStampRange(dateToStr.getTime());
     } catch (ParseException e) {
-      throw new NumberFormatException(e.getMessage());
+      // If the parsing fails, try to parse again with setLenient to true if 
the property is set
+      if (CarbonProperties.getInstance().isSetLenientEnabled()) {
+        try {
+          LOGGER.info("Changing setLenient to true for TimeStamp: " + 
dimensionValue);
+          dateFormatter.setLenient(true);
+          dateToStr = dateFormatter.parse(dimensionValue);
+          LOGGER.info("Changing " + dimensionValue + " to " + dateToStr);
+          dateFormatter.setLenient(false);
+          LOGGER.info("Changing setLenient back to false");
+          return validateTimeStampRange(dateToStr.getTime());
+        } catch (ParseException ex) {
+          dateFormatter.setLenient(false);
+          LOGGER.info("Changing setLenient back to false");
+          throw new NumberFormatException(ex.getMessage());
+        }
+      } else {
+        throw new NumberFormatException(e.getMessage());
+      }
+    }
+  }
+
+  private static Long validateTimeStampRange(Long timeValue) {
+    long minValue = DateDirectDictionaryGenerator.MIN_VALUE;
+    long maxValue = DateDirectDictionaryGenerator.MAX_VALUE;
+    if (timeValue < minValue || timeValue > maxValue) {
+      if (LOGGER.isDebugEnabled()) {
+        LOGGER.debug("Value for timestamp type column is not in valid range.");
+      }
+      throw new NumberFormatException("Value for timestamp type column is not 
in valid range.");

Review comment:
       ```suggestion
         throw new NumberFormatException("timestamp column data is not in valid 
range of: " + DateDirectDictionaryGenerator.MIN_VALUE + " and " + 
DateDirectDictionaryGenerator.MAX_VALUE );
   ```

##########
File path: core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
##########
@@ -435,19 +436,48 @@ public static Object 
getDataDataTypeForNoDictionaryColumn(String dimensionValue,
 
   private static Object parseTimestamp(String dimensionValue, String 
dateFormat) {
     Date dateToStr;
-    DateFormat dateFormatter;
+    DateFormat dateFormatter = null;
     try {
       if (null != dateFormat && !dateFormat.trim().isEmpty()) {
         dateFormatter = new SimpleDateFormat(dateFormat);
-        dateFormatter.setLenient(false);
       } else {
         dateFormatter = timestampFormatter.get();
       }
+      dateFormatter.setLenient(false);
       dateToStr = dateFormatter.parse(dimensionValue);
-      return dateToStr.getTime();
+      return validateTimeStampRange(dateToStr.getTime());
     } catch (ParseException e) {
-      throw new NumberFormatException(e.getMessage());
+      // If the parsing fails, try to parse again with setLenient to true if 
the property is set
+      if (CarbonProperties.getInstance().isSetLenientEnabled()) {
+        try {
+          LOGGER.info("Changing setLenient to true for TimeStamp: " + 
dimensionValue);
+          dateFormatter.setLenient(true);
+          dateToStr = dateFormatter.parse(dimensionValue);
+          LOGGER.info("Changing " + dimensionValue + " to " + dateToStr);
+          dateFormatter.setLenient(false);
+          LOGGER.info("Changing setLenient back to false");
+          return validateTimeStampRange(dateToStr.getTime());
+        } catch (ParseException ex) {
+          dateFormatter.setLenient(false);
+          LOGGER.info("Changing setLenient back to false");
+          throw new NumberFormatException(ex.getMessage());
+        }
+      } else {
+        throw new NumberFormatException(e.getMessage());
+      }
+    }
+  }
+
+  private static Long validateTimeStampRange(Long timeValue) {
+    long minValue = DateDirectDictionaryGenerator.MIN_VALUE;
+    long maxValue = DateDirectDictionaryGenerator.MAX_VALUE;
+    if (timeValue < minValue || timeValue > maxValue) {
+      if (LOGGER.isDebugEnabled()) {

Review comment:
       The debug log is not required here, because the exception is always 
thrown right after it.

##########
File path: 
core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
##########
@@ -1592,6 +1592,13 @@ private CarbonCommonConstants() {
 
   public static final String CARBON_LUCENE_INDEX_STOP_WORDS_DEFAULT = "false";
 
+  // Property to enable parsing the data with setLenient = true in load flow 
if it fails with

Review comment:
       ```suggestion
     // Property to enable parsing the timestamp/date data with setLenient = 
true in load flow if it fails with
   ```

##########
File path: core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
##########
@@ -435,19 +436,48 @@ public static Object 
getDataDataTypeForNoDictionaryColumn(String dimensionValue,
 
   private static Object parseTimestamp(String dimensionValue, String 
dateFormat) {
     Date dateToStr;
-    DateFormat dateFormatter;
+    DateFormat dateFormatter = null;
     try {
       if (null != dateFormat && !dateFormat.trim().isEmpty()) {
         dateFormatter = new SimpleDateFormat(dateFormat);
-        dateFormatter.setLenient(false);
       } else {
         dateFormatter = timestampFormatter.get();
       }
+      dateFormatter.setLenient(false);
       dateToStr = dateFormatter.parse(dimensionValue);
-      return dateToStr.getTime();
+      return validateTimeStampRange(dateToStr.getTime());
     } catch (ParseException e) {
-      throw new NumberFormatException(e.getMessage());
+      // If the parsing fails, try to parse again with setLenient to true if 
the property is set
+      if (CarbonProperties.getInstance().isSetLenientEnabled()) {
+        try {
+          LOGGER.info("Changing setLenient to true for TimeStamp: " + 
dimensionValue);
+          dateFormatter.setLenient(true);
+          dateToStr = dateFormatter.parse(dimensionValue);
+          LOGGER.info("Changing " + dimensionValue + " to " + dateToStr);

Review comment:
       Line 456 can be removed; this log does not look necessary.

##########
File path: core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
##########
@@ -435,19 +436,48 @@ public static Object 
getDataDataTypeForNoDictionaryColumn(String dimensionValue,
 
   private static Object parseTimestamp(String dimensionValue, String 
dateFormat) {
     Date dateToStr;
-    DateFormat dateFormatter;
+    DateFormat dateFormatter = null;
     try {
       if (null != dateFormat && !dateFormat.trim().isEmpty()) {
         dateFormatter = new SimpleDateFormat(dateFormat);
-        dateFormatter.setLenient(false);
       } else {
         dateFormatter = timestampFormatter.get();
       }
+      dateFormatter.setLenient(false);
       dateToStr = dateFormatter.parse(dimensionValue);
-      return dateToStr.getTime();
+      return validateTimeStampRange(dateToStr.getTime());
     } catch (ParseException e) {
-      throw new NumberFormatException(e.getMessage());
+      // If the parsing fails, try to parse again with setLenient to true if 
the property is set
+      if (CarbonProperties.getInstance().isSetLenientEnabled()) {
+        try {
+          LOGGER.info("Changing setLenient to true for TimeStamp: " + 
dimensionValue);

Review comment:
       Do not print `dimensionValue` in the log.
   
   

##########
File path: 
integration/spark/src/test/resources/badrecords/invalidTimeStampRange.csv
##########
@@ -0,0 +1,2 @@
+ID,date,starttime,country,name,phonetype,serialname,salary

Review comment:
       Instead of adding a new CSV file just for one test case, I suggest 
deleting this file, creating the CSV file in the test case itself, and deleting 
it once the test has finished.

##########
File path: core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
##########
@@ -435,19 +436,48 @@ public static Object 
getDataDataTypeForNoDictionaryColumn(String dimensionValue,
 
   private static Object parseTimestamp(String dimensionValue, String 
dateFormat) {
     Date dateToStr;
-    DateFormat dateFormatter;
+    DateFormat dateFormatter = null;
     try {
       if (null != dateFormat && !dateFormat.trim().isEmpty()) {
         dateFormatter = new SimpleDateFormat(dateFormat);
-        dateFormatter.setLenient(false);
       } else {
         dateFormatter = timestampFormatter.get();
       }
+      dateFormatter.setLenient(false);
       dateToStr = dateFormatter.parse(dimensionValue);
-      return dateToStr.getTime();
+      return validateTimeStampRange(dateToStr.getTime());
     } catch (ParseException e) {
-      throw new NumberFormatException(e.getMessage());
+      // If the parsing fails, try to parse again with setLenient to true if 
the property is set
+      if (CarbonProperties.getInstance().isSetLenientEnabled()) {
+        try {
+          LOGGER.info("Changing setLenient to true for TimeStamp: " + 
dimensionValue);
+          dateFormatter.setLenient(true);
+          dateToStr = dateFormatter.parse(dimensionValue);
+          LOGGER.info("Changing " + dimensionValue + " to " + dateToStr);
+          dateFormatter.setLenient(false);
+          LOGGER.info("Changing setLenient back to false");
+          return validateTimeStampRange(dateToStr.getTime());
+        } catch (ParseException ex) {
+          dateFormatter.setLenient(false);
+          LOGGER.info("Changing setLenient back to false");
+          throw new NumberFormatException(ex.getMessage());
+        }
+      } else {
+        throw new NumberFormatException(e.getMessage());
+      }
+    }
+  }
+
+  private static Long validateTimeStampRange(Long timeValue) {
+    long minValue = DateDirectDictionaryGenerator.MIN_VALUE;
+    long maxValue = DateDirectDictionaryGenerator.MAX_VALUE;
+    if (timeValue < minValue || timeValue > maxValue) {
+      if (LOGGER.isDebugEnabled()) {
+        LOGGER.debug("Value for timestamp type column is not in valid range.");
+      }
+      throw new NumberFormatException("Value for timestamp type column is not 
in valid range.");

Review comment:
       Instead of `NumberFormatException`, can we throw 
`UnsupportedOperationException`?
   @VenuReddy2103 what do you think?

##########
File path: core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
##########
@@ -435,19 +436,48 @@ public static Object 
getDataDataTypeForNoDictionaryColumn(String dimensionValue,
 
   private static Object parseTimestamp(String dimensionValue, String 
dateFormat) {
     Date dateToStr;
-    DateFormat dateFormatter;
+    DateFormat dateFormatter = null;
     try {
       if (null != dateFormat && !dateFormat.trim().isEmpty()) {
         dateFormatter = new SimpleDateFormat(dateFormat);
-        dateFormatter.setLenient(false);
       } else {
         dateFormatter = timestampFormatter.get();
       }
+      dateFormatter.setLenient(false);
       dateToStr = dateFormatter.parse(dimensionValue);
-      return dateToStr.getTime();
+      return validateTimeStampRange(dateToStr.getTime());
     } catch (ParseException e) {
-      throw new NumberFormatException(e.getMessage());
+      // If the parsing fails, try to parse again with setLenient to true if 
the property is set
+      if (CarbonProperties.getInstance().isSetLenientEnabled()) {
+        try {
+          LOGGER.info("Changing setLenient to true for TimeStamp: " + 
dimensionValue);
+          dateFormatter.setLenient(true);
+          dateToStr = dateFormatter.parse(dimensionValue);
+          LOGGER.info("Changing " + dimensionValue + " to " + dateToStr);
+          dateFormatter.setLenient(false);
+          LOGGER.info("Changing setLenient back to false");
+          return validateTimeStampRange(dateToStr.getTime());
+        } catch (ParseException ex) {
+          dateFormatter.setLenient(false);
+          LOGGER.info("Changing setLenient back to false");
+          throw new NumberFormatException(ex.getMessage());
+        }
+      } else {
+        throw new NumberFormatException(e.getMessage());
+      }
+    }
+  }
+
+  private static Long validateTimeStampRange(Long timeValue) {

Review comment:
       There is no need to return a Long value here; make the method void and 
let it only validate the value.

##########
File path: 
core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
##########
@@ -1592,6 +1592,13 @@ private CarbonCommonConstants() {
 
   public static final String CARBON_LUCENE_INDEX_STOP_WORDS_DEFAULT = "false";
 
+  // Property to enable parsing the data with setLenient = true in load flow 
if it fails with
+  // setLenient = false.

Review comment:
       ```suggestion
     // parse invalid timestamp data.
   ```

##########
File path: integration/spark/src/test/resources/timeStampFormatData3.csv
##########
@@ -0,0 +1,3 @@
+ID,date,starttime,country,name,phonetype,serialname,salary

Review comment:
       same as above

##########
File path: 
integration/spark/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataRDDFactory.scala
##########
@@ -816,10 +816,13 @@ object CarbonDataRDDFactory {
       val partitionByRdd = keyRDD.partitionBy(
         new SegmentPartitioner(segmentIdIndex, segmentUpdateParallelism))
 
+      val carbonSessionInfoBroadcast = sqlContext.sparkSession.sparkContext

Review comment:
       Do we really need to broadcast the sessionInfo? How are the session 
parameters sent to the executors in the normal load flow?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to