[GitHub] [spark] MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix
MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix URL: https://github.com/apache/spark/pull/26079#discussion_r334546995 ## File path: common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java ## @@ -73,45 +72,53 @@ private static long toLong(String s) { * This method is case-insensitive. */ public static CalendarInterval fromString(String s) { -if (s == null) { - return null; -} -s = s.trim(); -Matcher m = p.matcher(s); -if (!m.matches() || s.compareToIgnoreCase("interval") == 0) { +try { + return fromCaseInsensitiveString(s); +} catch (IllegalArgumentException e) { return null; -} else { - long months = toLong(m.group(1)) * 12 + toLong(m.group(2)); - long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK; - microseconds += toLong(m.group(4)) * MICROS_PER_DAY; - microseconds += toLong(m.group(5)) * MICROS_PER_HOUR; - microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE; - microseconds += toLong(m.group(7)) * MICROS_PER_SECOND; - microseconds += toLong(m.group(8)) * MICROS_PER_MILLI; - microseconds += toLong(m.group(9)); - return new CalendarInterval((int) months, microseconds); } } /** - * Convert a string to CalendarInterval. Unlike fromString, this method can handle + * Convert a string to CalendarInterval. This method can handle * strings without the `interval` prefix and throws IllegalArgumentException * when the input string is not a valid interval. * * @throws IllegalArgumentException if the string is not a valid internal. */ public static CalendarInterval fromCaseInsensitiveString(String s) { -if (s == null || s.trim().isEmpty()) { - throw new IllegalArgumentException("Interval cannot be null or blank."); +if (s == null) { + throw new IllegalArgumentException("Interval cannot be null"); } -String sInLowerCase = s.trim().toLowerCase(Locale.ROOT); -String interval = - sInLowerCase.startsWith("interval ") ? sInLowerCase : "interval " + sInLowerCase; -CalendarInterval cal = fromString(interval); -if (cal == null) { +String trimmed = s.trim(); +if (trimmed.isEmpty()) { + throw new IllegalArgumentException("Interval cannot be blank"); +} +String prefix = "interval"; +String intervalStr = trimmed; +// Checks the given interval string does not start with the `interval` prefix +if (!intervalStr.regionMatches(true, 0, prefix, 0, prefix.length())) { + // Prepend `interval` if it does not present because + // the regular expression strictly require it. Review comment: I have checked, it doesn't work: ```scala scala> def unitRegex(unit: String) = "(?:\\s+(-?\\d+)\\s+" + unit + "s?)?" unitRegex: (unit: String)String scala> val p = Pattern.compile("(interval)?" + unitRegex("year") + unitRegex("month") + | unitRegex("week") + unitRegex("day") + unitRegex("hour") + unitRegex("minute") + | unitRegex("second") + unitRegex("millisecond") + unitRegex("microsecond"), | Pattern.CASE_INSENSITIVE) p: java.util.regex.Pattern = (interval)?(?:\s+(-?\d+)\s+years?)?(?:\s+(-?\d+)\s+months?)?(?:\s+(-?\d+)\s+weeks?)?(?:\s+(-?\d+)\s+days?)?(?:\s+(-?\d+)\s+hours?)?(?:\s+(-?\d+)\s+minutes?)?(?:\s+(-?\d+)\s+seconds?)?(?:\s+(-?\d+)\s+milliseconds?)?(?:\s+(-?\d+)\s+microseconds?)? scala> val m = p.matcher("1 month 1 second") m: java.util.regex.Matcher = java.util.regex.Matcher[pattern=(interval)?(?:\s+(-?\d+)\s+years?)?(?:\s+(-?\d+)\s+months?)?(?:\s+(-?\d+)\s+weeks?)?(?:\s+(-?\d+)\s+days?)?(?:\s+(-?\d+)\s+hours?)?(?:\s+(-?\d+)\s+minutes?)?(?:\s+(-?\d+)\s+seconds?)?(?:\s+(-?\d+)\s+milliseconds?)?(?:\s+(-?\d+)\s+microseconds?)? region=0,16 lastmatch=] scala> m.matches() res0: Boolean = false ``` This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix
MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix URL: https://github.com/apache/spark/pull/26079#discussion_r334545052 ## File path: common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java ## @@ -73,45 +72,53 @@ private static long toLong(String s) { * This method is case-insensitive. */ public static CalendarInterval fromString(String s) { -if (s == null) { - return null; -} -s = s.trim(); -Matcher m = p.matcher(s); -if (!m.matches() || s.compareToIgnoreCase("interval") == 0) { +try { + return fromCaseInsensitiveString(s); +} catch (IllegalArgumentException e) { return null; -} else { - long months = toLong(m.group(1)) * 12 + toLong(m.group(2)); - long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK; - microseconds += toLong(m.group(4)) * MICROS_PER_DAY; - microseconds += toLong(m.group(5)) * MICROS_PER_HOUR; - microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE; - microseconds += toLong(m.group(7)) * MICROS_PER_SECOND; - microseconds += toLong(m.group(8)) * MICROS_PER_MILLI; - microseconds += toLong(m.group(9)); - return new CalendarInterval((int) months, microseconds); } } /** - * Convert a string to CalendarInterval. Unlike fromString, this method can handle + * Convert a string to CalendarInterval. This method can handle * strings without the `interval` prefix and throws IllegalArgumentException * when the input string is not a valid interval. * * @throws IllegalArgumentException if the string is not a valid internal. */ public static CalendarInterval fromCaseInsensitiveString(String s) { -if (s == null || s.trim().isEmpty()) { - throw new IllegalArgumentException("Interval cannot be null or blank."); +if (s == null) { + throw new IllegalArgumentException("Interval cannot be null"); } -String sInLowerCase = s.trim().toLowerCase(Locale.ROOT); -String interval = - sInLowerCase.startsWith("interval ") ? sInLowerCase : "interval " + sInLowerCase; -CalendarInterval cal = fromString(interval); -if (cal == null) { +String trimmed = s.trim(); +if (trimmed.isEmpty()) { + throw new IllegalArgumentException("Interval cannot be blank"); +} +String prefix = "interval"; +String intervalStr = trimmed; +// Checks the given interval string does not start with the `interval` prefix +if (!intervalStr.regionMatches(true, 0, prefix, 0, prefix.length())) { + // Prepend `interval` if it does not present because + // the regular expression strictly require it. Review comment: As far as I remember I tried this regex, and it didn't work. Have you tried it? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix
MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix URL: https://github.com/apache/spark/pull/26079#discussion_r334527986 ## File path: common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java ## @@ -73,45 +72,53 @@ private static long toLong(String s) { * This method is case-insensitive. */ public static CalendarInterval fromString(String s) { -if (s == null) { - return null; -} -s = s.trim(); -Matcher m = p.matcher(s); -if (!m.matches() || s.compareToIgnoreCase("interval") == 0) { +try { + return fromCaseInsensitiveString(s); +} catch (IllegalArgumentException e) { return null; -} else { - long months = toLong(m.group(1)) * 12 + toLong(m.group(2)); - long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK; - microseconds += toLong(m.group(4)) * MICROS_PER_DAY; - microseconds += toLong(m.group(5)) * MICROS_PER_HOUR; - microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE; - microseconds += toLong(m.group(7)) * MICROS_PER_SECOND; - microseconds += toLong(m.group(8)) * MICROS_PER_MILLI; - microseconds += toLong(m.group(9)); - return new CalendarInterval((int) months, microseconds); } } /** - * Convert a string to CalendarInterval. Unlike fromString, this method can handle + * Convert a string to CalendarInterval. This method can handle * strings without the `interval` prefix and throws IllegalArgumentException * when the input string is not a valid interval. * * @throws IllegalArgumentException if the string is not a valid internal. */ public static CalendarInterval fromCaseInsensitiveString(String s) { -if (s == null || s.trim().isEmpty()) { - throw new IllegalArgumentException("Interval cannot be null or blank."); +if (s == null) { + throw new IllegalArgumentException("Interval cannot be null"); } -String sInLowerCase = s.trim().toLowerCase(Locale.ROOT); -String interval = - sInLowerCase.startsWith("interval ") ? sInLowerCase : "interval " + sInLowerCase; -CalendarInterval cal = fromString(interval); -if (cal == null) { +String trimmed = s.trim(); +if (trimmed.isEmpty()) { + throw new IllegalArgumentException("Interval cannot be blank"); +} +String prefix = "interval"; +String intervalStr = trimmed; +// Checks the given interval string does not start with the `interval` prefix Review comment: Yes, don't want to lower case the **entire string** and allocate memory for new one to only compare small prefix. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix
MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix URL: https://github.com/apache/spark/pull/26079#discussion_r333579599 ## File path: common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java ## @@ -73,45 +72,53 @@ private static long toLong(String s) { * This method is case-insensitive. */ public static CalendarInterval fromString(String s) { -if (s == null) { - return null; -} -s = s.trim(); -Matcher m = p.matcher(s); -if (!m.matches() || s.compareToIgnoreCase("interval") == 0) { +try { + return fromCaseInsensitiveString(s); +} catch (IllegalArgumentException e) { return null; -} else { - long months = toLong(m.group(1)) * 12 + toLong(m.group(2)); - long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK; - microseconds += toLong(m.group(4)) * MICROS_PER_DAY; - microseconds += toLong(m.group(5)) * MICROS_PER_HOUR; - microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE; - microseconds += toLong(m.group(7)) * MICROS_PER_SECOND; - microseconds += toLong(m.group(8)) * MICROS_PER_MILLI; - microseconds += toLong(m.group(9)); - return new CalendarInterval((int) months, microseconds); } } /** - * Convert a string to CalendarInterval. Unlike fromString, this method can handle + * Convert a string to CalendarInterval. This method can handle * strings without the `interval` prefix and throws IllegalArgumentException * when the input string is not a valid interval. * * @throws IllegalArgumentException if the string is not a valid internal. */ public static CalendarInterval fromCaseInsensitiveString(String s) { -if (s == null || s.trim().isEmpty()) { - throw new IllegalArgumentException("Interval cannot be null or blank."); +if (s == null) { + throw new IllegalArgumentException("Interval cannot be null"); } -String sInLowerCase = s.trim().toLowerCase(Locale.ROOT); -String interval = - sInLowerCase.startsWith("interval ") ? sInLowerCase : "interval " + sInLowerCase; -CalendarInterval cal = fromString(interval); -if (cal == null) { +String trimmed = s.trim(); +if (trimmed.isEmpty()) { + throw new IllegalArgumentException("Interval cannot be blank"); +} +String prefix = "interval"; +String intervalStr = trimmed; +// Checks the given interval string does not start with the `interval` prefix +if (!intervalStr.regionMatches(true, 0, prefix, 0, prefix.length())) { + // Prepend `interval` if it does not present because + // the regular expression strictly require it. Review comment: > String intervalStr = trimmed.toLowerCase(); Your code is more expensive because you lower case whole input string. > // parse the interval string assuming there is no leading "interval" Here there is a problem with current regexp when you delete the anchor `"interval"`. Without this anchor, it cannot match to valid inputs: ```scala scala> import java.util.regex._ import java.util.regex._ scala> def unitRegex(unit: String) = "(?:\\s+(-?\\d+)\\s+" + unit + "s?)?" unitRegex: (unit: String)String scala> val p = Pattern.compile(unitRegex("year") + unitRegex("month") + | unitRegex("week") + unitRegex("day") + unitRegex("hour") + unitRegex("minute") + | unitRegex("second") + unitRegex("millisecond") + unitRegex("microsecond"), | Pattern.CASE_INSENSITIVE) p: java.util.regex.Pattern = (?:\s+(-?\d+)\s+years?)?(?:\s+(-?\d+)\s+months?)?(?:\s+(-?\d+)\s+weeks?)?(?:\s+(-?\d+)\s+days?)?(?:\s+(-?\d+)\s+hours?)?(?:\s+(-?\d+)\s+minutes?)?(?:\s+(-?\d+)\s+seconds?)?(?:\s+(-?\d+)\s+milliseconds?)?(?:\s+(-?\d+)\s+microseconds?)? scala> val m = p.matcher("1 month 1 second") m: java.util.regex.Matcher = java.util.regex.Matcher[pattern=(?:\s+(-?\d+)\s+years?)?(?:\s+(-?\d+)\s+months?)?(?:\s+(-?\d+)\s+weeks?)?(?:\s+(-?\d+)\s+days?)?(?:\s+(-?\d+)\s+hours?)?(?:\s+(-?\d+)\s+minutes?)?(?:\s+(-?\d+)\s+seconds?)?(?:\s+(-?\d+)\s+milliseconds?)?(?:\s+(-?\d+)\s+microseconds?)? region=0,16 lastmatch=] scala> m.matches() res7: Boolean = false ``` If we added it back: ```scala scala> val p = Pattern.compile("interval" + unitRegex("year") + unitRegex("month") + | unitRegex("week") + unitRegex("day") + unitRegex("hour") + unitRegex("minute") + | unitRegex("second") + unitRegex("millisecond") + unitRegex("microsecond"), | Pattern.CASE_INSENSITIVE) p: java.util.regex.Pattern = interval(?:\s+(-?\d+)\s+years?)?(?:\s+(-?\d+)\s+months?)?(?:\s+(-?\d+)\s+weeks?)?(?:\s+(-?\d+)\s+days?)?(?:\s+(-?\d+)\s+hours?)?(?:\s+(-?\d+)\s+minutes?)?(?:\s+(-?\d+)\s+seconds?)?(?:\s+(-?\d+)\s+milliseconds?)?(?:\s+(-?\d+)\s+microseconds?)? scala> val m = p.matcher("interval 1
[GitHub] [spark] MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix
MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix URL: https://github.com/apache/spark/pull/26079#discussion_r333516377 ## File path: common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java ## @@ -73,45 +72,53 @@ private static long toLong(String s) { * This method is case-insensitive. */ public static CalendarInterval fromString(String s) { -if (s == null) { - return null; -} -s = s.trim(); -Matcher m = p.matcher(s); -if (!m.matches() || s.compareToIgnoreCase("interval") == 0) { +try { + return fromCaseInsensitiveString(s); +} catch (IllegalArgumentException e) { return null; -} else { - long months = toLong(m.group(1)) * 12 + toLong(m.group(2)); - long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK; - microseconds += toLong(m.group(4)) * MICROS_PER_DAY; - microseconds += toLong(m.group(5)) * MICROS_PER_HOUR; - microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE; - microseconds += toLong(m.group(7)) * MICROS_PER_SECOND; - microseconds += toLong(m.group(8)) * MICROS_PER_MILLI; - microseconds += toLong(m.group(9)); - return new CalendarInterval((int) months, microseconds); } } /** - * Convert a string to CalendarInterval. Unlike fromString, this method can handle + * Convert a string to CalendarInterval. This method can handle * strings without the `interval` prefix and throws IllegalArgumentException * when the input string is not a valid interval. * * @throws IllegalArgumentException if the string is not a valid internal. */ public static CalendarInterval fromCaseInsensitiveString(String s) { -if (s == null || s.trim().isEmpty()) { - throw new IllegalArgumentException("Interval cannot be null or blank."); +if (s == null) { + throw new IllegalArgumentException("Interval cannot be null"); } -String sInLowerCase = s.trim().toLowerCase(Locale.ROOT); -String interval = - sInLowerCase.startsWith("interval ") ? sInLowerCase : "interval " + sInLowerCase; -CalendarInterval cal = fromString(interval); -if (cal == null) { +String trimmed = s.trim(); +if (trimmed.isEmpty()) { + throw new IllegalArgumentException("Interval cannot be blank"); +} +String prefix = "interval"; +String intervalStr = trimmed; +// Checks the given interval string does not start with the `interval` prefix +if (!intervalStr.regionMatches(true, 0, prefix, 0, prefix.length())) { + // Prepend `interval` if it does not present because + // the regular expression strictly require it. Review comment: Probably, this needs this feature https://www.regular-expressions.info/branchreset.html which Java's regexps doesn't have. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix
MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix URL: https://github.com/apache/spark/pull/26079#discussion_r333514414 ## File path: common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java ## @@ -73,45 +72,53 @@ private static long toLong(String s) { * This method is case-insensitive. */ public static CalendarInterval fromString(String s) { -if (s == null) { - return null; -} -s = s.trim(); -Matcher m = p.matcher(s); -if (!m.matches() || s.compareToIgnoreCase("interval") == 0) { +try { + return fromCaseInsensitiveString(s); +} catch (IllegalArgumentException e) { return null; -} else { - long months = toLong(m.group(1)) * 12 + toLong(m.group(2)); - long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK; - microseconds += toLong(m.group(4)) * MICROS_PER_DAY; - microseconds += toLong(m.group(5)) * MICROS_PER_HOUR; - microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE; - microseconds += toLong(m.group(7)) * MICROS_PER_SECOND; - microseconds += toLong(m.group(8)) * MICROS_PER_MILLI; - microseconds += toLong(m.group(9)); - return new CalendarInterval((int) months, microseconds); } } /** - * Convert a string to CalendarInterval. Unlike fromString, this method can handle + * Convert a string to CalendarInterval. This method can handle * strings without the `interval` prefix and throws IllegalArgumentException * when the input string is not a valid interval. * * @throws IllegalArgumentException if the string is not a valid internal. */ public static CalendarInterval fromCaseInsensitiveString(String s) { -if (s == null || s.trim().isEmpty()) { - throw new IllegalArgumentException("Interval cannot be null or blank."); +if (s == null) { + throw new IllegalArgumentException("Interval cannot be null"); } -String sInLowerCase = s.trim().toLowerCase(Locale.ROOT); -String interval = - sInLowerCase.startsWith("interval ") ? sInLowerCase : "interval " + sInLowerCase; -CalendarInterval cal = fromString(interval); -if (cal == null) { +String trimmed = s.trim(); +if (trimmed.isEmpty()) { + throw new IllegalArgumentException("Interval cannot be blank"); +} +String prefix = "interval"; +String intervalStr = trimmed; +// Checks the given interval string does not start with the `interval` prefix +if (!intervalStr.regionMatches(true, 0, prefix, 0, prefix.length())) { + // Prepend `interval` if it does not present because + // the regular expression strictly require it. Review comment: I have not figured out how to modify the regular expression to make the `interval` prefix optional. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix
MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix URL: https://github.com/apache/spark/pull/26079#discussion_r333514414 ## File path: common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java ## @@ -73,45 +72,53 @@ private static long toLong(String s) { * This method is case-insensitive. */ public static CalendarInterval fromString(String s) { -if (s == null) { - return null; -} -s = s.trim(); -Matcher m = p.matcher(s); -if (!m.matches() || s.compareToIgnoreCase("interval") == 0) { +try { + return fromCaseInsensitiveString(s); +} catch (IllegalArgumentException e) { return null; -} else { - long months = toLong(m.group(1)) * 12 + toLong(m.group(2)); - long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK; - microseconds += toLong(m.group(4)) * MICROS_PER_DAY; - microseconds += toLong(m.group(5)) * MICROS_PER_HOUR; - microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE; - microseconds += toLong(m.group(7)) * MICROS_PER_SECOND; - microseconds += toLong(m.group(8)) * MICROS_PER_MILLI; - microseconds += toLong(m.group(9)); - return new CalendarInterval((int) months, microseconds); } } /** - * Convert a string to CalendarInterval. Unlike fromString, this method can handle + * Convert a string to CalendarInterval. This method can handle * strings without the `interval` prefix and throws IllegalArgumentException * when the input string is not a valid interval. * * @throws IllegalArgumentException if the string is not a valid internal. */ public static CalendarInterval fromCaseInsensitiveString(String s) { -if (s == null || s.trim().isEmpty()) { - throw new IllegalArgumentException("Interval cannot be null or blank."); +if (s == null) { + throw new IllegalArgumentException("Interval cannot be null"); } -String sInLowerCase = s.trim().toLowerCase(Locale.ROOT); -String interval = - sInLowerCase.startsWith("interval ") ? sInLowerCase : "interval " + sInLowerCase; -CalendarInterval cal = fromString(interval); -if (cal == null) { +String trimmed = s.trim(); +if (trimmed.isEmpty()) { + throw new IllegalArgumentException("Interval cannot be blank"); +} +String prefix = "interval"; +String intervalStr = trimmed; +// Checks the given interval string does not start with the `interval` prefix +if (!intervalStr.regionMatches(true, 0, prefix, 0, prefix.length())) { + // Prepend `interval` if it does not present because + // the regular expression strictly require it. Review comment: I have not figure out how to modify the regular expression to make the `interval` prefix optional. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix
MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix URL: https://github.com/apache/spark/pull/26079#discussion_r333513598 ## File path: common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java ## @@ -73,45 +72,50 @@ private static long toLong(String s) { * This method is case-insensitive. */ public static CalendarInterval fromString(String s) { -if (s == null) { - return null; -} -s = s.trim(); -Matcher m = p.matcher(s); -if (!m.matches() || s.compareToIgnoreCase("interval") == 0) { +try { + return fromCaseInsensitiveString(s); +} catch (IllegalArgumentException e) { return null; -} else { - long months = toLong(m.group(1)) * 12 + toLong(m.group(2)); - long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK; - microseconds += toLong(m.group(4)) * MICROS_PER_DAY; - microseconds += toLong(m.group(5)) * MICROS_PER_HOUR; - microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE; - microseconds += toLong(m.group(7)) * MICROS_PER_SECOND; - microseconds += toLong(m.group(8)) * MICROS_PER_MILLI; - microseconds += toLong(m.group(9)); - return new CalendarInterval((int) months, microseconds); } } /** - * Convert a string to CalendarInterval. Unlike fromString, this method can handle + * Convert a string to CalendarInterval. This method can handle * strings without the `interval` prefix and throws IllegalArgumentException * when the input string is not a valid interval. * * @throws IllegalArgumentException if the string is not a valid internal. */ public static CalendarInterval fromCaseInsensitiveString(String s) { -if (s == null || s.trim().isEmpty()) { - throw new IllegalArgumentException("Interval cannot be null or blank."); +if (s == null) { + throw new IllegalArgumentException("Interval cannot be null"); } -String sInLowerCase = s.trim().toLowerCase(Locale.ROOT); -String interval = - sInLowerCase.startsWith("interval ") ? sInLowerCase : "interval " + sInLowerCase; -CalendarInterval cal = fromString(interval); -if (cal == null) { +String trimmed = s.trim(); +if (trimmed.isEmpty()) { + throw new IllegalArgumentException("Interval cannot be blank"); +} +String prefix = "interval"; +String intervalStr = trimmed; +if (!intervalStr.regionMatches(true, 0, prefix, 0, prefix.length())) { Review comment: I added comments about this. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix
MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix URL: https://github.com/apache/spark/pull/26079#discussion_r333485581 ## File path: common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java ## @@ -73,45 +72,50 @@ private static long toLong(String s) { * This method is case-insensitive. */ public static CalendarInterval fromString(String s) { -if (s == null) { - return null; -} -s = s.trim(); -Matcher m = p.matcher(s); -if (!m.matches() || s.compareToIgnoreCase("interval") == 0) { +try { + return fromCaseInsensitiveString(s); +} catch (IllegalArgumentException e) { return null; -} else { - long months = toLong(m.group(1)) * 12 + toLong(m.group(2)); - long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK; - microseconds += toLong(m.group(4)) * MICROS_PER_DAY; - microseconds += toLong(m.group(5)) * MICROS_PER_HOUR; - microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE; - microseconds += toLong(m.group(7)) * MICROS_PER_SECOND; - microseconds += toLong(m.group(8)) * MICROS_PER_MILLI; - microseconds += toLong(m.group(9)); - return new CalendarInterval((int) months, microseconds); } } /** - * Convert a string to CalendarInterval. Unlike fromString, this method can handle + * Convert a string to CalendarInterval. This method can handle * strings without the `interval` prefix and throws IllegalArgumentException * when the input string is not a valid interval. * * @throws IllegalArgumentException if the string is not a valid internal. */ public static CalendarInterval fromCaseInsensitiveString(String s) { Review comment: Let's keep them separate so far. And I will try to write flexible and common code in the near future for parsing string intervals that could handle other features found in https://github.com/apache/spark/pull/26055 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix
MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix URL: https://github.com/apache/spark/pull/26079#discussion_r333485581 ## File path: common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java ## @@ -73,45 +72,50 @@ private static long toLong(String s) { * This method is case-insensitive. */ public static CalendarInterval fromString(String s) { -if (s == null) { - return null; -} -s = s.trim(); -Matcher m = p.matcher(s); -if (!m.matches() || s.compareToIgnoreCase("interval") == 0) { +try { + return fromCaseInsensitiveString(s); +} catch (IllegalArgumentException e) { return null; -} else { - long months = toLong(m.group(1)) * 12 + toLong(m.group(2)); - long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK; - microseconds += toLong(m.group(4)) * MICROS_PER_DAY; - microseconds += toLong(m.group(5)) * MICROS_PER_HOUR; - microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE; - microseconds += toLong(m.group(7)) * MICROS_PER_SECOND; - microseconds += toLong(m.group(8)) * MICROS_PER_MILLI; - microseconds += toLong(m.group(9)); - return new CalendarInterval((int) months, microseconds); } } /** - * Convert a string to CalendarInterval. Unlike fromString, this method can handle + * Convert a string to CalendarInterval. This method can handle * strings without the `interval` prefix and throws IllegalArgumentException * when the input string is not a valid interval. * * @throws IllegalArgumentException if the string is not a valid internal. */ public static CalendarInterval fromCaseInsensitiveString(String s) { Review comment: Let's keep them separate so far. And I will try to write flexible and common code for parsing string interval that could handle other features found in https://github.com/apache/spark/pull/26055 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix
MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix URL: https://github.com/apache/spark/pull/26079#discussion_r333484672 ## File path: common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java ## @@ -73,45 +72,50 @@ private static long toLong(String s) { * This method is case-insensitive. */ public static CalendarInterval fromString(String s) { -if (s == null) { - return null; -} -s = s.trim(); -Matcher m = p.matcher(s); -if (!m.matches() || s.compareToIgnoreCase("interval") == 0) { +try { + return fromCaseInsensitiveString(s); +} catch (IllegalArgumentException e) { return null; -} else { - long months = toLong(m.group(1)) * 12 + toLong(m.group(2)); - long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK; - microseconds += toLong(m.group(4)) * MICROS_PER_DAY; - microseconds += toLong(m.group(5)) * MICROS_PER_HOUR; - microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE; - microseconds += toLong(m.group(7)) * MICROS_PER_SECOND; - microseconds += toLong(m.group(8)) * MICROS_PER_MILLI; - microseconds += toLong(m.group(9)); - return new CalendarInterval((int) months, microseconds); } } /** - * Convert a string to CalendarInterval. Unlike fromString, this method can handle + * Convert a string to CalendarInterval. This method can handle * strings without the `interval` prefix and throws IllegalArgumentException * when the input string is not a valid interval. * * @throws IllegalArgumentException if the string is not a valid internal. */ public static CalendarInterval fromCaseInsensitiveString(String s) { Review comment: And your regexp is not tolerant to the order of interval units, see: ```sql spark-sql> select interval 'interval 1 microsecond 2 months'; NULL spark-sql> select interval 1 microsecond 2 months; interval 2 months 1 microseconds ``` This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix
MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix URL: https://github.com/apache/spark/pull/26079#discussion_r333483069 ## File path: common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java ## @@ -73,45 +72,50 @@ private static long toLong(String s) { * This method is case-insensitive. */ public static CalendarInterval fromString(String s) { -if (s == null) { - return null; -} -s = s.trim(); -Matcher m = p.matcher(s); -if (!m.matches() || s.compareToIgnoreCase("interval") == 0) { +try { + return fromCaseInsensitiveString(s); +} catch (IllegalArgumentException e) { return null; -} else { - long months = toLong(m.group(1)) * 12 + toLong(m.group(2)); - long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK; - microseconds += toLong(m.group(4)) * MICROS_PER_DAY; - microseconds += toLong(m.group(5)) * MICROS_PER_HOUR; - microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE; - microseconds += toLong(m.group(7)) * MICROS_PER_SECOND; - microseconds += toLong(m.group(8)) * MICROS_PER_MILLI; - microseconds += toLong(m.group(9)); - return new CalendarInterval((int) months, microseconds); } } /** - * Convert a string to CalendarInterval. Unlike fromString, this method can handle + * Convert a string to CalendarInterval. This method can handle * strings without the `interval` prefix and throws IllegalArgumentException * when the input string is not a valid interval. * * @throws IllegalArgumentException if the string is not a valid internal. */ public static CalendarInterval fromCaseInsensitiveString(String s) { Review comment: This PR introduced code duplication https://github.com/apache/spark/pull/8034 for your code https://github.com/apache/spark/pull/7355 5 years ago. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix
MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix URL: https://github.com/apache/spark/pull/26079#discussion_r333480766 ## File path: common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java ## @@ -73,45 +72,50 @@ private static long toLong(String s) { * This method is case-insensitive. */ public static CalendarInterval fromString(String s) { -if (s == null) { - return null; -} -s = s.trim(); -Matcher m = p.matcher(s); -if (!m.matches() || s.compareToIgnoreCase("interval") == 0) { +try { + return fromCaseInsensitiveString(s); +} catch (IllegalArgumentException e) { return null; -} else { - long months = toLong(m.group(1)) * 12 + toLong(m.group(2)); - long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK; - microseconds += toLong(m.group(4)) * MICROS_PER_DAY; - microseconds += toLong(m.group(5)) * MICROS_PER_HOUR; - microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE; - microseconds += toLong(m.group(7)) * MICROS_PER_SECOND; - microseconds += toLong(m.group(8)) * MICROS_PER_MILLI; - microseconds += toLong(m.group(9)); - return new CalendarInterval((int) months, microseconds); } } /** - * Convert a string to CalendarInterval. Unlike fromString, this method can handle + * Convert a string to CalendarInterval. This method can handle * strings without the `interval` prefix and throws IllegalArgumentException * when the input string is not a valid interval. * * @throws IllegalArgumentException if the string is not a valid internal. */ public static CalendarInterval fromCaseInsensitiveString(String s) { Review comment: Maybe something has been duplicated, and can be reused but this is heavy refactoring for this PR. For instance, `AstBuilder.visitInterval` gets already split interval units but `CalendarInterval.fromString()` uses regular expression to parse & split: https://github.com/apache/spark/blob/b10344956d672fca495d0684b351ab5cf9f210d8/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java#L50-L53 If you don't mind, I would try to do that in a separate PR. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix
MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix URL: https://github.com/apache/spark/pull/26079#discussion_r333469680 ## File path: common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java ## @@ -73,45 +72,50 @@ private static long toLong(String s) { * This method is case-insensitive. */ public static CalendarInterval fromString(String s) { -if (s == null) { - return null; -} -s = s.trim(); -Matcher m = p.matcher(s); -if (!m.matches() || s.compareToIgnoreCase("interval") == 0) { +try { + return fromCaseInsensitiveString(s); +} catch (IllegalArgumentException e) { return null; -} else { - long months = toLong(m.group(1)) * 12 + toLong(m.group(2)); - long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK; - microseconds += toLong(m.group(4)) * MICROS_PER_DAY; - microseconds += toLong(m.group(5)) * MICROS_PER_HOUR; - microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE; - microseconds += toLong(m.group(7)) * MICROS_PER_SECOND; - microseconds += toLong(m.group(8)) * MICROS_PER_MILLI; - microseconds += toLong(m.group(9)); - return new CalendarInterval((int) months, microseconds); } } /** - * Convert a string to CalendarInterval. Unlike fromString, this method can handle + * Convert a string to CalendarInterval. This method can handle * strings without the `interval` prefix and throws IllegalArgumentException * when the input string is not a valid interval. * * @throws IllegalArgumentException if the string is not a valid internal. */ public static CalendarInterval fromCaseInsensitiveString(String s) { Review comment: antlr parser does this as well but [it parses sql elements](https://github.com/apache/spark/blob/f2ead4d0b50715e3dec79ce762c31f145d46a5b5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala#L1907-L1961) like ```sql spark-sql> select interval 10 days 1 second; interval 1 weeks 3 days 1 seconds ``` here is only the place where we parse string values: ```sql spark-sql> select interval 'interval 10 days 1 second'; interval 1 weeks 3 days 1 seconds ``` This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix
MaxGekk commented on a change in pull request #26079: [SPARK-29369][SQL] Support string intervals without the `interval` prefix URL: https://github.com/apache/spark/pull/26079#discussion_r333469680 ## File path: common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java ## @@ -73,45 +72,50 @@ private static long toLong(String s) { * This method is case-insensitive. */ public static CalendarInterval fromString(String s) { -if (s == null) { - return null; -} -s = s.trim(); -Matcher m = p.matcher(s); -if (!m.matches() || s.compareToIgnoreCase("interval") == 0) { +try { + return fromCaseInsensitiveString(s); +} catch (IllegalArgumentException e) { return null; -} else { - long months = toLong(m.group(1)) * 12 + toLong(m.group(2)); - long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK; - microseconds += toLong(m.group(4)) * MICROS_PER_DAY; - microseconds += toLong(m.group(5)) * MICROS_PER_HOUR; - microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE; - microseconds += toLong(m.group(7)) * MICROS_PER_SECOND; - microseconds += toLong(m.group(8)) * MICROS_PER_MILLI; - microseconds += toLong(m.group(9)); - return new CalendarInterval((int) months, microseconds); } } /** - * Convert a string to CalendarInterval. Unlike fromString, this method can handle + * Convert a string to CalendarInterval. This method can handle * strings without the `interval` prefix and throws IllegalArgumentException * when the input string is not a valid interval. * * @throws IllegalArgumentException if the string is not a valid internal. */ public static CalendarInterval fromCaseInsensitiveString(String s) { Review comment: antlr parser does this as well but it parses sql elements like ```sql spark-sql> select interval 10 days 1 second; interval 1 weeks 3 days 1 seconds ``` here is only the place where we parse string values: ```sql spark-sql> select interval 'interval 10 days 1 second'; interval 1 weeks 3 days 1 seconds ``` This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org