cloud-fan commented on a change in pull request #26190: [SPARK-29532][SQL]
simplify interval string parsing
URL: https://github.com/apache/spark/pull/26190#discussion_r337884486
##########
File path:
common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java
##########
@@ -32,94 +32,11 @@
public static final long MICROS_PER_DAY = MICROS_PER_HOUR * 24;
public static final long MICROS_PER_WEEK = MICROS_PER_DAY * 7;
- /**
- * A function to generate regex which matches interval string's unit part
like "3 years".
- *
- * First, we can leave out some units in interval string, and we only care
about the value of
- * unit, so here we use non-capturing group to wrap the actual regex.
- * At the beginning of the actual regex, we should match spaces before the
unit part.
- * Next is the number part, starts with an optional "-" to represent
negative value. We use
- * capturing group to wrap this part as we need the value later.
- * Finally is the unit name, ends with an optional "s".
- */
- private static String unitRegex(String unit) {
- return "(?:\\s+(-?\\d+)\\s+" + unit + "s?)?";
- }
-
- private static Pattern p = Pattern.compile("interval" + unitRegex("year") +
unitRegex("month") +
- unitRegex("week") + unitRegex("day") + unitRegex("hour") +
unitRegex("minute") +
- unitRegex("second") + unitRegex("millisecond") + unitRegex("microsecond"),
- Pattern.CASE_INSENSITIVE);
-
- private static Pattern yearMonthPattern =
- Pattern.compile("^(?:['|\"])?([+|-])?(\\d+)-(\\d+)(?:['|\"])?$");
+ private static Pattern yearMonthPattern = Pattern.compile(
+ "^([+|-])?(\\d+)-(\\d+)$");
private static Pattern dayTimePattern = Pattern.compile(
- "^(?:['|\"])?([+|-])?((\\d+)
)?((\\d+):)?(\\d+):(\\d+)(\\.(\\d+))?(?:['|\"])?$");
-
- private static Pattern quoteTrimPattern =
Pattern.compile("^(?:['|\"])?(.*?)(?:['|\"])?$");
-
- private static long toLong(String s) {
- if (s == null) {
- return 0;
- } else {
- return Long.parseLong(s);
- }
- }
-
- /**
- * Convert a string to CalendarInterval. Return null if the input string is
not a valid interval.
- * This method is case-insensitive.
- */
- public static CalendarInterval fromString(String s) {
- try {
- return fromCaseInsensitiveString(s);
- } catch (IllegalArgumentException e) {
- return null;
- }
- }
-
- /**
- * Convert a string to CalendarInterval. This method can handle
- * strings without the `interval` prefix and throws IllegalArgumentException
- * when the input string is not a valid interval.
- *
- * @throws IllegalArgumentException if the string is not a valid internal.
- */
- public static CalendarInterval fromCaseInsensitiveString(String s) {
- if (s == null) {
- throw new IllegalArgumentException("Interval cannot be null");
- }
- String trimmed = s.trim();
- if (trimmed.isEmpty()) {
- throw new IllegalArgumentException("Interval cannot be blank");
- }
- String prefix = "interval";
- String intervalStr = trimmed;
- // Checks the given interval string does not start with the `interval`
prefix
- if (!intervalStr.regionMatches(true, 0, prefix, 0, prefix.length())) {
- // Prepend `interval` if it does not present because
- // the regular expression strictly require it.
- intervalStr = prefix + " " + trimmed;
- } else if (intervalStr.length() == prefix.length()) {
- throw new IllegalArgumentException("Interval string must have time
units");
- }
-
- Matcher m = p.matcher(intervalStr);
- if (!m.matches()) {
- throw new IllegalArgumentException("Invalid interval: " + s);
- }
-
- long months = toLong(m.group(1)) * 12 + toLong(m.group(2));
- long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK;
- microseconds += toLong(m.group(4)) * MICROS_PER_DAY;
- microseconds += toLong(m.group(5)) * MICROS_PER_HOUR;
- microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE;
- microseconds += toLong(m.group(7)) * MICROS_PER_SECOND;
- microseconds += toLong(m.group(8)) * MICROS_PER_MILLI;
- microseconds += toLong(m.group(9));
- return new CalendarInterval((int) months, microseconds);
- }
+ "^([+|-])?((\\d+) )?((\\d+):)?(\\d+):(\\d+)(\\.(\\d+))?$");
public static long toLongWithRange(String fieldName,
Review comment:
maybe it's better, but I'd like to avoid changing unrelated code.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]