This is an automated email from the ASF dual-hosted git repository. dmollitor pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 86c0419 HIVE-22685: Fix TestHiveSqlDateTimeFormatter To Work With New Year 2020 (David Mollitor, reviewed by Marta Kuczora) 86c0419 is described below commit 86c0419d6dcedb56994ada4e13a3b838d1ef683a Author: David Mollitor <dmolli...@apache.org> AuthorDate: Mon Jan 20 22:17:55 2020 -0500 HIVE-22685: Fix TestHiveSqlDateTimeFormatter To Work With New Year 2020 (David Mollitor, reviewed by Marta Kuczora) --- .../format/datetime/HiveSqlDateTimeFormatter.java | 87 +++++++---- .../datetime/TestHiveSqlDateTimeFormatter.java | 171 +++++++++++---------- 2 files changed, 146 insertions(+), 112 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java b/common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java index f6a52e9..3476012 100644 --- a/common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java +++ b/common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.common.format.datetime; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Optional; +import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import org.apache.commons.lang.StringUtils; @@ -45,6 +48,7 @@ import java.util.Collections; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Objects; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -396,15 +400,20 @@ import java.util.regex.Pattern; public class HiveSqlDateTimeFormatter implements Serializable { + private static final long serialVersionUID = 1L; + private static final int LONGEST_TOKEN_LENGTH = 5; private static final int LONGEST_ACCEPTED_PATTERN = 100; // for sanity's sake private static final int NANOS_MAX_LENGTH = 9; + private static final DateTimeFormatter MONTH_FORMATTER = DateTimeFormatter.ofPattern("MMM"); + public static final int AM = 0; public static final int PM = 1; - private static final DateTimeFormatter MONTH_FORMATTER = DateTimeFormatter.ofPattern("MMM"); public static final DateTimeFormatter DAY_OF_WEEK_FORMATTER = DateTimeFormatter.ofPattern("EEE"); - private String pattern; - private List<Token> tokens = new ArrayList<>(); + + private final String pattern; + private final List<Token> tokens; + private final Optional<LocalDateTime> now; private boolean formatExact = false; private static final Map<String, TemporalField> NUMERIC_TEMPORAL_TOKENS = @@ -485,6 +494,9 @@ public class HiveSqlDateTimeFormatter implements Serializable { * Token representation. */ public static class Token implements Serializable { + + private static final long serialVersionUID = 1L; + TokenType type; TemporalField temporalField; // for type TEMPORAL e.g. ChronoField.YEAR TemporalUnit temporalUnit; // for type TIMEZONE e.g. ChronoUnit.HOURS @@ -536,20 +548,39 @@ public class HiveSqlDateTimeFormatter implements Serializable { } } - public HiveSqlDateTimeFormatter(String pattern, boolean forParsing) { - setPattern(pattern, forParsing); + /** + * Construct a new instance. + * + * @param pattern Pattern to use for parsing or formatting + * @param forParsing Flag to indicate use of pattern + * @throws IllegalArgumentException if pattern is invalid + */ + public HiveSqlDateTimeFormatter(final String pattern, final boolean forParsing) { + this(pattern, forParsing, Optional.absent()); } /** - * Parse and perhaps verify the pattern. + * Construct a new instance. An optional LocalDateTime can be provided when + * parsing must populate a field provided in the format string does not + * specify the date and time to use. If none is provided, the current + * {@link LocalDateTime#now()} will be used for each call to parse and format. + * + * @param pattern Pattern to use for parsing or formatting + * @param forParsing Flag to indicate use of pattern + * @param now Set an arbitrary context of the current local time + * @throws IllegalArgumentException if pattern is invalid */ - private void setPattern(String pattern, boolean forParsing) { - assert pattern.length() < LONGEST_ACCEPTED_PATTERN : "The input format is too long"; - this.pattern = pattern; + @VisibleForTesting + HiveSqlDateTimeFormatter(final String pattern, final boolean forParsing, final Optional<LocalDateTime> now) { + this.pattern = Objects.requireNonNull(pattern, "Pattern cannot be null"); + this.now = Objects.requireNonNull(now); + + this.tokens = new ArrayList<>(); + + Preconditions.checkArgument(pattern.length() < LONGEST_ACCEPTED_PATTERN, "The input format is too long"); parsePatternToTokens(pattern); - // throw IllegalArgumentException if pattern is invalid if (forParsing) { verifyForParse(); } else { @@ -759,8 +790,8 @@ public class HiveSqlDateTimeFormatter implements Serializable { private void verifyForParse() { // create a list of tokens' temporal fields - ArrayList<TemporalField> temporalFields = new ArrayList<>(); - ArrayList<TemporalUnit> timeZoneTemporalUnits = new ArrayList<>(); + List<TemporalField> temporalFields = new ArrayList<>(); + List<TemporalUnit> timeZoneTemporalUnits = new ArrayList<>(); int roundYearCount=0, yearCount=0; boolean containsIsoFields=false, containsGregorianFields=false; for (Token token : tokens) { @@ -828,7 +859,7 @@ public class HiveSqlDateTimeFormatter implements Serializable { for (TemporalField tokenType : temporalFields) { if (Collections.frequency(temporalFields, tokenType) > 1) { throw new IllegalArgumentException( - "Invalid duplication of format element: multiple " + tokenType.toString() + "Invalid duplication of format element: multiple " + tokenType + " tokens provided."); } } @@ -934,10 +965,10 @@ public class HiveSqlDateTimeFormatter implements Serializable { value = 12; } try { - output = String.valueOf(value); + output = Integer.toString(value); output = padOrTruncateNumericTemporal(token, output); } catch (Exception e) { - throw new IllegalArgumentException("Value: " + value + " couldn't be cast to string.", e); + throw new IllegalArgumentException("Value: " + value + " could not be cast to string.", e); } } return output; @@ -1009,12 +1040,12 @@ public class HiveSqlDateTimeFormatter implements Serializable { return output; } - public Timestamp parseTimestamp(String fullInput){ + public Timestamp parseTimestamp(final String fullInput) { LocalDateTime ldt = LocalDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC); String substring; int index = 0; int value; - int timeZoneSign = 0, timeZoneHours = 0, timeZoneMinutes = 0; + int timeZoneHours = 0, timeZoneMinutes = 0; int iyyy = 0, iw = 0; for (Token token : tokens) { @@ -1032,7 +1063,7 @@ public class HiveSqlDateTimeFormatter implements Serializable { ldt = ldt.with(token.temporalField, value); } catch (DateTimeException e){ throw new IllegalArgumentException( - "Value " + value + " not valid for token " + token.toString()); + "Value " + value + " not valid for token " + token); } //update IYYY and IW if necessary @@ -1048,7 +1079,6 @@ public class HiveSqlDateTimeFormatter implements Serializable { case TIMEZONE: if (token.temporalUnit == ChronoUnit.HOURS) { String nextCharacter = fullInput.substring(index, index + 1); - timeZoneSign = "-".equals(nextCharacter) ? -1 : 1; if ("-".equals(nextCharacter) || "+".equals(nextCharacter)) { index++; } @@ -1159,7 +1189,7 @@ public class HiveSqlDateTimeFormatter implements Serializable { /** * Get the integer value of a temporal substring. */ - private int parseNumericTemporal(String substring, Token token){ + private int parseNumericTemporal(String substring, Token token) { checkFormatExact(substring, token); // exceptions to the rule @@ -1174,9 +1204,9 @@ public class HiveSqlDateTimeFormatter implements Serializable { String currentYearString; if (token.temporalField == ChronoField.YEAR) { - currentYearString = String.valueOf(LocalDateTime.now().getYear()); + currentYearString = Integer.toString(this.now.or(LocalDateTime.now()).getYear()); } else { - currentYearString = String.valueOf(LocalDateTime.now().get(IsoFields.WEEK_BASED_YEAR)); + currentYearString = Integer.toString(this.now.or(LocalDateTime.now()).get(IsoFields.WEEK_BASED_YEAR)); } //deal with round years @@ -1189,7 +1219,7 @@ public class HiveSqlDateTimeFormatter implements Serializable { } else if (valLast2Digits >= 50 && currLast2Digits < 50) { currFirst2Digits -= 1; } - substring = String.valueOf(currFirst2Digits) + substring; + substring = Integer.toString(currFirst2Digits) + substring; } else { // fill in prefix digits with current date substring = currentYearString.substring(0, 4 - substring.length()) + substring; } @@ -1291,7 +1321,7 @@ public class HiveSqlDateTimeFormatter implements Serializable { && !(token.fillMode || token.temporalField == ChronoField.NANO_OF_SECOND) && token.length != substring.length()) { throw new IllegalArgumentException( - "FX on and expected token length " + token.length + " for token " + token.toString() + "FX on and expected token length " + token.length + " for token " + token + " does not match substring (" + substring + ") length " + substring.length()); } } @@ -1328,8 +1358,8 @@ public class HiveSqlDateTimeFormatter implements Serializable { throw new IllegalArgumentException("Missing separator at index " + index); } if (formatExact && !token.string.equals(separatorsFound.toString())) { - throw new IllegalArgumentException("FX on and separator found: " + separatorsFound.toString() - + " doesn't match expected separator: " + token.string); + throw new IllegalArgumentException("FX on and separator found: " + separatorsFound + + " does not match expected separator: " + token.string); } return begin + separatorsFound.length(); @@ -1361,10 +1391,11 @@ public class HiveSqlDateTimeFormatter implements Serializable { */ private boolean nextTokenIs(String pattern, Token currentToken) { // make sure currentToken isn't the last one - if (tokens.indexOf(currentToken) == tokens.size() - 1) { + final int idx = tokens.indexOf(currentToken); + if (idx == tokens.size() - 1) { return false; } - Token nextToken = tokens.get(tokens.indexOf(currentToken) + 1); + Token nextToken = tokens.get(idx + 1); pattern = pattern.toLowerCase(); return (isTimeZoneToken(pattern) && TIME_ZONE_TOKENS.get(pattern) == nextToken.temporalUnit || isNumericTemporalToken(pattern) && NUMERIC_TEMPORAL_TOKENS.get(pattern) == nextToken.temporalField diff --git a/common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java b/common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java index 3abf28b..9c9b0be 100644 --- a/common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java +++ b/common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java @@ -18,31 +18,33 @@ package org.apache.hadoop.hive.common.format.datetime; -import com.sun.tools.javac.util.List; -import org.apache.hadoop.hive.common.type.Date; -import org.apache.hadoop.hive.common.type.Timestamp; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; -import java.time.LocalDate; +import java.time.Instant; import java.time.LocalDateTime; +import java.time.ZoneOffset; import java.time.temporal.ChronoField; import java.time.temporal.TemporalField; -import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import org.apache.hadoop.hive.common.type.Date; +import org.apache.hadoop.hive.common.type.Timestamp; import org.junit.Test; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; + +import com.google.common.base.Optional; /** * Tests HiveSqlDateTimeFormatter. */ - public class TestHiveSqlDateTimeFormatter { private HiveSqlDateTimeFormatter formatter; @Test public void testSetPattern() { - verifyPatternParsing(" ---yyyy-\'-:- -,.;/MM-dd--", new ArrayList<>(List.of( + verifyPatternParsing(" ---yyyy-\'-:- -,.;/MM-dd--", Arrays.asList( null, // represents separator, which has no temporal field ChronoField.YEAR, null, @@ -50,10 +52,10 @@ public class TestHiveSqlDateTimeFormatter { null, ChronoField.DAY_OF_MONTH, null - ))); + )); verifyPatternParsing("ymmdddhh24::mi:ss A.M. pm", 25, "ymmdddhh24::mi:ss A.M. pm", - new ArrayList<>(List.of( + Arrays.asList( ChronoField.YEAR, ChronoField.MONTH_OF_YEAR, ChronoField.DAY_OF_YEAR, @@ -62,7 +64,7 @@ public class TestHiveSqlDateTimeFormatter { null, ChronoField.SECOND_OF_MINUTE, null, ChronoField.AMPM_OF_DAY, null, ChronoField.AMPM_OF_DAY - ))); + )); } @Test @@ -153,8 +155,10 @@ public class TestHiveSqlDateTimeFormatter { checkFormatTs("YYYY-mm-dd: Q WW W", "2019-03-31 00:00:00", "2019-03-31: 1 13 5"); checkFormatTs("YYYY-mm-dd: Q WW W", "2019-04-01 00:00:00", "2019-04-01: 2 13 1"); checkFormatTs("YYYY-mm-dd: Q WW W", "2019-12-31 00:00:00", "2019-12-31: 4 53 5"); + } - //ISO 8601 + @Test + public void testFormatTimestampIso8601() { checkFormatTs("YYYY-MM-DD : IYYY-IW-ID", "2018-12-31 00:00:00", "2018-12-31 : 2019-01-01"); checkFormatTs("YYYY-MM-DD : IYYY-IW-ID", "2019-01-06 00:00:00", "2019-01-06 : 2019-01-07"); checkFormatTs("YYYY-MM-DD : IYYY-IW-ID", "2019-01-07 00:00:00", "2019-01-07 : 2019-02-01"); @@ -173,7 +177,8 @@ public class TestHiveSqlDateTimeFormatter { } private void checkFormatTs(String pattern, String input, String expectedOutput) { - formatter = new HiveSqlDateTimeFormatter(pattern, false); + formatter = new HiveSqlDateTimeFormatter(pattern, false, + Optional.of(LocalDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC))); assertEquals("Format timestamp to string failed with pattern: " + pattern, expectedOutput, formatter.format(Timestamp.valueOf(input))); } @@ -190,33 +195,31 @@ public class TestHiveSqlDateTimeFormatter { } private void checkFormatDate(String pattern, String input, String expectedOutput) { - formatter = new HiveSqlDateTimeFormatter(pattern, false); + formatter = new HiveSqlDateTimeFormatter(pattern, false, + Optional.of(LocalDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC))); assertEquals("Format date to string failed with pattern: " + pattern, expectedOutput, formatter.format(Date.valueOf(input))); } @Test public void testParseTimestamp() { - String thisYearString = String.valueOf(LocalDateTime.now().getYear()); - int firstTwoDigits = getFirstTwoDigits(); - //y - checkParseTimestamp("y-mm-dd", "0-02-03", thisYearString.substring(0, 3) + "0-02-03 00:00:00"); - checkParseTimestamp("yy-mm-dd", "00-02-03", thisYearString.substring(0, 2) + "00-02-03 00:00:00"); - checkParseTimestamp("yyy-mm-dd", "000-02-03", thisYearString.substring(0, 1) + "000-02-03 00:00:00"); - checkParseTimestamp("yyyy-mm-dd", "000-02-03", thisYearString.substring(0, 1) + "000-02-03 00:00:00"); - checkParseTimestamp("rr-mm-dd", "0-02-03", thisYearString.substring(0, 3) + "0-02-03 00:00:00"); - checkParseTimestamp("rrrr-mm-dd", "000-02-03", thisYearString.substring(0, 1) + "000-02-03 00:00:00"); + checkParseTimestamp("y-mm-dd", "0-02-03", "1970-02-03 00:00:00"); + checkParseTimestamp("yy-mm-dd", "00-02-03", "1900-02-03 00:00:00"); + checkParseTimestamp("yyy-mm-dd", "000-02-03", "1000-02-03 00:00:00"); + checkParseTimestamp("yyyy-mm-dd", "000-02-03", "1000-02-03 00:00:00"); + checkParseTimestamp("rr-mm-dd", "0-02-03", "1970-02-03 00:00:00"); + checkParseTimestamp("rrrr-mm-dd", "000-02-03", "1000-02-03 00:00:00"); //rr, rrrr - checkParseTimestamp("rr-mm-dd", "00-02-03", firstTwoDigits + 1 + "00-02-03 00:00:00"); - checkParseTimestamp("rr-mm-dd", "49-02-03", firstTwoDigits + 1 + "49-02-03 00:00:00"); - checkParseTimestamp("rr-mm-dd", "50-02-03", firstTwoDigits + "50-02-03 00:00:00"); - checkParseTimestamp("rr-mm-dd", "99-02-03", firstTwoDigits + "99-02-03 00:00:00"); - checkParseTimestamp("rrrr-mm-dd", "00-02-03", firstTwoDigits + 1 + "00-02-03 00:00:00"); - checkParseTimestamp("rrrr-mm-dd", "49-02-03", firstTwoDigits + 1 + "49-02-03 00:00:00"); - checkParseTimestamp("rrrr-mm-dd", "50-02-03", firstTwoDigits + "50-02-03 00:00:00"); - checkParseTimestamp("rrrr-mm-dd", "99-02-03", firstTwoDigits + "99-02-03 00:00:00"); + checkParseTimestamp("rr-mm-dd", "00-02-03", "2000-02-03 00:00:00"); + checkParseTimestamp("rr-mm-dd", "49-02-03", "2049-02-03 00:00:00"); + checkParseTimestamp("rr-mm-dd", "50-02-03", "1950-02-03 00:00:00"); + checkParseTimestamp("rr-mm-dd", "99-02-03", "1999-02-03 00:00:00"); + checkParseTimestamp("rrrr-mm-dd", "00-02-03", "2000-02-03 00:00:00"); + checkParseTimestamp("rrrr-mm-dd", "49-02-03", "2049-02-03 00:00:00"); + checkParseTimestamp("rrrr-mm-dd", "50-02-03", "1950-02-03 00:00:00"); + checkParseTimestamp("rrrr-mm-dd", "99-02-03", "1999-02-03 00:00:00"); //everything else checkParseTimestamp("yyyy-mm-ddThh24:mi:ss.ff8z", "2018-02-03T04:05:06.5665Z", "2018-02-03 04:05:06.5665"); @@ -230,7 +233,7 @@ public class TestHiveSqlDateTimeFormatter { checkParseTimestamp("YYYY-MM-DD HH24:MI TZH:TZM", "2019-1-1 14:00-1:30", "2019-01-01 14:00:00"); checkParseTimestamp("yyyy-mm-dd TZM:TZH", "2019-01-01 1 -3", "2019-01-01 00:00:00"); checkParseTimestamp("yyyy-mm-dd TZH:TZM", "2019-01-01 -0:30", "2019-01-01 00:00:00"); - checkParseTimestamp("TZM/YYY-MM-TZH/DD", "0/333-01-11/02", "2333-01-02 00:00:00"); + checkParseTimestamp("TZM/YYY-MM-TZH/DD", "0/333-01-11/02", "1333-01-02 00:00:00"); checkParseTimestamp("YYYY-MM-DD HH12:MI AM", "2019-01-01 11:00 p.m.", "2019-01-01 23:00:00"); checkParseTimestamp("YYYY-MM-DD HH12:MI A.M..", "2019-01-01 11:00 pm.", "2019-01-01 23:00:00"); checkParseTimestamp("MI DD-TZM-YYYY-MM TZHPM SS:HH12.FF9", @@ -267,81 +270,77 @@ public class TestHiveSqlDateTimeFormatter { //letters and numbers are delimiters to each other, respectively checkParseDate("yyyy-ddMONTH", "2018-4March", "2018-03-04"); checkParseDate("yyyy-MONTHdd", "2018-March4", "2018-03-04"); - //ISO 8601 + } + + @Test + public void testParseTimestampISO8601() { checkParseTimestamp("IYYY-IW-ID", "2019-01-01", "2018-12-31 00:00:00"); checkParseTimestamp("IYYY-IW-ID", "2019-01-07", "2019-01-06 00:00:00"); checkParseTimestamp("IYYY-IW-ID", "2019-02-01", "2019-01-07 00:00:00"); checkParseTimestamp("IYYY-IW-ID", "2019-52-07", "2019-12-29 00:00:00"); checkParseTimestamp("IYYY-IW-ID", "2020-01-01", "2019-12-30 00:00:00"); - checkParseTimestamp("IYYY-IW-ID", "020-01-04", thisYearString.substring(0, 1) + "020-01-02 00:00:00"); - checkParseTimestamp("IYY-IW-ID", "020-01-04", thisYearString.substring(0, 1) + "020-01-02 00:00:00"); - checkParseTimestamp("IYY-IW-ID", "20-01-04", thisYearString.substring(0, 2) + "20-01-02 00:00:00"); - checkParseTimestamp("IY-IW-ID", "20-01-04", thisYearString.substring(0, 2) + "20-01-02 00:00:00"); + checkParseTimestamp("IYYY-IW-ID", "020-01-04", "1020-01-06 00:00:00"); + checkParseTimestamp("IYY-IW-ID", "020-01-04", "1020-01-06 00:00:00"); + checkParseTimestamp("IYY-IW-ID", "20-01-04", "1920-01-01 00:00:00"); + checkParseTimestamp("IY-IW-ID", "20-01-04", "1920-01-01 00:00:00"); checkParseTimestamp("IYYY-IW-DAY", "2019-01-monday", "2018-12-31 00:00:00"); checkParseTimestamp("IYYY-IW-Day", "2019-01-Sunday", "2019-01-06 00:00:00"); checkParseTimestamp("IYYY-IW-Dy", "2019-02-MON", "2019-01-07 00:00:00"); checkParseTimestamp("IYYY-IW-DY", "2019-52-sun", "2019-12-29 00:00:00"); checkParseTimestamp("IYYY-IW-dy", "2020-01-Mon", "2019-12-30 00:00:00"); - //Tests for these patterns would need changing every decade if done in the above way. //Thursday of the first week in an ISO year always matches the Gregorian year. - checkParseTimestampIso("IY-IW-ID", "0-01-04", "iw, yyyy", "01, " + thisYearString.substring(0, 3) + "0"); - checkParseTimestampIso("I-IW-ID", "0-01-04", "iw, yyyy", "01, " + thisYearString.substring(0, 3) + "0"); + checkParseTimestampIso("IY-IW-ID", "0-01-04", "iw, yyyy", "01, 1970"); + checkParseTimestampIso("I-IW-ID", "0-01-04", "iw, yyyy", "01, 1970"); //time patterns are allowed; date patterns are not checkParseTimestamp("IYYY-IW-ID hh24:mi:ss", "2019-01-01 01:02:03", "2018-12-31 01:02:03"); } - private int getFirstTwoDigits() { - int thisYear = LocalDateTime.now().getYear(); - int firstTwoDigits = thisYear / 100; - if (thisYear % 100 < 50) { - firstTwoDigits -= 1; - } - return firstTwoDigits; - } - private void checkParseTimestamp(String pattern, String input, String expectedOutput) { - formatter = new HiveSqlDateTimeFormatter(pattern, true); - assertEquals("Parse string to timestamp failed. Pattern: " + pattern, - Timestamp.valueOf(expectedOutput), formatter.parseTimestamp(input)); + formatter = new HiveSqlDateTimeFormatter(pattern, true, + Optional.of(LocalDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC))); + assertEquals("Parse string to timestamp failed. Pattern: " + pattern, Timestamp.valueOf(expectedOutput), + formatter.parseTimestamp(input)); } - private void checkParseTimestampIso(String parsePattern, String input, String formatPattern, - String expectedOutput) { - formatter = new HiveSqlDateTimeFormatter(parsePattern, true); + private void checkParseTimestampIso(String parsePattern, String input, String formatPattern, String expectedOutput) { + formatter = + new HiveSqlDateTimeFormatter(parsePattern, true, + Optional.of(LocalDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC))); Timestamp ts = formatter.parseTimestamp(input); - formatter = new HiveSqlDateTimeFormatter(formatPattern, false); + formatter = + new HiveSqlDateTimeFormatter(formatPattern, false, + Optional.of(LocalDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC))); assertEquals(expectedOutput, formatter.format(ts)); } @Test public void testParseDate() { - - String thisYearString = String.valueOf(LocalDateTime.now().getYear()); - int firstTwoDigits = getFirstTwoDigits(); //y - checkParseDate("y-mm-dd", "0-02-03", thisYearString.substring(0, 3) + "0-02-03"); - checkParseDate("yy-mm-dd", "00-02-03", thisYearString.substring(0, 2) + "00-02-03"); - checkParseDate("yyy-mm-dd", "000-02-03", thisYearString.substring(0, 1) + "000-02-03"); - checkParseDate("yyyy-mm-dd", "000-02-03", thisYearString.substring(0, 1) + "000-02-03"); - checkParseDate("rr-mm-dd", "0-02-03", thisYearString.substring(0, 3) + "0-02-03"); - checkParseDate("rrrr-mm-dd", "000-02-03", thisYearString.substring(0, 1) + "000-02-03"); + checkParseDate("y-mm-dd", "0-02-03", "1970-02-03"); + checkParseDate("yy-mm-dd", "00-02-03", "1900-02-03"); + checkParseDate("yyy-mm-dd", "000-02-03", "1000-02-03"); + checkParseDate("yyyy-mm-dd", "000-02-03", "1000-02-03"); + checkParseDate("rr-mm-dd", "0-02-03", "1970-02-03"); + checkParseDate("rrrr-mm-dd", "000-02-03", "1000-02-03"); //rr, rrrr - checkParseDate("rr-mm-dd", "00-02-03", firstTwoDigits + 1 + "00-02-03"); - checkParseDate("rr-mm-dd", "49-02-03", firstTwoDigits + 1 + "49-02-03"); - checkParseDate("rr-mm-dd", "50-02-03", firstTwoDigits + "50-02-03"); - checkParseDate("rr-mm-dd", "99-02-03", firstTwoDigits + "99-02-03"); - checkParseDate("rrrr-mm-dd", "00-02-03", firstTwoDigits + 1 + "00-02-03"); - checkParseDate("rrrr-mm-dd", "49-02-03", firstTwoDigits + 1 + "49-02-03"); - checkParseDate("rrrr-mm-dd", "50-02-03", firstTwoDigits + "50-02-03"); - checkParseDate("rrrr-mm-dd", "99-02-03", firstTwoDigits + "99-02-03"); + checkParseDate("rr-mm-dd", "00-02-03", "2000-02-03"); + checkParseDate("rr-mm-dd", "49-02-03", "2049-02-03"); + checkParseDate("rr-mm-dd", "50-02-03", "1950-02-03"); + checkParseDate("rr-mm-dd", "99-02-03", "1999-02-03"); + checkParseDate("rrrr-mm-dd", "00-02-03", "2000-02-03"); + checkParseDate("rrrr-mm-dd", "49-02-03", "2049-02-03"); + checkParseDate("rrrr-mm-dd", "50-02-03", "1950-02-03"); + checkParseDate("rrrr-mm-dd", "99-02-03", "1999-02-03"); checkParseDate("yyyy-mm-dd hh mi ss.ff7", "2018/01/01 2.2.2.55", "2018-01-01"); checkParseDate("dd/MonthT/yyyy", "31/AugustT/2020", "2020-08-31"); checkParseDate("dd/MonthT/yyyy", "31/MarchT/2020", "2020-03-31"); + } - //ISO 8601 + @Test + public void testParseDateISO8601() { checkParseDate("IYYY-IW-ID", "2019-01-01", "2018-12-31"); checkParseDate("IW-ID-IYYY", "01-02-2019", "2019-01-01"); checkParseDate("ID-IW-IYYY", "02-01-2019", "2019-01-01"); @@ -352,7 +351,8 @@ public class TestHiveSqlDateTimeFormatter { } private void checkParseDate(String pattern, String input, String expectedOutput) { - formatter = new HiveSqlDateTimeFormatter(pattern, true); + formatter = new HiveSqlDateTimeFormatter(pattern, true, + Optional.of(LocalDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC))); assertEquals("Parse string to date failed. Pattern: " + pattern, Date.valueOf(expectedOutput), formatter.parseDate(input)); } @@ -382,7 +382,8 @@ public class TestHiveSqlDateTimeFormatter { private void verifyBadPattern(String string, boolean forParsing) { try { - formatter = new HiveSqlDateTimeFormatter(string, forParsing); + formatter = new HiveSqlDateTimeFormatter(string, forParsing, + Optional.of(LocalDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC))); fail("Bad pattern " + string + " should have thrown IllegalArgumentException but didn't"); } catch (Exception e) { assertEquals("Expected IllegalArgumentException, got another exception.", @@ -446,7 +447,7 @@ public class TestHiveSqlDateTimeFormatter { public void testFmFx() { checkParseTimestamp("FXDD-FMMM-YYYY hh12 am", "01-1-1998 12 PM", "1998-01-01 12:00:00"); checkParseTimestamp("FXFMDD-MM-YYYY hh12 am", "1-01-1998 12 PM", "1998-01-01 12:00:00"); - checkParseTimestamp("FXFMiyyy-iw-id hh24:mi:ss", "019-01-02 17:00:05", "2019-01-01 17:00:05"); + checkParseTimestamp("FXFMiyyy-iw-id hh24:mi:ss", "019-01-02 17:00:05", "1019-01-05 17:00:05"); verifyBadParseString("FXFMiyyy-iw-id hh24:mi:ss", "019-01-02 17:0:05"); //ff[1-9] unaffected checkParseTimestamp("FXFMDD-MM-YYYY FMff2", "1-01-1998 4", "1998-01-01 00:00:00.4"); @@ -469,8 +470,8 @@ public class TestHiveSqlDateTimeFormatter { // Characters matter upon parsing verifyBadParseString("\"Year! \"YYYY \"m\" MM \"d\" DD.\"!\"", "Year 3000 m 3 d 1,!"); // non-numeric characters in text counts as a delimiter - checkParseDate("yyyy\"m\"mm\"d\"dd", "19m1d1", LocalDate.now().getYear() / 100 + "19-01-01"); - checkParseDate("yyyy\"[\"mm\"]\"dd", "19[1]1", LocalDate.now().getYear() / 100 + "19-01-01"); + checkParseDate("yyyy\"m\"mm\"d\"dd", "19m1d1", "1919-01-01"); + checkParseDate("yyyy\"[\"mm\"]\"dd", "19[1]1", "1919-01-01"); // parse character temporals correctly checkParseDate("dd/Month\"arch\"/yyyy", "31/Marcharch/2020", "2020-03-31"); checkParseDate("dd/Month\"ember\"/yyyy", "31/Decemberember/2020", "2020-12-31"); @@ -502,13 +503,14 @@ public class TestHiveSqlDateTimeFormatter { * -sum of token.lengths * -concatenation of token.strings */ - private void verifyPatternParsing(String pattern, ArrayList<TemporalField> temporalFields) { + private void verifyPatternParsing(String pattern, List<TemporalField> temporalFields) { verifyPatternParsing(pattern, pattern.length(), pattern.toLowerCase(), temporalFields); } private void verifyPatternParsing(String pattern, int expectedPatternLength, - String expectedPattern, ArrayList<TemporalField> temporalFields) { - formatter = new HiveSqlDateTimeFormatter(pattern, false); + String expectedPattern, List<TemporalField> temporalFields) { + formatter = new HiveSqlDateTimeFormatter(pattern, false, + Optional.of(LocalDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC))); assertEquals(temporalFields.size(), formatter.getTokens().size()); StringBuilder sb = new StringBuilder(); int actualPatternLength = 0; @@ -524,7 +526,8 @@ public class TestHiveSqlDateTimeFormatter { } private void verifyBadParseString(String pattern, String string) { - formatter = new HiveSqlDateTimeFormatter(pattern, true); + formatter = new HiveSqlDateTimeFormatter(pattern, true, + Optional.of(LocalDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC))); try { Timestamp output = formatter.parseTimestamp(string); fail("Parse string to timestamp should have failed.\nString: " + string + "\nPattern: "