This is an automated email from the ASF dual-hosted git repository.
kuczoram pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 9890919 HIVE-21576: Introduce CAST...FORMAT and limited list of SQL:2016 datetime formats (Karen Coppage via Marta Kuczora)
9890919 is described below
commit 9890919b2d517d1d57bc8ac1c06c47507f54f5a3
Author: Karen Coppage <[email protected]>
AuthorDate: Fri Jun 21 10:21:55 2019 +0200
HIVE-21576: Introduce CAST...FORMAT and limited list of SQL:2016 datetime formats (Karen Coppage via Marta Kuczora)
---
.../format/datetime/HiveSqlDateTimeFormatter.java | 885 +++++++++++++++++++++
.../hive/common/format/datetime/package-info.java | 22 +
.../datetime/TestHiveSqlDateTimeFormatter.java | 330 ++++++++
.../hive/common/format/datetime/package-info.java | 22 +
.../hadoop/hive/ql/exec/FunctionRegistry.java | 1 +
.../hadoop/hive/ql/parse/IdentifiersParser.g | 14 +-
.../hive/ql/udf/generic/GenericUDFCastFormat.java | 252 ++++++
.../ql/udf/generic/TestGenericUDFCastFormat.java | 188 +++++
.../cast_datetime_with_sql_2016_format.q | 45 ++
.../cast_datetime_with_sql_2016_format.q.out | 329 ++++++++
.../results/clientpositive/show_functions.q.out | 2 +
11 files changed, 2088 insertions(+), 2 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java b/common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java
new file mode 100644
index 0000000..4e024a3
--- /dev/null
+++ b/common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java
@@ -0,0 +1,885 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+import java.time.DateTimeException;
+import java.time.Duration;
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
+import java.time.temporal.ChronoField;
+import java.time.temporal.ChronoUnit;
+import java.time.temporal.TemporalField;
+import java.time.temporal.TemporalUnit;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.TimeZone;
+
+/**
+ * Formatter using SQL:2016 datetime patterns.
+ *
+ * For all tokens:
+ * - Patterns are case-insensitive, except AM/PM and T/Z. See these sections for more details.
+ * - For string to datetime conversion, no duplicate format tokens are allowed, including tokens
+ *   that have the same meaning but different lengths ("Y" and "YY" conflict) or different
+ *   behaviors ("RR" and "YY" conflict).
+ *
+ * For all numeric tokens:
+ * - The "expected length" of input/output is the number of characters in the token (e.g. "YYY": 3,
+ *   "Y": 1, and so on), with some exceptions (see map SPECIAL_LENGTHS).
+ * - For string to datetime conversion, inputs of fewer digits than expected are accepted if
+ *   followed by a delimiter, e.g. format="YYYY-MM-DD", input="19-1-1", output=2019-01-01 00:00:00.
+ * - For datetime to string conversion, output is left padded with zeros, e.g. format="DD SSSSS",
+ *   input=2019-01-01 00:00:03, output="01 00003".
+ *
+ *
+ * Accepted format tokens:
+ * Note: "|" means "or". "Delimiter" means a separator, tokens T or Z, or end of input.
+ *
+ * A. Temporal tokens
+ * YYYY
+ * 4-digit year
+ * - For string to datetime conversion, prefix digits for 1, 2, and 3-digit inputs are obtained
+ *   from current date
+ *   E.g. input=‘9-01-01’, pattern=‘YYYY-MM-DD’, current year=2020, output=2029-01-01 00:00:00
+ *
+ *
+ * YYY
+ * Last 3 digits of a year
+ * - Gets the prefix digit from current date.
+ * - Can accept fewer digits than 3, similarly to YYYY.
+ *
+ * YY
+ * Last 2 digits of a year
+ * - Gets the 2 prefix digits from current date.
+ * - Can accept fewer digits than 2, similarly to YYYY.
+ *
+ * Y
+ * Last digit of a year
+ * - Gets the 3 prefix digits from current date.
+ *
+ * RRRR
+ * 4-digit rounded year
+ * - String to datetime conversion:
+ *   - If 2 digits are provided then acts like RR.
+ *   - If 1, 3 or 4 digits are provided then acts like YYYY.
+ * - For datetime to string conversion, acts like YYYY.
+ *
+ * RR
+ * 2-digit rounded year
+ * - String to datetime conversion:
+ *   - Semantics:
+ *     Input:      Last 2 digits of current year:   First 2 digits of output:
+ *     0 to 49     00 to 49                         First 2 digits of current year
+ *     0 to 49     50 to 99                         First 2 digits of current year + 1
+ *     50 to 99    00 to 49                         First 2 digits of current year - 1
+ *     50 to 99    50 to 99                         First 2 digits of current year
+ *   - If a 1-digit year is provided followed by a delimiter, falls back to YYYY with 1-digit year
+ *     input.
+ * - For datetime to string conversion, acts like YY.
+ *
+ * MM
+ * Month (1-12)
+ * - For string to datetime conversion, conflicts with DDD.
+ *
+ * DD
+ * Day of month (1-31)
+ * - For string to datetime conversion, conflicts with DDD.
+ *
+ * DDD
+ * Day of year (1-366)
+ * - For string to datetime conversion, conflicts with DD and MM.
+ *
+ * HH
+ * Hour of day (1-12)
+ * - If no AM/PM provided then defaults to AM.
+ * - In string to datetime conversion, conflicts with SSSSS and HH24.
+ *
+ * HH12
+ * Hour of day (1-12)
+ * See HH.
+ *
+ * HH24
+ * Hour of day (0-23)
+ * - In string to datetime conversion, conflicts with SSSSS, HH12 and AM/PM.
+ *
+ * MI
+ * Minute of hour (0-59)
+ * - In string to datetime conversion, conflicts with SSSSS.
+ *
+ * SS
+ * Second of minute (0-59)
+ * - In string to datetime conversion, conflicts with SSSSS.
+ *
+ * SSSSS
+ * Second of Day (0-86399)
+ * - In string to datetime conversion, conflicts with SS, HH, HH12, HH24, MI, AM/PM.
+ *
+ * FF[1..9]
+ * Fraction of second
+ * - 1..9 indicates the number of decimal digits. "FF" (no number of digits specified) is also
+ *   accepted.
+ * - In datetime to string conversion, "FF" will omit trailing zeros, or output "0" if subsecond
+ *   value is 0.
+ * - In string to datetime conversion, fewer digits than expected are accepted if followed by a
+ *   delimiter. "FF" acts like "FF9".
+ *
+ * AM|A.M.
+ * Meridiem indicator or AM/PM
+ * - Datetime to string conversion:
+ *   - AM and PM mean the exact same thing in the pattern.
+ *     e.g. input=2019-01-01 20:00, format=“AM”, output=“PM”.
+ *   - Retains the exact format (capitalization and length) provided in the pattern string. If p.m.
+ *     is in the pattern, we expect a.m. or p.m. in the output; if AM is in the pattern, we expect
+ *     AM or PM in the output.
+ * - String to datetime conversion:
+ *   - Conflicts with HH24 and SSSSS.
+ *   - It doesn’t matter which meridiem indicator is in the pattern.
+ *     E.g. input="2019-01-01 11:00 p.m.", pattern="YYYY-MM-DD HH12:MI AM", output=2019-01-01 23:00:00
+ *
+ * PM|P.M.
+ * Meridiem indicator
+ * See AM|A.M.
+ *
+ * B. Time zone tokens
+ * TZH
+ * Time zone offset hour (-15 to +15)
+ * - 3-character-long input is expected: 1 character for the sign and 2 digits for the value.
+ *   e.g. “+10”, “-05”
+ * - 2-digit input is accepted without the sign, e.g. “04”.
+ * - Both these 2 and 3-digit versions are accepted even if not followed by separators.
+ * - Disabled for timestamp to string and date to string conversion, as timestamp and date are time
+ *   zone agnostic.
+ *
+ * TZM
+ * Time zone offset minute (0-59)
+ * - For string to datetime conversion:
+ *   - TZH token is required.
+ *   - Unsigned; sign comes from TZH.
+ *   - Therefore time zone offsets like “-30” minutes should be expressed thus: input=“-00:30”
+ *     pattern=“TZH:TZM”.
+ * - Disabled for timestamp to string and date to string conversion, as timestamp and date are time
+ *   zone agnostic.
+ *
+ * C. Separators
+ * -|.|/|,|'|;|:|<space>
+ * Separator
+ * - Uses loose matching. Existence of a sequence of separators in the format should match the
+ *   existence of a sequence of separators in the input regardless of the types of the separator or
+ *   the length of the sequence where length > 1. E.g. input=“2019-. ;10/10”, pattern=“YYYY-MM-DD”
+ *   is valid; input=“20191010”, pattern=“YYYY-MM-DD” is not valid.
+ * - If the last separator character in the separator substring is "-" and is immediately followed
+ *   by a time zone hour (tzh) token, it's a negative sign and not counted as a separator, UNLESS
+ *   this is the only possible separator character in the separator substring (in which case it is
+ *   not counted as the tzh's negative sign).
+ *
+ * D. ISO 8601 delimiters
+ * T
+ * ISO 8601 delimiter
+ * - Serves as a delimiter.
+ * - Function is to support formats like “YYYY-MM-DDTHH24:MI:SS.FF9Z”, “YYYY-MM-DD-HH24:MI:SSZ”
+ * - For datetime to string conversion, output is always capitalized ("T"), even if lowercase ("t")
+ *   is provided in the pattern.
+ *
+ * Z
+ * ISO 8601 delimiter
+ * See T.
+ */
+
+public class HiveSqlDateTimeFormatter {
+
+ private static final int LONGEST_TOKEN_LENGTH = 5;
+ private static final int LONGEST_ACCEPTED_PATTERN = 100; // for sanity's sake
+ private static final long MINUTES_PER_HOUR = 60;
+ private static final int NANOS_MAX_LENGTH = 9;
+ public static final int AM = 0;
+ public static final int PM = 1;
+ private String pattern;
+ private List<Token> tokens = new ArrayList<>();
+
+ private static final Map<String, TemporalField> TEMPORAL_TOKENS =
+ ImmutableMap.<String, TemporalField>builder()
+ .put("yyyy", ChronoField.YEAR).put("yyy", ChronoField.YEAR)
+ .put("yy", ChronoField.YEAR).put("y", ChronoField.YEAR)
+ .put("rrrr", ChronoField.YEAR).put("rr", ChronoField.YEAR)
+ .put("mm", ChronoField.MONTH_OF_YEAR)
+ .put("dd", ChronoField.DAY_OF_MONTH)
+ .put("ddd", ChronoField.DAY_OF_YEAR)
+ .put("hh", ChronoField.HOUR_OF_AMPM)
+ .put("hh12", ChronoField.HOUR_OF_AMPM)
+ .put("hh24", ChronoField.HOUR_OF_DAY)
+ .put("mi", ChronoField.MINUTE_OF_HOUR)
+ .put("ss", ChronoField.SECOND_OF_MINUTE)
+ .put("sssss", ChronoField.SECOND_OF_DAY)
+ .put("ff1", ChronoField.NANO_OF_SECOND).put("ff2",
ChronoField.NANO_OF_SECOND)
+ .put("ff3", ChronoField.NANO_OF_SECOND).put("ff4",
ChronoField.NANO_OF_SECOND)
+ .put("ff5", ChronoField.NANO_OF_SECOND).put("ff6",
ChronoField.NANO_OF_SECOND)
+ .put("ff7", ChronoField.NANO_OF_SECOND).put("ff8",
ChronoField.NANO_OF_SECOND)
+ .put("ff9", ChronoField.NANO_OF_SECOND).put("ff",
ChronoField.NANO_OF_SECOND)
+ .put("a.m.", ChronoField.AMPM_OF_DAY).put("am",
ChronoField.AMPM_OF_DAY)
+ .put("p.m.", ChronoField.AMPM_OF_DAY).put("pm",
ChronoField.AMPM_OF_DAY)
+ .build();
+
+ private static final Map<String, TemporalUnit> TIME_ZONE_TOKENS =
+ ImmutableMap.<String, TemporalUnit>builder()
+ .put("tzh", ChronoUnit.HOURS).put("tzm", ChronoUnit.MINUTES).build();
+
+ private static final List<String> VALID_ISO_8601_DELIMITERS =
+ ImmutableList.of("t", "z");
+
+ private static final List<String> VALID_SEPARATORS =
+ ImmutableList.of("-", ":", " ", ".", "/", ";", "\'", ",");
+
+ private static final Map<String, Integer> SPECIAL_LENGTHS = ImmutableMap.<String, Integer>builder()
+ .put("hh12", 2).put("hh24", 2).put("tzm", 2).put("am", 4).put("pm", 4)
+ .put("ff1", 1).put("ff2", 2).put("ff3", 3).put("ff4", 4).put("ff5", 5)
+ .put("ff6", 6).put("ff7", 7).put("ff8", 8).put("ff9", 9).put("ff", 9)
+ .build();
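+
+ // Editorial note: SPECIAL_LENGTHS covers tokens whose expected digit count
+ // differs from the token string's length, e.g. "hh12" is 4 characters but
+ // reads/writes 2 digits, and "ff" defaults to 9 fractional digits.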
+
+ /**
+ * Represents broad categories of tokens.
+ */
+ public enum TokenType {
+ TEMPORAL,
+ SEPARATOR,
+ TIMEZONE,
+ ISO_8601_DELIMITER
+ }
+
+ /**
+ * Token representation.
+ */
+ public static class Token {
+ TokenType type;
+ TemporalField temporalField; // for type TEMPORAL e.g. ChronoField.YEAR
+ TemporalUnit temporalUnit; // for type TIMEZONE e.g. ChronoUnit.HOURS
+ String string; // pattern string, e.g. "yyy"
+ int length; // length (e.g. YYY: 3, FF8: 8)
+
+ public Token(TemporalField temporalField, String string, int length) {
+ this(TokenType.TEMPORAL, temporalField, null, string, length);
+ }
+
+ public Token(TemporalUnit temporalUnit, String string, int length) {
+ this(TokenType.TIMEZONE, null, temporalUnit, string, length);
+ }
+
+ public Token(TokenType tokenType, String string) {
+ this(tokenType, null, null, string, string.length());
+ }
+
+ public Token(TokenType tokenType, TemporalField temporalField, TemporalUnit temporalUnit,
+ String string, int length) {
+ this.type = tokenType;
+ this.temporalField = temporalField;
+ this.temporalUnit = temporalUnit;
+ this.string = string;
+ this.length = length;
+ }
+
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append(string);
+ sb.append(" type: ");
+ sb.append(type);
+ if (temporalField != null) {
+ sb.append(" temporalField: ");
+ sb.append(temporalField);
+ } else if (temporalUnit != null) {
+ sb.append(" temporalUnit: ");
+ sb.append(temporalUnit);
+ }
+ return sb.toString();
+ }
+ }
+
+ public HiveSqlDateTimeFormatter(String pattern, boolean forParsing) {
+ setPattern(pattern, forParsing);
+ }
+
+ /**
+ * Parse and perhaps verify the pattern.
+ */
+ private void setPattern(String pattern, boolean forParsing) {
+ assert pattern.length() < LONGEST_ACCEPTED_PATTERN : "The input format is too long";
+ this.pattern = pattern;
+
+ parsePatternToTokens(pattern);
+
+ // throw IllegalArgumentException if pattern is invalid
+ if (forParsing) {
+ verifyForParse();
+ } else {
+ verifyForFormat();
+ }
+ }
+
+ /**
+ * Parse pattern to list of tokens.
+ */
+ private String parsePatternToTokens(String pattern) {
+ tokens.clear();
+ String originalPattern = pattern;
+ pattern = pattern.toLowerCase();
+
+ // indexes of the substring we will check (includes begin, does not include end)
+ int begin=0, end=0;
+ String candidate;
+ Token lastAddedToken = null;
+
+ while (begin < pattern.length()) {
+ // if begin hasn't progressed, then pattern is not parsable
+ if (begin != end) {
+ tokens.clear();
+ throw new IllegalArgumentException("Bad date/time conversion pattern:
" + pattern);
+ }
+
+ // find next token
+ for (int i = LONGEST_TOKEN_LENGTH; i > 0; i--) {
+ end = begin + i;
+ if (end > pattern.length()) { // don't go past the end of the pattern string
+ continue;
+ }
+ candidate = pattern.substring(begin, end);
+ if (isSeparator(candidate)) {
+ lastAddedToken = parseSeparatorToken(candidate, lastAddedToken);
+ begin = end;
+ break;
+ }
+ if (isIso8601Delimiter(candidate)) {
+ lastAddedToken = parseIso8601DelimiterToken(candidate);
+ begin = end;
+ break;
+ }
+ if (isTemporalToken(candidate)) {
+ lastAddedToken = parseTemporalToken(originalPattern, begin, candidate);
+ begin = end;
+ break;
+ }
+ if (isTimeZoneToken(candidate)) {
+ lastAddedToken = parseTimeZoneToken(candidate);
+ begin = end;
+ break;
+ }
+ }
+ }
+ return pattern;
+ }
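+
+ // Editorial note: candidates are tried from LONGEST_TOKEN_LENGTH down to 1,
+ // so the longest match wins -- e.g. in "hh24" the 4-character token is taken
+ // before the 2-character "hh" could match.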
+
+ private boolean isSeparator(String candidate) {
+ return candidate.length() == 1 && VALID_SEPARATORS.contains(candidate);
+ }
+
+ private boolean isIso8601Delimiter(String candidate) {
+ return candidate.length() == 1 && VALID_ISO_8601_DELIMITERS.contains(candidate);
+ }
+
+ private boolean isTemporalToken(String candidate) {
+ return TEMPORAL_TOKENS.containsKey(candidate);
+ }
+
+ private boolean isTimeZoneToken(String pattern) {
+ return TIME_ZONE_TOKENS.containsKey(pattern);
+ }
+
+ private Token parseSeparatorToken(String candidate, Token lastAddedToken) {
+ // try to clump separator with immediately preceding separators (e.g. "---" counts as one separator)
+ if (lastAddedToken != null && lastAddedToken.type == TokenType.SEPARATOR) {
+ lastAddedToken.string += candidate;
+ lastAddedToken.length += 1;
+ } else {
+ lastAddedToken = new Token(TokenType.SEPARATOR, candidate);
+ tokens.add(lastAddedToken);
+ }
+ return lastAddedToken;
+ }
+
+ private Token parseIso8601DelimiterToken(String candidate) {
+ Token lastAddedToken;
+ lastAddedToken = new Token(TokenType.ISO_8601_DELIMITER, candidate.toUpperCase());
+ tokens.add(lastAddedToken);
+ return lastAddedToken;
+ }
+
+ private Token parseTemporalToken(String originalPattern, int begin, String candidate) {
+ Token lastAddedToken;
+
+ // for AM/PM, keep original case
+ if (TEMPORAL_TOKENS.get(candidate) == ChronoField.AMPM_OF_DAY) {
+ int subStringEnd = begin + candidate.length();
+ candidate = originalPattern.substring(begin, subStringEnd);
+ }
+ lastAddedToken = new Token(TEMPORAL_TOKENS.get(candidate.toLowerCase()), candidate,
+     getTokenStringLength(candidate.toLowerCase()));
+ tokens.add(lastAddedToken);
+ return lastAddedToken;
+ }
+
+ private Token parseTimeZoneToken(String candidate) {
+ Token lastAddedToken;
+ lastAddedToken = new Token(TIME_ZONE_TOKENS.get(candidate), candidate,
+ getTokenStringLength(candidate));
+ tokens.add(lastAddedToken);
+ return lastAddedToken;
+ }
+
+ private int getTokenStringLength(String candidate) {
+ Integer length = SPECIAL_LENGTHS.get(candidate);
+ if (length != null) {
+ return length;
+ }
+ return candidate.length();
+ }
+
+ /**
+ * Make sure the generated list of tokens is valid for parsing strings to datetime objects.
+ */
+ private void verifyForParse() {
+
+ // create a list of tokens' temporal fields
+ ArrayList<TemporalField> temporalFields = new ArrayList<>();
+ ArrayList<TemporalUnit> timeZoneTemporalUnits = new ArrayList<>();
+ int roundYearCount=0, yearCount=0;
+ for (Token token : tokens) {
+ if (token.temporalField != null) {
+ temporalFields.add(token.temporalField);
+ if (token.temporalField == ChronoField.YEAR) {
+ if (token.string.startsWith("r")) {
+ roundYearCount += 1;
+ } else {
+ yearCount += 1;
+ }
+ }
+ } else if (token.temporalUnit != null) {
+ timeZoneTemporalUnits.add(token.temporalUnit);
+ }
+ }
+ if (!(temporalFields.contains(ChronoField.YEAR))) {
+ throw new IllegalArgumentException("Missing year token.");
+ }
+ if (!(temporalFields.contains(ChronoField.MONTH_OF_YEAR) &&
+ temporalFields.contains(ChronoField.DAY_OF_MONTH) ||
+ temporalFields.contains(ChronoField.DAY_OF_YEAR))) {
+ throw new IllegalArgumentException("Missing day of year or (month of
year + day of month)"
+ + " tokens.");
+ }
+ if (roundYearCount > 0 && yearCount > 0) {
+ throw new IllegalArgumentException("Invalid duplication of format
element: Both year and"
+ + "round year are provided");
+ }
+ for (TemporalField tokenType : temporalFields) {
+ if (Collections.frequency(temporalFields, tokenType) > 1) {
+ throw new IllegalArgumentException(
+ "Invalid duplication of format element: multiple " +
tokenType.toString()
+ + " tokens provided.");
+ }
+ }
+ if (temporalFields.contains(ChronoField.AMPM_OF_DAY) &&
+ !(temporalFields.contains(ChronoField.HOUR_OF_DAY) ||
+ temporalFields.contains(ChronoField.HOUR_OF_AMPM))) {
+ throw new IllegalArgumentException("AM/PM provided but missing hour
token.");
+ }
+ if (temporalFields.contains(ChronoField.AMPM_OF_DAY) &&
+ temporalFields.contains(ChronoField.HOUR_OF_DAY)) {
+ throw new IllegalArgumentException("Conflict between median indicator
and hour token.");
+ }
+ if (temporalFields.contains(ChronoField.HOUR_OF_AMPM) &&
+ temporalFields.contains(ChronoField.HOUR_OF_DAY)) {
+ throw new IllegalArgumentException("Conflict between hour of day and
hour of am/pm token.");
+ }
+ if (temporalFields.contains(ChronoField.DAY_OF_YEAR) &&
+ (temporalFields.contains(ChronoField.DAY_OF_MONTH) ||
+ temporalFields.contains(ChronoField.MONTH_OF_YEAR))) {
+ throw new IllegalArgumentException("Day of year provided with day or
month token.");
+ }
+ if (temporalFields.contains(ChronoField.SECOND_OF_DAY) &&
+ (temporalFields.contains(ChronoField.HOUR_OF_DAY) ||
+ temporalFields.contains(ChronoField.HOUR_OF_AMPM) ||
+ temporalFields.contains(ChronoField.MINUTE_OF_HOUR) ||
+ temporalFields.contains(ChronoField.SECOND_OF_MINUTE))) {
+ throw new IllegalArgumentException(
+ "Second of day token conflicts with other token(s).");
+ }
+ if (timeZoneTemporalUnits.contains(ChronoUnit.MINUTES) &&
+ !timeZoneTemporalUnits.contains(ChronoUnit.HOURS)) {
+ throw new IllegalArgumentException("Time zone minute token provided
without time zone hour token.");
+ }
+ }
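+
+ // Examples of patterns rejected here (see testSetPatternWithBadPatterns):
+ // "yyyy Y" (duplicate year), "mm-dd" (missing year), and
+ // "yyyy-mm-dd HH24 AM" (AM/PM conflicts with HH24).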
+
+ /**
+ * Make sure the generated list of tokens is valid for formatting datetime objects to strings.
+ */
+ private void verifyForFormat() {
+ for (Token token : tokens) {
+ if (token.type == TokenType.TIMEZONE) {
+ throw new IllegalArgumentException(token.string.toUpperCase() + " not
a valid format for "
+ + "timestamp or date.");
+ }
+ }
+ }
+
+ public String format(Timestamp ts) {
+ StringBuilder fullOutputSb = new StringBuilder();
+ String outputString = null;
+ int value;
+ LocalDateTime localDateTime =
+     LocalDateTime.ofEpochSecond(ts.toEpochSecond(), ts.getNanos(), ZoneOffset.UTC);
+ for (Token token : tokens) {
+ switch (token.type) {
+ case TEMPORAL:
+ try {
+ value = localDateTime.get(token.temporalField);
+ outputString = formatTemporal(value, token);
+ } catch (DateTimeException e) {
+ throw new IllegalArgumentException(token.temporalField + " couldn't
be obtained from "
+ + "LocalDateTime " + localDateTime, e);
+ }
+ break;
+ case TIMEZONE: //invalid for timestamp and date
+ throw new IllegalArgumentException(token.string.toUpperCase() + " not
a valid format for "
+ + "timestamp or date.");
+ case SEPARATOR:
+ outputString = token.string;
+ break;
+ case ISO_8601_DELIMITER:
+ outputString = token.string.toUpperCase();
+ break;
+ default:
+ // won't happen
+ }
+ fullOutputSb.append(outputString);
+ }
+ return fullOutputSb.toString();
+ }
+
+ public String format(Date date) {
+ return format(Timestamp.ofEpochSecond(date.toEpochSecond()));
+ }
+
+ private String formatTemporal(int value, Token token) {
+ String output;
+ if (token.temporalField == ChronoField.AMPM_OF_DAY) {
+ output = value == 0 ? "a" : "p";
+ output += token.string.length() == 2 ? "m" : ".m.";
+ if (token.string.startsWith("A") || token.string.startsWith("P")) {
+ output = output.toUpperCase();
+ }
+ } else { // it's a numeric value
+
+ if (token.temporalField == ChronoField.HOUR_OF_AMPM && value == 0) {
+ value = 12;
+ }
+ try {
+ output = String.valueOf(value);
+ output = padOrTruncateNumericTemporal(token, output);
+ } catch (Exception e) {
+ throw new IllegalArgumentException("Value: " + value + " couldn't be
cast to string.", e);
+ }
+ }
+ return output;
+ }
+
+ /**
+ * To match token.length, pad left with zeroes or truncate.
+ */
+ private String padOrTruncateNumericTemporal(Token token, String output) {
+ if (output.length() < token.length) {
+ output = StringUtils.leftPad(output, token.length, '0'); // pad left
+ } else if (output.length() > token.length) {
+ if (token.temporalField == ChronoField.NANO_OF_SECOND) {
+ output = output.substring(0, token.length); // truncate right
+ } else {
+ output = output.substring(output.length() - token.length); // truncate left
+ }
+ }
+ if (token.temporalField == ChronoField.NANO_OF_SECOND
+ && token.string.equalsIgnoreCase("ff")) {
+ output = output.replaceAll("0*$", ""); //truncate trailing 0's
+ if (output.isEmpty()) {
+ output = "0";
+ }
+ }
+ return output;
+ }
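+
+ // Worked example (based on the "y yyy hh:mi:ss.ffz" case in
+ // TestHiveSqlDateTimeFormatter): for token "ff" (expected length 9) and nano
+ // value 7007000, "7007000" is left-padded to "007007000" and trailing zeros
+ // are then stripped, giving "007007".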
+
+ /**
+ * Left here for timestamp with local time zone.
+ */
+ private String formatTimeZone(TimeZone timeZone, LocalDateTime localDateTime, Token token) {
+ ZoneOffset offset = timeZone.toZoneId().getRules().getOffset(localDateTime);
+ Duration seconds = Duration.of(offset.get(ChronoField.OFFSET_SECONDS), ChronoUnit.SECONDS);
+ if (token.string.equals("tzh")) {
+ long hours = seconds.toHours();
+ String s = (hours >= 0) ? "+" : "-";
+ s += (Math.abs(hours) < 10) ? "0" : "";
+ s += String.valueOf(Math.abs(hours));
+ return s;
+ } else {
+ long minutes = Math.abs(seconds.toMinutes() % MINUTES_PER_HOUR);
+ String s = String.valueOf(minutes);
+ if (s.length() == 1) {
+ s = "0" + s;
+ }
+ return s;
+ }
+ }
+
+ public Timestamp parseTimestamp(String fullInput){
+ LocalDateTime ldt = LocalDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC);
+ String substring;
+ int index = 0;
+ int value;
+ int timeZoneSign = 0, timeZoneHours = 0, timeZoneMinutes = 0;
+
+ for (Token token : tokens) {
+ switch (token.type) {
+ case TEMPORAL:
+ substring = getNextSubstring(fullInput, index, token); // e.g. yy-m -> yy
+ value = parseTemporal(substring, token); // e.g. 18->2018, July->07
+ try {
+ ldt = ldt.with(token.temporalField, value);
+ } catch (DateTimeException e){
+ throw new IllegalArgumentException(
+ "Value " + value + " not valid for token " + token.toString());
+ }
+ index += substring.length();
+ break;
+ case TIMEZONE:
+ if (token.temporalUnit == ChronoUnit.HOURS) {
+ String nextCharacter = fullInput.substring(index, index + 1);
+ timeZoneSign = "-".equals(nextCharacter) ? -1 : 1;
+ if ("-".equals(nextCharacter) || "+".equals(nextCharacter)) {
+ index++;
+ }
+ // parse next two digits
+ substring = getNextSubstring(fullInput, index, index + 2, token);
+ try {
+ timeZoneHours = Integer.parseInt(substring);
+ } catch (NumberFormatException e) {
+ throw new IllegalArgumentException("Couldn't parse substring \"" +
substring +
+ "\" with token " + token + " to int. Pattern is " + pattern,
e);
+ }
+ if (timeZoneHours < -15 || timeZoneHours > 15) {
+ throw new IllegalArgumentException("Couldn't parse substring \"" +
substring +
+ "\" to TZH because TZH range is -15 to +15. Pattern is " +
pattern);
+ }
+ } else { // time zone minutes
+ substring = getNextSubstring(fullInput, index, token);
+ try {
+ timeZoneMinutes = Integer.parseInt(substring);
+ } catch (NumberFormatException e) {
+ throw new IllegalArgumentException("Couldn't parse substring \"" +
substring +
+ "\" with token " + token + " to int. Pattern is " + pattern, e);
+ }
+ if (timeZoneMinutes < 0 || timeZoneMinutes > 59) {
+ throw new IllegalArgumentException("Couldn't parse substring \"" +
substring +
+ "\" to TZM because TZM range is 0 to 59. Pattern is " +
pattern);
+ }
+ }
+ index += substring.length();
+ break;
+ case SEPARATOR:
+ index = parseSeparator(fullInput, index, token);
+ break;
+ case ISO_8601_DELIMITER:
+ index = parseIso8601Delimiter(fullInput, index, token);
+ break;
+ default:
+ //do nothing
+ }
+ }
+
+ // anything left unparsed at end of string? throw error
+ if (!fullInput.substring(index).isEmpty()) {
+ throw new IllegalArgumentException("Leftover input after parsing: " +
+ fullInput.substring(index) + " in string " + fullInput);
+ }
+
+ return Timestamp.ofEpochSecond(ldt.toEpochSecond(ZoneOffset.UTC), ldt.getNano());
+ }
+
+ public Date parseDate(String input){
+ return Date.ofEpochMilli(parseTimestamp(input).toEpochMilli());
+ }
+
+ /**
+ * Return the next substring to parse. Length is either specified or token.length, but a
+ * separator or an ISO-8601 delimiter can cut the substring short. (e.g. if the token pattern is
+ * "YYYY" we expect the next 4 characters to be 4 numbers. However, if it is "976/" then we
+ * return "976" because a separator cuts it short.)
+ */
+ private String getNextSubstring(String s, int begin, Token token) {
+ return getNextSubstring(s, begin, begin + token.length, token);
+ }
+
+ private String getNextSubstring(String s, int begin, int end, Token token) {
+ if (end > s.length()) {
+ end = s.length();
+ }
+ s = s.substring(begin, end);
+ if (token.temporalField == ChronoField.AMPM_OF_DAY) {
+ if (s.charAt(1) == 'm' || s.charAt(1) == 'M') { // length 2
+ return s.substring(0, 2);
+ } else {
+ return s;
+ }
+ }
+ for (String sep : VALID_SEPARATORS) {
+ if (s.contains(sep)) {
+ s = s.substring(0, s.indexOf(sep));
+ }
+ }
+ // TODO this will cause problems with DAY (for example, Thursday starts with T)
+ for (String delimiter : VALID_ISO_8601_DELIMITERS) {
+ if (s.toLowerCase().contains(delimiter)) {
+ s = s.substring(0, s.toLowerCase().indexOf(delimiter));
+ }
+ }
+
+ return s;
+ }
+
+ /**
+ * Get the integer value of a temporal substring.
+ */
+ private int parseTemporal(String substring, Token token){
+ // exceptions to the rule
+ if (token.temporalField == ChronoField.AMPM_OF_DAY) {
+ return substring.toLowerCase().startsWith("a") ? AM : PM;
+
+ } else if (token.temporalField == ChronoField.HOUR_OF_AMPM && "12".equals(substring)) {
+ substring = "0";
+
+ } else if (token.temporalField == ChronoField.YEAR) {
+ String currentYearString = String.valueOf(LocalDateTime.now().getYear());
+ //deal with round years
+ if (token.string.startsWith("r") && substring.length() == 2) {
+ int currFirst2Digits = Integer.parseInt(currentYearString.substring(0, 2));
+ int currLast2Digits = Integer.parseInt(currentYearString.substring(2));
+ int valLast2Digits = Integer.parseInt(substring);
+ if (valLast2Digits < 50 && currLast2Digits >= 50) {
+ currFirst2Digits += 1;
+ } else if (valLast2Digits >= 50 && currLast2Digits < 50) {
+ currFirst2Digits -= 1;
+ }
+ substring = String.valueOf(currFirst2Digits) + substring;
+ } else { // fill in prefix digits with current date
+ substring = currentYearString.substring(0, 4 - substring.length()) + substring;
+ }
+
+ } else if (token.temporalField == ChronoField.NANO_OF_SECOND) {
+ int i = Integer.min(token.length, substring.length());
+ substring += StringUtils.repeat("0", NANOS_MAX_LENGTH - i);
+ }
+
+ // the rule
+ try {
+ return Integer.parseInt(substring);
+ } catch (NumberFormatException e) {
+ throw new IllegalArgumentException("Couldn't parse substring \"" +
substring +
+ "\" with token " + token + " to integer. Pattern is " + pattern, e);
+ }
+ }
+
+ /**
+ * Parse the next separator(s). At least one separator character is expected. Separator
+ * characters are interchangeable.
+ *
+ * Caveat: If the last separator character in the separator substring is "-" and is immediately
+ *     followed by a time zone hour (tzh) token, it's a negative sign and not counted as a
+ *     separator, UNLESS this is the only separator character in the separator substring (in
+ *     which case it is not counted as the negative sign).
+ *
+ * @throws IllegalArgumentException if separator is missing
+ */
+ private int parseSeparator(String fullInput, int index, Token token){
+ int separatorsFound = 0;
+ int begin = index;
+
+ while (index < fullInput.length() &&
+ VALID_SEPARATORS.contains(fullInput.substring(index, index + 1))) {
+ if (!isLastCharacterOfSeparator(index, fullInput)
+ || !("-".equals(fullInput.substring(index, index + 1)) &&
(nextTokenIs("tzh", token)))
+ || separatorsFound == 0) {
+ separatorsFound++;
+ }
+ index++;
+ }
+
+ if (separatorsFound == 0) {
+ throw new IllegalArgumentException("Missing separator at index " +
index);
+ }
+ return begin + separatorsFound;
+ }
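+
+ // Illustration (taken from the TZH:TZM cases in TestHiveSqlDateTimeFormatter):
+ // for pattern "YYYY-MM-DD HH24:MI TZH:TZM" and input "2019-1-1 14:00--1:-30",
+ // the first "-" after the minutes counts as the separator and the second one
+ // becomes TZH's negative sign.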
+
+ private int parseIso8601Delimiter(String fullInput, int index, Token token) {
+ String substring;
+ substring = fullInput.substring(index, index + 1);
+ if (token.string.equalsIgnoreCase(substring)) {
+ index++;
+ } else {
+ throw new IllegalArgumentException(
+ "Missing ISO 8601 delimiter " + token.string.toUpperCase());
+ }
+ return index;
+ }
+
+ /**
+ * Is the next character something other than a separator?
+ */
+ private boolean isLastCharacterOfSeparator(int index, String string) {
+ if (index == string.length() - 1) { // if we're at the end of the string, yes
+ return true;
+ }
+ return !VALID_SEPARATORS.contains(string.substring(index + 1, index + 2));
+ }
+
+ /**
+ * Does the temporalUnit/temporalField of the next token match the pattern's?
+ */
+ private boolean nextTokenIs(String pattern, Token currentToken) {
+ // make sure currentToken isn't the last one
+ if (tokens.indexOf(currentToken) == tokens.size() - 1) {
+ return false;
+ }
+ Token nextToken = tokens.get(tokens.indexOf(currentToken) + 1);
+ pattern = pattern.toLowerCase();
+ return (isTimeZoneToken(pattern) && TIME_ZONE_TOKENS.get(pattern) == nextToken.temporalUnit
+     || isTemporalToken(pattern) && TEMPORAL_TOKENS.get(pattern) == nextToken.temporalField);
+ }
+
+ public String getPattern() {
+ return pattern;
+ }
+
+ /**
+ * @return a copy of token list
+ */
+ protected List<Token> getTokens() {
+ return new ArrayList<>(tokens);
+ }
+}
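
A minimal usage sketch of the formatter class added above (illustrative only;
it uses just the public constructor, parseTimestamp and format shown in this
diff, with patterns chosen to pass verifyForParse/verifyForFormat):

    import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
    import org.apache.hadoop.hive.common.type.Timestamp;

    // String -> timestamp: forParsing = true runs verifyForParse on the pattern.
    HiveSqlDateTimeFormatter parser =
        new HiveSqlDateTimeFormatter("yyyy-mm-dd hh24:mi:ss", true);
    Timestamp ts = parser.parseTimestamp("2019-01-01 23:00:00");

    // Timestamp -> string: forParsing = false runs verifyForFormat instead.
    HiveSqlDateTimeFormatter printer =
        new HiveSqlDateTimeFormatter("YYYY-MM-DD HH12 AM", false);
    String s = printer.format(ts); // "2019-01-01 11 PM"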
diff --git a/common/src/java/org/apache/hadoop/hive/common/format/datetime/package-info.java b/common/src/java/org/apache/hadoop/hive/common/format/datetime/package-info.java
new file mode 100644
index 0000000..1e838be
--- /dev/null
+++ b/common/src/java/org/apache/hadoop/hive/common/format/datetime/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Deals with formatting and parsing of datetime objects.
+ */
+package org.apache.hadoop.hive.common.format.datetime;
diff --git a/common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java b/common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java
new file mode 100644
index 0000000..4e822d5
--- /dev/null
+++ b/common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java
@@ -0,0 +1,330 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import junit.framework.TestCase;
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeFormatterBuilder;
+import java.time.format.ResolverStyle;
+import java.time.format.SignStyle;
+import java.time.temporal.ChronoField;
+import java.time.temporal.TemporalField;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import static java.time.temporal.ChronoField.DAY_OF_MONTH;
+import static java.time.temporal.ChronoField.HOUR_OF_DAY;
+import static java.time.temporal.ChronoField.MINUTE_OF_HOUR;
+import static java.time.temporal.ChronoField.MONTH_OF_YEAR;
+import static java.time.temporal.ChronoField.SECOND_OF_MINUTE;
+import static java.time.temporal.ChronoField.YEAR;
+
+/**
+ * Tests HiveSqlDateTimeFormatter.
+ */
+
+public class TestHiveSqlDateTimeFormatter extends TestCase {
+
+ private HiveSqlDateTimeFormatter formatter;
+
+ public void testSetPattern() {
+ verifyPatternParsing(" ---yyyy-\'-:- -,.;/MM-dd--", new
ArrayList<>(List.of(
+ null, // represents separator, which has no temporal field
+ ChronoField.YEAR,
+ null,
+ ChronoField.MONTH_OF_YEAR,
+ null,
+ ChronoField.DAY_OF_MONTH,
+ null
+ )));
+
+ verifyPatternParsing("ymmdddhh24::mi:ss A.M. pm", 25, "ymmdddhh24::mi:ss
A.M. pm",
+ new ArrayList<>(List.of(
+ ChronoField.YEAR,
+ ChronoField.MONTH_OF_YEAR,
+ ChronoField.DAY_OF_YEAR,
+ ChronoField.HOUR_OF_DAY,
+ null, ChronoField.MINUTE_OF_HOUR,
+ null, ChronoField.SECOND_OF_MINUTE,
+ null, ChronoField.AMPM_OF_DAY,
+ null, ChronoField.AMPM_OF_DAY
+ )));
+ }
+
+ public void testSetPatternWithBadPatterns() {
+ verifyBadPattern("eyyyy-ddd", true);
+ verifyBadPattern("1yyyy-mm-dd", true);
+
+ //duplicates
+ verifyBadPattern("yyyy Y", true);
+ verifyBadPattern("yyyy R", true);
+
+ //missing year or (month + dayofmonth or dayofyear)
+ verifyBadPattern("yyyy", true);
+ verifyBadPattern("yyyy-mm", true);
+ verifyBadPattern("yyyy-dd", true);
+ verifyBadPattern("mm-dd", true);
+ verifyBadPattern("ddd", true);
+
+ verifyBadPattern("yyyy-MM-DDD", true);
+ verifyBadPattern("yyyy-mm-DD DDD", true);
+ verifyBadPattern("yyyy-mm-dd HH24 HH12", true);
+ verifyBadPattern("yyyy-mm-dd HH24 AM", true);
+ verifyBadPattern("yyyy-mm-dd HH24 SSSSS", true);
+ verifyBadPattern("yyyy-mm-dd HH12 SSSSS", true);
+ verifyBadPattern("yyyy-mm-dd SSSSS AM", true);
+ verifyBadPattern("yyyy-mm-dd MI SSSSS", true);
+ verifyBadPattern("yyyy-mm-dd SS SSSSS", true);
+
+ verifyBadPattern("tzm", false);
+ verifyBadPattern("tzh", false);
+ }
+
+ public void testFormatTimestamp() {
+ checkFormatTs("rr rrrr ddd", "2018-01-03 00:00:00", "18 2018 003");
+ checkFormatTs("yyyy-mm-ddtsssss.ff4z", "2018-02-03 00:00:10.777777777",
"2018-02-03T00010.7777Z");
+ checkFormatTs("hh24:mi:ss.ff1", "2018-02-03 01:02:03.999999999",
"01:02:03.9");
+ checkFormatTs("y yyy hh:mi:ss.ffz", "2018-02-03 01:02:03.0070070", "8 018
01:02:03.007007Z");
+ checkFormatTs("am a.m. pm p.m. AM A.M. PM P.M.", "2018-02-03
01:02:03.0070070", "am a.m. am a.m. AM A.M. AM A.M.");
+ checkFormatTs("HH12 P.M.", "2019-01-01 00:15:10", "12 A.M.");
+ checkFormatTs("HH12 AM", "2019-01-01 12:15:10", "12 PM");
+ checkFormatTs("YYYY-MM-DD HH12PM", "2017-05-05 00:00:00", "2017-05-05
12AM");
+ }
+
+ private void checkFormatTs(String pattern, String input, String expectedOutput) {
+ formatter = new HiveSqlDateTimeFormatter(pattern, false);
+ assertEquals(expectedOutput, formatter.format(toTimestamp(input)));
+ }
+
+ public void testFormatDate() {
+ checkFormatDate("rr rrrr ddd", "2018-01-03", "18 2018 003");
+ checkFormatDate("yyyy-mm-ddtsssss.ff4z", "2018-02-03",
"2018-02-03T00000.0000Z");
+ checkFormatDate("hh24:mi:ss.ff1", "2018-02-03", "00:00:00.0");
+ checkFormatDate("y yyy T hh:mi:ss.ff am z", "2018-02-03", "8 018 T
12:00:00.0 am Z");
+ checkFormatDate("am a.m. pm p.m. AM A.M. PM P.M.", "2018-02-03", "am a.m.
am a.m. AM A.M. AM A.M.");
+ checkFormatDate("DDD", "2019-12-31", "365");
+ checkFormatDate("DDD", "2020-12-31", "366");
+ }
+
+ private void checkFormatDate(String pattern, String input, String expectedOutput) {
+ formatter = new HiveSqlDateTimeFormatter(pattern, false);
+ assertEquals(expectedOutput, formatter.format(toDate(input)));
+ }
+
+ public void testParseTimestamp() {
+ String thisYearString = String.valueOf(LocalDateTime.now().getYear());
+ int firstTwoDigits = getFirstTwoDigits();
+
+ //y
+ checkParseTimestamp("y-mm-dd", "0-02-03", thisYearString.substring(0, 3) +
"0-02-03 00:00:00");
+ checkParseTimestamp("yy-mm-dd", "00-02-03", thisYearString.substring(0, 2)
+ "00-02-03 00:00:00");
+ checkParseTimestamp("yyy-mm-dd", "000-02-03", thisYearString.substring(0,
1) + "000-02-03 00:00:00");
+ checkParseTimestamp("yyyy-mm-dd", "000-02-03", thisYearString.substring(0,
1) + "000-02-03 00:00:00");
+ checkParseTimestamp("rr-mm-dd", "0-02-03", thisYearString.substring(0, 3)
+ "0-02-03 00:00:00");
+ checkParseTimestamp("rrrr-mm-dd", "000-02-03", thisYearString.substring(0,
1) + "000-02-03 00:00:00");
+
+ //rr, rrrr
+ checkParseTimestamp("rr-mm-dd", "00-02-03", firstTwoDigits + 1 + "00-02-03
00:00:00");
+ checkParseTimestamp("rr-mm-dd", "49-02-03", firstTwoDigits + 1 + "49-02-03
00:00:00");
+ checkParseTimestamp("rr-mm-dd", "50-02-03", firstTwoDigits + "50-02-03
00:00:00");
+ checkParseTimestamp("rr-mm-dd", "99-02-03", firstTwoDigits + "99-02-03
00:00:00");
+ checkParseTimestamp("rrrr-mm-dd", "00-02-03", firstTwoDigits + 1 +
"00-02-03 00:00:00");
+ checkParseTimestamp("rrrr-mm-dd", "49-02-03", firstTwoDigits + 1 +
"49-02-03 00:00:00");
+ checkParseTimestamp("rrrr-mm-dd", "50-02-03", firstTwoDigits + "50-02-03
00:00:00");
+ checkParseTimestamp("rrrr-mm-dd", "99-02-03", firstTwoDigits + "99-02-03
00:00:00");
+
+ //everything else
+ checkParseTimestamp("yyyy-mm-ddThh24:mi:ss.ff8z",
"2018-02-03T04:05:06.5665Z", "2018-02-03 04:05:06.5665");
+ checkParseTimestamp("yyyy-mm-dd hh24:mi:ss.ff", "2018-02-03
04:05:06.555555555", "2018-02-03 04:05:06.555555555");
+ checkParseTimestamp("yyyy-mm-dd hh12:mi:ss", "2099-2-03 04:05:06",
"2099-02-03 04:05:06");
+ checkParseTimestamp("yyyyddd", "2018284", "2018-10-11 00:00:00");
+ checkParseTimestamp("yyyyddd", "20184", "2018-01-04 00:00:00");
+ checkParseTimestamp("yyyy-mm-ddThh24:mi:ss.ffz",
"2018-02-03t04:05:06.444Z", "2018-02-03 04:05:06.444");
+ checkParseTimestamp("yyyy-mm-dd hh:mi:ss A.M.", "2018-02-03 04:05:06
P.M.", "2018-02-03 16:05:06");
+ checkParseTimestamp("YYYY-MM-DD HH24:MI TZH:TZM", "2019-1-1 14:00--1:-30",
"2019-01-01 14:00:00");
+ checkParseTimestamp("YYYY-MM-DD HH24:MI TZH:TZM", "2019-1-1 14:00-1:30",
"2019-01-01 14:00:00");
+ checkParseTimestamp("yyyy-mm-dd TZM:TZH", "2019-01-01 1 -3", "2019-01-01
00:00:00");
+ checkParseTimestamp("yyyy-mm-dd TZH:TZM", "2019-01-01 -0:30", "2019-01-01
00:00:00");
+ checkParseTimestamp("TZM/YYY-MM-TZH/DD", "0/333-01-11/02", "2333-01-02
00:00:00");
+ checkParseTimestamp("YYYY-MM-DD HH12:MI AM", "2019-01-01 11:00 p.m.",
"2019-01-01 23:00:00");
+ checkParseTimestamp("YYYY-MM-DD HH12:MI A.M..", "2019-01-01 11:00 pm.",
"2019-01-01 23:00:00");
+ checkParseTimestamp("MI DD-TZM-YYYY-MM TZHPM SS:HH12.FF9",
+ "59 03-30-2017-05 01PM 01:08.123456789", "2017-05-03
20:59:01.123456789");
+ checkParseTimestamp("YYYYDDMMHH12MISSFFAMTZHTZM",
+ "20170501123159123456789AM-0130", "2017-01-05 00:31:59.123456789");
+ checkParseTimestamp("YYYY-MM-DD AMHH12", "2017-05-06 P.M.12", "2017-05-06
12:00:00");
+ checkParseTimestamp("YYYY-MM-DD HH12PM", "2017-05-05 12AM", "2017-05-05
00:00:00");
+ checkParseTimestamp("YYYY-MM-DD HH12:MI:SS.FF9PM TZH:TZM",
+ "2017-05-03 08:59:01.123456789PM 01:30", "2017-05-03
20:59:01.123456789");
+ checkParseTimestamp("YYYYDDMMHH12MISSFFAMTZHTZM",
+ "20170501120159123456789AM-0130", "2017-01-05 00:01:59.123456789");
+
+ //Test "day in year" token in a leap year scenario
+ checkParseTimestamp("YYYY DDD", "2000 60", "2000-02-29 00:00:00");
+ checkParseTimestamp("YYYY DDD", "2000 61", "2000-03-01 00:00:00");
+ checkParseTimestamp("YYYY DDD", "2000 366", "2000-12-31 00:00:00");
+ //Test timezone offset parsing without separators
+ checkParseTimestamp("YYYYMMDDHH12MIA.M.TZHTZM", "201812310800AM+0515",
"2018-12-31 08:00:00");
+ checkParseTimestamp("YYYYMMDDHH12MIA.M.TZHTZM", "201812310800AM0515",
"2018-12-31 08:00:00");
+ checkParseTimestamp("YYYYMMDDHH12MIA.M.TZHTZM", "201812310800AM-0515",
"2018-12-31 08:00:00");
+ }
+
+ private int getFirstTwoDigits() {
+ int thisYear = LocalDateTime.now().getYear();
+ int firstTwoDigits = thisYear / 100;
+ if (thisYear % 100 < 50) {
+ firstTwoDigits -= 1;
+ }
+ return firstTwoDigits;
+ }
+
+ private void checkParseTimestamp(String pattern, String input, String expectedOutput) {
+ formatter = new HiveSqlDateTimeFormatter(pattern, true);
+ assertEquals(toTimestamp(expectedOutput), formatter.parseTimestamp(input));
+ }
+
+ public void testParseDate() {
+
+ String thisYearString = String.valueOf(LocalDateTime.now().getYear());
+ int firstTwoDigits = getFirstTwoDigits();
+ //y
+ checkParseDate("y-mm-dd", "0-02-03", thisYearString.substring(0, 3) +
"0-02-03");
+ checkParseDate("yy-mm-dd", "00-02-03", thisYearString.substring(0, 2) +
"00-02-03");
+ checkParseDate("yyy-mm-dd", "000-02-03", thisYearString.substring(0, 1) +
"000-02-03");
+ checkParseDate("yyyy-mm-dd", "000-02-03", thisYearString.substring(0, 1) +
"000-02-03");
+ checkParseDate("rr-mm-dd", "0-02-03", thisYearString.substring(0, 3) +
"0-02-03");
+ checkParseDate("rrrr-mm-dd", "000-02-03", thisYearString.substring(0, 1) +
"000-02-03");
+
+ //rr, rrrr
+ checkParseDate("rr-mm-dd", "00-02-03", firstTwoDigits + 1 + "00-02-03");
+ checkParseDate("rr-mm-dd", "49-02-03", firstTwoDigits + 1 + "49-02-03");
+ checkParseDate("rr-mm-dd", "50-02-03", firstTwoDigits + "50-02-03");
+ checkParseDate("rr-mm-dd", "99-02-03", firstTwoDigits + "99-02-03");
+ checkParseDate("rrrr-mm-dd", "00-02-03", firstTwoDigits + 1 + "00-02-03");
+ checkParseDate("rrrr-mm-dd", "49-02-03", firstTwoDigits + 1 + "49-02-03");
+ checkParseDate("rrrr-mm-dd", "50-02-03", firstTwoDigits + "50-02-03");
+ checkParseDate("rrrr-mm-dd", "99-02-03", firstTwoDigits + "99-02-03");
+
+ checkParseDate("yyyy-mm-dd hh mi ss.ff7", "2018/01/01 2.2.2.55",
"2018-01-01");
+ }
+
+ private void checkParseDate(String pattern, String input, String expectedOutput) {
+ formatter = new HiveSqlDateTimeFormatter(pattern, true);
+ assertEquals(toDate(expectedOutput), formatter.parseDate(input));
+ }
+
+ public void testParseTimestampError() {
+ verifyBadParseString("yyyy", "2019-02-03");
+ verifyBadParseString("yyyy-mm-dd ", "2019-02-03"); //separator missing
+ verifyBadParseString("yyyy-mm-dd", "2019-02-03..."); //extra separators
+ verifyBadParseString("yyyy-mm-dd hh12:mi:ss", "2019-02-03 14:00:00");
//hh12 out of range
+ verifyBadParseString("yyyy-dddsssss", "2019-912345");
+ verifyBadParseString("yyyy-mm-dd", "2019-13-23"); //mm out of range
+ verifyBadParseString("yyyy-mm-dd tzh:tzm", "2019-01-01 +16:00"); //tzh out
of range
+ verifyBadParseString("yyyy-mm-dd tzh:tzm", "2019-01-01 +14:60"); //tzm out
of range
+ verifyBadParseString("YYYY DDD", "2000 367"); //ddd out of range
+ }
+
+ private void verifyBadPattern(String string, boolean forParsing) {
+ try {
+ formatter = new HiveSqlDateTimeFormatter(string, forParsing);
+ fail();
+ } catch (Exception e) {
+ assertEquals(e.getClass().getName(), IllegalArgumentException.class.getName());
+ }
+ }
+
+ /**
+ * Verify pattern is parsed correctly.
+ * Check:
+ * -token.temporalField for each token
+ * -sum of token.lengths
+ * -concatenation of token.strings
+ */
+ private void verifyPatternParsing(String pattern, ArrayList<TemporalField> temporalFields) {
+   verifyPatternParsing(pattern, pattern.length(), pattern.toLowerCase(), temporalFields);
+ }
+
+ private void verifyPatternParsing(String pattern, int expectedPatternLength,
+     String expectedPattern, ArrayList<TemporalField> temporalFields) {
+ formatter = new HiveSqlDateTimeFormatter(pattern, false);
+ assertEquals(temporalFields.size(), formatter.getTokens().size());
+ StringBuilder sb = new StringBuilder();
+ int actualPatternLength = 0;
+ for (int i = 0; i < temporalFields.size(); i++) {
+ assertEquals("Generated list of tokens not correct",
temporalFields.get(i),
+ formatter.getTokens().get(i).temporalField);
+ sb.append(formatter.getTokens().get(i).string);
+ actualPatternLength += formatter.getTokens().get(i).length;
+ }
+ assertEquals("Token strings concatenated don't match original pattern
string",
+ expectedPattern, sb.toString());
+ assertEquals(expectedPatternLength, actualPatternLength);
+ }
+
+ private void verifyBadParseString(String pattern, String string) {
+ try {
+ formatter = new HiveSqlDateTimeFormatter(pattern, true);
+ formatter.parseTimestamp(string);
+ fail();
+ } catch (Exception e) {
+ assertEquals(e.getClass().getName(), IllegalArgumentException.class.getName());
+ }
+ }
+
+
+ // Methods that construct datetime objects using java.time.DateTimeFormatter.
+
+ public static Date toDate(String s) {
+ LocalDate localDate = LocalDate.parse(s, DATE_FORMATTER);
+ return Date.ofEpochDay((int) localDate.toEpochDay());
+ }
+
+ /**
+ * This is effectively the old Timestamp.valueOf method.
+ */
+ public static Timestamp toTimestamp(String s) {
+ LocalDateTime localDateTime = LocalDateTime.parse(s.trim(), TIMESTAMP_FORMATTER);
+ return Timestamp.ofEpochSecond(
+ localDateTime.toEpochSecond(ZoneOffset.UTC), localDateTime.getNano());
+ }
+
+ private static final DateTimeFormatter DATE_FORMATTER =
+     DateTimeFormatter.ofPattern("yyyy-MM-dd");
+ private static final DateTimeFormatter TIMESTAMP_FORMATTER;
+ static {
+ DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
+ builder.appendValue(YEAR, 1, 10, SignStyle.NORMAL).appendLiteral('-')
+ .appendValue(MONTH_OF_YEAR, 1, 2, SignStyle.NORMAL).appendLiteral('-')
+ .appendValue(DAY_OF_MONTH, 1, 2, SignStyle.NORMAL)
+ .optionalStart().appendLiteral(" ")
+ .appendValue(HOUR_OF_DAY, 1, 2, SignStyle.NORMAL).appendLiteral(':')
+ .appendValue(MINUTE_OF_HOUR, 1, 2, SignStyle.NORMAL).appendLiteral(':')
+ .appendValue(SECOND_OF_MINUTE, 1, 2, SignStyle.NORMAL)
+ .optionalStart().appendFraction(ChronoField.NANO_OF_SECOND, 1, 9, true).optionalEnd()
+ .optionalEnd();
+ TIMESTAMP_FORMATTER = builder.toFormatter().withResolverStyle(ResolverStyle.LENIENT);
+ }
+}
diff --git a/common/src/test/org/apache/hadoop/hive/common/format/datetime/package-info.java b/common/src/test/org/apache/hadoop/hive/common/format/datetime/package-info.java
new file mode 100644
index 0000000..70ee426
--- /dev/null
+++ b/common/src/test/org/apache/hadoop/hive/common/format/datetime/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests formatting and parsing of datetime objects.
+ */
+package org.apache.hadoop.hive.common.format.datetime;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index d08b05f..c09db9a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -528,6 +528,7 @@ public final class FunctionRegistry {
system.registerGenericUDF("to_epoch_milli", GenericUDFEpochMilli.class);
system.registerGenericUDF("bucket_number", GenericUDFBucketNumber.class);
system.registerGenericUDF("tumbling_window",
GenericUDFTumbledWindow.class);
+ system.registerGenericUDF("cast_format", GenericUDFCastFormat.class);
// Generic UDTF's
system.registerGenericUDTF("explode", GenericUDTFExplode.class);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index 58fe0cd..013079c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -247,8 +247,18 @@ castExpression
LPAREN
expression
KW_AS
- primitiveType
- RPAREN -> ^(TOK_FUNCTION primitiveType expression)
+ toType=primitiveType
+ (fmt=KW_FORMAT StringLiteral)?
+ RPAREN
+ // simple cast
+ -> {$fmt == null}? ^(TOK_FUNCTION $toType expression)
+
+ // plain cast ... format: toType is int representing a TOK_* in HiveParser_IdentifiersParser, expression, format pattern
+ -> {((CommonTree)toType.getTree()).getChild(0) == null}?
+    ^(TOK_FUNCTION {adaptor.create(Identifier, "cast_format")} NumberLiteral[Integer.toString(((CommonTree)toType.getTree()).token.getType())] expression StringLiteral)
+
+ // cast ... format to type with 4th parameter which is length of CHAR or VARCHAR
+ -> ^(TOK_FUNCTION {adaptor.create(Identifier, "cast_format")} NumberLiteral[Integer.toString(((CommonTree)toType.getTree()).token.getType())] expression StringLiteral NumberLiteral[((CommonTree)toType.getTree()).getChild(0).getText()])
;
caseExpression
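
The three rewrite alternatives above correspond to the following SQL forms
(illustrative; the authoritative examples live in
cast_datetime_with_sql_2016_format.q):

    -- no FORMAT clause: plain cast, unchanged behavior
    CAST(ts AS string)
    -- FORMAT clause: rewritten to cast_format(<TOK_* type int>, expr, pattern)
    CAST('2018-01-01' AS date FORMAT 'YYYY-MM-DD')
    -- FORMAT clause with sized type: char/varchar length passed as a 4th argument
    CAST(ts AS varchar(10) FORMAT 'YYYY-MM-DD')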
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCastFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCastFormat.java
new file mode 100644
index 0000000..16742ee
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCastFormat.java
@@ -0,0 +1,252 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableMap;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDateObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveCharObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveVarcharObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableTimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.Text;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.Serializable;
+import java.util.Map;
+
+/**
+ * CAST(<value> AS <type> FORMAT <STRING>).
+ *
+ * Vector expressions: CastDateToCharWithFormat, CastDateToStringWithFormat,
+ * CastDateToVarCharWithFormat, CastTimestampToCharWithFormat,
+ * CastTimestampToStringWithFormat, CastTimestampToVarCharWithFormat.
+ * Could not use @VectorizedExpressions annotation because e.g. CastXToCharWithFormat,
+ * CastXToStringWithFormat, CastXToVarCharWithFormat would have the same description.
+ */
+@Description(name = "cast_format",
+ value = "CAST(<value> AS <type> FORMAT <STRING>) - Converts a datetime
value to string or"
+ + " string-type value to datetime based on the format pattern
specified.",
+ extended = "If format is specified with FORMAT argument then SQL:2016
datetime formats will "
+ + "be used.\n"
+ + "Example:\n "
+ + " > SELECT CAST(\"2018-01-01 4 PM\" AS timestamp FORMAT
\"yyyy-mm-dd hh12 AM\");\n"
+ + " 2018-01-01 16:00:00")
+public class GenericUDFCastFormat extends GenericUDF implements Serializable {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(GenericUDFCastFormat.class.getName());
+
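+  // Maps the AST type token of the CAST target type to the corresponding serde type name.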
+ @VisibleForTesting
+  static final Map<Integer, String> OUTPUT_TYPES = ImmutableMap.<Integer, String>builder()
+      .put(HiveParser_IdentifiersParser.TOK_STRING, serdeConstants.STRING_TYPE_NAME)
+      .put(HiveParser_IdentifiersParser.TOK_VARCHAR, serdeConstants.VARCHAR_TYPE_NAME)
+      .put(HiveParser_IdentifiersParser.TOK_CHAR, serdeConstants.CHAR_TYPE_NAME)
+      .put(HiveParser_IdentifiersParser.TOK_TIMESTAMP, serdeConstants.TIMESTAMP_TYPE_NAME)
+      .put(HiveParser_IdentifiersParser.TOK_DATE, serdeConstants.DATE_TYPE_NAME)
+      .build();
+
+ private transient HiveSqlDateTimeFormatter formatter;
+ private transient PrimitiveObjectInspector outputOI;
+ private transient PrimitiveObjectInspector inputOI;
+
+ public GenericUDFCastFormat() {
+ }
+
+ /**
+ * @param arguments
+   * 0. const int, value of a HiveParser_IdentifiersParser constant which represents a TOK_[TYPE]
+ * 1. expression to convert
+ * 2. constant string, format pattern
+ * 3. (optional) constant int, output char/varchar length
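+   * For example, CAST(t AS varchar(11) FORMAT 'yyyy') would arrive here roughly as the
+   * argument list (TOK_VARCHAR, t, 'yyyy', 11).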
+ */
+ @Override public ObjectInspector initialize(ObjectInspector[] arguments)
+ throws UDFArgumentException {
+ if (arguments.length != 3 && arguments.length != 4) {
+ throw new UDFArgumentException(
+          "Function cast_format requires 3 or 4 arguments (int, expression, StringLiteral"
+ + "[, var/char length]), got " + arguments.length);
+ }
+
+ outputOI = getOutputOI(arguments);
+ try {
+ inputOI = (PrimitiveObjectInspector) arguments[1];
+ } catch (ClassCastException e) {
+ throw new UDFArgumentException(
+          "Function CAST...as ... FORMAT ... takes only primitive types");
+ }
+ PrimitiveObjectInspectorUtils.PrimitiveGrouping inputPG =
+        PrimitiveObjectInspectorUtils.getPrimitiveGrouping(inputOI.getPrimitiveCategory());
+ PrimitiveObjectInspectorUtils.PrimitiveGrouping outputPG =
+        PrimitiveObjectInspectorUtils.getPrimitiveGrouping(outputOI.getPrimitiveCategory());
+
+ if (inputOI.getPrimitiveCategory()
+ == PrimitiveObjectInspector.PrimitiveCategory.TIMESTAMPLOCALTZ) {
+ throw new UDFArgumentException(
+          "Timestamp with local time zone not yet supported for cast ... format function");
+ }
+    if (!(inputPG == PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP
+        && outputPG == PrimitiveObjectInspectorUtils.PrimitiveGrouping.DATE_GROUP
+        || inputPG == PrimitiveObjectInspectorUtils.PrimitiveGrouping.DATE_GROUP
+        && outputPG == PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP
+        || inputPG == PrimitiveObjectInspectorUtils.PrimitiveGrouping.VOID_GROUP)) {
+      throw new UDFArgumentException(
+          "Function CAST...as ... FORMAT ... only converts datetime objects to string types"
+              + " and string or void objects to datetime types. Output type provided: "
+              + outputOI.getPrimitiveCategory() + " in primitive grouping " + outputPG
+              + ", input type provided: " + inputOI.getPrimitiveCategory()
+              + " in primitive grouping " + inputPG);
+    }
+
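+    // A datetime output type means the formatter will parse input strings; a string output
+    // type means it will print the input datetime values.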
+    boolean forParsing = (outputPG == PrimitiveObjectInspectorUtils.PrimitiveGrouping.DATE_GROUP);
+    formatter = new HiveSqlDateTimeFormatter(getConstantStringValue(arguments, 2), forParsing);
+ return outputOI;
+ }
+
+ private PrimitiveObjectInspector getOutputOI(ObjectInspector[] arguments)
+ throws UDFArgumentException {
+ int key = getConstantIntValue(arguments, 0);
+    if (!OUTPUT_TYPES.containsKey(key)) {
+      throw new UDFArgumentException("Cast...format can only convert to DATE, TIMESTAMP, STRING, "
+          + "VARCHAR, CHAR. Can't convert to HiveParser_IdentifiersParser constant with value "
+          + key);
+ }
+ String typeString = OUTPUT_TYPES.get(key);
+ if (serdeConstants.VARCHAR_TYPE_NAME.equals(typeString)
+ || serdeConstants.CHAR_TYPE_NAME.equals(typeString)) {
+ if (arguments.length < 4 || arguments[3] == null) {
+        throw new UDFArgumentException(typeString + " missing length argument");
+ }
+ typeString += "(" + getConstantIntValue(arguments, 3) + ")";
+ }
+    PrimitiveTypeInfo typeInfo = TypeInfoFactory.getPrimitiveTypeInfo(typeString);
+    return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
+ }
+
+  @Override public Object evaluate(DeferredObject[] arguments) throws HiveException {
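+    // arguments[0] (the type token) and arguments[2]/[3] (format, length) are constants
+    // already consumed in initialize(); the value to convert is arguments[1].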
+ Object o0 = arguments[1].get();
+ if (o0 == null) {
+ return null;
+ }
+ return convert(o0);
+ }
+
+ private Object convert(Object o) throws HiveException {
+ Object input;
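+    // Unwrap the input to a plain String (string group) or a Date/Timestamp (date group).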
+    switch (inputOI.getPrimitiveCategory()) {
+    case STRING:
+      input = ((StringObjectInspector) inputOI).getPrimitiveJavaObject(o);
+      break;
+    case CHAR:
+      input = ((HiveCharObjectInspector) inputOI).getPrimitiveJavaObject(o).getStrippedValue();
+      break;
+    case VARCHAR:
+      input = ((HiveVarcharObjectInspector) inputOI).getPrimitiveJavaObject(o).toString();
+      break;
+    case TIMESTAMP:
+      input = ((TimestampObjectInspector) inputOI).getPrimitiveWritableObject(o).getTimestamp();
+      break;
+    case DATE:
+      input = ((DateObjectInspector) inputOI).getPrimitiveWritableObject(o).get();
+      break;
+    default:
+      throw new HiveException("Input type " + inputOI.getPrimitiveCategory() + " not valid");
+    }
+
+    // format here
+    Object formattedOutput = null;
+    if (inputOI.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.DATE) {
+      formattedOutput = formatter.format((Date) input);
+      if (formattedOutput == null) {
+        return null;
+      }
+    } else if (inputOI.getPrimitiveCategory()
+        == PrimitiveObjectInspector.PrimitiveCategory.TIMESTAMP) {
+      formattedOutput = formatter.format((Timestamp) input);
+      if (formattedOutput == null) {
+        return null;
+      }
+    }
+
+ // parse and create Writables
+ switch (outputOI.getPrimitiveCategory()) {
+ case STRING:
+ return new Text((String) formattedOutput);
+ case CHAR:
+ return ((SettableHiveCharObjectInspector) outputOI)
+ .create(new HiveChar((String) formattedOutput, -1));
+ case VARCHAR:
+ return ((SettableHiveVarcharObjectInspector) outputOI)
+ .create(new HiveVarchar((String) formattedOutput, -1));
+ case TIMESTAMP:
+ Timestamp t = formatter.parseTimestamp((String) input);
+ if (t == null) {
+ return null;
+ }
+ return ((SettableTimestampObjectInspector) outputOI).create(t);
+ case DATE:
+ Date d = formatter.parseDate((String) input);
+ if (d == null) {
+ return null;
+ }
+ return ((SettableDateObjectInspector) outputOI).create(d);
+ default:
+      throw new HiveException("Output type " + outputOI.getPrimitiveCategory() + " not valid");
+ }
+ }
+
+ @Override public String getDisplayString(String[] children) {
+ assert children.length == 3 || children.length == 4;
+ StringBuilder sb = new StringBuilder();
+ sb.append("CAST( ");
+ sb.append(children[1]);
+ sb.append(" AS ");
+ int typeKey = Integer.parseInt(children[0]);
+    if (!OUTPUT_TYPES.containsKey(typeKey)) {
+      sb.append("HiveParser_IdentifiersParser index ").append(typeKey);
+ } else {
+ sb.append(OUTPUT_TYPES.get(typeKey));
+ if (children.length == 4) {
+ sb.append("(").append(children[3]).append(")");
+ }
+ }
+ sb.append(" FORMAT ");
+ sb.append(children[2]);
+ sb.append(" )");
+ return sb.toString();
+ }
+}
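
For a quick sense of the new syntax end to end, a minimal usage sketch (the table and
column names here are hypothetical, not part of this patch):

    -- datetime -> string and string -> datetime with SQL:2016 patterns
    SELECT CAST(ts AS string FORMAT 'yyyy-mm-dd hh24:mi') FROM t;
    SELECT CAST('30/07/2009' AS date FORMAT 'dd/mm/yyyy');

The qtest cast_datetime_with_sql_2016_format.q below exercises the same syntax against
string, char, varchar, date and timestamp columns.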
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCastFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCastFormat.java
new file mode 100644
index 0000000..9afd5af
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCastFormat.java
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser;
+import org.apache.hadoop.hive.serde2.io.DateWritableV2;
+import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+
+import static junit.framework.TestCase.assertEquals;
+import static junit.framework.TestCase.assertNull;
+import static org.junit.Assert.fail;
+
+/**
+ * Tests CAST (<TIMESTAMP/DATE> AS STRING/CHAR/VARCHAR FORMAT <STRING>) and
+ * CAST (<STRING/CHAR/VARCHAR> AS TIMESTAMP/DATE FORMAT <STRING>).
+ */
+public class TestGenericUDFCastFormat {
+
+  // type codes
+ public static final int CHAR = HiveParser_IdentifiersParser.TOK_CHAR;
+ public static final int VARCHAR = HiveParser_IdentifiersParser.TOK_VARCHAR;
+ public static final int STRING = HiveParser_IdentifiersParser.TOK_STRING;
+ public static final int DATE = HiveParser_IdentifiersParser.TOK_DATE;
+  public static final int TIMESTAMP = HiveParser_IdentifiersParser.TOK_TIMESTAMP;
+
+ @Test
+ public void testDateToStringWithFormat() throws HiveException {
+    ObjectInspector inputOI = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
+ testCast(STRING, inputOI, date("2009-07-30"), "yyyy-MM-dd", "2009-07-30");
+ testCast(STRING, inputOI, date("2009-07-30"), "yyyy", "2009");
+ testCast(STRING, inputOI, date("1969-07-30"), "dd", "30");
+
+ testCast(CHAR, 3, inputOI, date("2009-07-30"), "yyyy-MM-dd", "200");
+ testCast(CHAR, 3, inputOI, date("2009-07-30"), "yyyy", "200");
+ testCast(CHAR, 3, inputOI, date("1969-07-30"), "dd", "30 ");
+
+ testCast(VARCHAR, 3, inputOI, date("2009-07-30"), "yyyy-MM-dd", "200");
+ testCast(VARCHAR, 3, inputOI, date("2009-07-30"), "yyyy", "200");
+ testCast(VARCHAR, 3, inputOI, date("1969-07-30"), "dd", "30");
+ }
+
+  @Test public void testTimestampToStringTypesWithFormat() throws HiveException {
+    ObjectInspector inputOI = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
+ testCast(STRING, inputOI, timestamp("2009-07-30 00:00:08"),
+ "yyyy-MM-dd HH24:mi:ss", "2009-07-30 00:00:08");
+ testCast(STRING, inputOI, timestamp("2009-07-30 11:02:00"),
+ "MM/dd/yyyy hh24miss", "07/30/2009 110200");
+ testCast(STRING, inputOI, timestamp("2009-07-30 01:02:03"), "MM", "07");
+ testCast(STRING, inputOI, timestamp("1969-07-30 00:00:00"), "yy", "69");
+
+ testCast(CHAR, 3, inputOI, timestamp("2009-07-30 00:00:08"),
+ "yyyy-MM-dd HH24:mi:ss", "200");
+ testCast(CHAR, 3, inputOI, timestamp("2009-07-30 11:02:00"),
+ "MM/dd/yyyy hh24miss", "07/");
+ testCast(CHAR, 3, inputOI, timestamp("2009-07-30 01:02:03"), "MM", "07 ");
+ testCast(CHAR, 3, inputOI, timestamp("1969-07-30 00:00:00"), "yy", "69 ");
+
+ testCast(VARCHAR, 3, inputOI, timestamp("2009-07-30 00:00:08"),
+ "yyyy-MM-dd HH24:mi:ss", "200");
+ testCast(VARCHAR, 3, inputOI, timestamp("2009-07-30 11:02:00"),
+ "MM/dd/yyyy hh24miss", "07/");
+    testCast(VARCHAR, 3, inputOI, timestamp("2009-07-30 01:02:03"), "MM", "07");
+    testCast(VARCHAR, 3, inputOI, timestamp("1969-07-30 00:00:00"), "yy", "69");
+ }
+
+ @Test public void testStringTypesToDateWithFormat() throws HiveException {
+    ObjectInspector inputOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+    testCast(DATE, inputOI, "1969-07-30 13:00", "yyyy-MM-dd hh24:mi", "1969-07-30");
+ testCast(DATE, inputOI, "307-2009", "ddmm-yyyy", "2009-07-30");
+ testCast(DATE, inputOI, "307-2009", "ddd-yyyy", "2009-11-03");
+
+ inputOI = PrimitiveObjectInspectorFactory.javaHiveCharObjectInspector;
+    testCast(DATE, inputOI, new HiveChar("1969-07-30 13:00", 15), "yyyy-MM-dd hh24:mi",
+        "1969-07-30");
+    testCast(DATE, inputOI, new HiveChar("307-2009", 7), "ddmm-yyyy", "2200-07-30");
+    testCast(DATE, inputOI, new HiveChar("307-2009", 7), "ddd-yyyy", "2200-11-03");
+
+ inputOI = PrimitiveObjectInspectorFactory.javaHiveVarcharObjectInspector;
+    testCast(DATE, inputOI, new HiveVarchar("1969-07-30 13:00", 15), "yyyy-MM-dd hh24:mi",
+        "1969-07-30");
+    testCast(DATE, inputOI, new HiveVarchar("307-2009", 7), "ddmm-yyyy", "2200-07-30");
+    testCast(DATE, inputOI, new HiveVarchar("307-2009", 7), "ddd-yyyy", "2200-11-03");
+ }
+
+  @Test public void testStringTypesToTimestampWithFormat() throws HiveException {
+    ObjectInspector inputOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+    testCast(TIMESTAMP, inputOI, "2009-07-30 01:02:03", "yyyy-MM-dd HH24:mi:ss",
+        "2009-07-30 01:02:03");
+    testCast(TIMESTAMP, inputOI, "07/30/2009 11:0200", "MM/dd/yyyy hh24:miss",
+        "2009-07-30 11:02:00");
+    testCast(TIMESTAMP, inputOI, "969.07.30.", "yyy.MM.dd.", "2969-07-30 00:00:00");
+
+    inputOI = PrimitiveObjectInspectorFactory.javaHiveCharObjectInspector;
+    testCast(TIMESTAMP, 13, inputOI, new HiveChar("2009-07-30 01:02:03", 13),
+        "yyyy-MM-dd HH24", "2009-07-30 01:00:00");
+    testCast(TIMESTAMP, 18, inputOI, new HiveChar("07/30/2009 11:0200", 18),
+        "MM/dd/yyyy hh24:miss", "2009-07-30 11:02:00");
+    testCast(TIMESTAMP, 10, inputOI, new HiveChar("969.07.30.12:00", 10), "yyy.MM.dd.",
+        "2969-07-30 00:00:00");
+
+    inputOI = PrimitiveObjectInspectorFactory.javaHiveVarcharObjectInspector;
+    testCast(TIMESTAMP, 13, inputOI, new HiveVarchar("2009-07-30 01:02:03", 13),
+        "yyyy-MM-dd HH24", "2009-07-30 01:00:00");
+    testCast(TIMESTAMP, 18, inputOI, new HiveVarchar("07/30/2009 11:0200", 18),
+        "MM/dd/yyyy hh24:miss", "2009-07-30 11:02:00");
+    testCast(TIMESTAMP, 10, inputOI, new HiveVarchar("969.07.30.12:00", 10), "yyy.MM.dd.",
+        "2969-07-30 00:00:00");
+ }
+
+ private TimestampWritableV2 timestamp(String s) {
+ return new TimestampWritableV2(Timestamp.valueOf(s));
+ }
+
+ private DateWritableV2 date(String s) {
+ return new DateWritableV2(Date.valueOf(s));
+ }
+
+  private void testCast(int typeCode, ObjectInspector inputOI, Object input, String format,
+      String expOutput) throws HiveException {
+ testCast(typeCode, 0, inputOI, input, format, expOutput);
+ }
+
+  private void testCast(int typeCode, int length, ObjectInspector inputOI, Object input,
+      String format, String expOutput) throws HiveException {
+ // initialize
+ GenericUDFCastFormat udf = new GenericUDFCastFormat();
+    ConstantObjectInspector typeCodeOI =
+        PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
+            TypeInfoFactory.getPrimitiveTypeInfo("int"), new IntWritable(typeCode));
+    ConstantObjectInspector formatOI =
+        PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
+            TypeInfoFactory.getPrimitiveTypeInfo("string"), new Text(format));
+    ConstantObjectInspector lengthOI =
+        PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
+            TypeInfoFactory.getPrimitiveTypeInfo("int"), new IntWritable(length));
+ ObjectInspector[] initArgs = {typeCodeOI, inputOI, formatOI, lengthOI};
+ udf.initialize(initArgs);
+
+ // evaluate
+    GenericUDF.DeferredObject typeCodeObj = new GenericUDF.DeferredJavaObject(typeCode);
+    GenericUDF.DeferredObject inputObj = new GenericUDF.DeferredJavaObject(input);
+    GenericUDF.DeferredObject formatObj = new GenericUDF.DeferredJavaObject(new Text(format));
+    GenericUDF.DeferredObject lengthObj = new GenericUDF.DeferredJavaObject(length);
+    GenericUDF.DeferredObject[] evalArgs = {typeCodeObj, inputObj, formatObj, lengthObj};
+ Object output = udf.evaluate(evalArgs);
+ if (output == null) {
+      fail("Cast " + inputOI.getTypeName() + " \"" + input + "\" to "
+          + GenericUDFCastFormat.OUTPUT_TYPES.get(typeCode) + " failed, output null");
+    }
+    assertEquals("Cast " + inputOI.getTypeName() + " \"" + input + "\" to "
+        + GenericUDFCastFormat.OUTPUT_TYPES.get(typeCode) + " failed ",
+        expOutput, output.toString());
+
+ // Try with null input
+ GenericUDF.DeferredObject[] nullArgs =
+        {typeCodeObj, new GenericUDF.DeferredJavaObject(null), formatObj, lengthObj};
+    assertNull(udf.getFuncName() + " with NULL arguments failed", udf.evaluate(nullArgs));
+ }
+}
diff --git a/ql/src/test/queries/clientpositive/cast_datetime_with_sql_2016_format.q b/ql/src/test/queries/clientpositive/cast_datetime_with_sql_2016_format.q
new file mode 100644
index 0000000..269edf6
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cast_datetime_with_sql_2016_format.q
@@ -0,0 +1,45 @@
+-- non-vectorized
+set hive.vectorized.execution.enabled=false;
+set hive.fetch.task.conversion=more;
+
+create table timestamp1 (t timestamp) stored as parquet;
+insert into timestamp1 values
+("2020-02-03"),
+("1969-12-31 23:59:59.999999999")
+;
+from timestamp1 select cast (t as string format "yyyy hh24...PM ff");
+from timestamp1 select cast (t as char(11) format "yyyy hh24...PM ff"); -- will be truncated
+from timestamp1 select cast (t as varchar(11) format "yyyy hh24...PM ff"); -- will be truncated
+
+create table dates (d date) stored as parquet;
+insert into dates values
+("2020-02-03"),
+("1969-12-31")
+;
+from dates select cast (d as string format "yyyy mm dd , hh24 mi ss ff9");
+from dates select cast (d as char(10) format "yyyy mm dd , hh24 mi ss ff9"); -- will be truncated
+from dates select cast (d as varchar(10) format "yyyy mm dd , hh24 mi ss ff9"); -- will be truncated
+
+create table strings (s string) stored as parquet;
+create table varchars (s varchar(11)) stored as parquet;
+create table chars (s char(11)) stored as parquet;
+insert into strings values
+("20 / 2 / 3"),
+("1969 12 31")
+;
+insert into varchars select * from strings;
+insert into chars select * from strings;
+
+from strings select cast (s as timestamp format "yyyy.mm.dd");
+from strings select cast (s as date format "yyyy.mm.dd");
+from varchars select cast (s as timestamp format "yyyy.mm.dd");
+from varchars select cast (s as date format "yyyy.mm.dd");
+from chars select cast (s as timestamp format "yyyy.mm.dd");
+from chars select cast (s as date format "yyyy.mm.dd");
+
+
+-- correct descriptions
+explain from strings select cast (s as timestamp format "yyy.mm.dd");
+explain from strings select cast (s as date format "yyy.mm.dd");
+explain from timestamp1 select cast (t as string format "yyyy");
+explain from timestamp1 select cast (t as varchar(12) format "yyyy");
diff --git a/ql/src/test/results/clientpositive/cast_datetime_with_sql_2016_format.q.out b/ql/src/test/results/clientpositive/cast_datetime_with_sql_2016_format.q.out
new file mode 100644
index 0000000..4a502b9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/cast_datetime_with_sql_2016_format.q.out
@@ -0,0 +1,329 @@
+PREHOOK: query: create table timestamp1 (t timestamp) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@timestamp1
+POSTHOOK: query: create table timestamp1 (t timestamp) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@timestamp1
+PREHOOK: query: insert into timestamp1 values
+("2020-02-03"),
+("1969-12-31 23:59:59.999999999")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@timestamp1
+POSTHOOK: query: insert into timestamp1 values
+("2020-02-03"),
+("1969-12-31 23:59:59.999999999")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@timestamp1
+POSTHOOK: Lineage: timestamp1.t SCRIPT []
+PREHOOK: query: from timestamp1 select cast (t as string format "yyyy hh24...PM ff")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+POSTHOOK: query: from timestamp1 select cast (t as string format "yyyy hh24...PM ff")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+2020 00...AM 0
+1969 23...PM 999999999
+PREHOOK: query: from timestamp1 select cast (t as char(11) format "yyyy hh24...PM ff")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+POSTHOOK: query: from timestamp1 select cast (t as char(11) format "yyyy hh24...PM ff")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+2020 00...A
+1969 23...P
+PREHOOK: query: -- will be truncated
+from timestamp1 select cast (t as varchar(11) format "yyyy hh24...PM ff")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+POSTHOOK: query: -- will be truncated
+from timestamp1 select cast (t as varchar(11) format "yyyy hh24...PM ff")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+2020 00...A
+1969 23...P
+PREHOOK: query: -- will be truncated
+
+create table dates (d date) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dates
+POSTHOOK: query: -- will be truncated
+
+create table dates (d date) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dates
+PREHOOK: query: insert into dates values
+("2020-02-03"),
+("1969-12-31")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@dates
+POSTHOOK: query: insert into dates values
+("2020-02-03"),
+("1969-12-31")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@dates
+POSTHOOK: Lineage: dates.d SCRIPT []
+PREHOOK: query: from dates select cast (d as string format "yyyy mm dd , hh24 mi ss ff9")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates
+#### A masked pattern was here ####
+POSTHOOK: query: from dates select cast (d as string format "yyyy mm dd , hh24 mi ss ff9")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates
+#### A masked pattern was here ####
+2020 02 03 , 00 00 00 000000000
+1969 12 31 , 00 00 00 000000000
+PREHOOK: query: from dates select cast (d as char(10) format "yyyy mm dd , hh24 mi ss ff9")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates
+#### A masked pattern was here ####
+POSTHOOK: query: from dates select cast (d as char(10) format "yyyy mm dd , hh24 mi ss ff9")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates
+#### A masked pattern was here ####
+2020 02 03
+1969 12 31
+PREHOOK: query: -- will be truncated
+from dates select cast (d as varchar(10) format "yyyy mm dd , hh24 mi ss ff9")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates
+#### A masked pattern was here ####
+POSTHOOK: query: -- will be truncated
+from dates select cast (d as varchar(10) format "yyyy mm dd , hh24 mi ss ff9")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates
+#### A masked pattern was here ####
+2020 02 03
+1969 12 31
+PREHOOK: query: -- will be truncated
+
+create table strings (s string) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@strings
+POSTHOOK: query: -- will be truncated
+
+create table strings (s string) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@strings
+PREHOOK: query: create table varchars (s varchar(11)) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@varchars
+POSTHOOK: query: create table varchars (s varchar(11)) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@varchars
+PREHOOK: query: create table chars (s char(11)) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@chars
+POSTHOOK: query: create table chars (s char(11)) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@chars
+PREHOOK: query: insert into strings values
+("20 / 2 / 3"),
+("1969 12 31")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@strings
+POSTHOOK: query: insert into strings values
+("20 / 2 / 3"),
+("1969 12 31")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@strings
+POSTHOOK: Lineage: strings.s SCRIPT []
+PREHOOK: query: insert into varchars select * from strings
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+PREHOOK: Output: default@varchars
+POSTHOOK: query: insert into varchars select * from strings
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+POSTHOOK: Output: default@varchars
+POSTHOOK: Lineage: varchars.s EXPRESSION [(strings)strings.FieldSchema(name:s, type:string, comment:null), ]
+PREHOOK: query: insert into chars select * from strings
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+PREHOOK: Output: default@chars
+POSTHOOK: query: insert into chars select * from strings
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+POSTHOOK: Output: default@chars
+POSTHOOK: Lineage: chars.s EXPRESSION [(strings)strings.FieldSchema(name:s, type:string, comment:null), ]
+PREHOOK: query: from strings select cast (s as timestamp format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: from strings select cast (s as timestamp format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+2020-02-03 00:00:00
+1969-12-31 00:00:00
+PREHOOK: query: from strings select cast (s as date format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: from strings select cast (s as date format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+2020-02-03
+1969-12-31
+PREHOOK: query: from varchars select cast (s as timestamp format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchars
+#### A masked pattern was here ####
+POSTHOOK: query: from varchars select cast (s as timestamp format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchars
+#### A masked pattern was here ####
+2020-02-03 00:00:00
+1969-12-31 00:00:00
+PREHOOK: query: from varchars select cast (s as date format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchars
+#### A masked pattern was here ####
+POSTHOOK: query: from varchars select cast (s as date format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchars
+#### A masked pattern was here ####
+2020-02-03
+1969-12-31
+PREHOOK: query: from chars select cast (s as timestamp format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@chars
+#### A masked pattern was here ####
+POSTHOOK: query: from chars select cast (s as timestamp format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@chars
+#### A masked pattern was here ####
+2020-02-03 00:00:00
+1969-12-31 00:00:00
+PREHOOK: query: from chars select cast (s as date format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@chars
+#### A masked pattern was here ####
+POSTHOOK: query: from chars select cast (s as date format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@chars
+#### A masked pattern was here ####
+2020-02-03
+1969-12-31
+PREHOOK: query: explain from strings select cast (s as timestamp format "yyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: explain from strings select cast (s as timestamp format "yyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: strings
+          Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+            expressions: CAST( s AS timestamp FORMAT 'yyy.mm.dd' ) (type: timestamp)
+ outputColumnNames: _col0
+            Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
+
+PREHOOK: query: explain from strings select cast (s as date format "yyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: explain from strings select cast (s as date format "yyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: strings
+          Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: CAST( s AS date FORMAT 'yyy.mm.dd' ) (type: date)
+ outputColumnNames: _col0
+            Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
+
+PREHOOK: query: explain from timestamp1 select cast (t as string format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+POSTHOOK: query: explain from timestamp1 select cast (t as string format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: timestamp1
+          Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: CAST( t AS string FORMAT 'yyyy' ) (type: string)
+ outputColumnNames: _col0
+            Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
+
+PREHOOK: query: explain from timestamp1 select cast (t as varchar(12) format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+POSTHOOK: query: explain from timestamp1 select cast (t as varchar(12) format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: timestamp1
+          Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+            expressions: CAST( t AS varchar(12) FORMAT 'yyyy' ) (type: varchar(12))
+ outputColumnNames: _col0
+            Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
+
diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out
index 374e9c4..84a9243 100644
--- a/ql/src/test/results/clientpositive/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/show_functions.q.out
@@ -62,6 +62,7 @@ bucket_number
buildversion
cardinality_violation
case
+cast_format
cbrt
ceil
ceiling
@@ -349,6 +350,7 @@ POSTHOOK: query: SHOW FUNCTIONS '^c.*'
POSTHOOK: type: SHOWFUNCTIONS
cardinality_violation
case
+cast_format
cbrt
ceil
ceiling