Jackie-Jiang commented on code in PR #8779:
URL: https://github.com/apache/pinot/pull/8779#discussion_r883155571
##########
pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java:
##########
@@ -47,28 +50,75 @@ public class DateTimeFormatSpec {
public static final int MIN_FORMAT_TOKENS = 3;
public static final int MAX_FORMAT_TOKENS = 4;
+ public static final int FORMAT_TIMEFORMAT_POSITION_PIPE = 0;
+ public static final int FORMAT_PATTERN_POSITION_PIPE = 1;
+ public static final int FORMAT_UNIT_POSITION_PIPE = 1;
+ public static final int FORMAT_SIZE_POSITION_PIPE = 2;
+ public static final int FORMAT_TIMEZONE_POSITION_PIPE = 2;
+ public static final int MIN_FORMAT_TOKENS_PIPE = 1;
+ public static final int MAX_FORMAT_TOKENS_PIPE = 3;
+
private final String _format;
private final int _size;
private final DateTimeFormatUnitSpec _unitSpec;
private final DateTimeFormatPatternSpec _patternSpec;
public DateTimeFormatSpec(String format) {
_format = format;
- validateFormat(format);
- String[] formatTokens = StringUtils.split(format, COLON_SEPARATOR,
MAX_FORMAT_TOKENS);
- if (formatTokens.length == MAX_FORMAT_TOKENS) {
- _patternSpec = new
DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION],
- formatTokens[FORMAT_PATTERN_POSITION]);
- } else {
- _patternSpec = new
DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION]);
- }
- if (_patternSpec.getTimeFormat() == TimeFormat.TIMESTAMP) {
- // TIMESTAMP type stores millis since epoch
- _size = 1;
- _unitSpec = new DateTimeFormatUnitSpec("MILLISECONDS");
+ if (_format.matches(COLON_REGEX)) {
+ validateFormat(format);
Review Comment:
Not introduced in this PR, but I suggest doing the split first and then
validating the split parts, to avoid splitting twice. Or even better, perform
the validation along with the value processing to avoid all unnecessary overhead.
##########
pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java:
##########
@@ -47,28 +50,75 @@ public class DateTimeFormatSpec {
public static final int MIN_FORMAT_TOKENS = 3;
public static final int MAX_FORMAT_TOKENS = 4;
+ public static final int FORMAT_TIMEFORMAT_POSITION_PIPE = 0;
+ public static final int FORMAT_PATTERN_POSITION_PIPE = 1;
+ public static final int FORMAT_UNIT_POSITION_PIPE = 1;
+ public static final int FORMAT_SIZE_POSITION_PIPE = 2;
+ public static final int FORMAT_TIMEZONE_POSITION_PIPE = 2;
+ public static final int MIN_FORMAT_TOKENS_PIPE = 1;
+ public static final int MAX_FORMAT_TOKENS_PIPE = 3;
+
private final String _format;
private final int _size;
private final DateTimeFormatUnitSpec _unitSpec;
private final DateTimeFormatPatternSpec _patternSpec;
public DateTimeFormatSpec(String format) {
_format = format;
- validateFormat(format);
- String[] formatTokens = StringUtils.split(format, COLON_SEPARATOR,
MAX_FORMAT_TOKENS);
- if (formatTokens.length == MAX_FORMAT_TOKENS) {
- _patternSpec = new
DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION],
- formatTokens[FORMAT_PATTERN_POSITION]);
- } else {
- _patternSpec = new
DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION]);
- }
- if (_patternSpec.getTimeFormat() == TimeFormat.TIMESTAMP) {
- // TIMESTAMP type stores millis since epoch
- _size = 1;
- _unitSpec = new DateTimeFormatUnitSpec("MILLISECONDS");
+ if (_format.matches(COLON_REGEX)) {
+ validateFormat(format);
+ String[] formatTokens = StringUtils.split(format, COLON_SEPARATOR,
MAX_FORMAT_TOKENS);
+ if (formatTokens.length == MAX_FORMAT_TOKENS) {
+ _patternSpec = new
DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION],
+ formatTokens[FORMAT_PATTERN_POSITION]);
+ } else {
+ _patternSpec = new
DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION]);
+ }
+ if (_patternSpec.getTimeFormat() == TimeFormat.TIMESTAMP) {
+ // TIMESTAMP type stores millis since epoch
+ _size = 1;
+ _unitSpec = new DateTimeFormatUnitSpec("MILLISECONDS");
+ } else {
+ _size = Integer.parseInt(formatTokens[FORMAT_SIZE_POSITION]);
+ _unitSpec = new
DateTimeFormatUnitSpec(formatTokens[FORMAT_UNIT_POSITION]);
+ }
} else {
- _size = Integer.parseInt(formatTokens[FORMAT_SIZE_POSITION]);
- _unitSpec = new
DateTimeFormatUnitSpec(formatTokens[FORMAT_UNIT_POSITION]);
+ validatePipeFormat(format);
+ String[] formatTokens = StringUtils.split(format, PIPE_SEPARATOR,
MAX_FORMAT_TOKENS_PIPE);
+ if (formatTokens.length == MAX_FORMAT_TOKENS_PIPE) { //date with tz or
epoch with time-size
Review Comment:
I'd suggest branching on the first token (the time format) instead of on the
number of tokens. The logic would be much easier to understand that way.
##########
pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java:
##########
@@ -47,28 +50,75 @@ public class DateTimeFormatSpec {
public static final int MIN_FORMAT_TOKENS = 3;
public static final int MAX_FORMAT_TOKENS = 4;
+ public static final int FORMAT_TIMEFORMAT_POSITION_PIPE = 0;
+ public static final int FORMAT_PATTERN_POSITION_PIPE = 1;
+ public static final int FORMAT_UNIT_POSITION_PIPE = 1;
+ public static final int FORMAT_SIZE_POSITION_PIPE = 2;
+ public static final int FORMAT_TIMEZONE_POSITION_PIPE = 2;
+ public static final int MIN_FORMAT_TOKENS_PIPE = 1;
+ public static final int MAX_FORMAT_TOKENS_PIPE = 3;
+
private final String _format;
private final int _size;
private final DateTimeFormatUnitSpec _unitSpec;
private final DateTimeFormatPatternSpec _patternSpec;
public DateTimeFormatSpec(String format) {
_format = format;
- validateFormat(format);
- String[] formatTokens = StringUtils.split(format, COLON_SEPARATOR,
MAX_FORMAT_TOKENS);
- if (formatTokens.length == MAX_FORMAT_TOKENS) {
- _patternSpec = new
DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION],
- formatTokens[FORMAT_PATTERN_POSITION]);
- } else {
- _patternSpec = new
DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION]);
- }
- if (_patternSpec.getTimeFormat() == TimeFormat.TIMESTAMP) {
- // TIMESTAMP type stores millis since epoch
- _size = 1;
- _unitSpec = new DateTimeFormatUnitSpec("MILLISECONDS");
+ if (_format.matches(COLON_REGEX)) {
Review Comment:
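Let's not use regex to determine the format because it is too expensive. The
simplest way I can think of is to check if the first character is a digit (the
colon-separated format starts with the numeric size, while the pipe-separated
format starts with the time format).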
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]