This is an automated email from the ASF dual-hosted git repository.
xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 273e0ce0766 Safe Format Change in Apache Pinot: From Colon to Pipe Notation (#17079)
273e0ce0766 is described below
commit 273e0ce076623bf88958b3fdf10f73fc0e78d433
Author: Akanksha kedia <[email protected]>
AuthorDate: Wed Nov 5 01:25:00 2025 +0530
Safe Format Change in Apache Pinot: From Colon to Pipe Notation (#17079)
---
.../pinot/plugin/inputformat/avro/AvroUtils.java | 4 +--
.../plugin/inputformat/avro/AvroUtilsTest.java | 34 ++++++++++++----------
.../inputformat/protobuf/ProtoBufSchemaUtils.java | 4 +--
.../protobuf/ProtoBufSchemaUtilsTest.java | 2 +-
.../src/test/resources/complex_type_schema.json | 2 +-
.../java/org/apache/pinot/spi/utils/JsonUtils.java | 4 +--
.../org/apache/pinot/spi/utils/JsonUtilsTest.java | 8 ++---
7 files changed, 28 insertions(+), 30 deletions(-)
diff --git a/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroUtils.java b/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroUtils.java
index 4f2c83b9ee2..8eefe8a5a06 100644
--- a/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroUtils.java
+++ b/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroUtils.java
@@ -390,9 +390,7 @@ public class AvroUtils {
case DATE_TIME:
Preconditions.checkState(isSingleValueField, "Time field: %s cannot
be multi-valued", name);
Preconditions.checkNotNull(timeUnit, "Time unit cannot be null");
- // TODO: Switch to new format after releasing 0.11.0
- // "EPOCH|" + timeUnit.name()
- String format = "1:" + timeUnit.name() + ":EPOCH";
+ String format = "EPOCH|" + timeUnit.name();
String granularity = "1:" + timeUnit.name();
pinotSchema.addField(new DateTimeFieldSpec(name, dataType, format,
granularity));
break;
diff --git a/pinot-plugins/pinot-input-format/pinot-avro-base/src/test/java/org/apache/pinot/plugin/inputformat/avro/AvroUtilsTest.java b/pinot-plugins/pinot-input-format/pinot-avro-base/src/test/java/org/apache/pinot/plugin/inputformat/avro/AvroUtilsTest.java
index f4e31a8e743..5af642aca6b 100644
--- a/pinot-plugins/pinot-input-format/pinot-avro-base/src/test/java/org/apache/pinot/plugin/inputformat/avro/AvroUtilsTest.java
+++ b/pinot-plugins/pinot-input-format/pinot-avro-base/src/test/java/org/apache/pinot/plugin/inputformat/avro/AvroUtilsTest.java
@@ -67,7 +67,7 @@ public class AvroUtilsTest {
Schema expectedSchema = new
Schema.SchemaBuilder().addSingleValueDimension("d1", DataType.STRING)
.addSingleValueDimension("d2",
DataType.LONG).addSingleValueDimension("d3", DataType.STRING)
.addMetric("m1", DataType.INT).addMetric("m2", DataType.INT)
- .addDateTime("hoursSinceEpoch", DataType.LONG, "1:HOURS:EPOCH",
"1:HOURS").build();
+ .addDateTime("hoursSinceEpoch", DataType.LONG, "EPOCH|HOURS",
"1:HOURS").build();
assertEquals(inferredPinotSchema, expectedSchema);
}
@@ -83,11 +83,12 @@ public class AvroUtilsTest {
AvroUtils.getPinotSchemaFromAvroSchemaWithComplexTypeHandling(avroSchema,
fieldSpecMap, TimeUnit.HOURS,
new ArrayList<>(), ".",
ComplexTypeConfig.CollectionNotUnnestedToJson.NON_PRIMITIVE);
Schema expectedSchema =
- new Schema.SchemaBuilder().addSingleValueDimension("d1",
DataType.STRING).addMetric("m1", DataType.INT)
+ new Schema.SchemaBuilder().addSingleValueDimension("entries",
DataType.STRING)
.addSingleValueDimension("tuple.streetaddress", DataType.STRING)
- .addSingleValueDimension("tuple.city",
DataType.STRING).addSingleValueDimension("entries", DataType.STRING)
+ .addSingleValueDimension("tuple.city", DataType.STRING)
.addMultiValueDimension("d2", DataType.INT)
- .addDateTime("hoursSinceEpoch", DataType.LONG, "1:HOURS:EPOCH",
"1:HOURS").build();
+ .addSingleValueDimension("d1", DataType.STRING).addMetric("m1",
DataType.INT)
+ .addDateTime("hoursSinceEpoch", DataType.LONG, "EPOCH|HOURS",
"1:HOURS").build();
assertEquals(inferredPinotSchema, expectedSchema);
// unnest collection entries
@@ -95,11 +96,12 @@ public class AvroUtilsTest {
AvroUtils.getPinotSchemaFromAvroSchemaWithComplexTypeHandling(avroSchema,
fieldSpecMap, TimeUnit.HOURS,
Lists.newArrayList("entries"), ".",
ComplexTypeConfig.CollectionNotUnnestedToJson.NON_PRIMITIVE);
expectedSchema =
- new Schema.SchemaBuilder().addSingleValueDimension("d1",
DataType.STRING).addMetric("m1", DataType.INT)
+ new Schema.SchemaBuilder().addSingleValueDimension("entries.id",
DataType.LONG)
+ .addSingleValueDimension("entries.description", DataType.STRING)
.addSingleValueDimension("tuple.streetaddress", DataType.STRING)
- .addSingleValueDimension("tuple.city",
DataType.STRING).addSingleValueDimension("entries.id", DataType.LONG)
- .addSingleValueDimension("entries.description",
DataType.STRING).addMultiValueDimension("d2", DataType.INT)
- .addDateTime("hoursSinceEpoch", DataType.LONG, "1:HOURS:EPOCH",
"1:HOURS").build();
+ .addSingleValueDimension("tuple.city",
DataType.STRING).addMultiValueDimension("d2", DataType.INT)
+ .addSingleValueDimension("d1", DataType.STRING).addMetric("m1",
DataType.INT)
+ .addDateTime("hoursSinceEpoch", DataType.LONG, "EPOCH|HOURS",
"1:HOURS").build();
assertEquals(inferredPinotSchema, expectedSchema);
// change delimiter
@@ -107,11 +109,12 @@ public class AvroUtilsTest {
AvroUtils.getPinotSchemaFromAvroSchemaWithComplexTypeHandling(avroSchema,
fieldSpecMap, TimeUnit.HOURS,
Lists.newArrayList(), "_",
ComplexTypeConfig.CollectionNotUnnestedToJson.NON_PRIMITIVE);
expectedSchema =
- new Schema.SchemaBuilder().addSingleValueDimension("d1",
DataType.STRING).addMetric("m1", DataType.INT)
+ new Schema.SchemaBuilder().addSingleValueDimension("entries",
DataType.STRING)
.addSingleValueDimension("tuple_streetaddress", DataType.STRING)
- .addSingleValueDimension("tuple_city",
DataType.STRING).addSingleValueDimension("entries", DataType.STRING)
+ .addSingleValueDimension("tuple_city", DataType.STRING)
.addMultiValueDimension("d2", DataType.INT)
- .addDateTime("hoursSinceEpoch", DataType.LONG, "1:HOURS:EPOCH",
"1:HOURS").build();
+ .addSingleValueDimension("d1", DataType.STRING).addMetric("m1",
DataType.INT)
+ .addDateTime("hoursSinceEpoch", DataType.LONG, "EPOCH|HOURS",
"1:HOURS").build();
assertEquals(inferredPinotSchema, expectedSchema);
// change the handling of collection-to-json option, d2 will become string
@@ -119,12 +122,13 @@ public class AvroUtilsTest {
AvroUtils.getPinotSchemaFromAvroSchemaWithComplexTypeHandling(avroSchema,
fieldSpecMap, TimeUnit.HOURS,
Lists.newArrayList("entries"), ".",
ComplexTypeConfig.CollectionNotUnnestedToJson.ALL);
expectedSchema =
- new Schema.SchemaBuilder().addSingleValueDimension("d1",
DataType.STRING).addMetric("m1", DataType.INT)
- .addSingleValueDimension("tuple.streetaddress", DataType.STRING)
- .addSingleValueDimension("tuple.city",
DataType.STRING).addSingleValueDimension("entries.id", DataType.LONG)
+ new Schema.SchemaBuilder().addSingleValueDimension("entries.id",
DataType.LONG)
.addSingleValueDimension("entries.description", DataType.STRING)
+ .addSingleValueDimension("tuple.streetaddress", DataType.STRING)
+ .addSingleValueDimension("tuple.city", DataType.STRING)
.addSingleValueDimension("d2", DataType.STRING)
- .addDateTime("hoursSinceEpoch", DataType.LONG, "1:HOURS:EPOCH",
"1:HOURS").build();
+ .addSingleValueDimension("d1", DataType.STRING).addMetric("m1",
DataType.INT)
+ .addDateTime("hoursSinceEpoch", DataType.LONG, "EPOCH|HOURS",
"1:HOURS").build();
assertEquals(inferredPinotSchema, expectedSchema);
}
}
diff --git a/pinot-plugins/pinot-input-format/pinot-protobuf/src/main/java/org/apache/pinot/plugin/inputformat/protobuf/ProtoBufSchemaUtils.java b/pinot-plugins/pinot-input-format/pinot-protobuf/src/main/java/org/apache/pinot/plugin/inputformat/protobuf/ProtoBufSchemaUtils.java
index 95af8ee26c5..7112fd2fd7e 100644
--- a/pinot-plugins/pinot-input-format/pinot-protobuf/src/main/java/org/apache/pinot/plugin/inputformat/protobuf/ProtoBufSchemaUtils.java
+++ b/pinot-plugins/pinot-input-format/pinot-protobuf/src/main/java/org/apache/pinot/plugin/inputformat/protobuf/ProtoBufSchemaUtils.java
@@ -166,9 +166,7 @@ public class ProtoBufSchemaUtils {
case DATE_TIME:
Preconditions.checkState(isSingleValueField, "Time field: %s cannot
be multi-valued", name);
Preconditions.checkNotNull(timeUnit, "Time unit cannot be null");
- // TODO: Switch to new format after releasing 0.11.0
- // "EPOCH|" + timeUnit.name()
- String format = "1:" + timeUnit.name() + ":EPOCH";
+ String format = "EPOCH|" + timeUnit.name();
String granularity = "1:" + timeUnit.name();
pinotSchema.addField(new DateTimeFieldSpec(name, dataType, format,
granularity));
break;
diff --git a/pinot-plugins/pinot-input-format/pinot-protobuf/src/test/java/org/apache/pinot/plugin/inputformat/protobuf/ProtoBufSchemaUtilsTest.java b/pinot-plugins/pinot-input-format/pinot-protobuf/src/test/java/org/apache/pinot/plugin/inputformat/protobuf/ProtoBufSchemaUtilsTest.java
index 518a20c67df..38ec4a79d1d 100644
--- a/pinot-plugins/pinot-input-format/pinot-protobuf/src/test/java/org/apache/pinot/plugin/inputformat/protobuf/ProtoBufSchemaUtilsTest.java
+++ b/pinot-plugins/pinot-input-format/pinot-protobuf/src/test/java/org/apache/pinot/plugin/inputformat/protobuf/ProtoBufSchemaUtilsTest.java
@@ -197,7 +197,7 @@ public class ProtoBufSchemaUtilsTest {
".");
FieldSpec fieldSpec = schema.getFieldSpecFor("test_message.long_field");
FieldSpec expectedFieldSpec = new
DateTimeFieldSpec("test_message.long_field", FieldSpec.DataType.LONG,
- "1:SECONDS:EPOCH", "1:SECONDS");
+ "EPOCH|SECONDS", "1:SECONDS");
assertEquals(fieldSpec, expectedFieldSpec);
fieldSpec = schema.getFieldSpecFor("test_message.int_field");
diff --git a/pinot-plugins/pinot-input-format/pinot-protobuf/src/test/resources/complex_type_schema.json b/pinot-plugins/pinot-input-format/pinot-protobuf/src/test/resources/complex_type_schema.json
index 7569c4a1295..6b31334376c 100644
--- a/pinot-plugins/pinot-input-format/pinot-protobuf/src/test/resources/complex_type_schema.json
+++ b/pinot-plugins/pinot-input-format/pinot-protobuf/src/test/resources/complex_type_schema.json
@@ -135,7 +135,7 @@
"name" : "test_message.long_field",
"dataType" : "LONG",
"notNull" : false,
- "format" : "1:MILLISECONDS:EPOCH",
+ "format" : "EPOCH|MILLISECONDS",
"granularity" : "1:MILLISECONDS"
} ]
}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java
index 523bf337abb..46d5607406c 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java
@@ -722,9 +722,7 @@ public class JsonUtils {
case DATE_TIME:
Preconditions.checkState(isSingleValueField, "Time field: %s cannot
be multi-valued", name);
Preconditions.checkNotNull(timeUnit, "Time unit cannot be null");
- // TODO: Switch to new format after releasing 0.11.0
- // "EPOCH|" + timeUnit.name()
- String format = "1:" + timeUnit.name() + ":EPOCH";
+ String format = "EPOCH|" + timeUnit.name();
String granularity = "1:" + timeUnit.name();
pinotSchema.addField(new DateTimeFieldSpec(name, dataType, format,
granularity));
break;
diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/utils/JsonUtilsTest.java b/pinot-spi/src/test/java/org/apache/pinot/spi/utils/JsonUtilsTest.java
index 2fa9171b6de..bc975adff9d 100644
--- a/pinot-spi/src/test/java/org/apache/pinot/spi/utils/JsonUtilsTest.java
+++ b/pinot-spi/src/test/java/org/apache/pinot/spi/utils/JsonUtilsTest.java
@@ -587,7 +587,7 @@ public class JsonUtilsTest {
.addSingleValueDimension("tuple.address.city",
FieldSpec.DataType.STRING)
.addSingleValueDimension("entries", FieldSpec.DataType.STRING)
.addMultiValueDimension("d2", FieldSpec.DataType.INT)
- .addDateTime("hoursSinceEpoch", FieldSpec.DataType.INT,
"1:HOURS:EPOCH", "1:HOURS").build();
+ .addDateTime("hoursSinceEpoch", FieldSpec.DataType.INT, "EPOCH|HOURS",
"1:HOURS").build();
Assert.assertEquals(inferredPinotSchema, expectedSchema);
// unnest collection entries
@@ -601,7 +601,7 @@ public class JsonUtilsTest {
.addSingleValueDimension("entries.id", FieldSpec.DataType.INT)
.addSingleValueDimension("entries.description",
FieldSpec.DataType.STRING)
.addMultiValueDimension("d2", FieldSpec.DataType.INT)
- .addDateTime("hoursSinceEpoch", FieldSpec.DataType.INT,
"1:HOURS:EPOCH", "1:HOURS").build();
+ .addDateTime("hoursSinceEpoch", FieldSpec.DataType.INT, "EPOCH|HOURS",
"1:HOURS").build();
Assert.assertEquals(inferredPinotSchema, expectedSchema);
// change delimiter
@@ -614,7 +614,7 @@ public class JsonUtilsTest {
.addSingleValueDimension("tuple_address_city",
FieldSpec.DataType.STRING)
.addSingleValueDimension("entries", FieldSpec.DataType.STRING)
.addMultiValueDimension("d2", FieldSpec.DataType.INT)
- .addDateTime("hoursSinceEpoch", FieldSpec.DataType.INT,
"1:HOURS:EPOCH", "1:HOURS").build();
+ .addDateTime("hoursSinceEpoch", FieldSpec.DataType.INT, "EPOCH|HOURS",
"1:HOURS").build();
Assert.assertEquals(inferredPinotSchema, expectedSchema);
// change the handling of collection-to-json option, d2 will become string
@@ -628,7 +628,7 @@ public class JsonUtilsTest {
.addSingleValueDimension("entries.id", FieldSpec.DataType.INT)
.addSingleValueDimension("entries.description",
FieldSpec.DataType.STRING)
.addSingleValueDimension("d2", FieldSpec.DataType.STRING)
- .addDateTime("hoursSinceEpoch", FieldSpec.DataType.INT,
"1:HOURS:EPOCH", "1:HOURS").build();
+ .addDateTime("hoursSinceEpoch", FieldSpec.DataType.INT, "EPOCH|HOURS",
"1:HOURS").build();
Assert.assertEquals(inferredPinotSchema, expectedSchema);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]