This is an automated email from the ASF dual-hosted git repository.
raghavyadav01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new ff3c92fab5f OPEN_STRUCT — SPI utilities, index registration, stats
constructors (PR 2a/4) (#18710)
ff3c92fab5f is described below
commit ff3c92fab5f7e24e9e225f9ea66d58db346ee9e6
Author: tarun11Mavani <[email protected]>
AuthorDate: Fri Jun 19 00:24:41 2026 +0530
OPEN_STRUCT — SPI utilities, index registration, stats constructors (PR
2a/4) (#18710)
* feat(open_struct): OPEN_STRUCT infrastructure — SPI utilities, index
registration, stats constructors
Split from PR #18643 (storage layer). This PR adds the foundational pieces
that the ingestion/load pipeline (PR 2b) builds on:
- OpenStructTypeInference: value→DataType inference for OPEN_STRUCT keys
- OpenStructNaming: isMaterializedOpenStructColumn, parseParentColumn,
parseKey helpers
- OpenStructSupportedIndexes: vetted allowlist (forward, dictionary,
inverted, range, bloom)
- OpenStructIndexType + OpenStructIndexPlugin: index SPI registration,
per-key index validation
- FieldIndexConfigsUtil.fromFieldConfig: per-FieldConfig index config
builder (no TableConfig required)
- V1Constants: PARENT_COLUMN, HAS_SPARSE_COLUMN metadata keys
- OpenStructDataSource: default getMapValue(docId) SPI method
- Stats collector FieldSpec constructors: AbstractColumnStatisticsCollector
+ 7 scalar collectors
- StatsCollectorUtil: OPEN_STRUCT cases + createStatsCollector(FieldSpec,
FieldConfig) overload
- ForwardIndexType: skip forward index creation for OPEN_STRUCT parents
- PinotDataType: OPEN_STRUCT→MAP ingestion coercion
- TableConfigUtils: reject $ in column names when OPEN_STRUCT is present
Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>
* fix(open_struct): return null from createMutableIndex instead of throwing
MutableSegmentImpl iterates all registered IndexTypes and calls
createMutableIndex for each column. The OPEN_STRUCT IndexType was
throwing UnsupportedOperationException, which crashed segment creation
for every realtime table — even those without OPEN_STRUCT columns.
The SPI contract (IndexType.createMutableIndex) specifies that
implementations should return null when the index type doesn't support
mutable indexes. createMutableIndex now follows that contract and returns
null instead of throwing.
Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>
* fix(open_struct): guard against NPE in createIndexCreator
shouldCreateIndex() returned true for OPEN_STRUCT columns but
createIndexCreator() returned null, which would cause a
NullPointerException on the immutable segment path. shouldCreateIndex()
now returns false until the real creator lands in PR 2b, and
createIndexCreator() throws UnsupportedOperationException as a fail-fast
guard.
Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>
* fix(open_struct): reject '$' in OPEN_STRUCT parent column names
A parent column named 'a$b' would pass the previous check (OPEN_STRUCT
columns were exempt) but produce children 'a$b$key'. OpenStructNaming
splits on the first '$', so parseParentColumn('a$b$key') returns 'a'
instead of 'a$b', corrupting the name mapping at segment load time.
Remove the DataType.OPEN_STRUCT exemption so the '$' guard applies to
all columns — regular and OPEN_STRUCT parents alike.
Adds testValidateOpenStructSeparatorInColumnNames covering: valid schema,
'$' in a non-OPEN_STRUCT column, and '$' in an OPEN_STRUCT parent name.
---------
Co-authored-by: Claude Opus 4.6 (1M context) <[email protected]>
---
.../stats/AbstractColumnStatisticsCollector.java | 29 +++-
.../BigDecimalColumnPreIndexStatsCollector.java | 9 +
.../stats/BytesColumnPreIndexStatsCollector.java | 9 +
.../stats/DoubleColumnPreIndexStatsCollector.java | 9 +
.../stats/FloatColumnPreIndexStatsCollector.java | 9 +
.../stats/IntColumnPreIndexStatsCollector.java | 9 +
.../stats/LongColumnPreIndexStatsCollector.java | 9 +
.../creator/impl/stats/StatsCollectorUtil.java | 33 +++-
.../stats/StringColumnPreIndexStatsCollector.java | 10 ++
.../segment/index/forward/ForwardIndexType.java | 5 +
.../index/openstruct/OpenStructIndexPlugin.java | 23 ++-
.../index/openstruct/OpenStructIndexType.java | 183 ++++++++++++++++++++
.../openstruct/OpenStructSupportedIndexes.java | 38 +++++
.../segment/local/utils/TableConfigUtils.java | 18 ++
.../stats/StatsCollectorUtilFieldSpecTest.java | 54 ++++++
.../index/openstruct/OpenStructIndexTypeTest.java | 86 ++++++++++
.../segment/local/utils/TableConfigUtilsTest.java | 38 +++++
.../org/apache/pinot/segment/spi/V1Constants.java | 3 +
.../spi/datasource/OpenStructDataSource.java | 10 ++
.../segment/spi/index/FieldIndexConfigsUtil.java | 46 +++++
.../spi/index/FieldIndexConfigsUtilTest.java | 189 +++++++++++++++++++++
.../apache/pinot/spi/data/OpenStructNaming.java | 37 ++++
.../pinot/spi/data/OpenStructTypeInference.java | 68 ++++++++
.../org/apache/pinot/spi/utils/PinotDataType.java | 5 +
.../pinot/spi/data/OpenStructNamingTest.java | 40 ++++-
.../spi/data/OpenStructTypeInferenceTest.java | 66 +++++++
26 files changed, 1013 insertions(+), 22 deletions(-)
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/AbstractColumnStatisticsCollector.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/AbstractColumnStatisticsCollector.java
index e548248dea1..9bf8f7f3d48 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/AbstractColumnStatisticsCollector.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/AbstractColumnStatisticsCollector.java
@@ -56,13 +56,28 @@ public abstract class AbstractColumnStatisticsCollector
implements ColumnStatist
protected Comparable _previousValue = null;
public AbstractColumnStatisticsCollector(String column, StatsCollectorConfig
statsCollectorConfig) {
- _fieldSpec = statsCollectorConfig.getFieldSpecForColumn(column);
- Preconditions.checkArgument(_fieldSpec != null, "Failed to find column:
%s", column);
- _storedType = _fieldSpec.getDataType().getStoredType();
- _sorted = _fieldSpec.isSingleValueField();
- _fieldConfig = statsCollectorConfig.getFieldConfigForColumn(column);
- _partitionFunction = statsCollectorConfig.getPartitionFunction(column);
- _partitions = _partitionFunction != null ? new HashSet<>() : null;
+ this(getRequiredFieldSpec(column, statsCollectorConfig),
+ statsCollectorConfig.getFieldConfigForColumn(column),
+ statsCollectorConfig.getPartitionFunction(column));
+ }
+
+ /// Constructs a collector directly from a [FieldSpec], without a
[StatsCollectorConfig]. Lets callers
+ /// that operate outside schema-driven segment generation (e.g. OPEN_STRUCT
materialized child columns,
+ /// whose synthetic columns exist in no schema) reuse the standard stats
collectors.
+ public AbstractColumnStatisticsCollector(FieldSpec fieldSpec, @Nullable
FieldConfig fieldConfig,
+ @Nullable PartitionFunction partitionFunction) {
+ _fieldSpec = fieldSpec;
+ _storedType = fieldSpec.getDataType().getStoredType();
+ _sorted = fieldSpec.isSingleValueField();
+ _fieldConfig = fieldConfig;
+ _partitionFunction = partitionFunction;
+ _partitions = partitionFunction != null ? new HashSet<>() : null;
+ }
+
+ private static FieldSpec getRequiredFieldSpec(String column,
StatsCollectorConfig statsCollectorConfig) {
+ FieldSpec fieldSpec = statsCollectorConfig.getFieldSpecForColumn(column);
+ Preconditions.checkArgument(fieldSpec != null, "Failed to find column:
%s", column);
+ return fieldSpec;
}
/**
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/BigDecimalColumnPreIndexStatsCollector.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/BigDecimalColumnPreIndexStatsCollector.java
index dd0dcfe00b9..926611d2f7f 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/BigDecimalColumnPreIndexStatsCollector.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/BigDecimalColumnPreIndexStatsCollector.java
@@ -21,7 +21,11 @@ package
org.apache.pinot.segment.local.segment.creator.impl.stats;
import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
import java.math.BigDecimal;
import java.util.Arrays;
+import javax.annotation.Nullable;
import org.apache.pinot.segment.spi.creator.StatsCollectorConfig;
+import org.apache.pinot.segment.spi.partition.PartitionFunction;
+import org.apache.pinot.spi.config.table.FieldConfig;
+import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.utils.BigDecimalUtils;
@@ -40,6 +44,11 @@ public class BigDecimalColumnPreIndexStatsCollector extends
AbstractColumnStatis
super(column, statsCollectorConfig);
}
+ public BigDecimalColumnPreIndexStatsCollector(FieldSpec fieldSpec, @Nullable
FieldConfig fieldConfig,
+ @Nullable PartitionFunction partitionFunction) {
+ super(fieldSpec, fieldConfig, partitionFunction);
+ }
+
@Override
public void collect(Object entry) {
assert !_sealed;
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/BytesColumnPreIndexStatsCollector.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/BytesColumnPreIndexStatsCollector.java
index 371e5bbf51d..169d4d61f44 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/BytesColumnPreIndexStatsCollector.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/BytesColumnPreIndexStatsCollector.java
@@ -21,7 +21,11 @@ package
org.apache.pinot.segment.local.segment.creator.impl.stats;
import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
import java.util.Arrays;
import java.util.Set;
+import javax.annotation.Nullable;
import org.apache.pinot.segment.spi.creator.StatsCollectorConfig;
+import org.apache.pinot.segment.spi.partition.PartitionFunction;
+import org.apache.pinot.spi.config.table.FieldConfig;
+import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.utils.ByteArray;
@@ -40,6 +44,11 @@ public class BytesColumnPreIndexStatsCollector extends
AbstractColumnStatisticsC
super(column, statsCollectorConfig);
}
+ public BytesColumnPreIndexStatsCollector(FieldSpec fieldSpec, @Nullable
FieldConfig fieldConfig,
+ @Nullable PartitionFunction partitionFunction) {
+ super(fieldSpec, fieldConfig, partitionFunction);
+ }
+
@Override
public void collect(Object entry) {
assert !_sealed;
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/DoubleColumnPreIndexStatsCollector.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/DoubleColumnPreIndexStatsCollector.java
index 8024460c004..094a4cd6240 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/DoubleColumnPreIndexStatsCollector.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/DoubleColumnPreIndexStatsCollector.java
@@ -21,7 +21,11 @@ package
org.apache.pinot.segment.local.segment.creator.impl.stats;
import it.unimi.dsi.fastutil.doubles.DoubleOpenHashSet;
import it.unimi.dsi.fastutil.doubles.DoubleSet;
import java.util.Arrays;
+import javax.annotation.Nullable;
import org.apache.pinot.segment.spi.creator.StatsCollectorConfig;
+import org.apache.pinot.segment.spi.partition.PartitionFunction;
+import org.apache.pinot.spi.config.table.FieldConfig;
+import org.apache.pinot.spi.data.FieldSpec;
public class DoubleColumnPreIndexStatsCollector extends
AbstractColumnStatisticsCollector {
@@ -34,6 +38,11 @@ public class DoubleColumnPreIndexStatsCollector extends
AbstractColumnStatistics
super(column, statsCollectorConfig);
}
+ public DoubleColumnPreIndexStatsCollector(FieldSpec fieldSpec, @Nullable
FieldConfig fieldConfig,
+ @Nullable PartitionFunction partitionFunction) {
+ super(fieldSpec, fieldConfig, partitionFunction);
+ }
+
@Override
public void collect(Object entry) {
assert !_sealed;
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/FloatColumnPreIndexStatsCollector.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/FloatColumnPreIndexStatsCollector.java
index e26f1529335..a5b2604f53b 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/FloatColumnPreIndexStatsCollector.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/FloatColumnPreIndexStatsCollector.java
@@ -21,7 +21,11 @@ package
org.apache.pinot.segment.local.segment.creator.impl.stats;
import it.unimi.dsi.fastutil.floats.FloatOpenHashSet;
import it.unimi.dsi.fastutil.floats.FloatSet;
import java.util.Arrays;
+import javax.annotation.Nullable;
import org.apache.pinot.segment.spi.creator.StatsCollectorConfig;
+import org.apache.pinot.segment.spi.partition.PartitionFunction;
+import org.apache.pinot.spi.config.table.FieldConfig;
+import org.apache.pinot.spi.data.FieldSpec;
public class FloatColumnPreIndexStatsCollector extends
AbstractColumnStatisticsCollector {
@@ -34,6 +38,11 @@ public class FloatColumnPreIndexStatsCollector extends
AbstractColumnStatisticsC
super(column, statsCollectorConfig);
}
+ public FloatColumnPreIndexStatsCollector(FieldSpec fieldSpec, @Nullable
FieldConfig fieldConfig,
+ @Nullable PartitionFunction partitionFunction) {
+ super(fieldSpec, fieldConfig, partitionFunction);
+ }
+
@Override
public void collect(Object entry) {
assert !_sealed;
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/IntColumnPreIndexStatsCollector.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/IntColumnPreIndexStatsCollector.java
index 249bf894c55..1c10b708973 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/IntColumnPreIndexStatsCollector.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/IntColumnPreIndexStatsCollector.java
@@ -21,7 +21,11 @@ package
org.apache.pinot.segment.local.segment.creator.impl.stats;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.ints.IntSet;
import java.util.Arrays;
+import javax.annotation.Nullable;
import org.apache.pinot.segment.spi.creator.StatsCollectorConfig;
+import org.apache.pinot.segment.spi.partition.PartitionFunction;
+import org.apache.pinot.spi.config.table.FieldConfig;
+import org.apache.pinot.spi.data.FieldSpec;
public class IntColumnPreIndexStatsCollector extends
AbstractColumnStatisticsCollector {
@@ -34,6 +38,11 @@ public class IntColumnPreIndexStatsCollector extends
AbstractColumnStatisticsCol
super(column, statsCollectorConfig);
}
+ public IntColumnPreIndexStatsCollector(FieldSpec fieldSpec, @Nullable
FieldConfig fieldConfig,
+ @Nullable PartitionFunction partitionFunction) {
+ super(fieldSpec, fieldConfig, partitionFunction);
+ }
+
@Override
public void collect(Object entry) {
assert !_sealed;
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/LongColumnPreIndexStatsCollector.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/LongColumnPreIndexStatsCollector.java
index c8d98f1d5d5..f19d9438888 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/LongColumnPreIndexStatsCollector.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/LongColumnPreIndexStatsCollector.java
@@ -21,7 +21,11 @@ package
org.apache.pinot.segment.local.segment.creator.impl.stats;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongSet;
import java.util.Arrays;
+import javax.annotation.Nullable;
import org.apache.pinot.segment.spi.creator.StatsCollectorConfig;
+import org.apache.pinot.segment.spi.partition.PartitionFunction;
+import org.apache.pinot.spi.config.table.FieldConfig;
+import org.apache.pinot.spi.data.FieldSpec;
public class LongColumnPreIndexStatsCollector extends
AbstractColumnStatisticsCollector {
@@ -34,6 +38,11 @@ public class LongColumnPreIndexStatsCollector extends
AbstractColumnStatisticsCo
super(column, statsCollectorConfig);
}
+ public LongColumnPreIndexStatsCollector(FieldSpec fieldSpec, @Nullable
FieldConfig fieldConfig,
+ @Nullable PartitionFunction partitionFunction) {
+ super(fieldSpec, fieldConfig, partitionFunction);
+ }
+
@Override
public void collect(Object entry) {
assert !_sealed;
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/StatsCollectorUtil.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/StatsCollectorUtil.java
index 43a6f8d0fec..f94e4cc29b0 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/StatsCollectorUtil.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/StatsCollectorUtil.java
@@ -18,10 +18,12 @@
*/
package org.apache.pinot.segment.local.segment.creator.impl.stats;
+import javax.annotation.Nullable;
import org.apache.pinot.segment.local.utils.ClusterConfigForTable;
import org.apache.pinot.segment.spi.creator.StatsCollectorConfig;
import org.apache.pinot.segment.spi.index.FieldIndexConfigs;
import org.apache.pinot.segment.spi.index.StandardIndexes;
+import org.apache.pinot.spi.config.table.FieldConfig;
import org.apache.pinot.spi.data.FieldSpec;
@@ -43,7 +45,9 @@ public final class StatsCollectorUtil {
public static AbstractColumnStatisticsCollector createStatsCollector(String
columnName, FieldSpec fieldSpec,
FieldIndexConfigs indexConfig, StatsCollectorConfig
statsCollectorConfig) {
boolean dictionaryEnabled =
indexConfig.getConfig(StandardIndexes.dictionary()).isEnabled();
- if (!dictionaryEnabled && fieldSpec.getDataType().getStoredType() !=
FieldSpec.DataType.MAP) {
+ FieldSpec.DataType storedType = fieldSpec.getDataType().getStoredType();
+ if (!dictionaryEnabled && storedType != FieldSpec.DataType.MAP
+ && storedType != FieldSpec.DataType.OPEN_STRUCT) {
if
(ClusterConfigForTable.useOptimizedNoDictCollector(statsCollectorConfig.getTableConfig()))
{
return new NoDictColumnStatisticsCollector(columnName,
statsCollectorConfig);
}
@@ -64,9 +68,36 @@ public final class StatsCollectorUtil {
case BYTES:
return new BytesColumnPreIndexStatsCollector(columnName,
statsCollectorConfig);
case MAP:
+ case OPEN_STRUCT:
return new MapColumnPreIndexStatsCollector(columnName,
statsCollectorConfig);
default:
throw new IllegalStateException("Unsupported data type: " +
fieldSpec.getDataType());
}
}
+
+ /// Creates a scalar stats collector directly from a [FieldSpec], for
callers without a
+ /// [StatsCollectorConfig]/[Schema] (e.g. OPEN_STRUCT materialized child
columns). Skips the
+ /// no-dictionary-optimization branch, which requires a TableConfig that
synthetic columns lack.
+ public static AbstractColumnStatisticsCollector
createStatsCollector(FieldSpec fieldSpec,
+ @Nullable FieldConfig fieldConfig) {
+ switch (fieldSpec.getDataType().getStoredType()) {
+ case INT:
+ return new IntColumnPreIndexStatsCollector(fieldSpec, fieldConfig,
null);
+ case LONG:
+ return new LongColumnPreIndexStatsCollector(fieldSpec, fieldConfig,
null);
+ case FLOAT:
+ return new FloatColumnPreIndexStatsCollector(fieldSpec, fieldConfig,
null);
+ case DOUBLE:
+ return new DoubleColumnPreIndexStatsCollector(fieldSpec, fieldConfig,
null);
+ case BIG_DECIMAL:
+ return new BigDecimalColumnPreIndexStatsCollector(fieldSpec,
fieldConfig, null);
+ case STRING:
+ return new StringColumnPreIndexStatsCollector(fieldSpec, fieldConfig,
null);
+ case BYTES:
+ return new BytesColumnPreIndexStatsCollector(fieldSpec, fieldConfig,
null);
+ default:
+ throw new IllegalStateException("Unsupported stored type for
OPEN_STRUCT child: "
+ + fieldSpec.getDataType().getStoredType());
+ }
+ }
}
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/StringColumnPreIndexStatsCollector.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/StringColumnPreIndexStatsCollector.java
index a01afe7b49a..26a168d4986 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/StringColumnPreIndexStatsCollector.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/StringColumnPreIndexStatsCollector.java
@@ -27,7 +27,9 @@ import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
import java.io.IOException;
import java.util.Arrays;
import java.util.Set;
+import javax.annotation.Nullable;
import org.apache.pinot.segment.spi.creator.StatsCollectorConfig;
+import org.apache.pinot.segment.spi.partition.PartitionFunction;
import org.apache.pinot.spi.config.table.FieldConfig;
import org.apache.pinot.spi.data.FieldSpec;
@@ -50,6 +52,14 @@ public class StringColumnPreIndexStatsCollector extends
AbstractColumnStatistics
}
}
+ public StringColumnPreIndexStatsCollector(FieldSpec fieldSpec, @Nullable
FieldConfig fieldConfig,
+ @Nullable PartitionFunction partitionFunction) {
+ super(fieldSpec, fieldConfig, partitionFunction);
+ if (_fieldConfig != null && _fieldConfig.getCompressionCodec() ==
FieldConfig.CompressionCodec.CLP) {
+ _clpStatsCollector = new CLPStatsCollector();
+ }
+ }
+
@Override
public void collect(Object entry) {
assert !_sealed;
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/forward/ForwardIndexType.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/forward/ForwardIndexType.java
index 8136a82297f..c2e6388816c 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/forward/ForwardIndexType.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/forward/ForwardIndexType.java
@@ -296,6 +296,11 @@ public class ForwardIndexType extends
AbstractIndexType<ForwardIndexConfig, Forw
}
}
+ @Override
+ public boolean shouldCreateIndex(IndexCreationContext context,
ForwardIndexConfig indexConfig) {
+ return context.getFieldSpec().getDataType() !=
FieldSpec.DataType.OPEN_STRUCT;
+ }
+
@Override
public ForwardIndexCreator createIndexCreator(IndexCreationContext context,
ForwardIndexConfig indexConfig)
throws Exception {
diff --git
a/pinot-spi/src/test/java/org/apache/pinot/spi/data/OpenStructNamingTest.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/openstruct/OpenStructIndexPlugin.java
similarity index 61%
copy from
pinot-spi/src/test/java/org/apache/pinot/spi/data/OpenStructNamingTest.java
copy to
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/openstruct/OpenStructIndexPlugin.java
index 6adad485c39..f6d909fcc52 100644
---
a/pinot-spi/src/test/java/org/apache/pinot/spi/data/OpenStructNamingTest.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/openstruct/OpenStructIndexPlugin.java
@@ -16,22 +16,19 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.pinot.spi.data;
+package org.apache.pinot.segment.local.segment.index.openstruct;
-import org.testng.annotations.Test;
+import com.google.auto.service.AutoService;
+import org.apache.pinot.segment.spi.index.IndexPlugin;
-import static org.testng.Assert.assertEquals;
+/// {@link IndexPlugin} that registers {@link OpenStructIndexType} via
`@AutoService`.
+@AutoService(IndexPlugin.class)
+public class OpenStructIndexPlugin implements IndexPlugin<OpenStructIndexType>
{
+ public static final OpenStructIndexType INSTANCE = new OpenStructIndexType();
-public class OpenStructNamingTest {
-
- @Test
- public void testMaterializedColumnName() {
- assertEquals(OpenStructNaming.materializedColumnName("metrics",
"tenancy"), "metrics$tenancy");
- }
-
- @Test
- public void testSparseColumnName() {
- assertEquals(OpenStructNaming.sparseColumnName("metrics"),
"metrics$__sparse__");
+ @Override
+ public OpenStructIndexType getIndexType() {
+ return INSTANCE;
}
}
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/openstruct/OpenStructIndexType.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/openstruct/OpenStructIndexType.java
new file mode 100644
index 00000000000..c95bcef2dad
--- /dev/null
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/openstruct/OpenStructIndexType.java
@@ -0,0 +1,183 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.openstruct;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.google.common.base.Preconditions;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import javax.annotation.Nullable;
+import org.apache.pinot.segment.spi.ColumnMetadata;
+import org.apache.pinot.segment.spi.creator.IndexCreationContext;
+import org.apache.pinot.segment.spi.index.AbstractIndexType;
+import org.apache.pinot.segment.spi.index.ColumnConfigDeserializer;
+import org.apache.pinot.segment.spi.index.FieldIndexConfigs;
+import org.apache.pinot.segment.spi.index.IndexConfigDeserializer;
+import org.apache.pinot.segment.spi.index.IndexHandler;
+import org.apache.pinot.segment.spi.index.IndexReaderFactory;
+import org.apache.pinot.segment.spi.index.StandardIndexes;
+import
org.apache.pinot.segment.spi.index.creator.ColumnarOpenStructIndexCreator;
+import org.apache.pinot.segment.spi.index.mutable.MutableIndex;
+import org.apache.pinot.segment.spi.index.mutable.provider.MutableIndexContext;
+import org.apache.pinot.segment.spi.index.reader.OpenStructIndexReader;
+import org.apache.pinot.segment.spi.store.SegmentDirectory;
+import org.apache.pinot.spi.config.table.FieldConfig;
+import org.apache.pinot.spi.config.table.OpenStructIndexConfig;
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.Schema;
+
+
+/// Index type for the OPEN_STRUCT index on OPEN_STRUCT columns.
+///
+/// The OPEN_STRUCT index has no reader of its own — per-key materialized
columns are loaded by
+/// the standard `PhysicalColumnIndexContainer` and served via standard index
readers. This
+/// type exists for SPI registration, config deserialization, and validation.
+public class OpenStructIndexType
+ extends AbstractIndexType<OpenStructIndexConfig, OpenStructIndexReader,
ColumnarOpenStructIndexCreator> {
+
+ public static final String INDEX_DISPLAY_NAME = "open_struct";
+ private static final List<String> EXTENSIONS =
Collections.singletonList(".open_struct.idx");
+
+ protected OpenStructIndexType() {
+ super(StandardIndexes.OPEN_STRUCT_ID);
+ }
+
+ @Override
+ public Class<OpenStructIndexConfig> getIndexConfigClass() {
+ return OpenStructIndexConfig.class;
+ }
+
+ @Override
+ public OpenStructIndexConfig getDefaultConfig() {
+ return OpenStructIndexConfig.DEFAULT;
+ }
+
+ @Override
+ public void validate(FieldIndexConfigs indexConfigs, FieldSpec fieldSpec,
TableConfig tableConfig) {
+ // The default OpenStructIndexConfig is auto-applied to every column; only
enforce on OPEN_STRUCT
+ // fields. Non-OPEN_STRUCT columns cannot meaningfully opt in to this
index.
+ if (fieldSpec.getDataType() != FieldSpec.DataType.OPEN_STRUCT) {
+ return;
+ }
+ OpenStructIndexConfig config = indexConfigs.getConfig(this);
+ if (config.isEnabled()) {
+ Preconditions.checkState(fieldSpec.isSingleValueField(),
+ "OPEN_STRUCT index can only be created on single-value columns, but
column '%s' is multi-value",
+ fieldSpec.getName());
+ validatePerKeyIndexes(config);
+ }
+ }
+
+ private void validatePerKeyIndexes(OpenStructIndexConfig config) {
+ List<FieldConfig> fieldConfigs = new ArrayList<>();
+ if (config.getValueFieldConfigs() != null) {
+ fieldConfigs.addAll(config.getValueFieldConfigs());
+ }
+ if (config.getDefaultValueFieldConfig() != null) {
+ fieldConfigs.add(config.getDefaultValueFieldConfig());
+ }
+ for (FieldConfig fieldConfig : fieldConfigs) {
+ JsonNode indexes = fieldConfig.getIndexes();
+ if (indexes == null) {
+ continue;
+ }
+ Iterator<String> indexNames = indexes.fieldNames();
+ while (indexNames.hasNext()) {
+ String indexName = indexNames.next();
+
Preconditions.checkState(OpenStructSupportedIndexes.ALLOWED_PRETTY_NAMES.contains(indexName),
+ "OPEN_STRUCT key '%s' declares unsupported index '%s'; supported
indexes are %s",
+ fieldConfig.getName(), indexName,
OpenStructSupportedIndexes.ALLOWED_PRETTY_NAMES);
+ }
+ }
+ }
+
+ @Override
+ public String getPrettyName() {
+ return INDEX_DISPLAY_NAME;
+ }
+
+ @Override
+ protected ColumnConfigDeserializer<OpenStructIndexConfig>
createDeserializerForLegacyConfigs() {
+ // OPEN_STRUCT is net-new; no legacy FieldConfig.properties path to
migrate.
+ return
IndexConfigDeserializer.fromIndexTypes(FieldConfig.IndexType.OPEN_STRUCT,
+ (tableConfig, fieldConfig) -> OpenStructIndexConfig.DEFAULT);
+ }
+
+ @Override
+ public boolean shouldCreateIndex(IndexCreationContext context,
OpenStructIndexConfig indexConfig) {
+ // Creator is wired in the storage-layer PR (PR 2b); returning true here
with a null creator
+ // would NPE in SegmentColumnarIndexCreator.add(). Keep false until the
real creator lands.
+ return false;
+ }
+
+ @Override
+ public ColumnarOpenStructIndexCreator
createIndexCreator(IndexCreationContext context,
+ OpenStructIndexConfig indexConfig) {
+ throw new UnsupportedOperationException(
+ "OPEN_STRUCT index creator is not yet available; shouldCreateIndex()
must return false");
+ }
+
+ @Override
+ protected IndexReaderFactory<OpenStructIndexReader> createReaderFactory() {
+ return new NoOpReaderFactory();
+ }
+
+ @Override
+ public List<String> getFileExtensions(@Nullable ColumnMetadata
columnMetadata) {
+ return EXTENSIONS;
+ }
+
+ @Override
+ public IndexHandler createIndexHandler(SegmentDirectory segmentDirectory,
+ Map<String, FieldIndexConfigs> configsByCol, Schema schema, TableConfig
tableConfig) {
+ return IndexHandler.NoOp.INSTANCE;
+ }
+
+ @Nullable
+ @Override
+ public MutableIndex createMutableIndex(MutableIndexContext context,
OpenStructIndexConfig config) {
+ // Mutable OPEN_STRUCT index is constructed by MutableSegmentImpl, not via
this SPI path.
+ return null;
+ }
+
+ @Override
+ public boolean shouldInvalidateOnDictionaryChange(FieldSpec fieldSpec,
OpenStructIndexConfig indexConfig) {
+ return false;
+ }
+
+ @Override
+ public boolean requiresDictionary(FieldSpec fieldSpec, OpenStructIndexConfig
indexConfig) {
+ return false;
+ }
+
+ /// Reader factory that always returns null. The OPEN_STRUCT index has no
reader of its own —
+ /// materialized columns are loaded independently by the standard column
loading infrastructure.
+ private static class NoOpReaderFactory implements
IndexReaderFactory<OpenStructIndexReader> {
+ @Nullable
+ @Override
+ public OpenStructIndexReader createIndexReader(SegmentDirectory.Reader
segmentReader,
+ FieldIndexConfigs fieldIndexConfigs, ColumnMetadata metadata) {
+ return null;
+ }
+ }
+}
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/openstruct/OpenStructSupportedIndexes.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/openstruct/OpenStructSupportedIndexes.java
new file mode 100644
index 00000000000..cf23f188319
--- /dev/null
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/openstruct/OpenStructSupportedIndexes.java
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.openstruct;
+
+import java.util.Set;
+import org.apache.pinot.segment.spi.index.StandardIndexes;
+
+
+/// Allowlist of the index types that may be declared on an OPEN_STRUCT materialized child column,
+/// identified by index pretty name. A per-key `FieldConfig` may name only these indexes; anything
+/// else is rejected at table-config validation.
+///
+/// `dictionary` is built structurally (lifecycle CUSTOM); `forward` is always written.
+public final class OpenStructSupportedIndexes {
+  /// Pretty names of the vetted indexes: forward, dictionary, inverted, range and bloom filter.
+  /// The set is immutable (`Set.of`).
+  public static final Set<String> ALLOWED_PRETTY_NAMES = Set.of(
+      StandardIndexes.forward().getPrettyName(),
+      StandardIndexes.dictionary().getPrettyName(),
+      StandardIndexes.inverted().getPrettyName(),
+      StandardIndexes.range().getPrettyName(),
+      StandardIndexes.bloomFilter().getPrettyName());
+
+  // Constants holder; never instantiated.
+  private OpenStructSupportedIndexes() {
+  }
+}
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java
index c833b7cdaa3..6acc2a2b422 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java
@@ -94,6 +94,7 @@ import
org.apache.pinot.spi.config.table.ingestion.TransformConfig;
import org.apache.pinot.spi.data.DateTimeFieldSpec;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.FieldSpec.DataType;
+import org.apache.pinot.spi.data.OpenStructNaming;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.spi.function.FunctionEvaluator;
import org.apache.pinot.spi.ingestion.batch.BatchConfig;
@@ -1645,6 +1646,23 @@ public final class TableConfigUtils {
validateMultiColumnTextIndex(indexingConfig.getMultiColumnTextIndexConfig());
+ // OPEN_STRUCT materialized child columns use a reserved separator '$' in their name. When any
+ // schema field is OPEN_STRUCT, reject ALL columns (including OPEN_STRUCT parents themselves)
+ // whose names contain '$' after the first character. This prevents naming collisions: a parent
+ // named 'a$b' would produce children 'a$b$key', which parseParentColumn() would misparse as
+ // parent 'a' and key 'b$key'.
+ // NOTE: the check relies on isMaterializedOpenStructColumn(), which only reports a separator at
+ // index > 0, so a name that merely starts with '$' is not rejected here.
+ // This validation runs here (with full schema access) rather than per-field because each
+ // OpenStructIndexType.validate() call only sees one field at a time.
+ boolean anyOpenStruct = schema.getAllFieldSpecs().stream()
+ .anyMatch(fs -> fs.getDataType() == DataType.OPEN_STRUCT);
+ if (anyOpenStruct) {
+ for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
+ Preconditions.checkState(
+
!OpenStructNaming.isMaterializedOpenStructColumn(fieldSpec.getName()),
+ "Schema column '%s' contains reserved OPEN_STRUCT separator '%s'",
+ fieldSpec.getName(), OpenStructNaming.SEPARATOR);
+ }
+ }
+
// Star-tree index config is not managed by FieldIndexConfigs, and we need
to validate it separately.
List<StarTreeIndexConfig> starTreeIndexConfigs =
indexingConfig.getStarTreeIndexConfigs();
if (CollectionUtils.isNotEmpty(starTreeIndexConfigs)) {
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/creator/impl/stats/StatsCollectorUtilFieldSpecTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/creator/impl/stats/StatsCollectorUtilFieldSpecTest.java
new file mode 100644
index 00000000000..04fe3d57351
--- /dev/null
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/creator/impl/stats/StatsCollectorUtilFieldSpecTest.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.creator.impl.stats;
+
+import java.math.BigDecimal;
+import org.apache.pinot.spi.data.DimensionFieldSpec;
+import org.apache.pinot.spi.data.FieldSpec.DataType;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+
+
+/// Exercises `StatsCollectorUtil.createStatsCollector` when it is given a `FieldSpec` and a null
+/// second argument, for an INT and a BIG_DECIMAL single-value dimension.
+public class StatsCollectorUtilFieldSpecTest {
+
+  /// Duplicate INT values are counted once; min and max are taken over the collected values.
+  @Test
+  public void testCreateIntCollectorFromFieldSpec() {
+    AbstractColumnStatisticsCollector intCollector =
+        StatsCollectorUtil.createStatsCollector(new DimensionFieldSpec("c", DataType.INT, true), null);
+    for (int value : new int[]{5, 5, 2}) {
+      intCollector.collect(value);
+    }
+    intCollector.seal();
+
+    assertEquals(intCollector.getCardinality(), 2);
+    assertEquals(intCollector.getMinValue(), 2);
+    assertEquals(intCollector.getMaxValue(), 5);
+  }
+
+  /// Two BigDecimal values that differ only in scale are counted as two distinct entries.
+  @Test
+  public void testCreateBigDecimalCollectorFromFieldSpec() {
+    AbstractColumnStatisticsCollector decimalCollector =
+        StatsCollectorUtil.createStatsCollector(new DimensionFieldSpec("c", DataType.BIG_DECIMAL, true), null);
+    decimalCollector.collect(new BigDecimal("1.0"));
+    decimalCollector.collect(new BigDecimal("1.00"));
+    decimalCollector.seal();
+
+    // 1.0 and 1.00 are distinct under equals (BigDecimalColumnPreIndexStatsCollector uses
+    // ObjectOpenHashSet).
+    assertEquals(decimalCollector.getCardinality(), 2);
+  }
+}
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/openstruct/OpenStructIndexTypeTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/openstruct/OpenStructIndexTypeTest.java
new file mode 100644
index 00000000000..317fe219f3f
--- /dev/null
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/openstruct/OpenStructIndexTypeTest.java
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.openstruct;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import java.util.List;
+import java.util.Map;
+import org.apache.pinot.segment.spi.index.FieldIndexConfigs;
+import org.apache.pinot.segment.spi.index.StandardIndexes;
+import org.apache.pinot.spi.config.table.FieldConfig;
+import org.apache.pinot.spi.config.table.OpenStructIndexConfig;
+import org.apache.pinot.spi.data.ComplexFieldSpec;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.utils.JsonUtils;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.assertSame;
+import static org.testng.Assert.assertThrows;
+
+
+/// Covers registration of the OPEN_STRUCT index type (lookup, id, singleton identity) and the
+/// per-key index validation performed by `StandardIndexes.openStruct().validate(...)`.
+public class OpenStructIndexTypeTest {
+
+  @Test
+  public void testServiceLoaderResolves() {
+    assertNotNull(StandardIndexes.openStruct(),
+        "StandardIndexes.openStruct() should resolve via OpenStructIndexPlugin");
+  }
+
+  @Test
+  public void testIndexIdMatches() {
+    assertEquals(StandardIndexes.openStruct().getId(), StandardIndexes.OPEN_STRUCT_ID);
+  }
+
+  @Test
+  public void testSingletonInstance() {
+    assertSame(StandardIndexes.openStruct(), OpenStructIndexPlugin.INSTANCE);
+  }
+
+  @Test
+  public void testValidateRejectsUnsupportedPerKeyIndex()
+      throws Exception {
+    // 'h3' is not in the OPEN_STRUCT vetted subset; declaring it on a key must fail validation.
+    FieldIndexConfigs fieldIndexConfigs = openStructConfigsForKey("loc", "{\"h3\": {}}");
+    FieldSpec openStructSpec = payloadOpenStructSpec();
+
+    assertThrows(IllegalStateException.class,
+        () -> StandardIndexes.openStruct().validate(fieldIndexConfigs, openStructSpec, null));
+  }
+
+  @Test
+  public void testValidateAllowsVettedPerKeyIndexes()
+      throws Exception {
+    FieldIndexConfigs fieldIndexConfigs =
+        openStructConfigsForKey("clicks", "{\"range\": {}, \"bloom\": {}, \"inverted\": {}}");
+    FieldSpec openStructSpec = payloadOpenStructSpec();
+
+    // Must not throw.
+    StandardIndexes.openStruct().validate(fieldIndexConfigs, openStructSpec, null);
+  }
+
+  /// Builds the index configs for an OPEN_STRUCT column whose single per-key `FieldConfig` is named
+  /// `key` and declares the indexes described by `indexesJson`.
+  private static FieldIndexConfigs openStructConfigsForKey(String key, String indexesJson)
+      throws Exception {
+    JsonNode indexes = JsonUtils.stringToJsonNode(indexesJson);
+    FieldConfig keyConfig = new FieldConfig.Builder(key).withIndexes(indexes).build();
+    // First constructor arg is `disabled` — pass false so the config is enabled and validation runs.
+    OpenStructIndexConfig config = new OpenStructIndexConfig(false, null, -1, null, 0.5, List.of(keyConfig));
+    return new FieldIndexConfigs.Builder().add(StandardIndexes.openStruct(), config).build();
+  }
+
+  /// The OPEN_STRUCT parent column ("payload", no declared children) that both validation tests use.
+  private static FieldSpec payloadOpenStructSpec() {
+    return new ComplexFieldSpec("payload", FieldSpec.DataType.OPEN_STRUCT, true, Map.of());
+  }
+}
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java
index 0776eeda342..c7cab544ed3 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java
@@ -4398,4 +4398,42 @@ public class TableConfigUtilsTest {
.setTaskConfig(new
org.apache.pinot.spi.config.table.TableTaskConfig(buildMaterializedViewTaskMap(definedSql)))
.build();
}
+
+  /// With an OPEN_STRUCT column in the schema, validation must accept '$'-free column names and
+  /// reject a '$' inside either a regular column name or the OPEN_STRUCT parent name.
+  @Test
+  public void testValidateOpenStructSeparatorInColumnNames() {
+    TableConfig offlineTableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).build();
+
+    // Baseline: neither the regular column nor the OPEN_STRUCT column contains '$', so this passes.
+    Schema cleanSchema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
+        .addSingleValueDimension("event_type", FieldSpec.DataType.STRING)
+        .addOpenStruct("payload", Map.of())
+        .build();
+    TableConfigUtils.validate(offlineTableConfig, cleanSchema);
+
+    // Rejected: a regular (non-OPEN_STRUCT) column carries the separator.
+    Schema dollarInRegularColumn = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
+        .addSingleValueDimension("event$type", FieldSpec.DataType.STRING)
+        .addOpenStruct("payload", Map.of())
+        .build();
+    try {
+      TableConfigUtils.validate(offlineTableConfig, dollarInRegularColumn);
+      fail("Should fail when a non-OPEN_STRUCT column contains the reserved '$' separator");
+    } catch (IllegalStateException e) {
+      assertTrue(e.getMessage().contains("event$type"));
+    }
+
+    // Rejected: the OPEN_STRUCT parent itself carries the separator. A parent named 'metrics$v2'
+    // would produce children 'metrics$v2$key'; parseParentColumn() splits on the first '$' and would
+    // return 'metrics' instead of 'metrics$v2', corrupting the name mapping.
+    Schema dollarInOpenStructParent = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
+        .addSingleValueDimension("event_type", FieldSpec.DataType.STRING)
+        .addOpenStruct("metrics$v2", Map.of())
+        .build();
+    try {
+      TableConfigUtils.validate(offlineTableConfig, dollarInOpenStructParent);
+      fail("Should fail when an OPEN_STRUCT parent column name contains the reserved '$' separator");
+    } catch (IllegalStateException e) {
+      assertTrue(e.getMessage().contains("metrics$v2"));
+    }
+  }
}
diff --git
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java
index fbc27829127..84adb3b421c 100644
---
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java
+++
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java
@@ -178,6 +178,9 @@ public class V1Constants {
// parentColumn = metrics
public static final String PARENT_COLUMN = "parentColumn";
+ // Whether this OPEN_STRUCT column has a sparse column for keys not
materialized as dense.
+ public static final String HAS_SPARSE_COLUMN = "hasSparseColumn";
+
/// Partition function, all optional
public static final String PARTITION_FUNCTION = "partitionFunction";
public static final String NUM_PARTITIONS = "numPartitions";
diff --git
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/datasource/OpenStructDataSource.java
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/datasource/OpenStructDataSource.java
index a0705f66e24..27bd6eee53b 100644
---
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/datasource/OpenStructDataSource.java
+++
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/datasource/OpenStructDataSource.java
@@ -19,6 +19,7 @@
package org.apache.pinot.segment.spi.datasource;
import java.util.Map;
+import javax.annotation.Nullable;
import org.apache.pinot.segment.spi.index.column.ColumnIndexContainer;
import org.apache.pinot.spi.data.ComplexFieldSpec;
@@ -61,4 +62,13 @@ public interface OpenStructDataSource extends DataSource {
/// Returns the ColumnIndexContainer for the given key's values.
ColumnIndexContainer getIndexContainer(String key);
+
+ /// Reconstructs the full OPEN_STRUCT value for {@code docId} as a {@code Map<String, Object>}, or
+ /// {@code null} when no key is present at that doc. Used by the realtime seal path to re-feed the
+ /// OPEN_STRUCT column into the immutable segment build.
+ ///
+ /// The default implementation never returns a value: it always throws
+ /// {@link UnsupportedOperationException}, so implementations that support per-doc reconstruction
+ /// must override it.
+ @Nullable
+ default Map<String, Object> getMapValue(int docId) {
+ throw new UnsupportedOperationException(
+ "Per-doc OPEN_STRUCT map reconstruction is not supported by this
implementation");
+ }
}
diff --git
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/FieldIndexConfigsUtil.java
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/FieldIndexConfigsUtil.java
index 033918e437f..ce2e207ad29 100644
---
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/FieldIndexConfigsUtil.java
+++
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/FieldIndexConfigsUtil.java
@@ -19,14 +19,20 @@
package org.apache.pinot.segment.spi.index;
+import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.collect.Sets;
+import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
+import javax.annotation.Nullable;
+import org.apache.pinot.spi.config.table.FieldConfig;
import org.apache.pinot.spi.config.table.IndexConfig;
import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.utils.JsonUtils;
public class FieldIndexConfigsUtil {
@@ -51,6 +57,46 @@ public class FieldIndexConfigsUtil {
.collect(Collectors.toMap(Map.Entry::getKey, entry ->
entry.getValue().build()));
}
+  /// Builds the {@link FieldIndexConfigs} of one column from a single {@link FieldConfig}; no
+  /// `TableConfig` or `Schema` is involved.
+  ///
+  /// - Dictionary: `DictionaryIndexConfig.DISABLED` when `fieldConfig.getEncodingType()` is RAW,
+  ///   otherwise `DictionaryIndexConfig.DEFAULT`.
+  /// - Every other index type registered with the `IndexService`: parsed from the entry of
+  ///   `fieldConfig.getIndexes()` keyed by the index pretty name, or the type's default config when
+  ///   there is no such entry.
+  ///
+  /// A `null` fieldConfig therefore yields the defaults throughout (dictionary enabled). Only the
+  /// modern `indexes` JSON is read, never the legacy `IndexingConfig` lists, which suits synthetic
+  /// columns (e.g. OPEN_STRUCT materialized children) that exist in no schema.
+  ///
+  /// `fieldSpec` is accepted but not consulted by this method.
+  public static FieldIndexConfigs fromFieldConfig(@Nullable FieldConfig fieldConfig, FieldSpec fieldSpec) {
+    DictionaryIndexConfig dictionaryConfig = DictionaryIndexConfig.DEFAULT;
+    JsonNode declaredIndexes = null;
+    if (fieldConfig != null) {
+      if (fieldConfig.getEncodingType() == FieldConfig.EncodingType.RAW) {
+        dictionaryConfig = DictionaryIndexConfig.DISABLED;
+      }
+      declaredIndexes = fieldConfig.getIndexes();
+    }
+
+    FieldIndexConfigs.Builder configsBuilder = new FieldIndexConfigs.Builder();
+    configsBuilder.add(StandardIndexes.dictionary(), dictionaryConfig);
+    for (IndexType<?, ?, ?> indexType : IndexService.getInstance().getAllIndexes()) {
+      // The dictionary entry was derived from the encoding type above; do not overwrite it.
+      if (!indexType.getId().equals(StandardIndexes.DICTIONARY_ID)) {
+        addConfigFromIndexes(configsBuilder, indexType, declaredIndexes);
+      }
+    }
+    return configsBuilder.build();
+  }
+
+  /// Adds the config for `indexType` to `builder`: the object parsed from `indexes[prettyName]` when
+  /// that entry exists, otherwise the index type's default config.
+  ///
+  /// Throws `IllegalArgumentException` (wrapping the `IOException`) when the entry cannot be
+  /// converted to the index type's config class.
+  private static <C extends IndexConfig> void addConfigFromIndexes(FieldIndexConfigs.Builder builder,
+      IndexType<C, ?, ?> indexType, @Nullable JsonNode indexes) {
+    JsonNode declared = indexes == null ? null : indexes.get(indexType.getPrettyName());
+    if (declared == null) {
+      // Nothing declared for this index type: fall back to its default.
+      builder.add(indexType, indexType.getDefaultConfig());
+      return;
+    }
+    C parsedConfig;
+    try {
+      parsedConfig = JsonUtils.jsonNodeToObject(declared, indexType.getIndexConfigClass());
+    } catch (IOException e) {
+      throw new IllegalArgumentException(
+          "Failed to parse '" + indexType.getPrettyName() + "' index config from FieldConfig", e);
+    }
+    builder.add(indexType, parsedConfig);
+  }
+
@FunctionalInterface
public interface DeserializerProvider {
<C extends IndexConfig> ColumnConfigDeserializer<C> get(IndexType<C, ?, ?>
indexType);
diff --git
a/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/index/FieldIndexConfigsUtilTest.java
b/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/index/FieldIndexConfigsUtilTest.java
new file mode 100644
index 00000000000..fdb0e2f0dcb
--- /dev/null
+++
b/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/index/FieldIndexConfigsUtilTest.java
@@ -0,0 +1,189 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.spi.index;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import javax.annotation.Nullable;
+import org.apache.pinot.segment.spi.ColumnMetadata;
+import org.apache.pinot.segment.spi.creator.IndexCreationContext;
+import org.apache.pinot.segment.spi.store.SegmentDirectory;
+import org.apache.pinot.spi.config.table.FieldConfig;
+import org.apache.pinot.spi.config.table.IndexConfig;
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.data.DimensionFieldSpec;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.utils.JsonUtils;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+
+
+/// Tests `FieldIndexConfigsUtil.fromFieldConfig` against an `IndexService` populated with stub
+/// index types, so no real index plugin is required. The `IndexService` instance is replaced
+/// before each test and the previous one is restored afterwards.
+public class FieldIndexConfigsUtilTest {
+
+  private static final FieldSpec INT_SPEC = new DimensionFieldSpec("x", FieldSpec.DataType.INT, true);
+
+  private IndexService _originalIndexService;
+  private StubIndexType _dictType;
+  private StubIndexType _rangeType;
+  private StubIndexType _bloomType;
+
+  @BeforeMethod
+  public void setUp() {
+    // Remember the current service first so tearDown() can put it back.
+    _originalIndexService = IndexService.getInstance();
+    _dictType = new StubIndexType(StandardIndexes.DICTIONARY_ID, StandardIndexes.DICTIONARY_ID);
+    _rangeType = new StubIndexType(StandardIndexes.RANGE_ID, "range");
+    _bloomType = new StubIndexType(StandardIndexes.BLOOM_FILTER_ID, "bloom");
+    installIndexService(_dictType, _rangeType, _bloomType);
+  }
+
+  @AfterMethod
+  public void tearDown() {
+    IndexService.setInstance(_originalIndexService);
+  }
+
+  @Test
+  public void testParsesIndexesAndRawEncoding()
+      throws Exception {
+    JsonNode declaredIndexes = JsonUtils.stringToJsonNode("{\"range\": {}, \"bloom\": {}}");
+    FieldConfig rawFieldConfig = new FieldConfig.Builder("x")
+        .withEncodingType(FieldConfig.EncodingType.RAW)
+        .withIndexes(declaredIndexes)
+        .build();
+
+    FieldIndexConfigs result = FieldIndexConfigsUtil.fromFieldConfig(rawFieldConfig, INT_SPEC);
+
+    // RAW encoding disables the dictionary — verify via the stored DictionaryIndexConfig constant.
+    IndexConfig dictionary = result.getConfig(_dictType);
+    assertTrue(dictionary.isDisabled(), "RAW => dictionary disabled");
+
+    // range and bloom should be enabled because the JSON contained their pretty-name keys.
+    IndexConfig range = result.getConfig(_rangeType);
+    assertTrue(range.isEnabled(), "range config from JSON should be enabled");
+
+    IndexConfig bloom = result.getConfig(_bloomType);
+    assertTrue(bloom.isEnabled(), "bloom config from JSON should be enabled");
+  }
+
+  @Test
+  public void testNullFieldConfigUsesDictionaryDefault() {
+    FieldIndexConfigs result = FieldIndexConfigsUtil.fromFieldConfig(null, INT_SPEC);
+    // null fieldConfig => dictionary should use DictionaryIndexConfig.DEFAULT (not disabled)
+    IndexConfig dictionary = result.getConfig(_dictType);
+    assertFalse(dictionary.isDisabled(), "null => dictionary enabled");
+  }
+
+  /// Replaces the `IndexService` instance with one built from the given stubs. Each stub is wrapped
+  /// in a plugin whose priority is the stub's position in the argument list.
+  private void installIndexService(StubIndexType... types) {
+    Set<IndexPlugin<?>> plugins = new HashSet<>();
+    for (int i = 0; i < types.length; i++) {
+      final int priority = i;
+      final StubIndexType stub = types[i];
+      plugins.add(new IndexPlugin<StubIndexType>() {
+        @Override
+        public StubIndexType getIndexType() {
+          return stub;
+        }
+
+        @Override
+        public int getPriority() {
+          return priority;
+        }
+      });
+    }
+    IndexService.setInstance(new IndexService(plugins));
+  }
+
+  /// Minimal `IndexType` carrying only an id and a pretty name. Its config class is the plain
+  /// `IndexConfig` and its default config is `IndexConfig.DISABLED`; creator, reader-factory and
+  /// handler accessors throw `UnsupportedOperationException`.
+  private static final class StubIndexType implements IndexType<IndexConfig, IndexReader, IndexCreator> {
+    private final String _id;
+    private final String _prettyName;
+
+    StubIndexType(String id, String prettyName) {
+      _id = id;
+      _prettyName = prettyName;
+    }
+
+    @Override
+    public String getId() {
+      return _id;
+    }
+
+    @Override
+    public String getPrettyName() {
+      return _prettyName;
+    }
+
+    @Override
+    public Class<IndexConfig> getIndexConfigClass() {
+      return IndexConfig.class;
+    }
+
+    @Override
+    public IndexConfig getDefaultConfig() {
+      return IndexConfig.DISABLED;
+    }
+
+    @Override
+    public Map<String, IndexConfig> getConfig(TableConfig tableConfig, Schema schema) {
+      return Map.of();
+    }
+
+    @Override
+    public IndexCreator createIndexCreator(IndexCreationContext context, IndexConfig indexConfig) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public IndexReaderFactory<IndexReader> getReaderFactory() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public List<String> getFileExtensions(@Nullable ColumnMetadata columnMetadata) {
+      return List.of();
+    }
+
+    @Override
+    public IndexHandler createIndexHandler(SegmentDirectory segmentDirectory,
+        Map<String, FieldIndexConfigs> configsByCol, Schema schema, TableConfig tableConfig) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean requiresDictionary(FieldSpec fieldSpec, IndexConfig indexConfig) {
+      return false;
+    }
+
+    @Override
+    public boolean shouldInvalidateOnDictionaryChange(FieldSpec fieldSpec, IndexConfig indexConfig) {
+      return false;
+    }
+
+    @Override
+    public void convertToNewFormat(TableConfig tableConfig, Schema schema) {
+      // Intentionally empty: the stub has no legacy format to convert.
+    }
+  }
+}
diff --git
a/pinot-spi/src/main/java/org/apache/pinot/spi/data/OpenStructNaming.java
b/pinot-spi/src/main/java/org/apache/pinot/spi/data/OpenStructNaming.java
index dc0bc138d3e..a0061c6d1a4 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/OpenStructNaming.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/OpenStructNaming.java
@@ -36,4 +36,41 @@ public final class OpenStructNaming {
public static String sparseColumnName(String openStructColumn) {
return openStructColumn + SEPARATOR + SPARSE_SUFFIX;
}
+
+  /// Tells whether `columnName` has the shape of a materialized OPEN_STRUCT child column (a dense
+  /// materialized key or the sparse JSON column): the separator character occurs in it and is
+  /// preceded by at least one character.
+  ///
+  /// The check is purely syntactic. A name that merely starts with the separator (first occurrence
+  /// at index 0) yields false.
+  public static boolean isMaterializedOpenStructColumn(String columnName) {
+    int separatorIndex = columnName.indexOf(SEPARATOR.charAt(0));
+    return separatorIndex > 0;
+  }
+
+  /// Tells whether `columnName` is the sparse JSON column of some OPEN_STRUCT parent: a non-empty
+  /// parent part, the separator, then exactly `SPARSE_SUFFIX`. The name is split at the first
+  /// occurrence of the separator character.
+  public static boolean isSparseColumn(String columnName) {
+    int separatorIndex = columnName.indexOf(SEPARATOR.charAt(0));
+    if (separatorIndex <= 0) {
+      // No separator, or nothing in front of it: not a child column at all.
+      return false;
+    }
+    return SPARSE_SUFFIX.equals(columnName.substring(separatorIndex + 1));
+  }
+
+  /// Extracts the parent OPEN_STRUCT column name from a materialized child column name, i.e. the
+  /// text before the first occurrence of the separator character.
+  ///
+  /// Throws IllegalArgumentException when the name has no separator or starts with it.
+  public static String parseParentColumn(String materializedColumnName) {
+    int separatorIndex = materializedColumnName.indexOf(SEPARATOR.charAt(0));
+    if (separatorIndex > 0) {
+      return materializedColumnName.substring(0, separatorIndex);
+    }
+    throw new IllegalArgumentException("Not a materialized OPEN_STRUCT column: " + materializedColumnName);
+  }
+
+  /// Extracts the key from a materialized dense column name, i.e. the text after the first
+  /// occurrence of the separator character.
+  ///
+  /// Throws IllegalArgumentException when the name has no separator or starts with it, and also
+  /// when the extracted text equals `SPARSE_SUFFIX` (the sparse column has no key).
+  public static String parseKey(String materializedColumnName) {
+    int separatorIndex = materializedColumnName.indexOf(SEPARATOR.charAt(0));
+    if (separatorIndex <= 0) {
+      throw new IllegalArgumentException("Not a materialized OPEN_STRUCT column: " + materializedColumnName);
+    }
+    String keyPart = materializedColumnName.substring(separatorIndex + 1);
+    if (!SPARSE_SUFFIX.equals(keyPart)) {
+      return keyPart;
+    }
+    throw new IllegalArgumentException("Sparse column has no key: " + materializedColumnName);
+  }
}
diff --git
a/pinot-spi/src/main/java/org/apache/pinot/spi/data/OpenStructTypeInference.java
b/pinot-spi/src/main/java/org/apache/pinot/spi/data/OpenStructTypeInference.java
new file mode 100644
index 00000000000..7cfab573fb2
--- /dev/null
+++
b/pinot-spi/src/main/java/org/apache/pinot/spi/data/OpenStructTypeInference.java
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.spi.data;
+
+import javax.annotation.Nullable;
+import org.apache.pinot.spi.utils.PinotDataType;
+
+
+/// Infers the {@link FieldSpec.DataType} of an OPEN_STRUCT key from a raw ingested value, for keys
+/// that have no declared child {@link FieldSpec}.
+///
+/// The mapping is OPEN_STRUCT-specific policy: TIMESTAMP is kept, DATE/TIME/UUID fold to STRING,
+/// BYTE/CHARACTER/SHORT widen to INT, and anything that cannot be represented as a stored column
+/// type maps to {@code null}. It is distinct from the JSON-node-based inference in
+/// {@code JsonUtils.valueOf}.
+public final class OpenStructTypeInference {
+  // Static utility; never instantiated.
+  private OpenStructTypeInference() {
+  }
+
+  /// Maps the `PinotDataType` reported by `PinotDataType.getSingleValueType(rawValue)` to a stored
+  /// column type. Returns {@code null} when there is no mapping; callers decide whether to drop the
+  /// entry or fall back to a default (e.g. STRING).
+  @Nullable
+  public static FieldSpec.DataType inferDataType(Object rawValue) {
+    PinotDataType sourceType = PinotDataType.getSingleValueType(rawValue);
+    FieldSpec.DataType inferred;
+    switch (sourceType) {
+      // Small integral types are widened to INT.
+      case INTEGER:
+      case BYTE:
+      case CHARACTER:
+      case SHORT:
+        inferred = FieldSpec.DataType.INT;
+        break;
+      case LONG:
+        inferred = FieldSpec.DataType.LONG;
+        break;
+      case FLOAT:
+        inferred = FieldSpec.DataType.FLOAT;
+        break;
+      case DOUBLE:
+        inferred = FieldSpec.DataType.DOUBLE;
+        break;
+      case BIG_DECIMAL:
+        inferred = FieldSpec.DataType.BIG_DECIMAL;
+        break;
+      case BOOLEAN:
+        inferred = FieldSpec.DataType.BOOLEAN;
+        break;
+      case TIMESTAMP:
+        inferred = FieldSpec.DataType.TIMESTAMP;
+        break;
+      // DATE, TIME and UUID are folded into STRING.
+      case STRING:
+      case DATE:
+      case TIME:
+      case UUID:
+        inferred = FieldSpec.DataType.STRING;
+        break;
+      case BYTES:
+        inferred = FieldSpec.DataType.BYTES;
+        break;
+      default:
+        // No stored column type for this value.
+        inferred = null;
+        break;
+    }
+    return inferred;
+  }
+}
diff --git
a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/PinotDataType.java
b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/PinotDataType.java
index e4bbc0ba9ab..7718956bf27 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/PinotDataType.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/PinotDataType.java
@@ -1981,6 +1981,11 @@ public enum PinotDataType {
return MAP;
}
throw new IllegalStateException("There is no multi-value type for
MAP");
+ case OPEN_STRUCT:
+ if (fieldSpec.isSingleValueField()) {
+ return MAP;
+ }
+ throw new IllegalStateException("There is no multi-value type for
OPEN_STRUCT");
default:
throw new IllegalStateException("Unsupported data type: " + dataType);
}
diff --git
a/pinot-spi/src/test/java/org/apache/pinot/spi/data/OpenStructNamingTest.java
b/pinot-spi/src/test/java/org/apache/pinot/spi/data/OpenStructNamingTest.java
index 6adad485c39..5c057d4acac 100644
---
a/pinot-spi/src/test/java/org/apache/pinot/spi/data/OpenStructNamingTest.java
+++
b/pinot-spi/src/test/java/org/apache/pinot/spi/data/OpenStructNamingTest.java
@@ -21,17 +21,55 @@ package org.apache.pinot.spi.data;
import org.testng.annotations.Test;
import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
public class OpenStructNamingTest {
@Test
public void testMaterializedColumnName() {
- assertEquals(OpenStructNaming.materializedColumnName("metrics",
"tenancy"), "metrics$tenancy");
+ assertEquals(OpenStructNaming.materializedColumnName("metrics", "clicks"),
"metrics$clicks");
}
@Test
public void testSparseColumnName() {
assertEquals(OpenStructNaming.sparseColumnName("metrics"), "metrics$__sparse__");
}
+
+ @Test
+ public void testIsMaterializedOpenStructColumn() {
+ assertTrue(OpenStructNaming.isMaterializedOpenStructColumn("metrics$clicks"));
+ assertTrue(OpenStructNaming.isMaterializedOpenStructColumn("metrics$__sparse__"));
+ assertFalse(OpenStructNaming.isMaterializedOpenStructColumn("metrics"));
+ assertFalse(OpenStructNaming.isMaterializedOpenStructColumn("plain_column"));
+ }
+
+ @Test
+ public void testIsSparseColumn() {
+ assertTrue(OpenStructNaming.isSparseColumn("metrics$__sparse__"));
+ assertFalse(OpenStructNaming.isSparseColumn("metrics$clicks"));
+ assertFalse(OpenStructNaming.isSparseColumn("metrics"));
+ }
+
+ @Test
+ public void testParseParentColumn() {
+ assertEquals(OpenStructNaming.parseParentColumn("metrics$clicks"), "metrics");
+ assertEquals(OpenStructNaming.parseParentColumn("metrics$__sparse__"), "metrics");
+ }
+
+ @Test
+ public void testParseKey() {
+ assertEquals(OpenStructNaming.parseKey("metrics$clicks"), "clicks");
+ }
+
+ @Test(expectedExceptions = IllegalArgumentException.class)
+ public void testParseKeyRejectsSparse() {
+ OpenStructNaming.parseKey("metrics$__sparse__");
+ }
+
+ @Test(expectedExceptions = IllegalArgumentException.class)
+ public void testParseKeyRejectsNonMaterialized() {
+ OpenStructNaming.parseKey("metrics");
+ }
}
diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/data/OpenStructTypeInferenceTest.java b/pinot-spi/src/test/java/org/apache/pinot/spi/data/OpenStructTypeInferenceTest.java
new file mode 100644
index 00000000000..3d67eba7514
--- /dev/null
+++ b/pinot-spi/src/test/java/org/apache/pinot/spi/data/OpenStructTypeInferenceTest.java
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.spi.data;
+
+import java.math.BigDecimal;
+import java.sql.Timestamp;
+import java.time.LocalDate;
+import java.time.LocalTime;
+import java.util.Map;
+import java.util.UUID;
+import org.apache.pinot.spi.data.FieldSpec.DataType;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+
+
+public class OpenStructTypeInferenceTest {
+
+ @DataProvider(name = "inferenceCases")
+ public Object[][] inferenceCases() {
+ return new Object[][]{
+ // Integral types all widen to INT.
+ {42, DataType.INT},
+ {(byte) 1, DataType.INT},
+ {'a', DataType.INT},
+ {(short) 7, DataType.INT},
+ {42L, DataType.LONG},
+ {3.14f, DataType.FLOAT},
+ {3.14d, DataType.DOUBLE},
+ {new BigDecimal("1.23"), DataType.BIG_DECIMAL},
+ {true, DataType.BOOLEAN},
+ {new Timestamp(0L), DataType.TIMESTAMP},
+ {"hello", DataType.STRING},
+ // Temporal and UUID values fold to STRING.
+ {LocalDate.of(2026, 6, 2), DataType.STRING},
+ {LocalTime.of(12, 0), DataType.STRING},
+ {UUID.randomUUID(), DataType.STRING},
+ {new byte[]{1, 2, 3}, DataType.BYTES},
+ // Unrepresentable values return null so callers can drop or default.
+ {Map.of("k", "v"), null},
+ {new Object(), null},
+ };
+ }
+
+ @Test(dataProvider = "inferenceCases")
+ public void testInferDataType(Object rawValue, DataType expected) {
+ assertEquals(OpenStructTypeInference.inferDataType(rawValue), expected);
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]