This is an automated email from the ASF dual-hosted git repository.
Jackie-Jiang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new eddf4c0deb3 feat(open_struct): introduce DataType.OPEN_STRUCT and its
SPI surface (#18368)
eddf4c0deb3 is described below
commit eddf4c0deb3f8c0b710e27b4d946333ac5fa9311
Author: tarun11Mavani <[email protected]>
AuthorDate: Mon Jun 1 06:23:36 2026 +0530
feat(open_struct): introduce DataType.OPEN_STRUCT and its SPI surface
(#18368)
---
.../org/apache/pinot/common/utils/DataSchema.java | 2 +
.../pinot/core/data/manager/TableIndexingTest.java | 3 +-
.../pinot/integration/tests/udf/AvroSink.java | 1 +
.../apache/pinot/query/type/TypeFactoryTest.java | 1 +
.../org/apache/pinot/segment/spi/V1Constants.java | 5 +
.../spi/datasource/OpenStructDataSource.java | 64 ++++++
.../pinot/segment/spi/index/StandardIndexes.java | 16 ++
.../creator/ColumnarOpenStructIndexCreator.java | 51 +++++
.../spi/index/metadata/ColumnMetadataImpl.java | 32 ++-
.../spi/index/reader/OpenStructIndexReader.java | 51 +++++
.../spi/index/metadata/ColumnMetadataImplTest.java | 26 +++
.../apache/pinot/spi/config/table/FieldConfig.java | 23 +-
.../spi/config/table/OpenStructIndexConfig.java | 181 +++++++++++++++
.../apache/pinot/spi/data/ComplexFieldSpec.java | 11 +-
.../java/org/apache/pinot/spi/data/FieldSpec.java | 10 +-
.../apache/pinot/spi/data/OpenStructNaming.java | 39 ++++
.../java/org/apache/pinot/spi/data/Schema.java | 13 ++
.../config/table/OpenStructIndexConfigTest.java | 246 +++++++++++++++++++++
.../pinot/spi/data/OpenStructDataTypeTest.java | 83 +++++++
.../pinot/spi/data/OpenStructNamingTest.java | 37 ++++
.../java/org/apache/pinot/spi/data/SchemaTest.java | 16 ++
21 files changed, 900 insertions(+), 11 deletions(-)
diff --git
a/pinot-common/src/main/java/org/apache/pinot/common/utils/DataSchema.java
b/pinot-common/src/main/java/org/apache/pinot/common/utils/DataSchema.java
index 315c3c9e6ce..40abc348ef5 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/utils/DataSchema.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/utils/DataSchema.java
@@ -915,6 +915,8 @@ public class DataSchema {
return BYTES;
case MAP:
return MAP;
+ case OPEN_STRUCT:
+ return OBJECT;
case UNKNOWN:
return UNKNOWN;
default:
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/TableIndexingTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/TableIndexingTest.java
index f01cd114274..131b7aef593 100644
---
a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/TableIndexingTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/TableIndexingTest.java
@@ -159,7 +159,8 @@ public class TableIndexingTest {
protected void createSchemas() {
for (DataType type : DataType.values()) {
- if (type == DataType.UNKNOWN || type == DataType.LIST || type ==
DataType.MAP || type == DataType.STRUCT) {
+ if (type == DataType.UNKNOWN || type == DataType.LIST || type ==
DataType.MAP || type == DataType.STRUCT
+ || type == DataType.OPEN_STRUCT) {
continue;
}
diff --git
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/udf/AvroSink.java
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/udf/AvroSink.java
index 6d517a954b3..de746a65a8b 100644
---
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/udf/AvroSink.java
+++
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/udf/AvroSink.java
@@ -110,6 +110,7 @@ public class AvroSink implements AutoCloseable {
case MAP:
case LIST:
case STRUCT:
+ case OPEN_STRUCT:
case UNKNOWN:
// Types we know we don't support in AVRO
break;
diff --git
a/pinot-query-planner/src/test/java/org/apache/pinot/query/type/TypeFactoryTest.java
b/pinot-query-planner/src/test/java/org/apache/pinot/query/type/TypeFactoryTest.java
index 24f94e15c49..a12b935fddc 100644
---
a/pinot-query-planner/src/test/java/org/apache/pinot/query/type/TypeFactoryTest.java
+++
b/pinot-query-planner/src/test/java/org/apache/pinot/query/type/TypeFactoryTest.java
@@ -88,6 +88,7 @@ public class TypeFactoryTest {
case LIST:
case STRUCT:
case MAP:
+ case OPEN_STRUCT:
case UNKNOWN:
continue;
default:
diff --git
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java
index a6bb7da1103..fbc27829127 100644
---
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java
+++
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java
@@ -173,6 +173,11 @@ public class V1Constants {
// Optional, default false
public static final String IS_AUTO_GENERATED = "isAutoGenerated";
+ // Optional. The parent column name for columns materialized from an
OPEN_STRUCT index.
+ // E.g. for column metrics$cpu with parent metrics:
+ // parentColumn = metrics
+ public static final String PARENT_COLUMN = "parentColumn";
+
/// Partition function, all optional
public static final String PARTITION_FUNCTION = "partitionFunction";
public static final String NUM_PARTITIONS = "numPartitions";
diff --git
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/datasource/OpenStructDataSource.java
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/datasource/OpenStructDataSource.java
new file mode 100644
index 00000000000..a0705f66e24
--- /dev/null
+++
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/datasource/OpenStructDataSource.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.spi.datasource;
+
+import java.util.Map;
+import org.apache.pinot.segment.spi.index.column.ColumnIndexContainer;
+import org.apache.pinot.spi.data.ComplexFieldSpec;
+
+
+/// DataSource for an OPEN_STRUCT column. Provides per-key DataSources that
can be used for
+/// filtering, aggregation, and projection on individual keys. Distinct from
`MapDataSource`,
+/// which carries fixed-typed MAP semantics (single value type per column).
+public interface OpenStructDataSource extends DataSource {
+
+ /// Returns the OPEN_STRUCT ComplexFieldSpec.
+ ComplexFieldSpec getFieldSpec();
+
+ /// Returns the DataSource for the given key's values. The DataSource's
value type is the
+ /// per-key declared type (from `childFieldSpecs`) when present, otherwise
auto-derived.
+ DataSource getDataSource(String key);
+
+ /// Returns whether the given key has a materialized per-key index in this
segment. Exact,
+ /// O(1) lookup into the materialized key set.
+ ///
+ /// Query operators use this to choose between the fast path (per-key
inverted/dictionary
+ /// index) and the fallback (expression scan over the sparse blob).
+ ///
+ /// A `false` return is only a definitive "absent" when
[#isFullyMaterialized()] is also
+ /// `true`; otherwise the key may still exist in the sparse blob.
+ boolean isMaterialized(String key);
+
+ /// Returns whether every key in this segment is materialized — i.e., there
is no sparse
+ /// blob and the materialized key set is exhaustive.
+ ///
+ /// When `true`, a `false` return from [#isMaterialized(String)] is a
definitive "absent"
+ /// and callers can short-circuit (e.g. a filter operator returns
`EmptyFilterOperator`
+ /// for value predicates and `MatchAllFilterOperator` for IS_NULL).
+ boolean isFullyMaterialized();
+
+ /// Returns DataSources for all keys present in this segment.
+ Map<String, DataSource> getDataSources();
+
+ /// Returns the DataSourceMetadata for the given key's values.
+ DataSourceMetadata getDataSourceMetadata(String key);
+
+ /// Returns the ColumnIndexContainer for the given key's values.
+ ColumnIndexContainer getIndexContainer(String key);
+}
diff --git
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/StandardIndexes.java
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/StandardIndexes.java
index 951bff77b50..8df197c4bb2 100644
---
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/StandardIndexes.java
+++
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/StandardIndexes.java
@@ -20,6 +20,7 @@
package org.apache.pinot.segment.spi.index;
import org.apache.pinot.segment.spi.index.creator.BloomFilterCreator;
+import
org.apache.pinot.segment.spi.index.creator.ColumnarOpenStructIndexCreator;
import org.apache.pinot.segment.spi.index.creator.CombinedInvertedIndexCreator;
import
org.apache.pinot.segment.spi.index.creator.DictionaryBasedInvertedIndexCreator;
import org.apache.pinot.segment.spi.index.creator.FSTIndexCreator;
@@ -37,12 +38,14 @@ import
org.apache.pinot.segment.spi.index.reader.H3IndexReader;
import org.apache.pinot.segment.spi.index.reader.InvertedIndexReader;
import org.apache.pinot.segment.spi.index.reader.JsonIndexReader;
import org.apache.pinot.segment.spi.index.reader.NullValueVectorReader;
+import org.apache.pinot.segment.spi.index.reader.OpenStructIndexReader;
import org.apache.pinot.segment.spi.index.reader.RangeIndexReader;
import org.apache.pinot.segment.spi.index.reader.TextIndexReader;
import org.apache.pinot.segment.spi.index.reader.VectorIndexReader;
import org.apache.pinot.spi.config.table.BloomFilterConfig;
import org.apache.pinot.spi.config.table.IndexConfig;
import org.apache.pinot.spi.config.table.JsonIndexConfig;
+import org.apache.pinot.spi.config.table.OpenStructIndexConfig;
/**
@@ -79,6 +82,7 @@ public class StandardIndexes {
public static final String TEXT_ID = "text_index";
public static final String H3_ID = "h3_index";
public static final String VECTOR_ID = "vector_index";
+ public static final String OPEN_STRUCT_ID = "open_struct_index";
private StandardIndexes() {
}
@@ -142,4 +146,16 @@ public class StandardIndexes {
return (IndexType<VectorIndexConfig, VectorIndexReader,
VectorIndexCreator>)
IndexService.getInstance().get(VECTOR_ID);
}
+
+ /// Returns the OPEN_STRUCT index type. A single `OpenStructIndexReader`
implementation
+ /// handles all access patterns for an OPEN_STRUCT column (dense
materialized keys, sparse
+ /// blob tier, or a mix). The plugin registration that makes this helper
resolve lives in
+ /// pinot-segment-local (`ColumnarOpenStructIndexPlugin`); calling this from
a build that
+ /// does not include pinot-segment-local will throw
+ /// `IllegalArgumentException: Unknown index id: open_struct_index`.
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ public static IndexType<OpenStructIndexConfig, OpenStructIndexReader,
ColumnarOpenStructIndexCreator> openStruct() {
+ return (IndexType<OpenStructIndexConfig, OpenStructIndexReader,
ColumnarOpenStructIndexCreator>)
+ IndexService.getInstance().get(OPEN_STRUCT_ID);
+ }
}
diff --git
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/ColumnarOpenStructIndexCreator.java
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/ColumnarOpenStructIndexCreator.java
new file mode 100644
index 00000000000..48b4f8a6281
--- /dev/null
+++
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/ColumnarOpenStructIndexCreator.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.spi.index.creator;
+
+import java.io.IOException;
+import java.util.Map;
+import org.apache.commons.configuration2.PropertiesConfiguration;
+import org.apache.pinot.segment.spi.index.IndexCreator;
+
+
+/// Creator for the OPEN_STRUCT index. Accepts one open-struct value per
document during segment
+/// creation and decomposes it into per-key columnar storage on `seal()`.
+///
+/// Implementations are not thread-safe; callers must serialize `add` calls
per creator instance.
+///
+/// The inherited `add(Object, int)` method from `IndexCreator` treats the
first argument as the
+/// open-struct value and the second as the docId, matching the column-major
creator path.
+public interface ColumnarOpenStructIndexCreator extends IndexCreator {
+
+ /// Adds one document's open-struct value. Keys are routed to per-key
columnar storage;
+ /// declared-type keys are coerced to those types, others use the configured
default value type.
+ /// An empty map is valid. Callers must pass an empty map rather than `null`.
+ ///
+ /// @param openStructValue the document's open-struct value (non-null, may
be empty)
+ /// @param docId document id, must be monotonically non-decreasing
across calls
+ void add(Map<String, Object> openStructValue, int docId)
+ throws IOException;
+
+ /// Returns metadata properties for the materialized columns this creator
produced during `seal()`.
+ /// The framework merges the returned properties into the segment metadata.
+ /// Returns an empty map for creators that produce no materialized columns.
Call after `seal()`.
+ default Map<String, PropertiesConfiguration> getMaterializedColumnMetadata()
{
+ return Map.of();
+ }
+}
diff --git
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java
index 7fa13f93ab9..a8b6f3dab39 100644
---
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java
+++
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java
@@ -76,6 +76,8 @@ public class ColumnMetadataImpl implements ColumnMetadata {
private final PartitionFunction _partitionFunction;
private final Set<Integer> _partitions;
private final boolean _autoGenerated;
+ @Nullable
+ private final String _parentColumn;
/// List of longs, each encodes:
/// - 2 byte - numeric id of IndexType
@@ -88,7 +90,8 @@ public class ColumnMetadataImpl implements ColumnMetadata {
EncodingType forwardIndexEncoding, boolean sorted, @Nullable Comparable
minValue, @Nullable Comparable maxValue,
boolean minMaxValueInvalid, int lengthOfShortestElement, int
lengthOfLongestElement, boolean isAscii,
int totalNumberOfEntries, int maxNumberOfMultiValues, int
maxRowLengthInBytes, int bitsPerElement,
- @Nullable PartitionFunction partitionFunction, @Nullable Set<Integer>
partitions, boolean autoGenerated) {
+ @Nullable PartitionFunction partitionFunction, @Nullable Set<Integer>
partitions, boolean autoGenerated,
+ @Nullable String parentColumn) {
_fieldSpec = fieldSpec;
_totalDocs = totalDocs;
_cardinality = cardinality;
@@ -108,6 +111,7 @@ public class ColumnMetadataImpl implements ColumnMetadata {
_partitionFunction = partitionFunction;
_partitions = partitions;
_autoGenerated = autoGenerated;
+ _parentColumn = parentColumn;
}
@Override
@@ -209,6 +213,17 @@ public class ColumnMetadataImpl implements ColumnMetadata {
return _autoGenerated;
}
+ /// Returns {@code true} if this column is a materialized column produced
from an OPEN_STRUCT parent column.
+ public boolean isMaterializedChild() {
+ return _parentColumn != null;
+ }
+
+ /// Returns the name of the parent OPEN_STRUCT column, or {@code null} if
this is not a materialized column.
+ @Nullable
+ public String getParentColumn() {
+ return _parentColumn;
+ }
+
@Override
public long getIndexSizeFor(IndexType type) {
short indexId = IndexService.getInstance().getNumericId(type);
@@ -281,6 +296,7 @@ public class ColumnMetadataImpl implements ColumnMetadata {
&& Objects.equals(_maxValue, that._maxValue)
&& Objects.equals(_partitionFunction, that._partitionFunction)
&& Objects.equals(_partitions, that._partitions)
+ && Objects.equals(_parentColumn, that._parentColumn)
&& Objects.equals(_indexTypeSizeList, that._indexTypeSizeList);
}
@@ -289,7 +305,7 @@ public class ColumnMetadataImpl implements ColumnMetadata {
return Objects.hash(_fieldSpec, _totalDocs, _cardinality, _hasDictionary,
_forwardIndexEncoding, _sorted,
_minValue, _maxValue, _minMaxValueInvalid, _lengthOfShortestElement,
_lengthOfLongestElement, _isAscii,
_totalNumberOfEntries, _maxNumberOfMultiValues, _maxRowLengthInBytes,
_bitsPerElement, _partitionFunction,
- _partitions, _autoGenerated, _indexTypeSizeList);
+ _partitions, _autoGenerated, _parentColumn, _indexTypeSizeList);
}
@Override
@@ -314,6 +330,7 @@ public class ColumnMetadataImpl implements ColumnMetadata {
+ ", _partitionFunction=" + _partitionFunction
+ ", _partitions=" + _partitions
+ ", _autoGenerated=" + _autoGenerated
+ + ", _parentColumn=" + _parentColumn
+ ", _indexTypeSizeList=" + _indexTypeSizeList
+ '}';
}
@@ -341,7 +358,8 @@ public class ColumnMetadataImpl implements ColumnMetadata {
.setMaxRowLengthInBytes(
config.getInt(Column.getKeyFor(column,
Column.MAX_ROW_LENGTH_IN_BYTES), UNAVAILABLE))
.setBitsPerElement(config.getInt(Column.getKeyFor(column,
Column.BITS_PER_ELEMENT), UNAVAILABLE))
- .setAutoGenerated(config.getBoolean(Column.getKeyFor(column,
Column.IS_AUTO_GENERATED), false));
+ .setAutoGenerated(config.getBoolean(Column.getKeyFor(column,
Column.IS_AUTO_GENERATED), false))
+ .setParentColumn(config.getString(Column.getKeyFor(column,
Column.PARENT_COLUMN), null));
// Set min/max value
DataType storedType = fieldSpec.getDataType().getStoredType();
@@ -507,6 +525,7 @@ public class ColumnMetadataImpl implements ColumnMetadata {
private PartitionFunction _partitionFunction;
private Set<Integer> _partitions;
private boolean _autoGenerated;
+ private String _parentColumn;
public Builder setFieldSpec(FieldSpec fieldSpec) {
_fieldSpec = fieldSpec;
@@ -608,6 +627,11 @@ public class ColumnMetadataImpl implements ColumnMetadata {
return this;
}
+ public Builder setParentColumn(String parentColumn) {
+ _parentColumn = parentColumn;
+ return this;
+ }
+
public ColumnMetadataImpl build() {
// Canonicalize forward index encoding
if (_forwardIndexEncoding == null) {
@@ -647,7 +671,7 @@ public class ColumnMetadataImpl implements ColumnMetadata {
return new ColumnMetadataImpl(_fieldSpec, _totalDocs, _cardinality,
_hasDictionary, _forwardIndexEncoding,
_sorted, _minValue, _maxValue, _minMaxValueInvalid,
_lengthOfShortestElement, _lengthOfLongestElement,
_isAscii, _totalNumberOfEntries, _maxNumberOfMultiValues,
_maxRowLengthInBytes, _bitsPerElement,
- _partitionFunction, _partitions, _autoGenerated);
+ _partitionFunction, _partitions, _autoGenerated, _parentColumn);
}
}
}
diff --git
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/OpenStructIndexReader.java
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/OpenStructIndexReader.java
new file mode 100644
index 00000000000..0dd9ddcefec
--- /dev/null
+++
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/OpenStructIndexReader.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.spi.index.reader;
+
+import java.util.Map;
+import java.util.Set;
+import org.apache.pinot.segment.spi.ColumnMetadata;
+import org.apache.pinot.segment.spi.index.IndexReader;
+import org.apache.pinot.segment.spi.index.IndexType;
+
+
+/**
+ * Interface for reading from the OPEN_STRUCT index. Provides per-key access
to the
+ * materialized sub-columns produced by the columnar storage backend.
+ *
+ * @param <T> Type of the ReaderContext
+ */
+@SuppressWarnings("rawtypes")
+public interface OpenStructIndexReader<T extends ForwardIndexReaderContext>
extends ForwardIndexReader<T> {
+
+ /**
+ * Returns the keys in the OPEN_STRUCT index.
+ */
+ Set<String> getKeys();
+
+ /**
+ * Returns all the indexes for the given key.
+ */
+ Map<IndexType, IndexReader> getIndexes(String key);
+
+ /**
+ * Returns the column metadata for the given key.
+ */
+ ColumnMetadata getColumnMetadata(String key);
+}
diff --git
a/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImplTest.java
b/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImplTest.java
index 0b994799933..ffa5ee2a981 100644
---
a/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImplTest.java
+++
b/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImplTest.java
@@ -21,6 +21,7 @@ package org.apache.pinot.segment.spi.index.metadata;
import org.apache.commons.configuration2.PropertiesConfiguration;
import org.apache.pinot.segment.spi.V1Constants.MetadataKeys.Column;
import org.apache.pinot.spi.config.table.FieldConfig.EncodingType;
+import org.apache.pinot.spi.data.DimensionFieldSpec;
import org.apache.pinot.spi.data.FieldSpec.DataType;
import org.apache.pinot.spi.data.FieldSpec.FieldType;
import org.testng.annotations.Test;
@@ -106,6 +107,31 @@ public class ColumnMetadataImplTest {
assertEquals(metadata.getForwardIndexEncoding(), EncodingType.DICTIONARY);
}
+ @Test
+ public void parentColumnRoundtrip() {
+ ColumnMetadataImpl meta = ColumnMetadataImpl.builder()
+ .setFieldSpec(new DimensionFieldSpec("metrics$cpu", DataType.DOUBLE,
true))
+ .setParentColumn("metrics")
+ .build();
+ assertEquals(meta.getParentColumn(), "metrics");
+ assertTrue(meta.isMaterializedChild());
+ }
+
+ /**
+ * Verify that the PARENT_COLUMN key in metadata.properties is read back by
+ * {@link ColumnMetadataImpl#fromPropertiesConfiguration}.
+ */
+ @Test
+ public void parentColumnReadFromPropertiesConfig() {
+ PropertiesConfiguration config = baseConfig("metrics$cpu");
+ config.setProperty(Column.getKeyFor("metrics$cpu", Column.PARENT_COLUMN),
"metrics");
+
+ ColumnMetadataImpl metadata =
ColumnMetadataImpl.fromPropertiesConfiguration(config, 1, "metrics$cpu");
+
+ assertEquals(metadata.getParentColumn(), "metrics");
+ assertTrue(metadata.isMaterializedChild());
+ }
+
private static PropertiesConfiguration baseConfig(String column) {
PropertiesConfiguration config = new PropertiesConfiguration();
config.setProperty(Column.getKeyFor(column, Column.COLUMN_NAME), column);
diff --git
a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/FieldConfig.java
b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/FieldConfig.java
index 064526695d4..49766e64ced 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/FieldConfig.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/FieldConfig.java
@@ -128,7 +128,28 @@ public class FieldConfig extends BaseJsonConfig {
// If null, there won't be any index
// NOTE: TIMESTAMP is ignored. In order to create TIMESTAMP index, configure
'timestampConfig' instead.
public enum IndexType {
- INVERTED, SORTED, TEXT, FST, IFST, H3, JSON, TIMESTAMP, VECTOR, RANGE
+ /** Inverted index mapping values to document IDs for efficient equality
predicates. */
+ INVERTED,
+ /** Marks the column as the sort column; segments store values in sorted
order. */
+ SORTED,
+ /** Full-text search index over string columns. */
+ TEXT,
+ /** Finite-state-transducer index for prefix and regex matching on string
columns. */
+ FST,
+ /** Case-insensitive variant of the FST index. */
+ IFST,
+ /** Geospatial index for H3 hexagonal grid lookups. */
+ H3,
+ /** JSON-path index over JSON-typed columns. */
+ JSON,
+ /** Ignored — configure {@code timestampConfig} on the table instead. */
+ TIMESTAMP,
+ /** Vector index for approximate-nearest-neighbor search. */
+ VECTOR,
+ /** Range index for efficient inequality predicates on numeric/string
columns. */
+ RANGE,
+ /** OPEN_STRUCT index storing semi-structured entries as per-key
materialized columns. */
+ OPEN_STRUCT
}
public enum CompressionCodec {
diff --git
a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/OpenStructIndexConfig.java
b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/OpenStructIndexConfig.java
new file mode 100644
index 00000000000..90994401ff1
--- /dev/null
+++
b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/OpenStructIndexConfig.java
@@ -0,0 +1,181 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.spi.config.table;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.JsonNode;
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import javax.annotation.Nullable;
+import org.apache.pinot.spi.utils.JsonUtils;
+
+
+/// Configuration for the OPEN_STRUCT index on an OPEN_STRUCT column.
+///
+/// **Dense vs sparse:** a key is materialized as its own column if (a) it
appears in the explicit
+/// `denseKeys` set, or (b) its fill rate (fraction of documents containing
the key) is ≥
+/// `denseKeyMinFillRate`. Keys not satisfying either criterion go into a
sparse OPEN_STRUCT column.
+///
+/// **maxDenseKeys cutoff:** when more keys qualify as dense than
`maxDenseKeys` allows, the top
+/// `maxDenseKeys` keys ranked by fill rate are materialized; the rest fall
back to the sparse
+/// column. `-1` (default) means unlimited — every qualifying key is
materialized. `0` disables
+/// dense keys entirely (all keys go to the sparse column). Use `denseKeys` to
pin specific keys
+/// regardless of fill rate ranking.
+///
+/// **Per-key index settings** are specified via `valueFieldConfigs` — each
entry is a standard
+/// [FieldConfig] (modern `indexes` format) for one materialized OPEN_STRUCT
key. Keys without an
+/// entry fall back to `defaultValueFieldConfig`. When neither is set, the
built-in default is
+/// DICTIONARY encoding with an inverted index.
+public class OpenStructIndexConfig extends IndexConfig {
+ public static final OpenStructIndexConfig DISABLED = new
OpenStructIndexConfig(false);
+ public static final OpenStructIndexConfig DEFAULT = new
OpenStructIndexConfig(true);
+
+ public static final double DEFAULT_DENSE_KEY_MIN_FILL_RATE = 0.5;
+ /// Default `maxDenseKeys`. `-1` means unlimited.
+ public static final int DEFAULT_MAX_DENSE_KEYS = -1;
+ private static final String INVERTED_INDEX_KEY = "inverted";
+
+ private final FieldConfig _defaultValueFieldConfig;
+ private final int _maxDenseKeys;
+ private final Set<String> _denseKeys;
+ private final double _denseKeyMinFillRate;
+ private final List<FieldConfig> _valueFieldConfigs;
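+ // Eager lookup from key name → FieldConfig for O(1) per-key access. Built once in the
+ // constructor and only read afterwards, so lookups can be shared across threads. Note that
+ // the caller-supplied `denseKeys` and `valueFieldConfigs` collections are stored as-is (not copied).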
+ private final Map<String, FieldConfig> _valueFieldConfigIndex;
+
+ public OpenStructIndexConfig(boolean enabled) {
+ this(!enabled, null, DEFAULT_MAX_DENSE_KEYS, null,
DEFAULT_DENSE_KEY_MIN_FILL_RATE, null);
+ }
+
+ @JsonCreator
+ public OpenStructIndexConfig(
+ @JsonProperty("disabled") Boolean disabled,
+ @JsonProperty("defaultValueFieldConfig") @Nullable FieldConfig
defaultValueFieldConfig,
+ @JsonProperty("maxDenseKeys") @Nullable Integer maxDenseKeys,
+ @JsonProperty("denseKeys") @Nullable Set<String> denseKeys,
+ @JsonProperty("denseKeyMinFillRate") @Nullable Double
denseKeyMinFillRate,
+ @JsonProperty("valueFieldConfigs") @Nullable List<FieldConfig>
valueFieldConfigs) {
+ super(disabled);
+ _defaultValueFieldConfig = defaultValueFieldConfig;
+ _maxDenseKeys = maxDenseKeys != null ? maxDenseKeys :
DEFAULT_MAX_DENSE_KEYS;
+ _denseKeys = denseKeys;
+ _denseKeyMinFillRate = denseKeyMinFillRate != null ? denseKeyMinFillRate :
DEFAULT_DENSE_KEY_MIN_FILL_RATE;
+ _valueFieldConfigs = valueFieldConfigs;
+ if (valueFieldConfigs == null || valueFieldConfigs.isEmpty()) {
+ _valueFieldConfigIndex = Map.of();
+ } else {
+ Map<String, FieldConfig> index = new HashMap<>(valueFieldConfigs.size());
+ for (FieldConfig fc : valueFieldConfigs) {
+ index.put(fc.getName(), fc);
+ }
+ _valueFieldConfigIndex = index;
+ }
+ }
+
+ /// Fallback [FieldConfig] applied to OPEN_STRUCT keys that are materialized
as dense columns
+ /// but have no entry in `valueFieldConfigs`. When this is also unset, the
built-in defaults
+ /// kick in: DICTIONARY encoding and an inverted index.
+ @Nullable
+ public FieldConfig getDefaultValueFieldConfig() {
+ return _defaultValueFieldConfig;
+ }
+
+ /// Maximum number of OPEN_STRUCT keys to materialize as dense columns. `-1`
(default) means
+ /// unlimited — every key qualifying as dense is materialized. `0` disables
dense keys entirely.
+ /// When positive and more keys qualify, the top `maxDenseKeys` by fill rate
are materialized;
+ /// the rest fall back to the sparse OPEN_STRUCT column.
+ public int getMaxDenseKeys() {
+ return _maxDenseKeys;
+ }
+
+ public Set<String> getDenseKeys() {
+ return _denseKeys != null ? _denseKeys : Set.of();
+ }
+
+ public double getDenseKeyMinFillRate() {
+ return _denseKeyMinFillRate;
+ }
+
+ public boolean isDenseKey(String key) {
+ return _denseKeys != null && _denseKeys.contains(key);
+ }
+
+ /// Per-key index settings. Each entry is a standard [FieldConfig] whose
`name` matches an
+ /// OPEN_STRUCT key name. Keys without an entry fall back to
`defaultValueFieldConfig`, or to
+ /// the built-in defaults (DICTIONARY + inverted) when no default is set.
+ @Nullable
+ public List<FieldConfig> getValueFieldConfigs() {
+ return _valueFieldConfigs;
+ }
+
+ /// Returns the [FieldConfig] for the given key, or null if none was
configured.
+ @Nullable
+ public FieldConfig getValueFieldConfig(String key) {
+ return _valueFieldConfigIndex.get(key);
+ }
+
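+ /// `true` if the given key should be built with an inverted index. Resolution order:
+ /// per-key [FieldConfig] → `defaultValueFieldConfig` → built-in default of inverted-on.
+ /// The first level that is configured decides the result: a [FieldConfig] whose `indexes`
+ /// has no `inverted` entry yields `false` rather than falling through to the next level.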
+ public boolean shouldEnableInvertedIndexForKey(String key) {
+ FieldConfig keyConfig = getValueFieldConfig(key);
+ if (keyConfig != null) {
+ return invertedFromIndexes(keyConfig, key);
+ }
+ if (_defaultValueFieldConfig != null) {
+ return invertedFromIndexes(_defaultValueFieldConfig, key);
+ }
+ return true;
+ }
+
+ /// `true` if the given key should be dictionary-encoded. Resolution order:
per-key
+ /// [FieldConfig] → `defaultValueFieldConfig` → built-in default of
DICTIONARY.
+ public boolean shouldUseDictionaryForKey(String key) {
+ FieldConfig keyConfig = getValueFieldConfig(key);
+ if (keyConfig != null) {
+ return keyConfig.getEncodingType() != FieldConfig.EncodingType.RAW;
+ }
+ if (_defaultValueFieldConfig != null) {
+ return _defaultValueFieldConfig.getEncodingType() !=
FieldConfig.EncodingType.RAW;
+ }
+ return true;
+ }
+
+ private static boolean invertedFromIndexes(FieldConfig fieldConfig, String
key) {
+ JsonNode indexes = fieldConfig.getIndexes();
+ if (indexes == null || !indexes.isObject()) {
+ return false;
+ }
+ JsonNode inverted = indexes.get(INVERTED_INDEX_KEY);
+ if (inverted == null) {
+ return false;
+ }
+ try {
+ return JsonUtils.jsonNodeToObject(inverted,
IndexConfig.class).isEnabled();
+ } catch (IOException e) {
+ throw new UncheckedIOException(
+ "Failed to parse inverted index config for OPEN_STRUCT key '" + key
+ "'", e);
+ }
+ }
+}
diff --git
a/pinot-spi/src/main/java/org/apache/pinot/spi/data/ComplexFieldSpec.java
b/pinot-spi/src/main/java/org/apache/pinot/spi/data/ComplexFieldSpec.java
index 366fa88f646..bac35553ddb 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/ComplexFieldSpec.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/ComplexFieldSpec.java
@@ -31,8 +31,8 @@ import org.apache.pinot.spi.utils.StringUtil;
/**
* FieldSpec for complex fields. The {@link org.apache.pinot.spi.data.FieldSpec.FieldType}
* is COMPLEX and the inner data type represents the root data type of the field.
- * It could be STRUCT, MAP or LIST. A complex field is composable with a single root type
- * and a number of child types. Although we have multi-value primitive columns, LIST
+ * It could be STRUCT, MAP, LIST or OPEN_STRUCT. A complex field is composable with a single root
+ * type and a number of child types. Although we have multi-value primitive columns, LIST
* is for representing lists of both complex and primitives inside a complex field.
*
* Consider a person json where the root type is STRUCT and composes of inner members:
@@ -67,8 +67,11 @@ public final class ComplexFieldSpec extends FieldSpec {
public ComplexFieldSpec(String name, DataType dataType, boolean
isSingleValueField,
Map<String, FieldSpec> childFieldSpecs) {
super(name, dataType, isSingleValueField);
- Preconditions.checkArgument(dataType == DataType.STRUCT || dataType ==
DataType.MAP || dataType == DataType.LIST);
- _childFieldSpecs = childFieldSpecs;
+ Preconditions.checkArgument(
+ dataType == DataType.STRUCT || dataType == DataType.MAP
+ || dataType == DataType.LIST || dataType == DataType.OPEN_STRUCT,
+ "ComplexFieldSpec dataType must be STRUCT, MAP, LIST, or OPEN_STRUCT
(got %s)", dataType);
+ _childFieldSpecs = new HashMap<>(childFieldSpecs);
}
public static String[] getColumnPath(String column) {
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java
b/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java
index 3a57b742f9a..38cdd120c76 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java
@@ -96,6 +96,7 @@ public abstract class FieldSpec implements
Comparable<FieldSpec>, Serializable {
public static final FieldSpecMetadata FIELD_SPEC_METADATA;
public static final Map DEFAULT_COMPLEX_NULL_VALUE_OF_MAP = Map.of();
+ public static final Map DEFAULT_COMPLEX_NULL_VALUE_OF_OPEN_STRUCT = Map.of();
public static final List DEFAULT_COMPLEX_NULL_VALUE_OF_LIST = List.of();
public static final int DEFAULT_MAX_LENGTH = 512;
@@ -493,6 +494,8 @@ public abstract class FieldSpec implements
Comparable<FieldSpec>, Serializable {
switch (dataType) {
case MAP:
return DEFAULT_COMPLEX_NULL_VALUE_OF_MAP;
+ case OPEN_STRUCT:
+ return DEFAULT_COMPLEX_NULL_VALUE_OF_OPEN_STRUCT;
case LIST:
return DEFAULT_COMPLEX_NULL_VALUE_OF_LIST;
case STRUCT:
@@ -619,6 +622,7 @@ public abstract class FieldSpec implements
Comparable<FieldSpec>, Serializable {
jsonNode.put(key, BytesUtils.toHexString((byte[])
_defaultNullValue));
break;
case MAP:
+ case OPEN_STRUCT:
case LIST:
jsonNode.set(key, JsonUtils.objectToJsonNode(_defaultNullValue));
break;
@@ -696,6 +700,7 @@ public abstract class FieldSpec implements
Comparable<FieldSpec>, Serializable {
BYTES(false, false),
STRUCT(false, false),
MAP(false, false),
+ OPEN_STRUCT(false, false),
LIST(false, false),
UNKNOWN(false, true);
@@ -794,6 +799,7 @@ public abstract class FieldSpec implements
Comparable<FieldSpec>, Serializable {
case BYTES:
return BytesUtils.toBytes(value);
case MAP:
+ case OPEN_STRUCT:
return JsonUtils.stringToObject(value, Map.class);
case LIST:
return JsonUtils.stringToObject(value, List.class);
@@ -842,6 +848,7 @@ public abstract class FieldSpec implements
Comparable<FieldSpec>, Serializable {
case BYTES:
return ByteArray.compare((byte[]) value1, (byte[]) value2);
case MAP:
+ case OPEN_STRUCT:
case LIST:
throw new UnsupportedOperationException("Cannot compare complex data
types: " + this);
default:
@@ -859,7 +866,7 @@ public abstract class FieldSpec implements
Comparable<FieldSpec>, Serializable {
if (this == BYTES) {
return BytesUtils.toHexString((byte[]) value);
}
- if (this == MAP || this == LIST) {
+ if (this == MAP || this == OPEN_STRUCT || this == LIST) {
try {
return JsonUtils.objectToString(value);
} catch (JsonProcessingException e) {
@@ -895,6 +902,7 @@ public abstract class FieldSpec implements
Comparable<FieldSpec>, Serializable {
case BYTES:
return BytesUtils.toByteArray(value);
case MAP:
+ case OPEN_STRUCT:
case LIST:
throw new UnsupportedOperationException("Cannot convert complex
data types: " + this);
default:
diff --git
a/pinot-spi/src/main/java/org/apache/pinot/spi/data/OpenStructNaming.java
b/pinot-spi/src/main/java/org/apache/pinot/spi/data/OpenStructNaming.java
new file mode 100644
index 00000000000..dc0bc138d3e
--- /dev/null
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/OpenStructNaming.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.spi.data;
+
+
+/// Naming convention for OPEN_STRUCT materialized columns. Each dense OPEN_STRUCT key is stored as
+/// a column named `<openStructColumn>$<key>`. Sparse keys share a single synthetic JSON column
+/// named `<openStructColumn>$__sparse__`.
+public final class OpenStructNaming {
+ public static final String SEPARATOR = "$";
+ public static final String SPARSE_SUFFIX = "__sparse__";
+
+ private OpenStructNaming() {
+ }
+
+ public static String materializedColumnName(String openStructColumn, String
key) {
+ return openStructColumn + SEPARATOR + key;
+ }
+
+ public static String sparseColumnName(String openStructColumn) {
+ return openStructColumn + SEPARATOR + SPARSE_SUFFIX;
+ }
+}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java
b/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java
index 37970416a40..a023e17a1bc 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java
@@ -158,6 +158,8 @@ public final class Schema implements Serializable {
case MAP:
case LIST:
break;
+ case OPEN_STRUCT:
+ break;
default:
throw new IllegalStateException("Unsupported data type: " +
dataType + " in COMPLEX field");
}
@@ -819,6 +821,17 @@ public final class Schema implements Serializable {
return this;
}
+ /**
+ * Adds an OPEN_STRUCT field to the schema.
+ *
+ * @param name field name
+ * @param childFieldSpecs per-key declared types; pass {@code Map.of()} for none
+ */
+ public SchemaBuilder addOpenStruct(String name, Map<String, FieldSpec>
childFieldSpecs) {
+ _schema.addField(new ComplexFieldSpec(name,
FieldSpec.DataType.OPEN_STRUCT, true, childFieldSpecs));
+ return this;
+ }
+
public SchemaBuilder setPrimaryKeyColumns(List<String> primaryKeyColumns) {
_schema.setPrimaryKeyColumns(primaryKeyColumns);
return this;
diff --git
a/pinot-spi/src/test/java/org/apache/pinot/spi/config/table/OpenStructIndexConfigTest.java
b/pinot-spi/src/test/java/org/apache/pinot/spi/config/table/OpenStructIndexConfigTest.java
new file mode 100644
index 00000000000..43a71a4dc2b
--- /dev/null
+++
b/pinot-spi/src/test/java/org/apache/pinot/spi/config/table/OpenStructIndexConfigTest.java
@@ -0,0 +1,246 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.spi.config.table;
+
+import java.util.List;
+import java.util.Set;
+import org.apache.pinot.spi.utils.JsonUtils;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.assertNull;
+import static org.testng.Assert.assertTrue;
+
+
+public class OpenStructIndexConfigTest {
+
+ @Test
+ public void testDefaultConfig() {
+ OpenStructIndexConfig config = OpenStructIndexConfig.DEFAULT;
+ assertTrue(config.isEnabled());
+ assertEquals(config.getMaxDenseKeys(), -1);
+ assertEquals(config.getDenseKeyMinFillRate(), 0.5);
+ assertTrue(config.getDenseKeys().isEmpty());
+ assertNull(config.getValueFieldConfigs());
+ assertNull(config.getDefaultValueFieldConfig());
+ // Built-in defaults when neither per-key nor defaultValueFieldConfig is set.
+ assertTrue(config.shouldEnableInvertedIndexForKey("any"));
+ assertTrue(config.shouldUseDictionaryForKey("any"));
+ }
+
+ @Test
+ public void testDisabledConfig() {
+ OpenStructIndexConfig config = OpenStructIndexConfig.DISABLED;
+ assertFalse(config.isEnabled());
+ }
+
+ @Test
+ public void testNoDictionaryKeys() {
+ FieldConfig rawKey =
+ new FieldConfig("raw_payload", FieldConfig.EncodingType.RAW,
(List<FieldConfig.IndexType>) null, null, null);
+ OpenStructIndexConfig config = new OpenStructIndexConfig(false, null,
1000, null, 0.5, List.of(rawKey));
+ assertFalse(config.shouldUseDictionaryForKey("raw_payload"));
+ // Unconfigured key falls back to built-in default (DICTIONARY).
+ assertTrue(config.shouldUseDictionaryForKey("other_key"));
+ }
+
+ @Test
+ public void testShouldEnableInvertedIndexForKeyPerKey()
+ throws Exception {
+ FieldConfig country = JsonUtils.stringToObject(
+ "{\"name\":\"country\",\"indexes\":{\"inverted\":{}}}",
FieldConfig.class);
+ FieldConfig clicks = JsonUtils.stringToObject(
+
"{\"name\":\"clicks\",\"indexes\":{\"inverted\":{\"disabled\":true}}}",
FieldConfig.class);
+ OpenStructIndexConfig config = new OpenStructIndexConfig(false, null,
1000, null, 0.5,
+ List.of(country, clicks));
+ assertTrue(config.shouldEnableInvertedIndexForKey("country"));
+ assertFalse(config.shouldEnableInvertedIndexForKey("clicks"));
+ // Unconfigured key falls back to built-in default of inverted-on.
+ assertTrue(config.shouldEnableInvertedIndexForKey("other"));
+ }
+
+ @Test
+ public void testDefaultValueFieldConfigSuppressesInvertedDefault()
+ throws Exception {
+ // defaultValueFieldConfig with no indexes node turns off inverted for unconfigured keys.
+ FieldConfig defaultFieldConfig = JsonUtils.stringToObject(
+ "{\"name\":\"__default__\",\"encodingType\":\"DICTIONARY\"}",
FieldConfig.class);
+ OpenStructIndexConfig config =
+ new OpenStructIndexConfig(false, defaultFieldConfig, 1000, null, 0.5,
null);
+ assertFalse(config.shouldEnableInvertedIndexForKey("any_key"));
+ assertTrue(config.shouldUseDictionaryForKey("any_key"));
+ }
+
+ @Test
+ public void testDefaultValueFieldConfigInvertedExplicit()
+ throws Exception {
+ FieldConfig defaultFieldConfig = JsonUtils.stringToObject(
+ "{\"name\":\"__default__\",\"indexes\":{\"inverted\":{}}}",
FieldConfig.class);
+ OpenStructIndexConfig config =
+ new OpenStructIndexConfig(false, defaultFieldConfig, 1000, null, 0.5,
null);
+ assertTrue(config.shouldEnableInvertedIndexForKey("any_key"));
+ }
+
+ @Test
+ public void testPerKeyWinsOverDefaultValueFieldConfig()
+ throws Exception {
+ FieldConfig defaultFieldConfig = JsonUtils.stringToObject(
+ "{\"name\":\"__default__\",\"encodingType\":\"DICTIONARY\"}",
FieldConfig.class);
+ FieldConfig country = JsonUtils.stringToObject(
+ "{\"name\":\"country\",\"indexes\":{\"inverted\":{}}}",
FieldConfig.class);
+ OpenStructIndexConfig config =
+ new OpenStructIndexConfig(false, defaultFieldConfig, 1000, null, 0.5,
List.of(country));
+ assertTrue(config.shouldEnableInvertedIndexForKey("country"));
+ // Unconfigured key uses default (no inverted), not built-in.
+ assertFalse(config.shouldEnableInvertedIndexForKey("other"));
+ }
+
+ @Test
+ public void testDefaultValueFieldConfigRawEncoding()
+ throws Exception {
+ FieldConfig defaultFieldConfig = JsonUtils.stringToObject(
+ "{\"name\":\"__default__\",\"encodingType\":\"RAW\"}",
FieldConfig.class);
+ OpenStructIndexConfig config =
+ new OpenStructIndexConfig(false, defaultFieldConfig, 1000, null, 0.5,
null);
+ assertFalse(config.shouldUseDictionaryForKey("any_key"));
+ }
+
+ @Test
+ public void testShouldUseDictionaryForKeyHardOverride() {
+ FieldConfig blob =
+ new FieldConfig("blob", FieldConfig.EncodingType.RAW,
(List<FieldConfig.IndexType>) null, null, null);
+ FieldConfig rawPayload =
+ new FieldConfig("raw_payload", FieldConfig.EncodingType.RAW,
(List<FieldConfig.IndexType>) null, null, null);
+ OpenStructIndexConfig config = new OpenStructIndexConfig(false, null,
1000, null, 0.5, List.of(blob, rawPayload));
+ assertFalse(config.shouldUseDictionaryForKey("blob"));
+ assertFalse(config.shouldUseDictionaryForKey("raw_payload"));
+ // Unconfigured key falls back to built-in default (DICTIONARY).
+ assertTrue(config.shouldUseDictionaryForKey("country"));
+ }
+
+ @Test
+ public void testValueFieldConfigsRoundTrip()
+ throws Exception {
+ String json = "{\n"
+ + " \"maxDenseKeys\": 500,\n"
+ + " \"denseKeyMinFillRate\": 0.3,\n"
+ + " \"denseKeys\": [\"country\", \"clicks\"],\n"
+ + " \"defaultValueFieldConfig\": {\n"
+ + " \"name\": \"__default__\",\n"
+ + " \"encodingType\": \"DICTIONARY\",\n"
+ + " \"indexes\": {\"inverted\": {}}\n"
+ + " },\n"
+ + " \"valueFieldConfigs\": [\n"
+ + " {\n"
+ + " \"name\": \"country\",\n"
+ + " \"encodingType\": \"DICTIONARY\",\n"
+ + " \"indexes\": {\"inverted\": {}}\n"
+ + " },\n"
+ + " {\n"
+ + " \"name\": \"clicks\",\n"
+ + " \"encodingType\": \"RAW\"\n"
+ + " }\n"
+ + " ]\n"
+ + "}";
+ OpenStructIndexConfig config = JsonUtils.stringToObject(json,
OpenStructIndexConfig.class);
+
+ assertEquals(config.getMaxDenseKeys(), 500);
+ assertEquals(config.getDenseKeyMinFillRate(), 0.3);
+ assertEquals(config.getDenseKeys(), Set.of("country", "clicks"));
+ assertNotNull(config.getDefaultValueFieldConfig());
+
+ List<FieldConfig> valueFieldConfigs = config.getValueFieldConfigs();
+ assertNotNull(valueFieldConfigs);
+ assertEquals(valueFieldConfigs.size(), 2);
+
+ // country: dictionary + inverted index
+ assertTrue(config.shouldUseDictionaryForKey("country"));
+ assertTrue(config.shouldEnableInvertedIndexForKey("country"));
+
+ // clicks: raw, no inverted (per-key wins, no indexes node)
+ assertFalse(config.shouldUseDictionaryForKey("clicks"));
+ assertFalse(config.shouldEnableInvertedIndexForKey("clicks"));
+
+ // unconfigured key: defaultValueFieldConfig (dictionary + inverted)
+ assertTrue(config.shouldUseDictionaryForKey("payload"));
+ assertTrue(config.shouldEnableInvertedIndexForKey("payload"));
+
+ // direct lookup
+ assertNotNull(config.getValueFieldConfig("country"));
+ assertEquals(config.getValueFieldConfig("country").getEncodingType(),
FieldConfig.EncodingType.DICTIONARY);
+ assertNull(config.getValueFieldConfig("missing"));
+
+ // JSON serialization round-trip
+ String reJson = JsonUtils.objectToString(config);
+ OpenStructIndexConfig reDeserialized = JsonUtils.stringToObject(reJson,
OpenStructIndexConfig.class);
+ assertEquals(reDeserialized.getMaxDenseKeys(), 500);
+ assertEquals(reDeserialized.getDenseKeys(), Set.of("country", "clicks"));
+ assertNotNull(reDeserialized.getValueFieldConfigs());
+ assertEquals(reDeserialized.getValueFieldConfigs().size(), 2);
+ assertNotNull(reDeserialized.getDefaultValueFieldConfig());
+ assertTrue(reDeserialized.shouldUseDictionaryForKey("country"));
+ assertTrue(reDeserialized.shouldEnableInvertedIndexForKey("country"));
+ assertFalse(reDeserialized.shouldUseDictionaryForKey("clicks"));
+ assertTrue(reDeserialized.shouldEnableInvertedIndexForKey("payload"));
+ }
+
+ @Test
+ public void testEmptyValueFieldConfigs()
+ throws Exception {
+ String json = "{\"valueFieldConfigs\": []}";
+ OpenStructIndexConfig config = JsonUtils.stringToObject(json,
OpenStructIndexConfig.class);
+ assertNotNull(config.getValueFieldConfigs());
+ assertTrue(config.getValueFieldConfigs().isEmpty());
+ assertNull(config.getValueFieldConfig("any"));
+ // Unconfigured key falls back to built-in defaults.
+ assertTrue(config.shouldUseDictionaryForKey("any"));
+ assertTrue(config.shouldEnableInvertedIndexForKey("any"));
+ }
+
+ @Test
+ public void testDisabledViaJson()
+ throws Exception {
+ String json = "{\"disabled\": true}";
+ OpenStructIndexConfig config = JsonUtils.stringToObject(json,
OpenStructIndexConfig.class);
+ assertFalse(config.isEnabled());
+ }
+
+ @Test
+ public void testEmptyJsonDefaults()
+ throws Exception {
+ OpenStructIndexConfig config = JsonUtils.stringToObject("{}",
OpenStructIndexConfig.class);
+ assertTrue(config.isEnabled());
+ assertEquals(config.getMaxDenseKeys(),
OpenStructIndexConfig.DEFAULT_MAX_DENSE_KEYS);
+ assertEquals(config.getDenseKeyMinFillRate(),
OpenStructIndexConfig.DEFAULT_DENSE_KEY_MIN_FILL_RATE);
+ assertTrue(config.getDenseKeys().isEmpty());
+ assertNull(config.getValueFieldConfigs());
+ assertNull(config.getDefaultValueFieldConfig());
+ }
+
+ @Test
+ public void testPartialJsonRetainsDefaultFillRate()
+ throws Exception {
+ OpenStructIndexConfig config =
+ JsonUtils.stringToObject("{\"maxDenseKeys\": 500}",
OpenStructIndexConfig.class);
+ assertEquals(config.getMaxDenseKeys(), 500);
+ assertEquals(config.getDenseKeyMinFillRate(),
OpenStructIndexConfig.DEFAULT_DENSE_KEY_MIN_FILL_RATE);
+ }
+}
diff --git
a/pinot-spi/src/test/java/org/apache/pinot/spi/data/OpenStructDataTypeTest.java
b/pinot-spi/src/test/java/org/apache/pinot/spi/data/OpenStructDataTypeTest.java
new file mode 100644
index 00000000000..848e58498a4
--- /dev/null
+++
b/pinot-spi/src/test/java/org/apache/pinot/spi/data/OpenStructDataTypeTest.java
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.spi.data;
+
+import java.util.Map;
+import org.apache.pinot.spi.utils.JsonUtils;
+import org.testng.annotations.Test;
+
+import static org.apache.pinot.spi.data.ComplexFieldSpec.KEY_FIELD;
+import static org.apache.pinot.spi.data.ComplexFieldSpec.VALUE_FIELD;
+import static org.apache.pinot.spi.data.FieldSpec.DataType;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
+
+public class OpenStructDataTypeTest {
+
+ @Test
+ public void openStructWithEmptyChildFieldSpecs() {
+ ComplexFieldSpec spec = new ComplexFieldSpec("o", DataType.OPEN_STRUCT,
true, Map.of());
+ assertEquals(spec.getDataType(), DataType.OPEN_STRUCT);
+ assertTrue(spec.getChildFieldSpecs().isEmpty());
+ }
+
+ @Test
+ public void openStructAcceptsChildFieldSpecs() {
+ Map<String, FieldSpec> childFieldSpecs = Map.of(
+ "count", new DimensionFieldSpec("count", DataType.INT, true),
+ "name", new DimensionFieldSpec("name", DataType.STRING, true));
+ ComplexFieldSpec spec = new ComplexFieldSpec("o", DataType.OPEN_STRUCT,
true, childFieldSpecs);
+ assertEquals(spec.getChildFieldSpec("count").getDataType(), DataType.INT);
+ assertEquals(spec.getChildFieldSpec("name").getDataType(),
DataType.STRING);
+ }
+
+ @Test
+ public void mapAcceptsKeyAndValueFieldSpecs() {
+ ComplexFieldSpec spec = new ComplexFieldSpec(
+ "m", DataType.MAP, true,
+ Map.of(KEY_FIELD, new DimensionFieldSpec(KEY_FIELD, DataType.STRING,
true),
+ VALUE_FIELD, new DimensionFieldSpec(VALUE_FIELD, DataType.INT,
true)));
+ assertEquals(spec.getChildFieldSpec(KEY_FIELD).getDataType(),
DataType.STRING);
+ assertEquals(spec.getChildFieldSpec(VALUE_FIELD).getDataType(),
DataType.INT);
+ }
+
+ @Test
+ public void openStructJsonRoundtrip()
+ throws Exception {
+ Map<String, FieldSpec> childFieldSpecs = Map.of(
+ "count", new DimensionFieldSpec("count", DataType.INT, true));
+ ComplexFieldSpec original = new ComplexFieldSpec("o",
DataType.OPEN_STRUCT, true, childFieldSpecs);
+
+ String json = JsonUtils.objectToString(original);
+ ComplexFieldSpec roundtripped = JsonUtils.stringToObject(json,
ComplexFieldSpec.class);
+
+ assertEquals(roundtripped.getDataType(), DataType.OPEN_STRUCT);
+ assertEquals(roundtripped.getChildFieldSpec("count").getDataType(),
DataType.INT);
+ }
+
+ @Test
+ public void openStructJsonWithoutChildFieldSpecs()
+ throws Exception {
+ String json =
"{\"name\":\"o\",\"dataType\":\"OPEN_STRUCT\",\"singleValueField\":true}";
+ ComplexFieldSpec spec = JsonUtils.stringToObject(json,
ComplexFieldSpec.class);
+ assertEquals(spec.getDataType(), DataType.OPEN_STRUCT);
+ assertTrue(spec.getChildFieldSpecs().isEmpty());
+ }
+}
diff --git
a/pinot-spi/src/test/java/org/apache/pinot/spi/data/OpenStructNamingTest.java
b/pinot-spi/src/test/java/org/apache/pinot/spi/data/OpenStructNamingTest.java
new file mode 100644
index 00000000000..6adad485c39
--- /dev/null
+++
b/pinot-spi/src/test/java/org/apache/pinot/spi/data/OpenStructNamingTest.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.spi.data;
+
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+
+
+public class OpenStructNamingTest {
+
+ @Test
+ public void testMaterializedColumnName() {
+ assertEquals(OpenStructNaming.materializedColumnName("metrics",
"tenancy"), "metrics$tenancy");
+ }
+
+ @Test
+ public void testSparseColumnName() {
+ assertEquals(OpenStructNaming.sparseColumnName("metrics"),
"metrics$__sparse__");
+ }
+}
diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/data/SchemaTest.java
b/pinot-spi/src/test/java/org/apache/pinot/spi/data/SchemaTest.java
index 51d79b01026..a50d281b2e0 100644
--- a/pinot-spi/src/test/java/org/apache/pinot/spi/data/SchemaTest.java
+++ b/pinot-spi/src/test/java/org/apache/pinot/spi/data/SchemaTest.java
@@ -25,6 +25,7 @@ import java.sql.Timestamp;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
+import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.apache.pinot.spi.data.TimeGranularitySpec.TimeFormat;
import org.apache.pinot.spi.utils.BytesUtils;
@@ -795,4 +796,19 @@ public class SchemaTest {
assertThat(withoutVirtual.getDescription()).isEqualTo("my description");
assertThat(withoutVirtual.getTags()).isEqualTo(List.of("tag1"));
}
+
+ @Test
+ public void schemaBuilderAddOpenStruct() {
+ Schema schema = new Schema.SchemaBuilder()
+ .setSchemaName("test")
+ .addOpenStruct("attrs",
+ Map.of("count", new DimensionFieldSpec("count",
FieldSpec.DataType.INT, true)))
+ .build();
+
+ FieldSpec fs = schema.getFieldSpecFor("attrs");
+ Assert.assertNotNull(fs);
+ Assert.assertEquals(fs.getDataType(), FieldSpec.DataType.OPEN_STRUCT);
+ ComplexFieldSpec cfs = (ComplexFieldSpec) fs;
+ Assert.assertEquals(cfs.getChildFieldSpec("count").getDataType(),
FieldSpec.DataType.INT);
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]