imply-cheddar commented on code in PR #14412:
URL: https://github.com/apache/druid/pull/14412#discussion_r1228934409
##########
processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java:
##########
@@ -284,20 +314,42 @@ public ColumnCapabilities getColumnCapabilities()
.setHasNulls(hasNulls);
}
- private ColumnType getLogicalType()
+ public ColumnType getLogicalType()
{
- if (fieldIndexers.isEmpty()) {
- // we didn't see anything, so we can be anything, so why not a string?
- return ColumnType.STRING;
- }
- if (fieldIndexers.size() == 1 &&
fieldIndexers.containsKey(NestedPathFinder.JSON_PATH_ROOT)) {
- FieldIndexer rootField =
fieldIndexers.get(NestedPathFinder.JSON_PATH_ROOT);
- ColumnType singleType = rootField.getTypes().getSingleType();
- return singleType == null ? ColumnType.NESTED_DATA : singleType;
+ if (!hasNestedData) {
Review Comment:
Nit: make this positive instead of negative.
##########
processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java:
##########
@@ -346,6 +347,14 @@ private ColumnAnalysis analyzeComplexColumn(
.withTypeName(typeName);
try (final BaseColumn theColumn = columnHolder != null ?
columnHolder.getColumn() : null) {
+ if (capabilities != null) {
+ bob.hasMultipleValues(capabilities.hasMultipleValues().isTrue())
+ .hasNulls(capabilities.hasNulls().isMaybeTrue());
+ }
Review Comment:
Does this need to be inside of the try? I don't think it does, but I'm
wondering if I'm missing something.
##########
processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java:
##########
@@ -284,20 +314,42 @@ public ColumnCapabilities getColumnCapabilities()
.setHasNulls(hasNulls);
}
- private ColumnType getLogicalType()
+ public ColumnType getLogicalType()
{
- if (fieldIndexers.isEmpty()) {
- // we didn't see anything, so we can be anything, so why not a string?
- return ColumnType.STRING;
- }
- if (fieldIndexers.size() == 1 &&
fieldIndexers.containsKey(NestedPathFinder.JSON_PATH_ROOT)) {
- FieldIndexer rootField =
fieldIndexers.get(NestedPathFinder.JSON_PATH_ROOT);
- ColumnType singleType = rootField.getTypes().getSingleType();
- return singleType == null ? ColumnType.NESTED_DATA : singleType;
+ if (!hasNestedData) {
+ if (fieldIndexers.isEmpty()) {
+ // we didn't see anything, so we can be anything, so why not a string?
+ return ColumnType.STRING;
+ }
Review Comment:
I would've expected this case to be `isConstant = true` and `constantValue =
null`, why not check for those states instead?
##########
processing/src/main/java/org/apache/druid/segment/nested/ConstantColumn.java:
##########
@@ -0,0 +1,363 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.nested;
+
+import org.apache.druid.java.util.common.UOE;
+import org.apache.druid.math.expr.ExprEval;
+import org.apache.druid.math.expr.ExpressionType;
+import org.apache.druid.query.extraction.ExtractionFn;
+import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import org.apache.druid.segment.ColumnValueSelector;
+import org.apache.druid.segment.DimensionSelector;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.DictionaryEncodedColumn;
+import org.apache.druid.segment.column.ValueType;
+import org.apache.druid.segment.data.Indexed;
+import org.apache.druid.segment.data.IndexedInts;
+import org.apache.druid.segment.data.ListIndexed;
+import org.apache.druid.segment.data.ReadableOffset;
+import org.apache.druid.segment.vector.BaseDoubleVectorValueSelector;
+import org.apache.druid.segment.vector.BaseFloatVectorValueSelector;
+import org.apache.druid.segment.vector.BaseLongVectorValueSelector;
+import org.apache.druid.segment.vector.ConstantVectorSelectors;
+import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.ReadableVectorOffset;
+import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.VectorObjectSelector;
+import org.apache.druid.segment.vector.VectorValueSelector;
+
+import javax.annotation.Nullable;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Objects;
+
+public class ConstantColumn implements NestedCommonFormatColumn,
DictionaryEncodedColumn<String>
+{
+ private final ColumnType logicalType;
+ private final ExprEval<?> value;
+ private final String stringVal;
+
+ public ConstantColumn(ColumnType logicalType, @Nullable Object value)
+ {
+ this.logicalType = logicalType;
+ this.value = ExprEval.ofType(ExpressionType.fromColumnType(logicalType),
value);
+ this.stringVal = value == null ? null : String.valueOf(value);
+ }
+
+ @Override
+ public int length()
+ {
+ return 0;
+ }
+
+ @Override
+ public boolean hasMultipleValues()
+ {
+ return false;
+ }
+
+ @Override
+ public int getSingleValueRow(int rowNum)
+ {
+ return 0;
+ }
+
+ @Override
+ public IndexedInts getMultiValueRow(int rowNum)
+ {
+ throw new UnsupportedOperationException("Not a multi-value column");
+ }
+
+ @Nullable
+ @Override
+ public String lookupName(int id)
+ {
+ if (id == 0) {
+ return stringVal;
+ }
+ return null;
+ }
+
+ @Override
+ public int lookupId(String name)
+ {
+ if (Objects.equals(name, stringVal)) {
+ return 0;
+ }
+ return -1;
+ }
+
+ @Override
+ public int getCardinality()
+ {
+ return 1;
+ }
+
+ @Override
+ public DimensionSelector makeDimensionSelector(ReadableOffset offset,
@Nullable ExtractionFn extractionFn)
+ {
+ return DimensionSelector.constant(stringVal, extractionFn);
+ }
+
+ @Override
+ public ColumnValueSelector<?> makeColumnValueSelector(ReadableOffset offset)
+ {
+ return new ColumnValueSelector<Object>()
+ {
+ @Override
+ public double getDouble()
+ {
+ return value.asDouble();
+ }
+
+ @Override
+ public float getFloat()
+ {
+ return (float) value.asDouble();
+ }
+
+ @Override
+ public long getLong()
+ {
+ return value.asLong();
+ }
+
+ @Override
+ public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+ {
+
+ }
+
+ @Override
+ public boolean isNull()
+ {
+ return value.isNumericNull();
+ }
+
+ @Nullable
+ @Override
+ public Object getObject()
+ {
+ return value.valueOrDefault();
+ }
+
+ @Override
+ public Class<?> classOfObject()
+ {
+ return Object.class;
+ }
+ };
+ }
+
+ @Override
+ public SingleValueDimensionVectorSelector
makeSingleValueDimensionVectorSelector(ReadableVectorOffset vectorOffset)
+ {
+ return
ConstantVectorSelectors.singleValueDimensionVectorSelector(vectorOffset,
stringVal);
+ }
+
+ @Override
+ public MultiValueDimensionVectorSelector
makeMultiValueDimensionVectorSelector(ReadableVectorOffset vectorOffset)
+ {
+ throw new UnsupportedOperationException("Not a multi-value column");
+ }
+
+ @Override
+ public VectorValueSelector makeVectorValueSelector(ReadableVectorOffset
offset)
+ {
+ if (logicalType.isNumeric()) {
+ final boolean[] nulls;
+ if (value.isNumericNull()) {
+ nulls = new boolean[offset.getMaxVectorSize()];
+ Arrays.fill(nulls, true);
+ } else {
+ nulls = null;
+ }
+ switch (logicalType.getType()) {
+ case LONG:
+ final long[] constantLong = new long[offset.getMaxVectorSize()];
+ Arrays.fill(constantLong, value.asLong());
+ return new BaseLongVectorValueSelector(offset)
+ {
+ @Override
+ public long[] getLongVector()
+ {
+ return constantLong;
+ }
+
+ @Nullable
+ @Override
+ public boolean[] getNullVector()
+ {
+ return nulls;
+ }
+ };
+
+ case DOUBLE:
+ final double[] constantDouble = new
double[offset.getMaxVectorSize()];
+ Arrays.fill(constantDouble, value.asDouble());
+ return new BaseDoubleVectorValueSelector(offset)
+ {
+ @Override
+ public double[] getDoubleVector()
+ {
+ return constantDouble;
+ }
+
+ @Nullable
+ @Override
+ public boolean[] getNullVector()
+ {
+ return nulls;
+ }
+ };
+
+ case FLOAT:
+ final float[] constantFloat = new float[offset.getMaxVectorSize()];
+ Arrays.fill(constantFloat, (float) value.asDouble());
+ return new BaseFloatVectorValueSelector(offset)
+ {
+ @Override
+ public float[] getFloatVector()
+ {
+ return constantFloat;
+ }
+
+ @Nullable
+ @Override
+ public boolean[] getNullVector()
+ {
+ return nulls;
+ }
+ };
+ }
+ }
+ throw new UOE("Cannot make VectorValueSelector for constant column of
type[%s]", logicalType);
+ }
+
+ @Override
+ public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset
offset)
+ {
+ final Object[] constant = new Object[offset.getMaxVectorSize()];
+ Arrays.fill(constant, value.valueOrDefault());
+ return new VectorObjectSelector()
+ {
+
+ @Override
+ public Object[] getObjectVector()
+ {
+ return constant;
+ }
+
+ @Override
+ public int getMaxVectorSize()
+ {
+ return offset.getMaxVectorSize();
+ }
+
+ @Override
+ public int getCurrentVectorSize()
+ {
+ return offset.getCurrentVectorSize();
+ }
+ };
+ }
+
+ @Override
+ public Indexed<String> getStringDictionary()
+ {
+ if (logicalType.is(ValueType.STRING)) {
+ return new ListIndexed<>(Collections.singletonList(value.asString()));
+ }
+ if (logicalType.equals(ColumnType.STRING_ARRAY)) {
+ Object[] array = value.asArray();
+ if (array == null) {
+ return new ListIndexed<>(Collections.singletonList(null));
+ }
+ String[] strings = Arrays.copyOf(array, array.length, String[].class);
+ Arrays.sort(strings, ColumnType.STRING.getNullableStrategy());
+ return new ListIndexed<>(Arrays.asList(strings));
+ }
Review Comment:
This implementation of a constant array column seems suspect to me. It
might be better to separate out the array form of the constant column from the
single-value forms and actually persist a dictionary of the individual values
instead of rebuilding it and re-sorting the values over and over and over again.
##########
processing/src/main/java/org/apache/druid/segment/QueryableIndexIndexableAdapter.java:
##########
@@ -184,6 +186,21 @@ public NestedColumnMergable
getNestedColumnMergeables(String columnName)
final BaseColumn col = columnHolder.getColumn();
if (col instanceof NestedCommonFormatColumn) {
NestedCommonFormatColumn column = (NestedCommonFormatColumn) col;
+ if (column instanceof ConstantColumn) {
+ return new NestedColumnMergable(
+ new SortedValueDictionary(
+ column.getStringDictionary(),
+ column.getLongDictionary(),
+ column.getDoubleDictionary(),
+ column.getArrayDictionary(),
+ column
+ ),
+ column.getFieldTypeInfo(),
+ ColumnType.NESTED_DATA.equals(column.getLogicalType()),
+ true,
+ ((ConstantColumn) column).getConstantValue()
+ );
+ }
Review Comment:
Why not move this check to be before the `if( col instanceof
NestedCommonFormatColumn)` check. It doesn't seem to be meaningful that this
is nested?
##########
processing/src/main/java/org/apache/druid/segment/nested/ConstantColumnAndIndexSupplier.java:
##########
@@ -0,0 +1,339 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.nested;
+
+import com.google.common.base.Predicate;
+import com.google.common.base.Supplier;
+import org.apache.druid.collections.bitmap.BitmapFactory;
+import org.apache.druid.collections.bitmap.ImmutableBitmap;
+import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.java.util.common.RE;
+import org.apache.druid.java.util.common.StringUtils;
+import org.apache.druid.math.expr.Evals;
+import org.apache.druid.query.filter.DruidDoublePredicate;
+import org.apache.druid.query.filter.DruidFloatPredicate;
+import org.apache.druid.query.filter.DruidLongPredicate;
+import org.apache.druid.query.filter.DruidPredicateFactory;
+import org.apache.druid.segment.IndexMerger;
+import org.apache.druid.segment.column.BitmapColumnIndex;
+import org.apache.druid.segment.column.ColumnIndexSupplier;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex;
+import org.apache.druid.segment.column.DictionaryEncodedValueIndex;
+import org.apache.druid.segment.column.DruidPredicateIndex;
+import org.apache.druid.segment.column.LexicographicalRangeIndex;
+import org.apache.druid.segment.column.NullValueIndex;
+import org.apache.druid.segment.column.NullableTypeStrategy;
+import org.apache.druid.segment.column.NumericRangeIndex;
+import org.apache.druid.segment.column.SimpleImmutableBitmapIndex;
+import org.apache.druid.segment.column.StringValueSetIndex;
+import org.apache.druid.segment.column.ValueType;
+import org.apache.druid.segment.data.BitmapSerdeFactory;
+import org.apache.druid.segment.data.VByte;
+
+import javax.annotation.Nullable;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Objects;
+import java.util.SortedSet;
+
+public class ConstantColumnAndIndexSupplier implements
Supplier<NestedCommonFormatColumn>, ColumnIndexSupplier
+{
+ public static ConstantColumnAndIndexSupplier read(
+ ColumnType logicalType,
+ BitmapSerdeFactory bitmapSerdeFactory,
+ ByteBuffer bb
+ )
+ {
+ final byte version = bb.get();
+ final int columnNameLength = VByte.readInt(bb);
+ final String columnName = StringUtils.fromUtf8(bb, columnNameLength);
+
+ if (version == NestedCommonFormatColumnSerializer.V0) {
+ try {
+ final int rowCount = VByte.readInt(bb);
+ final int valueLength = VByte.readInt(bb);
+ final Object constantValue =
NestedDataComplexTypeSerde.OBJECT_MAPPER.readValue(
+ IndexMerger.SERIALIZER_UTILS.readBytes(bb, valueLength),
+ Object.class
+ );
+ return new ConstantColumnAndIndexSupplier(logicalType,
bitmapSerdeFactory, rowCount, constantValue);
+ }
+ catch (IOException ex) {
+ throw new RE(ex, "Failed to deserialize V%s column [%s].", version,
columnName);
+ }
+ } else {
+ throw new RE("Unknown version " + version);
Review Comment:
interpolate with `[]`?
##########
processing/src/main/java/org/apache/druid/segment/nested/ConstantColumnAndIndexSupplier.java:
##########
@@ -0,0 +1,339 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.nested;
+
+import com.google.common.base.Predicate;
+import com.google.common.base.Supplier;
+import org.apache.druid.collections.bitmap.BitmapFactory;
+import org.apache.druid.collections.bitmap.ImmutableBitmap;
+import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.java.util.common.RE;
+import org.apache.druid.java.util.common.StringUtils;
+import org.apache.druid.math.expr.Evals;
+import org.apache.druid.query.filter.DruidDoublePredicate;
+import org.apache.druid.query.filter.DruidFloatPredicate;
+import org.apache.druid.query.filter.DruidLongPredicate;
+import org.apache.druid.query.filter.DruidPredicateFactory;
+import org.apache.druid.segment.IndexMerger;
+import org.apache.druid.segment.column.BitmapColumnIndex;
+import org.apache.druid.segment.column.ColumnIndexSupplier;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex;
+import org.apache.druid.segment.column.DictionaryEncodedValueIndex;
+import org.apache.druid.segment.column.DruidPredicateIndex;
+import org.apache.druid.segment.column.LexicographicalRangeIndex;
+import org.apache.druid.segment.column.NullValueIndex;
+import org.apache.druid.segment.column.NullableTypeStrategy;
+import org.apache.druid.segment.column.NumericRangeIndex;
+import org.apache.druid.segment.column.SimpleImmutableBitmapIndex;
+import org.apache.druid.segment.column.StringValueSetIndex;
+import org.apache.druid.segment.column.ValueType;
+import org.apache.druid.segment.data.BitmapSerdeFactory;
+import org.apache.druid.segment.data.VByte;
+
+import javax.annotation.Nullable;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Objects;
+import java.util.SortedSet;
+
+public class ConstantColumnAndIndexSupplier implements
Supplier<NestedCommonFormatColumn>, ColumnIndexSupplier
+{
+ public static ConstantColumnAndIndexSupplier read(
+ ColumnType logicalType,
+ BitmapSerdeFactory bitmapSerdeFactory,
+ ByteBuffer bb
+ )
+ {
+ final byte version = bb.get();
+ final int columnNameLength = VByte.readInt(bb);
+ final String columnName = StringUtils.fromUtf8(bb, columnNameLength);
Review Comment:
Generally speaking, you should have to validate the version before you know
that it's safe to pull out the `columnNameLength` and `columnName`.
##########
processing/src/main/java/org/apache/druid/segment/nested/ConstantColumnAndIndexSupplier.java:
##########
@@ -0,0 +1,339 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.nested;
+
+import com.google.common.base.Predicate;
+import com.google.common.base.Supplier;
+import org.apache.druid.collections.bitmap.BitmapFactory;
+import org.apache.druid.collections.bitmap.ImmutableBitmap;
+import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.java.util.common.RE;
+import org.apache.druid.java.util.common.StringUtils;
+import org.apache.druid.math.expr.Evals;
+import org.apache.druid.query.filter.DruidDoublePredicate;
+import org.apache.druid.query.filter.DruidFloatPredicate;
+import org.apache.druid.query.filter.DruidLongPredicate;
+import org.apache.druid.query.filter.DruidPredicateFactory;
+import org.apache.druid.segment.IndexMerger;
+import org.apache.druid.segment.column.BitmapColumnIndex;
+import org.apache.druid.segment.column.ColumnIndexSupplier;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex;
+import org.apache.druid.segment.column.DictionaryEncodedValueIndex;
+import org.apache.druid.segment.column.DruidPredicateIndex;
+import org.apache.druid.segment.column.LexicographicalRangeIndex;
+import org.apache.druid.segment.column.NullValueIndex;
+import org.apache.druid.segment.column.NullableTypeStrategy;
+import org.apache.druid.segment.column.NumericRangeIndex;
+import org.apache.druid.segment.column.SimpleImmutableBitmapIndex;
+import org.apache.druid.segment.column.StringValueSetIndex;
+import org.apache.druid.segment.column.ValueType;
+import org.apache.druid.segment.data.BitmapSerdeFactory;
+import org.apache.druid.segment.data.VByte;
+
+import javax.annotation.Nullable;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Objects;
+import java.util.SortedSet;
+
+public class ConstantColumnAndIndexSupplier implements
Supplier<NestedCommonFormatColumn>, ColumnIndexSupplier
+{
+ public static ConstantColumnAndIndexSupplier read(
+ ColumnType logicalType,
+ BitmapSerdeFactory bitmapSerdeFactory,
+ ByteBuffer bb
+ )
+ {
+ final byte version = bb.get();
+ final int columnNameLength = VByte.readInt(bb);
+ final String columnName = StringUtils.fromUtf8(bb, columnNameLength);
+
+ if (version == NestedCommonFormatColumnSerializer.V0) {
+ try {
+ final int rowCount = VByte.readInt(bb);
+ final int valueLength = VByte.readInt(bb);
+ final Object constantValue =
NestedDataComplexTypeSerde.OBJECT_MAPPER.readValue(
+ IndexMerger.SERIALIZER_UTILS.readBytes(bb, valueLength),
+ Object.class
+ );
Review Comment:
We can do better than this. We know the type, let's persist the type and
persist it as the lookup into whatever the relevant dictionary is. Given the
type and the global dictionary id, we should be able to quite simply
deserialize the thing...
##########
processing/src/main/java/org/apache/druid/segment/nested/ConstantColumnAndIndexSupplier.java:
##########
@@ -0,0 +1,339 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.nested;
+
+import com.google.common.base.Predicate;
+import com.google.common.base.Supplier;
+import org.apache.druid.collections.bitmap.BitmapFactory;
+import org.apache.druid.collections.bitmap.ImmutableBitmap;
+import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.java.util.common.RE;
+import org.apache.druid.java.util.common.StringUtils;
+import org.apache.druid.math.expr.Evals;
+import org.apache.druid.query.filter.DruidDoublePredicate;
+import org.apache.druid.query.filter.DruidFloatPredicate;
+import org.apache.druid.query.filter.DruidLongPredicate;
+import org.apache.druid.query.filter.DruidPredicateFactory;
+import org.apache.druid.segment.IndexMerger;
+import org.apache.druid.segment.column.BitmapColumnIndex;
+import org.apache.druid.segment.column.ColumnIndexSupplier;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex;
+import org.apache.druid.segment.column.DictionaryEncodedValueIndex;
+import org.apache.druid.segment.column.DruidPredicateIndex;
+import org.apache.druid.segment.column.LexicographicalRangeIndex;
+import org.apache.druid.segment.column.NullValueIndex;
+import org.apache.druid.segment.column.NullableTypeStrategy;
+import org.apache.druid.segment.column.NumericRangeIndex;
+import org.apache.druid.segment.column.SimpleImmutableBitmapIndex;
+import org.apache.druid.segment.column.StringValueSetIndex;
+import org.apache.druid.segment.column.ValueType;
+import org.apache.druid.segment.data.BitmapSerdeFactory;
+import org.apache.druid.segment.data.VByte;
+
+import javax.annotation.Nullable;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Objects;
+import java.util.SortedSet;
+
+public class ConstantColumnAndIndexSupplier implements
Supplier<NestedCommonFormatColumn>, ColumnIndexSupplier
+{
+ public static ConstantColumnAndIndexSupplier read(
+ ColumnType logicalType,
+ BitmapSerdeFactory bitmapSerdeFactory,
+ ByteBuffer bb
+ )
+ {
+ final byte version = bb.get();
+ final int columnNameLength = VByte.readInt(bb);
+ final String columnName = StringUtils.fromUtf8(bb, columnNameLength);
+
+ if (version == NestedCommonFormatColumnSerializer.V0) {
+ try {
+ final int rowCount = VByte.readInt(bb);
+ final int valueLength = VByte.readInt(bb);
+ final Object constantValue =
NestedDataComplexTypeSerde.OBJECT_MAPPER.readValue(
+ IndexMerger.SERIALIZER_UTILS.readBytes(bb, valueLength),
+ Object.class
+ );
+ return new ConstantColumnAndIndexSupplier(logicalType,
bitmapSerdeFactory, rowCount, constantValue);
+ }
+ catch (IOException ex) {
+ throw new RE(ex, "Failed to deserialize V%s column [%s].", version,
columnName);
+ }
+ } else {
+ throw new RE("Unknown version " + version);
+ }
+ }
+
+ private final ColumnType logicalType;
+ private final BitmapSerdeFactory bitmapSerdeFactory;
+ @Nullable
+ private final Object constantValue;
+
+ private final ImmutableBitmap matchBitmap;
+ private final SimpleImmutableBitmapIndex match;
+ private final ImmutableBitmap noMatchBitmap;
+ private final SimpleImmutableBitmapIndex noMatch;
+
+ public ConstantColumnAndIndexSupplier(
+ ColumnType logicalType,
+ BitmapSerdeFactory bitmapSerdeFactory,
+ int numRows,
+ @Nullable Object constantValue
+ )
+ {
+ this.logicalType = logicalType;
+ this.bitmapSerdeFactory = bitmapSerdeFactory;
+ this.constantValue = constantValue;
+ this.matchBitmap = bitmapSerdeFactory.getBitmapFactory().complement(
+ bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap(),
+ numRows
+ );
Review Comment:
Remind me of the lifecycle of the `ConstantColumnAndIndexSupplier`, I'm
worried that it exists on-heap on segment-load, where I don't think it's good
to pay the overhead of this on-heap object. It would be better to just build
it on-demand at query time (it's not really expensive to build).
##########
processing/src/test/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerdeTest.java:
##########
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.serde;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.apache.druid.segment.TestHelper;
+import org.apache.druid.segment.column.ColumnType;
+import org.junit.Test;
+
+import java.nio.ByteOrder;
+
+public class NestedCommonFormatColumnPartSerdeTest
+{
+ private static final ObjectMapper OBJECT_MAPPER =
TestHelper.makeJsonMapper();
+
+ @Test
+ public void testSerde()
+ {
+ NestedCommonFormatColumnPartSerde partSerde =
NestedCommonFormatColumnPartSerde.serializerBuilder()
+
.isConstant(false)
+
.isVariantType(false)
+
.withByteOrder(ByteOrder.nativeOrder())
+
.withHasNulls(true)
+
.withLogicalType(ColumnType.LONG)
+
.build();
Review Comment:
I think that CodeQL has a point here, is this test doing something?
##########
processing/src/main/java/org/apache/druid/segment/nested/ConstantColumn.java:
##########
@@ -0,0 +1,363 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.nested;
+
+import org.apache.druid.java.util.common.UOE;
+import org.apache.druid.math.expr.ExprEval;
+import org.apache.druid.math.expr.ExpressionType;
+import org.apache.druid.query.extraction.ExtractionFn;
+import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import org.apache.druid.segment.ColumnValueSelector;
+import org.apache.druid.segment.DimensionSelector;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.DictionaryEncodedColumn;
+import org.apache.druid.segment.column.ValueType;
+import org.apache.druid.segment.data.Indexed;
+import org.apache.druid.segment.data.IndexedInts;
+import org.apache.druid.segment.data.ListIndexed;
+import org.apache.druid.segment.data.ReadableOffset;
+import org.apache.druid.segment.vector.BaseDoubleVectorValueSelector;
+import org.apache.druid.segment.vector.BaseFloatVectorValueSelector;
+import org.apache.druid.segment.vector.BaseLongVectorValueSelector;
+import org.apache.druid.segment.vector.ConstantVectorSelectors;
+import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.ReadableVectorOffset;
+import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.VectorObjectSelector;
+import org.apache.druid.segment.vector.VectorValueSelector;
+
+import javax.annotation.Nullable;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Objects;
+
+public class ConstantColumn implements NestedCommonFormatColumn,
DictionaryEncodedColumn<String>
+{
+ private final ColumnType logicalType;
+ private final ExprEval<?> value;
+ private final String stringVal;
+
+ public ConstantColumn(ColumnType logicalType, @Nullable Object value)
+ {
+ this.logicalType = logicalType;
+ this.value = ExprEval.ofType(ExpressionType.fromColumnType(logicalType),
value);
+ this.stringVal = value == null ? null : String.valueOf(value);
+ }
+
+ @Override
+ public int length()
+ {
+ return 0;
+ }
+
+ @Override
+ public boolean hasMultipleValues()
+ {
+ return false;
+ }
+
+ @Override
+ public int getSingleValueRow(int rowNum)
+ {
+ return 0;
+ }
+
+ @Override
+ public IndexedInts getMultiValueRow(int rowNum)
+ {
+ throw new UnsupportedOperationException("Not a multi-value column");
+ }
+
+ @Nullable
+ @Override
+ public String lookupName(int id)
+ {
+ if (id == 0) {
+ return stringVal;
+ }
+ return null;
+ }
+
+ @Override
+ public int lookupId(String name)
+ {
+ if (Objects.equals(name, stringVal)) {
+ return 0;
+ }
+ return -1;
+ }
+
+ @Override
+ public int getCardinality()
+ {
+ return 1;
+ }
+
+ @Override
+ public DimensionSelector makeDimensionSelector(ReadableOffset offset,
@Nullable ExtractionFn extractionFn)
+ {
+ return DimensionSelector.constant(stringVal, extractionFn);
+ }
+
+ @Override
+ public ColumnValueSelector<?> makeColumnValueSelector(ReadableOffset offset)
+ {
+ return new ColumnValueSelector<Object>()
+ {
+ @Override
+ public double getDouble()
+ {
+ return value.asDouble();
+ }
+
+ @Override
+ public float getFloat()
+ {
+ return (float) value.asDouble();
+ }
+
+ @Override
+ public long getLong()
+ {
+ return value.asLong();
+ }
+
+ @Override
+ public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+ {
+
+ }
+
+ @Override
+ public boolean isNull()
+ {
+ return value.isNumericNull();
+ }
+
+ @Nullable
+ @Override
+ public Object getObject()
+ {
+ return value.valueOrDefault();
+ }
+
+ @Override
+ public Class<?> classOfObject()
+ {
+ return Object.class;
+ }
+ };
+ }
+
+ @Override
+ public SingleValueDimensionVectorSelector
makeSingleValueDimensionVectorSelector(ReadableVectorOffset vectorOffset)
+ {
+ return
ConstantVectorSelectors.singleValueDimensionVectorSelector(vectorOffset,
stringVal);
+ }
+
+ @Override
+ public MultiValueDimensionVectorSelector
makeMultiValueDimensionVectorSelector(ReadableVectorOffset vectorOffset)
+ {
+ throw new UnsupportedOperationException("Not a multi-value column");
+ }
+
+ @Override
+ public VectorValueSelector makeVectorValueSelector(ReadableVectorOffset
offset)
+ {
+ if (logicalType.isNumeric()) {
+ final boolean[] nulls;
+ if (value.isNumericNull()) {
+ nulls = new boolean[offset.getMaxVectorSize()];
+ Arrays.fill(nulls, true);
+ } else {
+ nulls = null;
+ }
+ switch (logicalType.getType()) {
Review Comment:
Can you add a default case that just falls through and have a comment that
explains why it's good to fall through?
##########
processing/src/main/java/org/apache/druid/segment/nested/ConstantColumn.java:
##########
@@ -0,0 +1,363 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.nested;
+
+import org.apache.druid.java.util.common.UOE;
+import org.apache.druid.math.expr.ExprEval;
+import org.apache.druid.math.expr.ExpressionType;
+import org.apache.druid.query.extraction.ExtractionFn;
+import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import org.apache.druid.segment.ColumnValueSelector;
+import org.apache.druid.segment.DimensionSelector;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.DictionaryEncodedColumn;
+import org.apache.druid.segment.column.ValueType;
+import org.apache.druid.segment.data.Indexed;
+import org.apache.druid.segment.data.IndexedInts;
+import org.apache.druid.segment.data.ListIndexed;
+import org.apache.druid.segment.data.ReadableOffset;
+import org.apache.druid.segment.vector.BaseDoubleVectorValueSelector;
+import org.apache.druid.segment.vector.BaseFloatVectorValueSelector;
+import org.apache.druid.segment.vector.BaseLongVectorValueSelector;
+import org.apache.druid.segment.vector.ConstantVectorSelectors;
+import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.ReadableVectorOffset;
+import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.VectorObjectSelector;
+import org.apache.druid.segment.vector.VectorValueSelector;
+
+import javax.annotation.Nullable;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Objects;
+
+public class ConstantColumn implements NestedCommonFormatColumn,
DictionaryEncodedColumn<String>
+{
+ private final ColumnType logicalType;
+ private final ExprEval<?> value;
+ private final String stringVal;
+
+ public ConstantColumn(ColumnType logicalType, @Nullable Object value)
+ {
+ this.logicalType = logicalType;
+ this.value = ExprEval.ofType(ExpressionType.fromColumnType(logicalType),
value);
+ this.stringVal = value == null ? null : String.valueOf(value);
+ }
+
+ @Override
+ public int length()
+ {
+ return 0;
+ }
+
+ @Override
+ public boolean hasMultipleValues()
+ {
+ return false;
+ }
+
+ @Override
+ public int getSingleValueRow(int rowNum)
+ {
+ return 0;
+ }
+
+ @Override
+ public IndexedInts getMultiValueRow(int rowNum)
+ {
+ throw new UnsupportedOperationException("Not a multi-value column");
+ }
+
+ @Nullable
+ @Override
+ public String lookupName(int id)
+ {
+ if (id == 0) {
+ return stringVal;
+ }
+ return null;
+ }
+
+ @Override
+ public int lookupId(String name)
+ {
+ if (Objects.equals(name, stringVal)) {
+ return 0;
+ }
+ return -1;
+ }
+
+ @Override
+ public int getCardinality()
+ {
+ return 1;
+ }
+
+ @Override
+ public DimensionSelector makeDimensionSelector(ReadableOffset offset,
@Nullable ExtractionFn extractionFn)
+ {
+ return DimensionSelector.constant(stringVal, extractionFn);
+ }
+
+ @Override
+ public ColumnValueSelector<?> makeColumnValueSelector(ReadableOffset offset)
+ {
+ return new ColumnValueSelector<Object>()
+ {
+ @Override
+ public double getDouble()
+ {
+ return value.asDouble();
+ }
+
+ @Override
+ public float getFloat()
+ {
+ return (float) value.asDouble();
+ }
+
+ @Override
+ public long getLong()
+ {
+ return value.asLong();
+ }
+
+ @Override
+ public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+ {
+
+ }
+
+ @Override
+ public boolean isNull()
+ {
+ return value.isNumericNull();
+ }
+
+ @Nullable
+ @Override
+ public Object getObject()
+ {
+ return value.valueOrDefault();
+ }
+
+ @Override
+ public Class<?> classOfObject()
+ {
+ return Object.class;
+ }
+ };
+ }
+
+ @Override
+ public SingleValueDimensionVectorSelector
makeSingleValueDimensionVectorSelector(ReadableVectorOffset vectorOffset)
+ {
+ return
ConstantVectorSelectors.singleValueDimensionVectorSelector(vectorOffset,
stringVal);
+ }
+
+ @Override
+ public MultiValueDimensionVectorSelector
makeMultiValueDimensionVectorSelector(ReadableVectorOffset vectorOffset)
+ {
+ throw new UnsupportedOperationException("Not a multi-value column");
+ }
+
+ @Override
+ public VectorValueSelector makeVectorValueSelector(ReadableVectorOffset
offset)
+ {
+ if (logicalType.isNumeric()) {
+ final boolean[] nulls;
+ if (value.isNumericNull()) {
+ nulls = new boolean[offset.getMaxVectorSize()];
+ Arrays.fill(nulls, true);
+ } else {
+ nulls = null;
+ }
+ switch (logicalType.getType()) {
+ case LONG:
+ final long[] constantLong = new long[offset.getMaxVectorSize()];
+ Arrays.fill(constantLong, value.asLong());
+ return new BaseLongVectorValueSelector(offset)
+ {
+ @Override
+ public long[] getLongVector()
+ {
+ return constantLong;
+ }
+
+ @Nullable
+ @Override
+ public boolean[] getNullVector()
+ {
+ return nulls;
+ }
+ };
+
+ case DOUBLE:
+ final double[] constantDouble = new
double[offset.getMaxVectorSize()];
+ Arrays.fill(constantDouble, value.asDouble());
+ return new BaseDoubleVectorValueSelector(offset)
+ {
+ @Override
+ public double[] getDoubleVector()
+ {
+ return constantDouble;
+ }
+
+ @Nullable
+ @Override
+ public boolean[] getNullVector()
+ {
+ return nulls;
+ }
+ };
+
+ case FLOAT:
+ final float[] constantFloat = new float[offset.getMaxVectorSize()];
+ Arrays.fill(constantFloat, (float) value.asDouble());
+ return new BaseFloatVectorValueSelector(offset)
+ {
+ @Override
+ public float[] getFloatVector()
+ {
+ return constantFloat;
+ }
+
+ @Nullable
+ @Override
+ public boolean[] getNullVector()
+ {
+ return nulls;
+ }
+ };
+ }
+ }
+ throw new UOE("Cannot make VectorValueSelector for constant column of
type[%s]", logicalType);
+ }
+
+ @Override
+ public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset
offset)
+ {
+ final Object[] constant = new Object[offset.getMaxVectorSize()];
+ Arrays.fill(constant, value.valueOrDefault());
+ return new VectorObjectSelector()
+ {
+
+ @Override
+ public Object[] getObjectVector()
+ {
+ return constant;
+ }
+
+ @Override
+ public int getMaxVectorSize()
+ {
+ return offset.getMaxVectorSize();
+ }
+
+ @Override
+ public int getCurrentVectorSize()
+ {
+ return offset.getCurrentVectorSize();
+ }
+ };
+ }
+
+ @Override
+ public Indexed<String> getStringDictionary()
+ {
+ if (logicalType.is(ValueType.STRING)) {
+ return new ListIndexed<>(Collections.singletonList(value.asString()));
Review Comment:
This seems like a good place to introduce a `ConstantValueIndexed` or
something instead of jumping through these hoops.
##########
processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumn.java:
##########
@@ -75,9 +75,11 @@ default Indexed<Object[]> getArrayDictionary()
default SortedMap<String, FieldTypeInfo.MutableTypeSet> getFieldTypeInfo()
{
- FieldTypeInfo.MutableTypeSet rootOnlyType = new
FieldTypeInfo.MutableTypeSet().add(getLogicalType());
SortedMap<String, FieldTypeInfo.MutableTypeSet> fields = new TreeMap<>();
- fields.put(NestedPathFinder.JSON_PATH_ROOT, rootOnlyType);
+ if (!getLogicalType().equals(ColumnType.NESTED_DATA)) {
Review Comment:
nit: invert it so that there's 0 chance of an NPE. I.e.
`ColumnType.NESTED_DATA.equals(getLogicalType())` cannot NPE on this line
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]