github-code-scanning[bot] commented on code in PR #14014: URL: https://github.com/apache/druid/pull/14014#discussion_r1155305352
########## processing/src/main/java/org/apache/druid/segment/serde/StandardDoubleColumnSerializer.java: ########## @@ -0,0 +1,248 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +import com.google.common.base.Preconditions; +import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.collections.bitmap.MutableBitmap; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.ProgressIndicator; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.data.ColumnarDoublesSerializer; +import org.apache.druid.segment.data.CompressedVSizeColumnarIntsSerializer; +import org.apache.druid.segment.data.CompressionFactory; +import org.apache.druid.segment.data.CompressionStrategy; +import org.apache.druid.segment.data.FixedIndexedWriter; +import org.apache.druid.segment.data.GenericIndexedWriter; +import org.apache.druid.segment.data.SingleValueColumnarIntsSerializer; +import org.apache.druid.segment.nested.DictionaryIdLookup; +import org.apache.druid.segment.nested.StructuredData; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.WritableByteChannel; + +public class StandardDoubleColumnSerializer extends StandardTypeColumnSerializer +{ + private static final Logger log = new Logger(StandardDoubleColumnSerializer.class); + + private final String name; + private final SegmentWriteOutMedium segmentWriteOutMedium; + private final IndexSpec indexSpec; + @SuppressWarnings("unused") + private final Closer closer; + private DictionaryIdLookup dictionaryIdLookup; + private FixedIndexedWriter<Double> doubleDictionaryWriter; + private int rowCount = 0; + private boolean closedForWrite = false; + private boolean dictionarySerialized = false; + + private SingleValueColumnarIntsSerializer encodedValueSerializer; + private ColumnarDoublesSerializer doublesSerializer; + private GenericIndexedWriter<ImmutableBitmap> bitmapIndexWriter; + private MutableBitmap[] bitmaps; + + public StandardDoubleColumnSerializer( + String name, + IndexSpec indexSpec, + SegmentWriteOutMedium segmentWriteOutMedium, + @SuppressWarnings("unused") ProgressIndicator progressIndicator, Review Comment: ## Useless parameter The parameter 'progressIndicator' is never used. [Show more details](https://github.com/apache/druid/security/code-scanning/4591) ########## processing/src/main/java/org/apache/druid/segment/serde/StandardLongColumnSerializer.java: ########## @@ -0,0 +1,247 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +import com.google.common.base.Preconditions; +import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.collections.bitmap.MutableBitmap; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.ProgressIndicator; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.data.ColumnarLongsSerializer; +import org.apache.druid.segment.data.CompressedVSizeColumnarIntsSerializer; +import org.apache.druid.segment.data.CompressionFactory; +import org.apache.druid.segment.data.CompressionStrategy; +import org.apache.druid.segment.data.FixedIndexedWriter; +import org.apache.druid.segment.data.GenericIndexedWriter; +import org.apache.druid.segment.data.SingleValueColumnarIntsSerializer; +import org.apache.druid.segment.nested.DictionaryIdLookup; +import org.apache.druid.segment.nested.StructuredData; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.WritableByteChannel; + +public class StandardLongColumnSerializer extends StandardTypeColumnSerializer +{ + private static final Logger log = new Logger(StandardLongColumnSerializer.class); + + private final String name; + private final SegmentWriteOutMedium segmentWriteOutMedium; + private final IndexSpec indexSpec; + @SuppressWarnings("unused") + private final Closer closer; + private DictionaryIdLookup dictionaryIdLookup; + private FixedIndexedWriter<Long> longDictionaryWriter; + private int rowCount = 0; + private boolean closedForWrite = false; + private boolean dictionarySerialized = false; + + private SingleValueColumnarIntsSerializer encodedValueSerializer; + private ColumnarLongsSerializer longsSerializer; + private GenericIndexedWriter<ImmutableBitmap> bitmapIndexWriter; + private MutableBitmap[] bitmaps; + + public StandardLongColumnSerializer( + String name, + IndexSpec indexSpec, + SegmentWriteOutMedium segmentWriteOutMedium, + @SuppressWarnings("unused") ProgressIndicator progressIndicator, Review Comment: ## Useless parameter The parameter 'progressIndicator' is never used. [Show more details](https://github.com/apache/druid/security/code-scanning/4592) ########## processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java: ########## @@ -337,6 +374,71 @@ return theColumn.makeColumnValueSelector(offset); } + if (parts.size() == 1 && parts.get(0) instanceof NestedPathArrayElement && theColumn instanceof StandardArrayColumn) { + final StandardArrayColumn<?> arrayColumn = (StandardArrayColumn<?>) theColumn; + ColumnValueSelector<?> arraySelector = arrayColumn.makeColumnValueSelector(offset); + final int elementNumber = ((NestedPathArrayElement) parts.get(0)).getIndex(); + if (elementNumber < 0) { + throw new IAE("Cannot make array element selector, negative array index not supported"); + } + return new ColumnValueSelector<Object>() + { + @Override + public boolean isNull() + { + Object o = getObject(); + return !(o instanceof Number); + } + + @Override + public long getLong() + { + Object o = getObject(); + return o instanceof Number ? ((Number) o).longValue() : 0L; + } + + @Override + public float getFloat() + { + Object o = getObject(); + return o instanceof Number ? ((Number) o).floatValue() : 0f; + } + + @Override + public double getDouble() + { + Object o = getObject(); + return o instanceof Number ? ((Number) o).doubleValue() : 0.0; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + arraySelector.inspectRuntimeShape(inspector); + } + + @Nullable + @Override + public Object getObject() + { + Object o = arraySelector.getObject(); + if (o instanceof Object[]) { + Object[] array = (Object[]) o; + if (elementNumber < array.length) { + return array[elementNumber]; Review Comment: ## Improper validation of user-provided array index This index depends on a [user-provided value](1) which can cause an ArrayIndexOutOfBoundsException. This index depends on a [user-provided value](2) which can cause an ArrayIndexOutOfBoundsException. This index depends on a [user-provided value](3) which can cause an ArrayIndexOutOfBoundsException. This index depends on a [user-provided value](4) which can cause an ArrayIndexOutOfBoundsException. This index depends on a [user-provided value](5) which can cause an ArrayIndexOutOfBoundsException. [Show more details](https://github.com/apache/druid/security/code-scanning/4595) ########## processing/src/main/java/org/apache/druid/segment/serde/StandardStringColumnSerializer.java: ########## @@ -0,0 +1,234 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +import com.google.common.base.Preconditions; +import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.collections.bitmap.MutableBitmap; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.ProgressIndicator; +import org.apache.druid.segment.column.StringEncodingStrategies; +import org.apache.druid.segment.data.CompressedVSizeColumnarIntsSerializer; +import org.apache.druid.segment.data.CompressionStrategy; +import org.apache.druid.segment.data.DictionaryWriter; +import org.apache.druid.segment.data.GenericIndexedWriter; +import org.apache.druid.segment.data.SingleValueColumnarIntsSerializer; +import org.apache.druid.segment.nested.DictionaryIdLookup; +import org.apache.druid.segment.nested.StructuredData; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.WritableByteChannel; + +public class StandardStringColumnSerializer extends StandardTypeColumnSerializer +{ + private static final Logger log = new Logger(StandardStringColumnSerializer.class); + + private final String name; + private final SegmentWriteOutMedium segmentWriteOutMedium; + private final IndexSpec indexSpec; + @SuppressWarnings("unused") + private final Closer closer; + private DictionaryIdLookup dictionaryIdLookup; + private DictionaryWriter<String> dictionaryWriter; + private int rowCount = 0; + private boolean closedForWrite = false; + private boolean dictionarySerialized = false; + + private SingleValueColumnarIntsSerializer encodedValueSerializer; + private GenericIndexedWriter<ImmutableBitmap> bitmapIndexWriter; + private MutableBitmap[] bitmaps; + + public StandardStringColumnSerializer( + String name, + IndexSpec indexSpec, + SegmentWriteOutMedium segmentWriteOutMedium, + @SuppressWarnings("unused") ProgressIndicator progressIndicator, Review Comment: ## Useless parameter The parameter 'progressIndicator' is never used. [Show more details](https://github.com/apache/druid/security/code-scanning/4594) ########## processing/src/main/java/org/apache/druid/segment/serde/StandardArrayColumnSupplier.java: ########## @@ -0,0 +1,243 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +import com.google.common.base.Supplier; +import org.apache.druid.collections.bitmap.BitmapFactory; +import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.RE; +import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper; +import org.apache.druid.segment.column.ColumnBuilder; +import org.apache.druid.segment.column.ColumnIndexSupplier; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.StandardArrayColumn; +import org.apache.druid.segment.column.StandardTypeColumn; +import org.apache.druid.segment.column.StringEncodingStrategy; +import org.apache.druid.segment.data.BitmapSerdeFactory; +import org.apache.druid.segment.data.ColumnarInts; +import org.apache.druid.segment.data.CompressedVSizeColumnarIntsSupplier; +import org.apache.druid.segment.data.EncodedStringDictionaryWriter; +import org.apache.druid.segment.data.FixedIndexed; +import org.apache.druid.segment.data.FrontCodedIndexed; +import org.apache.druid.segment.data.FrontCodedIntArrayIndexed; +import org.apache.druid.segment.data.GenericIndexed; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +public class StandardArrayColumnSupplier implements Supplier<StandardTypeColumn>, ColumnIndexSupplier +{ + public static StandardArrayColumnSupplier read( + String columnName, + ColumnType logicalType, + ByteOrder byteOrder, + BitmapSerdeFactory bitmapSerdeFactory, + ByteBuffer bb, + ColumnBuilder columnBuilder + ) + { + final byte version = bb.get(); + + if (version == StandardTypeColumnSerializer.V0) { + try { + final SmooshedFileMapper mapper = columnBuilder.getFileMapper(); + final GenericIndexed<ByteBuffer> stringDictionary; + final Supplier<FrontCodedIndexed> frontCodedStringDictionarySupplier; + final Supplier<FixedIndexed<Long>> longDictionarySupplier; + final Supplier<FixedIndexed<Double>> doubleDictionarySupplier; + final Supplier<FrontCodedIntArrayIndexed> arrayDictionarySupplier; + + final ByteBuffer stringDictionaryBuffer = StandardTypeColumnPartSerde.loadInternalFile( + mapper, + columnName, + StandardTypeColumnSerializer.STRING_DICTIONARY_FILE_NAME + ); + + final int dictionaryStartPosition = stringDictionaryBuffer.position(); + final byte dictionaryVersion = stringDictionaryBuffer.get(); + + if (dictionaryVersion == EncodedStringDictionaryWriter.VERSION) { + final byte encodingId = stringDictionaryBuffer.get(); + if (encodingId == StringEncodingStrategy.FRONT_CODED_ID) { + frontCodedStringDictionarySupplier = FrontCodedIndexed.read( + stringDictionaryBuffer, + byteOrder + ); + stringDictionary = null; + } else if (encodingId == StringEncodingStrategy.UTF8_ID) { + // this cannot happen naturally right now since generic indexed is written in the 'legacy' format, but + // this provides backwards compatibility should we switch at some point in the future to always + // writing dictionaryVersion + stringDictionary = GenericIndexed.read(stringDictionaryBuffer, GenericIndexed.UTF8_STRATEGY, mapper); + frontCodedStringDictionarySupplier = null; + } else { + throw new ISE("impossible, unknown encoding strategy id: %s", encodingId); + } + } else { + // legacy format that only supports plain utf8 enoding stored in GenericIndexed and the byte we are reading + // as dictionaryVersion is actually also the GenericIndexed version, so we reset start position so the + // GenericIndexed version can be correctly read + stringDictionaryBuffer.position(dictionaryStartPosition); + stringDictionary = GenericIndexed.read(stringDictionaryBuffer, GenericIndexed.UTF8_STRATEGY, mapper); + frontCodedStringDictionarySupplier = null; + } + final ByteBuffer encodedValueColumn = StandardTypeColumnPartSerde.loadInternalFile( + mapper, + columnName, + StandardTypeColumnSerializer.ENCODED_VALUE_COLUMN_FILE_NAME + ); + final CompressedVSizeColumnarIntsSupplier ints = CompressedVSizeColumnarIntsSupplier.fromByteBuffer( + encodedValueColumn, + byteOrder + ); + final ByteBuffer longDictionaryBuffer = StandardTypeColumnPartSerde.loadInternalFile( + mapper, + columnName, + StandardTypeColumnSerializer.LONG_DICTIONARY_FILE_NAME + ); + final ByteBuffer doubleDictionaryBuffer = StandardTypeColumnPartSerde.loadInternalFile( + mapper, + columnName, + StandardTypeColumnSerializer.DOUBLE_DICTIONARY_FILE_NAME + ); + final ByteBuffer valueIndexBuffer = StandardTypeColumnPartSerde.loadInternalFile( + mapper, + columnName, + StandardTypeColumnSerializer.BITMAP_INDEX_FILE_NAME + ); + GenericIndexed<ImmutableBitmap> valueIndexes = GenericIndexed.read( + valueIndexBuffer, + bitmapSerdeFactory.getObjectStrategy(), + columnBuilder.getFileMapper() + ); + + longDictionarySupplier = FixedIndexed.read( + longDictionaryBuffer, + ColumnType.LONG.getStrategy(), + byteOrder, + Long.BYTES + ); + doubleDictionarySupplier = FixedIndexed.read( + doubleDictionaryBuffer, + ColumnType.DOUBLE.getStrategy(), + byteOrder, + Double.BYTES + ); + final ByteBuffer arrayDictionarybuffer = StandardTypeColumnPartSerde.loadInternalFile( + mapper, + columnName, + StandardTypeColumnSerializer.ARRAY_DICTIONARY_FILE_NAME + ); + arrayDictionarySupplier = FrontCodedIntArrayIndexed.read( + arrayDictionarybuffer, + byteOrder + ); + return new StandardArrayColumnSupplier( + logicalType, + stringDictionary, + frontCodedStringDictionarySupplier, + longDictionarySupplier, + doubleDictionarySupplier, + arrayDictionarySupplier, + ints, + valueIndexes, + bitmapSerdeFactory.getBitmapFactory() + ); + } + catch (IOException ex) { + throw new RE(ex, "Failed to deserialize V%s column.", version); + } + } else { + throw new RE("Unknown version " + version); + } + } + + + private final ColumnType logicalType; + + private final GenericIndexed<ByteBuffer> stringDictionary; + private final Supplier<FrontCodedIndexed> frontCodedStringDictionarySupplier; + private final Supplier<FixedIndexed<Long>> longDictionarySupplier; + private final Supplier<FixedIndexed<Double>> doubleDictionarySupplier; + private final Supplier<FrontCodedIntArrayIndexed> arrayDictionarySupplier; + private final Supplier<ColumnarInts> encodedValueColumnSupplier; + @SuppressWarnings("unused") + private final GenericIndexed<ImmutableBitmap> valueIndexes; + private final ImmutableBitmap nullValueBitmap; + + public StandardArrayColumnSupplier( + ColumnType logicalType, + GenericIndexed<ByteBuffer> stringDictionary, + Supplier<FrontCodedIndexed> frontCodedStringDictionarySupplier, + Supplier<FixedIndexed<Long>> longDictionarySupplier, + Supplier<FixedIndexed<Double>> doubleDictionarySupplier, + Supplier<FrontCodedIntArrayIndexed> arrayDictionarySupplier, + Supplier<ColumnarInts> encodedValueColumnSupplier, + GenericIndexed<ImmutableBitmap> valueIndexes, + @SuppressWarnings("unused") BitmapFactory bitmapFactory Review Comment: ## Useless parameter The parameter 'bitmapFactory' is never used. [Show more details](https://github.com/apache/druid/security/code-scanning/4590) ########## processing/src/main/java/org/apache/druid/segment/serde/ComplexMetricSerde.java: ########## @@ -51,6 +51,21 @@ * @param builder ColumnBuilder to add the column to * @param columnConfig ColumnConfiguration used during deserialization */ + public void deserializeColumn( + @SuppressWarnings("unused") String columnName, + ByteBuffer buffer, + ColumnBuilder builder, + ColumnConfig columnConfig + ) + { + deserializeColumn(buffer, builder, columnConfig); Review Comment: ## Deprecated method or constructor invocation Invoking [ComplexMetricSerde.deserializeColumn](1) should be avoided because it has been deprecated. [Show more details](https://github.com/apache/druid/security/code-scanning/4585) ########## processing/src/main/java/org/apache/druid/segment/serde/StandardNestedColumnSerializer.java: ########## @@ -0,0 +1,457 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Maps; +import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.collections.bitmap.MutableBitmap; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.RE; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; +import org.apache.druid.java.util.common.io.smoosh.SmooshedWriter; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.IndexMerger; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.ProgressIndicator; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.StringEncodingStrategies; +import org.apache.druid.segment.column.Types; +import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.data.ByteBufferWriter; +import org.apache.druid.segment.data.CompressedVariableSizedBlobColumnSerializer; +import org.apache.druid.segment.data.CompressionStrategy; +import org.apache.druid.segment.data.DictionaryWriter; +import org.apache.druid.segment.data.FixedIndexedWriter; +import org.apache.druid.segment.data.FrontCodedIntArrayIndexedWriter; +import org.apache.druid.segment.data.GenericIndexed; +import org.apache.druid.segment.data.GenericIndexedWriter; +import org.apache.druid.segment.nested.ArrayFieldColumnWriter; +import org.apache.druid.segment.nested.DictionaryIdLookup; +import org.apache.druid.segment.nested.DoubleFieldColumnWriter; +import org.apache.druid.segment.nested.FieldTypeInfo; +import org.apache.druid.segment.nested.GlobalDictionaryEncodedFieldColumnWriter; +import org.apache.druid.segment.nested.LongFieldColumnWriter; +import org.apache.druid.segment.nested.NestedDataColumnMetadata; +import org.apache.druid.segment.nested.NestedDataComplexTypeSerde; +import org.apache.druid.segment.nested.NestedPathFinder; +import org.apache.druid.segment.nested.NestedPathPart; +import org.apache.druid.segment.nested.StringFieldColumnWriter; +import org.apache.druid.segment.nested.StructuredData; +import org.apache.druid.segment.nested.StructuredDataProcessor; +import org.apache.druid.segment.nested.VariantFieldColumnWriter; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; + +import javax.annotation.Nullable; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.WritableByteChannel; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; + +public class StandardNestedColumnSerializer extends StandardTypeColumnSerializer +{ + private static final Logger log = new Logger(StandardNestedColumnSerializer.class); + + private final String name; + private final SegmentWriteOutMedium segmentWriteOutMedium; + private final IndexSpec indexSpec; + @SuppressWarnings("unused") + private final Closer closer; + + private final StructuredDataProcessor fieldProcessor = new StructuredDataProcessor() + { + @Override + public ProcessedValue<?> processField(ArrayList<NestedPathPart> fieldPath, @Nullable Object fieldValue) + { + final GlobalDictionaryEncodedFieldColumnWriter<?> writer = fieldWriters.get( + NestedPathFinder.toNormalizedJsonPath(fieldPath) + ); + if (writer != null) { + try { + final ExprEval<?> eval = ExprEval.bestEffortOf(fieldValue); + if (eval.type().isPrimitive() || (eval.type().isArray() && eval.type().getElementType().isPrimitive())) { + writer.addValue(rowCount, eval.value()); + } else { + // behave consistently with nested column indexer, which defaults to string + writer.addValue(rowCount, eval.asString()); + } + // serializer doesn't use size estimate + return ProcessedValue.NULL_LITERAL; + } + catch (IOException e) { + throw new RE(e, "Failed to write field [%s], unhandled value", fieldPath); + } + } + return ProcessedValue.NULL_LITERAL; + } + + @Nullable + @Override + public ProcessedValue<?> processArrayField( + ArrayList<NestedPathPart> fieldPath, + @Nullable List<?> array + ) + { + final ExprEval<?> eval = ExprEval.bestEffortArray(array); + if (eval.type().isArray() && eval.type().getElementType().isPrimitive()) { + final GlobalDictionaryEncodedFieldColumnWriter<?> writer = fieldWriters.get( + NestedPathFinder.toNormalizedJsonPath(fieldPath) + ); + if (writer != null) { + try { + writer.addValue(rowCount, eval.value()); + // serializer doesn't use size estimate + return ProcessedValue.NULL_LITERAL; + } + catch (IOException e) { + throw new RE(e, "Failed to write field [%s] value [%s]", fieldPath, array); + } + } + } + return null; + } + }; + + private DictionaryIdLookup globalDictionaryIdLookup; + private SortedMap<String, FieldTypeInfo.MutableTypeSet> fields; + private GenericIndexedWriter<String> fieldsWriter; + private FieldTypeInfo.Writer fieldsInfoWriter; + private DictionaryWriter<String> dictionaryWriter; + private FixedIndexedWriter<Long> longDictionaryWriter; + private FixedIndexedWriter<Double> doubleDictionaryWriter; + private FrontCodedIntArrayIndexedWriter arrayDictionaryWriter; + private CompressedVariableSizedBlobColumnSerializer rawWriter; + private ByteBufferWriter<ImmutableBitmap> nullBitmapWriter; + private MutableBitmap nullRowsBitmap; + private Map<String, GlobalDictionaryEncodedFieldColumnWriter<?>> fieldWriters; + private int rowCount = 0; + private boolean closedForWrite = false; + + private boolean dictionarySerialized = false; + + public StandardNestedColumnSerializer( + String name, + IndexSpec indexSpec, + SegmentWriteOutMedium segmentWriteOutMedium, + @SuppressWarnings("unused") ProgressIndicator progressIndicator, Review Comment: ## Useless parameter The parameter 'progressIndicator' is never used. [Show more details](https://github.com/apache/druid/security/code-scanning/4593) ########## processing/src/main/java/org/apache/druid/segment/serde/StandardArrayColumnSerializer.java: ########## @@ -0,0 +1,341 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.serde; + +import com.google.common.base.Preconditions; +import it.unimi.dsi.fastutil.ints.Int2ObjectMap; +import it.unimi.dsi.fastutil.ints.Int2ObjectRBTreeMap; +import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.collections.bitmap.MutableBitmap; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.ProgressIndicator; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.StringEncodingStrategies; +import org.apache.druid.segment.data.CompressedVSizeColumnarIntsSerializer; +import org.apache.druid.segment.data.CompressionStrategy; +import org.apache.druid.segment.data.DictionaryWriter; +import org.apache.druid.segment.data.FixedIndexedIntWriter; +import org.apache.druid.segment.data.FixedIndexedWriter; +import org.apache.druid.segment.data.FrontCodedIntArrayIndexedWriter; +import org.apache.druid.segment.data.GenericIndexedWriter; +import org.apache.druid.segment.data.SingleValueColumnarIntsSerializer; +import org.apache.druid.segment.nested.DictionaryIdLookup; +import org.apache.druid.segment.nested.StructuredData; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.WritableByteChannel; + +public class StandardArrayColumnSerializer extends StandardTypeColumnSerializer +{ + private static final Logger log = new Logger(StandardArrayColumnSerializer.class); + + private final String name; + private final SegmentWriteOutMedium segmentWriteOutMedium; + private final IndexSpec indexSpec; + @SuppressWarnings("unused") + private final Closer closer; + private DictionaryIdLookup dictionaryIdLookup; + private DictionaryWriter<String> dictionaryWriter; + private FixedIndexedWriter<Long> longDictionaryWriter; + private FixedIndexedWriter<Double> doubleDictionaryWriter; + private FrontCodedIntArrayIndexedWriter arrayDictionaryWriter; + private FixedIndexedIntWriter arrayElementDictionaryWriter; + + private int rowCount = 0; + private boolean closedForWrite = false; + + private boolean dictionarySerialized = false; + + private SingleValueColumnarIntsSerializer encodedValueSerializer; + private GenericIndexedWriter<ImmutableBitmap> bitmapIndexWriter; + private GenericIndexedWriter<ImmutableBitmap> arrayElementIndexWriter; + private MutableBitmap[] bitmaps; + private final Int2ObjectRBTreeMap<MutableBitmap> arrayElements = new Int2ObjectRBTreeMap<>(); + + public StandardArrayColumnSerializer( + String name, + IndexSpec indexSpec, + SegmentWriteOutMedium segmentWriteOutMedium, + @SuppressWarnings("unused") ProgressIndicator progressIndicator, Review Comment: ## Useless parameter The parameter 'progressIndicator' is never used. [Show more details](https://github.com/apache/druid/security/code-scanning/4589) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
