github-code-scanning[bot] commented on code in PR #14014: URL: https://github.com/apache/druid/pull/14014#discussion_r1156511866
########## processing/src/main/java/org/apache/druid/segment/nested/ScalarStringColumnSerializer.java: ########## @@ -0,0 +1,232 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.nested; + +import com.google.common.base.Preconditions; +import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.collections.bitmap.MutableBitmap; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.ProgressIndicator; +import org.apache.druid.segment.column.StringEncodingStrategies; +import org.apache.druid.segment.data.CompressedVSizeColumnarIntsSerializer; +import org.apache.druid.segment.data.CompressionStrategy; +import org.apache.druid.segment.data.DictionaryWriter; +import org.apache.druid.segment.data.GenericIndexedWriter; +import org.apache.druid.segment.data.SingleValueColumnarIntsSerializer; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.WritableByteChannel; + +public class ScalarStringColumnSerializer extends NestedCommonFormatColumnSerializer +{ + private static final Logger log = new Logger(ScalarStringColumnSerializer.class); + + private final String name; + private final SegmentWriteOutMedium segmentWriteOutMedium; + private final IndexSpec indexSpec; + @SuppressWarnings("unused") + private final Closer closer; + private DictionaryIdLookup dictionaryIdLookup; + private DictionaryWriter<String> dictionaryWriter; + private int rowCount = 0; + private boolean closedForWrite = false; + private boolean dictionarySerialized = false; + + private SingleValueColumnarIntsSerializer encodedValueSerializer; + private GenericIndexedWriter<ImmutableBitmap> bitmapIndexWriter; + private MutableBitmap[] bitmaps; + + public ScalarStringColumnSerializer( + String name, + IndexSpec indexSpec, + SegmentWriteOutMedium segmentWriteOutMedium, + @SuppressWarnings("unused") ProgressIndicator progressIndicator, Review Comment: ## Useless parameter The parameter 'progressIndicator' is never used. [Show more details](https://github.com/apache/druid/security/code-scanning/4600) ########## processing/src/main/java/org/apache/druid/segment/nested/VariantArrayColumnSerializer.java: ########## @@ -0,0 +1,339 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.nested; + +import com.google.common.base.Preconditions; +import it.unimi.dsi.fastutil.ints.Int2ObjectMap; +import it.unimi.dsi.fastutil.ints.Int2ObjectRBTreeMap; +import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.collections.bitmap.MutableBitmap; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.ProgressIndicator; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.StringEncodingStrategies; +import org.apache.druid.segment.data.CompressedVSizeColumnarIntsSerializer; +import org.apache.druid.segment.data.CompressionStrategy; +import org.apache.druid.segment.data.DictionaryWriter; +import org.apache.druid.segment.data.FixedIndexedIntWriter; +import org.apache.druid.segment.data.FixedIndexedWriter; +import org.apache.druid.segment.data.FrontCodedIntArrayIndexedWriter; +import org.apache.druid.segment.data.GenericIndexedWriter; +import org.apache.druid.segment.data.SingleValueColumnarIntsSerializer; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.WritableByteChannel; + +public class VariantArrayColumnSerializer extends NestedCommonFormatColumnSerializer +{ + private static final Logger log = new Logger(VariantArrayColumnSerializer.class); + + private final String name; + private final SegmentWriteOutMedium segmentWriteOutMedium; + private final IndexSpec indexSpec; + @SuppressWarnings("unused") + private final Closer closer; + private DictionaryIdLookup dictionaryIdLookup; + private DictionaryWriter<String> dictionaryWriter; + private FixedIndexedWriter<Long> longDictionaryWriter; + private FixedIndexedWriter<Double> doubleDictionaryWriter; + private FrontCodedIntArrayIndexedWriter arrayDictionaryWriter; + private FixedIndexedIntWriter arrayElementDictionaryWriter; + + private int rowCount = 0; + private boolean closedForWrite = false; + + private boolean dictionarySerialized = false; + + private SingleValueColumnarIntsSerializer encodedValueSerializer; + private GenericIndexedWriter<ImmutableBitmap> bitmapIndexWriter; + private GenericIndexedWriter<ImmutableBitmap> arrayElementIndexWriter; + private MutableBitmap[] bitmaps; + private final Int2ObjectRBTreeMap<MutableBitmap> arrayElements = new Int2ObjectRBTreeMap<>(); + + public VariantArrayColumnSerializer( + String name, + IndexSpec indexSpec, + SegmentWriteOutMedium segmentWriteOutMedium, + @SuppressWarnings("unused") ProgressIndicator progressIndicator, Review Comment: ## Useless parameter The parameter 'progressIndicator' is never used. [Show more details](https://github.com/apache/druid/security/code-scanning/4601) ########## processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnSerializer.java: ########## @@ -0,0 +1,245 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.nested; + +import com.google.common.base.Preconditions; +import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.collections.bitmap.MutableBitmap; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.ProgressIndicator; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.data.ColumnarLongsSerializer; +import org.apache.druid.segment.data.CompressedVSizeColumnarIntsSerializer; +import org.apache.druid.segment.data.CompressionFactory; +import org.apache.druid.segment.data.CompressionStrategy; +import org.apache.druid.segment.data.FixedIndexedWriter; +import org.apache.druid.segment.data.GenericIndexedWriter; +import org.apache.druid.segment.data.SingleValueColumnarIntsSerializer; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.WritableByteChannel; + +public class ScalarLongColumnSerializer extends NestedCommonFormatColumnSerializer +{ + private static final Logger log = new Logger(ScalarLongColumnSerializer.class); + + private final String name; + private final SegmentWriteOutMedium segmentWriteOutMedium; + private final IndexSpec indexSpec; + @SuppressWarnings("unused") + private final Closer closer; + private DictionaryIdLookup dictionaryIdLookup; + private FixedIndexedWriter<Long> longDictionaryWriter; + private int rowCount = 0; + private boolean closedForWrite = false; + private boolean dictionarySerialized = false; + + private SingleValueColumnarIntsSerializer encodedValueSerializer; + private ColumnarLongsSerializer longsSerializer; + private GenericIndexedWriter<ImmutableBitmap> bitmapIndexWriter; + private MutableBitmap[] bitmaps; + + public ScalarLongColumnSerializer( + String name, + IndexSpec indexSpec, + SegmentWriteOutMedium segmentWriteOutMedium, + @SuppressWarnings("unused") ProgressIndicator progressIndicator, Review Comment: ## Useless parameter The parameter 'progressIndicator' is never used. [Show more details](https://github.com/apache/druid/security/code-scanning/4599) ########## processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnSerializer.java: ########## @@ -0,0 +1,246 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.nested; + +import com.google.common.base.Preconditions; +import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.collections.bitmap.MutableBitmap; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.ProgressIndicator; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.data.ColumnarDoublesSerializer; +import org.apache.druid.segment.data.CompressedVSizeColumnarIntsSerializer; +import org.apache.druid.segment.data.CompressionFactory; +import org.apache.druid.segment.data.CompressionStrategy; +import org.apache.druid.segment.data.FixedIndexedWriter; +import org.apache.druid.segment.data.GenericIndexedWriter; +import org.apache.druid.segment.data.SingleValueColumnarIntsSerializer; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.WritableByteChannel; + +public class ScalarDoubleColumnSerializer extends NestedCommonFormatColumnSerializer +{ + private static final Logger log = new Logger(ScalarDoubleColumnSerializer.class); + + private final String name; + private final SegmentWriteOutMedium segmentWriteOutMedium; + private final IndexSpec indexSpec; + @SuppressWarnings("unused") + private final Closer closer; + private DictionaryIdLookup dictionaryIdLookup; + private FixedIndexedWriter<Double> doubleDictionaryWriter; + private int rowCount = 0; + private boolean closedForWrite = false; + private boolean dictionarySerialized = false; + + private SingleValueColumnarIntsSerializer encodedValueSerializer; + private ColumnarDoublesSerializer doublesSerializer; + private GenericIndexedWriter<ImmutableBitmap> bitmapIndexWriter; + private MutableBitmap[] bitmaps; + + public ScalarDoubleColumnSerializer( + String name, + IndexSpec indexSpec, + SegmentWriteOutMedium segmentWriteOutMedium, + @SuppressWarnings("unused") ProgressIndicator progressIndicator, Review Comment: ## Useless parameter The parameter 'progressIndicator' is never used. [Show more details](https://github.com/apache/druid/security/code-scanning/4598) ########## processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializerV4.java: ########## @@ -0,0 +1,406 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.nested; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Maps; +import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.collections.bitmap.MutableBitmap; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.RE; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; +import org.apache.druid.java.util.common.io.smoosh.SmooshedWriter; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.GenericColumnSerializer; +import org.apache.druid.segment.IndexMerger; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.ProgressIndicator; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.StringEncodingStrategies; +import org.apache.druid.segment.column.Types; +import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.data.ByteBufferWriter; +import org.apache.druid.segment.data.CompressedVariableSizedBlobColumnSerializer; +import org.apache.druid.segment.data.CompressionStrategy; +import org.apache.druid.segment.data.DictionaryWriter; +import org.apache.druid.segment.data.FixedIndexedWriter; +import org.apache.druid.segment.data.GenericIndexed; +import org.apache.druid.segment.data.GenericIndexedWriter; +import org.apache.druid.segment.serde.Serializer; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; + +import javax.annotation.Nullable; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.WritableByteChannel; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; + +public class NestedDataColumnSerializerV4 implements GenericColumnSerializer<StructuredData> +{ + private static final Logger log = new Logger(NestedDataColumnSerializerV4.class); + public static final String STRING_DICTIONARY_FILE_NAME = "__stringDictionary"; + public static final String LONG_DICTIONARY_FILE_NAME = "__longDictionary"; + public static final String DOUBLE_DICTIONARY_FILE_NAME = "__doubleDictionary"; + public static final String ARRAY_DICTIONARY_FILE_NAME = "__arrayDictionary"; + public static final String RAW_FILE_NAME = "__raw"; + public static final String NULL_BITMAP_FILE_NAME = "__nullIndex"; + + public static final String NESTED_FIELD_PREFIX = "__field_"; + + private final String name; + private final SegmentWriteOutMedium segmentWriteOutMedium; + private final IndexSpec indexSpec; + @SuppressWarnings("unused") + private final Closer closer; + + private final StructuredDataProcessor fieldProcessor = new StructuredDataProcessor() + { + @Override + public ProcessedValue<?> processField(ArrayList<NestedPathPart> fieldPath, @Nullable Object fieldValue) + { + final GlobalDictionaryEncodedFieldColumnWriter<?> writer = fieldWriters.get( + NestedPathFinder.toNormalizedJsonPath(fieldPath) + ); + if (writer != null) { + try { + final ExprEval<?> eval = ExprEval.bestEffortOf(fieldValue); + if (eval.type().isPrimitive() || (eval.type().isArray() && eval.type().getElementType().isPrimitive())) { + writer.addValue(rowCount, eval.value()); + } else { + // behave consistently with nested column indexer, which defaults to string + writer.addValue(rowCount, eval.asString()); + } + // serializer doesn't use size estimate + return ProcessedValue.NULL_LITERAL; + } + catch (IOException e) { + throw new RE(e, "Failed to write field [%s], unhandled value", fieldPath); + } + } + return ProcessedValue.NULL_LITERAL; + } + + @Nullable + @Override + public ProcessedValue<?> processArrayField( + ArrayList<NestedPathPart> fieldPath, + @Nullable List<?> array + ) + { + // classic nested column ingestion does not support array fields + return null; + } + }; + + private byte[] metadataBytes; + private DictionaryIdLookup globalDictionaryIdLookup; + private SortedMap<String, FieldTypeInfo.MutableTypeSet> fields; + private GenericIndexedWriter<String> fieldsWriter; + private FieldTypeInfo.Writer fieldsInfoWriter; + private DictionaryWriter<String> dictionaryWriter; + private FixedIndexedWriter<Long> longDictionaryWriter; + private FixedIndexedWriter<Double> doubleDictionaryWriter; + private CompressedVariableSizedBlobColumnSerializer rawWriter; + private ByteBufferWriter<ImmutableBitmap> nullBitmapWriter; + private MutableBitmap nullRowsBitmap; + private Map<String, GlobalDictionaryEncodedFieldColumnWriter<?>> fieldWriters; + private int rowCount = 0; + private boolean closedForWrite = false; + + private boolean dictionarySerialized = false; + + public NestedDataColumnSerializerV4( + String name, + IndexSpec indexSpec, + SegmentWriteOutMedium segmentWriteOutMedium, + @SuppressWarnings("unused") ProgressIndicator progressIndicator, Review Comment: ## Useless parameter The parameter 'progressIndicator' is never used. [Show more details](https://github.com/apache/druid/security/code-scanning/4597) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
