gortiz commented on code in PR #10184:
URL: https://github.com/apache/pinot/pull/10184#discussion_r1149064068
##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java:
##########
@@ -379,340 +330,94 @@ private boolean
shouldCreateDictionaryWithinThreshold(ColumnIndexCreationInfo in
}
/**
- * Helper method that returns compression type to use based on segment
creation spec and field type.
- * <ul>
- * <li> Returns compression type from segment creation spec, if specified
there.</li>
- * <li> Else, returns PASS_THROUGH for metrics, and SNAPPY for dimensions.
This is because metrics are likely
- * to be spread in different chunks after applying predicates. Same
could be true for dimensions, but in that
- * case, clients are expected to explicitly specify the appropriate
compression type in the spec. </li>
- * </ul>
- * @param segmentCreationSpec Segment creation spec
- * @param fieldSpec Field spec for the column
- * @return Compression type to use
+ * @deprecated use {@link
ForwardIndexType#getDefaultCompressionType(FieldType)} instead
*/
- private ChunkCompressionType getColumnCompressionType(SegmentGeneratorConfig
segmentCreationSpec,
- FieldSpec fieldSpec) {
- ChunkCompressionType compressionType =
segmentCreationSpec.getRawIndexCompressionType().get(fieldSpec.getName());
- if (compressionType == null) {
- compressionType = getDefaultCompressionType(fieldSpec.getFieldType());
- }
-
- return compressionType;
- }
-
+ @Deprecated
public static ChunkCompressionType getDefaultCompressionType(FieldType
fieldType) {
- if (fieldType == FieldSpec.FieldType.METRIC) {
- return ChunkCompressionType.PASS_THROUGH;
- } else {
- return ChunkCompressionType.LZ4;
- }
+ return ForwardIndexType.getDefaultCompressionType(fieldType);
}
@Override
public void indexRow(GenericRow row)
throws IOException {
- for (Map.Entry<String, ForwardIndexCreator> entry :
_forwardIndexCreatorMap.entrySet()) {
- String columnName = entry.getKey();
- ForwardIndexCreator forwardIndexCreator = entry.getValue();
-
- Object columnValueToIndex = row.getValue(columnName);
- if (columnValueToIndex == null) {
- throw new RuntimeException("Null value for column:" + columnName);
- }
+ for (Map.Entry<String, Map<IndexType<?, ?, ?>, IndexCreator>> byColEntry :
_creatorsByColAndIndex.entrySet()) {
+ String columnName = byColEntry.getKey();
+ Map<IndexType<?, ?, ?>, IndexCreator> creatorsByIndex =
byColEntry.getValue();
FieldSpec fieldSpec = _schema.getFieldSpecFor(columnName);
-
- //get dictionaryCreator, will be null if column is not dictionaryEncoded
SegmentDictionaryCreator dictionaryCreator =
_dictionaryCreatorMap.get(columnName);
- // bloom filter
- BloomFilterCreator bloomFilterCreator =
_bloomFilterCreatorMap.get(columnName);
- if (bloomFilterCreator != null) {
- if (fieldSpec.isSingleValueField()) {
- if (fieldSpec.getDataType() == DataType.BYTES) {
- bloomFilterCreator.add(BytesUtils.toHexString((byte[])
columnValueToIndex));
- } else {
- bloomFilterCreator.add(columnValueToIndex.toString());
- }
- } else {
- Object[] values = (Object[]) columnValueToIndex;
- if (fieldSpec.getDataType() == DataType.BYTES) {
- for (Object value : values) {
- bloomFilterCreator.add(BytesUtils.toHexString((byte[]) value));
- }
- } else {
- for (Object value : values) {
- bloomFilterCreator.add(value.toString());
- }
- }
- }
- }
-
- // range index
- CombinedInvertedIndexCreator combinedInvertedIndexCreator =
_rangeIndexFilterCreatorMap.get(columnName);
- if (combinedInvertedIndexCreator != null) {
- if (dictionaryCreator != null) {
- if (fieldSpec.isSingleValueField()) {
-
combinedInvertedIndexCreator.add(dictionaryCreator.indexOfSV(columnValueToIndex));
- } else {
- int[] dictIds = dictionaryCreator.indexOfMV(columnValueToIndex);
- combinedInvertedIndexCreator.add(dictIds, dictIds.length);
- }
- } else {
- if (fieldSpec.isSingleValueField()) {
- switch (fieldSpec.getDataType()) {
- case INT:
- combinedInvertedIndexCreator.add((Integer) columnValueToIndex);
- break;
- case LONG:
- combinedInvertedIndexCreator.add((Long) columnValueToIndex);
- break;
- case FLOAT:
- combinedInvertedIndexCreator.add((Float) columnValueToIndex);
- break;
- case DOUBLE:
- combinedInvertedIndexCreator.add((Double) columnValueToIndex);
- break;
- default:
- throw new RuntimeException("Unsupported data type " +
fieldSpec.getDataType() + " for range index");
- }
- } else {
- Object[] values = (Object[]) columnValueToIndex;
- switch (fieldSpec.getDataType()) {
- case INT:
- int[] intValues = new int[values.length];
- for (int i = 0; i < values.length; i++) {
- intValues[i] = (Integer) values[i];
- }
- combinedInvertedIndexCreator.add(intValues, values.length);
- break;
- case LONG:
- long[] longValues = new long[values.length];
- for (int i = 0; i < values.length; i++) {
- longValues[i] = (Long) values[i];
- }
- combinedInvertedIndexCreator.add(longValues, values.length);
- break;
- case FLOAT:
- float[] floatValues = new float[values.length];
- for (int i = 0; i < values.length; i++) {
- floatValues[i] = (Float) values[i];
- }
- combinedInvertedIndexCreator.add(floatValues, values.length);
- break;
- case DOUBLE:
- double[] doubleValues = new double[values.length];
- for (int i = 0; i < values.length; i++) {
- doubleValues[i] = (Double) values[i];
- }
- combinedInvertedIndexCreator.add(doubleValues, values.length);
- break;
- default:
- throw new RuntimeException("Unsupported data type " +
fieldSpec.getDataType() + " for range index");
- }
- }
- }
- }
-
- // text-index
- TextIndexCreator textIndexCreator = _textIndexCreatorMap.get(columnName);
- if (textIndexCreator != null) {
- if (fieldSpec.isSingleValueField()) {
- textIndexCreator.add((String) columnValueToIndex);
- } else {
- Object[] values = (Object[]) columnValueToIndex;
- int length = values.length;
- if (values instanceof String[]) {
- textIndexCreator.add((String[]) values, length);
- } else {
- String[] strings = new String[length];
- for (int i = 0; i < length; i++) {
- strings[i] = (String) values[i];
- }
- textIndexCreator.add(strings, length);
- columnValueToIndex = strings;
- }
- }
+ Object columnValueToIndex = row.getValue(columnName);
Review Comment:
It would only save 1 map lookup, given that in case this is null, an
exception will be thrown, aborting the process.
The reason to do this later is to have the variable declaration closer to
where it is actually being used, but I don't have a very strong opinion about
it, so I'm gonna change it as requested.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]