cryptoe commented on a change in pull request #12078:
URL: https://github.com/apache/druid/pull/12078#discussion_r780362210
##########
File path:
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java
##########
@@ -403,6 +412,20 @@ public GroupByColumnSelectorStrategy
makeColumnSelectorStrategy(
return makeNullableNumericStrategy(new
FloatGroupByColumnSelectorStrategy());
case DOUBLE:
return makeNullableNumericStrategy(new
DoubleGroupByColumnSelectorStrategy());
+ case ARRAY:
+ switch (capabilities.getElementType().getType()) {
+ case LONG:
+ return new ListLongGroupByColumnSelectorStrategy();
Review comment:
The reason was that ListLong internally working on a list as the DS and
hence the name but seems weird here.
Changed everything to ArrayX as I think that's a better name signifying
whats the selector strategy for.
##########
File path:
processing/src/main/java/org/apache/druid/segment/data/ComparableIntArray.java
##########
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.data;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonValue;
+import com.google.common.hash.Hashing;
+
+import java.util.Arrays;
+
+public class ComparableIntArray implements Comparable<ComparableIntArray>
Review comment:
acked
##########
File path:
processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java
##########
@@ -376,11 +374,58 @@ public static Float convertObjectToFloat(@Nullable Object
valObj, boolean report
return convertObjectToDouble(obj, reportParseExceptions);
case STRING:
return convertObjectToString(obj);
+ case ARRAY:
+ switch (type.getElementType().getType()) {
+ case STRING:
+ return convertToComparableStringArray(obj);
+ default:
+ return convertToList(obj);
+ }
+
default:
throw new IAE("Type[%s] is not supported for dimensions!", type);
}
}
+ private static ComparableList convertToList(Object obj)
+ {
+ if (obj == null) {
+ return null;
+ }
+ if (obj instanceof List) {
+ return new ComparableList((List) obj);
+ }
+ if (obj instanceof ComparableList) {
+ return (ComparableList) obj;
+ }
+ throw new ISE("Unable to convert type %s to %s", obj.getClass().getName(),
ComparableList.class.getName());
+ }
+
+
+ @Nullable
+ public static ComparableStringArray convertToComparableStringArray(Object
obj)
+ {
+ if (obj == null) {
+ return null;
+ }
+ if (obj instanceof ComparableStringArray) {
+ return (ComparableStringArray) obj;
+ }
+ if (obj instanceof String[]) {
+ return ComparableStringArray.of((String[]) obj);
+ }
+ // Jackson converts the serialized array into a string. Converting it back
to a string array
Review comment:
Thanks for the catch.
##########
File path:
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java
##########
@@ -1426,6 +1464,186 @@ private RowBasedKeySerdeHelper makeNumericSerdeHelper(
}
}
+ private class ListRowBasedKeySerdeHelper implements RowBasedKeySerdeHelper
+ {
+ final int keyBufferPosition;
+ final BufferComparator bufferComparator;
+
+ public ListRowBasedKeySerdeHelper(
+ int keyBufferPosition,
+ @Nullable StringComparator stringComparator
+ )
+ {
+ this.keyBufferPosition = keyBufferPosition;
+ final StringComparator comparator = stringComparator == null
+ ? StringComparators.LEXICOGRAPHIC
+ : stringComparator;
+
+ this.bufferComparator = (lhsBuffer, rhsBuffer, lhsPosition,
rhsPosition) -> {
+ final ComparableList lhsComparableArray =
listDictionary.get(lhsBuffer.getInt(lhsPosition
+
+ keyBufferPosition));
+ final ComparableList rhsComparableArray =
listDictionary.get(rhsBuffer.getInt(rhsPosition
+
+ keyBufferPosition));
+ if (lhsComparableArray == null && rhsComparableArray == null) {
+ return 0;
+ } else if (lhsComparableArray == null) {
+ return -1;
+ } else if (rhsComparableArray == null) {
+ return 1;
+ }
+
+ List lhs = lhsComparableArray.getDelegate();
+ List rhs = rhsComparableArray.getDelegate();
+
+ int minLength = Math.min(lhs.size(), rhs.size());
+
+ //noinspection ArrayEquality
+ if (lhs == rhs) {
+ return 0;
+ }
+ for (int i = 0; i < minLength; i++) {
+ final int cmp = comparator.compare(String.valueOf(lhs.get(i)),
String.valueOf(rhs.get(i)));
+ if (cmp == 0) {
+ continue;
+ }
+ return cmp;
+ }
+ if (lhs.size() == rhs.size()) {
+ return 0;
+ } else if (lhs.size() < rhs.size()) {
+ return -1;
+ }
+ return 1;
+ };
+ }
+
+ @Override
+ public int getKeyBufferValueSize()
+ {
+ return Integer.BYTES;
+ }
+
+ @Override
+ public boolean putToKeyBuffer(RowBasedKey key, int idx)
+ {
+ final ComparableList comparableList = (ComparableList)
key.getKey()[idx];
+ int id = reverseDictionary.getInt(comparableList);
+ if (id == UNKNOWN_DICTIONARY_ID) {
+ id = listDictionary.size();
+ reverseListDictionary.put(comparableList, id);
+ listDictionary.add(comparableList);
+ }
+ keyBuffer.putInt(id);
+ return true;
+ }
+
+ @Override
+ public void getFromByteBuffer(ByteBuffer buffer, int initialOffset, int
dimValIdx, Comparable[] dimValues)
+ {
+ dimValues[dimValIdx] = listDictionary.get(buffer.getInt(initialOffset
+ keyBufferPosition));
+ }
+
+ @Override
+ public BufferComparator getBufferComparator()
+ {
+ return bufferComparator;
+ }
+ }
+ private class ArrayRowBasedKeySerdeHelper implements RowBasedKeySerdeHelper
+ {
+ final int keyBufferPosition;
+ final BufferComparator bufferComparator;
+
+ ArrayRowBasedKeySerdeHelper(
+ int keyBufferPosition,
+ @Nullable StringComparator stringComparator
+ )
+ {
+ //TODO: karan : add pushLimitDown ?
Review comment:
Acked
##########
File path:
processing/src/main/java/org/apache/druid/segment/data/ComparableList.java
##########
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.data;
+
+import com.fasterxml.jackson.annotation.JsonValue;
+
+import java.util.List;
+
+
+public class ComparableList<T extends Comparable> implements
Comparable<ComparableList>
Review comment:
Done
##########
File path:
processing/src/main/java/org/apache/druid/segment/data/ComparableStringArray.java
##########
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.data;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonValue;
+import com.google.common.hash.Hashing;
+
+import java.util.Arrays;
+
+public class ComparableStringArray implements Comparable<ComparableStringArray>
Review comment:
Done
##########
File path: core/src/main/java/org/apache/druid/math/expr/Function.java
##########
@@ -2994,8 +2994,14 @@ public ExprEval apply(List<Expr> args,
Expr.ObjectBinding bindings)
Object[] out = new Object[length];
ExpressionType arrayType = null;
+
for (int i = 0; i < length; i++) {
ExprEval<?> evaluated = args.get(i).eval(bindings);
+ // short circuit the case where you have one input argument whose
input type is array. So array function has
Review comment:
The original intention was to keep it close to SQL but i realized SQL
does not have multi value col's :). So a new function may be necessary .
Cool I am adding a new `MV_TO_ARRAY` function to both the native and the SQL
layer.
Thanks for the tip
##########
File path:
sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
##########
@@ -143,54 +144,6 @@ public void
testSelectNonConstantArrayExpressionFromTable() throws Exception
);
}
- @Test
- public void testSelectNonConstantArrayExpressionFromTableForMultival()
throws Exception
Review comment:
I have added this test back as the changes made to the PR as part of the
array constructor are being reverted in favour of the mv_to_array function
##########
File path:
sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
##########
@@ -923,6 +876,277 @@ public void testArrayOffset() throws Exception
);
}
+
+ @Test
+ public void testArrayGroupAsArray() throws Exception
+ {
+ // Cannot vectorize as we donot have support in native query subsytem for
grouping on arrays as keys
+ cannotVectorize();
+ testQuery(
+ "SELECT ARRAY[dim3], SUM(cnt) FROM druid.numfoo GROUP BY 1 ORDER BY 2
DESC",
+ QUERY_CONTEXT_NO_STRINGIFY_ARRAY,
+ ImmutableList.of(
+ GroupByQuery.builder()
+ .setDataSource(CalciteTests.DATASOURCE3)
+ .setInterval(querySegmentSpec(Filtration.eternity()))
+ .setGranularity(Granularities.ALL)
+ .setVirtualColumns(expressionVirtualColumn(
+ "v0",
+ "array(\"dim3\")",
+ ColumnType.STRING_ARRAY
+ ))
+ .setDimensions(
+ dimensions(
+ new DefaultDimensionSpec("v0", "_d0",
ColumnType.STRING_ARRAY)
+ )
+ )
+ .setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "cnt")))
+ .setLimitSpec(new DefaultLimitSpec(
+ ImmutableList.of(new OrderByColumnSpec(
+ "a0",
+ OrderByColumnSpec.Direction.DESCENDING,
+ StringComparators.NUMERIC
+ )),
+ Integer.MAX_VALUE
+ ))
+ .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY)
+ .build()
+ ),
+ ImmutableList.of(
+ new Object[]{
+ new ArrayList<String>()
+ {
+ {
+ add(null);
+ }
+ }, 3L
+ },
+ new Object[]{ImmutableList.of("a", "b"), 1L},
+ new Object[]{ImmutableList.of("b", "c"), 1L},
+ new Object[]{ImmutableList.of("d"), 1L}
+ )
+ );
+ }
+
+ @Test
+ public void testArrayGroupAsLongArray() throws Exception
+ {
+ // Cannot vectorize as we donot have support in native query subsytem for
grouping on arrays as keys
+ cannotVectorize();
+ testQuery(
+ "SELECT ARRAY[l1], SUM(cnt) FROM druid.numfoo GROUP BY 1 ORDER BY 2
DESC",
+ QUERY_CONTEXT_NO_STRINGIFY_ARRAY,
+ ImmutableList.of(
+ GroupByQuery.builder()
+ .setDataSource(CalciteTests.DATASOURCE3)
+ .setInterval(querySegmentSpec(Filtration.eternity()))
+ .setGranularity(Granularities.ALL)
+ .setVirtualColumns(expressionVirtualColumn(
+ "v0",
+ "array(\"l1\")",
+ ColumnType.LONG_ARRAY
+ ))
+ .setDimensions(
+ dimensions(
+ new DefaultDimensionSpec("v0", "_d0",
ColumnType.LONG_ARRAY)
+ )
+ )
+ .setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "cnt")))
+ .setLimitSpec(new DefaultLimitSpec(
+ ImmutableList.of(new OrderByColumnSpec(
+ "a0",
+ OrderByColumnSpec.Direction.DESCENDING,
+ StringComparators.NUMERIC
+ )),
+ Integer.MAX_VALUE
+ ))
+ .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY)
+ .build()
+ ),
+ ImmutableList.of(
+ new Object[]{ImmutableList.of(0L), 4L},
+ new Object[]{ImmutableList.of(325323L), 1L},
+ new Object[]{ImmutableList.of(7L), 1L}
+ )
+ );
+ }
+
+
+ @Test
+ public void testArrayGroupAsDoubleArray() throws Exception
+ {
+ // Cannot vectorize as we donot have support in native query subsytem for
grouping on arrays as keys
+ cannotVectorize();
+ testQuery(
+ "SELECT ARRAY[d1], SUM(cnt) FROM druid.numfoo GROUP BY 1 ORDER BY 2
DESC",
+ QUERY_CONTEXT_NO_STRINGIFY_ARRAY,
+ ImmutableList.of(
+ GroupByQuery.builder()
+ .setDataSource(CalciteTests.DATASOURCE3)
+ .setInterval(querySegmentSpec(Filtration.eternity()))
+ .setGranularity(Granularities.ALL)
+ .setVirtualColumns(expressionVirtualColumn(
+ "v0",
+ "array(\"d1\")",
+ ColumnType.DOUBLE_ARRAY
+ ))
+ .setDimensions(
+ dimensions(
+ new DefaultDimensionSpec("v0", "_d0",
ColumnType.DOUBLE_ARRAY)
+ )
+ )
+ .setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "cnt")))
+ .setLimitSpec(new DefaultLimitSpec(
+ ImmutableList.of(new OrderByColumnSpec(
+ "a0",
+ OrderByColumnSpec.Direction.DESCENDING,
+ StringComparators.NUMERIC
+ )),
+ Integer.MAX_VALUE
+ ))
+ .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY)
+ .build()
+ ),
+ ImmutableList.of(
+ new Object[]{ImmutableList.of(0.0), 4L},
+ new Object[]{ImmutableList.of(1.0), 1L},
+ new Object[]{ImmutableList.of(1.7), 1L}
+ )
+ );
+ }
+
+ @Test
+ public void testArrayGroupAsFloatArray() throws Exception
+ {
+ // Cannot vectorize as we donot have support in native query subsytem for
grouping on arrays as keys
+ cannotVectorize();
+ testQuery(
+ "SELECT ARRAY[f1], SUM(cnt) FROM druid.numfoo GROUP BY 1 ORDER BY 2
DESC",
+ QUERY_CONTEXT_NO_STRINGIFY_ARRAY,
+ ImmutableList.of(
+ GroupByQuery.builder()
+ .setDataSource(CalciteTests.DATASOURCE3)
+ .setInterval(querySegmentSpec(Filtration.eternity()))
+ .setGranularity(Granularities.ALL)
+ .setVirtualColumns(expressionVirtualColumn(
+ "v0",
+ "array(\"f1\")",
+ ColumnType.DOUBLE_ARRAY
+ ))
+ .setDimensions(
+ dimensions(
+ new DefaultDimensionSpec("v0", "_d0",
ColumnType.DOUBLE_ARRAY)
+ )
+ )
+ .setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "cnt")))
+ .setLimitSpec(new DefaultLimitSpec(
+ ImmutableList.of(new OrderByColumnSpec(
+ "a0",
+ OrderByColumnSpec.Direction.DESCENDING,
+ StringComparators.NUMERIC
+ )),
+ Integer.MAX_VALUE
+ ))
+ .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY)
+ .build()
+ ),
+ ImmutableList.of(
+ new Object[]{ImmutableList.of(0.0), 4L},
+ new Object[]{ImmutableList.of(0.10000000149011612), 1L},
+ new Object[]{ImmutableList.of(1.0), 1L}
+ )
+ );
+ }
+
+ @Test
Review comment:
As we no longer are changing the array constructor, will skip adding
these UT's.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]