clintropolis commented on code in PR #14510: URL: https://github.com/apache/druid/pull/14510#discussion_r1282540458
########## processing/src/main/java/org/apache/druid/query/FilterDataSource.java: ########## @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableList; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.filter.DimFilter; +import org.apache.druid.query.planning.DataSourceAnalysis; +import org.apache.druid.segment.FilterStorageAdapter; +import org.apache.druid.segment.SegmentReference; +import org.apache.druid.segment.WrappedSegmentReference; +import org.apache.druid.utils.JvmUtils; + +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Function; + +/** + * This class models a data source to be unnested which is present along with a filter. Review Comment: nothing about this class really seems specific to UNNEST, and seems like it could work for any filter and datasource... 
I would suggest documenting it generically that it just wraps a datasource and applies a filter to the underlying storage adapter, and just linking to the `FilteredStorageAdapter` ########## processing/src/main/java/org/apache/druid/query/FilterDataSource.java: ########## @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableList; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.filter.DimFilter; +import org.apache.druid.query.planning.DataSourceAnalysis; +import org.apache.druid.segment.FilterStorageAdapter; +import org.apache.druid.segment.SegmentReference; +import org.apache.druid.segment.WrappedSegmentReference; +import org.apache.druid.utils.JvmUtils; + +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Function; + +/** + * This class models a data source to be unnested which is present along with a filter. 
+ * An example for this data source follows: + * + * Consider this query: + * SELECT d3 FROM (select * from druid.numfoo where dim2='a'), UNNEST(MV_TO_ARRAY(dim3)) + * + * where the filter data source has numFoo as base and dim2='a' as the filter + * + * Without this data source, the planner was converting the inner query to a query data source + * putting more work to be done at the broker level. This pushes the operations down to the + * segments and is more performant. + */ +public class FilterDataSource implements DataSource +{ + + private final DataSource base; + private final DimFilter filter; + + @JsonProperty("base") + public DataSource getBase() + { + return base; + } + + @JsonProperty("filter") + public DimFilter getFilter() + { + return filter; + } + + private FilterDataSource(DataSource base, DimFilter filter) + { + this.base = base; + this.filter = filter; + } + + @JsonCreator + public static FilterDataSource create( + @JsonProperty("base") DataSource base, + @JsonProperty("filter") DimFilter f + ) + { + return new FilterDataSource(base, f); + } + + @Override + public Set<String> getTableNames() + { + return base.getTableNames(); + } + + @Override + public List<DataSource> getChildren() + { + return ImmutableList.of(base); Review Comment: curious, why this instead of delegating to the base datasource's children? I guess they would still be there, just wondering why to wrap like this instead of just passing through, since this datasource seems to effectively be the base datasource, just with a filter applied... ########## sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/SearchOperatorConversion.java: ########## @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.expression.builtin; + +import org.apache.calcite.jdbc.JavaTypeFactoryImpl; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.druid.query.filter.DimFilter; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.sql.calcite.expression.DruidExpression; +import org.apache.druid.sql.calcite.expression.Expressions; +import org.apache.druid.sql.calcite.expression.SqlOperatorConversion; +import org.apache.druid.sql.calcite.planner.DruidTypeSystem; +import org.apache.druid.sql.calcite.planner.PlannerContext; +import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry; + +import javax.annotation.Nullable; + +public class SearchOperatorConversion implements SqlOperatorConversion Review Comment: are there tests for this? maybe javadocs? regular docs? 
########## sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/StringSqlAggregator.java: ########## @@ -96,37 +107,38 @@ public Aggregation toDruidAggregation( return null; } - RexNode separatorNode = Expressions.fromFieldAccess( - rexBuilder.getTypeFactory(), - rowSignature, - project, - aggregateCall.getArgList().get(1) - ); - if (!separatorNode.isA(SqlKind.LITERAL)) { - // separator must be a literal - return null; - } - String separator = RexLiteral.stringValue(separatorNode); + final String separator; - if (separator == null) { - // separator must not be null - return null; + if (arguments.size() > 1) { + separator = RexLiteral.stringValue( + Expressions.fromFieldAccess( + rexBuilder.getTypeFactory(), + rowSignature, + project, + aggregateCall.getArgList().get(1) + ) Review Comment: still wondering this ########## processing/src/main/java/org/apache/druid/query/FilterDataSource.java: ########## @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableList; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.filter.DimFilter; +import org.apache.druid.query.planning.DataSourceAnalysis; +import org.apache.druid.segment.FilterStorageAdapter; +import org.apache.druid.segment.SegmentReference; +import org.apache.druid.segment.WrappedSegmentReference; +import org.apache.druid.utils.JvmUtils; + +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Function; + +/** + * This class models a data source to be unnested which is present along with a filter. + * An example for this data source follows: + * + * Consider this query: + * SELECT d3 FROM (select * from druid.numfoo where dim2='a'), UNNEST(MV_TO_ARRAY(dim3)) + * + * where the filter data source has numFoo as base and dim2='a' as the filter + * + * Without this data source, the planner was converting the inner query to a query data source + * putting more work to be done at the broker level. This pushes the operations down to the + * segments and is more performant. 
+ */ +public class FilterDataSource implements DataSource +{ + + private final DataSource base; + private final DimFilter filter; + + @JsonProperty("base") + public DataSource getBase() + { + return base; + } + + @JsonProperty("filter") + public DimFilter getFilter() + { + return filter; + } + + private FilterDataSource(DataSource base, DimFilter filter) + { + this.base = base; + this.filter = filter; + } + + @JsonCreator + public static FilterDataSource create( + @JsonProperty("base") DataSource base, + @JsonProperty("filter") DimFilter f + ) + { + return new FilterDataSource(base, f); + } + + @Override + public Set<String> getTableNames() + { + return base.getTableNames(); + } + + @Override + public List<DataSource> getChildren() + { + return ImmutableList.of(base); + } + + @Override + public DataSource withChildren(List<DataSource> children) + { + if (children.size() != 1) { + throw new IAE("Expected [1] child, got [%d]", children.size()); + } + + return new FilterDataSource(children.get(0), filter); + } + + @Override + public boolean isCacheable(boolean isBroker) + { + return false; + } + + @Override + public boolean isGlobal() + { + return false; Review Comment: should this be based on if the base datasource is global? ########## processing/src/main/java/org/apache/druid/segment/FilterStorageAdapter.java: ########## @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.java.util.common.granularity.Granularity; +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.query.QueryMetrics; +import org.apache.druid.query.filter.DimFilter; +import org.apache.druid.query.filter.Filter; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.data.Indexed; +import org.apache.druid.segment.filter.AndFilter; +import org.joda.time.DateTime; +import org.joda.time.Interval; + +import javax.annotation.Nullable; + +public class FilterStorageAdapter implements StorageAdapter Review Comment: naming nit: should this be `FilteredStorageAdapter` instead of `FilterStorageAdapter`? Also javadocs indicating that the `Filter` is applied when making a cursor, or combined using an `AND` if a filter is passed into cursor creation ########## processing/src/main/java/org/apache/druid/query/FilterDataSource.java: ########## @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableList; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.filter.DimFilter; +import org.apache.druid.query.planning.DataSourceAnalysis; +import org.apache.druid.segment.FilterStorageAdapter; +import org.apache.druid.segment.SegmentReference; +import org.apache.druid.segment.WrappedSegmentReference; +import org.apache.druid.utils.JvmUtils; + +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Function; + +/** + * This class models a data source to be unnested which is present along with a filter. + * An example for this data source follows: + * + * Consider this query: + * SELECT d3 FROM (select * from druid.numfoo where dim2='a'), UNNEST(MV_TO_ARRAY(dim3)) + * + * where the filter data source has numFoo as base and dim2='a' as the filter + * + * Without this data source, the planner was converting the inner query to a query data source + * putting more work to be done at the broker level. This pushes the operations down to the + * segments and is more performant. + */ +public class FilterDataSource implements DataSource Review Comment: naming nit: should this be `FilteredDataSource` instead of `FilterDataSource`? 
########## sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/LiteralSqlAggregator.java: ########## @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.aggregation.builtin; + +import com.google.common.collect.ImmutableList; +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlAggFunction; +import org.apache.calcite.sql.fun.SqlInternalOperators; +import org.apache.druid.query.aggregation.post.ExpressionPostAggregator; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.sql.calcite.aggregation.Aggregation; +import org.apache.druid.sql.calcite.aggregation.SqlAggregator; +import org.apache.druid.sql.calcite.expression.DruidExpression; +import org.apache.druid.sql.calcite.expression.Expressions; +import org.apache.druid.sql.calcite.planner.PlannerContext; +import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry; + +import javax.annotation.Nullable; +import java.util.List; + +/** + * Calcite 1.35 introduces an aggregate function LITERAL_AGG that returns a 
constant value regardless + * of how many rows are in the group. This also introduced a change to subquery + * remove rule as a part of https://issues.apache.org/jira/browse/CALCITE-4334 + * + * In this case a useless literal dimension is replaced with a post agg which makes queries performant + * This class supports the use of LITERAL_AGG for Druid queries Review Comment: is there a test for this? ########## extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSqlAggregatorTest.java: ########## @@ -381,7 +381,12 @@ public void testQuantileOnInnerQuery() .setGranularity(Granularities.ALL) .setAggregatorSpecs( new DoubleSumAggregatorFactory("_a0:sum", "a0"), - new CountAggregatorFactory("_a0:count"), + NullHandling.replaceWithDefault() ? + new CountAggregatorFactory("_a0:count") : + new FilteredAggregatorFactory( + new CountAggregatorFactory("_a0:count"), + notNull("a0") + ), Review Comment: why did this change? ########## sql/src/main/java/org/apache/druid/sql/calcite/planner/CalcitePlanner.java: ########## @@ -65,9 +61,9 @@ import org.apache.calcite.util.Pair; import javax.annotation.Nullable; - import java.io.Reader; import java.util.List; +import java.util.Objects; import java.util.Properties; /** Review Comment: update the javadoc to indicate that (i assume) this has now been copied from 1.35 instead of 1.21, or if it hasn't been freshly copied we probably need to indicate what we have changed here ########## sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java: ########## @@ -184,6 +195,99 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) unnestFilterOnDataSource = null; } + final DruidRel<?> newLeftDruidRel; + final DruidQuery updatedLeftQuery; + + if (unnestDatasourceRel.getInputRexNode().getKind() == SqlKind.FIELD_ACCESS) { + final PartialDruidQuery leftPartialQueryToBeUpdated; + if (leftDruidRel instanceof DruidOuterQueryRel) { + 
leftPartialQueryToBeUpdated = ((DruidRel) leftDruidRel.getInputs().get(0)).getPartialDruidQuery(); + } else { + leftPartialQueryToBeUpdated = leftPartialQuery; + } + final Project leftProject = leftPartialQueryToBeUpdated.getSelectProject(); + final String dimensionToUpdate = expressionToUnnest.getDirectColumn(); + final LogicalProject newProject; + if (leftProject == null) { + newProject = null; + } else { + final ProjectUpdateShuttle pus = new ProjectUpdateShuttle( + unwrapMvToArray(rexNodeToUnnest), + leftProject, + dimensionToUpdate + ); + final List<RexNode> out = pus.visitList(leftProject.getProjects()); + final RelDataType structType = RexUtil.createStructType(getCluster().getTypeFactory(), out, pus.getTypeNames()); + newProject = LogicalProject.create( + leftProject.getInput(), + leftProject.getHints(), + out, + structType + ); + } + + final DruidRel leftFinalRel; + if (leftDruidRel instanceof DruidOuterQueryRel) { + leftFinalRel = (DruidRel) leftDruidRel.getInputs().get(0); + } else { + leftFinalRel = leftDruidRel; + } + final PartialDruidQuery pq = PartialDruidQuery.create(leftPartialQueryToBeUpdated.getScan()) + .withWhereFilter(leftPartialQueryToBeUpdated.getWhereFilter()) + .withSelectProject(newProject) + .withSort(leftPartialQueryToBeUpdated.getSort()); + + + if (leftPartialQuery.stage() == PartialDruidQuery.Stage.SORT_PROJECT) { + final Project sortProject = leftPartialQueryToBeUpdated.getSortProject(); + final ProjectUpdateShuttle pus = new ProjectUpdateShuttle( + unwrapMvToArray(rexNodeToUnnest), + sortProject, + dimensionToUpdate + ); + final List<RexNode> out = pus.visitList(sortProject.getProjects()); + final RelDataType structType = RexUtil.createStructType(getCluster().getTypeFactory(), out, pus.getTypeNames()); + final Project newSortProject = LogicalProject.create( + sortProject.getInput(), + sortProject.getHints(), + out, + structType + ); + newLeftDruidRel = leftFinalRel.withPartialQuery(pq.withSortProject(newSortProject)); + } else { + 
newLeftDruidRel = leftFinalRel.withPartialQuery(pq); + } + } else { + newLeftDruidRel = leftDruidRel; + } + updatedLeftQuery = Preconditions.checkNotNull(newLeftDruidRel.toDruidQuery(false), "leftQuery"); + VirtualColumns virtualColumns = updatedLeftQuery.getVirtualColumns(false); + + if (newLeftDruidRel.getPartialDruidQuery().stage().compareTo(PartialDruidQuery.Stage.SELECT_PROJECT) <= 0) { + final Filter whereFilter = newLeftDruidRel.getPartialDruidQuery().getWhereFilter(); + final RowSignature leftSignature = DruidRels.dataSourceSignature(newLeftDruidRel); + if (whereFilter == null) { + if (computeLeftRequiresSubquery(newLeftDruidRel)) { + // Left side is doing more than simple scan: generate a subquery. + leftDataSource1 = new QueryDataSource(updatedLeftQuery.getQuery()); + } else { + leftDataSource1 = updatedLeftQuery.getDataSource(); + } + } else { + final DimFilter dimFilter = Filtration.create(DruidQuery.getDimFilter( + getPlannerContext(), + leftSignature, + null, + whereFilter + )) + .optimizeFilterOnly(leftSignature).getDimFilter(); + leftDataSource1 = FilterDataSource.create(updatedLeftQuery.getDataSource(), dimFilter); + } + } else { + leftDataSource1 = new QueryDataSource(updatedLeftQuery.getQuery()); + } + //if(updatedLeftQuery.getV) Review Comment: ? 
########## sql/src/main/java/org/apache/druid/sql/calcite/rule/GroupByRules.java: ########## @@ -64,6 +65,11 @@ public static Aggregation translateAggregateCall( final boolean finalizeAggregations ) { + if (!call.getCollation().getFieldCollations().isEmpty()) { + // TODO(gianm): validate no WITHIN GROUP elsewhere, mention location of validation code here Review Comment: still need answer or remove todo ########## sql/src/main/java/org/apache/druid/sql/calcite/planner/convertlet/TimeInIntervalConvertletFactory.java: ########## @@ -55,7 +55,7 @@ public class TimeInIntervalConvertletFactory implements DruidConvertletFactory .operatorBuilder(NAME) .operandTypeChecker( OperandTypes.sequence( - "'" + NAME + "(<TIMESTAMP>, <LITERAL ISO8601 INTERVAL>)'", + NAME + "(<TIMESTAMP>, <LITERAL ISO8601 INTERVAL>)", Review Comment: it seems like a handful of functions define their signature with single quotes around it, why are we removing them here only? ########## sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java: ########## @@ -2408,7 +2408,8 @@ public void testExactCountDistinctWithFilter() new CountAggregatorFactory("_a0"), and( not(selector("d0", null, null)), - selector("a1", "0", null) + selector("a1", "0", null), + expressionFilter("\"d1\"") Review Comment: still curious ########## sql/src/main/java/org/apache/druid/sql/calcite/rule/logical/DruidAggregateCaseToFilterRule.java: ########## @@ -133,7 +133,6 @@ public void onMatch(RelOptRuleCall call) .convert(aggregate.getRowType(), false); call.transformTo(relBuilder.build()); - call.getPlanner().setImportance(aggregate, 0.0); Review Comment: ? 
########## sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java: ########## @@ -184,6 +195,99 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) unnestFilterOnDataSource = null; } + final DruidRel<?> newLeftDruidRel; + final DruidQuery updatedLeftQuery; + + if (unnestDatasourceRel.getInputRexNode().getKind() == SqlKind.FIELD_ACCESS) { Review Comment: nit: this could use some more comments to indicate what is going on since its a pretty big wall of code ########## extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchObjectSqlAggregator.java: ########## @@ -35,7 +35,7 @@ public class HllSketchObjectSqlAggregator extends HllSketchBaseSqlAggregator imp { private static final String NAME = "DS_HLL"; private static final SqlAggFunction FUNCTION_INSTANCE = - OperatorConversions.aggregatorBuilder(NAME) + (SqlAggFunction) OperatorConversions.aggregatorBuilder(NAME) Review Comment: still wondering the reason for this ########## processing/src/main/java/org/apache/druid/query/FilterDataSource.java: ########## @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableList; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.filter.DimFilter; +import org.apache.druid.query.planning.DataSourceAnalysis; +import org.apache.druid.segment.FilterStorageAdapter; +import org.apache.druid.segment.SegmentReference; +import org.apache.druid.segment.WrappedSegmentReference; +import org.apache.druid.utils.JvmUtils; + +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Function; + +/** + * This class models a data source to be unnested which is present along with a filter. + * An example for this data source follows: + * + * Consider this query: + * SELECT d3 FROM (select * from druid.numfoo where dim2='a'), UNNEST(MV_TO_ARRAY(dim3)) + * + * where the filter data source has numFoo as base and dim2='a' as the filter + * + * Without this data source, the planner was converting the inner query to a query data source + * putting more work to be done at the broker level. This pushes the operations down to the + * segments and is more performant. 
+ */ +public class FilterDataSource implements DataSource +{ + + private final DataSource base; + private final DimFilter filter; + + @JsonProperty("base") + public DataSource getBase() + { + return base; + } + + @JsonProperty("filter") + public DimFilter getFilter() + { + return filter; + } + + private FilterDataSource(DataSource base, DimFilter filter) + { + this.base = base; + this.filter = filter; + } + + @JsonCreator + public static FilterDataSource create( + @JsonProperty("base") DataSource base, + @JsonProperty("filter") DimFilter f + ) + { + return new FilterDataSource(base, f); + } + + @Override + public Set<String> getTableNames() + { + return base.getTableNames(); + } + + @Override + public List<DataSource> getChildren() + { + return ImmutableList.of(base); + } + + @Override + public DataSource withChildren(List<DataSource> children) + { + if (children.size() != 1) { + throw new IAE("Expected [1] child, got [%d]", children.size()); + } + + return new FilterDataSource(children.get(0), filter); + } + + @Override + public boolean isCacheable(boolean isBroker) + { + return false; + } + + @Override + public boolean isGlobal() + { + return false; + } + + @Override + public boolean isConcrete() + { + return false; Review Comment: same question about delegating to base datasource, like iirc this returning false means it doesn't get pushed down to historicals? (i might be misremembering...) ########## processing/src/main/java/org/apache/druid/segment/FilterStorageAdapter.java: ########## @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.java.util.common.granularity.Granularity; +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.query.QueryMetrics; +import org.apache.druid.query.filter.DimFilter; +import org.apache.druid.query.filter.Filter; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.data.Indexed; +import org.apache.druid.segment.filter.AndFilter; +import org.joda.time.DateTime; +import org.joda.time.Interval; + +import javax.annotation.Nullable; + +public class FilterStorageAdapter implements StorageAdapter +{ + private final DimFilter filterOnDataSource; + private final StorageAdapter baseStorageAdapter; + + public FilterStorageAdapter(final StorageAdapter adapter, final DimFilter filter) + { + this.baseStorageAdapter = adapter; + this.filterOnDataSource = filter; + } + + @Override + public Sequence<Cursor> makeCursors( + @Nullable Filter filter, + Interval interval, + VirtualColumns virtualColumns, + Granularity gran, + boolean descending, + @Nullable QueryMetrics<?> queryMetrics + ) + { + final Filter andFilter; + if (filter == null) { + if (filterOnDataSource != null) { + andFilter = filterOnDataSource.toFilter(); + } else { + andFilter = null; + } + } else { + andFilter = new AndFilter(ImmutableList.of(filter, filterOnDataSource.toFilter())); + } + return baseStorageAdapter.makeCursors(andFilter, interval, virtualColumns, gran, descending, queryMetrics); + } + + @Override + 
public Interval getInterval() + { + return baseStorageAdapter.getInterval(); + } + + @Override + public Indexed<String> getAvailableDimensions() + { + return baseStorageAdapter.getAvailableDimensions(); + } + + @Override + public Iterable<String> getAvailableMetrics() + { + return baseStorageAdapter.getAvailableMetrics(); + } + + @Override + public int getDimensionCardinality(String column) + { + return baseStorageAdapter.getDimensionCardinality(column); Review Comment: I'm not sure if it causes any problems, but this might not be technically correct after filtering, though if this is only used for upper bounds then it probably won't cause a problem since there cannot be higher cardinality than the base datasource (I forget off the top of my head all the things that use it) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
