clintropolis commented on code in PR #16533: URL: https://github.com/apache/druid/pull/16533#discussion_r1703791887
########## processing/src/main/java/org/apache/druid/segment/CursorBuildSpec.java: ########## @@ -0,0 +1,241 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment; + +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.granularity.Granularity; +import org.apache.druid.query.QueryContext; +import org.apache.druid.query.QueryMetrics; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.filter.Filter; +import org.joda.time.Interval; + +import javax.annotation.Nullable; +import java.util.List; + +public class CursorBuildSpec +{ + public static final CursorBuildSpec FULL_SCAN = CursorBuildSpec.builder().setGranularity(Granularities.ALL).build(); + + public static CursorBuildSpecBuilder builder() + { + return new CursorBuildSpecBuilder(); + } + + public static CursorBuildSpecBuilder builder(CursorBuildSpec spec) + { + return new CursorBuildSpecBuilder(spec); + } + + @Nullable + private final Filter filter; + private final Interval interval; + private final Granularity granularity; + @Nullable + private final List<String> 
groupingColumns; + private final VirtualColumns virtualColumns; + @Nullable + private final List<AggregatorFactory> aggregators; + + private final QueryContext queryContext; + + private final boolean descending; + @Nullable + private final QueryMetrics<?> queryMetrics; + + public CursorBuildSpec( + @Nullable Filter filter, + Interval interval, + Granularity granularity, + @Nullable List<String> groupingColumns, + VirtualColumns virtualColumns, + @Nullable List<AggregatorFactory> aggregators, + QueryContext queryContext, + boolean descending, + @Nullable QueryMetrics<?> queryMetrics + ) + { + this.filter = filter; + this.interval = interval; + this.granularity = granularity; + this.groupingColumns = groupingColumns; + this.virtualColumns = virtualColumns; + this.aggregators = aggregators; + this.descending = descending; + this.queryContext = queryContext; + this.queryMetrics = queryMetrics; + } + + @Nullable + public Filter getFilter() + { + return filter; + } + + public Interval getInterval() + { + return interval; + } + + public Granularity getGranularity() + { + return granularity; + } + + @Nullable + public List<String> getGroupingColumns() + { + return groupingColumns; + } + + public VirtualColumns getVirtualColumns() + { + return virtualColumns; + } + + @Nullable + public List<AggregatorFactory> getAggregators() + { + return aggregators; + } + + public boolean isDescending() Review Comment: Still working on this one. I have it working using scan orderBy, but I'm a bit unsure about what to do about topN and groupBy, which never set descending and do their own sorting. The problem specifically is groupBy, which, if we order by time descending with this new logic, would try to do that to the cursor, which would mean it couldn't vectorize, but the engine doesn't actually care whether the results are ordered by the query's order by or not. 
And even if they were already ordered from a projection or something, it seems like right now there is no way for the engines to take advantage of this. The workaround I have right now is basically that only scan (when ordered by time), timeseries, and timeboundary queries push in an order by __time; everything else sets it to null. What should we do here? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
