gianm commented on code in PR #17803:
URL: https://github.com/apache/druid/pull/17803#discussion_r2019184490
##########
extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java:
##########
@@ -253,12 +259,29 @@ public static MSQSpec makeQuerySpec(
fieldMapping.stream().map(Entry::getValue).collect(Collectors.toSet())
);
+ final List<AggregateProjectionSpec> projectionSpecs;
Review Comment:
Please break this out into its own method; the current function is long
enough as it is.
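A sketch of the suggested extraction (the helper name and parameter list here are hypothetical; the real signature depends on what the projection-building logic in `makeQuerySpec` actually needs in scope):

```java
// Hypothetical helper pulled out of makeQuerySpec(); the parameter is a guess
// at what the projection-building logic needs.
private static List<AggregateProjectionSpec> computeProjectionSpecs(final QueryContext queryContext)
{
  final List<AggregateProjectionSpec> projectionSpecs = new ArrayList<>();
  // ... projection-building logic moved here, unchanged, from makeQuerySpec() ...
  return projectionSpecs;
}
```

The call site in `makeQuerySpec` then shrinks to a single line, e.g. `final List<AggregateProjectionSpec> projectionSpecs = computeProjectionSpecs(queryContext);`.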
##########
extensions-core/multi-stage-query/pom.xml:
##########
@@ -328,6 +328,12 @@
<type>test-jar</type>
<scope>test</scope>
</dependency>
+ <dependency>
Review Comment:
The indentation seems off here.
##########
server/src/main/java/org/apache/druid/client/indexing/ClientCompactionTaskQuery.java:
##########
@@ -139,6 +145,12 @@ public Map<String, Object> getContext()
return context;
}
+ @JsonProperty("projections")
Review Comment:
It can be null, so how about adding `@Nullable` and also json-include only
if nonnull?
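Concretely, something like this (a sketch; the getter and field names are assumed from the `@JsonProperty` key):

```java
@Nullable
@JsonProperty("projections")
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<AggregateProjectionSpec> getProjections()
{
  // With NON_NULL, the field is omitted from the serialized form entirely
  // when null, rather than emitted as "projections": null.
  return projections;
}
```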
##########
processing/src/main/java/org/apache/druid/timeline/CompactionState.java:
##########
@@ -108,6 +112,12 @@ public GranularitySpec getGranularitySpec()
return granularitySpec;
}
+ @JsonProperty
Review Comment:
Should this be nullable? Are null and empty meaningfully different, and can
both happen?
##########
server/src/main/java/org/apache/druid/server/coordinator/DataSourceCompactionConfig.java:
##########
@@ -19,351 +19,74 @@
package org.apache.druid.server.coordinator;
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonIgnore;
-import com.fasterxml.jackson.annotation.JsonProperty;
-import com.google.common.base.Preconditions;
+import com.fasterxml.jackson.annotation.JsonSubTypes;
+import com.fasterxml.jackson.annotation.JsonTypeInfo;
+import org.apache.druid.data.input.impl.AggregateProjectionSpec;
import org.apache.druid.indexer.CompactionEngine;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.segment.transform.CompactionTransformSpec;
import org.joda.time.Period;
import javax.annotation.Nullable;
-import java.util.Arrays;
+import java.util.List;
import java.util.Map;
-import java.util.Objects;
-public class DataSourceCompactionConfig
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type", defaultImpl = InlineSchemaDataSourceCompactionConfig.class)
Review Comment:
Do we need the `defaultImpl` for backwards-compat reasons?
We should avoid it whenever possible, because it does this weird thing where,
if you provide an invalid type, it's mapped to the `defaultImpl` rather than
being an error. It tends to burn people who mistype things, forget to load
extensions, etc.
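To illustrate the concern, a sketch of the two behaviors (subtype names are taken from the diff; the mistyped input is hypothetical):

```java
// With defaultImpl set, a mistyped or unregistered type name is silently
// deserialized as the default implementation:
//   {"type": "inlien", ...}  ->  InlineSchemaDataSourceCompactionConfig
//
// Without defaultImpl, the same input fails fast with Jackson's
// InvalidTypeIdException, which surfaces the typo or missing extension:
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type")
@JsonSubTypes({
    @JsonSubTypes.Type(name = "inline", value = InlineSchemaDataSourceCompactionConfig.class)
})
public abstract class DataSourceCompactionConfig
{
  // ...
}
```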
##########
server/src/main/java/org/apache/druid/catalog/model/table/DatasourceDefn.java:
##########
@@ -70,13 +73,48 @@ public class DatasourceDefn extends TableDefn
public static final String TABLE_TYPE = "datasource";
+ public DatasourceDefn()
+ {
+ super(
+ "Datasource",
+ TABLE_TYPE,
+ Arrays.asList(
+ new SegmentGranularityFieldDefn(),
+ new ModelProperties.IntPropertyDefn(TARGET_SEGMENT_ROWS_PROPERTY),
+ new ClusterKeysDefn(),
+ new HiddenColumnsDefn(),
+ new ModelProperties.BooleanPropertyDefn(SEALED_PROPERTY),
+ new ProjectionsDefn()
+ ),
+ null
+ );
+ }
+
+ @Override
+ protected void validateColumn(ColumnSpec spec)
+ {
+ super.validateColumn(spec);
+ if (Columns.isTimeColumn(spec.name()) && spec.dataType() != null) {
+ // Validate type in next PR
+ }
+ }
+
+ public static boolean isDatasource(String tableType)
Review Comment:
It would be nice to have javadoc about what `tableType` is meant to be: what
kinds of types can be put in here? Perhaps it's obvious to people who are more
familiar with the catalog than I am, but to me it's not.
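A javadoc sketch of what's being asked for (the wording is a guess at the intended contract; the catalog maintainers would know the actual set of valid table types):

```java
public class DatasourceDefnSketch
{
  public static final String TABLE_TYPE = "datasource";

  /**
   * Returns whether the given table type identifies a datasource table.
   *
   * @param tableType the {@code type} field of a catalog table spec, e.g.
   *                  {@code "datasource"} for Druid datasources; other values
   *                  (such as external-table types) are possible. May be null.
   */
  public static boolean isDatasource(final String tableType)
  {
    return TABLE_TYPE.equals(tableType);
  }
}
```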
##########
extensions-core/druid-catalog/src/main/java/org/apache/druid/catalog/guice/CatalogClientModule.java:
##########
@@ -20,55 +20,59 @@
package org.apache.druid.catalog.guice;
import com.google.inject.Binder;
+import org.apache.druid.catalog.MetadataCatalog;
import org.apache.druid.catalog.http.CatalogListenerResource;
import org.apache.druid.catalog.model.SchemaRegistry;
import org.apache.druid.catalog.model.SchemaRegistryImpl;
import org.apache.druid.catalog.sql.LiveCatalogResolver;
import org.apache.druid.catalog.sync.CachedMetadataCatalog;
import org.apache.druid.catalog.sync.CatalogClient;
+import org.apache.druid.catalog.sync.CatalogClientConfig;
+import org.apache.druid.catalog.sync.CatalogSource;
import org.apache.druid.catalog.sync.CatalogUpdateListener;
import org.apache.druid.catalog.sync.CatalogUpdateReceiver;
-import org.apache.druid.catalog.sync.MetadataCatalog;
-import org.apache.druid.catalog.sync.MetadataCatalog.CatalogSource;
import org.apache.druid.discovery.NodeRole;
import org.apache.druid.guice.Jerseys;
+import org.apache.druid.guice.JsonConfigProvider;
import org.apache.druid.guice.LazySingleton;
import org.apache.druid.guice.LifecycleModule;
import org.apache.druid.guice.ManageLifecycle;
+import org.apache.druid.guice.annotations.ExcludeScope;
import org.apache.druid.guice.annotations.LoadScope;
import org.apache.druid.initialization.DruidModule;
import org.apache.druid.sql.calcite.planner.CatalogResolver;
/**
- * Configures the metadata catalog on the Broker to use a cache
+ * Configures the metadata catalog on the Broker and Overlord to use a cache
* and network communications for pull and push updates.
*/
-@LoadScope(roles = NodeRole.BROKER_JSON_NAME)
-public class CatalogBrokerModule implements DruidModule
+@LoadScope(roles = {NodeRole.BROKER_JSON_NAME, NodeRole.OVERLORD_JSON_NAME})
+@ExcludeScope(roles = {NodeRole.COORDINATOR_JSON_NAME})
Review Comment:
Why do we need both `@LoadScope` and `@ExcludeScope`? Isn't _not_ listing
`COORDINATOR_JSON_NAME` in `@LoadScope` enough to not load it there?
##########
server/src/main/java/org/apache/druid/catalog/model/ResolvedTable.java:
##########
@@ -84,6 +84,15 @@ public Map<String, Object> properties()
return spec.properties();
}
+ public <T> T getProperty(String key)
Review Comment:
`@Nullable`? Also, the fact that decoding is automatic seems interesting and
should be mentioned in the javadoc. Otherwise it's not clear why `getProperty`
is different from calling `properties()` and doing a `get`.
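Something along these lines (a sketch; the decoding is assumed to go through the catalog's JSON mapper, and the body is elided):

```java
/**
 * Returns the value of the property {@code key}, automatically decoded to the
 * caller's expected type via the JSON mapper. This is what distinguishes it
 * from calling {@code properties().get(key)}, which returns the raw map value
 * without conversion.
 *
 * @return the decoded value, or null if the property is not present
 */
@Nullable
public <T> T getProperty(String key)
{
  // decoding details elided; see the PR for the actual implementation
}
```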
##########
indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java:
##########
@@ -1086,6 +1130,60 @@ private void processMultiValuedDimensions(final QueryableIndex index)
}
}
+
+ private void processProjections(final QueryableIndex index)
Review Comment:
Some comments about the logic here would be useful, especially on how
conflicts are handled between projections of different names.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]