paul-rogers commented on code in PR #12647: URL: https://github.com/apache/druid/pull/12647#discussion_r910549204
########## server/src/main/java/org/apache/druid/catalog/ColumnSpec.java: ########## @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.catalog; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Strings; +import com.google.common.collect.ImmutableMap; +import org.apache.druid.guice.annotations.PublicApi; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.segment.column.ColumnType; + +import java.util.Map; + +/** + * Base class for table columns. Columns have multiple types + * represented as subclasses. + */ +@PublicApi Review Comment: Done. ########## server/src/main/java/org/apache/druid/catalog/InputTableSpec.java: ########## @@ -0,0 +1,232 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.catalog; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Strings; +import org.apache.druid.catalog.TableMetadata.TableType; +import org.apache.druid.data.input.InputFormat; +import org.apache.druid.data.input.InputSource; +import org.apache.druid.java.util.common.IAE; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +/** + * Definition of an external input source, primarily for ingestion. + * The components are derived from those for Druid ingestion: an + * input source, a format and a set of columns. Also provides + * properties, as do all table definitions. + */ +public class InputTableSpec extends TableSpec +{ + private final InputSource inputSource; + private final InputFormat format; + private final List<InputColumnSpec> columns; + + public InputTableSpec( + @JsonProperty("inputSource") InputSource inputSource, + @JsonProperty("format") InputFormat format, Review Comment: OK. I was kind of leaning toward the Go pattern: terseness. The only "format" it could possibly be is the "input format". Same is true for "input source", but I didn't spend time to simplify it yet. Same reason the other fields are not `tableColumns` and `userDefinedProperties`. Changed it to `inputFormat` for now; we can clean it up in a renaming pass later. 
########## server/src/main/java/org/apache/druid/catalog/DatasourceSpec.java: ########## @@ -0,0 +1,376 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.catalog; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonInclude.Include; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Strings; +import org.apache.druid.catalog.DatasourceColumnSpec.DetailColumnSpec; +import org.apache.druid.catalog.DatasourceColumnSpec.DimensionSpec; +import org.apache.druid.catalog.DatasourceColumnSpec.MeasureSpec; +import org.apache.druid.catalog.TableMetadata.TableType; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.StringUtils; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +/** + * Datasource metadata exchanged via the REST API and stored + * in the catalog. 
+ */ +public class DatasourceSpec extends TableSpec +{ + /** + * Segment grain at ingestion and initial compaction. Aging rules + * may override the value as segments age. If not provided here, + * then it must be provided at ingestion time. + */ + private final String segmentGranularity; + + /** + * Ingestion and auto-compaction rollup granularity. If null, then no + * rollup is enabled. Same as {@code queryGranularity} in and ingest spec, + * but renamed since this granularity affects rollup, not queries. Can be + * overridden at ingestion time. The grain may change as segments evolve: + * this is the grain only for ingest. + */ + private final String rollupGranularity; + + /** + * The target segment size at ingestion and initial compaction. + * If 0, then the system setting is used. + */ + private final int targetSegmentRows; + + /** + * Whether to enable auto-compaction. Only relevant if no auto-compaction + * spec is defined, since the existence of a spec overrides this setting. + */ + private final boolean enableAutoCompaction; + + /** + * The offset of segments to be auto-compacted relative to the current + * time. If not present, the auto-compaction default is used if + * auto-compaction is enabled. + */ + private final String autoCompactionDelay; + + private final List<DatasourceColumnSpec> columns; + + public DatasourceSpec( + @JsonProperty("segmentGranularity") String segmentGranularity, + @JsonProperty("rollupGranularity") String rollupGranularity, + @JsonProperty("targetSegmentRows") int targetSegmentRows, + @JsonProperty("enableAutoCompaction") boolean enableAutoCompaction, + @JsonProperty("autoCompactionDelay") String autoCompactionDelay, Review Comment: This is still preliminary: just working out the storage and REST layers at the moment. For this one, the thought is if there is a compaction spec, that takes precedence. If the spec exists, but leaves out this property, this is the value we use. 
If the spec doesn't exist, this info takes over (for the simple case of direct compaction.) Details TBD when we get that far. ########## server/src/main/java/org/apache/druid/catalog/DatasourceColumnSpec.java: ########## @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.catalog; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonSubTypes.Type; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.StringUtils; + +/** + * Description of a detail datasource column and a rollup + * dimension column. 
+ */ +@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") +@JsonSubTypes(value = { + @Type(name = "detail", value = DatasourceColumnSpec.DetailColumnSpec.class), + @Type(name = "dimension", value = DatasourceColumnSpec.DimensionSpec.class), + @Type(name = "measure", value = DatasourceColumnSpec.MeasureSpec.class), +}) +public abstract class DatasourceColumnSpec extends ColumnSpec +{ + private static final String TIME_COLUMN = "__time"; + + @JsonCreator + public DatasourceColumnSpec( + @JsonProperty("name") String name, + @JsonProperty("sqlType") String sqlType + ) + { + super(name, sqlType); + } + + @Override + public void validate() + { + super.validate(); + if (sqlType == null) { + return; + } + if (TIME_COLUMN.equals(name)) { + if (!"TIMESTAMP".equalsIgnoreCase(sqlType)) { + throw new IAE("__time column must have type TIMESTAMP"); + } + } else if (!VALID_SQL_TYPES.containsKey(StringUtils.toUpperCase(sqlType))) { Review Comment: Yeah, that's a later step. The thought is to invent new syntax. For a sum: `SUM(LONG)` or `SUM(DOUBLE)`. For a sketch, `<SKETCH_NAME>(<VALUE_TYPE>)`. The details are to be worked out when integrating with roll-up tables in the SQL layer for `INSERT` statements: the info here has to generate the required structures where we now use SQL aggregate functions (and, for dimensions, where we use `GROUP BY` keys.) ########## server/src/main/java/org/apache/druid/catalog/DatasourceColumnSpec.java: ########## @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.catalog; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonSubTypes.Type; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.StringUtils; + +/** + * Description of a detail datasource column and a rollup + * dimension column. + */ +@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") +@JsonSubTypes(value = { + @Type(name = "detail", value = DatasourceColumnSpec.DetailColumnSpec.class), Review Comment: I think it is just used in Druid circles as the opposite of "rollup"? Somehow, "type=datasourceWithoutRollupEnabled" seemed a bit much, hence "rollup" and "detail". Open to suggestions! ########## server/src/main/java/org/apache/druid/catalog/DatasourceSpec.java: ########## @@ -0,0 +1,376 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.catalog; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonInclude.Include; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Strings; +import org.apache.druid.catalog.DatasourceColumnSpec.DetailColumnSpec; +import org.apache.druid.catalog.DatasourceColumnSpec.DimensionSpec; +import org.apache.druid.catalog.DatasourceColumnSpec.MeasureSpec; +import org.apache.druid.catalog.TableMetadata.TableType; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.StringUtils; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +/** + * Datasource metadata exchanged via the REST API and stored + * in the catalog. + */ +public class DatasourceSpec extends TableSpec +{ + /** + * Segment grain at ingestion and initial compaction. Aging rules + * may override the value as segments age. If not provided here, + * then it must be provided at ingestion time. + */ + private final String segmentGranularity; + + /** + * Ingestion and auto-compaction rollup granularity. If null, then no + * rollup is enabled. Same as {@code queryGranularity} in and ingest spec, + * but renamed since this granularity affects rollup, not queries. Can be + * overridden at ingestion time. 
The grain may change as segments evolve: + * this is the grain only for ingest. + */ + private final String rollupGranularity; + + /** + * The target segment size at ingestion and initial compaction. + * If 0, then the system setting is used. + */ + private final int targetSegmentRows; + + /** + * Whether to enable auto-compaction. Only relevant if no auto-compaction + * spec is defined, since the existence of a spec overrides this setting. + */ + private final boolean enableAutoCompaction; + + /** + * The offset of segments to be auto-compacted relative to the current + * time. If not present, the auto-compaction default is used if + * auto-compaction is enabled. + */ + private final String autoCompactionDelay; + + private final List<DatasourceColumnSpec> columns; + + public DatasourceSpec( + @JsonProperty("segmentGranularity") String segmentGranularity, Review Comment: The thought was that we don't actually use this as a `Granularity`: it is saved and used when doing query planning. The class has a validation method that checks each field, and that will do the check (and produce a reasonable error message). I see that the check is missing. Trying to add it pointed out that there is not a `Granularity` subclass for a simple period? Should I just use the Joda methods? And, where can I check that the granularity is one of the supported values? A concern with using specialized types is that the deserialization will fail, with a cryptic Jackson message, which didn't seem helpful. We'd like "[fortnight] is not a valid rollup granularity", not some obscure Jackson error. ########## server/src/main/java/org/apache/druid/server/http/CatalogResource.java: ########## @@ -0,0 +1,488 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.server.http; + +import com.google.common.base.Strings; +import org.apache.curator.shaded.com.google.common.collect.Lists; +import org.apache.druid.catalog.Actions; +import org.apache.druid.catalog.CatalogStorage; +import org.apache.druid.catalog.SchemaRegistry.SchemaSpec; +import org.apache.druid.catalog.TableId; +import org.apache.druid.catalog.TableMetadata; +import org.apache.druid.catalog.TableSpec; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.Pair; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.metadata.catalog.CatalogManager; +import org.apache.druid.metadata.catalog.CatalogManager.DuplicateKeyException; +import org.apache.druid.metadata.catalog.CatalogManager.NotFoundException; +import org.apache.druid.metadata.catalog.CatalogManager.OutOfDateException; +import org.apache.druid.server.security.Action; +import org.apache.druid.server.security.AuthorizationUtils; +import org.apache.druid.server.security.ForbiddenException; +import org.apache.druid.server.security.ResourceType; + +import javax.inject.Inject; +import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.Consumes; +import javax.ws.rs.DELETE; +import javax.ws.rs.GET; +import javax.ws.rs.POST; +import javax.ws.rs.Path; +import javax.ws.rs.PathParam; +import javax.ws.rs.Produces; +import 
javax.ws.rs.QueryParam; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * REST endpoint for user and internal catalog actions. Catalog actions + * occur at the global level (all schemas), the schema level, or the + * table level. + * + * @see {@link CatalogListenerResource} for the client-side API. + */ +@Path(CatalogResource.ROOT_PATH) +public class CatalogResource +{ + public static final String ROOT_PATH = "/druid/coordinator/v1/catalog"; + + private final CatalogStorage catalog; + + @Inject + public CatalogResource(CatalogStorage catalog) + { + this.catalog = catalog; + } + + /** + * Create a new table within the indicated schema. + * + * @param table The table specification to create. + * @param ifNew Whether to skip the action if the table already exists. + * This is the same as the SQL IF NOT EXISTS clause. If {@code false}, + * then an error is raised if the table exists. If {@code true}, then + * the action silently does nothing if the table exists. Primarily for + * use in scripts. + * @param req the HTTP request used for authorization. 
+ * @return the version number of the table + */ + @POST + @Path("/tables") + @Consumes(MediaType.APPLICATION_JSON) + @Produces(MediaType.APPLICATION_JSON) + public Response createTable( + TableMetadata table, + @QueryParam("ifnew") boolean ifNew, + @Context final HttpServletRequest req) + { + String dbSchema = table.resolveDbSchema(); + Pair<Response, SchemaSpec> result = validateSchema(dbSchema); + if (result.lhs != null) { + return result.lhs; + } + SchemaSpec schema = result.rhs; + if (!schema.writable()) { + return Actions.badRequest( + Actions.INVALID, + StringUtils.format("Cannot create tables in schema %s", dbSchema)); + } + table = table.withSchema(dbSchema); + try { + table.validate(); + } + catch (IAE e) { + return Actions.badRequest(Actions.INVALID, e.getMessage()); + } + TableSpec spec = table.spec(); + if (!schema.accepts(spec)) { + return Actions.badRequest( + Actions.INVALID, + StringUtils.format( + "Cannot create tables of type %s in schema %s", + spec == null ? "null" : spec.getClass().getSimpleName(), + dbSchema)); + } + try { + catalog.authorizer().authorizeTable(schema, table.name(), Action.WRITE, req); Review Comment: Moved earlier, just after resolving the schema used to do the check. ########## server/src/main/java/org/apache/druid/catalog/DatasourceSpec.java: ########## @@ -0,0 +1,376 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.catalog; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonInclude.Include; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Strings; +import org.apache.druid.catalog.DatasourceColumnSpec.DetailColumnSpec; +import org.apache.druid.catalog.DatasourceColumnSpec.DimensionSpec; +import org.apache.druid.catalog.DatasourceColumnSpec.MeasureSpec; +import org.apache.druid.catalog.TableMetadata.TableType; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.StringUtils; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +/** + * Datasource metadata exchanged via the REST API and stored + * in the catalog. + */ +public class DatasourceSpec extends TableSpec +{ + /** + * Segment grain at ingestion and initial compaction. Aging rules + * may override the value as segments age. If not provided here, + * then it must be provided at ingestion time. + */ + private final String segmentGranularity; + + /** + * Ingestion and auto-compaction rollup granularity. If null, then no + * rollup is enabled. Same as {@code queryGranularity} in and ingest spec, + * but renamed since this granularity affects rollup, not queries. Can be + * overridden at ingestion time. 
The grain may change as segments evolve: + * this is the grain only for ingest. + */ + private final String rollupGranularity; + + /** + * The target segment size at ingestion and initial compaction. + * If 0, then the system setting is used. + */ + private final int targetSegmentRows; + + /** + * Whether to enable auto-compaction. Only relevant if no auto-compaction + * spec is defined, since the existence of a spec overrides this setting. + */ + private final boolean enableAutoCompaction; + + /** + * The offset of segments to be auto-compacted relative to the current + * time. If not present, the auto-compaction default is used if + * auto-compaction is enabled. + */ + private final String autoCompactionDelay; + + private final List<DatasourceColumnSpec> columns; + + public DatasourceSpec( + @JsonProperty("segmentGranularity") String segmentGranularity, + @JsonProperty("rollupGranularity") String rollupGranularity, + @JsonProperty("targetSegmentRows") int targetSegmentRows, + @JsonProperty("enableAutoCompaction") boolean enableAutoCompaction, + @JsonProperty("autoCompactionDelay") String autoCompactionDelay, + @JsonProperty("properties") Map<String, Object> properties, Review Comment: Properties are "extensions": things provided by something other than Druid itself. For example, I might want to track if the column contains PII. Or, might want to track the input source that defined the column. Or, might want to add info about the kind of UI widget to use to display it. Rather than creating my own parallel schema for such use cases, I just add a custom property. We might define a naming convention "com.foo.is-pii", or "org.whiz.lineage.input-source". Druid doesn't understand them, but the external tool (or user) does. ########## server/src/main/java/org/apache/druid/catalog/DatasourceSpec.java: ########## @@ -0,0 +1,376 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.catalog; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonInclude.Include; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Strings; +import org.apache.druid.catalog.DatasourceColumnSpec.DetailColumnSpec; +import org.apache.druid.catalog.DatasourceColumnSpec.DimensionSpec; +import org.apache.druid.catalog.DatasourceColumnSpec.MeasureSpec; +import org.apache.druid.catalog.TableMetadata.TableType; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.StringUtils; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +/** + * Datasource metadata exchanged via the REST API and stored + * in the catalog. + */ +public class DatasourceSpec extends TableSpec +{ + /** + * Segment grain at ingestion and initial compaction. Aging rules + * may override the value as segments age. If not provided here, + * then it must be provided at ingestion time. 
+ */ + private final String segmentGranularity; + + /** + * Ingestion and auto-compaction rollup granularity. If null, then no + * rollup is enabled. Same as {@code queryGranularity} in and ingest spec, + * but renamed since this granularity affects rollup, not queries. Can be + * overridden at ingestion time. The grain may change as segments evolve: + * this is the grain only for ingest. + */ + private final String rollupGranularity; + + /** + * The target segment size at ingestion and initial compaction. + * If 0, then the system setting is used. + */ + private final int targetSegmentRows; + + /** + * Whether to enable auto-compaction. Only relevant if no auto-compaction + * spec is defined, since the existence of a spec overrides this setting. + */ + private final boolean enableAutoCompaction; + + /** + * The offset of segments to be auto-compacted relative to the current + * time. If not present, the auto-compaction default is used if + * auto-compaction is enabled. + */ + private final String autoCompactionDelay; + + private final List<DatasourceColumnSpec> columns; + + public DatasourceSpec( + @JsonProperty("segmentGranularity") String segmentGranularity, + @JsonProperty("rollupGranularity") String rollupGranularity, Review Comment: Good points. I was always confused by the "query" granularity: it has little to do with a query: it is the ingest/compaction time granularity. Hence my attempt to sneak in a different name. In fact, I could imagine having a "true" query granularity as a separate field: we decided to apply rollup of 1m, but used to have 1s. To get consistent results, use a query-time granularity of 1m, even for the older 1s segments. But, that's just a whim, not implemented here (because it would take additional query work.) As to the usage: I'm aware of the discussion. The other half of the argument is that other dimensions might be similarly trimmed. Geo data might be rounded to a city level. 
Sales data rounded from (store, cashier, lane) to just (store). In this case, time is similarly rounded. Since this is a prototype, the thought was that time is special: it is what enables other forms of compression. If the rollup granularity is missing, Druid stores data at the detail level, even if I do the other dimension rounding. Only by making a rollup grain of some actual time (even 1ms), do I get rollup. So, time is special. The other approach, when we work out the details of dimensions and measures, is to drop this field, add a "rollupEnabled" field, and require the user to specify the grain via a `TIME_FLOOR` attached to the `__time` column, in parallel with the `SUM(LONG)` attached to a measure. ########## server/src/main/java/org/apache/druid/catalog/InputTableSpec.java: ########## @@ -0,0 +1,232 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.catalog; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Strings; +import org.apache.druid.catalog.TableMetadata.TableType; +import org.apache.druid.data.input.InputFormat; +import org.apache.druid.data.input.InputSource; +import org.apache.druid.java.util.common.IAE; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +/** + * Definition of an external input source, primarily for ingestion. + * The components are derived from those for Druid ingestion: an + * input source, a format and a set of columns. Also provides + * properties, as do all table definitions. + */ +public class InputTableSpec extends TableSpec +{ + private final InputSource inputSource; + private final InputFormat format; + private final List<InputColumnSpec> columns; + + public InputTableSpec( + @JsonProperty("inputSource") InputSource inputSource, + @JsonProperty("format") InputFormat format, + @JsonProperty("columns") List<InputColumnSpec> columns, + @JsonProperty("properties") Map<String, Object> properties + ) + { + super(properties); + this.inputSource = inputSource; + this.format = format; + this.columns = columns; + } + + @Override + public TableType type() + { + return TableType.INPUT; + } + + @JsonProperty("inputSource") + public InputSource inputSource() Review Comment: Absolutely; that is the next bit of work to be done. Having fun with the old "what SQL syntax can we use for this non-standard concept" game. Current thought is a Calcite macro, something like: ```sql SELECT * FROM TABLE(INPUT(myInputTable, files = "foo.csv, bar.csv")) ``` With the bits and pieces adjusted to fit Calcite's existing constraints. 
Would love to mimic Snowflake: ```sql SELECT * FROM myInputTable(files = "foo.csv, bar.csv") ``` But Calcite has already grabbed that syntax to specify columns: ```sql FROM myInputTable(a VARCHAR, b INT, c DOUBLE) ``` Anyway, this is work in progress, details to be discussed separately. ########## server/src/main/java/org/apache/druid/server/http/CatalogResource.java: ########## @@ -0,0 +1,488 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.server.http; + +import com.google.common.base.Strings; +import org.apache.curator.shaded.com.google.common.collect.Lists; +import org.apache.druid.catalog.Actions; +import org.apache.druid.catalog.CatalogStorage; +import org.apache.druid.catalog.SchemaRegistry.SchemaSpec; +import org.apache.druid.catalog.TableId; +import org.apache.druid.catalog.TableMetadata; +import org.apache.druid.catalog.TableSpec; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.Pair; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.metadata.catalog.CatalogManager; +import org.apache.druid.metadata.catalog.CatalogManager.DuplicateKeyException; +import org.apache.druid.metadata.catalog.CatalogManager.NotFoundException; +import org.apache.druid.metadata.catalog.CatalogManager.OutOfDateException; +import org.apache.druid.server.security.Action; +import org.apache.druid.server.security.AuthorizationUtils; +import org.apache.druid.server.security.ForbiddenException; +import org.apache.druid.server.security.ResourceType; + +import javax.inject.Inject; +import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.Consumes; +import javax.ws.rs.DELETE; +import javax.ws.rs.GET; +import javax.ws.rs.POST; +import javax.ws.rs.Path; +import javax.ws.rs.PathParam; +import javax.ws.rs.Produces; +import javax.ws.rs.QueryParam; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * REST endpoint for user and internal catalog actions. Catalog actions + * occur at the global level (all schemas), the schema level, or the + * table level. + * + * @see {@link CatalogListenerResource} for the client-side API. 
+ */ +@Path(CatalogResource.ROOT_PATH) +public class CatalogResource +{ + public static final String ROOT_PATH = "/druid/coordinator/v1/catalog"; + + private final CatalogStorage catalog; + + @Inject + public CatalogResource(CatalogStorage catalog) + { + this.catalog = catalog; + } + + /** + * Create a new table within the indicated schema. + * + * @param table The table specification to create. + * @param ifNew Whether to skip the action if the table already exists. + * This is the same as the SQL IF NOT EXISTS clause. If {@code false}, + * then an error is raised if the table exists. If {@code true}, then + * the action silently does nothing if the table exists. Primarily for + * use in scripts. + * @param req the HTTP request used for authorization. + * @return the version number of the table + */ + @POST + @Path("/tables") + @Consumes(MediaType.APPLICATION_JSON) + @Produces(MediaType.APPLICATION_JSON) + public Response createTable( + TableMetadata table, + @QueryParam("ifnew") boolean ifNew, + @Context final HttpServletRequest req) + { + String dbSchema = table.resolveDbSchema(); + Pair<Response, SchemaSpec> result = validateSchema(dbSchema); + if (result.lhs != null) { + return result.lhs; + } + SchemaSpec schema = result.rhs; + if (!schema.writable()) { + return Actions.badRequest( + Actions.INVALID, + StringUtils.format("Cannot create tables in schema %s", dbSchema)); + } + table = table.withSchema(dbSchema); + try { + table.validate(); + } + catch (IAE e) { + return Actions.badRequest(Actions.INVALID, e.getMessage()); + } + TableSpec spec = table.spec(); + if (!schema.accepts(spec)) { + return Actions.badRequest( + Actions.INVALID, + StringUtils.format( + "Cannot create tables of type %s in schema %s", + spec == null ? 
"null" : spec.getClass().getSimpleName(), + dbSchema)); + } + try { + catalog.authorizer().authorizeTable(schema, table.name(), Action.WRITE, req); + } + catch (ForbiddenException e) { + return Actions.forbidden(e); + } + try { + long createVersion = catalog.tables().create(table); + return Actions.okWithVersion(createVersion); + } + catch (DuplicateKeyException e) { + if (!ifNew) { + return Actions.badRequest( + Actions.DUPLICATE_ERROR, + StringUtils.format( + "A table of name %s.%s aleady exists", + table.dbSchema(), + table.name())); + } else { + return Actions.okWithVersion(0); Review Comment: It could, but that would take an extra DB read. This feature mimics the SQL `CREATE TABLE myTempTable IF NOT EXISTS` use case where all we want is to not fail if we've already done this step; we typically don't then change the definition. If we do want to change anything, we've got to read the existing values, which would provide the version. By not providing the version here, we save a DB read internally, since the failed SQL `INSERT` didn't fetch it. ########## server/src/main/java/org/apache/druid/catalog/Actions.java: ########## @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.catalog; + +import com.google.common.collect.ImmutableMap; +import org.apache.druid.server.security.ForbiddenException; + +import javax.ws.rs.core.Response; + +import java.util.Map; + +/** + * Helper functions for the catalog REST API actions. + */ +public class Actions Review Comment: Yes. I've already written variations of these several times here and there in Druid. Making it more general is left as a later exercise to minimize the size of this PR. ########## server/src/main/java/org/apache/druid/server/http/CatalogResource.java: ########## @@ -0,0 +1,488 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.server.http; + +import com.google.common.base.Strings; +import org.apache.curator.shaded.com.google.common.collect.Lists; +import org.apache.druid.catalog.Actions; +import org.apache.druid.catalog.CatalogStorage; +import org.apache.druid.catalog.SchemaRegistry.SchemaSpec; +import org.apache.druid.catalog.TableId; +import org.apache.druid.catalog.TableMetadata; +import org.apache.druid.catalog.TableSpec; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.Pair; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.metadata.catalog.CatalogManager; +import org.apache.druid.metadata.catalog.CatalogManager.DuplicateKeyException; +import org.apache.druid.metadata.catalog.CatalogManager.NotFoundException; +import org.apache.druid.metadata.catalog.CatalogManager.OutOfDateException; +import org.apache.druid.server.security.Action; +import org.apache.druid.server.security.AuthorizationUtils; +import org.apache.druid.server.security.ForbiddenException; +import org.apache.druid.server.security.ResourceType; + +import javax.inject.Inject; +import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.Consumes; +import javax.ws.rs.DELETE; +import javax.ws.rs.GET; +import javax.ws.rs.POST; +import javax.ws.rs.Path; +import javax.ws.rs.PathParam; +import javax.ws.rs.Produces; +import javax.ws.rs.QueryParam; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * REST endpoint for user and internal catalog actions. Catalog actions + * occur at the global level (all schemas), the schema level, or the + * table level. + * + * @see {@link CatalogListenerResource} for the client-side API. 
+ */ +@Path(CatalogResource.ROOT_PATH) +public class CatalogResource +{ + public static final String ROOT_PATH = "/druid/coordinator/v1/catalog"; + + private final CatalogStorage catalog; + + @Inject + public CatalogResource(CatalogStorage catalog) + { + this.catalog = catalog; + } + + /** + * Create a new table within the indicated schema. + * + * @param table The table specification to create. + * @param ifNew Whether to skip the action if the table already exists. + * This is the same as the SQL IF NOT EXISTS clause. If {@code false}, + * then an error is raised if the table exists. If {@code true}, then + * the action silently does nothing if the table exists. Primarily for + * use in scripts. + * @param req the HTTP request used for authorization. + * @return the version number of the table + */ + @POST + @Path("/tables") + @Consumes(MediaType.APPLICATION_JSON) + @Produces(MediaType.APPLICATION_JSON) + public Response createTable( + TableMetadata table, + @QueryParam("ifnew") boolean ifNew, + @Context final HttpServletRequest req) + { + String dbSchema = table.resolveDbSchema(); + Pair<Response, SchemaSpec> result = validateSchema(dbSchema); + if (result.lhs != null) { + return result.lhs; + } + SchemaSpec schema = result.rhs; + if (!schema.writable()) { + return Actions.badRequest( + Actions.INVALID, + StringUtils.format("Cannot create tables in schema %s", dbSchema)); + } + table = table.withSchema(dbSchema); + try { + table.validate(); + } + catch (IAE e) { + return Actions.badRequest(Actions.INVALID, e.getMessage()); + } + TableSpec spec = table.spec(); + if (!schema.accepts(spec)) { + return Actions.badRequest( + Actions.INVALID, + StringUtils.format( + "Cannot create tables of type %s in schema %s", + spec == null ? 
"null" : spec.getClass().getSimpleName(), + dbSchema)); + } + try { + catalog.authorizer().authorizeTable(schema, table.name(), Action.WRITE, req); + } + catch (ForbiddenException e) { + return Actions.forbidden(e); + } + try { + long createVersion = catalog.tables().create(table); + return Actions.okWithVersion(createVersion); + } + catch (DuplicateKeyException e) { + if (!ifNew) { + return Actions.badRequest( + Actions.DUPLICATE_ERROR, + StringUtils.format( + "A table of name %s.%s aleady exists", + table.dbSchema(), + table.name())); + } else { + return Actions.okWithVersion(0); + } + } + catch (Exception e) { + return Actions.exception(e); + } + } + + /** + * Update a table within the given schema. + * + * @param dbSchema The name of the Druid schema, which must be writable + * and the user must have at least read access. + * @param name The name of the table definition to modify. The user must + * have write access to the table. + * @param spec The new table definition. + * @param version An optional table version. If provided, the metadata DB + * entry for the table must be at this exact version or the update + * will fail. (Provides "optimistic locking.") If omitted (that is, + * if zero), then no update conflict change is done. + * @param req the HTTP request used for authorization. + * @return the new version number of the table + */ + @POST + @Path("/tables/{dbSchema}/{name}") + @Consumes(MediaType.APPLICATION_JSON) + @Produces(MediaType.APPLICATION_JSON) + public Response updateTableDefn( + @PathParam("dbSchema") String dbSchema, + @PathParam("name") String name, + TableSpec spec, + @QueryParam("version") long version, Review Comment: Killed three birds with one stone: an update time, and a free way to do optimistic locking for those who are into such things. Also helps keep the remote cache in sync. It will be foiled by those who make more than one change per ms, but I suspect that will happen rarely. 
If it does, the cheat is to sleep for 1ms to bump the number. To be bullet-proof, there needs to be some prevention of moving backwards if the auto clock sync decides our system clock is moving fast and sets it back. We'll fine-tune that later, once the basics are seen to work. ########## server/src/main/java/org/apache/druid/catalog/DatasourceSpec.java: ########## @@ -0,0 +1,376 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.catalog; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonInclude.Include; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Strings; +import org.apache.druid.catalog.DatasourceColumnSpec.DetailColumnSpec; +import org.apache.druid.catalog.DatasourceColumnSpec.DimensionSpec; +import org.apache.druid.catalog.DatasourceColumnSpec.MeasureSpec; +import org.apache.druid.catalog.TableMetadata.TableType; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.StringUtils; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +/** + * Datasource metadata exchanged via the REST API and stored + * in the catalog. + */ +public class DatasourceSpec extends TableSpec +{ + /** + * Segment grain at ingestion and initial compaction. Aging rules + * may override the value as segments age. If not provided here, + * then it must be provided at ingestion time. + */ + private final String segmentGranularity; + + /** + * Ingestion and auto-compaction rollup granularity. If null, then no + * rollup is enabled. Same as {@code queryGranularity} in and ingest spec, + * but renamed since this granularity affects rollup, not queries. Can be + * overridden at ingestion time. The grain may change as segments evolve: + * this is the grain only for ingest. + */ + private final String rollupGranularity; + + /** + * The target segment size at ingestion and initial compaction. + * If 0, then the system setting is used. + */ + private final int targetSegmentRows; + + /** + * Whether to enable auto-compaction. Only relevant if no auto-compaction + * spec is defined, since the existence of a spec overrides this setting. 
+ */ + private final boolean enableAutoCompaction; + + /** + * The offset of segments to be auto-compacted relative to the current + * time. If not present, the auto-compaction default is used if + * auto-compaction is enabled. + */ + private final String autoCompactionDelay; + + private final List<DatasourceColumnSpec> columns; + + public DatasourceSpec( + @JsonProperty("segmentGranularity") String segmentGranularity, + @JsonProperty("rollupGranularity") String rollupGranularity, + @JsonProperty("targetSegmentRows") int targetSegmentRows, + @JsonProperty("enableAutoCompaction") boolean enableAutoCompaction, + @JsonProperty("autoCompactionDelay") String autoCompactionDelay, + @JsonProperty("properties") Map<String, Object> properties, + @JsonProperty("columns") List<DatasourceColumnSpec> columns Review Comment: Partial: only those for which the user wants to provide info beyond what Druid already knows. Examples: 1. Add a new column, not yet in any datasource, to use in ingestion. 2. Column is ingested with multiple types, pick one as the preferred type. 3. Column exists, but no longer needed. Mark it as hidden. 4. Add a comment to explain the column. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
