zachjsh commented on code in PR #13627: URL: https://github.com/apache/druid/pull/13627#discussion_r1067568739
########## server/src/main/java/org/apache/druid/catalog/model/table/HttpInputSourceDefn.java: ########## @@ -0,0 +1,304 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.catalog.model.table; + +import com.google.common.collect.ImmutableMap; +import org.apache.druid.catalog.model.CatalogUtils; +import org.apache.druid.catalog.model.ColumnSpec; +import org.apache.druid.catalog.model.table.BaseTableFunction.Parameter; +import org.apache.druid.catalog.model.table.TableFunction.ParameterDefn; +import org.apache.druid.catalog.model.table.TableFunction.ParameterType; +import org.apache.druid.data.input.InputSource; +import org.apache.druid.data.input.impl.HttpInputSource; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.metadata.DefaultPasswordProvider; +import org.apache.druid.metadata.EnvironmentVariablePasswordProvider; +import org.apache.druid.utils.CollectionUtils; + +import java.net.URI; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +/** + * Definition of an HTTP input source source. + * <p> + * Provides a parameterized form where the user defines a value for the + * {@code uriTemplate} table property in the table spec, then provides the partial URLs + * in a table function to use for that one query. The final URIs are created by combining + * the template and the arguments. Example: + * <li>{@code uriTemplate} property): "http://example.com/data/kttm-{}.json"</li> + * <li>{@code uris} function argument: "22-Nov-21, 22-Nov-22"</li> + * </ul> + * <p> + * When the template is used, the format is optional: it can be provided either with + * the table spec or at runtime, depending on what the user wants to accomplish. In the + * above, where the ".json" is encoded in the template, it makes sense to include the format + * with the spec. If the template was "http://example.com/data/{}", and the data comes in + * multiple formats, it might make sense to specify the format in the query. In this case, + * the table spec acts more like a connection. + * <p> + * If the template is not used, then the {@code uris} property must be provided in the + * table spec, along with the corresponding format. + * <p> + * The above semantics make a bit more sense when we realize that the spec can also + * provide a user name and password. When those are provided, then the input source must + * name a single site: the one for which the credentials are valid. Given this, the only + * table spec that makes sense is one where the URI is defined: either as a template or + * explicitly. + * <p> + * When used as an ad-hoc function, the user specifies the uris and optional user name + * and password: the template is not available (or useful) in the ad-hoc case. + * <p> + * Table function parameters are cleaned up relative to the input source field names to + * make them a bit easier to use. + */ +public class HttpInputSourceDefn extends FormattedInputSourceDefn +{ + public static final String TYPE_KEY = HttpInputSource.TYPE_KEY; + + // Catalog properties that map to fields in the HttpInputSource. See + // that class for the meaning of these properties. + + public static final String URI_TEMPLATE_PROPERTY = "uriTemplate"; + + public static final String URIS_PARAMETER = "uris"; + + // Note, cannot be the simpler "user" since USER is a reserved word in SQL + // and we don't want to require users to quote "user" each time it is used. + public static final String USER_PARAMETER = "userName"; + public static final String PASSWORD_PARAMETER = "password"; + public static final String PASSWORD_ENV_VAR_PARAMETER = "passwordEnvVar"; + + private static final List<ParameterDefn> URI_PARAMS = Arrays.asList( + new Parameter(URIS_PARAMETER, ParameterType.VARCHAR_ARRAY, true) + ); + + private static final List<ParameterDefn> USER_PWD_PARAMS = Arrays.asList( + new Parameter(USER_PARAMETER, ParameterType.VARCHAR, true), + new Parameter(PASSWORD_PARAMETER, ParameterType.VARCHAR, true), + new Parameter(PASSWORD_ENV_VAR_PARAMETER, ParameterType.VARCHAR, true) + ); + + // Field names in the HttpInputSource + private static final String URIS_FIELD = "uris"; + private static final String PASSWORD_FIELD = "httpAuthenticationPassword"; + private static final String USERNAME_FIELD = "httpAuthenticationUsername"; + + @Override + public String typeValue() + { + return TYPE_KEY; + } + + @Override + protected Class<? extends InputSource> inputSourceClass() + { + return HttpInputSource.class; + } + + @Override + public void validate(ResolvedExternalTable table) + { + final Map<String, Object> sourceMap = table.inputSourceMap; + final boolean hasUri = sourceMap.containsKey(URIS_FIELD); + final String uriTemplate = table.resolvedTable().stringProperty(URI_TEMPLATE_PROPERTY); + final boolean hasTemplate = uriTemplate != null; + final boolean hasFormat = table.inputFormatMap != null; + final boolean hasColumns = !CollectionUtils.isNullOrEmpty(table.resolvedTable().spec().columns()); + + if (!hasUri && !hasTemplate) { + throw new IAE( + "External HTTP tables must provide either a URI or a %s property", + URI_TEMPLATE_PROPERTY + ); + } + if (hasUri && hasTemplate) { + throw new IAE( + "External HTTP tables must provide only one of a URI or a %s property", + URI_TEMPLATE_PROPERTY + ); + } + if (hasUri && !hasFormat) { + throw new IAE( + "An external HTTP table with a URI must also provide the corresponding format" + ); + } + if (hasUri && !hasColumns) { + throw new IAE( + "An external HTTP table with a URI must also provide the corresponding columns" + ); + } + if (hasTemplate) { + + // Verify the template + templateMatcher(uriTemplate); + + // Patch in a dummy URI so that validation of the rest of the fields + // will pass. + try { + sourceMap.put( + URIS_FIELD, + Collections.singletonList(new URI("https://bogus.com/file")) + ); + } + catch (Exception e) { + throw new ISE(e, "URI parse failed"); + } + } + super.validate(table); + } + + private Matcher templateMatcher(String uriTemplate) + { + Pattern p = Pattern.compile("\\{}"); + Matcher m = p.matcher(uriTemplate); + if (!m.find()) { + throw new IAE( + "Value [%s] for property %s must include a '{}' placeholder", + uriTemplate, + URI_TEMPLATE_PROPERTY + ); + } + return m; + } + + @Override + protected List<ParameterDefn> adHocTableFnParameters() + { + return CatalogUtils.concatLists(URI_PARAMS, USER_PWD_PARAMS); + } + + @Override + protected void convertArgsToSourceMap(Map<String, Object> jsonMap, Map<String, Object> args) + { + jsonMap.put(InputSource.TYPE_PROPERTY, HttpInputSource.TYPE_KEY); + convertUriArg(jsonMap, args); + convertUserPasswordArgs(jsonMap, args); + } + + @Override + public TableFunction partialTableFn(ResolvedExternalTable table) + { + List<ParameterDefn> params = Collections.emptyList(); + + // Does the table define URIs? + Map<String, Object> sourceMap = table.inputSourceMap; + if (!sourceMap.containsKey(URIS_FIELD)) { + params = CatalogUtils.concatLists(params, URI_PARAMS); + } + + // Does the table define a user or password? + if (!sourceMap.containsKey(USERNAME_FIELD) && !sourceMap.containsKey(PASSWORD_FIELD)) { + params = CatalogUtils.concatLists(params, USER_PWD_PARAMS); + } + + // Does the table define a format? + if (table.inputFormatMap == null) { + params = addFormatParameters(params); + } + return new PartialTableFunction(table, params); + } + + @Override + protected ExternalTableSpec convertCompletedTable( + final ResolvedExternalTable table, + final Map<String, Object> args, + final List<ColumnSpec> columns + ) + { + // Get URIs from table if defined, else from arguments. + final Map<String, Object> sourceMap = new HashMap<>(table.inputSourceMap); + final String uriTemplate = table.resolvedTable().stringProperty(URI_TEMPLATE_PROPERTY); + if (uriTemplate != null) { + convertUriTemplateArgs(sourceMap, uriTemplate, args); + } else if (!sourceMap.containsKey(URIS_FIELD)) { + convertUriArg(sourceMap, args); + } + + // Get user and password from the table if defined, else from arguments. + if (!sourceMap.containsKey(USERNAME_FIELD) && !sourceMap.containsKey(PASSWORD_FIELD)) { + convertUserPasswordArgs(sourceMap, args); + } + return convertPartialFormattedTable(table, args, columns, sourceMap); + } + + private void convertUriTemplateArgs(Map<String, Object> jsonMap, String uriTemplate, Map<String, Object> args) + { + List<String> uriStrings = CatalogUtils.getStringArray(args, URIS_PARAMETER); Review Comment: Have we verified before this that the `URIS_PARAMETER` is present in the args, or can this lead to NPE somehow? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
