joshelser commented on a change in pull request #4149: NIFI-7257 Added HadoopDBCPConnectionPool URL: https://github.com/apache/nifi/pull/4149#discussion_r396487051
########## File path: nifi-nar-bundles/nifi-standard-services/nifi-hadoop-dbcp-service-bundle/nifi-hadoop-dbcp-service/src/main/java/org/apache/nifi/dbcp/HadoopDBCPConnectionPool.java ########## @@ -0,0 +1,607 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nifi.dbcp; + +import org.apache.commons.dbcp2.BasicDataSource; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.nifi.annotation.behavior.DynamicProperty; +import org.apache.nifi.annotation.behavior.RequiresInstanceClassLoading; +import org.apache.nifi.annotation.documentation.CapabilityDescription; +import org.apache.nifi.annotation.documentation.Tags; +import org.apache.nifi.annotation.lifecycle.OnDisabled; +import org.apache.nifi.annotation.lifecycle.OnEnabled; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.components.PropertyValue; +import org.apache.nifi.components.ValidationContext; +import org.apache.nifi.components.ValidationResult; +import org.apache.nifi.controller.AbstractControllerService; +import org.apache.nifi.controller.ConfigurationContext; +import org.apache.nifi.controller.ControllerServiceInitializationContext; +import org.apache.nifi.expression.ExpressionLanguageScope; +import org.apache.nifi.hadoop.KerberosProperties; +import org.apache.nifi.hadoop.SecurityUtil; +import org.apache.nifi.kerberos.KerberosCredentialsService; +import org.apache.nifi.processor.exception.ProcessException; +import org.apache.nifi.processor.util.StandardValidators; +import org.apache.nifi.reporting.InitializationException; +import org.apache.nifi.security.krb.KerberosKeytabUser; +import org.apache.nifi.security.krb.KerberosPasswordUser; +import org.apache.nifi.security.krb.KerberosUser; + +import javax.security.auth.login.LoginException; +import java.io.File; +import java.io.IOException; +import java.lang.reflect.UndeclaredThrowableException; +import java.security.PrivilegedExceptionAction; +import java.sql.Connection; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; + +/** + * Implementation of Database Connection Pooling Service for Hadoop related JDBC Service. + * Apache DBCP is used for connection pooling functionality. + * + */ +@RequiresInstanceClassLoading +@Tags({ "dbcp", "jdbc", "database", "connection", "pooling", "store", "hadoop" }) +@CapabilityDescription("Provides a Database Connection Pooling Service for Hadoop related JDBC services. This service requires that " + + "the Database Driver Locations contains some version of a hadoop-commons JAR, or a shaded JAR that shades hadoop-commons.") +@DynamicProperty(name = "The name of a Hadoop configuration property.", value = "The value of the given Hadoop configuration property.", + description = "These properties will be set on the Hadoop configuration after loading any provided configuration files.", + expressionLanguageScope = ExpressionLanguageScope.VARIABLE_REGISTRY) +public class HadoopDBCPConnectionPool extends AbstractControllerService implements DBCPService { + + private static final String ALLOW_EXPLICIT_KEYTAB = "NIFI_ALLOW_EXPLICIT_KEYTAB"; + + private static final String HADOOP_CONFIGURATION_CLASS = "org.apache.hadoop.conf.Configuration"; + private static final String HADOOP_UGI_CLASS = "org.apache.hadoop.security.UserGroupInformation"; + + private static final String DEFAULT_MIN_IDLE = "0"; + private static final String DEFAULT_MAX_IDLE = "8"; + private static final String DEFAULT_MAX_CONN_LIFETIME = "-1"; + private static final String DEFAULT_EVICTION_RUN_PERIOD = String.valueOf(-1L); + private static final String DEFAULT_MIN_EVICTABLE_IDLE_TIME = "30 mins"; + private static final String DEFAULT_SOFT_MIN_EVICTABLE_IDLE_TIME = String.valueOf(-1L); + + public static final PropertyDescriptor DATABASE_URL = new PropertyDescriptor.Builder() + .name("Database Connection URL") + .description("A database connection URL used to connect to a database. May contain database system name, host, port, database name and some parameters." + + " The exact syntax of a database connection URL is specified by your DBMS.") + .defaultValue(null) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .required(true) + .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) + .build(); + + public static final PropertyDescriptor DB_DRIVERNAME = new PropertyDescriptor.Builder() + .name("Database Driver Class Name") + .description("Database driver class name") + .defaultValue(null) + .required(true) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) + .build(); + + public static final PropertyDescriptor DB_DRIVER_LOCATION = new PropertyDescriptor.Builder() + .name("database-driver-locations") + .displayName("Database Driver Location(s)") + .description("Comma-separated list of files/folders and/or URLs containing the driver JAR and its dependencies (if any). " + + "For example '/var/tmp/phoenix-client.jar'. NOTE: It is required that the resources specified by this property provide " + + "the classes from hadoop-common, such as Configuration and UserGroupInformation.") + .defaultValue(null) + .required(true) + .addValidator(StandardValidators.createListValidator(true, true, StandardValidators.createURLorFileValidator())) + .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) + .dynamicallyModifiesClasspath(true) + .build(); + + static final PropertyDescriptor HADOOP_CONFIGURATION_RESOURCES = new PropertyDescriptor.Builder() + .name("hadoop-config-resources") + .displayName("Hadoop Configuration Resources") + .description("A file, or comma separated list of files, which contain the Hadoop configuration (core-site.xml, etc.). Without this, Hadoop " + + "will search the classpath, or will revert to a default configuration. Note that to enable authentication with Kerberos, " + + "the appropriate properties must be set in the configuration files.") + .required(false) + .addValidator(StandardValidators.createListValidator(true, true, StandardValidators.createURLorFileValidator())) + .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) + .dynamicallyModifiesClasspath(true) + .build(); + + public static final PropertyDescriptor DB_USER = new PropertyDescriptor.Builder() + .name("Database User") + .description("Database user name") Review comment: Disparity between this descriptor and the corresponding DB_PASSWORD field. I think either is fine (e.g. `Database <foo>` or `The <foo> for the database`), but it would be nice to be consistent. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] With regards, Apache Git Services
