leesf commented on a change in pull request #969: [HUDI-251] JDBC incremental load to HUDI DeltaStreamer URL: https://github.com/apache/incubator-hudi/pull/969#discussion_r338723377
########## File path: hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JDBCSource.java ########## @@ -0,0 +1,235 @@ +package org.apache.hudi.utilities.sources; + +import java.util.Arrays; +import java.util.Set; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IOUtils; +import org.apache.hudi.DataSourceUtils; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.common.util.TypedProperties; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.utilities.schema.SchemaProvider; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Column; +import org.apache.spark.sql.DataFrameReader; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.functions; +import org.apache.spark.sql.types.DataTypes; +import org.jetbrains.annotations.NotNull; + + +public class JDBCSource extends RowSource { + + private static Logger LOG = LogManager.getLogger(JDBCSource.class); + + public JDBCSource(TypedProperties props, JavaSparkContext sparkContext, SparkSession sparkSession, + SchemaProvider schemaProvider) { + super(props, sparkContext, sparkSession, schemaProvider); + } + + private static DataFrameReader validatePropsAndGetDataFrameReader(final SparkSession session, + final TypedProperties properties) + throws HoodieException { + DataFrameReader dataFrameReader = null; + FSDataInputStream passwordFileStream = null; + try { + dataFrameReader = session.read().format("jdbc"); + dataFrameReader = dataFrameReader.option(Config.URL_PROP, properties.getString(Config.URL)); + dataFrameReader = dataFrameReader.option(Config.USER_PROP, properties.getString(Config.USER)); + dataFrameReader = dataFrameReader.option(Config.DRIVER_PROP, properties.getString(Config.DRIVER_CLASS)); + dataFrameReader = dataFrameReader + .option(Config.RDBMS_TABLE_PROP, properties.getString(Config.RDBMS_TABLE_NAME)); + + if (properties.containsKey(Config.PASSWORD) && !StringUtils + .isNullOrEmpty(properties.getString(Config.PASSWORD))) { Review comment: maybe the value of `Config.PASSWORD` would be empty in some case. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services