This is an automated email from the ASF dual-hosted git repository.
kirs pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/incubator-seatunnel.git
The following commit(s) were added to refs/heads/dev by this push:
new 5250c64 [Refactor][connector-spark][hbase] Conn spark hbase refactor
(#1541)
5250c64 is described below
commit 5250c641b7af7e89f2a7c91595f2268b4ebb8caa
Author: mans2singh <[email protected]>
AuthorDate: Wed Mar 23 00:47:02 2022 -0400
[Refactor][connector-spark][hbase] Conn spark hbase refactor (#1541)
* [Refactor][connector-spark][hbase] Refactored config parameters
* [Refactor][connector-spark][hbase] Separated imports
---
.../scala/org/apache/seatunnel/spark/Config.scala | 44 ++++++++++++++++++++++
.../org/apache/seatunnel/spark/sink/Hbase.scala | 11 +++---
.../org/apache/seatunnel/spark/source/Hbase.scala | 3 +-
3 files changed, 52 insertions(+), 6 deletions(-)
diff --git
a/seatunnel-connectors/seatunnel-connectors-spark/seatunnel-connector-spark-hbase/src/main/scala/org/apache/seatunnel/spark/Config.scala
b/seatunnel-connectors/seatunnel-connectors-spark/seatunnel-connector-spark-hbase/src/main/scala/org/apache/seatunnel/spark/Config.scala
new file mode 100644
index 0000000..f74128a
--- /dev/null
+++
b/seatunnel-connectors/seatunnel-connectors-spark/seatunnel-connector-spark-hbase/src/main/scala/org/apache/seatunnel/spark/Config.scala
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.seatunnel.spark
+
+/**
+ * Configuration keys shared by the HBase Spark source and sink connectors.
+ */
+object Config extends Serializable {
+
+ /**
+ * Key of the staging directory; required by the sink, which appends a slash and the current time in millis.
+ */
+ val STAGING_DIR = "staging_dir"
+
+ /**
+ * Key of the HBase catalog; required by both the source and the sink.
+ */
+ val CATALOG = "catalog"
+
+ /**
+ * Key of the sink save mode; the sink falls back to lower-cased HbaseSaveMode.Append when it is unset.
+ */
+ val SAVE_MODE = "save_mode"
+
+ /**
+ * Key of the ZooKeeper quorum setting; required by both the source and the sink.
+ */
+ val HBASE_ZOOKEEPER_QUORUM = "hbase.zookeeper.quorum"
+
+}
diff --git
a/seatunnel-connectors/seatunnel-connectors-spark/seatunnel-connector-spark-hbase/src/main/scala/org/apache/seatunnel/spark/sink/Hbase.scala
b/seatunnel-connectors/seatunnel-connectors-spark/seatunnel-connector-spark-hbase/src/main/scala/org/apache/seatunnel/spark/sink/Hbase.scala
index ed375de..62f5778 100644
---
a/seatunnel-connectors/seatunnel-connectors-spark/seatunnel-connector-spark-hbase/src/main/scala/org/apache/seatunnel/spark/sink/Hbase.scala
+++
b/seatunnel-connectors/seatunnel-connectors-spark/seatunnel-connector-spark-hbase/src/main/scala/org/apache/seatunnel/spark/sink/Hbase.scala
@@ -30,6 +30,7 @@ import org.apache.hadoop.hbase.util.Bytes
import org.apache.seatunnel.common.config.CheckConfigUtil.checkAllExists
import org.apache.seatunnel.common.config.CheckResult
import org.apache.seatunnel.shade.com.typesafe.config.ConfigFactory
+import org.apache.seatunnel.spark.Config.{CATALOG, HBASE_ZOOKEEPER_QUORUM, SAVE_MODE, STAGING_DIR}
import org.apache.seatunnel.spark.SparkEnvironment
import org.apache.seatunnel.spark.batch.SparkBatchSink
import org.apache.spark.internal.Logging
@@ -45,13 +46,13 @@ class Hbase extends SparkBatchSink with Logging {
var zookeeperPrefix = "zookeeper."
override def checkConfig(): CheckResult = {
- checkAllExists(config, "hbase.zookeeper.quorum", "catalog", "staging_dir")
+ checkAllExists(config, HBASE_ZOOKEEPER_QUORUM, CATALOG, STAGING_DIR)
}
override def prepare(env: SparkEnvironment): Unit = {
val defaultConfig = ConfigFactory.parseMap(
Map(
- "save_mode" -> HbaseSaveMode.Append.toString.toLowerCase))
+ SAVE_MODE -> HbaseSaveMode.Append.toString.toLowerCase))
config = config.withFallback(defaultConfig)
hbaseConf =
HBaseConfiguration.create(env.getSparkSession.sessionState.newHadoopConf())
@@ -70,8 +71,8 @@ class Hbase extends SparkBatchSink with Logging {
override def output(df: Dataset[Row], environment: SparkEnvironment): Unit = {
var dfWithStringFields = df
val colNames = df.columns
- val catalog = config.getString("catalog")
- val stagingDir = config.getString("staging_dir") + "/" + System.currentTimeMillis().toString
+ val catalog = config.getString(CATALOG)
+ val stagingDir = config.getString(STAGING_DIR) + "/" + System.currentTimeMillis().toString
// convert all columns type to string
for (colName <- colNames) {
@@ -83,7 +84,7 @@ class Hbase extends SparkBatchSink with Logging {
val htc = HBaseTableCatalog(parameters)
val tableName = TableName.valueOf(htc.namespace + ":" + htc.name)
val columnFamily = htc.getColumnFamilies
- val saveMode = config.getString("save_mode").toLowerCase
+ val saveMode = config.getString(SAVE_MODE).toLowerCase
val hbaseConn = ConnectionFactory.createConnection(hbaseConf)
try {
diff --git
a/seatunnel-connectors/seatunnel-connectors-spark/seatunnel-connector-spark-hbase/src/main/scala/org/apache/seatunnel/spark/source/Hbase.scala
b/seatunnel-connectors/seatunnel-connectors-spark/seatunnel-connector-spark-hbase/src/main/scala/org/apache/seatunnel/spark/source/Hbase.scala
index 4c47a66..f967409 100644
---
a/seatunnel-connectors/seatunnel-connectors-spark/seatunnel-connector-spark-hbase/src/main/scala/org/apache/seatunnel/spark/source/Hbase.scala
+++
b/seatunnel-connectors/seatunnel-connectors-spark/seatunnel-connector-spark-hbase/src/main/scala/org/apache/seatunnel/spark/source/Hbase.scala
@@ -22,6 +22,7 @@ import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.seatunnel.common.config.CheckConfigUtil.checkAllExists
import org.apache.seatunnel.common.config.CheckResult
+import org.apache.seatunnel.spark.Config.{CATALOG, HBASE_ZOOKEEPER_QUORUM}
import org.apache.seatunnel.spark.SparkEnvironment
import org.apache.seatunnel.spark.batch.SparkBatchSource
import org.apache.spark.sql.{Dataset, Row}
@@ -31,7 +32,7 @@ class Hbase extends SparkBatchSource {
private final val FORMAT_SOURCE = "org.apache.hadoop.hbase.spark"
override def checkConfig(): CheckResult = {
- checkAllExists(config, "hbase.zookeeper.quorum", "catalog")
+ checkAllExists(config, HBASE_ZOOKEEPER_QUORUM, CATALOG)
}
override def getData(env: SparkEnvironment): Dataset[Row] = {