This is an automated email from the ASF dual-hosted git repository.
suvasude pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-gobblin.git
The following commit(s) were added to refs/heads/master by this push:
new b685271 [GOBBLIN-946] Add HttpDatasetDescriptor and HttpDataNode to
Gobblin Service
b685271 is described below
commit b685271bde7f6b767990033a5a6bed49c507f9b2
Author: Haoji Liu <[email protected]>
AuthorDate: Thu Nov 7 10:22:42 2019 -0800
[GOBBLIN-946] Add HttpDatasetDescriptor and HttpDataNode to Gobblin Service
Adding support for transmitting data from an
HTTP/HTTPS source. This is a first effort, and only a
minimal number of attributes was introduced.
Limited testing is done with unit tests.
Closes #2796 from haojiliu/master
---
.../modules/dataset/HttpDatasetDescriptor.java | 94 ++++++++++++++++++++++
.../modules/dataset/SqlDatasetDescriptor.java | 2 +-
.../flowgraph/FlowGraphConfigurationKeys.java | 6 ++
.../modules/flowgraph/datanodes/HttpDataNode.java | 61 ++++++++++++++
.../modules/dataset/HttpDatasetDescriptorTest.java | 55 +++++++++++++
.../flowgraph/datanodes/HttpDataNodeTest.java | 58 +++++++++++++
6 files changed, 275 insertions(+), 1 deletion(-)
diff --git
a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/dataset/HttpDatasetDescriptor.java
b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/dataset/HttpDatasetDescriptor.java
new file mode 100644
index 0000000..0d45036
--- /dev/null
+++
b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/dataset/HttpDatasetDescriptor.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.service.modules.dataset;
+
+import com.google.common.base.Enums;
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigValueFactory;
+import java.io.IOException;
+
+import lombok.EqualsAndHashCode;
+import lombok.Getter;
+import lombok.ToString;
+import lombok.extern.slf4j.Slf4j;
+
+import
org.apache.gobblin.service.modules.flowgraph.DatasetDescriptorConfigKeys;
+import org.apache.gobblin.util.ConfigUtils;
+
+
+/**
+ * A {@link DatasetDescriptor} for a dataset that is reached through an HTTP or HTTPS endpoint.
+ *
+ * <p>The {@code path} identifies the dataset, e.g. {@code /user/123/names} for
+ * {@code https://some-api:443/user/123/names}. Query strings are not supported.
+ *
+ * <p>NOTE(review): the constructor comment below and {@code HttpDatasetDescriptorTest} treat the value as a
+ * full URL rather than a bare path component -- confirm which form callers are expected to supply.
+ */
+@Slf4j
+@ToString (exclude = {"rawConfig"})
+@EqualsAndHashCode (exclude = {"rawConfig"}, callSuper = true)
+public class HttpDatasetDescriptor extends BaseDatasetDescriptor implements DatasetDescriptor {
+
+  /** Platforms accepted by this descriptor; {@link #toString()} returns the lower-case platform name. */
+  public enum Platform {
+    HTTP("http"),
+    HTTPS("https");
+
+    private final String platform;
+
+    Platform(final String platform) {
+      this.platform = platform;
+    }
+
+    @Override
+    public String toString() {
+      return this.platform;
+    }
+  }
+
+  @Getter
+  private final String path;
+  @Getter
+  private final Config rawConfig;
+
+  /**
+   * Builds the descriptor from {@code config}.
+   *
+   * @param config dataset descriptor configuration; the path is read from
+   *               {@link DatasetDescriptorConfigKeys#PATH_KEY} and defaults to the empty string when absent
+   * @throws IOException if the configured platform does not match a {@link Platform} constant
+   */
+  public HttpDatasetDescriptor(Config config) throws IOException {
+    super(config);
+    if (!isPlatformValid()) {
+      throw new IOException("Invalid platform specified for HttpDatasetDescriptor: " + getPlatform());
+    }
+    // refers to the full HTTP url
+    final String configuredPath = ConfigUtils.getString(config, DatasetDescriptorConfigKeys.PATH_KEY, "");
+    this.path = configuredPath;
+    // Pin the resolved path into the raw config, then let the parent's raw config fill in anything missing.
+    final Config configWithPath =
+        config.withValue(DatasetDescriptorConfigKeys.PATH_KEY, ConfigValueFactory.fromAnyRef(configuredPath));
+    this.rawConfig = configWithPath.withFallback(super.getRawConfig());
+  }
+
+  /**
+   * Upper-cases the configured platform and looks it up among the {@link Platform} constants.
+   *
+   * @return true if the platform is valid, false otherwise
+   */
+  private boolean isPlatformValid() {
+    final String candidate = getPlatform().toUpperCase();
+    return Enums.getIfPresent(Platform.class, candidate).isPresent();
+  }
+
+  /**
+   * Reports whether the other descriptor's path is exactly equal to this descriptor's path.
+   *
+   * @param other whose path should be in the format of a HTTP path
+   * @return true only when both paths are equal; false when they differ or the other path is null
+   */
+  @Override
+  protected boolean isPathContaining(DatasetDescriptor other) {
+    // other.getPath() might be null; String.equals(null) is simply false, so no explicit guard is needed.
+    return this.path.equals(other.getPath());
+  }
+}
diff --git
a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/dataset/SqlDatasetDescriptor.java
b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/dataset/SqlDatasetDescriptor.java
index a334ab6..b77addd 100644
---
a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/dataset/SqlDatasetDescriptor.java
+++
b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/dataset/SqlDatasetDescriptor.java
@@ -51,7 +51,7 @@ public class SqlDatasetDescriptor extends
BaseDatasetDescriptor implements Datas
@Getter
private final Config rawConfig;
- public enum Platform {
+ public enum Platform {
SQLSERVER("sqlserver"),
MYSQL("mysql"),
ORACLE("oracle"),
diff --git
a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/flowgraph/FlowGraphConfigurationKeys.java
b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/flowgraph/FlowGraphConfigurationKeys.java
index 5a43a83..67747fb 100644
---
a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/flowgraph/FlowGraphConfigurationKeys.java
+++
b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/flowgraph/FlowGraphConfigurationKeys.java
@@ -30,6 +30,12 @@ public class FlowGraphConfigurationKeys {
public static final String DATA_NODE_IS_ACTIVE_KEY = DATA_NODE_PREFIX +
"isActive";
/**
+ * {@link
org.apache.gobblin.service.modules.flowgraph.datanodes.HttpDataNode} related
configuration keys.
+ */
+ public static final String DATA_NODE_HTTP_DOMAIN_KEY = DATA_NODE_PREFIX +
"http.domain";
+ public static final String DATA_NODE_HTTP_AUTHENTICATION_TYPE_KEY =
DATA_NODE_PREFIX + "http.authentication.type";
+
+ /**
* {@link FlowEdge} related configuration keys.
*/
public static final String FLOW_EDGE_FACTORY_CLASS = FLOW_EDGE_PREFIX +
"factory.class";
diff --git
a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/flowgraph/datanodes/HttpDataNode.java
b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/flowgraph/datanodes/HttpDataNode.java
new file mode 100644
index 0000000..4e84715
--- /dev/null
+++
b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/flowgraph/datanodes/HttpDataNode.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.service.modules.flowgraph.datanodes;
+
+import com.google.common.base.Preconditions;
+import com.typesafe.config.Config;
+
+import joptsimple.internal.Strings;
+import lombok.EqualsAndHashCode;
+import lombok.Getter;
+
+import org.apache.gobblin.service.modules.flowgraph.BaseDataNode;
+import org.apache.gobblin.service.modules.flowgraph.DataNode;
+import org.apache.gobblin.service.modules.flowgraph.FlowGraphConfigurationKeys;
+import org.apache.gobblin.util.ConfigUtils;
+
+
+/**
+ * Represents a HTTP source. Whether the source provides a REST-compliant API is not enforced.
+ * Currently supports HTTPS with the port defaulting to 443.
+ *
+ * <p>NOTE(review): nothing in this class checks the scheme or port -- confirm where the HTTPS/443
+ * restriction stated above is actually applied.
+ */
+@EqualsAndHashCode (callSuper = true)
+public class HttpDataNode extends BaseDataNode {
+
+  @Getter
+  private String httpDomain;
+  @Getter
+  private String authenticationType;
+
+  /**
+   * Creates the node from its properties.
+   *
+   * @param nodeProps node configuration; must supply non-empty values for
+   *                  {@link FlowGraphConfigurationKeys#DATA_NODE_HTTP_DOMAIN_KEY} and
+   *                  {@link FlowGraphConfigurationKeys#DATA_NODE_HTTP_AUTHENTICATION_TYPE_KEY}
+   * @throws DataNode.DataNodeCreationException wrapping any exception raised while reading or validating
+   *                                            the HTTP properties
+   */
+  public HttpDataNode(Config nodeProps) throws DataNode.DataNodeCreationException {
+    super(nodeProps);
+    try {
+      // Missing keys fall back to "" so that the validation below reports them uniformly.
+      this.httpDomain =
+          ConfigUtils.getString(nodeProps, FlowGraphConfigurationKeys.DATA_NODE_HTTP_DOMAIN_KEY, "");
+      // Authentication details and credentials should reside in the Gobblin job payload
+      this.authenticationType =
+          ConfigUtils.getString(nodeProps, FlowGraphConfigurationKeys.DATA_NODE_HTTP_AUTHENTICATION_TYPE_KEY, "");
+
+      requireNonEmpty(this.httpDomain, FlowGraphConfigurationKeys.DATA_NODE_HTTP_DOMAIN_KEY);
+      requireNonEmpty(this.authenticationType, FlowGraphConfigurationKeys.DATA_NODE_HTTP_AUTHENTICATION_TYPE_KEY);
+    } catch (Exception e) {
+      throw new DataNode.DataNodeCreationException(e);
+    }
+  }
+
+  /**
+   * Fails with an {@link IllegalArgumentException} naming {@code key} when {@code value} is null or empty.
+   */
+  private static void requireNonEmpty(String value, String key) {
+    // NOTE(review): Strings resolves to joptsimple.internal.Strings through this file's imports, which looks
+    // like an internal class of jopt-simple; Guava's com.google.common.base.Strings offers the same check.
+    Preconditions.checkArgument(!Strings.isNullOrEmpty(value), key + " cannot be null or empty.");
+  }
+}
diff --git
a/gobblin-service/src/test/java/org/apache/gobblin/service/modules/dataset/HttpDatasetDescriptorTest.java
b/gobblin-service/src/test/java/org/apache/gobblin/service/modules/dataset/HttpDatasetDescriptorTest.java
new file mode 100644
index 0000000..a90b2a6
--- /dev/null
+++
b/gobblin-service/src/test/java/org/apache/gobblin/service/modules/dataset/HttpDatasetDescriptorTest.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.service.modules.dataset;
+
+import java.io.IOException;
+
+import org.junit.Assert;
+import org.testng.annotations.Test;
+
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigFactory;
+import com.typesafe.config.ConfigValueFactory;
+
+import
org.apache.gobblin.service.modules.flowgraph.DatasetDescriptorConfigKeys;
+
+/** Unit tests for {@link HttpDatasetDescriptor}. */
+public class HttpDatasetDescriptorTest {
+
+  /** Builds a descriptor from a config that carries only the given platform and path. */
+  private static HttpDatasetDescriptor descriptorOf(String platform, String path) throws IOException {
+    Config config = ConfigFactory.empty()
+        .withValue(DatasetDescriptorConfigKeys.PLATFORM_KEY, ConfigValueFactory.fromAnyRef(platform))
+        .withValue(DatasetDescriptorConfigKeys.PATH_KEY, ConfigValueFactory.fromAnyRef(path));
+    return new HttpDatasetDescriptor(config);
+  }
+
+  @Test
+  public void testContains() throws IOException {
+    final String url = "https://a.com/b";
+    HttpDatasetDescriptor httpsDescriptor = descriptorOf("https", url);
+
+    // Verify that same path points to same dataset
+    HttpDatasetDescriptor sameDescriptor = descriptorOf("https", url);
+    Assert.assertTrue(sameDescriptor.contains(httpsDescriptor));
+
+    // Verify that same path but different platform points to different dataset
+    HttpDatasetDescriptor httpDescriptor = descriptorOf("http", url);
+    Assert.assertFalse(httpDescriptor.contains(httpsDescriptor));
+  }
+}
\ No newline at end of file
diff --git
a/gobblin-service/src/test/java/org/apache/gobblin/service/modules/flowgraph/datanodes/HttpDataNodeTest.java
b/gobblin-service/src/test/java/org/apache/gobblin/service/modules/flowgraph/datanodes/HttpDataNodeTest.java
new file mode 100644
index 0000000..ada9f6c
--- /dev/null
+++
b/gobblin-service/src/test/java/org/apache/gobblin/service/modules/flowgraph/datanodes/HttpDataNodeTest.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.service.modules.flowgraph.datanodes;
+
+import org.apache.gobblin.service.modules.flowgraph.DataNode;
+import org.apache.gobblin.service.modules.flowgraph.FlowGraphConfigurationKeys;
+import org.apache.gobblin.util.ConfigUtils;
+import org.junit.Assert;
+import org.testng.annotations.Test;
+
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigFactory;
+import com.typesafe.config.ConfigValueFactory;
+
+import
org.apache.gobblin.service.modules.flowgraph.DatasetDescriptorConfigKeys;
+
+/** Unit tests for {@link HttpDataNode}. */
+public class HttpDataNodeTest {
+
+  /**
+   * Builds a node from a config holding an id, an HTTP domain and an authentication type, then checks that
+   * each value is reported back by the node's id, its getters and its raw config.
+   */
+  @Test
+  public void testConfig() throws DataNode.DataNodeCreationException {
+    String expectedNodeId = "some-node-id";
+    String expectedHttpDomain = "https://a.b.c";
+    String expectedHttpAuthType = "oauth";
+
+    Config config = ConfigFactory.empty()
+        .withValue(FlowGraphConfigurationKeys.DATA_NODE_ID_KEY, ConfigValueFactory.fromAnyRef(expectedNodeId))
+        .withValue(FlowGraphConfigurationKeys.DATA_NODE_HTTP_DOMAIN_KEY,
+            ConfigValueFactory.fromAnyRef(expectedHttpDomain))
+        .withValue(FlowGraphConfigurationKeys.DATA_NODE_HTTP_AUTHENTICATION_TYPE_KEY,
+            ConfigValueFactory.fromAnyRef(expectedHttpAuthType));
+    HttpDataNode node = new HttpDataNode(config);
+
+    // This file imports org.junit.Assert, whose assertEquals takes (expected, actual); unlike
+    // assertTrue(a.equals(b)), a failure message then shows both values.
+
+    // Verify the node id
+    Assert.assertEquals(expectedNodeId, node.getId());
+
+    // Verify the values parsed by HttpDataNode are exposed through its getters
+    Assert.assertEquals(expectedHttpDomain, node.getHttpDomain());
+    Assert.assertEquals(expectedHttpAuthType, node.getAuthenticationType());
+
+    // Verify config saved to the node successfully
+    Config rawConfig = node.getRawConfig();
+    String httpDomain =
+        ConfigUtils.getString(rawConfig, FlowGraphConfigurationKeys.DATA_NODE_HTTP_DOMAIN_KEY, "");
+    String httpAuthType =
+        ConfigUtils.getString(rawConfig, FlowGraphConfigurationKeys.DATA_NODE_HTTP_AUTHENTICATION_TYPE_KEY, "");
+    Assert.assertEquals(expectedHttpDomain, httpDomain);
+    Assert.assertEquals(expectedHttpAuthType, httpAuthType);
+  }
+}
\ No newline at end of file