This is an automated email from the ASF dual-hosted git repository.

suvasude pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-gobblin.git


The following commit(s) were added to refs/heads/master by this push:
     new b685271  [GOBBLIN-946] Add HttpDatasetDescriptor and HttpDataNode to 
Gobblin Service
b685271 is described below

commit b685271bde7f6b767990033a5a6bed49c507f9b2
Author: Haoji Liu <[email protected]>
AuthorDate: Thu Nov 7 10:22:42 2019 -0800

    [GOBBLIN-946] Add HttpDatasetDescriptor and HttpDataNode to Gobblin Service
    
    Adding support for transmitting data from an
    http/https source. This is a first effort, and only a
    minimal number of attributes were introduced.
    Limited testing is done with unit tests.
    
    Closes #2796 from haojiliu/master
---
 .../modules/dataset/HttpDatasetDescriptor.java     | 94 ++++++++++++++++++++++
 .../modules/dataset/SqlDatasetDescriptor.java      |  2 +-
 .../flowgraph/FlowGraphConfigurationKeys.java      |  6 ++
 .../modules/flowgraph/datanodes/HttpDataNode.java  | 61 ++++++++++++++
 .../modules/dataset/HttpDatasetDescriptorTest.java | 55 +++++++++++++
 .../flowgraph/datanodes/HttpDataNodeTest.java      | 58 +++++++++++++
 6 files changed, 275 insertions(+), 1 deletion(-)

diff --git 
a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/dataset/HttpDatasetDescriptor.java
 
b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/dataset/HttpDatasetDescriptor.java
new file mode 100644
index 0000000..0d45036
--- /dev/null
+++ 
b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/dataset/HttpDatasetDescriptor.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.service.modules.dataset;
+
+import com.google.common.base.Enums;
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigValueFactory;
+import java.io.IOException;
+
+import lombok.EqualsAndHashCode;
+import lombok.Getter;
+import lombok.ToString;
+import lombok.extern.slf4j.Slf4j;
+
+import 
org.apache.gobblin.service.modules.flowgraph.DatasetDescriptorConfigKeys;
+import org.apache.gobblin.util.ConfigUtils;
+
+
+/**
+ * Describes a dataset behind an HTTP scheme.
+ * The path refers to the HTTP path of a given dataset,
+ * e.g., https://some-api:443/user/123/names, where /user/123/names is the path.
+ * Query strings are not supported.
+ */
+@Slf4j
+@ToString (exclude = {"rawConfig"})
+@EqualsAndHashCode (exclude = {"rawConfig"}, callSuper = true)
+public class HttpDatasetDescriptor extends BaseDatasetDescriptor implements 
DatasetDescriptor {
+
+  @Getter
+  private final String path;
+  @Getter
+  private final Config rawConfig;
+
+  public enum Platform {
+    HTTP("http"),
+    HTTPS("https");
+
+    private final String platform;
+
+    Platform(final String platform) {
+      this.platform = platform;
+    }
+
+    @Override
+    public String toString() {
+      return this.platform;
+    }
+  }
+
+  public HttpDatasetDescriptor(Config config) throws IOException {
+    super(config);
+    if (!isPlatformValid()) {
+      throw new IOException("Invalid platform specified for 
HttpDatasetDescriptor: " + getPlatform());
+    }
+    // refers to the full HTTP url
+    this.path = ConfigUtils.getString(config, 
DatasetDescriptorConfigKeys.PATH_KEY, "");
+    this.rawConfig = config.withValue(DatasetDescriptorConfigKeys.PATH_KEY, 
ConfigValueFactory.fromAnyRef(this.path)).withFallback(super.getRawConfig());
+  }
+
+  /**
+   * @return true if the platform is valid, false otherwise
+   */
+  private boolean isPlatformValid() {
+    return Enums.getIfPresent(HttpDatasetDescriptor.Platform.class, 
getPlatform().toUpperCase()).isPresent();
+  }
+
+  /**
+   * Checks whether this HTTP path equals the other descriptor's HTTP path.
+   *
+   * @param other the descriptor whose path should be in the format of an HTTP path
+   */
+  @Override
+  protected boolean isPathContaining(DatasetDescriptor other) {
+    // Might be null
+    String otherPath = other.getPath();
+    return this.path.equals(otherPath);
+  }
+}
diff --git 
a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/dataset/SqlDatasetDescriptor.java
 
b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/dataset/SqlDatasetDescriptor.java
index a334ab6..b77addd 100644
--- 
a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/dataset/SqlDatasetDescriptor.java
+++ 
b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/dataset/SqlDatasetDescriptor.java
@@ -51,7 +51,7 @@ public class SqlDatasetDescriptor extends 
BaseDatasetDescriptor implements Datas
   @Getter
   private final Config rawConfig;
 
-  public enum  Platform {
+  public enum Platform {
     SQLSERVER("sqlserver"),
     MYSQL("mysql"),
     ORACLE("oracle"),
diff --git 
a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/flowgraph/FlowGraphConfigurationKeys.java
 
b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/flowgraph/FlowGraphConfigurationKeys.java
index 5a43a83..67747fb 100644
--- 
a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/flowgraph/FlowGraphConfigurationKeys.java
+++ 
b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/flowgraph/FlowGraphConfigurationKeys.java
@@ -30,6 +30,12 @@ public class FlowGraphConfigurationKeys {
   public static final String DATA_NODE_IS_ACTIVE_KEY = DATA_NODE_PREFIX + 
"isActive";
 
   /**
+   *   {@link 
org.apache.gobblin.service.modules.flowgraph.datanodes.HttpDataNode} related 
configuration keys.
+   */
+  public static final String DATA_NODE_HTTP_DOMAIN_KEY = DATA_NODE_PREFIX + 
"http.domain";
+  public static final String DATA_NODE_HTTP_AUTHENTICATION_TYPE_KEY = 
DATA_NODE_PREFIX + "http.authentication.type";
+
+  /**
    * {@link FlowEdge} related configuration keys.
    */
   public static final String FLOW_EDGE_FACTORY_CLASS = FLOW_EDGE_PREFIX + 
"factory.class";
diff --git 
a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/flowgraph/datanodes/HttpDataNode.java
 
b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/flowgraph/datanodes/HttpDataNode.java
new file mode 100644
index 0000000..4e84715
--- /dev/null
+++ 
b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/flowgraph/datanodes/HttpDataNode.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.service.modules.flowgraph.datanodes;
+
+import com.google.common.base.Preconditions;
+import com.typesafe.config.Config;
+
+import joptsimple.internal.Strings;
+import lombok.EqualsAndHashCode;
+import lombok.Getter;
+
+import org.apache.gobblin.service.modules.flowgraph.BaseDataNode;
+import org.apache.gobblin.service.modules.flowgraph.DataNode;
+import org.apache.gobblin.service.modules.flowgraph.FlowGraphConfigurationKeys;
+import org.apache.gobblin.util.ConfigUtils;
+
+
+/**
+ * Represents an HTTP source. Whether the source provides a REST-compliant API 
is not enforced.
+ * Currently supports HTTPS, with the port defaulting to 443.
+ */
+@EqualsAndHashCode (callSuper = true)
+public class HttpDataNode extends BaseDataNode  {
+
+  @Getter
+  private String httpDomain;
+  @Getter
+  private String authenticationType;
+
+  public HttpDataNode(Config nodeProps) throws 
DataNode.DataNodeCreationException {
+    super(nodeProps);
+    try {
+      this.httpDomain = ConfigUtils.getString(nodeProps, 
FlowGraphConfigurationKeys.DATA_NODE_HTTP_DOMAIN_KEY, "");
+      // Authentication details and credentials should reside in the Gobblin 
job payload
+      this.authenticationType = ConfigUtils.getString(
+          nodeProps, 
FlowGraphConfigurationKeys.DATA_NODE_HTTP_AUTHENTICATION_TYPE_KEY, "");
+
+      Preconditions.checkArgument(!Strings.isNullOrEmpty(httpDomain),
+          FlowGraphConfigurationKeys.DATA_NODE_HTTP_DOMAIN_KEY + " cannot be 
null or empty.");
+      Preconditions.checkArgument(!Strings.isNullOrEmpty(authenticationType),
+          FlowGraphConfigurationKeys.DATA_NODE_HTTP_AUTHENTICATION_TYPE_KEY + 
" cannot be null or empty.");
+    } catch (Exception e) {
+      throw new DataNode.DataNodeCreationException(e);
+    }
+  }
+}
diff --git 
a/gobblin-service/src/test/java/org/apache/gobblin/service/modules/dataset/HttpDatasetDescriptorTest.java
 
b/gobblin-service/src/test/java/org/apache/gobblin/service/modules/dataset/HttpDatasetDescriptorTest.java
new file mode 100644
index 0000000..a90b2a6
--- /dev/null
+++ 
b/gobblin-service/src/test/java/org/apache/gobblin/service/modules/dataset/HttpDatasetDescriptorTest.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.service.modules.dataset;
+
+import java.io.IOException;
+
+import org.junit.Assert;
+import org.testng.annotations.Test;
+
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigFactory;
+import com.typesafe.config.ConfigValueFactory;
+
+import 
org.apache.gobblin.service.modules.flowgraph.DatasetDescriptorConfigKeys;
+
+public class HttpDatasetDescriptorTest {
+
+  @Test
+  public void testContains() throws IOException {
+    Config config1 = ConfigFactory.empty()
+        .withValue(DatasetDescriptorConfigKeys.PLATFORM_KEY, 
ConfigValueFactory.fromAnyRef("https"))
+        .withValue(DatasetDescriptorConfigKeys.PATH_KEY, 
ConfigValueFactory.fromAnyRef("https://a.com/b"));
+    HttpDatasetDescriptor descriptor1 = new HttpDatasetDescriptor(config1);
+
+    // Verify that same path points to same dataset
+    Config config2 = ConfigFactory.empty()
+        .withValue(DatasetDescriptorConfigKeys.PLATFORM_KEY, 
ConfigValueFactory.fromAnyRef("https"))
+        .withValue(DatasetDescriptorConfigKeys.PATH_KEY, 
ConfigValueFactory.fromAnyRef("https://a.com/b"));
+    HttpDatasetDescriptor descriptor2 = new HttpDatasetDescriptor(config2);
+    Assert.assertTrue(descriptor2.contains(descriptor1));
+
+    // Verify that same path but different platform points to different dataset
+    Config config3 = ConfigFactory.empty()
+        .withValue(DatasetDescriptorConfigKeys.PLATFORM_KEY, 
ConfigValueFactory.fromAnyRef("http"))
+        .withValue(DatasetDescriptorConfigKeys.PATH_KEY, 
ConfigValueFactory.fromAnyRef("https://a.com/b"));
+    HttpDatasetDescriptor descriptor3 = new HttpDatasetDescriptor(config3);
+    Assert.assertFalse(descriptor3.contains(descriptor1));
+
+  }
+}
\ No newline at end of file
diff --git 
a/gobblin-service/src/test/java/org/apache/gobblin/service/modules/flowgraph/datanodes/HttpDataNodeTest.java
 
b/gobblin-service/src/test/java/org/apache/gobblin/service/modules/flowgraph/datanodes/HttpDataNodeTest.java
new file mode 100644
index 0000000..ada9f6c
--- /dev/null
+++ 
b/gobblin-service/src/test/java/org/apache/gobblin/service/modules/flowgraph/datanodes/HttpDataNodeTest.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.service.modules.flowgraph.datanodes;
+
+import org.apache.gobblin.service.modules.flowgraph.DataNode;
+import org.apache.gobblin.service.modules.flowgraph.FlowGraphConfigurationKeys;
+import org.apache.gobblin.util.ConfigUtils;
+import org.junit.Assert;
+import org.testng.annotations.Test;
+
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigFactory;
+import com.typesafe.config.ConfigValueFactory;
+
+import 
org.apache.gobblin.service.modules.flowgraph.DatasetDescriptorConfigKeys;
+
+public class HttpDataNodeTest {
+
+  @Test
+  public void testConfig() throws DataNode.DataNodeCreationException {
+    String expectedNodeId = "some-node-id";
+    String expectedHttpDomain = "https://a.b.c";
+    String expectedHttpAuthType = "oauth";
+
+    Config config = ConfigFactory.empty()
+        .withValue(FlowGraphConfigurationKeys.DATA_NODE_ID_KEY, 
ConfigValueFactory.fromAnyRef(expectedNodeId))
+        .withValue(FlowGraphConfigurationKeys.DATA_NODE_HTTP_DOMAIN_KEY, 
ConfigValueFactory.fromAnyRef(expectedHttpDomain))
+        
.withValue(FlowGraphConfigurationKeys.DATA_NODE_HTTP_AUTHENTICATION_TYPE_KEY, 
ConfigValueFactory.fromAnyRef(expectedHttpAuthType));
+    HttpDataNode node = new HttpDataNode(config);
+
+    // Verify the node id
+    String id = node.getId();
+    Assert.assertTrue(id.equals(expectedNodeId));
+
+    Config rawConfig = node.getRawConfig();
+    String httpDomain = ConfigUtils.getString(rawConfig, 
FlowGraphConfigurationKeys.DATA_NODE_HTTP_DOMAIN_KEY, "");
+    String httpAuthType = ConfigUtils.getString(rawConfig, 
FlowGraphConfigurationKeys.DATA_NODE_HTTP_AUTHENTICATION_TYPE_KEY, "");
+    // Verify config saved to the node successfully
+    Assert.assertTrue(httpDomain.equals(expectedHttpDomain));
+    Assert.assertTrue(httpAuthType.equals(expectedHttpAuthType));
+
+  }
+}
\ No newline at end of file

Reply via email to