Repository: incubator-atlas Updated Branches: refs/heads/master 4e1cc7625 -> 90a3a9e70
ATLAS-182 Add data model for Storm topology elements (svenkat,yhemanth via shwethags) Project: http://git-wip-us.apache.org/repos/asf/incubator-atlas/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-atlas/commit/90a3a9e7 Tree: http://git-wip-us.apache.org/repos/asf/incubator-atlas/tree/90a3a9e7 Diff: http://git-wip-us.apache.org/repos/asf/incubator-atlas/diff/90a3a9e7 Branch: refs/heads/master Commit: 90a3a9e708077304bb2fac90c3b13ce9359edd8c Parents: 4e1cc76 Author: Shwetha GS <[email protected]> Authored: Tue Jan 5 13:38:36 2016 +0530 Committer: Shwetha GS <[email protected]> Committed: Tue Jan 5 13:38:36 2016 +0530 ---------------------------------------------------------------------- addons/storm-bridge/pom.xml | 364 +++++++++++++++++++ .../atlas/storm/model/StormDataTypes.java | 44 +++ .../atlas/storm/model/StormDataModel.scala | 126 +++++++ pom.xml | 51 ++- release-log.txt | 1 + 5 files changed, 569 insertions(+), 17 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/90a3a9e7/addons/storm-bridge/pom.xml ---------------------------------------------------------------------- diff --git a/addons/storm-bridge/pom.xml b/addons/storm-bridge/pom.xml new file mode 100644 index 0000000..5ec291c2 --- /dev/null +++ b/addons/storm-bridge/pom.xml @@ -0,0 +1,364 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, software + ~ distributed under the License is distributed on an "AS IS" BASIS, + ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ~ See the License for the specific language governing permissions and + ~ limitations under the License. + --> +<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://maven.apache.org/POM/4.0.0" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <artifactId>apache-atlas</artifactId> + <groupId>org.apache.atlas</groupId> + <version>0.7-incubating-SNAPSHOT</version> + <relativePath>../../</relativePath> + </parent> + <artifactId>storm-bridge</artifactId> + <description>Apache Atlas Storm Bridge Module</description> + <name>Apache Atlas Storm Bridge</name> + <packaging>jar</packaging> + + <properties> + <storm.version>0.10.0.2.3.99.0-195</storm.version> + </properties> + + <dependencies> + <!-- Logging --> + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-api</artifactId> + </dependency> + + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-log4j12</artifactId> + </dependency> + + <!-- apache atlas core dependencies --> + <dependency> + <groupId>org.apache.atlas</groupId> + <artifactId>atlas-typesystem</artifactId> + </dependency> + + <dependency> + <groupId>org.apache.atlas</groupId> + <artifactId>atlas-client</artifactId> + </dependency> + + <dependency> + <groupId>org.apache.atlas</groupId> + <artifactId>atlas-notification</artifactId> + </dependency> + + <dependency> + <groupId>org.apache.atlas</groupId> + <artifactId>hive-bridge</artifactId> + </dependency> + + <!-- apache storm core dependencies --> + <dependency> + <groupId>org.apache.storm</groupId> + <artifactId>storm-core</artifactId> + <version>${storm.version}</version> + <type>jar</type> + <exclusions> + <exclusion> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-log4j12</artifactId> + </exclusion> + </exclusions> + </dependency> + + <!-- Testing dependencies --> + <dependency> + <groupId>org.testng</groupId> + <artifactId>testng</artifactId> + </dependency> + + <dependency> + <groupId>org.eclipse.jetty</groupId> + <artifactId>jetty-server</artifactId> + <scope>test</scope> + </dependency> + + <!-- to bring up atlas server for integration tests --> + <dependency> + <groupId>org.apache.atlas</groupId> + <artifactId>atlas-webapp</artifactId> + <type>war</type> + <scope>test</scope> + </dependency> + </dependencies> + + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-dependency-plugin</artifactId> + <executions> + <execution> + <id>copy-hook-dependencies</id> + <phase>package</phase> + <goals> + <goal>copy</goal> + </goals> + <configuration> + <outputDirectory>${project.build.directory}/dependency/hook/storm</outputDirectory> + <overWriteReleases>false</overWriteReleases> + <overWriteSnapshots>false</overWriteSnapshots> + <overWriteIfNewer>true</overWriteIfNewer> + <artifactItems> + <artifactItem> + <groupId>${project.groupId}</groupId> + <artifactId>${project.artifactId}</artifactId> + <version>${project.version}</version> + </artifactItem> + <artifactItem> + <groupId>org.json4s</groupId> + <artifactId>json4s-native_2.10</artifactId> + <version>${json.version}</version> + </artifactItem> + <artifactItem> + <groupId>org.json4s</groupId> + <artifactId>json4s-core_2.10</artifactId> + <version>${json.version}</version> + </artifactItem> + <artifactItem> + <groupId>org.json4s</groupId> + <artifactId>json4s-ast_2.10</artifactId> + <version>${json.version}</version> + </artifactItem> + <artifactItem> + <groupId>${project.groupId}</groupId> + <artifactId>atlas-client</artifactId> + <version>${project.version}</version> + </artifactItem> + <artifactItem> + <groupId>${project.groupId}</groupId> + <artifactId>atlas-typesystem</artifactId> + <version>${project.version}</version> + </artifactItem> + <artifactItem> + <groupId>${project.groupId}</groupId> + <artifactId>hive-bridge</artifactId> + <version>${project.version}</version> + </artifactItem> + <artifactItem> + <groupId>${project.groupId}</groupId> + <artifactId>atlas-notification</artifactId> + <version>${project.version}</version> + </artifactItem> + <artifactItem> + <groupId>${project.groupId}</groupId> + <artifactId>atlas-common</artifactId> + <version>${project.version}</version> + </artifactItem> + <artifactItem> + <groupId>org.scala-lang</groupId> + <artifactId>scala-compiler</artifactId> + <version>${scala.version}</version> + </artifactItem> + <artifactItem> + <groupId>org.scala-lang</groupId> + <artifactId>scala-reflect</artifactId> + <version>${scala.version}</version> + </artifactItem> + <artifactItem> + <groupId>org.scala-lang</groupId> + <artifactId>scala-library</artifactId> + <version>${scala.version}</version> + </artifactItem> + <artifactItem> + <groupId>org.scala-lang</groupId> + <artifactId>scalap</artifactId> + <version>${scala.version}</version> + </artifactItem> + <artifactItem> + <groupId>com.google.inject</groupId> + <artifactId>guice</artifactId> + <version>${guice.version}</version> + </artifactItem> + <artifactItem> + <groupId>com.google.inject.extensions</groupId> + <artifactId>guice-multibindings</artifactId> + <version>${guice.version}</version> + </artifactItem> + <artifactItem> + <groupId>com.google.inject.extensions</groupId> + <artifactId>guice-servlet</artifactId> + <version>${guice.version}</version> + </artifactItem> + <artifactItem> + <groupId>com.google.inject.extensions</groupId> + <artifactId>guice-throwingproviders</artifactId> + <version>${guice.version}</version> + </artifactItem> + <artifactItem> + <groupId>org.apache.kafka</groupId> + <artifactId>kafka_${scala.binary.version}</artifactId> + <version>${kafka.version}</version> + </artifactItem> + <artifactItem> + <groupId>org.apache.kafka</groupId> + <artifactId>kafka-clients</artifactId> + <version>${kafka.version}</version> + </artifactItem> + <artifactItem> + <groupId>aopalliance</groupId> + <artifactId>aopalliance</artifactId> + <version>${aopalliance.version}</version> + </artifactItem> + <artifactItem> + <groupId>commons-configuration</groupId> + <artifactId>commons-configuration</artifactId> + <version>${commons-conf.version}</version> + </artifactItem> + <artifactItem> + <groupId>commons-logging</groupId> + <artifactId>commons-logging</artifactId> + <version>${commons-logging.version}</version> + </artifactItem> + <artifactItem> + <groupId>javax.inject</groupId> + <artifactId>javax.inject</artifactId> + <version>${javax-inject.version}</version> + </artifactItem> + <artifactItem> + <groupId>org.codehaus.jettison</groupId> + <artifactId>jettison</artifactId> + <version>${jettison.version}</version> + </artifactItem> + <artifactItem> + <groupId>org.codehaus.jettison</groupId> + <artifactId>jettison</artifactId> + <version>${jettison.version}</version> + </artifactItem> + <artifactItem> + <groupId>org.json4s</groupId> + <artifactId>json4s-ast_2.10</artifactId> + <version>${json.version}</version> + </artifactItem> + <artifactItem> + <groupId>org.json4s</groupId> + <artifactId>json4s-core_2.10</artifactId> + <version>${json.version}</version> + </artifactItem> + <artifactItem> + <groupId>org.json4s</groupId> + <artifactId>json4s-native_2.10</artifactId> + <version>${json.version}</version> + </artifactItem> + <artifactItem> + <groupId>org.json4s</groupId> + <artifactId>json4s-native_2.10</artifactId> + <version>${json.version}</version> + </artifactItem> + <artifactItem> + <groupId>com.thoughtworks.paranamer</groupId> + <artifactId>paranamer</artifactId> + <version>${paranamer.version}</version> + </artifactItem> + + </artifactItems> + </configuration> + </execution> + </executions> + </plugin> + + <plugin> + <groupId>org.eclipse.jetty</groupId> + <artifactId>jetty-maven-plugin</artifactId> + <configuration> + <skip>${skipTests}</skip> + <!--only skip int tests --> + <httpConnector> + <port>31000</port> + <idleTimeout>60000</idleTimeout> + </httpConnector> + <war>../../webapp/target/atlas-webapp-${project.version}.war</war> + <daemon>true</daemon> + <webApp> + <contextPath>/</contextPath> + <descriptor>../../webapp/src/test/webapp/WEB-INF/web.xml</descriptor> + </webApp> + <useTestScope>true</useTestScope> + <systemProperties> + <systemProperty> + <name>log4j.configuration</name> + <value>atlas-log4j.xml</value> + </systemProperty> + <systemProperty> + <name>atlas.log.dir</name> + <value>${project.build.directory}/logs</value> + </systemProperty> + <systemProperty> + <name>atlas.data</name> + <value>${project.build.directory}/data</value> + </systemProperty> + </systemProperties> + <stopKey>atlas-stop</stopKey> + <stopPort>31001</stopPort> + </configuration> + <executions> + <execution> + <id>start-jetty</id> + <phase>pre-integration-test</phase> + <goals> + <goal>deploy-war</goal> + </goals> + <configuration> + <daemon>true</daemon> + </configuration> + </execution> + <execution> + <id>stop-jetty</id> + <phase>post-integration-test</phase> + <goals> + <goal>stop</goal> + </goals> + </execution> + </executions> + </plugin> + + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-site-plugin</artifactId> + <dependencies> + <dependency> + <groupId>org.apache.maven.doxia</groupId> + <artifactId>doxia-module-twiki</artifactId> + <version>1.3</version> + </dependency> + </dependencies> + <executions> + <execution> + <goals> + <goal>site</goal> + </goals> + <phase>prepare-package</phase> + </execution> + </executions> + <configuration> + <generateProjectInfo>false</generateProjectInfo> + <generateReports>false</generateReports> + </configuration> + </plugin> + + <plugin> + <groupId>net.alchim31.maven</groupId> + <artifactId>scala-maven-plugin</artifactId> + </plugin> + </plugins> + </build> +</project> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/90a3a9e7/addons/storm-bridge/src/main/java/org/apache/atlas/storm/model/StormDataTypes.java ---------------------------------------------------------------------- diff --git a/addons/storm-bridge/src/main/java/org/apache/atlas/storm/model/StormDataTypes.java b/addons/storm-bridge/src/main/java/org/apache/atlas/storm/model/StormDataTypes.java new file mode 100644 index 0000000..86d692a --- /dev/null +++ b/addons/storm-bridge/src/main/java/org/apache/atlas/storm/model/StormDataTypes.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.storm.model; + + +/** + * Storm Data Types for model and hook. + */ +public enum StormDataTypes { + + // Topology Classes + STORM_TOPOLOGY, // represents the topology containing the DAG + + STORM_NODE, // base abstraction for producer and processor + STORM_SPOUT, // data producer node having only outputs + STORM_BOLT, // data processing node having both inputs and outputs + + // Data Sets + KAFKA_TOPIC, // kafka data set + JMS_TOPIC, // jms data set + HBASE_TABLE, // hbase table data set + HDFS_DATA_SET, // HDFS data set + ; + + public String getName() { + return name().toLowerCase(); + } +} http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/90a3a9e7/addons/storm-bridge/src/main/scala/org/apache/atlas/storm/model/StormDataModel.scala ---------------------------------------------------------------------- diff --git a/addons/storm-bridge/src/main/scala/org/apache/atlas/storm/model/StormDataModel.scala b/addons/storm-bridge/src/main/scala/org/apache/atlas/storm/model/StormDataModel.scala new file mode 100644 index 0000000..77a5454 --- /dev/null +++ b/addons/storm-bridge/src/main/scala/org/apache/atlas/storm/model/StormDataModel.scala @@ -0,0 +1,126 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.storm.model + +import org.apache.atlas.typesystem.TypesDef +import org.apache.atlas.typesystem.builders.TypesBuilder +import org.apache.atlas.typesystem.json.TypesSerialization + + +/** + * This represents the data model for a storm topology. + */ +object StormDataModel extends App { + + var typesDef : TypesDef = null + + val typesBuilder = new TypesBuilder + import typesBuilder._ + + typesDef = types { + + /** + * Model is represented as: + * Topology is a Process Super Type inheriting inputs/outputs + * Input DataSet(s) => Topology => Output DataSet(s) + * Also, Topology contains the Graph of Nodes + * Topology => Node(s) -> Spouts/Bolts + */ + _class(StormDataTypes.STORM_TOPOLOGY.getName, List("Process")) { + "id" ~ (string, required, indexed, unique) + "description" ~ (string, optional, indexed) + "owner" ~ (string, required, indexed) + "startTime" ~ long + "endTime" ~ long + "conf" ~ (map(string, string), optional) + "clusterName" ~ (string, optional, indexed) + + // Nodes in the Graph + "nodes" ~ (array(StormDataTypes.STORM_NODE.getName), collection, composite) + } + + // Base class for DataProducer aka Spouts and + // DataProcessor aka Bolts, also links from Topology + _class(StormDataTypes.STORM_NODE.getName) { + "name" ~ (string, required, indexed) + "description" ~ (string, optional, indexed) + // fully qualified driver java class name + "driverClass" ~ (string, required, indexed) + // spout or bolt configuration NVPs + "conf" ~ (map(string, string), optional) + } + + // Data Producer and hence only outputs + _class(StormDataTypes.STORM_SPOUT.getName, List(StormDataTypes.STORM_NODE.getName)) { + // "outputs" ~ (array(StormDataTypes.STORM_NODE.getName), collection, composite) + "outputs" ~ (array(string), collection) + } + + // Data Processor and hence both inputs and outputs (inherited from Spout) + _class(StormDataTypes.STORM_BOLT.getName, List(StormDataTypes.STORM_NODE.getName)) { + // "inputs" ~ (array(StormDataTypes.STORM_NODE.getName), collection, composite) + "inputs" ~ (array(string), collection) + "outputs" ~ (array(string), collection, optional) + } + + // Kafka Data Set + _class(StormDataTypes.KAFKA_TOPIC.getName, List("DataSet")) { + "topic" ~ (string, required, unique, indexed) + "uri" ~ (string, required) + "owner" ~ (string, required, indexed) + } + + // JMS Data Set + _class(StormDataTypes.JMS_TOPIC.getName, List("DataSet")) { + "topic" ~ (string, required, unique, indexed) + "uri" ~ (string, required) + "owner" ~ (string, required, indexed) + } + + // HBase Data Set + _class(StormDataTypes.HBASE_TABLE.getName, List("DataSet")) { + "tableName" ~ (string, required, unique, indexed) + "uri" ~ (string, required) + "owner" ~ (string, required, indexed) + } + + // HDFS Data Set + // todo: replace this with a generic data model for HDFS data sets + // todo: should only be used in light of storm + _class(StormDataTypes.HDFS_DATA_SET.getName, List("DataSet")) { + // fully qualified path to file or dir + "pathURI" ~ (string, required, unique, indexed) + "owner" ~ (string, required, indexed) + } + + _trait("DataProcessor") { + + } + + _trait("DataProducer") { + + } + // Hive table data set already exists in atlas. + } + + // add the types to atlas + val typesAsJSON = TypesSerialization.toJson(typesDef) + println("Storm Data Model as JSON: ") + println(typesAsJSON) +} http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/90a3a9e7/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index bd1f918..2131723 100755 --- a/pom.xml +++ b/pom.xml @@ -355,14 +355,16 @@ <fastutil.version>6.5.16</fastutil.version> <guice.version>4.0</guice.version> + <!-- Needed for hooks --> + <aopalliance.version>1.0</aopalliance.version> + <commons-conf.version>1.10</commons-conf.version> + <commons-logging.version>1.1.3</commons-logging.version> + <javax-inject.version>1</javax-inject.version> + <jettison.version>1.3.7</jettison.version> + <paranamer.version>2.3</paranamer.version> + <PermGen>64m</PermGen> <MaxPermGen>512m</MaxPermGen> - <SnapshotsId>apache.snapshots.repo</SnapshotsId> - <SnapshotsName>Apache Snapshot Repository</SnapshotsName> - <SnapshotsUrl>https://repository.apache.org/content/groups/snapshots</SnapshotsUrl> - <StagingId>apache-staging</StagingId> - <StagingName>Apache Release Distribution Repository</StagingName> - <StagingUrl>https://repository.apache.org/content/groups/staging</StagingUrl> <!-- skips checkstyle and find bugs --> <skipCheck>false</skipCheck> @@ -431,6 +433,7 @@ <module>addons/hive-bridge</module> <module>addons/falcon-bridge</module> <module>addons/sqoop-bridge</module> + <module>addons/storm-bridge</module> <module>distro</module> </modules> @@ -639,7 +642,7 @@ <dependency> <groupId>commons-configuration</groupId> <artifactId>commons-configuration</artifactId> - <version>1.10</version> + <version>${commons-conf.version}</version> </dependency> <dependency> @@ -757,7 +760,7 @@ <dependency> <groupId>org.codehaus.jettison</groupId> <artifactId>jettison</artifactId> - <version>1.3.7</version> + <version>${jettison.version}</version> </dependency> <dependency> @@ -978,34 +981,34 @@ <dependency> <groupId>org.apache.atlas</groupId> - <artifactId>hive-bridge</artifactId> + <artifactId>atlas-dashboard</artifactId> <version>${project.version}</version> + <type>war</type> </dependency> <dependency> <groupId>org.apache.atlas</groupId> - <artifactId>falcon-bridge</artifactId> + <artifactId>atlas-webapp</artifactId> <version>${project.version}</version> + <type>war</type> </dependency> <dependency> <groupId>org.apache.atlas</groupId> - <artifactId>sqoop-bridge</artifactId> + <artifactId>hive-bridge</artifactId> <version>${project.version}</version> </dependency> <dependency> <groupId>org.apache.atlas</groupId> - <artifactId>atlas-dashboard</artifactId> + <artifactId>falcon-bridge</artifactId> <version>${project.version}</version> - <type>war</type> </dependency> <dependency> <groupId>org.apache.atlas</groupId> - <artifactId>atlas-webapp</artifactId> + <artifactId>sqoop-bridge</artifactId> <version>${project.version}</version> - <type>war</type> </dependency> <!--Scala dependencies--> @@ -1257,7 +1260,7 @@ <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-source-plugin</artifactId> - <version>2.2.1</version> + <version>2.4</version> </plugin> <plugin> @@ -1403,6 +1406,20 @@ <plugins> <plugin> <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-source-plugin</artifactId> + <executions> + <execution> + <id>attach-sources</id> + <phase>verify</phase> + <goals> + <goal>jar-no-fork</goal> + <goal>test-jar-no-fork</goal> + </goals> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-compiler-plugin</artifactId> <version>3.1</version> <configuration> @@ -1677,4 +1694,4 @@ </plugin> </plugins> </build> -</project> +</project> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/90a3a9e7/release-log.txt ---------------------------------------------------------------------- diff --git a/release-log.txt b/release-log.txt index af0d915..5f33ddc 100644 --- a/release-log.txt +++ b/release-log.txt @@ -6,6 +6,7 @@ INCOMPATIBLE CHANGES: ATLAS-379 Create sqoop and falcon metadata addons (venkatnrangan,bvellanki,sowmyaramesh via shwethags) ALL CHANGES: +ATLAS-182 Add data model for Storm topology elements (svenkat,yhemanth via shwethags) ATLAS-414 Doc: Increase MAVEN_OPTS limit to 512m in InstallationSteps.twiki (yhemanth via shwethags) ATLAS-418 Update atlas website (shwethags) ATLAS-392 Rename application.properties to atlas-application.properties (rishabhbhardwaj via shwethags)
