Repository: apex-malhar Updated Branches: refs/heads/master 118a75f5e -> c830f5e4f
SPOI-9236 Add example application for S3Output Module. Project: http://git-wip-us.apache.org/repos/asf/apex-malhar/repo Commit: http://git-wip-us.apache.org/repos/asf/apex-malhar/commit/41ddf617 Tree: http://git-wip-us.apache.org/repos/asf/apex-malhar/tree/41ddf617 Diff: http://git-wip-us.apache.org/repos/asf/apex-malhar/diff/41ddf617 Branch: refs/heads/master Commit: 41ddf61795309b7460d2bd1215ffb0d818075fec Parents: 6a617f9 Author: chaitanya <[email protected]> Authored: Tue Apr 18 21:51:12 2017 -0700 Committer: Lakshmi Prasanna Velineni <[email protected]> Committed: Thu May 18 16:53:50 2017 -0700 ---------------------------------------------------------------------- examples/s3output/README.md | 34 +++ .../s3output/XmlJavadocCommentsExtractor.xsl | 44 ++++ examples/s3output/pom.xml | 262 +++++++++++++++++++ examples/s3output/src/assemble/appPackage.xml | 43 +++ .../java/com/example/s3output/Application.java | 28 ++ .../src/main/resources/META-INF/properties.xml | 35 +++ 6 files changed, 446 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/41ddf617/examples/s3output/README.md ---------------------------------------------------------------------- diff --git a/examples/s3output/README.md b/examples/s3output/README.md new file mode 100644 index 0000000..fe5d7e4 --- /dev/null +++ b/examples/s3output/README.md @@ -0,0 +1,34 @@ +Sample application to show how to use the S3OutputModule to upload files into Amazon S3 Bucket. + +Operators in sample application are as follows: +1) FSInputModule which reads files from file systems HDFS/NFS and emits FileMetadata, BlockMetadata, BlockBytes. +2) S3OutputModule which uploads the files into S3 Bucket using multi-part upload feature. 
+ +Please configure the following S3OutputModule properties in src/main/resources/META-INF/properties.xml before launching the application: + +- ***accessKey*** - String + - Specifies the AWS access key to access the Amazon S3 bucket. + +- ***secretAccessKey*** - String + - Specifies the AWS secret access key to access the Amazon S3 bucket. + +For more information about the access key and the secret access key, please refer to the [IAM best practices](http://docs.aws.amazon.com/IAM/latest/UserGuide/best-practices.html). + +- ***bucketName*** - String + - Specifies the name of the S3 bucket into which the files/directories are copied. + +- ***outputDirectoryPath*** - String + - Specifies the path of the output directory, inside the bucket, into which the files/directories are copied. + +Suppose **app.hdfs2s3** is the name of the **bucket** and you want to copy the files to the S3 location app.hdfs2s3/apex/s3output. Then configure the properties as below: + +```xml + <property> + <name>dt.operator.S3OutputModule.prop.bucketName</name> + <value>app.hdfs2s3</value> + </property> + <property> + <name>dt.operator.S3OutputModule.prop.outputDirectoryPath</name> + <value>apex/s3output</value> + </property> +``` http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/41ddf617/examples/s3output/XmlJavadocCommentsExtractor.xsl ---------------------------------------------------------------------- diff --git a/examples/s3output/XmlJavadocCommentsExtractor.xsl b/examples/s3output/XmlJavadocCommentsExtractor.xsl new file mode 100644 index 0000000..08075a9 --- /dev/null +++ b/examples/s3output/XmlJavadocCommentsExtractor.xsl @@ -0,0 +1,44 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + +<!-- + Document : XmlJavadocCommentsExtractor.xsl + Created on : September 16, 2014, 11:30 AM + Description: + The transformation strips off all information except for comments and tags from xml javadoc generated by xml-doclet. +--> + +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> + <xsl:output method="xml" standalone="yes"/> + + <!-- copy xml by selecting only the following nodes, attributes and text --> + <xsl:template match="node()|text()|@*"> + <xsl:copy> + <xsl:apply-templates select="root|package|class|interface|method|field|type|comment|tag|text()|@name|@qualified|@text"/> + </xsl:copy> + </xsl:template> + + <!-- Strip off the following paths from the selected xml --> + <xsl:template match="//root/package/interface/interface + |//root/package/interface/method/@qualified + |//root/package/class/interface + |//root/package/class/class + |//root/package/class/method/@qualified + |//root/package/class/field/@qualified" /> + + <xsl:strip-space elements="*"/> +</xsl:stylesheet> http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/41ddf617/examples/s3output/pom.xml ---------------------------------------------------------------------- diff --git a/examples/s3output/pom.xml b/examples/s3output/pom.xml new file mode 100644 index 0000000..c81c6d8 --- /dev/null +++ b/examples/s3output/pom.xml @@ -0,0 +1,262 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <groupId>com.example</groupId> + <version>1.0-SNAPSHOT</version> + <artifactId>s3output</artifactId> + <packaging>jar</packaging> + + <name>S3 Output Application</name> + <description>Sample application for S3 output module</description> + + <properties> + <!-- change this if you desire to use a different version of Apex Core --> + <apex.version>3.5.0</apex.version> + <malhar.version>3.7.0-SNAPSHOT</malhar.version> + <apex.apppackage.classpath>lib/*.jar</apex.apppackage.classpath> + </properties> + + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-eclipse-plugin</artifactId> + <version>2.9</version> + <configuration> + <downloadSources>true</downloadSources> + </configuration> + </plugin> + <plugin> + <artifactId>maven-compiler-plugin</artifactId> + <version>3.3</version> + <configuration> + <encoding>UTF-8</encoding> + <source>1.7</source> + <target>1.7</target> + <debug>true</debug> + <optimize>false</optimize> + <showDeprecation>true</showDeprecation> + <showWarnings>true</showWarnings> + </configuration> + </plugin> + <plugin> + <artifactId>maven-dependency-plugin</artifactId> + <version>2.8</version> + <executions> + <execution> + <id>copy-dependencies</id> + <phase>prepare-package</phase> + <goals> + <goal>copy-dependencies</goal> + </goals> + <configuration> + <outputDirectory>target/deps</outputDirectory> + <includeScope>runtime</includeScope> + </configuration> + </execution> + </executions> + </plugin> + + <plugin> + <artifactId>maven-assembly-plugin</artifactId> + <executions> + <execution> + <id>app-package-assembly</id> + <phase>package</phase> + <goals> + <goal>single</goal> + </goals> + <configuration> + <finalName>${project.artifactId}-${project.version}-apexapp</finalName> + <appendAssemblyId>false</appendAssemblyId> + <descriptors> + <descriptor>src/assemble/appPackage.xml</descriptor> + </descriptors> + 
<archiverConfig> + <defaultDirectoryMode>0755</defaultDirectoryMode> + </archiverConfig> + <archive> + <manifestEntries> + <Class-Path>${apex.apppackage.classpath}</Class-Path> + <DT-Engine-Version>${apex.version}</DT-Engine-Version> + <DT-App-Package-Group-Id>${project.groupId}</DT-App-Package-Group-Id> + <DT-App-Package-Name>${project.artifactId}</DT-App-Package-Name> + <DT-App-Package-Version>${project.version}</DT-App-Package-Version> + <DT-App-Package-Display-Name>${project.name}</DT-App-Package-Display-Name> + <DT-App-Package-Description>${project.description}</DT-App-Package-Description> + </manifestEntries> + </archive> + </configuration> + </execution> + </executions> + </plugin> + + <plugin> + <artifactId>maven-antrun-plugin</artifactId> + <version>1.7</version> + <executions> + <execution> + <phase>package</phase> + <configuration> + <target> + <move file="${project.build.directory}/${project.artifactId}-${project.version}-apexapp.jar" + tofile="${project.build.directory}/${project.artifactId}-${project.version}.apa" /> + </target> + </configuration> + <goals> + <goal>run</goal> + </goals> + </execution> + <execution> + <!-- recreate an empty resource directory for the xml javadoc (delete, then mkdir) --> + <id>createJavadocDirectory</id> + <phase>generate-resources</phase> + <configuration> + <target> + <delete dir="${project.build.directory}/generated-resources/xml-javadoc"/> + <mkdir dir="${project.build.directory}/generated-resources/xml-javadoc"/> + </target> + </configuration> + <goals> + <goal>run</goal> + </goals> + </execution> + </executions> + </plugin> + + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>build-helper-maven-plugin</artifactId> + <version>1.9.1</version> + <executions> + <execution> + <id>attach-artifacts</id> + <phase>package</phase> + <goals> + <goal>attach-artifact</goal> + </goals> + <configuration> + <artifacts> + <artifact> + <file>target/${project.artifactId}-${project.version}.apa</file> + <type>apa</type> + </artifact> + </artifacts> + 
<skipAttach>false</skipAttach> + </configuration> + </execution> + </executions> + </plugin> + + <!-- generate javadoc --> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-javadoc-plugin</artifactId> + <executions> + <!-- generate xml javadoc with the xml-doclet into generated-resources/xml-javadoc --> + <execution> + <id>xml-doclet</id> + <phase>generate-resources</phase> + <goals> + <goal>javadoc</goal> + </goals> + <configuration> + <doclet>com.github.markusbernhardt.xmldoclet.XmlDoclet</doclet> + <additionalparam>-d ${project.build.directory}/generated-resources/xml-javadoc -filename ${project.artifactId}-${project.version}-javadoc.xml</additionalparam> + <useStandardDocletOptions>false</useStandardDocletOptions> + <docletArtifact> + <groupId>com.github.markusbernhardt</groupId> + <artifactId>xml-doclet</artifactId> + <version>1.0.4</version> + </docletArtifact> + </configuration> + </execution> + </executions> + </plugin> + <!-- Transform the xml javadoc with XmlJavadocCommentsExtractor.xsl into a stripped-down version containing only class/interface comments and tags --> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>xml-maven-plugin</artifactId> + <version>1.0</version> + <executions> + <execution> + <id>transform-xmljavadoc</id> + <phase>generate-resources</phase> + <goals> + <goal>transform</goal> + </goals> + </execution> + </executions> + <configuration> + <transformationSets> + <transformationSet> + <dir>${project.build.directory}/generated-resources/xml-javadoc</dir> + <includes> + <include>${project.artifactId}-${project.version}-javadoc.xml</include> + </includes> + <stylesheet>XmlJavadocCommentsExtractor.xsl</stylesheet> + <outputDir>${project.build.directory}/generated-resources/xml-javadoc</outputDir> + </transformationSet> + </transformationSets> + </configuration> + </plugin> + <!-- copy the generated xml javadoc into the class jar --> + <plugin> + <artifactId>maven-resources-plugin</artifactId> + <version>2.6</version> + <executions> + <execution> + <id>copy-resources</id> + <phase>process-resources</phase> + <goals> + 
<goal>copy-resources</goal> + </goals> + <configuration> + <outputDirectory>${basedir}/target/classes</outputDirectory> + <resources> + <resource> + <directory>${project.build.directory}/generated-resources/xml-javadoc</directory> + <includes> + <include>${project.artifactId}-${project.version}-javadoc.xml</include> + </includes> + <filtering>true</filtering> + </resource> + </resources> + </configuration> + </execution> + </executions> + </plugin> + + </plugins> + + </build> + + <dependencies> + <!-- add your dependencies here --> + <dependency> + <groupId>org.apache.apex</groupId> + <artifactId>malhar-library</artifactId> + <version>${malhar.version}</version> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>4.10</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.apex</groupId> + <artifactId>apex-common</artifactId> + <version>${apex.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.apex</groupId> + <artifactId>apex-engine</artifactId> + <version>${apex.version}</version> + <scope>test</scope> + </dependency> + </dependencies> + +</project> http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/41ddf617/examples/s3output/src/assemble/appPackage.xml ---------------------------------------------------------------------- diff --git a/examples/s3output/src/assemble/appPackage.xml b/examples/s3output/src/assemble/appPackage.xml new file mode 100644 index 0000000..7ad071c --- /dev/null +++ b/examples/s3output/src/assemble/appPackage.xml @@ -0,0 +1,43 @@ +<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd"> + <id>appPackage</id> + <formats> + <format>jar</format> + </formats> + 
<includeBaseDirectory>false</includeBaseDirectory> + <fileSets> + <fileSet> + <directory>${basedir}/target/</directory> + <outputDirectory>/app</outputDirectory> + <includes> + <include>${project.artifactId}-${project.version}.jar</include> + </includes> + </fileSet> + <fileSet> + <directory>${basedir}/target/deps</directory> + <outputDirectory>/lib</outputDirectory> + </fileSet> + <fileSet> + <directory>${basedir}/src/site/conf</directory> + <outputDirectory>/conf</outputDirectory> + <includes> + <include>*.xml</include> + </includes> + </fileSet> + <fileSet> + <directory>${basedir}/src/main/resources/META-INF</directory> + <outputDirectory>/META-INF</outputDirectory> + </fileSet> + <fileSet> + <directory>${basedir}/src/main/resources/app</directory> + <outputDirectory>/app</outputDirectory> + </fileSet> + <fileSet> + <directory>${basedir}/src/main/resources/resources</directory> + <outputDirectory>/resources</outputDirectory> + </fileSet> + </fileSets> + +</assembly> + http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/41ddf617/examples/s3output/src/main/java/com/example/s3output/Application.java ---------------------------------------------------------------------- diff --git a/examples/s3output/src/main/java/com/example/s3output/Application.java b/examples/s3output/src/main/java/com/example/s3output/Application.java new file mode 100644 index 0000000..0b9a0e7 --- /dev/null +++ b/examples/s3output/src/main/java/com/example/s3output/Application.java @@ -0,0 +1,28 @@ +package com.example.s3output; + +import org.apache.apex.malhar.lib.fs.s3.S3OutputModule; +import org.apache.hadoop.conf.Configuration; + +import com.datatorrent.api.DAG; +import com.datatorrent.api.StreamingApplication; +import com.datatorrent.api.annotation.ApplicationAnnotation; +import com.datatorrent.lib.io.fs.FSInputModule; + +/** + * Application illustrating how to copy files from HDFS to an S3 bucket. 
+ */ +@ApplicationAnnotation(name="HDFSToS3App") +public class Application implements StreamingApplication +{ + @Override + public void populateDAG(DAG dag, Configuration conf) + { + FSInputModule inputModule = dag.addModule("HDFSInputModule", new FSInputModule()); + S3OutputModule outputModule = dag.addModule("S3OutputModule", new S3OutputModule()); + + dag.addStream("FileMetaData", inputModule.filesMetadataOutput, outputModule.filesMetadataInput); + dag.addStream("BlocksMetaData", inputModule.blocksMetadataOutput, outputModule.blocksMetadataInput) + .setLocality(DAG.Locality.CONTAINER_LOCAL); + dag.addStream("BlocksData", inputModule.messages, outputModule.blockData).setLocality(DAG.Locality.CONTAINER_LOCAL); + } +} http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/41ddf617/examples/s3output/src/main/resources/META-INF/properties.xml ---------------------------------------------------------------------- diff --git a/examples/s3output/src/main/resources/META-INF/properties.xml b/examples/s3output/src/main/resources/META-INF/properties.xml new file mode 100644 index 0000000..5a07e12 --- /dev/null +++ b/examples/s3output/src/main/resources/META-INF/properties.xml @@ -0,0 +1,35 @@ +<?xml version="1.0"?> +<configuration> + <property> + <name>dt.operator.HDFSInputModule.prop.files</name> + <value>hdfs://source-namenode-service/user/dtuser/path-to-input-directory</value> + </property> + <property> + <name>dt.operator.HDFSInputModule.prop.maxReaders</name> + <value>6</value> + </property> + <property> + <name>dt.operator.HDFSInputModule.prop.minReaders</name> + <value>6</value> + </property> + <property> + <name>dt.operator.HDFSInputModule.prop.blocksThreshold</name> + <value>2</value> + </property> + <property> + <name>dt.operator.S3OutputModule.prop.accessKey</name> + <value></value> + </property> + <property> + <name>dt.operator.S3OutputModule.prop.secretAccessKey</name> + <value></value> + </property> + <property> + 
<name>dt.operator.S3OutputModule.prop.bucketName</name> + <value></value> + </property> + <property> + <name>dt.operator.S3OutputModule.prop.outputDirectoryPath</name> + <value></value> + </property> +</configuration>
