Repository: apex-malhar
Updated Branches:
  refs/heads/master 118a75f5e -> c830f5e4f


SPOI-9236 Add example application for S3Output Module.


Project: http://git-wip-us.apache.org/repos/asf/apex-malhar/repo
Commit: http://git-wip-us.apache.org/repos/asf/apex-malhar/commit/41ddf617
Tree: http://git-wip-us.apache.org/repos/asf/apex-malhar/tree/41ddf617
Diff: http://git-wip-us.apache.org/repos/asf/apex-malhar/diff/41ddf617

Branch: refs/heads/master
Commit: 41ddf61795309b7460d2bd1215ffb0d818075fec
Parents: 6a617f9
Author: chaitanya <[email protected]>
Authored: Tue Apr 18 21:51:12 2017 -0700
Committer: Lakshmi Prasanna Velineni <[email protected]>
Committed: Thu May 18 16:53:50 2017 -0700

----------------------------------------------------------------------
 examples/s3output/README.md                     |  34 +++
 .../s3output/XmlJavadocCommentsExtractor.xsl    |  44 ++++
 examples/s3output/pom.xml                       | 262 +++++++++++++++++++
 examples/s3output/src/assemble/appPackage.xml   |  43 +++
 .../java/com/example/s3output/Application.java  |  28 ++
 .../src/main/resources/META-INF/properties.xml  |  35 +++
 6 files changed, 446 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/41ddf617/examples/s3output/README.md
----------------------------------------------------------------------
diff --git a/examples/s3output/README.md b/examples/s3output/README.md
new file mode 100644
index 0000000..fe5d7e4
--- /dev/null
+++ b/examples/s3output/README.md
@@ -0,0 +1,34 @@
+Sample application to show how to use the S3OutputModule to upload files into an Amazon S3 bucket.
+
+The operators in the sample application are as follows:
+1) FSInputModule, which reads files from a file system (HDFS/NFS) and emits FileMetadata, BlockMetadata and BlockBytes.
+2) S3OutputModule, which uploads the files into the S3 bucket using the multi-part upload feature.
+
+Please configure the following S3OutputModule properties in src/main/resources/META-INF/properties.xml before launching the application:
+
+-   ***accessKey*** -   String
+    -   Specifies the AWS access key to access the Amazon S3 bucket.
+    
+-   ***secretAccessKey***   -   String
+    -   Specifies the AWS secret access key to access the Amazon S3 bucket.
+    
+For more information about the access key and secret access key, please refer to [IAM](http://docs.aws.amazon.com/IAM/latest/UserGuide/best-practices.html).
+    
+-   ***bucketName***    -   String
+    -   Specifies the name of the S3 bucket to copy the files/directories.
+    
+-   ***outputDirectoryPath***   -   String
+    -   Specifies the path of the output directory into which the files/directories are copied.
+    
+Suppose **app.hdfs2s3** is the name of the **bucket** and you want to copy the files to the S3 location (app.hdfs2s3/apex/s3output); then configure the properties as below:
+
+```xml
+  <property>
+    <name>dt.operator.S3OutputModule.prop.bucketName</name>
+    <value>app.hdfs2s3</value>
+  </property>
+  <property>
+    <name>dt.operator.S3OutputModule.prop.outputDirectoryPath</name>
+    <value>apex/s3output</value>
+  </property>
+```

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/41ddf617/examples/s3output/XmlJavadocCommentsExtractor.xsl
----------------------------------------------------------------------
diff --git a/examples/s3output/XmlJavadocCommentsExtractor.xsl b/examples/s3output/XmlJavadocCommentsExtractor.xsl
new file mode 100644
index 0000000..08075a9
--- /dev/null
+++ b/examples/s3output/XmlJavadocCommentsExtractor.xsl
@@ -0,0 +1,44 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+            http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+
+<!--
+    Document   : XmlJavadocCommentsExtractor.xsl
+    Created on : September 16, 2014, 11:30 AM
+    Description:
+        The transformation strips off all information except for comments and tags from xml javadoc generated by xml-doclet.
+-->
+
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+  <xsl:output method="xml" standalone="yes"/>
+
+  <!-- copy each matched node, recursing only into the listed child elements, text nodes and the name/qualified/text attributes -->
+  <xsl:template match="node()|text()|@*">
+    <xsl:copy>
+      <xsl:apply-templates select="root|package|class|interface|method|field|type|comment|tag|text()|@name|@qualified|@text"/>
+    </xsl:copy>
+  </xsl:template>
+
+  <!-- empty template: drop nested classes/interfaces and the qualified attribute of methods and fields -->
+  <xsl:template match="//root/package/interface/interface
+                      |//root/package/interface/method/@qualified
+                      |//root/package/class/interface
+                      |//root/package/class/class
+                      |//root/package/class/method/@qualified
+                      |//root/package/class/field/@qualified" />
+
+  <xsl:strip-space elements="*"/>
+</xsl:stylesheet>

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/41ddf617/examples/s3output/pom.xml
----------------------------------------------------------------------
diff --git a/examples/s3output/pom.xml b/examples/s3output/pom.xml
new file mode 100644
index 0000000..c81c6d8
--- /dev/null
+++ b/examples/s3output/pom.xml
@@ -0,0 +1,262 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <groupId>com.example</groupId>
+  <version>1.0-SNAPSHOT</version>
+  <artifactId>s3output</artifactId>
+  <packaging>jar</packaging>
+
+  <name>S3 Output Application</name>
+  <description>Sample application for S3 output module</description>
+
+  <properties>
+    <!-- change these to build against a different version of Apex Core (apex.version) or Malhar (malhar.version) -->
+    <apex.version>3.5.0</apex.version>
+    <malhar.version>3.7.0-SNAPSHOT</malhar.version>
+    <apex.apppackage.classpath>lib/*.jar</apex.apppackage.classpath>
+  </properties>
+
+  <build>
+    <plugins>
+       <plugin>
+         <groupId>org.apache.maven.plugins</groupId>
+         <artifactId>maven-eclipse-plugin</artifactId>
+         <version>2.9</version>
+         <configuration>
+           <downloadSources>true</downloadSources>
+         </configuration>
+       </plugin>
+       <plugin>
+         <artifactId>maven-compiler-plugin</artifactId>
+         <version>3.3</version>
+         <configuration>
+           <encoding>UTF-8</encoding>
+           <source>1.7</source>
+           <target>1.7</target>
+           <debug>true</debug>
+           <optimize>false</optimize>
+           <showDeprecation>true</showDeprecation>
+           <showWarnings>true</showWarnings>
+         </configuration>
+       </plugin>
+       <plugin>
+         <artifactId>maven-dependency-plugin</artifactId>
+         <version>2.8</version>
+         <executions>
+           <execution> <!-- runtime dependencies land in target/deps, which src/assemble/appPackage.xml packs under /lib -->
+             <id>copy-dependencies</id>
+             <phase>prepare-package</phase>
+             <goals>
+               <goal>copy-dependencies</goal>
+             </goals>
+             <configuration>
+               <outputDirectory>target/deps</outputDirectory>
+               <includeScope>runtime</includeScope>
+             </configuration>
+           </execution>
+         </executions>
+       </plugin>
+
+       <plugin>
+         <artifactId>maven-assembly-plugin</artifactId>
+         <executions>
+           <execution>
+             <id>app-package-assembly</id>
+             <phase>package</phase>
+             <goals>
+               <goal>single</goal>
+             </goals>
+             <configuration>
+               <finalName>${project.artifactId}-${project.version}-apexapp</finalName>
+               <appendAssemblyId>false</appendAssemblyId>
+               <descriptors>
+                 <descriptor>src/assemble/appPackage.xml</descriptor>
+               </descriptors>
+               <archiverConfig>
+                 <defaultDirectoryMode>0755</defaultDirectoryMode>
+               </archiverConfig>
+               <archive>
+                 <manifestEntries>
+                   <Class-Path>${apex.apppackage.classpath}</Class-Path>
+                   <DT-Engine-Version>${apex.version}</DT-Engine-Version>
+                   <DT-App-Package-Group-Id>${project.groupId}</DT-App-Package-Group-Id>
+                   <DT-App-Package-Name>${project.artifactId}</DT-App-Package-Name>
+                   <DT-App-Package-Version>${project.version}</DT-App-Package-Version>
+                   <DT-App-Package-Display-Name>${project.name}</DT-App-Package-Display-Name>
+                   <DT-App-Package-Description>${project.description}</DT-App-Package-Description>
+                 </manifestEntries>
+               </archive>
+             </configuration>
+           </execution>
+         </executions>
+       </plugin>
+
+       <plugin>
+         <artifactId>maven-antrun-plugin</artifactId>
+         <version>1.7</version>
+         <executions>
+           <execution> <!-- rename the assembled *-apexapp.jar to the .apa file attached by build-helper below -->
+             <phase>package</phase>
+             <configuration>
+               <target>
+                 <move file="${project.build.directory}/${project.artifactId}-${project.version}-apexapp.jar"
+                       tofile="${project.build.directory}/${project.artifactId}-${project.version}.apa" />
+               </target>
+             </configuration>
+             <goals>
+               <goal>run</goal>
+             </goals>
+           </execution>
+           <execution>
+             <!-- recreate an empty resource directory for the xml javadoc -->
+             <id>createJavadocDirectory</id>
+             <phase>generate-resources</phase>
+             <configuration>
+               <target>
+                 <delete dir="${project.build.directory}/generated-resources/xml-javadoc"/>
+                 <mkdir dir="${project.build.directory}/generated-resources/xml-javadoc"/>
+               </target>
+             </configuration>
+             <goals>
+               <goal>run</goal>
+             </goals>
+           </execution>
+         </executions>
+       </plugin>
+
+       <plugin>
+         <groupId>org.codehaus.mojo</groupId>
+         <artifactId>build-helper-maven-plugin</artifactId>
+         <version>1.9.1</version>
+         <executions>
+           <execution>
+             <id>attach-artifacts</id>
+             <phase>package</phase>
+             <goals>
+               <goal>attach-artifact</goal>
+             </goals>
+             <configuration>
+               <artifacts>
+                 <artifact>
+                   <file>target/${project.artifactId}-${project.version}.apa</file>
+                   <type>apa</type>
+                 </artifact>
+               </artifacts>
+               <skipAttach>false</skipAttach>
+             </configuration>
+           </execution>
+         </executions>
+       </plugin>
+
+      <!-- generate javadoc -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-javadoc-plugin</artifactId>
+        <executions>
+          <!-- generate xml javadoc -->
+          <execution>
+            <id>xml-doclet</id>
+            <phase>generate-resources</phase>
+            <goals>
+              <goal>javadoc</goal>
+            </goals>
+            <configuration>
+              <doclet>com.github.markusbernhardt.xmldoclet.XmlDoclet</doclet>
+              <additionalparam>-d ${project.build.directory}/generated-resources/xml-javadoc -filename ${project.artifactId}-${project.version}-javadoc.xml</additionalparam>
+              <useStandardDocletOptions>false</useStandardDocletOptions>
+              <docletArtifact>
+                <groupId>com.github.markusbernhardt</groupId>
+                <artifactId>xml-doclet</artifactId>
+                <version>1.0.4</version>
+              </docletArtifact>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+      <!-- Transform xml javadoc to stripped down version containing only class/interface comments and tags -->
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>xml-maven-plugin</artifactId>
+        <version>1.0</version>
+        <executions>
+          <execution>
+            <id>transform-xmljavadoc</id>
+            <phase>generate-resources</phase>
+            <goals>
+              <goal>transform</goal>
+            </goals>
+          </execution>
+        </executions>
+        <configuration>
+          <transformationSets>
+            <transformationSet>
+              <dir>${project.build.directory}/generated-resources/xml-javadoc</dir>
+              <includes>
+                <include>${project.artifactId}-${project.version}-javadoc.xml</include>
+              </includes>
+              <stylesheet>XmlJavadocCommentsExtractor.xsl</stylesheet>
+              <outputDir>${project.build.directory}/generated-resources/xml-javadoc</outputDir>
+            </transformationSet>
+          </transformationSets>
+        </configuration>
+      </plugin>
+      <!-- copy the transformed xml javadoc into target/classes so it is packaged with the classes -->
+      <plugin>
+        <artifactId>maven-resources-plugin</artifactId>
+        <version>2.6</version>
+        <executions>
+          <execution>
+            <id>copy-resources</id>
+            <phase>process-resources</phase>
+            <goals>
+              <goal>copy-resources</goal>
+            </goals>
+            <configuration>
+              <outputDirectory>${basedir}/target/classes</outputDirectory>
+              <resources>
+                <resource>
+                  <directory>${project.build.directory}/generated-resources/xml-javadoc</directory>
+                  <includes>
+                    <include>${project.artifactId}-${project.version}-javadoc.xml</include>
+                  </includes>
+                  <filtering>true</filtering>
+                </resource>
+              </resources>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+
+    </plugins>
+
+  </build>
+
+  <dependencies>
+    <!-- add your dependencies here -->
+    <dependency>
+      <groupId>org.apache.apex</groupId>
+      <artifactId>malhar-library</artifactId>
+      <version>${malhar.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>4.10</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.apex</groupId>
+      <artifactId>apex-common</artifactId>
+      <version>${apex.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.apex</groupId>
+      <artifactId>apex-engine</artifactId>
+      <version>${apex.version}</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+</project>

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/41ddf617/examples/s3output/src/assemble/appPackage.xml
----------------------------------------------------------------------
diff --git a/examples/s3output/src/assemble/appPackage.xml b/examples/s3output/src/assemble/appPackage.xml
new file mode 100644
index 0000000..7ad071c
--- /dev/null
+++ b/examples/s3output/src/assemble/appPackage.xml
@@ -0,0 +1,43 @@
+<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
+  <id>appPackage</id>
+  <formats>
+    <format>jar</format>
+  </formats>
+  <includeBaseDirectory>false</includeBaseDirectory>
+  <fileSets>
+    <fileSet> <!-- the project's own jar from target/ goes under /app -->
+      <directory>${basedir}/target/</directory>
+      <outputDirectory>/app</outputDirectory>
+      <includes>
+        <include>${project.artifactId}-${project.version}.jar</include>
+      </includes>
+    </fileSet>
+    <fileSet> <!-- everything in target/deps goes under /lib -->
+      <directory>${basedir}/target/deps</directory>
+      <outputDirectory>/lib</outputDirectory>
+    </fileSet>
+    <fileSet> <!-- *.xml files from src/site/conf go under /conf -->
+      <directory>${basedir}/src/site/conf</directory>
+      <outputDirectory>/conf</outputDirectory>
+      <includes>
+        <include>*.xml</include>
+      </includes>
+    </fileSet>
+    <fileSet> <!-- META-INF resources (this is where properties.xml lives) go under /META-INF -->
+      <directory>${basedir}/src/main/resources/META-INF</directory>
+      <outputDirectory>/META-INF</outputDirectory>
+    </fileSet>
+    <fileSet>
+      <directory>${basedir}/src/main/resources/app</directory>
+      <outputDirectory>/app</outputDirectory>
+    </fileSet>
+    <fileSet>
+      <directory>${basedir}/src/main/resources/resources</directory>
+      <outputDirectory>/resources</outputDirectory>
+    </fileSet>
+  </fileSets>
+
+</assembly>
+

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/41ddf617/examples/s3output/src/main/java/com/example/s3output/Application.java
----------------------------------------------------------------------
diff --git a/examples/s3output/src/main/java/com/example/s3output/Application.java b/examples/s3output/src/main/java/com/example/s3output/Application.java
new file mode 100644
index 0000000..0b9a0e7
--- /dev/null
+++ b/examples/s3output/src/main/java/com/example/s3output/Application.java
@@ -0,0 +1,28 @@
+package com.example.s3output;
+
+import org.apache.apex.malhar.lib.fs.s3.S3OutputModule;
+import org.apache.hadoop.conf.Configuration;
+
+import com.datatorrent.api.DAG;
+import com.datatorrent.api.StreamingApplication;
+import com.datatorrent.api.annotation.ApplicationAnnotation;
+import com.datatorrent.lib.io.fs.FSInputModule;
+
+/**
+ * Sample application that wires an FSInputModule to an S3OutputModule to copy files from HDFS to an S3 bucket.
+ */
+@ApplicationAnnotation(name="HDFSToS3App")
+public class Application implements StreamingApplication
+{
+  @Override
+  public void populateDAG(DAG dag, Configuration conf)
+  {
+    // The module names below are the ones used in the dt.operator.<name>.prop.* keys of META-INF/properties.xml.
+    FSInputModule inputModule = dag.addModule("HDFSInputModule", new FSInputModule());
+    S3OutputModule outputModule = dag.addModule("S3OutputModule", new S3OutputModule());
+
+    dag.addStream("FileMetaData", inputModule.filesMetadataOutput, outputModule.filesMetadataInput);
+    dag.addStream("BlocksMetaData", inputModule.blocksMetadataOutput, outputModule.blocksMetadataInput)
+      .setLocality(DAG.Locality.CONTAINER_LOCAL);
+    dag.addStream("BlocksData", inputModule.messages, outputModule.blockData).setLocality(DAG.Locality.CONTAINER_LOCAL);
+  }
+}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/41ddf617/examples/s3output/src/main/resources/META-INF/properties.xml
----------------------------------------------------------------------
diff --git a/examples/s3output/src/main/resources/META-INF/properties.xml b/examples/s3output/src/main/resources/META-INF/properties.xml
new file mode 100644
index 0000000..5a07e12
--- /dev/null
+++ b/examples/s3output/src/main/resources/META-INF/properties.xml
@@ -0,0 +1,35 @@
+<?xml version="1.0"?>
+<configuration>
+  <property> <!-- placeholder input location; replace with the real source directory before launching -->
+    <name>dt.operator.HDFSInputModule.prop.files</name>
+    <value>hdfs://source-namenode-service/user/dtuser/path-to-input-directory</value>
+  </property>
+  <property>
+    <name>dt.operator.HDFSInputModule.prop.maxReaders</name>
+    <value>6</value>
+  </property>
+  <property>
+    <name>dt.operator.HDFSInputModule.prop.minReaders</name>
+    <value>6</value>
+  </property>
+  <property>
+    <name>dt.operator.HDFSInputModule.prop.blocksThreshold</name>
+    <value>2</value>
+  </property>
+  <property> <!-- left empty on purpose: fill in the AWS access key (see README.md) -->
+    <name>dt.operator.S3OutputModule.prop.accessKey</name>
+    <value></value>
+  </property>
+  <property> <!-- left empty on purpose: fill in the AWS secret access key (see README.md) -->
+    <name>dt.operator.S3OutputModule.prop.secretAccessKey</name>
+    <value></value>
+  </property>
+  <property> <!-- left empty on purpose: name of the target S3 bucket -->
+    <name>dt.operator.S3OutputModule.prop.bucketName</name>
+    <value></value>
+  </property>
+  <property> <!-- left empty on purpose: output directory path for the copied files -->
+    <name>dt.operator.S3OutputModule.prop.outputDirectoryPath</name>
+    <value></value>
+  </property>
+</configuration>

Reply via email to