SPOI-8848 S3 to HDFS example app. SPOI-10927 Example app s3 tuple output module.
Project: http://git-wip-us.apache.org/repos/asf/apex-malhar/repo Commit: http://git-wip-us.apache.org/repos/asf/apex-malhar/commit/6a617f9b Tree: http://git-wip-us.apache.org/repos/asf/apex-malhar/tree/6a617f9b Diff: http://git-wip-us.apache.org/repos/asf/apex-malhar/diff/6a617f9b Branch: refs/heads/master Commit: 6a617f9ba7d6bebb74dbe95964729a74bdb542e5 Parents: 118a75f Author: yogidevendra <[email protected]> Authored: Tue Apr 18 21:48:43 2017 -0700 Committer: Lakshmi Prasanna Velineni <[email protected]> Committed: Thu May 18 16:53:50 2017 -0700 ---------------------------------------------------------------------- examples/s3-to-hdfs-sync/README.md | 5 + .../XmlJavadocCommentsExtractor.xsl | 28 ++ examples/s3-to-hdfs-sync/pom.xml | 279 +++++++++++++++++++ .../s3-to-hdfs-sync/src/assemble/appPackage.xml | 43 +++ .../s3input/S3ToHDFSSyncApplication.java | 32 +++ .../src/main/resources/META-INF/properties.xml | 12 + .../src/test/resources/log4j.properties | 21 ++ examples/s3-tuple-output/README.md | 19 ++ .../XmlJavadocCommentsExtractor.xsl | 28 ++ examples/s3-tuple-output/pom.xml | 276 ++++++++++++++++++ .../s3-tuple-output/src/assemble/appPackage.xml | 43 +++ .../tutorials/s3output/Application.java | 27 ++ .../src/main/resources/META-INF/properties.xml | 47 ++++ .../src/test/resources/log4j.properties | 21 ++ 14 files changed, 881 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-to-hdfs-sync/README.md ---------------------------------------------------------------------- diff --git a/examples/s3-to-hdfs-sync/README.md b/examples/s3-to-hdfs-sync/README.md new file mode 100644 index 0000000..cee5baf --- /dev/null +++ b/examples/s3-to-hdfs-sync/README.md @@ -0,0 +1,5 @@ +# Amazon S3 to HDFS sync application +Ingest and backup Amazon S3 data to Hadoop HDFS for data download from Amazon to hadoop. 
+ This application transfers files from the configured S3 location to the destination path in HDFS. + The source code is available at: https://github.com/DataTorrent/examples/tree/master/tutorials/s3-to-hdfs-sync + Send feedback or feature requests to [email protected] \ No newline at end of file http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-to-hdfs-sync/XmlJavadocCommentsExtractor.xsl ---------------------------------------------------------------------- diff --git a/examples/s3-to-hdfs-sync/XmlJavadocCommentsExtractor.xsl b/examples/s3-to-hdfs-sync/XmlJavadocCommentsExtractor.xsl new file mode 100644 index 0000000..1ddbbcc --- /dev/null +++ b/examples/s3-to-hdfs-sync/XmlJavadocCommentsExtractor.xsl @@ -0,0 +1,28 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Document : XmlJavadocCommentsExtractor.xsl + Created on : September 16, 2014, 11:30 AM + Description: + The transformation strips off all information except for comments and tags from xml javadoc generated by xml-doclet. 
+--> + +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> + <xsl:output method="xml" standalone="yes"/> + + <!-- copy xml by selecting only the following nodes, attributes and text --> + <xsl:template match="node()|text()|@*"> + <xsl:copy> + <xsl:apply-templates select="root|package|class|interface|method|field|type|comment|tag|text()|@name|@qualified|@text"/> + </xsl:copy> + </xsl:template> + + <!-- Strip off the following paths from the selected xml --> + <xsl:template match="//root/package/interface/interface + |//root/package/interface/method/@qualified + |//root/package/class/interface + |//root/package/class/class + |//root/package/class/method/@qualified + |//root/package/class/field/@qualified" /> + + <xsl:strip-space elements="*"/> +</xsl:stylesheet> http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-to-hdfs-sync/pom.xml ---------------------------------------------------------------------- diff --git a/examples/s3-to-hdfs-sync/pom.xml b/examples/s3-to-hdfs-sync/pom.xml new file mode 100644 index 0000000..55d98de --- /dev/null +++ b/examples/s3-to-hdfs-sync/pom.xml @@ -0,0 +1,279 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <groupId>com.datatorrent.apps</groupId> + <version>0.8</version> + <artifactId>s3-to-hdfs-sync</artifactId> + <packaging>jar</packaging> + + <!-- change these to the appropriate values --> + <name>S3 to HDFS Sync App</name> + <description>Ingest and backup Amazon S3 data to Hadoop HDFS for data download from Amazon to hadoop. + This application transfers files from the configured S3 location to the destination path in HDFS. 
+ The source code is available at: https://github.com/DataTorrent/examples/tree/master/tutorials/s3-to-hdfs-sync + Send feedback or feature requests to [email protected]</description> + + <properties> + <!-- skip tests by default as they depend on external setup --> + <skipTests>true</skipTests> + <!-- change this if you desire to use a different version of Apex Core --> + <apex.version>3.5.0</apex.version> + <malhar.version>3.6.0</malhar.version> + <apex.apppackage.classpath>lib/*.jar</apex.apppackage.classpath> + </properties> + + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-eclipse-plugin</artifactId> + <version>2.9</version> + <configuration> + <downloadSources>true</downloadSources> + </configuration> + </plugin> + <plugin> + <artifactId>maven-compiler-plugin</artifactId> + <version>3.3</version> + <configuration> + <encoding>UTF-8</encoding> + <source>1.7</source> + <target>1.7</target> + <debug>true</debug> + <optimize>false</optimize> + <showDeprecation>true</showDeprecation> + <showWarnings>true</showWarnings> + </configuration> + </plugin> + <plugin> + <artifactId>maven-dependency-plugin</artifactId> + <version>2.8</version> + <executions> + <execution> + <id>copy-dependencies</id> + <phase>prepare-package</phase> + <goals> + <goal>copy-dependencies</goal> + </goals> + <configuration> + <outputDirectory>target/deps</outputDirectory> + <includeScope>runtime</includeScope> + </configuration> + </execution> + </executions> + </plugin> + + <plugin> + <artifactId>maven-assembly-plugin</artifactId> + <executions> + <execution> + <id>app-package-assembly</id> + <phase>package</phase> + <goals> + <goal>single</goal> + </goals> + <configuration> + <finalName>${project.artifactId}-${project.version}-apexapp</finalName> + <appendAssemblyId>false</appendAssemblyId> + <descriptors> + <descriptor>src/assemble/appPackage.xml</descriptor> + </descriptors> + <archiverConfig> + 
<defaultDirectoryMode>0755</defaultDirectoryMode> + </archiverConfig> + <archive> + <manifestEntries> + <Class-Path>${apex.apppackage.classpath}</Class-Path> + <DT-Engine-Version>${apex.version}</DT-Engine-Version> + <DT-App-Package-Group-Id>${project.groupId}</DT-App-Package-Group-Id> + <DT-App-Package-Name>${project.artifactId}</DT-App-Package-Name> + <DT-App-Package-Version>${project.version}</DT-App-Package-Version> + <DT-App-Package-Display-Name>${project.name}</DT-App-Package-Display-Name> + <DT-App-Package-Description>${project.description}</DT-App-Package-Description> + </manifestEntries> + </archive> + </configuration> + </execution> + </executions> + </plugin> + + <plugin> + <artifactId>maven-antrun-plugin</artifactId> + <version>1.7</version> + <executions> + <execution> + <phase>package</phase> + <configuration> + <target> + <move file="${project.build.directory}/${project.artifactId}-${project.version}-apexapp.jar" + tofile="${project.build.directory}/${project.artifactId}-${project.version}.apa" /> + </target> + </configuration> + <goals> + <goal>run</goal> + </goals> + </execution> + <execution> + <!-- create resource directory for xml javadoc--> + <id>createJavadocDirectory</id> + <phase>generate-resources</phase> + <configuration> + <tasks> + <delete dir="${project.build.directory}/generated-resources/xml-javadoc"/> + <mkdir dir="${project.build.directory}/generated-resources/xml-javadoc"/> + </tasks> + </configuration> + <goals> + <goal>run</goal> + </goals> + </execution> + </executions> + </plugin> + + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>build-helper-maven-plugin</artifactId> + <version>1.9.1</version> + <executions> + <execution> + <id>attach-artifacts</id> + <phase>package</phase> + <goals> + <goal>attach-artifact</goal> + </goals> + <configuration> + <artifacts> + <artifact> + <file>target/${project.artifactId}-${project.version}.apa</file> + <type>apa</type> + </artifact> + </artifacts> + 
<skipAttach>false</skipAttach> + </configuration> + </execution> + </executions> + </plugin> + + <!-- generate javadoc --> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-javadoc-plugin</artifactId> + <executions> + <!-- generate xml javadoc --> + <execution> + <id>xml-doclet</id> + <phase>generate-resources</phase> + <goals> + <goal>javadoc</goal> + </goals> + <configuration> + <doclet>com.github.markusbernhardt.xmldoclet.XmlDoclet</doclet> + <additionalparam>-d ${project.build.directory}/generated-resources/xml-javadoc -filename ${project.artifactId}-${project.version}-javadoc.xml</additionalparam> + <useStandardDocletOptions>false</useStandardDocletOptions> + <docletArtifact> + <groupId>com.github.markusbernhardt</groupId> + <artifactId>xml-doclet</artifactId> + <version>1.0.4</version> + </docletArtifact> + </configuration> + </execution> + </executions> + </plugin> + <!-- Transform xml javadoc to stripped down version containing only class/interface comments and tags--> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>xml-maven-plugin</artifactId> + <version>1.0</version> + <executions> + <execution> + <id>transform-xmljavadoc</id> + <phase>generate-resources</phase> + <goals> + <goal>transform</goal> + </goals> + </execution> + </executions> + <configuration> + <transformationSets> + <transformationSet> + <dir>${project.build.directory}/generated-resources/xml-javadoc</dir> + <includes> + <include>${project.artifactId}-${project.version}-javadoc.xml</include> + </includes> + <stylesheet>XmlJavadocCommentsExtractor.xsl</stylesheet> + <outputDir>${project.build.directory}/generated-resources/xml-javadoc</outputDir> + </transformationSet> + </transformationSets> + </configuration> + </plugin> + <!-- copy xml javadoc to class jar --> + <plugin> + <artifactId>maven-resources-plugin</artifactId> + <version>2.6</version> + <executions> + <execution> + <id>copy-resources</id> + <phase>process-resources</phase> + <goals> + 
<goal>copy-resources</goal> + </goals> + <configuration> + <outputDirectory>${basedir}/target/classes</outputDirectory> + <resources> + <resource> + <directory>${project.build.directory}/generated-resources/xml-javadoc</directory> + <includes> + <include>${project.artifactId}-${project.version}-javadoc.xml</include> + </includes> + <filtering>true</filtering> + </resource> + </resources> + </configuration> + </execution> + </executions> + </plugin> + + </plugins> + + </build> + + <dependencies> + <!-- add your dependencies here --> + <dependency> + <groupId>org.apache.apex</groupId> + <artifactId>malhar-library</artifactId> + <version>${malhar.version}</version> + <exclusions> + <exclusion> + <groupId>*</groupId> + <artifactId>*</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>com.amazonaws</groupId> + <artifactId>aws-java-sdk-s3</artifactId> + <version>1.10.73</version> + </dependency> + <dependency> + <groupId>org.apache.apex</groupId> + <artifactId>apex-common</artifactId> + <version>${apex.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>4.10</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.apex</groupId> + <artifactId>apex-engine</artifactId> + <version>${apex.version}</version> + <scope>test</scope> + </dependency> + </dependencies> + +</project> http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-to-hdfs-sync/src/assemble/appPackage.xml ---------------------------------------------------------------------- diff --git a/examples/s3-to-hdfs-sync/src/assemble/appPackage.xml b/examples/s3-to-hdfs-sync/src/assemble/appPackage.xml new file mode 100644 index 0000000..7ad071c --- /dev/null +++ b/examples/s3-to-hdfs-sync/src/assemble/appPackage.xml @@ -0,0 +1,43 @@ +<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2" + 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd"> + <id>appPackage</id> + <formats> + <format>jar</format> + </formats> + <includeBaseDirectory>false</includeBaseDirectory> + <fileSets> + <fileSet> + <directory>${basedir}/target/</directory> + <outputDirectory>/app</outputDirectory> + <includes> + <include>${project.artifactId}-${project.version}.jar</include> + </includes> + </fileSet> + <fileSet> + <directory>${basedir}/target/deps</directory> + <outputDirectory>/lib</outputDirectory> + </fileSet> + <fileSet> + <directory>${basedir}/src/site/conf</directory> + <outputDirectory>/conf</outputDirectory> + <includes> + <include>*.xml</include> + </includes> + </fileSet> + <fileSet> + <directory>${basedir}/src/main/resources/META-INF</directory> + <outputDirectory>/META-INF</outputDirectory> + </fileSet> + <fileSet> + <directory>${basedir}/src/main/resources/app</directory> + <outputDirectory>/app</outputDirectory> + </fileSet> + <fileSet> + <directory>${basedir}/src/main/resources/resources</directory> + <outputDirectory>/resources</outputDirectory> + </fileSet> + </fileSets> + +</assembly> + http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-to-hdfs-sync/src/main/java/com/datatorrent/tutorial/s3input/S3ToHDFSSyncApplication.java ---------------------------------------------------------------------- diff --git a/examples/s3-to-hdfs-sync/src/main/java/com/datatorrent/tutorial/s3input/S3ToHDFSSyncApplication.java b/examples/s3-to-hdfs-sync/src/main/java/com/datatorrent/tutorial/s3input/S3ToHDFSSyncApplication.java new file mode 100644 index 0000000..dae9c4a --- /dev/null +++ b/examples/s3-to-hdfs-sync/src/main/java/com/datatorrent/tutorial/s3input/S3ToHDFSSyncApplication.java @@ -0,0 +1,32 @@ +package com.datatorrent.tutorial.s3input; + +import org.apache.hadoop.conf.Configuration; + +import 
com.datatorrent.api.DAG; +import com.datatorrent.api.DAG.Locality; +import com.datatorrent.api.StreamingApplication; +import com.datatorrent.api.annotation.ApplicationAnnotation; +import com.datatorrent.lib.io.fs.HDFSFileCopyModule; +import com.datatorrent.lib.io.fs.S3InputModule; + +/** + * Simple application illustrating file copy from S3 + */ +@ApplicationAnnotation(name="S3-to-HDFS-Sync") +public class S3ToHDFSSyncApplication implements StreamingApplication +{ + + @Override + public void populateDAG(DAG dag, Configuration conf) + { + + S3InputModule inputModule = dag.addModule("S3InputModule", new S3InputModule()); + HDFSFileCopyModule outputModule = dag.addModule("HDFSFileCopyModule", new HDFSFileCopyModule()); + + dag.addStream("FileMetaData", inputModule.filesMetadataOutput, outputModule.filesMetadataInput); + dag.addStream("BlocksMetaData", inputModule.blocksMetadataOutput, outputModule.blocksMetadataInput) + .setLocality(Locality.THREAD_LOCAL); + dag.addStream("BlocksData", inputModule.messages, outputModule.blockData).setLocality(Locality.THREAD_LOCAL); + } + +} http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-to-hdfs-sync/src/main/resources/META-INF/properties.xml ---------------------------------------------------------------------- diff --git a/examples/s3-to-hdfs-sync/src/main/resources/META-INF/properties.xml b/examples/s3-to-hdfs-sync/src/main/resources/META-INF/properties.xml new file mode 100644 index 0000000..6d8ecea --- /dev/null +++ b/examples/s3-to-hdfs-sync/src/main/resources/META-INF/properties.xml @@ -0,0 +1,12 @@ +<?xml version="1.0"?> +<configuration> + <property> + <name>dt.operator.S3InputModule.prop.files</name> + <value>s3n://ACCESS_KEY_ID:SECRET_KEY@BUCKET_NAME/DIRECTORY</value> + </property> + <property> + <name>dt.operator.HDFSFileCopyModule.prop.outputDirectoryPath</name> + <value>hdfs://destination-namenode-service:port/path-to-output-directory</value> + </property> +</configuration> + 
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-to-hdfs-sync/src/test/resources/log4j.properties ---------------------------------------------------------------------- diff --git a/examples/s3-to-hdfs-sync/src/test/resources/log4j.properties b/examples/s3-to-hdfs-sync/src/test/resources/log4j.properties new file mode 100644 index 0000000..3bfcdc5 --- /dev/null +++ b/examples/s3-to-hdfs-sync/src/test/resources/log4j.properties @@ -0,0 +1,21 @@ +log4j.rootLogger=DEBUG,CONSOLE + +log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender +log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout +log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n + +log4j.appender.RFA=org.apache.log4j.RollingFileAppender +log4j.appender.RFA.layout=org.apache.log4j.PatternLayout +log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n +log4j.appender.RFA.File=/tmp/app.log + +# to enable, add SYSLOG to rootLogger +log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender +log4j.appender.SYSLOG.syslogHost=127.0.0.1 +log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout +log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n +log4j.appender.SYSLOG.Facility=LOCAL1 + +log4j.logger.org=info +#log4j.logger.org.apache.commons.beanutils=warn +log4j.logger.com.datatorrent=debug http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-tuple-output/README.md ---------------------------------------------------------------------- diff --git a/examples/s3-tuple-output/README.md b/examples/s3-tuple-output/README.md new file mode 100644 index 0000000..e8a98f4 --- /dev/null +++ b/examples/s3-tuple-output/README.md @@ -0,0 +1,19 @@ +# S3 tuple output example + +Sample application to show how to use the S3 tuple output module. + +The application reads records from HDFS using `FSRecordReaderModule`. 
+These records are then written to Amazon S3 using `S3BytesOutputModule`. + +### How to configure +The properties file META-INF/properties.xml shows how to configure the respective operators. + +### How to compile +`shell> mvn clean package` + +This will generate the application package s3-tuple-output-1.0-SNAPSHOT.apa inside the target directory. + +### How to run +Use the application package generated above to launch the application from the UI console (if available) or the Apex command line interface. + +`apex> launch target/s3-tuple-output-1.0-SNAPSHOT.apa` http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-tuple-output/XmlJavadocCommentsExtractor.xsl ---------------------------------------------------------------------- diff --git a/examples/s3-tuple-output/XmlJavadocCommentsExtractor.xsl b/examples/s3-tuple-output/XmlJavadocCommentsExtractor.xsl new file mode 100644 index 0000000..1ddbbcc --- /dev/null +++ b/examples/s3-tuple-output/XmlJavadocCommentsExtractor.xsl @@ -0,0 +1,28 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Document : XmlJavadocCommentsExtractor.xsl + Created on : September 16, 2014, 11:30 AM + Description: + The transformation strips off all information except for comments and tags from xml javadoc generated by xml-doclet. 
+--> + +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> + <xsl:output method="xml" standalone="yes"/> + + <!-- copy xml by selecting only the following nodes, attributes and text --> + <xsl:template match="node()|text()|@*"> + <xsl:copy> + <xsl:apply-templates select="root|package|class|interface|method|field|type|comment|tag|text()|@name|@qualified|@text"/> + </xsl:copy> + </xsl:template> + + <!-- Strip off the following paths from the selected xml --> + <xsl:template match="//root/package/interface/interface + |//root/package/interface/method/@qualified + |//root/package/class/interface + |//root/package/class/class + |//root/package/class/method/@qualified + |//root/package/class/field/@qualified" /> + + <xsl:strip-space elements="*"/> +</xsl:stylesheet> http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-tuple-output/pom.xml ---------------------------------------------------------------------- diff --git a/examples/s3-tuple-output/pom.xml b/examples/s3-tuple-output/pom.xml new file mode 100644 index 0000000..3a57dab --- /dev/null +++ b/examples/s3-tuple-output/pom.xml @@ -0,0 +1,276 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <groupId>com.datatorrent.apps</groupId> + <version>1.0-SNAPSHOT</version> + <artifactId>s3-tuple-output</artifactId> + <packaging>jar</packaging> + + <!-- change these to the appropriate values --> + <name>S3 Tuple output example</name> + <description>Example application for S3 Tuple output module</description> + + <properties> + <!-- skip tests by default as they depend on external setup --> + <skipTests>true</skipTests> + <!-- change this if you desire to use a different version of Apex Core --> + <apex.version>3.5.0</apex.version> + 
<malhar.version>3.7.0-SNAPSHOT</malhar.version> + <apex.apppackage.classpath>lib/*.jar</apex.apppackage.classpath> + </properties> + + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-eclipse-plugin</artifactId> + <version>2.9</version> + <configuration> + <downloadSources>true</downloadSources> + </configuration> + </plugin> + <plugin> + <artifactId>maven-compiler-plugin</artifactId> + <version>3.3</version> + <configuration> + <encoding>UTF-8</encoding> + <source>1.7</source> + <target>1.7</target> + <debug>true</debug> + <optimize>false</optimize> + <showDeprecation>true</showDeprecation> + <showWarnings>true</showWarnings> + </configuration> + </plugin> + <plugin> + <artifactId>maven-dependency-plugin</artifactId> + <version>2.8</version> + <executions> + <execution> + <id>copy-dependencies</id> + <phase>prepare-package</phase> + <goals> + <goal>copy-dependencies</goal> + </goals> + <configuration> + <outputDirectory>target/deps</outputDirectory> + <includeScope>runtime</includeScope> + </configuration> + </execution> + </executions> + </plugin> + + <plugin> + <artifactId>maven-assembly-plugin</artifactId> + <executions> + <execution> + <id>app-package-assembly</id> + <phase>package</phase> + <goals> + <goal>single</goal> + </goals> + <configuration> + <finalName>${project.artifactId}-${project.version}-apexapp</finalName> + <appendAssemblyId>false</appendAssemblyId> + <descriptors> + <descriptor>src/assemble/appPackage.xml</descriptor> + </descriptors> + <archiverConfig> + <defaultDirectoryMode>0755</defaultDirectoryMode> + </archiverConfig> + <archive> + <manifestEntries> + <Class-Path>${apex.apppackage.classpath}</Class-Path> + <DT-Engine-Version>${apex.version}</DT-Engine-Version> + <DT-App-Package-Group-Id>${project.groupId}</DT-App-Package-Group-Id> + <DT-App-Package-Name>${project.artifactId}</DT-App-Package-Name> + <DT-App-Package-Version>${project.version}</DT-App-Package-Version> + 
<DT-App-Package-Display-Name>${project.name}</DT-App-Package-Display-Name> + <DT-App-Package-Description>${project.description}</DT-App-Package-Description> + </manifestEntries> + </archive> + </configuration> + </execution> + </executions> + </plugin> + + <plugin> + <artifactId>maven-antrun-plugin</artifactId> + <version>1.7</version> + <executions> + <execution> + <phase>package</phase> + <configuration> + <target> + <move file="${project.build.directory}/${project.artifactId}-${project.version}-apexapp.jar" + tofile="${project.build.directory}/${project.artifactId}-${project.version}.apa" /> + </target> + </configuration> + <goals> + <goal>run</goal> + </goals> + </execution> + <execution> + <!-- create resource directory for xml javadoc--> + <id>createJavadocDirectory</id> + <phase>generate-resources</phase> + <configuration> + <tasks> + <delete dir="${project.build.directory}/generated-resources/xml-javadoc"/> + <mkdir dir="${project.build.directory}/generated-resources/xml-javadoc"/> + </tasks> + </configuration> + <goals> + <goal>run</goal> + </goals> + </execution> + </executions> + </plugin> + + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>build-helper-maven-plugin</artifactId> + <version>1.9.1</version> + <executions> + <execution> + <id>attach-artifacts</id> + <phase>package</phase> + <goals> + <goal>attach-artifact</goal> + </goals> + <configuration> + <artifacts> + <artifact> + <file>target/${project.artifactId}-${project.version}.apa</file> + <type>apa</type> + </artifact> + </artifacts> + <skipAttach>false</skipAttach> + </configuration> + </execution> + </executions> + </plugin> + + <!-- generate javadoc --> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-javadoc-plugin</artifactId> + <executions> + <!-- generate xml javadoc --> + <execution> + <id>xml-doclet</id> + <phase>generate-resources</phase> + <goals> + <goal>javadoc</goal> + </goals> + <configuration> + 
<doclet>com.github.markusbernhardt.xmldoclet.XmlDoclet</doclet> + <additionalparam>-d ${project.build.directory}/generated-resources/xml-javadoc -filename ${project.artifactId}-${project.version}-javadoc.xml</additionalparam> + <useStandardDocletOptions>false</useStandardDocletOptions> + <docletArtifact> + <groupId>com.github.markusbernhardt</groupId> + <artifactId>xml-doclet</artifactId> + <version>1.0.4</version> + </docletArtifact> + </configuration> + </execution> + </executions> + </plugin> + <!-- Transform xml javadoc to stripped down version containing only class/interface comments and tags--> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>xml-maven-plugin</artifactId> + <version>1.0</version> + <executions> + <execution> + <id>transform-xmljavadoc</id> + <phase>generate-resources</phase> + <goals> + <goal>transform</goal> + </goals> + </execution> + </executions> + <configuration> + <transformationSets> + <transformationSet> + <dir>${project.build.directory}/generated-resources/xml-javadoc</dir> + <includes> + <include>${project.artifactId}-${project.version}-javadoc.xml</include> + </includes> + <stylesheet>XmlJavadocCommentsExtractor.xsl</stylesheet> + <outputDir>${project.build.directory}/generated-resources/xml-javadoc</outputDir> + </transformationSet> + </transformationSets> + </configuration> + </plugin> + <!-- copy xml javadoc to class jar --> + <plugin> + <artifactId>maven-resources-plugin</artifactId> + <version>2.6</version> + <executions> + <execution> + <id>copy-resources</id> + <phase>process-resources</phase> + <goals> + <goal>copy-resources</goal> + </goals> + <configuration> + <outputDirectory>${basedir}/target/classes</outputDirectory> + <resources> + <resource> + <directory>${project.build.directory}/generated-resources/xml-javadoc</directory> + <includes> + <include>${project.artifactId}-${project.version}-javadoc.xml</include> + </includes> + <filtering>true</filtering> + </resource> + </resources> + </configuration> + 
</execution> + </executions> + </plugin> + + </plugins> + + </build> + + <dependencies> + <!-- add your dependencies here --> + <dependency> + <groupId>org.apache.apex</groupId> + <artifactId>malhar-library</artifactId> + <version>${malhar.version}</version> + <exclusions> + <exclusion> + <groupId>*</groupId> + <artifactId>*</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>com.amazonaws</groupId> + <artifactId>aws-java-sdk-s3</artifactId> + <version>1.10.73</version> + </dependency> + <dependency> + <groupId>org.apache.apex</groupId> + <artifactId>apex-common</artifactId> + <version>${apex.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>4.10</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.apex</groupId> + <artifactId>apex-engine</artifactId> + <version>${apex.version}</version> + <scope>test</scope> + </dependency> + </dependencies> + +</project> http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-tuple-output/src/assemble/appPackage.xml ---------------------------------------------------------------------- diff --git a/examples/s3-tuple-output/src/assemble/appPackage.xml b/examples/s3-tuple-output/src/assemble/appPackage.xml new file mode 100644 index 0000000..7ad071c --- /dev/null +++ b/examples/s3-tuple-output/src/assemble/appPackage.xml @@ -0,0 +1,43 @@ +<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd"> + <id>appPackage</id> + <formats> + <format>jar</format> + </formats> + <includeBaseDirectory>false</includeBaseDirectory> + <fileSets> + <fileSet> + <directory>${basedir}/target/</directory> + <outputDirectory>/app</outputDirectory> + 
<includes> + <include>${project.artifactId}-${project.version}.jar</include> + </includes> + </fileSet> + <fileSet> + <directory>${basedir}/target/deps</directory> + <outputDirectory>/lib</outputDirectory> + </fileSet> + <fileSet> + <directory>${basedir}/src/site/conf</directory> + <outputDirectory>/conf</outputDirectory> + <includes> + <include>*.xml</include> + </includes> + </fileSet> + <fileSet> + <directory>${basedir}/src/main/resources/META-INF</directory> + <outputDirectory>/META-INF</outputDirectory> + </fileSet> + <fileSet> + <directory>${basedir}/src/main/resources/app</directory> + <outputDirectory>/app</outputDirectory> + </fileSet> + <fileSet> + <directory>${basedir}/src/main/resources/resources</directory> + <outputDirectory>/resources</outputDirectory> + </fileSet> + </fileSets> + +</assembly> + http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-tuple-output/src/main/java/com/datatorrent/tutorials/s3output/Application.java ---------------------------------------------------------------------- diff --git a/examples/s3-tuple-output/src/main/java/com/datatorrent/tutorials/s3output/Application.java b/examples/s3-tuple-output/src/main/java/com/datatorrent/tutorials/s3output/Application.java new file mode 100644 index 0000000..a4f487d --- /dev/null +++ b/examples/s3-tuple-output/src/main/java/com/datatorrent/tutorials/s3output/Application.java @@ -0,0 +1,27 @@ +package com.datatorrent.tutorials.s3output; + +import org.apache.apex.malhar.lib.fs.FSRecordReaderModule; +import org.apache.apex.malhar.lib.fs.s3.S3TupleOutputModule.S3BytesOutputModule; +import org.apache.hadoop.conf.Configuration; + +import com.datatorrent.api.Context.PortContext; +import com.datatorrent.api.DAG; +import com.datatorrent.api.StreamingApplication; +import com.datatorrent.api.annotation.ApplicationAnnotation; + +/** + * Simple application that reads records with FSRecordReaderModule and writes them to Amazon S3 with S3BytesOutputModule + */ +@ApplicationAnnotation(name="s3-output-line") +public class Application implements 
StreamingApplication +{ + + public void populateDAG(DAG dag, Configuration conf) + { + FSRecordReaderModule recordReader = dag.addModule("lineInput", FSRecordReaderModule.class); + S3BytesOutputModule s3StringOutputModule = dag.addModule("s3output", S3BytesOutputModule.class); + dag.addStream("data", recordReader.records, s3StringOutputModule.input); + + } + +} http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-tuple-output/src/main/resources/META-INF/properties.xml ---------------------------------------------------------------------- diff --git a/examples/s3-tuple-output/src/main/resources/META-INF/properties.xml b/examples/s3-tuple-output/src/main/resources/META-INF/properties.xml new file mode 100644 index 0000000..c1365bd --- /dev/null +++ b/examples/s3-tuple-output/src/main/resources/META-INF/properties.xml @@ -0,0 +1,47 @@ +<?xml version="1.0"?> +<configuration> + <property> + <name>dt.operator.lineInput.prop.files</name> + <value>/user/appuser/test</value> + </property> + <property> + <name>dt.operator.*.attr.MEMORY_MB</name> + <value>5000</value> + </property> + <property> + <name>dt.operator.s3output.prop.accessKey</name> + <value>ACCESS_KEY_ID</value> + </property> + <property> + <name>dt.operator.s3output.prop.secretAccessKey</name> + <value>SECRET_ACCESS_KEY</value> + </property> + <property> + <name>dt.operator.s3output.prop.bucketName</name> + <value>BUCKET_NAME</value> + </property> + <property> + <name>dt.operator.s3output.prop.outputDirectoryPath</name> + <value>test</value> + </property> + <property> + <name>dt.operator.s3output.prop.maxTuplesPerSecPerPartition</name> + <value>300000</value> + </property> + <property> + <name>dt.operator.s3output.prop.maxS3UploadPartitions</name> + <value>8</value> + </property> + <property> + <name>dt.operator.lineInput.prop.maxReaders</name> + <value>8</value> + </property> + <property> + <name>dt.operator.lineInput.prop.minReaders</name> + <value>1</value> + </property> + <property> + 
<name>dt.loggers.level</name> + <value>org.apache.apex.malhar.lib.fs.s3.*:DEBUG,org.apache.apex.*:DEBUG,com.datatorrent.stram.plan.physical.*:DEBUG,com.datatorrent.lib.*:DEBUG</value> + </property> +</configuration> http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-tuple-output/src/test/resources/log4j.properties ---------------------------------------------------------------------- diff --git a/examples/s3-tuple-output/src/test/resources/log4j.properties b/examples/s3-tuple-output/src/test/resources/log4j.properties new file mode 100644 index 0000000..3bfcdc5 --- /dev/null +++ b/examples/s3-tuple-output/src/test/resources/log4j.properties @@ -0,0 +1,21 @@ +log4j.rootLogger=DEBUG,CONSOLE + +log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender +log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout +log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n + +log4j.appender.RFA=org.apache.log4j.RollingFileAppender +log4j.appender.RFA.layout=org.apache.log4j.PatternLayout +log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n +log4j.appender.RFA.File=/tmp/app.log + +# to enable, add SYSLOG to rootLogger +log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender +log4j.appender.SYSLOG.syslogHost=127.0.0.1 +log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout +log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n +log4j.appender.SYSLOG.Facility=LOCAL1 + +log4j.logger.org=info +#log4j.logger.org.apache.commons.beanutils=warn +log4j.logger.com.datatorrent=debug
