Grouping and POM.xml changes
Project: http://git-wip-us.apache.org/repos/asf/apex-malhar/repo Commit: http://git-wip-us.apache.org/repos/asf/apex-malhar/commit/b25a1281 Tree: http://git-wip-us.apache.org/repos/asf/apex-malhar/tree/b25a1281 Diff: http://git-wip-us.apache.org/repos/asf/apex-malhar/diff/b25a1281 Branch: refs/heads/master Commit: b25a12819dfc3ef96a5d14c18d9ef0f77a3d1784 Parents: 41ddf61 Author: Lakshmi Prasanna Velineni <[email protected]> Authored: Wed Apr 19 13:45:57 2017 -0700 Committer: Lakshmi Prasanna Velineni <[email protected]> Committed: Thu May 18 16:55:07 2017 -0700 ---------------------------------------------------------------------- examples/pom.xml | 1 + examples/s3-to-hdfs-sync/README.md | 5 - .../XmlJavadocCommentsExtractor.xsl | 28 -- examples/s3-to-hdfs-sync/pom.xml | 279 ------------------- .../s3-to-hdfs-sync/src/assemble/appPackage.xml | 43 --- .../s3input/S3ToHDFSSyncApplication.java | 32 --- .../src/main/resources/META-INF/properties.xml | 12 - .../src/test/resources/log4j.properties | 21 -- examples/s3-tuple-output/README.md | 19 -- .../XmlJavadocCommentsExtractor.xsl | 28 -- examples/s3-tuple-output/pom.xml | 276 ------------------ .../s3-tuple-output/src/assemble/appPackage.xml | 43 --- .../tutorials/s3output/Application.java | 27 -- .../src/main/resources/META-INF/properties.xml | 47 ---- .../src/test/resources/log4j.properties | 21 -- examples/s3/README.md | 99 +++++++ examples/s3/pom.xml | 31 +++ examples/s3/src/assemble/appPackage.xml | 43 +++ .../apex/examples/s3Output/Application.java | 28 ++ .../s3ToHdfsSync/S3ToHDFSSyncApplication.java | 32 +++ .../examples/s3TupleOutput/Application.java | 26 ++ .../resources/META-INF/properties-s3Output.xml | 35 +++ .../META-INF/properties-s3ToHdfsSync.xml | 12 + .../META-INF/properties-s3TupleOutput.xml | 47 ++++ examples/s3/src/test/resources/log4j.properties | 21 ++ examples/s3output/README.md | 34 --- .../s3output/XmlJavadocCommentsExtractor.xsl | 44 --- examples/s3output/pom.xml | 262 ----------------- 
examples/s3output/src/assemble/appPackage.xml | 43 --- .../java/com/example/s3output/Application.java | 28 -- .../src/main/resources/META-INF/properties.xml | 35 --- 31 files changed, 375 insertions(+), 1327 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/pom.xml ---------------------------------------------------------------------- diff --git a/examples/pom.xml b/examples/pom.xml index cff85a7..180d7c9 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -199,6 +199,7 @@ <module>transform</module> <module>kafka</module> <module>ftp</module> + <module>s3</module> </modules> <dependencies> http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3-to-hdfs-sync/README.md ---------------------------------------------------------------------- diff --git a/examples/s3-to-hdfs-sync/README.md b/examples/s3-to-hdfs-sync/README.md deleted file mode 100644 index cee5baf..0000000 --- a/examples/s3-to-hdfs-sync/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Amazon S3 to HDFS sync application -Ingest and backup Amazon S3 data to Hadoop HDFS for data download from Amazon to hadoop. - This application transfers files from the configured S3 location to the destination path in HDFS. 
- The source code is available at: https://github.com/DataTorrent/examples/tree/master/tutorials/s3-to-hdfs-sync - Send feedback or feature requests to [email protected] \ No newline at end of file http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3-to-hdfs-sync/XmlJavadocCommentsExtractor.xsl ---------------------------------------------------------------------- diff --git a/examples/s3-to-hdfs-sync/XmlJavadocCommentsExtractor.xsl b/examples/s3-to-hdfs-sync/XmlJavadocCommentsExtractor.xsl deleted file mode 100644 index 1ddbbcc..0000000 --- a/examples/s3-to-hdfs-sync/XmlJavadocCommentsExtractor.xsl +++ /dev/null @@ -1,28 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!-- - Document : XmlJavadocCommentsExtractor.xsl - Created on : September 16, 2014, 11:30 AM - Description: - The transformation strips off all information except for comments and tags from xml javadoc generated by xml-doclet. ---> - -<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> - <xsl:output method="xml" standalone="yes"/> - - <!-- copy xml by selecting only the following nodes, attributes and text --> - <xsl:template match="node()|text()|@*"> - <xsl:copy> - <xsl:apply-templates select="root|package|class|interface|method|field|type|comment|tag|text()|@name|@qualified|@text"/> - </xsl:copy> - </xsl:template> - - <!-- Strip off the following paths from the selected xml --> - <xsl:template match="//root/package/interface/interface - |//root/package/interface/method/@qualified - |//root/package/class/interface - |//root/package/class/class - |//root/package/class/method/@qualified - |//root/package/class/field/@qualified" /> - - <xsl:strip-space elements="*"/> -</xsl:stylesheet> http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3-to-hdfs-sync/pom.xml ---------------------------------------------------------------------- diff --git a/examples/s3-to-hdfs-sync/pom.xml b/examples/s3-to-hdfs-sync/pom.xml deleted file mode 
100644 index 55d98de..0000000 --- a/examples/s3-to-hdfs-sync/pom.xml +++ /dev/null @@ -1,279 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - <modelVersion>4.0.0</modelVersion> - - <groupId>com.datatorrent.apps</groupId> - <version>0.8</version> - <artifactId>s3-to-hdfs-sync</artifactId> - <packaging>jar</packaging> - - <!-- change these to the appropriate values --> - <name>S3 to HDFS Sync App</name> - <description>Ingest and backup Amazon S3 data to Hadoop HDFS for data download from Amazon to hadoop. - This application transfers files from the configured S3 location to the destination path in HDFS. - The source code is available at: https://github.com/DataTorrent/examples/tree/master/tutorials/s3-to-hdfs-sync - Send feedback or feature requests to [email protected]</description> - - <properties> - <!-- skip tests by default as they depend on external setup --> - <skipTests>true</skipTests> - <!-- change this if you desire to use a different version of Apex Core --> - <apex.version>3.5.0</apex.version> - <malhar.version>3.6.0</malhar.version> - <apex.apppackage.classpath>lib/*.jar</apex.apppackage.classpath> - </properties> - - <build> - <plugins> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-eclipse-plugin</artifactId> - <version>2.9</version> - <configuration> - <downloadSources>true</downloadSources> - </configuration> - </plugin> - <plugin> - <artifactId>maven-compiler-plugin</artifactId> - <version>3.3</version> - <configuration> - <encoding>UTF-8</encoding> - <source>1.7</source> - <target>1.7</target> - <debug>true</debug> - <optimize>false</optimize> - <showDeprecation>true</showDeprecation> - <showWarnings>true</showWarnings> - </configuration> - </plugin> - <plugin> - <artifactId>maven-dependency-plugin</artifactId> - 
<version>2.8</version> - <executions> - <execution> - <id>copy-dependencies</id> - <phase>prepare-package</phase> - <goals> - <goal>copy-dependencies</goal> - </goals> - <configuration> - <outputDirectory>target/deps</outputDirectory> - <includeScope>runtime</includeScope> - </configuration> - </execution> - </executions> - </plugin> - - <plugin> - <artifactId>maven-assembly-plugin</artifactId> - <executions> - <execution> - <id>app-package-assembly</id> - <phase>package</phase> - <goals> - <goal>single</goal> - </goals> - <configuration> - <finalName>${project.artifactId}-${project.version}-apexapp</finalName> - <appendAssemblyId>false</appendAssemblyId> - <descriptors> - <descriptor>src/assemble/appPackage.xml</descriptor> - </descriptors> - <archiverConfig> - <defaultDirectoryMode>0755</defaultDirectoryMode> - </archiverConfig> - <archive> - <manifestEntries> - <Class-Path>${apex.apppackage.classpath}</Class-Path> - <DT-Engine-Version>${apex.version}</DT-Engine-Version> - <DT-App-Package-Group-Id>${project.groupId}</DT-App-Package-Group-Id> - <DT-App-Package-Name>${project.artifactId}</DT-App-Package-Name> - <DT-App-Package-Version>${project.version}</DT-App-Package-Version> - <DT-App-Package-Display-Name>${project.name}</DT-App-Package-Display-Name> - <DT-App-Package-Description>${project.description}</DT-App-Package-Description> - </manifestEntries> - </archive> - </configuration> - </execution> - </executions> - </plugin> - - <plugin> - <artifactId>maven-antrun-plugin</artifactId> - <version>1.7</version> - <executions> - <execution> - <phase>package</phase> - <configuration> - <target> - <move file="${project.build.directory}/${project.artifactId}-${project.version}-apexapp.jar" - tofile="${project.build.directory}/${project.artifactId}-${project.version}.apa" /> - </target> - </configuration> - <goals> - <goal>run</goal> - </goals> - </execution> - <execution> - <!-- create resource directory for xml javadoc--> - <id>createJavadocDirectory</id> - 
<phase>generate-resources</phase> - <configuration> - <tasks> - <delete dir="${project.build.directory}/generated-resources/xml-javadoc"/> - <mkdir dir="${project.build.directory}/generated-resources/xml-javadoc"/> - </tasks> - </configuration> - <goals> - <goal>run</goal> - </goals> - </execution> - </executions> - </plugin> - - <plugin> - <groupId>org.codehaus.mojo</groupId> - <artifactId>build-helper-maven-plugin</artifactId> - <version>1.9.1</version> - <executions> - <execution> - <id>attach-artifacts</id> - <phase>package</phase> - <goals> - <goal>attach-artifact</goal> - </goals> - <configuration> - <artifacts> - <artifact> - <file>target/${project.artifactId}-${project.version}.apa</file> - <type>apa</type> - </artifact> - </artifacts> - <skipAttach>false</skipAttach> - </configuration> - </execution> - </executions> - </plugin> - - <!-- generate javdoc --> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-javadoc-plugin</artifactId> - <executions> - <!-- generate xml javadoc --> - <execution> - <id>xml-doclet</id> - <phase>generate-resources</phase> - <goals> - <goal>javadoc</goal> - </goals> - <configuration> - <doclet>com.github.markusbernhardt.xmldoclet.XmlDoclet</doclet> - <additionalparam>-d ${project.build.directory}/generated-resources/xml-javadoc -filename ${project.artifactId}-${project.version}-javadoc.xml</additionalparam> - <useStandardDocletOptions>false</useStandardDocletOptions> - <docletArtifact> - <groupId>com.github.markusbernhardt</groupId> - <artifactId>xml-doclet</artifactId> - <version>1.0.4</version> - </docletArtifact> - </configuration> - </execution> - </executions> - </plugin> - <!-- Transform xml javadoc to stripped down version containing only class/interface comments and tags--> - <plugin> - <groupId>org.codehaus.mojo</groupId> - <artifactId>xml-maven-plugin</artifactId> - <version>1.0</version> - <executions> - <execution> - <id>transform-xmljavadoc</id> - <phase>generate-resources</phase> - <goals> 
- <goal>transform</goal> - </goals> - </execution> - </executions> - <configuration> - <transformationSets> - <transformationSet> - <dir>${project.build.directory}/generated-resources/xml-javadoc</dir> - <includes> - <include>${project.artifactId}-${project.version}-javadoc.xml</include> - </includes> - <stylesheet>XmlJavadocCommentsExtractor.xsl</stylesheet> - <outputDir>${project.build.directory}/generated-resources/xml-javadoc</outputDir> - </transformationSet> - </transformationSets> - </configuration> - </plugin> - <!-- copy xml javadoc to class jar --> - <plugin> - <artifactId>maven-resources-plugin</artifactId> - <version>2.6</version> - <executions> - <execution> - <id>copy-resources</id> - <phase>process-resources</phase> - <goals> - <goal>copy-resources</goal> - </goals> - <configuration> - <outputDirectory>${basedir}/target/classes</outputDirectory> - <resources> - <resource> - <directory>${project.build.directory}/generated-resources/xml-javadoc</directory> - <includes> - <include>${project.artifactId}-${project.version}-javadoc.xml</include> - </includes> - <filtering>true</filtering> - </resource> - </resources> - </configuration> - </execution> - </executions> - </plugin> - - </plugins> - - </build> - - <dependencies> - <!-- add your dependencies here --> - <dependency> - <groupId>org.apache.apex</groupId> - <artifactId>malhar-library</artifactId> - <version>${malhar.version}</version> - <exclusions> - <exclusion> - <groupId>*</groupId> - <artifactId>*</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> - <groupId>com.amazonaws</groupId> - <artifactId>aws-java-sdk-s3</artifactId> - <version>1.10.73</version> - </dependency> - <dependency> - <groupId>org.apache.apex</groupId> - <artifactId>apex-common</artifactId> - <version>${apex.version}</version> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>junit</groupId> - <artifactId>junit</artifactId> - <version>4.10</version> - <scope>test</scope> - </dependency> 
- <dependency> - <groupId>org.apache.apex</groupId> - <artifactId>apex-engine</artifactId> - <version>${apex.version}</version> - <scope>test</scope> - </dependency> - </dependencies> - -</project> http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3-to-hdfs-sync/src/assemble/appPackage.xml ---------------------------------------------------------------------- diff --git a/examples/s3-to-hdfs-sync/src/assemble/appPackage.xml b/examples/s3-to-hdfs-sync/src/assemble/appPackage.xml deleted file mode 100644 index 7ad071c..0000000 --- a/examples/s3-to-hdfs-sync/src/assemble/appPackage.xml +++ /dev/null @@ -1,43 +0,0 @@ -<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2" - xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd"> - <id>appPackage</id> - <formats> - <format>jar</format> - </formats> - <includeBaseDirectory>false</includeBaseDirectory> - <fileSets> - <fileSet> - <directory>${basedir}/target/</directory> - <outputDirectory>/app</outputDirectory> - <includes> - <include>${project.artifactId}-${project.version}.jar</include> - </includes> - </fileSet> - <fileSet> - <directory>${basedir}/target/deps</directory> - <outputDirectory>/lib</outputDirectory> - </fileSet> - <fileSet> - <directory>${basedir}/src/site/conf</directory> - <outputDirectory>/conf</outputDirectory> - <includes> - <include>*.xml</include> - </includes> - </fileSet> - <fileSet> - <directory>${basedir}/src/main/resources/META-INF</directory> - <outputDirectory>/META-INF</outputDirectory> - </fileSet> - <fileSet> - <directory>${basedir}/src/main/resources/app</directory> - <outputDirectory>/app</outputDirectory> - </fileSet> - <fileSet> - <directory>${basedir}/src/main/resources/resources</directory> - <outputDirectory>/resources</outputDirectory> - </fileSet> - </fileSets> - -</assembly> - 
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3-to-hdfs-sync/src/main/java/com/datatorrent/tutorial/s3input/S3ToHDFSSyncApplication.java ---------------------------------------------------------------------- diff --git a/examples/s3-to-hdfs-sync/src/main/java/com/datatorrent/tutorial/s3input/S3ToHDFSSyncApplication.java b/examples/s3-to-hdfs-sync/src/main/java/com/datatorrent/tutorial/s3input/S3ToHDFSSyncApplication.java deleted file mode 100644 index dae9c4a..0000000 --- a/examples/s3-to-hdfs-sync/src/main/java/com/datatorrent/tutorial/s3input/S3ToHDFSSyncApplication.java +++ /dev/null @@ -1,32 +0,0 @@ -package com.datatorrent.tutorial.s3input; - -import org.apache.hadoop.conf.Configuration; - -import com.datatorrent.api.DAG; -import com.datatorrent.api.DAG.Locality; -import com.datatorrent.api.StreamingApplication; -import com.datatorrent.api.annotation.ApplicationAnnotation; -import com.datatorrent.lib.io.fs.HDFSFileCopyModule; -import com.datatorrent.lib.io.fs.S3InputModule; - -/** - * Simple application illustrating file copy from S3 - */ -@ApplicationAnnotation(name="S3-to-HDFS-Sync") -public class S3ToHDFSSyncApplication implements StreamingApplication -{ - - @Override - public void populateDAG(DAG dag, Configuration conf) - { - - S3InputModule inputModule = dag.addModule("S3InputModule", new S3InputModule()); - HDFSFileCopyModule outputModule = dag.addModule("HDFSFileCopyModule", new HDFSFileCopyModule()); - - dag.addStream("FileMetaData", inputModule.filesMetadataOutput, outputModule.filesMetadataInput); - dag.addStream("BlocksMetaData", inputModule.blocksMetadataOutput, outputModule.blocksMetadataInput) - .setLocality(Locality.THREAD_LOCAL); - dag.addStream("BlocksData", inputModule.messages, outputModule.blockData).setLocality(Locality.THREAD_LOCAL); - } - -} http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3-to-hdfs-sync/src/main/resources/META-INF/properties.xml 
---------------------------------------------------------------------- diff --git a/examples/s3-to-hdfs-sync/src/main/resources/META-INF/properties.xml b/examples/s3-to-hdfs-sync/src/main/resources/META-INF/properties.xml deleted file mode 100644 index 6d8ecea..0000000 --- a/examples/s3-to-hdfs-sync/src/main/resources/META-INF/properties.xml +++ /dev/null @@ -1,12 +0,0 @@ -<?xml version="1.0"?> -<configuration> - <property> - <name>dt.operator.S3InputModule.prop.files</name> - <value>s3n://ACCESS_KEY_ID:SECRET_KEY@BUCKET_NAME/DIRECTORY</value> - </property> - <property> - <name>dt.operator.HDFSFileCopyModule.prop.outputDirectoryPath</name> - <value>hdfs://destination-namenode-service:port/path-to-output-directory</value> - </property> -</configuration> - http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3-to-hdfs-sync/src/test/resources/log4j.properties ---------------------------------------------------------------------- diff --git a/examples/s3-to-hdfs-sync/src/test/resources/log4j.properties b/examples/s3-to-hdfs-sync/src/test/resources/log4j.properties deleted file mode 100644 index 3bfcdc5..0000000 --- a/examples/s3-to-hdfs-sync/src/test/resources/log4j.properties +++ /dev/null @@ -1,21 +0,0 @@ -log4j.rootLogger=DEBUG,CONSOLE - -log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender -log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout -log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n - -log4j.appender.RFA=org.apache.log4j.RollingFileAppender -log4j.appender.RFA.layout=org.apache.log4j.PatternLayout -log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n -log4j.appender.RFA.File=/tmp/app.log - -# to enable, add SYSLOG to rootLogger -log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender -log4j.appender.SYSLOG.syslogHost=127.0.0.1 -log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout -log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] 
%c{2} %x - %m%n -log4j.appender.SYSLOG.Facility=LOCAL1 - -log4j.logger.org=info -#log4j.logger.org.apache.commons.beanutils=warn -log4j.logger.com.datatorrent=debug http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3-tuple-output/README.md ---------------------------------------------------------------------- diff --git a/examples/s3-tuple-output/README.md b/examples/s3-tuple-output/README.md deleted file mode 100644 index e8a98f4..0000000 --- a/examples/s3-tuple-output/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# S3 tuple output example - -Sample application to show how to use the S3 tuple output module. - -The application reads records from HDFS using `FSRecordReaderModule`. -These records are then written to Amazon S3 using `S3BytesOutputModule`. - -### How to configure -The properties file META-INF/properties.xml shows how to configure the respective operators. - -### How to compile -`shell> mvn clean package` - -This will generate application package s3-tuple-output-1.0-SNAPSHOT.apa inside target directory. - -### How to run -Use the application package generated above to launch the application from UI console(if available) or apex command line interface. - -`apex> launch target/s3-tuple-output-1.0-SNAPSHOT.apa` http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3-tuple-output/XmlJavadocCommentsExtractor.xsl ---------------------------------------------------------------------- diff --git a/examples/s3-tuple-output/XmlJavadocCommentsExtractor.xsl b/examples/s3-tuple-output/XmlJavadocCommentsExtractor.xsl deleted file mode 100644 index 1ddbbcc..0000000 --- a/examples/s3-tuple-output/XmlJavadocCommentsExtractor.xsl +++ /dev/null @@ -1,28 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!-- - Document : XmlJavadocCommentsExtractor.xsl - Created on : September 16, 2014, 11:30 AM - Description: - The transformation strips off all information except for comments and tags from xml javadoc generated by xml-doclet. 
---> - -<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> - <xsl:output method="xml" standalone="yes"/> - - <!-- copy xml by selecting only the following nodes, attributes and text --> - <xsl:template match="node()|text()|@*"> - <xsl:copy> - <xsl:apply-templates select="root|package|class|interface|method|field|type|comment|tag|text()|@name|@qualified|@text"/> - </xsl:copy> - </xsl:template> - - <!-- Strip off the following paths from the selected xml --> - <xsl:template match="//root/package/interface/interface - |//root/package/interface/method/@qualified - |//root/package/class/interface - |//root/package/class/class - |//root/package/class/method/@qualified - |//root/package/class/field/@qualified" /> - - <xsl:strip-space elements="*"/> -</xsl:stylesheet> http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3-tuple-output/pom.xml ---------------------------------------------------------------------- diff --git a/examples/s3-tuple-output/pom.xml b/examples/s3-tuple-output/pom.xml deleted file mode 100644 index 3a57dab..0000000 --- a/examples/s3-tuple-output/pom.xml +++ /dev/null @@ -1,276 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - <modelVersion>4.0.0</modelVersion> - - <groupId>com.datatorrent.apps</groupId> - <version>1.0-SNAPSHOT</version> - <artifactId>s3-tuple-output</artifactId> - <packaging>jar</packaging> - - <!-- change these to the appropriate values --> - <name>S3 Tuple output example</name> - <description>Example application for S3 Tuple output module</description> - - <properties> - <!-- skip tests by default as they depend on external setup --> - <skipTests>true</skipTests> - <!-- change this if you desire to use a different version of Apex Core --> - <apex.version>3.5.0</apex.version> - 
<malhar.version>3.7.0-SNAPSHOT</malhar.version> - <apex.apppackage.classpath>lib/*.jar</apex.apppackage.classpath> - </properties> - - <build> - <plugins> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-eclipse-plugin</artifactId> - <version>2.9</version> - <configuration> - <downloadSources>true</downloadSources> - </configuration> - </plugin> - <plugin> - <artifactId>maven-compiler-plugin</artifactId> - <version>3.3</version> - <configuration> - <encoding>UTF-8</encoding> - <source>1.7</source> - <target>1.7</target> - <debug>true</debug> - <optimize>false</optimize> - <showDeprecation>true</showDeprecation> - <showWarnings>true</showWarnings> - </configuration> - </plugin> - <plugin> - <artifactId>maven-dependency-plugin</artifactId> - <version>2.8</version> - <executions> - <execution> - <id>copy-dependencies</id> - <phase>prepare-package</phase> - <goals> - <goal>copy-dependencies</goal> - </goals> - <configuration> - <outputDirectory>target/deps</outputDirectory> - <includeScope>runtime</includeScope> - </configuration> - </execution> - </executions> - </plugin> - - <plugin> - <artifactId>maven-assembly-plugin</artifactId> - <executions> - <execution> - <id>app-package-assembly</id> - <phase>package</phase> - <goals> - <goal>single</goal> - </goals> - <configuration> - <finalName>${project.artifactId}-${project.version}-apexapp</finalName> - <appendAssemblyId>false</appendAssemblyId> - <descriptors> - <descriptor>src/assemble/appPackage.xml</descriptor> - </descriptors> - <archiverConfig> - <defaultDirectoryMode>0755</defaultDirectoryMode> - </archiverConfig> - <archive> - <manifestEntries> - <Class-Path>${apex.apppackage.classpath}</Class-Path> - <DT-Engine-Version>${apex.version}</DT-Engine-Version> - <DT-App-Package-Group-Id>${project.groupId}</DT-App-Package-Group-Id> - <DT-App-Package-Name>${project.artifactId}</DT-App-Package-Name> - <DT-App-Package-Version>${project.version}</DT-App-Package-Version> - 
<DT-App-Package-Display-Name>${project.name}</DT-App-Package-Display-Name> - <DT-App-Package-Description>${project.description}</DT-App-Package-Description> - </manifestEntries> - </archive> - </configuration> - </execution> - </executions> - </plugin> - - <plugin> - <artifactId>maven-antrun-plugin</artifactId> - <version>1.7</version> - <executions> - <execution> - <phase>package</phase> - <configuration> - <target> - <move file="${project.build.directory}/${project.artifactId}-${project.version}-apexapp.jar" - tofile="${project.build.directory}/${project.artifactId}-${project.version}.apa" /> - </target> - </configuration> - <goals> - <goal>run</goal> - </goals> - </execution> - <execution> - <!-- create resource directory for xml javadoc--> - <id>createJavadocDirectory</id> - <phase>generate-resources</phase> - <configuration> - <tasks> - <delete dir="${project.build.directory}/generated-resources/xml-javadoc"/> - <mkdir dir="${project.build.directory}/generated-resources/xml-javadoc"/> - </tasks> - </configuration> - <goals> - <goal>run</goal> - </goals> - </execution> - </executions> - </plugin> - - <plugin> - <groupId>org.codehaus.mojo</groupId> - <artifactId>build-helper-maven-plugin</artifactId> - <version>1.9.1</version> - <executions> - <execution> - <id>attach-artifacts</id> - <phase>package</phase> - <goals> - <goal>attach-artifact</goal> - </goals> - <configuration> - <artifacts> - <artifact> - <file>target/${project.artifactId}-${project.version}.apa</file> - <type>apa</type> - </artifact> - </artifacts> - <skipAttach>false</skipAttach> - </configuration> - </execution> - </executions> - </plugin> - - <!-- generate javdoc --> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-javadoc-plugin</artifactId> - <executions> - <!-- generate xml javadoc --> - <execution> - <id>xml-doclet</id> - <phase>generate-resources</phase> - <goals> - <goal>javadoc</goal> - </goals> - <configuration> - 
<doclet>com.github.markusbernhardt.xmldoclet.XmlDoclet</doclet> - <additionalparam>-d ${project.build.directory}/generated-resources/xml-javadoc -filename ${project.artifactId}-${project.version}-javadoc.xml</additionalparam> - <useStandardDocletOptions>false</useStandardDocletOptions> - <docletArtifact> - <groupId>com.github.markusbernhardt</groupId> - <artifactId>xml-doclet</artifactId> - <version>1.0.4</version> - </docletArtifact> - </configuration> - </execution> - </executions> - </plugin> - <!-- Transform xml javadoc to stripped down version containing only class/interface comments and tags--> - <plugin> - <groupId>org.codehaus.mojo</groupId> - <artifactId>xml-maven-plugin</artifactId> - <version>1.0</version> - <executions> - <execution> - <id>transform-xmljavadoc</id> - <phase>generate-resources</phase> - <goals> - <goal>transform</goal> - </goals> - </execution> - </executions> - <configuration> - <transformationSets> - <transformationSet> - <dir>${project.build.directory}/generated-resources/xml-javadoc</dir> - <includes> - <include>${project.artifactId}-${project.version}-javadoc.xml</include> - </includes> - <stylesheet>XmlJavadocCommentsExtractor.xsl</stylesheet> - <outputDir>${project.build.directory}/generated-resources/xml-javadoc</outputDir> - </transformationSet> - </transformationSets> - </configuration> - </plugin> - <!-- copy xml javadoc to class jar --> - <plugin> - <artifactId>maven-resources-plugin</artifactId> - <version>2.6</version> - <executions> - <execution> - <id>copy-resources</id> - <phase>process-resources</phase> - <goals> - <goal>copy-resources</goal> - </goals> - <configuration> - <outputDirectory>${basedir}/target/classes</outputDirectory> - <resources> - <resource> - <directory>${project.build.directory}/generated-resources/xml-javadoc</directory> - <includes> - <include>${project.artifactId}-${project.version}-javadoc.xml</include> - </includes> - <filtering>true</filtering> - </resource> - </resources> - </configuration> - 
</execution> - </executions> - </plugin> - - </plugins> - - </build> - - <dependencies> - <!-- add your dependencies here --> - <dependency> - <groupId>org.apache.apex</groupId> - <artifactId>malhar-library</artifactId> - <version>${malhar.version}</version> - <exclusions> - <exclusion> - <groupId>*</groupId> - <artifactId>*</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> - <groupId>com.amazonaws</groupId> - <artifactId>aws-java-sdk-s3</artifactId> - <version>1.10.73</version> - </dependency> - <dependency> - <groupId>org.apache.apex</groupId> - <artifactId>apex-common</artifactId> - <version>${apex.version}</version> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>junit</groupId> - <artifactId>junit</artifactId> - <version>4.10</version> - <scope>test</scope> - </dependency> - <dependency> - <groupId>org.apache.apex</groupId> - <artifactId>apex-engine</artifactId> - <version>${apex.version}</version> - <scope>test</scope> - </dependency> - </dependencies> - -</project> http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3-tuple-output/src/assemble/appPackage.xml ---------------------------------------------------------------------- diff --git a/examples/s3-tuple-output/src/assemble/appPackage.xml b/examples/s3-tuple-output/src/assemble/appPackage.xml deleted file mode 100644 index 7ad071c..0000000 --- a/examples/s3-tuple-output/src/assemble/appPackage.xml +++ /dev/null @@ -1,43 +0,0 @@ -<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2" - xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd"> - <id>appPackage</id> - <formats> - <format>jar</format> - </formats> - <includeBaseDirectory>false</includeBaseDirectory> - <fileSets> - <fileSet> - <directory>${basedir}/target/</directory> - <outputDirectory>/app</outputDirectory> - 
<includes> - <include>${project.artifactId}-${project.version}.jar</include> - </includes> - </fileSet> - <fileSet> - <directory>${basedir}/target/deps</directory> - <outputDirectory>/lib</outputDirectory> - </fileSet> - <fileSet> - <directory>${basedir}/src/site/conf</directory> - <outputDirectory>/conf</outputDirectory> - <includes> - <include>*.xml</include> - </includes> - </fileSet> - <fileSet> - <directory>${basedir}/src/main/resources/META-INF</directory> - <outputDirectory>/META-INF</outputDirectory> - </fileSet> - <fileSet> - <directory>${basedir}/src/main/resources/app</directory> - <outputDirectory>/app</outputDirectory> - </fileSet> - <fileSet> - <directory>${basedir}/src/main/resources/resources</directory> - <outputDirectory>/resources</outputDirectory> - </fileSet> - </fileSets> - -</assembly> - http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3-tuple-output/src/main/java/com/datatorrent/tutorials/s3output/Application.java ---------------------------------------------------------------------- diff --git a/examples/s3-tuple-output/src/main/java/com/datatorrent/tutorials/s3output/Application.java b/examples/s3-tuple-output/src/main/java/com/datatorrent/tutorials/s3output/Application.java deleted file mode 100644 index a4f487d..0000000 --- a/examples/s3-tuple-output/src/main/java/com/datatorrent/tutorials/s3output/Application.java +++ /dev/null @@ -1,27 +0,0 @@ -package com.datatorrent.tutorials.s3output; - -import org.apache.apex.malhar.lib.fs.FSRecordReaderModule; -import org.apache.apex.malhar.lib.fs.s3.S3TupleOutputModule.S3BytesOutputModule; -import org.apache.hadoop.conf.Configuration; - -import com.datatorrent.api.Context.PortContext; -import com.datatorrent.api.DAG; -import com.datatorrent.api.StreamingApplication; -import com.datatorrent.api.annotation.ApplicationAnnotation; - -/** - * Simple application illustrating file copy from S3 - */ -@ApplicationAnnotation(name="s3-output-line") -public class Application 
implements StreamingApplication -{ - - public void populateDAG(DAG dag, Configuration conf) - { - FSRecordReaderModule recordReader = dag.addModule("lineInput", FSRecordReaderModule.class); - S3BytesOutputModule s3StringOutputModule = dag.addModule("s3output", S3BytesOutputModule.class); - dag.addStream("data", recordReader.records, s3StringOutputModule.input); - - } - -} http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3-tuple-output/src/main/resources/META-INF/properties.xml ---------------------------------------------------------------------- diff --git a/examples/s3-tuple-output/src/main/resources/META-INF/properties.xml b/examples/s3-tuple-output/src/main/resources/META-INF/properties.xml deleted file mode 100644 index c1365bd..0000000 --- a/examples/s3-tuple-output/src/main/resources/META-INF/properties.xml +++ /dev/null @@ -1,47 +0,0 @@ -<?xml version="1.0"?> -<configuration> - <property> - <name>dt.operator.lineInput.prop.files</name> - <value>/user/appuser/test</value> - </property> - <property> - <name>dt.operator.*.attr.MEMORY_MB</name> - <value>5000</value> - </property> - <property> - <name>dt.operator.s3output.prop.accessKey</name> - <value>ACCESS_KEY_ID</value> - </property> - <property> - <name>dt.operator.s3output.prop.secretAccessKey</name> - <value>SECRET_ACCESS_KEY</value> - </property> - <property> - <name>dt.operator.s3output.prop.bucketName</name> - <value>BUCKET_NAME</value> - </property> - <property> - <name>dt.operator.s3output.prop.outputDirectoryPath</name> - <value>test</value> - </property> - <property> - <name>dt.operator.s3output.prop.maxTuplesPerSecPerPartition</name> - <value>300000</value> - </property> - <property> - <name>dt.operator.s3output.prop.maxS3UploadPartitions</name> - <value>8</value> - </property> - <property> - <name>dt.operator.lineInput.prop.maxReaders</name> - <value>8</value> - </property> - <property> - <name>dt.operator.lineInput.prop.minReaders</name> - <value>1</value> - </property> 
- <property> - <name>dt.loggers.level</name> - <value>org.apache.apex.malhar.lib.fs.s3.*:DEBUG,org.apache.apex.*:DEBUG,com.datatorrent.stram.plan.physical.*:DEBUG,com.datatorrent.lib.*:DEBUG</value> - </property> -</configuration> http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3-tuple-output/src/test/resources/log4j.properties ---------------------------------------------------------------------- diff --git a/examples/s3-tuple-output/src/test/resources/log4j.properties b/examples/s3-tuple-output/src/test/resources/log4j.properties deleted file mode 100644 index 3bfcdc5..0000000 --- a/examples/s3-tuple-output/src/test/resources/log4j.properties +++ /dev/null @@ -1,21 +0,0 @@ -log4j.rootLogger=DEBUG,CONSOLE - -log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender -log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout -log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n - -log4j.appender.RFA=org.apache.log4j.RollingFileAppender -log4j.appender.RFA.layout=org.apache.log4j.PatternLayout -log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n -log4j.appender.RFA.File=/tmp/app.log - -# to enable, add SYSLOG to rootLogger -log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender -log4j.appender.SYSLOG.syslogHost=127.0.0.1 -log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout -log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n -log4j.appender.SYSLOG.Facility=LOCAL1 - -log4j.logger.org=info -#log4j.logger.org.apache.commons.beanutils=warn -log4j.logger.com.datatorrent=debug http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3/README.md ---------------------------------------------------------------------- diff --git a/examples/s3/README.md b/examples/s3/README.md new file mode 100644 index 0000000..2365765 --- /dev/null +++ b/examples/s3/README.md @@ -0,0 +1,99 @@ +## Amazon S3 to HDFS sync application + +Ingest and 
backup Amazon S3 data to Hadoop HDFS for data download from Amazon to Hadoop. + +This application transfers files from the configured S3 location to the destination path in HDFS. +The source code is available at: https://github.com/DataTorrent/examples/tree/master/tutorials/s3-to-hdfs-sync +Send feedback or feature requests to [email protected] + +## S3 tuple output example + +Sample application to show how to use the S3 tuple output module. + +The application reads records from HDFS using `FSRecordReaderModule`. +These records are then written to Amazon S3 using `S3BytesOutputModule`. + +### How to configure +The properties file META-INF/properties-s3TupleOutput.xml shows how to configure the respective operators. + +### How to compile +`shell> mvn clean package` + +This will generate the application package malhar-examples-s3-3.8.0-SNAPSHOT.apa inside the target directory. + +### How to run +Use the application package generated above to launch the application from the UI console (if available) or the apex command line interface. + +`apex> launch target/malhar-examples-s3-3.8.0-SNAPSHOT.apa` + +Sample application to show how to use the S3OutputModule to upload files into Amazon S3 Bucket. + +Operators in the sample application are as follows: +1) FSInputModule which reads files from file systems HDFS/NFS and emits FileMetadata, BlockMetadata, BlockBytes. +2) S3OutputModule which uploads the files into S3 Bucket using multi-part upload feature. + +Please configure the below S3OutputModule properties in src/main/resources/META-INF/properties-s3Output.xml before launching the application: + +- ***accessKey*** - String + - Specifies the AWS access key to access the Amazon S3 bucket. + +- ***secretAccessKey*** - String + - Specifies the AWS secret access key to access the Amazon S3 bucket. 
+ +For more information about the access key and secret access key, please refer to [IAM](http://docs.aws.amazon.com/IAM/latest/UserGuide/best-practices.html) + +- ***bucketName*** - String + - Specifies the name of the S3 bucket to copy the files/directories to. + +- ***outputDirectoryPath*** - String + - Specifies the path of the output directory to copy the files/directories to. + +Suppose **app.hdfs2s3** is the name of the **bucket** and you want to copy the files to the S3 location (app.hdfs2s3/apex/s3output); then configure the properties as below: + +```xml + <property> + <name>dt.operator.S3OutputModule.prop.bucketName</name> + <value>app.hdfs2s3</value> + </property> + <property> + <name>dt.operator.S3OutputModule.prop.outputDirectoryPath</name> + <value>apex/s3output</value> + </property> +``` + +## S3Output + +Sample application to show how to use the S3OutputModule to upload files into Amazon S3 Bucket. + +Operators in the sample application are as follows: +1) FSInputModule which reads files from file systems HDFS/NFS and emits FileMetadata, BlockMetadata, BlockBytes. +2) S3OutputModule which uploads the files into S3 Bucket using multi-part upload feature. + +Please configure the below S3OutputModule properties in src/main/resources/META-INF/properties-s3Output.xml before launching the application: + +- ***accessKey*** - String + - Specifies the AWS access key to access the Amazon S3 bucket. + +- ***secretAccessKey*** - String + - Specifies the AWS secret access key to access the Amazon S3 bucket. + +For more information about the access key and secret access key, please refer to [IAM](http://docs.aws.amazon.com/IAM/latest/UserGuide/best-practices.html) + +- ***bucketName*** - String + - Specifies the name of the S3 bucket to copy the files/directories to. + +- ***outputDirectoryPath*** - String + - Specifies the path of the output directory to copy the files/directories to. 
+ +Suppose **app.hdfs2s3** is the name of the **bucket** and you want to copy the files to the S3 location (app.hdfs2s3/apex/s3output); then configure the properties as below: + +```xml + <property> + <name>dt.operator.S3OutputModule.prop.bucketName</name> + <value>app.hdfs2s3</value> + </property> + <property> + <name>dt.operator.S3OutputModule.prop.outputDirectoryPath</name> + <value>apex/s3output</value> + </property> +``` http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3/pom.xml ---------------------------------------------------------------------- diff --git a/examples/s3/pom.xml b/examples/s3/pom.xml new file mode 100644 index 0000000..07327c5 --- /dev/null +++ b/examples/s3/pom.xml @@ -0,0 +1,31 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <artifactId>malhar-examples</artifactId> + <groupId>org.apache.apex</groupId> + <version>3.8.0-SNAPSHOT</version> + </parent> + + <artifactId>malhar-examples-s3</artifactId> + <packaging>jar</packaging> + + <!-- groupId and version are not declared here; they come from the malhar-examples parent above --> + + <dependencies> + <!-- aws-java-sdk-s3 uses the default (compile) scope; apex-engine is test-scoped only --> + <dependency> + <groupId>com.amazonaws</groupId> + <artifactId>aws-java-sdk-s3</artifactId> + <version>1.10.73</version> + </dependency> + <dependency> + <groupId>org.apache.apex</groupId> + <artifactId>apex-engine</artifactId> + <version>${apex.core.version}</version> + <scope>test</scope> + </dependency> + </dependencies> + +</project> http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3/src/assemble/appPackage.xml ---------------------------------------------------------------------- diff --git a/examples/s3/src/assemble/appPackage.xml b/examples/s3/src/assemble/appPackage.xml new file mode 100644 index 
0000000..7ad071c --- /dev/null +++ b/examples/s3/src/assemble/appPackage.xml @@ -0,0 +1,43 @@ +<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd"> + <id>appPackage</id> + <formats> + <format>jar</format> + </formats> + <includeBaseDirectory>false</includeBaseDirectory> + <fileSets> + <fileSet> + <directory>${basedir}/target/</directory> + <outputDirectory>/app</outputDirectory> + <includes> + <include>${project.artifactId}-${project.version}.jar</include> + </includes> + </fileSet> + <fileSet> + <directory>${basedir}/target/deps</directory> + <outputDirectory>/lib</outputDirectory> + </fileSet> + <fileSet> + <directory>${basedir}/src/site/conf</directory> + <outputDirectory>/conf</outputDirectory> + <includes> + <include>*.xml</include> + </includes> + </fileSet> + <fileSet> + <directory>${basedir}/src/main/resources/META-INF</directory> + <outputDirectory>/META-INF</outputDirectory> + </fileSet> + <fileSet> + <directory>${basedir}/src/main/resources/app</directory> + <outputDirectory>/app</outputDirectory> + </fileSet> + <fileSet> + <directory>${basedir}/src/main/resources/resources</directory> + <outputDirectory>/resources</outputDirectory> + </fileSet> + </fileSets> + +</assembly> + http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3/src/main/java/org/apache/apex/examples/s3Output/Application.java ---------------------------------------------------------------------- diff --git a/examples/s3/src/main/java/org/apache/apex/examples/s3Output/Application.java b/examples/s3/src/main/java/org/apache/apex/examples/s3Output/Application.java new file mode 100644 index 0000000..574a17f --- /dev/null +++ b/examples/s3/src/main/java/org/apache/apex/examples/s3Output/Application.java @@ -0,0 +1,28 @@ +package 
org.apache.apex.examples.s3Output; + +import org.apache.apex.malhar.lib.fs.s3.S3OutputModule; +import org.apache.hadoop.conf.Configuration; + +import com.datatorrent.api.DAG; +import com.datatorrent.api.StreamingApplication; +import com.datatorrent.api.annotation.ApplicationAnnotation; +import com.datatorrent.lib.io.fs.FSInputModule; + +/** + * Application illustrating how to copy files from HDFS to an S3 bucket: an FSInputModule ("HDFSInputModule") feeds file metadata, block metadata and block data to an S3OutputModule ("S3OutputModule"). + */ +@ApplicationAnnotation(name="HDFSToS3App") +public class Application implements StreamingApplication +{ + @Override + public void populateDAG(DAG dag, Configuration conf) + { + FSInputModule inputModule = dag.addModule("HDFSInputModule", new FSInputModule()); + S3OutputModule outputModule = dag.addModule("S3OutputModule", new S3OutputModule()); + + dag.addStream("FileMetaData", inputModule.filesMetadataOutput, outputModule.filesMetadataInput); + dag.addStream("BlocksMetaData", inputModule.blocksMetadataOutput, outputModule.blocksMetadataInput) + .setLocality(DAG.Locality.CONTAINER_LOCAL); + dag.addStream("BlocksData", inputModule.messages, outputModule.blockData).setLocality(DAG.Locality.CONTAINER_LOCAL); + } +} http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3/src/main/java/org/apache/apex/examples/s3ToHdfsSync/S3ToHDFSSyncApplication.java ---------------------------------------------------------------------- diff --git a/examples/s3/src/main/java/org/apache/apex/examples/s3ToHdfsSync/S3ToHDFSSyncApplication.java b/examples/s3/src/main/java/org/apache/apex/examples/s3ToHdfsSync/S3ToHDFSSyncApplication.java new file mode 100644 index 0000000..9e64c59 --- /dev/null +++ b/examples/s3/src/main/java/org/apache/apex/examples/s3ToHdfsSync/S3ToHDFSSyncApplication.java @@ -0,0 +1,32 @@ +package org.apache.apex.examples.s3ToHdfsSync; + +import org.apache.hadoop.conf.Configuration; + +import com.datatorrent.api.DAG; +import com.datatorrent.api.DAG.Locality; +import com.datatorrent.api.StreamingApplication; +import 
com.datatorrent.api.annotation.ApplicationAnnotation; +import com.datatorrent.lib.io.fs.HDFSFileCopyModule; +import com.datatorrent.lib.io.fs.S3InputModule; + +/** + * Simple application illustrating file copy from S3 to HDFS: an S3InputModule feeds file metadata, block metadata and block data to an HDFSFileCopyModule. + */ +@ApplicationAnnotation(name="S3-to-HDFS-Sync") +public class S3ToHDFSSyncApplication implements StreamingApplication +{ + + @Override + public void populateDAG(DAG dag, Configuration conf) + { + + S3InputModule inputModule = dag.addModule("S3InputModule", new S3InputModule()); + HDFSFileCopyModule outputModule = dag.addModule("HDFSFileCopyModule", new HDFSFileCopyModule()); + + dag.addStream("FileMetaData", inputModule.filesMetadataOutput, outputModule.filesMetadataInput); + dag.addStream("BlocksMetaData", inputModule.blocksMetadataOutput, outputModule.blocksMetadataInput) + .setLocality(Locality.THREAD_LOCAL); + dag.addStream("BlocksData", inputModule.messages, outputModule.blockData).setLocality(Locality.THREAD_LOCAL); + } + +} http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3/src/main/java/org/apache/apex/examples/s3TupleOutput/Application.java ---------------------------------------------------------------------- diff --git a/examples/s3/src/main/java/org/apache/apex/examples/s3TupleOutput/Application.java b/examples/s3/src/main/java/org/apache/apex/examples/s3TupleOutput/Application.java new file mode 100644 index 0000000..d5855a0 --- /dev/null +++ b/examples/s3/src/main/java/org/apache/apex/examples/s3TupleOutput/Application.java @@ -0,0 +1,26 @@ +package org.apache.apex.examples.s3TupleOutput; + +import org.apache.apex.malhar.lib.fs.FSRecordReaderModule; +import org.apache.apex.malhar.lib.fs.s3.S3TupleOutputModule.S3BytesOutputModule; +import org.apache.hadoop.conf.Configuration; + +import com.datatorrent.api.DAG; +import com.datatorrent.api.StreamingApplication; +import com.datatorrent.api.annotation.ApplicationAnnotation; + +/** + * Simple application illustrating how records read by an FSRecordReaderModule are written to S3 by an S3BytesOutputModule. + */ 
+@ApplicationAnnotation(name="s3-output-line") +public class Application implements StreamingApplication +{ + + public void populateDAG(DAG dag, Configuration conf) /* connects the record reader's records port to the S3 output module's input port */ + { + FSRecordReaderModule recordReader = dag.addModule("lineInput", FSRecordReaderModule.class); + S3BytesOutputModule s3StringOutputModule = dag.addModule("s3output", S3BytesOutputModule.class); /* operator name must stay "s3output": the dt.operator.s3output.prop.* keys in META-INF/properties-s3TupleOutput.xml are matched against it */ + dag.addStream("data", recordReader.records, s3StringOutputModule.input); + + } + +} http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3/src/main/resources/META-INF/properties-s3Output.xml ---------------------------------------------------------------------- diff --git a/examples/s3/src/main/resources/META-INF/properties-s3Output.xml b/examples/s3/src/main/resources/META-INF/properties-s3Output.xml new file mode 100644 index 0000000..5a07e12 --- /dev/null +++ b/examples/s3/src/main/resources/META-INF/properties-s3Output.xml @@ -0,0 +1,35 @@ +<?xml version="1.0"?> +<configuration> + <property> + <name>dt.operator.HDFSInputModule.prop.files</name> + <value>hdfs://source-namenode-service/user/dtuser/path-to-input-directory</value> + </property> + <property> + <name>dt.operator.HDFSInputModule.prop.maxReaders</name> + <value>6</value> + </property> + <property> + <name>dt.operator.HDFSInputModule.prop.minReaders</name> + <value>6</value> + </property> + <property> + <name>dt.operator.HDFSInputModule.prop.blocksThreshold</name> + <value>2</value> + </property> + <property> + <name>dt.operator.S3OutputModule.prop.accessKey</name> + <value></value> + </property> + <property> + <name>dt.operator.S3OutputModule.prop.secretAccessKey</name> + <value></value> + </property> + <property> + <name>dt.operator.S3OutputModule.prop.bucketName</name> + <value></value> + </property> + <property> + <name>dt.operator.S3OutputModule.prop.outputDirectoryPath</name> + <value></value> + </property> +</configuration> 
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3/src/main/resources/META-INF/properties-s3ToHdfsSync.xml ---------------------------------------------------------------------- diff --git a/examples/s3/src/main/resources/META-INF/properties-s3ToHdfsSync.xml b/examples/s3/src/main/resources/META-INF/properties-s3ToHdfsSync.xml new file mode 100644 index 0000000..6d8ecea --- /dev/null +++ b/examples/s3/src/main/resources/META-INF/properties-s3ToHdfsSync.xml @@ -0,0 +1,12 @@ +<?xml version="1.0"?> +<configuration> + <property> + <name>dt.operator.S3InputModule.prop.files</name> + <value>s3n://ACCESS_KEY_ID:SECRET_KEY@BUCKET_NAME/DIRECTORY</value> + </property> + <property> + <name>dt.operator.HDFSFileCopyModule.prop.outputDirectoryPath</name> + <value>hdfs://destination-namenode-service:port/path-to-output-directory</value> + </property> +</configuration> + http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3/src/main/resources/META-INF/properties-s3TupleOutput.xml ---------------------------------------------------------------------- diff --git a/examples/s3/src/main/resources/META-INF/properties-s3TupleOutput.xml b/examples/s3/src/main/resources/META-INF/properties-s3TupleOutput.xml new file mode 100644 index 0000000..c1365bd --- /dev/null +++ b/examples/s3/src/main/resources/META-INF/properties-s3TupleOutput.xml @@ -0,0 +1,47 @@ +<?xml version="1.0"?> +<configuration> + <property> + <name>dt.operator.lineInput.prop.files</name> + <value>/user/appuser/test</value> + </property> + <property> + <name>dt.operator.*.attr.MEMORY_MB</name> + <value>5000</value> + </property> + <property> + <name>dt.operator.s3output.prop.accessKey</name> + <value>ACCESS_KEY_ID</value> + </property> + <property> + <name>dt.operator.s3output.prop.secretAccessKey</name> + <value>SECRET_ACCESS_KEY</value> + </property> + <property> + <name>dt.operator.s3output.prop.bucketName</name> + <value>BUCKET_NAME</value> + </property> + <property> + 
<name>dt.operator.s3output.prop.outputDirectoryPath</name> + <value>test</value> + </property> + <property> + <name>dt.operator.s3output.prop.maxTuplesPerSecPerPartition</name> + <value>300000</value> + </property> + <property> + <name>dt.operator.s3output.prop.maxS3UploadPartitions</name> + <value>8</value> + </property> + <property> + <name>dt.operator.lineInput.prop.maxReaders</name> + <value>8</value> + </property> + <property> + <name>dt.operator.lineInput.prop.minReaders</name> + <value>1</value> + </property> + <property> + <name>dt.loggers.level</name> + <value>org.apache.apex.malhar.lib.fs.s3.*:DEBUG,org.apache.apex.*:DEBUG,com.datatorrent.stram.plan.physical.*:DEBUG,com.datatorrent.lib.*:DEBUG</value> + </property> +</configuration> http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3/src/test/resources/log4j.properties ---------------------------------------------------------------------- diff --git a/examples/s3/src/test/resources/log4j.properties b/examples/s3/src/test/resources/log4j.properties new file mode 100644 index 0000000..3bfcdc5 --- /dev/null +++ b/examples/s3/src/test/resources/log4j.properties @@ -0,0 +1,21 @@ +log4j.rootLogger=DEBUG,CONSOLE + +log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender +log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout +log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n + +log4j.appender.RFA=org.apache.log4j.RollingFileAppender +log4j.appender.RFA.layout=org.apache.log4j.PatternLayout +log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n +log4j.appender.RFA.File=/tmp/app.log + +# to enable, add SYSLOG to rootLogger +log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender +log4j.appender.SYSLOG.syslogHost=127.0.0.1 +log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout +log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n +log4j.appender.SYSLOG.Facility=LOCAL1 + 
+log4j.logger.org=info +#log4j.logger.org.apache.commons.beanutils=warn +log4j.logger.com.datatorrent=debug http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3output/README.md ---------------------------------------------------------------------- diff --git a/examples/s3output/README.md b/examples/s3output/README.md deleted file mode 100644 index fe5d7e4..0000000 --- a/examples/s3output/README.md +++ /dev/null @@ -1,34 +0,0 @@ -Sample application to show how to use the S3OutputModule to upload files into Amazon S3 Bucket. - -Operators in sample application are as follows: -1) FSInputModule which reads files from file systems HDFS/NFS and emits FileMetadata, BlockMetadata, BlockBytes. -2) S3OutputModule which uploads the files into S3 Bucket using multi-part upload feature. - -Please configure the below S3OutputModule properties in src/main/resources/META-INF/properties.xml before launching the application: - -- ***accessKey*** - String - - Specifies the AWS access key to access the Amazon S3 bucket. - -- ***secretAccessKey*** - String - - Specifies the AWS secret access key to access the Amazon S3 bucket. - -For more information about access key and secret access key, Please refer to [IAM](http://docs.aws.amazon.com/IAM/latest/UserGuide/best-practices.html) - -- ***bucketName*** - String - - Specifies the name of the S3 bucket to copy the files/directories. - -- ***outputDirectoryPath*** - String - - Specifies the path of the output directory to copy the files/directories. 
- -Suppose, **app.hdfs2s3** is the name of the **bucket** and you want to copy the files to S3 location (app.hdfs2s3/apex/s3output) then configure the properties as below: - -```xml - <property> - <name>dt.operator.S3OutputModule.prop.bucketName</name> - <value>app.hdfs2s3</value> - </property> - <property> - <name>dt.operator.S3OutputModule.prop.outputDirectoryPath</name> - <value>apex/s3output</value> - </property> -``` http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3output/XmlJavadocCommentsExtractor.xsl ---------------------------------------------------------------------- diff --git a/examples/s3output/XmlJavadocCommentsExtractor.xsl b/examples/s3output/XmlJavadocCommentsExtractor.xsl deleted file mode 100644 index 08075a9..0000000 --- a/examples/s3output/XmlJavadocCommentsExtractor.xsl +++ /dev/null @@ -1,44 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!-- - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - ---> - -<!-- - Document : XmlJavadocCommentsExtractor.xsl - Created on : September 16, 2014, 11:30 AM - Description: - The transformation strips off all information except for comments and tags from xml javadoc generated by xml-doclet. 
---> - -<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> - <xsl:output method="xml" standalone="yes"/> - - <!-- copy xml by selecting only the following nodes, attributes and text --> - <xsl:template match="node()|text()|@*"> - <xsl:copy> - <xsl:apply-templates select="root|package|class|interface|method|field|type|comment|tag|text()|@name|@qualified|@text"/> - </xsl:copy> - </xsl:template> - - <!-- Strip off the following paths from the selected xml --> - <xsl:template match="//root/package/interface/interface - |//root/package/interface/method/@qualified - |//root/package/class/interface - |//root/package/class/class - |//root/package/class/method/@qualified - |//root/package/class/field/@qualified" /> - - <xsl:strip-space elements="*"/> -</xsl:stylesheet> http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3output/pom.xml ---------------------------------------------------------------------- diff --git a/examples/s3output/pom.xml b/examples/s3output/pom.xml deleted file mode 100644 index c81c6d8..0000000 --- a/examples/s3output/pom.xml +++ /dev/null @@ -1,262 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - <modelVersion>4.0.0</modelVersion> - - <groupId>com.example</groupId> - <version>1.0-SNAPSHOT</version> - <artifactId>s3output</artifactId> - <packaging>jar</packaging> - - <name>S3 Output Application</name> - <description>Sample application for S3 output module</description> - - <properties> - <!-- change this if you desire to use a different version of Apex Core --> - <apex.version>3.5.0</apex.version> - <malhar.version>3.7.0-SNAPSHOT</malhar.version> - <apex.apppackage.classpath>lib/*.jar</apex.apppackage.classpath> - </properties> - - <build> - <plugins> - <plugin> - 
<groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-eclipse-plugin</artifactId> - <version>2.9</version> - <configuration> - <downloadSources>true</downloadSources> - </configuration> - </plugin> - <plugin> - <artifactId>maven-compiler-plugin</artifactId> - <version>3.3</version> - <configuration> - <encoding>UTF-8</encoding> - <source>1.7</source> - <target>1.7</target> - <debug>true</debug> - <optimize>false</optimize> - <showDeprecation>true</showDeprecation> - <showWarnings>true</showWarnings> - </configuration> - </plugin> - <plugin> - <artifactId>maven-dependency-plugin</artifactId> - <version>2.8</version> - <executions> - <execution> - <id>copy-dependencies</id> - <phase>prepare-package</phase> - <goals> - <goal>copy-dependencies</goal> - </goals> - <configuration> - <outputDirectory>target/deps</outputDirectory> - <includeScope>runtime</includeScope> - </configuration> - </execution> - </executions> - </plugin> - - <plugin> - <artifactId>maven-assembly-plugin</artifactId> - <executions> - <execution> - <id>app-package-assembly</id> - <phase>package</phase> - <goals> - <goal>single</goal> - </goals> - <configuration> - <finalName>${project.artifactId}-${project.version}-apexapp</finalName> - <appendAssemblyId>false</appendAssemblyId> - <descriptors> - <descriptor>src/assemble/appPackage.xml</descriptor> - </descriptors> - <archiverConfig> - <defaultDirectoryMode>0755</defaultDirectoryMode> - </archiverConfig> - <archive> - <manifestEntries> - <Class-Path>${apex.apppackage.classpath}</Class-Path> - <DT-Engine-Version>${apex.version}</DT-Engine-Version> - <DT-App-Package-Group-Id>${project.groupId}</DT-App-Package-Group-Id> - <DT-App-Package-Name>${project.artifactId}</DT-App-Package-Name> - <DT-App-Package-Version>${project.version}</DT-App-Package-Version> - <DT-App-Package-Display-Name>${project.name}</DT-App-Package-Display-Name> - <DT-App-Package-Description>${project.description}</DT-App-Package-Description> - </manifestEntries> - </archive> 
- </configuration> - </execution> - </executions> - </plugin> - - <plugin> - <artifactId>maven-antrun-plugin</artifactId> - <version>1.7</version> - <executions> - <execution> - <phase>package</phase> - <configuration> - <target> - <move file="${project.build.directory}/${project.artifactId}-${project.version}-apexapp.jar" - tofile="${project.build.directory}/${project.artifactId}-${project.version}.apa" /> - </target> - </configuration> - <goals> - <goal>run</goal> - </goals> - </execution> - <execution> - <!-- create resource directory for xml javadoc--> - <id>createJavadocDirectory</id> - <phase>generate-resources</phase> - <configuration> - <tasks> - <delete dir="${project.build.directory}/generated-resources/xml-javadoc"/> - <mkdir dir="${project.build.directory}/generated-resources/xml-javadoc"/> - </tasks> - </configuration> - <goals> - <goal>run</goal> - </goals> - </execution> - </executions> - </plugin> - - <plugin> - <groupId>org.codehaus.mojo</groupId> - <artifactId>build-helper-maven-plugin</artifactId> - <version>1.9.1</version> - <executions> - <execution> - <id>attach-artifacts</id> - <phase>package</phase> - <goals> - <goal>attach-artifact</goal> - </goals> - <configuration> - <artifacts> - <artifact> - <file>target/${project.artifactId}-${project.version}.apa</file> - <type>apa</type> - </artifact> - </artifacts> - <skipAttach>false</skipAttach> - </configuration> - </execution> - </executions> - </plugin> - - <!-- generate javdoc --> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-javadoc-plugin</artifactId> - <executions> - <!-- generate xml javadoc --> - <execution> - <id>xml-doclet</id> - <phase>generate-resources</phase> - <goals> - <goal>javadoc</goal> - </goals> - <configuration> - <doclet>com.github.markusbernhardt.xmldoclet.XmlDoclet</doclet> - <additionalparam>-d ${project.build.directory}/generated-resources/xml-javadoc -filename ${project.artifactId}-${project.version}-javadoc.xml</additionalparam> - 
<useStandardDocletOptions>false</useStandardDocletOptions> - <docletArtifact> - <groupId>com.github.markusbernhardt</groupId> - <artifactId>xml-doclet</artifactId> - <version>1.0.4</version> - </docletArtifact> - </configuration> - </execution> - </executions> - </plugin> - <!-- Transform xml javadoc to stripped down version containing only class/interface comments and tags--> - <plugin> - <groupId>org.codehaus.mojo</groupId> - <artifactId>xml-maven-plugin</artifactId> - <version>1.0</version> - <executions> - <execution> - <id>transform-xmljavadoc</id> - <phase>generate-resources</phase> - <goals> - <goal>transform</goal> - </goals> - </execution> - </executions> - <configuration> - <transformationSets> - <transformationSet> - <dir>${project.build.directory}/generated-resources/xml-javadoc</dir> - <includes> - <include>${project.artifactId}-${project.version}-javadoc.xml</include> - </includes> - <stylesheet>XmlJavadocCommentsExtractor.xsl</stylesheet> - <outputDir>${project.build.directory}/generated-resources/xml-javadoc</outputDir> - </transformationSet> - </transformationSets> - </configuration> - </plugin> - <!-- copy xml javadoc to class jar --> - <plugin> - <artifactId>maven-resources-plugin</artifactId> - <version>2.6</version> - <executions> - <execution> - <id>copy-resources</id> - <phase>process-resources</phase> - <goals> - <goal>copy-resources</goal> - </goals> - <configuration> - <outputDirectory>${basedir}/target/classes</outputDirectory> - <resources> - <resource> - <directory>${project.build.directory}/generated-resources/xml-javadoc</directory> - <includes> - <include>${project.artifactId}-${project.version}-javadoc.xml</include> - </includes> - <filtering>true</filtering> - </resource> - </resources> - </configuration> - </execution> - </executions> - </plugin> - - </plugins> - - </build> - - <dependencies> - <!-- add your dependencies here --> - <dependency> - <groupId>org.apache.apex</groupId> - <artifactId>malhar-library</artifactId> - 
<version>${malhar.version}</version> - </dependency> - <dependency> - <groupId>junit</groupId> - <artifactId>junit</artifactId> - <version>4.10</version> - <scope>test</scope> - </dependency> - <dependency> - <groupId>org.apache.apex</groupId> - <artifactId>apex-common</artifactId> - <version>${apex.version}</version> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.apex</groupId> - <artifactId>apex-engine</artifactId> - <version>${apex.version}</version> - <scope>test</scope> - </dependency> - </dependencies> - -</project> http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3output/src/assemble/appPackage.xml ---------------------------------------------------------------------- diff --git a/examples/s3output/src/assemble/appPackage.xml b/examples/s3output/src/assemble/appPackage.xml deleted file mode 100644 index 7ad071c..0000000 --- a/examples/s3output/src/assemble/appPackage.xml +++ /dev/null @@ -1,43 +0,0 @@ -<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2" - xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd"> - <id>appPackage</id> - <formats> - <format>jar</format> - </formats> - <includeBaseDirectory>false</includeBaseDirectory> - <fileSets> - <fileSet> - <directory>${basedir}/target/</directory> - <outputDirectory>/app</outputDirectory> - <includes> - <include>${project.artifactId}-${project.version}.jar</include> - </includes> - </fileSet> - <fileSet> - <directory>${basedir}/target/deps</directory> - <outputDirectory>/lib</outputDirectory> - </fileSet> - <fileSet> - <directory>${basedir}/src/site/conf</directory> - <outputDirectory>/conf</outputDirectory> - <includes> - <include>*.xml</include> - </includes> - </fileSet> - <fileSet> - <directory>${basedir}/src/main/resources/META-INF</directory> - 
<outputDirectory>/META-INF</outputDirectory> - </fileSet> - <fileSet> - <directory>${basedir}/src/main/resources/app</directory> - <outputDirectory>/app</outputDirectory> - </fileSet> - <fileSet> - <directory>${basedir}/src/main/resources/resources</directory> - <outputDirectory>/resources</outputDirectory> - </fileSet> - </fileSets> - -</assembly> - http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3output/src/main/java/com/example/s3output/Application.java ---------------------------------------------------------------------- diff --git a/examples/s3output/src/main/java/com/example/s3output/Application.java b/examples/s3output/src/main/java/com/example/s3output/Application.java deleted file mode 100644 index 0b9a0e7..0000000 --- a/examples/s3output/src/main/java/com/example/s3output/Application.java +++ /dev/null @@ -1,28 +0,0 @@ -package com.example.s3output; - -import org.apache.apex.malhar.lib.fs.s3.S3OutputModule; -import org.apache.hadoop.conf.Configuration; - -import com.datatorrent.api.DAG; -import com.datatorrent.api.StreamingApplication; -import com.datatorrent.api.annotation.ApplicationAnnotation; -import com.datatorrent.lib.io.fs.FSInputModule; - -/** - * Application illustrating copy files from HDFS to S3 bucket. 
- */ -@ApplicationAnnotation(name="HDFSToS3App") -public class Application implements StreamingApplication -{ - @Override - public void populateDAG(DAG dag, Configuration conf) - { - FSInputModule inputModule = dag.addModule("HDFSInputModule", new FSInputModule()); - S3OutputModule outputModule = dag.addModule("S3OutputModule", new S3OutputModule()); - - dag.addStream("FileMetaData", inputModule.filesMetadataOutput, outputModule.filesMetadataInput); - dag.addStream("BlocksMetaData", inputModule.blocksMetadataOutput, outputModule.blocksMetadataInput) - .setLocality(DAG.Locality.CONTAINER_LOCAL); - dag.addStream("BlocksData", inputModule.messages, outputModule.blockData).setLocality(DAG.Locality.CONTAINER_LOCAL); - } -} http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/b25a1281/examples/s3output/src/main/resources/META-INF/properties.xml ---------------------------------------------------------------------- diff --git a/examples/s3output/src/main/resources/META-INF/properties.xml b/examples/s3output/src/main/resources/META-INF/properties.xml deleted file mode 100644 index 5a07e12..0000000 --- a/examples/s3output/src/main/resources/META-INF/properties.xml +++ /dev/null @@ -1,35 +0,0 @@ -<?xml version="1.0"?> -<configuration> - <property> - <name>dt.operator.HDFSInputModule.prop.files</name> - <value>hdfs://source-namenode-service/user/dtuser/path-to-input-directory</value> - </property> - <property> - <name>dt.operator.HDFSInputModule.prop.maxReaders</name> - <value>6</value> - </property> - <property> - <name>dt.operator.HDFSInputModule.prop.minReaders</name> - <value>6</value> - </property> - <property> - <name>dt.operator.HDFSInputModule.prop.blocksThreshold</name> - <value>2</value> - </property> - <property> - <name>dt.operator.S3OutputModule.prop.accessKey</name> - <value></value> - </property> - <property> - <name>dt.operator.S3OutputModule.prop.secretAccessKey</name> - <value></value> - </property> - <property> - 
<name>dt.operator.S3OutputModule.prop.bucketName</name> - <value></value> - </property> - <property> - <name>dt.operator.S3OutputModule.prop.outputDirectoryPath</name> - <value></value> - </property> -</configuration>
