http://git-wip-us.apache.org/repos/asf/hadoop/blob/6cf023f9/hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Core_3.1.0.xml ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Core_3.1.0.xml b/hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Core_3.1.0.xml new file mode 100644 index 0000000..f4762d9 --- /dev/null +++ b/hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Core_3.1.0.xml @@ -0,0 +1,28075 @@ +<?xml version="1.0" encoding="iso-8859-1" standalone="no"?> +<!-- Generated by the JDiff Javadoc doclet --> +<!-- (http://www.jdiff.org) --> +<!-- on Fri Mar 30 00:32:28 UTC 2018 --> + +<api + xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' + xsi:noNamespaceSchemaLocation='api.xsd' + name="Apache Hadoop MapReduce Core 3.1.0" + jdversion="1.0.9"> + +<!-- Command line arguments = -doclet org.apache.hadoop.classification.tools.IncludePublicAnnotationsJDiffDoclet -docletpath /build/source/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/target/hadoop-annotations.jar:/build/source/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/target/jdiff.jar -verbose -classpath /build/source/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/target/classes:/build/source/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/target/hadoop-yarn-client-3.1.0.jar:/maven/log4j/log4j/1.2.17/log4j-1.2.17.jar:/build/source/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/target/hadoop-yarn-api-3.1.0.jar:/build/source/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/target/hadoop-yarn-common-3.1.0.jar:/build/source/hadoop-common-project/hadoop-auth/target/hadoop-auth-3.1.0.jar:/maven/com/nimbusds/nimbus-jose-jwt/4.41.1/nimbus-jose-jwt-4.41.1.jar:/maven/com/github/stephen 
c/jcip/jcip-annotations/1.0-1/jcip-annotations-1.0-1.jar:/maven/net/minidev/json-smart/2.3/json-smart-2.3.jar:/maven/net/minidev/accessors-smart/1.2/accessors-smart-1.2.jar:/maven/org/ow2/asm/asm/5.0.4/asm-5.0.4.jar:/maven/org/apache/curator/curator-framework/2.12.0/curator-framework-2.12.0.jar:/maven/javax/xml/bind/jaxb-api/2.2.11/jaxb-api-2.2.11.jar:/maven/org/apache/commons/commons-compress/1.4.1/commons-compress-1.4.1.jar:/maven/org/tukaani/xz/1.0/xz-1.0.jar:/maven/javax/servlet/javax.servlet-api/3.1.0/javax.servlet-api-3.1.0.jar:/maven/org/eclipse/jetty/jetty-util/9.3.19.v20170502/jetty-util-9.3.19.v20170502.jar:/maven/com/sun/jersey/jersey-core/1.19/jersey-core-1.19.jar:/maven/javax/ws/rs/jsr311-api/1.1.1/jsr311-api-1.1.1.jar:/maven/com/sun/jersey/jersey-client/1.19/jersey-client-1.19.jar:/maven/commons-io/commons-io/2.5/commons-io-2.5.jar:/maven/com/google/inject/guice/4.0/guice-4.0.jar:/maven/javax/inject/javax.inject/1/javax.inject-1.jar:/maven/aopalliance/aopalliance/1.0/a opalliance-1.0.jar:/maven/com/sun/jersey/jersey-server/1.19/jersey-server-1.19.jar:/maven/com/sun/jersey/jersey-json/1.19/jersey-json-1.19.jar:/maven/org/codehaus/jettison/jettison/1.1/jettison-1.1.jar:/maven/com/sun/xml/bind/jaxb-impl/2.2.3-1/jaxb-impl-2.2.3-1.jar:/maven/org/codehaus/jackson/jackson-jaxrs/1.9.13/jackson-jaxrs-1.9.13.jar:/maven/org/codehaus/jackson/jackson-xc/1.9.13/jackson-xc-1.9.13.jar:/maven/com/sun/jersey/contribs/jersey-guice/1.19/jersey-guice-1.19.jar:/maven/com/fasterxml/jackson/core/jackson-core/2.7.8/jackson-core-2.7.8.jar:/maven/com/fasterxml/jackson/module/jackson-module-jaxb-annotations/2.7.8/jackson-module-jaxb-annotations-2.7.8.jar:/maven/com/fasterxml/jackson/jaxrs/jackson-jaxrs-json-provider/2.7.8/jackson-jaxrs-json-provider-2.7.8.jar:/maven/com/fasterxml/jackson/jaxrs/jackson-jaxrs-base/2.7.8/jackson-jaxrs-base-2.7.8.jar:/build/source/hadoop-hdfs-project/hadoop-hdfs-client/target/hadoop-hdfs-client-3.1.0.jar:/maven/com/squareup/okhttp/okhttp/2.7.5/o 
khttp-2.7.5.jar:/maven/com/squareup/okio/okio/1.6.0/okio-1.6.0.jar:/maven/com/fasterxml/jackson/core/jackson-annotations/2.7.8/jackson-annotations-2.7.8.jar:/maven/org/eclipse/jetty/jetty-server/9.3.19.v20170502/jetty-server-9.3.19.v20170502.jar:/maven/org/eclipse/jetty/jetty-http/9.3.19.v20170502/jetty-http-9.3.19.v20170502.jar:/maven/org/eclipse/jetty/jetty-io/9.3.19.v20170502/jetty-io-9.3.19.v20170502.jar:/maven/org/apache/htrace/htrace-core4/4.1.0-incubating/htrace-core4-4.1.0-incubating.jar:/maven/com/fasterxml/jackson/core/jackson-databind/2.7.8/jackson-databind-2.7.8.jar:/maven/com/google/protobuf/protobuf-java/2.5.0/protobuf-java-2.5.0.jar:/maven/org/apache/avro/avro/1.7.7/avro-1.7.7.jar:/maven/org/codehaus/jackson/jackson-core-asl/1.9.13/jackson-core-asl-1.9.13.jar:/maven/org/codehaus/jackson/jackson-mapper-asl/1.9.13/jackson-mapper-asl-1.9.13.jar:/maven/com/thoughtworks/paranamer/paranamer/2.3/paranamer-2.3.jar:/maven/org/xerial/snappy/snappy-java/1.0.5/snappy-java-1.0.5.j ar:/build/source/hadoop-common-project/hadoop-common/target/hadoop-common-3.1.0.jar:/maven/org/apache/commons/commons-math3/3.1.1/commons-math3-3.1.1.jar:/maven/org/apache/httpcomponents/httpclient/4.5.2/httpclient-4.5.2.jar:/maven/org/apache/httpcomponents/httpcore/4.4.4/httpcore-4.4.4.jar:/maven/commons-net/commons-net/3.6/commons-net-3.6.jar:/maven/org/eclipse/jetty/jetty-servlet/9.3.19.v20170502/jetty-servlet-9.3.19.v20170502.jar:/maven/org/eclipse/jetty/jetty-security/9.3.19.v20170502/jetty-security-9.3.19.v20170502.jar:/maven/org/eclipse/jetty/jetty-webapp/9.3.19.v20170502/jetty-webapp-9.3.19.v20170502.jar:/maven/org/eclipse/jetty/jetty-xml/9.3.19.v20170502/jetty-xml-9.3.19.v20170502.jar:/maven/javax/servlet/jsp/jsp-api/2.1/jsp-api-2.1.jar:/maven/com/sun/jersey/jersey-servlet/1.19/jersey-servlet-1.19.jar:/maven/commons-beanutils/commons-beanutils/1.9.3/commons-beanutils-1.9.3.jar:/maven/org/apache/commons/commons-configuration2/2.1.1/commons-configuration2-2.1.1.jar:/maven/org 
/apache/commons/commons-lang3/3.4/commons-lang3-3.4.jar:/maven/com/google/re2j/re2j/1.1/re2j-1.1.jar:/maven/com/google/code/gson/gson/2.2.4/gson-2.2.4.jar:/maven/com/jcraft/jsch/0.1.54/jsch-0.1.54.jar:/maven/org/apache/curator/curator-client/2.12.0/curator-client-2.12.0.jar:/maven/org/apache/curator/curator-recipes/2.12.0/curator-recipes-2.12.0.jar:/maven/com/google/code/findbugs/jsr305/3.0.0/jsr305-3.0.0.jar:/maven/org/apache/zookeeper/zookeeper/3.4.9/zookeeper-3.4.9.jar:/maven/org/apache/kerby/kerb-simplekdc/1.0.1/kerb-simplekdc-1.0.1.jar:/maven/org/apache/kerby/kerb-client/1.0.1/kerb-client-1.0.1.jar:/maven/org/apache/kerby/kerby-config/1.0.1/kerby-config-1.0.1.jar:/maven/org/apache/kerby/kerb-core/1.0.1/kerb-core-1.0.1.jar:/maven/org/apache/kerby/kerby-pkix/1.0.1/kerby-pkix-1.0.1.jar:/maven/org/apache/kerby/kerby-asn1/1.0.1/kerby-asn1-1.0.1.jar:/maven/org/apache/kerby/kerby-util/1.0.1/kerby-util-1.0.1.jar:/maven/org/apache/kerby/kerb-common/1.0.1/kerb-common-1.0.1.jar:/maven/org /apache/kerby/kerb-crypto/1.0.1/kerb-crypto-1.0.1.jar:/maven/org/apache/kerby/kerb-util/1.0.1/kerb-util-1.0.1.jar:/maven/org/apache/kerby/token-provider/1.0.1/token-provider-1.0.1.jar:/maven/org/apache/kerby/kerb-admin/1.0.1/kerb-admin-1.0.1.jar:/maven/org/apache/kerby/kerb-server/1.0.1/kerb-server-1.0.1.jar:/maven/org/apache/kerby/kerb-identity/1.0.1/kerb-identity-1.0.1.jar:/maven/org/apache/kerby/kerby-xdr/1.0.1/kerby-xdr-1.0.1.jar:/maven/org/codehaus/woodstox/stax2-api/3.1.4/stax2-api-3.1.4.jar:/maven/com/fasterxml/woodstox/woodstox-core/5.0.3/woodstox-core-5.0.3.jar:/maven/org/slf4j/slf4j-api/1.7.25/slf4j-api-1.7.25.jar:/maven/org/slf4j/slf4j-log4j12/1.7.25/slf4j-log4j12-1.7.25.jar:/build/source/hadoop-common-project/hadoop-annotations/target/hadoop-annotations-3.1.0.jar:/usr/lib/jvm/java-8-openjdk-amd64/lib/tools.jar:/maven/com/google/inject/extensions/guice-servlet/4.0/guice-servlet-4.0.jar:/maven/io/netty/netty/3.10.5.Final/netty-3.10.5.Final.jar:/maven/commons-logging/common 
s-logging/1.1.3/commons-logging-1.1.3.jar:/maven/com/google/guava/guava/11.0.2/guava-11.0.2.jar:/maven/commons-codec/commons-codec/1.11/commons-codec-1.11.jar:/maven/commons-cli/commons-cli/1.2/commons-cli-1.2.jar:/maven/commons-lang/commons-lang/2.6/commons-lang-2.6.jar:/maven/commons-collections/commons-collections/3.2.2/commons-collections-3.2.2.jar:/maven/xerces/xercesImpl/2.11.0/xercesImpl-2.11.0.jar:/maven/xml-apis/xml-apis/1.4.01/xml-apis-1.4.01.jar -sourcepath /build/source/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java -doclet org.apache.hadoop.classification.tools.IncludePublicAnnotationsJDiffDoclet -docletpath /build/source/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/target/hadoop-annotations.jar:/build/source/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/target/jdiff.jar -apidir /build/source/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-clie nt-core/target/site/jdiff/xml -apiname Apache Hadoop MapReduce Core 3.1.0 --> +<package name="org.apache.hadoop.filecache"> + <!-- start class org.apache.hadoop.filecache.DistributedCache --> + <class name="DistributedCache" extends="org.apache.hadoop.mapreduce.filecache.DistributedCache" + abstract="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <constructor name="DistributedCache" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </constructor> + <method name="addLocalArchives" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.conf.Configuration"/> + <param name="str" type="java.lang.String"/> + <doc> + <![CDATA[Add a archive that has been localized to the conf. Used + by internal DistributedCache code. 
+ @param conf The conf to modify to contain the localized caches + @param str a comma separated list of local archives]]> + </doc> + </method> + <method name="addLocalFiles" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.conf.Configuration"/> + <param name="str" type="java.lang.String"/> + <doc> + <![CDATA[Add a file that has been localized to the conf.. Used + by internal DistributedCache code. + @param conf The conf to modify to contain the localized caches + @param str a comma separated list of local files]]> + </doc> + </method> + <method name="createAllSymlink" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="Internal to MapReduce framework. Use DistributedCacheManager + instead."> + <param name="conf" type="org.apache.hadoop.conf.Configuration"/> + <param name="jobCacheDir" type="java.io.File"/> + <param name="workDir" type="java.io.File"/> + <exception name="IOException" type="java.io.IOException"/> + <doc> + <![CDATA[This method create symlinks for all files in a given dir in another + directory. Currently symlinks cannot be disabled. This is a NO-OP. + + @param conf the configuration + @param jobCacheDir the target directory for creating symlinks + @param workDir the directory in which the symlinks are created + @throws IOException + @deprecated Internal to MapReduce framework. 
Use DistributedCacheManager + instead.]]> + </doc> + </method> + <method name="getFileStatus" return="org.apache.hadoop.fs.FileStatus" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.conf.Configuration"/> + <param name="cache" type="java.net.URI"/> + <exception name="IOException" type="java.io.IOException"/> + <doc> + <![CDATA[Returns {@link FileStatus} of a given cache file on hdfs. Internal to + MapReduce. + @param conf configuration + @param cache cache file + @return <code>FileStatus</code> of a given cache file on hdfs + @throws IOException]]> + </doc> + </method> + <method name="getTimestamp" return="long" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.conf.Configuration"/> + <param name="cache" type="java.net.URI"/> + <exception name="IOException" type="java.io.IOException"/> + <doc> + <![CDATA[Returns mtime of a given cache file on hdfs. Internal to MapReduce. + @param conf configuration + @param cache cache file + @return mtime of a given cache file on hdfs + @throws IOException]]> + </doc> + </method> + <method name="setArchiveTimestamps" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.conf.Configuration"/> + <param name="timestamps" type="java.lang.String"/> + <doc> + <![CDATA[This is to check the timestamp of the archives to be localized. + Used by internal MapReduce code. + @param conf Configuration which stores the timestamp's + @param timestamps comma separated list of timestamps of archives. 
+ The order should be the same as the order in which the archives are added.]]> + </doc> + </method> + <method name="setFileTimestamps" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.conf.Configuration"/> + <param name="timestamps" type="java.lang.String"/> + <doc> + <![CDATA[This is to check the timestamp of the files to be localized. + Used by internal MapReduce code. + @param conf Configuration which stores the timestamp's + @param timestamps comma separated list of timestamps of files. + The order should be the same as the order in which the files are added.]]> + </doc> + </method> + <method name="setLocalArchives" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.conf.Configuration"/> + <param name="str" type="java.lang.String"/> + <doc> + <![CDATA[Set the conf to contain the location for localized archives. Used + by internal DistributedCache code. + @param conf The conf to modify to contain the localized caches + @param str a comma separated list of local archives]]> + </doc> + </method> + <method name="setLocalFiles" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.conf.Configuration"/> + <param name="str" type="java.lang.String"/> + <doc> + <![CDATA[Set the conf to contain the location for localized files. Used + by internal DistributedCache code. 
+ @param conf The conf to modify to contain the localized caches + @param str a comma separated list of local files]]> + </doc> + </method> + <field name="CACHE_FILES_SIZES" type="java.lang.String" + transient="false" volatile="false" + static="true" final="true" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Warning: {@link #CACHE_FILES_SIZES} is not a *public* constant. + The variable is kept for M/R 1.x applications, M/R 2.x applications should + use {@link MRJobConfig#CACHE_FILES_SIZES}]]> + </doc> + </field> + <field name="CACHE_ARCHIVES_SIZES" type="java.lang.String" + transient="false" volatile="false" + static="true" final="true" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Warning: {@link #CACHE_ARCHIVES_SIZES} is not a *public* constant. + The variable is kept for M/R 1.x applications, M/R 2.x applications should + use {@link MRJobConfig#CACHE_ARCHIVES_SIZES}]]> + </doc> + </field> + <field name="CACHE_ARCHIVES_TIMESTAMPS" type="java.lang.String" + transient="false" volatile="false" + static="true" final="true" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Warning: {@link #CACHE_ARCHIVES_TIMESTAMPS} is not a *public* constant. + The variable is kept for M/R 1.x applications, M/R 2.x applications should + use {@link MRJobConfig#CACHE_ARCHIVES_TIMESTAMPS}]]> + </doc> + </field> + <field name="CACHE_FILES_TIMESTAMPS" type="java.lang.String" + transient="false" volatile="false" + static="true" final="true" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Warning: {@link #CACHE_FILES_TIMESTAMPS} is not a *public* constant. 
+ The variable is kept for M/R 1.x applications, M/R 2.x applications should + use {@link MRJobConfig#CACHE_FILE_TIMESTAMPS}]]> + </doc> + </field> + <field name="CACHE_ARCHIVES" type="java.lang.String" + transient="false" volatile="false" + static="true" final="true" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Warning: {@link #CACHE_ARCHIVES} is not a *public* constant. + The variable is kept for M/R 1.x applications, M/R 2.x applications should + use {@link MRJobConfig#CACHE_ARCHIVES}]]> + </doc> + </field> + <field name="CACHE_FILES" type="java.lang.String" + transient="false" volatile="false" + static="true" final="true" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Warning: {@link #CACHE_FILES} is not a *public* constant. + The variable is kept for M/R 1.x applications, M/R 2.x applications should + use {@link MRJobConfig#CACHE_FILES}]]> + </doc> + </field> + <field name="CACHE_LOCALARCHIVES" type="java.lang.String" + transient="false" volatile="false" + static="true" final="true" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Warning: {@link #CACHE_LOCALARCHIVES} is not a *public* constant. + The variable is kept for M/R 1.x applications, M/R 2.x applications should + use {@link MRJobConfig#CACHE_LOCALARCHIVES}]]> + </doc> + </field> + <field name="CACHE_LOCALFILES" type="java.lang.String" + transient="false" volatile="false" + static="true" final="true" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Warning: {@link #CACHE_LOCALFILES} is not a *public* constant. + The variable is kept for M/R 1.x applications, M/R 2.x applications should + use {@link MRJobConfig#CACHE_LOCALFILES}]]> + </doc> + </field> + <field name="CACHE_SYMLINK" type="java.lang.String" + transient="false" volatile="false" + static="true" final="true" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Warning: {@link #CACHE_SYMLINK} is not a *public* constant. 
+ The variable is kept for M/R 1.x applications, M/R 2.x applications should + use {@link MRJobConfig#CACHE_SYMLINK}]]> + </doc> + </field> + <doc> + <![CDATA[Distribute application-specific large, read-only files efficiently. + + <p><code>DistributedCache</code> is a facility provided by the Map-Reduce + framework to cache files (text, archives, jars etc.) needed by applications. + </p> + + <p>Applications specify the files, via urls (hdfs:// or http://) to be cached + via the {@link org.apache.hadoop.mapred.JobConf}. The + <code>DistributedCache</code> assumes that the files specified via urls are + already present on the {@link FileSystem} at the path specified by the url + and are accessible by every machine in the cluster.</p> + + <p>The framework will copy the necessary files on to the worker node before + any tasks for the job are executed on that node. Its efficiency stems from + the fact that the files are only copied once per job and the ability to + cache archives which are un-archived on the workers.</p> + + <p><code>DistributedCache</code> can be used to distribute simple, read-only + data/text files and/or more complex types such as archives, jars etc. + Archives (zip, tar and tgz/tar.gz files) are un-archived at the worker nodes. + Jars may be optionally added to the classpath of the tasks, a rudimentary + software distribution mechanism. Files have execution permissions. + In older version of Hadoop Map/Reduce users could optionally ask for symlinks + to be created in the working directory of the child task. In the current + version symlinks are always created. If the URL does not have a fragment + the name of the file or directory will be used. If multiple files or + directories map to the same link name, the last one added, will be used. All + others will not even be downloaded.</p> + + <p><code>DistributedCache</code> tracks modification timestamps of the cache + files. 
Clearly the cache files should not be modified by the application + or externally while the job is executing.</p> + + <p>Here is an illustrative example on how to use the + <code>DistributedCache</code>:</p> + <p><blockquote><pre> + // Setting up the cache for the application + + 1. Copy the requisite files to the <code>FileSystem</code>: + + $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat + $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip + $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar + $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar + $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz + $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz + + 2. Setup the application's <code>JobConf</code>: + + JobConf job = new JobConf(); + DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"), + job); + DistributedCache.addCacheArchive(new URI("/myapp/map.zip"), job); + DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job); + DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar"), job); + DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz"), job); + DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz"), job); + + 3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper} + or {@link org.apache.hadoop.mapred.Reducer}: + + public static class MapClass extends MapReduceBase + implements Mapper<K, V, K, V> { + + private Path[] localArchives; + private Path[] localFiles; + + public void configure(JobConf job) { + // Get the cached archives/files + File f = new File("./map.zip/some/file/in/zip.txt"); + } + + public void map(K key, V value, + OutputCollector<K, V> output, Reporter reporter) + throws IOException { + // Use data from the cached archives/files here + // ... + // ... + output.collect(k, v); + } + } + + </pre></blockquote> + + It is also very common to use the DistributedCache by using + {@link org.apache.hadoop.util.GenericOptionsParser}. 
+ + This class includes methods that should be used by users + (specifically those mentioned in the example above, as well + as {@link DistributedCache#addArchiveToClassPath(Path, Configuration)}), + as well as methods intended for use by the MapReduce framework + (e.g., {@link org.apache.hadoop.mapred.JobClient}). + + @see org.apache.hadoop.mapred.JobConf + @see org.apache.hadoop.mapred.JobClient + @see org.apache.hadoop.mapreduce.Job]]> + </doc> + </class> + <!-- end class org.apache.hadoop.filecache.DistributedCache --> +</package> +<package name="org.apache.hadoop.mapred"> + <!-- start class org.apache.hadoop.mapred.ClusterStatus --> + <class name="ClusterStatus" extends="java.lang.Object" + abstract="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <implements name="org.apache.hadoop.io.Writable"/> + <method name="getTaskTrackers" return="int" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Get the number of task trackers in the cluster. + + @return the number of task trackers in the cluster.]]> + </doc> + </method> + <method name="getActiveTrackerNames" return="java.util.Collection" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Get the names of task trackers in the cluster. + + @return the active task trackers in the cluster.]]> + </doc> + </method> + <method name="getBlacklistedTrackerNames" return="java.util.Collection" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Get the names of task trackers in the cluster. 
+ + @return the blacklisted task trackers in the cluster.]]> + </doc> + </method> + <method name="getGraylistedTrackerNames" return="java.util.Collection" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Get the names of graylisted task trackers in the cluster. + + The gray list of trackers is no longer available on M/R 2.x. The function + is kept to be compatible with M/R 1.x applications. + + @return an empty graylisted task trackers in the cluster.]]> + </doc> + </method> + <method name="getGraylistedTrackers" return="int" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Get the number of graylisted task trackers in the cluster. + + The gray list of trackers is no longer available on M/R 2.x. The function + is kept to be compatible with M/R 1.x applications. + + @return 0 graylisted task trackers in the cluster.]]> + </doc> + </method> + <method name="getBlacklistedTrackers" return="int" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Get the number of blacklisted task trackers in the cluster. + + @return the number of blacklisted task trackers in the cluster.]]> + </doc> + </method> + <method name="getNumExcludedNodes" return="int" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Get the number of excluded hosts in the cluster. 
+ @return the number of excluded hosts in the cluster.]]> + </doc> + </method> + <method name="getTTExpiryInterval" return="long" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Get the tasktracker expiry interval for the cluster + @return the expiry interval in msec]]> + </doc> + </method> + <method name="getMapTasks" return="int" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Get the number of currently running map tasks in the cluster. + + @return the number of currently running map tasks in the cluster.]]> + </doc> + </method> + <method name="getReduceTasks" return="int" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Get the number of currently running reduce tasks in the cluster. + + @return the number of currently running reduce tasks in the cluster.]]> + </doc> + </method> + <method name="getMaxMapTasks" return="int" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Get the maximum capacity for running map tasks in the cluster. + + @return the maximum capacity for running map tasks in the cluster.]]> + </doc> + </method> + <method name="getMaxReduceTasks" return="int" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Get the maximum capacity for running reduce tasks in the cluster. 
+ + @return the maximum capacity for running reduce tasks in the cluster.]]> + </doc> + </method> + <method name="getJobTrackerStatus" return="org.apache.hadoop.mapreduce.Cluster.JobTrackerStatus" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Get the JobTracker's status. + + @return {@link JobTrackerStatus} of the JobTracker]]> + </doc> + </method> + <method name="getMaxMemory" return="long" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Returns UNINITIALIZED_MEMORY_VALUE (-1)]]> + </doc> + </method> + <method name="getUsedMemory" return="long" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Returns UNINITIALIZED_MEMORY_VALUE (-1)]]> + </doc> + </method> + <method name="getBlackListedTrackersInfo" return="java.util.Collection" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Gets the list of blacklisted trackers along with reasons for blacklisting. + + @return the collection of {@link BlackListInfo} objects.]]> + </doc> + </method> + <method name="getJobTrackerState" return="org.apache.hadoop.mapred.JobTracker.State" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Get the current state of the <code>JobTracker</code>, + as {@link JobTracker.State} + + {@link JobTracker.State} should no longer be used on M/R 2.x. The function + is kept to be compatible with M/R 1.x applications. 
+ + @return the invalid state of the <code>JobTracker</code>.]]> + </doc> + </method> + <method name="write" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="out" type="java.io.DataOutput"/> + <exception name="IOException" type="java.io.IOException"/> + </method> + <method name="readFields" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="in" type="java.io.DataInput"/> + <exception name="IOException" type="java.io.IOException"/> + </method> + <field name="UNINITIALIZED_MEMORY_VALUE" type="long" + transient="false" volatile="false" + static="true" final="true" visibility="public" + deprecated="not deprecated"> + </field> + <doc> + <![CDATA[Status information on the current state of the Map-Reduce cluster. + + <p><code>ClusterStatus</code> provides clients with information such as: + <ol> + <li> + Size of the cluster. + </li> + <li> + Name of the trackers. + </li> + <li> + Task capacity of the cluster. + </li> + <li> + The number of currently running map and reduce tasks. + </li> + <li> + State of the <code>JobTracker</code>. + </li> + <li> + Details regarding black listed trackers. 
+ </li> + </ol> + + <p>Clients can query for the latest <code>ClusterStatus</code>, via + {@link JobClient#getClusterStatus()}.</p> + + @see JobClient]]> + </doc> + </class> + <!-- end class org.apache.hadoop.mapred.ClusterStatus --> + <!-- start class org.apache.hadoop.mapred.Counters --> + <class name="Counters" extends="org.apache.hadoop.mapreduce.counters.AbstractCounters" + abstract="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <constructor name="Counters" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </constructor> + <constructor name="Counters" type="org.apache.hadoop.mapreduce.Counters" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </constructor> + <method name="getGroup" return="org.apache.hadoop.mapred.Counters.Group" + abstract="false" native="false" synchronized="true" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="groupName" type="java.lang.String"/> + </method> + <method name="getGroupNames" return="java.util.Collection" + abstract="false" native="false" synchronized="true" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </method> + <method name="makeCompactString" return="java.lang.String" + abstract="false" native="false" synchronized="true" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </method> + <method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter" + abstract="false" native="false" synchronized="true" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="group" type="java.lang.String"/> + <param name="name" type="java.lang.String"/> + </method> + <method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + 
deprecated="use {@link #findCounter(String, String)} instead"> + <param name="group" type="java.lang.String"/> + <param name="id" type="int"/> + <param name="name" type="java.lang.String"/> + <doc> + <![CDATA[Find a counter by using strings + @param group the name of the group + @param id the id of the counter within the group (0 to N-1) + @param name the internal name of the counter + @return the counter for that name + @deprecated use {@link #findCounter(String, String)} instead]]> + </doc> + </method> + <method name="incrCounter" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="key" type="java.lang.Enum"/> + <param name="amount" type="long"/> + <doc> + <![CDATA[Increments the specified counter by the specified amount, creating it if + it didn't already exist. + @param key identifies a counter + @param amount amount by which counter is to be incremented]]> + </doc> + </method> + <method name="incrCounter" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="group" type="java.lang.String"/> + <param name="counter" type="java.lang.String"/> + <param name="amount" type="long"/> + <doc> + <![CDATA[Increments the specified counter by the specified amount, creating it if + it didn't already exist. + @param group the name of the group + @param counter the internal name of the counter + @param amount amount by which counter is to be incremented]]> + </doc> + </method> + <method name="getCounter" return="long" + abstract="false" native="false" synchronized="true" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="key" type="java.lang.Enum"/> + <doc> + <![CDATA[Returns current value of the specified counter, or 0 if the counter + does not exist. 
+ @param key the counter enum to lookup + @return the counter value or 0 if counter not found]]> + </doc> + </method> + <method name="incrAllCounters" + abstract="false" native="false" synchronized="true" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="other" type="org.apache.hadoop.mapred.Counters"/> + <doc> + <![CDATA[Increments multiple counters by their amounts in another Counters + instance. + @param other the other Counters instance]]> + </doc> + </method> + <method name="size" return="int" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="use {@link #countCounters()} instead"> + <doc> + <![CDATA[@return the total number of counters + @deprecated use {@link #countCounters()} instead]]> + </doc> + </method> + <method name="sum" return="org.apache.hadoop.mapred.Counters" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="a" type="org.apache.hadoop.mapred.Counters"/> + <param name="b" type="org.apache.hadoop.mapred.Counters"/> + <doc> + <![CDATA[Convenience method for computing the sum of two sets of counters. + @param a the first counters + @param b the second counters + @return a new summed counters object]]> + </doc> + </method> + <method name="log" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="log" type="org.slf4j.Logger"/> + <doc> + <![CDATA[Logs the current counter values. 
+ @param log The log to use.]]> + </doc> + </method> + <method name="makeEscapedCompactString" return="java.lang.String" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Represent the counter in a textual format that can be converted back to + its object form + @return the string in the following format + {(groupName)(group-displayName)[(counterName)(displayName)(value)][]*}*]]> + </doc> + </method> + <method name="fromEscapedCompactString" return="org.apache.hadoop.mapred.Counters" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="compactString" type="java.lang.String"/> + <exception name="ParseException" type="java.text.ParseException"/> + <doc> + <![CDATA[Convert a stringified (by {@link #makeEscapedCompactString()} counter + representation into a counter object. + @param compactString to parse + @return a new counters object + @throws ParseException]]> + </doc> + </method> + <field name="MAX_COUNTER_LIMIT" type="int" + transient="false" volatile="false" + static="true" final="true" visibility="public" + deprecated="not deprecated"> + </field> + <field name="MAX_GROUP_LIMIT" type="int" + transient="false" volatile="false" + static="true" final="true" visibility="public" + deprecated="not deprecated"> + </field> + <doc> + <![CDATA[A set of named counters. + + <p><code>Counters</code> represent global counters, defined either by the + Map-Reduce framework or applications. 
Each <code>Counter</code> can be of + any {@link Enum} type.</p> + + <p><code>Counters</code> are bunched into {@link Group}s, each comprising of + counters from a particular <code>Enum</code> class.]]> + </doc> + </class> + <!-- end class org.apache.hadoop.mapred.Counters --> + <!-- start class org.apache.hadoop.mapred.Counters.Counter --> + <class name="Counters.Counter" extends="java.lang.Object" + abstract="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <implements name="org.apache.hadoop.mapreduce.Counter"/> + <constructor name="Counter" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </constructor> + <method name="setDisplayName" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="displayName" type="java.lang.String"/> + </method> + <method name="getName" return="java.lang.String" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </method> + <method name="getDisplayName" return="java.lang.String" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </method> + <method name="getValue" return="long" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </method> + <method name="setValue" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="value" type="long"/> + </method> + <method name="increment" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="incr" type="long"/> + </method> + <method name="write" + abstract="false" 
native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="out" type="java.io.DataOutput"/> + <exception name="IOException" type="java.io.IOException"/> + </method> + <method name="readFields" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="in" type="java.io.DataInput"/> + <exception name="IOException" type="java.io.IOException"/> + </method> + <method name="makeEscapedCompactString" return="java.lang.String" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Returns the compact stringified version of the counter in the format + [(actual-name)(display-name)(value)] + @return the stringified result]]> + </doc> + </method> + <method name="contentEquals" return="boolean" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="deprecated, no comment"> + <param name="counter" type="org.apache.hadoop.mapred.Counters.Counter"/> + <doc> + <![CDATA[Checks for (content) equality of two (basic) counters + @param counter to compare + @return true if content equals + @deprecated]]> + </doc> + </method> + <method name="getCounter" return="long" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[@return the value of the counter]]> + </doc> + </method> + <method name="getUnderlyingCounter" return="org.apache.hadoop.mapreduce.Counter" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </method> + <method name="equals" return="boolean" + abstract="false" native="false" synchronized="true" + static="false" final="false" visibility="public" + 
deprecated="not deprecated"> + <param name="genericRight" type="java.lang.Object"/> + </method> + <method name="hashCode" return="int" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </method> + <doc> + <![CDATA[A counter record, comprising its name and value.]]> + </doc> + </class> + <!-- end class org.apache.hadoop.mapred.Counters.Counter --> + <!-- start class org.apache.hadoop.mapred.Counters.Group --> + <class name="Counters.Group" extends="java.lang.Object" + abstract="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <implements name="org.apache.hadoop.mapreduce.counters.CounterGroupBase"/> + <constructor name="Group" + static="false" final="false" visibility="protected" + deprecated="not deprecated"> + </constructor> + <method name="getCounter" return="long" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="counterName" type="java.lang.String"/> + <doc> + <![CDATA[@param counterName the name of the counter + @return the value of the specified counter, or 0 if the counter does + not exist.]]> + </doc> + </method> + <method name="makeEscapedCompactString" return="java.lang.String" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[@return the compact stringified version of the group in the format + {(actual-name)(display-name)(value)[][][]} where [] are compact strings + for the counters within.]]> + </doc> + </method> + <method name="getCounter" return="org.apache.hadoop.mapred.Counters.Counter" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="use {@link #findCounter(String)} instead"> + <param name="id" type="int"/> + <param name="name" 
type="java.lang.String"/> + <doc> + <![CDATA[Get the counter for the given id and create it if it doesn't exist. + @param id the numeric id of the counter within the group + @param name the internal counter name + @return the counter + @deprecated use {@link #findCounter(String)} instead]]> + </doc> + </method> + <method name="getCounterForName" return="org.apache.hadoop.mapred.Counters.Counter" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="name" type="java.lang.String"/> + <doc> + <![CDATA[Get the counter for the given name and create it if it doesn't exist. + @param name the internal counter name + @return the counter]]> + </doc> + </method> + <method name="write" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="out" type="java.io.DataOutput"/> + <exception name="IOException" type="java.io.IOException"/> + </method> + <method name="readFields" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="in" type="java.io.DataInput"/> + <exception name="IOException" type="java.io.IOException"/> + </method> + <method name="iterator" return="java.util.Iterator" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </method> + <method name="getName" return="java.lang.String" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </method> + <method name="getDisplayName" return="java.lang.String" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </method> + <method name="setDisplayName" + abstract="false" native="false" 
synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="displayName" type="java.lang.String"/> + </method> + <method name="addCounter" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="counter" type="org.apache.hadoop.mapred.Counters.Counter"/> + </method> + <method name="addCounter" return="org.apache.hadoop.mapred.Counters.Counter" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="name" type="java.lang.String"/> + <param name="displayName" type="java.lang.String"/> + <param name="value" type="long"/> + </method> + <method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="counterName" type="java.lang.String"/> + <param name="displayName" type="java.lang.String"/> + </method> + <method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="counterName" type="java.lang.String"/> + <param name="create" type="boolean"/> + </method> + <method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="counterName" type="java.lang.String"/> + </method> + <method name="size" return="int" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </method> + <method name="incrAllCounters" + abstract="false" native="false" 
synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="rightGroup" type="org.apache.hadoop.mapreduce.counters.CounterGroupBase"/> + </method> + <method name="getUnderlyingGroup" return="org.apache.hadoop.mapreduce.counters.CounterGroupBase" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </method> + <method name="equals" return="boolean" + abstract="false" native="false" synchronized="true" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="genericRight" type="java.lang.Object"/> + </method> + <method name="hashCode" return="int" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </method> + <doc> + <![CDATA[<code>Group</code> of counters, comprising of counters from a particular + counter {@link Enum} class. 
+ + <p><code>Group</code>handles localization of the class name and the + counter names.</p>]]> + </doc> + </class> + <!-- end class org.apache.hadoop.mapred.Counters.Group --> + <!-- start class org.apache.hadoop.mapred.FileAlreadyExistsException --> + <class name="FileAlreadyExistsException" extends="java.io.IOException" + abstract="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <constructor name="FileAlreadyExistsException" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </constructor> + <constructor name="FileAlreadyExistsException" type="java.lang.String" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </constructor> + <doc> + <![CDATA[Used when target file already exists for any operation and + is not configured to be overwritten.]]> + </doc> + </class> + <!-- end class org.apache.hadoop.mapred.FileAlreadyExistsException --> + <!-- start class org.apache.hadoop.mapred.FileInputFormat --> + <class name="FileInputFormat" extends="java.lang.Object" + abstract="true" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <implements name="org.apache.hadoop.mapred.InputFormat"/> + <constructor name="FileInputFormat" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </constructor> + <method name="setMinSplitSize" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="protected" + deprecated="not deprecated"> + <param name="minSplitSize" type="long"/> + </method> + <method name="isSplitable" return="boolean" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="protected" + deprecated="not deprecated"> + <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> + <param name="filename" type="org.apache.hadoop.fs.Path"/> + <doc> + <![CDATA[Is the given filename splittable? 
Usually, true, but if the file is + stream compressed, it will not be. + + The default implementation in <code>FileInputFormat</code> always returns + true. Implementations that may deal with non-splittable files <i>must</i> + override this method. + + <code>FileInputFormat</code> implementations can override this and return + <code>false</code> to ensure that individual input files are never split-up + so that {@link Mapper}s process entire files. + + @param fs the file system that the file is on + @param filename the file name to check + @return is this file splitable?]]> + </doc> + </method> + <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" + abstract="true" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> + <param name="job" type="org.apache.hadoop.mapred.JobConf"/> + <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> + <exception name="IOException" type="java.io.IOException"/> + </method> + <method name="setInputPathFilter" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> + <param name="filter" type="java.lang.Class"/> + <doc> + <![CDATA[Set a PathFilter to be applied to the input paths for the map-reduce job. + + @param filter the PathFilter class use for filtering the input paths.]]> + </doc> + </method> + <method name="getInputPathFilter" return="org.apache.hadoop.fs.PathFilter" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> + <doc> + <![CDATA[Get a PathFilter instance of the filter set for the input paths. 
+ + @return the PathFilter instance set for the job, NULL if none has been set.]]> + </doc> + </method> + <method name="addInputPathRecursively" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="protected" + deprecated="not deprecated"> + <param name="result" type="java.util.List"/> + <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> + <param name="path" type="org.apache.hadoop.fs.Path"/> + <param name="inputFilter" type="org.apache.hadoop.fs.PathFilter"/> + <exception name="IOException" type="java.io.IOException"/> + <doc> + <![CDATA[Add files in the input path recursively into the results. + @param result + The List to store all files. + @param fs + The FileSystem. + @param path + The input path. + @param inputFilter + The input filter that can be used to filter files/dirs. + @throws IOException]]> + </doc> + </method> + <method name="listStatus" return="org.apache.hadoop.fs.FileStatus[]" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="protected" + deprecated="not deprecated"> + <param name="job" type="org.apache.hadoop.mapred.JobConf"/> + <exception name="IOException" type="java.io.IOException"/> + <doc> + <![CDATA[List input directories. + Subclasses may override to, e.g., select only files matching a regular + expression. + + @param job the job to list input paths for + @return array of FileStatus objects + @throws IOException if zero items.]]> + </doc> + </method> + <method name="makeSplit" return="org.apache.hadoop.mapred.FileSplit" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="protected" + deprecated="not deprecated"> + <param name="file" type="org.apache.hadoop.fs.Path"/> + <param name="start" type="long"/> + <param name="length" type="long"/> + <param name="hosts" type="java.lang.String[]"/> + <doc> + <![CDATA[A factory that makes the split for this class. 
It can be overridden + by sub-classes to make sub-types]]> + </doc> + </method> + <method name="makeSplit" return="org.apache.hadoop.mapred.FileSplit" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="protected" + deprecated="not deprecated"> + <param name="file" type="org.apache.hadoop.fs.Path"/> + <param name="start" type="long"/> + <param name="length" type="long"/> + <param name="hosts" type="java.lang.String[]"/> + <param name="inMemoryHosts" type="java.lang.String[]"/> + <doc> + <![CDATA[A factory that makes the split for this class. It can be overridden + by sub-classes to make sub-types]]> + </doc> + </method> + <method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="job" type="org.apache.hadoop.mapred.JobConf"/> + <param name="numSplits" type="int"/> + <exception name="IOException" type="java.io.IOException"/> + <doc> + <![CDATA[Splits files returned by {@link #listStatus(JobConf)} when + they're too big.]]> + </doc> + </method> + <method name="computeSplitSize" return="long" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="protected" + deprecated="not deprecated"> + <param name="goalSize" type="long"/> + <param name="minSize" type="long"/> + <param name="blockSize" type="long"/> + </method> + <method name="getBlockIndex" return="int" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="protected" + deprecated="not deprecated"> + <param name="blkLocations" type="org.apache.hadoop.fs.BlockLocation[]"/> + <param name="offset" type="long"/> + </method> + <method name="setInputPaths" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" 
type="org.apache.hadoop.mapred.JobConf"/> + <param name="commaSeparatedPaths" type="java.lang.String"/> + <doc> + <![CDATA[Sets the given comma separated paths as the list of inputs + for the map-reduce job. + + @param conf Configuration of the job + @param commaSeparatedPaths Comma separated paths to be set as + the list of inputs for the map-reduce job.]]> + </doc> + </method> + <method name="addInputPaths" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> + <param name="commaSeparatedPaths" type="java.lang.String"/> + <doc> + <![CDATA[Add the given comma separated paths to the list of inputs for + the map-reduce job. + + @param conf The configuration of the job + @param commaSeparatedPaths Comma separated paths to be added to + the list of inputs for the map-reduce job.]]> + </doc> + </method> + <method name="setInputPaths" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> + <param name="inputPaths" type="org.apache.hadoop.fs.Path[]"/> + <doc> + <![CDATA[Set the array of {@link Path}s as the list of inputs + for the map-reduce job. + + @param conf Configuration of the job. + @param inputPaths the {@link Path}s of the input directories/files + for the map-reduce job.]]> + </doc> + </method> + <method name="addInputPath" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> + <param name="path" type="org.apache.hadoop.fs.Path"/> + <doc> + <![CDATA[Add a {@link Path} to the list of inputs for the map-reduce job. 
+ + @param conf The configuration of the job + @param path {@link Path} to be added to the list of inputs for + the map-reduce job.]]> + </doc> + </method> + <method name="getInputPaths" return="org.apache.hadoop.fs.Path[]" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> + <doc> + <![CDATA[Get the list of input {@link Path}s for the map-reduce job. + + @param conf The configuration of the job + @return the list of input {@link Path}s for the map-reduce job.]]> + </doc> + </method> + <method name="getSplitHosts" return="java.lang.String[]" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="protected" + deprecated="not deprecated"> + <param name="blkLocations" type="org.apache.hadoop.fs.BlockLocation[]"/> + <param name="offset" type="long"/> + <param name="splitSize" type="long"/> + <param name="clusterMap" type="org.apache.hadoop.net.NetworkTopology"/> + <exception name="IOException" type="java.io.IOException"/> + <doc> + <![CDATA[This function identifies and returns the hosts that contribute + most for a given split. 
For calculating the contribution, rack + locality is treated on par with host locality, so hosts from racks + that contribute the most are preferred over hosts on racks that + contribute less + @param blkLocations The list of block locations + @param offset + @param splitSize + @return an array of hosts that contribute most to this split + @throws IOException]]> + </doc> + </method> + <field name="LOG" type="org.slf4j.Logger" + transient="false" volatile="false" + static="true" final="true" visibility="public" + deprecated="not deprecated"> + </field> + <field name="NUM_INPUT_FILES" type="java.lang.String" + transient="false" volatile="false" + static="true" final="true" visibility="public" + deprecated="not deprecated"> + </field> + <field name="INPUT_DIR_RECURSIVE" type="java.lang.String" + transient="false" volatile="false" + static="true" final="true" visibility="public" + deprecated="not deprecated"> + </field> + <doc> + <![CDATA[A base class for file-based {@link InputFormat}. + + <p><code>FileInputFormat</code> is the base class for all file-based + <code>InputFormat</code>s. This provides a generic implementation of + {@link #getSplits(JobConf, int)}. + + Implementations of <code>FileInputFormat</code> can also override the + {@link #isSplitable(FileSystem, Path)} method to prevent input files + from being split-up in certain situations. 
Implementations that may + deal with non-splittable files <i>must</i> override this method, since + the default implementation assumes splitting is always possible.]]> + </doc> + </class> + <!-- end class org.apache.hadoop.mapred.FileInputFormat --> + <!-- start class org.apache.hadoop.mapred.FileOutputCommitter --> + <class name="FileOutputCommitter" extends="org.apache.hadoop.mapred.OutputCommitter" + abstract="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <constructor name="FileOutputCommitter" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </constructor> + <method name="getWorkPath" return="org.apache.hadoop.fs.Path" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/> + <param name="outputPath" type="org.apache.hadoop.fs.Path"/> + <exception name="IOException" type="java.io.IOException"/> + </method> + <method name="setupJob" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="context" type="org.apache.hadoop.mapred.JobContext"/> + <exception name="IOException" type="java.io.IOException"/> + </method> + <method name="commitJob" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="context" type="org.apache.hadoop.mapred.JobContext"/> + <exception name="IOException" type="java.io.IOException"/> + </method> + <method name="cleanupJob" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="context" type="org.apache.hadoop.mapred.JobContext"/> + <exception name="IOException" type="java.io.IOException"/> + </method> + <method 
name="abortJob" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="context" type="org.apache.hadoop.mapred.JobContext"/> + <param name="runState" type="int"/> + <exception name="IOException" type="java.io.IOException"/> + </method> + <method name="setupTask" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/> + <exception name="IOException" type="java.io.IOException"/> + </method> + <method name="commitTask" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/> + <exception name="IOException" type="java.io.IOException"/> + </method> + <method name="abortTask" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/> + <exception name="IOException" type="java.io.IOException"/> + </method> + <method name="needsTaskCommit" return="boolean" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/> + <exception name="IOException" type="java.io.IOException"/> + </method> + <method name="isRecoverySupported" return="boolean" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </method> + <method name="isCommitJobRepeatable" return="boolean" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not 
deprecated"> + <param name="context" type="org.apache.hadoop.mapred.JobContext"/> + <exception name="IOException" type="java.io.IOException"/> + </method> + <method name="isRecoverySupported" return="boolean" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="context" type="org.apache.hadoop.mapred.JobContext"/> + <exception name="IOException" type="java.io.IOException"/> + </method> + <method name="recoverTask" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/> + <exception name="IOException" type="java.io.IOException"/> + </method> + <field name="LOG" type="org.slf4j.Logger" + transient="false" volatile="false" + static="true" final="true" visibility="public" + deprecated="not deprecated"> + </field> + <field name="TEMP_DIR_NAME" type="java.lang.String" + transient="false" volatile="false" + static="true" final="true" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Temporary directory name]]> + </doc> + </field> + <field name="SUCCEEDED_FILE_NAME" type="java.lang.String" + transient="false" volatile="false" + static="true" final="true" visibility="public" + deprecated="not deprecated"> + </field> + <doc> + <![CDATA[An {@link OutputCommitter} that commits files specified + in job output directory i.e. 
${mapreduce.output.fileoutputformat.outputdir}.]]> + </doc> + </class> + <!-- end class org.apache.hadoop.mapred.FileOutputCommitter --> + <!-- start class org.apache.hadoop.mapred.FileOutputFormat --> + <class name="FileOutputFormat" extends="java.lang.Object" + abstract="true" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <implements name="org.apache.hadoop.mapred.OutputFormat"/> + <constructor name="FileOutputFormat" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </constructor> + <method name="setCompressOutput" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> + <param name="compress" type="boolean"/> + <doc> + <![CDATA[Set whether the output of the job is compressed. + @param conf the {@link JobConf} to modify + @param compress should the output of the job be compressed?]]> + </doc> + </method> + <method name="getCompressOutput" return="boolean" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> + <doc> + <![CDATA[Is the job output compressed? + @param conf the {@link JobConf} to look in + @return <code>true</code> if the job output should be compressed, + <code>false</code> otherwise]]> + </doc> + </method> + <method name="setOutputCompressorClass" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> + <param name="codecClass" type="java.lang.Class"/> + <doc> + <![CDATA[Set the {@link CompressionCodec} to be used to compress job outputs. 
+ @param conf the {@link JobConf} to modify + @param codecClass the {@link CompressionCodec} to be used to + compress the job outputs]]> + </doc> + </method> + <method name="getOutputCompressorClass" return="java.lang.Class" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> + <param name="defaultValue" type="java.lang.Class"/> + <doc> + <![CDATA[Get the {@link CompressionCodec} for compressing the job outputs. + @param conf the {@link JobConf} to look in + @param defaultValue the {@link CompressionCodec} to return if not set + @return the {@link CompressionCodec} to be used to compress the + job outputs + @throws IllegalArgumentException if the class was specified, but not found]]> + </doc> + </method> + <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" + abstract="true" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> + <param name="job" type="org.apache.hadoop.mapred.JobConf"/> + <param name="name" type="java.lang.String"/> + <param name="progress" type="org.apache.hadoop.util.Progressable"/> + <exception name="IOException" type="java.io.IOException"/> + </method> + <method name="checkOutputSpecs" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> + <param name="job" type="org.apache.hadoop.mapred.JobConf"/> + <exception name="FileAlreadyExistsException" type="org.apache.hadoop.mapred.FileAlreadyExistsException"/> + <exception name="InvalidJobConfException" type="org.apache.hadoop.mapred.InvalidJobConfException"/> + <exception name="IOException" type="java.io.IOException"/> + </method> + <method 
name="setOutputPath" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> + <param name="outputDir" type="org.apache.hadoop.fs.Path"/> + <doc> + <![CDATA[Set the {@link Path} of the output directory for the map-reduce job. + + @param conf The configuration of the job. + @param outputDir the {@link Path} of the output directory for + the map-reduce job.]]> + </doc> + </method> + <method name="getOutputPath" return="org.apache.hadoop.fs.Path" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> + <doc> + <![CDATA[Get the {@link Path} to the output directory for the map-reduce job. + + @return the {@link Path} to the output directory for the map-reduce job. + @see FileOutputFormat#getWorkOutputPath(JobConf)]]> + </doc> + </method> + <method name="getWorkOutputPath" return="org.apache.hadoop.fs.Path" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> + <doc> + <![CDATA[Get the {@link Path} to the task's temporary output directory + for the map-reduce job. + + <b id="SideEffectFiles">Tasks' Side-Effect Files</b> + + <p><i>Note:</i> The following is valid only if the {@link OutputCommitter} + is {@link FileOutputCommitter}. If <code>OutputCommitter</code> is not + a <code>FileOutputCommitter</code>, the task's temporary output + directory is the same as {@link #getOutputPath(JobConf)} i.e. + <tt>${mapreduce.output.fileoutputformat.outputdir}</tt></p> + + <p>Some applications need to create/write-to side-files, which differ from + the actual job-outputs.</p> 
+ + <p>In such cases there could be issues with 2 instances of the same TIP + (running simultaneously e.g. speculative tasks) trying to open/write-to the + same file (path) on HDFS. Hence the application-writer will have to pick + unique names per task-attempt (e.g. using the attemptid, say + <tt>attempt_200709221812_0001_m_000000_0</tt>), not just per TIP.</p> + + <p>To get around this the Map-Reduce framework helps the application-writer + out by maintaining a special + <tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt> + sub-directory for each task-attempt on HDFS where the output of the + task-attempt goes. On successful completion of the task-attempt the files + in the <tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt> (only) + are <i>promoted</i> to <tt>${mapreduce.output.fileoutputformat.outputdir}</tt>. Of course, the + framework discards the sub-directory of unsuccessful task-attempts. This + is completely transparent to the application.</p> + + <p>The application-writer can take advantage of this by creating any + side-files required in <tt>${mapreduce.task.output.dir}</tt> during execution + of the reduce-task i.e. via {@link #getWorkOutputPath(JobConf)}, and the + framework will move them out similarly - thus the application-writer doesn't have to pick + unique paths per task-attempt.</p> + + <p><i>Note</i>: the value of <tt>${mapreduce.task.output.dir}</tt> during + execution of a particular task-attempt is actually + <tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt>, and this value is + set by the map-reduce framework. So, just create any side-files in the + path returned by {@link #getWorkOutputPath(JobConf)} from map/reduce + task to take advantage of this feature.</p> + + <p>The entire discussion holds true for maps of jobs with + reducer=NONE (i.e. 
0 reduces) since output of the map, in that case, + goes directly to HDFS.</p> + + @return the {@link Path} to the task's temporary output directory + for the map-reduce job.]]> + </doc> + </method> + <method name="getTaskOutputPath" return="org.apache.hadoop.fs.Path" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> + <param name="name" type="java.lang.String"/> + <exception name="IOException" type="java.io.IOException"/> + <doc> + <![CDATA[Helper function to create the task's temporary output directory and + return the path to the task's output file. + + @param conf job-configuration + @param name temporary task-output filename + @return path to the task's temporary output file + @throws IOException]]> + </doc> + </method> + <method name="getUniqueName" return="java.lang.String" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> + <param name="name" type="java.lang.String"/> + <doc> + <![CDATA[Helper function to generate a name that is unique for the task. + + <p>The generated name can be used to create custom files from within the + different tasks for the job, the names for different tasks will not collide + with each other.</p> + + <p>The given name is postfixed with the task type, 'm' for maps, 'r' for + reduces and the task partition number. For example, given a name 'test' + running on the first map of the job, the generated name will be + 'test-m-00000'.</p> + + @param conf the configuration for the job. + @param name the name to make unique. 
+ @return a unique name across all tasks of the job.]]> + </doc> + </method> + <method name="getPathForCustomFile" return="org.apache.hadoop.fs.Path" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> + <param name="name" type="java.lang.String"/> + <doc> + <![CDATA[Helper function to generate a {@link Path} for a file that is unique for + the task within the job output directory. + + <p>The path can be used to create custom files from within the map and + reduce tasks. The path name will be unique for each task. The path parent + will be the job output directory.</p> + + <p>This method uses the {@link #getUniqueName} method to make the file name + unique for the task.</p> + + @param conf the configuration for the job. + @param name the name for the file. + @return a unique path across all tasks of the job.]]> + </doc> + </method> + <doc> + <![CDATA[A base class for {@link OutputFormat}.]]> + </doc> + </class> + <!-- end class org.apache.hadoop.mapred.FileOutputFormat --> + <!-- start class org.apache.hadoop.mapred.FileSplit --> + <class name="FileSplit" extends="org.apache.hadoop.mapreduce.InputSplit" + abstract="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <implements name="org.apache.hadoop.mapred.InputSplitWithLocationInfo"/> + <constructor name="FileSplit" + static="false" final="false" visibility="protected" + deprecated="not deprecated"> + </constructor> + <constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, org.apache.hadoop.mapred.JobConf" + static="false" final="false" visibility="public" + deprecated="deprecated, no comment"> + <doc> + <![CDATA[Constructs a split. 
+ @deprecated + @param file the file name + @param start the position of the first byte in the file to process + @param length the number of bytes in the file to process]]> + </doc> + </constructor> + <constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, java.lang.String[]" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Constructs a split with host information + + @param file the file name + @param start the position of the first byte in the file to process + @param length the number of bytes in the file to process + @param hosts the list of hosts containing the block, possibly null]]> + </doc> + </constructor> + <constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, java.lang.String[], java.lang.String[]" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[Constructs a split with host information + + @param file the file name + @param start the position of the first byte in the file to process + @param length the number of bytes in the file to process + @param hosts the list of hosts containing the block, possibly null + @param inMemoryHosts the list of hosts containing the block in memory]]> + </doc> + </constructor> + <constructor name="FileSplit" type="org.apache.hadoop.mapreduce.lib.input.FileSplit" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </constructor> + <method name="getPath" return="org.apache.hadoop.fs.Path" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[The file containing this split's data.]]> + </doc> + </method> + <method name="getStart" return="long" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[The position of the first byte in the file to process.]]> 
+ </doc> + </method> + <method name="getLength" return="long" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <doc> + <![CDATA[The number of bytes in the file to process.]]> + </doc> + </method> + <method name="toString" return="java.lang.String" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </method> + <method name="write" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="out" type="java.io.DataOutput"/> + <exception name="IOException" type="java.io.IOException"/> + </method> + <method name="readFields" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <param name="in" type="java.io.DataInput"/> + <exception name="IOException" type="java.io.IOException"/> + </method> + <method name="getLocations" return="java.lang.String[]" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <exception name="IOException" type="java.io.IOException"/> + </method> + <method name="getLocationInfo" return="org.apache.hadoop.mapred.SplitLocationInfo[]" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <exception name="IOException" type="java.io.IOException"/> + </method> + <doc> + <![CDATA[A section of an input file. 
Returned by {@link + InputFormat#getSplits(JobConf, int)} and passed to + {@link InputFormat#getRecordReader(InputSplit,JobConf,Reporter)}.]]> + </doc> + </class> + <!-- end class org.apache.hadoop.mapred.FileSplit --> + <!-- start class org.apache.hadoop.mapred.FixedLengthInputFormat --> + <class name="FixedLengthInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat" + abstract="false" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + <implements name="org.apache.hadoop.mapred.JobConfigurable"/> + <constructor name="FixedLengthInputFormat" + static="false" final="false" visibility="public" + deprecated="not deprecated"> + </constructor> + <method name="setRecordLength" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.conf.Configuration"/> + <param name="recordLength" type="int"/> + <doc> + <![CDATA[Set the length of each record + @param conf configuration + @param recordLength the length of a record]]> + </doc> + </method> + <method name="getRecordLength" return="int" + abstract="false" native="false" synchronized="false" + static="true" final="false" visibility="public" + deprecated="not deprecated"> + <param name="conf" type="org.apache.hadoop.conf.Configuration"/> + <doc> + <![CDATA[Get record length value + @param conf configuration + @return the record length, zero means none was set]]> + </doc> + </method> + <method name="configure" + abstract="false" native="false" synchronized="false" + static="false" final="false" visibility="public" + deprecated="not deprecated
<TRUNCATED> --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org