Author: cutting Date: Fri Dec 7 15:29:41 2007 New Revision: 602270 URL: http://svn.apache.org/viewvc?rev=602270&view=rev Log: HADOOP-2382. Add hadoop-default.html to subversion.
Added: lucene/hadoop/trunk/docs/hadoop-default.html Modified: lucene/hadoop/trunk/CHANGES.txt lucene/hadoop/trunk/build.xml Modified: lucene/hadoop/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=602270&r1=602269&r2=602270&view=diff ============================================================================== --- lucene/hadoop/trunk/CHANGES.txt (original) +++ lucene/hadoop/trunk/CHANGES.txt Fri Dec 7 15:29:41 2007 @@ -243,9 +243,11 @@ IMPROVEMENTS HADOOP-2160. Remove project-level, non-user documentation from - releases, since it's now maintained in a separate tree. + releases, since it's now maintained in a separate tree. (cutting) - HADOOP-1327. Add user documentation for streaming. + HADOOP-1327. Add user documentation for streaming. (cutting) + + HADOOP-2382. Add hadoop-default.html to subversion. (cutting) Release 0.15.1 - 2007-11-27 Modified: lucene/hadoop/trunk/build.xml URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/build.xml?rev=602270&r1=602269&r2=602270&view=diff ============================================================================== --- lucene/hadoop/trunk/build.xml (original) +++ lucene/hadoop/trunk/build.xml Fri Dec 7 15:29:41 2007 @@ -22,7 +22,7 @@ <property name="conf.dir" value="${basedir}/conf"/> <property name="docs.dir" value="${basedir}/docs"/> <property name="contrib.dir" value="${basedir}/src/contrib"/> - <property name="docs.src" value="${basedir}/src/web"/> + <property name="docs.src" value="${basedir}/src/docs"/> <property name="c++.src" value="${basedir}/src/c++"/> <property name="c++.utils.src" value="${c++.src}/utils"/> <property name="c++.pipes.src" value="${c++.src}/pipes"/> @@ -608,14 +608,16 @@ <!-- Documentation --> <!-- ================================================================== --> - <target name="docs"> - <exec dir="src/docs" executable="forrest" failonerror="true" /> - <copy todir="docs/"> - <fileset dir="src/docs/build/site/" /> + <target name="docs" 
description="Generate documentation"> + <exec dir="${docs.src}" executable="forrest" failonerror="true" /> + <copy todir="${docs.dir}"> + <fileset dir="${docs.src}/build/site/" /> </copy> + <style basedir="${conf.dir}" destdir="${docs.dir}" + includes="hadoop-default.xml" style="conf/configuration.xsl"/> </target> - <target name="javadoc" depends="default-doc" description="Generate documentation"> + <target name="javadoc" description="Generate javadoc"> <mkdir dir="${build.javadoc}"/> <javadoc overview="${src.dir}/overview.html" @@ -655,14 +657,6 @@ </javadoc> </target> - <target name="default-doc"> - <style basedir="${conf.dir}" destdir="${build.docs}" - includes="hadoop-default.xml" style="conf/configuration.xsl"/> - <copy todir="${build.docs}"> - <fileset dir="${docs.dir}" /> - </copy> - </target> - <!-- ================================================================== --> <!-- D I S T R I B U T I O N --> <!-- ================================================================== --> @@ -716,6 +710,7 @@ </copy> <copy todir="${dist.dir}/docs"> + <fileset dir="${docs.dir}" /> <fileset dir="${build.docs}"/> </copy> @@ -773,7 +768,7 @@ <!-- ================================================================== --> <target name="clean" depends="clean-contrib" description="Clean. 
Delete the build files, and their directories"> <delete dir="${build.dir}"/> - <delete dir="src/docs/build"/> + <delete dir="${docs.src}/build"/> </target> <!-- ================================================================== --> Added: lucene/hadoop/trunk/docs/hadoop-default.html URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/docs/hadoop-default.html?rev=602270&view=auto ============================================================================== --- lucene/hadoop/trunk/docs/hadoop-default.html (added) +++ lucene/hadoop/trunk/docs/hadoop-default.html Fri Dec 7 15:29:41 2007 @@ -0,0 +1,599 @@ +<html> +<body> +<table border="1"> +<tr> +<td>name</td><td>value</td><td>description</td> +</tr> +<tr> +<td><a name="hadoop.tmp.dir">hadoop.tmp.dir</a></td><td>/tmp/hadoop-${user.name}</td><td>A base for other temporary directories.</td> +</tr> +<tr> +<td><a name="hadoop.native.lib">hadoop.native.lib</a></td><td>true</td><td>Should native hadoop libraries, if present, be used.</td> +</tr> +<tr> +<td><a name="hadoop.logfile.size">hadoop.logfile.size</a></td><td>10000000</td><td>The max size of each log file</td> +</tr> +<tr> +<td><a name="hadoop.logfile.count">hadoop.logfile.count</a></td><td>10</td><td>The max number of log files</td> +</tr> +<tr> +<td><a name="dfs.namenode.logging.level">dfs.namenode.logging.level</a></td><td>info</td><td>The logging level for dfs namenode. Other values are "dir"(trac +e namespace mutations), "block"(trace block under/over replications and block +creations/deletions), or "all".</td> +</tr> +<tr> +<td><a name="io.sort.factor">io.sort.factor</a></td><td>10</td><td>The number of streams to merge at once while sorting + files. This determines the number of open file handles.</td> +</tr> +<tr> +<td><a name="io.sort.mb">io.sort.mb</a></td><td>100</td><td>The total amount of buffer memory to use while sorting + files, in megabytes. 
By default, gives each merge stream 1MB, which + should minimize seeks.</td> +</tr> +<tr> +<td><a name="io.file.buffer.size">io.file.buffer.size</a></td><td>4096</td><td>The size of buffer for use in sequence files. + The size of this buffer should probably be a multiple of hardware + page size (4096 on Intel x86), and it determines how much data is + buffered during read and write operations.</td> +</tr> +<tr> +<td><a name="io.bytes.per.checksum">io.bytes.per.checksum</a></td><td>512</td><td>The number of bytes per checksum. Must not be larger than + io.file.buffer.size.</td> +</tr> +<tr> +<td><a name="io.skip.checksum.errors">io.skip.checksum.errors</a></td><td>false</td><td>If true, when a checksum error is encountered while + reading a sequence file, entries are skipped, instead of throwing an + exception.</td> +</tr> +<tr> +<td><a name="io.map.index.skip">io.map.index.skip</a></td><td>0</td><td>Number of index entries to skip between each entry. + Zero by default. Setting this to values larger than zero can + facilitate opening large map files using less memory.</td> +</tr> +<tr> +<td><a name="io.compression.codecs">io.compression.codecs</a></td><td>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec</td><td>A list of the compression codec classes that can be used + for compression/decompression.</td> +</tr> +<tr> +<td><a name="fs.default.name">fs.default.name</a></td><td>file:///</td><td>The name of the default file system. A URI whose + scheme and authority determine the FileSystem implementation. The + uri's scheme determines the config property (fs.SCHEME.impl) naming + the FileSystem implementation class. The uri's authority is used to + determine the host, port, etc. for a filesystem.</td> +</tr> +<tr> +<td><a name="fs.trash.root">fs.trash.root</a></td><td>${hadoop.tmp.dir}/Trash</td><td>The trash directory, used by FsShell's 'rm' command. 
+ </td> +</tr> +<tr> +<td><a name="fs.trash.interval">fs.trash.interval</a></td><td>0</td><td>Number of minutes between trash checkpoints. + If zero, the trash feature is disabled. + </td> +</tr> +<tr> +<td><a name="fs.file.impl">fs.file.impl</a></td><td>org.apache.hadoop.fs.LocalFileSystem</td><td>The FileSystem for file: uris.</td> +</tr> +<tr> +<td><a name="fs.hdfs.impl">fs.hdfs.impl</a></td><td>org.apache.hadoop.dfs.DistributedFileSystem</td><td>The FileSystem for hdfs: uris.</td> +</tr> +<tr> +<td><a name="fs.s3.impl">fs.s3.impl</a></td><td>org.apache.hadoop.fs.s3.S3FileSystem</td><td>The FileSystem for s3: uris.</td> +</tr> +<tr> +<td><a name="fs.kfs.impl">fs.kfs.impl</a></td><td>org.apache.hadoop.fs.kfs.KosmosFileSystem</td><td>The FileSystem for kfs: uris.</td> +</tr> +<tr> +<td><a name="fs.hftp.impl">fs.hftp.impl</a></td><td>org.apache.hadoop.dfs.HftpFileSystem</td><td></td> +</tr> +<tr> +<td><a name="fs.ramfs.impl">fs.ramfs.impl</a></td><td>org.apache.hadoop.fs.InMemoryFileSystem</td><td>The FileSystem for ramfs: uris.</td> +</tr> +<tr> +<td><a name="fs.inmemory.size.mb">fs.inmemory.size.mb</a></td><td>75</td><td>The size of the in-memory filsystem instance in MB</td> +</tr> +<tr> +<td><a name="fs.checkpoint.dir">fs.checkpoint.dir</a></td><td>${hadoop.tmp.dir}/dfs/namesecondary</td><td>Determines where on the local filesystem the DFS secondary + name node should store the temporary images and edits to merge. + </td> +</tr> +<tr> +<td><a name="fs.checkpoint.period">fs.checkpoint.period</a></td><td>3600</td><td>The number of seconds between two periodic checkpoints. + </td> +</tr> +<tr> +<td><a name="fs.checkpoint.size">fs.checkpoint.size</a></td><td>67108864</td><td>The size of the current edit log (in bytes) that triggers + a periodic checkpoint even if the fs.checkpoint.period hasn't expired. 
+ </td> +</tr> +<tr> +<td><a name="dfs.secondary.http.bindAddress">dfs.secondary.http.bindAddress</a></td><td>0.0.0.0:50090</td><td> + The secondary namenode http server bind address and port. + If the port is 0 then the server will start on a free port. + </td> +</tr> +<tr> +<td><a name="dfs.datanode.bindAddress">dfs.datanode.bindAddress</a></td><td>0.0.0.0:50010</td><td> + The address where the datanode will listen to. + If the port is 0 then the server will start on a free port. + </td> +</tr> +<tr> +<td><a name="dfs.datanode.http.bindAddress">dfs.datanode.http.bindAddress</a></td><td>0.0.0.0:50075</td><td> + The datanode http server bind address and port. + If the port is 0 then the server will start on a free port. + </td> +</tr> +<tr> +<td><a name="dfs.http.bindAddress">dfs.http.bindAddress</a></td><td>0.0.0.0:50070</td><td> + The address and the base port where the dfs namenode web ui will listen on. + If the port is 0 then the server will start on a free port. + </td> +</tr> +<tr> +<td><a name="dfs.datanode.dns.interface">dfs.datanode.dns.interface</a></td><td>default</td><td>The name of the Network Interface from which a data node should + report its IP address. + </td> +</tr> +<tr> +<td><a name="dfs.datanode.dns.nameserver">dfs.datanode.dns.nameserver</a></td><td>default</td><td>The host name or IP address of the name server (DNS) + which a DataNode should use to determine the host name used by the + NameNode for communication and display purposes. + </td> +</tr> +<tr> +<td><a name="dfs.replication.considerLoad">dfs.replication.considerLoad</a></td><td>true</td><td>Decide if chooseTarget considers the target's load or not + </td> +</tr> +<tr> +<td><a name="dfs.default.chunk.view.size">dfs.default.chunk.view.size</a></td><td>32768</td><td>The number of bytes to view for a file on the browser. + </td> +</tr> +<tr> +<td><a name="dfs.datanode.du.reserved">dfs.datanode.du.reserved</a></td><td>0</td><td>Reserved space in bytes per volume. 
Always leave this much space free for non dfs use. + </td> +</tr> +<tr> +<td><a name="dfs.datanode.du.pct">dfs.datanode.du.pct</a></td><td>0.98f</td><td>When calculating remaining space, only use this percentage of the real available space + </td> +</tr> +<tr> +<td><a name="dfs.name.dir">dfs.name.dir</a></td><td>${hadoop.tmp.dir}/dfs/name</td><td>Determines where on the local filesystem the DFS name node + should store the name table. If this is a comma-delimited list + of directories then the name table is replicated in all of the + directories, for redundancy. </td> +</tr> +<tr> +<td><a name="dfs.client.buffer.dir">dfs.client.buffer.dir</a></td><td>${hadoop.tmp.dir}/dfs/tmp</td><td>Determines where on the local filesystem an DFS client + should store its blocks before it sends them to the datanode. + </td> +</tr> +<tr> +<td><a name="dfs.data.dir">dfs.data.dir</a></td><td>${hadoop.tmp.dir}/dfs/data</td><td>Determines where on the local filesystem an DFS data node + should store its blocks. If this is a comma-delimited + list of directories, then data will be stored in all named + directories, typically on different devices. + Directories that do not exist are ignored. + </td> +</tr> +<tr> +<td><a name="dfs.replication">dfs.replication</a></td><td>3</td><td>Default block replication. + The actual number of replications can be specified when the file is created. + The default is used if replication is not specified in create time. + </td> +</tr> +<tr> +<td><a name="dfs.replication.max">dfs.replication.max</a></td><td>512</td><td>Maximal block replication. + </td> +</tr> +<tr> +<td><a name="dfs.replication.min">dfs.replication.min</a></td><td>1</td><td>Minimal block replication. 
+ </td> +</tr> +<tr> +<td><a name="dfs.block.size">dfs.block.size</a></td><td>67108864</td><td>The default block size for new files.</td> +</tr> +<tr> +<td><a name="dfs.df.interval">dfs.df.interval</a></td><td>60000</td><td>Disk usage statistics refresh interval in msec.</td> +</tr> +<tr> +<td><a name="dfs.client.block.write.retries">dfs.client.block.write.retries</a></td><td>3</td><td>The number of retries for writing blocks to the data nodes, + before we signal failure to the application. + </td> +</tr> +<tr> +<td><a name="dfs.blockreport.intervalMsec">dfs.blockreport.intervalMsec</a></td><td>3600000</td><td>Determines block reporting interval in milliseconds.</td> +</tr> +<tr> +<td><a name="dfs.heartbeat.interval">dfs.heartbeat.interval</a></td><td>3</td><td>Determines datanode heartbeat interval in seconds.</td> +</tr> +<tr> +<td><a name="dfs.namenode.handler.count">dfs.namenode.handler.count</a></td><td>10</td><td>The number of server threads for the namenode.</td> +</tr> +<tr> +<td><a name="dfs.safemode.threshold.pct">dfs.safemode.threshold.pct</a></td><td>0.999f</td><td> + Specifies the percentage of blocks that should satisfy + the minimal replication requirement defined by dfs.replication.min. + Values less than or equal to 0 mean not to start in safe mode. + Values greater than 1 will make safe mode permanent. + </td> +</tr> +<tr> +<td><a name="dfs.safemode.extension">dfs.safemode.extension</a></td><td>30000</td><td> + Determines extension of safe mode in milliseconds + after the threshold level is reached. + </td> +</tr> +<tr> +<td><a name="dfs.network.script">dfs.network.script</a></td><td></td><td> + Specifies a script name that print the network location path + of the current machine. + </td> +</tr> +<tr> +<td><a name="dfs.balance.bandwidthPerSec">dfs.balance.bandwidthPerSec</a></td><td>1048576</td><td> + Specifies the maximum amount of bandwidth that each datanode + can utilize for the balancing purpose in term of + the number of bytes per second. 
+ </td> +</tr> +<tr> +<td><a name="dfs.hosts">dfs.hosts</a></td><td></td><td>Names a file that contains a list of hosts that are + permitted to connect to the namenode. The full pathname of the file + must be specified. If the value is empty, all hosts are + permitted.</td> +</tr> +<tr> +<td><a name="dfs.hosts.exclude">dfs.hosts.exclude</a></td><td></td><td>Names a file that contains a list of hosts that are + not permitted to connect to the namenode. The full pathname of the + file must be specified. If the value is empty, no hosts are + excluded.</td> +</tr> +<tr> +<td><a name="fs.s3.block.size">fs.s3.block.size</a></td><td>67108864</td><td>Block size to use when writing files to S3.</td> +</tr> +<tr> +<td><a name="fs.s3.buffer.dir">fs.s3.buffer.dir</a></td><td>${hadoop.tmp.dir}/s3</td><td>Determines where on the local filesystem the S3 filesystem + should store its blocks before it sends them to S3 + or after it retrieves them from S3. + </td> +</tr> +<tr> +<td><a name="fs.s3.maxRetries">fs.s3.maxRetries</a></td><td>4</td><td>The maximum number of retries for reading or writing blocks to S3, + before we signal failure to the application. + </td> +</tr> +<tr> +<td><a name="fs.s3.sleepTimeSeconds">fs.s3.sleepTimeSeconds</a></td><td>10</td><td>The number of seconds to sleep between each S3 retry. + </td> +</tr> +<tr> +<td><a name="mapred.job.tracker">mapred.job.tracker</a></td><td>local</td><td>The host and port that the MapReduce job tracker runs + at. If "local", then jobs are run in-process as a single map + and reduce task. + </td> +</tr> +<tr> +<td><a name="mapred.job.tracker.http.bindAddress">mapred.job.tracker.http.bindAddress</a></td><td>0.0.0.0:50030</td><td> + The job tracker http server bind address and port. + If the port is 0 then the server will start on a free port. + </td> +</tr> +<tr> +<td><a name="mapred.job.tracker.handler.count">mapred.job.tracker.handler.count</a></td><td>10</td><td> + The number of server threads for the JobTracker. 
This should be roughly + 4% of the number of tasktracker nodes. + </td> +</tr> +<tr> +<td><a name="mapred.task.tracker.report.bindAddress">mapred.task.tracker.report.bindAddress</a></td><td>127.0.0.1:0</td><td>The interface that task processes use to communicate + with their parent tasktracker process.</td> +</tr> +<tr> +<td><a name="mapred.local.dir">mapred.local.dir</a></td><td>${hadoop.tmp.dir}/mapred/local</td><td>The local directory where MapReduce stores intermediate + data files. May be a comma-separated list of + directories on different devices in order to spread disk i/o. + Directories that do not exist are ignored. + </td> +</tr> +<tr> +<td><a name="local.cache.size">local.cache.size</a></td><td>10737418240</td><td>The limit on the size of cache you want to keep, set by default + to 10GB. This will act as a soft limit on the cache directory for out of band data. + </td> +</tr> +<tr> +<td><a name="mapred.system.dir">mapred.system.dir</a></td><td>${hadoop.tmp.dir}/mapred/system</td><td>The shared directory where MapReduce stores control files. + </td> +</tr> +<tr> +<td><a name="mapred.temp.dir">mapred.temp.dir</a></td><td>${hadoop.tmp.dir}/mapred/temp</td><td>A shared directory for temporary files. + </td> +</tr> +<tr> +<td><a name="mapred.local.dir.minspacestart">mapred.local.dir.minspacestart</a></td><td>0</td><td>If the space in mapred.local.dir drops under this, + do not ask for more tasks. + Value in bytes. + </td> +</tr> +<tr> +<td><a name="mapred.local.dir.minspacekill">mapred.local.dir.minspacekill</a></td><td>0</td><td>If the space in mapred.local.dir drops under this, + do not ask more tasks until all the current ones have finished and + cleaned up. Also, to save the rest of the tasks we have running, + kill one of them, to clean up some space. Start with the reduce tasks, + then go with the ones that have finished the least. + Value in bytes. 
+ </td> +</tr> +<tr> +<td><a name="mapred.tasktracker.expiry.interval">mapred.tasktracker.expiry.interval</a></td><td>600000</td><td>Expert: The time-interval, in miliseconds, after which + a tasktracker is declared 'lost' if it doesn't send heartbeats. + </td> +</tr> +<tr> +<td><a name="mapred.map.tasks">mapred.map.tasks</a></td><td>2</td><td>The default number of map tasks per job. Typically set + to a prime several times greater than number of available hosts. + Ignored when mapred.job.tracker is "local". + </td> +</tr> +<tr> +<td><a name="mapred.reduce.tasks">mapred.reduce.tasks</a></td><td>1</td><td>The default number of reduce tasks per job. Typically set + to a prime close to the number of available hosts. Ignored when + mapred.job.tracker is "local". + </td> +</tr> +<tr> +<td><a name="mapred.map.max.attempts">mapred.map.max.attempts</a></td><td>4</td><td>Expert: The maximum number of attempts per map task. + In other words, framework will try to execute a map task these many number + of times before giving up on it. + </td> +</tr> +<tr> +<td><a name="mapred.reduce.max.attempts">mapred.reduce.max.attempts</a></td><td>4</td><td>Expert: The maximum number of attempts per reduce task. + In other words, framework will try to execute a reduce task these many number + of times before giving up on it. + </td> +</tr> +<tr> +<td><a name="mapred.reduce.parallel.copies">mapred.reduce.parallel.copies</a></td><td>5</td><td>The default number of parallel transfers run by reduce + during the copy(shuffle) phase. + </td> +</tr> +<tr> +<td><a name="mapred.reduce.copy.backoff">mapred.reduce.copy.backoff</a></td><td>300</td><td>The maximum amount of time (in seconds) a reducer spends on + fetching one map output before declaring it as failed. 
+ </td> +</tr> +<tr> +<td><a name="mapred.task.timeout">mapred.task.timeout</a></td><td>600000</td><td>The number of milliseconds before a task will be + terminated if it neither reads an input, writes an output, nor + updates its status string. + </td> +</tr> +<tr> +<td><a name="mapred.tasktracker.map.tasks.maximum">mapred.tasktracker.map.tasks.maximum</a></td><td>2</td><td>The maximum number of map tasks that will be run + simultaneously by a task tracker. + </td> +</tr> +<tr> +<td><a name="mapred.tasktracker.reduce.tasks.maximum">mapred.tasktracker.reduce.tasks.maximum</a></td><td>2</td><td>The maximum number of reduce tasks that will be run + simultaneously by a task tracker. + </td> +</tr> +<tr> +<td><a name="mapred.jobtracker.completeuserjobs.maximum">mapred.jobtracker.completeuserjobs.maximum</a></td><td>100</td><td>The maximum number of complete jobs per user to keep around before delegating them to the job history. + </td> +</tr> +<tr> +<td><a name="mapred.child.java.opts">mapred.child.java.opts</a></td><td>-Xmx200m</td><td>Java opts for the task tracker child processes. Subsumes + 'mapred.child.heap.size' (If a mapred.child.heap.size value is found + in a configuration, its maximum heap size will be used and a warning + emitted that heap.size has been deprecated). Also, the following symbol, + if present, will be interpolated: @taskid@ is replaced by current TaskID. + Any other occurrences of '@' will go unchanged. For + example, to enable verbose gc logging to a file named for the taskid in + /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of: + -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc + </td> +</tr> +<tr> +<td><a name="mapred.inmem.merge.threshold">mapred.inmem.merge.threshold</a></td><td>1000</td><td>The threshold, in terms of the number of files + for the in-memory merge process. When we accumulate threshold number of files + we initiate the in-memory merge and spill to disk. 
A value of 0 or less than + 0 indicates we want to DON'T have any threshold and instead depend only on + the ramfs's memory consumption to trigger the merge. + </td> +</tr> +<tr> +<td><a name="mapred.speculative.execution">mapred.speculative.execution</a></td><td>true</td><td>If true, then multiple instances of some map and reduce tasks + may be executed in parallel.</td> +</tr> +<tr> +<td><a name="mapred.min.split.size">mapred.min.split.size</a></td><td>0</td><td>The minimum size chunk that map input should be split + into. Note that some file formats may have minimum split sizes that + take priority over this setting.</td> +</tr> +<tr> +<td><a name="mapred.submit.replication">mapred.submit.replication</a></td><td>10</td><td>The replication level for submitted job files. This + should be around the square root of the number of nodes. + </td> +</tr> +<tr> +<td><a name="mapred.tasktracker.dns.interface">mapred.tasktracker.dns.interface</a></td><td>default</td><td>The name of the Network Interface from which a task + tracker should report its IP address. + </td> +</tr> +<tr> +<td><a name="mapred.tasktracker.dns.nameserver">mapred.tasktracker.dns.nameserver</a></td><td>default</td><td>The host name or IP address of the name server (DNS) + which a TaskTracker should use to determine the host name used by + the JobTracker for communication and display purposes. + </td> +</tr> +<tr> +<td><a name="tasktracker.http.threads">tasktracker.http.threads</a></td><td>40</td><td>The number of worker threads that for the http server. This is + used for map output fetching + </td> +</tr> +<tr> +<td><a name="mapred.task.tracker.http.bindAddress">mapred.task.tracker.http.bindAddress</a></td><td>0.0.0.0:50060</td><td> + The task tracker http server bind address and port. + If the port is 0 then the server will start on a free port. + </td> +</tr> +<tr> +<td><a name="keep.failed.task.files">keep.failed.task.files</a></td><td>false</td><td>Should the files for failed tasks be kept. 
This should only be + used on jobs that are failing, because the storage is never + reclaimed. It also prevents the map outputs from being erased + from the reduce directory as they are consumed.</td> +</tr> +<tr> +<td><a name="mapred.output.compress">mapred.output.compress</a></td><td>false</td><td>Should the job outputs be compressed? + </td> +</tr> +<tr> +<td><a name="mapred.output.compression.type">mapred.output.compression.type</a></td><td>RECORD</td><td>If the job outputs are to compressed as SequenceFiles, how should + they be compressed? Should be one of NONE, RECORD or BLOCK. + </td> +</tr> +<tr> +<td><a name="mapred.output.compression.codec">mapred.output.compression.codec</a></td><td>org.apache.hadoop.io.compress.DefaultCodec</td><td>If the job outputs are compressed, how should they be compressed? + </td> +</tr> +<tr> +<td><a name="mapred.compress.map.output">mapred.compress.map.output</a></td><td>false</td><td>Should the outputs of the maps be compressed before being + sent across the network. Uses SequenceFile compression. + </td> +</tr> +<tr> +<td><a name="mapred.map.output.compression.type">mapred.map.output.compression.type</a></td><td>RECORD</td><td>If the map outputs are to compressed, how should they + be compressed? Should be one of NONE, RECORD or BLOCK. + </td> +</tr> +<tr> +<td><a name="mapred.map.output.compression.codec">mapred.map.output.compression.codec</a></td><td>org.apache.hadoop.io.compress.DefaultCodec</td><td>If the map outputs are compressed, how should they be + compressed? + </td> +</tr> +<tr> +<td><a name="io.seqfile.compress.blocksize">io.seqfile.compress.blocksize</a></td><td>1000000</td><td>The minimum block size for compression in block compressed + SequenceFiles. + </td> +</tr> +<tr> +<td><a name="io.seqfile.lazydecompress">io.seqfile.lazydecompress</a></td><td>true</td><td>Should values of block-compressed SequenceFiles be decompressed + only when necessary. 
+ </td> +</tr> +<tr> +<td><a name="io.seqfile.sorter.recordlimit">io.seqfile.sorter.recordlimit</a></td><td>1000000</td><td>The limit on number of records to be kept in memory in a spill + in SequenceFiles.Sorter + </td> +</tr> +<tr> +<td><a name="io.seqfile.compression.type">io.seqfile.compression.type</a></td><td>RECORD</td><td>The default compression type for SequenceFile.Writer. + </td> +</tr> +<tr> +<td><a name="map.sort.class">map.sort.class</a></td><td>org.apache.hadoop.mapred.MergeSorter</td><td>The default sort class for sorting keys. + </td> +</tr> +<tr> +<td><a name="mapred.userlog.limit.kb">mapred.userlog.limit.kb</a></td><td>0</td><td>The maximum size of user-logs of each task in KB. 0 disables the cap. + </td> +</tr> +<tr> +<td><a name="mapred.userlog.retain.hours">mapred.userlog.retain.hours</a></td><td>24</td><td>The maximum time, in hours, for which the user-logs are to be + retained. + </td> +</tr> +<tr> +<td><a name="mapred.hosts">mapred.hosts</a></td><td></td><td>Names a file that contains the list of nodes that may + connect to the jobtracker. If the value is empty, all hosts are + permitted.</td> +</tr> +<tr> +<td><a name="mapred.hosts.exclude">mapred.hosts.exclude</a></td><td></td><td>Names a file that contains the list of hosts that + should be excluded by the jobtracker. If the value is empty, no + hosts are excluded.</td> +</tr> +<tr> +<td><a name="mapred.max.tracker.failures">mapred.max.tracker.failures</a></td><td>4</td><td>The number of task-failures on a tasktracker of a given job + after which new tasks of that job aren't assigned to it. + </td> +</tr> +<tr> +<td><a name="jobclient.output.filter">jobclient.output.filter</a></td><td>FAILED</td><td>The filter for controlling the output of the task's userlogs sent + to the console of the JobClient. + The permissible options are: NONE, KILLED, FAILED, SUCCEEDED and + ALL. 
+ </td> +</tr> +<tr> +<td><a name="ipc.client.timeout">ipc.client.timeout</a></td><td>60000</td><td>Defines the timeout for IPC calls in milliseconds.</td> +</tr> +<tr> +<td><a name="ipc.client.idlethreshold">ipc.client.idlethreshold</a></td><td>4000</td><td>Defines the threshold number of connections after which + connections will be inspected for idleness. + </td> +</tr> +<tr> +<td><a name="ipc.client.maxidletime">ipc.client.maxidletime</a></td><td>120000</td><td>Defines the maximum idle time for a connected client after + which it may be disconnected. + </td> +</tr> +<tr> +<td><a name="ipc.client.kill.max">ipc.client.kill.max</a></td><td>10</td><td>Defines the maximum number of clients to disconnect in one go. + </td> +</tr> +<tr> +<td><a name="ipc.client.connection.maxidletime">ipc.client.connection.maxidletime</a></td><td>1000</td><td>The maximum time after which a client will bring down the + connection to the server. + </td> +</tr> +<tr> +<td><a name="ipc.client.connect.max.retries">ipc.client.connect.max.retries</a></td><td>10</td><td>Indicates the number of retries a client will make to establish + a server connection. + </td> +</tr> +<tr> +<td><a name="ipc.server.listen.queue.size">ipc.server.listen.queue.size</a></td><td>128</td><td>Indicates the length of the listen queue for servers accepting + client connections. + </td> +</tr> +<tr> +<td><a name="job.end.retry.attempts">job.end.retry.attempts</a></td><td>0</td><td>Indicates how many times hadoop should attempt to contact the + notification URL </td> +</tr> +<tr> +<td><a name="job.end.retry.interval">job.end.retry.interval</a></td><td>30000</td><td>Indicates time in milliseconds between notification URL retry + calls</td> +</tr> +<tr> +<td><a name="webinterface.private.actions">webinterface.private.actions</a></td><td>false</td><td> If set to true, the web interfaces of JT and NN may contain + actions, such as kill job, delete file, etc., that should + not be exposed to public. 
Enable this option if the interfaces + are only reachable by those who have the right authorization. + </td> +</tr> +<tr> +<td><a name="hadoop.rpc.socket.factory.class.default">hadoop.rpc.socket.factory.class.default</a></td><td>org.apache.hadoop.net.StandardSocketFactory</td><td> Default SocketFactory to use. This parameter is expected to be + formatted as "package.FactoryClassName". + </td> +</tr> +<tr> +<td><a name="hadoop.rpc.socket.factory.class.ClientProtocol">hadoop.rpc.socket.factory.class.ClientProtocol</a></td><td></td><td> SocketFactory to use to connect to a DFS. If null or empty, use + hadoop.rpc.socket.class.default. This socket factory is also used by + DFSClient to create sockets to DataNodes. + </td> +</tr> +<tr> +<td><a name="hadoop.rpc.socket.factory.class.JobSubmissionProtocol">hadoop.rpc.socket.factory.class.JobSubmissionProtocol</a></td><td></td><td> SocketFactory to use to connect to a Map/Reduce master + (JobTracker). If null or empty, then use hadoop.rpc.socket.class.default. + </td> +</tr> +<tr> +<td><a name="hadoop.socks.server">hadoop.socks.server</a></td><td></td><td> Address (host:port) of the SOCKS server to be used by the + SocksSocketFactory. + </td> +</tr> +</table> +</body> +</html>