Author: ddas
Date: Mon Jan 19 11:28:48 2009
New Revision: 735777
URL: http://svn.apache.org/viewvc?rev=735777&view=rev
Log:
HADOOP-4828. Updates documents to do with configuration (HADOOP-4631).
Contributed by Sharad Agarwal.
Modified:
hadoop/core/trunk/CHANGES.txt
hadoop/core/trunk/src/c++/libhdfs/hdfs.h
hadoop/core/trunk/src/c++/libhdfs/tests/conf/hadoop-site.xml
hadoop/core/trunk/src/c++/libhdfs/tests/test-libhdfs.sh
hadoop/core/trunk/src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/datacollection/writer/SeqFileWriter.java
hadoop/core/trunk/src/contrib/fairscheduler/README
hadoop/core/trunk/src/contrib/hdfsproxy/README
hadoop/core/trunk/src/contrib/test/hadoop-site.xml
hadoop/core/trunk/src/core/org/apache/hadoop/fs/kfs/package.html
hadoop/core/trunk/src/core/overview.html
hadoop/core/trunk/src/docs/src/documentation/content/xdocs/cluster_setup.xml
hadoop/core/trunk/src/docs/src/documentation/content/xdocs/hdfs_user_guide.xml
hadoop/core/trunk/src/docs/src/documentation/content/xdocs/libhdfs.xml
hadoop/core/trunk/src/docs/src/documentation/content/xdocs/quickstart.xml
hadoop/core/trunk/src/docs/src/documentation/content/xdocs/site.xml
hadoop/core/trunk/src/docs/src/documentation/content/xdocs/streaming.xml
hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/InputFormat.java
hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/JobConf.java
hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/Mapper.java
hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/Reducer.java
hadoop/core/trunk/src/mapred/org/apache/hadoop/mapreduce/InputFormat.java
hadoop/core/trunk/src/test/hadoop-site.xml
Modified: hadoop/core/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=735777&r1=735776&r2=735777&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Mon Jan 19 11:28:48 2009
@@ -363,6 +363,9 @@
HADOOP-5030. Build Chukwa RPM to install into configured directory. (Eric
Yang via cdouglas)
+
+ HADOOP-4828. Updates documents to do with configuration (HADOOP-4631).
+ (Sharad Agarwal via ddas)
OPTIMIZATIONS
Modified: hadoop/core/trunk/src/c++/libhdfs/hdfs.h
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/c%2B%2B/libhdfs/hdfs.h?rev=735777&r1=735776&r2=735777&view=diff
==============================================================================
--- hadoop/core/trunk/src/c++/libhdfs/hdfs.h (original)
+++ hadoop/core/trunk/src/c++/libhdfs/hdfs.h Mon Jan 19 11:28:48 2009
@@ -98,7 +98,7 @@
* of the namenode of a hdfs cluster. 'host' should be passed as NULL if
* you want to connect to local filesystem. 'host' should be passed as
* 'default' (and port as 0) to use the 'configured' filesystem
- * (hadoop-site/hadoop-default.xml).
+ * (core-site/core-default.xml).
* @param port The port on which the server is listening.
* @param user the user name (this is hadoop domain user). Or NULL, which is equivalent to hdfsConnect(host, port).
* @param groups the groups (these are hadoop domain groups)
@@ -114,7 +114,7 @@
* of the namenode of a hdfs cluster. 'host' should be passed as NULL if
* you want to connect to local filesystem. 'host' should be passed as
* 'default' (and port as 0) to use the 'configured' filesystem
- * (hadoop-site/hadoop-default.xml).
+ * (core-site/core-default.xml).
* @param port The port on which the server is listening.
* @return Returns a handle to the filesystem or NULL on error.
*/
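For context, hdfsConnect resolves the 'configured' filesystem from fs.default.name, which after this change lives in core-site.xml rather than hadoop-site.xml. A minimal sketch of such a file, with a placeholder NameNode URI:

  <?xml version="1.0"?>
  <configuration>
    <!-- Hypothetical NameNode URI; hdfsConnect("default", 0) resolves to this. -->
    <property>
      <name>fs.default.name</name>
      <value>hdfs://namenode.example.com:9000</value>
    </property>
  </configuration>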
Modified: hadoop/core/trunk/src/c++/libhdfs/tests/conf/hadoop-site.xml
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/c%2B%2B/libhdfs/tests/conf/hadoop-site.xml?rev=735777&r1=735776&r2=735777&view=diff
==============================================================================
--- hadoop/core/trunk/src/c++/libhdfs/tests/conf/hadoop-site.xml (original)
+++ hadoop/core/trunk/src/c++/libhdfs/tests/conf/hadoop-site.xml Mon Jan 19 11:28:48 2009
@@ -1,9 +1,11 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="nutch-conf.xsl"?>
-<!-- Values used when running libhdfs unit tests. -->
-<!-- This is mostly empty, to use the default values, overriding the -->
-<!-- potentially user-editted hadoop-site.xml in the conf/ directory. -->
+<!-- DO NOT PUT ANY PROPERTY IN THIS FILE. INSTEAD USE -->
+<!-- core-site.xml, mapred-site.xml OR hdfs-site.xml -->
+<!-- This empty file is here to avoid picking up properties from -->
+<!-- conf/hadoop-site.xml. It will be removed once support -->
+<!-- for hadoop-site.xml is removed. -->
<configuration>
Modified: hadoop/core/trunk/src/c++/libhdfs/tests/test-libhdfs.sh
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/c%2B%2B/libhdfs/tests/test-libhdfs.sh?rev=735777&r1=735776&r2=735777&view=diff
==============================================================================
--- hadoop/core/trunk/src/c++/libhdfs/tests/test-libhdfs.sh (original)
+++ hadoop/core/trunk/src/c++/libhdfs/tests/test-libhdfs.sh Mon Jan 19 11:28:48 2009
@@ -30,8 +30,7 @@
HADOOP_LIB_DIR=$HADOOP_HOME/lib
HADOOP_BIN_DIR=$HADOOP_HOME/bin
-# Manipulate HADOOP_CONF_DIR so as to include
-# HADOOP_HOME/conf/hadoop-default.xml too
+# Manipulate HADOOP_CONF_DIR too
# which is necessary to circumvent bin/hadoop
HADOOP_CONF_DIR=$HADOOP_CONF_DIR:$HADOOP_HOME/conf
Modified:
hadoop/core/trunk/src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/datacollection/writer/SeqFileWriter.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/datacollection/writer/SeqFileWriter.java?rev=735777&r1=735776&r2=735777&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/datacollection/writer/SeqFileWriter.java (original)
+++ hadoop/core/trunk/src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/datacollection/writer/SeqFileWriter.java Mon Jan 19 11:28:48 2009
@@ -105,7 +105,7 @@
log.info("rotateInterval is " + rotateInterval);
log.info("outputDir is " + outputDir);
log.info("fsname is " + fsname);
- log.info("filesystem type from hadoop-default.xml is "
+ log.info("filesystem type from core-default.xml is "
+ conf.get("fs.hdfs.impl"));
if (fsname == null) {
Modified: hadoop/core/trunk/src/contrib/fairscheduler/README
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/fairscheduler/README?rev=735777&r1=735776&r2=735777&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/fairscheduler/README (original)
+++ hadoop/core/trunk/src/contrib/fairscheduler/README Mon Jan 19 11:28:48 2009
@@ -70,7 +70,7 @@
you can modify HADOOP_CLASSPATH to include this jar, in conf/hadoop-env.sh.
You will also need to set the following property in the Hadoop config file
-(conf/hadoop-site.xml) to have Hadoop use the fair scheduler:
+(conf/mapred-site.xml) to have Hadoop use the fair scheduler:
<property>
<name>mapred.jobtracker.taskScheduler</name>
@@ -86,7 +86,7 @@
CONFIGURING:
-The following properties can be set in hadoop-site.xml to configure the
+The following properties can be set in mapred-site.xml to configure the
scheduler:
mapred.fairscheduler.allocation.file:
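For reference, a complete scheduler property block in conf/mapred-site.xml might look like the sketch below; the class name org.apache.hadoop.mapred.FairScheduler is assumed from the contrib package and should be verified against the jar in use:

  <property>
    <name>mapred.jobtracker.taskScheduler</name>
    <!-- Assumed fair scheduler class from the contrib package. -->
    <value>org.apache.hadoop.mapred.FairScheduler</value>
  </property>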
Modified: hadoop/core/trunk/src/contrib/hdfsproxy/README
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hdfsproxy/README?rev=735777&r1=735776&r2=735777&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hdfsproxy/README (original)
+++ hadoop/core/trunk/src/contrib/hdfsproxy/README Mon Jan 19 11:28:48 2009
@@ -13,7 +13,7 @@
hdfsproxy-default.xml. Environment variable HDFSPROXY_CONF_DIR can be used to
point to the directory where these configuration files are located. The
configuration files of the proxied HDFS cluster should also be available on the
-classpath (hadoop-default.xml and hadoop-site.xml).
+classpath (hdfs-default.xml and hdfs-site.xml).
Mirroring those used in HDFS, a few shell scripts are provided to start and
stop a group of proxy servers. The hosts to run hdfsproxy on are specified in
Modified: hadoop/core/trunk/src/contrib/test/hadoop-site.xml
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/test/hadoop-site.xml?rev=735777&r1=735776&r2=735777&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/test/hadoop-site.xml (original)
+++ hadoop/core/trunk/src/contrib/test/hadoop-site.xml Mon Jan 19 11:28:48 2009
@@ -1,9 +1,11 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="nutch-conf.xsl"?>
-<!-- Values used when running unit tests. This is mostly empty, to -->
-<!-- use of the default values, overriding the potentially -->
-<!-- user-editted hadoop-site.xml in the conf/ directory. -->
+<!-- DO NOT PUT ANY PROPERTY IN THIS FILE. INSTEAD USE -->
+<!-- core-site.xml, mapred-site.xml OR hdfs-site.xml -->
+<!-- This empty file is here to avoid picking up properties from -->
+<!-- conf/hadoop-site.xml. It will be removed once support -->
+<!-- for hadoop-site.xml is removed. -->
<configuration>
Modified: hadoop/core/trunk/src/core/org/apache/hadoop/fs/kfs/package.html
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/core/org/apache/hadoop/fs/kfs/package.html?rev=735777&r1=735776&r2=735777&view=diff
==============================================================================
--- hadoop/core/trunk/src/core/org/apache/hadoop/fs/kfs/package.html (original)
+++ hadoop/core/trunk/src/core/org/apache/hadoop/fs/kfs/package.html Mon Jan 19 11:28:48 2009
@@ -32,7 +32,7 @@
<h3>Steps</h3>
<ul>
- <li>In the Hadoop conf directory edit hadoop-default.xml,
+ <li>In the Hadoop conf directory edit core-site.xml,
add the following:
<pre>
<property>
@@ -42,7 +42,7 @@
</property>
</pre>
- <li>In the Hadoop conf directory edit hadoop-site.xml,
+ <li>In the Hadoop conf directory edit core-site.xml,
adding the following (with appropriate values for
<server> and <port>):
<pre>
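Taken together, the two KFS edits above now target a single file. A sketch of the resulting conf/core-site.xml entries, assuming the usual KosmosFileSystem implementation class and placeholder server and port values:

  <property>
    <name>fs.kfs.impl</name>
    <!-- Assumed implementation class from the org.apache.hadoop.fs.kfs package. -->
    <value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value>
  </property>
  <property>
    <name>fs.default.name</name>
    <!-- server and port are placeholders for the KFS metaserver. -->
    <value>kfs://server:port</value>
  </property>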
Modified: hadoop/core/trunk/src/core/overview.html
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/core/overview.html?rev=735777&r1=735776&r2=735777&view=diff
==============================================================================
--- hadoop/core/trunk/src/core/overview.html (original)
+++ hadoop/core/trunk/src/core/overview.html Mon Jan 19 11:28:48 2009
@@ -134,13 +134,13 @@
<li>The NameNode (Distributed Filesystem master) host. This is
specified with the configuration property <tt><a
- href="../hadoop-default.html#fs.default.name">fs.default.name</a></tt>.
+ href="../core-default.html#fs.default.name">fs.default.name</a></tt>.
</li>
<li>The {@link org.apache.hadoop.mapred.JobTracker} (MapReduce master)
host and port. This is specified with the configuration property
<tt><a
-href="../hadoop-default.html#mapred.job.tracker">mapred.job.tracker</a></tt>.
+href="../mapred-default.html#mapred.job.tracker">mapred.job.tracker</a></tt>.
</li>
<li>A <em>slaves</em> file that lists the names of all the hosts in
@@ -151,8 +151,10 @@
<h3>Pseudo-distributed configuration</h3>
You can in fact run everything on a single host. To run things this
-way, put the following in conf/hadoop-site.xml:
-
+way, put the following in:
+<br/>
+<br/>
+conf/core-site.xml:
<xmp><configuration>
<property>
@@ -160,10 +162,10 @@
<value>hdfs://localhost/</value>
</property>
- <property>
- <name>mapred.job.tracker</name>
- <value>localhost:9001</value>
- </property>
+</configuration></xmp>
+
+conf/hdfs-site.xml:
+<xmp><configuration>
<property>
<name>dfs.replication</name>
@@ -172,6 +174,16 @@
</configuration></xmp>
+conf/mapred-site.xml:
+<xmp><configuration>
+
+ <property>
+ <name>mapred.job.tracker</name>
+ <value>localhost:9001</value>
+ </property>
+
+</configuration></xmp>
+
<p>(We also set the HDFS replication level to 1 in order to
reduce warnings when running on a single node.)</p>
@@ -217,36 +229,38 @@
<h3>Fully-distributed operation</h3>
<p>Fully distributed operation is just like the pseudo-distributed operation
-described above, except, in <tt>conf/hadoop-site.xml</tt>, specify:</p>
+described above, except, specify:</p>
<ol>
<li>The hostname or IP address of your master server in the value
for <tt><a
-href="../hadoop-default.html#fs.default.name">fs.default.name</a></tt>,
- as <tt><em>hdfs://master.example.com/</em></tt>.</li>
+href="../core-default.html#fs.default.name">fs.default.name</a></tt>,
+ as <tt><em>hdfs://master.example.com/</em></tt> in <tt>conf/core-site.xml</tt>.</li>
<li>The host and port of your master server in the value
-of <tt><a href="../hadoop-default.html#mapred.job.tracker">mapred.job.tracker</a></tt>
-as <tt><em>master.example.com</em>:<em>port</em></tt>.</li>
+of <tt><a href="../mapred-default.html#mapred.job.tracker">mapred.job.tracker</a></tt>
+as <tt><em>master.example.com</em>:<em>port</em></tt> in <tt>conf/mapred-site.xml</tt>.</li>
<li>Directories for <tt><a
-href="../hadoop-default.html#dfs.name.dir">dfs.name.dir</a></tt> and
-<tt><a href="../hadoop-default.html#dfs.data.dir">dfs.data.dir</a>.
+href="../hdfs-default.html#dfs.name.dir">dfs.name.dir</a></tt> and
+<tt><a href="../hdfs-default.html#dfs.data.dir">dfs.data.dir</a>
+in <tt>conf/hdfs-site.xml</tt>.
</tt>These are local directories used to hold distributed filesystem
data on the master node and slave nodes respectively. Note
that <tt>dfs.data.dir</tt> may contain a space- or comma-separated
list of directory names, so that data may be stored on multiple local
devices.</li>
-<li><tt><a href="../hadoop-default.html#mapred.local.dir">mapred.local.dir</a></tt>,
- the local directory where temporary MapReduce data is stored. It
- also may be a list of directories.</li>
+<li><tt><a href="../mapred-default.html#mapred.local.dir">mapred.local.dir</a></tt>
+ in <tt>conf/mapred-site.xml</tt>, the local directory where temporary
+ MapReduce data is stored. It also may be a list of directories.</li>
<li><tt><a
-href="../hadoop-default.html#mapred.map.tasks">mapred.map.tasks</a></tt>
+href="../mapred-default.html#mapred.map.tasks">mapred.map.tasks</a></tt>
and <tt><a
-href="../hadoop-default.html#mapred.reduce.tasks">mapred.reduce.tasks</a></tt>.
+href="../mapred-default.html#mapred.reduce.tasks">mapred.reduce.tasks</a></tt>
+in <tt>conf/mapred-site.xml</tt>.
As a rule of thumb, use 10x the
number of slave processors for <tt>mapred.map.tasks</tt>, and 2x the
number of slave processors for <tt>mapred.reduce.tasks</tt>.</li>
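Pulling the MapReduce-side settings above together, a fully-distributed conf/mapred-site.xml might look like this sketch (host, port, and directory values are placeholders):

  <configuration>
    <property>
      <name>mapred.job.tracker</name>
      <value>master.example.com:9001</value>
    </property>
    <property>
      <name>mapred.local.dir</name>
      <!-- May be a comma-separated list to spread data over multiple devices. -->
      <value>/tmp/mapred/local</value>
    </property>
  </configuration>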
Modified:
hadoop/core/trunk/src/docs/src/documentation/content/xdocs/cluster_setup.xml
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/docs/src/documentation/content/xdocs/cluster_setup.xml?rev=735777&r1=735776&r2=735777&view=diff
==============================================================================
--- hadoop/core/trunk/src/docs/src/documentation/content/xdocs/cluster_setup.xml (original)
+++ hadoop/core/trunk/src/docs/src/documentation/content/xdocs/cluster_setup.xml Mon Jan 19 11:28:48 2009
@@ -75,15 +75,20 @@
<section>
<title>Configuration Files</title>
- <p>Hadoop configuration is driven by two important configuration files
- found in the <code>conf/</code> directory of the distribution:</p>
+ <p>Hadoop configuration is driven by two types of important
+ configuration files:</p>
<ol>
<li>
- <a href="ext:hadoop-default">hadoop-default.xml</a> - Read-only
- default configuration.
+ Read-only default configuration -
+ <a href="ext:core-default">src/core/core-default.xml</a>,
+ <a href="ext:hdfs-default">src/hdfs/hdfs-default.xml</a> and
+ <a href="ext:mapred-default">src/mapred/mapred-default.xml</a>.
</li>
<li>
- <em>hadoop-site.xml</em> - Site-specific configuration.
+ Site-specific configuration -
+ <em>conf/core-site.xml</em>,
+ <em>conf/hdfs-site.xml</em> and
+ <em>conf/mapred-site.xml</em>.
</li>
</ol>
@@ -157,7 +162,9 @@
<title>Configuring the Hadoop Daemons</title>
<p>This section deals with important parameters to be specified in the
- <code>conf/hadoop-site.xml</code> for the Hadoop cluster.</p>
+ following:
+ <br/>
+ <code>conf/core-site.xml</code>:</p>
<table>
<tr>
@@ -165,16 +172,21 @@
<th>Value</th>
<th>Notes</th>
</tr>
- <tr>
- <td>fs.default.name</td>
- <td>URI of <code>NameNode</code>.</td>
- <td><em>hdfs://hostname/</em></td>
- </tr>
<tr>
- <td>mapred.job.tracker</td>
- <td>Host or IP and port of <code>JobTracker</code>.</td>
- <td><em>host:port</em> pair.</td>
- </tr>
+ <td>fs.default.name</td>
+ <td>URI of <code>NameNode</code>.</td>
+ <td><em>hdfs://hostname/</em></td>
+ </tr>
+ </table>
+
+ <p><br/><code>conf/hdfs-site.xml</code>:</p>
+
+ <table>
+ <tr>
+ <th>Parameter</th>
+ <th>Value</th>
+ <th>Notes</th>
+ </tr>
<tr>
<td>dfs.name.dir</td>
<td>
@@ -197,6 +209,21 @@
devices.
</td>
</tr>
+ </table>
+
+ <p><br/><code>conf/mapred-site.xml</code>:</p>
+
+ <table>
+ <tr>
+ <th>Parameter</th>
+ <th>Value</th>
+ <th>Notes</th>
+ </tr>
+ <tr>
+ <td>mapred.job.tracker</td>
+ <td>Host or IP and port of <code>JobTracker</code>.</td>
+ <td><em>host:port</em> pair.</td>
+ </tr>
<tr>
<td>mapred.system.dir</td>
<td>
@@ -322,16 +349,19 @@
that is 9TB of data sorted on a cluster with 900 nodes:</p>
<table>
<tr>
+ <th>Configuration File</th>
<th>Parameter</th>
<th>Value</th>
<th>Notes</th>
</tr>
<tr>
+ <td>conf/hdfs-site.xml</td>
<td>dfs.block.size</td>
<td>134217728</td>
<td>HDFS blocksize of 128MB for large file-systems.</td>
</tr>
<tr>
+ <td>conf/hdfs-site.xml</td>
<td>dfs.namenode.handler.count</td>
<td>40</td>
<td>
@@ -340,6 +370,7 @@
</td>
</tr>
<tr>
+ <td>conf/mapred-site.xml</td>
<td>mapred.reduce.parallel.copies</td>
<td>20</td>
<td>
@@ -348,6 +379,7 @@
</td>
</tr>
<tr>
+ <td>conf/mapred-site.xml</td>
<td>mapred.child.java.opts</td>
<td>-Xmx512M</td>
<td>
@@ -355,6 +387,7 @@
</td>
</tr>
<tr>
+ <td>conf/core-site.xml</td>
<td>fs.inmemory.size.mb</td>
<td>200</td>
<td>
@@ -363,16 +396,19 @@
</td>
</tr>
<tr>
+ <td>conf/core-site.xml</td>
<td>io.sort.factor</td>
<td>100</td>
<td>More streams merged at once while sorting files.</td>
</tr>
<tr>
+ <td>conf/core-site.xml</td>
<td>io.sort.mb</td>
<td>200</td>
<td>Higher memory-limit while sorting data.</td>
</tr>
<tr>
+ <td>conf/core-site.xml</td>
<td>io.file.buffer.size</td>
<td>131072</td>
<td>Size of read/write buffer used in SequenceFiles.</td>
@@ -385,11 +421,13 @@
data sorted on 2000 nodes:</p>
<table>
<tr>
+ <th>Configuration File</th>
<th>Parameter</th>
<th>Value</th>
<th>Notes</th>
</tr>
<tr>
+ <td>conf/mapred-site.xml</td>
<td>mapred.job.tracker.handler.count</td>
<td>60</td>
<td>
@@ -398,11 +436,13 @@
</td>
</tr>
<tr>
+ <td>conf/mapred-site.xml</td>
<td>mapred.reduce.parallel.copies</td>
<td>50</td>
<td></td>
</tr>
<tr>
+ <td>conf/mapred-site.xml</td>
<td>tasktracker.http.threads</td>
<td>50</td>
<td>
@@ -412,6 +452,7 @@
</td>
</tr>
<tr>
+ <td>conf/mapred-site.xml</td>
<td>mapred.child.java.opts</td>
<td>-Xmx1024M</td>
<td>Larger heap-size for child jvms of maps/reduces.</td>
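As the tables above show, each tuning parameter now belongs to a specific file. For illustration, the two mapred-site.xml entries from the 900-node example would be written as:

  <property>
    <name>mapred.reduce.parallel.copies</name>
    <value>20</value>
  </property>
  <property>
    <name>mapred.child.java.opts</name>
    <value>-Xmx512M</value>
  </property>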
Modified:
hadoop/core/trunk/src/docs/src/documentation/content/xdocs/hdfs_user_guide.xml
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/docs/src/documentation/content/xdocs/hdfs_user_guide.xml?rev=735777&r1=735776&r2=735777&view=diff
==============================================================================
--- hadoop/core/trunk/src/docs/src/documentation/content/xdocs/hdfs_user_guide.xml (original)
+++ hadoop/core/trunk/src/docs/src/documentation/content/xdocs/hdfs_user_guide.xml Mon Jan 19 11:28:48 2009
@@ -474,7 +474,7 @@
<a href="mailto:core-user@hadoop.apache.org">core-user@hadoop.apache.org</a>.
</li>
<li>
- Explore <code>conf/hadoop-default.xml</code>.
+ Explore <code>src/hdfs/hdfs-default.xml</code>.
It includes a brief
description of most of the configuration variables available.
</li>
Modified: hadoop/core/trunk/src/docs/src/documentation/content/xdocs/libhdfs.xml
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/docs/src/documentation/content/xdocs/libhdfs.xml?rev=735777&r1=735776&r2=735777&view=diff
==============================================================================
--- hadoop/core/trunk/src/docs/src/documentation/content/xdocs/libhdfs.xml (original)
+++ hadoop/core/trunk/src/docs/src/documentation/content/xdocs/libhdfs.xml Mon Jan 19 11:28:48 2009
@@ -82,7 +82,7 @@
<section>
<title>Common problems</title>
<p>
-The most common problem is the CLASSPATH is not set properly when calling a program that uses libhdfs. Make sure you set it to all the hadoop jars needed to run Hadoop itself. Currently, there is no way to programmatically generate the classpath, but a good bet is to include all the jar files in ${HADOOP_HOME} and ${HADOOP_HOME}/lib as well as the right configuration directory containing hadoop-site.xml
+The most common problem is the CLASSPATH is not set properly when calling a program that uses libhdfs. Make sure you set it to all the hadoop jars needed to run Hadoop itself. Currently, there is no way to programmatically generate the classpath, but a good bet is to include all the jar files in ${HADOOP_HOME} and ${HADOOP_HOME}/lib as well as the right configuration directory containing hdfs-site.xml
</p>
</section>
<section>
Modified:
hadoop/core/trunk/src/docs/src/documentation/content/xdocs/quickstart.xml
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/docs/src/documentation/content/xdocs/quickstart.xml?rev=735777&r1=735776&r2=735777&view=diff
==============================================================================
--- hadoop/core/trunk/src/docs/src/documentation/content/xdocs/quickstart.xml (original)
+++ hadoop/core/trunk/src/docs/src/documentation/content/xdocs/quickstart.xml Mon Jan 19 11:28:48 2009
@@ -161,7 +161,9 @@
<section>
<title>Configuration</title>
- <p>Use the following <code>conf/hadoop-site.xml</code>:</p>
+ <p>Use the following:
+ <br/>
+ <code>conf/core-site.xml</code>:</p>
<table>
<tr><td><configuration></td></tr>
@@ -170,10 +172,12 @@
<tr><td> <value>hdfs://localhost:9000</value></td></tr>
<tr><td> </property></td></tr>
- <tr><td> <property></td></tr>
- <tr><td> <name>mapred.job.tracker</name></td></tr>
- <tr><td> <value>localhost:9001</value></td></tr>
- <tr><td> </property></td></tr>
+ <tr><td></configuration></td></tr>
+ </table>
+
+ <p><br/><code>conf/hdfs-site.xml</code>:</p>
+ <table>
+ <tr><td><configuration></td></tr>
<tr><td> <property></td></tr>
<tr><td> <name>dfs.replication</name></td></tr>
@@ -182,6 +186,18 @@
<tr><td></configuration></td></tr>
</table>
+
+ <p><br/><code>conf/mapred-site.xml</code>:</p>
+ <table>
+ <tr><td><configuration></td></tr>
+
+ <tr><td> <property></td></tr>
+ <tr><td> <name>mapred.job.tracker</name></td></tr>
+ <tr><td> <value>localhost:9001</value></td></tr>
+ <tr><td> </property></td></tr>
+
+ <tr><td></configuration></td></tr>
+ </table>
</section>
<section>
Modified: hadoop/core/trunk/src/docs/src/documentation/content/xdocs/site.xml
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/docs/src/documentation/content/xdocs/site.xml?rev=735777&r1=735776&r2=735777&view=diff
==============================================================================
--- hadoop/core/trunk/src/docs/src/documentation/content/xdocs/site.xml (original)
+++ hadoop/core/trunk/src/docs/src/documentation/content/xdocs/site.xml Mon Jan 19 11:28:48 2009
@@ -73,6 +73,9 @@
<wiki href="http://wiki.apache.org/hadoop/" />
<faq href="http://wiki.apache.org/hadoop/FAQ" />
<hadoop-default href="http://hadoop.apache.org/core/docs/current/hadoop-default.html" />
+ <core-default href="http://hadoop.apache.org/core/docs/current/core-default.html" />
+ <hdfs-default href="http://hadoop.apache.org/core/docs/current/hdfs-default.html" />
+ <mapred-default href="http://hadoop.apache.org/core/docs/current/mapred-default.html" />
<zlib href="http://www.zlib.net/" />
<gzip href="http://www.gzip.org/" />
<bzip href="http://www.bzip.org/" />
Modified:
hadoop/core/trunk/src/docs/src/documentation/content/xdocs/streaming.xml
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/docs/src/documentation/content/xdocs/streaming.xml?rev=735777&r1=735776&r2=735777&view=diff
==============================================================================
--- hadoop/core/trunk/src/docs/src/documentation/content/xdocs/streaming.xml (original)
+++ hadoop/core/trunk/src/docs/src/documentation/content/xdocs/streaming.xml Mon Jan 19 11:28:48 2009
@@ -223,7 +223,7 @@
</p>
<p>
For more details on the jobconf parameters see:
-<a href="ext:hadoop-default">hadoop-default.html</a></p>
+<a href="ext:mapred-default">mapred-default.html</a></p>
</section>
<section>
@@ -276,7 +276,7 @@
</source>
<p>
For more details on jobconf parameters see:
-<a href="ext:hadoop-default">hadoop-default.html</a></p>
+<a href="ext:mapred-default">mapred-default.html</a></p>
<p>
To set an environment variable in a streaming command use:
</p>
Modified: hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/InputFormat.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/InputFormat.java?rev=735777&r1=735776&r2=735777&view=diff
==============================================================================
--- hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/InputFormat.java (original)
+++ hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/InputFormat.java Mon Jan 19 11:28:48 2009
@@ -48,7 +48,7 @@
* bytes, of the input files. However, the {@link FileSystem} blocksize of
* the input files is treated as an upper bound for input splits. A lower bound
* on the split size can be set via
- * <a href="{@docroot}/../hadoop-default.html#mapred.min.split.size">
+ * <a href="{@docroot}/../mapred-default.html#mapred.min.split.size">
* mapred.min.split.size</a>.</p>
*
* <p>Clearly, logical splits based on input-size is insufficient for many
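To make the relocated link concrete: the split-size lower bound is an ordinary job or site property. A sketch of raising it in conf/mapred-site.xml, with an illustrative 64MB value:

  <property>
    <name>mapred.min.split.size</name>
    <!-- Illustrative lower bound on split size, in bytes (64MB). -->
    <value>67108864</value>
  </property>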
Modified: hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/JobConf.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/JobConf.java?rev=735777&r1=735776&r2=735777&view=diff
==============================================================================
--- hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/JobConf.java (original)
+++ hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/JobConf.java Mon Jan 19 11:28:48 2009
@@ -1047,7 +1047,7 @@
* bytes, of input files. However, the {@link FileSystem} blocksize of the
* input files is treated as an upper bound for input splits. A lower bound
* on the split size can be set via
- * <a href="{@docroot}/../hadoop-default.html#mapred.min.split.size">
+ * <a href="{@docroot}/../mapred-default.html#mapred.min.split.size">
* mapred.min.split.size</a>.</p>
*
* <p>Thus, if you expect 10TB of input data and have a blocksize of 128MB,
@@ -1077,7 +1077,7 @@
*
* <p>The right number of reduces seems to be <code>0.95</code> or
* <code>1.75</code> multiplied by (<<i>no. of nodes</i>> *
- * <a href="{@docroot}/../hadoop-default.html#mapred.tasktracker.reduce.tasks.maximum">
+ * <a href="{@docroot}/../mapred-default.html#mapred.tasktracker.reduce.tasks.maximum">
* mapred.tasktracker.reduce.tasks.maximum</a>).
* </p>
*
@@ -1173,7 +1173,7 @@
* performance metrics system via the org.apache.hadoop.metrics API. The
* session identifier is intended, in particular, for use by Hadoop-On-Demand
* (HOD) which allocates a virtual Hadoop cluster dynamically and transiently.
- * HOD will set the session identifier by modifying the hadoop-site.xml file
+ * HOD will set the session identifier by modifying the mapred-site.xml file
* before starting the cluster.
*
* When not running under HOD, this identifier is expected to remain set to
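As a worked example of the rule of thumb quoted above: on a hypothetical cluster of 10 nodes with mapred.tasktracker.reduce.tasks.maximum set to 2, 0.95 * 10 * 2 = 19 reduces, which could be sketched in conf/mapred-site.xml as:

  <property>
    <name>mapred.reduce.tasks</name>
    <!-- 0.95 * 10 nodes * 2 reduce slots per node, for a hypothetical cluster. -->
    <value>19</value>
  </property>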
Modified: hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/Mapper.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/Mapper.java?rev=735777&r1=735776&r2=735777&view=diff
==============================================================================
--- hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/Mapper.java (original)
+++ hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/Mapper.java Mon Jan 19 11:28:48 2009
@@ -145,7 +145,7 @@
* takes an insignificant amount of time to process individual key/value
* pairs, this is crucial since the framework might assume that the task has
* timed-out and kill that task. The other way of avoiding this is to set
- * <a href="{@docroot}/../hadoop-default.html#mapred.task.timeout">
+ * <a href="{@docroot}/../mapred-default.html#mapred.task.timeout">
* mapred.task.timeout</a> to a high-enough value (or even zero for no
* time-outs).</p>
*
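For completeness, the timeout mentioned above is likewise a mapred-default property, so an override would now go in conf/mapred-site.xml. A sketch, assuming the value is in milliseconds (zero disables the timeout):

  <property>
    <name>mapred.task.timeout</name>
    <!-- Assumed milliseconds: 20 minutes; 0 would disable the timeout. -->
    <value>1200000</value>
  </property>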
Modified: hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/Reducer.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/Reducer.java?rev=735777&r1=735776&r2=735777&view=diff
==============================================================================
--- hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/Reducer.java (original)
+++ hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/Reducer.java Mon Jan 19 11:28:48 2009
@@ -185,7 +185,7 @@
* takes an insignificant amount of time to process individual key/value
* pairs, this is crucial since the framework might assume that the task has
* timed-out and kill that task. The other way of avoiding this is to set
- * <a href="{@docroot}/../hadoop-default.html#mapred.task.timeout">
+ * <a href="{@docroot}/../mapred-default.html#mapred.task.timeout">
* mapred.task.timeout</a> to a high-enough value (or even zero for no
* time-outs).</p>
*
Modified:
hadoop/core/trunk/src/mapred/org/apache/hadoop/mapreduce/InputFormat.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/mapred/org/apache/hadoop/mapreduce/InputFormat.java?rev=735777&r1=735776&r2=735777&view=diff
==============================================================================
--- hadoop/core/trunk/src/mapred/org/apache/hadoop/mapreduce/InputFormat.java (original)
+++ hadoop/core/trunk/src/mapred/org/apache/hadoop/mapreduce/InputFormat.java Mon Jan 19 11:28:48 2009
@@ -50,7 +50,7 @@
* bytes, of the input files. However, the {@link FileSystem} blocksize of
* the input files is treated as an upper bound for input splits. A lower bound
* on the split size can be set via
- * <a href="{@docroot}/../hadoop-default.html#mapred.min.split.size">
+ * <a href="{@docroot}/../mapred-default.html#mapred.min.split.size">
* mapred.min.split.size</a>.</p>
*
* <p>Clearly, logical splits based on input-size is insufficient for many
Modified: hadoop/core/trunk/src/test/hadoop-site.xml
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/hadoop-site.xml?rev=735777&r1=735776&r2=735777&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/hadoop-site.xml (original)
+++ hadoop/core/trunk/src/test/hadoop-site.xml Mon Jan 19 11:28:48 2009
@@ -1,9 +1,11 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="nutch-conf.xsl"?>
-<!-- Values used when running unit tests. This is mostly empty, to -->
-<!-- use of the default values, overriding the potentially -->
-<!-- user-editted hadoop-site.xml in the conf/ directory. -->
+<!-- DO NOT PUT ANY PROPERTY IN THIS FILE. INSTEAD USE -->
+<!-- core-site.xml, mapred-site.xml OR hdfs-site.xml -->
+<!-- This empty file is here to avoid picking up properties from -->
+<!-- conf/hadoop-site.xml. It will be removed once support -->
+<!-- for hadoop-site.xml is removed. -->
<configuration>