Author: zshao
Date: Wed Aug 6 13:20:31 2008
New Revision: 683392
URL: http://svn.apache.org/viewvc?rev=683392&view=rev
Log:
HADOOP-3805. Improve fuse-dfs write performance.
(Pete Wyckoff via zshao)
Modified:
hadoop/core/trunk/CHANGES.txt
hadoop/core/trunk/src/contrib/fuse-dfs/README
hadoop/core/trunk/src/contrib/fuse-dfs/build.xml
hadoop/core/trunk/src/contrib/fuse-dfs/src/Makefile.am
hadoop/core/trunk/src/contrib/fuse-dfs/src/fuse_dfs.c
hadoop/core/trunk/src/contrib/fuse-dfs/src/test/TestFuseDFS.java
Modified: hadoop/core/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=683392&r1=683391&r2=683392&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Wed Aug 6 13:20:31 2008
@@ -221,6 +221,9 @@
HADOOP-3836. Fix TestMultipleOutputs to correctly clean up. (Alejandro
Abdelnur via acmurthy)
+ HADOOP-3805. Improve fuse-dfs write performance.
+ (Pete Wyckoff via zshao)
+
Release 0.18.0 - Unreleased
INCOMPATIBLE CHANGES
Modified: hadoop/core/trunk/src/contrib/fuse-dfs/README
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/fuse-dfs/README?rev=683392&r1=683391&r2=683392&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/fuse-dfs/README (original)
+++ hadoop/core/trunk/src/contrib/fuse-dfs/README Wed Aug 6 13:20:31 2008
@@ -89,6 +89,7 @@
-ousetrash (should fuse dfs throw things in /Trash when deleting them)
-onotrash (opposite of usetrash)
-odebug (do not daemonize - aka -d in fuse speak)
+-obig_writes (use fuse big_writes option so as to allow better performance of writes on kernels >= 2.6.26)
The defaults are:
@@ -124,7 +125,7 @@
1. From /bin, ln -s $HADOOP_HOME/contrib/fuse-dfs/fuse_dfs* .
2. Always start with debug on so you can see if you are missing a classpath or
something like that.
-
+3. use -obig_writes
--------------------------------------------------------------------------------
@@ -132,7 +133,7 @@
1. if you alias ls to ls --color=auto and try listing a directory with lots
(over thousands) of files, expect it to be slow and at 10s of thousands, expect
it to be very very slow. This is because --color=auto causes ls to stat every
file in the directory. Since fuse-dfs does not cache attribute entries when
doing a readdir, this is very slow. see
https://issues.apache.org/jira/browse/HADOOP-3797
-2. Writes are approximately 33% slower than the DFSClient. TBD how to optimize this. see: https://issues.apache.org/jira/browse/HADOOP-3805
+2. Writes are approximately 33% slower than the DFSClient. TBD how to optimize this. see: https://issues.apache.org/jira/browse/HADOOP-3805 - try using -obig_writes and if on a >2.6.26 kernel, should perform much better since bigger writes implies less context switching.
3. Reads are ~20-30% slower even with the read buffering.
Modified: hadoop/core/trunk/src/contrib/fuse-dfs/build.xml
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/fuse-dfs/build.xml?rev=683392&r1=683391&r2=683392&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/fuse-dfs/build.xml (original)
+++ hadoop/core/trunk/src/contrib/fuse-dfs/build.xml Wed Aug 6 13:20:31 2008
@@ -72,7 +72,7 @@
</exec>
</target>
- <target name="test" if="libhdfs-fuse">
+ <target name="test" if="fusedfs">
<echo message="testing FuseDFS ..."/>
<antcall target="hadoopbuildcontrib.test">
</antcall>
Modified: hadoop/core/trunk/src/contrib/fuse-dfs/src/Makefile.am
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/fuse-dfs/src/Makefile.am?rev=683392&r1=683391&r2=683392&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/fuse-dfs/src/Makefile.am (original)
+++ hadoop/core/trunk/src/contrib/fuse-dfs/src/Makefile.am Wed Aug 6 13:20:31 2008
@@ -15,6 +15,7 @@
#
bin_PROGRAMS = fuse_dfs
fuse_dfs_SOURCES = fuse_dfs.c
-AM_CPPFLAGS= -D_FILE_OFFSET_BITS=64 -I$(JAVA_HOME)/include -I$(HADOOP_HOME)/src/c++/libhdfs/ -I$(JAVA_HOME)/include/linux/ -D_FUSE_DFS_VERSION=\"$(PACKAGE_VERSION)\" -DPROTECTED_PATHS=\"$(PROTECTED_PATHS)\"
+AM_CPPFLAGS= -D_FILE_OFFSET_BITS=64 -I$(JAVA_HOME)/include -I$(HADOOP_HOME)/src/c++/libhdfs/ -I$(JAVA_HOME)/include/linux/ -I$(FUSE_HOME)/include
+AM_CPPFLAGS+= -D_FUSE_DFS_VERSION=\"$(PACKAGE_VERSION)\"
AM_LDFLAGS= -L$(HADOOP_HOME)/build/libhdfs -lhdfs -L$(FUSE_HOME)/lib -lfuse -L$(JAVA_HOME)/jre/lib/$(OS_ARCH)/server -ljvm
Modified: hadoop/core/trunk/src/contrib/fuse-dfs/src/fuse_dfs.c
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/fuse-dfs/src/fuse_dfs.c?rev=683392&r1=683391&r2=683392&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/fuse-dfs/src/fuse_dfs.c (original)
+++ hadoop/core/trunk/src/contrib/fuse-dfs/src/fuse_dfs.c Wed Aug 6 13:20:31 2008
@@ -101,7 +101,7 @@
/** keys for FUSE_OPT_ options */
static void print_usage(const char *pname)
{
- fprintf(stdout,"USAGE: %s [debug] [--help] [--version] [protected=<colon_seped_list_of_paths] [rw] [notrash] [usetrash] [private (single user)] [ro] server=<hadoop_servername> port=<hadoop_port> [entry_timeout=<secs>] [attribute_timeout=<secs>] <mntpoint> [fuse options]\n",pname);
+ fprintf(stdout,"USAGE: %s [debug] [--help] [--version] [-oprotected=<colon_seped_list_of_paths] [rw] [-onotrash] [-ousetrash] [-obig_writes] [-oprivate (single user)] [ro] [-oserver=<hadoop_servername>] [-oport=<hadoop_port>] [-oentry_timeout=<secs>] [-oattribute_timeout=<secs>] <mntpoint> [fuse options]\n",pname);
fprintf(stdout,"NOTE: debugging option for fuse is -debug\n");
}
@@ -115,6 +115,7 @@
KEY_RO,
KEY_RW,
KEY_PRIVATE,
+ KEY_BIGWRITES,
KEY_DEBUG,
};
@@ -130,6 +131,7 @@
FUSE_OPT_KEY("private", KEY_PRIVATE),
FUSE_OPT_KEY("ro", KEY_RO),
FUSE_OPT_KEY("debug", KEY_DEBUG),
+ FUSE_OPT_KEY("big_writes", KEY_BIGWRITES),
FUSE_OPT_KEY("rw", KEY_RW),
FUSE_OPT_KEY("usetrash", KEY_USETRASH),
FUSE_OPT_KEY("notrash", KEY_NOTRASH),
@@ -175,6 +177,11 @@
fuse_opt_add_arg(outargs, "-d");
options.debug = 1;
break;
+ case KEY_BIGWRITES:
+#ifdef FUSE_CAP_BIG_WRITES
+ fuse_opt_add_arg(outargs, "-obig_writes");
+#endif
+ break;
default: {
// try and see if the arg is a URI for DFS
int tmp_port;
Modified: hadoop/core/trunk/src/contrib/fuse-dfs/src/test/TestFuseDFS.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/fuse-dfs/src/test/TestFuseDFS.java?rev=683392&r1=683391&r2=683392&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/fuse-dfs/src/test/TestFuseDFS.java (original)
+++ hadoop/core/trunk/src/contrib/fuse-dfs/src/test/TestFuseDFS.java Wed Aug 6 13:20:31 2008
@@ -57,7 +57,7 @@
String lp = System.getProperty("LD_LIBRARY_PATH") + ":" +
"/usr/local/lib:" + libhdfs + ":" + jvm;
System.err.println("LD_LIBRARY_PATH=" + lp);
String cmd[] = { fuse_cmd, "dfs://" + dfs.getHost() + ":" +
String.valueOf(dfs.getPort()),
- mountpoint, "-odebug", "-oentry_timeout=1", "-oattribute_timeout=1", "-ousetrash", "rw" };
+ mountpoint, "-obig_writes", "-odebug", "-oentry_timeout=1", "-oattribute_timeout=1", "-ousetrash", "rw" };
final String [] envp = {
"CLASSPATH="+ cp,
"LD_LIBRARY_PATH=" + lp,