Author: dhruba
Date: Thu Dec 13 15:08:50 2007
New Revision: 604059

URL: http://svn.apache.org/viewvc?rev=604059&view=rev
Log:
HADOOP-2158. hdfsListDirectory calls FileSystem.listStatus instead of
FileSystem.listPaths. This reduces the number of RPC calls on the
namenode, thereby improving scalability. (Christian Kunz via dhruba)
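[For context, a minimal sketch of how a libhdfs client drives this code path, assuming the stock public API of this era (hdfsConnect, hdfsListDirectory, hdfsFreeFileInfo, hdfsDisconnect); the namenode address "default" and the path "/user" are placeholders. Nothing changes for callers: the patch only alters how hdfs.c populates each hdfsFileInfo internally.]

#include <stdio.h>
#include "hdfs.h"

int main(void)
{
    /* "default" picks up the configured default filesystem */
    hdfsFS fs = hdfsConnect("default", 0);
    if (!fs) {
        fprintf(stderr, "hdfsConnect failed\n");
        return 1;
    }

    int numEntries = 0;
    hdfsFileInfo *info = hdfsListDirectory(fs, "/user", &numEntries);
    if (info) {
        int i;
        for (i = 0; i < numEntries; ++i) {
            /* mName and mSize are now filled from the FileStatus
               objects returned by a single listStatus call, rather
               than one getFileStatus round trip per entry */
            printf("%s\t%lld bytes\n", info[i].mName,
                   (long long)info[i].mSize);
        }
        hdfsFreeFileInfo(info, numEntries);
    }

    hdfsDisconnect(fs);
    return 0;
}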
svn merge -c 603000 from trunk.

Modified:
    lucene/hadoop/branches/branch-0.15/CHANGES.txt
    lucene/hadoop/branches/branch-0.15/src/c++/libhdfs/hdfs.c

Modified: lucene/hadoop/branches/branch-0.15/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.15/CHANGES.txt?rev=604059&r1=604058&r2=604059&view=diff
==============================================================================
--- lucene/hadoop/branches/branch-0.15/CHANGES.txt (original)
+++ lucene/hadoop/branches/branch-0.15/CHANGES.txt Thu Dec 13 15:08:50 2007
@@ -24,6 +24,9 @@
   HADOOP-2382. Add hadoop-default.html to subversion. (cutting)
 
+  HADOOP-2158. hdfsListDirectory calls FileSystem.listStatus instead
+  of FileSystem.listPaths. This reduces the number of RPC calls on the
+  namenode, thereby improving scalability. (Christian Kunz via dhruba)
 
 Release 0.15.1 - 2007-11-27

Modified: lucene/hadoop/branches/branch-0.15/src/c++/libhdfs/hdfs.c
URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.15/src/c%2B%2B/libhdfs/hdfs.c?rev=604059&r1=604058&r2=604059&view=diff
==============================================================================
--- lucene/hadoop/branches/branch-0.15/src/c++/libhdfs/hdfs.c (original)
+++ lucene/hadoop/branches/branch-0.15/src/c++/libhdfs/hdfs.c Thu Dec 13 15:08:50 2007
@@ -1316,41 +1316,9 @@
 
 static int
-getFileInfo(JNIEnv *env, jobject jFS, jobject jPath, hdfsFileInfo *fileInfo)
+getFileInfoFromStat(JNIEnv *env, jobject jStat, hdfsFileInfo *fileInfo)
 {
-    // JAVA EQUIVALENT:
-    //  fs.isDirectory(f)
-    //  fs.lastModified() ??
-    //  fs.getLength(f)
-    //  f.getPath()
-
-    jobject jStat;
-    jvalue  jVal;
-
-    if (invokeMethod(env, &jVal, INSTANCE, jFS, HADOOP_FS,
-                     "exists", JMETHOD1(JPARAM(HADOOP_PATH), "Z"),
-                     jPath) != 0) {
-        fprintf(stderr, "Call to org.apache.hadoop.fs."
-                "FileSystem::exists failed!\n");
-        errno = EINTERNAL;
-        return -1;
-    }
-
-    if (jVal.z == 0) {
-        errno = ENOENT;
-        return -1;
-    }
-
-    if (invokeMethod(env, &jVal, INSTANCE, jFS, HADOOP_FS,
-                     "getFileStatus", JMETHOD1(JPARAM(HADOOP_PATH), JPARAM(HADOOP_STAT)),
-                     jPath) != 0) {
-        fprintf(stderr, "Call to org.apache.hadoop.fs."
-                "FileSystem::getFileStatus failed!\n");
-        errno = EINTERNAL;
-        return -1;
-    }
-    jStat = jVal.l;
-
+    jvalue jVal;
     if (invokeMethod(env, &jVal, INSTANCE, jStat, HADOOP_STAT,
                      "isDir", "()Z") != 0) {
         fprintf(stderr, "Call to org.apache.hadoop.fs."
@@ -1398,6 +1366,17 @@
         fileInfo->mSize = jVal.j;
     }
 
+    jobject jPath;
+    if (invokeMethod(env, &jVal, INSTANCE, jStat, HADOOP_STAT,
+                     "getPath", "()Lorg/apache/hadoop/fs/Path;") ||
+        jVal.l == NULL) {
+        fprintf(stderr, "Call to org.apache.hadoop.fs."
+                "FileStatus::getPath failed!\n");
+        errno = EINTERNAL;
+        return -1;
+    }
+    jPath = jVal.l;
+
     jstring     jPathName;
     const char *cPathName;
     if (invokeMethod(env, &jVal, INSTANCE, jPath, HADOOP_PATH,
@@ -1405,17 +1384,59 @@
         fprintf(stderr, "Call to org.apache.hadoop.fs."
                 "Path::toString failed!\n");
         errno = EINTERNAL;
+        destroyLocalReference(env, jPath);
         return -1;
     }
     jPathName = jVal.l;
     cPathName = (const char*) ((*env)->GetStringUTFChars(env, jPathName, NULL));
     fileInfo->mName = strdup(cPathName);
     (*env)->ReleaseStringUTFChars(env, jPathName, cPathName);
+    destroyLocalReference(env, jPath);
     destroyLocalReference(env, jPathName);
 
     return 0;
 }
 
+static int
+getFileInfo(JNIEnv *env, jobject jFS, jobject jPath, hdfsFileInfo *fileInfo)
+{
+    // JAVA EQUIVALENT:
+    //  fs.isDirectory(f)
+    //  fs.lastModified() ??
+    //  fs.getLength(f)
+    //  f.getPath()
+
+    jobject jStat;
+    jvalue  jVal;
+
+    if (invokeMethod(env, &jVal, INSTANCE, jFS, HADOOP_FS,
+                     "exists", JMETHOD1(JPARAM(HADOOP_PATH), "Z"),
+                     jPath) != 0) {
+        fprintf(stderr, "Call to org.apache.hadoop.fs."
+                "FileSystem::exists failed!\n");
+        errno = EINTERNAL;
+        return -1;
+    }
+
+    if (jVal.z == 0) {
+        errno = ENOENT;
+        return -1;
+    }
+
+    if (invokeMethod(env, &jVal, INSTANCE, jFS, HADOOP_FS,
+                     "getFileStatus", JMETHOD1(JPARAM(HADOOP_PATH), JPARAM(HADOOP_STAT)),
+                     jPath) != 0) {
+        fprintf(stderr, "Call to org.apache.hadoop.fs."
+                "FileSystem::getFileStatus failed!\n");
+        errno = EINTERNAL;
+        return -1;
+    }
+    jStat = jVal.l;
+    int ret = getFileInfoFromStat(env, jStat, fileInfo);
+    destroyLocalReference(env, jStat);
+    return ret;
+}
+
 
 
 hdfsFileInfo* hdfsListDirectory(hdfsFS fs, const char* path, int *numEntries)
@@ -1441,11 +1462,11 @@
     jobjectArray jPathList = NULL;
     jvalue jVal;
 
-    if (invokeMethod(env, &jVal, INSTANCE, jFS, HADOOP_FS, "listPaths",
-                     JMETHOD1(JPARAM(HADOOP_PATH), JARRPARAM(HADOOP_PATH)),
+    if (invokeMethod(env, &jVal, INSTANCE, jFS, HADOOP_DFS, "listStatus",
+                     JMETHOD1(JPARAM(HADOOP_PATH), JARRPARAM(HADOOP_STAT)),
                      jPath) != 0) {
         fprintf(stderr, "Call to org.apache.hadoop.fs."
-                "FileSystem::listPaths failed!\n");
+                "FileSystem::listStatus failed!\n");
         errno = EINTERNAL;
         destroyLocalReference(env, jPath);
         return NULL;
@@ -1469,17 +1490,17 @@
 
     //Save path information in pathList
     jsize i;
-    jobject tmpPath;
+    jobject tmpStat;
     for (i=0; i < jPathListSize; ++i) {
-        tmpPath = (*env)->GetObjectArrayElement(env, jPathList, i);
-        if (getFileInfo(env, jFS, tmpPath, &pathList[i])) {
+        tmpStat = (*env)->GetObjectArrayElement(env, jPathList, i);
+        if (getFileInfoFromStat(env, tmpStat, &pathList[i])) {
             errno = EINTERNAL;
             hdfsFreeFileInfo(pathList, jPathListSize);
-            destroyLocalReference(env, tmpPath);
+            destroyLocalReference(env, tmpStat);
             pathList = NULL;
             goto done;
         }
-        destroyLocalReference(env, tmpPath);
+        destroyLocalReference(env, tmpStat);
     }
 
 done:
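[For reference, the namenode round trips implied by the old and new code paths above, assuming each FileSystem call visible in the diff maps to one RPC; this is a reading of the diff, not a measured result.]

/*
 * hdfsListDirectory(fs, path) on a directory with N entries:
 *
 *   before: 1   listPaths(path)
 *         + N   exists(entry)          (per entry, in getFileInfo)
 *         + N   getFileStatus(entry)   (per entry, in getFileInfo)
 *         = 2N + 1 calls to the namenode
 *
 *   after:  1   listStatus(path)
 *               (the returned FileStatus objects already carry the
 *                metadata, so getFileInfoFromStat makes no further
 *                filesystem calls)
 */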