Hi all, here's my attempt at a JNI-based GNU/POSIX path canonicalizer. If you missed my previous mail, this is required in order for FilePermission checks to work (PR classpath/24895). Classpath's current canonicalizer doesn't resolve symbolic links; GCJ's does, though not very well.
This patch makes Classpath do the right thing on GNU/POSIX systems. I haven't committed it because it almost certainly breaks the build on Windows, and I need some help getting the native code to build conditionally. After that I can have a go at porting the Windows canonicalizer from GCJ (I don't think Classpath's current one does much on Windows). Thanks, Gary
Index: java/io/File.java =================================================================== RCS file: /cvsroot/classpath/classpath/java/io/File.java,v retrieving revision 1.61 diff -u -r1.61 File.java --- java/io/File.java 17 Dec 2005 21:16:23 -0000 1.61 +++ java/io/File.java 29 Mar 2006 08:40:39 -0000 @@ -484,9 +484,9 @@ /** * This method returns a canonical representation of the pathname of * this file. The actual form of the canonical representation is - * different. On the GNU system, the canonical form differs from the - * absolute form in that all relative file references to "." and ".." - * are resolved and removed. + * system-dependent. On the GNU system, conversion to canonical + * form involves the removal of redundant separators, references to + * "." and "..", and symbolic links. * <p> * Note that this method, unlike the other methods which return path * names, can throw an IOException. This is because native method Index: vm/reference/java/io/VMFile.java =================================================================== RCS file: /cvsroot/classpath/classpath/vm/reference/java/io/VMFile.java,v retrieving revision 1.7 diff -u -r1.7 VMFile.java --- vm/reference/java/io/VMFile.java 2 Jul 2005 20:33:08 -0000 1.7 +++ vm/reference/java/io/VMFile.java 29 Mar 2006 08:40:39 -0000 @@ -210,10 +210,10 @@ /** * This method returns a canonical representation of the pathname of - * the given path. The actual form of the canonical representation is - * different. On the GNU system, the canonical form differs from the - * absolute form in that all relative file references to "." and ".." - * are resolved and removed. + * this file. The actual form of the canonical representation is + * system-dependent. On the GNU system, conversion to canonical + * form involves the removal of redundant separators, references to + * "." and "..", and symbolic links. * <p> * Note that this method, unlike the other methods which return path * names, can throw an IOException. 
This is because native method @@ -221,9 +221,5 @@ * * @exception IOException If an error occurs */ - public static String toCanonicalForm(String path) throws IOException - { - // FIXME: this only works on UNIX - return PlatformHelper.toCanonicalForm(path); - } + public static native String toCanonicalForm(String path) throws IOException; } Index: include/java_io_VMFile.h =================================================================== RCS file: /cvsroot/classpath/classpath/include/java_io_VMFile.h,v retrieving revision 1.3 diff -u -r1.3 java_io_VMFile.h --- include/java_io_VMFile.h 11 Nov 2004 17:31:31 -0000 1.3 +++ include/java_io_VMFile.h 29 Mar 2006 08:40:39 -0000 @@ -24,6 +24,7 @@ JNIEXPORT jboolean JNICALL Java_java_io_VMFile_canWrite (JNIEnv *env, jclass, jstring); JNIEXPORT jboolean JNICALL Java_java_io_VMFile_canRead (JNIEnv *env, jclass, jstring); JNIEXPORT jboolean JNICALL Java_java_io_VMFile_isDirectory (JNIEnv *env, jclass, jstring); +JNIEXPORT jstring JNICALL Java_java_io_VMFile_toCanonicalForm (JNIEnv *env,jclass, jstring); #undef java_io_VMFile_IS_CASE_SENSITIVE #define java_io_VMFile_IS_CASE_SENSITIVE 1L #undef java_io_VMFile_IS_DOS_8_3 Index: native/jni/java-io/java_io_VMFile.c =================================================================== RCS file: /cvsroot/classpath/classpath/native/jni/java-io/java_io_VMFile.c,v retrieving revision 1.10 diff -u -r1.10 java_io_VMFile.c --- native/jni/java-io/java_io_VMFile.c 25 Jan 2006 10:40:12 -0000 1.10 +++ native/jni/java-io/java_io_VMFile.c 29 Mar 2006 08:40:39 -0000 @@ -1,5 +1,5 @@ /* java_io_VMFile.c - Native methods for java.io.File class - Copyright (C) 1998, 2004 Free Software Foundation, Inc. + Copyright (C) 1998, 2004, 2006 Free Software Foundation, Inc. This file is part of GNU Classpath. 
@@ -41,6 +41,8 @@ #include <stdio.h> #include <stdlib.h> +#include <limits.h> + #include <jni.h> #include <jcl.h> @@ -730,3 +732,173 @@ return (0); #endif /* not WITHOUT_FILESYSTEM */ } + +/*************************************************************************/ + +/* + * This method converts a path to canonical form on GNU/Posix systems. + * + * Class: java_io_VMFile + * Method: toCanonicalForm + * Signature: (Ljava/lang/String)Ljava/lang/String + */ + +JNIEXPORT jstring JNICALL +Java_java_io_VMFile_toCanonicalForm (JNIEnv *env, + jclass class __attribute__ ((__unused__)), + jstring jpath) +{ +#ifndef WITHOUT_FILESYSTEM + const char *path; + char *src, *dst, *tmp; + int srci, dsti, tmpi; + int len, dsti_save; + int fschecks = 1; + struct stat sb; + + path = JCL_jstring_to_cstring(env, jpath); + if (path == NULL) + return NULL; + + /* It is the caller's responsibility to ensure the path is absolute. */ + if (path[0] == 0 || path[0] != '/') + { + JCL_free_cstring(env, jpath, path); + JCL_ThrowException(env, "java/lang/RuntimeException", "Not absolute"); + return NULL; + } + + len = strlen(path); + if (len >= PATH_MAX) + { + JCL_free_cstring(env, jpath, path); + JCL_ThrowException(env, "java/io/IOException", "Path too long"); + return NULL; + } + src = JCL_malloc(env, PATH_MAX * 3); + if (src == NULL) + { + JCL_free_cstring(env, jpath, path); + return NULL; + } + dst = src + PATH_MAX; + tmp = dst + PATH_MAX; + + strcpy(src, path); + JCL_free_cstring(env, jpath, path); + + dst[0] = '/'; + dst[1] = '\0'; + dsti = 1; + + srci = 1; + + while (src[srci] != '\0') + { + /* Skip slashes. */ + while (src[srci] == '/') + srci++; + tmpi = srci; + /* Find next slash. */ + while (src[srci] != '/' && src[srci] != '\0') + srci++; + if (srci == tmpi) + /* We hit the end. */ + break; + len = srci - tmpi; + + /* Handle "." and "..". */ + if (len == 1 && src[tmpi] == '.') + continue; + if (len == 2 && src[tmpi] == '.' 
&& src[tmpi + 1] == '.') + { + if (dsti == 1) + { + /* Unlike other JVMs we do not rewind past the root + directory. I can't see any legitimate reason why you + would want this, and chopping off bits of path seems + like a sure-fire way to introduce vulnerabilities. */ + JCL_free(env, src); + JCL_ThrowException(env, "java/io/IOException", + "Too many up-level references"); + + return NULL; + } + while (dsti > 1 && dst[dsti - 1] != '/') + dsti--; + if (dsti != 1) + dsti--; + /* Reenable filesystem checking if disabled, as we might + have reversed over whatever caused the problem before. + At least one proprietary JVM has inconsistencies because + it does not do this. */ + fschecks = 1; + continue; + } + + /* Handle real path components. */ + if (dsti + len + 1 >= PATH_MAX) + { + JCL_free(env, src); + JCL_ThrowException(env, "java/io/IOException", "Path too long"); + return NULL; + } + dsti_save = dsti; + if (dsti > 1) + dst[dsti++] = '/'; + strncpy(&dst[dsti], &src[tmpi], len); + dsti += len; + if (fschecks == 0) + continue; + + dst[dsti] = '\0'; + if (lstat(dst, &sb) == 0) + { + if (S_ISLNK(sb.st_mode)) + { + tmpi = readlink(dst, tmp, PATH_MAX); + if (tmpi < 1 || tmpi == PATH_MAX) + { + JCL_free(env, src); + JCL_ThrowException(env, "java/io/IOException", + "Path too long"); + return NULL; + } + + /* Prepend the link's path to src. */ + if (tmpi + strlen(&src[srci]) >= PATH_MAX) + { + JCL_free(env, src); + JCL_ThrowException(env, "java/io/IOException", + "Path too long"); + return NULL; + } + while (src[srci] != '\0') + tmp[tmpi++] = src[srci++]; + tmp[tmpi] = '\0'; + strcpy(src, tmp); + srci = 0; + + /* Either replace or append dst depending on whether the + link is relative or absolute. */ + dsti = tmp[0] == '/' ? 1 : dsti_save; + } + } + else + { + /* Something doesn't exist, or we don't have permission to + read it, or a previous path component is a directory, or + a symlink is looped. Whatever, we can't check the + filesystem any more. 
*/ + fschecks = 0; + } + } + dst[dsti] = '\0'; + + path = (*env)->NewStringUTF (env, dst); + JCL_free(env, src); + return path; +#else /* not WITHOUT_FILESYSTEM */ + return NULL; +#endif /* not WITHOUT_FILESYSTEM */ +} Index: gnu/java/io/PlatformHelper.java =================================================================== RCS file: /cvsroot/classpath/classpath/gnu/java/io/PlatformHelper.java,v retrieving revision 1.6 diff -u -r1.6 PlatformHelper.java --- gnu/java/io/PlatformHelper.java 14 Nov 2005 13:08:11 -0000 1.6 +++ gnu/java/io/PlatformHelper.java 29 Mar 2006 08:40:39 -0000 @@ -97,98 +97,6 @@ } /** - * This routine canonicalizes input param "path" to formal path representation - * for current platform, including interpreting ".." and "." . - */ - public static final String toCanonicalForm(String path) - { - /*?? - if(path.indexOf('.') < 0 && path.indexOf("..") < 0) - return path; - */ - String tmppath = path.replace('/', separatorChar); - StringBuffer canonpath; - - int i; - - if ((i = beginWithRootPathPrefix(tmppath)) == 0 ) - return path; - - /* The original - "canonpath = new StringBuffer(tmppath.substring(0, i))" - isn't very efficient because StringBuffer's - ensureCapacity_unsynchronized will fail definitely each time - and will enlarge buffer and copy contents. . - */ - canonpath = new StringBuffer(INITIAL_MAX_PATH); - canonpath.append(tmppath.substring(0, i)); - tmppath = tmppath.substring(i); - // pathdepth==0 indicates there're only root path in the buffer - int pathdepth = 0; - - StringTokenizer st = new StringTokenizer(tmppath, separator); - - // Traverse each element of the path, handling "." and ".." - // Should handle "~" too? - if (st.hasMoreTokens()) - do - { - String s = st.nextToken(); - - // Handle "." or an empty element. - if (s.equals(".") || s.equals("")) - continue; - - // Handle ".." 
by deleting the last element from the path - if (s.equals("..")) - { - if (pathdepth == 0) - continue; - - // Strip of trailing separator - canonpath.setLength(canonpath.length() - 1/*separator.length()*/); - String tmpstr = canonpath.toString(); - int idx = tmpstr.lastIndexOf(separator); - - if ((idx == -1) || ((idx + 1/*separator.length()*/) > tmpstr.length())) - //throw new IOException("Can't happen error"); - return path; // Shouldn't happen - - canonpath.setLength(idx + 1/*separator.length()*/); - pathdepth--; - continue; - } - - canonpath.append(s); - pathdepth++; //now it's more than root path - - if (st.hasMoreTokens()) - canonpath.append(separator); - } - while (st.hasMoreTokens()); - - if (endWithSeparator(path)) - canonpath.append(separator); - - String tmpstr = canonpath.toString(); - //if (pathdepth > 0 && endWithSeparator(tmpstr) ) - // tmpstr = tmpstr.substring(0, tmpstr.length() - 1/*separator.length()*/); - - return tmpstr; - } - - /** - * This routine canonicalizes input param "path" to formal path representation - * for current platform, and normalize all separators to "sepchar". - */ - public static final String toCanonicalForm(String path, char sepchar) - { - String tmpstr = toCanonicalForm(path); - tmpstr = tmpstr.replace(separatorChar, sepchar); - return tmpstr; - } - - /** * This routine checks whether input param "path" ends with separator */ public static final boolean endWithSeparator(String path)