Hi all,

Here's my attempt at a JNI GNU/Posix path canonicalizer.  If you
missed my previous mail, this is required in order for FilePermission
checks to work (PR classpath/24895).  Classpath's canonicalizer
doesn't handle symbolic links, whereas GCJ's does, just not very well.

This patch makes Classpath do the right thing on GNU/Posix systems.
I haven't committed it as it almost certainly breaks builds on
Windows, and I need some help to get stuff building conditionally.
After that I can have a go at porting the Windows canonicalizer from
GCJ (I don't think Classpath's current one does much on Windows).

Thanks,
Gary
Index: java/io/File.java
===================================================================
RCS file: /cvsroot/classpath/classpath/java/io/File.java,v
retrieving revision 1.61
diff -u -r1.61 File.java
--- java/io/File.java   17 Dec 2005 21:16:23 -0000      1.61
+++ java/io/File.java   29 Mar 2006 08:40:39 -0000
@@ -484,9 +484,9 @@
   /**
    * This method returns a canonical representation of the pathname of
    * this file.  The actual form of the canonical representation is
-   * different.  On the GNU system, the canonical form differs from the
-   * absolute form in that all relative file references to "." and ".."
-   * are resolved and removed.
+   * system-dependent.  On the GNU system, conversion to canonical
+   * form involves the removal of redundant separators, references to
+   * "." and "..", and symbolic links.
    * <p>
    * Note that this method, unlike the other methods which return path
    * names, can throw an IOException.  This is because native method 
Index: vm/reference/java/io/VMFile.java
===================================================================
RCS file: /cvsroot/classpath/classpath/vm/reference/java/io/VMFile.java,v
retrieving revision 1.7
diff -u -r1.7 VMFile.java
--- vm/reference/java/io/VMFile.java    2 Jul 2005 20:33:08 -0000       1.7
+++ vm/reference/java/io/VMFile.java    29 Mar 2006 08:40:39 -0000
@@ -210,10 +210,10 @@
 
   /**
    * This method returns a canonical representation of the pathname of
-   * the given path.  The actual form of the canonical representation is
-   * different.  On the GNU system, the canonical form differs from the
-   * absolute form in that all relative file references to "." and ".."
-   * are resolved and removed.
+   * the given path.  The actual form of the canonical representation is
+   * system-dependent.  On the GNU system, conversion to canonical
+   * form involves the removal of redundant separators, references to
+   * "." and "..", and symbolic links.
    * <p>
    * Note that this method, unlike the other methods which return path
    * names, can throw an IOException.  This is because native method 
@@ -221,9 +221,5 @@
    *
    * @exception IOException If an error occurs
    */
-  public static String toCanonicalForm(String path) throws IOException
-  {
-       // FIXME: this only works on UNIX
-       return PlatformHelper.toCanonicalForm(path);
-  }
+  public static native String toCanonicalForm(String path) throws IOException;
 }
Index: include/java_io_VMFile.h
===================================================================
RCS file: /cvsroot/classpath/classpath/include/java_io_VMFile.h,v
retrieving revision 1.3
diff -u -r1.3 java_io_VMFile.h
--- include/java_io_VMFile.h    11 Nov 2004 17:31:31 -0000      1.3
+++ include/java_io_VMFile.h    29 Mar 2006 08:40:39 -0000
@@ -24,6 +24,7 @@
 JNIEXPORT jboolean JNICALL Java_java_io_VMFile_canWrite (JNIEnv *env, jclass, jstring);
 JNIEXPORT jboolean JNICALL Java_java_io_VMFile_canRead (JNIEnv *env, jclass, jstring);
 JNIEXPORT jboolean JNICALL Java_java_io_VMFile_isDirectory (JNIEnv *env, jclass, jstring);
+JNIEXPORT jstring JNICALL Java_java_io_VMFile_toCanonicalForm (JNIEnv *env, jclass, jstring);
 #undef java_io_VMFile_IS_CASE_SENSITIVE
 #define java_io_VMFile_IS_CASE_SENSITIVE 1L
 #undef java_io_VMFile_IS_DOS_8_3
Index: native/jni/java-io/java_io_VMFile.c
===================================================================
RCS file: /cvsroot/classpath/classpath/native/jni/java-io/java_io_VMFile.c,v
retrieving revision 1.10
diff -u -r1.10 java_io_VMFile.c
--- native/jni/java-io/java_io_VMFile.c 25 Jan 2006 10:40:12 -0000      1.10
+++ native/jni/java-io/java_io_VMFile.c 29 Mar 2006 08:40:39 -0000
@@ -1,5 +1,5 @@
 /* java_io_VMFile.c - Native methods for java.io.File class
-   Copyright (C) 1998, 2004 Free Software Foundation, Inc.
+   Copyright (C) 1998, 2004, 2006 Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -41,6 +41,8 @@
 #include <stdio.h>
 #include <stdlib.h>
 
+#include <limits.h>
+
 #include <jni.h>
 #include <jcl.h>
 
@@ -730,3 +732,211 @@
   return (0);
 #endif /* not WITHOUT_FILESYSTEM */
 }
+
+/*************************************************************************/
+
+/* Upper bound on the number of symbolic links expanded while
+   canonicalizing a single path.  Without it a link that refers to
+   itself (for example "a -> a") would be expanded forever. */
+#define CANONICALIZE_MAX_LINKS 40
+
+/*
+ * This method converts a path to canonical form on GNU/Posix systems.
+ *
+ * The path must be absolute.  Redundant separators and "." components
+ * are dropped, ".." removes the preceding component, and each
+ * component that lstat() reports as a symbolic link is replaced by
+ * the link's target.  Once a component cannot be examined, the rest
+ * of the path is only tidied textually until a ".." steps back over
+ * the problem.
+ *
+ * Throws RuntimeException if the path is not absolute, and
+ * IOException if the path does not fit in PATH_MAX, if a link cannot
+ * be read, or if ".." would climb above the root directory.
+ *
+ * Class:     java_io_VMFile
+ * Method:    toCanonicalForm
+ * Signature: (Ljava/lang/String;)Ljava/lang/String;
+ */
+
+JNIEXPORT jstring JNICALL
+Java_java_io_VMFile_toCanonicalForm (JNIEnv *env,
+                                     jclass class __attribute__ ((__unused__)),
+                                     jstring jpath)
+{
+#ifndef WITHOUT_FILESYSTEM
+  const char *path;
+  char *src, *dst, *tmp;
+  int srci, dsti, tmpi;
+  int len, dsti_save;
+  int fschecks = 1;
+  int links = 0;
+  jstring result;
+  struct stat sb;
+
+  path = JCL_jstring_to_cstring(env, jpath);
+  if (path == NULL)
+    return NULL;
+
+  /* It is the caller's responsibility to ensure the path is absolute. */
+  if (path[0] != '/')
+    {
+      JCL_free_cstring(env, jpath, path);
+      JCL_ThrowException(env, "java/lang/RuntimeException", "Not absolute");
+      return NULL;
+    }
+
+  len = strlen(path);
+  if (len >= PATH_MAX)
+    {
+      JCL_free_cstring(env, jpath, path);
+      JCL_ThrowException(env, "java/io/IOException", "Path too long");
+      return NULL;
+    }
+
+  /* A single allocation provides three PATH_MAX-sized buffers: the
+     input still to be processed (src), the canonical output (dst)
+     and scratch space for splicing in link targets (tmp). */
+  src = JCL_malloc(env, PATH_MAX * 3);
+  if (src == NULL)
+    {
+      JCL_free_cstring(env, jpath, path);
+      return NULL;
+    }
+  dst = src + PATH_MAX;
+  tmp = dst + PATH_MAX;
+
+  strcpy(src, path);
+  JCL_free_cstring(env, jpath, path);
+
+  dst[0] = '/';
+  dst[1] = '\0';
+  dsti = 1;
+
+  srci = 1;
+
+  while (src[srci] != '\0')
+    {
+      /* Skip slashes. */
+      while (src[srci] == '/')
+        srci++;
+      tmpi = srci;
+      /* Find next slash. */
+      while (src[srci] != '/' && src[srci] != '\0')
+        srci++;
+      if (srci == tmpi)
+        /* We hit the end. */
+        break;
+      len = srci - tmpi;
+
+      /* Handle "." and "..". */
+      if (len == 1 && src[tmpi] == '.')
+        continue;
+      if (len == 2 && src[tmpi] == '.' && src[tmpi + 1] == '.')
+        {
+          if (dsti == 1)
+            {
+              /* Unlike other JVMs we do not rewind past the root
+                 directory.  I can't see any legitimate reason why you
+                 would want this, and chopping off bits of path seems
+                 like a sure-fire way to introduce vulnerabilities. */
+              JCL_free(env, src);
+              JCL_ThrowException(env, "java/io/IOException",
+                                 "Too many up-level references");
+              return NULL;
+            }
+          while (dsti > 1 && dst[dsti - 1] != '/')
+            dsti--;
+          if (dsti != 1)
+            dsti--;
+          /* Reenable filesystem checking if disabled, as we might
+             have reversed over whatever caused the problem before.
+             At least one proprietary JVM has inconsistencies because
+             it does not do this. */
+          fschecks = 1;
+          continue;
+        }
+
+      /* Handle real path components. */
+      if (dsti + len + 1 >= PATH_MAX)
+        {
+          JCL_free(env, src);
+          JCL_ThrowException(env, "java/io/IOException", "Path too long");
+          return NULL;
+        }
+      dsti_save = dsti;
+      if (dsti > 1)
+        dst[dsti++] = '/';
+      strncpy(&dst[dsti], &src[tmpi], len);
+      dsti += len;
+      if (fschecks == 0)
+        continue;
+
+      dst[dsti] = '\0';
+      if (lstat(dst, &sb) == 0)
+        {
+          if (S_ISLNK(sb.st_mode))
+            {
+              if (++links > CANONICALIZE_MAX_LINKS)
+                {
+                  /* Almost certainly a symlink loop.  Keep this
+                     component as it is and stop consulting the
+                     filesystem rather than spin forever. */
+                  fschecks = 0;
+                  continue;
+                }
+
+              tmpi = readlink(dst, tmp, PATH_MAX);
+              if (tmpi < 1)
+                {
+                  JCL_free(env, src);
+                  JCL_ThrowException(env, "java/io/IOException",
+                                     "Unable to read symbolic link");
+                  return NULL;
+                }
+              if (tmpi == PATH_MAX)
+                {
+                  /* The target may have been truncated. */
+                  JCL_free(env, src);
+                  JCL_ThrowException(env, "java/io/IOException",
+                                     "Path too long");
+                  return NULL;
+                }
+
+              /* Prepend the link's path to src. */
+              if (tmpi + strlen(&src[srci]) >= PATH_MAX)
+                {
+                  JCL_free(env, src);
+                  JCL_ThrowException(env, "java/io/IOException",
+                                     "Path too long");
+                  return NULL;
+                }
+              while (src[srci] != '\0')
+                tmp[tmpi++] = src[srci++];
+              tmp[tmpi] = '\0';
+              strcpy(src, tmp);
+              srci = 0;
+
+              /* Either replace or append dst depending on whether the
+                 link is relative or absolute. */
+              dsti = tmp[0] == '/' ? 1 : dsti_save;
+            }
+        }
+      else
+        {
+          /* Something doesn't exist, or we don't have permission to
+             read it, or a previous path component is not a
+             directory.  Whatever, we can't check the filesystem any
+             more. */
+          fschecks = 0;
+        }
+    }
+  dst[dsti] = '\0';
+
+  result = (*env)->NewStringUTF (env, dst);
+  JCL_free(env, src);
+  return result;
+#else /* not WITHOUT_FILESYSTEM */
+  return NULL;
+#endif /* not WITHOUT_FILESYSTEM */
+}
Index: gnu/java/io/PlatformHelper.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/java/io/PlatformHelper.java,v
retrieving revision 1.6
diff -u -r1.6 PlatformHelper.java
--- gnu/java/io/PlatformHelper.java     14 Nov 2005 13:08:11 -0000      1.6
+++ gnu/java/io/PlatformHelper.java     29 Mar 2006 08:40:39 -0000
@@ -97,98 +97,6 @@
   }
 
   /**
-   * This routine canonicalizes input param "path" to formal path representation
-   *  for current platform, including interpreting ".." and "." .
-   */
-  public static final String toCanonicalForm(String path)
-  {
-    /*??
-    if(path.indexOf('.') < 0 && path.indexOf("..") < 0)
-        return path; 
-    */
-    String tmppath = path.replace('/', separatorChar);
-    StringBuffer canonpath;
-
-    int i;
-
-    if ((i = beginWithRootPathPrefix(tmppath)) == 0 )
-      return path;
-    
-    /* The original 
-           "canonpath = new StringBuffer(tmppath.substring(0, i))"
-       isn't very efficient because StringBuffer's 
-       ensureCapacity_unsynchronized will fail definitely each time 
-       and will enlarge buffer and copy contents.       .
-    */
-    canonpath = new StringBuffer(INITIAL_MAX_PATH);
-    canonpath.append(tmppath.substring(0, i));
-    tmppath = tmppath.substring(i);
-    // pathdepth==0 indicates there're only root path in the buffer
-    int pathdepth = 0;
-    
-    StringTokenizer st = new StringTokenizer(tmppath, separator);
-    
-    // Traverse each element of the path, handling "." and ".."
-    // Should handle "~" too?
-    if (st.hasMoreTokens())
-      do
-        {
-          String s = st.nextToken();
-        
-          // Handle "." or an empty element.  
-          if (s.equals(".") || s.equals(""))
-            continue;
-        
-          // Handle ".." by deleting the last element from the path
-          if (s.equals(".."))
-            {
-              if (pathdepth == 0)
-                continue;
-
-              // Strip of trailing separator
-              canonpath.setLength(canonpath.length() - 1/*separator.length()*/);
-              String tmpstr = canonpath.toString();
-              int idx = tmpstr.lastIndexOf(separator); 
-
-              if ((idx == -1) || ((idx + 1/*separator.length()*/) > tmpstr.length()))
-                //throw new IOException("Can't happen error"); 
-                return path; // Shouldn't happen 
-        
-              canonpath.setLength(idx + 1/*separator.length()*/);
-              pathdepth--;
-              continue;
-            }
-        
-          canonpath.append(s);
-          pathdepth++; //now it's more than root path
-
-          if (st.hasMoreTokens())
-            canonpath.append(separator);
-        }
-      while (st.hasMoreTokens());
-    
-    if (endWithSeparator(path))
-      canonpath.append(separator);
-        
-    String tmpstr = canonpath.toString();
-    //if (pathdepth > 0 && endWithSeparator(tmpstr) )
-    //    tmpstr = tmpstr.substring(0, tmpstr.length() - 1/*separator.length()*/);
-    
-    return tmpstr;
-  }
-
-  /**
-   * This routine canonicalizes input param "path" to formal path representation
-   *  for current platform, and normalize all separators to "sepchar".
-   */
-  public static final String toCanonicalForm(String path, char sepchar)
-  {
-    String tmpstr = toCanonicalForm(path);
-    tmpstr = tmpstr.replace(separatorChar, sepchar);
-    return tmpstr;
-  }
-
-  /**
    * This routine checks whether input param "path" ends with separator
    */
   public static final boolean endWithSeparator(String path)

Reply via email to