JRuby 1.1RC2 (and previous versions) can neither open nor 'stat'
files with non-ASCII names.  I found this bug while writing a
servlet in JRuby to upload files (see our pages:
   http://www.okisoft.co.jp/esc/ruby/jservlet/
   http://www.okisoft.co.jp/esc/python/upload-cgi.html
).

For example,

01:~/tmp/tmp$ export LC_ALL=en_US.UTF-8
01:~/tmp/tmp$ uname -v
Darwin Kernel Version 8.11.1: Wed Oct 10 18:23:28 PDT 2007; 
root:xnu-792.25.20~1/RELEASE_I386
01:~/tmp/tmp$ jruby -v
ruby 1.8.6 (2008-02-16 rev 5944) [i386-jruby1.1RC2]
01:~/tmp/tmp$ java -version
java version "1.5.0_13"
Java(TM) 2 Runtime Environment, Standard Edition (build 1.5.0_13-b05-241)
Java HotSpot(TM) Client VM (build 1.5.0_13-121, mixed mode, sharing)
01:~/tmp/tmp$ /bin/ls -lR | cat -v
total 8
drwxr-xr-x   4 suzuki  suzuki  136 Feb 18 19:52 a
-rwxr-xr-x   1 suzuki  suzuki   65 Feb 18 19:55 poi.rb

./a:
total 0
drwxr-xr-x   4 suzuki  suzuki  136 Feb 18 19:53 M-cM-^AM-^D
-rw-r--r--   1 suzuki  suzuki    0 Feb 18 19:52 b

./a/M-cM-^AM-^D:
total 0
-rw-r--r--   1 suzuki  suzuki  0 Feb 18 19:53 c
-rw-r--r--   1 suzuki  suzuki  0 Feb 18 19:53 M-cM-^AM-^F
01:~/tmp/tmp$ ruby poi.rb
"a/b"
Mon Feb 18 19:52:51 +0900 2008
"a/\343\201\204"
Mon Feb 18 19:53:27 +0900 2008
"a/\343\201\204/c"
Mon Feb 18 19:53:27 +0900 2008
"a/\343\201\204/\343\201\206"
Mon Feb 18 19:53:24 +0900 2008
01:~/tmp/tmp$ jruby poi.rb
"a/b"
Mon Feb 18 19:52:51 +0900 2008
"a/\202\242"
poi.rb:1: No such file or directory - No such file or directory - a/??? 
(Errno::ENOENT)
        from poi.rb:1:in `each'
        from poi.rb:1
11:~/tmp/tmp$

The script "poi.rb" is as follows:


for name in Dir.glob("a/**/*")
  p name
  p File.mtime(name)
end


JRuby currently treats every string as a byte-transparent
ISO-8859-1 sequence, and reinterprets it as UTF-8 when passing
it to Swing and other Java APIs.  For consistency, the same
reinterpretation is needed when JRuby passes strings to the
java.io.* APIs that it uses to implement Ruby's File and Dir
classes.

For now, you can neither open nor 'stat' any file with a non-ASCII
name.  More precisely, you can still use ISO-8859-1 names, but
you cannot pass those names to other Java APIs transparently.
Either way, it is a severe limitation for the rest of the world,
including us.

As a possible fix, I wrote patches for JRubyFile.java and
Dir.java in JRuby 1.1RC2 today.
"ant test" passes with them applied, and the above "poi.rb"
now works as well:

01:~/tmp/tmp$ jruby poi.rb
"a/b"
Mon Feb 18 19:52:51 +0900 2008
"a/\343\201\204"
Mon Feb 18 19:53:27 +0900 2008
"a/\343\201\204/c"
Mon Feb 18 19:53:27 +0900 2008
"a/\343\201\204/\343\201\206"
Mon Feb 18 19:53:24 +0900 2008
01:~/tmp/tmp$

Note that the patches also work on any file system that uses
a character encoding other than UTF-8.

JRuby/UTF-8 <=> Java API/Unicode <=> native file systems

-- SUZUKI Hisao <[EMAIL PROTECTED]>
--- src/org/jruby/util/JRubyFile.java~orig      2008-02-17 04:40:34.000000000 
+0900
+++ src/org/jruby/util/JRubyFile.java   2008-02-18 17:58:44.000000000 +0900
@@ -46,6 +46,15 @@
     private static final long serialVersionUID = 435364547567567L;
 
     public static JRubyFile create(String cwd, String pathname) {
+        try {
+            pathname = new String(pathname.getBytes("ISO-8859-1"), "UTF-8");
+        } catch (java.io.UnsupportedEncodingException ex) {
+            // NOT REACHED HERE
+        }
+        return create0(cwd, pathname);
+    }
+
+    private static JRubyFile create0(String cwd, String pathname) {
         if (pathname == null || pathname.equals("") || 
Ruby.isSecurityRestricted()) {
             return JRubyNonExistentFile.NOT_EXIST;
         }
@@ -159,7 +168,7 @@
         
         JRubyFile[] smartFiles = new JRubyFile[files.length];
         for (int i = 0, j = files.length; i < j; i++) {
-            smartFiles[i] = create(super.getAbsolutePath(),files[i].getPath());
+            smartFiles[i] = create0(super.getAbsolutePath(), 
files[i].getPath());
         }
         return smartFiles;
     }
@@ -172,7 +181,7 @@
         
         JRubyFile[] smartFiles = new JRubyFile[files.length];
         for (int i = 0,j = files.length; i < j; i++) {
-            smartFiles[i] = create(super.getAbsolutePath(),files[i].getPath());
+            smartFiles[i] = create0(super.getAbsolutePath(), 
files[i].getPath());
         }
         return smartFiles;
     }
@@ -185,7 +194,7 @@
         
         JRubyFile[] smartFiles = new JRubyFile[files.length];
         for (int i = 0,j = files.length; i < j; i++) {
-            smartFiles[i] = create(super.getAbsolutePath(),files[i].getPath());
+            smartFiles[i] = create0(super.getAbsolutePath(), 
files[i].getPath());
         }
         return smartFiles;
     }
--- src/org/jruby/util/Dir.java~orig    2008-02-17 04:40:34.000000000 +0900
+++ src/org/jruby/util/Dir.java 2008-02-18 18:45:02.000000000 +0900
@@ -475,7 +475,7 @@
             }
 
             if (bytes[begin] == '/' || (DOSISH && begin+2<end && 
bytes[begin+1] == ':' && isdirsep(bytes[begin+2]))) {
-                if (new File(new String(bytes, begin, end - begin)).exists()) {
+                if (new File(newStringFromUTF8(bytes, begin, end - 
begin)).exists()) {
                     status = func.call(bytes, begin, end, arg);
                 }
             } else if (isJarFilePath(bytes, begin, end)) {
@@ -487,8 +487,8 @@
                     }
                 }
 
-                st = new File(new String(bytes, begin+5, ix-5));
-                String jar = new String(bytes, begin+ix+1, end-(ix+1));
+                st = new File(newStringFromUTF8(bytes, begin+5, ix-5));
+                String jar = newStringFromUTF8(bytes, begin+ix+1, end-(ix+1));
                 try {
                     JarFile jf = new JarFile(st);
                     
@@ -499,7 +499,7 @@
                     }
                 } catch(Exception e) {}
             } else if ((end - begin) > 0) { // Length check is a hack.  We 
should not be reeiving "" as a filename ever. 
-                if (new File(cwd, new String(bytes, begin, end - 
begin)).exists()) {
+                if (new File(cwd, newStringFromUTF8(bytes, begin, end - 
begin)).exists()) {
                     status = func.call(bytes, begin, end, arg);
                 }
             }
@@ -523,7 +523,7 @@
                     JarFile jf = null;
 
                     if(dir[0] == '/'  || (DOSISH && 2<dir.length && dir[1] == 
':' && isdirsep(dir[2]))) {
-                        st = new File(new String(dir));
+                        st = new File(newStringFromUTF8(dir));
                     } else if(isJarFilePath(dir, 0, dir.length)) {
                         int ix = -1;
                         for(int i = 0;i<dir.length;i++) {
@@ -533,8 +533,8 @@
                             }
                         }
 
-                        st = new File(new String(dir, 5, ix-5));
-                        jar = new String(dir, ix+1, dir.length-(ix+1));
+                        st = new File(newStringFromUTF8(dir, 5, ix-5));
+                        jar = newStringFromUTF8(dir, ix+1, dir.length-(ix+1));
                         try {
                             jf = new JarFile(st);
 
@@ -545,7 +545,7 @@
                             jf = null;
                         }
                     } else {
-                        st = new File(cwd, new String(dir));
+                        st = new File(cwd, newStringFromUTF8(dir));
                     }
 
                     if((jf != null && ("".equals(jar) || (jf.getJarEntry(jar) 
!= null && jf.getJarEntry(jar).isDirectory()))) || st.isDirectory()) {
@@ -569,18 +569,18 @@
 
                         for(int i=0;i<dirp.length;i++) {
                             if(recursive) {
-                                byte[] bs = dirp[i].getBytes();
+                                byte[] bs = getBytesInUTF8(dirp[i]);
                                 if (fnmatch(STAR,0,1,bs,0,bs.length,flags) != 
0) {
                                     continue;
                                 }
                                 buf.length(0);
                                 buf.append(base);
                                 buf.append( BASE(base) ? SLASH : EMPTY );
-                                buf.append(dirp[i].getBytes());
+                                buf.append(getBytesInUTF8(dirp[i]));
                                 if (buf.bytes[0] == '/' || (DOSISH && 
2<buf.realSize && buf.bytes[1] == ':' && isdirsep(buf.bytes[2]))) {
-                                    st = new File(new String(buf.bytes, 
buf.begin, buf.realSize));
+                                    st = new File(newStringFromUTF8(buf.bytes, 
buf.begin, buf.realSize));
                                 } else {
-                                    st = new File(cwd, new String(buf.bytes, 
buf.begin, buf.realSize));
+                                    st = new File(cwd, 
newStringFromUTF8(buf.bytes, buf.begin, buf.realSize));
                                 }
                                 if(st.isDirectory() && !".".equals(dirp[i]) && 
!"..".equals(dirp[i])) {
                                     int t = buf.realSize;
@@ -594,12 +594,12 @@
                                 }
                                 continue;
                             }
-                            byte[] bs = dirp[i].getBytes();
+                            byte[] bs = getBytesInUTF8(dirp[i]);
                             if(fnmatch(magic,0,magic.length,bs,0, 
bs.length,flags) == 0) {
                                 buf.length(0);
                                 buf.append(base);
                                 buf.append( BASE(base) ? SLASH : EMPTY );
-                                buf.append(dirp[i].getBytes());
+                                buf.append(getBytesInUTF8(dirp[i]));
                                 if(m == -1) {
                                     status = 
func.call(buf.bytes,0,buf.realSize,arg);
                                     if(status != 0) {
@@ -625,7 +625,7 @@
                                 }
                             }
                             for(JarEntry je : dirp) {
-                                byte[] bs = je.getName().getBytes();
+                                byte[] bs = getBytesInUTF8(je.getName());
                                 int len = bs.length;
 
                                 if(je.isDirectory()) {
@@ -678,9 +678,9 @@
                     for (ByteList b : link) {
                         if (status == 0) {
                             if(b.bytes[0] == '/'  || (DOSISH && 2<b.realSize 
&& b.bytes[1] == ':' && isdirsep(b.bytes[2]))) {
-                                st = new File(new String(b.bytes, 0, 
b.realSize));
+                                st = new File(newStringFromUTF8(b.bytes, 0, 
b.realSize));
                             } else {
-                                st = new File(cwd, new String(b.bytes, 0, 
b.realSize));
+                                st = new File(cwd, newStringFromUTF8(b.bytes, 
0, b.realSize));
                             }
 
                             if(st.isDirectory()) {
@@ -699,4 +699,28 @@
         }
         return status;
     }
+
+    private static byte[] getBytesInUTF8(String s) {
+        try {
+            return s.getBytes("UTF-8");
+        } catch (java.io.UnsupportedEncodingException ex) {
+            return s.getBytes(); // NOT REACHED HERE
+        }
+    }
+
+    private static String newStringFromUTF8(byte[] buf, int offset, int len) {
+        try {
+            return new String(buf, offset, len, "UTF-8");
+        } catch (java.io.UnsupportedEncodingException ex) {
+            return new String(buf, offset, len); // NOT REACHED HERE
+        }
+    }
+
+    private static String newStringFromUTF8(byte[] buf) {
+        try {
+            return new String(buf, "UTF-8");
+        } catch (java.io.UnsupportedEncodingException ex) {
+            return new String(buf); // NOT REACHED HERE
+        }
+    }
 }
---------------------------------------------------------------------
To unsubscribe from this list, please visit:

    http://xircles.codehaus.org/manage_email

Reply via email to