JRuby 1.1RC2 (and previous versions) can neither open nor 'stat'
files with non-ASCII names. I found this bug while writing a
servlet in JRuby to upload files (see our pages:
http://www.okisoft.co.jp/esc/ruby/jservlet/
http://www.okisoft.co.jp/esc/python/upload-cgi.html
).
For example,
01:~/tmp/tmp$ export LC_ALL=en_US.UTF-8
01:~/tmp/tmp$ uname -v
Darwin Kernel Version 8.11.1: Wed Oct 10 18:23:28 PDT 2007;
root:xnu-792.25.20~1/RELEASE_I386
01:~/tmp/tmp$ jruby -v
ruby 1.8.6 (2008-02-16 rev 5944) [i386-jruby1.1RC2]
01:~/tmp/tmp$ java -version
java version "1.5.0_13"
Java(TM) 2 Runtime Environment, Standard Edition (build 1.5.0_13-b05-241)
Java HotSpot(TM) Client VM (build 1.5.0_13-121, mixed mode, sharing)
01:~/tmp/tmp$ /bin/ls -lR | cat -v
total 8
drwxr-xr-x 4 suzuki suzuki 136 Feb 18 19:52 a
-rwxr-xr-x 1 suzuki suzuki 65 Feb 18 19:55 poi.rb
./a:
total 0
drwxr-xr-x 4 suzuki suzuki 136 Feb 18 19:53 M-cM-^AM-^D
-rw-r--r-- 1 suzuki suzuki 0 Feb 18 19:52 b
./a/M-cM-^AM-^D:
total 0
-rw-r--r-- 1 suzuki suzuki 0 Feb 18 19:53 c
-rw-r--r-- 1 suzuki suzuki 0 Feb 18 19:53 M-cM-^AM-^F
01:~/tmp/tmp$ ruby poi.rb
"a/b"
Mon Feb 18 19:52:51 +0900 2008
"a/\343\201\204"
Mon Feb 18 19:53:27 +0900 2008
"a/\343\201\204/c"
Mon Feb 18 19:53:27 +0900 2008
"a/\343\201\204/\343\201\206"
Mon Feb 18 19:53:24 +0900 2008
01:~/tmp/tmp$ jruby poi.rb
"a/b"
Mon Feb 18 19:52:51 +0900 2008
"a/\202\242"
poi.rb:1: No such file or directory - No such file or directory - a/???
(Errno::ENOENT)
from poi.rb:1:in `each'
from poi.rb:1
11:~/tmp/tmp$
The script "poi.rb" is as follows:
for name in Dir.glob("a/**/*")
p name
p File.mtime(name)
end
JRuby currently treats every string as a byte-transparent
ISO-8859-1 sequence and reinterprets it as UTF-8 when passing
it to Swing and other Java APIs. For consistency, JRuby must
apply the same reinterpretation when it passes strings to the
java.io.* APIs that it uses to implement Ruby's File and Dir
classes.
For now, you can neither open nor 'stat' any file with a non-ASCII
name. More precisely, you can still use ISO-8859-1 names, but
you cannot pass those names to other Java APIs transparently.
Either way, it is a severe limitation for the rest of the world,
including us.
As a possible fix, I wrote patches today for JRubyFile.java and
Dir.java in JRuby 1.1RC2.
"ant test" passes with them applied, and the "poi.rb" script
above also works:
01:~/tmp/tmp$ jruby poi.rb
"a/b"
Mon Feb 18 19:52:51 +0900 2008
"a/\343\201\204"
Mon Feb 18 19:53:27 +0900 2008
"a/\343\201\204/c"
Mon Feb 18 19:53:27 +0900 2008
"a/\343\201\204/\343\201\206"
Mon Feb 18 19:53:24 +0900 2008
01:~/tmp/tmp$
Note that the patches will also work on file systems that use
a character encoding other than UTF-8:
JRuby/UTF-8 <=> Java API/Unicode <=> native file systems
-- SUZUKI Hisao <[EMAIL PROTECTED]>
--- src/org/jruby/util/JRubyFile.java~orig 2008-02-17 04:40:34.000000000
+0900
+++ src/org/jruby/util/JRubyFile.java 2008-02-18 17:58:44.000000000 +0900
@@ -46,6 +46,15 @@
private static final long serialVersionUID = 435364547567567L;
public static JRubyFile create(String cwd, String pathname) {
+ try {
+ pathname = new String(pathname.getBytes("ISO-8859-1"), "UTF-8");
+ } catch (java.io.UnsupportedEncodingException ex) {
+ // NOT REACHED HERE
+ }
+ return create0(cwd, pathname);
+ }
+
+ private static JRubyFile create0(String cwd, String pathname) {
if (pathname == null || pathname.equals("") ||
Ruby.isSecurityRestricted()) {
return JRubyNonExistentFile.NOT_EXIST;
}
@@ -159,7 +168,7 @@
JRubyFile[] smartFiles = new JRubyFile[files.length];
for (int i = 0, j = files.length; i < j; i++) {
- smartFiles[i] = create(super.getAbsolutePath(),files[i].getPath());
+ smartFiles[i] = create0(super.getAbsolutePath(),
files[i].getPath());
}
return smartFiles;
}
@@ -172,7 +181,7 @@
JRubyFile[] smartFiles = new JRubyFile[files.length];
for (int i = 0,j = files.length; i < j; i++) {
- smartFiles[i] = create(super.getAbsolutePath(),files[i].getPath());
+ smartFiles[i] = create0(super.getAbsolutePath(),
files[i].getPath());
}
return smartFiles;
}
@@ -185,7 +194,7 @@
JRubyFile[] smartFiles = new JRubyFile[files.length];
for (int i = 0,j = files.length; i < j; i++) {
- smartFiles[i] = create(super.getAbsolutePath(),files[i].getPath());
+ smartFiles[i] = create0(super.getAbsolutePath(),
files[i].getPath());
}
return smartFiles;
}
--- src/org/jruby/util/Dir.java~orig 2008-02-17 04:40:34.000000000 +0900
+++ src/org/jruby/util/Dir.java 2008-02-18 18:45:02.000000000 +0900
@@ -475,7 +475,7 @@
}
if (bytes[begin] == '/' || (DOSISH && begin+2<end &&
bytes[begin+1] == ':' && isdirsep(bytes[begin+2]))) {
- if (new File(new String(bytes, begin, end - begin)).exists()) {
+ if (new File(newStringFromUTF8(bytes, begin, end -
begin)).exists()) {
status = func.call(bytes, begin, end, arg);
}
} else if (isJarFilePath(bytes, begin, end)) {
@@ -487,8 +487,8 @@
}
}
- st = new File(new String(bytes, begin+5, ix-5));
- String jar = new String(bytes, begin+ix+1, end-(ix+1));
+ st = new File(newStringFromUTF8(bytes, begin+5, ix-5));
+ String jar = newStringFromUTF8(bytes, begin+ix+1, end-(ix+1));
try {
JarFile jf = new JarFile(st);
@@ -499,7 +499,7 @@
}
} catch(Exception e) {}
} else if ((end - begin) > 0) { // Length check is a hack. We
should not be reeiving "" as a filename ever.
- if (new File(cwd, new String(bytes, begin, end -
begin)).exists()) {
+ if (new File(cwd, newStringFromUTF8(bytes, begin, end -
begin)).exists()) {
status = func.call(bytes, begin, end, arg);
}
}
@@ -523,7 +523,7 @@
JarFile jf = null;
if(dir[0] == '/' || (DOSISH && 2<dir.length && dir[1] ==
':' && isdirsep(dir[2]))) {
- st = new File(new String(dir));
+ st = new File(newStringFromUTF8(dir));
} else if(isJarFilePath(dir, 0, dir.length)) {
int ix = -1;
for(int i = 0;i<dir.length;i++) {
@@ -533,8 +533,8 @@
}
}
- st = new File(new String(dir, 5, ix-5));
- jar = new String(dir, ix+1, dir.length-(ix+1));
+ st = new File(newStringFromUTF8(dir, 5, ix-5));
+ jar = newStringFromUTF8(dir, ix+1, dir.length-(ix+1));
try {
jf = new JarFile(st);
@@ -545,7 +545,7 @@
jf = null;
}
} else {
- st = new File(cwd, new String(dir));
+ st = new File(cwd, newStringFromUTF8(dir));
}
if((jf != null && ("".equals(jar) || (jf.getJarEntry(jar)
!= null && jf.getJarEntry(jar).isDirectory()))) || st.isDirectory()) {
@@ -569,18 +569,18 @@
for(int i=0;i<dirp.length;i++) {
if(recursive) {
- byte[] bs = dirp[i].getBytes();
+ byte[] bs = getBytesInUTF8(dirp[i]);
if (fnmatch(STAR,0,1,bs,0,bs.length,flags) !=
0) {
continue;
}
buf.length(0);
buf.append(base);
buf.append( BASE(base) ? SLASH : EMPTY );
- buf.append(dirp[i].getBytes());
+ buf.append(getBytesInUTF8(dirp[i]));
if (buf.bytes[0] == '/' || (DOSISH &&
2<buf.realSize && buf.bytes[1] == ':' && isdirsep(buf.bytes[2]))) {
- st = new File(new String(buf.bytes,
buf.begin, buf.realSize));
+ st = new File(newStringFromUTF8(buf.bytes,
buf.begin, buf.realSize));
} else {
- st = new File(cwd, new String(buf.bytes,
buf.begin, buf.realSize));
+ st = new File(cwd,
newStringFromUTF8(buf.bytes, buf.begin, buf.realSize));
}
if(st.isDirectory() && !".".equals(dirp[i]) &&
!"..".equals(dirp[i])) {
int t = buf.realSize;
@@ -594,12 +594,12 @@
}
continue;
}
- byte[] bs = dirp[i].getBytes();
+ byte[] bs = getBytesInUTF8(dirp[i]);
if(fnmatch(magic,0,magic.length,bs,0,
bs.length,flags) == 0) {
buf.length(0);
buf.append(base);
buf.append( BASE(base) ? SLASH : EMPTY );
- buf.append(dirp[i].getBytes());
+ buf.append(getBytesInUTF8(dirp[i]));
if(m == -1) {
status =
func.call(buf.bytes,0,buf.realSize,arg);
if(status != 0) {
@@ -625,7 +625,7 @@
}
}
for(JarEntry je : dirp) {
- byte[] bs = je.getName().getBytes();
+ byte[] bs = getBytesInUTF8(je.getName());
int len = bs.length;
if(je.isDirectory()) {
@@ -678,9 +678,9 @@
for (ByteList b : link) {
if (status == 0) {
if(b.bytes[0] == '/' || (DOSISH && 2<b.realSize
&& b.bytes[1] == ':' && isdirsep(b.bytes[2]))) {
- st = new File(new String(b.bytes, 0,
b.realSize));
+ st = new File(newStringFromUTF8(b.bytes, 0,
b.realSize));
} else {
- st = new File(cwd, new String(b.bytes, 0,
b.realSize));
+ st = new File(cwd, newStringFromUTF8(b.bytes,
0, b.realSize));
}
if(st.isDirectory()) {
@@ -699,4 +699,28 @@
}
return status;
}
+
+ private static byte[] getBytesInUTF8(String s) {
+ try {
+ return s.getBytes("UTF-8");
+ } catch (java.io.UnsupportedEncodingException ex) {
+ return s.getBytes(); // NOT REACHED HERE
+ }
+ }
+
+ private static String newStringFromUTF8(byte[] buf, int offset, int len) {
+ try {
+ return new String(buf, offset, len, "UTF-8");
+ } catch (java.io.UnsupportedEncodingException ex) {
+ return new String(buf, offset, len); // NOT REACHED HERE
+ }
+ }
+
+ private static String newStringFromUTF8(byte[] buf) {
+ try {
+ return new String(buf, "UTF-8");
+ } catch (java.io.UnsupportedEncodingException ex) {
+ return new String(buf); // NOT REACHED HERE
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe from this list, please visit:
http://xircles.codehaus.org/manage_email