Hello everyone. I have found the following bug in the current org.apache.tools.tar package. A tar archive was created on one system (for example Windows XP, default charset CP-1251). This tar archive contains TarEntries whose names use national characters such as German umlauts. Then this archive file was copied to a Linux system (default charset UTF-8); after unpacking the archive there, information was lost (the TarEntry names were lost). Here is a possible solution for this problem.
Index: ant-core/src/main/org/apache/tools/tar/TarInputStream.java =================================================================== --- ant-core/src/main/org/apache/tools/tar/TarInputStream.java (revision 776302) +++ ant-core/src/main/org/apache/tools/tar/TarInputStream.java (working copy) @@ -264,10 +264,10 @@ if (currEntry != null && currEntry.isGNULongNameEntry()) { // read in the name StringBuffer longName = new StringBuffer(); - byte[] buf = new byte[SMALL_BUFFER_SIZE]; + byte[] buf = new byte[(int)currEntry.getSize()]; int length = 0; while ((length = read(buf)) >= 0) { - longName.append(new String(buf, 0, length)); + longName.append(new String(buf, 0, length, "UTF-8")); } getNextEntry(); if (currEntry == null) { Index: ant-core/src/main/org/apache/tools/tar/TarOutputStream.java =================================================================== --- ant-core/src/main/org/apache/tools/tar/TarOutputStream.java (revision 776302) +++ ant-core/src/main/org/apache/tools/tar/TarOutputStream.java (working copy) @@ -179,9 +179,10 @@ TarEntry longLinkEntry = new TarEntry(TarConstants.GNU_LONGLINK, TarConstants.LF_GNUTYPE_LONGNAME); - longLinkEntry.setSize(entry.getName().length() + 1); + byte[] nameBytes = entry.getName().getBytes("UTF-8"); + longLinkEntry.setSize(nameBytes.length + 1); putNextEntry(longLinkEntry); - write(entry.getName().getBytes()); + write(nameBytes); write(0); closeEntry(); } else if (longFileMode != LONGFILE_TRUNCATE) { Index: ant-core/src/main/org/apache/tools/tar/TarUtils.java =================================================================== --- ant-core/src/main/org/apache/tools/tar/TarUtils.java (revision 776302) +++ ant-core/src/main/org/apache/tools/tar/TarUtils.java (working copy) @@ -23,6 +23,8 @@ package org.apache.tools.tar; +import java.io.UnsupportedEncodingException; + /** * This class provides static utility methods to work with byte streams. * @@ -79,15 +81,21 @@ * @return The header's entry name. 
*/ public static StringBuffer parseName(byte[] header, int offset, int length) { - StringBuffer result = new StringBuffer(length); + StringBuffer result = null; + int nameLen = length; + int end = offset + length; - for (int i = offset; i < end; ++i) { - if (header[i] == 0) { + if(header[i] == 0) { + nameLen = i - offset; break; } + } - result.append((char) header[i]); + try { + result = new StringBuffer(new String(header, offset, nameLen, "UTF-8")); + } catch(UnsupportedEncodingException e) { + e.printStackTrace(); } return result; @@ -103,18 +111,23 @@ * @return The number of bytes in a header's entry name. */ public static int getNameBytes(StringBuffer name, byte[] buf, int offset, int length) { - int i; + int nameLength = -1; + try + { + byte nameBytes[] = name.toString().getBytes("UTF-8"); + nameLength = nameBytes.length ; + System.arraycopy(nameBytes, 0, buf, offset, nameLength); + } catch(UnsupportedEncodingException e) { + e.printStackTrace(); + } - for (i = 0; i < length && i < name.length(); ++i) { - buf[offset + i] = (byte) name.charAt(i); - } - for (; i < length; ++i) { - buf[offset + i] = 0; - } + for (; nameLength < length; ++nameLength) { + buf[offset + nameLength] = 0; + } - return offset + length; - } + return offset + length; + } /** * Parse an octal integer from a header buffer. Best Regards. Alexander Borisevich BelDTS Minsk Belarus