This is an automated email from the ASF dual-hosted git repository. matthiasblaesing pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/netbeans.git
The following commit(s) were added to refs/heads/master by this push: new 7f2559d13c [NETBEANS-6147] Initial implementation of better long string handling new a58c6677a9 Merge pull request #6157 from SirIntellegence/NETBEANS-6147 7f2559d13c is described below commit 7f2559d13c24beaf4dc55f7acf9ae5805ab23fa3 Author: Austin Stephens <sirintellegen...@gmail.com> AuthorDate: Wed Jul 5 10:28:11 2023 -0600 [NETBEANS-6147] Initial implementation of better long string handling Co-authored-by: Matthias Bläsing <mblaes...@doppel-helix.eu> --- .../debugger/jpda/models/ShortenedStrings.java | 337 ++++++++++++++++----- 1 file changed, 259 insertions(+), 78 deletions(-) diff --git a/java/debugger.jpda/src/org/netbeans/modules/debugger/jpda/models/ShortenedStrings.java b/java/debugger.jpda/src/org/netbeans/modules/debugger/jpda/models/ShortenedStrings.java index f2e8892055..85558c6716 100644 --- a/java/debugger.jpda/src/org/netbeans/modules/debugger/jpda/models/ShortenedStrings.java +++ b/java/debugger.jpda/src/org/netbeans/modules/debugger/jpda/models/ShortenedStrings.java @@ -41,6 +41,7 @@ import java.io.IOException; import java.io.Reader; import java.lang.ref.Reference; import java.lang.ref.WeakReference; +import java.text.MessageFormat; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -49,6 +50,7 @@ import java.util.Set; import java.util.WeakHashMap; import java.util.logging.Level; import java.util.logging.Logger; +import org.netbeans.api.annotations.common.NullAllowed; import org.netbeans.api.debugger.DebuggerManager; import org.netbeans.api.debugger.DebuggerManagerAdapter; import org.netbeans.api.debugger.Session; @@ -165,8 +167,11 @@ public final class ShortenedStrings { } } - private static void register(String shortedString, StringReference sr, int length, ArrayReference chars) { - StringInfo si = new StringInfo(sr, shortedString.length() - 3, length, chars); + private static void register(String shortedString, StringReference sr, + int length, ArrayReference chars, InternalStringEncoding backingEncoding, + boolean isLittleEndian) { + StringInfo si = new StringInfo(sr, shortedString.length() - 3, length, + chars, backingEncoding, isLittleEndian); synchronized (infoStrings) { infoStrings.put(shortedString, si); } @@ -204,6 +209,7 @@ public final class ShortenedStrings { } String string = null; boolean isShort = true; + InternalStringEncoding backingEncoding = InternalStringEncoding.CHAR_ARRAY; try { ReferenceType st = ObjectReferenceWrapper.referenceType(sr); ArrayReference sa = null; @@ -262,6 +268,9 @@ public final class ShortenedStrings { isUTF16 = true; } } + backingEncoding = isUTF16 ? + InternalStringEncoding.BYTE_ARRAY_UTF16 : + InternalStringEncoding.BYTE_ARRAY_LATIN1; } int limit = AbstractObjectVariable.MAX_STRING_LENGTH; if (isUTF16){ @@ -288,65 +297,19 @@ public final class ShortenedStrings { } else { assert sa != null; int l = AbstractObjectVariable.MAX_STRING_LENGTH; - List<Value> values = ArrayReferenceWrapper.getValues(sa, 0, - isUTF16 ? (l * 2) : l); char[] characters = new char[l + 3]; - if (isCompactImpl) { - //java compact string - if (!isUTF16) { - //we can just cast to char - for (int i = 0; i < l; i++) { - Value v = values.get(i); - if (!(v instanceof ByteValue)) { - return ERROR_RESULT; - } - char c = (char)((ByteValue) v).byteValue(); - //remove the extended sign - c &= 0xFF; - characters[i] = c; - } - } - else { - int hiByteShift; - int lowByteShift; - //is it little or big endian? - if (isLittleEndian(sr.virtualMachine())){ - hiByteShift = 0; - lowByteShift = 8; - } - else{ - hiByteShift = 8; - lowByteShift = 0; - } - for (int i = 0; i < l; i++) { - int index = i * 2; - Value v = values.get(index); - if (!(v instanceof ByteValue)) { - return ERROR_RESULT; - } - Value v2 = values.get(index + 1); - if (!(v instanceof ByteValue)) { - return ERROR_RESULT; - } - char c1 = (char) ((ByteValue) v).byteValue(); - char c2 = (char) ((ByteValue) v2).byteValue(); - //remove the extended sign - c1 = (char) (0xFF & c1); - c2 = (char) (0xFF & c2); - char c = (char)(c1 << hiByteShift | - c2 << lowByteShift); - characters[i] = c; - } - } + //is it little or big endian? + //checking if the encoding is Utf16 to avoid a call to + //`isLittleEndian` if it isn't Utf16 + Boolean isLittleEndian = backingEncoding == + InternalStringEncoding.BYTE_ARRAY_UTF16 && + isLittleEndian(sr.virtualMachine()); + try{ + copyToCharArray(sa, 0, characters, 0, l, + backingEncoding, isLittleEndian); } - else{ - for (int i = 0; i < l; i++) { - Value v = values.get(i); - if (!(v instanceof CharValue)) { - return ERROR_RESULT; - } - characters[i] = ((CharValue) v).charValue(); - } + catch (IOException ioe){ + return ERROR_RESULT; } // Add 3 dots: for (int i = l; i < (l + 3); i++) { @@ -354,7 +317,8 @@ public final class ShortenedStrings { } String shortedString = new String(characters); int stringLength = isUTF16 ? saLength / 2 : saLength; - ShortenedStrings.register(shortedString, sr, stringLength, sa); + ShortenedStrings.register(shortedString, sr, stringLength, sa, + backingEncoding, isLittleEndian); string = shortedString; } } @@ -386,18 +350,225 @@ public final class ShortenedStrings { return string; } + /** + * (Currently untested) Grab the char at the given index in the array. + * Returns -1 on an error. Note: returning int instead of char because + * exceptions are expensive and so is boxing into a Character + * @param sourceArray Backing array reference. May be a byte or char array + * @param index + * @param backing + * @param isLittleEndian + * @return + */ + private static int charAt(ArrayReference sourceArray, int index, + InternalStringEncoding encoding, boolean isLittleEndian) throws + InternalExceptionWrapper, ObjectCollectedExceptionWrapper, + VMDisconnectedExceptionWrapper{ + if (encoding == InternalStringEncoding.CHAR_ARRAY){ + //that was easy + Value v = ArrayReferenceWrapper.getValue(sourceArray, index); + if (!(v instanceof CharValue)){ + return -1; + } + return ((CharValue)v).charValue(); + } + if (encoding == InternalStringEncoding.BYTE_ARRAY_LATIN1){ + //that was also easy + Value v = ArrayReferenceWrapper.getValue(sourceArray, index); + if (!(v instanceof ByteValue)){ + return -1; + } + char c = (char)((ByteValue)v).byteValue(); + //strip off the sign value + c &= 0xff; + return c; + } + //uft16 it is + List<Value> vals = ArrayReferenceWrapper.getValues(sourceArray, + index * 2, 2); + Value left = vals.get(0); + Value right = vals.get(1); + if (!(left instanceof ByteValue && right instanceof ByteValue)){ + return -1; + } + return utf16Combine(((ByteValue)left).byteValue(), + ((ByteValue)right).value(), isLittleEndian); + } + + /** + * (Currently untested) Grab the char at the given index in the array. + * Returns -1 on an error. Note: returning int instead of char because + * exceptions are expensive and so is boxing into a Character + * @param sourceArray Backing array reference. May be a byte or char array + * @param index + * @param encoding + * @param isLittleEndian + * @return + */ + private static int charAt(List<Value> sourceArray, int index, + InternalStringEncoding encoding, boolean isLittleEndian){ + if (encoding == InternalStringEncoding.CHAR_ARRAY){ + //that was easy + Value v = sourceArray.get(index); + if (!(v instanceof CharValue)){ + return -1; + } + return ((CharValue)v).charValue(); + } + if (encoding == InternalStringEncoding.BYTE_ARRAY_LATIN1){ + //that was also easy + Value v = sourceArray.get(index); + if (!(v instanceof ByteValue)){ + return -1; + } + char c = (char)((ByteValue)v).byteValue(); + //strip off the sign value + c &= 0xff; + return c; + } + //uft16 it is + Value left = sourceArray.get(index * 2); + Value right = sourceArray.get((index * 2) + 1); + if (!(left instanceof ByteValue && right instanceof ByteValue)){ + return -1; + } + return utf16Combine(((ByteValue)left).byteValue(), + ((ByteValue)right).value(), isLittleEndian); + } + + /** + * Copy the input to the destination array as if by {@link System#arrayCopy} + * @param sourceArray Backing array reference. May be a byte or char array + * @param encoding + * @param isLittleEndian + * @param start + * @param length + * @param dest + */ + private static void copyToCharArray(ArrayReference sourceArray, + int srcPos, char[] dest, int destPos, int length, + InternalStringEncoding encoding, boolean isLittleEndian) throws + ObjectCollectedExceptionWrapper, VMDisconnectedExceptionWrapper, + InternalExceptionWrapper, IOException{ + //grab applicable values + int realStart = srcPos; + int realLength = length; + if (encoding == InternalStringEncoding.BYTE_ARRAY_UTF16){ + realStart *= 2; + realLength *= 2; + } + List<Value> values = ArrayReferenceWrapper.getValues(sourceArray, + realStart, realLength); + //copy them + copyToCharArray(values, 0, dest, destPos, length, encoding, + isLittleEndian); + } + + /** + * Copy the input to the destination array as if by {@link System#arrayCopy} + * @param sourceArray Backing array reference. May be a byte or char array + * @param backing + * @param isLittleEndian + * @param start + * @param length + * @param dest + */ + private static void copyToCharArray(List<Value> sourceArray, int srcPos, + char[] dest, int destPos, int length, InternalStringEncoding backing, + boolean isLittleEndian) throws IOException{ + if (backing == InternalStringEncoding.CHAR_ARRAY){ + //that was easy + for (int i = 0; i < length; i++) { + Value v = sourceArray.get(i + srcPos); + if (!(v instanceof CharValue)){ + throw new IOException(MessageFormat.format("Char at {0} " + + "is not a character: {1}", srcPos + i, v)); + } + dest[destPos + i] = ((CharValue)v).charValue(); + } + return; + } + if (backing == InternalStringEncoding.BYTE_ARRAY_LATIN1){ + //that was also easy + for (int i = 0; i < length; i++) { + Value v = sourceArray.get(i + srcPos); + if (!(v instanceof ByteValue)){ + throw new IOException(MessageFormat.format("Char at {0} " + + "is not a byte: {1}", srcPos + i, v)); + } + char c = (char)((ByteValue)v).byteValue(); + //strip off the sign value + c &= 0xff; + dest[destPos + i] = c; + } + return; + } + //uft16 it is + assert backing == InternalStringEncoding.BYTE_ARRAY_UTF16; + for (int i = 0; i < length; i++) { + Value left = sourceArray.get(i * 2); + Value right = sourceArray.get((i * 2) + 1); + if (!(left instanceof ByteValue && right instanceof ByteValue)){ + throw new IOException(MessageFormat.format("Char at {0} is " + + "not a byte pair: {1},{2}", srcPos + i, left, right)); + } + dest[destPos + i] = utf16Combine(((ByteValue)left).byteValue(), + ((ByteValue)right).byteValue(), isLittleEndian); + } + } + + private static char utf16Combine(byte left, byte right, boolean isLittleEndian){ + int hiByteShift, lowByteShift; + if (isLittleEndian){ + hiByteShift = 0; + lowByteShift = 8; + } + else{ + hiByteShift = 8; + lowByteShift = 0; + } + char c1 = (char)left; + char c2 = (char)right; + //remove the extended sign + c1 = (char) (0xFF & c1); + c2 = (char) (0xFF & c2); + char c = (char)(c1 << hiByteShift | + c2 << lowByteShift); + return c; + } + + private static int length(int arrayLength, InternalStringEncoding backingEncoding){ + switch (backingEncoding) { + case CHAR_ARRAY: + case BYTE_ARRAY_LATIN1: + return arrayLength; + case BYTE_ARRAY_UTF16: + return arrayLength / 2; + default: + throw new AssertionError(); + } + } + public static class StringInfo { + private final InternalStringEncoding backingEncoding; + private final boolean isLittleEndian; private final StringReference sr; private final int shortLength; private final int length; private final ArrayReference chars; - private StringInfo(StringReference sr, int shortLength, int length, ArrayReference chars) { + private StringInfo(StringReference sr, int shortLength, int length, + ArrayReference chars, InternalStringEncoding backingEncoding, + boolean isLittleEndian) { this.sr = sr; this.shortLength = shortLength; this.length = length; this.chars = chars; + this.backingEncoding = backingEncoding; + //caching that so we don't have to risk more exceptions to figure + //out what it is + this.isLittleEndian = isLittleEndian; } public int getShortLength() { @@ -422,31 +593,24 @@ public final class ShortenedStrings { public Reader getContent() { return new Reader() { - int pos = 0; - @Override public int read(char[] cbuf, int off, int len) throws IOException { if (pos + len > length) { len = length - pos; } - List<Value> values; + if (len == 0){ + return -1; + } try { - values = ArrayReferenceWrapper.getValues(chars, pos, len); - } catch (InternalExceptionWrapper ex) { - throw new IOException(ex); - } catch (VMDisconnectedExceptionWrapper ex) { - throw new IOException(ex); - } catch (ObjectCollectedExceptionWrapper ex) { - throw new IOException(ex); + copyToCharArray(chars, pos, cbuf, 0, len, + backingEncoding, isLittleEndian); } - for (int i = 0; i < len; i++) { - Value v = values.get(i); - if (!(v instanceof CharValue)) { - int p = pos + i; - throw new IOException("Char at "+p+" is not a character: "+v); - } - cbuf[off + i] = ((CharValue) v).charValue(); + catch (IOException ioe){ throw ioe; }//for clarity + catch (InternalExceptionWrapper | + VMDisconnectedExceptionWrapper | + ObjectCollectedExceptionWrapper ex) { + throw new IOException(ex); } pos += len; return len; @@ -472,4 +636,21 @@ public final class ShortenedStrings { this.shortValueRef = new WeakReference<String>(shortenedValue); } } + + private enum InternalStringEncoding { + /** + * The string is backed by an array of chars + */ + CHAR_ARRAY, + /** + * The string is backed by an array of bytes with one byte per char + */ + BYTE_ARRAY_LATIN1, + /** + * The string is backed by an array of bytes with two bytes per char. + * Use {@link #isLittleEndian(com.sun.jdi.VirtualMachine)} to determine + * the byte order + */ + BYTE_ARRAY_UTF16 + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@netbeans.apache.org For additional commands, e-mail: commits-h...@netbeans.apache.org For further information about the NetBeans mailing lists, visit: https://cwiki.apache.org/confluence/display/NETBEANS/Mailing+lists