[netbeans] branch master updated: [NETBEANS-6147] Initial implementation of better long string handling

matthiasblaesing Mon, 17 Jul 2023 09:31:59 -0700

This is an automated email from the ASF dual-hosted git repository.

matthiasblaesing pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/netbeans.git



The following commit(s) were added to refs/heads/master by this push:
     new 7f2559d13c [NETBEANS-6147] Initial implementation of better long string handling
     new a58c6677a9 Merge pull request #6157 from SirIntellegence/NETBEANS-6147
7f2559d13c is described below

commit 7f2559d13c24beaf4dc55f7acf9ae5805ab23fa3
Author: Austin Stephens <[email protected]>
AuthorDate: Wed Jul 5 10:28:11 2023 -0600

    [NETBEANS-6147] Initial implementation of better long string handling
    
    Co-authored-by: Matthias Bläsing <[email protected]>
---
 .../debugger/jpda/models/ShortenedStrings.java     | 337 ++++++++++++++++-----
 1 file changed, 259 insertions(+), 78 deletions(-)

diff --git a/java/debugger.jpda/src/org/netbeans/modules/debugger/jpda/models/ShortenedStrings.java b/java/debugger.jpda/src/org/netbeans/modules/debugger/jpda/models/ShortenedStrings.java
index f2e8892055..85558c6716 100644
--- a/java/debugger.jpda/src/org/netbeans/modules/debugger/jpda/models/ShortenedStrings.java
+++ b/java/debugger.jpda/src/org/netbeans/modules/debugger/jpda/models/ShortenedStrings.java
@@ -41,6 +41,7 @@ import java.io.IOException;
 import java.io.Reader;
 import java.lang.ref.Reference;
 import java.lang.ref.WeakReference;
+import java.text.MessageFormat;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
@@ -49,6 +50,7 @@ import java.util.Set;
 import java.util.WeakHashMap;
 import java.util.logging.Level;
 import java.util.logging.Logger;
+import org.netbeans.api.annotations.common.NullAllowed;
 import org.netbeans.api.debugger.DebuggerManager;
 import org.netbeans.api.debugger.DebuggerManagerAdapter;
 import org.netbeans.api.debugger.Session;
@@ -165,8 +167,11 @@ public final class ShortenedStrings {
         }
     }
 
-    private static void register(String shortedString, StringReference sr, int length, ArrayReference chars) {
-        StringInfo si = new StringInfo(sr, shortedString.length() - 3, length, chars);
+    private static void register(String shortedString, StringReference sr,
+            int length, ArrayReference chars, InternalStringEncoding backingEncoding,
+            boolean isLittleEndian) {
+        StringInfo si = new StringInfo(sr, shortedString.length() - 3, length,
+                chars, backingEncoding, isLittleEndian);
         synchronized (infoStrings) {
             infoStrings.put(shortedString, si);
         }
@@ -204,6 +209,7 @@ public final class ShortenedStrings {
         }
         String string = null;
         boolean isShort = true;
+        InternalStringEncoding backingEncoding = InternalStringEncoding.CHAR_ARRAY;
         try {
             ReferenceType st = ObjectReferenceWrapper.referenceType(sr);
             ArrayReference sa = null;
@@ -262,6 +268,9 @@ public final class ShortenedStrings {
                                 isUTF16 = true;
                             }
                         }
+                        backingEncoding = isUTF16 ?
+                                InternalStringEncoding.BYTE_ARRAY_UTF16 :
+                                InternalStringEncoding.BYTE_ARRAY_LATIN1;
                     }
                     int limit = AbstractObjectVariable.MAX_STRING_LENGTH;
                     if (isUTF16){
@@ -288,65 +297,19 @@ public final class ShortenedStrings {
             } else {
                 assert sa != null;
                 int l = AbstractObjectVariable.MAX_STRING_LENGTH;
-                List<Value> values = ArrayReferenceWrapper.getValues(sa, 0,
-                        isUTF16 ? (l * 2) : l);
                 char[] characters = new char[l + 3];
-                if (isCompactImpl) {
-                    //java compact string
-                    if (!isUTF16) {
-                        //we can just cast to char
-                        for (int i = 0; i < l; i++) {
-                            Value v = values.get(i);
-                            if (!(v instanceof ByteValue)) {
-                                return ERROR_RESULT;
-                            }
-                            char c = (char)((ByteValue) v).byteValue();
-                            //remove the extended sign
-                            c &= 0xFF;
-                            characters[i] = c;
-                        }
-                    }
-                    else {
-                        int hiByteShift;
-                        int lowByteShift;
-                        //is it little or big endian?
-                        if (isLittleEndian(sr.virtualMachine())){
-                            hiByteShift = 0;
-                            lowByteShift = 8;
-                        }
-                        else{
-                            hiByteShift = 8;
-                            lowByteShift = 0;
-                        }
-                        for (int i = 0; i < l; i++) {
-                            int index = i * 2;
-                            Value v = values.get(index);
-                            if (!(v instanceof ByteValue)) {
-                                return ERROR_RESULT;
-                            }
-                            Value v2 = values.get(index + 1);
-                            if (!(v instanceof ByteValue)) {
-                                return ERROR_RESULT;
-                            }
-                            char c1 = (char) ((ByteValue) v).byteValue();
-                            char c2 = (char) ((ByteValue) v2).byteValue();
-                            //remove the extended sign
-                            c1 = (char) (0xFF & c1);
-                            c2 = (char) (0xFF & c2);
-                            char c = (char)(c1 << hiByteShift |
-                                    c2 << lowByteShift);
-                            characters[i] = c;
-                        }
-                    }
+                //is it little or big endian?
+                //checking if the encoding is Utf16 to avoid a call to
+                //`isLittleEndian` if it isn't Utf16
+                Boolean isLittleEndian = backingEncoding ==
+                        InternalStringEncoding.BYTE_ARRAY_UTF16 &&
+                        isLittleEndian(sr.virtualMachine());
+                try{
+                    copyToCharArray(sa, 0, characters, 0, l,
+                            backingEncoding, isLittleEndian);
                 }
-                else{
-                    for (int i = 0; i < l; i++) {
-                        Value v = values.get(i);
-                        if (!(v instanceof CharValue)) {
-                            return ERROR_RESULT;
-                        }
-                        characters[i] = ((CharValue) v).charValue();
-                    }
+                catch (IOException ioe){
+                    return ERROR_RESULT;
                 }
                 // Add 3 dots:
                 for (int i = l; i < (l + 3); i++) {
@@ -354,7 +317,8 @@ public final class ShortenedStrings {
                 }
                 String shortedString = new String(characters);
                 int stringLength = isUTF16 ? saLength / 2 : saLength;
-                ShortenedStrings.register(shortedString, sr, stringLength, sa);
+                ShortenedStrings.register(shortedString, sr, stringLength, sa,
+                        backingEncoding, isLittleEndian);
                 string = shortedString;
             }
         }
@@ -386,18 +350,225 @@ public final class ShortenedStrings {
         return string;
     }
 
+    /**
+     * (Currently untested) Grab the char at the given index in the array.
+     * Returns -1 on an error. Note: returning int instead of char because
+     * exceptions are expensive and so is boxing into a Character
+     * @param sourceArray Backing array reference. May be a byte or char array
+     * @param index
+     * @param encoding
+     * @param isLittleEndian
+     * @return
+     */
+    private static int charAt(ArrayReference sourceArray, int index,
+            InternalStringEncoding encoding, boolean isLittleEndian) throws
+            InternalExceptionWrapper, ObjectCollectedExceptionWrapper,
+            VMDisconnectedExceptionWrapper{
+        if (encoding == InternalStringEncoding.CHAR_ARRAY){
+            //that was easy
+            Value v = ArrayReferenceWrapper.getValue(sourceArray, index);
+            if (!(v instanceof CharValue)){
+                return -1;
+            }
+            return ((CharValue)v).charValue();
+        }
+        if (encoding == InternalStringEncoding.BYTE_ARRAY_LATIN1){
+            //that was also easy
+            Value v = ArrayReferenceWrapper.getValue(sourceArray, index);
+            if (!(v instanceof ByteValue)){
+                return -1;
+            }
+            char c = (char)((ByteValue)v).byteValue();
+            //strip off the sign value
+            c &= 0xff;
+            return c;
+        }
+        //utf16 it is
+        List<Value> vals = ArrayReferenceWrapper.getValues(sourceArray,
+                index * 2, 2);
+        Value left = vals.get(0);
+        Value right = vals.get(1);
+        if (!(left instanceof ByteValue && right instanceof ByteValue)){
+            return -1;
+        }
+        return utf16Combine(((ByteValue)left).byteValue(),
+                ((ByteValue)right).value(), isLittleEndian);
+    }
+
+    /**
+     * (Currently untested) Grab the char at the given index in the array.
+     * Returns -1 on an error. Note: returning int instead of char because
+     * exceptions are expensive and so is boxing into a Character
+     * @param sourceArray Backing array reference. May be a byte or char array
+     * @param index
+     * @param encoding
+     * @param isLittleEndian
+     * @return
+     */
+    private static int charAt(List<Value> sourceArray, int index,
+            InternalStringEncoding encoding, boolean isLittleEndian){
+        if (encoding == InternalStringEncoding.CHAR_ARRAY){
+            //that was easy
+            Value v = sourceArray.get(index);
+            if (!(v instanceof CharValue)){
+                return -1;
+            }
+            return ((CharValue)v).charValue();
+        }
+        if (encoding == InternalStringEncoding.BYTE_ARRAY_LATIN1){
+            //that was also easy
+            Value v = sourceArray.get(index);
+            if (!(v instanceof ByteValue)){
+                return -1;
+            }
+            char c = (char)((ByteValue)v).byteValue();
+            //strip off the sign value
+            c &= 0xff;
+            return c;
+        }
+        //utf16 it is
+        Value left = sourceArray.get(index * 2);
+        Value right = sourceArray.get((index * 2) + 1);
+        if (!(left instanceof ByteValue && right instanceof ByteValue)){
+            return -1;
+        }
+        return utf16Combine(((ByteValue)left).byteValue(),
+                ((ByteValue)right).value(), isLittleEndian);
+    }
+
+    /**
+     * Copy the input to the destination array as if by {@link System#arraycopy}
+     * @param sourceArray Backing array reference. May be a byte or char array
+     * @param encoding
+     * @param isLittleEndian
+     * @param start
+     * @param length
+     * @param dest
+     */
+    private static void copyToCharArray(ArrayReference sourceArray,
+            int srcPos, char[] dest, int destPos, int length,
+            InternalStringEncoding encoding, boolean isLittleEndian) throws
+            ObjectCollectedExceptionWrapper, VMDisconnectedExceptionWrapper,
+            InternalExceptionWrapper, IOException{
+        //grab applicable values
+        int realStart = srcPos;
+        int realLength = length;
+        if (encoding == InternalStringEncoding.BYTE_ARRAY_UTF16){
+            realStart *= 2;
+            realLength *= 2;
+        }
+        List<Value> values = ArrayReferenceWrapper.getValues(sourceArray,
+                realStart, realLength);
+        //copy them
+        copyToCharArray(values, 0, dest, destPos, length, encoding,
+                isLittleEndian);
+    }
+
+    /**
+     * Copy the input to the destination array as if by {@link System#arraycopy}
+     * @param sourceArray Backing array reference. May be a byte or char array
+     * @param backing
+     * @param isLittleEndian
+     * @param start
+     * @param length
+     * @param dest
+     */
+    private static void copyToCharArray(List<Value> sourceArray, int srcPos,
+            char[] dest, int destPos, int length, InternalStringEncoding backing,
+            boolean isLittleEndian) throws IOException{
+        if (backing == InternalStringEncoding.CHAR_ARRAY){
+            //that was easy
+            for (int i = 0; i < length; i++) {
+                Value v = sourceArray.get(i + srcPos);
+                if (!(v instanceof CharValue)){
+                    throw new IOException(MessageFormat.format("Char at {0} "
+                            + "is not a character: {1}", srcPos + i, v));
+                }
+                dest[destPos + i] = ((CharValue)v).charValue();
+            }
+            return;
+        }
+        if (backing == InternalStringEncoding.BYTE_ARRAY_LATIN1){
+            //that was also easy
+            for (int i = 0; i < length; i++) {
+                Value v = sourceArray.get(i + srcPos);
+                if (!(v instanceof ByteValue)){
+                    throw new IOException(MessageFormat.format("Char at {0} "
+                            + "is not a byte: {1}", srcPos + i, v));
+                }
+                char c = (char)((ByteValue)v).byteValue();
+                //strip off the sign value
+                c &= 0xff;
+                dest[destPos + i] = c;
+            }
+            return;
+        }
+        //utf16 it is
+        assert backing == InternalStringEncoding.BYTE_ARRAY_UTF16;
+        for (int i = 0; i < length; i++) {
+            Value left = sourceArray.get(i * 2);
+            Value right = sourceArray.get((i * 2) + 1);
+            if (!(left instanceof ByteValue && right instanceof ByteValue)){
+                throw new IOException(MessageFormat.format("Char at {0} is "
+                        + "not a byte pair: {1},{2}", srcPos + i, left, right));
+            }
+            dest[destPos + i] = utf16Combine(((ByteValue)left).byteValue(),
+                    ((ByteValue)right).byteValue(), isLittleEndian);
+        }
+    }
+
+    private static char utf16Combine(byte left, byte right, boolean isLittleEndian){
+        int hiByteShift, lowByteShift;
+        if (isLittleEndian){
+            hiByteShift = 0;
+            lowByteShift = 8;
+        }
+        else{
+            hiByteShift = 8;
+            lowByteShift = 0;
+        }
+        char c1 = (char)left;
+        char c2 = (char)right;
+        //remove the extended sign
+        c1 = (char) (0xFF & c1);
+        c2 = (char) (0xFF & c2);
+        char c = (char)(c1 << hiByteShift |
+                c2 << lowByteShift);
+        return c;
+    }
+
+    private static int length(int arrayLength, InternalStringEncoding backingEncoding){
+        switch (backingEncoding) {
+            case CHAR_ARRAY:
+            case BYTE_ARRAY_LATIN1:
+                return arrayLength;
+            case BYTE_ARRAY_UTF16:
+                return arrayLength / 2;
+            default:
+                throw new AssertionError();
+        }
+    }
+
     public static class StringInfo {
 
+        private final InternalStringEncoding backingEncoding;
+        private final boolean isLittleEndian;
         private final StringReference sr;
         private final int shortLength;
         private final int length;
         private final ArrayReference chars;
 
-        private StringInfo(StringReference sr, int shortLength, int length, ArrayReference chars) {
+        private StringInfo(StringReference sr, int shortLength, int length,
+                ArrayReference chars, InternalStringEncoding backingEncoding,
+                boolean isLittleEndian) {
             this.sr = sr;
             this.shortLength = shortLength;
             this.length = length;
             this.chars = chars;
+            this.backingEncoding = backingEncoding;
+            //caching that so we don't have to risk more exceptions to figure
+            //out what it is
+            this.isLittleEndian = isLittleEndian;
         }
 
         public int getShortLength() {
@@ -422,31 +593,24 @@ public final class ShortenedStrings {
 
         public Reader getContent() {
             return new Reader() {
-
                 int pos = 0;
-
                 @Override
                 public int read(char[] cbuf, int off, int len) throws IOException {
                     if (pos + len > length) {
                         len = length - pos;
                     }
-                    List<Value> values;
+                    if (len == 0){
+                        return -1;
+                    }
                     try {
-                        values = ArrayReferenceWrapper.getValues(chars, pos, len);
-                    } catch (InternalExceptionWrapper ex) {
-                        throw new IOException(ex);
-                    } catch (VMDisconnectedExceptionWrapper ex) {
-                        throw new IOException(ex);
-                    } catch (ObjectCollectedExceptionWrapper ex) {
-                        throw new IOException(ex);
+                        copyToCharArray(chars, pos, cbuf, 0, len,
+                                backingEncoding, isLittleEndian);
                     }
-                    for (int i = 0; i < len; i++) {
-                        Value v = values.get(i);
-                        if (!(v instanceof CharValue)) {
-                            int p = pos + i;
-                            throw new IOException("Char at "+p+" is not a character: "+v);
-                        }
-                        cbuf[off + i] = ((CharValue) v).charValue();
+                    catch (IOException ioe){ throw ioe; }//for clarity
+                    catch (InternalExceptionWrapper |
+                            VMDisconnectedExceptionWrapper |
+                            ObjectCollectedExceptionWrapper ex) {
+                        throw new IOException(ex);
                     }
                     pos += len;
                     return len;
@@ -472,4 +636,21 @@ public final class ShortenedStrings {
             this.shortValueRef = new WeakReference<String>(shortenedValue);
         }
     }
+
+    private enum InternalStringEncoding {
+        /**
+         * The string is backed by an array of chars
+         */
+        CHAR_ARRAY,
+        /**
+         * The string is backed by an array of bytes with one byte per char
+         */
+        BYTE_ARRAY_LATIN1,
+        /**
+         * The string is backed by an array of bytes with two bytes per char.
+         * Use {@link #isLittleEndian(com.sun.jdi.VirtualMachine)} to determine
+         * the byte order
+         */
+        BYTE_ARRAY_UTF16
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

For further information about the NetBeans mailing lists, visit:
https://cwiki.apache.org/confluence/display/NETBEANS/Mailing+lists

[netbeans] branch master updated: [NETBEANS-6147] Initial implementation of better long string handling

Reply via email to