This is an automated email from the ASF dual-hosted git repository.

dmollitor pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/main by this push:
     new 67263a370 AVRO-4065: Do Not Copy Array Contents when Expanding UTF-8 Arrays (#3181)
67263a370 is described below

commit 67263a370d5489d9a9040e26503065db4ebd9672
Author: belugabehr <[email protected]>
AuthorDate: Mon Oct 7 07:16:09 2024 -0400

    AVRO-4065: Do Not Copy Array Contents when Expanding UTF-8 Arrays (#3181)
---
 .../src/main/java/org/apache/avro/util/Utf8.java   | 46 +++++++++++-----------
 1 file changed, 22 insertions(+), 24 deletions(-)

diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
index ae4df8e5c..22c21c76b 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
@@ -69,46 +69,44 @@ public class Utf8 implements Comparable<Utf8>, CharSequence, Externalizable {
   }
 
   /**
-   * Return UTF-8 encoded bytes. Only valid through {@link #getByteLength()}.
+   * Return UTF-8 encoded bytes. Only valid through {@link #getByteLength()}
+   * assuming the bytes have been fully copied into the underlying buffer from the
+   * source.
+   *
+   * @see #setByteLength(int)
+   * @return a reference to the underlying byte array
    */
   public byte[] getBytes() {
     return bytes;
   }
 
-  /**
-   * Return length in bytes.
-   *
-   * @deprecated call {@link #getByteLength()} instead.
-   */
-  @Deprecated
-  public int getLength() {
-    return length;
-  }
-
   /** Return length in bytes. */
   public int getByteLength() {
     return length;
   }
 
   /**
-   * Set length in bytes. Should called whenever byte content changes, even if the
-   * length does not change, as this also clears the cached String.
+   * Set length in bytes. When calling this method, even if the new length is the
+   * same as the current length, the cached contents of this Utf8 object will be
+   * wiped out. After calling this method, no assumptions should be made about the
+   * internal state (e.g., contents, hashcode, equality, etc.) of this Utf8 String
+   * other than the internal buffer being large enough to accommodate a String of
+   * the new length. This should be called immediately before reading a String
+   * from the underlying data source.
    *
-   * @deprecated call {@link #setByteLength(int)} instead.
-   */
-  @Deprecated
-  public Utf8 setLength(int newLength) {
-    return setByteLength(newLength);
-  }
-
-  /**
-   * Set length in bytes. Should called whenever byte content changes, even if the
-   * length does not change, as this also clears the cached String.
+   * @param newLength the new length of the underlying buffer
+   * @return a reference to this object.
+   * @see org.apache.avro.io.BinaryDecoder#readString(Utf8)
    */
   public Utf8 setByteLength(int newLength) {
     SystemLimitException.checkMaxStringLength(newLength);
+
+    // Note that if the buffer size increases, the internal buffer is zero-ed out.
+    // If the buffer is large enough, just the length pointer moves and the old
+    // contents remain. For consistency's sake, we could zero-out the buffer in
+    // both cases, but would be a perf hit.
     if (this.bytes.length < newLength) {
-      this.bytes = Arrays.copyOf(this.bytes, newLength);
+      this.bytes = new byte[newLength];
     }
     this.length = newLength;
     this.string = null;

Reply via email to