baeminbo commented on code in PR #32398:
URL: https://github.com/apache/beam/pull/32398#discussion_r1746700502
##########
sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextSource.java:
##########
@@ -487,4 +457,110 @@ private boolean readCustomLine() throws IOException {
return true;
}
}
+
+ /**
+  * A {@link ByteArrayOutputStream} that can decode a sub-range of its buffered bytes directly
+  * into a {@link String}, avoiding the extra byte copy that the stock API would need.
+  *
+  * <p>Without this class, taking a substring of the output requires two byte copies:
+  *
+  * <pre>{@code
+  * ByteArrayOutputStream out = ...;
+  * byte[] buffer = out.toByteArray(); // 1st copy
+  * String s = new String(buffer, offset, length); // 2nd copy
+  * }</pre>
+  */
+ static class SubstringByteArrayOutputStream extends ByteArrayOutputStream {
+   /**
+    * Decodes {@code length} bytes of the buffered content, starting at {@code offset}, using
+    * {@code charset}.
+    *
+    * @param offset index of the first byte to decode; must be between 0 and the number of
+    *     bytes written so far (inclusive)
+    * @param length number of bytes to decode; must be non-negative and must not extend past
+    *     the bytes written so far
+    * @param charset charset used to decode the bytes
+    * @return the decoded string
+    * @throws IllegalArgumentException if {@code offset} or {@code length} is negative, or if
+    *     the requested range extends beyond the bytes written so far
+    */
+   public String toString(int offset, int length, Charset charset) {
+     if (offset < 0) {
+       throw new IllegalArgumentException("offset is negative: " + offset);
+     }
+     if (offset > count) {
+       throw new IllegalArgumentException(
+           "offset exceeds the buffer limit. offset: " + offset + ", limit: " + count);
+     }
+
+     if (length < 0) {
+       throw new IllegalArgumentException("length is negative: " + length);
+     }
+
+     // Compare against the remaining room rather than computing offset + length: the sum can
+     // overflow int for a large length and would then slip past this check. The subtraction
+     // is safe because 0 <= offset <= count has already been established above.
+     if (length > count - offset) {
+       throw new IllegalArgumentException(
+           "offset + length exceeds the buffer limit. offset: "
+               + offset
+               + ", length: "
+               + length
+               + ", limit: "
+               + count);
+     }
+
+     // Decode straight from the protected backing array; no intermediate byte[] is created.
+     return new String(buf, offset, length, charset);
+   }
+ }
+
+ /**
+ * @see <a
+ *     href="https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm">Knuth–Morris–Pratt
+ *     algorithm</a>
+ */
+ static class KMPDelimiterFinder {
+ private final byte[] delimiter;
+ private final int[] table;
+ int k; // the current position in delimiter
Review Comment:
Done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]