This is an automated email from the ASF dual-hosted git repository.
pjfanning pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/pekko.git
The following commit(s) were added to refs/heads/main by this push:
new 8675ef58e7 optimize ByteStrings startswith/endswith (#2896)
8675ef58e7 is described below
commit 8675ef58e7c884d2ffc582b8d6ce1c05cce57685
Author: PJ Fanning <[email protected]>
AuthorDate: Fri May 8 15:07:52 2026 +0100
optimize ByteStrings startswith/endswith (#2896)
* optimize ByteStrings startsWith, endsWith, foreach, and map without
compacting
Agent-Logs-Url:
https://github.com/pjfanning/incubator-pekko/sessions/e1fe758b-faef-44d1-8052-a234aaa0e3c4
Co-authored-by: pjfanning <[email protected]>
* add additional map and foreach test cases from PR #35
Agent-Logs-Url:
https://github.com/pjfanning/incubator-pekko/sessions/e1fe758b-faef-44d1-8052-a234aaa0e3c4
Co-authored-by: pjfanning <[email protected]>
* optimize ByteStrings.endsWith: locate suffix start from tail, not head
Agent-Logs-Url:
https://github.com/pjfanning/incubator-pekko/sessions/d413cbb5-d980-4190-bf6f-fb3a409ee50e
Co-authored-by: pjfanning <[email protected]>
---------
Co-authored-by: copilot-swe-agent[bot]
<[email protected]>
Co-authored-by: pjfanning <[email protected]>
---
.../org/apache/pekko/util/ByteStringSpec.scala | 11 ++-
.../scala/org/apache/pekko/util/ByteString.scala | 103 +++++++++++++++++++++
2 files changed, 112 insertions(+), 2 deletions(-)
diff --git
a/actor-tests/src/test/scala/org/apache/pekko/util/ByteStringSpec.scala
b/actor-tests/src/test/scala/org/apache/pekko/util/ByteStringSpec.scala
index 6c026e45c1..f4075ec5a5 100644
--- a/actor-tests/src/test/scala/org/apache/pekko/util/ByteStringSpec.scala
+++ b/actor-tests/src/test/scala/org/apache/pekko/util/ByteStringSpec.scala
@@ -2261,9 +2261,13 @@ class ByteStringSpec extends AnyWordSpec with Matchers
with Checkers {
ByteString1C(Array[Byte](1, 2, 3)).map(inc) should
===(ByteString(Array[Byte](2, 3, 4)))
// ByteString1 with offset
ByteString1(Array[Byte](0, 1, 2, 3, 4), 1, 3).map(inc) should
===(ByteString(Array[Byte](2, 3, 4)))
- // ByteStrings
+ // ByteStrings (2 segments)
val bss = ByteStrings(ByteString1.fromString("ab"),
ByteString1.fromString("cd"))
bss.map(b => (b + 1).toByte) should ===(ByteString("bcde"))
+ // ByteStrings (3 segments, including negative byte values)
+ val bss3 = ByteString1(Array[Byte](-1, -2)) ++
ByteString1(Array[Byte](0, 1)) ++ ByteString1(
+ Array[Byte](126, 127))
+ bss3.map(b => (b + 1).toByte) should ===(ByteString(Array[Byte](0, -1,
1, 2, 127, -128)))
// empty
ByteString.empty.map(inc) should ===(ByteString.empty)
}
@@ -2278,9 +2282,12 @@ class ByteStringSpec extends AnyWordSpec with Matchers
with Checkers {
collect(ByteString1C(Array[Byte](10, 20, 30))) should ===(Seq[Byte](10,
20, 30))
// ByteString1 with internal offset
collect(ByteString1(Array[Byte](0, 10, 20, 30, 40), 1, 3)) should
===(Seq[Byte](10, 20, 30))
- // ByteStrings (multi-segment)
+ // ByteStrings (multi-segment, 2 segments)
collect(ByteStrings(ByteString1.fromString("ab"),
ByteString1.fromString("cd"))) should ===(
Seq[Byte]('a', 'b', 'c', 'd'))
+ // ByteStrings (multi-segment, 3+ segments, including negative byte
values)
+ val bss3 = ByteString1(Array[Byte](-1, -128)) ++
ByteString1(Array[Byte](0)) ++ ByteString1(Array[Byte](127))
+ collect(bss3) should ===(Seq[Byte](-1, -128, 0, 127))
// empty
collect(ByteString.empty) should ===(Seq.empty[Byte])
}
diff --git a/actor/src/main/scala/org/apache/pekko/util/ByteString.scala
b/actor/src/main/scala/org/apache/pekko/util/ByteString.scala
index bf91ab24fd..42aa48d941 100644
--- a/actor/src/main/scala/org/apache/pekko/util/ByteString.scala
+++ b/actor/src/main/scala/org/apache/pekko/util/ByteString.scala
@@ -402,6 +402,24 @@ object ByteString {
java.util.Arrays.equals(this.bytes, hIdx, hIdx + (needleLen - nIdx),
bytes, nIdx, needleLen)
}
+ /**
+ * INTERNAL API: compare `len` bytes from this ByteString starting at
`haystackOffset`
+ * against `needle[needleOffset..needleOffset+len)`.
+ */
+ private[pekko] def matchesAt(haystackOffset: Int, needle: Array[Byte],
needleOffset: Int, len: Int): Boolean = {
+ var hIdx = haystackOffset
+ var nIdx = needleOffset
+ var remaining = len
+ while (remaining >= 8) {
+ if (SWARUtil.getLong(bytes, hIdx, ByteOrder.BIG_ENDIAN) !=
+ SWARUtil.getLong(needle, nIdx, ByteOrder.BIG_ENDIAN)) return false
+ hIdx += 8
+ nIdx += 8
+ remaining -= 8
+ }
+ java.util.Arrays.equals(bytes, hIdx, hIdx + remaining, needle, nIdx,
nIdx + remaining)
+ }
+
override def slice(from: Int, until: Int): ByteString =
if (from <= 0 && until >= length) this
else if (from >= length || until <= 0 || from >= until) ByteString.empty
@@ -798,6 +816,24 @@ object ByteString {
java.util.Arrays.equals(this.bytes, hIdx, hIdx + (needleLen - nIdx),
bytes, nIdx, needleLen)
}
+ /**
+ * INTERNAL API: compare `len` bytes from this ByteString starting at
logical `haystackOffset`
+ * against `needle[needleOffset..needleOffset+len)`.
+ */
+ private[pekko] def matchesAt(haystackOffset: Int, needle: Array[Byte],
needleOffset: Int, len: Int): Boolean = {
+ var hIdx = startIndex + haystackOffset
+ var nIdx = needleOffset
+ var remaining = len
+ while (remaining >= 8) {
+ if (SWARUtil.getLong(bytes, hIdx, ByteOrder.BIG_ENDIAN) !=
+ SWARUtil.getLong(needle, nIdx, ByteOrder.BIG_ENDIAN)) return false
+ hIdx += 8
+ nIdx += 8
+ remaining -= 8
+ }
+ java.util.Arrays.equals(bytes, hIdx, hIdx + remaining, needle, nIdx,
nIdx + remaining)
+ }
+
override def copyToArray[B >: Byte](dest: Array[B], start: Int, len: Int):
Int = {
// min of the bytes available to copy, bytes there is room for in dest
and the requested number of bytes
val toCopy = math.max(0, math.min(math.min(len, length), dest.length -
start))
@@ -1225,6 +1261,73 @@ object ByteString {
}
}
+ override def startsWith(bytes: Array[Byte], offset: Int): Boolean = {
+ val needleLen = bytes.length
+ if (length - offset < needleLen) return false
+ if (needleLen == 0) return true
+ // Locate the fragment that contains position `offset`
+ var fragIdx = 0
+ var fragOffset = offset
+ while (fragIdx < bytestrings.length && fragOffset >=
bytestrings(fragIdx).length) {
+ fragOffset -= bytestrings(fragIdx).length
+ fragIdx += 1
+ }
+ // Compare needle against consecutive fragments without compacting
+ var nIdx = 0
+ while (nIdx < needleLen) {
+ val frag = bytestrings(fragIdx)
+ val toCmp = math.min(needleLen - nIdx, frag.length - fragOffset)
+ if (!frag.matchesAt(fragOffset, bytes, nIdx, toCmp)) return false
+ nIdx += toCmp
+ fragIdx += 1
+ fragOffset = 0
+ }
+ true
+ }
+
+ override def endsWith(bytes: Array[Byte]): Boolean = {
+ val needleLen = bytes.length
+ if (length < needleLen) return false
+ if (needleLen == 0) return true
+ // Locate the fragment containing `length - needleLen` by walking
backward from the last
+ // fragment, so only the suffix fragments are touched (O(suffix
fragments), not O(all fragments)).
+ var fragIdx = bytestrings.length - 1
+ var bytesFromTail = needleLen
+ while (fragIdx > 0 && bytesFromTail > bytestrings(fragIdx).length) {
+ bytesFromTail -= bytestrings(fragIdx).length
+ fragIdx -= 1
+ }
+ var fragOffset = bytestrings(fragIdx).length - bytesFromTail
+ // Compare needle against consecutive fragments without compacting
+ var nIdx = 0
+ while (nIdx < needleLen) {
+ val frag = bytestrings(fragIdx)
+ val toCmp = math.min(needleLen - nIdx, frag.length - fragOffset)
+ if (!frag.matchesAt(fragOffset, bytes, nIdx, toCmp)) return false
+ nIdx += toCmp
+ fragIdx += 1
+ fragOffset = 0
+ }
+ true
+ }
+
+ override def foreach[@specialized U](f: Byte => U): Unit =
+ bytestrings.foreach(_.foreach(f))
+
+ override def map[A](f: Byte => Byte): ByteString = {
+ val result = new Array[Byte](length)
+ var pos = 0
+ bytestrings.foreach { bs =>
+ var i = 0
+ while (i < bs.length) {
+ result(pos) = f(bs(i))
+ pos += 1
+ i += 1
+ }
+ }
+ ByteString1C(result)
+ }
+
override def copyToArray[B >: Byte](dest: Array[B], start: Int, len: Int):
Int = {
if (bytestrings.size == 1) bytestrings.head.copyToArray(dest, start, len)
else {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]