This is an automated email from the ASF dual-hosted git repository.
yangjie01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 1bc5b29161b5 [SPARK-54862][DSTREAM] Remove unused private object `RawTextHelper`
1bc5b29161b5 is described below
commit 1bc5b29161b5e969910475f6d521dd894c734489
Author: yangjie01 <[email protected]>
AuthorDate: Tue Dec 30 13:50:14 2025 +0800
[SPARK-54862][DSTREAM] Remove unused private object `RawTextHelper`
### What changes were proposed in this pull request?
This PR aims to remove the unused private object `RawTextHelper` from the
`streaming` module. It was introduced in
https://github.com/apache/spark/commit/ae61ebaee64fad117155d65bcdfc8520bda0e6b4
and has not been used since the integration of SPARK-1637
(https://github.com/apache/spark/pull/571).
### Why are the changes needed?
Code cleanup
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Pass GitHub Actions
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #53635 from LuciferYang/SPARK-54862.
Authored-by: yangjie01 <[email protected]>
Signed-off-by: yangjie01 <[email protected]>
---
.../spark/streaming/util/RawTextHelper.scala | 117 ---------------------
1 file changed, 117 deletions(-)
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala
deleted file mode 100644
index b6439262981a..000000000000
--- a/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.streaming.util
-
-import org.apache.spark.SparkContext
-import org.apache.spark.util.collection.OpenHashMap
-
-private[streaming]
-object RawTextHelper {
-
-  /**
-   * Splits lines and counts the words.
-   */
-  def splitAndCountPartitions(iter: Iterator[String]): Iterator[(String, Long)] = {
-    val map = new OpenHashMap[String, Long]
-    var i = 0
-    var j = 0
-    while (iter.hasNext) {
-      val s = iter.next()
-      i = 0
-      while (i < s.length) {
-        j = i
-        while (j < s.length && s.charAt(j) != ' ') {
-          j += 1
-        }
-        if (j > i) {
-          val w = s.substring(i, j)
-          map.changeValue(w, 1L, _ + 1L)
-        }
-        i = j
-        while (i < s.length && s.charAt(i) == ' ') {
-          i += 1
-        }
-      }
-      map.iterator.map {
-        case (k, v) => (k, v)
-      }
-    }
-    map.iterator.map{case (k, v) => (k, v)}
-  }
-
-  /**
-   * Gets the top k words in terms of word counts. Assumes that each word exists only once
-   * in the `data` iterator (that is, the counts have been reduced).
-   */
-  def topK(data: Iterator[(String, Long)], k: Int): Iterator[(String, Long)] = {
-    val taken = new Array[(String, Long)](k)
-
-    var i = 0
-    var len = 0
-    var value: (String, Long) = null
-    var swap: (String, Long) = null
-    var count = 0
-
-    while (data.hasNext) {
-      value = data.next()
-      if (value != null) {
-        count += 1
-        if (len == 0) {
-          taken(0) = value
-          len = 1
-        } else if (len < k || value._2 > taken(len - 1)._2) {
-          if (len < k) {
-            len += 1
-          }
-          taken(len - 1) = value
-          i = len - 1
-          while (i > 0 && taken(i - 1)._2 < taken(i)._2) {
-            swap = taken(i)
-            taken(i) = taken(i-1)
-            taken(i - 1) = swap
-            i -= 1
-          }
-        }
-      }
-    }
-    taken.iterator
-  }
-
-  /**
-   * Warms up the SparkContext in master and executor by running tasks to force JIT kick in
-   * before real workload starts.
-   */
-  def warmUp(sc: SparkContext): Unit = {
-    for (i <- 0 to 1) {
-      sc.parallelize(1 to 200000, 1000)
-        .map(_ % 1331).map(_.toString)
-        .mapPartitions(splitAndCountPartitions).reduceByKey(_ + _, 10)
-        .count()
-    }
-  }
-
-  def add(v1: Long, v2: Long): Long = {
-    v1 + v2
-  }
-
-  def subtract(v1: Long, v2: Long): Long = {
-    v1 - v2
-  }
-
-  def max(v1: Long, v2: Long): Long = math.max(v1, v2)
-}
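
For context, and not part of this commit: the word counting that `RawTextHelper.splitAndCountPartitions` provided is straightforward to express with the standard RDD API, which is consistent with nothing in the codebase depending on the helper anymore. A minimal sketch, assuming a local master, an illustrative object name `WordCountSketch`, and made-up input lines:

```scala
import org.apache.spark.{SparkConf, SparkContext}

// Illustrative sketch only (not from this commit): word counting via the
// plain RDD API, roughly what RawTextHelper.splitAndCountPartitions did.
object WordCountSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("WordCountSketch").setMaster("local[*]")
    val sc = new SparkContext(conf)
    try {
      val counts = sc.parallelize(Seq("spark streaming spark", "raw  text helper"))
        .flatMap(_.split(" "))       // split each line on spaces
        .filter(_.nonEmpty)          // drop empty tokens from repeated spaces
        .map(word => (word, 1L))     // pair each word with an initial count
        .reduceByKey(_ + _)          // sum the counts per word
      counts.collect().foreach(println)
    } finally {
      sc.stop()
    }
  }
}
```

The sketch keeps the helper's behavior of ignoring runs of spaces; everything else (app name, sample input, master URL) is a placeholder for illustration.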