(spark) branch master updated: [SPARK-53391][CORE] Remove unused PrimitiveKeyOpenHashMap

yao Wed, 27 Aug 2025 03:52:46 -0700

This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new fb8df345c886 [SPARK-53391][CORE] Remove unused PrimitiveKeyOpenHashMap
fb8df345c886 is described below

commit fb8df345c886401947a6d18d39114a4fb8403812
Author: Kent Yao <y...@apache.org>
AuthorDate: Wed Aug 27 18:52:25 2025 +0800

    [SPARK-53391][CORE] Remove unused PrimitiveKeyOpenHashMap
    
    ### What changes were proposed in this pull request?
    
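    This PR removes the unused `PrimitiveKeyOpenHashMap` class, together with its
    test suite and a stale TODO comment in `PrefixSpan` that referred to it.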
    
    ### Why are the changes needed?
    
    `PrimitiveKeyOpenHashMap` is unused. Because its type parameters are marked
    `@specialized`, it also generates a large number of specialized (`$sp`) class files.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
    Passing CI
    
    ### Was this patch authored or co-authored using generative AI tooling?
    no
    
    Closes #52136 from yaooqinn/SPARK-53391.
    
    Authored-by: Kent Yao <y...@apache.org>
    Signed-off-by: Kent Yao <y...@apache.org>
---
 .../util/collection/PrimitiveKeyOpenHashMap.scala  | 128 ---------------------
 .../collection/PrimitiveKeyOpenHashMapSuite.scala  | 126 --------------------
 .../org/apache/spark/mllib/fpm/PrefixSpan.scala    |   1 -
 project/MimaExcludes.scala                         |   4 +-
 4 files changed, 3 insertions(+), 256 deletions(-)

diff --git 
a/core/src/main/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMap.scala
 
b/core/src/main/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMap.scala
deleted file mode 100644
index 69665aaeac4d..000000000000
--- 
a/core/src/main/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMap.scala
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.util.collection
-
-import scala.reflect._
-
-/**
- * A fast hash map implementation for primitive, non-null keys. This hash map 
supports
- * insertions and updates, but not deletions. This map is about an order of 
magnitude
- * faster than java.util.HashMap, while using much less space overhead.
- *
- * Under the hood, it uses our OpenHashSet implementation.
- */
-private[spark]
-class PrimitiveKeyOpenHashMap[@specialized(Long, Int) K: ClassTag,
-                              @specialized(Long, Int, Double) V: ClassTag](
-    initialCapacity: Int)
-  extends Iterable[(K, V)]
-  with Serializable {
-
-  def this() = this(64)
-
-  require(classTag[K] == classTag[Long] || classTag[K] == classTag[Int])
-
-  // Init in constructor (instead of in declaration) to work around a Scala 
compiler specialization
-  // bug that would generate two arrays (one for Object and one for 
specialized T).
-  protected var _keySet: OpenHashSet[K] = _
-  private var _values: Array[V] = _
-  _keySet = new OpenHashSet[K](initialCapacity)
-  _values = new Array[V](_keySet.capacity)
-
-  private var _oldValues: Array[V] = null
-
-  override def size: Int = _keySet.size
-
-  /** Tests whether this map contains a binding for a key. */
-  def contains(k: K): Boolean = {
-    _keySet.getPos(k) != OpenHashSet.INVALID_POS
-  }
-
-  /** Get the value for a given key */
-  def apply(k: K): V = {
-    val pos = _keySet.getPos(k)
-    _values(pos)
-  }
-
-  /** Get the value for a given key, or returns elseValue if it doesn't exist. 
*/
-  def getOrElse(k: K, elseValue: V): V = {
-    val pos = _keySet.getPos(k)
-    if (pos >= 0) _values(pos) else elseValue
-  }
-
-  /** Set the value for a key */
-  def update(k: K, v: V): Unit = {
-    val pos = _keySet.addWithoutResize(k) & OpenHashSet.POSITION_MASK
-    _values(pos) = v
-    _keySet.rehashIfNeeded(k, grow, move)
-    _oldValues = null
-  }
-
-  /**
-   * If the key doesn't exist yet in the hash map, set its value to 
defaultValue; otherwise,
-   * set its value to mergeValue(oldValue).
-   *
-   * @return the newly updated value.
-   */
-  def changeValue(k: K, defaultValue: => V, mergeValue: (V) => V): V = {
-    val pos = _keySet.addWithoutResize(k)
-    if ((pos & OpenHashSet.NONEXISTENCE_MASK) != 0) {
-      val newValue = defaultValue
-      _values(pos & OpenHashSet.POSITION_MASK) = newValue
-      _keySet.rehashIfNeeded(k, grow, move)
-      newValue
-    } else {
-      _values(pos) = mergeValue(_values(pos))
-      _values(pos)
-    }
-  }
-
-  override def iterator: Iterator[(K, V)] = new Iterator[(K, V)] {
-    var pos = 0
-    var nextPair: (K, V) = computeNextPair()
-
-    /** Get the next value we should return from next(), or null if we're 
finished iterating */
-    def computeNextPair(): (K, V) = {
-      pos = _keySet.nextPos(pos)
-      if (pos >= 0) {
-        val ret = (_keySet.getValue(pos), _values(pos))
-        pos += 1
-        ret
-      } else {
-        null
-      }
-    }
-
-    def hasNext: Boolean = nextPair != null
-
-    def next(): (K, V) = {
-      val pair = nextPair
-      nextPair = computeNextPair()
-      pair
-    }
-  }
-
-  private def grow(newCapacity: Int): Unit = {
-    _oldValues = _values
-    _values = new Array[V](newCapacity)
-  }
-
-  private def move(oldPos: Int, newPos: Int): Unit = {
-    _values(newPos) = _oldValues(oldPos)
-  }
-}
diff --git 
a/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala
 
b/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala
deleted file mode 100644
index 636ea7b0ee70..000000000000
--- 
a/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.util.collection
-
-import scala.collection.mutable.HashSet
-
-import org.scalatest.matchers.must.Matchers
-import org.scalatest.matchers.should.Matchers._
-
-import org.apache.spark.SparkFunSuite
-import org.apache.spark.util.SizeEstimator
-
-class PrimitiveKeyOpenHashMapSuite extends SparkFunSuite with Matchers {
-
-  test("size for specialized, primitive key, value (int, int)") {
-    val capacity = 1024
-    val map = new PrimitiveKeyOpenHashMap[Int, Int](capacity)
-    val actualSize = SizeEstimator.estimate(map)
-    // 32 bit for keys, 32 bit for values, and 1 bit for the bitset.
-    val expectedSize = capacity * (32 + 32 + 1) / 8
-    // Make sure we are not allocating a significant amount of memory beyond 
our expected.
-    actualSize should be <= (expectedSize * 1.1).toLong
-  }
-
-  test("initialization") {
-    val goodMap1 = new PrimitiveKeyOpenHashMap[Int, Int](1)
-    assert(goodMap1.size === 0)
-    val goodMap2 = new PrimitiveKeyOpenHashMap[Int, Int](255)
-    assert(goodMap2.size === 0)
-    val goodMap3 = new PrimitiveKeyOpenHashMap[Int, Int](256)
-    assert(goodMap3.size === 0)
-    intercept[IllegalArgumentException] {
-      new PrimitiveKeyOpenHashMap[Int, Int](1 << 30 + 1) // Invalid map size: 
bigger than 2^30
-    }
-    intercept[IllegalArgumentException] {
-      new PrimitiveKeyOpenHashMap[Int, Int](-1)
-    }
-  }
-
-  test("basic operations") {
-    val longBase = 1000000L
-    val map = new PrimitiveKeyOpenHashMap[Long, Int]
-
-    for (i <- 1 to 1000) {
-      map(i + longBase) = i
-      assert(map(i + longBase) === i)
-    }
-
-    assert(map.size === 1000)
-
-    for (i <- 1 to 1000) {
-      assert(map(i + longBase) === i)
-    }
-
-    // Test iterator
-    val set = new HashSet[(Long, Int)]
-    for ((k, v) <- map) {
-      set.add((k, v))
-    }
-    assert(set === (1 to 1000).map(x => (x + longBase, x)).toSet)
-  }
-
-  test("null values") {
-    val map = new PrimitiveKeyOpenHashMap[Long, String]()
-    for (i <- 1 to 100) {
-      map(i.toLong) = null
-    }
-    assert(map.size === 100)
-    assert(map(1.toLong) === null)
-  }
-
-  test("changeValue") {
-    val map = new PrimitiveKeyOpenHashMap[Long, String]()
-    for (i <- 1 to 100) {
-      map(i.toLong) = i.toString
-    }
-    assert(map.size === 100)
-    for (i <- 1 to 100) {
-      val res = map.changeValue(i.toLong, { assert(false); "" }, v => {
-        assert(v === i.toString)
-        v + "!"
-      })
-      assert(res === s"$i!")
-    }
-    // Iterate from 101 to 400 to make sure the map grows a couple of times, 
because we had a
-    // bug where changeValue would return the wrong result when the map grew 
on that insert
-    for (i <- 101 to 400) {
-      val res = map.changeValue(i.toLong, { s"$i!" }, v => { assert(false); v 
})
-      assert(res === s"$i!")
-    }
-    assert(map.size === 400)
-  }
-
-  test("inserting in capacity-1 map") {
-    val map = new PrimitiveKeyOpenHashMap[Long, String](1)
-    for (i <- 1 to 100) {
-      map(i.toLong) = i.toString
-    }
-    assert(map.size === 100)
-    for (i <- 1 to 100) {
-      assert(map(i.toLong) === i.toString)
-    }
-  }
-
-  test("contains") {
-    val map = new PrimitiveKeyOpenHashMap[Int, Int](1)
-    map(0) = 0
-    assert(map.contains(0))
-    assert(!map.contains(1))
-  }
-}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala
index df10fee3330b..a81bd823415a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala
@@ -460,7 +460,6 @@ object PrefixSpan extends Logging {
     def genPrefixItems: Iterator[(Int, Long)] = {
       val n1 = items.length - 1
       // For each unique item (subject to sign) in this sequence, we output 
exact one split.
-      // TODO: use PrimitiveKeyOpenHashMap
       val prefixes = mutable.Map.empty[Int, Long]
       // a) items that can be assembled to the last itemset of the prefix
       partialStarts.foreach { start =>
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index f11896a1bba7..d18d804e2544 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -39,7 +39,9 @@ object MimaExcludes {
     
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.linalg.Vector.getSizeInBytes"),
 
     // [SPARK-52221][SQL] Refactor SqlScriptingLocalVariableManager into more 
generic context manager
-    
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.scripting.SqlScriptingExecution.withLocalVariableManager")
+    
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.scripting.SqlScriptingExecution.withLocalVariableManager"),
+
+    
ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.util.collection.PrimitiveKeyOpenHashMap*")
   )
 
   // Default exclude rules


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

(spark) branch master updated: [SPARK-53391][CORE] Remove unused PrimitiveKeyOpenHashMap

Reply via email to