This is an automated email from the ASF dual-hosted git repository. yao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new fb8df345c886 [SPARK-53391][CORE] Remove unused PrimitiveKeyOpenHashMap fb8df345c886 is described below commit fb8df345c886401947a6d18d39114a4fb8403812 Author: Kent Yao <y...@apache.org> AuthorDate: Wed Aug 27 18:52:25 2025 +0800 [SPARK-53391][CORE] Remove unused PrimitiveKeyOpenHashMap ### What changes were proposed in this pull request? This PR removes the unused PrimitiveKeyOpenHashMap class and its test suite. ### Why are the changes needed? PrimitiveKeyOpenHashMap is unused. It is also marked as @specialized, so it generates a lot of extra specialized (sp) class files. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Passing CI ### Was this patch authored or co-authored using generative AI tooling? no Closes #52136 from yaooqinn/SPARK-53391. Authored-by: Kent Yao <y...@apache.org> Signed-off-by: Kent Yao <y...@apache.org> --- .../util/collection/PrimitiveKeyOpenHashMap.scala | 128 --------------------- .../collection/PrimitiveKeyOpenHashMapSuite.scala | 126 -------------------- .../org/apache/spark/mllib/fpm/PrefixSpan.scala | 1 - project/MimaExcludes.scala | 4 +- 4 files changed, 3 insertions(+), 256 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMap.scala b/core/src/main/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMap.scala deleted file mode 100644 index 69665aaeac4d..000000000000 --- a/core/src/main/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMap.scala +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.util.collection - -import scala.reflect._ - -/** - * A fast hash map implementation for primitive, non-null keys. This hash map supports - * insertions and updates, but not deletions. This map is about an order of magnitude - * faster than java.util.HashMap, while using much less space overhead. - * - * Under the hood, it uses our OpenHashSet implementation. - */ -private[spark] -class PrimitiveKeyOpenHashMap[@specialized(Long, Int) K: ClassTag, - @specialized(Long, Int, Double) V: ClassTag]( - initialCapacity: Int) - extends Iterable[(K, V)] - with Serializable { - - def this() = this(64) - - require(classTag[K] == classTag[Long] || classTag[K] == classTag[Int]) - - // Init in constructor (instead of in declaration) to work around a Scala compiler specialization - // bug that would generate two arrays (one for Object and one for specialized T). - protected var _keySet: OpenHashSet[K] = _ - private var _values: Array[V] = _ - _keySet = new OpenHashSet[K](initialCapacity) - _values = new Array[V](_keySet.capacity) - - private var _oldValues: Array[V] = null - - override def size: Int = _keySet.size - - /** Tests whether this map contains a binding for a key. */ - def contains(k: K): Boolean = { - _keySet.getPos(k) != OpenHashSet.INVALID_POS - } - - /** Get the value for a given key */ - def apply(k: K): V = { - val pos = _keySet.getPos(k) - _values(pos) - } - - /** Get the value for a given key, or returns elseValue if it doesn't exist. 
*/ - def getOrElse(k: K, elseValue: V): V = { - val pos = _keySet.getPos(k) - if (pos >= 0) _values(pos) else elseValue - } - - /** Set the value for a key */ - def update(k: K, v: V): Unit = { - val pos = _keySet.addWithoutResize(k) & OpenHashSet.POSITION_MASK - _values(pos) = v - _keySet.rehashIfNeeded(k, grow, move) - _oldValues = null - } - - /** - * If the key doesn't exist yet in the hash map, set its value to defaultValue; otherwise, - * set its value to mergeValue(oldValue). - * - * @return the newly updated value. - */ - def changeValue(k: K, defaultValue: => V, mergeValue: (V) => V): V = { - val pos = _keySet.addWithoutResize(k) - if ((pos & OpenHashSet.NONEXISTENCE_MASK) != 0) { - val newValue = defaultValue - _values(pos & OpenHashSet.POSITION_MASK) = newValue - _keySet.rehashIfNeeded(k, grow, move) - newValue - } else { - _values(pos) = mergeValue(_values(pos)) - _values(pos) - } - } - - override def iterator: Iterator[(K, V)] = new Iterator[(K, V)] { - var pos = 0 - var nextPair: (K, V) = computeNextPair() - - /** Get the next value we should return from next(), or null if we're finished iterating */ - def computeNextPair(): (K, V) = { - pos = _keySet.nextPos(pos) - if (pos >= 0) { - val ret = (_keySet.getValue(pos), _values(pos)) - pos += 1 - ret - } else { - null - } - } - - def hasNext: Boolean = nextPair != null - - def next(): (K, V) = { - val pair = nextPair - nextPair = computeNextPair() - pair - } - } - - private def grow(newCapacity: Int): Unit = { - _oldValues = _values - _values = new Array[V](newCapacity) - } - - private def move(oldPos: Int, newPos: Int): Unit = { - _values(newPos) = _oldValues(oldPos) - } -} diff --git a/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala deleted file mode 100644 index 636ea7b0ee70..000000000000 --- 
a/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.util.collection - -import scala.collection.mutable.HashSet - -import org.scalatest.matchers.must.Matchers -import org.scalatest.matchers.should.Matchers._ - -import org.apache.spark.SparkFunSuite -import org.apache.spark.util.SizeEstimator - -class PrimitiveKeyOpenHashMapSuite extends SparkFunSuite with Matchers { - - test("size for specialized, primitive key, value (int, int)") { - val capacity = 1024 - val map = new PrimitiveKeyOpenHashMap[Int, Int](capacity) - val actualSize = SizeEstimator.estimate(map) - // 32 bit for keys, 32 bit for values, and 1 bit for the bitset. - val expectedSize = capacity * (32 + 32 + 1) / 8 - // Make sure we are not allocating a significant amount of memory beyond our expected. 
- actualSize should be <= (expectedSize * 1.1).toLong - } - - test("initialization") { - val goodMap1 = new PrimitiveKeyOpenHashMap[Int, Int](1) - assert(goodMap1.size === 0) - val goodMap2 = new PrimitiveKeyOpenHashMap[Int, Int](255) - assert(goodMap2.size === 0) - val goodMap3 = new PrimitiveKeyOpenHashMap[Int, Int](256) - assert(goodMap3.size === 0) - intercept[IllegalArgumentException] { - new PrimitiveKeyOpenHashMap[Int, Int](1 << 30 + 1) // Invalid map size: bigger than 2^30 - } - intercept[IllegalArgumentException] { - new PrimitiveKeyOpenHashMap[Int, Int](-1) - } - } - - test("basic operations") { - val longBase = 1000000L - val map = new PrimitiveKeyOpenHashMap[Long, Int] - - for (i <- 1 to 1000) { - map(i + longBase) = i - assert(map(i + longBase) === i) - } - - assert(map.size === 1000) - - for (i <- 1 to 1000) { - assert(map(i + longBase) === i) - } - - // Test iterator - val set = new HashSet[(Long, Int)] - for ((k, v) <- map) { - set.add((k, v)) - } - assert(set === (1 to 1000).map(x => (x + longBase, x)).toSet) - } - - test("null values") { - val map = new PrimitiveKeyOpenHashMap[Long, String]() - for (i <- 1 to 100) { - map(i.toLong) = null - } - assert(map.size === 100) - assert(map(1.toLong) === null) - } - - test("changeValue") { - val map = new PrimitiveKeyOpenHashMap[Long, String]() - for (i <- 1 to 100) { - map(i.toLong) = i.toString - } - assert(map.size === 100) - for (i <- 1 to 100) { - val res = map.changeValue(i.toLong, { assert(false); "" }, v => { - assert(v === i.toString) - v + "!" - }) - assert(res === s"$i!") - } - // Iterate from 101 to 400 to make sure the map grows a couple of times, because we had a - // bug where changeValue would return the wrong result when the map grew on that insert - for (i <- 101 to 400) { - val res = map.changeValue(i.toLong, { s"$i!" 
}, v => { assert(false); v }) - assert(res === s"$i!") - } - assert(map.size === 400) - } - - test("inserting in capacity-1 map") { - val map = new PrimitiveKeyOpenHashMap[Long, String](1) - for (i <- 1 to 100) { - map(i.toLong) = i.toString - } - assert(map.size === 100) - for (i <- 1 to 100) { - assert(map(i.toLong) === i.toString) - } - } - - test("contains") { - val map = new PrimitiveKeyOpenHashMap[Int, Int](1) - map(0) = 0 - assert(map.contains(0)) - assert(!map.contains(1)) - } -} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala index df10fee3330b..a81bd823415a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala @@ -460,7 +460,6 @@ object PrefixSpan extends Logging { def genPrefixItems: Iterator[(Int, Long)] = { val n1 = items.length - 1 // For each unique item (subject to sign) in this sequence, we output exact one split. 
- // TODO: use PrimitiveKeyOpenHashMap val prefixes = mutable.Map.empty[Int, Long] // a) items that can be assembled to the last itemset of the prefix partialStarts.foreach { start => diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index f11896a1bba7..d18d804e2544 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -39,7 +39,9 @@ object MimaExcludes { ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.linalg.Vector.getSizeInBytes"), // [SPARK-52221][SQL] Refactor SqlScriptingLocalVariableManager into more generic context manager - ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.scripting.SqlScriptingExecution.withLocalVariableManager") + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.scripting.SqlScriptingExecution.withLocalVariableManager"), + + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.util.collection.PrimitiveKeyOpenHashMap*") ) // Default exclude rules --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org