This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new 3ae7142d5dab [SPARK-54592][ML][4.1] Make `estimatedSize` private
3ae7142d5dab is described below

commit 3ae7142d5dabf52ddc702a43622a4d0a43aa03de
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Thu Dec 4 08:27:48 2025 -0800

    [SPARK-54592][ML][4.1] Make `estimatedSize` private
    
    ### What changes were proposed in this pull request?
    Make estimatedSize private
    
    ### Why are the changes needed?
    It is an internal method and should not be exposed to users.
    
    ### Does this PR introduce _any_ user-facing change?
    no
    
    ### How was this patch tested?
    ci
    
    ### Was this patch authored or co-authored using generative AI tooling?
    no
    
    Closes #53322 from zhengruifeng/ml_private_estimate_size_41.
    
    Authored-by: Ruifeng Zheng <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../org/apache/spark/ml/classification/DecisionTreeClassifier.scala   | 2 +-
 .../main/scala/org/apache/spark/ml/classification/FMClassifier.scala  | 2 +-
 .../main/scala/org/apache/spark/ml/classification/GBTClassifier.scala | 2 +-
 .../org/apache/spark/ml/classification/RandomForestClassifier.scala   | 2 +-
 .../main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala   | 3 ++-
 .../main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala   | 3 ++-
 mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala      | 3 ++-
 mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala         | 2 +-
 mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala           | 2 +-
 mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala     | 2 +-
 .../scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala  | 2 +-
 mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala | 4 ++--
 .../src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala  | 2 +-
 .../scala/org/apache/spark/ml/regression/RandomForestRegressor.scala  | 2 +-
 mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala        | 2 +-
 15 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
index 8902d12bdf94..887d8277d311 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
@@ -195,7 +195,7 @@ class DecisionTreeClassificationModel private[ml] (
   // For ml connect only
   private[ml] def this() = this("", Node.dummyNode, -1, -1)
 
-  override def estimatedSize: Long = getEstimatedSize()
+  private[spark] override def estimatedSize: Long = getEstimatedSize()
 
   override def predict(features: Vector): Double = {
     rootNode.predictImpl(features).prediction
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala
index b653383161e7..29ca909f7930 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala
@@ -314,7 +314,7 @@ class FMClassificationModel private[classification] (
     copyValues(new FMClassificationModel(uid, intercept, linear, factors), extra)
   }
 
-  override def estimatedSize: Long = {
+  private[spark] override def estimatedSize: Long = {
     var size = this.estimateMatadataSize
     if (this.linear != null) {
       size += this.linear.getSizeInBytes
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
index 9ca3a1660958..2c5c7e7740a3 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
@@ -276,7 +276,7 @@ class GBTClassificationModel private[ml](
   private[ml] def this() = this("",
     Array(new DecisionTreeRegressionModel), Array(Double.NaN), -1, -1)
 
-  override def estimatedSize: Long = getEstimatedSize()
+  private[spark] override def estimatedSize: Long = getEstimatedSize()
 
   @Since("1.4.0")
   override def trees: Array[DecisionTreeRegressionModel] = _trees
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
index 8b580b1e075c..fb61358536d0 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
@@ -240,7 +240,7 @@ class RandomForestClassificationModel private[ml] (
   // For ml connect only
   private[ml] def this() = this("", Array(new DecisionTreeClassificationModel), -1, -1)
 
-  override def estimatedSize: Long = getEstimatedSize()
+  private[spark] override def estimatedSize: Long = getEstimatedSize()
 
   @Since("1.4.0")
   override def trees: Array[DecisionTreeClassificationModel] = _trees
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala
index 9e09ee00c3e3..c129da5f0d7e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala
@@ -179,7 +179,8 @@ class BisectingKMeansModel private[ml] (
   @Since("2.1.0")
   override def summary: BisectingKMeansSummary = super.summary
 
-  override def estimatedSize: Long = SizeEstimator.estimate(parentModel)
+  private[spark] override def estimatedSize: Long =
+    SizeEstimator.estimate(parentModel)
 
   // BisectingKMeans model hasn't supported offloading, so put an empty `saveSummary` here for now
   override private[spark] def saveSummary(path: String): Unit = {}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
index 5d3e36be2808..ddcca167ff30 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
@@ -222,7 +222,8 @@ class GaussianMixtureModel private[ml] (
   @Since("2.0.0")
   override def summary: GaussianMixtureSummary = super.summary
 
-  override def estimatedSize: Long = SizeEstimator.estimate((weights, gaussians))
+  private[spark] override def estimatedSize: Long =
+    SizeEstimator.estimate((weights, gaussians))
 
   private[spark] def createSummary(
     predictions: DataFrame, logLikelihood: Double, iteration: Int
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
index 2abd82c71296..ad6ca0924064 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
@@ -213,7 +213,8 @@ class KMeansModel private[ml] (
   @Since("2.0.0")
   override def summary: KMeansSummary = super.summary
 
-  override def estimatedSize: Long = SizeEstimator.estimate(parentModel.clusterCenters)
+  private[spark] override def estimatedSize: Long =
+    SizeEstimator.estimate(parentModel.clusterCenters)
 
   private[spark] def createSummary(
     predictions: DataFrame, numIter: Int, trainingCost: Double
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
index 67c9a8f58dd2..c64d3a98c0a9 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
@@ -826,7 +826,7 @@ class DistributedLDAModel private[ml] (
     s"DistributedLDAModel: uid=$uid, k=${$(k)}, numFeatures=$vocabSize"
   }
 
-  override def estimatedSize: Long = {
+  private[spark] override def estimatedSize: Long = {
     // TODO: Implement this method.
     throw new UnsupportedOperationException
   }
diff --git a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala
index 6fd20ceb562b..acd4635c5bbf 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala
@@ -323,7 +323,7 @@ class FPGrowthModel private[ml] (
     s"FPGrowthModel: uid=$uid, numTrainingRecords=$numTrainingRecords"
   }
 
-  override def estimatedSize: Long = {
+  private[spark] override def estimatedSize: Long = {
     // TODO: Implement this method.
     throw new UnsupportedOperationException
   }
diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
index 538ad0382075..1cee915046c0 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
@@ -540,7 +540,7 @@ class ALSModel private[ml] (
     }
   }
 
-  override def estimatedSize: Long = {
+  private[spark] override def estimatedSize: Long = {
     val userCount = userFactors.count()
     val itemCount = itemFactors.count()
     (userCount + itemCount) * (rank + 1) * 4
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
index f049e9a44cc2..5387e0e282a3 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
@@ -190,7 +190,7 @@ class DecisionTreeRegressionModel private[ml] (
   // For ml connect only
   private[ml] def this() = this("", Node.dummyNode, -1)
 
-  override def estimatedSize: Long = getEstimatedSize()
+  private[spark] override def estimatedSize: Long = getEstimatedSize()
 
   override def predict(features: Vector): Double = {
     rootNode.predictImpl(features).prediction
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala
index 1b624895c7f3..d2fcb9280c63 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala
@@ -443,7 +443,7 @@ class FMRegressor @Since("3.0.0") (
   @Since("3.0.0")
   override def copy(extra: ParamMap): FMRegressor = defaultCopy(extra)
 
-  override def estimateModelSize(dataset: Dataset[_]): Long = {
+  private[spark] override def estimateModelSize(dataset: Dataset[_]): Long = {
     val numFeatures = DatasetUtils.getNumFeatures(dataset, $(featuresCol))
 
     var size = this.estimateMatadataSize
@@ -488,7 +488,7 @@ class FMRegressionModel private[regression] (
     copyValues(new FMRegressionModel(uid, intercept, linear, factors), extra)
   }
 
-  override def estimatedSize: Long = {
+  private[spark] override def estimatedSize: Long = {
     var size = this.estimateMatadataSize
     if (this.linear != null) {
       size += this.linear.getSizeInBytes
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
index c8fa97bfccce..71436036d1ea 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
@@ -245,7 +245,7 @@ class GBTRegressionModel private[ml](
   // For ml connect only
   private[ml] def this() = this("", Array(new DecisionTreeRegressionModel), Array(Double.NaN), -1)
 
-  override def estimatedSize: Long = getEstimatedSize()
+  private[spark] override def estimatedSize: Long = getEstimatedSize()
 
   @Since("1.4.0")
   override def trees: Array[DecisionTreeRegressionModel] = _trees
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
index a9e2c47a3229..8d9b4817833b 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
@@ -215,7 +215,7 @@ class RandomForestRegressionModel private[ml] (
   // For ml connect only
   private[ml] def this() = this("", Array(new DecisionTreeRegressionModel), -1)
 
-  override def estimatedSize: Long = getEstimatedSize()
+  private[spark] override def estimatedSize: Long = getEstimatedSize()
 
   @Since("1.4.0")
   override def trees: Array[DecisionTreeRegressionModel] = _trees
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala
index b20b2e943dee..4e9fa89cbde9 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala
@@ -175,7 +175,7 @@ private[spark] trait TreeEnsembleModel[M <: DecisionTreeModel] {
     new AttributeGroup(leafCol, attrs = trees.map(_.leafAttr)).toStructField()
   }
 
-  def getEstimatedSize(): Long = {
+  private[ml] def getEstimatedSize(): Long = {
     org.apache.spark.util.SizeEstimator.estimate(trees.map(_.rootNode))
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to