This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 8d646fe50965 [MINOR][PYTHON] Remove _inferSchema in SQLContext
8d646fe50965 is described below
commit 8d646fe509651dde60a80eab97e7261020b0df70
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Mon Nov 27 17:03:19 2023 +0900
[MINOR][PYTHON] Remove _inferSchema in SQLContext
### What changes were proposed in this pull request?
There are only two places that use `SQLContext._inferSchema`, and they can be
safely converted to use `SQLContext.sparkSession._inferSchema` instead.
### Why are the changes needed?
For code cleanup, and remove unused private method
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Existing test cases should cover them.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #44031 from HyukjinKwon/minor-remove-inferSchema.
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/mllib/evaluation.py | 4 ++--
python/pyspark/sql/context.py | 18 ------------------
2 files changed, 2 insertions(+), 20 deletions(-)
diff --git a/python/pyspark/mllib/evaluation.py
b/python/pyspark/mllib/evaluation.py
index 73696ab46f87..df756f848429 100644
--- a/python/pyspark/mllib/evaluation.py
+++ b/python/pyspark/mllib/evaluation.py
@@ -462,7 +462,7 @@ class RankingMetrics(JavaModelWrapper, Generic[T]):
sc = predictionAndLabels.ctx
sql_ctx = SQLContext.getOrCreate(sc)
df = sql_ctx.createDataFrame(
- predictionAndLabels,
schema=sql_ctx._inferSchema(predictionAndLabels)
+ predictionAndLabels,
schema=sql_ctx.sparkSession._inferSchema(predictionAndLabels)
)
java_model = callMLlibFunc("newRankingMetrics", df._jdf)
super(RankingMetrics, self).__init__(java_model)
@@ -576,7 +576,7 @@ class MultilabelMetrics(JavaModelWrapper):
sc = predictionAndLabels.ctx
sql_ctx = SQLContext.getOrCreate(sc)
df = sql_ctx.createDataFrame(
- predictionAndLabels,
schema=sql_ctx._inferSchema(predictionAndLabels)
+ predictionAndLabels,
schema=sql_ctx.sparkSession._inferSchema(predictionAndLabels)
)
assert sc._jvm is not None
java_class =
sc._jvm.org.apache.spark.mllib.evaluation.MultilabelMetrics
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index efc9760edf8b..7ef7b320eeb4 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -311,24 +311,6 @@ class SQLContext:
)
return self.sparkSession.udf.registerJavaFunction(name, javaClassName,
returnType)
- # TODO(andrew): delete this once we refactor things to take in SparkSession
- def _inferSchema(self, rdd: RDD, samplingRatio: Optional[float] = None) ->
StructType:
- """
- Infer schema from an RDD of Row or tuple.
-
- Parameters
- ----------
- rdd : :class:`RDD`
- an RDD of Row or tuple
- samplingRatio : float, optional
- sampling ratio, or no sampling (default)
-
- Returns
- -------
- :class:`pyspark.sql.types.StructType`
- """
- return self.sparkSession._inferSchema(rdd, samplingRatio)
-
@overload
def createDataFrame(
self,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]