svn commit: r27352 - in /dev/spark/2.3.2-SNAPSHOT-2018_06_08_22_01-1582945-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Sat Jun 9 05:19:58 2018
New Revision: 27352

Log:
Apache Spark 2.3.2-SNAPSHOT-2018_06_08_22_01-1582945 docs

[This commit notification would consist of 1443 parts, which exceeds the limit of 50, so it was shortened to this summary.]
svn commit: r27349 - in /dev/spark/2.4.0-SNAPSHOT-2018_06_08_20_01-f07c506-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Sat Jun 9 03:19:24 2018
New Revision: 27349

Log:
Apache Spark 2.4.0-SNAPSHOT-2018_06_08_20_01-f07c506 docs

[This commit notification would consist of 1466 parts, which exceeds the limit of 50, so it was shortened to this summary.]
spark git commit: [SPARK-24468][SQL] Handle negative scale when adjusting precision for decimal operations
Repository: spark
Updated Branches:
  refs/heads/branch-2.3 36f1d5e17 -> 1582945d0


[SPARK-24468][SQL] Handle negative scale when adjusting precision for decimal operations

## What changes were proposed in this pull request?

In SPARK-22036 we introduced the possibility of allowing precision loss in arithmetic operations (per the SQL standard). The implementation was drawn from Hive's, in which decimals with a negative scale are not allowed in these operations. This PR handles the case where the scale is negative, removing the assertion that forbids it.

## How was this patch tested?

Added UTs.

Author: Marco Gaido

Closes #21499 from mgaido91/SPARK-24468.

(cherry picked from commit f07c5064a3967cdddf57c2469635ee50a26d864c)
Signed-off-by: Wenchen Fan

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1582945d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1582945d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1582945d

Branch: refs/heads/branch-2.3
Commit: 1582945d0472bd16b07fd4e79ac872533a6885cd
Parents: 36f1d5e
Author: Marco Gaido
Authored: Fri Jun 8 18:51:56 2018 -0700
Committer: Wenchen Fan
Committed: Fri Jun 8 18:52:13 2018 -0700
--
 .../apache/spark/sql/types/DecimalType.scala    |   8 +-
 .../analysis/DecimalPrecisionSuite.scala        |   9 +
 .../native/decimalArithmeticOperations.sql      |   4 +
 .../native/decimalArithmeticOperations.sql.out  | 164 +++
 4 files changed, 117 insertions(+), 68 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/1582945d/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
index ef3b67c..dbf51c3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
@@ -161,13 +161,17 @@ object DecimalType extends AbstractDataType {
    * This method is used only when `spark.sql.decimalOperations.allowPrecisionLoss` is set to true.
    */
   private[sql] def adjustPrecisionScale(precision: Int, scale: Int): DecimalType = {
-    // Assumptions:
+    // Assumption:
     assert(precision >= scale)
-    assert(scale >= 0)

     if (precision <= MAX_PRECISION) {
       // Adjustment only needed when we exceed max precision
       DecimalType(precision, scale)
+    } else if (scale < 0) {
+      // Decimal can have negative scale (SPARK-24468). In this case, we cannot allow a precision
+      // loss since we would cause a loss of digits in the integer part.
+      // In this case, we are likely to meet an overflow.
+      DecimalType(MAX_PRECISION, scale)
     } else {
       // Precision/scale exceed maximum precision. Result must be adjusted to MAX_PRECISION.
       val intDigits = precision - scale

http://git-wip-us.apache.org/repos/asf/spark/blob/1582945d/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala
--
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala
index c86dc18..bd87ca6 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala
@@ -272,6 +272,15 @@ class DecimalPrecisionSuite extends AnalysisTest with BeforeAndAfter {
     }
   }

+  test("SPARK-24468: operations on decimals with negative scale") {
+    val a = AttributeReference("a", DecimalType(3, -10))()
+    val b = AttributeReference("b", DecimalType(1, -1))()
+    val c = AttributeReference("c", DecimalType(35, 1))()
+    checkType(Multiply(a, b), DecimalType(5, -11))
+    checkType(Multiply(a, c), DecimalType(38, -9))
+    checkType(Multiply(b, c), DecimalType(37, 0))
+  }
+
   /** strength reduction for integer/decimal comparisons */
   def ruleTest(initial: Expression, transformed: Expression): Unit = {
     val testRelation = LocalRelation(AttributeReference("a", IntegerType)())

http://git-wip-us.apache.org/repos/asf/spark/blob/1582945d/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/decimalArithmeticOperations.sql
--
diff --git a/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/decimalArithmeticOperations.sql
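In short, the adjusted rule behaves as follows; a minimal standalone sketch in plain Python (illustrative only, not Spark's code — the two constants mirror `MAX_PRECISION` and `MINIMUM_ADJUSTED_SCALE` in `DecimalType`, everything else is made up for the example):

```python
# Sketch of adjustPrecisionScale after this patch (illustrative, not Spark's code).
MAX_PRECISION = 38
MINIMUM_ADJUSTED_SCALE = 6

def adjust_precision_scale(precision, scale):
    assert precision >= scale  # the one remaining assumption
    if precision <= MAX_PRECISION:
        return (precision, scale)  # already fits; no adjustment needed
    if scale < 0:
        # SPARK-24468: there are no fractional digits to sacrifice, so keep the
        # scale and cap the precision; an overflow is likely instead.
        return (MAX_PRECISION, scale)
    # Otherwise, trade fractional digits for integer digits, down to a minimum scale.
    int_digits = precision - scale
    min_scale = min(scale, MINIMUM_ADJUSTED_SCALE)
    return (MAX_PRECISION, max(MAX_PRECISION - int_digits, min_scale))

print(adjust_precision_scale(39, -9))   # (38, -9): negative scale preserved, as in Multiply of (3,-10) and (35,1)
print(adjust_precision_scale(40, 10))   # (38, 8): precision loss confined to the fraction
```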
spark git commit: [SPARK-24468][SQL] Handle negative scale when adjusting precision for decimal operations
Repository: spark
Updated Branches:
  refs/heads/master 36a340913 -> f07c5064a


[SPARK-24468][SQL] Handle negative scale when adjusting precision for decimal operations

## What changes were proposed in this pull request?

In SPARK-22036 we introduced the possibility of allowing precision loss in arithmetic operations (per the SQL standard). The implementation was drawn from Hive's, in which decimals with a negative scale are not allowed in these operations. This PR handles the case where the scale is negative, removing the assertion that forbids it.

## How was this patch tested?

Added UTs.

Author: Marco Gaido

Closes #21499 from mgaido91/SPARK-24468.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f07c5064
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f07c5064
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f07c5064

Branch: refs/heads/master
Commit: f07c5064a3967cdddf57c2469635ee50a26d864c
Parents: 36a3409
Author: Marco Gaido
Authored: Fri Jun 8 18:51:56 2018 -0700
Committer: Wenchen Fan
Committed: Fri Jun 8 18:51:56 2018 -0700
--
 .../apache/spark/sql/types/DecimalType.scala    |   8 +-
 .../analysis/DecimalPrecisionSuite.scala        |   9 +
 .../native/decimalArithmeticOperations.sql      |   4 +
 .../native/decimalArithmeticOperations.sql.out  | 164 +++
 4 files changed, 117 insertions(+), 68 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/f07c5064/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
index ef3b67c..dbf51c3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
@@ -161,13 +161,17 @@ object DecimalType extends AbstractDataType {
    * This method is used only when `spark.sql.decimalOperations.allowPrecisionLoss` is set to true.
    */
   private[sql] def adjustPrecisionScale(precision: Int, scale: Int): DecimalType = {
-    // Assumptions:
+    // Assumption:
     assert(precision >= scale)
-    assert(scale >= 0)

     if (precision <= MAX_PRECISION) {
       // Adjustment only needed when we exceed max precision
       DecimalType(precision, scale)
+    } else if (scale < 0) {
+      // Decimal can have negative scale (SPARK-24468). In this case, we cannot allow a precision
+      // loss since we would cause a loss of digits in the integer part.
+      // In this case, we are likely to meet an overflow.
+      DecimalType(MAX_PRECISION, scale)
     } else {
       // Precision/scale exceed maximum precision. Result must be adjusted to MAX_PRECISION.
       val intDigits = precision - scale

http://git-wip-us.apache.org/repos/asf/spark/blob/f07c5064/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala
--
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala
index c86dc18..bd87ca6 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala
@@ -272,6 +272,15 @@ class DecimalPrecisionSuite extends AnalysisTest with BeforeAndAfter {
     }
   }

+  test("SPARK-24468: operations on decimals with negative scale") {
+    val a = AttributeReference("a", DecimalType(3, -10))()
+    val b = AttributeReference("b", DecimalType(1, -1))()
+    val c = AttributeReference("c", DecimalType(35, 1))()
+    checkType(Multiply(a, b), DecimalType(5, -11))
+    checkType(Multiply(a, c), DecimalType(38, -9))
+    checkType(Multiply(b, c), DecimalType(37, 0))
+  }
+
   /** strength reduction for integer/decimal comparisons */
   def ruleTest(initial: Expression, transformed: Expression): Unit = {
     val testRelation = LocalRelation(AttributeReference("a", IntegerType)())

http://git-wip-us.apache.org/repos/asf/spark/blob/f07c5064/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/decimalArithmeticOperations.sql
--
diff --git a/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/decimalArithmeticOperations.sql b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/decimalArithmeticOperations.sql
index 9be7fcd..28a0e20 100644
---
spark git commit: [SPARK-24412][SQL] Adding docs about automagical type casting in `isin` and `isInCollection` APIs
Repository: spark
Updated Branches:
  refs/heads/master f433ef786 -> 36a340913


[SPARK-24412][SQL] Adding docs about automagical type casting in `isin` and `isInCollection` APIs

## What changes were proposed in this pull request?

Update the documentation for the `isInCollection` API to clearly explain the "auto-casting" of elements when their types differ.

## How was this patch tested?

No-op.

Author: Thiruvasakan Paramasivan

Closes #21519 from trvskn/sql-doc-update.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/36a34091
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/36a34091
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/36a34091

Branch: refs/heads/master
Commit: 36a3409134687d6a2894cd6a77554b8439cacec1
Parents: f433ef7
Author: Thiruvasakan Paramasivan
Authored: Fri Jun 8 17:17:43 2018 -0700
Committer: DB Tsai
Committed: Fri Jun 8 17:17:43 2018 -0700
--
 .../scala/org/apache/spark/sql/Column.scala | 24 
 1 file changed, 24 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/36a34091/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index b3e59f5..2dbb53e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -781,6 +781,14 @@ class Column(val expr: Expression) extends Logging {
    * A boolean expression that is evaluated to true if the value of this expression is contained
    * by the evaluated values of the arguments.
    *
+   * Note: Since the type of the elements in the list are inferred only during the run time,
+   * the elements will be "up-casted" to the most common type for comparison.
+   * For eg:
+   * 1) In the case of "Int vs String", the "Int" will be up-casted to "String" and the
+   * comparison will look like "String vs String".
+   * 2) In the case of "Float vs Double", the "Float" will be up-casted to "Double" and the
+   * comparison will look like "Double vs Double"
+   *
    * @group expr_ops
    * @since 1.5.0
    */
@@ -791,6 +799,14 @@
    * A boolean expression that is evaluated to true if the value of this expression is contained
    * by the provided collection.
    *
+   * Note: Since the type of the elements in the collection are inferred only during the run time,
+   * the elements will be "up-casted" to the most common type for comparison.
+   * For eg:
+   * 1) In the case of "Int vs String", the "Int" will be up-casted to "String" and the
+   * comparison will look like "String vs String".
+   * 2) In the case of "Float vs Double", the "Float" will be up-casted to "Double" and the
+   * comparison will look like "Double vs Double"
+   *
    * @group expr_ops
    * @since 2.4.0
    */
@@ -800,6 +816,14 @@
    * A boolean expression that is evaluated to true if the value of this expression is contained
    * by the provided collection.
    *
+   * Note: Since the type of the elements in the collection are inferred only during the run time,
+   * the elements will be "up-casted" to the most common type for comparison.
+   * For eg:
+   * 1) In the case of "Int vs String", the "Int" will be up-casted to "String" and the
+   * comparison will look like "String vs String".
+   * 2) In the case of "Float vs Double", the "Float" will be up-casted to "Double" and the
+   * comparison will look like "Double vs Double"
+   *
    * @group java_expr_ops
    * @since 2.4.0
    */
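As a quick illustration of the behavior these docs describe, a minimal PySpark sketch (the column and literal values here are made up for the example, not from the patch):

```python
# Illustrative only: demonstrates the up-casting described in the new docs.
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("isin-upcast-demo").getOrCreate()
df = spark.range(1, 5)  # a single LongType column named "id"

# Numeric column vs String literals: the column side is up-cast to String,
# so the comparison is effectively "String vs String"; rows 1 and 3 match.
df.filter(df["id"].isin("1", "3")).show()

# Numeric column vs Double literals: both sides are widened to Double,
# so the comparison is "Double vs Double"; rows 2 and 4 match.
df.filter(df["id"].isin(2.0, 4.0)).show()

spark.stop()
```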
svn commit: r27346 - in /dev/spark/2.4.0-SNAPSHOT-2018_06_08_16_01-f433ef7-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Fri Jun 8 23:16:11 2018
New Revision: 27346

Log:
Apache Spark 2.4.0-SNAPSHOT-2018_06_08_16_01-f433ef7 docs

[This commit notification would consist of 1466 parts, which exceeds the limit of 50, so it was shortened to this summary.]
spark git commit: [SPARK-23010][K8S] Initial checkin of k8s integration tests.
Repository: spark
Updated Branches:
  refs/heads/master b070ded28 -> f433ef786


[SPARK-23010][K8S] Initial checkin of k8s integration tests.

These tests were developed in the https://github.com/apache-spark-on-k8s/spark-integration repo by several contributors. This is a copy of the current state into the main apache spark repo. The only changes from the current spark-integration repo state are:

* Move the files from the repo root into resource-managers/kubernetes/integration-tests
* Add a reference to these tests in the root README.md
* Fix a path reference in dev/dev-run-integration-tests.sh
* Add a TODO in include/util.sh

## What changes were proposed in this pull request?

Incorporation of Kubernetes integration tests.

## How was this patch tested?

This code has its own unit tests, but the main purpose is to provide the integration tests. I tested this on my laptop by running

    dev/dev-run-integration-tests.sh --spark-tgz ~/spark-2.4.0-SNAPSHOT-bin--.tgz

The spark-integration tests have already been running for months in AMPLab; here is an example: https://amplab.cs.berkeley.edu/jenkins/job/testing-k8s-scheduled-spark-integration-master/

Please review http://spark.apache.org/contributing.html before opening a pull request.

Author: Sean Suchter
Author: Sean Suchter

Closes #20697 from ssuchter/ssuchter-k8s-integration-tests.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f433ef78
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f433ef78
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f433ef78

Branch: refs/heads/master
Commit: f433ef786770e48e3594ad158ce9908f98ef0d9a
Parents: b070ded
Author: Sean Suchter
Authored: Fri Jun 8 15:15:24 2018 -0700
Committer: mcheah
Committed: Fri Jun 8 15:15:24 2018 -0700
--
 README.md                                       |   2 +
 dev/tox.ini                                     |   2 +-
 pom.xml                                         |   1 +
 .../kubernetes/integration-tests/README.md      |  52
 .../dev/dev-run-integration-tests.sh            |  93 ++
 .../integration-tests/dev/spark-rbac.yaml       |  52
 .../kubernetes/integration-tests/pom.xml        | 155 ++
 .../scripts/setup-integration-test-env.sh       |  91 ++
 .../src/test/resources/log4j.properties         |  31 ++
 .../k8s/integrationtest/KubernetesSuite.scala   | 294 +++
 .../KubernetesTestComponents.scala              | 120
 .../k8s/integrationtest/ProcessUtils.scala      |  46 +++
 .../integrationtest/SparkReadinessWatcher.scala |  41 +++
 .../deploy/k8s/integrationtest/Utils.scala      |  30 ++
 .../backend/IntegrationTestBackend.scala        |  43 +++
 .../backend/minikube/Minikube.scala             |  84 ++
 .../backend/minikube/MinikubeTestBackend.scala  |  42 +++
 .../deploy/k8s/integrationtest/config.scala     |  38 +++
 .../deploy/k8s/integrationtest/constants.scala  |  22 ++
 19 files changed, 1238 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/f433ef78/README.md
--
diff --git a/README.md b/README.md
index 1e521a7..531d330 100644
--- a/README.md
+++ b/README.md
@@ -81,6 +81,8 @@ can be run using:
 Please see the guidance on how to
 [run tests for a module, or individual tests](http://spark.apache.org/developer-tools.html#individual-tests).

+There is also a Kubernetes integration test, see resource-managers/kubernetes/integration-tests/README.md
+
 ## A Note About Hadoop Versions

 Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported

http://git-wip-us.apache.org/repos/asf/spark/blob/f433ef78/dev/tox.ini
--
diff --git a/dev/tox.ini b/dev/tox.ini
index 583c1ea..28dad8f 100644
--- a/dev/tox.ini
+++ b/dev/tox.ini
@@ -16,4 +16,4 @@
 [pycodestyle]
 ignore=E402,E731,E241,W503,E226,E722,E741,E305
 max-line-length=100
-exclude=cloudpickle.py,heapq3.py,shared.py,python/docs/conf.py,work/*/*.py,python/.eggs/*
+exclude=cloudpickle.py,heapq3.py,shared.py,python/docs/conf.py,work/*/*.py,python/.eggs/*,dist/*

http://git-wip-us.apache.org/repos/asf/spark/blob/f433ef78/pom.xml
--
diff --git a/pom.xml b/pom.xml
index 883c096..23bbd3b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -2705,6 +2705,7 @@
       <id>kubernetes</id>
       <modules>
         <module>resource-managers/kubernetes/core</module>
+        <module>resource-managers/kubernetes/integration-tests</module>

http://git-wip-us.apache.org/repos/asf/spark/blob/f433ef78/resource-managers/kubernetes/integration-tests/README.md
--
diff --git
[spark] Git Push Summary
Repository: spark
Updated Tags: refs/tags/v2.3.1 [created] 30aaa5a3a
svn commit: r27341 - /dev/spark/v2.3.1-rc4-bin/ /release/spark/spark-2.3.1/
Author: vanzin
Date: Fri Jun 8 20:27:13 2018
New Revision: 27341

Log:
Moving Spark 2.3.1-rc4 to release area.

Added:
    release/spark/spark-2.3.1/
      - copied from r27339, dev/spark/v2.3.1-rc4-bin/
Removed:
    dev/spark/v2.3.1-rc4-bin/
svn commit: r27339 - /release/spark/KEYS
Author: vanzin Date: Fri Jun 8 20:23:28 2018 New Revision: 27339 Log: Updating KEYS file. Modified: release/spark/KEYS Modified: release/spark/KEYS == --- release/spark/KEYS (original) +++ release/spark/KEYS Fri Jun 8 20:23:28 2018 @@ -347,63 +347,6 @@ MTh6A0xHJkPaf2mQH4N6G3o5Vxs6oTv5oGr2vw6C =B+j6 -END PGP PUBLIC KEY BLOCK- -pub rsa4096 2011-05-17 [SC] - 5EFE 458D 5502 2D56 E1A5 E0F2 D5E0 A69C 0CBA AE9F -uid [ unknown] Sean Owen (CODE SIGNING KEY) -sub rsa4096 2011-05-17 [E] - --BEGIN PGP PUBLIC KEY BLOCK- - -mQINBE3S6EIBEAC1vT2Z0WK/efTD8OfB0EbYNPrHBZI8ZhJFVwec68/Ax7gt/JS5 -XtFWA/BXUnhb/BVbvBa0Hleh67nLhVwoKBwNKcuSL4ti2x8dSnLUfy9m1DQhQtNP -rUfOX0wNA8Q2sesiGU9Aol7rf4PVoSSWpOfId7Qac/BA1SHvHnvGVaywKYf8EaNv -6pr/u4fb1Ljs9VTv8hWoxO+r2AWUfEMzzb9cUJ/UBwPhBbLKBzwBNICVLD42wWwc -umU/U6EVqkdvYRKy31NU+28tamba+4WeQBXSj7Tpqezn2spTZXAfbuEdAXV/RXbo -MslnlFqI4xUxzdyHrDWXkfOGUG1chThrFRM3pqMY0nO8frOZwvNQiabcoKfkDjEP -jvmvXZBYsLUFkYZM5KysAQsxKzjsFCd9c6ojtZNsDmzN88ab4XGWsHXLsVwdKREj -mXG3KJ4Qs1m1K8nb/4rVKf5ExIRQecKdNPUx3wVkuuQypSu4sdIHnUZo/gv430aF -CUmptdDLoB8bmg+XCAyBZi4ySq/75OY8h3MR9RHeyAirR6BZG91fHsPZGKklABeq -t+lM3T81o8jGG2SsDHvlNR6QtLN6D51R/hRAD8rcxQbUXHbdPsokIII5bRD9t/Na -cY9cVzHrJp3qeusQQ8mYEjV8QAq3V8i3cjCLS3iVfRk9ShiqBSHnDRuBxwARAQAB -tDBTZWFuIE93ZW4gKENPREUgU0lHTklORyBLRVkpIDxzcm93ZW5AYXBhY2hlLm9y -Zz6JAjgEEwECACIFAk3S6EICGwMGCwkIBwMCBhUIAgkKCwQWAgMBAh4BAheAAAoJ -ENXgppwMuq6ffZ8P+gJTwUEKL+lMa8or6MublJUoaU5g1NcuEfyokp5acZO+EiKU -57BMibNArQf09rZLk4GH2HcAgT8DxcCWrpTanmI4O2gAfAAWIIYJg55LLD995Ptx -4GoZQ49HVA5d97oAdvOx+38F3kWd7qaAu9iXJG0LCApF+u302XBGuv+hgiKhjgpd -hJumcuhPrgoy2RVZCSwDUvFECgKXikiwm2Aiop2nwyfdxIaikGwB9Ket4jTzk3CG -8ekjmXnIplLML2M+Nu2K0hzB13vlLL7nwGsuVzIw+AwPxXFc3f3lDzdW1QQ9j+Du -HMlD9bw6HIBSGZcAGhtYDoJyQZVFWC05ZD9ri0J3XPaQvfvEbTlGaxo67L87d1ur -yixXyksSZQGotq+Yn2Gj+o/aI2Hle5q1c4uC4htx6Ike87NnwozXH1+8qMSpWw57 -gWaBn5FPScb2l+Z96NXSwmZH3zFsZJWV6fVtbDUAxeBSY0V0wPb5mJr1Lr8RpQLt -tgXgIwC33mgEoJmGzleMCsZnp8TD+UVn9aHwrh6NPY2I39Sc8OxLRFKW9SVTKJcZ -pwAcF2PeWvItJkSdADe5aLKU7yKiBViXK21zebwaEwSbBwS43ISGNPZd6NGq4Ck4 -oVPK5OtKeUieXNjcl2eO5+9TlhE9UNziUrRKlVp1F/3VcrvprxOzlghG2P6huQIN -BE3S6EIBEAC5Je1+wkfBHdWNQgRO2iMuGCWIo55BYMncIUzfy7XHghUqOjhZEcGp -woalxRYsBZXYzUTrUrI1JJTdyDrLJyglgiD8HvEux5h8MHb5WNtZre+MZGBxYcXE -CqsuP7RLwyykrSrK48jfK2FFOSS/srUXyMTSw4mjgHS1joWKszbq8MlGu5oqJJTQ -6Vj6anEUwO6e5Bx1OoOqO6h9/p54P+FgJaKdFaMtUN5VOGt4S9lNa7CXMwn/nc18 -Zhqy70o6gD7x9IoSreeaGFWLvGjKNaJSWs2TnGeGhLDFwU8BSm0d7RMQp258nFGU -Vr0Y8FVhdFylMR49xPFDVPT9r/YIDnvHdN9/2vPAiJJksmotkiSxdsWZlNia0pei -lT+hd8WZAbMJKfAEYOryLoRNWDiiLXbh065FoVMdi9Az+SAxzgShZIDhnmQx115l -mXNSeMW5aKlCb4+cqBWlM9Mn3qaezEN1E0ouwuHcJgMUVEKLwOPP/awPeyM4RiKo -d04YdteW+W0Dm16ILMCKqIxIgClv5eNajQXpHbF5s0HvTm09CtrheyWRqS5G17PL -mumUgk37/lZeNsvGx5fxokqfKUDf3HbiYNzbibXOpoijlu4zftc2ySnDfc1x0e1N -OEKMJMUv+LUoN9aaa1zdEmGT1/sJKn9mST7TwScVo1kx0Kjvv20REQARAQABiQIf -BBgBAgAJBQJN0uhCAhsMAAoJENXgppwMuq6fLFUP/3vcQ4IruWo7Ef1kPDvSRgaO -PJHwl9kSRP/ck5Zu+yKlUwZ2OOmjZwmTVzxwAAVfagHDsUPM+DDyl/O8Kfdm0IGn -cMWKx2t7bemV5SInemB9Oc1zAPlk8roAq5B3WxZYDRrC+CUFedBpoi8ylaMZiuCe -kJFPmwydCY9qu8rZ4ylkNEuCuN4889D4lrUZM4gjZKKWMYsHlyPUyzUgTtBuu6bS -VnuTPVYER0LcHpJY0eC+lq1tW1hSkD7P25gr5RSAbaHMOdt//2wtVxEj/x9vrUNu -Gp3QYQX6omlHBW2kBzIYEfMohumMmSEhoYQZ1J5RPk9hYsF4gSRx5Bhatzh1NMSj -QjMdoGMeGvGhNbp4MCZ65x4M9w2rwVbHMz9IBhQtYhAMQQnGxmzqABRONkNA85EY -kzKWualdhPJSFt1GIHSU03HX3uyUnDqWDqgdS14vrGL3/7oq8u56RLzf2squupR4 -r2q6EAsAidY1n+aAPVp9QXyKlkyIXkc8vVrrOuwy5UR0ASs4MAZ2/SpUDNmkmYuv -2q7b18HnQexhQU3CRjCmkAojB0Rg5UlMDaChJP+XGI7DZsBNsLsxtEGVUGwpwaEY -rrwcirZBOi6ybqSekOSrKHxW5K1P6k8/pz2vXsjZnm3cOkVS2BnpjnnMWOMu6Y1Z 
-d9jg+uf1KLq73my/he/k -=Rm7o --END PGP PUBLIC KEY BLOCK- - pub 4096R/85040118 2017-11-12 uid Felix Cheung (CODE SIGNING KEY) sub 4096R/2A9FD6E3 2017-11-12 @@ -518,3 +461,131 @@ BqxvTcOR/5gnf89L6zOkcUE5Ig== =UnR0 -END PGP PUBLIC KEY BLOCK- +pub 4096R/3A0D5564 2015-07-22 +uid Marcelo M. Vanzin +sub 4096R/725B7C50 2015-07-22 + +-BEGIN PGP PUBLIC KEY BLOCK- +Version: GnuPG v1 + +mQINBFWvCEMBEAC9wZokaYywq2XYACA2qERgeT/eF67SoGiY/Jh2tDmptNmxs20S +4sMs+5Sclv7xCFriaQ3FQsl3u6+R88aFWgR53yF3qlEPGOlyWTuw2t2pvWwES9sh +kNLVJ3nOZ9iEsIxllpb5oRSxnnbepwfm5pJr0L9S9nCzLWQfUphF0M11L9BUQVr0 +arqd1nh24cTHrnCY0fz8nb23WInN5GiRzcJghMIhMr1o46OmjbCnWw8xZD+pqBxL +kXQmMPhLBPRbK/BtM9mrBxHIJOXpRe6iOQT3jMPkp7YTunIDk9z20J3QRuWKImy6 +7wuhj69uW/Txm3MMN6wlqXjW+F1YL4EXMJNFjYiDiewUrh65wqBfnNjG3NoBynYL +9ocVWZAsLZVQlmEAtWvGE0nmT9Pz8T5CQA82VMO28oB1HxqAn+OxzPFyXfTHmPe7 +t6z7C6XauXIMXSWU6C0Ksll/h6lmB7Is/bQSe3wxec0J1CWcWPvxJsbOkhP3Aeoz +WsYXyzTww/owdp8ezwfpyH2+IUR/CSZjs3xPWsPvDXbE59nBGRgzS1CILRI8xsrC +sxPPP2pSpBKdxRAoJwqUIS+J0RN/UIM3+71QaXagtLw+xrpg3YqQIPrn/aPo+gIv +7ImGFa19Lq14x0N1jIKU+F+uIRBnN/mBPJQn+I5LqxWcAmX4m8nlauyuwQARAQAB +tCdNYXJjZWxvIE0uIFZhbnppbiA8dmFuemluQGNsb3VkZXJhLmNvbT6JAjgEEwEC +ACIFAlWvCnECGwMGCwkIBwMCBhUIAgkKCwQWAgMBAh4BAheAAAoJEP2P/Uw6DVVk
svn commit: r27338 - in /dev/spark/2.4.0-SNAPSHOT-2018_06_08_12_01-b070ded-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Fri Jun 8 19:16:03 2018
New Revision: 27338

Log:
Apache Spark 2.4.0-SNAPSHOT-2018_06_08_12_01-b070ded docs

[This commit notification would consist of 1466 parts, which exceeds the limit of 50, so it was shortened to this summary.]
spark git commit: [SPARK-17756][PYTHON][STREAMING] Workaround to avoid return type mismatch in PythonTransformFunction
Repository: spark
Updated Branches:
  refs/heads/master 1a644afba -> b070ded28


[SPARK-17756][PYTHON][STREAMING] Workaround to avoid return type mismatch in PythonTransformFunction

## What changes were proposed in this pull request?

This PR proposes to wrap the transformed RDD within `TransformFunction`. `PythonTransformFunction` appears to require `_jrdd` to return a `JavaRDD`:

https://github.com/apache/spark/blob/39e2bad6a866d27c3ca594d15e574a1da3ee84cc/python/pyspark/streaming/util.py#L67
https://github.com/apache/spark/blob/6ee28423ad1b2e6089b82af64a31d77d3552bb38/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala#L43

However, it could be a `JavaPairRDD` from some APIs, for example `zip` in PySpark's RDD API. `_jrdd` can be checked as below:

```python
>>> rdd.zip(rdd)._jrdd.getClass().toString()
u'class org.apache.spark.api.java.JavaPairRDD'
```

So, here, I wrapped it with `map` to ensure a `JavaRDD` is returned:

```python
>>> rdd.zip(rdd).map(lambda x: x)._jrdd.getClass().toString()
u'class org.apache.spark.api.java.JavaRDD'
```

Some failure cases, elaborated:

```python
from pyspark.streaming import StreamingContext

ssc = StreamingContext(spark.sparkContext, 10)
ssc.queueStream([sc.range(10)]) \
    .transform(lambda rdd: rdd.cartesian(rdd)) \
    .pprint()
ssc.start()
```

```python
from pyspark.streaming import StreamingContext

ssc = StreamingContext(spark.sparkContext, 10)
ssc.queueStream([sc.range(10)]).foreachRDD(lambda rdd: rdd.cartesian(rdd))
ssc.start()
```

```python
from pyspark.streaming import StreamingContext

ssc = StreamingContext(spark.sparkContext, 10)
ssc.queueStream([sc.range(10)]).foreachRDD(lambda rdd: rdd.zip(rdd))
ssc.start()
```

```python
from pyspark.streaming import StreamingContext

ssc = StreamingContext(spark.sparkContext, 10)
ssc.queueStream([sc.range(10)]).foreachRDD(lambda rdd: rdd.zip(rdd).union(rdd.zip(rdd)))
ssc.start()
```

```python
from pyspark.streaming import StreamingContext

ssc = StreamingContext(spark.sparkContext, 10)
ssc.queueStream([sc.range(10)]).foreachRDD(lambda rdd: rdd.zip(rdd).coalesce(1))
ssc.start()
```

## How was this patch tested?

Unit tests were added in `python/pyspark/streaming/tests.py` and manually tested.

Author: hyukjinkwon

Closes #19498 from HyukjinKwon/SPARK-17756.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b070ded2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b070ded2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b070ded2

Branch: refs/heads/master
Commit: b070ded2843e88131c90cb9ef1b4f8d533f8361d
Parents: 1a644af
Author: hyukjinkwon
Authored: Sat Jun 9 01:27:51 2018 +0700
Committer: hyukjinkwon
Committed: Sat Jun 9 01:27:51 2018 +0700
--
 python/pyspark/streaming/context.py |  2 +-
 python/pyspark/streaming/tests.py   |  6 ++
 python/pyspark/streaming/util.py    | 11 ++-
 3 files changed, 17 insertions(+), 2 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/b070ded2/python/pyspark/streaming/context.py
--
diff --git a/python/pyspark/streaming/context.py b/python/pyspark/streaming/context.py
index 17c34f8..dd924ef 100644
--- a/python/pyspark/streaming/context.py
+++ b/python/pyspark/streaming/context.py
@@ -338,7 +338,7 @@ class StreamingContext(object):
         jdstreams = [d._jdstream for d in dstreams]
         # change the final serializer to sc.serializer
         func = TransformFunction(self._sc,
-                                 lambda t, *rdds: transformFunc(rdds).map(lambda x: x),
+                                 lambda t, *rdds: transformFunc(rdds),
                                  *[d._jrdd_deserializer for d in dstreams])
         jfunc = self._jvm.TransformFunction(func)
         jdstream = self._jssc.transform(jdstreams, jfunc)

http://git-wip-us.apache.org/repos/asf/spark/blob/b070ded2/python/pyspark/streaming/tests.py
--
diff --git a/python/pyspark/streaming/tests.py b/python/pyspark/streaming/tests.py
index e4a428a..373784f 100644
--- a/python/pyspark/streaming/tests.py
+++ b/python/pyspark/streaming/tests.py
@@ -779,6 +779,12 @@ class StreamingContextTests(PySparkStreamingTestCase):

         self.assertEqual([2, 3, 1], self._take(dstream, 3))

+    def test_transform_pairrdd(self):
+        # This regression test case is for SPARK-17756.
+        dstream = self.ssc.queueStream(
+            [[1], [2], [3]]).transform(lambda rdd: rdd.cartesian(rdd))
+        self.assertEqual([(1, 1), (2, 2), (3, 3)], self._take(dstream, 3))
+
     def test_get_active(self):
         self.assertEqual(StreamingContext.getActive(), None)
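The `util.py` side of the patch is not shown above; as a rough sketch of the idea only (a simplification, not the actual `TransformFunction` code — `call_user_transform` and `user_func` are made-up names):

```python
# Simplified sketch of the workaround's shape, not the real TransformFunction.
def call_user_transform(user_func, t, *rdds):
    rdd = user_func(t, *rdds)
    if rdd is not None:
        # APIs such as zip() hand back an RDD whose _jrdd is a JavaPairRDD.
        # An identity map() re-pipes it through PythonRDD, whose _jrdd is
        # always a JavaRDD -- the type PythonTransformFunction expects.
        rdd = rdd.map(lambda x: x)
    return rdd._jrdd if rdd is not None else None
```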
spark git commit: [SPARK-23984][K8S] Initial Python Bindings for PySpark on K8s
Repository: spark
Updated Branches:
  refs/heads/master 173fe450d -> 1a644afba


[SPARK-23984][K8S] Initial Python Bindings for PySpark on K8s

## What changes were proposed in this pull request?

Introducing Python bindings for PySpark on Kubernetes:

- [x] Running PySpark Jobs
- [x] Increased Default Memory Overhead value
- [ ] Dependency Management for virtualenv/conda

## How was this patch tested?

This patch was tested with

- [x] Unit Tests
- [x] Integration tests with [this addition](https://github.com/apache-spark-on-k8s/spark-integration/pull/46)

```
KubernetesSuite:
- Run SparkPi with no resources
- Run SparkPi with a very long application name.
- Run SparkPi with a master URL without a scheme.
- Run SparkPi with an argument.
- Run SparkPi with custom labels, annotations, and environment variables.
- Run SparkPi with a test secret mounted into the driver and executor pods
- Run extraJVMOptions check on driver
- Run SparkRemoteFileTest using a remote data file
- Run PySpark on simple pi.py example
- Run PySpark with Python2 to test a pyfiles example
- Run PySpark with Python3 to test a pyfiles example
Run completed in 4 minutes, 28 seconds.
Total number of tests run: 11
Suites: completed 2, aborted 0
Tests: succeeded 11, failed 0, canceled 0, ignored 0, pending 0
All tests passed.
```

Author: Ilan Filonenko
Author: Ilan Filonenko

Closes #21092 from ifilonenko/master.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1a644afb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1a644afb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1a644afb

Branch: refs/heads/master
Commit: 1a644afbac35c204f9ad55f86999319a9ab458c6
Parents: 173fe45
Author: Ilan Filonenko
Authored: Fri Jun 8 11:18:34 2018 -0700
Committer: mcheah
Committed: Fri Jun 8 11:18:34 2018 -0700
--
 bin/docker-image-tool.sh                        |  23 ++--
 .../org/apache/spark/deploy/SparkSubmit.scala   |  14 ++-
 docs/running-on-kubernetes.md                   |  16 ++-
 examples/src/main/python/py_container_checks.py |  32 ++
 examples/src/main/python/pyfiles.py             |  38 +++
 .../org/apache/spark/deploy/k8s/Config.scala    |  40 +++
 .../org/apache/spark/deploy/k8s/Constants.scala |   7 +-
 .../spark/deploy/k8s/KubernetesConf.scala       |  62 ---
 .../spark/deploy/k8s/KubernetesUtils.scala      |   2 +-
 .../k8s/features/BasicDriverFeatureStep.scala   |  14 +--
 .../k8s/features/BasicExecutorFeatureStep.scala |   3 +-
 .../bindings/JavaDriverFeatureStep.scala        |  44
 .../bindings/PythonDriverFeatureStep.scala      |  73 +
 .../submit/KubernetesClientApplication.scala    |  16 ++-
 .../k8s/submit/KubernetesDriverBuilder.scala    |  39 +--
 .../deploy/k8s/submit/MainAppResource.scala     |   5 +
 .../cluster/k8s/KubernetesExecutorBuilder.scala |  22 ++--
 .../spark/deploy/k8s/KubernetesConfSuite.scala  |  66 ++--
 .../features/BasicDriverFeatureStepSuite.scala  |  58 +-
 .../BasicExecutorFeatureStepSuite.scala         |   9 +-
 ...rKubernetesCredentialsFeatureStepSuite.scala |   9 +-
 .../DriverServiceFeatureStepSuite.scala         |  18 ++-
 .../features/EnvSecretsFeatureStepSuite.scala   |   3 +-
 .../features/LocalDirsFeatureStepSuite.scala    |   3 +-
 .../features/MountSecretsFeatureStepSuite.scala |   3 +-
 .../bindings/JavaDriverFeatureStepSuite.scala   |  60 +++
 .../bindings/PythonDriverFeatureStepSuite.scala | 108 +++
 .../spark/deploy/k8s/submit/ClientSuite.scala   |   3 +-
 .../submit/KubernetesDriverBuilderSuite.scala   |  78 --
 .../k8s/KubernetesExecutorBuilderSuite.scala    |   6 +-
 .../spark/bindings/python/Dockerfile            |  39 +++
 .../src/main/dockerfiles/spark/entrypoint.sh    |  30 ++
 32 files changed, 842 insertions(+), 101 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/1a644afb/bin/docker-image-tool.sh
--
diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh
index f090240..a871ab5 100755
--- a/bin/docker-image-tool.sh
+++ b/bin/docker-image-tool.sh
@@ -63,12 +63,20 @@ function build {
   if [ ! -d "$IMG_PATH" ]; then
     error "Cannot find docker image. This script must be run from a runnable distribution of Apache Spark."
   fi
-
-  local DOCKERFILE=${DOCKERFILE:-"$IMG_PATH/spark/Dockerfile"}
+  local BINDING_BUILD_ARGS=(
+    --build-arg
+    base_img=$(image_ref spark)
+  )
+  local BASEDOCKERFILE=${BASEDOCKERFILE:-"$IMG_PATH/spark/Dockerfile"}
+  local PYDOCKERFILE=${PYDOCKERFILE:-"$IMG_PATH/spark/bindings/python/Dockerfile"}

   docker build "${BUILD_ARGS[@]}" \
     -t $(image_ref spark) \
--f "$DOCKERFILE" .
+-f "$BASEDOCKERFILE" .
+
+docker build
spark git commit: [SPARK-24477][SPARK-24454][ML][PYTHON] Imports submodule in ml/__init__.py and add ImageSchema into __all__
Repository: spark
Updated Branches:
  refs/heads/master a5d775a1f -> 173fe450d


[SPARK-24477][SPARK-24454][ML][PYTHON] Imports submodule in ml/__init__.py and add ImageSchema into __all__

## What changes were proposed in this pull request?

This PR attaches submodules to ml's `__init__.py` module. It also adds `ImageSchema` into `image.py` explicitly.

## How was this patch tested?

Before:

```python
>>> from pyspark import ml
>>> ml.image
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
AttributeError: 'module' object has no attribute 'image'
>>> ml.image.ImageSchema
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
AttributeError: 'module' object has no attribute 'image'
```

```python
>>> "image" in globals()
False
>>> from pyspark.ml import *
>>> "image" in globals()
False
>>> image
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
NameError: name 'image' is not defined
```

After:

```python
>>> from pyspark import ml
>>> ml.image
<module 'pyspark.ml.image' from ...>
>>> ml.image.ImageSchema
<pyspark.ml.image._ImageSchema object at ...>
```

```python
>>> "image" in globals()
False
>>> from pyspark.ml import *
>>> "image" in globals()
True
>>> image
<module 'pyspark.ml.image' from ...>
```

Author: hyukjinkwon

Closes #21483 from HyukjinKwon/SPARK-24454.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/173fe450
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/173fe450
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/173fe450

Branch: refs/heads/master
Commit: 173fe450df203b262b58f7e71c6b52a79db95ee0
Parents: a5d775a
Author: hyukjinkwon
Authored: Fri Jun 8 09:32:11 2018 -0700
Committer: Xiangrui Meng
Committed: Fri Jun 8 09:32:11 2018 -0700
--
 python/pyspark/ml/__init__.py | 8 +++-
 python/pyspark/ml/image.py    | 2 ++
 2 files changed, 9 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/173fe450/python/pyspark/ml/__init__.py
--
diff --git a/python/pyspark/ml/__init__.py b/python/pyspark/ml/__init__.py
index 129d7d6..d99a253 100644
--- a/python/pyspark/ml/__init__.py
+++ b/python/pyspark/ml/__init__.py
@@ -21,5 +21,11 @@ machine learning pipelines.
 """
 from pyspark.ml.base import Estimator, Model, Transformer, UnaryTransformer
 from pyspark.ml.pipeline import Pipeline, PipelineModel
+from pyspark.ml import classification, clustering, evaluation, feature, fpm, \
+    image, pipeline, recommendation, regression, stat, tuning, util, linalg, param

-__all__ = ["Transformer", "UnaryTransformer", "Estimator", "Model", "Pipeline", "PipelineModel"]
+__all__ = [
+    "Transformer", "UnaryTransformer", "Estimator", "Model", "Pipeline", "PipelineModel",
+    "classification", "clustering", "evaluation", "feature", "fpm", "image",
+    "recommendation", "regression", "stat", "tuning", "util", "linalg", "param",
+]

http://git-wip-us.apache.org/repos/asf/spark/blob/173fe450/python/pyspark/ml/image.py
--
diff --git a/python/pyspark/ml/image.py b/python/pyspark/ml/image.py
index 96d702f..5f0c57e 100644
--- a/python/pyspark/ml/image.py
+++ b/python/pyspark/ml/image.py
@@ -31,6 +31,8 @@ from pyspark import SparkContext
 from pyspark.sql.types import Row, _create_row, _parse_datatype_json_string
 from pyspark.sql import DataFrame, SparkSession

+__all__ = ["ImageSchema"]
+

 class _ImageSchema(object):
     """
svn commit: r27329 - in /dev/spark/2.4.0-SNAPSHOT-2018_06_08_08_01-a5d775a-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Fri Jun 8 15:17:07 2018
New Revision: 27329

Log:
Apache Spark 2.4.0-SNAPSHOT-2018_06_08_08_01-a5d775a docs

[This commit notification would consist of 1466 parts, which exceeds the limit of 50, so it was shortened to this summary.]
spark git commit: [SPARK-24224][ML-EXAMPLES] Java example code for Power Iteration Clustering in spark.ml
Repository: spark
Updated Branches:
  refs/heads/master 1462bba4f -> 2c100209f


[SPARK-24224][ML-EXAMPLES] Java example code for Power Iteration Clustering in spark.ml

## What changes were proposed in this pull request?

Java example code for Power Iteration Clustering in spark.ml.

## How was this patch tested?

Locally tested.

Author: Shahid

Closes #21283 from shahidki31/JavaPicExample.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2c100209
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2c100209
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2c100209

Branch: refs/heads/master
Commit: 2c100209f0b73e882ab953993b307867d1df7c2f
Parents: 1462bba
Author: Shahid
Authored: Fri Jun 8 08:44:59 2018 -0500
Committer: Sean Owen
Committed: Fri Jun 8 08:44:59 2018 -0500
--
 .../ml/JavaPowerIterationClusteringExample.java | 71
 1 file changed, 71 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/2c100209/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java
--
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java
new file mode 100644
index 000..5186563
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.clustering.PowerIterationClustering;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaPowerIterationClusteringExample {
+  public static void main(String[] args) {
+    // Create a SparkSession.
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaPowerIterationClustering")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> data = Arrays.asList(
+      RowFactory.create(0L, 1L, 1.0),
+      RowFactory.create(0L, 2L, 1.0),
+      RowFactory.create(1L, 2L, 1.0),
+      RowFactory.create(3L, 4L, 1.0),
+      RowFactory.create(4L, 0L, 0.1)
+    );
+
+    StructType schema = new StructType(new StructField[]{
+      new StructField("src", DataTypes.LongType, false, Metadata.empty()),
+      new StructField("dst", DataTypes.LongType, false, Metadata.empty()),
+      new StructField("weight", DataTypes.DoubleType, false, Metadata.empty())
+    });
+
+    Dataset<Row> df = spark.createDataFrame(data, schema);
+
+    PowerIterationClustering model = new PowerIterationClustering()
+      .setK(2)
+      .setMaxIter(10)
+      .setInitMode("degree")
+      .setWeightCol("weight");
+
+    Dataset<Row> result = model.assignClusters(df);
+    result.show(false);
+    // $example off$
+    spark.stop();
+  }
+}
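For PySpark users, a rough equivalent of the example above (hedged: the Python `PowerIterationClustering` wrapper is a separate Spark 2.4 addition, and the keyword-argument spelling below assumes it mirrors the Java setters):

```python
# PySpark sketch mirroring the Java example above; assumes the
# pyspark.ml.clustering.PowerIterationClustering wrapper from Spark 2.4.
from pyspark.ml.clustering import PowerIterationClustering
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("PythonPowerIterationClustering").getOrCreate()

# Same tiny weighted graph as the Java example: (src, dst, weight) edges.
df = spark.createDataFrame([
    (0, 1, 1.0),
    (0, 2, 1.0),
    (1, 2, 1.0),
    (3, 4, 1.0),
    (4, 0, 0.1),
], ["src", "dst", "weight"])

pic = PowerIterationClustering(k=2, maxIter=10, initMode="degree", weightCol="weight")
pic.assignClusters(df).show(truncate=False)

spark.stop()
```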