[spark] branch master updated: [SPARK-37843][CORE] Suppress NoSuchFieldError at setMDCForTask

2022-01-07 Thread dongjoon
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new f051b4b  [SPARK-37843][CORE] Suppress NoSuchFieldError at setMDCForTask
f051b4b is described below

commit f051b4be1c17cd3d8789787e5dec25bfcd749442
Author: Dongjoon Hyun 
AuthorDate: Fri Jan 7 18:12:48 2022 -0800

[SPARK-37843][CORE] Suppress NoSuchFieldError at setMDCForTask

### What changes were proposed in this pull request?

This PR aims to suppress `NoSuchFieldError` at `setMDCForTask`.

### Why are the changes needed?

This is observed on `master` branch, Java 17, Apple Silicon combination.
```
$ build/mvn package 
-Dtest.exclude.tags=org.apache.spark.tags.ExtendedLevelDBTest,org.apache.spark.tags.ExtendedRocksDBTest
```

```
00:57:11 2022-01-07 15:57:11.693 - stderr> Exception in thread "Executor 
task launch worker-0" java.lang.NoSuchFieldError: mdc
00:57:11 2022-01-07 15:57:11.693 - stderr>  at 
org.apache.log4j.MDCFriend.fixForJava9(MDCFriend.java:11)
00:57:11 2022-01-07 15:57:11.693 - stderr>  at 
org.slf4j.impl.Log4jMDCAdapter.&lt;clinit&gt;(Log4jMDCAdapter.java:38)
00:57:11 2022-01-07 15:57:11.693 - stderr>  at 
org.slf4j.impl.StaticMDCBinder.getMDCA(StaticMDCBinder.java:59)
00:57:11 2022-01-07 15:57:11.693 - stderr>  at 
org.slf4j.MDC.bwCompatibleGetMDCAdapterFromBinder(MDC.java:99)
00:57:11 2022-01-07 15:57:11.693 - stderr>  at 
org.slf4j.MDC.&lt;clinit&gt;(MDC.java:108)
00:57:11 2022-01-07 15:57:11.693 - stderr>  at 
org.apache.spark.executor.Executor.org$apache$spark$executor$Executor$$setMDCForTask(Executor.scala:750)
00:57:11 2022-01-07 15:57:11.693 - stderr>  at 
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:441)
00:57:11 2022-01-07 15:57:11.693 - stderr>  at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
00:57:11 2022-01-07 15:57:11.693 - stderr>  at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
00:57:11 2022-01-07 15:57:11.693 - stderr>  at 
java.base/java.lang.Thread.run(Thread.java:833)
```

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Pass the CIs.

Closes #35141 from dongjoon-hyun/SPARK-37843.

Authored-by: Dongjoon Hyun 
Signed-off-by: Dongjoon Hyun 
---
 .../main/scala/org/apache/spark/executor/Executor.scala| 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala 
b/core/src/main/scala/org/apache/spark/executor/Executor.scala
index 81edccc..79d7190 100644
--- a/core/src/main/scala/org/apache/spark/executor/Executor.scala
+++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala
@@ -746,11 +746,15 @@ private[spark] class Executor(
   }
 
   private def setMDCForTask(taskName: String, mdc: Seq[(String, String)]): 
Unit = {
-// make sure we run the task with the user-specified mdc properties only
-MDC.clear()
-mdc.foreach { case (key, value) => MDC.put(key, value) }
-// avoid overriding the taskName by the user
-MDC.put("mdc.taskName", taskName)
+try {
+  // make sure we run the task with the user-specified mdc properties only
+  MDC.clear()
+  mdc.foreach { case (key, value) => MDC.put(key, value) }
+  // avoid overriding the taskName by the user
+  MDC.put("mdc.taskName", taskName)
+} catch {
+  case _: NoSuchFieldError => logInfo("MDC is not supported.")
+}
   }
 
   /**

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[spark] 01/01: Preparing development version 3.2.2-SNAPSHOT

2022-01-07 Thread huaxingao
This is an automated email from the ASF dual-hosted git repository.

huaxingao pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git

commit ae309f0c60c3db8c2a4f1b1a75f99146fb172554
Author: Huaxin Gao 
AuthorDate: Fri Jan 7 17:38:42 2022 +

Preparing development version 3.2.2-SNAPSHOT
---
 R/pkg/DESCRIPTION  | 2 +-
 assembly/pom.xml   | 2 +-
 common/kvstore/pom.xml | 2 +-
 common/network-common/pom.xml  | 2 +-
 common/network-shuffle/pom.xml | 2 +-
 common/network-yarn/pom.xml| 2 +-
 common/sketch/pom.xml  | 2 +-
 common/tags/pom.xml| 2 +-
 common/unsafe/pom.xml  | 2 +-
 core/pom.xml   | 2 +-
 docs/_config.yml   | 6 +++---
 examples/pom.xml   | 2 +-
 external/avro/pom.xml  | 2 +-
 external/docker-integration-tests/pom.xml  | 2 +-
 external/kafka-0-10-assembly/pom.xml   | 2 +-
 external/kafka-0-10-sql/pom.xml| 2 +-
 external/kafka-0-10-token-provider/pom.xml | 2 +-
 external/kafka-0-10/pom.xml| 2 +-
 external/kinesis-asl-assembly/pom.xml  | 2 +-
 external/kinesis-asl/pom.xml   | 2 +-
 external/spark-ganglia-lgpl/pom.xml| 2 +-
 graphx/pom.xml | 2 +-
 hadoop-cloud/pom.xml   | 2 +-
 launcher/pom.xml   | 2 +-
 mllib-local/pom.xml| 2 +-
 mllib/pom.xml  | 2 +-
 pom.xml| 2 +-
 python/pyspark/version.py  | 2 +-
 repl/pom.xml   | 2 +-
 resource-managers/kubernetes/core/pom.xml  | 2 +-
 resource-managers/kubernetes/integration-tests/pom.xml | 2 +-
 resource-managers/mesos/pom.xml| 2 +-
 resource-managers/yarn/pom.xml | 2 +-
 sql/catalyst/pom.xml   | 2 +-
 sql/core/pom.xml   | 2 +-
 sql/hive-thriftserver/pom.xml  | 2 +-
 sql/hive/pom.xml   | 2 +-
 streaming/pom.xml  | 2 +-
 tools/pom.xml  | 2 +-
 39 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 2abad61..5590c86 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: SparkR
 Type: Package
-Version: 3.2.1
+Version: 3.2.2
 Title: R Front End for 'Apache Spark'
 Description: Provides an R Front end for 'Apache Spark' 
.
 Authors@R: c(person("Shivaram", "Venkataraman", role = "aut",
diff --git a/assembly/pom.xml b/assembly/pom.xml
index a852011..9584884 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.2.1
+3.2.2-SNAPSHOT
 ../pom.xml
   
 
diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml
index 11cf0cb..167e69f 100644
--- a/common/kvstore/pom.xml
+++ b/common/kvstore/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.2.1
+3.2.2-SNAPSHOT
 ../../pom.xml
   
 
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
index 9957a77..eaf1c1e 100644
--- a/common/network-common/pom.xml
+++ b/common/network-common/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.2.1
+3.2.2-SNAPSHOT
 ../../pom.xml
   
 
diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml
index b3ea287..811e503 100644
--- a/common/network-shuffle/pom.xml
+++ b/common/network-shuffle/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.2.1
+3.2.2-SNAPSHOT
 ../../pom.xml
   
 
diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml
index 8fb7d4e..23513f6 100644
--- a/common/network-yarn/pom.xml
+++ b/common/network-yarn/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.2.1
+3.2.2-SNAPSHOT
 ../../pom.xml
   
 
diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml
index 7e4c6c3..c5c6161 100644
--- a/common/sketch/pom.xml
+++ b/common/sketch/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.2.1
+3.2.2-SNAPSHOT
 ../../pom.xml
   
 
diff --git a/common/tags/pom.xml b/common/tags/pom.xml
index 

[spark] branch branch-3.2 updated (4b5d2d7 -> ae309f0)

2022-01-07 Thread huaxingao
This is an automated email from the ASF dual-hosted git repository.

huaxingao pushed a change to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git.


from 4b5d2d7  [SPARK-37802][SQL][3.2] Composite field name should work with 
Aggregate push down
 add 2b0ee22  Preparing Spark release v3.2.1-rc1
 new ae309f0  Preparing development version 3.2.2-SNAPSHOT

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[spark] 01/01: Preparing Spark release v3.2.1-rc1

2022-01-07 Thread huaxingao
This is an automated email from the ASF dual-hosted git repository.

huaxingao pushed a commit to tag v3.2.1-rc1
in repository https://gitbox.apache.org/repos/asf/spark.git

commit 2b0ee226f8dd17b278ad11139e62464433191653
Author: Huaxin Gao 
AuthorDate: Fri Jan 7 17:38:35 2022 +

Preparing Spark release v3.2.1-rc1
---
 R/pkg/DESCRIPTION  | 2 +-
 assembly/pom.xml   | 2 +-
 common/kvstore/pom.xml | 2 +-
 common/network-common/pom.xml  | 2 +-
 common/network-shuffle/pom.xml | 2 +-
 common/network-yarn/pom.xml| 2 +-
 common/sketch/pom.xml  | 2 +-
 common/tags/pom.xml| 2 +-
 common/unsafe/pom.xml  | 2 +-
 core/pom.xml   | 2 +-
 docs/_config.yml   | 6 +++---
 examples/pom.xml   | 2 +-
 external/avro/pom.xml  | 2 +-
 external/docker-integration-tests/pom.xml  | 2 +-
 external/kafka-0-10-assembly/pom.xml   | 2 +-
 external/kafka-0-10-sql/pom.xml| 2 +-
 external/kafka-0-10-token-provider/pom.xml | 2 +-
 external/kafka-0-10/pom.xml| 2 +-
 external/kinesis-asl-assembly/pom.xml  | 2 +-
 external/kinesis-asl/pom.xml   | 2 +-
 external/spark-ganglia-lgpl/pom.xml| 2 +-
 graphx/pom.xml | 2 +-
 hadoop-cloud/pom.xml   | 2 +-
 launcher/pom.xml   | 2 +-
 mllib-local/pom.xml| 2 +-
 mllib/pom.xml  | 2 +-
 pom.xml| 2 +-
 python/pyspark/version.py  | 2 +-
 repl/pom.xml   | 2 +-
 resource-managers/kubernetes/core/pom.xml  | 2 +-
 resource-managers/kubernetes/integration-tests/pom.xml | 2 +-
 resource-managers/mesos/pom.xml| 2 +-
 resource-managers/yarn/pom.xml | 2 +-
 sql/catalyst/pom.xml   | 2 +-
 sql/core/pom.xml   | 2 +-
 sql/hive-thriftserver/pom.xml  | 2 +-
 sql/hive/pom.xml   | 2 +-
 streaming/pom.xml  | 2 +-
 tools/pom.xml  | 2 +-
 39 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 5590c86..2abad61 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: SparkR
 Type: Package
-Version: 3.2.2
+Version: 3.2.1
 Title: R Front End for 'Apache Spark'
 Description: Provides an R Front end for 'Apache Spark' 
.
 Authors@R: c(person("Shivaram", "Venkataraman", role = "aut",
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 9584884..a852011 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.2.2-SNAPSHOT
+3.2.1
 ../pom.xml
   
 
diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml
index 167e69f..11cf0cb 100644
--- a/common/kvstore/pom.xml
+++ b/common/kvstore/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.2.2-SNAPSHOT
+3.2.1
 ../../pom.xml
   
 
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
index eaf1c1e..9957a77 100644
--- a/common/network-common/pom.xml
+++ b/common/network-common/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.2.2-SNAPSHOT
+3.2.1
 ../../pom.xml
   
 
diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml
index 811e503..b3ea287 100644
--- a/common/network-shuffle/pom.xml
+++ b/common/network-shuffle/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.2.2-SNAPSHOT
+3.2.1
 ../../pom.xml
   
 
diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml
index 23513f6..8fb7d4e 100644
--- a/common/network-yarn/pom.xml
+++ b/common/network-yarn/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.2.2-SNAPSHOT
+3.2.1
 ../../pom.xml
   
 
diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml
index c5c6161..7e4c6c3 100644
--- a/common/sketch/pom.xml
+++ b/common/sketch/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.2.2-SNAPSHOT
+3.2.1
 ../../pom.xml
   
 
diff --git a/common/tags/pom.xml b/common/tags/pom.xml
index cffc824..bdf992c 

[spark] tag v3.2.1-rc1 created (now 2b0ee22)

2022-01-07 Thread huaxingao
This is an automated email from the ASF dual-hosted git repository.

huaxingao pushed a change to tag v3.2.1-rc1
in repository https://gitbox.apache.org/repos/asf/spark.git.


  at 2b0ee22  (commit)
This tag includes the following new commits:

 new 2b0ee22  Preparing Spark release v3.2.1-rc1

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[spark] branch master updated: [SPARK-37837][INFRA] Enable black formatter in dev Python scripts

2022-01-07 Thread dongjoon
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new ead131f  [SPARK-37837][INFRA] Enable black formatter in dev Python 
scripts
ead131f is described below

commit ead131fc6387ca510996e561d69e9fcc86067158
Author: Hyukjin Kwon 
AuthorDate: Fri Jan 7 09:01:22 2022 -0800

[SPARK-37837][INFRA] Enable black formatter in dev Python scripts

### What changes were proposed in this pull request?

This PR proposes to enable [black](https://github.com/psf/black) formatter 
(automatic Python code formatter) for `dev` directory as well.

### Why are the changes needed?

To have the consistent style, and make for a better development cycle

### Does this PR introduce _any_ user-facing change?

No, dev-only.

### How was this patch tested?

I manually verified it as below:

```bash
dev/reformat-python
dev/linter-python
```

Closes #35127 from HyukjinKwon/SPARK-37837.

Authored-by: Hyukjin Kwon 
Signed-off-by: Dongjoon Hyun 
---
 .../files/util_scripts/kill_zinc_nailgun.py|  25 ++-
 .../files/util_scripts/post_github_pr_comment.py   |  45 +++--
 .../files/util_scripts/session_lock_resource.py|  29 +--
 dev/create-release/generate-contributors.py|  49 --
 dev/create-release/releaseutils.py |  29 +--
 dev/create-release/translate-contributors.py   |  46 +++--
 dev/github_jira_sync.py|  57 +++---
 dev/is-changed.py  |  26 +--
 dev/lint-python|   2 +-
 dev/merge_spark_pr.py  | 153 +---
 dev/pip-sanity-check.py|   7 +-
 dev/reformat-python|   2 +-
 dev/run-tests-jenkins.py   | 131 +++---
 dev/run-tests.py   | 195 +
 dev/sparktestsupport/__init__.py   |   2 +-
 dev/sparktestsupport/modules.py| 164 -
 dev/sparktestsupport/shellutils.py |   6 +-
 dev/sparktestsupport/toposort.py   |  26 +--
 dev/sparktestsupport/utils.py  |  16 +-
 19 files changed, 561 insertions(+), 449 deletions(-)

diff --git 
a/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/kill_zinc_nailgun.py
 
b/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/kill_zinc_nailgun.py
index 40887e8..3b605c9 100755
--- 
a/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/kill_zinc_nailgun.py
+++ 
b/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/kill_zinc_nailgun.py
@@ -12,14 +12,19 @@ def _parse_args():
 zinc_port_var = "ZINC_PORT"
 zinc_port_option = "--zinc-port"
 parser = argparse.ArgumentParser()
-parser.add_argument(zinc_port_option,
-type=int,
-default=int(os.environ.get(zinc_port_var, "0")),
-help="Specify zinc port")
+parser.add_argument(
+zinc_port_option,
+type=int,
+default=int(os.environ.get(zinc_port_var, "0")),
+help="Specify zinc port",
+)
 args = parser.parse_args()
 if not args.zinc_port:
-parser.error("Specify either environment variable {0} or option 
{1}".format(
-zinc_port_var, zinc_port_option))
+parser.error(
+"Specify either environment variable {0} or option {1}".format(
+zinc_port_var, zinc_port_option
+)
+)
 return args
 
 
@@ -36,9 +41,11 @@ def _yield_processes_listening_on_port(port):
 innocuous_errors = re.compile(
 r"^\s*Output information may be incomplete.\s*$"
 r"|^lsof: WARNING: can't stat\(\) 
(?:tracefs|nsfs|overlay|tmpfs|aufs|zfs) file system .*$"
-r"|^\s*$")
-lsof_process = subprocess.Popen(["lsof", "-P"], stdout=subprocess.PIPE,
-stderr=subprocess.PIPE, 
universal_newlines=True)
+r"|^\s*$"
+)
+lsof_process = subprocess.Popen(
+["lsof", "-P"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, 
universal_newlines=True
+)
 stdout, stderr = lsof_process.communicate()
 if lsof_process.returncode != 0:
 raise OSError("Can't run lsof -P, stderr:\n{}".format(stderr))
diff --git 
a/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/post_github_pr_comment.py
 
b/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/post_github_pr_comment.py
index 68e31d4..d55295d 100755
--- 
a/dev/ansible-for-test-node/roles/jenkins-worker/files/util_scripts/post_github_pr_comment.py
+++ 

[spark] branch master updated (9000339 -> 16eb9d8)

2022-01-07 Thread zero323
This is an automated email from the ASF dual-hosted git repository.

zero323 pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.


from 9000339  [SPARK-37836][PYTHON][INFRA] Enable F841, E722, E305 and E226 
for PEP 8 compliance
 add 16eb9d8  [SPARK-37419][PYTHON][ML] Rewrite _shared_params_code_gen.py 
to inline type hints for ml/param/shared.py

No new revisions were added by this update.

Summary of changes:
 python/pyspark/ml/param/__init__.pyi   |   5 +-
 python/pyspark/ml/param/_shared_params_code_gen.py | 164 ++-
 .../pyspark/ml/param/_shared_params_code_gen.pyi   |  19 --
 python/pyspark/ml/param/shared.py  | 226 +++--
 python/pyspark/ml/param/shared.pyi | 192 -
 5 files changed, 240 insertions(+), 366 deletions(-)
 delete mode 100644 python/pyspark/ml/param/_shared_params_code_gen.pyi
 delete mode 100644 python/pyspark/ml/param/shared.pyi

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[spark] branch master updated (a1180e4 -> 9000339)

2022-01-07 Thread gurwls223
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.


from a1180e4  Revert "[SPARK-37193][SQL] 
DynamicJoinSelection.shouldDemoteBroadcastHashJoin should not apply to outer 
joins"
 add 9000339  [SPARK-37836][PYTHON][INFRA] Enable F841, E722, E305 and E226 
for PEP 8 compliance

No new revisions were added by this update.

Summary of changes:
 dev/.gitignore   |  2 --
 dev/create-release/generate-contributors.py  |  2 ++
 dev/create-release/releaseutils.py   |  1 +
 dev/create-release/translate-contributors.py |  1 +
 dev/github_jira_sync.py  |  2 +-
 dev/merge_spark_pr.py| 12 +---
 dev/sparktestsupport/modules.py  |  1 +
 dev/sparktestsupport/utils.py|  3 ++-
 dev/tox.ini  | 20 
 examples/src/main/python/logistic_regression.py  |  1 +
 examples/src/main/python/ml/bucketizer_example.py|  2 +-
 examples/src/main/python/sql/basic.py|  1 +
 examples/src/main/python/status_api_demo.py  |  1 +
 examples/src/main/python/streaming/hdfs_wordcount.py |  2 +-
 .../src/main/python/streaming/network_wordcount.py   |  2 +-
 .../python/streaming/network_wordjoinsentiments.py   |  1 +
 .../streaming/recoverable_network_wordcount.py   |  1 +
 .../main/python/streaming/sql_network_wordcount.py   |  2 +-
 .../examples/streaming/kinesis_wordcount_asl.py  |  2 +-
 python/pyspark/context.py|  2 +-
 python/pyspark/daemon.py |  2 +-
 python/pyspark/ml/linalg/__init__.py |  2 +-
 python/pyspark/ml/tests/test_persistence.py  |  2 +-
 python/pyspark/ml/tests/test_tuning.py   |  6 +-
 python/pyspark/ml/tests/test_wrapper.py  |  2 +-
 python/pyspark/mllib/classification.py   |  4 ++--
 python/pyspark/mllib/linalg/__init__.py  |  2 +-
 python/pyspark/mllib/regression.py   |  6 +++---
 python/pyspark/mllib/tests/test_algorithms.py|  2 +-
 .../pyspark/mllib/tests/test_streaming_algorithms.py |  1 -
 python/pyspark/mllib/tests/test_util.py  |  2 +-
 python/pyspark/pandas/data_type_ops/base.py  |  1 -
 python/pyspark/pandas/frame.py   |  8 
 python/pyspark/pandas/sql_processor.py   |  4 ++--
 .../pandas/tests/data_type_ops/test_boolean_ops.py   |  4 ++--
 .../pandas/tests/test_dataframe_conversion.py|  3 +--
 .../pyspark/pandas/tests/test_dataframe_spark_io.py  |  4 
 python/pyspark/pandas/tests/test_series_datetime.py  | 14 --
 python/pyspark/rdd.py|  8 
 python/pyspark/sql/dataframe.py  |  4 ++--
 python/pyspark/sql/tests/test_column.py  |  2 +-
 python/pyspark/sql/tests/test_streaming.py   |  2 +-
 python/pyspark/sql/types.py  |  4 ++--
 python/pyspark/streaming/tests/test_dstream.py   |  8 
 python/pyspark/streaming/tests/test_kinesis.py   |  2 +-
 python/pyspark/streaming/util.py |  6 +++---
 python/pyspark/testing/streamingutils.py |  4 ++--
 python/pyspark/tests/test_context.py |  8 
 python/pyspark/tests/test_profiler.py|  2 +-
 python/pyspark/tests/test_rdd.py |  2 +-
 python/pyspark/tests/test_readwrite.py   | 10 ++
 python/pyspark/tests/test_shuffle.py |  6 +++---
 python/run-tests.py  |  8 
 python/setup.py  |  2 +-
 .../test/resources/data/scripts/input20_script.py|  2 +-
 55 files changed, 107 insertions(+), 103 deletions(-)
 delete mode 100644 dev/.gitignore

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[spark] branch master updated (213c299 -> a1180e4)

2022-01-07 Thread wenchen
This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.


from 213c299  Revert "[SPARK-37833][INFRA] Add `precondition` job to skip 
the main GitHub Action jobs"
 add a1180e4  Revert "[SPARK-37193][SQL] 
DynamicJoinSelection.shouldDemoteBroadcastHashJoin should not apply to outer 
joins"

No new revisions were added by this update.

Summary of changes:
 python/pyspark/pandas/tests/test_ops_on_diff_frames.py |  2 +-
 .../sql/execution/adaptive/DynamicJoinSelection.scala  | 18 +-
 .../execution/adaptive/AdaptiveQueryExecSuite.scala| 17 -
 3 files changed, 6 insertions(+), 31 deletions(-)

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[spark] branch master updated (0f2e9fc -> 213c299)

2022-01-07 Thread gurwls223
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.


from 0f2e9fc  [SPARK-37833][INFRA][FOLLOW-UP] Run checking modules of 
precondition only in forked repository
 add 11950d0  Revert "[SPARK-37833][INFRA][FOLLOW-UP] Run checking modules 
of precondition only in forked repository"
 add 213c299  Revert "[SPARK-37833][INFRA] Add `precondition` job to skip 
the main GitHub Action jobs"

No new revisions were added by this update.

Summary of changes:
 .github/workflows/build_and_test.yml | 61 +---
 1 file changed, 14 insertions(+), 47 deletions(-)

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[spark] branch master updated: [SPARK-37833][INFRA][FOLLOW-UP] Run checking modules of precondition only in forked repository

2022-01-07 Thread gurwls223
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new 0f2e9fc  [SPARK-37833][INFRA][FOLLOW-UP] Run checking modules of 
precondition only in forked repository
0f2e9fc is described below

commit 0f2e9fcc695274f9fa2a0d8be80f3c22db6b7b95
Author: Hyukjin Kwon 
AuthorDate: Fri Jan 7 19:05:38 2022 +0900

[SPARK-37833][INFRA][FOLLOW-UP] Run checking modules of precondition only 
in forked repository

### What changes were proposed in this pull request?

This PR is a followup of https://github.com/apache/spark/pull/35121. We 
should run "Check all modules" in precondition job only in the forked 
repository because `is-changed.py` requires `APACHE_SPARK_REF` to be set: 
https://github.com/apache/spark/blob/master/dev/is-changed.py#L60

### Why are the changes needed?

To fix broken build in main branch. PRs are not affected.

### Does this PR introduce _any_ user-facing change?

No, dev-only.

### How was this patch tested?

Should be merged to test.

Closes #35133 from HyukjinKwon/SPARK-37833.

Authored-by: Hyukjin Kwon 
Signed-off-by: Hyukjin Kwon 
---
 .github/workflows/build_and_test.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/build_and_test.yml 
b/.github/workflows/build_and_test.yml
index 250cd72..2c57a5d 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -116,6 +116,7 @@ jobs:
 git -c user.name='Apache Spark Test Account' -c 
user.email='sparktest...@gmail.com' merge --no-commit --progress --squash 
FETCH_HEAD
 git -c user.name='Apache Spark Test Account' -c 
user.email='sparktest...@gmail.com' commit -m "Merged commit"
 - name: Check all modules
+  if: github.repository != 'apache/spark'
   id: set-outputs
   run: |
 build=`./dev/is-changed.py -m 
avro,build,catalyst,core,docker-integration-tests,examples,graphx,hadoop-cloud,hive,hive-thriftserver,kubernetes,kvstore,launcher,mesos,mllib,mllib-local,network-common,network-shuffle,pyspark-core,pyspark-ml,pyspark-mllib,pyspark-pandas,pyspark-pandas-slow,pyspark-resource,pyspark-sql,pyspark-streaming,repl,sketch,spark-ganglia-lgpl,sparkr,sql,sql-kafka-0-10,streaming,streaming-kafka-0-10,streaming-kinesis-asl,tags,unsafe,yarn`

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[spark] branch master updated: [SPARK-37833][INFRA] Add `precondition` job to skip the main GitHub Action jobs

2022-01-07 Thread dongjoon
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new 2a329a8  [SPARK-37833][INFRA] Add `precondition` job to skip the main 
GitHub Action jobs
2a329a8 is described below

commit 2a329a853e42bb42d874e1736f04b84a70efb561
Author: Dongjoon Hyun 
AuthorDate: Fri Jan 7 01:25:12 2022 -0800

[SPARK-37833][INFRA] Add `precondition` job to skip the main GitHub Action 
jobs

### What changes were proposed in this pull request?

This PR aims to introduce `precondition` jobs to skip the main GitHub 
Action jobs.

### Why are the changes needed?

This will save huge community GitHub Action resource and speed up our 
develop and PR review process.
- For example, GitHub Action will run only `linter` job for `docs` only PR.

https://user-images.githubusercontent.com/9700541/148512753-bd9b7e49-0e7b-47dd-9ce5-31f684dac666.png;>

### Does this PR introduce _any_ user-facing change?

No. This is a dev-only infra.

### How was this patch tested?

Manually review and check the result on this PR.

Closes #35121 from dongjoon-hyun/SPARK-37833.

Authored-by: Dongjoon Hyun 
Signed-off-by: Dongjoon Hyun 
---
 .github/workflows/build_and_test.yml | 60 +++-
 1 file changed, 46 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/build_and_test.yml 
b/.github/workflows/build_and_test.yml
index 0f04147..250cd72 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -96,15 +96,46 @@ jobs:
   echo '::set-output name=hadoop::hadoop3'
 fi
 
+  precondition:
+name: Check changes
+runs-on: ubuntu-20.04
+outputs:
+  required: ${{ steps.set-outputs.outputs.required }}
+steps:
+- name: Checkout Spark repository
+  uses: actions/checkout@v2
+  with:
+fetch-depth: 0
+repository: apache/spark
+ref: master
+- name: Sync the current branch with the latest in Apache Spark
+  if: github.repository != 'apache/spark'
+  run: |
+echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
+git fetch https://github.com/$GITHUB_REPOSITORY.git 
${GITHUB_REF#refs/heads/}
+git -c user.name='Apache Spark Test Account' -c 
user.email='sparktest...@gmail.com' merge --no-commit --progress --squash 
FETCH_HEAD
+git -c user.name='Apache Spark Test Account' -c 
user.email='sparktest...@gmail.com' commit -m "Merged commit"
+- name: Check all modules
+  id: set-outputs
+  run: |
+build=`./dev/is-changed.py -m 
avro,build,catalyst,core,docker-integration-tests,examples,graphx,hadoop-cloud,hive,hive-thriftserver,kubernetes,kvstore,launcher,mesos,mllib,mllib-local,network-common,network-shuffle,pyspark-core,pyspark-ml,pyspark-mllib,pyspark-pandas,pyspark-pandas-slow,pyspark-resource,pyspark-sql,pyspark-streaming,repl,sketch,spark-ganglia-lgpl,sparkr,sql,sql-kafka-0-10,streaming,streaming-kafka-0-10,streaming-kinesis-asl,tags,unsafe,yarn`
+pyspark=`./dev/is-changed.py -m 
avro,build,catalyst,core,graphx,hive,kvstore,launcher,mllib,mllib-local,network-common,network-shuffle,pyspark-core,pyspark-ml,pyspark-mllib,pyspark-pandas,pyspark-pandas-slow,pyspark-resource,pyspark-sql,pyspark-streaming,repl,sketch,sql,tags,unsafe`
+sparkr=`./dev/is-changed.py -m 
avro,build,catalyst,core,hive,kvstore,launcher,mllib,mllib-local,network-common,network-shuffle,repl,sketch,sparkr,sql,tags,unsafe`
+tpcds=`./dev/is-changed.py -m 
build,catalyst,core,hive,kvstore,launcher,network-common,network-shuffle,repl,sketch,sql,tags,unsafe`
+docker=`./dev/is-changed.py -m 
build,catalyst,core,docker-integration-tests,hive,kvstore,launcher,network-common,network-shuffle,repl,sketch,sql,tags,unsafe`
+echo "{\"build\": \"$build\", \"pyspark\": \"$pyspark\", \"sparkr\": 
\"$sparkr\", \"tpcds\": \"$tpcds\", \"docker\": \"$docker\"}" > required.json
+cat required.json
+echo "::set-output name=required::$(cat required.json)"
+
   # Build: build Spark and run the tests for specified modules.
   build:
 name: "Build modules (${{ format('{0}, {1} job', 
needs.configure-jobs.outputs.branch, needs.configure-jobs.outputs.type) }}): 
${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ 
matrix.hadoop }}, ${{ matrix.hive }})"
-needs: configure-jobs
+needs: [configure-jobs, precondition]
 # Run scheduled jobs for Apache Spark only
 # Run regular jobs for commit in both Apache Spark and forked repository
 if: >-
   (github.repository == 'apache/spark' && 
needs.configure-jobs.outputs.type == 'scheduled')
-  || needs.configure-jobs.outputs.type == 'regular'
+  || (needs.configure-jobs.outputs.type ==