huaxingao commented on code in PR #56453:
URL: https://github.com/apache/spark/pull/56453#discussion_r3398870997


##########
dev/make-distribution.sh:
##########
@@ -259,9 +274,38 @@ if [ "$MAKE_PIP" == "true" ]; then
   pushd "$SPARK_HOME/python" > /dev/null
   # Delete the egg info file if it exists, this can cache older setup files.
   rm -rf pyspark.egg-info || echo "No existing egg info file, skipping 
deletion"
+  # Ship the Apache LICENSE and NOTICE inside the PySpark source distributions
+  # (see MANIFEST.in). These are removed again after the sdists are built.
+  #
+  # The classic pyspark sdist bundles the assembly jars 
(packaging/classic/setup.py
+  # builds a deps/jars symlink farm), so it ships the binary LICENSE/NOTICE 
that
+  # enumerate the bundled third-party jars' licenses, mirroring the binary
+  # distribution above. The connect and client sdists bundle no jars and ship 
the
+  # plain source LICENSE/NOTICE.
+  if [ -e "$SPARK_HOME/LICENSE-binary" ]; then
+    cp "$SPARK_HOME/LICENSE-binary" LICENSE
+    cp "$SPARK_HOME/NOTICE-binary" NOTICE
+  else
+    cp "$SPARK_HOME/LICENSE" LICENSE
+    cp "$SPARK_HOME/NOTICE" NOTICE
+  fi
   python3 packaging/classic/setup.py sdist
+
+  cp "$SPARK_HOME/LICENSE" LICENSE
+  cp "$SPARK_HOME/NOTICE" NOTICE
   python3 packaging/connect/setup.py sdist
   python3 packaging/client/setup.py sdist
+  rm -f LICENSE NOTICE
+
+  # Guard against regressions: every PySpark sdist must contain LICENSE and 
NOTICE
+  # at the package root. The missing files were only caught by a Spark 4.2.0 
RC1
+  # vote -1 (SPARK-57393); fail the release build here instead of at vote time.
+  for f in dist/pyspark*.tar.gz; do
+    for required in LICENSE NOTICE; do
+      tar tzf "$f" | grep -qE "/$required\$" || \
+        { echo "ERROR: $f is missing $required at the package root"; exit 1; }
+    done
+  done

Review Comment:
   Done



##########
dev/make-distribution.sh:
##########
@@ -272,9 +316,32 @@ if [ "$MAKE_R" == "true" ]; then
   echo "Building R source package"
   R_PACKAGE_VERSION=`grep Version "$SPARK_HOME/R/pkg/DESCRIPTION" | awk 
'{print $NF}'`
   pushd "$SPARK_HOME/R" > /dev/null
+  # Ship the Apache LICENSE and NOTICE inside the SparkR source package. These
+  # are removed again after the package is built.
+  cp "$SPARK_HOME/LICENSE" pkg/LICENSE
+  cp "$SPARK_HOME/NOTICE" pkg/NOTICE
+  # Reference the bundled LICENSE from DESCRIPTION so `R CMD check --as-cran` 
does
+  # not emit "File LICENSE is not mentioned in the DESCRIPTION file". The 
committed
+  # DESCRIPTION is left untouched because SparkR CI runs check-cran.sh without 
the
+  # LICENSE file present; this edit is transient and restored after the build 
(and
+  # by the EXIT trap on failure). The backup lives outside pkg/ so R CMD check 
does
+  # not flag it as a non-standard file. NOTE: the "Non-standard file 'NOTICE'" 
note
+  # cannot be silenced this way and is expected.
+  cp pkg/DESCRIPTION "$SPARK_HOME/R/DESCRIPTION.orig"
+  sed 's/^License: Apache License (== 2.0)$/License: Apache License (== 2.0) + 
file LICENSE/' \
+    "$SPARK_HOME/R/DESCRIPTION.orig" > pkg/DESCRIPTION
   # Build source package and run full checks
   # Do not source the check-cran.sh - it should be run from where it is for it 
to set SPARK_HOME
   NO_TESTS=1 "$SPARK_HOME/R/check-cran.sh"
+  mv -f "$SPARK_HOME/R/DESCRIPTION.orig" pkg/DESCRIPTION
+  rm -f pkg/LICENSE pkg/NOTICE
+
+  # Guard against regressions: the SparkR source package must contain LICENSE 
and
+  # NOTICE at the package root (SPARK-57393).
+  for required in LICENSE NOTICE; do
+    tar tzf "SparkR_$R_PACKAGE_VERSION.tar.gz" | grep -qE "/$required\$" || \
+      { echo "ERROR: SparkR source package is missing $required"; exit 1; }
+  done

Review Comment:
   Done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to