This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new e7b48b4aa [CI] Add pre-commit hook shellcheck-py to lint shell files (#1626)
e7b48b4aa is described below
commit e7b48b4aa3ba206dae175ab7220d810b4d059c9d
Author: John Bampton <[email protected]>
AuthorDate: Thu Oct 24 11:16:12 2024 +1000
[CI] Add pre-commit hook shellcheck-py to lint shell files (#1626)
https://github.com/shellcheck-py/shellcheck-py
https://www.shellcheck.net/
https://github.com/koalaman/shellcheck
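
For reference, the new hook can be exercised locally through the standard pre-commit CLI (the hook id is shellcheck, as configured below). A minimal sketch, assuming pre-commit is installed from PyPI:

    pip install pre-commit                   # install the pre-commit framework
    pre-commit install                       # register the git pre-commit hook
    pre-commit run shellcheck --all-files    # run only the shellcheck hook on all tracked files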
---
.pre-commit-config.yaml | 4 ++++
.shellcheckrc | 1 +
docker/sedona-spark-jupyterlab/build.sh | 14 +++++++-------
docker/sedona-spark-jupyterlab/start.sh | 18 +++++++++---------
docker/sedona.sh | 18 +++++++++---------
docker/spark.sh | 16 ++++++++--------
6 files changed, 38 insertions(+), 33 deletions(-)
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9a41bd1a3..2d35c90bc 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -91,6 +91,10 @@ repos:
exclude: ^\.github/.*$
types: [markdown]
files: \.(md|mdown|markdown)$
+ - repo: https://github.com/shellcheck-py/shellcheck-py
+ rev: v0.10.0.1
+ hooks:
+ - id: shellcheck
- repo: https://github.com/adrienverge/yamllint
rev: v1.35.1
hooks:
diff --git a/.shellcheckrc b/.shellcheckrc
new file mode 100644
index 000000000..2279b949f
--- /dev/null
+++ b/.shellcheckrc
@@ -0,0 +1 @@
+disable=SC2004,SC2041,SC2155,SC2181
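
The new .shellcheckrc above disables four checks repository-wide. For reference, shellcheck also supports narrower per-line opt-outs via an inline directive placed immediately above the flagged command; an illustrative sketch, not part of this patch:

    # shellcheck disable=SC2181
    if [ $? -ne 0 ]; then
        echo "previous command failed" >&2
        exit 1
    fi

SC2181 is the check that flags indirect exit-status tests such as the if [ $? -ne 0 ] blocks kept in docker/sedona-spark-jupyterlab/start.sh below, which is presumably why it is on the repo-wide disable list.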
diff --git a/docker/sedona-spark-jupyterlab/build.sh b/docker/sedona-spark-jupyterlab/build.sh
index c13c91368..960f97cf3 100755
--- a/docker/sedona-spark-jupyterlab/build.sh
+++ b/docker/sedona-spark-jupyterlab/build.sh
@@ -23,7 +23,7 @@ BUILD_MODE=$3
GEOTOOLS_VERSION=${4:-auto}
SEDONA_SPARK_VERSION=${SPARK_VERSION:0:3}
-if [ ${SPARK_VERSION:0:1} -eq "3" ] && [ ${SPARK_VERSION:2:1} -le "3" ]; then
+if [ "${SPARK_VERSION:0:1}" -eq "3" ] && [ "${SPARK_VERSION:2:1}" -le "3" ];
then
# 3.0, 3.1, 3.2, 3.3
SEDONA_SPARK_VERSION=3.0
fi
@@ -42,7 +42,7 @@ get_latest_version_with_suffix() {
# Fetch the maven-metadata.xml file
METADATA_URL="${BASE_URL}maven-metadata.xml"
- METADATA_XML=$(curl -s $METADATA_URL)
+ METADATA_XML=$(curl -s "$METADATA_URL")
# Extract versions from the XML
VERSIONS=$(echo "$METADATA_XML" | grep -o '<version>[^<]*</version>' | awk -F'[<>]' '{print $3}')
@@ -52,7 +52,7 @@ get_latest_version_with_suffix() {
# Filter versions that end with the specified suffix and find the largest one
for VERSION in $VERSIONS; do
if [[ $VERSION == *$SUFFIX ]]; then
- if [[ -z $LATEST_VERSION ]] || version_gt $VERSION $LATEST_VERSION; then
+ if [[ -z $LATEST_VERSION ]] || version_gt "$VERSION" "$LATEST_VERSION"; then
LATEST_VERSION=$VERSION
fi
fi
@@ -61,7 +61,7 @@ get_latest_version_with_suffix() {
if [[ -z $LATEST_VERSION ]]; then
exit 1
else
- echo $LATEST_VERSION
+ echo "$LATEST_VERSION"
fi
}
@@ -80,7 +80,7 @@ if [ "$SEDONA_VERSION" = "latest" ]; then
echo "Using latest geotools-wrapper version: $GEOTOOLS_WRAPPER_VERSION"
# The compilation must take place outside Docker to avoid unnecessary maven packages
- mvn clean install -DskipTests -Dspark=${SEDONA_SPARK_VERSION} -Dscala=2.12
+ mvn clean install -DskipTests -Dspark="${SEDONA_SPARK_VERSION}" -Dscala=2.12
fi
# -- Building the image
@@ -92,7 +92,7 @@ if [ -z "$BUILD_MODE" ] || [ "$BUILD_MODE" = "local" ]; then
--build-arg sedona_version="${SEDONA_VERSION}" \
--build-arg geotools_wrapper_version="${GEOTOOLS_WRAPPER_VERSION}" \
-f docker/sedona-spark-jupyterlab/sedona-jupyterlab.dockerfile \
- -t apache/sedona:${SEDONA_VERSION} .
+ -t apache/sedona:"${SEDONA_VERSION}" .
else
# If release, build the image for cross-platform
docker buildx build --platform linux/amd64,linux/arm64 \
@@ -103,5 +103,5 @@ else
--build-arg sedona_version="${SEDONA_VERSION}" \
--build-arg geotools_wrapper_version="${GEOTOOLS_WRAPPER_VERSION}" \
-f docker/sedona-spark-jupyterlab/sedona-jupyterlab.dockerfile \
- -t apache/sedona:${SEDONA_VERSION} .
+ -t apache/sedona:"${SEDONA_VERSION}" .
fi
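
The quoting changes above (and in the scripts below) address shellcheck's word-splitting and globbing warnings: an unquoted expansion whose value contains whitespace is split into multiple arguments before the command runs. An illustrative sketch with a hypothetical value, not taken from the patch:

    SEDONA_VERSION="1.6.1 custom"            # hypothetical value containing a space
    printf '<%s>\n' $SEDONA_VERSION          # unquoted: two arguments, <1.6.1> then <custom>
    printf '<%s>\n' "$SEDONA_VERSION"        # quoted: one argument, <1.6.1 custom>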
diff --git a/docker/sedona-spark-jupyterlab/start.sh b/docker/sedona-spark-jupyterlab/start.sh
index e5de2b70d..99391977d 100755
--- a/docker/sedona-spark-jupyterlab/start.sh
+++ b/docker/sedona-spark-jupyterlab/start.sh
@@ -29,7 +29,7 @@ convert_to_mb() {
echo $(($mem_value * 1024))
;;
[mM])
- echo $mem_value
+ echo "$mem_value"
;;
*)
echo "Invalid memory unit: $mem_str" >&2
@@ -39,13 +39,13 @@ convert_to_mb() {
}
# Convert DRIVER_MEM and EXECUTOR_MEM to megabytes
-DRIVER_MEM_MB=$(convert_to_mb $DRIVER_MEM)
+DRIVER_MEM_MB=$(convert_to_mb "$DRIVER_MEM")
if [ $? -ne 0 ]; then
echo "Error converting DRIVER_MEM to megabytes." >&2
exit 1
fi
-EXECUTOR_MEM_MB=$(convert_to_mb $EXECUTOR_MEM)
+EXECUTOR_MEM_MB=$(convert_to_mb "$EXECUTOR_MEM")
if [ $? -ne 0 ]; then
echo "Error converting EXECUTOR_MEM to megabytes." >&2
exit 1
@@ -58,7 +58,7 @@ TOTAL_PHYSICAL_MEM_MB=$(free -m | awk '/^Mem:/{print $2}')
TOTAL_REQUIRED_MEM_MB=$(($DRIVER_MEM_MB + $EXECUTOR_MEM_MB))
# Compare total required memory with total physical memory
-if [ $TOTAL_REQUIRED_MEM_MB -gt $TOTAL_PHYSICAL_MEM_MB ]; then
+if [ $TOTAL_REQUIRED_MEM_MB -gt "$TOTAL_PHYSICAL_MEM_MB" ]; then
echo "Error: Insufficient memory" >&2
echo " total: $TOTAL_PHYSICAL_MEM_MB MB" >&2
echo " required: $TOTAL_REQUIRED_MEM_MB MB (driver: $DRIVER_MEM_MB MB,
executor: $EXECUTOR_MEM_MB MB)" >&2
@@ -68,14 +68,14 @@ if [ $TOTAL_REQUIRED_MEM_MB -gt $TOTAL_PHYSICAL_MEM_MB ]; then
fi
# Configure spark
-cp ${SPARK_HOME}/conf/spark-env.sh.template ${SPARK_HOME}/conf/spark-env.sh
-echo "SPARK_WORKER_MEMORY=${EXECUTOR_MEM}" >> ${SPARK_HOME}/conf/spark-env.sh
-echo "spark.driver.memory $DRIVER_MEM" >>
${SPARK_HOME}/conf/spark-defaults.conf
-echo "spark.executor.memory $EXECUTOR_MEM" >>
${SPARK_HOME}/conf/spark-defaults.conf
+cp "${SPARK_HOME}"/conf/spark-env.sh.template "${SPARK_HOME}"/conf/spark-env.sh
+echo "SPARK_WORKER_MEMORY=${EXECUTOR_MEM}" >> "${SPARK_HOME}"/conf/spark-env.sh
+echo "spark.driver.memory $DRIVER_MEM" >>
"${SPARK_HOME}"/conf/spark-defaults.conf
+echo "spark.executor.memory $EXECUTOR_MEM" >>
"${SPARK_HOME}"/conf/spark-defaults.conf
# Start spark standalone cluster
service ssh start
-${SPARK_HOME}/sbin/start-all.sh
+"${SPARK_HOME}"/sbin/start-all.sh
# Start jupyter lab
exec jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root --NotebookApp.token=
diff --git a/docker/sedona.sh b/docker/sedona.sh
index 6392c5b8a..af8446c58 100755
--- a/docker/sedona.sh
+++ b/docker/sedona.sh
@@ -25,29 +25,29 @@ spark_extension_version=$4
spark_compat_version=${spark_version:0:3}
sedona_spark_version=${spark_compat_version}
-if [ ${spark_version:0:1} -eq "3" ] && [ ${spark_version:2:1} -le "3" ]; then
+if [ "${spark_version:0:1}" -eq "3" ] && [ "${spark_version:2:1}" -le "3" ];
then
# 3.0, 3.1, 3.2, 3.3
sedona_spark_version=3.0
fi
-if [ $sedona_version = "latest" ]; then
+if [ "$sedona_version" = "latest" ]; then
# Code to execute when SEDONA_VERSION is "latest"
- cp ${SEDONA_HOME}/spark-shaded/target/sedona-spark-shaded-*.jar ${SPARK_HOME}/jars/
- cd ${SEDONA_HOME}/python;pip3 install .
+ cp "${SEDONA_HOME}"/spark-shaded/target/sedona-spark-shaded-*.jar "${SPARK_HOME}"/jars/
+ cd "${SEDONA_HOME}"/python;pip3 install .
else
# Code to execute when SEDONA_VERSION is not "latest"
# Download Sedona
- curl https://repo1.maven.org/maven2/org/apache/sedona/sedona-spark-shaded-${sedona_spark_version}_2.12/${sedona_version}/sedona-spark-shaded-${sedona_spark_version}_2.12-${sedona_version}.jar -o $SPARK_HOME/jars/sedona-spark-shaded-${sedona_spark_version}_2.12-${sedona_version}.jar
+ curl https://repo1.maven.org/maven2/org/apache/sedona/sedona-spark-shaded-"${sedona_spark_version}"_2.12/"${sedona_version}"/sedona-spark-shaded-"${sedona_spark_version}"_2.12-"${sedona_version}".jar -o "$SPARK_HOME"/jars/sedona-spark-shaded-"${sedona_spark_version}"_2.12-"${sedona_version}".jar
# Install Sedona Python
- pip3 install apache-sedona==${sedona_version}
+ pip3 install apache-sedona=="${sedona_version}"
fi
# Download gresearch spark extension
-curl https://repo1.maven.org/maven2/uk/co/gresearch/spark/spark-extension_2.12/${spark_extension_version}-${spark_compat_version}/spark-extension_2.12-${spark_extension_version}-${spark_compat_version}.jar -o $SPARK_HOME/jars/spark-extension_2.12-${spark_extension_version}-${spark_compat_version}.jar
+curl https://repo1.maven.org/maven2/uk/co/gresearch/spark/spark-extension_2.12/"${spark_extension_version}"-"${spark_compat_version}"/spark-extension_2.12-"${spark_extension_version}"-"${spark_compat_version}".jar -o "$SPARK_HOME"/jars/spark-extension_2.12-"${spark_extension_version}"-"${spark_compat_version}".jar
# Install Spark extension Python
-pip3 install pyspark-extension==${spark_extension_version}.${spark_compat_version}
+pip3 install pyspark-extension=="${spark_extension_version}"."${spark_compat_version}"
# Download GeoTools jar
-curl https://repo1.maven.org/maven2/org/datasyslab/geotools-wrapper/${geotools_wrapper_version}/geotools-wrapper-${geotools_wrapper_version}.jar -o $SPARK_HOME/jars/geotools-wrapper-${geotools_wrapper_version}.jar
+curl https://repo1.maven.org/maven2/org/datasyslab/geotools-wrapper/"${geotools_wrapper_version}"/geotools-wrapper-"${geotools_wrapper_version}".jar -o "$SPARK_HOME"/jars/geotools-wrapper-"${geotools_wrapper_version}".jar
diff --git a/docker/spark.sh b/docker/spark.sh
index d978b4f49..8cca154a3 100755
--- a/docker/spark.sh
+++ b/docker/spark.sh
@@ -30,20 +30,20 @@ apt-get install -y openjdk-19-jdk-headless curl python3-pip maven
pip3 install --upgrade pip && pip3 install pipenv
# Download Spark jar and set up PySpark
-curl https://archive.apache.org/dist/spark/spark-${spark_version}/spark-${spark_version}-bin-hadoop${hadoop_version}.tgz -o spark.tgz
-tar -xf spark.tgz && mv spark-${spark_version}-bin-hadoop${hadoop_version}/* ${SPARK_HOME}/
-rm spark.tgz && rm -rf spark-${spark_version}-bin-hadoop${hadoop_version}
-pip3 install pyspark==${spark_version}
+curl https://archive.apache.org/dist/spark/spark-"${spark_version}"/spark-"${spark_version}"-bin-hadoop"${hadoop_version}".tgz -o spark.tgz
+tar -xf spark.tgz && mv spark-"${spark_version}"-bin-hadoop"${hadoop_version}"/* "${SPARK_HOME}"/
+rm spark.tgz && rm -rf spark-"${spark_version}"-bin-hadoop"${hadoop_version}"
+pip3 install pyspark=="${spark_version}"
# Add S3 jars
-curl https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/${hadoop_s3_version}/hadoop-aws-${hadoop_s3_version}.jar -o ${SPARK_HOME}/jars/hadoop-aws-${hadoop_s3_version}.jar
-curl https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${aws_sdk_version}/aws-java-sdk-bundle-${aws_sdk_version}.jar -o ${SPARK_HOME}/jars/aws-java-sdk-bundle-${aws_sdk_version}.jar
+curl https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/"${hadoop_s3_version}"/hadoop-aws-"${hadoop_s3_version}".jar -o "${SPARK_HOME}"/jars/hadoop-aws-"${hadoop_s3_version}".jar
+curl https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/"${aws_sdk_version}"/aws-java-sdk-bundle-"${aws_sdk_version}".jar -o "${SPARK_HOME}"/jars/aws-java-sdk-bundle-"${aws_sdk_version}".jar
# Add spark-xml jar
-curl https://repo1.maven.org/maven2/com/databricks/spark-xml_2.12/${spark_xml_version}/spark-xml_2.12-${spark_xml_version}.jar -o ${SPARK_HOME}/jars/spark-xml_2.12-${spark_xml_version}.jar
+curl https://repo1.maven.org/maven2/com/databricks/spark-xml_2.12/"${spark_xml_version}"/spark-xml_2.12-"${spark_xml_version}".jar -o "${SPARK_HOME}"/jars/spark-xml_2.12-"${spark_xml_version}".jar
# Set up master IP address and executor memory
-cp ${SPARK_HOME}/conf/spark-defaults.conf.template ${SPARK_HOME}/conf/spark-defaults.conf
+cp "${SPARK_HOME}"/conf/spark-defaults.conf.template "${SPARK_HOME}"/conf/spark-defaults.conf
# Install required libraries for GeoPandas on Apple chip mac
apt-get install -y gdal-bin libgdal-dev