ShreyeshArangath commented on code in PR #1310:
URL: https://github.com/apache/auron/pull/1310#discussion_r2364024866
##########
.github/workflows/tpcds-reusable.yml:
##########
@@ -223,19 +236,33 @@ jobs:
id: setup-spark-bin
if: steps.cache-spark-bin.outputs.cache-hit != 'true'
run: |
- wget -c ${{ steps.get-spark-version.outputs.sparkurl }}
+ SPARK_PATH="spark/spark-${{
steps.get-dependency-version.outputs.sparkversion }}"
+ if [ ${{ inputs.scalaver }} = "2.13" ]; then
+ SPARK_FILE="spark-${{
steps.get-dependency-version.outputs.sparkversion }}-bin-${{
inputs.hadoop-profile }}-scala${{ inputs.scalaver }}.tgz"
+ else
+ SPARK_FILE="spark-${{
steps.get-dependency-version.outputs.sparkversion }}-bin-${{
inputs.hadoop-profile }}.tgz"
+ fi
+ SPARK_URL="${APACHE_MIRROR}/${SPARK_PATH}/${SPARK_FILE}"
+
+ wget -c ${SPARK_URL}
Review Comment:
Should we add retries/timeouts here?
##########
.github/workflows/tpcds-reusable.yml:
##########
@@ -255,86 +282,98 @@ jobs:
java-version: ${{ inputs.javaver }}
cache: 'maven'
- - name: Cache Celeborn-${{ inputs.celebornver }}
+ - name: Cache Celeborn-${{
steps.get-dependency-version.outputs.celebornversion }}
uses: actions/cache@v4
- if: ${{ inputs.celebornver != '' && inputs.celebornurl != '' }}
+ if: ${{ inputs.celebornver != '' }}
id: cache-celeborn-bin
with:
- path: celeborn-bin-${{ inputs.celebornver }}
- key: celeborn-bin-${{ inputs.celebornver }}
+ path: celeborn-bin-${{
steps.get-dependency-version.outputs.celebornversion }}
+ key: celeborn-bin-${{
steps.get-dependency-version.outputs.celebornversion }}
- - name: Setup Celeborn-${{ inputs.celebornver }}
+ - name: Setup Celeborn-${{
steps.get-dependency-version.outputs.celebornversion }}
id: setup-celeborn-bin
- if: ${{ inputs.celebornver != '' && inputs.celebornurl != '' &&
steps.cache-celeborn-bin.outputs.cache-hit != 'true' }}
+ if: ${{ inputs.celebornver != '' &&
steps.cache-celeborn-bin.outputs.cache-hit != 'true' }}
run: |
- wget -c ${{ inputs.celebornurl }} && \
- mkdir -p celeborn-bin-${{ inputs.celebornver }} && \
- cd celeborn-bin-${{ inputs.celebornver }} && tar -xf
../apache-celeborn-*.tgz --strip-component=1
-
- - name: Start Celeborn-${{ inputs.celebornver }}
- if: ${{ inputs.celebornver != '' && inputs.celebornurl != '' }}
+ CELEBORN_PATH="celeborn/celeborn-${{
steps.get-dependency-version.outputs.celebornversion }}"
+ CELEBORN_FILE="apache-celeborn-${{
steps.get-dependency-version.outputs.celebornversion }}-bin.tgz"
+ CELEBORN_URL="${APACHE_MIRROR}/${CELEBORN_PATH}/${CELEBORN_FILE}"
+
+ wget -c ${CELEBORN_URL} && \
+ mkdir -p celeborn-bin-${{
steps.get-dependency-version.outputs.celebornversion }} && \
+ cd celeborn-bin-${{
steps.get-dependency-version.outputs.celebornversion }} && tar -xf
../apache-celeborn-*.tgz --strip-component=1
+
+ - name: Start Celeborn-${{
steps.get-dependency-version.outputs.celebornversion }}
+ if: ${{ inputs.celebornver != '' }}
run: |
mkdir -p /tmp/rss/data && mkdir -p /tmp/rss/logs && \
- cd celeborn-bin-${{ inputs.celebornver }} && \
+ cd celeborn-bin-${{
steps.get-dependency-version.outputs.celebornversion }} && \
bash -c "echo -e
'CELEBORN_MASTER_MEMORY=4g\nCELEBORN_WORKER_MEMORY=4g\nCELEBORN_WORKER_OFFHEAP_MEMORY=8g\nCELEBORN_LOG_DIR=/tmp/rss/logs'
> ./conf/celeborn-env.sh" && \
bash -c "echo -e 'celeborn.worker.storage.dirs
/tmp/rss\nceleborn.worker.storage.workingDir
data\nceleborn.worker.commitFiles.threads
128\nceleborn.worker.sortPartition.threads 64' > ./conf/celeborn-defaults.conf"
&& \
bash ./sbin/start-master.sh && \
bash ./sbin/start-worker.sh
- name: Install Celeborn JAR
- if: ${{ inputs.celebornver != '' && inputs.celebornurl != '' }}
+ if: ${{ inputs.celebornver != '' }}
run: |
- ls -la celeborn-bin-${{ inputs.celebornver }}/spark
- cp celeborn-bin-${{ inputs.celebornver
}}/spark/celeborn-client-spark-*_${{ inputs.scalaver }}-*.jar spark-bin-${{
inputs.sparkver }}_${{ inputs.scalaver }}/jars/
+ ls -la celeborn-bin-${{
steps.get-dependency-version.outputs.celebornversion }}/spark
+ cp celeborn-bin-${{
steps.get-dependency-version.outputs.celebornversion
}}/spark/celeborn-client-spark-*_${{ inputs.scalaver }}-*.jar spark-bin-${{
inputs.sparkver }}_${{ inputs.scalaver }}/jars/
- - name: Cache Uniffle-${{ inputs.unifflever }}
+ - name: Cache Uniffle-${{
steps.get-dependency-version.outputs.uniffleversion }}
uses: actions/cache@v4
- if: ${{ inputs.unifflever != '' && inputs.uniffleurl != '' }}
+ if: ${{ inputs.unifflever != '' }}
id: cache-uniffle-bin
with:
- path: uniffle-bin-${{ inputs.unifflever }}
- key: uniffle-bin-${{ inputs.unifflever }}
+ path: uniffle-bin-${{
steps.get-dependency-version.outputs.uniffleversion }}
+ key: uniffle-bin-${{
steps.get-dependency-version.outputs.uniffleversion }}
- - name: Setup Uniffle-${{ inputs.unifflever }}
+ - name: Setup Uniffle-${{
steps.get-dependency-version.outputs.uniffleversion }}
id: setup-uniffle-bin
- if: ${{ inputs.unifflever != '' && inputs.uniffleurl != '' &&
steps.cache-uniffle-bin.outputs.cache-hit != 'true' }}
+ if: ${{ inputs.unifflever != '' &&
steps.cache-uniffle-bin.outputs.cache-hit != 'true' }}
run: |
- wget -c ${{ inputs.uniffleurl }} && \
- mkdir -p uniffle-bin-${{ inputs.unifflever }} && \
- tar -xf ./apache-uniffle-${{ inputs.unifflever
}}-incubating-bin.tar.gz -C uniffle-bin-${{ inputs.unifflever }}
--strip-component=1
+ UNIFFLE_PATH="uniffle/${{
steps.get-dependency-version.outputs.uniffleversion }}"
+ UNIFFLE_FILE="apache-uniffle-${{
steps.get-dependency-version.outputs.uniffleversion }}-incubating-bin.tar.gz"
+ UNIFFLE_URL="${APACHE_MIRROR}/${UNIFFLE_PATH}/${UNIFFLE_FILE}"
+
+ wget -c ${UNIFFLE_URL} && \
+ mkdir -p uniffle-bin-${{
steps.get-dependency-version.outputs.uniffleversion }} && \
+ tar -xf ./${UNIFFLE_FILE} -C uniffle-bin-${{
steps.get-dependency-version.outputs.uniffleversion }} --strip-component=1
- name: Cache hadoop-${{ inputs.hadoopver }}
uses: actions/cache@v4
- if: ${{ inputs.hadoopver != '' && inputs.hadoopurl != '' }}
+ if: ${{ inputs.hadoopver != '' }}
id: cache-hadoop-bin
with:
path: hadoop-bin-${{ inputs.hadoopver }}
key: hadoop-bin-${{ inputs.hadoopver }}
- name: Setup hadoop-${{ inputs.hadoopver }}
id: setup-hadoop-bin
- if: ${{ inputs.hadoopver != '' && inputs.hadoopurl != '' &&
steps.cache-hadoop-bin.outputs.cache-hit != 'true' }}
+ if: ${{ inputs.hadoopver != '' &&
steps.cache-hadoop-bin.outputs.cache-hit != 'true' }}
run: |
- wget -c ${{ inputs.hadoopurl }} && \
+ HADOOP_PATH="hadoop/common/hadoop-${{ inputs.hadoopver }}"
+ HADOOP_FILE="hadoop-${{ inputs.hadoopver }}.tar.gz"
+ HADOOP_URL="${APACHE_MIRROR}/${HADOOP_PATH}/${HADOOP_FILE}"
+
+ wget -c ${HADOOP_URL} && \
Review Comment:
Same as above — should we also add retries/timeouts to this wget call? ^
##########
.github/workflows/tpcds-reusable.yml:
##########
@@ -178,12 +159,38 @@ jobs:
run: |
sed -i 's/opt-level = 1/opt-level = 0/g' Cargo.toml # use opt-level 0
rm -f .build-checksum_*.cache
- ./build/mvn package -Ppre -P${{ inputs.sparkver }} -Pscala-${{
inputs.scalaver }} -Pjdk-${{ inputs.javaver }} ${{ inputs.extrabuildopt }}
+
+ SPARK_NUMBER="${{ inputs.sparkver }}"
+ SPARK_NUMBER="${SPARK_NUMBER#spark-}"
+
+ CELEBORN_NUMBER="${{ inputs.celebornver }}"
+ if [ -n "${{ inputs.celebornver }}" ]; then
+ CELEBORN_NUMBER="${CELEBORN_NUMBER#celeborn-}"
+ fi
+
+ UNIFFLE_NUMBER="${{ inputs.unifflever }}"
+ if [ -n "${{ inputs.unifflever }}" ]; then
+ UNIFFLE_NUMBER="${UNIFFLE_NUMBER#uniffle-}"
+ fi
+
+ CMD="./auron-build.sh --pre --sparkver $SPARK_NUMBER --scalaver ${{
inputs.scalaver }}"
+ if [ -n "${{ inputs.celebornver }}" ]; then
+ CMD="$CMD --celeborn $CELEBORN_NUMBER"
+ fi
+ if [ -n "${{ inputs.unifflever }}" ]; then
+ CMD="$CMD --uniffle $UNIFFLE_NUMBER"
+ fi
+
+ echo "Running: $CMD"
+ eval $CMD
Review Comment:
Using eval on a constructed command string can be unsafe and fragile; should
we look into using exec here instead?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]