This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 09508528e0 [SYSTEMDS-2926] AWS script fix for EMR-7.0.0, part 2
09508528e0 is described below
commit 09508528e0ed893ffb38c43cf83e8ef9993d9efe
Author: lachezar-n <[email protected]>
AuthorDate: Sat Mar 16 16:00:38 2024 +0100
[SYSTEMDS-2926] AWS script fix for EMR-7.0.0, part 2
Additional fixes for EMR 7
Closes #2004.
---
scripts/aws/run_systemds_script.sh | 9 +++++----
scripts/aws/spinup_systemds_cluster.sh | 26 ++++++++++++++++++--------
scripts/aws/terminate_systemds_cluster.sh | 5 +++--
3 files changed, 26 insertions(+), 14 deletions(-)
diff --git a/scripts/aws/run_systemds_script.sh
b/scripts/aws/run_systemds_script.sh
index db2d7185e2..48f5a59f66 100755
--- a/scripts/aws/run_systemds_script.sh
+++ b/scripts/aws/run_systemds_script.sh
@@ -41,7 +41,7 @@ fi
dml_filename=$(basename $1)
-STEP_INFO=$(aws emr add-steps --cluster-id $CLUSTER_ID --steps "Type=Spark,
+STEP_INFO=$(aws emr add-steps --cluster-id $CLUSTER_ID --region $REGION
--steps "Type=Spark,
Name='SystemDS Spark Program',
ActionOnFailure=CONTINUE,
Args=[
@@ -54,7 +54,8 @@ STEP_INFO=$(aws emr add-steps --cluster-id $CLUSTER_ID
--steps "Type=Spark,
STEP_ID=$(echo $STEP_INFO | jq .StepIds | tr -d '"' | tr -d ']' | tr -d '[' |
tr -d '[:space:]' )
echo "Waiting for the step to finish"
-aws emr wait step-complete --cluster-id $CLUSTER_ID --step-id $STEP_ID
+aws emr wait step-complete --cluster-id $CLUSTER_ID --step-id $STEP_ID
--region $REGION
+
+aws emr ssh --cluster-id $CLUSTER_ID --key-pair-file ${KEYPAIR_NAME}.pem
--region $REGION --command "cat /mnt/var/log/hadoop/steps/$STEP_ID/stderr"
+aws emr ssh --cluster-id $CLUSTER_ID --key-pair-file ${KEYPAIR_NAME}.pem
--region $REGION --command "cat /mnt/var/log/hadoop/steps/$STEP_ID/stdout"
-aws emr ssh --cluster-id $CLUSTER_ID --key-pair-file ${KEYPAIR_NAME}.pem
--command "cat /mnt/var/log/hadoop/steps/$STEP_ID/stderr"
-aws emr ssh --cluster-id $CLUSTER_ID --key-pair-file ${KEYPAIR_NAME}.pem
--command "cat /mnt/var/log/hadoop/steps/$STEP_ID/stdout"
\ No newline at end of file
diff --git a/scripts/aws/spinup_systemds_cluster.sh
b/scripts/aws/spinup_systemds_cluster.sh
index 58f9f2db05..c319f270d3 100755
--- a/scripts/aws/spinup_systemds_cluster.sh
+++ b/scripts/aws/spinup_systemds_cluster.sh
@@ -49,9 +49,11 @@ set_config "BUCKET" $BUCKET-$(((RANDOM % 999) + 1000))
#Source again to update the changes for the current session
source systemds_cluster.config
-#Create systemDS bucket (LocationConstraint configuration required regions
outside of us-east-1)
-aws s3api create-bucket --bucket $BUCKET --region $REGION
--create-bucket-configuration LocationConstraint=$REGION &> /dev/null
-aws s3api create-bucket --bucket $BUCKET-logs --region $REGION
--create-bucket-configuration LocationConstraint=$REGION &> /dev/null
+#Create systemDS bucket
+#LocationConstraint configuration is required for regions outside of us-east-1
+if [ "$REGION" = "us-east-1" ]; then LOCATION_CONSTRAINT=""; else
LOCATION_CONSTRAINT="--create-bucket-configuration LocationConstraint=$REGION";
fi
+aws s3api create-bucket --bucket $BUCKET --region $REGION $LOCATION_CONSTRAINT
&> /dev/null
+aws s3api create-bucket --bucket $BUCKET-logs --region $REGION
$LOCATION_CONSTRAINT &> /dev/null
# Upload Jar and scripts to s3
aws s3 sync $SYSTEMDS_TARGET_DIRECTORY s3://$BUCKET --exclude "*" --include
"*.dml" --include "*config.xml" --include "*DS.jar*"
@@ -87,7 +89,13 @@ CLUSTER_INFO=$(aws emr create-cluster \
"InstanceGroupType":"CORE",
"InstanceType":"'${INSTANCES_TYPE}'",
"Name":"Core Instance Group"}]'\
- --configurations
'[{"Classification":"spark","Properties":{"maximizeResourceAllocation":
"true"}}]'\
+ --configurations
'[{"Classification":"spark","Properties":{"maximizeResourceAllocation":
"true"}},
+ {"Classification": "spark-env",
+ "Configurations": [{
+ "Classification": "export",
+ "Properties": {"JAVA_HOME": "/usr/lib/jvm/jre-11"}
+ }]
+ }]'\
--scale-down-behavior TERMINATE_AT_TASK_COMPLETION \
--region $REGION)
@@ -98,21 +106,23 @@ set_config "CLUSTER_ID" $CLUSTER_ID
ip_address=$(curl ipecho.net/plain ; echo)
#Add your ip to the security group
-aws ec2 create-security-group --group-name ElasticMapReduce-master
--description "info" &> /dev/null
+aws ec2 create-security-group --group-name ElasticMapReduce-master
--description "info" --region $REGION &> /dev/null
aws ec2 authorize-security-group-ingress \
--group-name ElasticMapReduce-master \
--protocol tcp \
--port 22 \
- --cidr "${ip_address}"/24 &> /dev/null
+ --cidr "${ip_address}"/24 \
+ --region $REGION &> /dev/null
# Wait for cluster to start
echo "Waiting for cluster running state"
-aws emr wait cluster-running --cluster-id $CLUSTER_ID
+aws emr wait cluster-running --cluster-id $CLUSTER_ID --region $REGION
echo "Cluster info:"
-export CLUSTER_URL=$(aws emr describe-cluster --cluster-id $CLUSTER_ID | jq
.Cluster.MasterPublicDnsName | tr -d '"')
+export CLUSTER_URL=$(aws emr describe-cluster --cluster-id $CLUSTER_ID
--region $REGION | jq .Cluster.MasterPublicDnsName | tr -d '"')
aws emr ssh --cluster-id $CLUSTER_ID --key-pair-file ${KEYPAIR_NAME}.pem
--region $REGION \
--command 'aws s3 cp s3://'${BUCKET}' . --recursive --exclude "*"
--include "*DS.jar*"'
echo "Spinup finished."
+
diff --git a/scripts/aws/terminate_systemds_cluster.sh
b/scripts/aws/terminate_systemds_cluster.sh
index 2de546723c..835dcf5fb6 100755
--- a/scripts/aws/terminate_systemds_cluster.sh
+++ b/scripts/aws/terminate_systemds_cluster.sh
@@ -22,10 +22,11 @@
source systemds_cluster.config
-aws emr terminate-clusters --cluster-ids $CLUSTER_ID
+aws emr terminate-clusters --cluster-ids $CLUSTER_ID --region $REGION
# Wait for cluster to terminate
echo "Waiting for cluster terminated state"
-aws emr wait cluster-terminated --cluster-id $CLUSTER_ID
+aws emr wait cluster-terminated --cluster-id $CLUSTER_ID --region $REGION
echo "Cluster: ${CLUSTER_ID} terminated."
+