This is an automated email from the ASF dual-hosted git repository. omartushevskyi pushed a commit to branch DLAB-483-RC2 in repository https://gitbox.apache.org/repos/asf/incubator-dlab.git
commit 25aae258c8b390234c7db690c9e82d5877a87a68 Author: Oleh Martushevskyi <[email protected]> AuthorDate: Tue Mar 26 11:53:02 2019 +0200 [DLAB-483]: fixed issue with reconfiguring Spark --- .../src/general/lib/aws/actions_lib.py | 18 ++++++++++++++---- .../src/general/lib/azure/actions_lib.py | 12 ++++++++++-- .../src/general/lib/gcp/actions_lib.py | 14 +++++++++++--- .../src/general/scripts/os/reconfigure_spark.py | 3 +++ 4 files changed, 38 insertions(+), 9 deletions(-) diff --git a/infrastructure-provisioning/src/general/lib/aws/actions_lib.py b/infrastructure-provisioning/src/general/lib/aws/actions_lib.py index 2209bee..f39f64e 100644 --- a/infrastructure-provisioning/src/general/lib/aws/actions_lib.py +++ b/infrastructure-provisioning/src/general/lib/aws/actions_lib.py @@ -1648,7 +1648,7 @@ def configure_zeppelin_emr_interpreter(emr_version, cluster_name, region, spark_ def configure_dataengine_spark(cluster_name, jars_dir, cluster_dir, datalake_enabled, spark_configs=''): - local("jar_list=`find {0} -name '*.jar' | tr '\\n' ','` ; echo \"spark.jars $jar_list\" >> \ + local("jar_list=`find {0} -name '*.jar' | tr '\\n' ',' | sed 's/,$//'` ; echo \"spark.jars $jar_list\" >> \ /tmp/{1}/notebook_spark-defaults_local.conf".format(jars_dir, cluster_name)) region = local('curl http://169.254.169.254/latest/meta-data/placement/availability-zone', capture=True)[:-1] if region == 'us-east-1': @@ -1657,9 +1657,19 @@ def configure_dataengine_spark(cluster_name, jars_dir, cluster_dir, datalake_ena endpoint_url = "https://s3.{}.amazonaws.com.cn".format(region) else: endpoint_url = 'https://s3-' + region + '.amazonaws.com' - local("""bash -c 'echo "spark.hadoop.fs.s3a.endpoint """ + endpoint_url + """" >> /tmp/{}/notebook_spark-defaults_local.conf'""".format(cluster_name)) - local('echo "spark.hadoop.fs.s3a.server-side-encryption-algorithm AES256" >> /tmp/{}/notebook_spark-defaults_local.conf'.format(cluster_name)) - local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf {1}spark/conf/spark-defaults.conf'.format(cluster_name, cluster_dir)) + local("""bash -c 'echo "spark.hadoop.fs.s3a.endpoint """ + endpoint_url + + """" >> /tmp/{}/notebook_spark-defaults_local.conf'""".format(cluster_name)) + local('echo "spark.hadoop.fs.s3a.server-side-encryption-algorithm AES256" >> ' + '/tmp/{}/notebook_spark-defaults_local.conf'.format(cluster_name)) + if os.path.exists('{0}spark/conf/spark-defaults.conf'.format(cluster_dir)): + additional_spark_properties = local('diff --changed-group-format="%>" --unchanged-group-format="" ' + '/tmp/{0}/notebook_spark-defaults_local.conf ' + '{1}spark/conf/spark-defaults.conf | grep -v "^#"'.format( + cluster_name, cluster_dir), capture=True) + for property in additional_spark_properties.split('\n'): + local('echo "{0}" >> /tmp/{1}/notebook_spark-defaults_local.conf'.format(property, cluster_name)) + local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf {1}spark/conf/spark-defaults.conf'.format(cluster_name, + cluster_dir)) if spark_configs: spark_configurations = ast.literal_eval(spark_configs) new_spark_defaults = list() diff --git a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py index c9195ab..2719ec4 100644 --- a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py +++ b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py @@ -1150,9 +1150,17 @@ def configure_local_spark(jars_dir, templates_dir, memory_type='driver'): def configure_dataengine_spark(cluster_name, jars_dir, cluster_dir, datalake_enabled, spark_configs=''): - local("jar_list=`find {0} -name '*.jar' | tr '\\n' ','` ; echo \"spark.jars $jar_list\" >> \ + local("jar_list=`find {0} -name '*.jar' | tr '\\n' ',' | sed 's/,$//'` ; echo \"spark.jars $jar_list\" >> \ /tmp/{1}/notebook_spark-defaults_local.conf".format(jars_dir, cluster_name)) - local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf {1}spark/conf/spark-defaults.conf'.format(cluster_name, cluster_dir)) + if os.path.exists('{0}spark/conf/spark-defaults.conf'.format(cluster_dir)): + additional_spark_properties = local('diff --changed-group-format="%>" --unchanged-group-format="" ' + '/tmp/{0}/notebook_spark-defaults_local.conf ' + '{1}spark/conf/spark-defaults.conf | grep -v "^#"'.format( + cluster_name, cluster_dir), capture=True) + for property in additional_spark_properties.split('\n'): + local('echo "{0}" >> /tmp/{1}/notebook_spark-defaults_local.conf'.format(property, cluster_name)) + local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf {1}spark/conf/spark-defaults.conf'.format(cluster_name, + cluster_dir)) if datalake_enabled == 'false': local('cp -f /opt/spark/conf/core-site.xml {}spark/conf/'.format(cluster_dir)) else: diff --git a/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py b/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py index 396b466..23a3941 100644 --- a/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py +++ b/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py @@ -1338,9 +1338,17 @@ def install_dataengine_spark(cluster_name, spark_link, spark_version, hadoop_ver def configure_dataengine_spark(cluster_name, jars_dir, cluster_dir, datalake_enabled, spark_configs=''): - local("jar_list=`find {0} -name '*.jar' | tr '\\n' ','` ; echo \"spark.jars $jar_list\" >> \ - /tmp/{1}notebook_spark-defaults_local.conf".format(jars_dir, cluster_name)) - local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf {1}spark/conf/spark-defaults.conf'.format(cluster_name, cluster_dir)) + local("jar_list=`find {0} -name '*.jar' | tr '\\n' ',' | sed 's/,$//'` ; echo \"spark.jars $jar_list\" >> \ + /tmp/{1}/notebook_spark-defaults_local.conf".format(jars_dir, cluster_name)) + if os.path.exists('{0}spark/conf/spark-defaults.conf'.format(cluster_dir)): + additional_spark_properties = local('diff --changed-group-format="%>" --unchanged-group-format="" ' + '/tmp/{0}/notebook_spark-defaults_local.conf ' + '{1}spark/conf/spark-defaults.conf | grep -v "^#"'.format( + cluster_name, cluster_dir), capture=True) + for property in additional_spark_properties.split('\n'): + local('echo "{0}" >> /tmp/{1}/notebook_spark-defaults_local.conf'.format(property, cluster_name)) + local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf {1}spark/conf/spark-defaults.conf'.format(cluster_name, + cluster_dir)) local('cp -f /opt/spark/conf/core-site.xml {}spark/conf/'.format(cluster_dir)) if spark_configs: spark_configurations = ast.literal_eval(spark_configs) diff --git a/infrastructure-provisioning/src/general/scripts/os/reconfigure_spark.py b/infrastructure-provisioning/src/general/scripts/os/reconfigure_spark.py index 0d645ff..9be3147 100644 --- a/infrastructure-provisioning/src/general/scripts/os/reconfigure_spark.py +++ b/infrastructure-provisioning/src/general/scripts/os/reconfigure_spark.py @@ -60,6 +60,9 @@ if __name__ == "__main__": '/tmp/notebook_reconfigure_dataengine_spark.py') sudo('mv /tmp/notebook_reconfigure_dataengine_spark.py ' '/usr/local/bin/notebook_reconfigure_dataengine_spark.py') + sudo('mkdir -p /tmp/{}'.format(args.cluster_name)) + put('{}notebook_spark-defaults_local.conf'.format(templates_dir), + '/tmp/{}/notebook_spark-defaults_local.conf'.format(args.cluster_name), use_sudo=True) cluster_dir = '/opt/' + args.cluster_name + '/' if 'azure_datalake_enable' in os.environ: datalake_enabled = os.environ['azure_datalake_enable'] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
