This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-ballista.git


The following commit(s) were added to refs/heads/master by this push:
     new 9ad583ea Downgrade `docker-compose.yaml` to version 3.3 so that we can support Ubuntu 20.04.4 LTS (#329)
9ad583ea is described below

commit 9ad583eaa9b4feae8b03c7c544eb8d3fa8b83a2d
Author: Andy Grove <[email protected]>
AuthorDate: Sat Oct 15 15:18:22 2022 -0600

    Downgrade `docker-compose.yaml` to version 3.3 so that we can support Ubuntu 20.04.4 LTS (#329)
    
    * Use Docker version 3.3 so that we can support Ubuntu 20.04.4 LTS
    
    * fix labeler
    
    * executor retry connect to scheduler
    
    * log info message when connected to scheduler
---
 ballista/rust/executor/executor_config_spec.toml |  6 +++
 ballista/rust/executor/src/main.rs               | 47 +++++++++++++++++++++---
 benchmarks/README.md                             |  1 +
 docker-compose.yml                               |  8 ++--
 4 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/ballista/rust/executor/executor_config_spec.toml b/ballista/rust/executor/executor_config_spec.toml
index f8f8c5a1..7b9f11dd 100644
--- a/ballista/rust/executor/executor_config_spec.toml
+++ b/ballista/rust/executor/executor_config_spec.toml
@@ -60,6 +60,12 @@ type = "u16"
 default = "50052"
 doc = "bind grpc service port"
 
+[[param]]
+name = "scheduler_connect_timeout_seconds"
+type = "u16"
+default = "0"
+doc = "How long to try connecting to scheduler before failing. Set to zero to fail after first attempt."
+
 [[param]]
 name = "work_dir"
 type = "String"
diff --git a/ballista/rust/executor/src/main.rs b/ballista/rust/executor/src/main.rs
index 9688377a..260a7204 100644
--- a/ballista/rust/executor/src/main.rs
+++ b/ballista/rust/executor/src/main.rs
@@ -20,13 +20,13 @@
 use chrono::{DateTime, Duration, Utc};
 use std::net::SocketAddr;
 use std::sync::Arc;
-use std::time::Duration as Core_Duration;
+use std::time::{Duration as Core_Duration, Instant};
 use std::{env, io};
 
 use anyhow::{Context, Result};
 use arrow_flight::flight_service_server::FlightServiceServer;
 use ballista_executor::{execution_loop, executor_server};
-use log::{error, info};
+use log::{error, info, warn};
 use tempfile::TempDir;
 use tokio::fs::ReadDir;
 use tokio::signal;
@@ -181,9 +181,46 @@ async fn main() -> Result<()> {
         concurrent_tasks,
     ));
 
-    let connection = create_grpc_client_connection(scheduler_url)
-        .await
-        .context("Could not connect to scheduler")?;
+    let connect_timeout = opt.scheduler_connect_timeout_seconds as u64;
+    let connection = if connect_timeout == 0 {
+        create_grpc_client_connection(scheduler_url)
+            .await
+            .context("Could not connect to scheduler")
+    } else {
+        // this feature was added to support docker-compose so that we can have the executor
+        // wait for the scheduler to start, or at least run for 10 seconds before failing so
+        // that docker-compose's restart policy will restart the container.
+        let start_time = Instant::now().elapsed().as_secs();
+        let mut x = None;
+        while x.is_none()
+            && Instant::now().elapsed().as_secs() - start_time < connect_timeout
+        {
+            match create_grpc_client_connection(scheduler_url.clone())
+                .await
+                .context("Could not connect to scheduler")
+            {
+                Ok(conn) => {
+                    info!("Connected to scheduler at {}", scheduler_url);
+                    x = Some(conn);
+                }
+                Err(e) => {
+                    warn!(
+                        "Failed to connect to scheduler at {} ({}); retrying ...",
+                        scheduler_url, e
+                    );
+                    std::thread::sleep(time::Duration::from_millis(500));
+                }
+            }
+        }
+        match x {
+            Some(conn) => Ok(conn),
+            _ => Err(BallistaError::General(format!(
+                "Timed out attempting to connect to scheduler at {}",
+                scheduler_url
+            ))
+            .into()),
+        }
+    }?;
 
     let mut scheduler = SchedulerGrpcClient::new(connection);
 
diff --git a/benchmarks/README.md b/benchmarks/README.md
index 6b0acbaa..11b35b93 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -151,6 +151,7 @@ cargo run --release --bin tpch benchmark ballista --host localhost --port 50050
 To start a Rust scheduler and executor using Docker Compose:
 
 ```bash
+cargo build --release
 docker-compose up --build
 ```
 
diff --git a/docker-compose.yml b/docker-compose.yml
index 5e71f2e5..f2459236 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -14,7 +14,7 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-version: '3.8'
+version: '3.3'
 services:
   etcd:
     image: quay.io/coreos/etcd:v3.4.9
@@ -46,16 +46,16 @@ services:
     build:
       dockerfile: dev/docker/ballista-executor.Dockerfile
       context: .
-    command: "--bind-host 0.0.0.0 --scheduler-host ballista-scheduler"
+    command: "--bind-host 0.0.0.0 --scheduler-host ballista-scheduler --scheduler-connect-timeout-seconds 15"
     deploy:
       replicas: 2
+    restart: always
     environment:
       - RUST_LOG=ballista=debug,info
     volumes:
       - ./benchmarks/data:/data
     depends_on:
-      ballista-scheduler:
-        condition: service_healthy
+      - ballista-scheduler
     healthcheck:
       test: ["CMD", "nc", "-z", "ballista-executor", "50051"]
       interval: 5s

Reply via email to