This is an automated email from the ASF dual-hosted git repository.

lahirujayathilake pushed a commit to branch cybershuttle-dev
in repository https://gitbox.apache.org/repos/asf/airavata.git

commit c09d8235e31eb492bda74534d85ff98b0792fe84
Author: yasith <[email protected]>
AuthorDate: Wed Dec 18 01:32:11 2024 -0600

    fix bug in code execution, remove jupyter kernel start from agent
    add support to cold-start analysis agents
    reset changes to sftp_file_handling_client
    update notebooks
    remove verbose log from remote code execution
    separate python env creation and code execution steps. always return 
stdout+err for command/script executions
    lock agent to python 3.12 and update pyproject.toml
    update file listing and python execution cmds
---
 .../airavata_experiments/airavata.py               |  90 +++++++---
 .../airavata_experiments/runtime.py                |  17 +-
 .../clients/sftp_file_handling_client.py           |  45 +++--
 .../airavata-python-sdk/pyproject.toml             |   2 +-
 dev-tools/deployment/scripts/expanse/agent.sh      |  40 +++++
 dev-tools/deployment/scripts/expanse/namd-agent.sh |   3 +-
 modules/agent-framework/airavata-agent/agent.go    | 190 +++++++++++----------
 .../jupyterhub/data/1_experiment_sdk.ipynb         | 117 ++++++-------
 .../data/{1_experiment_sdk.ipynb => smd_cpu.ipynb} | 109 ++++++------
 .../data/{1_experiment_sdk.ipynb => smd_gpu.ipynb} | 117 ++++++-------
 10 files changed, 396 insertions(+), 334 deletions(-)

diff --git 
a/airavata-api/airavata-client-sdks/airavata-python-sdk/airavata_experiments/airavata.py
 
b/airavata-api/airavata-client-sdks/airavata-python-sdk/airavata_experiments/airavata.py
index bf778a05df..ebab013cba 100644
--- 
a/airavata-api/airavata-client-sdks/airavata-python-sdk/airavata_experiments/airavata.py
+++ 
b/airavata-api/airavata-client-sdks/airavata-python-sdk/airavata_experiments/airavata.py
@@ -400,7 +400,7 @@ class AiravataOperator:
     res = 
requests.post(f"{self.connection_svc_url()}/agent/executecommandrequest", json={
         "agentId": agent_ref,
         "workingDir": ".",
-        "arguments": ["sh", "-c", "cd /data && find . -type f -printf '%P\n'"]
+        "arguments": ["sh", "-c", r"find /data -type d -name 'venv' -prune -o 
-type f -printf '%P\n' | sort"]
     })
     data = res.json()
     if data["error"] is not None:
@@ -539,7 +539,6 @@ class AiravataOperator:
     sr_host = str(sr_host or self.default_sr_hostname())
     mount_point = Path(self.default_gateway_data_store_dir()) / self.user_id
     project_name = str(project_name or self.default_project_name())
-    agent_ref = str(uuid.uuid4())
     server_url = urlparse(self.connection_svc_url()).netloc
 
     # validate args (str)
@@ -575,7 +574,8 @@ class AiravataOperator:
       else:
         assert isinstance(input_value, (int, float, str)), f"Invalid 
{input_name}: {input_value}"
         data_inputs[input_name] = input_value
-    data_inputs.update({"agent_id": agent_ref, "server_url": server_url})
+    data_inputs.update({"agent_id": data_inputs.get("agent_id", 
str(uuid.uuid4()))})
+    data_inputs.update({"server_url": server_url})
 
     # setup runtime params
     print("[AV] Setting up runtime params...")
@@ -685,7 +685,7 @@ class AiravataOperator:
 
     return LaunchState(
       experiment_id=ex_id,
-      agent_ref=agent_ref,
+      agent_ref=str(data_inputs["agent_id"]),
       process_id=process_id,
       mount_point=mount_point,
       experiment_dir=exp_dir,
@@ -702,31 +702,73 @@ class AiravataOperator:
         self.airavata_token, experiment_id, self.default_gateway_id())
     return status
   
-  def execute_py(self, libraries: list[str], code: str, agent_ref: str) -> str 
| None:
-    print(f"[av] Executing Python Code...")
+  def execute_py(self, libraries: list[str], code: str, agent_id: str, pid: 
str, runtime_args: dict, cold_start: bool = True) -> str | None:
+    # lambda to send request
+    print(f"[av] Attempting to submit to agent {agent_id}...")
+    make_request = lambda: 
requests.post(f"{self.connection_svc_url()}/agent/executepythonrequest", json={
+      "libraries": libraries,
+      "code": code,
+      "pythonVersion": "3.10", # TODO verify
+      "keepAlive": False, # TODO verify
+      "parentExperimentId": "/data", # the working directory
+      "agentId": agent_id,
+    })
     try:
-      res = 
requests.post(f"{self.connection_svc_url()}/agent/executepythonrequest", json={
-          "libraries": libraries,
-          "code": code,
-          "pythonVersion": "3.10", # TODO verify
-          "keepAlive": False, # TODO verify
-          "parentExperimentId": "/data", # the working directory
-          "agentId": agent_ref,
-      })
-      data = res.json()
-      if data["error"] is not None:
-        raise Exception(data["error"])
+      if cold_start:
+        res = make_request()
+        data = res.json()
+        if data["error"] == "Agent not found":
+          # waiting for agent to be available
+          print(f"[av] Agent {agent_id} not found! Relaunching...")
+          self.launch_experiment(
+            experiment_name="Agent",
+            app_name="AiravataAgent",
+            inputs={
+              "agent_id": {"type": "str", "value": agent_id},
+              "server_url": {"type": "str", "value": 
urlparse(self.connection_svc_url()).netloc},
+              "process_id": {"type": "str", "value": pid},
+            },
+            computation_resource_name=runtime_args["cluster"],
+            queue_name=runtime_args["queue_name"],
+            node_count=1,
+            cpu_count=runtime_args["cpu_count"],
+            walltime=runtime_args["walltime"],
+          )
+          return self.execute_py(libraries, code, agent_id, pid, runtime_args, 
cold_start=False)
+        elif data["executionId"] is not None:
+          print(f"[av] Submitted to Python Interpreter")
+          # agent response
+          exc_id = data["executionId"]
+        else:
+          # unrecoverable error
+          raise Exception(data["error"])
       else:
-        exc_id = data["executionId"]
+        # poll until agent is available
         while True:
-          res = 
requests.get(f"{self.connection_svc_url()}/agent/executepythonresponse/{exc_id}")
+          res = make_request()
           data = res.json()
-          if data["available"]:
-            response = str(data["responseString"])
-            return response
-          time.sleep(1)
+          if data["error"] == "Agent not found":
+            # print(f"[av] Waiting for Agent {agent_id}...")
+            time.sleep(2)
+            continue
+          elif data["executionId"] is not None:
+            print(f"[av] Submitted to Python Interpreter")
+            exc_id = data["executionId"]
+            break
+          else:
+            raise Exception(data["error"])
+      assert exc_id is not None, f"Invalid execution id: {exc_id}"
+      
+      # wait for the execution response to be available
+      while True:
+        res = 
requests.get(f"{self.connection_svc_url()}/agent/executepythonresponse/{exc_id}")
+        data = res.json()
+        if data["available"]:
+          response = str(data["responseString"])
+          return response
+        time.sleep(1)
     except Exception as e:
-      print("[av] Remote execution failed! {e}")
+      print(f"[av] Remote execution failed! {e}")
       return None
     
   def get_available_runtimes(self):
diff --git 
a/airavata-api/airavata-client-sdks/airavata-python-sdk/airavata_experiments/runtime.py
 
b/airavata-api/airavata-client-sdks/airavata-python-sdk/airavata_experiments/runtime.py
index 0633e4d76b..260b784e65 100644
--- 
a/airavata-api/airavata-client-sdks/airavata-python-sdk/airavata_experiments/runtime.py
+++ 
b/airavata-api/airavata-client-sdks/airavata-python-sdk/airavata_experiments/runtime.py
@@ -165,12 +165,11 @@ class Remote(Runtime):
   def execute_py(self, libraries: list[str], code: str, task: Task) -> None:
     assert task.ref is not None
     assert task.agent_ref is not None
-    print(f"* Packages: {libraries}")
-    print(f"* Code:\n{code}")
+    assert task.pid is not None
 
     from .airavata import AiravataOperator
     av = AiravataOperator(context.access_token)
-    result = av.execute_py(libraries, code, task.agent_ref)
+    result = av.execute_py(libraries, code, task.agent_ref, task.pid, 
task.runtime.args)
     print(result)
 
   def status(self, task: Task):
@@ -246,11 +245,21 @@ class Remote(Runtime):
 def list_runtimes(
     cluster: str | None = None,
     category: str | None = None,
+    node_count: int | None = None,
+    cpu_count: int | None = None,
+    walltime: int | None = None,
 ) -> list[Runtime]:
   from .airavata import AiravataOperator
   av = AiravataOperator(context.access_token)
   all_runtimes = av.get_available_runtimes()
-  return [*filter(lambda r: (cluster in [None, r.args["cluster"]]) and 
(category in [None, r.args["category"]]), all_runtimes)]
+  out_runtimes = []
+  for r in all_runtimes:
+    if (cluster in [None, r.args["cluster"]]) and (category in [None, 
r.args["category"]]):
+      r.args["node_count"] = node_count or r.args["node_count"]
+      r.args["cpu_count"] = cpu_count or r.args["cpu_count"]
+      r.args["walltime"] = walltime or r.args["walltime"]
+      out_runtimes.append(r)
+  return out_runtimes
 
 def is_terminal_state(x):
   return x in ["CANCELED", "COMPLETED", "FAILED"]
\ No newline at end of file
diff --git 
a/airavata-api/airavata-client-sdks/airavata-python-sdk/airavata_sdk/clients/sftp_file_handling_client.py
 
b/airavata-api/airavata-client-sdks/airavata-python-sdk/airavata_sdk/clients/sftp_file_handling_client.py
index 734ec82b47..3cbe194e97 100644
--- 
a/airavata-api/airavata-client-sdks/airavata-python-sdk/airavata_sdk/clients/sftp_file_handling_client.py
+++ 
b/airavata-api/airavata-client-sdks/airavata-python-sdk/airavata_sdk/clients/sftp_file_handling_client.py
@@ -28,20 +28,13 @@ logger.setLevel(logging.INFO)
 logging.getLogger("paramiko").setLevel(logging.WARNING)
 
 
-def create_pkey(pkey_path):
-    if pkey_path is not None:
-        return paramiko.RSAKey.from_private_key_file(pkey_path)
-    return None
-
-
 class SFTPConnector(object):
 
-    def __init__(self, host, port, username, password = None, pkey = None):
+    def __init__(self, host, port, username, password):
         self.host = host
         self.port = port
         self.username = username
         self.password = password
-        self.pkey = pkey
 
         ssh = paramiko.SSHClient()
         self.ssh = ssh
@@ -51,38 +44,38 @@ class SFTPConnector(object):
         ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
 
 
-    def upload_files(self, local_path, remote_base, project_name, 
exprement_id):
+    def upload_files(self, local_path, project_name, exprement_id):
         project_name = project_name.replace(" ", "_")
         time = datetime.now().strftime('%Y-%m-%d %H:%M:%S').replace(" ", "_")
         time = time.replace(":", "_")
         time = time.replace("-", "_")
         exprement_id = exprement_id+"_"+time
-        base_path = remote_base + "/" + project_name
-        remote_path = base_path + "/" + exprement_id
-        # pathsuffix = self.username + remote_path
+        remote_path = "/" + project_name + "/" + exprement_id + "/"
+        pathsuffix = self.username + remote_path
         files = os.listdir(local_path)
-        transport = Transport(sock=(self.host, int(self.port)))
-        transport.connect(username=self.username, password=self.password, 
pkey=create_pkey(self.pkey))
-        try:
-          for file in files:
+        for file in files:
+                try:
+                    transport = Transport(sock=(self.host, int(self.port)))
+                    transport.connect(username=self.username, 
password=self.password)
                     connection = SFTPClient.from_transport(transport)
                     try:
-                        connection.lstat(base_path)  # Test if remote_path 
exists
+                        base_path = "/" + project_name
+                        connection.chdir(base_path)  # Test if remote_path 
exists
                     except IOError:
+
                         connection.mkdir(base_path)
                     try:
-                        connection.lstat(remote_path)  # Test if remote_path 
exists
+                        connection.chdir(remote_path)  # Test if remote_path 
exists
                     except IOError:
                         connection.mkdir(remote_path)
-                    remote_fpath = remote_path + "/" + file
-                    print(f"{file} -> {remote_fpath}")
-                    connection.put(os.path.join(local_path, file), 
remote_fpath)
-        finally:
-            transport.close()
-        return remote_path
+                    connection.put(os.path.join(local_path, file), remote_path 
+ "/" + file)
+                finally:
+                    transport.close()
+        return pathsuffix
 
     def download_files(self, local_path, remote_path):
-        self.ssh.connect(self.host, self.port, self.username, 
password=self.password, pkey=create_pkey(self.pkey))
+
+        self.ssh.connect(self.host, self.port, self.username, password = 
self.password)
         with SCPClient(self.ssh.get_transport()) as conn:
             conn.get(remote_path=remote_path, local_path= local_path, 
recursive= True)
         self.ssh.close()
@@ -90,4 +83,4 @@ class SFTPConnector(object):
     @staticmethod
     def uploading_info(uploaded_file_size, total_file_size):
         logging.info('uploaded_file_size : {} total_file_size : {}'.
-                     format(uploaded_file_size, total_file_size))
+                     format(uploaded_file_size, total_file_size))
\ No newline at end of file
diff --git 
a/airavata-api/airavata-client-sdks/airavata-python-sdk/pyproject.toml 
b/airavata-api/airavata-client-sdks/airavata-python-sdk/pyproject.toml
index c4b9f9cf42..1024105272 100644
--- a/airavata-api/airavata-client-sdks/airavata-python-sdk/pyproject.toml
+++ b/airavata-api/airavata-client-sdks/airavata-python-sdk/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "airavata-python-sdk-test"
-version = "0.0.10.post1"
+version = "0.0.13"
 description = "Apache Airavata Python SDK"
 readme = "README.md"
 license = { text = "Apache License 2.0" }
diff --git a/dev-tools/deployment/scripts/expanse/agent.sh 
b/dev-tools/deployment/scripts/expanse/agent.sh
new file mode 100644
index 0000000000..b0df5471e3
--- /dev/null
+++ b/dev-tools/deployment/scripts/expanse/agent.sh
@@ -0,0 +1,40 @@
+#!/bin/bash -x
+
+# #####################################################################
+# Standalone Airavata Agent for Expanse
+# #####################################################################
+#
+# ----------------------------------------------------------------------
+# CONTRIBUTORS
+# ----------------------------------------------------------------------
+# * Dimuthu Wannipurage
+# * Lahiru Jayathilake
+# * Yasith Jayawardana
+# ######################################################################
+
+#-----------------------------------------------------------------------
+# STEP 1 - PARSE COMMAND LINE ARGS
+#-----------------------------------------------------------------------
+while getopts a:s:p: option; do
+  case $option in
+  a) AGENT_ID=$OPTARG ;;
+  s) SERVER_URL=$OPTARG ;;
+  p) PROCESS_ID=$OPTARG ;;
+  \?) cat <<ENDCAT ;;
+>! Usage: $0  [-a AGENT_ID ]    !<
+>!            [-s SERVER_URL]   !<
>!            [-p PROCESS_ID]     !<
+ENDCAT
+  esac
+done
+
+echo "AGENT_ID=$AGENT_ID"
+echo "SERVER_URL=$SERVER_URL"
+echo "PROCESS_ID=$PROCESS_ID"
+
+# ----------------------------------------------------------------------
+# STEP 2 - RUN AGENT
+# ----------------------------------------------------------------------
+SIF_PATH=/home/scigap/agent-framework/airavata-agent.sif
+module load singularitypro
+singularity exec --bind 
/expanse/lustre/scratch/scigap/temp_project/neuro-workdirs/$PROCESS_ID:/data 
$SIF_PATH bash -c "/opt/airavata-agent $SERVER_URL:19900 $AGENT_ID"
diff --git a/dev-tools/deployment/scripts/expanse/namd-agent.sh 
b/dev-tools/deployment/scripts/expanse/namd-agent.sh
index b2a7301822..63ea64bd29 100755
--- a/dev-tools/deployment/scripts/expanse/namd-agent.sh
+++ b/dev-tools/deployment/scripts/expanse/namd-agent.sh
@@ -150,7 +150,7 @@ if [ $ExeTyp == "CPU" ]; then
   export NAMDPATH="$APP_PATH/NAMD_3.1alpha2_Linux-x86_64-multicore"
 fi
 if [ $ExeTyp == "GPU" ]; then
-  export NAMDPATH="$APP_PATH/NAMD_3.1alpha2_Linux-x86_64-multicore-CUDA"
+  export NAMDPATH="$APP_PATH/NAMD_3.0.1_Linux-x86_64-multicore-CUDA"
 fi
 
 #-----------------------------------------------------------------------
@@ -214,7 +214,6 @@ cd ${subdir}
 ########################################################################
 # Part 3 - Output Flattening
 ########################################################################
-num_rep=3
 for replica in $(seq 1 ${num_rep}); do
   for file in $(ls ${replica}/*.*); do
     mv ${file} ${replica}"_"$(basename $file)
diff --git a/modules/agent-framework/airavata-agent/agent.go 
b/modules/agent-framework/airavata-agent/agent.go
index 5052770733..7ae28fcc37 100644
--- a/modules/agent-framework/airavata-agent/agent.go
+++ b/modules/agent-framework/airavata-agent/agent.go
@@ -2,7 +2,6 @@ package main
 
 import (
        protos "airavata-agent/protos"
-       "bufio"
        "bytes"
        "context"
        "encoding/json"
@@ -50,57 +49,57 @@ func main() {
                log.Printf("Connected to the server...")
        }
 
-       go func() {
-               log.Printf("Starting jupyter kernel")
-               cmd := exec.Command("python", "/opt/jupyter/kernel.py")
-               //cmd := exec.Command("jupyter/venv/bin/python", 
"jupyter/kernel.py")
-               stdout, err := cmd.StdoutPipe()
-
-               if err != nil {
-                       fmt.Println("[agent.go] Error creating StdoutPipe:", 
err)
-                       return
-               }
-
-               // Get stderr pipe
-               stderr, err := cmd.StderrPipe()
-               if err != nil {
-                       fmt.Println("[agent.go] Error creating StderrPipe:", 
err)
-                       return
-               }
-
-               log.Printf("[agent.go] Starting command for execution")
-               // Start the command
-               if err := cmd.Start(); err != nil {
-                       fmt.Println("[agent.go] Error starting command:", err)
-                       return
-               }
-
-               // Create channels to read from stdout and stderr
-               stdoutScanner := bufio.NewScanner(stdout)
-               stderrScanner := bufio.NewScanner(stderr)
-
-               // Stream stdout
-               go func() {
-                       for stdoutScanner.Scan() {
-                               fmt.Printf("[agent.go] stdout: %s\n", 
stdoutScanner.Text())
-                       }
-               }()
-
-               // Stream stderr
-               go func() {
-                       for stderrScanner.Scan() {
-                               fmt.Printf("[agent.go] stderr: %s\n", 
stderrScanner.Text())
-                       }
-               }()
-
-               // Wait for the command to finish
-               if err := cmd.Wait(); err != nil {
-                       fmt.Println("[agent.go] Error waiting for command:", 
err)
-                       return
-               }
-
-               fmt.Println("[agent.go] Command finished")
-       }()
+       // go func() {
+       //      log.Printf("Starting jupyter kernel")
+       //      cmd := exec.Command("python", "/opt/jupyter/kernel.py")
+       //      //cmd := exec.Command("jupyter/venv/bin/python", 
"jupyter/kernel.py")
+       //      stdout, err := cmd.StdoutPipe()
+
+       //      if err != nil {
+       //              fmt.Println("[agent.go] Error creating StdoutPipe:", 
err)
+       //              return
+       //      }
+
+       //      // Get stderr pipe
+       //      stderr, err := cmd.StderrPipe()
+       //      if err != nil {
+       //              fmt.Println("[agent.go] Error creating StderrPipe:", 
err)
+       //              return
+       //      }
+
+       //      log.Printf("[agent.go] Starting command for execution")
+       //      // Start the command
+       //      if err := cmd.Start(); err != nil {
+       //              fmt.Println("[agent.go] Error starting command:", err)
+       //              return
+       //      }
+
+       //      // Create channels to read from stdout and stderr
+       //      stdoutScanner := bufio.NewScanner(stdout)
+       //      stderrScanner := bufio.NewScanner(stderr)
+
+       //      // Stream stdout
+       //      go func() {
+       //              for stdoutScanner.Scan() {
+       //                      fmt.Printf("[agent.go] stdout: %s\n", 
stdoutScanner.Text())
+       //              }
+       //      }()
+
+       //      // Stream stderr
+       //      go func() {
+       //              for stderrScanner.Scan() {
+       //                      fmt.Printf("[agent.go] stderr: %s\n", 
stderrScanner.Text())
+       //              }
+       //      }()
+
+       //      // Wait for the command to finish
+       //      if err := cmd.Wait(); err != nil {
+       //              fmt.Println("[agent.go] Error waiting for command:", 
err)
+       //              return
+       //      }
+
+       //      fmt.Println("[agent.go] Command finished")
+       // }()
 
        go func() {
                for {
@@ -128,45 +127,59 @@ func main() {
                                log.Printf("[agent.go] Working Dir %s", 
workingDir)
                                log.Printf("[agent.go] Libraries %s", libraries)
 
-                               // bash script to
-                               // (a) create the virtual environment,
-                               // (b) source it, and
-                               // (c) run a python code
-                               bashScript := `
-        workingDir="%s";
-        cd $workingDir;
-        if [ ! -f "$workingDir/venv/pyenv.cfg" ]; then
-          rm -rf $workingDir/venv;
-          python3 -m venv $workingDir/venv;
-        fi
-        source $workingDir/venv/bin/activate
-        pip install %s > /dev/null
-        python -c "%s"
-        `
-
-                               runCmd := fmt.Sprintf(
-                                       bashScript,
-                                       workingDir,
-                                       strings.Join(libraries, " "),
-                                       strings.ReplaceAll(code, `"`, `\"`),
-                               )
-                               log.Printf("[agent.go] Running bash 
script:\n%s", runCmd)
-                               cmd := exec.Command("bash", "-c", runCmd)
-
                                go func() {
-                                       output, err := cmd.Output()
-                                       if err != nil {
-                                               fmt.Println("[agent.go] Failed 
to run python command:", err)
+
+                                       // setup the venv
+                                       venvCmd := fmt.Sprintf(`
+                                       agentId="%s"
+                                       pkgs="%s"
+
+                                       if [ ! -f "/tmp/$agentId/venv" ]; then
+                                               mkdir -p /tmp/$agentId
+                                               python3 -m venv 
/tmp/$agentId/venv
+                                       fi
+
+                                       source /tmp/$agentId/venv/bin/activate
+                                       python3 -m pip install $pkgs
+                                       
+                                       `, agentId, strings.Join(libraries, " 
"))
+                                       log.Println("[agent.go] venv setup:", 
venvCmd)
+                                       venvExc := exec.Command("bash", "-c", 
venvCmd)
+                                       venvOut, venvErr := 
venvExc.CombinedOutput()
+                                       if venvErr != nil {
+                                               fmt.Println("[agent.go] venv 
setup: ERR", venvErr)
                                                return
                                        }
-                                       stdoutString := string(output)
-                                       log.Printf("[agent.go] Execution output 
is %s", stdoutString)
+                                       venvStdout := string(venvOut)
+                                       fmt.Println("[agent.go] venv setup:", 
venvStdout)
+
+                                       // execute the python code
+                                       pyCmd := fmt.Sprintf(`
+                                       workingDir="%s";
+                                       agentId="%s";
+
+                                       cd $workingDir;
+                                       source /tmp/$agentId/venv/bin/activate;
+                                       python3 <<EOF
+%s
+EOF`, workingDir, agentId, code)
+                                       log.Println("[agent.go] python code:", 
pyCmd)
+                                       pyExc := exec.Command("bash", "-c", 
pyCmd)
+                                       pyOut, pyErr := pyExc.CombinedOutput()
+                                       if pyErr != nil {
+                                               fmt.Println("[agent.go] python 
code: ERR", pyErr)
+                                       }
+
+                                       // send the result back to the server
+                                       pyStdout := string(pyOut)
                                        if err := 
stream.Send(&protos.AgentMessage{Message: 
&protos.AgentMessage_PythonExecutionResponse{
                                                PythonExecutionResponse: 
&protos.PythonExecutionResponse{
                                                        SessionId:      
sessionId,
                                                        ExecutionId:    
executionId,
-                                                       ResponseString: 
stdoutString}}}); err != nil {
+                                                       ResponseString: 
pyStdout}}}); err != nil {
                                                log.Printf("[agent.go] Failed 
to send execution result to server: %v", err)
+                                       } else {
+                                               log.Printf("[agent.go] Sent 
execution result to the server: %v", pyStdout)
                                        }
                                }()
 
@@ -177,17 +190,16 @@ func main() {
                                log.Printf("[agent.go] Execution id %s", 
executionId)
                                cmd := exec.Command(execArgs[0], 
execArgs[1:]...)
                                log.Printf("[agent.go] Completed execution with 
the id %s", executionId)
-                               stdout, err := cmd.Output()
+                               output, err := cmd.CombinedOutput() // combined 
output of stdout and stderr
                                if err != nil {
-                                       log.Fatalf(err.Error())
-                                       return
+                                       log.Printf("[agent.go] command 
execution failed: %s", err)
                                }
 
-                               stdoutString := string(stdout)
-                               log.Printf("[agent.go] Execution output is %s", 
stdoutString)
+                               outputString := string(output)
+                               log.Printf("[agent.go] Execution output is %s", 
outputString)
 
                                if err := 
stream.Send(&protos.AgentMessage{Message: 
&protos.AgentMessage_CommandExecutionResponse{
-                                       CommandExecutionResponse: 
&protos.CommandExecutionResponse{ExecutionId: executionId, ResponseString: 
stdoutString}}}); err != nil {
+                                       CommandExecutionResponse: 
&protos.CommandExecutionResponse{ExecutionId: executionId, ResponseString: 
outputString}}}); err != nil {
                                        log.Printf("[agent.go] Failed to send 
execution result to server: %v", err)
                                }
 
diff --git 
a/modules/agent-framework/deployments/jupyterhub/data/1_experiment_sdk.ipynb 
b/modules/agent-framework/deployments/jupyterhub/data/1_experiment_sdk.ipynb
index f89b022ecd..d9de2a5b38 100644
--- a/modules/agent-framework/deployments/jupyterhub/data/1_experiment_sdk.ipynb
+++ b/modules/agent-framework/deployments/jupyterhub/data/1_experiment_sdk.ipynb
@@ -4,20 +4,16 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Cybershuttle SDK -  Molecular Dynamics\n",
-    "> Define, run, monitor, and analyze molecular dynamics experiments in a 
HPC-agnostic way.\n",
     "# Cybershuttle SDK -  Molecular Dynamics\n",
     "> Define, run, monitor, and analyze molecular dynamics experiments in a 
HPC-agnostic way.\n",
     "\n",
     "This notebook shows how users can setup and launch a **NAMD** experiment 
with replicas, monitor its execution, and run analyses both during and after 
execution."
-    "This notebook shows how users can setup and launch a **NAMD** experiment 
with replicas, monitor its execution, and run analyses both during and after 
execution."
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Installing Required Packages\n",
     "## Installing Required Packages\n",
     "\n",
     "First, install the `airavata-python-sdk-test` package from the pip 
repository."
@@ -37,7 +33,6 @@
    "metadata": {},
    "source": [
     "## Importing the SDK"
-    "## Importing the SDK"
    ]
   },
   {
@@ -54,7 +49,6 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Authenticating\n",
     "## Authenticating\n",
     "\n",
     "To authenticate for remote execution, call the `ae.login()` method.\n",
@@ -64,10 +58,8 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "outputs": [],
    "source": [
     "ae.login()"
    ]
@@ -77,7 +69,6 @@
    "metadata": {},
    "source": [
     "Once authenticated, the `ae.list_runtimes()` function can be called to 
list HPC resources that the user has access to."
-    "Once authenticated, the `ae.list_runtimes()` function can be called to 
list HPC resources that the user has access to."
    ]
   },
   {
@@ -88,7 +79,6 @@
    "source": [
     "runtimes = ae.list_runtimes()\n",
     "ae.display(runtimes)"
-    "ae.display(runtimes)"
    ]
   },
   {
@@ -121,7 +111,6 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Defining a NAMD Experiment\n",
     "## Defining a NAMD Experiment\n",
     "\n",
     "The `md.NAMD.initialize()` is used to define a NAMD experiment.\n",
@@ -146,13 +135,6 @@
     "Any optional resource constraint can be provided here.\n",
     "\n",
     "You can also call `ae.display()` to pretty-print the experiment."
-    "```\n",
-    "\n",
-    "To add replica runs, simply call the `exp.add_replica()` function.\n",
-    "You can call the `add_replica()` function as many times as you want 
replicas.\n",
-    "Any optional resource constraint can be provided here.\n",
-    "\n",
-    "You can also call `ae.display()` to pretty-print the experiment."
    ]
   },
   {
@@ -163,7 +145,7 @@
    "source": [
     "exp = md.NAMD.initialize(\n",
     "    name=\"SMD\",\n",
-    "    config_file=\"data/pull_cpu.conf\",\n",
+    "    config_file=\"data/pull_gpu.conf\",\n",
     "    pdb_file=\"data/structure.pdb\",\n",
     "    psf_file=\"data/structure.psf\",\n",
     "    ffp_files=[\n",
@@ -176,10 +158,9 @@
     "      \"data/b4pull.restart.vel\",\n",
     "      \"data/b4pull.restart.xsc\",\n",
     "    ],\n",
-    "    parallelism=\"CPU\",\n",
-    "    num_replicas=1,\n",
+    "    parallelism=\"GPU\",\n",
     ")\n",
-    "exp.add_replica(*ae.list_runtimes(cluster=\"login.expanse.sdsc.edu\", 
category=\"cpu\"))\n",
+    "exp.add_replica(*ae.list_runtimes(cluster=\"login.expanse.sdsc.edu\", 
category=\"gpu\", walltime=180))\n",
     "ae.display(exp)"
    ]
   },
@@ -190,9 +171,6 @@
     "## Creating an Execution Plan\n",
     "\n",
     "Call the `exp.plan()` function to transform the experiment definition + 
replicas into a stateful execution plan."
-    "## Creating an Execution Plan\n",
-    "\n",
-    "Call the `exp.plan()` function to transform the experiment definition + 
replicas into a stateful execution plan."
    ]
   },
   {
@@ -203,19 +181,15 @@
    "source": [
     "plan = exp.plan()\n",
     "ae.display(plan)"
-    "plan = exp.plan()\n",
-    "ae.display(plan)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Saving the Plan\n",
     "## Saving the Plan\n",
     "\n",
     "A created plan can be saved locally (in JSON) or remotely (in a 
user-local DB) for later reference."
-    "A created plan can be saved locally (in JSON) or remotely (in a 
user-local DB) for later reference."
    ]
   },
   {
@@ -224,8 +198,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "plan.save()  # this will save the plan in DB\n",
-    "plan.save_json(\"plan.json\")  # save the plan state locally"
+    "plan.save()                   # this will save the plan in DB\n",
+    "plan.save_json(\"plan_gpu.json\")   # save the plan state locally"
    ]
   },
   {
@@ -237,11 +211,6 @@
     "A created plan can be launched using the `plan.launch()` function.\n",
     "Changes to plan states will be automatically saved onto the remote.\n",
     "However, plan state can also be tracked locally by invoking 
`plan.save_json()`."
-    "## Launching the Plan\n",
-    "\n",
-    "A created plan can be launched using the `plan.launch()` function.\n",
-    "Changes to plan states will be automatically saved onto the remote.\n",
-    "However, plan state can also be tracked locally by invoking 
`plan.save_json()`."
    ]
   },
   {
@@ -251,8 +220,24 @@
    "outputs": [],
    "source": [
     "plan.launch()\n",
-    "plan.save_json(\"plan.json\")"
-    "plan.save_json(\"plan.json\")"
+    "plan.save_json(\"plan_gpu.json\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Checking the Plan Status\n",
+    "The status of a plan can be retrieved by calling `plan.status()`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plan.status()"
    ]
   },
   {
@@ -270,7 +255,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "plan = ae.plan.load_json(\"plan.json\")\n",
+    "plan = ae.plan.load_json(\"plan_gpu.json\")\n",
     "plan = ae.plan.load(plan.id)\n",
     "plan.status()\n",
     "ae.display(plan)"
@@ -288,7 +273,6 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -313,14 +297,7 @@
    "outputs": [],
    "source": [
     "# plan.stop()\n",
-    "plan.wait_for_completion()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Running File Operations"
+    "# plan.wait_for_completion()"
    ]
   },
   {
@@ -340,22 +317,27 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "for task in plan.tasks:\n",
     "    print(task.name, task.pid)\n",
-    "    # display files\n",
-    "    display(task.ls())\n",
-    "    # upload a file\n",
-    "    task.upload(\"data/sample.txt\")\n",
-    "    # preview contents of a file\n",
-    "    display(task.cat(\"sample.txt\"))\n",
-    "    # download a specific file\n",
-    "    task.download(\"sample.txt\", f\"./results_{task.name}\")\n",
-    "    # download all files\n",
-    "    task.download_all(f\"./results_{task.name}\")"
+    "    display(task.ls())                                    # list files\n",
+    "    task.upload(\"data/sample.txt\")                        # upload 
sample.txt\n",
+    "    display(task.ls())                                    # list files 
AFTER upload\n",
+    "    display(task.cat(\"sample.txt\"))                       # preview 
sample.txt\n",
+    "    task.download(\"sample.txt\", f\"./results_{task.name}\") # download 
sample.txt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plan.wait_for_completion()                                # wait for plan 
to complete\n",
+    "# for task in plan.tasks:\n",
+    "#   task.download_all(f\"./results_{task.name}\")             # download 
plan outputs"
    ]
   },
   {
@@ -380,17 +362,24 @@
     "    @task.context(packages=[\"numpy\", \"pandas\"])\n",
     "    def analyze() -> None:\n",
     "        import numpy as np\n",
-    "        with open(\"pull.conf\", \"r\") as f:\n",
+    "        with open(\"pull_gpu.conf\", \"r\") as f:\n",
     "            data = f.read()\n",
-    "        print(\"pull.conf has\", len(data), \"chars\")\n",
+    "        print(\"pull_gpu.conf has\", len(data), \"chars\")\n",
     "        print(np.arange(10))\n",
     "    analyze()"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "airavata",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -404,9 +393,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.7"
+   "version": "3.11.6"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }
diff --git 
a/modules/agent-framework/deployments/jupyterhub/data/1_experiment_sdk.ipynb 
b/modules/agent-framework/deployments/jupyterhub/data/smd_cpu.ipynb
similarity index 79%
copy from 
modules/agent-framework/deployments/jupyterhub/data/1_experiment_sdk.ipynb
copy to modules/agent-framework/deployments/jupyterhub/data/smd_cpu.ipynb
index f89b022ecd..22b4faa37e 100644
--- a/modules/agent-framework/deployments/jupyterhub/data/1_experiment_sdk.ipynb
+++ b/modules/agent-framework/deployments/jupyterhub/data/smd_cpu.ipynb
@@ -4,20 +4,16 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Cybershuttle SDK -  Molecular Dynamics\n",
-    "> Define, run, monitor, and analyze molecular dynamics experiments in a 
HPC-agnostic way.\n",
     "# Cybershuttle SDK -  Molecular Dynamics\n",
     "> Define, run, monitor, and analyze molecular dynamics experiments in a 
HPC-agnostic way.\n",
     "\n",
     "This notebook shows how users can setup and launch a **NAMD** experiment 
with replicas, monitor its execution, and run analyses both during and after 
execution."
-    "This notebook shows how users can setup and launch a **NAMD** experiment 
with replicas, monitor its execution, and run analyses both during and after 
execution."
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Installing Required Packages\n",
     "## Installing Required Packages\n",
     "\n",
     "First, install the `airavata-python-sdk-test` package from the pip 
repository."
@@ -37,7 +33,6 @@
    "metadata": {},
    "source": [
     "## Importing the SDK"
-    "## Importing the SDK"
    ]
   },
   {
@@ -54,7 +49,6 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Authenticating\n",
     "## Authenticating\n",
     "\n",
     "To authenticate for remote execution, call the `ae.login()` method.\n",
@@ -64,10 +58,8 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "outputs": [],
    "source": [
     "ae.login()"
    ]
@@ -77,7 +69,6 @@
    "metadata": {},
    "source": [
     "Once authenticated, the `ae.list_runtimes()` function can be called to 
list HPC resources that the user has access to."
-    "Once authenticated, the `ae.list_runtimes()` function can be called to 
list HPC resources that the user has access to."
    ]
   },
   {
@@ -88,7 +79,6 @@
    "source": [
     "runtimes = ae.list_runtimes()\n",
     "ae.display(runtimes)"
-    "ae.display(runtimes)"
    ]
   },
   {
@@ -121,7 +111,6 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Defining a NAMD Experiment\n",
     "## Defining a NAMD Experiment\n",
     "\n",
     "The `md.NAMD.initialize()` is used to define a NAMD experiment.\n",
@@ -146,13 +135,6 @@
     "Any optional resource constraint can be provided here.\n",
     "\n",
     "You can also call `ae.display()` to pretty-print the experiment."
-    "```\n",
-    "\n",
-    "To add replica runs, simply call the `exp.add_replica()` function.\n",
-    "You can call the `add_replica()` function as many times as you want 
replicas.\n",
-    "Any optional resource constraint can be provided here.\n",
-    "\n",
-    "You can also call `ae.display()` to pretty-print the experiment."
    ]
   },
   {
@@ -177,9 +159,8 @@
     "      \"data/b4pull.restart.xsc\",\n",
     "    ],\n",
     "    parallelism=\"CPU\",\n",
-    "    num_replicas=1,\n",
     ")\n",
-    "exp.add_replica(*ae.list_runtimes(cluster=\"login.expanse.sdsc.edu\", 
category=\"cpu\"))\n",
+    "exp.add_replica(*ae.list_runtimes(cluster=\"login.expanse.sdsc.edu\", 
category=\"cpu\", walltime=60))\n",
     "ae.display(exp)"
    ]
   },
@@ -190,9 +171,6 @@
     "## Creating an Execution Plan\n",
     "\n",
     "Call the `exp.plan()` function to transform the experiment definition + 
replicas into a stateful execution plan."
-    "## Creating an Execution Plan\n",
-    "\n",
-    "Call the `exp.plan()` function to transform the experiment definition + 
replicas into a stateful execution plan."
    ]
   },
   {
@@ -203,19 +181,15 @@
    "source": [
     "plan = exp.plan()\n",
     "ae.display(plan)"
-    "plan = exp.plan()\n",
-    "ae.display(plan)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Saving the Plan\n",
     "## Saving the Plan\n",
     "\n",
     "A created plan can be saved locally (in JSON) or remotely (in a 
user-local DB) for later reference."
-    "A created plan can be saved locally (in JSON) or remotely (in a 
user-local DB) for later reference."
    ]
   },
   {
@@ -224,8 +198,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "plan.save()  # this will save the plan in DB\n",
-    "plan.save_json(\"plan.json\")  # save the plan state locally"
+    "plan.save()                   # this will save the plan in DB\n",
+    "plan.save_json(\"plan.json\")   # save the plan state locally"
    ]
   },
   {
@@ -237,11 +211,6 @@
     "A created plan can be launched using the `plan.launch()` function.\n",
     "Changes to plan states will be automatically saved onto the remote.\n",
     "However, plan state can also be tracked locally by invoking 
`plan.save_json()`."
-    "## Launching the Plan\n",
-    "\n",
-    "A created plan can be launched using the `plan.launch()` function.\n",
-    "Changes to plan states will be automatically saved onto the remote.\n",
-    "However, plan state can also be tracked locally by invoking 
`plan.save_json()`."
    ]
   },
   {
@@ -252,7 +221,23 @@
    "source": [
     "plan.launch()\n",
     "plan.save_json(\"plan.json\")"
-    "plan.save_json(\"plan.json\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Checking the Plan Status\n",
+    "The status of a plan can be retrieved by calling `plan.status()`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plan.status()"
    ]
   },
   {
@@ -288,7 +273,6 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -313,14 +297,7 @@
    "outputs": [],
    "source": [
     "# plan.stop()\n",
-    "plan.wait_for_completion()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Running File Operations"
+    "# plan.wait_for_completion()"
    ]
   },
   {
@@ -340,22 +317,27 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "for task in plan.tasks:\n",
     "    print(task.name, task.pid)\n",
-    "    # display files\n",
-    "    display(task.ls())\n",
-    "    # upload a file\n",
-    "    task.upload(\"data/sample.txt\")\n",
-    "    # preview contents of a file\n",
-    "    display(task.cat(\"sample.txt\"))\n",
-    "    # download a specific file\n",
-    "    task.download(\"sample.txt\", f\"./results_{task.name}\")\n",
-    "    # download all files\n",
-    "    task.download_all(f\"./results_{task.name}\")"
+    "    display(task.ls())                                    # list files\n",
+    "    task.upload(\"data/sample.txt\")                        # upload 
sample.txt\n",
+    "    display(task.ls())                                    # list files 
AFTER upload\n",
+    "    display(task.cat(\"sample.txt\"))                       # preview 
sample.txt\n",
+    "    task.download(\"sample.txt\", f\"./results_{task.name}\") # download 
sample.txt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plan.wait_for_completion()                                # wait for plan 
to complete\n",
+    "# for task in plan.tasks:\n",
+    "#   task.download_all(f\"./results_{task.name}\")             # download 
plan outputs"
    ]
   },
   {
@@ -380,17 +362,24 @@
     "    @task.context(packages=[\"numpy\", \"pandas\"])\n",
     "    def analyze() -> None:\n",
     "        import numpy as np\n",
-    "        with open(\"pull.conf\", \"r\") as f:\n",
+    "        with open(\"pull_cpu.conf\", \"r\") as f:\n",
     "            data = f.read()\n",
-    "        print(\"pull.conf has\", len(data), \"chars\")\n",
+    "        print(\"pull_cpu.conf has\", len(data), \"chars\")\n",
     "        print(np.arange(10))\n",
     "    analyze()"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "airavata",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -404,9 +393,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.7"
+   "version": "3.11.6"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }
diff --git 
a/modules/agent-framework/deployments/jupyterhub/data/1_experiment_sdk.ipynb 
b/modules/agent-framework/deployments/jupyterhub/data/smd_gpu.ipynb
similarity index 78%
copy from 
modules/agent-framework/deployments/jupyterhub/data/1_experiment_sdk.ipynb
copy to modules/agent-framework/deployments/jupyterhub/data/smd_gpu.ipynb
index f89b022ecd..d9de2a5b38 100644
--- a/modules/agent-framework/deployments/jupyterhub/data/1_experiment_sdk.ipynb
+++ b/modules/agent-framework/deployments/jupyterhub/data/smd_gpu.ipynb
@@ -4,20 +4,16 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Cybershuttle SDK -  Molecular Dynamics\n",
-    "> Define, run, monitor, and analyze molecular dynamics experiments in a 
HPC-agnostic way.\n",
     "# Cybershuttle SDK -  Molecular Dynamics\n",
     "> Define, run, monitor, and analyze molecular dynamics experiments in a 
HPC-agnostic way.\n",
     "\n",
     "This notebook shows how users can setup and launch a **NAMD** experiment 
with replicas, monitor its execution, and run analyses both during and after 
execution."
-    "This notebook shows how users can setup and launch a **NAMD** experiment 
with replicas, monitor its execution, and run analyses both during and after 
execution."
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Installing Required Packages\n",
     "## Installing Required Packages\n",
     "\n",
     "First, install the `airavata-python-sdk-test` package from the pip 
repository."
@@ -37,7 +33,6 @@
    "metadata": {},
    "source": [
     "## Importing the SDK"
-    "## Importing the SDK"
    ]
   },
   {
@@ -54,7 +49,6 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Authenticating\n",
     "## Authenticating\n",
     "\n",
     "To authenticate for remote execution, call the `ae.login()` method.\n",
@@ -64,10 +58,8 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "outputs": [],
    "source": [
     "ae.login()"
    ]
@@ -77,7 +69,6 @@
    "metadata": {},
    "source": [
     "Once authenticated, the `ae.list_runtimes()` function can be called to 
list HPC resources that the user has access to."
-    "Once authenticated, the `ae.list_runtimes()` function can be called to 
list HPC resources that the user has access to."
    ]
   },
   {
@@ -88,7 +79,6 @@
    "source": [
     "runtimes = ae.list_runtimes()\n",
     "ae.display(runtimes)"
-    "ae.display(runtimes)"
    ]
   },
   {
@@ -121,7 +111,6 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Defining a NAMD Experiment\n",
     "## Defining a NAMD Experiment\n",
     "\n",
     "The `md.NAMD.initialize()` is used to define a NAMD experiment.\n",
@@ -146,13 +135,6 @@
     "Any optional resource constraint can be provided here.\n",
     "\n",
     "You can also call `ae.display()` to pretty-print the experiment."
-    "```\n",
-    "\n",
-    "To add replica runs, simply call the `exp.add_replica()` function.\n",
-    "You can call the `add_replica()` function as many times as you want 
replicas.\n",
-    "Any optional resource constraint can be provided here.\n",
-    "\n",
-    "You can also call `ae.display()` to pretty-print the experiment."
    ]
   },
   {
@@ -163,7 +145,7 @@
    "source": [
     "exp = md.NAMD.initialize(\n",
     "    name=\"SMD\",\n",
-    "    config_file=\"data/pull_cpu.conf\",\n",
+    "    config_file=\"data/pull_gpu.conf\",\n",
     "    pdb_file=\"data/structure.pdb\",\n",
     "    psf_file=\"data/structure.psf\",\n",
     "    ffp_files=[\n",
@@ -176,10 +158,9 @@
     "      \"data/b4pull.restart.vel\",\n",
     "      \"data/b4pull.restart.xsc\",\n",
     "    ],\n",
-    "    parallelism=\"CPU\",\n",
-    "    num_replicas=1,\n",
+    "    parallelism=\"GPU\",\n",
     ")\n",
-    "exp.add_replica(*ae.list_runtimes(cluster=\"login.expanse.sdsc.edu\", 
category=\"cpu\"))\n",
+    "exp.add_replica(*ae.list_runtimes(cluster=\"login.expanse.sdsc.edu\", 
category=\"gpu\", walltime=180))\n",
     "ae.display(exp)"
    ]
   },
@@ -190,9 +171,6 @@
     "## Creating an Execution Plan\n",
     "\n",
     "Call the `exp.plan()` function to transform the experiment definition + 
replicas into a stateful execution plan."
-    "## Creating an Execution Plan\n",
-    "\n",
-    "Call the `exp.plan()` function to transform the experiment definition + 
replicas into a stateful execution plan."
    ]
   },
   {
@@ -203,19 +181,15 @@
    "source": [
     "plan = exp.plan()\n",
     "ae.display(plan)"
-    "plan = exp.plan()\n",
-    "ae.display(plan)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Saving the Plan\n",
     "## Saving the Plan\n",
     "\n",
     "A created plan can be saved locally (in JSON) or remotely (in a 
user-local DB) for later reference."
-    "A created plan can be saved locally (in JSON) or remotely (in a 
user-local DB) for later reference."
    ]
   },
   {
@@ -224,8 +198,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "plan.save()  # this will save the plan in DB\n",
-    "plan.save_json(\"plan.json\")  # save the plan state locally"
+    "plan.save()                   # this will save the plan in DB\n",
+    "plan.save_json(\"plan_gpu.json\")   # save the plan state locally"
    ]
   },
   {
@@ -237,11 +211,6 @@
     "A created plan can be launched using the `plan.launch()` function.\n",
     "Changes to plan states will be automatically saved onto the remote.\n",
     "However, plan state can also be tracked locally by invoking 
`plan.save_json()`."
-    "## Launching the Plan\n",
-    "\n",
-    "A created plan can be launched using the `plan.launch()` function.\n",
-    "Changes to plan states will be automatically saved onto the remote.\n",
-    "However, plan state can also be tracked locally by invoking 
`plan.save_json()`."
    ]
   },
   {
@@ -251,8 +220,24 @@
    "outputs": [],
    "source": [
     "plan.launch()\n",
-    "plan.save_json(\"plan.json\")"
-    "plan.save_json(\"plan.json\")"
+    "plan.save_json(\"plan_gpu.json\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Checking the Plan Status\n",
+    "The status of a plan can be retrieved by calling `plan.status()`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plan.status()"
    ]
   },
   {
@@ -270,7 +255,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "plan = ae.plan.load_json(\"plan.json\")\n",
+    "plan = ae.plan.load_json(\"plan_gpu.json\")\n",
     "plan = ae.plan.load(plan.id)\n",
     "plan.status()\n",
     "ae.display(plan)"
@@ -288,7 +273,6 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -313,14 +297,7 @@
    "outputs": [],
    "source": [
     "# plan.stop()\n",
-    "plan.wait_for_completion()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Running File Operations"
+    "# plan.wait_for_completion()"
    ]
   },
   {
@@ -340,22 +317,27 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "for task in plan.tasks:\n",
     "    print(task.name, task.pid)\n",
-    "    # display files\n",
-    "    display(task.ls())\n",
-    "    # upload a file\n",
-    "    task.upload(\"data/sample.txt\")\n",
-    "    # preview contents of a file\n",
-    "    display(task.cat(\"sample.txt\"))\n",
-    "    # download a specific file\n",
-    "    task.download(\"sample.txt\", f\"./results_{task.name}\")\n",
-    "    # download all files\n",
-    "    task.download_all(f\"./results_{task.name}\")"
+    "    display(task.ls())                                    # list files\n",
+    "    task.upload(\"data/sample.txt\")                        # upload 
sample.txt\n",
+    "    display(task.ls())                                    # list files 
AFTER upload\n",
+    "    display(task.cat(\"sample.txt\"))                       # preview 
sample.txt\n",
+    "    task.download(\"sample.txt\", f\"./results_{task.name}\") # download 
sample.txt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plan.wait_for_completion()                                # wait for plan 
to complete\n",
+    "# for task in plan.tasks:\n",
+    "#   task.download_all(f\"./results_{task.name}\")             # download 
plan outputs"
    ]
   },
   {
@@ -380,17 +362,24 @@
     "    @task.context(packages=[\"numpy\", \"pandas\"])\n",
     "    def analyze() -> None:\n",
     "        import numpy as np\n",
-    "        with open(\"pull.conf\", \"r\") as f:\n",
+    "        with open(\"pull_gpu.conf\", \"r\") as f:\n",
     "            data = f.read()\n",
-    "        print(\"pull.conf has\", len(data), \"chars\")\n",
+    "        print(\"pull_gpu.conf has\", len(data), \"chars\")\n",
     "        print(np.arange(10))\n",
     "    analyze()"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "airavata",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -404,9 +393,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.7"
+   "version": "3.11.6"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }

Reply via email to