This is an automated email from the ASF dual-hosted git repository.

lahirujayathilake pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/airavata.git
commit 684711d1804a397487e44eca3446824473e08d04
Author: yasith <[email protected]>
AuthorDate: Tue Feb 25 15:59:00 2025 -0600

    add airavata magics to dev-tools/, make airavata-agent/ build generic
---
 .../user-container/MD/labconfig/airavata_magics.py | 220 +++--
 .../user-container/MD/labconfig/device_auth.py     |   1 +
 modules/agent-framework/airavata-agent/Dockerfile  |   5 +-
 .../airavata-agent/build-container.sh              |   7 +-
 .../airavata-agent/jupyter/Dockerfile              |   4 +-
 .../airavata-agent/jupyter/analysis.ipynb          | 888 ---------------------
 .../airavata-agent/jupyter/data.tar.gz             | Bin 10625 -> 0 bytes
 7 files changed, 159 insertions(+), 966 deletions(-)

diff --git a/dev-tools/deployment/jupyterhub/user-container/MD/labconfig/airavata_magics.py b/dev-tools/deployment/jupyterhub/user-container/MD/labconfig/airavata_magics.py
index f3d91cc447..b6fd96db2a 100644
--- a/dev-tools/deployment/jupyterhub/user-container/MD/labconfig/airavata_magics.py
+++ b/dev-tools/deployment/jupyterhub/user-container/MD/labconfig/airavata_magics.py
@@ -1,62 +1,76 @@
-from IPython.core.magic import register_cell_magic
-from IPython.core.magic import register_line_magic
-
-from IPython.display import display, Image, HTML
 import base64
-import requests
+import binascii
 import json
-import jwt
 import time
 from pathlib import Path
+from typing import NamedTuple
+import jwt
 import os
+import requests
+from IPython.core.magic import register_cell_magic, register_line_magic
+from IPython.display import HTML, Image, display
 from device_auth import DeviceFlowAuthenticator
 
-current_agent_info = None
-
-EXPLICIT_TOKEN_FILE = (
-    Path(os.path.expanduser("~")) / "csagent" / "token" / "keys.json"
-)
-def read_explicit_token_file():
-    if not EXPLICIT_TOKEN_FILE.exists():
-        return None
-    else:
-        with open(EXPLICIT_TOKEN_FILE, "r") as f:
-            return json.load(f)
-def get_access_token():
-
+AgentInfo = NamedTuple('AgentInfo', [
+    ('agentId', str),
+    ('experimentId', str),
+    ('processId', str),
+    ('cluster', str),
+    ('queue', str),
+    ('cpus', int),
+    ('memory', int),
+    ('walltime', int),
+    ('gateway_id', str),
+    ('group', str),
+])
+api_base_url = "https://api.gateway.cybershuttle.org"
+file_server_url = "http://3.142.234.94:8050"
+current_agent : AgentInfo | None = None
+MSG_NOT_INITIALIZED = r"Remote agent not initialized. Please run %init_remote cluster=<cluster> cpu=<cpu> memory=<memory mb> queue=<queue> walltime=<walltime minutes> group=<group>"
+
+
+def get_access_token() -> str | None:
     token_from_env = os.getenv('CS_ACCESS_TOKEN')
     if token_from_env:
        return token_from_env
+    EXPLICIT_TOKEN_FILE = Path("~").expanduser() / "csagent" / "token" / "keys.json"
+    if EXPLICIT_TOKEN_FILE.exists():
+        with open(EXPLICIT_TOKEN_FILE, "r") as f:
+            return json.load(f).get("access_token")
 
-    expl_token_data = read_explicit_token_file()
-    if expl_token_data:
-        return expl_token_data["access_token"]
 
-def get_agent_status():
-    global current_agent_info
-
-    if not current_agent_info:
-        print("No agent was scheduled yet. Please run %init_remote cluster=<cluster> cpu=<cpu> memory=<memory mb> queue=<queue> walltime=<walltime minutes>")
-        return
-    url = 'https://api.gateway.cybershuttle.org/api/v1/agent/' + current_agent_info['agentId']
+def get_agent_status() -> dict | None:
+    if not current_agent:
+        return print(MSG_NOT_INITIALIZED)
+    url = f"{api_base_url}/api/v1/agent/{current_agent.agentId}"
     response = requests.get(url)
     if response.status_code == 202:
-        print("Got response for agent")
-        print(response.json())
         return response.json()
-    else:
-        print('Invalid response reveived. Status code:', response.status_code)
-        print('Response:', response.text)
+    return print(f"Got [{response.status_code}] Response: {response.text}")
+
 
-def submit_agent_job(experiment_name, cluster, queue, cpus, memory, wallTime, access_token, gateway_id='default'):
+def get_process_id(experiment_id: str, headers) -> str:
+    """
+    Get process id by experiment id
+
+    """
+    url = f"{api_base_url}/api/v1/exp/{experiment_id}/process"
+    process_id = ""
+    while not process_id:
+        response = requests.get(url, headers=headers)
+        if response.status_code == 200:
+            process_id = response.json().get("processId")
+        else:
+            time.sleep(5)
+    return process_id
+
+
+def submit_agent_job(experiment_name, cluster, queue, cpus, memory, walltime, access_token, group, gateway_id='default'):
+    global current_agent
 
-    global current_agent_info
     # URL to which the POST request will be sent
-    url = 'https://api.gateway.cybershuttle.org/api/v1/exp/launch'
+    url = api_base_url + '/api/v1/exp/launch'
 
     # Data to be sent in the POST request
     data = {
@@ -65,8 +79,9 @@ def submit_agent_job(experiment_name, cluster, queue, cpus, memory, wallTime, ac
         'cpuCount': cpus,
         'nodeCount': 1,
         'memory': memory,
-        'wallTime': wallTime,
-        'queue': queue
+        'wallTime': walltime,
+        'queue': queue,
+        'group': group,
     }
 
     # Convert the data to JSON format
@@ -92,19 +107,32 @@ def submit_agent_job(experiment_name, cluster, queue, cpus, memory, wallTime, ac
     # Check if the request was successful
     if response.status_code == 200:
         # Parse the JSON response
-        response_data = response.json()
-        print('Response:', response_data)
-        current_agent_info = response_data
+        obj = response.json()
+        current_agent = AgentInfo(
+            agentId=obj['agentId'],
+            experimentId=obj['experimentId'],
+            processId=get_process_id(obj['experimentId'], headers=headers),
+            cluster=cluster,
+            queue=queue,
+            cpus=cpus,
+            memory=memory,
+            walltime=walltime,
+            gateway_id=gateway_id,
+            group=group,
+        )
+        print('Agent Initialized:', current_agent)
     else:
         print('Failed to send POST request. Status code:', response.status_code)
         print('Response:', response.text)
 
-def terminate_agent(access_token, gateway_id='default'):
-    global current_agent_info
 
+def terminate_agent(access_token, gateway_id='default'):
+    global current_agent
+    if not current_agent:
+        return print(MSG_NOT_INITIALIZED)
-    expId = current_agent_info['experimentId']
-    url = 'https://api.gateway.cybershuttle.org/api/v1/exp/terminate/' + expId
+    expId = current_agent.experimentId
+    url = api_base_url + '/api/v1/exp/terminate/' + expId
 
     decode = jwt.decode(access_token, options={"verify_signature": False})
     user_id = decode['preferred_username']
@@ -127,7 +155,7 @@ def terminate_agent(access_token, gateway_id='default'):
         # Parse the JSON response
         response_data = response.json()
         print('Agent terminated:', response_data)
-        current_agent_info = None
+        current_agent = None
     else:
         print('Failed to send termination request. Status code:', response.status_code)
         print('Response:', response.text)
@@ -135,19 +163,17 @@
 
 @register_cell_magic
 def run_remote(line, cell):
+    global current_agent
+    if not current_agent:
+        return print(MSG_NOT_INITIALIZED)
 
-    global current_agent_info
-    if not current_agent_info:
-        print("No agent was scheduled yet. Please run %init_remote cluster=<cluster> cpu=<cpu> memory=<memory mb> queue=<queue> walltime=<walltime minutes>")
-        return
-
-    url = 'https://api.gateway.cybershuttle.org/api/v1/agent/executejupyterrequest'
+    url = api_base_url + '/api/v1/agent/executejupyterrequest'
 
     data = {
         "sessionId": "session1",
         "keepAlive": True,
        "code": cell,
-        "agentId": current_agent_info["agentId"]
+        "agentId": current_agent.agentId
    }
 
    json_data = json.dumps(data)
@@ -159,7 +185,7 @@ def run_remote(line, cell):
             print("Cell execution failed. Error: " + error)
         if execution_id:
             while True:
-                url = "https://api.gateway.cybershuttle.org/api/v1/agent/executejupyterresponse/" + execution_id
+                url = api_base_url + "/api/v1/agent/executejupyterresponse/" + execution_id
                 response = requests.get(url, headers={'Accept': 'application/json'})
                 json_response = response.json()
                 if json_response.get('available'):
@@ -180,7 +206,7 @@ def run_remote(line, cell):
                                     try:
                                         image_bytes = base64.b64decode(image_data)
                                         display(Image(data=image_bytes, format='png'))
-                                    except base64.binascii.Error as e:
+                                    except binascii.Error as e:
                                         print(f"Failed to decode image data: {e}")
                                 # Ignoring any texts in the display data
                                 # if 'text/plain' in data_obj:
@@ -195,7 +221,7 @@ def run_remote(line, cell):
                                     color: #a71d5d;
                                     background-color: #fdd;
                                     border: 1px solid #a71d5d;
-                                    padding: 10px;
+                                    padding: 5px;
                                     border-radius: 5px;
                                     font-family: Consolas, 'Courier New', monospace;
                                     white-space: pre-wrap;
@@ -217,7 +243,7 @@ def run_remote(line, cell):
                                     color: #a71d5d;
                                     background-color: #fdd;
                                     border: 1px solid #a71d5d;
-                                    padding: 10px;
+                                    padding: 5px;
                                     border-radius: 5px;
                                     font-family: Consolas, 'Courier New', monospace;
                                 ">
@@ -246,11 +272,12 @@ def run_remote(line, cell):
                                 try:
                                     image_bytes = base64.b64decode(image_data)
                                     display(Image(data=image_bytes, format='png'))
-                                except base64.binascii.Error as e:
+                                except binascii.Error as e:
                                     print(f"Failed to decode image data: {e}")
                     break
                 time.sleep(1)
 
+
 @register_line_magic
 def cs_login(line):
     try:
@@ -259,10 +286,10 @@
     except ValueError as e:
         print(f"Configuration error: {e}")
 
+
 @register_line_magic
 def init_remote(line):
-
-    if current_agent_info:
+    if current_agent:
        status = get_agent_status()
        if status:
            if status['agentUp']:
@@ -281,6 +308,7 @@
     cpu_value = None
     queue_value = None
     walltime_value = None
+    group_value = ""
 
     # Iterate through the pairs to find the cluster value
     for pair in pairs:
@@ -294,8 +322,11 @@
             queue_value = pair.split("=")[1]
         if pair.startswith("walltime="):
             walltime_value = pair.split("=")[1]
+        if pair.startswith("group="):
+            group_value = pair.split("=")[1]
+
+    submit_agent_job('CS_Agent', cluster_value, queue_value, cpu_value, memory_value, walltime_value, access_token, group_value)
 
-    submit_agent_job('CS_Agent', cluster_value, queue_value, cpu_value, memory_value, walltime_value, access_token)
 
 @register_line_magic
 def status_remote(line):
@@ -304,19 +335,70 @@
         if status['agentUp']:
             print("Agent", status['agentId'], 'is running')
         else:
-            print("Agent", status['agentId'], 'is still prepairing. Please wait')
+            print("Agent", status['agentId'], 'is still preparing. Please wait')
+
 
 @register_line_magic
 def terminate_remote(line):
-    global current_agent_info
+    global current_agent
     access_token = get_access_token()
-    if current_agent_info:
+    if current_agent:
         terminate_agent(access_token)
 
+
+@register_line_magic
+def push_remote(line):
+    if not current_agent:
+        return print(MSG_NOT_INITIALIZED)
+    pairs = line.split()
+    remot_path = None
+    local_path = None
+    for pair in pairs:
+        if pair.startswith("source="):
+            local_path = pair.split("=")[1]
+        if pair.startswith("target="):
+            remot_path = pair.split("=")[1]
+    # validate paths
+    if not remot_path or not local_path:
+        return print("Please provide paths for both source and target")
+    # upload file
+    print(f"Pushing local:{local_path} to remote:{remot_path}")
+    url = f"{file_server_url}/upload/live/{current_agent.processId}/{remot_path}"
+    with open(local_path, "rb") as file:
+        files = {"file": file}
+        response = requests.post(url, files=files)
+    print(f"[{response.status_code}] Uploaded local:{local_path} to remote:{remot_path}")
+
+
+@register_line_magic
+def pull_remote(line):
+    if not current_agent:
+        return print(MSG_NOT_INITIALIZED)
+    pairs = line.split()
+    remot_path = None
+    local_path = None
+    for pair in pairs:
+        if pair.startswith("source="):
+            remot_path = pair.split("=")[1]
+        if pair.startswith("target="):
+            local_path = pair.split("=")[1]
+    # validate paths
+    if not remot_path or not local_path:
+        return print("Please provide paths for both source and target")
+    # download file
+    print(f"Pulling remote:{remot_path} to local:{local_path}")
+    url = f"{file_server_url}/download/live/{current_agent.processId}/{remot_path}"
+    response = requests.get(url)
+    with open(local_path, "wb") as file:
+        file.write(response.content)
+    print(f"[{response.status_code}] Downloaded remote:{remot_path} to local:{local_path}")
+
+
 def load_ipython_extension(ipython):
+    ipython.register_magic_function(cs_login)
     ipython.register_magic_function(init_remote)
     ipython.register_magic_function(status_remote)
     ipython.register_magic_function(terminate_remote)
     ipython.register_magic_function(run_remote)
-    ipython.register_magic_function(cs_login)
+    ipython.register_magic_function(push_remote)
+    ipython.register_magic_function(pull_remote)
diff --git a/dev-tools/deployment/jupyterhub/user-container/MD/labconfig/device_auth.py b/dev-tools/deployment/jupyterhub/user-container/MD/labconfig/device_auth.py
index 269085ca69..58f325b387 100644
--- a/dev-tools/deployment/jupyterhub/user-container/MD/labconfig/device_auth.py
+++ b/dev-tools/deployment/jupyterhub/user-container/MD/labconfig/device_auth.py
@@ -35,6 +35,7 @@ class DeviceFlowAuthenticator:
         self.poll_for_token()
 
     def poll_for_token(self):
+        assert self.interval is not None
         token_url = f"{self.auth_server_url}/realms/{self.realm}/protocol/openid-connect/token"
         while True:
             response = requests.post(token_url, data={
diff --git a/modules/agent-framework/airavata-agent/Dockerfile b/modules/agent-framework/airavata-agent/Dockerfile
index f90643cbc7..fae0ca4db9 100644
--- a/modules/agent-framework/airavata-agent/Dockerfile
+++ b/modules/agent-framework/airavata-agent/Dockerfile
@@ -1,8 +1,7 @@
-FROM python:slim
+FROM python:3.12-slim
 RUN pip install flask jupyter jupyter-client
 RUN mkdir -p /opt/jupyter
 RUN python -m venv /opt/jupyter/venv
-ADD airavata-agent-linux /opt/airavata-agent
+ADD airavata-agent /opt/airavata-agent
 ADD jupyter/kernel.py /opt/jupyter/kernel.py
-
diff --git a/modules/agent-framework/airavata-agent/build-container.sh b/modules/agent-framework/airavata-agent/build-container.sh
index a7c5dc772b..ed3c10e02f 100755
--- a/modules/agent-framework/airavata-agent/build-container.sh
+++ b/modules/agent-framework/airavata-agent/build-container.sh
@@ -1,4 +1,3 @@
-env GOOS=linux GOARCH=amd64 go build
-cp airavata-agent airavata-agent-linux
-docker build --platform linux/x86_64 -t cybershuttle/airavata-agent .
-docker push cybershuttle/airavata-agent
\ No newline at end of file
+env GOOS=linux GOARCH=amd64 go build -o airavata-agent
+docker build --platform linux/x86_64 -t cybershuttle/remote-agent-base .
+docker push cybershuttle/remote-agent-base
\ No newline at end of file
diff --git a/modules/agent-framework/airavata-agent/jupyter/Dockerfile b/modules/agent-framework/airavata-agent/jupyter/Dockerfile
index 5ca284a63d..1f0f6d1de0 100644
--- a/modules/agent-framework/airavata-agent/jupyter/Dockerfile
+++ b/modules/agent-framework/airavata-agent/jupyter/Dockerfile
@@ -20,8 +20,8 @@
 COPY labconfig/device_auth.py /device_auth.py
 COPY labconfig/bootstrap.sh /bootstrap.sh
 RUN chmod +x /bootstrap.sh
 
-COPY data.tar.gz /home/jovyan/data.tar.gz
-COPY analysis.ipynb /home/jovyan/analysis.ipynb
+COPY deployments/cyberfaces/data.tar.gz /home/jovyan/data.tar.gz
+COPY deployments/cyberfaces/analysis.ipynb /home/jovyan/analysis.ipynb
 
 USER ${NB_UID}
diff --git a/modules/agent-framework/airavata-agent/jupyter/analysis.ipynb b/modules/agent-framework/airavata-agent/jupyter/analysis.ipynb
deleted file mode 100644
index 5c50689d8e..0000000000
--- a/modules/agent-framework/airavata-agent/jupyter/analysis.ipynb
+++ /dev/null
@@ -1,888 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "769a199b",
-   "metadata": {},
-   "source": [
-    "# Local spatial correlation between the social vulnerability of population and inundation risk of dam failure\n",
-    "\n",
-    "This notebook demonstrates how the social vulnerability of the population (i.e., socially disadvantaged or non-disadvantaged) is spatially correlated with the inundation risk of a dam failure, focusing on a single dam. The analysis consists of the following two steps.\n",
-    "1. it identifies census blocks inundated from flooding caused by three dam failure scenarios with different water levels: normal high (NH), top of active storage (TAS), and maximum high (MH), retrived from the National Inventory of Dams (NID) of US Army Corps of Engineers. \n",
-    "2. it employs two spatial correlation metrics (i.e., Bivariate Moran’s I and Bivariate Local Indicator of Spatial Association (LISA)) to investigate the relationship between the inundation risk and social vulnerability of the population per impacted locations from a single dam failure. 
In this context, we employed 16 census variables related to the social vulnerability index.\n", - "\n", - "| Acronym | Note |\n", - "|---------|---------------------------------|\n", - "| POV150 | Below 150% Poverty |\n", - "| UNEMP | Unemployment Rate |\n", - "| HBURD | Housing Cost Burden |\n", - "| NOHSDP | No High School Diploma |\n", - "| NOINSUR | No Health Insurance |\n", - "| AGE65 | Aged 65 & Older |\n", - "| AGE17 | Aged 17 & Younger |\n", - "| DISABL | Civilian with a Disability |\n", - "| SNGPNT | Single-Parent Households |\n", - "| LIMENG | English Language Proficiency |\n", - "| MINRTY | Racial & Ethnic Minority Status |\n", - "| MUNIT | Multi-Unit Structures |\n", - "| MOBILE | Mobile Homes |\n", - "| CROWD | Crowding |\n", - "| NOVEH | No Vehicle Household |\n", - "| GROUPQ | Group Quarters |\n" - ] - }, - { - "cell_type": "markdown", - "id": "9ada5513", - "metadata": {}, - "source": [ - "## Setting Up Remote Execution (Cybershuttle)\n", - "\n", - "Cybershuttle provides 6 jupyter magics for seamless remote execution and data movement.\n", - "\n", - "| Command | Note |\n", - "|-----------------|----------------------------------------------------------|\n", - "| `%cs_login` | Log into cybershuttle to gain access |\n", - "| `%init_remote` | Initialize a specific cluster to run remote commands |\n", - "| `%status_remote`| Check if the cluster is ready to accept remote commands |\n", - "| `%%run_remote` | Run commands remotely on cluster |\n", - "| `%push_remote` | Push a file into cluster |\n", - "| `%pull_remote` | Fetch a file from the cluster |\n", - "\n", - "\n", - "Workflow: `%cs_login -> %init_remote -> %status_remote -> {%%run_remote, %push_remote, %pull_remote }`" - ] - }, - { - "cell_type": "markdown", - "id": "d9c4a169", - "metadata": {}, - "source": [ - "#### Load remote cell execution plugin" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "41d9e046-cd40-4ae4-9076-e7fe0ec8b378", - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "sys.path.append('/')\n", - "\n", - "import airavata_magics\n", - "airavata_magics.load_ipython_extension(get_ipython())" - ] - }, - { - "cell_type": "markdown", - "id": "d882c035", - "metadata": {}, - "source": [ - "#### Authenticate to Cybershuttle" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9ed61023", - "metadata": {}, - "outputs": [], - "source": [ - "%cs_login" - ] - }, - { - "cell_type": "markdown", - "id": "1fc1082f", - "metadata": {}, - "source": [ - "#### Start a Cybershuttle Remote Executor\n", - "This will submit a remote job to Anvil\n", - "\n", - "- Cluster - Anvil\n", - "- Community Allocation - CyberFaCES\n", - "- Requested Resources - 4 CPUs, 4GB Memory, 60 Minutes, Shared Queue" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f116ac05", - "metadata": {}, - "outputs": [], - "source": [ - "%init_remote cluster=Anvil cpu=4 memory=4096 queue=shared walltime=60 group=CyberFaCES" - ] - }, - { - "cell_type": "markdown", - "id": "1cddc213", - "metadata": {}, - "source": [ - "#### Wait for Remote Executor to Start" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1c63906b", - "metadata": {}, - "outputs": [], - "source": [ - "%status_remote" - ] - }, - { - "cell_type": "markdown", - "id": "7664b5cf", - "metadata": {}, - "source": [ - "#### Running Remote Commands" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "46223ddd-6ad6-45ca-b5a0-ce4b80575091", - "metadata": {}, - "outputs": [], - "source": 
[ - "%%run_remote\n", - "!ls" - ] - }, - { - "cell_type": "markdown", - "id": "213e87d6", - "metadata": {}, - "source": [ - "#### Upload Data Files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "de9ea9a1-7337-4763-a47d-df4b5059d915", - "metadata": {}, - "outputs": [], - "source": [ - "%push_remote source=data.tar.gz target=data.tar.gz" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7d40a170", - "metadata": {}, - "outputs": [], - "source": [ - "%%run_remote\n", - "!ls" - ] - }, - { - "cell_type": "markdown", - "id": "4fe8fe7a", - "metadata": {}, - "source": [ - "#### Download Data Files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f53b9d4f-8de4-4dcd-aebb-c60be9ef31dc", - "metadata": {}, - "outputs": [], - "source": [ - "%pull_remote source=CyberFaCESAgent.stdout target=CyberFaCESAgent.stdout" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b60d0730", - "metadata": {}, - "outputs": [], - "source": [ - "!cat CyberFaCESAgent.stdout" - ] - }, - { - "cell_type": "markdown", - "id": "8f897049", - "metadata": {}, - "source": [ - "#### To Stop the Agent run" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c404db19", - "metadata": {}, - "outputs": [], - "source": [ - "# %terminate_remote" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b0bc48d6", - "metadata": {}, - "outputs": [], - "source": [ - "%%run_remote\n", - "\n", - "# Import necessary packages\n", - "import os\n", - "import pandas as pd\n", - "import geopandas as gpd\n", - "import subprocess\n", - "import json\n", - "import pygeos\n", - "import requests\n", - "import matplotlib.pyplot as plt\n", - "from matplotlib.colors import LinearSegmentedColormap\n", - "\n", - "\n", - "try:\n", - " import libpysal\n", - "except:\n", - " !pip install -U libpysal\n", - " import libpysal\n", - " \n", - "try:\n", - " import esda\n", - "except:\n", - " !pip install -U esda\n", - " import esda\n", - "try:\n", - " import contextily as cx\n", - "except:\n", - " !pip3 install contextily\n", - " import contextily as cx\n" - ] - }, - { - "cell_type": "markdown", - "id": "956a91db", - "metadata": {}, - "source": [ - "## 0. Set up environment varialbes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "69cbdaa0", - "metadata": {}, - "outputs": [], - "source": [ - "%%run_remote\n", - "\n", - "# Working directory\n", - "data_dir = '/data/sample_data'\n", - "output_dir = os.getcwd()\n", - "\n", - "# Find the list of dams\n", - "fed_dams = pd.read_csv('/data/dam_list.csv')\n", - "fed_dams = gpd.GeoDataFrame(fed_dams, geometry=gpd.points_from_xy(fed_dams['LON'], fed_dams['LAT'], crs=\"EPSG:4326\"))\n", - "print(f'Total count of dams available for local analysis: {fed_dams.shape[0]}')\n", - "\n", - "# Dam of Interest\n", - "dam_id = 'CA10022' # example dam for demonstrating local analysis\n", - "API_Key = 'ENTER CENSUS API KEY HERE' # The API key can be obtained through https://api.census.gov/data/key_signup.html\n", - "\n", - "print(f'Dam of Interest: {dam_id}')" - ] - }, - { - "cell_type": "markdown", - "id": "0229db15", - "metadata": {}, - "source": [ - "## 1. Find census blocks impacted by three dam failure scenarios\n", - "\n", - "Spatial intersections are conducted to find the submerged census blocks from three dam failure scenarios (i.e., breach of dam structure with the water level of normal high (NH), top of active storage (TAS), and maximum high (MH)). <br> Per each scenario, <br>\n", - "1. 
we vectorized the original raster into a polygon and spatially queried census blocks using Boolean logic. In other words, a census block has a value of one if it intersects with the inundation polygon; otherwise, it has zero. \n", - "2. we overlaid three polygons from three dam breach scenarios (i.e., MH, TAS, and NH) to estimate the potential risk of flooding. <br>\n", - "3. we found census blocks that intersected with the polygonized geometry and then kept the highest inundation risk value (i.e., 1~3) per census block. \n", - "\n", - "As a result, the inundation risk of a single dam failure provides polygons with values ranging from one through three. A census block with a value of three means the location will be submerged for every scenario (NH, TAS, and MH), whereas a polygon having a value of one means that the area would only get impacted for the worst-case scenario (MH breach)." - ] - }, - { - "cell_type": "markdown", - "id": "a30e85e6", - "metadata": {}, - "source": [ - "### 1.1. Vectorize inundations of three scenarios and overlaid them" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fafd5982", - "metadata": {}, - "outputs": [], - "source": [ - "%%run_remote\n", - "\n", - "def resample_raster(rasterfile_path, filename, target_path, rescale_factor):\n", - " # first determine pixel size for resampling\n", - " xres = 0\n", - " yres = 0\n", - " \n", - " out = subprocess.run([\"gdalinfo\",\"-json\",rasterfile_path],stdout=subprocess.PIPE)\n", - " raster_meta = json.loads(out.stdout.decode('utf-8'))\n", - " if 'geoTransform' in raster_meta:\n", - " xres = raster_meta['geoTransform'][1]\n", - " yres = raster_meta['geoTransform'][5]\n", - " xres = xres * rescale_factor\n", - " yres = yres * rescale_factor\n", - "\n", - " if (xres != 0) and (yres != 0):\n", - " # resample raster\n", - " save_path = target_path +\"/\"+ filename + f\"_resample.tiff\"\n", - " subprocess.run([\"gdalwarp\",\"-r\",\"bilinear\",\"-of\",\"GTiff\",\"-tr\",str(xres),str(yres),rasterfile_path,save_path])\n", - "\n", - " return save_path, raster_meta\n", - "\n", - "def polygonize_fim(rasterfile_path, target_path):\n", - "\n", - " # Extract target path and filename from the given raster file path\n", - " # target_path = '/'.join(rasterfile_path.split('/')[:-1])\n", - " filename = rasterfile_path.split(\"/\")[-1].split(\".\")[-2]\n", - "\n", - " # Define paths\n", - " resample_path = target_path +\"/\"+ filename + f\"_resample.tiff\"\n", - " reclass_file = target_path + \"/\" + filename + \"_reclass.tiff\"\n", - " geojson_out = \"%s/%s.json\" % (target_path, filename)\n", - "\n", - " for temp_path_ in [resample_path, reclass_file, geojson_out]:\n", - " if os.path.exists(temp_path_):\n", - " os.remove(temp_path_)\n", - "\n", - " # Resample raster file to 10-times smaller\n", - " resample_path, raster_meta = resample_raster(rasterfile_path, filename, target_path, rescale_factor=4)\n", - "\n", - " # Reclassify raster\n", - " outfile = \"--outfile=\"+reclass_file\n", - " print(outfile)\n", - " no_data_val = raster_meta['bands'][0]['noDataValue']\n", - " # subprocess.run([\"gdal_calc.py\",\"-A\",resample_path,outfile,f\"--calc=1*(A>0)\",f\"--NoDataValue={no_data_val}\"],stdout=subprocess.PIPE)\n", - " subprocess.run([\"gdal_calc.py\",\"-A\",resample_path,outfile,f\"--calc=-9999*(A<=0)+1*(A>0)\",f\"--NoDataValue={no_data_val}\"],stdout=subprocess.PIPE)\n", - " \n", - " # Polygonize the reclassified raster\n", - " subprocess.run([\"gdal_polygonize.py\", reclass_file, \"-b\", \"1\", geojson_out, 
filename, \"value\"])\n", - "\n", - " inund_polygons = gpd.read_file(geojson_out)\n", - "\n", - " # If GeoDataFrame has records\n", - " if inund_polygons.shape[0] != 0:\n", - " inund_polygons = inund_polygons.loc[(inund_polygons['value'] != -9999) & (inund_polygons['value'] != 0)] # Remove pixels of null value\n", - "\n", - " # drop invalid geometries\n", - " inund_polygons = inund_polygons.loc[inund_polygons['geometry'].is_valid, :]\n", - "\n", - " # Coverage for each class of inundation map\n", - " inund_per_cls = inund_polygons.dissolve(by='value')\n", - " inund_per_cls.reset_index(inplace=True)\n", - "\n", - " # remove all temp files\n", - " os.remove(resample_path)\n", - " os.remove(reclass_file)\n", - " os.remove(geojson_out)\n", - "\n", - " # inundation_per_cls: GeoDataFrame \n", - " return inund_per_cls\n", - "\n", - " else:\n", - " return gpd.GeoDataFrame(data={'value': 1}, index=[0], geometry=[None])\n", - " \n", - " \n", - "def fim_multiple_scenarios(dam_id, input_dir, output_dir):\n", - " \n", - " sce_mh = {'loadCondition': 'MH', 'breachCondition': 'F'} # Maximun Height scenario\n", - " sce_tas = {'loadCondition': 'TAS', 'breachCondition': 'F'} # Top of Active Storage scenario\n", - " sce_nh = {'loadCondition': 'NH', 'breachCondition': 'F'} # Normal Height scenario\n", - "\n", - " # Maximun Height scenario (weight: 1)\n", - " fim_path_mh = f\"{input_dir}/NID_FIM_{sce_mh['loadCondition']}_{sce_mh['breachCondition']}/{sce_mh['loadCondition']}_{sce_mh['breachCondition']}_{dam_id}.tiff\"\n", - " fim_gdf_mh = polygonize_fim(fim_path_mh, output_dir)\n", - " fim_gdf_mh['value_mh'] = fim_gdf_mh['value'] * 1\n", - " fim_gdf_mh.drop(columns=['value'], inplace=True)\n", - "\n", - " # Top of Active Storage scenario (weight: 1)\n", - " fim_path_tas = f\"{input_dir}/NID_FIM_{sce_tas['loadCondition']}_{sce_tas['breachCondition']}/{sce_tas['loadCondition']}_{sce_tas['breachCondition']}_{dam_id}.tiff\"\n", - " fim_gdf_tas = polygonize_fim(fim_path_tas, output_dir)\n", - " fim_gdf_tas['value_tas'] = fim_gdf_tas['value'] * 1\n", - " fim_gdf_tas.drop(columns=['value'], inplace=True)\n", - "\n", - " # Normal Height scenario (weight: 1)\n", - " fim_path_nh = f\"{input_dir}/NID_FIM_{sce_nh['loadCondition']}_{sce_nh['breachCondition']}/{sce_nh['loadCondition']}_{sce_nh['breachCondition']}_{dam_id}.tiff\"\n", - " fim_gdf_nh = polygonize_fim(fim_path_nh, output_dir)\n", - " fim_gdf_nh['value_nh'] = fim_gdf_nh['value'] * 1\n", - " fim_gdf_nh.drop(columns=['value'], inplace=True)\n", - "\n", - " # Find intersections of inundated area across multiple scenarios\n", - " temp_fim_gdf = gpd.overlay(fim_gdf_nh, fim_gdf_tas, how='union')\n", - " fim_gdf = gpd.overlay(temp_fim_gdf, fim_gdf_mh, how='union')\n", - " fim_gdf.fillna(0, inplace=True)\n", - "\n", - " # Sum values (1: MH only, 2: MH + TAS , 3: MH + TAS + NH)\n", - " fim_gdf['value'] = fim_gdf.apply(lambda x:x['value_mh'] + x['value_tas'] + x['value_nh'], axis=1)\n", - " fim_gdf.drop(columns=['value_mh', 'value_tas', 'value_nh'], inplace=True)\n", - " fim_gdf['Dam_ID'] = dam_id\n", - " \n", - " return fim_gdf\n", - "\n", - "\n", - "\n", - "fim_gdf = fim_multiple_scenarios(dam_id, data_dir, output_dir)\n", - "\n", - "# Plot polygonized inundation risk of a dam failure\n", - "color_brewer = ['#9ecae1','#4292c6','#084594']\n", - "cm = LinearSegmentedColormap.from_list('cb_', color_brewer, N=3)\n", - "\n", - "fig, ax = plt.subplots(figsize=(8,8))\n", - "fim_gdf.plot('value', ax=ax, cmap=cm, alpha=0.8)\n", - "cx.add_basemap(ax=ax, 
source=cx.providers.Stamen.TonerLite, crs=fim_gdf.crs, attribution_size=0, zoom=10) # Add basemap\n", - "\n", - "# Hide coordinates in the plot axes\n", - "ax.get_xaxis().set_visible(False)\n", - "ax.get_yaxis().set_visible(False)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "34703c75", - "metadata": {}, - "source": [ - "### 1.2. Calculate the highest inundation risk value per census block" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "598a8159", - "metadata": {}, - "outputs": [], - "source": [ - "%%run_remote\n", - "\n", - "# This function returns the id of states that would be impacted by the failure of the dam of interest\n", - "def state_num_related_to_fim(fim_gdf, tract_gdf):\n", - " \n", - " tract_geoms = pygeos.from_shapely(tract_gdf['geometry'].values)\n", - " tract_geoms_tree = pygeos.STRtree(tract_geoms, leafsize=50)\n", - "\n", - " fim_geom_union = pygeos.from_shapely(fim_gdf['geometry'].unary_union) \n", - " query_intersect = tract_geoms_tree.query(fim_geom_union, predicate='intersects')\n", - " tract_gdf = tract_gdf.loc[query_intersect]\n", - "\n", - " tract_gdf['STATE'] = tract_gdf.apply(lambda x:x['GEOID'][0:2], axis=1)\n", - " unique_state = tract_gdf['STATE'].unique()\n", - " \n", - " # return type: list\n", - " return unique_state\n", - "\n", - "# Census tract geometry across the conterminous United States\n", - "tract_gdf = gpd.read_file(os.path.join(data_dir, 'census_geometry', 'census_tract_from_api.geojson'))\n", - "fim_state = state_num_related_to_fim(fim_gdf, tract_gdf)\n", - "\n", - "# Load census block geometry of the selected states\n", - "if len(fim_state) == 1: # If only one state is associated with the inundation mapping\n", - " block_gdf = gpd.read_file(os.path.join(data_dir, 'census_geometry', f'tl_2020_{fim_state[0]}_tabblock20.geojson'))\n", - "elif len(fim_state) >= 2: # If multiple states are associated with the inundation mapping\n", - " block_gdf = pd.DataFrame()\n", - " for state_num in fim_state:\n", - " temp_gdf = gpd.read_file(os.path.join(data_dir, 'census_geometry', f'tl_2020_{state_num}_tabblock20.geojson'))\n", - " block_gdf = pd.concat([temp_gdf, block_gdf]).reset_index(drop=True)\n", - " block_gdf = gpd.GeoDataFrame(block_gdf, geometry=block_gdf['geometry'], crs=\"EPSG:4326\")\n", - "else:\n", - " raise AttributeError('NO STATE is related to Inundation Mapping')\n", - "\n", - "# Destination dataframe to save the results\n", - "fim_geoid_gdf = gpd.GeoDataFrame({'Dam_ID': pd.Series(dtype='str'),\n", - " 'GEOID': pd.Series(dtype='str'),\n", - " 'Class': pd.Series(dtype='str')\n", - " }\n", - " ) \n", - "\n", - "# Create STRtree for block_gdf\n", - "block_geoms = pygeos.from_shapely(block_gdf['geometry'].values)\n", - "block_geoms_tree = pygeos.STRtree(block_geoms, leafsize=50)\n", - "\n", - "# Extract census tract intersecting with each class of inundation map\n", - "for water_cls in fim_gdf['value'].unique():\n", - " fim_geom_ = pygeos.from_shapely(fim_gdf.loc[fim_gdf['value'] == water_cls, 'geometry'].unary_union)\n", - " query_fim_geom_ = block_geoms_tree.query(fim_geom_, predicate='intersects')\n", - " fim_geoid_ = block_gdf.loc[query_fim_geom_]\n", - " fim_geoid_['Class'] = water_cls\n", - " fim_geoid_['Dam_ID'] = dam_id\n", - " \n", - " fim_geoid_gdf = pd.concat([fim_geoid_, fim_geoid_gdf]).reset_index(drop=True)\n", - "\n", - "# Cleaning output\n", - "fim_geoid_gdf = fim_geoid_gdf[['GEOID', 'Dam_ID', 'Class', 'geometry']]\n", - "\n", - "# Find the highest inundation risk per census block\n", 
- "fim_geoid_gdf = fim_geoid_gdf.groupby(['Dam_ID', 'GEOID'], \n", - " group_keys=False).apply(lambda x:x.loc[x['Class'].idxmax()]\n", - " ).reset_index(drop=True)\n", - "fim_geoid_gdf = fim_geoid_gdf.set_crs(epsg=4326)\n", - "fim_geoid_gdf" - ] - }, - { - "cell_type": "markdown", - "id": "9591cac6", - "metadata": {}, - "source": [ - "### 1.3. Step 1 results (Inundation risk per census block)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "50dcf54e", - "metadata": {}, - "outputs": [], - "source": [ - "%%run_remote\n", - "\n", - "# Plot census block with inundation risk of a dam failure\n", - "color_brewer = ['#9ecae1','#4292c6','#084594']\n", - "cm = LinearSegmentedColormap.from_list('cb_', color_brewer, N=3)\n", - "\n", - "fig, ax = plt.subplots(figsize=(8,8))\n", - "fim_geoid_gdf.plot('Class', ax=ax, cmap=cm, alpha=0.8)\n", - "cx.add_basemap(ax=ax, source=cx.providers.Stamen.TonerLite, crs=fim_gdf.crs, attribution_size=0, zoom=10)\n", - "ax.get_xaxis().set_visible(False)\n", - "ax.get_yaxis().set_visible(False)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "e990bd22", - "metadata": {}, - "source": [ - "## 2. Retrive Census Data\n", - "\n", - "To investigate the social vulnerability of the population at-risk of dam failure, we retrieve the following variables from Census API.\n", - "\n", - "\n", - "\n", - "Source: https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/SVI_documentation_2020.html" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "43f9fb48", - "metadata": {}, - "outputs": [], - "source": [ - "%%run_remote\n", - "\n", - "def call_census_table(state_list, table_name, key):\n", - " \n", - " result_df = pd.DataFrame()\n", - " \n", - " # querying at census tract level\n", - " for state in state_list:\n", - " if table_name.startswith('DP'):\n", - " address = f'https://api.census.gov/data/2020/acs/acs5/profile?get=NAME,{table_name}&for=tract:*&in=state:{state}&in=county:*'\n", - " elif table_name.startswith('S'):\n", - " address = f'https://api.census.gov/data/2020/acs/acs5/subject?get=NAME,{table_name}&for=tract:*&in=state:{state}&in=county:*'\n", - " elif table_name.startswith('B'):\n", - " address = f'https://api.census.gov/data/2020/acs/acs5?get=NAME,{table_name}&for=tract:*&in=state:{state}&in=county:*'\n", - " else:\n", - " raise AttributeError('Proper Table Name Is Needed.')\n", - " \n", - " response = requests.get(f'{address}&key={key}').json()\n", - " result_ = pd.DataFrame(response)\n", - " \n", - " result_.columns = response[0]\n", - " result_.drop(0, axis=0, inplace=True)\n", - " \n", - " result_df = pd.concat([result_, result_df]).reset_index(drop=True)\n", - " \n", - " # result_df = result_df.rename(columns={'GEO_ID':'GEOID_T'})\n", - " result_df['GEOID_T'] = result_df.apply(lambda x: x['state'] + x['county'] + x['tract'], axis=1)\n", - " result_df[table_name] = result_df[table_name].astype(float)\n", - " \n", - " return result_df[['GEOID_T', table_name]]\n", - "\n", - "\n", - "# Employed census tables to calculate each social vulnerabilty related variables.\n", - "# str: single variable\n", - "# list: [[To be summed and set as numerator], demonimator] \n", - "census_info = {\n", - " \"EP_POV150\" : [['S1701_C01_040E'], 'S1701_C01_001E'],\n", - " \"EP_UNEMP\" : 'DP03_0009PE',\n", - " \"EP_HBURD\" : [['S2503_C01_028E', 'S2503_C01_032E', 'S2503_C01_036E', 'S2503_C01_040E'], \n", - " 'S2503_C01_001E'],\n", - " \"EP_NOHSDP\" : 'S0601_C01_033E',\n", - " \"EP_UNINSUR\" : 'S2701_C05_001E',\n", - " 
\"EP_AGE65\" : 'S0101_C02_030E',\n", - " \"EP_AGE17\" : [['B09001_001E'], \n", - " 'S0601_C01_001E'],\n", - " \"EP_DISABL\" : 'DP02_0072PE',\n", - " \"EP_SNGPNT\" : [['B11012_010E', 'B11012_015E'], 'DP02_0001E'],\n", - " \"EP_LIMENG\" : [['B16005_007E', 'B16005_008E', 'B16005_012E', 'B16005_013E', 'B16005_017E', 'B16005_018E', \n", - " 'B16005_022E', 'B16005_023E', 'B16005_029E', 'B16005_030E', 'B16005_034E', 'B16005_035E',\n", - " 'B16005_039E', 'B16005_040E', 'B16005_044E', 'B16005_045E'], \n", - " 'B16005_001E'],\n", - " \"EP_MINRTY\" : [['DP05_0071E', 'DP05_0078E', 'DP05_0079E', 'DP05_0080E', \n", - " 'DP05_0081E', 'DP05_0082E', 'DP05_0083E'],\n", - " 'S0601_C01_001E'],\n", - " \"EP_MUNIT\" : [['DP04_0012E', 'DP04_0013E'], \n", - " 'DP04_0001E'],\n", - " \"EP_MOBILE\" : 'DP04_0014PE',\n", - " \"EP_CROWD\" : [['DP04_0078E', 'DP04_0079E'], \n", - " 'DP04_0002E'],\n", - " \"EP_NOVEH\" : 'DP04_0058PE',\n", - " \"EP_GROUPQ\": [['B26001_001E'], \n", - " 'S0601_C01_001E']\n", - "}\n", - "\n", - "\n", - "# Calculate GEOID for census tract as the census data is provided based on census tract\n", - "fim_geoid_gdf['GEOID_T'] = fim_geoid_gdf.apply(lambda x:x['GEOID'][0:11], axis=1)\n", - "\n", - "# List of states that is associated with the dam failure\n", - "state_list = fim_geoid_gdf.apply(lambda x:x['GEOID'][0:2], axis=1).unique()\n", - "\n", - "cols = list(census_info.keys())\n", - "cols.append('GEOID_T')\n", - "\n", - "'''\n", - "This commented cell demonstrates how the census data was retrieved from the Census API.\n", - "To avoid potential issue during the API calling, this demonstration loads `census_data.csv`,\n", - "which is already retrieved and stored in the repo. \n", - "\n", - "attr_df = pd.DataFrame({'GEOID_T':fim_geoid_gdf['GEOID_T'].unique().tolist()})\n", - "for attr in census_info.keys(): # attr: svi-related census abbriviation on the final table\n", - " print(f'Retrieving {attr} from Census API')\n", - " \n", - " if type(census_info[attr]) == str:\n", - " temp_table = call_census_table(state_list, census_info[attr], API_Key)\n", - " attr_df = attr_df.merge(temp_table, on='GEOID_T')\n", - " attr_df = attr_df.rename(columns={census_info[attr]: attr})\n", - " else:\n", - " for table in census_info[attr][0]: # Retrieve numerator variables\n", - " temp_table = call_census_table(state_list, table, API_Key)\n", - " attr_df = attr_df.merge(temp_table, on='GEOID_T')\n", - "\n", - " temp_table = call_census_table(state_list, census_info[attr][1], API_Key) # Retrieve denominator variable\n", - " attr_df = attr_df.merge(temp_table, on='GEOID_T')\n", - "\n", - " # Calculate the ratio of each variable\n", - " attr_df[attr] = attr_df[census_info[attr][0]].sum(axis=1) / attr_df[census_info[attr][1]] * 100\n", - "\n", - " # Remove intermediate columns used for SVI related census calculation\n", - " attr_df = attr_df[attr_df.columns.intersection(cols)]\n", - "\n", - " # Replace not valid value (e.g., -666666) from census with nan value\n", - " attr_df[attr] = attr_df.apply(lambda x: float('nan') if x[attr] < 0 else x[attr], axis=1)\n", - "'''\n", - "\n", - "# Merge census data with fim_geoid_gdf\n", - "attr_df = pd.read_csv(os.path.join(data_dir, 'census_geometry', 'census_data.csv')) \n", - "attr_df['GEOID_T'] = attr_df['GEOID_T'].astype(str)\n", - "attr_df['GEOID_T'] = attr_df.apply(lambda x:x['GEOID_T'].zfill(11), axis=1)\n", - "fim_geoid_gdf = fim_geoid_gdf.merge(attr_df, on='GEOID_T')\n", - "\n", - "# Reproject fim_geoid to EPSG:5070, NAD83 / Conus Albers (meters)\n", - "fim_geoid_gdf = 
fim_geoid_gdf.to_crs(epsg=5070)\n", - "fim_geoid_gdf.head(3)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7be1b230", - "metadata": {}, - "outputs": [], - "source": [ - "%%run_remote\n", - "\n", - "# Plot an example of census variable (POV150: Below 150% Poverty)\n", - "fig, ax = plt.subplots(figsize=(8,8))\n", - "\n", - "fim_geoid_gdf.plot('EP_POV150', ax=ax, scheme='naturalbreaks', cmap='Reds', legend=True)\n", - "cx.add_basemap(ax=ax, source=cx.providers.Stamen.TonerLite, crs=fim_geoid_gdf.crs, attribution_size=0, zoom=10)\n", - "ax.get_xaxis().set_visible(False)\n", - "ax.get_yaxis().set_visible(False)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "aac41f7e", - "metadata": {}, - "source": [ - "## 3. Relationship between social vulnerabilty of population and inundation risk of dam failure\n", - "\n", - "Based on two well-known metrics (Bivariate Moran’s I and Bivariate Local Indicator of Spatial Association (LISA)) spatial correlation step investigates the relationship between the inundation risk and social vulnerability of each census variable at the census block level. Each metric takes two input variables: 1) the potential risk of dam failure-induced flooding and 2) one out of 16 census data related to social vulnerability. Here, we employed the Queen’s case as spatial contiguit [...] - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ffeb00a7", - "metadata": {}, - "outputs": [], - "source": [ - "%%run_remote\n", - "\n", - "def calculate_bivariate_Moran_I_and_LISA(dam_id, census_dic, fim_geoid_gdf, dams_gdf):\n", - "\n", - " input_cols = list(census_dic.keys())\n", - " input_cols.extend(['Dam_ID', 'GEOID', 'Class', 'geometry'])\n", - " fim_geoid_local = fim_geoid_gdf.loc[fim_geoid_gdf['Dam_ID'] == dam_id, input_cols].reset_index(drop=True)\n", - " dam_local = dams_gdf.loc[dams_gdf['ID'] == dam_id].reset_index(drop=True)\n", - "\n", - " # Iterate through all census variables\n", - " for census_name in census_dic.keys():\n", - " new_col_name = census_name.split(\"_\")[1]\n", - " \n", - " # Remove invalid values of census data for local fim_geoid \n", - " fim_geoid_local_var = fim_geoid_local.loc[(~fim_geoid_local[census_name].isna()) & (fim_geoid_local[census_name] >= 0), \n", - " ['Dam_ID', 'GEOID', 'Class', census_name, 'geometry']].reset_index(drop=True)\n", - " \n", - " # Calculate Bivaraite Moran's I & Local Moran's I with Queen's Case Contiguity\n", - " w = libpysal.weights.Queen.from_dataframe(fim_geoid_local_var) # Adjacency matrix (Queen case)\n", - " bv_mi = esda.Moran_BV(fim_geoid_local_var['Class'], fim_geoid_local_var[census_name], w) \n", - " bv_lm = esda.Moran_Local_BV(fim_geoid_local_var['Class'], fim_geoid_local_var[census_name], w, seed=17)\n", - "\n", - " # Enter results of Bivariate LISA into each census region\n", - " lm_dict = {1: 'HH', 2: 'LH', 3: 'LL', 4: 'HL'}\n", - " for idx in range(fim_geoid_local_var.shape[0]):\n", - " if bv_lm.p_sim[idx] < 0.05:\n", - " fim_geoid_local_var.loc[idx, f'LISA_{new_col_name}'] = lm_dict[bv_lm.q[idx]]\n", - " else:\n", - " fim_geoid_local_var.loc[idx, f'LISA_{new_col_name}'] = 'Not_Sig'\n", - "\n", - " fim_geoid_local_na = fim_geoid_local.loc[fim_geoid_local[census_name].isna(), ['Dam_ID', 'GEOID', 'Class', census_name, 'geometry']]\n", - " fim_geoid_local_na[f'LISA_{new_col_name}'] = 'NA'\n", - " fim_geoid_local_var = pd.concat([fim_geoid_local_var, fim_geoid_local_na]).reset_index(drop=True) \n", - " fim_geoid_local = 
fim_geoid_local.merge(fim_geoid_local_var[['GEOID', f'LISA_{new_col_name}']], on='GEOID')\n", - "\n", - " # Enter Bivariate Moran's I result into each dam\n", - " dam_local[f'MI_{new_col_name}'] = bv_mi.I\n", - " dam_local[f'pval_{new_col_name}'] = bv_mi.p_z_sim\n", - " \n", - " return dam_local, fim_geoid_local\n", - "\n", - "# Calculates Bivariate Moran's I (mi) and Bivariate LISA (lm)\n", - "mi, lm = calculate_bivariate_Moran_I_and_LISA(dam_id, census_info, fim_geoid_gdf, fed_dams)\n", - "lm.head(3)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "29f7f34b", - "metadata": {}, - "outputs": [], - "source": [ - "%%run_remote\n", - "\n", - "# Plot Inundation Risk\n", - "color_brewer = ['#9ecae1','#4292c6','#084594']\n", - "cm = LinearSegmentedColormap.from_list('cb_', color_brewer, N=3)\n", - "\n", - "fig, ax = plt.subplots(figsize=(8,8))\n", - "lm.plot('Class', ax=ax, cmap=cm, alpha=0.8)\n", - "mi_plot = mi.to_crs(epsg=5070)\n", - "mi_plot.plot(marker=\"*\", color='red', markersize=200, ax=ax)\n", - "cx.add_basemap(ax=ax, source=cx.providers.Stamen.TonerLite, crs=lm.crs, attribution_size=0, zoom=10)\n", - "ax.set_title('Inundation Risk', fontsize=18)\n", - "ax.get_xaxis().set_visible(False)\n", - "ax.get_yaxis().set_visible(False)\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "510c853f", - "metadata": {}, - "outputs": [], - "source": [ - "%%run_remote\n", - "\n", - "# Plot Bivariate LISA\n", - "'''\n", - "# The title of each cell provides Bivariate Moran's I \n", - "# Census block color codes:\n", - "## Red: HH cluster (high inundation risk and high social vulnerability)\n", - "## Skyblue: HL cluster (high inundation risk and low social vulnerability)\n", - "## Orange: LH cluster (low inundation risk and high social vulnerability)\n", - "## Blue: LL cluster (low inundation risk and low social vulnerability)\n", - "'''\n", - "plot_cols = [ col.split('_')[1] for col in census_info.keys()]\n", - "\n", - "# LISA\n", - "fig, axes = plt.subplots(4, 4, figsize=(15, 15))\n", - "axes = axes.reshape(-1)\n", - "\n", - "lisa_color = {'HH': 'red', 'LL': 'blue', 'HL': 'skyblue', 'LH': 'orange', 'Not_Sig': 'white'}\n", - "boundary_gdf = gpd.GeoDataFrame([0], geometry=[lm.unary_union])\n", - "\n", - "for idx, val in enumerate(plot_cols):\n", - " for key in lisa_color.keys():\n", - " lm.loc[(lm[f'LISA_{val}'] == key)].plot(ax=axes[idx], color=lisa_color[key], edgecolor='face', lw=0.3, legend=True)\n", - "\n", - " boundary_gdf.boundary.plot(ax=axes[idx], lw=0.5, color='black')\n", - " axes[idx].set_title(label=f\"{val}\\n(BV Moran's I: {round(mi_plot.loc[mi_plot['ID'] == dam_id, f'MI_{val}'].values[0], 2)})\",\n", - " fontsize=18)\n", - " mi_plot.plot(marker=\"*\", color='black', markersize=400, ax=axes[idx])\n", - "\n", - " axes[idx].get_xaxis().set_visible(False)\n", - " axes[idx].get_yaxis().set_visible(False)\n", - "\n", - "plt.tight_layout()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "d3a8e506", - "metadata": {}, - "source": [ - "# Done" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git 
a/modules/agent-framework/airavata-agent/jupyter/data.tar.gz b/modules/agent-framework/airavata-agent/jupyter/data.tar.gz
deleted file mode 100644
index 9ced33122c..0000000000
Binary files a/modules/agent-framework/airavata-agent/jupyter/data.tar.gz and /dev/null differ
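
A note for anyone trying this change end to end: the deleted analysis.ipynb documented the intended workflow as %cs_login -> %init_remote -> %status_remote -> {%%run_remote, %push_remote, %pull_remote}. Below is a minimal notebook-style sketch of that flow, assuming airavata_magics.py is importable from the kernel's working directory; the cluster, queue, and group values are simply copied from that notebook's Anvil/CyberFaCES example, and the staged file names are illustrative.

```python
# In [1]: load the extension (assumes airavata_magics.py is on sys.path)
import airavata_magics
airavata_magics.load_ipython_extension(get_ipython())

# In [2]: authenticate via the device flow (DeviceFlowAuthenticator)
%cs_login

# In [3]: request a remote agent; note the group= argument added in this commit
%init_remote cluster=Anvil cpu=4 memory=4096 queue=shared walltime=60 group=CyberFaCES

# In [4]: re-run until the agent reports it is running
%status_remote

# In [5]: stage an input file next to the remote process
%push_remote source=data.tar.gz target=data.tar.gz

# In [6]: execute a cell remotely (%%run_remote is a cell magic, so it needs its own cell)
%%run_remote
print("hello from the agent")

# In [7]: fetch results, then release the allocation
%pull_remote source=CyberFaCESAgent.stdout target=CyberFaCESAgent.stdout
%terminate_remote
```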
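The new %push_remote and %pull_remote magics are thin wrappers over two plain HTTP endpoints on the file server hard-coded in airavata_magics.py. The sketch below shows the underlying calls under the assumption that an agent is already running; the process id "proc-123" and both file names are placeholders (the magics resolve the real process id via get_process_id()).

```python
import requests

file_server_url = "http://3.142.234.94:8050"  # value hard-coded in airavata_magics.py
process_id = "proc-123"                       # placeholder; normally current_agent.processId

# %push_remote: multipart POST to /upload/live/<processId>/<target path>
with open("data.tar.gz", "rb") as fh:
    response = requests.post(
        f"{file_server_url}/upload/live/{process_id}/data.tar.gz",
        files={"file": fh},
    )
print(response.status_code)

# %pull_remote: GET from /download/live/<processId>/<source path>
response = requests.get(f"{file_server_url}/download/live/{process_id}/results.txt")
with open("results.txt", "wb") as fh:
    fh.write(response.content)
```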
