Re: [PR] HDDS-14816. Add Recon AI Assistant backend foundation with Multi LLM integration [ozone]

via GitHub Mon, 11 May 2026 06:11:11 -0700


devmadhuu commented on code in PR #9915:
URL: https://github.com/apache/ozone/pull/9915#discussion_r3218443087



##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/chatbot/llm/AnthropicClient.java:
##########
@@ -0,0 +1,175 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.recon.chatbot.llm;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.ozone.recon.chatbot.ChatbotConfigKeys;
+import org.apache.hadoop.ozone.recon.chatbot.security.CredentialHelper;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Direct client for Anthropic Claude models using Composition.
+ */
+public class AnthropicClient implements LLMClient {
+
+  private static final ObjectMapper MAPPER = new ObjectMapper();
+  private static final String ANTHROPIC_VERSION = "2023-06-01";
+
+  private final OzoneConfiguration configuration;
+  private final CredentialHelper credentialHelper;
+  private final LLMNetworkClient networkClient;
+
+  public AnthropicClient(OzoneConfiguration configuration,
+                         CredentialHelper credentialHelper,
+                         int timeoutMs) {
+    this.configuration = configuration;
+    this.credentialHelper = credentialHelper;
+    this.networkClient = new LLMNetworkClient(timeoutMs);
+  }
+
+  @Override
+  public LLMResponse chatCompletion(List<ChatMessage> messages, String model, 
String apiKey, Map<String, Object> parameters) throws LLMException {

Review Comment:
   Don't we also need to whitelist body parameters via `ALLOWED_BODY_PARAMS` 
here, the same way `OpenAIClient` does?



##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/chatbot/agent/ChatbotAgent.java:
##########
@@ -0,0 +1,809 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.recon.chatbot.agent;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.inject.Inject;
+import com.google.inject.Singleton;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.ozone.recon.chatbot.ChatbotConfigKeys;
+import org.apache.hadoop.ozone.recon.chatbot.llm.LLMClient;
+import org.apache.hadoop.ozone.recon.chatbot.llm.LLMClient.ChatMessage;
+import org.apache.hadoop.ozone.recon.chatbot.llm.LLMClient.LLMResponse;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Main chatbot agent that orchestrates the conversation flow.
+ * Handles tool selection (figuring out what API to call), executing those 
calls,
+ * and summarization (feeding the data back to the LLM to write a nice answer).
+ */
+@Singleton
+public class ChatbotAgent {
+
+  private static final Logger LOG = 
LoggerFactory.getLogger(ChatbotAgent.class);
+
+  private static final ObjectMapper MAPPER = new ObjectMapper();
+
+  // A specific Recon API endpoint we want to handle carefully because it can 
return millions of rows.
+  private static final String LIST_KEYS_ENDPOINT_SUFFIX = "/keys/listKeys";
+
+  // The connection to Gemini/OpenAI
+  private final LLMClient llmClient;
+
+  // The hands that execute the internal API calls
+  private final ToolExecutor toolExecutor;
+
+  // The Cheat Sheet of all available APIs loaded from the .md file
+  private final String apiSchema;
+
+  // Max API calls we allow per question (so the LLM doesn't DOS our server)
+  private final int maxToolCalls;
+
+
+  private final String defaultModel;
+  private final int maxRecordsPerAnswer;
+  private final int maxPagesPerAnswer;
+  private final int pageSizePerCall;
+  private final boolean requireSafeScope;
+
+  @Inject
+  public ChatbotAgent(LLMClient llmClient,
+                      ToolExecutor toolExecutor,
+                      OzoneConfiguration configuration) {
+    this.llmClient = llmClient;
+    this.toolExecutor = toolExecutor;
+
+    // Read the Schema (Cheat Sheet) from the resources' folder.
+    this.apiSchema = loadApiSchema();
+
+    // Load all the safeguards and settings from ozone-site.xml
+    this.maxToolCalls = configuration.getInt(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_MAX_TOOL_CALLS,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_MAX_TOOL_CALLS_DEFAULT);
+    this.defaultModel = configuration.get(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_DEFAULT_MODEL,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_DEFAULT_MODEL_DEFAULT);
+    this.maxRecordsPerAnswer = configuration.getInt(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_MAX_RECORDS,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_MAX_RECORDS_DEFAULT);
+    this.maxPagesPerAnswer = configuration.getInt(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_MAX_PAGES,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_MAX_PAGES_DEFAULT);
+    this.pageSizePerCall = configuration.getInt(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_PAGE_SIZE,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_PAGE_SIZE_DEFAULT);
+    this.requireSafeScope = configuration.getBoolean(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_REQUIRE_SAFE_SCOPE,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_REQUIRE_SAFE_SCOPE_DEFAULT);
+
+    LOG.info("ChatbotAgent initialized with model={}, maxRecords={}, " +
+            "maxPages={}, pageSize={}, requireSafeScope={}",
+        defaultModel, maxRecordsPerAnswer, maxPagesPerAnswer,
+        pageSizePerCall, requireSafeScope);
+  }
+
+  /**
+   * THE MAIN ENTRY POINT. Processes a user query and returns a response.
+   *
+   * @param userQuery the user's question
+   * @param model     the LLM model to use
+   * @param provider  explicit provider name (optional, e.g. "gemini", 
"openai")
+   * @param apiKey    the user's API key (optional)
+   * @return the chatbot response
+   */
+  public String processQuery(String userQuery, String model,
+                             String provider, String apiKey)
+      throws Exception {

Review Comment:
   `throws Exception` is too general. It would be better to declare the 
specific exceptions this method can throw, e.g. a custom `LLMException` for 
LLM-related failures and `IOException`, or to wrap and rethrow as a typed 
checked exception.



##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/chatbot/llm/GeminiClient.java:
##########
@@ -0,0 +1,181 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.recon.chatbot.llm;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.ozone.recon.chatbot.ChatbotConfigKeys;
+import org.apache.hadoop.ozone.recon.chatbot.security.CredentialHelper;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Direct client for Google Gemini models using Composition.
+ */
+public class GeminiClient implements LLMClient {
+
+  private static final ObjectMapper MAPPER = new ObjectMapper();
+  private final OzoneConfiguration configuration;
+  private final CredentialHelper credentialHelper;
+  private final LLMNetworkClient networkClient;
+
+  public GeminiClient(OzoneConfiguration configuration,
+                      CredentialHelper credentialHelper,
+                      int timeoutMs) {
+    this.configuration = configuration;
+    this.credentialHelper = credentialHelper;
+    this.networkClient = new LLMNetworkClient(timeoutMs);
+  }
+
+  @Override
+  public LLMResponse chatCompletion(List<ChatMessage> messages, String model, 
String apiKey, Map<String, Object> parameters) throws LLMException {
+    String resolvedKey = resolveApiKey(apiKey);
+    if (resolvedKey == null || resolvedKey.isEmpty()) {
+      throw new LLMException("No API key configured for provider 'gemini'.");
+    }
+
+    // Pass the API key in the x-goog-api-key header instead of as a URL query
+    // parameter. URLs leak into access logs, proxies, monitoring tools and
+    // browser history — headers don't. Google officially supports both forms.
+    String url = getBaseUrl() + "/v1beta/models/" + model + ":generateContent";
+    Map<String, String> headers = new HashMap<>();
+    headers.put("x-goog-api-key", resolvedKey);
+
+    ObjectNode body = MAPPER.createObjectNode();
+    ArrayNode contents = body.putArray("contents");
+
+    for (ChatMessage msg : messages) {
+      if ("system".equals(msg.getRole())) {
+        ObjectNode sysInstruction = body.putObject("systemInstruction");

Review Comment:
   `putObject("systemInstruction")` replaces the previous node on every loop 
iteration, so only the last system message survives. The AnthropicClient 
handles this correctly by accumulating into a `StringBuilder`. `GeminiClient` 
needs the same treatment.



##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/chatbot/llm/OpenAIClient.java:
##########
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.recon.chatbot.llm;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.ozone.recon.chatbot.ChatbotConfigKeys;
+import org.apache.hadoop.ozone.recon.chatbot.security.CredentialHelper;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Direct client for OpenAI models (GPT-4, GPT-4o, o1, o3, etc.).
+ * Talks to {@code api.openai.com/v1/chat/completions}.
+ */
+public class OpenAIClient implements LLMClient {
+
+  private static final ObjectMapper MAPPER = new ObjectMapper();
+
+  /**
+   * Parameters that are valid in the OpenAI Chat Completions request body.
+   * Anything not on this list (e.g. our internal "provider" routing hint, or
+   * unknown future fields) is dropped before the request is sent so the API
+   * doesn't reject us with "unrecognized field" in strict mode.
+   */
+  private static final Set<String> ALLOWED_BODY_PARAMS = 
Collections.unmodifiableSet(
+      new HashSet<>(Arrays.asList(
+          "temperature", "max_tokens", "top_p", "n", "stream", "stop",
+          "presence_penalty", "frequency_penalty", "logit_bias", "user",
+          "response_format", "seed")));
+
+  private final OzoneConfiguration configuration;
+  private final CredentialHelper credentialHelper;
+  private final LLMNetworkClient networkClient;
+
+  public OpenAIClient(OzoneConfiguration configuration,
+                      CredentialHelper credentialHelper,
+                      int timeoutMs) {
+    this.configuration = configuration;
+    this.credentialHelper = credentialHelper;
+    this.networkClient = new LLMNetworkClient(timeoutMs);
+  }
+
+  @Override
+  public LLMResponse chatCompletion(List<ChatMessage> messages, String model, 
String apiKey, Map<String, Object> parameters) throws LLMException {
+    String resolvedKey = resolveApiKey(apiKey);
+    if (resolvedKey == null || resolvedKey.isEmpty()) {
+      throw new LLMException("No API key configured for provider 'openai'.");
+    }
+
+    String url = getBaseUrl() + "/v1/chat/completions";
+    ObjectNode body = buildOpenAIRequestBody(messages, model, parameters);
+
+    Map<String, String> headers = new HashMap<>();
+    headers.put("Authorization", "Bearer " + resolvedKey);
+
+    try {
+      String responseBody = networkClient.executePost(url, headers, 
MAPPER.writeValueAsString(body), "openai");
+      return parseOpenAIResponse(responseBody, model);
+    } catch (Exception e) {
+      if (e instanceof LLMException) {
+        throw (LLMException) e;
+      }
+      throw new LLMException("OpenAI Request Failed: " + e.getMessage(), e);
+    }
+  }
+
+  @Override
+  public boolean isAvailable() {
+    String key = 
credentialHelper.getSecret(ChatbotConfigKeys.OZONE_RECON_CHATBOT_OPENAI_API_KEY);
+    return key != null && !key.isEmpty();
+  }
+
+  @Override
+  public List<String> getSupportedModels() {
+    return GeminiClient.loadModelsFromConfig(configuration,

Review Comment:
   This is quite confusing. Why are `OpenAI` models loaded through a static 
method on the `GeminiClient` class? How are `OpenAI` and `Gemini` models 
related? I am not sure what the intent is, but if we want to load the models 
for each provider, we should define the shared method in a common utility class.



##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/chatbot/agent/ToolExecutor.java:
##########
@@ -0,0 +1,450 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.recon.chatbot.agent;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.google.inject.Inject;
+import com.google.inject.Singleton;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.ozone.recon.chatbot.ChatbotConfigKeys;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Executes tool calls by making HTTP requests to Recon API endpoints.
+ */
+@Singleton
+public class ToolExecutor {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(ToolExecutor.class);
+  private static final ObjectMapper MAPPER = new ObjectMapper();
+
+  // We define the specific String suffixes for APIs we want to explicitly 
watch out for
+  private static final String LIST_KEYS_ENDPOINT_SUFFIX = "/keys/listKeys";
+
+  // Hardcoded security timeouts. If Recon takes longer than 30 seconds to 
connect
+  // or return data, kill the request so we don't freeze the chatbot.
+  private static final int CONNECT_TIMEOUT_MS = 30_000;
+  private static final int READ_TIMEOUT_MS = 30_000;
+
+  private final String reconBaseUrl;
+  private final int defaultMaxRecords;  // Max records to fetch in total
+  private final int defaultMaxPages;   // Max pages to loop through
+  private final int defaultPageSize;  // Default size of one page
+
+  @Inject
+  public ToolExecutor(OzoneConfiguration configuration) {
+    // Get Recon base URL from configuration
+    // Default to localhost for local development
+    this.reconBaseUrl = "http://localhost:9888";;
+
+    this.defaultMaxRecords = configuration.getInt(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_MAX_RECORDS,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_MAX_RECORDS_DEFAULT);
+    this.defaultMaxPages = configuration.getInt(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_MAX_PAGES,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_MAX_PAGES_DEFAULT);
+    this.defaultPageSize = configuration.getInt(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_PAGE_SIZE,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_PAGE_SIZE_DEFAULT);
+
+    LOG.info("ToolExecutor initialized with Recon URL: {}, maxRecords={}, 
maxPages={}, pageSize={}",
+        reconBaseUrl, defaultMaxRecords, defaultMaxPages, defaultPageSize);
+  }
+
+  /**
+   * What this does: It receives the request from the ChatbotAgent, cleans up 
the URL, and decides if it needs the
+   * complex paging system (for listKeys) or just a simple, single network hit 
(for everything else).
+   */
+  public ToolExecutionOutcome executeToolCallWithPolicy(
+      String endpoint,
+      String method,
+      Map<String, String> parameters,
+      int maxRecords,
+      int maxPages,
+      int pageSize) throws IOException {
+
+    // First, make a safe copy of the parameters (like `limit=10`) so we can 
edit it without breaking anything
+    Map<String, String> safeParams = parameters == null ? new HashMap<>() : 
new HashMap<>(parameters);
+
+    // Normalize string. E.g., Change "clusterState" to "/api/v1/clusterState"
+    String fullEndpoint = normalizeEndpoint(endpoint);
+
+    // If the LLM asked to list keys, redirect to our special paging loop 
logic!
+    if (fullEndpoint.endsWith(LIST_KEYS_ENDPOINT_SUFFIX) && 
"GET".equalsIgnoreCase(method)) {
+      return executeListKeysWithPaging(fullEndpoint, method, safeParams, 
maxRecords, maxPages, pageSize);
+    }
+
+    // For EVERY OTHER endpoint, just run a single, normal HTTP request
+    JsonNode response = executeSingleCall(fullEndpoint, method, safeParams);
+
+    // Count how many records we got back and return our structured DTO tracker
+    int records = estimateRecordCount(response);
+    return new ToolExecutionOutcome(response, records, 1, false, null,
+        createLimitsMap(maxRecords, maxPages, pageSize));
+  }
+
+
+  /**
+   * The listKeys Pager - It uses a while() loop to continuously execute API 
calls, stitching all the
+   * individual pages into one massive JSON array until it runs out of data or 
hits a hard security constraint limit.
+   */
+  private ToolExecutionOutcome executeListKeysWithPaging(
+      String endpoint, String method, Map<String, String> parameters,
+      int maxRecords, int maxPages, int pageSize)
+      throws IOException {
+
+    // Safety Check: Did the LLM provide a bucket path to search in?
+    String startPrefix = parameters.get("startPrefix");
+    if (startPrefix == null || startPrefix.trim().isEmpty() || 
"/".equals(startPrefix.trim())) {
+      throw new IllegalArgumentException(
+          "listKeys requires 'startPrefix' at bucket level or deeper (for 
example /volume/bucket).");
+    }
+
+    // Figure out limits... Either use what the LLM specifically requested, or 
our system defaults.
+    int requestedLimit = parsePositiveInt(parameters.get("limit"), pageSize);
+    int effectivePageSize = Math.max(1, Math.min(pageSize, requestedLimit));
+    int safeMaxRecords = Math.max(1, maxRecords);
+    int safeMaxPages = Math.max(1, maxPages);
+
+
+    ObjectNode merged = null;                               // This will hold 
the final, massive JSON object
+    ArrayNode aggregatedKeys = MAPPER.createArrayNode();    // This will hold 
all the individual rows we find
+    String nextCursor = parameters.get("prevKey");          // The "ID" of the 
last record so we know where to pick up
+    int recordsProcessed = 0;                               // Counter for rows
+    int pagesFetched = 0;                                   // Counter for 
pages
+    boolean truncated = false;                              // Did we hit a 
hard limit?
+
+    // THE ENGINE LOOP: Keep pulling pages until we hit our max Page count or 
Record count
+    while (pagesFetched < safeMaxPages && recordsProcessed < safeMaxRecords) {
+      // Calculate how many records we still need and inject it into the API 
call
+      Map<String, String> pageParams = new HashMap<>(parameters);
+      int remaining = safeMaxRecords - recordsProcessed;
+      int pageLimit = Math.max(1, Math.min(effectivePageSize, remaining));
+      pageParams.put("limit", String.valueOf(pageLimit));
+
+      // If we have a cursor from a previous page, inject it so Recon gives us 
the NEXT page
+      if (nextCursor != null && !nextCursor.isEmpty()) {
+        pageParams.put("prevKey", nextCursor);
+      } else {
+        pageParams.remove("prevKey");
+      }
+
+      // FIRE THE API CALL FOR A SINGLE PAGE!
+      JsonNode pageResponse = executeSingleCall(endpoint, method, pageParams);
+      pagesFetched++;
+
+      // If this is the first page, copy all the root JSON data (like total 
counts) into our master `merged` object
+      if (merged == null && pageResponse != null && pageResponse.isObject()) {
+        merged = ((ObjectNode) pageResponse).deepCopy();
+      }
+
+      // Loop over the list of keys (the rows) that Recon just gave us
+      JsonNode keys = pageResponse == null ? null : pageResponse.get("keys");
+      int pageCount = 0;
+      if (keys != null && keys.isArray()) {
+        for (JsonNode key : keys) {
+          if (recordsProcessed >= safeMaxRecords) {
+            truncated = true;
+            break;
+          }
+          aggregatedKeys.add(key);
+          recordsProcessed++;
+          pageCount++;
+        }
+      }
+
+      // Find the ID of the last row on this page so we can pass it into the 
loop for the next page
+      String lastKey = extractStringField(pageResponse, "lastKey");
+      if (lastKey == null || lastKey.isEmpty() || pageCount == 0) {
+        nextCursor = null;
+        break;
+      }
+      nextCursor = lastKey;
+
+      // If we hit limits, flag this dataset as truncated
+      if (recordsProcessed >= safeMaxRecords || pagesFetched >= safeMaxPages) {
+        truncated = true;
+      }
+    }
+
+    // Now that the loop is finished, reconstruct the final JSON block
+    if (merged == null) {
+      merged = MAPPER.createObjectNode();
+    }
+    merged.set("keys", aggregatedKeys);
+    if (nextCursor != null) {
+      merged.put("lastKey", nextCursor);
+    }
+    // Inject our metadata so ChatbotAgent can see what happened
+    merged.put("truncated", truncated);
+    merged.put("recordsProcessed", recordsProcessed);
+    merged.put("pagesFetched", pagesFetched);
+
+    // Package the results and send them back up to the ChatbotAgent
+    return new ToolExecutionOutcome(merged, recordsProcessed, pagesFetched, 
truncated, nextCursor,
+        createLimitsMap(safeMaxRecords, safeMaxPages, effectivePageSize));
+  }
+
+  /**
+   * The Actual HTTP Execution.
+   */
+  private JsonNode executeSingleCall(String endpoint, String method,
+                                     Map<String, String> parameters)
+      throws IOException {
+    String url = buildUrl(endpoint, parameters);
+    LOG.debug("Executing tool call: {} {}", method, url);
+
+    HttpURLConnection conn = null;
+    try {
+      // Connect to the Recon URL
+      conn = (HttpURLConnection) new URL(url).openConnection();
+      conn.setRequestMethod(
+          "GET".equalsIgnoreCase(method) ? "GET" : "POST");
+      conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
+      conn.setReadTimeout(READ_TIMEOUT_MS);
+
+      // Tell Recon we expect to receive JSON data format
+      conn.setRequestProperty("Accept", "application/json");
+      conn.setRequestProperty("Content-Type", "application/json");
+
+      // Execute request.
+      int statusCode = conn.getResponseCode();
+      if (statusCode != 200) {
+        // If the server threw a 500 error or a 404, capture the failure text 
and throw an exception
+        String errorBody = readErrorStream(conn);
+        String errorMsg = String.format(
+            "API request failed with status %d: %s",
+            statusCode, errorBody);
+        LOG.error(errorMsg);
+        throw new IOException(errorMsg);
+      }
+
+      // Request succeeded! Read the raw byte data and convert it into a string
+      String body = readInputStream(conn);
+      return parseJsonSafely(body);
+    } finally {
+      // Always disconnect to free up memory on the server
+      if (conn != null) {
+        conn.disconnect();
+      }
+    }
+  }
+
+  private String normalizeEndpoint(String endpoint) {
+    if (endpoint == null || endpoint.trim().isEmpty()) {
+      throw new IllegalArgumentException("Tool endpoint cannot be empty");
+    }
+    String fullEndpoint = endpoint;
+
+    // Ensure the path always starts with "/api/v1/"
+    if (!fullEndpoint.startsWith("/api/v1/")) {

Review Comment:
   @ArafatKhan2198 this is still not resolved. Basically, 
`ToolExecutor.normalizeEndpoint()` only ensures the path starts with 
`/api/v1/`, but a compromised prompt or a hallucinating LLM could inject paths 
like `/api/v1/../actuator/shutdown` or `/api/v1/%2F..%2F`. You should maintain 
an allowlist of accepted path prefixes (e.g., `/api/v1/containers`, 
`/api/v1/datanodes`, etc.) and reject anything not on the list. The current 
`requireSafeScope` guard only checks for listKeys — it provides no protection 
for other endpoints.



##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/chatbot/agent/ChatbotAgent.java:
##########
@@ -0,0 +1,773 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.recon.chatbot.agent;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.inject.Inject;
+import com.google.inject.Singleton;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.ozone.recon.chatbot.ChatbotConfigKeys;
+import org.apache.hadoop.ozone.recon.chatbot.llm.LLMClient;
+import org.apache.hadoop.ozone.recon.chatbot.llm.LLMClient.ChatMessage;
+import org.apache.hadoop.ozone.recon.chatbot.llm.LLMClient.LLMResponse;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Main chatbot agent that orchestrates the conversation flow.
+ * Handles tool selection (figuring out what API to call), executing those 
calls,
+ * and summarization (feeding the data back to the LLM to write a nice answer).
+ */
+@Singleton
+public class ChatbotAgent {
+
+  private static final Logger LOG = 
LoggerFactory.getLogger(ChatbotAgent.class);
+
+  private static final ObjectMapper MAPPER = new ObjectMapper();
+  private static final Pattern JSON_PATTERN = Pattern.compile("\\{.*\\}", 
Pattern.DOTALL);
+
+  // A specific Recon API endpoint we want to handle carefully because it can 
return millions of rows.
+  private static final String LIST_KEYS_ENDPOINT_SUFFIX = "/keys/listKeys";
+
+  // The connection to Gemini/OpenAI
+  private final LLMClient llmClient;
+
+  // The hands that execute the internal API calls
+  private final ToolExecutor toolExecutor;
+
+  // The Cheat Sheet of all available APIs loaded from the .md file
+  private final String apiSchema;
+
+  // Max API calls we allow per question (so the LLM doesn't DOS our server)
+  private final int maxToolCalls;
+
+
+  private final String defaultModel;
+  private final int maxRecordsPerAnswer;
+  private final int maxPagesPerAnswer;
+  private final int pageSizePerCall;
+  private final boolean requireSafeScope;
+
+  /**
+   * Set per-request by processQuery; used to inject provider hint.
+   */
+  private volatile String currentProvider;
+
+  @Inject
+  public ChatbotAgent(LLMClient llmClient,
+                      ToolExecutor toolExecutor,
+                      OzoneConfiguration configuration) {
+    this.llmClient = llmClient;
+    this.toolExecutor = toolExecutor;
+
+    // Read the Schema (Cheat Sheet) from the resources' folder.
+    this.apiSchema = loadApiSchema();
+
+    // Load all the safeguards and settings from ozone-site.xml
+    this.maxToolCalls = configuration.getInt(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_MAX_TOOL_CALLS,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_MAX_TOOL_CALLS_DEFAULT);
+    this.defaultModel = configuration.get(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_DEFAULT_MODEL,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_DEFAULT_MODEL_DEFAULT);
+    this.maxRecordsPerAnswer = configuration.getInt(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_MAX_RECORDS,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_MAX_RECORDS_DEFAULT);
+    this.maxPagesPerAnswer = configuration.getInt(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_MAX_PAGES,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_MAX_PAGES_DEFAULT);
+    this.pageSizePerCall = configuration.getInt(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_PAGE_SIZE,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_PAGE_SIZE_DEFAULT);
+    this.requireSafeScope = configuration.getBoolean(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_REQUIRE_SAFE_SCOPE,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_REQUIRE_SAFE_SCOPE_DEFAULT);
+
+    LOG.info("ChatbotAgent initialized with model={}, maxRecords={}, " +
+            "maxPages={}, pageSize={}, requireSafeScope={}",
+        defaultModel, maxRecordsPerAnswer, maxPagesPerAnswer,
+        pageSizePerCall, requireSafeScope);
+  }
+
+  /**
+   * THE MAIN ENTRY POINT. Processes a user query and returns a response.
+   *
+   * @param userQuery the user's question
+   * @param model     the LLM model to use
+   * @param provider  explicit provider name (optional, e.g. "gemini", 
"openai")
+   * @param apiKey    the user's API key (optional)
+   * @return the chatbot response
+   */
+  public String processQuery(String userQuery, String model,
+                             String provider, String apiKey)
+      throws Exception {
+
+    // Safety check
+    if (userQuery == null || userQuery.trim().isEmpty()) {
+      throw new IllegalArgumentException("Query cannot be empty");
+    }
+
+    // Use default model if the user didn't specify one.
+    String effectiveModel = (model != null && !model.isEmpty()) ? model : 
defaultModel;
+
+    // Store provider so private helper methods can inject it
+    // into LLM call parameters.
+    this.currentProvider = provider;
+
+    LOG.info("Processing query with model: {}, provider: {}", effectiveModel, 
provider == null ? "auto" : provider);
+
+    // STEP 1: Ask the LLM what API tools it wants to use to answer the 
question.
+    ToolCall toolCall = getToolCall(userQuery, effectiveModel, apiKey);
+
+    // If the LLM doesn't know what API to call...
+    if (toolCall == null) {
+      // No suitable endpoint found
+      LOG.info("Tool selection result: NO_SUITABLE_ENDPOINT; using fallback");
+      return handleFallback(userQuery, effectiveModel, apiKey);
+    }
+
+    // If the user asked a general question (e.g. "What is Ozone?"), the LLM 
answers it directly without an API call.
+    if (toolCall.isDocumentationQuery()) {
+      LOG.info("Tool selection result: DOCUMENTATION_QUERY (no Recon API 
call)");
+      return toolCall.getAnswer();
+    }
+
+    // STEP 2: Execute the internal Recon API calls
+    Map<String, Object> apiResponses;
+    Map<String, Object> executionMetadata = new HashMap<>();
+
+    // Scenario A: LLM says we need to call MULTIPLE APIs to get the answer
+    if (toolCall.isMultipleEndpoints()) {
+
+      if (toolCall.getToolCalls() == null || 
toolCall.getToolCalls().isEmpty()) {
+        LOG.warn("LLM returned MULTI_ENDPOINT but no tool calls");
+        return handleFallback(userQuery, effectiveModel, apiKey);
+      }
+      LOG.info("Tool selection result: MULTI_ENDPOINT count={}",
+          toolCall.getToolCalls().size());
+
+      // Check if the LLM asked for something dangerous (like scanning the 
whole cluster without a limit)
+      String clarification = 
buildClarificationForToolCalls(toolCall.getToolCalls());
+      if (clarification != null) {
+        LOG.info("Execution policy returned clarification for multi-endpoint " 
+
+            "request: {}", clarification);
+        return clarification;
+      }
+      for (ToolCall selected : toolCall.getToolCalls()) {
+        LOG.info("Selected Recon API: method={}, endpoint={}, paramKeys={}, 
reasoning={}",
+            selected.getMethod(),
+            selected.getEndpoint(),
+            selected.getParameters() == null ? "[]" : 
selected.getParameters().keySet(),
+            selected.getReasoning());
+      }
+
+      // Execute all the API calls securely
+      apiResponses = executeMultipleToolCalls(toolCall.getToolCalls(), 
executionMetadata);
+
+      // Scenario B: LLM says we only need ONE API call
+    } else {
+      if (toolCall.getEndpoint() == null || toolCall.getEndpoint().isEmpty()) {
+        LOG.warn("LLM returned SINGLE_ENDPOINT with empty endpoint");
+        return handleFallback(userQuery, effectiveModel, apiKey);
+      }
+      LOG.info("Tool selection result: SINGLE_ENDPOINT method={}, endpoint={}, 
paramKeys={}, reasoning={}",
+          toolCall.getMethod(),
+          toolCall.getEndpoint(),
+          toolCall.getParameters() == null ? "[]" : 
toolCall.getParameters().keySet(),
+          toolCall.getReasoning());
+      String clarification = validateToolCallForExecution(toolCall);
+      if (clarification != null) {
+        LOG.info("Execution policy returned clarification for endpoint {}: {}",
+            toolCall.getEndpoint(), clarification);
+        return clarification;
+      }
+      // Go fetch the data using our ToolExecutor!
+      ToolExecutor.ToolExecutionOutcome outcome = 
toolExecutor.executeToolCallWithPolicy(
+          toolCall.getEndpoint(),
+          toolCall.getMethod(),
+          toolCall.getParameters(),
+          maxRecordsPerAnswer,
+          maxPagesPerAnswer,
+          pageSizePerCall);
+
+      // Save the raw JSON data the API returned
+      apiResponses = new HashMap<>();
+      apiResponses.put(toolCall.getEndpoint(), outcome.getResponseBody());
+      executionMetadata.put(toolCall.getEndpoint(),
+          createExecutionMetadataMap(outcome));
+    }
+
+    // STEP 3: Send the raw JSON data BACK to the LLM to format a nice answer
+    LOG.info("Summarization input prepared: endpointCount={}, endpoints={}",
+        apiResponses.size(), apiResponses.keySet());
+    return summarizeResponse(userQuery, apiResponses, executionMetadata, 
effectiveModel, apiKey);
+  }
+
+  /**
+   * "Step 1" Helper: Talks to the LLM and asks for a JSON object telling us 
which API to call.
+   */
+  private ToolCall getToolCall(String userQuery, String model, String apiKey)
+      throws Exception {
+
+    // Build the "cheat sheet" prompt (includes the recon-api-guide.md)
+    String systemPrompt = buildToolSelectionPrompt();
+    String userPrompt = "User Query: " + userQuery;
+
+    List<ChatMessage> messages = new ArrayList<>();
+    messages.add(new ChatMessage("system", systemPrompt));
+    messages.add(new ChatMessage("user", userPrompt));
+
+    // Tuning the LLM: Temperature 0.1 means we want it to be very strict and 
robotic, not creative.
+    Map<String, Object> parameters = new HashMap<>();
+    parameters.put("temperature", 0.1);
+    parameters.put("max_tokens", 8192);
+    if (currentProvider != null && !currentProvider.isEmpty()) {
+      parameters.put("_provider", currentProvider);
+    }
+
+    // Send the request to the LLM
+    LLMResponse response = llmClient.chatCompletion(messages, model, apiKey, 
parameters);
+
+    LOG.info("Tool selection LLM response: model={}, promptTokens={}, 
completionTokens={}, totalTokens={}",
+        response.getModel(),
+        response.getPromptTokens(),
+        response.getCompletionTokens(),
+        response.getTotalTokens());
+
+    String content = response.getContent().trim();
+
+    if (content.contains("NO_SUITABLE_ENDPOINT")) {
+      return null;
+    }
+
+    // Extract JSON from response
+    Matcher matcher = JSON_PATTERN.matcher(content);
+    if (!matcher.find()) {
+      LOG.warn("No JSON found in LLM response");
+      return null;
+    }
+
+    // Convert the JSON string into our Java "ToolCall" object
+    String jsonStr = matcher.group();
+    JsonNode jsonNode = MAPPER.readTree(jsonStr);
+    return parseToolCall(jsonNode);
+  }
+
+  /**
+   * Executes multiple tool calls.
+   */
+  private Map<String, Object> executeMultipleToolCalls(
+      List<ToolCall> toolCalls, Map<String, Object> executionMetadata) {
+    Map<String, Object> responses = new HashMap<>();
+
+    for (int i = 0; i < toolCalls.size(); i++) {
+      ToolCall toolCall = toolCalls.get(i);
+      String responseKey = buildResponseKey(toolCall, i, toolCalls.size());
+      try {
+        LOG.info("Executing Recon API call: method={}, endpoint={}", 
toolCall.getMethod(), toolCall.getEndpoint());
+        ToolExecutor.ToolExecutionOutcome outcome = 
toolExecutor.executeToolCallWithPolicy(
+            toolCall.getEndpoint(),
+            toolCall.getMethod(),
+            toolCall.getParameters(),
+            maxRecordsPerAnswer,
+            maxPagesPerAnswer,
+            pageSizePerCall);
+        responses.put(responseKey, outcome.getResponseBody());
+        executionMetadata.put(responseKey, 
createExecutionMetadataMap(outcome));
+        LOG.info("Recon API call completed: endpoint={}, records={}, pages={}, 
truncated={}",
+            toolCall.getEndpoint(),
+            outcome.getRecordsProcessed(),
+            outcome.getPagesFetched(),
+            outcome.isTruncated());
+      } catch (Exception e) {
+        LOG.error("Tool call failed for endpoint: {}", toolCall.getEndpoint(), 
e);
+        Map<String, Object> errorMap = new HashMap<>();
+        errorMap.put("error", e.getMessage());
+        responses.put(responseKey, errorMap);
+        Map<String, Object> errorMeta = new HashMap<>();
+        errorMeta.put("error", e.getMessage());
+        errorMeta.put("truncated", false);
+        executionMetadata.put(responseKey, errorMeta);
+      }
+    }
+
+    return responses;
+  }
+
+  /**
+   * "Step 3" Helper: Takes the raw JSON API data and asks the LLM to write a 
sentence about it.
+   */
+  private String summarizeResponse(String userQuery,
+                                   Map<String, Object> apiResponses,
+                                   Map<String, Object> executionMetadata,
+                                   String model, String apiKey)
+      throws Exception {
+
+    // Give the LLM a new set of rules
+    String systemPrompt = buildSummarizationPrompt();
+    // Stitch the raw JSON strings and the user's original question together
+    String userPrompt = buildSummarizationUserPrompt(userQuery, apiResponses, 
executionMetadata);
+
+    List<ChatMessage> messages = new ArrayList<>();
+    messages.add(new ChatMessage("system", systemPrompt));
+    messages.add(new ChatMessage("user", userPrompt));
+
+    // Temperature 0.3 allows a tiny bit more natural/human-like language 
creativity.
+    Map<String, Object> parameters = new HashMap<>();
+    parameters.put("temperature", 0.3);
+    parameters.put("max_tokens", 2000);
+    if (currentProvider != null && !currentProvider.isEmpty()) {
+      parameters.put("_provider", currentProvider);
+    }
+
+    // Send the request to the LLM
+    LLMResponse response = llmClient.chatCompletion(messages, model, apiKey, 
parameters);
+
+    LOG.info("Summarization LLM response: model={}, promptTokens={}, " +
+            "completionTokens={}, totalTokens={}",
+        response.getModel(),
+        response.getPromptTokens(),
+        response.getCompletionTokens(),
+        response.getTotalTokens());
+
+    return response.getContent();
+  }
+
+  /**
+   * Helper: If the user asks "What is the meaning of life?", we use this to 
say
+   * "Sorry, I only know about Hadoop."
+   */
+  private String handleFallback(String userQuery, String model, String apiKey)
+      throws Exception {
+    String prompt = String.format(
+        "The user asked: \"%s\"\n\n" +
+            "This question cannot be answered using the available " +
+            "Ozone Recon API endpoints.\n\n" +
+            "Provide a helpful response that:\n" +
+            "1. Politely explains that you can only answer questions about " +
+            "Ozone Recon cluster data\n" +
+            "2. Briefly mentions the types of information you can provide " +
+            "(containers, keys, datanodes, pipelines, cluster state, etc.)\n" +
+            "3. Suggests how they might rephrase their question if it's 
related to Ozone\n\n" +
+            "Keep the response friendly and concise.",
+        userQuery);
+
+    List<ChatMessage> messages = new ArrayList<>();
+    messages.add(new ChatMessage("user", prompt));
+
+    Map<String, Object> parameters = new HashMap<>();
+    parameters.put("temperature", 0.5);
+    parameters.put("max_tokens", 500);
+    if (currentProvider != null && !currentProvider.isEmpty()) {
+      parameters.put("_provider", currentProvider);
+    }
+
+    LLMResponse response = llmClient.chatCompletion(
+        messages, model, apiKey, parameters);
+
+    return response.getContent();
+  }
+
+  /**
+   * Creates the master rules (System Prompt) we send to the LLM during Step 1.
+   * Notice how we teach the LLM exactly what JSON to output!
+   */
+  private String buildToolSelectionPrompt() {

Review Comment:
   Still not resolved.



##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/chatbot/api/ChatbotEndpoint.java:
##########
@@ -0,0 +1,341 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.recon.chatbot.api;
+
+import javax.inject.Inject;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.ozone.recon.chatbot.ChatbotConfigKeys;
+import org.apache.hadoop.ozone.recon.chatbot.agent.ChatbotAgent;
+import org.apache.hadoop.ozone.recon.chatbot.llm.LLMClient;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.ws.rs.Consumes;
+import javax.ws.rs.GET;
+import javax.ws.rs.POST;
+import javax.ws.rs.Path;
+import javax.ws.rs.Produces;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
+import java.util.concurrent.RejectedExecutionException;
+import java.util.concurrent.ThreadFactory;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * REST API endpoint for the Recon Chatbot.
+ *
+ * <p>
+ * API keys are managed via JCEKS (admin-configured),
+ * so there are no per-user key storage endpoints.
+ * </p>
+ */
+@Path("/chatbot")
+@Produces(MediaType.APPLICATION_JSON)
+public class ChatbotEndpoint {
+
+  private static final Logger LOG = 
LoggerFactory.getLogger(ChatbotEndpoint.class);
+
+  private final ChatbotAgent chatbotAgent;
+  private final LLMClient llmClient;
+  private final OzoneConfiguration configuration;
+
+  // Bounded executor isolates chatbot work from Jetty's main thread pool.
+  // Daemon threads → no explicit shutdown needed; they die with the JVM.
+  private final ThreadPoolExecutor chatbotExecutor;
+  private final long requestTimeoutMs;
+
+  @Inject
+  public ChatbotEndpoint(ChatbotAgent chatbotAgent,
+      LLMClient llmClient,
+      OzoneConfiguration configuration) {
+    this.chatbotAgent = chatbotAgent;
+    this.llmClient = llmClient;
+    this.configuration = configuration;
+
+    int poolSize = configuration.getInt(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_THREAD_POOL_SIZE,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_THREAD_POOL_SIZE_DEFAULT);
+    int queueCapacity = configuration.getInt(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_QUEUE_CAPACITY,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_QUEUE_CAPACITY_DEFAULT);
+    this.requestTimeoutMs = configuration.getInt(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_REQUEST_TIMEOUT_MS,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_REQUEST_TIMEOUT_MS_DEFAULT);
+
+    // Fixed-size pool + bounded queue + AbortPolicy.
+    // When pool is busy AND queue is full → new submissions throw
+    // RejectedExecutionException, which we map to HTTP 503. This is what
+    // protects the Jetty pool: chatbot saturation can never spill over.
+    AtomicInteger threadCounter = new AtomicInteger();
+    ThreadFactory threadFactory = r -> {
+      Thread t = new Thread(r, "recon-chatbot-" + 
threadCounter.incrementAndGet());
+      t.setDaemon(true);
+      return t;
+    };
+    this.chatbotExecutor = new ThreadPoolExecutor(
+        poolSize, poolSize,
+        0L, TimeUnit.MILLISECONDS,
+        new ArrayBlockingQueue<>(queueCapacity),
+        threadFactory,
+        new ThreadPoolExecutor.AbortPolicy());
+
+    LOG.info("ChatbotEndpoint initialized: poolSize={}, queueCapacity={}, 
requestTimeoutMs={}",
+        poolSize, queueCapacity, requestTimeoutMs);
+  }
+
+  /**
+   * Checks if the chatbot is enabled in configuration.
+   */
+  private boolean isChatbotEnabled() {
+    return configuration.getBoolean(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_ENABLED,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_ENABLED_DEFAULT);
+  }
+
+  /**
+   * Health check endpoint.
+   */
+  @GET
+  @Path("/health")
+  public Response health() {
+    Map<String, Object> response = new HashMap<>();
+    boolean enabled = isChatbotEnabled();
+    response.put("enabled", enabled);
+    response.put("llmClientAvailable",
+        enabled && llmClient != null && llmClient.isAvailable());
+    return Response.ok(response).build();
+  }
+
+  /**
+   * Chat endpoint - processes a user query.
+   */
+  @POST
+  @Path("/chat")
+  @Consumes(MediaType.APPLICATION_JSON)
+  public Response chat(ChatRequest request) {
+
+    // Safety check 1: If chatbot is disabled, throw a 503 Service Unavailable 
error immediately.
+    if (!isChatbotEnabled()) {
+      return Response.status(Response.Status.SERVICE_UNAVAILABLE)
+          .entity(Collections.singletonMap("error", "Chatbot service is not 
enabled"))
+          .build();
+    }
+
+    // Safety check 2: If the user didn't really ask a question, throw a 400 
Bad Request.
+    if (request.getQuery() == null || request.getQuery().trim().isEmpty()) {
+      return Response.status(Response.Status.BAD_REQUEST)
+          .entity(Collections.singletonMap("error", "Query cannot be empty"))
+          .build();
+    }
+
+    LOG.info("Chat request: userId={}, model={}, provider={}",
+        sanitizeUserId(request.getUserId()),
+        request.getModel() == null ? "default" : request.getModel(),
+        request.getProvider() == null ? "auto" : request.getProvider());
+
+    // Submit the heavy work (LLM calls + Recon API calls) to the chatbot's
+    // bounded executor instead of running it on the Jetty request thread.
+    // If the pool + queue are full, AbortPolicy throws immediately and we
+    // return 503 — Jetty pool stays free for the rest of Recon.
+    Future<String> future;
+    try {
+      future = chatbotExecutor.submit(() -> chatbotAgent.processQuery(
+          request.getQuery(),
+          request.getModel(),
+          request.getProvider(),
+          null));
+    } catch (RejectedExecutionException rej) {
+      LOG.warn("Chatbot is at capacity; rejecting request");
+      return Response.status(Response.Status.SERVICE_UNAVAILABLE)
+          .entity(Collections.singletonMap("error",
+              "Chatbot is busy. Please retry in a few moments."))
+          .build();
+    }
+
+    try {
+      // Bounded wait: caps how long a Jetty thread can be parked on the 
future.
+      String response = future.get(requestTimeoutMs, TimeUnit.MILLISECONDS);

Review Comment:
   The `future.get(requestTimeoutMs, ...)` call still holds a Jetty thread for up to 
7 minutes (`OZONE_RECON_CHATBOT_REQUEST_TIMEOUT_MS_DEFAULT = 420_000`). With 
`poolSize=4` and `queueCapacity=8`, up to 12 Jetty threads can be simultaneously 
blocked. My earlier suggestion of JAX-RS `@Suspended AsyncResponse` (or 
returning a streaming response) is the proper solution. At minimum, 
`requestTimeoutMs` should default to a much lower value (e.g., 90 seconds).



##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/chatbot/agent/ChatbotAgent.java:
##########
@@ -0,0 +1,809 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.recon.chatbot.agent;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.inject.Inject;
+import com.google.inject.Singleton;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.ozone.recon.chatbot.ChatbotConfigKeys;
+import org.apache.hadoop.ozone.recon.chatbot.llm.LLMClient;
+import org.apache.hadoop.ozone.recon.chatbot.llm.LLMClient.ChatMessage;
+import org.apache.hadoop.ozone.recon.chatbot.llm.LLMClient.LLMResponse;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Main chatbot agent that orchestrates the conversation flow.
+ * Handles tool selection (figuring out what API to call), executing those 
calls,
+ * and summarization (feeding the data back to the LLM to write a nice answer).
+ */
+@Singleton
+public class ChatbotAgent {
+
+  private static final Logger LOG = 
LoggerFactory.getLogger(ChatbotAgent.class);
+
+  private static final ObjectMapper MAPPER = new ObjectMapper();
+
+  // A specific Recon API endpoint we want to handle carefully because it can 
return millions of rows.
+  private static final String LIST_KEYS_ENDPOINT_SUFFIX = "/keys/listKeys";
+
+  // The connection to Gemini/OpenAI
+  private final LLMClient llmClient;
+
+  // The hands that execute the internal API calls
+  private final ToolExecutor toolExecutor;
+
+  // The Cheat Sheet of all available APIs loaded from the .md file
+  private final String apiSchema;
+
+  // Max API calls we allow per question (so the LLM doesn't DOS our server)
+  private final int maxToolCalls;
+
+
+  private final String defaultModel;
+  private final int maxRecordsPerAnswer;
+  private final int maxPagesPerAnswer;
+  private final int pageSizePerCall;
+  private final boolean requireSafeScope;
+
+  @Inject
+  public ChatbotAgent(LLMClient llmClient,
+                      ToolExecutor toolExecutor,
+                      OzoneConfiguration configuration) {
+    this.llmClient = llmClient;
+    this.toolExecutor = toolExecutor;
+
+    // Read the Schema (Cheat Sheet) from the resources' folder.
+    this.apiSchema = loadApiSchema();
+
+    // Load all the safeguards and settings from ozone-site.xml
+    this.maxToolCalls = configuration.getInt(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_MAX_TOOL_CALLS,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_MAX_TOOL_CALLS_DEFAULT);
+    this.defaultModel = configuration.get(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_DEFAULT_MODEL,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_DEFAULT_MODEL_DEFAULT);
+    this.maxRecordsPerAnswer = configuration.getInt(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_MAX_RECORDS,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_MAX_RECORDS_DEFAULT);
+    this.maxPagesPerAnswer = configuration.getInt(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_MAX_PAGES,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_MAX_PAGES_DEFAULT);
+    this.pageSizePerCall = configuration.getInt(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_PAGE_SIZE,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_PAGE_SIZE_DEFAULT);
+    this.requireSafeScope = configuration.getBoolean(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_REQUIRE_SAFE_SCOPE,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_EXEC_REQUIRE_SAFE_SCOPE_DEFAULT);
+
+    LOG.info("ChatbotAgent initialized with model={}, maxRecords={}, " +
+            "maxPages={}, pageSize={}, requireSafeScope={}",
+        defaultModel, maxRecordsPerAnswer, maxPagesPerAnswer,
+        pageSizePerCall, requireSafeScope);
+  }
+
+  /**
+   * THE MAIN ENTRY POINT. Processes a user query and returns a response.
+   *
+   * @param userQuery the user's question
+   * @param model     the LLM model to use
+   * @param provider  explicit provider name (optional, e.g. "gemini", 
"openai")
+   * @param apiKey    the user's API key (optional)
+   * @return the chatbot response
+   */
+  public String processQuery(String userQuery, String model,
+                             String provider, String apiKey)
+      throws Exception {
+
+    // Safety check
+    if (userQuery == null || userQuery.trim().isEmpty()) {
+      throw new IllegalArgumentException("Query cannot be empty");
+    }
+
+    // Use default model if the user didn't specify one.
+    String effectiveModel = (model != null && !model.isEmpty()) ? model : 
defaultModel;
+
+    LOG.info("Processing query with model: {}, provider: {}", effectiveModel, 
provider == null ? "auto" : provider);
+
+    // STEP 1: Ask the LLM what API tools it wants to use to answer the 
question.
+    // NOTE: provider is passed as a method arg (NOT instance state) because
+    // ChatbotAgent is @Singleton — concurrent requests would race on a shared 
field.
+    ToolCall toolCall = getToolCall(userQuery, effectiveModel, provider, 
apiKey);
+
+    // If the LLM doesn't know what API to call...
+    if (toolCall == null) {
+      // No suitable endpoint found
+      LOG.info("Tool selection result: NO_SUITABLE_ENDPOINT; using fallback");
+      return handleFallback(userQuery, effectiveModel, provider, apiKey);
+    }
+
+    // If the user asked a general question (e.g. "What is Ozone?"), the LLM 
answers it directly without an API call.
+    if (toolCall.isDocumentationQuery()) {
+      LOG.info("Tool selection result: DOCUMENTATION_QUERY (no Recon API 
call)");
+      return toolCall.getAnswer();
+    }
+
+    // STEP 2: Execute the internal Recon API calls
+    Map<String, Object> apiResponses;
+    Map<String, Object> executionMetadata = new HashMap<>();
+
+    // Scenario A: LLM says we need to call MULTIPLE APIs to get the answer
+    if (toolCall.isMultipleEndpoints()) {
+
+      if (toolCall.getToolCalls() == null || 
toolCall.getToolCalls().isEmpty()) {
+        LOG.warn("LLM returned MULTI_ENDPOINT but no tool calls");
+        return handleFallback(userQuery, effectiveModel, provider, apiKey);
+      }
+      LOG.info("Tool selection result: MULTI_ENDPOINT count={}",
+          toolCall.getToolCalls().size());
+
+      // Check if the LLM asked for something dangerous (like scanning the 
whole cluster without a limit)
+      String clarification = 
buildClarificationForToolCalls(toolCall.getToolCalls());
+      if (clarification != null) {
+        LOG.info("Execution policy returned clarification for multi-endpoint " 
+
+            "request: {}", clarification);
+        return clarification;
+      }
+      for (ToolCall selected : toolCall.getToolCalls()) {
+        LOG.info("Selected Recon API: method={}, endpoint={}, paramKeys={}, 
reasoning={}",
+            selected.getMethod(),
+            selected.getEndpoint(),
+            selected.getParameters() == null ? "[]" : 
selected.getParameters().keySet(),
+            selected.getReasoning());
+      }
+
+      // Execute all the API calls securely
+      apiResponses = executeMultipleToolCalls(toolCall.getToolCalls(), 
executionMetadata);
+
+      // Scenario B: LLM says we only need ONE API call
+    } else {
+      if (toolCall.getEndpoint() == null || toolCall.getEndpoint().isEmpty()) {
+        LOG.warn("LLM returned SINGLE_ENDPOINT with empty endpoint");
+        return handleFallback(userQuery, effectiveModel, provider, apiKey);
+      }
+      LOG.info("Tool selection result: SINGLE_ENDPOINT method={}, endpoint={}, 
paramKeys={}, reasoning={}",
+          toolCall.getMethod(),
+          toolCall.getEndpoint(),
+          toolCall.getParameters() == null ? "[]" : 
toolCall.getParameters().keySet(),
+          toolCall.getReasoning());
+      String clarification = validateToolCallForExecution(toolCall);
+      if (clarification != null) {
+        LOG.info("Execution policy returned clarification for endpoint {}: {}",
+            toolCall.getEndpoint(), clarification);
+        return clarification;
+      }
+      // Go fetch the data using our ToolExecutor!
+      ToolExecutor.ToolExecutionOutcome outcome = 
toolExecutor.executeToolCallWithPolicy(
+          toolCall.getEndpoint(),
+          toolCall.getMethod(),
+          toolCall.getParameters(),
+          maxRecordsPerAnswer,
+          maxPagesPerAnswer,
+          pageSizePerCall);
+
+      // Save the raw JSON data the API returned
+      apiResponses = new HashMap<>();
+      apiResponses.put(toolCall.getEndpoint(), outcome.getResponseBody());
+      executionMetadata.put(toolCall.getEndpoint(),
+          createExecutionMetadataMap(outcome));
+    }
+
+    // STEP 3: Send the raw JSON data BACK to the LLM to format a nice answer
+    LOG.info("Summarization input prepared: endpointCount={}, endpoints={}",
+        apiResponses.size(), apiResponses.keySet());
+    return summarizeResponse(userQuery, apiResponses, executionMetadata, 
effectiveModel, provider, apiKey);
+  }
+
+  /**
+   * "Step 1" Helper: Talks to the LLM and asks for a JSON object telling us 
which API to call.
+   */
+  private ToolCall getToolCall(String userQuery, String model, String 
provider, String apiKey)
+      throws Exception {
+
+    // Build the "cheat sheet" prompt (includes the recon-api-guide.md)
+    String systemPrompt = buildToolSelectionPrompt();
+    String userPrompt = "User Query: " + userQuery;
+
+    List<ChatMessage> messages = new ArrayList<>();
+    messages.add(new ChatMessage("system", systemPrompt));
+    messages.add(new ChatMessage("user", userPrompt));
+
+    // Tuning the LLM: Temperature 0.1 means we want it to be very strict and 
robotic, not creative.
+    Map<String, Object> parameters = new HashMap<>();
+    parameters.put("temperature", 0.1);
+    parameters.put("max_tokens", 8192);
+    // Key MUST match what LLMDispatcher.chatCompletion() reads ("provider").
+    if (provider != null && !provider.isEmpty()) {
+      parameters.put("provider", provider);
+    }
+
+    // Send the request to the LLM
+    LLMResponse response = llmClient.chatCompletion(messages, model, apiKey, 
parameters);
+
+    LOG.info("Tool selection LLM response: model={}, promptTokens={}, 
completionTokens={}, totalTokens={}",
+        response.getModel(),
+        response.getPromptTokens(),
+        response.getCompletionTokens(),
+        response.getTotalTokens());
+
+    String content = response.getContent().trim();
+
+    if (content.contains("NO_SUITABLE_ENDPOINT")) {
+      return null;
+    }
+
+    // Extract the first top-level JSON object from the response. We can't use
+    // a greedy regex like \{.*\} because LLMs sometimes emit multiple JSON
+    // fragments (reasoning + tool call), and a non-greedy regex breaks on
+    // nested objects. Walking the brace depth handles both cases correctly.
+    String jsonStr = extractFirstJsonObject(content);
+    if (jsonStr == null) {
+      LOG.warn("No JSON found in LLM response");
+      return null;
+    }
+
+    JsonNode jsonNode = MAPPER.readTree(jsonStr);
+    return parseToolCall(jsonNode);
+  }
+
+  /**
+   * Extracts the first balanced top-level JSON object from a string by
+   * walking brace depth. Properly skips braces inside string literals and
+   * handles escape sequences. Returns null if no valid JSON object found.
+   */
+  static String extractFirstJsonObject(String content) {
+    if (content == null) {
+      return null;
+    }
+    int start = content.indexOf('{');
+    if (start < 0) {
+      return null;
+    }
+    int depth = 0;
+    boolean inString = false;
+    boolean escape = false;
+    for (int i = start; i < content.length(); i++) {
+      char c = content.charAt(i);
+      if (escape) {
+        escape = false;
+        continue;
+      }
+      if (inString) {
+        if (c == '\\') {
+          escape = true;
+        } else if (c == '"') {
+          inString = false;
+        }
+        continue;
+      }
+      if (c == '"') {
+        inString = true;
+      } else if (c == '{') {
+        depth++;
+      } else if (c == '}') {
+        depth--;
+        if (depth == 0) {
+          return content.substring(start, i + 1);
+        }
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Executes multiple tool calls.
+   */
+  private Map<String, Object> executeMultipleToolCalls(
+      List<ToolCall> toolCalls, Map<String, Object> executionMetadata) {
+    Map<String, Object> responses = new HashMap<>();
+
+    for (int i = 0; i < toolCalls.size(); i++) {
+      ToolCall toolCall = toolCalls.get(i);
+      String responseKey = buildResponseKey(toolCall, i, toolCalls.size());
+      try {
+        LOG.info("Executing Recon API call: method={}, endpoint={}", 
toolCall.getMethod(), toolCall.getEndpoint());
+        ToolExecutor.ToolExecutionOutcome outcome = 
toolExecutor.executeToolCallWithPolicy(
+            toolCall.getEndpoint(),
+            toolCall.getMethod(),
+            toolCall.getParameters(),
+            maxRecordsPerAnswer,
+            maxPagesPerAnswer,
+            pageSizePerCall);
+        responses.put(responseKey, outcome.getResponseBody());
+        executionMetadata.put(responseKey, 
createExecutionMetadataMap(outcome));
+        LOG.info("Recon API call completed: endpoint={}, records={}, pages={}, 
truncated={}",
+            toolCall.getEndpoint(),
+            outcome.getRecordsProcessed(),
+            outcome.getPagesFetched(),
+            outcome.isTruncated());
+      } catch (Exception e) {
+        LOG.error("Tool call failed for endpoint: {}", toolCall.getEndpoint(), 
e);
+        Map<String, Object> errorMap = new HashMap<>();
+        errorMap.put("error", e.getMessage());
+        responses.put(responseKey, errorMap);
+        Map<String, Object> errorMeta = new HashMap<>();
+        errorMeta.put("error", e.getMessage());
+        errorMeta.put("truncated", false);
+        executionMetadata.put(responseKey, errorMeta);
+      }
+    }
+
+    return responses;
+  }
+
+  /**
+   * "Step 3" Helper: Takes the raw JSON API data and asks the LLM to write a 
sentence about it.
+   */
+  private String summarizeResponse(String userQuery,
+                                   Map<String, Object> apiResponses,
+                                   Map<String, Object> executionMetadata,
+                                   String model, String provider, String 
apiKey)
+      throws Exception {
+
+    // Give the LLM a new set of rules
+    String systemPrompt = buildSummarizationPrompt();
+    // Stitch the raw JSON strings and the user's original question together
+    String userPrompt = buildSummarizationUserPrompt(userQuery, apiResponses, 
executionMetadata);
+
+    List<ChatMessage> messages = new ArrayList<>();
+    messages.add(new ChatMessage("system", systemPrompt));
+    messages.add(new ChatMessage("user", userPrompt));
+
+    // Temperature 0.3 allows a tiny bit more natural/human-like language 
creativity.
+    Map<String, Object> parameters = new HashMap<>();
+    parameters.put("temperature", 0.3);
+    parameters.put("max_tokens", 2000);
+    if (provider != null && !provider.isEmpty()) {
+      parameters.put("provider", provider);
+    }
+
+    // Send the request to the LLM
+    LLMResponse response = llmClient.chatCompletion(messages, model, apiKey, 
parameters);
+
+    LOG.info("Summarization LLM response: model={}, promptTokens={}, " +
+            "completionTokens={}, totalTokens={}",
+        response.getModel(),
+        response.getPromptTokens(),
+        response.getCompletionTokens(),
+        response.getTotalTokens());
+
+    return response.getContent();
+  }
+
+  /**
+   * Helper: If the user asks "What is the meaning of life?", we use this to 
say
+   * "Sorry, I only know about Hadoop."
+   */
+  private String handleFallback(String userQuery, String model, String 
provider, String apiKey)
+      throws Exception {
+    String prompt = String.format(
+        "The user asked: \"%s\"\n\n" +
+            "This question cannot be answered using the available " +
+            "Ozone Recon API endpoints.\n\n" +
+            "Provide a helpful response that:\n" +
+            "1. Politely explains that you can only answer questions about " +
+            "Ozone Recon cluster data\n" +
+            "2. Briefly mentions the types of information you can provide " +
+            "(containers, keys, datanodes, pipelines, cluster state, etc.)\n" +
+            "3. Suggests how they might rephrase their question if it's 
related to Ozone\n\n" +
+            "Keep the response friendly and concise.",
+        userQuery);
+
+    List<ChatMessage> messages = new ArrayList<>();
+    messages.add(new ChatMessage("user", prompt));
+
+    Map<String, Object> parameters = new HashMap<>();
+    parameters.put("temperature", 0.5);
+    parameters.put("max_tokens", 500);
+    if (provider != null && !provider.isEmpty()) {
+      parameters.put("provider", provider);
+    }
+
+    LLMResponse response = llmClient.chatCompletion(
+        messages, model, apiKey, parameters);
+
+    return response.getContent();
+  }
+
+  /**
+   * Creates the master rules (System Prompt) we send to the LLM during Step 1.
+   * Notice how we teach the LLM exactly what JSON to output!
+   */
+  private String buildToolSelectionPrompt() {
+    return "You are an expert on Apache Ozone Recon, a service that provides 
insights into Ozone cluster data.\n\n" +
+        "Your task is to analyze user queries and determine the appropriate 
response:\n\n" +
+        "1. **For DATA queries** (asking for current cluster information): 
Identify the most appropriate API endpoint(s) to call\n" +
+        "2. **For DOCUMENTATION queries** (asking about API use cases, 
purposes, or capabilities): Respond with a DOCUMENTATION_QUERY and provide the 
information directly\n\n" +
+        "IMPORTANT: If the user's question requires data from MULTIPLE API 
endpoints to give a complete answer, return ALL needed endpoints in an 
array.\n\n" +
+        "For SINGLE endpoint DATA queries, return this JSON format:\n" +
+        "{\n" +
+        "  \"endpoint\": \"/api/v1/path\",\n" +
+        "  \"method\": \"GET\",\n" +
+        "  \"parameters\": {},\n" +
+        "  \"reasoning\": \"Brief explanation of why this endpoint was 
chosen\"\n" +
+        "}\n\n" +
+        "For MULTIPLE endpoint DATA queries, return this JSON format:\n" +
+        "{\n" +
+        "  \"tool_calls\": [\n" +
+        "    { \"endpoint\": \"/api/v1/path1\", \"method\": \"GET\", 
\"parameters\": {}, \"reasoning\": \"Explain what data this provides\" },\n" +
+        "    { \"endpoint\": \"/api/v1/path2\", \"method\": \"GET\", 
\"parameters\": {}, \"reasoning\": \"Explain what data this provides\" }\n" +
+        "  ],\n" +
+        "  \"requires_multiple_calls\": true\n" +
+        "}\n\n" +
+        "Examples requiring MULTIPLE endpoints:\n" +
+        "- \"How many total keys and how many are open?\" -> /clusterState + 
/keys/open/summary\n" +
+        "- \"Show datanodes and pipeline status\" -> /datanodes + 
/pipelines\n" +
+        "- \"List unhealthy and missing containers\" -> /containers/unhealthy 
+ /containers/missing\n" +
+        "- \"Cluster state and open keys summary\" -> /clusterState + 
/keys/open/summary\n\n" +
+        "For DOCUMENTATION queries, return this JSON format:\n" +
+        "{\n" +
+        "  \"type\": \"DOCUMENTATION_QUERY\",\n" +
+        "  \"answer\": \"Direct answer based on the API guide\",\n" +
+        "  \"reasoning\": \"Explanation of what documentation was 
referenced\"\n" +
+        "}\n\n" +
+        "If the query cannot be answered by any available API endpoint OR 
documentation, respond with: NO_SUITABLE_ENDPOINT\n\n" +
+        "Safety rules:\n" +
+        "- Do not invent parameter values.\n" +
+        "- For /keys/listKeys, always provide startPrefix with at least " +
+        "/<volume>/<bucket> scope when selecting this tool.\n\n" +
+        "API Specification:\n" + apiSchema;
+  }
+
+  /**
+   * Builds the system prompt for response summarization.
+   */
+  private String buildSummarizationPrompt() {
+    return "You are an expert on Apache Ozone Recon data analysis.\n\n" +

Review Comment:
   Same comment as before: move this hardcoded prompt text to resource files.



##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/chatbot/llm/AnthropicClient.java:
##########
@@ -0,0 +1,175 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.recon.chatbot.llm;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.ozone.recon.chatbot.ChatbotConfigKeys;
+import org.apache.hadoop.ozone.recon.chatbot.security.CredentialHelper;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Direct client for Anthropic Claude models using Composition.
+ */
+public class AnthropicClient implements LLMClient {
+
+  private static final ObjectMapper MAPPER = new ObjectMapper();
+  private static final String ANTHROPIC_VERSION = "2023-06-01";

Review Comment:
   Why is this version always hardcoded?



##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/chatbot/api/ChatbotEndpoint.java:
##########
@@ -0,0 +1,341 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.recon.chatbot.api;
+
+import javax.inject.Inject;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.ozone.recon.chatbot.ChatbotConfigKeys;
+import org.apache.hadoop.ozone.recon.chatbot.agent.ChatbotAgent;
+import org.apache.hadoop.ozone.recon.chatbot.llm.LLMClient;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.ws.rs.Consumes;
+import javax.ws.rs.GET;
+import javax.ws.rs.POST;
+import javax.ws.rs.Path;
+import javax.ws.rs.Produces;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
+import java.util.concurrent.RejectedExecutionException;
+import java.util.concurrent.ThreadFactory;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * REST API endpoint for the Recon Chatbot.
+ *
+ * <p>
+ * API keys are managed via JCEKS (admin-configured),
+ * so there are no per-user key storage endpoints.
+ * </p>
+ */
+@Path("/chatbot")
+@Produces(MediaType.APPLICATION_JSON)
+public class ChatbotEndpoint {
+
+  private static final Logger LOG = 
LoggerFactory.getLogger(ChatbotEndpoint.class);
+
+  private final ChatbotAgent chatbotAgent;
+  private final LLMClient llmClient;
+  private final OzoneConfiguration configuration;
+
+  // Bounded executor isolates chatbot work from Jetty's main thread pool.
+  // Daemon threads → no explicit shutdown needed; they die with the JVM.
+  private final ThreadPoolExecutor chatbotExecutor;
+  private final long requestTimeoutMs;
+
+  @Inject
+  public ChatbotEndpoint(ChatbotAgent chatbotAgent,
+      LLMClient llmClient,
+      OzoneConfiguration configuration) {
+    this.chatbotAgent = chatbotAgent;
+    this.llmClient = llmClient;
+    this.configuration = configuration;
+
+    int poolSize = configuration.getInt(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_THREAD_POOL_SIZE,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_THREAD_POOL_SIZE_DEFAULT);
+    int queueCapacity = configuration.getInt(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_QUEUE_CAPACITY,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_QUEUE_CAPACITY_DEFAULT);
+    this.requestTimeoutMs = configuration.getInt(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_REQUEST_TIMEOUT_MS,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_REQUEST_TIMEOUT_MS_DEFAULT);
+
+    // Fixed-size pool + bounded queue + AbortPolicy.
+    // When pool is busy AND queue is full → new submissions throw
+    // RejectedExecutionException, which we map to HTTP 503. This is what
+    // protects the Jetty pool: chatbot saturation can never spill over.
+    AtomicInteger threadCounter = new AtomicInteger();
+    ThreadFactory threadFactory = r -> {
+      Thread t = new Thread(r, "recon-chatbot-" + 
threadCounter.incrementAndGet());
+      t.setDaemon(true);
+      return t;
+    };
+    this.chatbotExecutor = new ThreadPoolExecutor(
+        poolSize, poolSize,
+        0L, TimeUnit.MILLISECONDS,
+        new ArrayBlockingQueue<>(queueCapacity),
+        threadFactory,
+        new ThreadPoolExecutor.AbortPolicy());
+
+    LOG.info("ChatbotEndpoint initialized: poolSize={}, queueCapacity={}, 
requestTimeoutMs={}",
+        poolSize, queueCapacity, requestTimeoutMs);
+  }
+
+  /**
+   * Checks if the chatbot is enabled in configuration.
+   */
+  private boolean isChatbotEnabled() {
+    return configuration.getBoolean(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_ENABLED,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_ENABLED_DEFAULT);
+  }
+
+  /**
+   * Health check endpoint.
+   */
+  @GET
+  @Path("/health")
+  public Response health() {
+    Map<String, Object> response = new HashMap<>();
+    boolean enabled = isChatbotEnabled();
+    response.put("enabled", enabled);
+    response.put("llmClientAvailable",
+        enabled && llmClient != null && llmClient.isAvailable());
+    return Response.ok(response).build();
+  }
+
+  /**
+   * Chat endpoint - processes a user query.
+   */
+  @POST
+  @Path("/chat")
+  @Consumes(MediaType.APPLICATION_JSON)
+  public Response chat(ChatRequest request) {
+
+    // Safety check 1: If chatbot is disabled, throw a 503 Service Unavailable 
error immediately.
+    if (!isChatbotEnabled()) {
+      return Response.status(Response.Status.SERVICE_UNAVAILABLE)
+          .entity(Collections.singletonMap("error", "Chatbot service is not 
enabled"))
+          .build();
+    }
+
+    // Safety check 2: If the user didn't really ask a question, throw a 400 
Bad Request.
+    if (request.getQuery() == null || request.getQuery().trim().isEmpty()) {
+      return Response.status(Response.Status.BAD_REQUEST)
+          .entity(Collections.singletonMap("error", "Query cannot be empty"))
+          .build();
+    }
+
+    LOG.info("Chat request: userId={}, model={}, provider={}",
+        sanitizeUserId(request.getUserId()),
+        request.getModel() == null ? "default" : request.getModel(),
+        request.getProvider() == null ? "auto" : request.getProvider());
+
+    // Submit the heavy work (LLM calls + Recon API calls) to the chatbot's
+    // bounded executor instead of running it on the Jetty request thread.
+    // If the pool + queue are full, AbortPolicy throws immediately and we
+    // return 503 — Jetty pool stays free for the rest of Recon.
+    Future<String> future;
+    try {
+      future = chatbotExecutor.submit(() -> chatbotAgent.processQuery(
+          request.getQuery(),
+          request.getModel(),
+          request.getProvider(),
+          null));

Review Comment:
   `apiKey` is always passed as null here, so why keep it as an argument? It 
would be better to remove the `apiKey` parameter from `processQuery` entirely, 
since key resolution is delegated to `CredentialHelper`.



##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconControllerModule.java:
##########
@@ -131,6 +130,7 @@ protected void configure() {
     install(new ReconOmTaskBindingModule());
     install(new ReconDaoBindingModule());
     bind(ReconTaskStatusUpdaterManager.class).in(Singleton.class);
+    install(new ChatbotModule());

Review Comment:
   This chatbot module is installed even when the chatbot is disabled via the 
`ozone.recon.chatbot.enabled` flag. The Guice module still binds and 
instantiates `LLMDispatcher`, `CredentialHelper`, `ChatbotAgent`, and 
`ToolExecutor`. This means startup-time failures (e.g., a bad credential 
provider path) will break Recon even when the chatbot is intentionally disabled.



##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/chatbot/security/LLMClient2.java:
##########
@@ -0,0 +1,182 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.recon.chatbot.security;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * LLMClient is the "Master Contract" for the whole Chatbot system.
+ * 
+ * Purpose:
+ * The ChatbotAgent doesn't know (or care) if it's talking to OpenAI, Gemini, 
or a Local LLM.
+ * It strictly relies on this interface. This interface forces every AI client 
to guarantee
+ * that they will accept exactly the same input and return exactly the same 
output.
+ * 
+ * By using this contract, we can add 10 new AI models to Recon tomorrow,
+ * and we will never have to edit the ChatbotAgent's code to support them!
+ */
+public interface LLMClient2 {

Review Comment:
   Yes, this is dead code. It must be removed.



##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/chatbot/api/ChatbotEndpoint.java:
##########
@@ -0,0 +1,341 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.recon.chatbot.api;
+
+import javax.inject.Inject;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.ozone.recon.chatbot.ChatbotConfigKeys;
+import org.apache.hadoop.ozone.recon.chatbot.agent.ChatbotAgent;
+import org.apache.hadoop.ozone.recon.chatbot.llm.LLMClient;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.ws.rs.Consumes;
+import javax.ws.rs.GET;
+import javax.ws.rs.POST;
+import javax.ws.rs.Path;
+import javax.ws.rs.Produces;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
+import java.util.concurrent.RejectedExecutionException;
+import java.util.concurrent.ThreadFactory;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * REST API endpoint for the Recon Chatbot.
+ *
+ * <p>
+ * API keys are managed via JCEKS (admin-configured),
+ * so there are no per-user key storage endpoints.
+ * </p>
+ */
+@Path("/chatbot")
+@Produces(MediaType.APPLICATION_JSON)
+public class ChatbotEndpoint {
+
+  private static final Logger LOG = 
LoggerFactory.getLogger(ChatbotEndpoint.class);
+
+  private final ChatbotAgent chatbotAgent;
+  private final LLMClient llmClient;
+  private final OzoneConfiguration configuration;
+
+  // Bounded executor isolates chatbot work from Jetty's main thread pool.
+  // Daemon threads → no explicit shutdown needed; they die with the JVM.
+  private final ThreadPoolExecutor chatbotExecutor;
+  private final long requestTimeoutMs;
+
+  @Inject
+  public ChatbotEndpoint(ChatbotAgent chatbotAgent,
+      LLMClient llmClient,
+      OzoneConfiguration configuration) {
+    this.chatbotAgent = chatbotAgent;
+    this.llmClient = llmClient;
+    this.configuration = configuration;
+
+    int poolSize = configuration.getInt(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_THREAD_POOL_SIZE,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_THREAD_POOL_SIZE_DEFAULT);
+    int queueCapacity = configuration.getInt(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_QUEUE_CAPACITY,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_QUEUE_CAPACITY_DEFAULT);
+    this.requestTimeoutMs = configuration.getInt(
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_REQUEST_TIMEOUT_MS,
+        ChatbotConfigKeys.OZONE_RECON_CHATBOT_REQUEST_TIMEOUT_MS_DEFAULT);
+
+    // Fixed-size pool + bounded queue + AbortPolicy.
+    // When pool is busy AND queue is full → new submissions throw
+    // RejectedExecutionException, which we map to HTTP 503. This is what
+    // protects the Jetty pool: chatbot saturation can never spill over.
+    AtomicInteger threadCounter = new AtomicInteger();
+    ThreadFactory threadFactory = r -> {
+      Thread t = new Thread(r, "recon-chatbot-" + 
threadCounter.incrementAndGet());
+      t.setDaemon(true);
+      return t;
+    };
+    this.chatbotExecutor = new ThreadPoolExecutor(
+        poolSize, poolSize,
+        0L, TimeUnit.MILLISECONDS,
+        new ArrayBlockingQueue<>(queueCapacity),
+        threadFactory,
+        new ThreadPoolExecutor.AbortPolicy());
+
+    LOG.info("ChatbotEndpoint initialized: poolSize={}, queueCapacity={}, 
requestTimeoutMs={}",
+        poolSize, queueCapacity, requestTimeoutMs);
+  }
+
+  /**
+   * Checks if the chatbot is enabled in configuration.
+   */
+  private boolean isChatbotEnabled() {

Review Comment:
   This shouldn't be defined here. It should be a utility method in a utility 
class, so that it can be used to avoid instantiating and installing Guice 
bindings for chatbot-related classes.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] HDDS-14816. Add Recon AI Assistant backend foundation with Multi LLM integration [ozone]

Reply via email to