This is an automated email from the ASF dual-hosted git repository.

saadurrahman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-heron.git


The following commit(s) were added to refs/heads/master by this push:
     new 3fdf1f86071 Fix HealthManager (#3819)
3fdf1f86071 is described below

commit 3fdf1f8607183d6fcd687bf3a4557537a5bad79b
Author: choi se <[email protected]>
AuthorDate: Thu Apr 28 03:25:47 2022 +0900

    Fix HealthManager (#3819)
    
    Bug fixes in the HealthManager endpoint.
---
 .../org/apache/heron/healthmgr/HealthManager.java  |  2 --
 heron/tools/tracker/src/python/BUILD               |  2 +-
 heron/tools/tracker/src/python/app.py              |  6 +++-
 heron/tools/tracker/src/python/metricstimeline.py  | 11 ++++++++
 heron/tools/tracker/src/python/query_operators.py  |  7 +++--
 heron/tools/tracker/src/python/routers/metrics.py  | 33 +++++++++++++++++-----
 heron/tools/tracker/src/python/topology.py         |  8 ++----
 heron/tools/tracker/src/python/utils.py            |  4 +--
 heron/tools/ui/resources/static/js/topologies.js   |  2 +-
 heron/tools/ui/src/python/main.py                  |  7 ++++-
 10 files changed, 60 insertions(+), 22 deletions(-)

diff --git 
a/heron/healthmgr/src/java/org/apache/heron/healthmgr/HealthManager.java 
b/heron/healthmgr/src/java/org/apache/heron/healthmgr/HealthManager.java
index dd0c7f7af09..f41d516fd3f 100644
--- a/heron/healthmgr/src/java/org/apache/heron/healthmgr/HealthManager.java
+++ b/heron/healthmgr/src/java/org/apache/heron/healthmgr/HealthManager.java
@@ -48,7 +48,6 @@ import org.apache.commons.cli.HelpFormatter;
 import org.apache.commons.cli.Option;
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.ParseException;
-import org.apache.heron.api.utils.Slf4jUtils;
 import org.apache.heron.classification.InterfaceStability.Evolving;
 import org.apache.heron.classification.InterfaceStability.Unstable;
 import org.apache.heron.common.basics.SingletonRegistry;
@@ -153,7 +152,6 @@ public class HealthManager {
   }
 
   public static void main(String[] args) throws Exception {
-    Slf4jUtils.installSLF4JBridge();
     CommandLineParser parser = new DefaultParser();
     Options slaManagerCliOptions = constructCliOptions();
 
diff --git a/heron/tools/tracker/src/python/BUILD 
b/heron/tools/tracker/src/python/BUILD
index 947e4695337..ccc581dd8b2 100644
--- a/heron/tools/tracker/src/python/BUILD
+++ b/heron/tools/tracker/src/python/BUILD
@@ -10,7 +10,7 @@ pex_library(
         "click==7.1.2",
         "fastapi==0.75.0",
         "httpx==0.16.1",
-        "javaobj-py3==0.4.1",
+        "javaobj-py3==0.4.3",
         "networkx==2.5",
         "protobuf==3.16.0",
         "uvicorn==0.11.7",
diff --git a/heron/tools/tracker/src/python/app.py 
b/heron/tools/tracker/src/python/app.py
index 3c6e6f408f8..94ad43f1b99 100644
--- a/heron/tools/tracker/src/python/app.py
+++ b/heron/tools/tracker/src/python/app.py
@@ -28,7 +28,7 @@ from typing import Dict, List, Optional
 
 from fastapi import FastAPI, Query, Request
 from fastapi.exceptions import RequestValidationError
-from fastapi.responses import JSONResponse
+from fastapi.responses import JSONResponse, PlainTextResponse
 from starlette.exceptions import HTTPException as StarletteHTTPException
 
 from heron.tools.tracker.src.python import constants, state, query
@@ -128,3 +128,7 @@ async def get_machines(
     ] = topology.get_machines()
 
   return response
+
+@app.get("/health", response_class=PlainTextResponse)
+def healthcheck():
+  return "ok"
diff --git a/heron/tools/tracker/src/python/metricstimeline.py 
b/heron/tools/tracker/src/python/metricstimeline.py
index 5bfb0e9d519..5e55cd4abcf 100644
--- a/heron/tools/tracker/src/python/metricstimeline.py
+++ b/heron/tools/tracker/src/python/metricstimeline.py
@@ -39,6 +39,17 @@ class MetricsTimeline(BaseModel):
       description="map of (metric name, instance, start) to metric value",
   )
 
+
+class LegacyMetricsTimeline(BaseModel):
+  component: str
+  starttime: int
+  endtime: int
+  timeline: Dict[str, Dict[str, Dict[int, str]]] = Field(
+      ...,
+      description="map of (metric name, instance, start) to metric value",
+  )
+
+
 # pylint: disable=too-many-locals, too-many-branches, unused-argument
 async def get_metrics_timeline(
     tmanager: tmanager_pb2.TManagerLocation,
diff --git a/heron/tools/tracker/src/python/query_operators.py 
b/heron/tools/tracker/src/python/query_operators.py
index 68a005a3e37..6d4ce830552 100644
--- a/heron/tools/tracker/src/python/query_operators.py
+++ b/heron/tools/tracker/src/python/query_operators.py
@@ -460,7 +460,7 @@ class _SimpleArithmaticOperator(Operator):
       for key, metric in metrics2.items():
         # Initialize with first metrics timeline, but second metric's instance
         # because that is multivariate
-        if metrics:
+        if "" in metrics:
           met = Metrics(None, None, metric.instance, start, end, 
metrics[""].timeline.copy())
           for timestamp in list(met.timeline.keys()):
             v = self._f(met.timeline[timestamp], 
metric.timeline.get(timestamp))
@@ -477,7 +477,10 @@ class _SimpleArithmaticOperator(Operator):
       # Initialize with first metrics timeline and its instance
       met = Metrics(None, None, metric.instance, start, end, 
metric.timeline.copy())
       for timestamp in list(met.timeline.keys()):
-        v = self._f(met.timeline[timestamp], 
metrics2[""].timeline.get(timestamp))
+        met2_value = None
+        if "" in metrics2:
+          met2_value = metrics2[""].timeline.get(timestamp)
+        v = self._f(met.timeline[timestamp], met2_value)
         if v is None:
           met.timeline.pop(timestamp, None)
         else:
diff --git a/heron/tools/tracker/src/python/routers/metrics.py 
b/heron/tools/tracker/src/python/routers/metrics.py
index 5e50c4bd1d6..7e5ff3c97ab 100644
--- a/heron/tools/tracker/src/python/routers/metrics.py
+++ b/heron/tools/tracker/src/python/routers/metrics.py
@@ -79,7 +79,7 @@ async def get_component_metrics(
   if metric_response.status.status == common_pb2.NOTOK:
     if metric_response.status.HasField("message"):
       Log.warn(
-          "Recieved response from Tmanager: %s", metric_response.status.message
+          "Received response from Tmanager: %s", metric_response.status.message
       )
 
   metrics = {}
@@ -119,8 +119,6 @@ async def get_metrics( # pylint: disable=too-many-arguments
   )
 
 
-@router.get("/metricstimeline", response_model=metricstimeline.MetricsTimeline,
-    deprecated=True)
 @router.get("/metrics/timeline", 
response_model=metricstimeline.MetricsTimeline)
 async def get_metrics_timeline( # pylint: disable=too-many-arguments
     cluster: str,
@@ -134,7 +132,6 @@ async def get_metrics_timeline( # pylint: 
disable=too-many-arguments
     instances: Optional[List[str]] = Query(None, alias="instance"),
 ):
   """
-  '/metricstimeline' 0.20.5 below.
   '/metrics/timeline' 0.20.5 above.
   Return metrics over the given interval.
   """
@@ -146,6 +143,31 @@ async def get_metrics_timeline( # pylint: 
disable=too-many-arguments
   )
 
 
+@router.get("/metricstimeline", 
response_model=metricstimeline.LegacyMetricsTimeline,
+    deprecated=True)
+async def get_legacy_metrics_timeline(  # pylint: disable=too-many-arguments
+    cluster: str,
+    environ: str,
+    component: str,
+    start_time: int = Query(..., alias="starttime"),
+    end_time: int = Query(..., alias="endtime"),
+    role: Optional[str] = None,
+    topology_name: str = Query(..., alias="topology"),
+    metric_names: Optional[List[str]] = Query(None, alias="metricname"),
+    instances: Optional[List[str]] = Query(None, alias="instance"),
+):
+  """
+  '/metricstimeline' 0.20.5 below.
+  Return metrics over the given interval.
+  """
+  if start_time > end_time:
+    raise BadRequest("start_time > end_time")
+  topology = state.tracker.get_topology(cluster, role, environ, topology_name)
+  return await metricstimeline.get_metrics_timeline(
+      topology.tmanager, component, metric_names, instances, start_time, 
end_time
+  )
+
+
 class TimelinePoint(BaseModel): # pylint: disable=too-few-public-methods
   """A metric at discrete points in time."""
   instance: Optional[str] = Field(
@@ -163,8 +185,6 @@ class MetricsQueryResponse(BaseModel): # pylint: 
disable=too-few-public-methods
       ..., description="list of timeline point objects",
   )
 
-@router.get("/metricsquery", response_model=MetricsQueryResponse,
-    deprecated=True)
 @router.get("/metrics/query", response_model=MetricsQueryResponse)
 async def get_metrics_query( # pylint: disable=too-many-arguments
     cluster: str,
@@ -176,7 +196,6 @@ async def get_metrics_query( # pylint: 
disable=too-many-arguments
     topology_name: str = Query(..., alias="topology"),
 ) -> MetricsQueryResponse:
   """
-  '/metricsquery' 0.20.5 below.
   '/metrics/query' 0.20.5 above.
   Run a metrics query against a particular topology.
   """
diff --git a/heron/tools/tracker/src/python/topology.py 
b/heron/tools/tracker/src/python/topology.py
index 140242974ec..89a6ac6409c 100644
--- a/heron/tools/tracker/src/python/topology.py
+++ b/heron/tools/tracker/src/python/topology.py
@@ -66,6 +66,7 @@ class TopologyInfoExecutionState(TopologyInfoMetadata):
   has_packing_plan: bool
   has_tmanager_location: bool
   has_scheduler_location: bool
+  status: str
 
 class RuntimeStateStatemanager(BaseModel):
   is_registered: bool
@@ -551,11 +552,8 @@ class Topology:
     return TopologyInfoSchedulerLocation(
         name=scheduler_location.topology_name,
         http_endpoint=scheduler_location.http_endpoint,
-        job_page_link=(
-            scheduler_location.job_page_link[0]
-            if scheduler_location.job_page_link
-            else ""
-        ),
+        job_page_link=scheduler_location.job_page_link \
+            if scheduler_location.job_page_link else "",
     )
 
   @staticmethod
diff --git a/heron/tools/tracker/src/python/utils.py 
b/heron/tools/tracker/src/python/utils.py
index 50553baa4fc..723cd4b6a50 100644
--- a/heron/tools/tracker/src/python/utils.py
+++ b/heron/tools/tracker/src/python/utils.py
@@ -214,10 +214,10 @@ def convert_pb_kvs(kvs, include_non_primitives=True) -> 
dict:
 def _convert_java_value(kv, include_non_primitives=True):
   try:
     pobj = javaobj.loads(kv.serialized_value)
-    if isinstance(pobj, str):
+    if isinstance(pobj, (str, int, float, bool)):
       return pobj
 
-    if isinstance(pobj, 
javaobj.transformers.DefaultObjectTransformer.JavaPrimitiveClass):
+    if hasattr(pobj, 'value'):
       return pobj.value
 
     if include_non_primitives:
diff --git a/heron/tools/ui/resources/static/js/topologies.js 
b/heron/tools/ui/resources/static/js/topologies.js
index 7ed9b8adb0c..8435bf11414 100644
--- a/heron/tools/ui/resources/static/js/topologies.js
+++ b/heron/tools/ui/resources/static/js/topologies.js
@@ -994,7 +994,7 @@ var BoltRunningInfo = React.createClass({
               var latencyMetrics = 
metrics[boltName][time][executeLatencyMetricName][stream].metrics;
               // For each intance
               for (var m in countMetrics) {
-                if (countMetrics.hasOwnProperty(m) && 
latencyMetrics.hasOwnProperty(m)) {
+                if (countMetrics.hasOwnProperty(m) && latencyMetrics && 
latencyMetrics.hasOwnProperty(m)) {
                   var count = Number(countMetrics[m]) / 
(metrics[boltName][time][executeCountMetricName][stream].scaleDevisor || 1);
                   var latency = Number(latencyMetrics[m]) / 
(metrics[boltName][time][executeLatencyMetricName][stream].scaleDevisor || 1);
                   var utilization = count * latency;
diff --git a/heron/tools/ui/src/python/main.py 
b/heron/tools/ui/src/python/main.py
index c4b3dc6b704..a4422e990f0 100644
--- a/heron/tools/ui/src/python/main.py
+++ b/heron/tools/ui/src/python/main.py
@@ -38,7 +38,7 @@ import requests
 import uvicorn
 
 from fastapi import APIRouter, FastAPI, Query, Request
-from fastapi.responses import HTMLResponse
+from fastapi.responses import HTMLResponse, PlainTextResponse
 from fastapi.staticfiles import StaticFiles
 from fastapi.templating import Jinja2Templates
 from starlette.responses import RedirectResponse, Response
@@ -614,6 +614,11 @@ def histogram_snippet(
   )
 
 
+@app.get("/health", response_class=PlainTextResponse)
+def healthcheck():
+  return "ok"
+
+
 app.include_router(topologies_router, prefix="/topologies")
 app.mount(
     "/static",

Reply via email to