This is an automated email from the ASF dual-hosted git repository.
saadurrahman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-heron.git
The following commit(s) were added to refs/heads/master by this push:
new 3fdf1f86071 Fix HealthManager (#3819)
3fdf1f86071 is described below
commit 3fdf1f8607183d6fcd687bf3a4557537a5bad79b
Author: choi se <[email protected]>
AuthorDate: Thu Apr 28 03:25:47 2022 +0900
Fix HealthManager (#3819)
Bug fixes in the HealthManager endpoint.
---
.../org/apache/heron/healthmgr/HealthManager.java | 2 --
heron/tools/tracker/src/python/BUILD | 2 +-
heron/tools/tracker/src/python/app.py | 6 +++-
heron/tools/tracker/src/python/metricstimeline.py | 11 ++++++++
heron/tools/tracker/src/python/query_operators.py | 7 +++--
heron/tools/tracker/src/python/routers/metrics.py | 33 +++++++++++++++++-----
heron/tools/tracker/src/python/topology.py | 8 ++----
heron/tools/tracker/src/python/utils.py | 4 +--
heron/tools/ui/resources/static/js/topologies.js | 2 +-
heron/tools/ui/src/python/main.py | 7 ++++-
10 files changed, 60 insertions(+), 22 deletions(-)
diff --git
a/heron/healthmgr/src/java/org/apache/heron/healthmgr/HealthManager.java
b/heron/healthmgr/src/java/org/apache/heron/healthmgr/HealthManager.java
index dd0c7f7af09..f41d516fd3f 100644
--- a/heron/healthmgr/src/java/org/apache/heron/healthmgr/HealthManager.java
+++ b/heron/healthmgr/src/java/org/apache/heron/healthmgr/HealthManager.java
@@ -48,7 +48,6 @@ import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
-import org.apache.heron.api.utils.Slf4jUtils;
import org.apache.heron.classification.InterfaceStability.Evolving;
import org.apache.heron.classification.InterfaceStability.Unstable;
import org.apache.heron.common.basics.SingletonRegistry;
@@ -153,7 +152,6 @@ public class HealthManager {
}
public static void main(String[] args) throws Exception {
- Slf4jUtils.installSLF4JBridge();
CommandLineParser parser = new DefaultParser();
Options slaManagerCliOptions = constructCliOptions();
diff --git a/heron/tools/tracker/src/python/BUILD
b/heron/tools/tracker/src/python/BUILD
index 947e4695337..ccc581dd8b2 100644
--- a/heron/tools/tracker/src/python/BUILD
+++ b/heron/tools/tracker/src/python/BUILD
@@ -10,7 +10,7 @@ pex_library(
"click==7.1.2",
"fastapi==0.75.0",
"httpx==0.16.1",
- "javaobj-py3==0.4.1",
+ "javaobj-py3==0.4.3",
"networkx==2.5",
"protobuf==3.16.0",
"uvicorn==0.11.7",
diff --git a/heron/tools/tracker/src/python/app.py
b/heron/tools/tracker/src/python/app.py
index 3c6e6f408f8..94ad43f1b99 100644
--- a/heron/tools/tracker/src/python/app.py
+++ b/heron/tools/tracker/src/python/app.py
@@ -28,7 +28,7 @@ from typing import Dict, List, Optional
from fastapi import FastAPI, Query, Request
from fastapi.exceptions import RequestValidationError
-from fastapi.responses import JSONResponse
+from fastapi.responses import JSONResponse, PlainTextResponse
from starlette.exceptions import HTTPException as StarletteHTTPException
from heron.tools.tracker.src.python import constants, state, query
@@ -128,3 +128,7 @@ async def get_machines(
] = topology.get_machines()
return response
+
+@app.get("/health", response_class=PlainTextResponse)
+def healthcheck():
+ return "ok"
diff --git a/heron/tools/tracker/src/python/metricstimeline.py
b/heron/tools/tracker/src/python/metricstimeline.py
index 5bfb0e9d519..5e55cd4abcf 100644
--- a/heron/tools/tracker/src/python/metricstimeline.py
+++ b/heron/tools/tracker/src/python/metricstimeline.py
@@ -39,6 +39,17 @@ class MetricsTimeline(BaseModel):
description="map of (metric name, instance, start) to metric value",
)
+
+class LegacyMetricsTimeline(BaseModel):
+ component: str
+ starttime: int
+ endtime: int
+ timeline: Dict[str, Dict[str, Dict[int, str]]] = Field(
+ ...,
+ description="map of (metric name, instance, start) to metric value",
+ )
+
+
# pylint: disable=too-many-locals, too-many-branches, unused-argument
async def get_metrics_timeline(
tmanager: tmanager_pb2.TManagerLocation,
diff --git a/heron/tools/tracker/src/python/query_operators.py
b/heron/tools/tracker/src/python/query_operators.py
index 68a005a3e37..6d4ce830552 100644
--- a/heron/tools/tracker/src/python/query_operators.py
+++ b/heron/tools/tracker/src/python/query_operators.py
@@ -460,7 +460,7 @@ class _SimpleArithmaticOperator(Operator):
for key, metric in metrics2.items():
# Initialize with first metrics timeline, but second metric's instance
# because that is multivariate
- if metrics:
+ if "" in metrics:
met = Metrics(None, None, metric.instance, start, end,
metrics[""].timeline.copy())
for timestamp in list(met.timeline.keys()):
v = self._f(met.timeline[timestamp],
metric.timeline.get(timestamp))
@@ -477,7 +477,10 @@ class _SimpleArithmaticOperator(Operator):
# Initialize with first metrics timeline and its instance
met = Metrics(None, None, metric.instance, start, end,
metric.timeline.copy())
for timestamp in list(met.timeline.keys()):
- v = self._f(met.timeline[timestamp], metrics2[""].timeline.get(timestamp))
+ met2_value = None
+ if "" in metrics2:
+ met2_value = metrics2[""].timeline.get(timestamp)
+ v = self._f(met.timeline[timestamp], met2_value)
if v is None:
met.timeline.pop(timestamp, None)
else:
diff --git a/heron/tools/tracker/src/python/routers/metrics.py
b/heron/tools/tracker/src/python/routers/metrics.py
index 5e50c4bd1d6..7e5ff3c97ab 100644
--- a/heron/tools/tracker/src/python/routers/metrics.py
+++ b/heron/tools/tracker/src/python/routers/metrics.py
@@ -79,7 +79,7 @@ async def get_component_metrics(
if metric_response.status.status == common_pb2.NOTOK:
if metric_response.status.HasField("message"):
Log.warn(
- "Recieved response from Tmanager: %s", metric_response.status.message
+ "Received response from Tmanager: %s", metric_response.status.message
)
metrics = {}
@@ -119,8 +119,6 @@ async def get_metrics( # pylint: disable=too-many-arguments
)
-@router.get("/metricstimeline", response_model=metricstimeline.MetricsTimeline,
- deprecated=True)
@router.get("/metrics/timeline",
response_model=metricstimeline.MetricsTimeline)
async def get_metrics_timeline( # pylint: disable=too-many-arguments
cluster: str,
@@ -134,7 +132,6 @@ async def get_metrics_timeline( # pylint:
disable=too-many-arguments
instances: Optional[List[str]] = Query(None, alias="instance"),
):
"""
- '/metricstimeline' 0.20.5 below.
'/metrics/timeline' 0.20.5 above.
Return metrics over the given interval.
"""
@@ -146,6 +143,31 @@ async def get_metrics_timeline( # pylint:
disable=too-many-arguments
)
+@router.get("/metricstimeline", response_model=metricstimeline.LegacyMetricsTimeline,
+ deprecated=True)
+async def get_legacy_metrics_timeline( # pylint: disable=too-many-arguments
+ cluster: str,
+ environ: str,
+ component: str,
+ start_time: int = Query(..., alias="starttime"),
+ end_time: int = Query(..., alias="endtime"),
+ role: Optional[str] = None,
+ topology_name: str = Query(..., alias="topology"),
+ metric_names: Optional[List[str]] = Query(None, alias="metricname"),
+ instances: Optional[List[str]] = Query(None, alias="instance"),
+):
+ """
+ '/metricstimeline' 0.20.5 below.
+ Return metrics over the given interval.
+ """
+ if start_time > end_time:
+ raise BadRequest("start_time > end_time")
+ topology = state.tracker.get_topology(cluster, role, environ, topology_name)
+ return await metricstimeline.get_metrics_timeline(
+ topology.tmanager, component, metric_names, instances, start_time, end_time
+ )
+
+
class TimelinePoint(BaseModel): # pylint: disable=too-few-public-methods
"""A metric at discrete points in time."""
instance: Optional[str] = Field(
@@ -163,8 +185,6 @@ class MetricsQueryResponse(BaseModel): # pylint:
disable=too-few-public-methods
..., description="list of timeline point objects",
)
-@router.get("/metricsquery", response_model=MetricsQueryResponse,
- deprecated=True)
@router.get("/metrics/query", response_model=MetricsQueryResponse)
async def get_metrics_query( # pylint: disable=too-many-arguments
cluster: str,
@@ -176,7 +196,6 @@ async def get_metrics_query( # pylint:
disable=too-many-arguments
topology_name: str = Query(..., alias="topology"),
) -> MetricsQueryResponse:
"""
- '/metricsquery' 0.20.5 below.
'/metrics/query' 0.20.5 above.
Run a metrics query against a particular topology.
"""
diff --git a/heron/tools/tracker/src/python/topology.py
b/heron/tools/tracker/src/python/topology.py
index 140242974ec..89a6ac6409c 100644
--- a/heron/tools/tracker/src/python/topology.py
+++ b/heron/tools/tracker/src/python/topology.py
@@ -66,6 +66,7 @@ class TopologyInfoExecutionState(TopologyInfoMetadata):
has_packing_plan: bool
has_tmanager_location: bool
has_scheduler_location: bool
+ status: str
class RuntimeStateStatemanager(BaseModel):
is_registered: bool
@@ -551,11 +552,8 @@ class Topology:
return TopologyInfoSchedulerLocation(
name=scheduler_location.topology_name,
http_endpoint=scheduler_location.http_endpoint,
- job_page_link=(
- scheduler_location.job_page_link[0]
- if scheduler_location.job_page_link
- else ""
- ),
+ job_page_link=scheduler_location.job_page_link \
+ if scheduler_location.job_page_link else "",
)
@staticmethod
diff --git a/heron/tools/tracker/src/python/utils.py
b/heron/tools/tracker/src/python/utils.py
index 50553baa4fc..723cd4b6a50 100644
--- a/heron/tools/tracker/src/python/utils.py
+++ b/heron/tools/tracker/src/python/utils.py
@@ -214,10 +214,10 @@ def convert_pb_kvs(kvs, include_non_primitives=True) ->
dict:
def _convert_java_value(kv, include_non_primitives=True):
try:
pobj = javaobj.loads(kv.serialized_value)
- if isinstance(pobj, str):
+ if isinstance(pobj, (str, int, float, bool)):
return pobj
- if isinstance(pobj, javaobj.transformers.DefaultObjectTransformer.JavaPrimitiveClass):
+ if hasattr(pobj, 'value'):
return pobj.value
if include_non_primitives:
diff --git a/heron/tools/ui/resources/static/js/topologies.js
b/heron/tools/ui/resources/static/js/topologies.js
index 7ed9b8adb0c..8435bf11414 100644
--- a/heron/tools/ui/resources/static/js/topologies.js
+++ b/heron/tools/ui/resources/static/js/topologies.js
@@ -994,7 +994,7 @@ var BoltRunningInfo = React.createClass({
var latencyMetrics =
metrics[boltName][time][executeLatencyMetricName][stream].metrics;
// For each intance
for (var m in countMetrics) {
- if (countMetrics.hasOwnProperty(m) && latencyMetrics.hasOwnProperty(m)) {
+ if (countMetrics.hasOwnProperty(m) && latencyMetrics && latencyMetrics.hasOwnProperty(m)) {
var count = Number(countMetrics[m]) /
(metrics[boltName][time][executeCountMetricName][stream].scaleDevisor || 1);
var latency = Number(latencyMetrics[m]) /
(metrics[boltName][time][executeLatencyMetricName][stream].scaleDevisor || 1);
var utilization = count * latency;
diff --git a/heron/tools/ui/src/python/main.py
b/heron/tools/ui/src/python/main.py
index c4b3dc6b704..a4422e990f0 100644
--- a/heron/tools/ui/src/python/main.py
+++ b/heron/tools/ui/src/python/main.py
@@ -38,7 +38,7 @@ import requests
import uvicorn
from fastapi import APIRouter, FastAPI, Query, Request
-from fastapi.responses import HTMLResponse
+from fastapi.responses import HTMLResponse, PlainTextResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from starlette.responses import RedirectResponse, Response
@@ -614,6 +614,11 @@ def histogram_snippet(
)
+@app.get("/health", response_class=PlainTextResponse)
+def healthcheck():
+ return "ok"
+
+
app.include_router(topologies_router, prefix="/topologies")
app.mount(
"/static",