This is an automated email from the ASF dual-hosted git repository. Caideyipi pushed a commit to branch hotfix/2.0.9.4-sjzt in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit 029f935869a78c9b222b928beed7f3daed3c6ba2
Author: 陈荣钊 <[email protected]>
AuthorDate: Thu May 28 11:36:21 2026 +0800

    [TIMECHODB][AINode] Add GC after non-pool AINode inference
---
 .../iotdb/ainode/core/manager/inference_manager.py | 46 ++++++++++++++++------
 1 file changed, 34 insertions(+), 12 deletions(-)

diff --git a/iotdb-core/ainode/iotdb/ainode/core/manager/inference_manager.py b/iotdb-core/ainode/iotdb/ainode/core/manager/inference_manager.py
index 64bca30dae8..527661e119d 100644
--- a/iotdb-core/ainode/iotdb/ainode/core/manager/inference_manager.py
+++ b/iotdb-core/ainode/iotdb/ainode/core/manager/inference_manager.py
@@ -16,6 +16,7 @@
 # under the License.
 #
 
+import gc
 import threading
 import time
 import traceback
@@ -197,28 +198,49 @@ class InferenceManager:
             )
             outputs = self._process_request(infer_req)
         else:
+            outputs = self._run_inference_without_pool(
+                model_id, model_inputs, inference_attrs
+            )
+
+        # convert tensor into tsblock for the output in each batch
+        resp_list = []
+        for batch_idx, output in enumerate(outputs):
+            resp = convert_tensor_to_tsblock(output)
+            resp_list.append(resp)
+        return resp_list
+
+    def _run_inference_without_pool(
+        self,
+        model_id: str,
+        model_inputs,
+        inference_attrs: dict,
+    ) -> list[torch.Tensor]:
+        inference_pipeline = None
+        inputs = None
+        raw_outputs = None
+        try:
             model_info = self._model_manager.get_model_info(model_id)
             inference_pipeline = load_pipeline(
                 model_info, device=self._backend.torch_device("cpu")
             )
             inputs = inference_pipeline.preprocess(model_inputs, **inference_attrs)
             if isinstance(inference_pipeline, ForecastPipeline):
-                outputs = inference_pipeline.forecast(inputs, **inference_attrs)
+                raw_outputs = inference_pipeline.forecast(inputs, **inference_attrs)
             elif isinstance(inference_pipeline, ClassificationPipeline):
-                outputs = inference_pipeline.classify(inputs, **inference_attrs)
+                raw_outputs = inference_pipeline.classify(inputs, **inference_attrs)
             elif isinstance(inference_pipeline, ChatPipeline):
-                outputs = inference_pipeline.chat(inputs, **inference_attrs)
+                raw_outputs = inference_pipeline.chat(inputs, **inference_attrs)
             else:
-                outputs = None
                 logger.error("[Inference] Unsupported pipeline type.")
-            outputs = inference_pipeline.postprocess(outputs, **inference_attrs)
-
-        # convert tensor into tsblock for the output in each batch
-        resp_list = []
-        for batch_idx, output in enumerate(outputs):
-            resp = convert_tensor_to_tsblock(output)
-            resp_list.append(resp)
-        return resp_list
+            return inference_pipeline.postprocess(raw_outputs, **inference_attrs)
+        finally:
+            del inference_pipeline
+            del inputs
+            del raw_outputs
+            collected = gc.collect()
+            logger.debug(
+                f"[Inference] Collected {collected} objects after non-pool inference for model {model_id}."
+            )
 
     def _run_forecast(
         self,
