This is an automated email from the ASF dual-hosted git repository.

liyuheng pushed a commit to branch lyh/ainode/logger-refactor
in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit e8c4c9ad9d57ebc200bf822da59c8c81c7025487 Author: liyuheng <[email protected]> AuthorDate: Thu Jul 31 23:36:29 2025 +0800 self review --- iotdb-core/ainode/ainode/core/constant.py | 7 +++--- .../ainode/core/inference/inference_request.py | 4 ---- .../core/inference/inference_request_pool.py | 8 +++---- .../ainode/core/manager/inference_manager.py | 26 +--------------------- .../ainode/resources/conf/iotdb-ainode.properties | 2 +- 5 files changed, 9 insertions(+), 38 deletions(-) diff --git a/iotdb-core/ainode/ainode/core/constant.py b/iotdb-core/ainode/ainode/core/constant.py index 99aa49d1851..aef7fdb9cdf 100644 --- a/iotdb-core/ainode/ainode/core/constant.py +++ b/iotdb-core/ainode/ainode/core/constant.py @@ -34,11 +34,11 @@ AINODE_SYSTEM_FILE_NAME = "system.properties" # AINode cluster configuration AINODE_CLUSTER_NAME = "defaultCluster" -AINODE_TARGET_CONFIG_NODE_LIST = TEndPoint("127.0.0.1", 12710) +AINODE_TARGET_CONFIG_NODE_LIST = TEndPoint("127.0.0.1", 10710) AINODE_RPC_ADDRESS = "127.0.0.1" -AINODE_RPC_PORT = 12810 +AINODE_RPC_PORT = 10810 AINODE_CLUSTER_INGRESS_ADDRESS = "127.0.0.1" -AINODE_CLUSTER_INGRESS_PORT = 13667 +AINODE_CLUSTER_INGRESS_PORT = 6667 AINODE_CLUSTER_INGRESS_USERNAME = "root" AINODE_CLUSTER_INGRESS_PASSWORD = "root" AINODE_CLUSTER_INGRESS_TIME_ZONE = "UTC+8" @@ -70,6 +70,7 @@ AINODE_LOG_FILE_NAMES = [ ] AINODE_LOG_FILE_LEVELS = [logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR] STD_LEVEL = logging.INFO +AINODE_INFERENCE_LOG_DIR_TEMPLATE = "log_inference_rank_{}" # AINode model management MODEL_WEIGHTS_FILE_IN_SAFETENSORS = "model.safetensors" diff --git a/iotdb-core/ainode/ainode/core/inference/inference_request.py b/iotdb-core/ainode/ainode/core/inference/inference_request.py index 714bde05bf0..2c45826fd26 100644 --- a/iotdb-core/ainode/ainode/core/inference/inference_request.py +++ b/iotdb-core/ainode/ainode/core/inference/inference_request.py @@ -63,15 +63,11 @@ class InferenceRequest: self.batch_size, max_new_tokens, 
device="cpu" ) # shape: [self.batch_size, max_new_steps] - self.logger = Logger() - def mark_running(self): self.state = InferenceRequestState.RUNNING - logger.info("mark_running") def mark_finished(self): self.state = InferenceRequestState.FINISHED - logger.info("mark_finished") def is_finished(self) -> bool: return ( diff --git a/iotdb-core/ainode/ainode/core/inference/inference_request_pool.py b/iotdb-core/ainode/ainode/core/inference/inference_request_pool.py index 7245d9c8bac..6ccee0b7467 100644 --- a/iotdb-core/ainode/ainode/core/inference/inference_request_pool.py +++ b/iotdb-core/ainode/ainode/core/inference/inference_request_pool.py @@ -26,6 +26,7 @@ import torch import torch.multiprocessing as mp from transformers import PretrainedConfig +from ainode.core.constant import AINODE_INFERENCE_LOG_DIR_TEMPLATE from ainode.core.logger.base_logger import BaseLogger from ainode.core.config import AINodeDescriptor from ainode.core.inference.inference_request import InferenceRequest @@ -61,11 +62,8 @@ class InferenceRequestPool(mp.Process): self.model = None self._model_manager = None self.device = None - self.logger = BaseLogger(f"log_inference_rank_{self.device}") - self.logger.info("info") - self.logger.warning("warning") - self.logger.error("error") - self.logger.debug("debug") + self.logger = BaseLogger(AINODE_INFERENCE_LOG_DIR_TEMPLATE.format(self.device)) + self.logger.info("gg") self._threads = [] self._waiting_queue = request_queue # Requests that are waiting to be processed diff --git a/iotdb-core/ainode/ainode/core/manager/inference_manager.py b/iotdb-core/ainode/ainode/core/manager/inference_manager.py index f5682a0ae53..5a853ac4e72 100644 --- a/iotdb-core/ainode/ainode/core/manager/inference_manager.py +++ b/iotdb-core/ainode/ainode/core/manager/inference_manager.py @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. 
# -import os import threading import time from abc import ABC, abstractmethod @@ -73,28 +72,6 @@ class InferenceStrategy(ABC): # we only get valueList currently. class TimerXLStrategy(InferenceStrategy): def infer(self, full_data, predict_length=96, **_): - if torch.cuda.is_available(): - device = next(self.model.parameters()).device - else: - device = torch.device("cpu") - # Get possible rank - if torch.distributed.is_initialized(): - global_rank = torch.distributed.get_rank() - world_size = torch.distributed.get_world_size() - else: - # Not distribution, default rank=0, world_size=1 - global_rank, world_size = 0, 1 - - if device.type == "cuda": - gpu_name = torch.cuda.get_device_name(device.index) - logger.info( - f"[rank {global_rank}/{world_size}] " - f"Running on GPU {device.index} ({gpu_name})" - ) - else: - logger.info(f"[rank {global_rank}/{world_size}] Running on CPU") - - logger.info("Start inference") data = full_data[1][0] if data.dtype.byteorder not in ("=", "|"): data = data.byteswap().newbyteorder() @@ -102,7 +79,6 @@ class TimerXLStrategy(InferenceStrategy): # TODO: unify model inference input output = self.model.generate(seqs, max_new_tokens=predict_length, revin=True) df = pd.DataFrame(output[0]) - logger.info("Complete inference") return convert_to_binary(df) @@ -163,7 +139,7 @@ class InferenceManager: # DEFAULT_DEVICE = "cpu" DEFAULT_DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") DEFAULT_POOL_SIZE = ( - 1 # TODO: Remove these parameter by sampling model inference consumption + 0 # TODO: Remove these parameter by sampling model inference consumption ) WAITING_INTERVAL_IN_MS = ( AINodeDescriptor().get_config().get_ain_inference_batch_interval_in_ms() diff --git a/iotdb-core/ainode/resources/conf/iotdb-ainode.properties b/iotdb-core/ainode/resources/conf/iotdb-ainode.properties index 6c8c28e3f26..b31079ce00d 100644 --- a/iotdb-core/ainode/resources/conf/iotdb-ainode.properties +++ 
b/iotdb-core/ainode/resources/conf/iotdb-ainode.properties @@ -19,7 +19,7 @@ # Used for indicate cluster name and distinguish different cluster. # Datatype: string -cluster_name=???? +cluster_name=defaultCluster # ConfigNode address registered at AINode startup. # Allow modifications only before starting the service for the first time.
