ycycse commented on code in PR #15449:
URL: https://github.com/apache/iotdb/pull/15449#discussion_r2076818465


##########
iotdb-core/ainode/ainode/TimerXL/__init__.py:
##########


Review Comment:
   Please add the Apache license header at the beginning of each new file, like:
   ```
   # Licensed to the Apache Software Foundation (ASF) under one
   # or more contributor license agreements.  See the NOTICE file
   # distributed with this work for additional information
   # regarding copyright ownership.  The ASF licenses this file
   # to you under the Apache License, Version 2.0 (the
   # "License"); you may not use this file except in compliance
   # with the License.  You may obtain a copy of the License at
   #
   #     http://www.apache.org/licenses/LICENSE-2.0
   #
   # Unless required by applicable law or agreed to in writing,
   # software distributed under the License is distributed on an
   # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
   # KIND, either express or implied.  See the License for the
   # specific language governing permissions and limitations
   # under the License.
   #
   ```



##########
iotdb-core/ainode/ainode/TimerXL/models/timer_xl.py:
##########
@@ -0,0 +1,363 @@
+import torch
+from torch import nn
+from typing import Optional, List, Dict, Any, Tuple
+from dataclasses import dataclass
+from ainode.TimerXL.layers.Transformer_EncDec import TimerDecoderLayer
+from ainode.TimerXL.layers.Embed import TimerPatchEmbedding
+from ainode.TimerXL.models.configuration_timer import TimerxlConfig
+from ainode.core.util.masking import prepare_4d_causal_attention_mask
+from ainode.core.util.huggingface_cache import Cache, DynamicCache
+
+@dataclass
+class Output:
+    outputs: torch.Tensor
+    past_key_values: Optional[Any] = None
+
+class TimerModel(nn.Module):
+    def __init__(self, config: TimerxlConfig):
+        super().__init__()
+        self.config = config
+        self.embed_layer = TimerPatchEmbedding(config)
+        self.layers = nn.ModuleList(
+            [TimerDecoderLayer(config, layer_idx)
+             for layer_idx in range(config.num_hidden_layers)]
+        )
+        self.norm = torch.nn.LayerNorm(config.hidden_size)
+        self.gradient_checkpointing = False
+
+    def forward(
+        self,
+        input_ids: torch.FloatTensor = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_values: Optional[List[torch.FloatTensor]] = None,
+        use_cache: bool = None,
+    ):
+        # input_ids is the input of time series, its shape is [batch_size, 
seq_len]
+        
+        if input_ids is not None:
+            batch_size, seq_length = input_ids.shape
+        else:
+            raise ValueError(
+                "You have to specify either decoder_input_ids or 
decoder_inputs_embeds")
+
+        inputs_embeds = self.embed_layer(input_ids)    
+        
+        seq_length = inputs_embeds.shape[1]
+
+        past_key_values_length = 0
+
+        if use_cache:
+            use_legacy_cache = not isinstance(past_key_values, Cache)
+            if use_legacy_cache:
+                past_key_values = DynamicCache.from_legacy_cache(
+                    past_key_values)
+            past_key_values_length = past_key_values.get_usable_length(
+                seq_length)
+
+        if position_ids is None:
+            device = input_ids.device if input_ids is not None else 
inputs_embeds.device
+            position_ids = torch.arange(
+                past_key_values_length, seq_length + past_key_values_length, 
dtype=torch.long, device=device
+            )
+            position_ids = position_ids.view(-1, seq_length)
+        else:
+            position_ids = position_ids.view(-1, seq_length).long()
+
+        # 4d mask is passed through the layers
+        attention_mask = prepare_4d_causal_attention_mask(
+            attention_mask,
+            (batch_size, seq_length),
+            inputs_embeds,
+            past_key_values_length,
+        )
+
+        hidden_states = inputs_embeds
+
+        # decoder layers
+        next_decoder_cache = None
+
+        for decoder_layer in self.layers:
+            layer_outputs = decoder_layer(
+                hidden_states,
+                attention_mask=attention_mask,
+                position_ids=position_ids,
+                past_key_value=past_key_values,
+                use_cache=use_cache,
+            )
+
+            hidden_states = layer_outputs[0]
+            
+            if use_cache:
+                next_decoder_cache = layer_outputs[1]
+                
+        hidden_states = self.norm(hidden_states)
+
+        next_cache = None
+        if use_cache:
+            next_cache = next_decoder_cache.to_legacy_cache(
+            ) if use_legacy_cache else next_decoder_cache
+
+        return Output(
+            outputs=hidden_states,
+            past_key_values=next_cache
+        )
+
+class TimerForPrediction(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.config = config
+        self.model = TimerModel(self.config)
+        lm_head_list = []
+        self.output_token_len_map = {}
+        for i, output_token_len in enumerate(self.config.output_token_lens):
+            lm_head_list.append(
+                nn.Linear(self.config.hidden_size, output_token_len, 
bias=False))
+            self.output_token_len_map[output_token_len] = i
+        self.lm_heads = nn.ModuleList(lm_head_list)
+        self.loss_function = torch.nn.MSELoss(reduction='none')
+        
+    def forward(
+        self,
+        input_ids: torch.FloatTensor = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_values: Optional[List[torch.FloatTensor]] = None,
+        use_cache: Optional[bool] = None,
+        max_output_length: Optional[int] = None,
+        revin: Optional[bool] = True,
+    ):
+        if revin:
+            means, stdev = input_ids.mean(dim=-1, keepdim=True), 
input_ids.std(dim=-1, keepdim=True)
+            input_ids = (input_ids - means) / stdev
+   
+        outputs = self.model(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            position_ids=position_ids,
+            past_key_values=past_key_values,
+            use_cache=use_cache,
+        )
+        hidden_states = outputs.outputs
+        
+        if max_output_length is None:
+            output_token_len = self.config.output_token_lens[0]
+            max_output_length = output_token_len
+        else:
+            output_token_len = self.config.output_token_lens[0]
+            for h in self.config.output_token_lens[1:]:
+                if h > max_output_length:
+                    break
+                else:
+                    output_token_len = h
+        
+        lm_head = self.lm_heads[self.output_token_len_map[output_token_len]]
+        predictions = lm_head(hidden_states)[:, -1, :]
+                    
+        if output_token_len > max_output_length:
+            predictions = predictions[:, :max_output_length]
+        if revin:
+            predictions = predictions * stdev + means
+            
+        return Output(predictions, outputs.past_key_values)
+
+
+class Model(nn.Module):
+    """
+    Timer-XL: Long-Context Transformers for Unified Time Series Forecasting 
+
+    Paper: https://arxiv.org/abs/2410.04803
+    
+    GitHub: https://github.com/thuml/Timer-XL
+    
+    Citation: @article{liu2024timer,
+        title={Timer-XL: Long-Context Transformers for Unified Time Series 
Forecasting},
+        author={Liu, Yong and Qin, Guo and Huang, Xiangdong and Wang, Jianmin 
and Long, Mingsheng},
+        journal={arXiv preprint arXiv:2410.04803},
+        year={2024}
+    }
+    """
+    def __init__(self, config: TimerxlConfig):
+        super().__init__()
+        self.config = config      # can't be scripted by torch
+
+        self.model = TimerForPrediction(config)
+        if config.ckpt_path is not None and config.ckpt_path != '':
+            if config.ckpt_path.endswith('.pt') or 
config.ckpt_path.endswith('.pth'):
+                state_dict = torch.load(config.ckpt_path)
+            else:
+                raise ValueError('unsupported model weight type')
+            # 
如果state_dict中没有'model.model'开头的key,则给所有key前面加上一个'model.'(这里的代码相比于huggingface上的代码多了一层)

Review Comment:
   Please avoid using Chinese in comments; write all comments in English.



##########
iotdb-core/ainode/ainode/TimerXL/models/configuration_timer.py:
##########
@@ -0,0 +1,44 @@
+from typing import List
+
+class TimerxlConfig:
+    model_type = "timerxl"
+    # keys_to_ignore_at_inference = ["past_key_values"]
+
+    def __init__(
+        self,
+        input_token_len: int = 96,
+        hidden_size: int = 1024,
+        intermediate_size: int = 2048,
+        output_token_lens: List[int] = [96],
+        num_hidden_layers: int = 8,
+        num_attention_heads: int = 8,
+        hidden_act: str = "silu",
+        use_cache: bool = True,
+        rope_theta: int = 10000,
+        attention_dropout: float = 0.0,
+        initializer_range: float = 0.02,
+        max_position_embeddings: int = 10000,
+        ckpt_path: str = None,
+        **kwargs,
+    ):
+        self.input_token_len = input_token_len
+        self.hidden_size = hidden_size
+        self.intermediate_size = intermediate_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.hidden_act = hidden_act
+        self.output_token_lens = output_token_lens
+        self.use_cache = use_cache
+        self.rope_theta = rope_theta
+        self.attention_dropout = attention_dropout
+        self.initializer_range = initializer_range
+        self.max_position_embeddings = max_position_embeddings
+        self.ckpt_path = ckpt_path

Review Comment:
   It would be better to add comments explaining what each of these parameters does.



##########
iotdb-core/ainode/ainode/core/util/masking.py:
##########
@@ -0,0 +1,69 @@
+import torch
+
+class TriangularCausalMask():
+    def __init__(self, B, L, device="cpu"):
+        mask_shape = [B, 1, L, L]
+        with torch.no_grad():
+            self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), 
diagonal=1).to(device)
+
+    @property
+    def mask(self):
+        return self._mask
+
+class TimerMultivariateMask():
+    def __init__(self, B, n_vars, n_tokens, device="cpu"):
+        mask_shape = [B, 1, n_tokens, n_tokens]
+        with torch.no_grad():
+            self._mask1 = torch.ones((n_vars, n_vars), 
dtype=torch.bool).to(device)
+            self._mask2 = torch.triu(torch.ones(mask_shape, dtype=torch.bool), 
diagonal=1).to(device)
+            self._mask = torch.kron(self._mask1, self._mask2)
+    @property
+    def mask(self):
+        return self._mask
+
+class TimerCovariateMask():
+    def __init__(self, B, n_vars, n_tokens, device="cpu"):
+        mask_shape = [B, 1, n_tokens, n_tokens]
+        with torch.no_grad():
+            self._mask1 = torch.eye(n_vars, dtype=torch.bool).to(device)
+            self._mask2 = torch.tril(torch.ones(mask_shape, 
dtype=torch.bool)).to(device)
+            self._mask = ~torch.kron(self._mask1, self._mask2)
+            self._mask[:, :, -n_tokens:, :-n_tokens] = False
+            
+    @property
+    def mask(self):
+        return self._mask
+    
+def prepare_4d_causal_attention_mask(
+    attention_mask,
+    input_shape,                  # (B, T_query)
+    inputs_embeds: torch.Tensor,
+    past_key_values_length: int = 0,
+):
+    """
+    返回形状 [B, 1, T_query, T_total] 的加性掩码:
+        ─ 允许位置: 0
+        ─ 屏蔽位置: -inf
+    """

Review Comment:
   Same as above — please write this docstring in English rather than Chinese.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to