This is an automated email from the ASF dual-hosted git repository.

cdionysio pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new 496a22f9d3 [SYSTEMDS-3835] Scuro window aggregation operator
496a22f9d3 is described below

commit 496a22f9d3e82910927fac6e8c7bfeb275388664
Author: Christina Dionysio <[email protected]>
AuthorDate: Wed May 14 14:57:11 2025 +0200

    [SYSTEMDS-3835] Scuro window aggregation operator
    
    This patch adds a window aggregation operator with mean, min, max, and sum
    aggregation functions.
    The window aggregation is applied to the individual modalities and can
    handle multiple modality subtypes.
    This PR also adds tests to verify the correctness of the operator.
    
    Closes #2225
---
 .github/workflows/python.yml                       |   5 +-
 .../systemds/scuro/dataloader/audio_loader.py      |  15 +-
 .../systemds/scuro/dataloader/base_loader.py       |  12 +-
 .../systemds/scuro/dataloader/json_loader.py       |   9 +-
 .../systemds/scuro/dataloader/text_loader.py       |   7 +-
 .../systemds/scuro/dataloader/video_loader.py      |  20 ++-
 src/main/python/systemds/scuro/modality/joined.py  |  33 +++--
 .../systemds/scuro/modality/joined_transformed.py  |   7 +
 .../python/systemds/scuro/modality/modality.py     | 108 ++++++++-------
 .../modality_identifier.py}                        |  27 ++--
 .../python/systemds/scuro/modality/transformed.py  |  36 +++--
 src/main/python/systemds/scuro/modality/type.py    | 153 +++++++++++++++++++--
 .../systemds/scuro/modality/unimodal_modality.py   |  46 +++++--
 .../systemds/scuro/representations/aggregate.py    |  80 ++++++++---
 .../python/systemds/scuro/representations/bert.py  |  11 +-
 .../python/systemds/scuro/representations/bow.py   |  10 +-
 .../representations/{unimodal.py => context.py}    |  29 ++--
 .../systemds/scuro/representations/fusion.py       |  22 ++-
 .../python/systemds/scuro/representations/glove.py |   3 +-
 .../scuro/representations/mel_spectrogram.py       |  25 ++--
 .../scuro/representations/representation.py        |  18 ++-
 .../systemds/scuro/representations/resnet.py       |  32 ++++-
 .../python/systemds/scuro/representations/tfidf.py |  15 +-
 .../systemds/scuro/representations/unimodal.py     |  12 +-
 .../systemds/scuro/representations/window.py       |  74 +++++++---
 .../systemds/scuro/representations/word2vec.py     |  18 ++-
 src/main/python/tests/scuro/data_generator.py      |  48 ++++++-
 src/main/python/tests/scuro/test_data_loaders.py   |  26 ++--
 src/main/python/tests/scuro/test_dr_search.py      |   4 +-
 .../python/tests/scuro/test_multimodal_join.py     |  11 +-
 .../tests/scuro/test_unimodal_representations.py   |  11 +-
 .../python/tests/scuro/test_window_operations.py   | 106 ++++++++++++++
 32 files changed, 773 insertions(+), 260 deletions(-)

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 4e5baa9d81..cea222a4a7 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -116,8 +116,9 @@ jobs:
           h5py \
           gensim \
           black \
-          opt-einsum
-                
+          opt-einsum \
+          nltk
+        
     - name: Build Python Package
       run: |
         cd src/main/python
diff --git a/src/main/python/systemds/scuro/dataloader/audio_loader.py 
b/src/main/python/systemds/scuro/dataloader/audio_loader.py
index f7319fe191..a6a164b4fb 100644
--- a/src/main/python/systemds/scuro/dataloader/audio_loader.py
+++ b/src/main/python/systemds/scuro/dataloader/audio_loader.py
@@ -22,23 +22,18 @@ from typing import List, Optional, Union
 
 import librosa
 from systemds.scuro.dataloader.base_loader import BaseLoader
-from systemds.scuro.utils.schema_helpers import create_timestamps
+from systemds.scuro.modality.type import ModalityType
 
 
 class AudioLoader(BaseLoader):
     def __init__(
-        self,
-        source_path: str,
-        indices: List[str],
-        chunk_size: Optional[int] = None,
+        self, source_path: str, indices: List[str], chunk_size: Optional[int] 
= None
     ):
-        super().__init__(source_path, indices, chunk_size)
+        super().__init__(source_path, indices, chunk_size, ModalityType.AUDIO)
 
     def extract(self, file: str, index: Optional[Union[str, List[str]]] = 
None):
         self.file_sanity_check(file)
         audio, sr = librosa.load(file)
-        self.metadata[file] = {"sample_rate": sr, "length": audio.shape[0]}
-        self.metadata[file]["timestamp"] = create_timestamps(
-            self.metadata[file]["sample_rate"], self.metadata[file]["length"]
-        )
+        self.metadata[file] = self.modality_type.create_audio_metadata(sr, 
audio)
+
         self.data.append(audio)
diff --git a/src/main/python/systemds/scuro/dataloader/base_loader.py 
b/src/main/python/systemds/scuro/dataloader/base_loader.py
index 5cdf63f584..ea2b25bbb4 100644
--- a/src/main/python/systemds/scuro/dataloader/base_loader.py
+++ b/src/main/python/systemds/scuro/dataloader/base_loader.py
@@ -25,7 +25,11 @@ from typing import List, Optional, Union
 
 class BaseLoader(ABC):
     def __init__(
-        self, source_path: str, indices: List[str], chunk_size: Optional[int] 
= None
+        self,
+        source_path: str,
+        indices: List[str],
+        chunk_size: Optional[int] = None,
+        modality_type=None,
     ):
         """
         Base class to load raw data for a given list of indices and stores 
them in the data object
@@ -40,6 +44,7 @@ class BaseLoader(ABC):
         )  # TODO: check what the index should be for storing the metadata 
(file_name, counter, ...)
         self.source_path = source_path
         self.indices = indices
+        self.modality_type = modality_type
         self._next_chunk = 0
         self._num_chunks = 1
         self._chunk_size = None
@@ -64,6 +69,11 @@ class BaseLoader(ABC):
     def next_chunk(self):
         return self._next_chunk
 
+    def reset(self):
+        self._next_chunk = 0
+        self.data = []
+        self.metadata = {}
+
     def load(self):
         """
         Takes care of loading the raw data either chunk wise (if chunk size is 
defined) or all at once
diff --git a/src/main/python/systemds/scuro/dataloader/json_loader.py 
b/src/main/python/systemds/scuro/dataloader/json_loader.py
index ac37545188..edef7f205b 100644
--- a/src/main/python/systemds/scuro/dataloader/json_loader.py
+++ b/src/main/python/systemds/scuro/dataloader/json_loader.py
@@ -20,6 +20,7 @@
 # -------------------------------------------------------------
 import json
 
+from systemds.scuro.modality.type import ModalityType
 from systemds.scuro.dataloader.base_loader import BaseLoader
 from typing import Optional, List, Union
 
@@ -32,7 +33,7 @@ class JSONLoader(BaseLoader):
         field: str,
         chunk_size: Optional[int] = None,
     ):
-        super().__init__(source_path, indices, chunk_size)
+        super().__init__(source_path, indices, chunk_size, ModalityType.TEXT)
         self.field = field
 
     def extract(self, file: str, index: Optional[Union[str, List[str]]] = 
None):
@@ -40,4 +41,8 @@ class JSONLoader(BaseLoader):
         with open(file) as f:
             json_file = json.load(f)
             for idx in index:
-                self.data.append(json_file[idx][self.field])
+                sentence = json_file[idx][self.field]
+                self.data.append(sentence)
+                self.metadata[idx] = self.modality_type.create_text_metadata(
+                    len(sentence), sentence
+                )
diff --git a/src/main/python/systemds/scuro/dataloader/text_loader.py 
b/src/main/python/systemds/scuro/dataloader/text_loader.py
index bf34cf85c7..3f87155147 100644
--- a/src/main/python/systemds/scuro/dataloader/text_loader.py
+++ b/src/main/python/systemds/scuro/dataloader/text_loader.py
@@ -20,6 +20,7 @@
 # -------------------------------------------------------------
 from systemds.scuro.dataloader.base_loader import BaseLoader
 from typing import Optional, Pattern, List, Union
+from systemds.scuro.modality.type import ModalityType
 import re
 
 
@@ -31,7 +32,7 @@ class TextLoader(BaseLoader):
         chunk_size: Optional[int] = None,
         prefix: Optional[Pattern[str]] = None,
     ):
-        super().__init__(source_path, indices, chunk_size)
+        super().__init__(source_path, indices, chunk_size, ModalityType.TEXT)
         self.prefix = prefix
 
     def extract(self, file: str, index: Optional[Union[str, List[str]]] = 
None):
@@ -41,5 +42,7 @@ class TextLoader(BaseLoader):
                 if self.prefix:
                     line = re.sub(self.prefix, "", line)
                 line = line.replace("\n", "")
-                self.metadata[file] = {"length": len(line.split())}
+                self.metadata[file] = self.modality_type.create_text_metadata(
+                    len(line.split()), line
+                )
                 self.data.append(line)
diff --git a/src/main/python/systemds/scuro/dataloader/video_loader.py 
b/src/main/python/systemds/scuro/dataloader/video_loader.py
index 807a43b21c..333960e698 100644
--- a/src/main/python/systemds/scuro/dataloader/video_loader.py
+++ b/src/main/python/systemds/scuro/dataloader/video_loader.py
@@ -23,8 +23,8 @@ from typing import List, Optional, Union
 import numpy as np
 
 from systemds.scuro.dataloader.base_loader import BaseLoader
-from systemds.scuro.utils.schema_helpers import create_timestamps
 import cv2
+from systemds.scuro.modality.type import ModalityType
 
 
 class VideoLoader(BaseLoader):
@@ -34,7 +34,7 @@ class VideoLoader(BaseLoader):
         indices: List[str],
         chunk_size: Optional[int] = None,
     ):
-        super().__init__(source_path, indices, chunk_size)
+        super().__init__(source_path, indices, chunk_size, ModalityType.VIDEO)
 
     def extract(self, file: str, index: Optional[Union[str, List[str]]] = 
None):
         self.file_sanity_check(file)
@@ -43,16 +43,14 @@ class VideoLoader(BaseLoader):
         if not cap.isOpened():
             raise f"Could not read video at path: {file}"
 
-        self.metadata[file] = {
-            "fps": cap.get(cv2.CAP_PROP_FPS),
-            "length": int(cap.get(cv2.CAP_PROP_FRAME_COUNT)),
-            "width": int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
-            "height": int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
-            "num_channels": 3,
-        }
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        num_channels = 3
 
-        self.metadata[file]["timestamp"] = create_timestamps(
-            self.metadata[file]["fps"], self.metadata[file]["length"]
+        self.metadata[file] = self.modality_type.create_video_metadata(
+            fps, length, width, height, num_channels
         )
 
         frames = []
diff --git a/src/main/python/systemds/scuro/modality/joined.py 
b/src/main/python/systemds/scuro/modality/joined.py
index acdf4fb94f..c1aa26abf6 100644
--- a/src/main/python/systemds/scuro/modality/joined.py
+++ b/src/main/python/systemds/scuro/modality/joined.py
@@ -104,7 +104,7 @@ class JoinedModality(Modality):
                 self.joined_right.data[i - starting_idx].append([])
                 right = np.array([])
                 if self.condition.join_type == "<":
-                    while c < len(idx_2) and idx_2[c] < nextIdx[j]:
+                    while c < len(idx_2) - 1 and idx_2[c] < nextIdx[j]:
                         if right.size == 0:
                             right = self.right_modality.data[i][c]
                             if right.ndim == 1:
@@ -125,7 +125,7 @@ class JoinedModality(Modality):
                                 )
                         c = c + 1
                 else:
-                    while c < len(idx_2) and idx_2[c] <= idx_1[j]:
+                    while c < len(idx_2) - 1 and idx_2[c] <= idx_1[j]:
                         if idx_2[c] == idx_1[j]:
                             right.append(self.right_modality.data[i][c])
                         c = c + 1
@@ -141,18 +141,17 @@ class JoinedModality(Modality):
 
                 self.joined_right.data[i - starting_idx][j] = right
 
-    def apply_representation(self, representation, aggregation):
+    def apply_representation(self, representation, aggregation=None):
         self.aggregation = aggregation
         if self.chunked_execution:
             return self._handle_chunked_execution(representation)
-        elif self.left_type.__name__.__contains__("Unimodal"):
-            self.left_modality.extract_raw_data()
-            if self.left_type == self.right_type:
-                self.right_modality.extract_raw_data()
-        elif self.right_type.__name__.__contains__("Unimodal"):
-            self.right_modality.extract_raw_data()
+        # elif self.left_type.__name__.__contains__("Unimodal"):
+        #     self.left_modality.extract_raw_data()
+        #     if self.left_type == self.right_type:
+        #         self.right_modality.extract_raw_data()
+        # elif self.right_type.__name__.__contains__("Unimodal") and not 
self.right_modality.has_data():
+        #     self.right_modality.extract_raw_data()
 
-        self.execute()
         left_transformed = self._apply_representation(
             self.left_modality, representation
         )
@@ -263,12 +262,12 @@ class JoinedModality(Modality):
 
     def _apply_representation(self, modality, representation):
         transformed = representation.transform(modality)
-        if self.aggregation:
-            aggregated_data_left = self.aggregation.window(transformed)
-            transformed = Modality(
-                transformed.modality_type,
-                transformed.metadata,
-            )
-            transformed.data = aggregated_data_left
+        # if self.aggregation:
+        #     aggregated_data_left = self.aggregation.execute(transformed)
+        #     transformed = Modality(
+        #         transformed.modality_type,
+        #         transformed.metadata,
+        #     )
+        #     transformed.data = aggregated_data_left
 
         return transformed
diff --git a/src/main/python/systemds/scuro/modality/joined_transformed.py 
b/src/main/python/systemds/scuro/modality/joined_transformed.py
index e2b53671aa..a0ab8c4ce9 100644
--- a/src/main/python/systemds/scuro/modality/joined_transformed.py
+++ b/src/main/python/systemds/scuro/modality/joined_transformed.py
@@ -25,6 +25,7 @@ import numpy as np
 
 from systemds.scuro.modality.modality import Modality
 from systemds.scuro.representations.utils import pad_sequences
+from systemds.scuro.representations.window import WindowAggregation
 
 
 class JoinedTransformedModality(Modality):
@@ -68,3 +69,9 @@ class JoinedTransformedModality(Modality):
             self.data[i] = np.array(r)
         self.data = pad_sequences(self.data)
         return self
+
+    def window(self, window_size, aggregation):
+        w = WindowAggregation(window_size, aggregation)
+        self.left_modality.data = w.execute(self.left_modality)
+        self.right_modality.data = w.execute(self.right_modality)
+        return self
diff --git a/src/main/python/systemds/scuro/modality/modality.py 
b/src/main/python/systemds/scuro/modality/modality.py
index cce26eee01..c110a24eba 100644
--- a/src/main/python/systemds/scuro/modality/modality.py
+++ b/src/main/python/systemds/scuro/modality/modality.py
@@ -18,28 +18,42 @@
 # under the License.
 #
 # -------------------------------------------------------------
+from copy import deepcopy
 from typing import List
 
 import numpy as np
 
-from systemds.scuro.modality.type import ModalityType
+from systemds.scuro.modality.type import ModalityType, DataLayout
+from systemds.scuro.representations import utils
 
 
 class Modality:
 
-    def __init__(self, modalityType: ModalityType, metadata=None):
+    def __init__(self, modalityType: ModalityType, modality_id=-1, 
metadata={}):
         """
         Parent class of the different Modalities (unimodal & multimodal)
         :param modality_type: Type of the modality
         """
         self.modality_type = modalityType
         self.schema = modalityType.get_schema()
+        self.metadata = metadata
         self.data = []
         self.data_type = None
         self.cost = None
         self.shape = None
-        self.dataIndex = None
-        self.metadata = metadata
+        self.modality_id = modality_id
+
+    @property
+    def data(self):
+        return self._data
+
+    @data.setter
+    def data(self, value):
+        """
+        This method ensures that the data layout in the metadata is updated 
when the data changes
+        """
+        self._data = value
+        self.update_metadata()
 
     def get_modality_names(self) -> List[str]:
         """
@@ -50,64 +64,66 @@ class Modality:
         ]
 
     def copy_from_instance(self):
+        """
+        Create a copy of the modality instance
+        """
         return type(self)(self.modality_type, self.metadata)
 
     def update_metadata(self):
-        md_copy = self.metadata
+        """
+        Updates the metadata of the modality (i.e.: updates timestamps)
+        """
+        if (
+            not self.has_metadata()
+            or not self.has_data()
+            or len(self.data) < len(self.metadata)
+        ):
+            return
+
+        md_copy = deepcopy(self.metadata)
         self.metadata = {}
         for i, (md_k, md_v) in enumerate(md_copy.items()):
             updated_md = self.modality_type.update_metadata(md_v, self.data[i])
             self.metadata[md_k] = updated_md
 
-    def get_metadata_at_position(self, position: int):
-        return self.metadata[self.dataIndex][position]
-
-    def flatten(self):
+    def flatten(self, padding=True):
+        """
+        Flattens modality data by row-wise concatenation
+        Prerequisite for some ML-models
+        """
+        max_len = 0
         for num_instance, instance in enumerate(self.data):
             if type(instance) is np.ndarray:
                 self.data[num_instance] = instance.flatten()
-            elif type(instance) is list:
+            elif isinstance(instance, List):
                 self.data[num_instance] = np.array(
                     [item for sublist in instance for item in sublist]
-                )
-
+                ).flatten()
+            max_len = max(max_len, len(self.data[num_instance]))
+
+        if padding:
+            for i, instance in enumerate(self.data):
+                if isinstance(instance, np.ndarray):
+                    if len(instance) < max_len:
+                        padded_data = np.zeros(max_len, dtype=instance.dtype)
+                        padded_data[: len(instance)] = instance
+                        self.data[i] = padded_data
+                else:
+                    padded_data = []
+                    for entry in instance:
+                        padded_data.append(utils.pad_sequences(entry, max_len))
+                    self.data[i] = padded_data
         self.data = np.array(self.data)
         return self
 
     def get_data_layout(self):
-        if not self.data:
-            return self.data
-
-        if isinstance(self.data[0], list):
-            return "list_of_lists_of_numpy_array"
-        elif isinstance(self.data[0], np.ndarray):
-            return "list_of_numpy_array"
-
-    def get_data_shape(self):
-        layout = self.get_data_layout()
-        if not layout:
-            return None
-
-        if layout == "list_of_lists_of_numpy_array":
-            return self.data[0][0].shape
-        elif layout == "list_of_numpy_array":
-            return self.data[0].shape
-
-    def get_data_dtype(self):
-        layout = self.get_data_layout()
-        if not layout:
-            return None
-
-        if layout == "list_of_lists_of_numpy_array":
-            return self.data[0][0].dtype
-        elif layout == "list_of_numpy_array":
-            return self.data[0].dtype
-
-    def update_data_layout(self):
-        if not self.data:
-            return
+        if self.has_metadata():
+            return 
list(self.metadata.values())[0]["data_layout"]["representation"]
+
+        return None
 
-        self.schema["data_layout"]["representation"] = self.get_data_layout()
+    def has_data(self):
+        return self.data is not None and len(self.data) != 0
 
-        self.shape = self.get_data_shape()
-        self.schema["data_layout"]["type"] = self.get_data_dtype()
+    def has_metadata(self):
+        return self.metadata is not None and self.metadata != {}
diff --git a/src/main/python/systemds/scuro/representations/unimodal.py 
b/src/main/python/systemds/scuro/modality/modality_identifier.py
similarity index 69%
copy from src/main/python/systemds/scuro/representations/unimodal.py
copy to src/main/python/systemds/scuro/modality/modality_identifier.py
index c56d611a74..95668c6e58 100644
--- a/src/main/python/systemds/scuro/representations/unimodal.py
+++ b/src/main/python/systemds/scuro/modality/modality_identifier.py
@@ -18,21 +18,24 @@
 # under the License.
 #
 # -------------------------------------------------------------
+import os
+import pickle
+from typing import List, Dict, Any, Union
+import tempfile
 from systemds.scuro.representations.representation import Representation
 
 
-class UnimodalRepresentation(Representation):
-    def __init__(self, name):
-        """
-        Parent class for all unimodal representation types
-        :param name: name of the representation
-        """
-        super().__init__(name)
+class ModalityIdentifier:
+    """ """
 
-    def transform(self, data):
-        raise f"Not implemented for {self.name}"
+    _instance = None
+    id = -1
 
+    def __new__(cls):
+        if not cls._instance:
+            cls._instance = super().__new__(cls)
+        return cls._instance
 
-class PixelRepresentation(UnimodalRepresentation):
-    def __init__(self):
-        super().__init__("Pixel")
+    def new_id(self):  # TODO: make threadsafe when parallelizing
+        self.id += 1
+        return self.id
diff --git a/src/main/python/systemds/scuro/modality/transformed.py 
b/src/main/python/systemds/scuro/modality/transformed.py
index 64bfba0819..2b4b049ef4 100644
--- a/src/main/python/systemds/scuro/modality/transformed.py
+++ b/src/main/python/systemds/scuro/modality/transformed.py
@@ -21,6 +21,7 @@
 from functools import reduce
 from operator import or_
 
+from systemds.scuro.modality.type import ModalityType
 from systemds.scuro.modality.joined import JoinedModality
 from systemds.scuro.modality.modality import Modality
 from systemds.scuro.representations.window import WindowAggregation
@@ -28,25 +29,26 @@ from systemds.scuro.representations.window import 
WindowAggregation
 
 class TransformedModality(Modality):
 
-    def __init__(self, modality_type, transformation, metadata):
+    def __init__(self, modality_type, transformation, modality_id, metadata):
         """
         Parent class of the different Modalities (unimodal & multimodal)
         :param modality_type: Type of the original modality(ies)
         :param transformation: Representation to be applied on the modality
         """
-        super().__init__(modality_type, metadata)
+        super().__init__(modality_type, modality_id, metadata)
         self.transformation = transformation
-        self.data = []
 
     def copy_from_instance(self):
-        return type(self)(self.modality_type, self.transformation, 
self.metadata)
+        return type(self)(
+            self.modality_type, self.transformation, self.modality_id, 
self.metadata
+        )
 
     def join(self, right, join_condition):
         chunked_execution = False
         if type(right).__name__.__contains__("Unimodal"):
             if right.data_loader.chunk_size:
                 chunked_execution = True
-            elif right.data is None or len(right.data) == 0:
+            elif not right.has_data():
                 right.extract_raw_data()
 
         joined_modality = JoinedModality(
@@ -59,24 +61,29 @@ class TransformedModality(Modality):
 
         if not chunked_execution:
             joined_modality.execute(0)
+            joined_modality.joined_right.update_metadata()
 
         return joined_modality
 
-    def window(self, windowSize, aggregationFunction, fieldName=None):
+    def window(self, windowSize, aggregation):
         transformed_modality = TransformedModality(
-            self.modality_type, "window", self.metadata
+            self.modality_type, "window", self.modality_id, self.metadata
         )
-        w = WindowAggregation(windowSize, aggregationFunction)
-        transformed_modality.data = w.window(self)
+        w = WindowAggregation(windowSize, aggregation)
+        transformed_modality.data = w.execute(self)
 
         return transformed_modality
 
-    def apply_representation(self, representation, aggregation):
-        new_modality = representation.transform(self)
+    def context(self, context_operator):
+        transformed_modality = TransformedModality(
+            self.modality_type, context_operator.name, self.modality_id, 
self.metadata
+        )
 
-        if aggregation:
-            new_modality.data = aggregation.window(new_modality)
+        transformed_modality.data = context_operator.execute(self)
+        return transformed_modality
 
+    def apply_representation(self, representation):
+        new_modality = representation.transform(self)
         new_modality.update_metadata()
         return new_modality
 
@@ -87,8 +94,9 @@ class TransformedModality(Modality):
         :param fusion_method: The fusion method to be used to combine 
modalities
         """
         fused_modality = TransformedModality(
-            reduce(or_, (o.modality_type for o in other), self.modality_type),
+            ModalityType.EMBEDDING,
             fusion_method,
+            self.modality_id,
             self.metadata,
         )
         modalities = [self]
diff --git a/src/main/python/systemds/scuro/modality/type.py 
b/src/main/python/systemds/scuro/modality/type.py
index 197ad23c54..4b59c263d6 100644
--- a/src/main/python/systemds/scuro/modality/type.py
+++ b/src/main/python/systemds/scuro/modality/type.py
@@ -18,7 +18,10 @@
 # under the License.
 #
 # -------------------------------------------------------------
-from enum import Flag, auto
+from enum import Flag, auto, Enum
+from copy import deepcopy
+import numpy as np
+
 from systemds.scuro.utils.schema_helpers import (
     calculate_new_frequency,
     create_timestamps,
@@ -28,25 +31,40 @@ from systemds.scuro.utils.schema_helpers import (
 # TODO: needs a way to define if data comes from a dataset with multiple 
instances or is like a streaming scenario where we only have one instance
 # right now it is a list of instances (if only one instance the list would 
contain only a single item)
 class ModalitySchemas:
-    TEXT_SCHEMA = {"type": "string", "length": "int"}
+    BASE_SCHEMA = {"data_layout": {"type": "?", "representation": "?", 
"shape": "?"}}
 
-    AUDIO_SCHEMA = {
+    TEMPORAL_BASE_SCHEMA = {
+        **BASE_SCHEMA,
         "timestamp": "array",
-        "data_layout": {"type": "?", "representation": "?"},
-        "sample_rate": "integer",
         "length": "integer",
+        "frequency": "float",
     }
 
+    TEXT_SCHEMA = {**BASE_SCHEMA, "length": "int"}
+
+    EMBEDDING_SCHEMA = {**BASE_SCHEMA, "length": "int"}
+
+    AUDIO_SCHEMA = {**TEMPORAL_BASE_SCHEMA, "frequency": "integer"}
+
     VIDEO_SCHEMA = {
-        "timestamp": "array",
-        "data_layout": {"type": "?", "representation": "?"},
-        "fps": "integer",
-        "length": "integer",
+        **TEMPORAL_BASE_SCHEMA,
         "width": "integer",
         "height": "integer",
         "num_channels": "integer",
     }
 
+    IMAGE_SCHEMA = {
+        **BASE_SCHEMA,
+        "width": "integer",
+        "height": "integer",
+        "num_channels": "integer",
+    }
+
+    TIMESERIES_SCHEMA = {
+        **TEMPORAL_BASE_SCHEMA,
+        "num_columns": "integer",
+    }
+
     _metadata_handlers = {}
 
     @classmethod
@@ -67,10 +85,31 @@ class ModalitySchemas:
 
     @classmethod
     def update_metadata(cls, name, md, data):
+        md = cls.update_base_metadata(md, data)
         mdHandler = cls._metadata_handlers.get(name)
         if mdHandler:
             return mdHandler(md, data)
 
+        return md
+
+    @classmethod
+    def update_base_metadata(cls, md, data, data_is_single_instance=True):
+        data_layout = DataLayout.get_data_layout(data, data_is_single_instance)
+
+        dtype = np.nan
+        shape = None
+        if data_layout is DataLayout.SINGLE_LEVEL:
+            dtype = data.dtype
+            shape = data.shape
+        elif data_layout is DataLayout.NESTED_LEVEL:
+            shape = data[0].shape
+            dtype = data[0].dtype
+
+        md["data_layout"].update(
+            {"representation": data_layout, "type": dtype, "shape": shape}
+        )
+        return md
+
     def extract_data(self, data, index):
         if self.get("data_layout").get("representation") == "list_array":
             return data[index]
@@ -80,11 +119,11 @@ class ModalitySchemas:
 
 @ModalitySchemas.register_metadata_handler("AUDIO")
 def handle_audio_metadata(md, data):
-    new_frequency = calculate_new_frequency(len(data), md["length"], 
md["sample_rate"])
+    new_frequency = calculate_new_frequency(len(data), md["length"], 
md["frequency"])
     md.update(
         {
             "length": len(data),
-            "sample_rate": new_frequency,
+            "frequency": new_frequency,
             "timestamp": create_timestamps(new_frequency, len(data)),
         }
     )
@@ -93,24 +132,112 @@ def handle_audio_metadata(md, data):
 
 @ModalitySchemas.register_metadata_handler("VIDEO")
 def handle_video_metadata(md, data):
-    new_frequency = calculate_new_frequency(len(data), md["length"], md["fps"])
+    new_frequency = calculate_new_frequency(len(data), md["length"], 
md["frequency"])
+    md.update(
+        {
+            "length": len(data),
+            "frequency": new_frequency,
+            "timestamp": create_timestamps(new_frequency, len(data)),
+        }
+    )
+    return md
+
+
[email protected]_metadata_handler("IMAGE")
+def handle_image_metadata(md, data):
+    md.update(
+        {
+            "width": data.shape[1] if isinstance(data, np.ndarray) else 1,
+            "height": data.shape[0] if isinstance(data, np.ndarray) else 
len(data),
+            "num_channels": 1,  # if data.ndim <= 2 else data.shape[2],
+        }
+    )
+    return md
+
+
[email protected]_metadata_handler("TIMESERIES")
+def handle_timeseries_metadata(md, data):
+    new_frequency = calculate_new_frequency(len(data), md["length"], 
md["frequency"])
     md.update(
         {
             "length": len(data),
-            "fps": new_frequency,
+            "num_columns": (
+                data.shape[1] if isinstance(data, np.ndarray) and data.ndim > 
1 else 1
+            ),
+            "frequency": new_frequency,
             "timestamp": create_timestamps(new_frequency, len(data)),
         }
     )
     return md
 
 
[email protected]_metadata_handler("TEXT")
+def handle_text_metadata(md, data):
+    md.update({"length": len(data)})
+    return md
+
+
 class ModalityType(Flag):
     TEXT = auto()
     AUDIO = auto()
     VIDEO = auto()
+    IMAGE = auto()
+    TIMESERIES = auto()
+    EMBEDDING = auto()
 
     def get_schema(self):
         return ModalitySchemas.get(self.name)
 
     def update_metadata(self, md, data):
         return ModalitySchemas.update_metadata(self.name, md, data)
+
+    def create_audio_metadata(self, sampling_rate, data):
+        md = deepcopy(self.get_schema())
+        md = ModalitySchemas.update_base_metadata(md, data, True)
+        md["frequency"] = sampling_rate
+        md["length"] = data.shape[0]
+        md["timestamp"] = create_timestamps(sampling_rate, md["length"])
+        return md
+
+    def create_text_metadata(self, length, data):
+        md = deepcopy(self.get_schema())
+        md["data_layout"]["representation"] = DataLayout.SINGLE_LEVEL
+        md["data_layout"]["shape"] = (1, length)
+        md["data_layout"]["type"] = str
+        md["length"] = length
+        return md
+
+    def create_video_metadata(self, frequency, length, width, height, 
num_channels):
+        md = deepcopy(self.get_schema())
+        md["frequency"] = frequency
+        md["length"] = length
+        md["width"] = width
+        md["height"] = height
+        md["num_channels"] = num_channels
+        md["timestamp"] = create_timestamps(frequency, length)
+        md["data_layout"]["representation"] = DataLayout.NESTED_LEVEL
+        md["data_layout"]["type"] = float
+        md["data_layout"]["shape"] = (width, height, num_channels)
+
+        return md
+
+
+class DataLayout(Enum):
+    SINGLE_LEVEL = 1
+    NESTED_LEVEL = 2
+
+    @classmethod
+    def get_data_layout(cls, data, data_is_single_instance):
+        if data is None or len(data) == 0:
+            return None
+
+        if data_is_single_instance:
+            if isinstance(data, list):
+                return DataLayout.NESTED_LEVEL
+            elif isinstance(data, np.ndarray):
+                return DataLayout.SINGLE_LEVEL
+
+        if isinstance(data[0], list):
+            return DataLayout.NESTED_LEVEL
+        elif isinstance(data[0], np.ndarray):
+            return DataLayout.SINGLE_LEVEL
diff --git a/src/main/python/systemds/scuro/modality/unimodal_modality.py 
b/src/main/python/systemds/scuro/modality/unimodal_modality.py
index ae33b6605b..6173237e0a 100644
--- a/src/main/python/systemds/scuro/modality/unimodal_modality.py
+++ b/src/main/python/systemds/scuro/modality/unimodal_modality.py
@@ -27,21 +27,22 @@ from systemds.scuro.modality.modality import Modality
 from systemds.scuro.modality.joined import JoinedModality
 from systemds.scuro.modality.transformed import TransformedModality
 from systemds.scuro.modality.type import ModalityType
+from systemds.scuro.modality.modality_identifier import ModalityIdentifier
 
 
 class UnimodalModality(Modality):
 
-    def __init__(self, data_loader: BaseLoader, modality_type: ModalityType):
+    def __init__(self, data_loader: BaseLoader):
         """
-        This class represents a unimodal modality.
+        This class represents a unimodal modality.
         :param data_loader: Defines how the raw data should be loaded
         :param modality_type: Type of the modality
         """
-        super().__init__(modality_type, None)
+        super().__init__(data_loader.modality_type, 
ModalityIdentifier().new_id(), None)
         self.data_loader = data_loader
 
     def copy_from_instance(self):
-        new_instance = type(self)(self.data_loader, self.modality_type)
+        new_instance = type(self)(self.data_loader)
         if self.metadata:
             new_instance.metadata = self.metadata.copy()
         return new_instance
@@ -73,20 +74,46 @@ class UnimodalModality(Modality):
             self.data_loader.chunk_size is not None,
         )
 
+        if self.data_loader.chunk_size is None:
+            self.extract_raw_data()
+            joined_modality.execute(0)
+            joined_modality.joined_right.update_metadata()
+
         return joined_modality
 
-    def apply_representation(self, representation, aggregation=None):
+    def context(self, context_operator):
+        if not self.has_data():
+            self.extract_raw_data()
+
+        transformed_modality = TransformedModality(
+            self.modality_type, context_operator.name, self.modality_id, 
self.metadata
+        )
+
+        transformed_modality.data = context_operator.execute(self)
+        return transformed_modality
+
+    def aggregate(self, aggregation_function):
+        if self.data is None:
+            raise Exception("Data is None")
+
+    def apply_representations(self, representations):
+        # TODO
+        pass
+
+    def apply_representation(self, representation):
         new_modality = TransformedModality(
-            self.modality_type, representation.name, 
self.data_loader.metadata.copy()
+            self.modality_type,
+            representation.name,
+            self.modality_id,
+            self.data_loader.metadata.copy(),
         )
         new_modality.data = []
 
         if self.data_loader.chunk_size:
+            self.data_loader.reset()
             while self.data_loader.next_chunk < self.data_loader.num_chunks:
                 self.extract_raw_data()
                 transformed_chunk = representation.transform(self)
-                if aggregation:
-                    transformed_chunk.data = 
aggregation.window(transformed_chunk)
                 new_modality.data.extend(transformed_chunk.data)
                 new_modality.metadata.update(transformed_chunk.metadata)
         else:
@@ -94,8 +121,5 @@ class UnimodalModality(Modality):
                 self.extract_raw_data()
             new_modality = representation.transform(self)
 
-            if aggregation:
-                new_modality.data = aggregation.window(new_modality)
-
         new_modality.update_metadata()
         return new_modality
diff --git a/src/main/python/systemds/scuro/representations/aggregate.py 
b/src/main/python/systemds/scuro/representations/aggregate.py
index 7c8d1c68d1..4b4545ef47 100644
--- a/src/main/python/systemds/scuro/representations/aggregate.py
+++ b/src/main/python/systemds/scuro/representations/aggregate.py
@@ -21,31 +21,77 @@
 import numpy as np
 
 from systemds.scuro.modality.modality import Modality
+from systemds.scuro.representations import utils
 
 
-# TODO: make this a Representation and add a fusion method that fuses two 
modalities with each other
+class Aggregation:
+    @staticmethod
+    def _mean_agg(data):
+        return np.mean(data, axis=0)
 
+    @staticmethod
+    def _max_agg(data):
+        return np.max(data, axis=0)
 
-class Aggregation:
-    def __init__(self, aggregation_function, field_name):
-        self.aggregation_function = aggregation_function
-        self.field_name = field_name
+    @staticmethod
+    def _min_agg(data):
+        return np.min(data, axis=0)
+
+    @staticmethod
+    def _sum_agg(data):
+        return np.sum(data, axis=0)
+
+    _aggregation_function = {
+        "mean": _mean_agg.__func__,
+        "max": _max_agg.__func__,
+        "min": _min_agg.__func__,
+        "sum": _sum_agg.__func__,
+    }
+
+    def __init__(self, aggregation_function="mean", pad_modality=False):
+        if aggregation_function not in self._aggregation_function.keys():
+            raise ValueError("Invalid aggregation function")
+        self._aggregation_func = 
self._aggregation_function[aggregation_function]
+        self.name = "Aggregation"
+        self.pad_modality = pad_modality
 
-    def aggregate(self, modality):
-        aggregated_modality = Modality(modality.modality_type, 
modality.metadata)
+    def execute(self, modality):
+        aggregated_modality = Modality(
+            modality.modality_type, modality.modality_id, modality.metadata
+        )
         aggregated_modality.data = []
+        max_len = 0
         for i, instance in enumerate(modality.data):
             aggregated_modality.data.append([])
-            for j, entry in enumerate(instance):
-                if self.aggregation_function == "sum":
-                    aggregated_modality.data[i].append(np.sum(entry, axis=0))
-                elif self.aggregation_function == "mean":
-                    aggregated_modality.data[i].append(np.mean(entry, axis=0))
-                elif self.aggregation_function == "min":
-                    aggregated_modality.data[i].append(np.min(entry, axis=0))
-                elif self.aggregation_function == "max":
-                    aggregated_modality.data[i].append(np.max(entry, axis=0))
+            if isinstance(instance, np.ndarray):
+                aggregated_data = self._aggregation_func(instance)
+            else:
+                aggregated_data = []
+                for entry in instance:
+                    aggregated_data.append(self._aggregation_func(entry))
+            max_len = max(max_len, len(aggregated_data))
+            aggregated_modality.data[i] = aggregated_data
+
+        if self.pad_modality:
+            for i, instance in enumerate(aggregated_modality.data):
+                if isinstance(instance, np.ndarray):
+                    if len(instance) < max_len:
+                        padded_data = np.zeros(max_len, dtype=instance.dtype)
+                        padded_data[: len(instance)] = instance
+                        aggregated_modality.data[i] = padded_data
                 else:
-                    raise ValueError("Invalid aggregation function")
+                    padded_data = []
+                    for entry in instance:
+                        padded_data.append(utils.pad_sequences(entry, max_len))
+                    aggregated_modality.data[i] = padded_data
 
         return aggregated_modality
+
+    def transform(self, modality):
+        return self.execute(modality)
+
+    def aggregate_instance(self, instance):
+        return self._aggregation_func(instance)
+
+    def get_aggregation_functions(self):
+        return self._aggregation_function.keys()
diff --git a/src/main/python/systemds/scuro/representations/bert.py 
b/src/main/python/systemds/scuro/representations/bert.py
index bfaaa22642..6395d0b9e6 100644
--- a/src/main/python/systemds/scuro/representations/bert.py
+++ b/src/main/python/systemds/scuro/representations/bert.py
@@ -26,17 +26,20 @@ from systemds.scuro.representations.unimodal import 
UnimodalRepresentation
 import torch
 from transformers import BertTokenizer, BertModel
 from systemds.scuro.representations.utils import save_embeddings
+from systemds.scuro.modality.type import ModalityType
 
 
 class Bert(UnimodalRepresentation):
-    def __init__(self, output_file=None):
-        super().__init__("Bert")
+    def __init__(self, model_name="bert", output_file=None):
+        parameters = {"model_name": "bert"}
+        self.model_name = model_name
+        super().__init__("Bert", ModalityType.EMBEDDING, parameters)
 
         self.output_file = output_file
 
     def transform(self, modality):
         transformed_modality = TransformedModality(
-            modality.modality_type, self, modality.metadata
+            modality.modality_type, self, modality.modality_id, 
modality.metadata
         )
         model_name = "bert-base-uncased"
         tokenizer = BertTokenizer.from_pretrained(
@@ -46,7 +49,7 @@ class Bert(UnimodalRepresentation):
         model = BertModel.from_pretrained(model_name)
 
         embeddings = self.create_embeddings(modality.data, model, tokenizer)
-
+        embeddings = [embeddings[i : i + 1] for i in 
range(embeddings.shape[0])]
         if self.output_file is not None:
             save_embeddings(embeddings, self.output_file)
 
diff --git a/src/main/python/systemds/scuro/representations/bow.py 
b/src/main/python/systemds/scuro/representations/bow.py
index f16f6ec04d..52fddc7d3f 100644
--- a/src/main/python/systemds/scuro/representations/bow.py
+++ b/src/main/python/systemds/scuro/representations/bow.py
@@ -25,23 +25,27 @@ from systemds.scuro.modality.transformed import 
TransformedModality
 from systemds.scuro.representations.unimodal import UnimodalRepresentation
 from systemds.scuro.representations.utils import save_embeddings
 
+from systemds.scuro.modality.type import ModalityType
+
 
 class BoW(UnimodalRepresentation):
-    def __init__(self, ngram_range, min_df, output_file=None):
-        super().__init__("BoW")
+    def __init__(self, ngram_range=2, min_df=2, output_file=None):
+        parameters = {"ngram_range": [ngram_range], "min_df": [min_df]}
+        super().__init__("BoW", ModalityType.EMBEDDING, parameters)
         self.ngram_range = ngram_range
         self.min_df = min_df
         self.output_file = output_file
 
     def transform(self, modality):
         transformed_modality = TransformedModality(
-            modality.modality_type, self, modality.metadata
+            modality.modality_type, self, modality.modality_id, 
modality.metadata
         )
         vectorizer = CountVectorizer(
             ngram_range=(1, self.ngram_range), min_df=self.min_df
         )
 
         X = vectorizer.fit_transform(modality.data).toarray()
+        X = [X[i : i + 1] for i in range(X.shape[0])]
 
         if self.output_file is not None:
             save_embeddings(X, self.output_file)
diff --git a/src/main/python/systemds/scuro/representations/unimodal.py 
b/src/main/python/systemds/scuro/representations/context.py
similarity index 60%
copy from src/main/python/systemds/scuro/representations/unimodal.py
copy to src/main/python/systemds/scuro/representations/context.py
index c56d611a74..4cbcf54f8e 100644
--- a/src/main/python/systemds/scuro/representations/unimodal.py
+++ b/src/main/python/systemds/scuro/representations/context.py
@@ -18,21 +18,26 @@
 # under the License.
 #
 # -------------------------------------------------------------
+import abc
+from typing import List
+
+from systemds.scuro.modality.modality import Modality
 from systemds.scuro.representations.representation import Representation
 
 
-class UnimodalRepresentation(Representation):
-    def __init__(self, name):
+class Context(Representation):
+    def __init__(self, name, parameters=None):
         """
-        Parent class for all unimodal representation types
-        :param name: name of the representation
+        Parent class for different context operations
+        :param name: Name of the context operator
         """
-        super().__init__(name)
-
-    def transform(self, data):
-        raise f"Not implemented for {self.name}"
-
+        super().__init__(name, parameters)
 
-class PixelRepresentation(UnimodalRepresentation):
-    def __init__(self):
-        super().__init__("Pixel")
+    @abc.abstractmethod
+    def execute(self, modality: Modality):
+        """
+        Implemented for every child class and creates a contextualized 
representation for a given modality
+        :param modality: modality to use
+        :return: contextualized data
+        """
+        raise f"Not implemented for Context Operator: {self.name}"
diff --git a/src/main/python/systemds/scuro/representations/fusion.py 
b/src/main/python/systemds/scuro/representations/fusion.py
index 623979dd05..773452371b 100644
--- a/src/main/python/systemds/scuro/representations/fusion.py
+++ b/src/main/python/systemds/scuro/representations/fusion.py
@@ -20,17 +20,19 @@
 # -------------------------------------------------------------
 from typing import List
 
+import numpy as np
+
 from systemds.scuro.modality.modality import Modality
 from systemds.scuro.representations.representation import Representation
 
 
 class Fusion(Representation):
-    def __init__(self, name):
+    def __init__(self, name, parameters=None):
         """
         Parent class for different multimodal fusion types
         :param name: Name of the fusion type
         """
-        super().__init__(name)
+        super().__init__(name, parameters)
 
     def transform(self, modalities: List[Modality]):
         """
@@ -47,10 +49,20 @@ class Fusion(Representation):
         :param modalities: List of modalities
         :return: maximum embedding size
         """
-        max_size = modalities[0].data.shape[1]
+        if isinstance(modalities[0].data[0], list):
+            max_size = modalities[0].data[0][0].shape[1]
+        elif isinstance(modalities[0].data, np.ndarray):
+            max_size = modalities[0].data.shape[1]
+        else:
+            max_size = modalities[0].data[0].shape[1]
         for idx in range(1, len(modalities)):
-            curr_shape = modalities[idx].data.shape
-            if len(modalities[idx - 1].data) != curr_shape[0]:
+            if isinstance(modalities[idx].data[0], list):
+                curr_shape = modalities[idx].data[0][0].shape
+            elif isinstance(modalities[idx].data, np.ndarray):
+                curr_shape = modalities[idx].data.shape
+            else:
+                curr_shape = modalities[idx].data[0].shape
+            if len(modalities[idx - 1].data) != len(modalities[idx].data):
                 raise f"Modality sizes don't match!"
             elif curr_shape[1] > max_size:
                 max_size = curr_shape[1]
diff --git a/src/main/python/systemds/scuro/representations/glove.py 
b/src/main/python/systemds/scuro/representations/glove.py
index 767fc8d375..7bb586dc99 100644
--- a/src/main/python/systemds/scuro/representations/glove.py
+++ b/src/main/python/systemds/scuro/representations/glove.py
@@ -24,6 +24,7 @@ from gensim.utils import tokenize
 
 from systemds.scuro.representations.unimodal import UnimodalRepresentation
 from systemds.scuro.representations.utils import read_data_from_file, 
save_embeddings
+from systemds.scuro.modality.type import ModalityType
 
 
 def load_glove_embeddings(file_path):
@@ -39,7 +40,7 @@ def load_glove_embeddings(file_path):
 
 class GloVe(UnimodalRepresentation):
     def __init__(self, glove_path, output_file=None):
-        super().__init__("GloVe")
+        super().__init__("GloVe", ModalityType.TEXT)
         self.glove_path = glove_path
         self.output_file = output_file
 
diff --git a/src/main/python/systemds/scuro/representations/mel_spectrogram.py 
b/src/main/python/systemds/scuro/representations/mel_spectrogram.py
index 483ea181b8..dfff4f3b7e 100644
--- a/src/main/python/systemds/scuro/representations/mel_spectrogram.py
+++ b/src/main/python/systemds/scuro/representations/mel_spectrogram.py
@@ -21,19 +21,27 @@
 import librosa
 import numpy as np
 
+from systemds.scuro.modality.type import ModalityType
 from systemds.scuro.modality.transformed import TransformedModality
 
-# import matplotlib.pyplot as plt
 from systemds.scuro.representations.unimodal import UnimodalRepresentation
 
 
 class MelSpectrogram(UnimodalRepresentation):
-    def __init__(self):
-        super().__init__("MelSpectrogram")
+    def __init__(self, n_mels=128, hop_length=512, n_fft=2048):
+        parameters = {
+            "n_mels": [20, 32, 64, 128],
+            "hop_length": [256, 512, 1024, 2048],
+            "n_fft": [1024, 2048, 4096],
+        }
+        super().__init__("MelSpectrogram", ModalityType.TIMESERIES, parameters)
+        self.n_mels = n_mels
+        self.hop_length = hop_length
+        self.n_fft = n_fft
 
     def transform(self, modality):
         transformed_modality = TransformedModality(
-            modality.modality_type, self, modality.metadata
+            self.output_modality_type, self, modality.modality_id, 
modality.metadata
         )
         result = []
         max_length = 0
@@ -46,12 +54,3 @@ class MelSpectrogram(UnimodalRepresentation):
 
         transformed_modality.data = result
         return transformed_modality
-
-    # def plot_spectrogram(self, spectrogram):
-    #     plt.figure(figsize=(10, 4))
-    #     librosa.display.specshow(
-    #         spectrogram, x_axis="time", y_axis="mel", sr=22050, 
cmap="viridis"
-    #     )
-    #     plt.colorbar(format="%+2.0f dB")
-    #     plt.title("Mel Spectrogram")
-    #     plt.savefig("spectrogram.jpg")
diff --git a/src/main/python/systemds/scuro/representations/representation.py 
b/src/main/python/systemds/scuro/representations/representation.py
index 92967ed9c5..a9f283b6fe 100644
--- a/src/main/python/systemds/scuro/representations/representation.py
+++ b/src/main/python/systemds/scuro/representations/representation.py
@@ -18,8 +18,24 @@
 # under the License.
 #
 # -------------------------------------------------------------
+from abc import abstractmethod
 
 
 class Representation:
-    def __init__(self, name):
+    def __init__(self, name, parameters):
         self.name = name
+        self._parameters = parameters
+
+    @property
+    def parameters(self):
+        return self._parameters
+
+    def get_current_parameters(self):
+        current_params = {}
+        for parameter in self.parameters.keys():
+            current_params[parameter] = getattr(self, parameter)
+        return current_params
+
+    def set_parameters(self, parameters):
+        for parameter in parameters:
+            setattr(self, parameter, parameters[parameter])
diff --git a/src/main/python/systemds/scuro/representations/resnet.py 
b/src/main/python/systemds/scuro/representations/resnet.py
index ff63e6766b..60eed9ea12 100644
--- a/src/main/python/systemds/scuro/representations/resnet.py
+++ b/src/main/python/systemds/scuro/representations/resnet.py
@@ -27,6 +27,7 @@ import torch
 import torchvision.models as models
 import torchvision.transforms as transforms
 import numpy as np
+from systemds.scuro.modality.type import ModalityType
 
 if torch.backends.mps.is_available():
     DEVICE = torch.device("mps")
@@ -38,7 +39,11 @@ else:
 
 class ResNet(UnimodalRepresentation):
     def __init__(self, layer="avgpool", model_name="ResNet18", 
output_file=None):
-        super().__init__("ResNet")
+        self.model_name = model_name
+        parameters = self._get_parameters()
+        super().__init__(
+            "ResNet", ModalityType.TIMESERIES, parameters
+        )  # TODO: TIMESERIES only for videos - images would be handled as 
EMBEDDING
 
         self.output_file = output_file
         self.layer_name = layer
@@ -82,6 +87,25 @@ class ResNet(UnimodalRepresentation):
         else:
             raise NotImplementedError
 
+    def _get_parameters(self, high_level=True):
+        parameters = {"model_name": [], "layer_name": []}
+        for m in ["ResNet18", "ResNet34", "ResNet50", "ResNet101", 
"ResNet152"]:
+            parameters["model_name"].append(m)
+
+        if high_level:
+            parameters["layer_name"] = [
+                "conv1",
+                "layer1",
+                "layer2",
+                "layer3",
+                "layer4",
+                "avgpool",
+            ]
+        else:
+            for name, layer in self.model.named_modules():
+                parameters["layer_name"].append(name)
+        return parameters
+
     def transform(self, modality):
 
         t = transforms.Compose(
@@ -135,11 +159,13 @@ class ResNet(UnimodalRepresentation):
                     torch.flatten(pooled, 1).detach().cpu().numpy()
                 )
 
+            embeddings[video_id] = np.array(embeddings[video_id])
+
         transformed_modality = TransformedModality(
-            modality.modality_type, "resnet", modality.metadata
+            self.output_modality_type, "resnet", modality.modality_id, 
modality.metadata
         )
+
         transformed_modality.data = list(embeddings.values())
-        transformed_modality.update_data_layout()
 
         return transformed_modality
 
diff --git a/src/main/python/systemds/scuro/representations/tfidf.py 
b/src/main/python/systemds/scuro/representations/tfidf.py
index 02cfb927c7..30a6655150 100644
--- a/src/main/python/systemds/scuro/representations/tfidf.py
+++ b/src/main/python/systemds/scuro/representations/tfidf.py
@@ -18,27 +18,32 @@
 # under the License.
 #
 # -------------------------------------------------------------
+import numpy as np
 
 from sklearn.feature_extraction.text import TfidfVectorizer
 from systemds.scuro.modality.transformed import TransformedModality
 from systemds.scuro.representations.unimodal import UnimodalRepresentation
-from systemds.scuro.representations.utils import read_data_from_file, 
save_embeddings
+from systemds.scuro.representations.utils import save_embeddings
+
+from systemds.scuro.modality.type import ModalityType
 
 
 class TfIdf(UnimodalRepresentation):
-    def __init__(self, min_df, output_file=None):
-        super().__init__("TF-IDF")
+    def __init__(self, min_df=2, output_file=None):
+        parameters = {"min_df": [min_df]}
+        super().__init__("TF-IDF", ModalityType.EMBEDDING, parameters)
         self.min_df = min_df
         self.output_file = output_file
 
     def transform(self, modality):
         transformed_modality = TransformedModality(
-            modality.modality_type, self, modality.metadata
+            modality.modality_type, self, modality.modality_id, 
modality.metadata
         )
+
         vectorizer = TfidfVectorizer(min_df=self.min_df)
 
         X = vectorizer.fit_transform(modality.data)
-        X = X.toarray()
+        X = [np.array(x).reshape(1, -1) for x in X.toarray()]
 
         if self.output_file is not None:
             save_embeddings(X, self.output_file)
diff --git a/src/main/python/systemds/scuro/representations/unimodal.py 
b/src/main/python/systemds/scuro/representations/unimodal.py
index c56d611a74..559eec1401 100644
--- a/src/main/python/systemds/scuro/representations/unimodal.py
+++ b/src/main/python/systemds/scuro/representations/unimodal.py
@@ -18,17 +18,25 @@
 # under the License.
 #
 # -------------------------------------------------------------
+import abc
+
 from systemds.scuro.representations.representation import Representation
 
 
 class UnimodalRepresentation(Representation):
-    def __init__(self, name):
+    def __init__(self, name: str, output_modality_type, parameters=None):
         """
         Parent class for all unimodal representation types
         :param name: name of the representation
+        :param parameters: parameters of the representation; name of the 
parameter and
+        possible parameter values
         """
-        super().__init__(name)
+        super().__init__(name, parameters)
+        self.output_modality_type = output_modality_type
+        if parameters is None:
+            parameters = {}
 
+    @abc.abstractmethod
     def transform(self, data):
         raise f"Not implemented for {self.name}"
 
diff --git a/src/main/python/systemds/scuro/representations/window.py 
b/src/main/python/systemds/scuro/representations/window.py
index af0301d0e3..264d40ca42 100644
--- a/src/main/python/systemds/scuro/representations/window.py
+++ b/src/main/python/systemds/scuro/representations/window.py
@@ -21,29 +21,67 @@
 import numpy as np
 import math
 
+from systemds.scuro.modality.type import DataLayout
 
-# TODO: move this into the aggregation class and add an aggregate() and a 
window(window_size) function there so they can use the same functionality.
-class WindowAggregation:
-    def __init__(self, window_size, aggregation_function):
+# from systemds.scuro.drsearch.operator_registry import 
register_context_operator
+from systemds.scuro.representations.aggregate import Aggregation
+from systemds.scuro.representations.context import Context
+
+
+# @register_context_operator()
+class WindowAggregation(Context):
+    def __init__(self, window_size=10, aggregation_function="mean"):
+        parameters = {
+            "window_size": [window_size],
+            "aggregation_function": 
list(Aggregation().get_aggregation_functions()),
+        }  # TODO: window_size should be dynamic and adapted to the shape of 
the data
+        super().__init__("WindowAggregation", parameters)
         self.window_size = window_size
         self.aggregation_function = aggregation_function
 
-    def window(self, modality):
-        # data is a 2d array
+    @property
+    def aggregation_function(self):
+        return self._aggregation_function
+
+    @aggregation_function.setter
+    def aggregation_function(self, value):
+        self._aggregation_function = Aggregation(value)
+
+    def execute(self, modality):
         windowed_data = []
         for instance in modality.data:
-            window_length = math.ceil(len(instance) / self.window_size)
-            result = [[] for _ in range(0, window_length)]
-            # if modality.schema["data_layout"]["representation"] == 
"list_of_lists_of_numpy_array":
-            data = np.stack(instance)
-            for i in range(0, window_length):
-                result[i] = np.mean(
-                    data[
-                        i * self.window_size : i * self.window_size + 
self.window_size
-                    ],
-                    axis=0,
-                )  # TODO: add actual aggregation function here
-
-            windowed_data.append(result)
+            new_length = math.ceil(len(instance) / self.window_size)
+            if modality.get_data_layout() == DataLayout.SINGLE_LEVEL:
+                windowed_instance = self.window_aggregate_single_level(
+                    instance, new_length
+                )
+            else:
+                windowed_instance = self.window_aggregate_nested_level(
+                    instance, new_length
+                )
+
+            windowed_data.append(windowed_instance)
 
         return windowed_data
+
+    def window_aggregate_single_level(self, instance, new_length):
+        num_cols = instance.shape[1] if instance.ndim > 1 else 1
+        result = np.empty((new_length, num_cols))
+        for i in range(0, new_length):
+            result[i] = self.aggregation_function.aggregate_instance(
+                instance[i * self.window_size : i * self.window_size + 
self.window_size]
+            )
+
+        if num_cols == 1:
+            result = result.reshape(-1)
+        return result
+
+    def window_aggregate_nested_level(self, instance, new_length):
+        result = [[] for _ in range(0, new_length)]
+        data = np.stack(instance)
+        for i in range(0, new_length):
+            result[i] = self.aggregation_function.aggregate_instance(
+                data[i * self.window_size : i * self.window_size + 
self.window_size]
+            )
+
+        return result
diff --git a/src/main/python/systemds/scuro/representations/word2vec.py 
b/src/main/python/systemds/scuro/representations/word2vec.py
index b68a9fd3eb..929dbd4415 100644
--- a/src/main/python/systemds/scuro/representations/word2vec.py
+++ b/src/main/python/systemds/scuro/representations/word2vec.py
@@ -25,6 +25,11 @@ from systemds.scuro.representations.utils import 
save_embeddings
 from gensim.models import Word2Vec
 from gensim.utils import tokenize
 
+from systemds.scuro.modality.type import ModalityType
+import nltk
+
+nltk.download("punkt_tab")
+
 
 def get_embedding(sentence, model):
     vectors = []
@@ -36,8 +41,13 @@ def get_embedding(sentence, model):
 
 
 class W2V(UnimodalRepresentation):
-    def __init__(self, vector_size, min_count, window, output_file=None):
-        super().__init__("Word2Vec")
+    def __init__(self, vector_size=3, min_count=2, window=2, output_file=None):
+        parameters = {
+            "vector_size": [vector_size],
+            "min_count": [min_count],
+            "window": [window],
+        }  # TODO
+        super().__init__("Word2Vec", ModalityType.EMBEDDING, parameters)
         self.vector_size = vector_size
         self.min_count = min_count
         self.window = window
@@ -45,7 +55,7 @@ class W2V(UnimodalRepresentation):
 
     def transform(self, modality):
         transformed_modality = TransformedModality(
-            modality.modality_type, self, modality.metadata
+            modality.modality_type, self, modality.modality_id, 
modality.metadata
         )
         t = [list(tokenize(s.lower())) for s in modality.data]
         model = Word2Vec(
@@ -57,7 +67,7 @@ class W2V(UnimodalRepresentation):
         embeddings = []
         for sentences in modality.data:
             tokens = list(tokenize(sentences.lower()))
-            embeddings.append(get_embedding(tokens, model))
+            embeddings.append(np.array(get_embedding(tokens, 
model)).reshape(1, -1))
 
         if self.output_file is not None:
             save_embeddings(np.array(embeddings), self.output_file)
diff --git a/src/main/python/tests/scuro/data_generator.py 
b/src/main/python/tests/scuro/data_generator.py
index ec0783df9c..48ff208e43 100644
--- a/src/main/python/tests/scuro/data_generator.py
+++ b/src/main/python/tests/scuro/data_generator.py
@@ -26,10 +26,54 @@ from scipy.io.wavfile import write
 import random
 import os
 
-from systemds.scuro import VideoLoader, AudioLoader, TextLoader, 
UnimodalModality
+from systemds.scuro import (
+    VideoLoader,
+    AudioLoader,
+    TextLoader,
+    UnimodalModality,
+    TransformedModality,
+)
 from systemds.scuro.modality.type import ModalityType
 
 
+class ModalityRandomDataGenerator:
+
+    def __init__(self):
+        self._modality_id = 0
+
+    def create1DModality(
+        self,
+        num_instances,
+        num_features,
+        modality_type,
+    ):
+        data = np.random.rand(num_instances, num_features)
+        # TODO: write a dummy method to create the same metadata for all 
instances to avoid the for loop
+        metadata = {}
+        for i in range(num_instances):
+            if modality_type == ModalityType.AUDIO:
+                metadata[i] = modality_type.create_audio_metadata(
+                    num_features / 10, data[i]
+                )
+            elif modality_type == ModalityType.TEXT:
+                metadata[i] = modality_type.create_text_metadata(
+                    num_features / 10, data[i]
+                )
+            elif modality_type == ModalityType.VIDEO:
+                metadata[i] = modality_type.create_video_metadata(
+                    num_features / 30, 10, 0, 0, 1
+                )
+            else:
+                raise NotImplementedError
+
+        tf_modality = TransformedModality(
+            modality_type, "test_transformation", self._modality_id, metadata
+        )
+        tf_modality.data = data
+        self._modality_id += 1
+        return tf_modality
+
+
 def setup_data(modalities, num_instances, path):
     if os.path.isdir(path):
         shutil.rmtree(path)
@@ -51,7 +95,7 @@ def setup_data(modalities, num_instances, path):
         else:
             raise "Modality not supported in DataGenerator"
 
-        modalities_to_create.append(UnimodalModality(data_loader, modality))
+        modalities_to_create.append(UnimodalModality(data_loader))
 
     data_generator = TestDataGenerator(modalities_to_create, path)
     data_generator.create_multimodal_data(num_instances)
diff --git a/src/main/python/tests/scuro/test_data_loaders.py 
b/src/main/python/tests/scuro/test_data_loaders.py
index 4ca77b205d..85da2919a0 100644
--- a/src/main/python/tests/scuro/test_data_loaders.py
+++ b/src/main/python/tests/scuro/test_data_loaders.py
@@ -22,6 +22,8 @@
 import os
 import shutil
 import unittest
+import numpy as np
+
 from systemds.scuro.modality.unimodal_modality import UnimodalModality
 from systemds.scuro.representations.bert import Bert
 from systemds.scuro.representations.mel_spectrogram import MelSpectrogram
@@ -72,40 +74,32 @@ class TestDataLoaders(unittest.TestCase):
             self.data_generator.get_modality_path(ModalityType.AUDIO),
             self.data_generator.indices,
         )
-        audio = UnimodalModality(
-            audio_data_loader, ModalityType.AUDIO
-        ).apply_representation(MelSpectrogram())
+        audio = UnimodalModality(audio_data_loader).apply_representation(
+            MelSpectrogram()
+        )
 
         for i in range(0, self.num_instances):
-            assert round(sum(sum(self.audio_ref.data[i])), 4) == round(
-                sum(sum(audio.data[i])), 4
-            )
+            np.testing.assert_almost_equal(self.audio_ref.data[i], 
audio.data[i])
 
     def test_load_video_data_from_file(self):
         video_data_loader = VideoLoader(
             self.data_generator.get_modality_path(ModalityType.VIDEO),
             self.data_generator.indices,
         )
-        video = UnimodalModality(
-            video_data_loader, ModalityType.VIDEO
-        ).apply_representation(ResNet())
+        video = 
UnimodalModality(video_data_loader).apply_representation(ResNet())
 
         for i in range(0, self.num_instances):
-            assert round(sum(sum(self.video_ref.data[i])), 4) == round(
-                sum(sum(video.data[i])), 4
-            )
+            np.testing.assert_almost_equal(self.video_ref.data[i], 
video.data[i])
 
     def test_load_text_data_from_file(self):
         text_data_loader = TextLoader(
             self.data_generator.get_modality_path(ModalityType.TEXT),
             self.data_generator.indices,
         )
-        text = UnimodalModality(
-            text_data_loader, ModalityType.TEXT
-        ).apply_representation(Bert())
+        text = UnimodalModality(text_data_loader).apply_representation(Bert())
 
         for i in range(0, self.num_instances):
-            assert round(sum(self.text_ref.data[i]), 4) == 
round(sum(text.data[i]), 4)
+            np.testing.assert_almost_equal(self.text_ref.data[i], text.data[i])
 
 
 if __name__ == "__main__":
diff --git a/src/main/python/tests/scuro/test_dr_search.py 
b/src/main/python/tests/scuro/test_dr_search.py
index f2ba9d2d79..0959c246e0 100644
--- a/src/main/python/tests/scuro/test_dr_search.py
+++ b/src/main/python/tests/scuro/test_dr_search.py
@@ -43,6 +43,7 @@ from systemds.scuro.representations.resnet import ResNet
 from systemds.scuro.representations.sum import Sum
 from tests.scuro.data_generator import setup_data
 
+
 import warnings
 
 warnings.filterwarnings("always")
@@ -70,6 +71,7 @@ class TestSVM(Model):
 
 
 def scale_data(data, train_indizes):
+    data = np.array(data).reshape(len(data), -1)
     scaler = MinMaxScaler(feature_range=(0, 1))
     scaler.fit(data[train_indizes])
     return scaler.transform(data)
@@ -111,7 +113,7 @@ class TestDataLoaders(unittest.TestCase):
         cls.resnet = (
             cls.data_generator.modalities_by_type[ModalityType.VIDEO]
             .apply_representation(ResNet())
-            .window(10, "avg")
+            .window(10, "mean")
             .flatten()
         )
         cls.mods = [cls.bert, cls.mel_spe, cls.resnet]
diff --git a/src/main/python/tests/scuro/test_multimodal_join.py 
b/src/main/python/tests/scuro/test_multimodal_join.py
index c48f5f56b2..8388829f30 100644
--- a/src/main/python/tests/scuro/test_multimodal_join.py
+++ b/src/main/python/tests/scuro/test_multimodal_join.py
@@ -24,6 +24,7 @@ import shutil
 import unittest
 
 from systemds.scuro.modality.joined import JoinCondition
+from systemds.scuro.representations.aggregate import Aggregation
 from systemds.scuro.representations.window import WindowAggregation
 from systemds.scuro.modality.unimodal_modality import UnimodalModality
 from systemds.scuro.representations.mel_spectrogram import MelSpectrogram
@@ -97,14 +98,14 @@ class TestMultimodalJoin(unittest.TestCase):
             self.data_generator.indices,
             chunk_size=l_chunk_size,
         )
-        video = UnimodalModality(video_data_loader, ModalityType.VIDEO)
+        video = UnimodalModality(video_data_loader)
 
         audio_data_loader = AudioLoader(
             self.data_generator.get_modality_path(ModalityType.AUDIO),
             self.data_generator.indices,
             r_chunk_size,
         )
-        audio = UnimodalModality(audio_data_loader, ModalityType.AUDIO)
+        audio = UnimodalModality(audio_data_loader)
 
         mel_audio = audio.apply_representation(MelSpectrogram())
 
@@ -115,10 +116,8 @@ class TestMultimodalJoin(unittest.TestCase):
             left_modality.join(
                 right_modality, JoinCondition("timestamp", "timestamp", "<")
             )
-            .apply_representation(
-                ResNet(layer="layer1.0.conv2", model_name="ResNet50"),
-                WindowAggregation(window_size=window_size, 
aggregation_function="mean"),
-            )
+            .apply_representation(ResNet(layer="layer1.0.conv2", 
model_name="ResNet50"))
+            .window(window_size, "mean")
             .combine("concat")
         )
 
diff --git a/src/main/python/tests/scuro/test_unimodal_representations.py 
b/src/main/python/tests/scuro/test_unimodal_representations.py
index d566830697..ac167e8fbf 100644
--- a/src/main/python/tests/scuro/test_unimodal_representations.py
+++ b/src/main/python/tests/scuro/test_unimodal_representations.py
@@ -46,7 +46,6 @@ class TestUnimodalRepresentations(unittest.TestCase):
     video = None
     data_generator = None
     num_instances = 0
-    indizes = []
 
     @classmethod
     def setUpClass(cls):
@@ -69,7 +68,7 @@ class TestUnimodalRepresentations(unittest.TestCase):
             self.data_generator.get_modality_path(ModalityType.AUDIO),
             self.data_generator.indices,
         )
-        audio = UnimodalModality(audio_data_loader, ModalityType.AUDIO)
+        audio = UnimodalModality(audio_data_loader)
 
         for representation in audio_representations:
             r = audio.apply_representation(representation)
@@ -82,20 +81,19 @@ class TestUnimodalRepresentations(unittest.TestCase):
             self.data_generator.get_modality_path(ModalityType.VIDEO),
             self.data_generator.indices,
         )
-        video = UnimodalModality(video_data_loader, ModalityType.VIDEO)
+        video = UnimodalModality(video_data_loader)
         for representation in video_representations:
             r = video.apply_representation(representation)
             assert r.data is not None
             assert len(r.data) == self.num_instances
 
     def test_text_representations(self):
-        # TODO: check params fro BOW, W2V, TfIdf
         test_representations = [BoW(2, 2), W2V(5, 2, 2), TfIdf(2), Bert()]
         text_data_loader = TextLoader(
             self.data_generator.get_modality_path(ModalityType.TEXT),
             self.data_generator.indices,
         )
-        text = UnimodalModality(text_data_loader, ModalityType.TEXT)
+        text = UnimodalModality(text_data_loader)
 
         for representation in test_representations:
             r = text.apply_representation(representation)
@@ -109,11 +107,12 @@ class TestUnimodalRepresentations(unittest.TestCase):
             self.data_generator.indices,
             chunk_size=2,
         )
-        video = UnimodalModality(video_data_loader, ModalityType.VIDEO)
+        video = UnimodalModality(video_data_loader)
         for representation in video_representations:
             r = video.apply_representation(representation)
             assert r.data is not None
             assert len(r.data) == self.num_instances
+            assert len(r.metadata) == self.num_instances
 
 
 if __name__ == "__main__":
diff --git a/src/main/python/tests/scuro/test_window_operations.py 
b/src/main/python/tests/scuro/test_window_operations.py
new file mode 100644
index 0000000000..d7210ddb6d
--- /dev/null
+++ b/src/main/python/tests/scuro/test_window_operations.py
@@ -0,0 +1,106 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+import unittest
+import math
+
+import numpy as np
+
+from tests.scuro.data_generator import ModalityRandomDataGenerator
+from systemds.scuro.modality.type import ModalityType
+
+
class TestWindowOperations(unittest.TestCase):
    """Verifies the window aggregation operator against numpy reference results."""

    # Maps an aggregation name to the numpy reducer used as ground truth.
    # A lookup failure (KeyError) makes a typo'd aggregation name fail loudly
    # instead of silently verifying nothing.
    REFERENCE_AGGREGATIONS = {
        "mean": np.mean,
        "sum": np.sum,
        "max": np.max,
        "min": np.min,
    }

    @classmethod
    def setUpClass(cls):
        cls.num_instances = 40
        cls.data_generator = ModalityRandomDataGenerator()
        cls.aggregations = ["mean", "sum", "max", "min"]

    def test_window_operations_on_audio_representations(self):
        window_size = 10
        self.run_window_operations_for_modality(ModalityType.AUDIO, window_size)

    def test_window_operations_on_video_representations(self):
        window_size = 10
        self.run_window_operations_for_modality(ModalityType.VIDEO, window_size)

    def test_window_operations_on_text_representations(self):
        window_size = 10

        self.run_window_operations_for_modality(ModalityType.TEXT, window_size)

    def run_window_operations_for_modality(self, modality_type, window_size):
        """Window a random modality with every aggregation and verify the output."""
        # Use self.num_instances instead of a duplicated magic number so the
        # instance-count assertion in verify_window_operation stays in sync.
        r = self.data_generator.create1DModality(
            self.num_instances, 100, modality_type
        )
        for aggregation in self.aggregations:
            windowed_modality = r.window(window_size, aggregation)

            self.verify_window_operation(aggregation, r, windowed_modality, window_size)

    def verify_window_operation(
        self, aggregation, modality, windowed_modality, window_size
    ):
        """Check windowed data shape and values against the numpy reference."""
        self.assertIsNotNone(windowed_modality.data)
        self.assertEqual(len(windowed_modality.data), self.num_instances)

        reference = self.REFERENCE_AGGREGATIONS[aggregation]

        # TODO(review): also verify the windowed metadata ("data_layout" shape)
        # once metadata updates for windowing are implemented — confirm intent.
        for i, instance in enumerate(windowed_modality.data):
            # Last window may be partial, hence ceil.
            self.assertEqual(
                len(instance), math.ceil(len(modality.data[i]) / window_size)
            )
            for j in range(len(instance)):
                start = j * window_size
                np.testing.assert_almost_equal(
                    instance[j],
                    reference(
                        modality.data[i][start : start + window_size],
                        axis=0,
                    ),
                )
+
+
+if __name__ == "__main__":
+    unittest.main()

Reply via email to