(systemds) branch main updated: [SYSTEMDS-3887] Create representation optimizer

cdionysio Wed, 28 May 2025 03:27:45 -0700

This is an automated email from the ASF dual-hosted git repository.

cdionysio pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git



The following commit(s) were added to refs/heads/main by this push:
     new 23b3d08eb1 [SYSTEMDS-3887] Create representation optimizer
23b3d08eb1 is described below

commit 23b3d08eb18305612ce038edd0fbd50a011aba9b
Author: Christina Dionysio <diony...@tu-berlin.de>
AuthorDate: Wed May 28 10:52:52 2025 +0200

    [SYSTEMDS-3887] Create representation optimizer
    
    This patch adds an initial version of the representation optimizer for the 
Scuro library. It is a two stage optimization where in the first step the best 
unimodal representation for given raw modalities is found and in the next step 
the k-best unimodal rerpesentations are combined into multimodal 
representations and evaluated against the target downstream task. Additionally, 
this patch adds tests for each stage of the optimizer.
    
    Closes #2267
---
 src/main/python/systemds/scuro/__init__.py         |  83 ++++--
 .../python/systemds/scuro/aligner/alignment.py     |  48 ----
 .../systemds/scuro/dataloader/audio_loader.py      |  11 +-
 .../scuro/{aligner => drsearch}/__init__.py        |   0
 .../scuro/{aligner => drsearch}/dr_search.py       |   4 +-
 .../systemds/scuro/drsearch/fusion_optimizer.py    | 295 +++++++++++++++++++++
 .../scuro/drsearch/hyperparameter_tuner.py         | 106 ++++++++
 .../systemds/scuro/drsearch/operator_registry.py   | 107 ++++++++
 .../systemds/scuro/drsearch/optimization_data.py   | 164 ++++++++++++
 .../scuro/drsearch/representation_cache.py         | 127 +++++++++
 .../{aligner => drsearch}/similarity_measures.py   |   0
 .../systemds/scuro/{aligner => drsearch}/task.py   |  22 +-
 .../drsearch/unimodal_representation_optimizer.py  | 271 +++++++++++++++++++
 src/main/python/systemds/scuro/main.py             |   4 +-
 src/main/python/systemds/scuro/modality/joined.py  |   6 +-
 .../python/systemds/scuro/modality/modality.py     |   2 +-
 .../systemds/scuro/modality/modality_identifier.py |   7 -
 .../python/systemds/scuro/modality/transformed.py  |   5 +-
 .../systemds/scuro/modality/unimodal_modality.py   |   1 -
 .../systemds/scuro/representations/aggregate.py    |  30 ++-
 .../aggregated_representation.py}                  |  29 +-
 .../systemds/scuro/representations/average.py      |   5 +
 .../python/systemds/scuro/representations/bert.py  |  11 +-
 .../python/systemds/scuro/representations/bow.py   |   2 +
 .../scuro/representations/concatenation.py         |   3 +
 .../systemds/scuro/representations/context.py      |   1 -
 .../python/systemds/scuro/representations/glove.py |   4 +-
 .../python/systemds/scuro/representations/lstm.py  |   3 +
 .../python/systemds/scuro/representations/max.py   |   3 +
 .../scuro/representations/mel_spectrogram.py       |  13 +-
 .../{mel_spectrogram.py => mfcc.py}                |  38 ++-
 .../scuro/representations/multiplication.py        |   3 +
 .../systemds/scuro/representations/optical_flow.py |  79 ++++++
 .../systemds/scuro/representations/resnet.py       |  84 ++----
 .../systemds/scuro/representations/rowmax.py       |  78 ------
 .../{mel_spectrogram.py => spectrogram.py}         |  23 +-
 .../python/systemds/scuro/representations/sum.py   |   3 +
 .../representations/swin_video_transformer.py      | 111 ++++++++
 .../python/systemds/scuro/representations/tfidf.py |   2 +
 .../{mel_spectrogram.py => wav2vec.py}             |  52 ++--
 .../systemds/scuro/representations/window.py       |   6 +-
 .../systemds/scuro/representations/word2vec.py     |   6 +-
 .../scuro/representations/{resnet.py => x3d.py}    | 123 +++------
 .../python/systemds/scuro/utils/schema_helpers.py  |   1 -
 .../python/systemds/scuro/utils/torch_dataset.py   |  63 +++++
 src/main/python/tests/scuro/data_generator.py      |  12 +-
 src/main/python/tests/scuro/test_dr_search.py      |   4 +-
 .../python/tests/scuro/test_multimodal_fusion.py   | 202 ++++++++++++++
 .../python/tests/scuro/test_multimodal_join.py     |   2 -
 .../python/tests/scuro/test_operator_registry.py   |  87 ++++++
 .../python/tests/scuro/test_unimodal_optimizer.py  | 203 ++++++++++++++
 51 files changed, 2133 insertions(+), 416 deletions(-)

diff --git a/src/main/python/systemds/scuro/__init__.py 
b/src/main/python/systemds/scuro/__init__.py
index 53b68d430f..4b2185316a 100644
--- a/src/main/python/systemds/scuro/__init__.py
+++ b/src/main/python/systemds/scuro/__init__.py
@@ -24,27 +24,55 @@ from systemds.scuro.dataloader.video_loader import 
VideoLoader
 from systemds.scuro.dataloader.text_loader import TextLoader
 from systemds.scuro.dataloader.json_loader import JSONLoader
 from systemds.scuro.representations.representation import Representation
+from systemds.scuro.representations.aggregate import Aggregation
+from systemds.scuro.representations.aggregated_representation import (
+    AggregatedRepresentation,
+)
 from systemds.scuro.representations.average import Average
+from systemds.scuro.representations.bert import Bert
+from systemds.scuro.representations.bow import BoW
 from systemds.scuro.representations.concatenation import Concatenation
-from systemds.scuro.representations.sum import Sum
+from systemds.scuro.representations.context import Context
+from systemds.scuro.representations.fusion import Fusion
+from systemds.scuro.representations.glove import GloVe
+from systemds.scuro.representations.lstm import LSTM
 from systemds.scuro.representations.max import RowMax
-from systemds.scuro.representations.multiplication import Multiplication
 from systemds.scuro.representations.mel_spectrogram import MelSpectrogram
+from systemds.scuro.representations.mfcc import MFCC
+from systemds.scuro.representations.multiplication import Multiplication
+from systemds.scuro.representations.optical_flow import OpticalFlow
+from systemds.scuro.representations.representation import Representation
+from systemds.scuro.representations.representation_dataloader import NPY
+from systemds.scuro.representations.representation_dataloader import JSON
+from systemds.scuro.representations.representation_dataloader import Pickle
 from systemds.scuro.representations.resnet import ResNet
-from systemds.scuro.representations.bert import Bert
-from systemds.scuro.representations.lstm import LSTM
-from systemds.scuro.representations.bow import BoW
-from systemds.scuro.representations.glove import GloVe
+from systemds.scuro.representations.spectrogram import Spectrogram
+from systemds.scuro.representations.sum import Sum
+from systemds.scuro.representations.swin_video_transformer import 
SwinVideoTransformer
 from systemds.scuro.representations.tfidf import TfIdf
+from systemds.scuro.representations.unimodal import UnimodalRepresentation
+from systemds.scuro.representations.wav2vec import Wav2Vec
+from systemds.scuro.representations.window import WindowAggregation
 from systemds.scuro.representations.word2vec import W2V
+from systemds.scuro.representations.x3d import X3D
 from systemds.scuro.models.model import Model
 from systemds.scuro.models.discrete_model import DiscreteModel
+from systemds.scuro.modality.joined import JoinedModality
+from systemds.scuro.modality.joined_transformed import 
JoinedTransformedModality
 from systemds.scuro.modality.modality import Modality
-from systemds.scuro.modality.unimodal_modality import UnimodalModality
+from systemds.scuro.modality.modality_identifier import ModalityIdentifier
 from systemds.scuro.modality.transformed import TransformedModality
 from systemds.scuro.modality.type import ModalityType
-from systemds.scuro.aligner.dr_search import DRSearch
-from systemds.scuro.aligner.task import Task
+from systemds.scuro.modality.unimodal_modality import UnimodalModality
+from systemds.scuro.drsearch.dr_search import DRSearch
+from systemds.scuro.drsearch.task import Task
+from systemds.scuro.drsearch.fusion_optimizer import FusionOptimizer
+from systemds.scuro.drsearch.operator_registry import Registry
+from systemds.scuro.drsearch.optimization_data import OptimizationData
+from systemds.scuro.drsearch.representation_cache import RepresentationCache
+from systemds.scuro.drsearch.unimodal_representation_optimizer import (
+    UnimodalRepresentationOptimizer,
+)
 
 
 __all__ = [
@@ -53,25 +81,50 @@ __all__ = [
     "VideoLoader",
     "TextLoader",
     "Representation",
+    "Aggregation",
+    "AggregatedRepresentation",
     "Average",
+    "Bert",
+    "BoW",
     "Concatenation",
-    "Sum",
+    "Context",
+    "Fusion",
+    "GloVe",
+    "LSTM",
     "RowMax",
-    "Multiplication",
     "MelSpectrogram",
+    "MFCC",
+    "Multiplication",
+    "OpticalFlow",
+    "Representation",
+    "NPY",
+    "JSON",
+    "Pickle",
     "ResNet",
-    "Bert",
-    "LSTM",
+    "Spectrogram",
+    "Sum",
     "BoW",
-    "GloVe",
+    "SwinVideoTransformer",
     "TfIdf",
+    "UnimodalRepresentation",
+    "Wav2Vec",
+    "WindowAggregation",
     "W2V",
+    "X3D",
     "Model",
     "DiscreteModel",
+    "JoinedModality",
+    "JoinedTransformedModality",
     "Modality",
-    "UnimodalModality",
+    "ModalityIdentifier",
     "TransformedModality",
     "ModalityType",
+    "UnimodalModality",
     "DRSearch",
     "Task",
+    "FusionOptimizer",
+    "Registry",
+    "OptimizationData",
+    "RepresentationCache",
+    "UnimodalRepresentationOptimizer",
 ]
diff --git a/src/main/python/systemds/scuro/aligner/alignment.py 
b/src/main/python/systemds/scuro/aligner/alignment.py
deleted file mode 100644
index 62f88a272b..0000000000
--- a/src/main/python/systemds/scuro/aligner/alignment.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# -------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-# -------------------------------------------------------------
-from aligner.alignment_strategy import AlignmentStrategy
-from modality.modality import Modality
-from modality.representation import Representation
-from aligner.similarity_measures import Measure
-
-
-class Alignment:
-    def __init__(
-        self,
-        modality_a: Modality,
-        modality_b: Modality,
-        strategy: AlignmentStrategy,
-        similarity_measure: Measure,
-    ):
-        """
-        Defines the core of the library where the alignment of two modalities 
is performed
-        :param modality_a: first modality
-        :param modality_b: second modality
-        :param strategy: the alignment strategy used in the alignment process
-        :param similarity_measure: the similarity measure used to check the 
score of the alignment
-        """
-        self.modality_a = modality_a
-        self.modality_b = modality_b
-        self.strategy = strategy
-        self.similarity_measure = similarity_measure
-
-    def align_modalities(self) -> Modality:
-        return Modality(Representation())
diff --git a/src/main/python/systemds/scuro/dataloader/audio_loader.py 
b/src/main/python/systemds/scuro/dataloader/audio_loader.py
index a6a164b4fb..a008962680 100644
--- a/src/main/python/systemds/scuro/dataloader/audio_loader.py
+++ b/src/main/python/systemds/scuro/dataloader/audio_loader.py
@@ -27,13 +27,22 @@ from systemds.scuro.modality.type import ModalityType
 
 class AudioLoader(BaseLoader):
     def __init__(
-        self, source_path: str, indices: List[str], chunk_size: Optional[int] 
= None
+        self,
+        source_path: str,
+        indices: List[str],
+        chunk_size: Optional[int] = None,
+        normalize: bool = True,
     ):
         super().__init__(source_path, indices, chunk_size, ModalityType.AUDIO)
+        self.normalize = normalize
 
     def extract(self, file: str, index: Optional[Union[str, List[str]]] = 
None):
         self.file_sanity_check(file)
         audio, sr = librosa.load(file)
+
+        if self.normalize:
+            audio = librosa.util.normalize(audio)
+
         self.metadata[file] = self.modality_type.create_audio_metadata(sr, 
audio)
 
         self.data.append(audio)
diff --git a/src/main/python/systemds/scuro/aligner/__init__.py 
b/src/main/python/systemds/scuro/drsearch/__init__.py
similarity index 100%
rename from src/main/python/systemds/scuro/aligner/__init__.py
rename to src/main/python/systemds/scuro/drsearch/__init__.py
diff --git a/src/main/python/systemds/scuro/aligner/dr_search.py 
b/src/main/python/systemds/scuro/drsearch/dr_search.py
similarity index 98%
rename from src/main/python/systemds/scuro/aligner/dr_search.py
rename to src/main/python/systemds/scuro/drsearch/dr_search.py
index b46139dff3..2000608a1d 100644
--- a/src/main/python/systemds/scuro/aligner/dr_search.py
+++ b/src/main/python/systemds/scuro/drsearch/dr_search.py
@@ -22,7 +22,7 @@ import itertools
 import random
 from typing import List
 
-from systemds.scuro.aligner.task import Task
+from systemds.scuro.drsearch.task import Task
 from systemds.scuro.modality.modality import Modality
 from systemds.scuro.representations.representation import Representation
 
@@ -111,7 +111,7 @@ class DRSearch:
         representation = random.choice(self.representations)
 
         modality = modality_combination[0].combine(
-            modality_combination[1:], representation
+            list(modality_combination[1:]), representation
         )
 
         scores = self.task.run(modality.data)
diff --git a/src/main/python/systemds/scuro/drsearch/fusion_optimizer.py 
b/src/main/python/systemds/scuro/drsearch/fusion_optimizer.py
new file mode 100644
index 0000000000..7247720f55
--- /dev/null
+++ b/src/main/python/systemds/scuro/drsearch/fusion_optimizer.py
@@ -0,0 +1,295 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+import time
+import copy
+import pickle
+from systemds.scuro.drsearch.operator_registry import Registry
+from systemds.scuro.drsearch.optimization_data import (
+    OptimizationResult,
+    OptimizationStatistics,
+)
+from systemds.scuro.drsearch.representation_cache import RepresentationCache
+from systemds.scuro.drsearch.task import Task
+from systemds.scuro.representations.aggregate import Aggregation
+from systemds.scuro.representations.context import Context
+
+
+def extract_names(operator_chain):
+    result = []
+    for op in operator_chain:
+        result.append(op.name)
+
+    return result
+
+
+class FusionOptimizer:
+    def __init__(
+        self,
+        modalities,
+        task: Task,
+        unimodal_representations_candidates,
+        representation_cache: RepresentationCache,
+        num_best_candidates=4,
+        max_chain_depth=5,
+        debug=False,
+    ):
+        self.modalities = modalities
+        self.task = task
+        self.unimodal_representations_candidates = 
unimodal_representations_candidates
+        self.num_best_candidates = num_best_candidates
+        self.k_best_candidates, self.candidates_per_modality = 
self.get_k_best_results(
+            num_best_candidates
+        )
+        self.operator_registry = Registry()
+        self.max_chain_depth = max_chain_depth
+        self.debug = debug
+        self.evaluated_candidates = set()
+        self.cache = representation_cache
+        self.optimization_statistics = 
OptimizationStatistics(self.k_best_candidates)
+        self.optimization_results = []
+
+    def optimize(self):
+        """
+        This method finds different ways in how to combine modalities and 
evaluates the fused representations against
+        the given task. It can fuse different representations from the same 
modality as well as fuse representations
+        form different modalities.
+        """
+
+        # TODO: add an aligned representation for all modalities with a 
temporal dimension
+        # TODO: keep a map of operator chains so that we don't evaluate them 
multiple times in different orders (if it does not make a difference)
+
+        r = []
+
+        for candidate in self.k_best_candidates:
+            modality = self.candidates_per_modality[str(candidate)]
+            cached_representation, representation_ops, used_op_names = (
+                self.cache.load_from_cache(modality, candidate.operator_chain)
+            )
+            if cached_representation is not None:
+                modality = cached_representation
+            store = False
+            for representation in representation_ops:
+                if isinstance(representation, Context):
+                    modality = modality.context(representation)
+                elif representation.name == "RowWiseConcatenation":
+                    modality = modality.flatten(True)
+                else:
+                    modality = modality.apply_representation(representation)
+                store = True
+            if store:
+                self.cache.save_to_cache(modality, used_op_names, 
representation_ops)
+
+            remaining_candidates = [c for c in self.k_best_candidates if c != 
candidate]
+            r.append(
+                self._optimize_candidate(modality, candidate, 
remaining_candidates, 1)
+            )
+
+        if self.debug:
+            with open(
+                
f"fusion_statistics_{self.task.model.name}_{self.num_best_candidates}_{self.max_chain_depth}.pkl",
+                "wb",
+            ) as fp:
+                pickle.dump(
+                    self.optimization_statistics,
+                    fp,
+                    protocol=pickle.HIGHEST_PROTOCOL,
+                )
+
+            opt_results = copy.deepcopy(self.optimization_results)
+            for i, opt_res in enumerate(self.optimization_results):
+                op_name = []
+                for op in opt_res.operator_chain:
+                    if isinstance(op, list):
+                        for o in op:
+                            if isinstance(o, list):
+                                for j in o:
+                                    op_name.append(j.name)
+                            elif isinstance(o, str):
+                                op_name.append(o)
+                            else:
+                                op_name.append(o.name)
+                    elif isinstance(op, str):
+                        op_name.append(op)
+                    else:
+                        op_name.append(op.name)
+                opt_results[i].operator_chain = op_name
+            with open(
+                
f"fusion_results_{self.task.model.name}_{self.num_best_candidates}_{self.max_chain_depth}.pkl",
+                "wb",
+            ) as fp:
+                pickle.dump(opt_results, fp, protocol=pickle.HIGHEST_PROTOCOL)
+
+            self.optimization_statistics.print_statistics()
+
+    def get_k_best_results(self, k: int):
+        """
+        Get the k best results per modality
+        :param k: number of best results
+        """
+        best_results = []
+        candidate_for_modality = {}
+        for modality in self.modalities:
+            k_results = sorted(
+                self.unimodal_representations_candidates[modality.modality_id][
+                    self.task.model.name
+                ],
+                key=lambda x: x.test_accuracy,
+                reverse=True,
+            )[:k]
+            for k_result in k_results:
+                candidate_for_modality[str(k_result)] = modality
+            best_results.extend(k_results)
+
+        return best_results, candidate_for_modality
+
+    def _optimize_candidate(
+        self, modality, candidate, remaining_candidates, chain_depth
+    ):
+        """
+        Optimize a single candidate by fusing it with others recursively.
+
+        :param candidate: The current candidate representation.
+        :param chain_depth: The current depth of fusion chains.
+        """
+        if chain_depth > self.max_chain_depth:
+            return
+
+        for other_candidate in remaining_candidates:
+            other_modality = self.candidates_per_modality[str(other_candidate)]
+            cached_representation, representation_ops, used_op_names = (
+                self.cache.load_from_cache(
+                    other_modality, other_candidate.operator_chain
+                )
+            )
+            if cached_representation is not None:
+                other_modality = cached_representation
+            store = False
+            for representation in representation_ops:
+                if representation.name == "Aggregation":
+                    params = other_candidate.parameters[representation.name]
+                    representation = Aggregation(
+                        aggregation_function=params["aggregation"]
+                    )
+                if isinstance(representation, Context):
+                    other_modality = other_modality.context(representation)
+                elif isinstance(representation, Aggregation):
+                    other_modality = representation.execute(other_modality)
+                elif representation.name == "RowWiseConcatenation":
+                    other_modality = other_modality.flatten(True)
+                else:
+                    other_modality = 
other_modality.apply_representation(representation)
+                store = True
+            if store:
+                self.cache.save_to_cache(
+                    other_modality, used_op_names, representation_ops
+                )
+
+            fusion_results = self.operator_registry.get_fusion_operators()
+            fusion_representation = None
+            for fusion_operator in fusion_results:
+                fusion_operator = fusion_operator()
+                chain_key = self.create_identifier(
+                    candidate, fusion_operator, other_candidate
+                )
+                # print(fusion_operator.name)
+                representation_start = time.time()
+                if (
+                    isinstance(fusion_operator, Context)
+                    and fusion_representation is not None
+                ):
+                    fusion_representation.context(fusion_operator)
+                elif isinstance(fusion_operator, Context):
+                    continue
+                else:
+                    fused_representation = modality.combine(
+                        other_modality, fusion_operator
+                    )
+
+                representation_end = time.time()
+                if chain_key not in self.evaluated_candidates:
+                    # Evaluate the fused representation
+
+                    score = self.task.run(fused_representation.data)
+                    fusion_params = {fusion_operator.name: 
fusion_operator.parameters}
+                    result = OptimizationResult(
+                        operator_chain=[
+                            candidate.operator_chain,
+                            fusion_operator.name,
+                            other_candidate.operator_chain,
+                        ],
+                        parameters=[
+                            candidate.parameters,
+                            fusion_params,
+                            other_candidate.parameters,
+                        ],
+                        train_accuracy=score[0],
+                        test_accuracy=score[1],
+                        # train_min_it_acc=score[2],
+                        # test_min_it_acc=score[3],
+                        training_runtime=self.task.training_time,
+                        inference_runtime=self.task.inference_time,
+                        representation_time=representation_end - 
representation_start,
+                        output_shape=(1, 1),  # TODO
+                    )
+
+                    # Store the result
+                    self.optimization_results.append(result)
+                    self.optimization_statistics.add_entry(
+                        [
+                            candidate.operator_chain,
+                            [fusion_operator.name],
+                            other_candidate.operator_chain,
+                        ],
+                        score[1],
+                    )
+
+                    # Mark this chain as evaluated
+                    self.evaluated_candidates.add(chain_key)
+
+                    if self.debug:
+                        print(
+                            f"Evaluated chain: {candidate.operator_chain} + 
{fusion_operator.name} + {other_candidate.operator_chain} -> {score[1]}"
+                        )
+
+                    # Recursively optimize further with this fused 
representation
+                    self._optimize_candidate(
+                        fused_representation,
+                        result,
+                        [c for c in remaining_candidates if c != 
other_candidate],
+                        chain_depth + 1,
+                    )
+
+    def create_identifier(self, candidate, fusion, other_candidate):
+        identifier = "".join(flatten_and_join(candidate.operator_chain))
+        identifier += fusion.name
+        identifier += "".join(flatten_and_join(other_candidate.operator_chain))
+
+        return identifier
+
+
+def flatten_and_join(data):
+    flat_list = []
+    for item in data:
+        if isinstance(item, list):
+            flat_list.extend(flatten_and_join(item))
+        else:
+            flat_list.append(item.name if not isinstance(item, str) else item)
+    return flat_list
diff --git a/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py 
b/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
new file mode 100644
index 0000000000..04a3fa4701
--- /dev/null
+++ b/src/main/python/systemds/scuro/drsearch/hyperparameter_tuner.py
@@ -0,0 +1,106 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+import itertools
+import time
+
+import numpy as np
+
+from systemds.scuro.drsearch.optimization_data import OptimizationResult
+from systemds.scuro.representations.context import Context
+
+
+class HyperparameterTuner:
+    def __init__(self, task, n_trials=10, early_stopping_patience=5):
+        self.task = task
+        self.n_trials = n_trials
+        self.early_stopping_patience = early_stopping_patience
+
+    def tune_operator_chain(self, modality, operator_chain):
+        best_result = None
+        best_score = -np.inf
+
+        param_grids = {}
+
+        for operator in operator_chain:
+            param_grids[operator.name] = operator.parameters
+
+        param_combinations = self._generate_search_space(param_grids)
+
+        for params in param_combinations:
+            modified_modality = modality
+            current_chain = []
+
+            representation_start = time.time()
+            try:
+                for operator in operator_chain:
+
+                    if operator.name in params:
+                        operator.set_parameters(params[operator.name])
+
+                    if isinstance(operator, Context):
+                        modified_modality = modified_modality.context(operator)
+                    else:
+                        modified_modality = 
modified_modality.apply_representation(
+                            operator
+                        )
+
+                    current_chain.append(operator)
+
+                representation_end = time.time()
+
+                score = self.task.run(modified_modality.data)
+
+                if score[1] > best_score:
+                    best_score = score[1]
+                    best_params = params
+                    best_result = OptimizationResult(
+                        operator_chain=current_chain,
+                        parameters=params,
+                        train_accuracy=score[0],
+                        test_accuracy=score[1],
+                        training_runtime=self.task.training_time,
+                        inference_runtime=self.task.inference_time,
+                        representation_time=representation_end - 
representation_start,
+                        output_shape=(1, 1),
+                    )
+
+            except Exception as e:
+                print(f"Failed parameter combination {params}: {str(e)}")
+                continue
+
+        return best_result
+
+    def _generate_search_space(self, param_grids):
+        combinations = {}
+        for operator_name, params in param_grids.items():
+            operator_combinations = [
+                dict(zip(params.keys(), v)) for v in 
itertools.product(*params.values())
+            ]
+            combinations[operator_name] = operator_combinations
+
+        keys = list(combinations.keys())
+        values = [combinations[key] for key in keys]
+
+        parameter_grid = [
+            dict(zip(keys, combo)) for combo in itertools.product(*values)
+        ]
+
+        return parameter_grid
diff --git a/src/main/python/systemds/scuro/drsearch/operator_registry.py 
b/src/main/python/systemds/scuro/drsearch/operator_registry.py
new file mode 100644
index 0000000000..942e5bb80e
--- /dev/null
+++ b/src/main/python/systemds/scuro/drsearch/operator_registry.py
@@ -0,0 +1,107 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+from typing import Union, List
+
+from systemds.scuro.modality.type import ModalityType
+from systemds.scuro.representations.representation import Representation
+
+
+class Registry:
+    """
+    A registry for all representations per modality.
+    The representations are stored in a dictionary where a specific modality 
type is the key.
+    Implemented as a singleton.
+    """
+
+    _instance = None
+    _representations = {}
+    _context_operators = []
+    _fusion_operators = []
+
+    def __new__(cls):
+        if not cls._instance:
+            cls._instance = super().__new__(cls)
+            for m_type in ModalityType:
+                cls._representations[m_type] = []
+        return cls._instance
+
+    def add_representation(
+        self, representation: Representation, modality: ModalityType
+    ):
+        self._representations[modality].append(representation)
+
+    def add_context_operator(self, context_operator):
+        self._context_operators.append(context_operator)
+
+    def add_fusion_operator(self, fusion_operator):
+        self._fusion_operators.append(fusion_operator)
+
+    def get_representations(self, modality: ModalityType):
+        return self._representations[modality]
+
+    def get_context_operators(self):
+        return self._context_operators
+
+    def get_fusion_operators(self):
+        return self._fusion_operators
+
+
+def register_representation(modalities: Union[ModalityType, 
List[ModalityType]]):
+    """
+    Decorator to register representation for a specific modality.
+    :param modalities: The modalities for which the representation is to be 
registered
+    """
+    if isinstance(modalities, ModalityType):
+        modalities = [modalities]
+
+    def decorator(cls):
+        for modality in modalities:
+            if modality not in ModalityType:
+                raise f"Modality {modality} not in ModalityTypes please add it 
to constants.py ModalityTypes first!"
+
+            Registry().add_representation(cls, modality)
+        return cls
+
+    return decorator
+
+
+def register_context_operator():
+    """
+    Decorator to register a context operator.
+    """
+
+    def decorator(cls):
+        Registry().add_context_operator(cls)
+        return cls
+
+    return decorator
+
+
+def register_fusion_operator():
+    """
+    Decorator to register a fusion operator.
+    """
+
+    def decorator(cls):
+        Registry().add_fusion_operator(cls)
+        return cls
+
+    return decorator
diff --git a/src/main/python/systemds/scuro/drsearch/optimization_data.py 
b/src/main/python/systemds/scuro/drsearch/optimization_data.py
new file mode 100644
index 0000000000..4ca54c10d3
--- /dev/null
+++ b/src/main/python/systemds/scuro/drsearch/optimization_data.py
@@ -0,0 +1,164 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+from dataclasses import dataclass
+from typing import List, Dict, Any, Union
+
+from systemds.scuro.drsearch.operator_registry import Registry
+from systemds.scuro.representations.representation import Representation
+
+
+@dataclass
+class OptimizationResult:
+    """
+    The OptimizationResult class stores the results of an individual 
optimization
+
+    Attributes:
+        operator_chain (List[str]): stores the name of the operators used in 
the optimization run
+        parameters (Dict[str, Any]): stores the parameters used for the 
operators in the optimization run
+        accuracy (float): stores the test accuracy of the optimization run
+        training_runtime (float): stores the training runtime of the 
optimization run
+        inference_runtime (float): stores the inference runtime of the 
optimization run
+        output_shape (tupe): stores the output shape of the data produced by 
the optimization run
+    """
+
+    operator_chain: List[Representation]
+    parameters: Union[Dict[str, Any], List[Any]]
+    train_accuracy: float
+    test_accuracy: float
+    # train_min_it_acc: float
+    # test_min_it_acc: float
+    training_runtime: float
+    inference_runtime: float
+    representation_time: float
+    output_shape: tuple
+
+    # def __str__(self):
+    #     result_string = ""
+    #     for operator in self.operator_chain:
+    #         if isinstance(operator, List):
+    #             result_string += extract_operator_names(operator)
+    #         else:
+    #             result_string += operator.name
+    #     return result_string
+
+
+@dataclass
+class OptimizationData:
+    representation_name: str
+    mean_accuracy = 0.0
+    min_accuracy = 1.0
+    max_accuracy = 0.0
+    num_times_used = 0
+
+    def add_entry(self, score):
+        self.num_times_used += 1
+        self.min_accuracy = min(score, self.min_accuracy)
+        self.max_accuracy = max(score, self.max_accuracy)
+        if self.num_times_used > 1:
+            self.mean_accuracy += (score - self.mean_accuracy) / 
self.num_times_used
+        else:
+            self.mean_accuracy = score
+
+    def __str__(self):
+        return f"Name: {self.representation_name}  mean: {self.mean_accuracy} 
max: {self.max_accuracy} min: {self.min_accuracy} num_times: 
{self.num_times_used}"
+
+
+def extract_names(operator_chain):
+    result = []
+    for op in operator_chain:
+        result.append(op.name if not isinstance(op, str) else op)
+
+    return result
+
+
+class OptimizationStatistics:
+    optimization_data: Dict[str, OptimizationData] = {}
+    fusion_names = []
+
+    def __init__(self, candidates):
+        for candidate in candidates:
+            representation_name = 
"".join(extract_names(candidate.operator_chain))
+            self.optimization_data[representation_name] = OptimizationData(
+                representation_name
+            )
+
+        for fusion_method in Registry().get_fusion_operators():
+            self.optimization_data[fusion_method.__name__] = OptimizationData(
+                fusion_method.__name__
+            )
+            self.fusion_names.append(fusion_method.__name__)
+
+    def parse_representation_name(self, name):
+        parts = []
+        current_part = ""
+
+        i = 0
+        while i < len(name):
+            found_fusion = False
+            for fusion in self.fusion_names:
+                if name[i:].startswith(fusion):
+                    if current_part:
+                        parts.append(current_part)
+                    parts.append(fusion)
+                    i += len(fusion)
+                    found_fusion = True
+                    break
+
+            if not found_fusion:
+                current_part += name[i]
+                i += 1
+            else:
+                current_part = ""
+
+        if current_part:
+            parts.append(current_part)
+
+        return parts
+
+    def add_entry(self, representations, score):
+        # names = self.parse_representation_name(representation_name)
+
+        for rep in representations:
+            if isinstance(rep[0], list):
+                for r in rep:
+                    name = "".join(extract_names(r))
+                    if self.optimization_data.get(name) is None:
+                        self.optimization_data[name] = OptimizationData(name)
+                    self.optimization_data[name].add_entry(score)
+            else:
+                name = "".join(extract_names(rep))
+                if self.optimization_data.get(name) is None:
+                    self.optimization_data[name] = OptimizationData(name)
+                self.optimization_data[name].add_entry(score)
+
+    def print_statistics(self):
+        for statistic in self.optimization_data.values():
+            print(statistic)
+
+
+def extract_operator_names(operators):
+    names = ""
+    for operator in operators:
+        if isinstance(operator, List):
+            names += extract_operator_names(operator)
+        else:
+            names += operator.name
+    return names
diff --git a/src/main/python/systemds/scuro/drsearch/representation_cache.py 
b/src/main/python/systemds/scuro/drsearch/representation_cache.py
new file mode 100644
index 0000000000..fc78167f2e
--- /dev/null
+++ b/src/main/python/systemds/scuro/drsearch/representation_cache.py
@@ -0,0 +1,127 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+import copy
+import os
+import pickle
+import tempfile
+
+from systemds.scuro.modality.transformed import TransformedModality
+
+
+class RepresentationCache:
+    """ """
+
+    _instance = None
+    _cache_dir = None
+    debug = False
+
+    def __new__(cls, debug=False):
+        if not cls._instance:
+            cls.debug = debug
+            cls._instance = super().__new__(cls)
+            cls._cache_dir = tempfile.TemporaryDirectory()
+            # cls._cache_dir = "representation_cache"
+        return cls._instance
+
+    def _generate_cache_filename(self, modality_id, operators):
+        """
+        Generate a unique filename for an operator based on its name.
+
+        :param operator_name: The name of the operator.
+        :return: A full path to the cache file.
+        """
+        op_names = []
+        filename = modality_id
+        for operator in operators:
+            if isinstance(operator, str):
+                op_names.append(operator)
+                filename += operator
+            else:
+                op_names.append(operator.name)
+                filename += operator.name
+
+        return os.path.join(self._cache_dir.name, filename), op_names  # 
_cache_dir.name
+
+    def save_to_cache(self, modality, used_op_names, operators):
+        """
+        Save data to a cache file.
+
+        :param operator_name: The name of the operator.
+        :param data: The data to save.
+        """
+        filename, op_names = self._generate_cache_filename(
+            str(modality.modality_id) + used_op_names, operators
+        )
+        if not os.path.exists(filename):
+            with open(f"{filename}.pkl", "wb") as f:
+                pickle.dump(modality.data, f)
+
+            with open(f"{filename}.meta", "wb") as f:
+                pickle.dump(modality.metadata, f)
+
+            if self.debug:
+                str_names = ", ".join(op_names)
+                print(
+                    f"Saved data for operator 
{str(modality.modality_id)}{used_op_names}{str_names} to cache: {filename}"
+                )
+
+    def load_from_cache(self, modality, operators):
+        """
+        Load data from a cache file if it exists.
+
+        :param operator_name: The name of the operator.
+        :return: The cached data or None if not found.
+        """
+        ops = copy.deepcopy(operators)
+        filename, op_names = self._generate_cache_filename(
+            str(modality.modality_id), ops
+        )
+        dropped_ops = []
+        while not os.path.exists(f"{filename}.pkl"):
+            op_names.pop()
+            dropped_ops.append(ops.pop())
+            if len(ops) < 1:
+                break
+            filename, op_names = self._generate_cache_filename(
+                str(modality.modality_id), ops
+            )
+
+        dropped_ops.reverse()
+        op_names = "".join(op_names)
+
+        if os.path.exists(f"{filename}.pkl"):
+            with open(f"{filename}.meta", "rb") as f:
+                metadata = pickle.load(f)
+
+            transformed_modality = TransformedModality(
+                modality.modality_type, op_names, modality.modality_id, 
metadata
+            )
+            data = None
+            with open(f"{filename}.pkl", "rb") as f:
+                if self.debug:
+                    print(
+                        f"Loaded cached data for operator 
'{str(modality.modality_id) + op_names}' from {filename}"
+                    )
+                data = pickle.load(f)
+            transformed_modality.data = data
+            return transformed_modality, dropped_ops, op_names
+
+        return None, dropped_ops, op_names
diff --git a/src/main/python/systemds/scuro/aligner/similarity_measures.py 
b/src/main/python/systemds/scuro/drsearch/similarity_measures.py
similarity index 100%
rename from src/main/python/systemds/scuro/aligner/similarity_measures.py
rename to src/main/python/systemds/scuro/drsearch/similarity_measures.py
diff --git a/src/main/python/systemds/scuro/aligner/task.py 
b/src/main/python/systemds/scuro/drsearch/task.py
similarity index 80%
rename from src/main/python/systemds/scuro/aligner/task.py
rename to src/main/python/systemds/scuro/drsearch/task.py
index f33546ae65..7e05a489e4 100644
--- a/src/main/python/systemds/scuro/aligner/task.py
+++ b/src/main/python/systemds/scuro/drsearch/task.py
@@ -18,6 +18,7 @@
 # under the License.
 #
 # -------------------------------------------------------------
+import time
 from typing import List
 
 from systemds.scuro.models.model import Model
@@ -34,6 +35,7 @@ class Task:
         train_indices: List,
         val_indices: List,
         kfold=5,
+        measure_performance=True,
     ):
         """
         Parent class for the prediction task that is performed on top of the 
aligned representation
@@ -51,6 +53,10 @@ class Task:
         self.train_indices = train_indices
         self.val_indices = val_indices
         self.kfold = kfold
+        self.measure_performance = measure_performance
+        self.inference_time = []
+        self.training_time = []
+        self.expected_dim = 1
 
     def get_train_test_split(self, data):
         X_train = [data[i] for i in self.train_indices]
@@ -67,6 +73,8 @@ class Task:
          :param data: The aligned data used in the prediction process
          :return: the validation accuracy
         """
+        self.inference_time = []
+        self.training_time = []
         skf = KFold(n_splits=self.kfold, shuffle=True, random_state=11)
         train_scores = []
         test_scores = []
@@ -76,13 +84,21 @@ class Task:
         for train, test in skf.split(X, y):
             train_X = np.array(X)[train]
             train_y = np.array(y)[train]
-
+            train_start = time.time()
             train_score = self.model.fit(train_X, train_y, X_test, y_test)
+            train_end = time.time()
+            self.training_time.append(train_end - train_start)
             train_scores.append(train_score)
-
-            test_score = self.model.test(X_test, y_test)
+            test_start = time.time()
+            test_score = self.model.test(np.array(X_test), y_test)
+            test_end = time.time()
+            self.inference_time.append(test_end - test_start)
             test_scores.append(test_score)
 
             fold += 1
 
+        if self.measure_performance:
+            self.inference_time = np.mean(self.inference_time)
+            self.training_time = np.mean(self.training_time)
+
         return [np.mean(train_scores), np.mean(test_scores)]
diff --git 
a/src/main/python/systemds/scuro/drsearch/unimodal_representation_optimizer.py 
b/src/main/python/systemds/scuro/drsearch/unimodal_representation_optimizer.py
new file mode 100644
index 0000000000..e59ddbe9be
--- /dev/null
+++ 
b/src/main/python/systemds/scuro/drsearch/unimodal_representation_optimizer.py
@@ -0,0 +1,271 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+import copy
+import os
+import pickle
+import time
+from typing import List
+
+from systemds.scuro.drsearch.operator_registry import Registry
+from systemds.scuro.drsearch.optimization_data import OptimizationResult
+from systemds.scuro.drsearch.representation_cache import RepresentationCache
+from systemds.scuro.drsearch.task import Task
+from systemds.scuro.modality.modality import Modality
+from systemds.scuro.representations.aggregate import Aggregation
+from systemds.scuro.representations.context import Context
+
+
+class UnimodalRepresentationOptimizer:
+    def __init__(
+        self,
+        modalities: List[Modality],
+        tasks: List[Task],
+        max_chain_depth=5,
+        debug=False,
+        folder_name=None,
+    ):
+        self.optimization_results = {}
+        self.modalities = modalities
+        self.tasks = tasks
+        self.operator_registry = Registry()
+        self.initialize_optimization_results()
+        self.max_chain_depth = max_chain_depth
+        self.debug = debug
+        self.cache = RepresentationCache(self.debug)
+        if self.debug:
+            self.folder_name = folder_name
+            os.makedirs(self.folder_name, exist_ok=True)
+
+    def initialize_optimization_results(self):
+        for modality in self.modalities:
+            self.optimization_results[modality.modality_id] = {}
+            for task in self.tasks:
+                
self.optimization_results[modality.modality_id][task.model.name] = []
+
+    def optimize(self):
+        """
+        This method finds different unimodal representations for all given 
modalities
+        """
+
+        for modality in self.modalities:
+            self._optimize_modality(modality)
+
+            copy_results = copy.deepcopy(
+                self.optimization_results[modality.modality_id]
+            )
+            for model in copy_results:
+                for i, model_task in enumerate(copy_results[model]):
+                    ops = []
+                    for op in model_task.operator_chain:
+                        if not isinstance(op, str):
+                            ops.append(op.name)
+                    if len(ops) > 0:
+                        copy_results[model][i].operator_chain = ops
+                if self.debug:
+                    with open(
+                        
f"{self.folder_name}/results_{model}_{modality.modality_type.name}.p",
+                        "wb",
+                    ) as fp:
+                        pickle.dump(
+                            copy_results[model], fp, 
protocol=pickle.HIGHEST_PROTOCOL
+                        )
+
+    def get_k_best_results(self, modality: Modality, k: int, task: Task):
+        """
+        Get the k best results for the given modality
+        :param modality: modality to get the best results for
+        :param k: number of best results
+        """
+        results = sorted(
+            self.optimization_results[modality.modality_id][task.model.name],
+            key=lambda x: x.test_accuracy,
+            reverse=True,
+        )[:k]
+
+        return results
+
+    def _optimize_modality(self, modality: Modality):
+        """
+        Optimize a single modality by leveraging modality specific heuristics 
and incorporating context and
+        stores the resulting operation chains as optimization results.
+        :param modality: modality to optimize
+        """
+
+        representations = 
self._get_compatible_operators(modality.modality_type, [])
+
+        for rep in representations:
+            self._build_operator_chain(modality, [rep()], 1)
+
+    def _get_compatible_operators(self, modality_type, used_operators):
+        next_operators = []
+        for operator in 
self.operator_registry.get_representations(modality_type):
+            if operator.__name__ not in used_operators:
+                next_operators.append(operator)
+
+        for context_operator in self.operator_registry.get_context_operators():
+            if (
+                len(used_operators) == 0
+                or context_operator.__name__ not in used_operators[-1]
+            ):
+                next_operators.append(context_operator)
+
+        return next_operators
+
+    def _build_operator_chain(self, modality, current_operator_chain, depth):
+
+        if depth > self.max_chain_depth:
+            return
+
+        self._apply_operator_chain(modality, current_operator_chain)
+
+        current_modality_type = modality.modality_type
+
+        for operator in current_operator_chain:
+            if hasattr(operator, "output_modality_type"):
+                current_modality_type = operator.output_modality_type
+
+        next_representations = self._get_compatible_operators(
+            current_modality_type, [type(op).__name__ for op in 
current_operator_chain]
+        )
+
+        for next_rep in next_representations:
+            rep_instance = next_rep()
+            new_chain = current_operator_chain + [rep_instance]
+            self._build_operator_chain(modality, new_chain, depth + 1)
+
+    def _evaluate_with_flattened_data(
+        self, modality, operator_chain, op_params, representation_time, task
+    ):
+        from systemds.scuro.representations.aggregated_representation import (
+            AggregatedRepresentation,
+        )
+
+        results = []
+        for aggregation in Aggregation().get_aggregation_functions():
+            start = time.time()
+            agg_operator = AggregatedRepresentation(Aggregation(aggregation, 
True))
+            agg_modality = agg_operator.transform(modality)
+            end = time.time()
+
+            agg_opperator_chain = operator_chain + [agg_operator]
+            agg_params = dict(op_params)
+            agg_params.update({agg_operator.name: agg_operator.parameters})
+
+            score = task.run(agg_modality.data)
+            result = OptimizationResult(
+                operator_chain=agg_opperator_chain,
+                parameters=agg_params,
+                train_accuracy=score[0],
+                test_accuracy=score[1],
+                # train_min_it_acc=score[2],
+                # test_min_it_acc=score[3],
+                training_runtime=task.training_time,
+                inference_runtime=task.inference_time,
+                representation_time=representation_time + end - start,
+                output_shape=(1, 1),  # TODO
+            )
+            results.append(result)
+
+            if self.debug:
+                op_name = ""
+                for operator in agg_opperator_chain:
+                    op_name += str(operator.__class__.__name__)
+                print(f"{task.name} {task.model.name} {op_name}: {score[1]}")
+
+        return results
+
+    def _evaluate_operator_chain(
+        self, modality, operator_chain, op_params, representation_time
+    ):
+        for task in self.tasks:
+            if isinstance(modality.data[0], str):
+                continue
+
+            if (
+                task.expected_dim == 1
+                and not isinstance(modality.data[0], list)
+                and modality.data[0].ndim > 1
+            ):
+                r = self._evaluate_with_flattened_data(
+                    modality, operator_chain, op_params, representation_time, 
task
+                )
+                
self.optimization_results[modality.modality_id][task.model.name].extend(
+                    r
+                )
+            else:
+                score = task.run(modality.data)
+                result = OptimizationResult(
+                    operator_chain=operator_chain,
+                    parameters=op_params,
+                    train_accuracy=score[0],
+                    test_accuracy=score[1],
+                    # train_min_it_acc=score[2],
+                    # test_min_it_acc=score[3],
+                    training_runtime=task.training_time,
+                    inference_runtime=task.inference_time,
+                    representation_time=representation_time,
+                    output_shape=(1, 1),
+                )  # TODO
+                
self.optimization_results[modality.modality_id][task.model.name].append(
+                    result
+                )
+                if self.debug:
+                    op_name = ""
+                    for operator in operator_chain:
+                        op_name += str(operator.__class__.__name__)
+                    print(f"{task.name} {task.model.name} - {op_name}: 
{score[1]}")
+
+    def _apply_operator_chain(self, current_modality, operator_chain):
+        op_params = {}
+        modified_modality = current_modality
+
+        representation_start = time.time()
+        try:
+            cached_representation, representation_ops, used_op_names = (
+                self.cache.load_from_cache(
+                    modified_modality, copy.deepcopy(operator_chain)
+                )
+            )
+            if cached_representation is not None:
+                modified_modality = cached_representation
+            store = False
+            for operator in representation_ops:
+                if isinstance(operator, Context):
+                    modified_modality = modified_modality.context(operator)
+                else:
+                    modified_modality = 
modified_modality.apply_representation(operator)
+                store = True
+                op_params[operator.name] = operator.get_current_parameters()
+            if store:
+                self.cache.save_to_cache(
+                    modified_modality, used_op_names, representation_ops
+                )
+            representation_end = time.time()
+
+            self._evaluate_operator_chain(
+                modified_modality,
+                operator_chain,
+                op_params,
+                representation_end - representation_start,
+            )
+        except Exception as e:
+            print(f"Failed to evaluate chain {operator_chain}: {str(e)}")
+            return
diff --git a/src/main/python/systemds/scuro/main.py 
b/src/main/python/systemds/scuro/main.py
index 8a51e098cc..f88e211157 100644
--- a/src/main/python/systemds/scuro/main.py
+++ b/src/main/python/systemds/scuro/main.py
@@ -25,8 +25,8 @@ from systemds.scuro.representations.average import Average
 from systemds.scuro.representations.concatenation import Concatenation
 from systemds.scuro.modality.unimodal_modality import UnimodalModality
 from systemds.scuro.models.discrete_model import DiscreteModel
-from systemds.scuro.aligner.task import Task
-from systemds.scuro.aligner.dr_search import DRSearch
+from systemds.scuro.drsearch.task import Task
+from systemds.scuro.drsearch.dr_search import DRSearch
 
 from systemds.scuro.dataloader.audio_loader import AudioLoader
 from systemds.scuro.dataloader.text_loader import TextLoader
diff --git a/src/main/python/systemds/scuro/modality/joined.py 
b/src/main/python/systemds/scuro/modality/joined.py
index c1aa26abf6..1a58df9256 100644
--- a/src/main/python/systemds/scuro/modality/joined.py
+++ b/src/main/python/systemds/scuro/modality/joined.py
@@ -18,13 +18,13 @@
 # under the License.
 #
 # -------------------------------------------------------------
+import importlib
 import sys
 
 import numpy as np
 
 from systemds.scuro.modality.joined_transformed import 
JoinedTransformedModality
 from systemds.scuro.modality.modality import Modality
-from systemds.scuro.representations.aggregate import Aggregation
 from systemds.scuro.representations.utils import pad_sequences
 
 
@@ -167,7 +167,9 @@ class JoinedModality(Modality):
     def aggregate(
         self, aggregation_function, field_name
     ):  # TODO: use the filed name to extract data entries from modalities
-        self.aggregation = Aggregation(aggregation_function, field_name)
+        module = 
importlib.import_module("systemds.scuro.representations.aggregate")
+
+        self.aggregation = module.Aggregation(aggregation_function, field_name)
 
         if not self.chunked_execution and self.joined_right:
             return self.aggregation.aggregate(self.joined_right)
diff --git a/src/main/python/systemds/scuro/modality/modality.py 
b/src/main/python/systemds/scuro/modality/modality.py
index c110a24eba..c16db00172 100644
--- a/src/main/python/systemds/scuro/modality/modality.py
+++ b/src/main/python/systemds/scuro/modality/modality.py
@@ -23,7 +23,7 @@ from typing import List
 
 import numpy as np
 
-from systemds.scuro.modality.type import ModalityType, DataLayout
+from systemds.scuro.modality.type import ModalityType
 from systemds.scuro.representations import utils
 
 
diff --git a/src/main/python/systemds/scuro/modality/modality_identifier.py 
b/src/main/python/systemds/scuro/modality/modality_identifier.py
index 95668c6e58..5eeee7dc13 100644
--- a/src/main/python/systemds/scuro/modality/modality_identifier.py
+++ b/src/main/python/systemds/scuro/modality/modality_identifier.py
@@ -18,13 +18,6 @@
 # under the License.
 #
 # -------------------------------------------------------------
-import os
-import pickle
-from typing import List, Dict, Any, Union
-import tempfile
-from systemds.scuro.representations.representation import Representation
-
-
 class ModalityIdentifier:
     """ """
 
diff --git a/src/main/python/systemds/scuro/modality/transformed.py 
b/src/main/python/systemds/scuro/modality/transformed.py
index 2b4b049ef4..aba59c1efb 100644
--- a/src/main/python/systemds/scuro/modality/transformed.py
+++ b/src/main/python/systemds/scuro/modality/transformed.py
@@ -100,7 +100,10 @@ class TransformedModality(Modality):
             self.metadata,
         )
         modalities = [self]
-        modalities.extend(other)
+        if isinstance(other, list):
+            modalities.extend(other)
+        else:
+            modalities.append(other)
         fused_modality.data = fusion_method.transform(modalities)
 
         return fused_modality
diff --git a/src/main/python/systemds/scuro/modality/unimodal_modality.py 
b/src/main/python/systemds/scuro/modality/unimodal_modality.py
index 6173237e0a..714fe42c33 100644
--- a/src/main/python/systemds/scuro/modality/unimodal_modality.py
+++ b/src/main/python/systemds/scuro/modality/unimodal_modality.py
@@ -26,7 +26,6 @@ from systemds.scuro.dataloader.base_loader import BaseLoader
 from systemds.scuro.modality.modality import Modality
 from systemds.scuro.modality.joined import JoinedModality
 from systemds.scuro.modality.transformed import TransformedModality
-from systemds.scuro.modality.type import ModalityType
 from systemds.scuro.modality.modality_identifier import ModalityIdentifier
 
 
diff --git a/src/main/python/systemds/scuro/representations/aggregate.py 
b/src/main/python/systemds/scuro/representations/aggregate.py
index 4b4545ef47..756e6271ea 100644
--- a/src/main/python/systemds/scuro/representations/aggregate.py
+++ b/src/main/python/systemds/scuro/representations/aggregate.py
@@ -20,7 +20,6 @@
 # -------------------------------------------------------------
 import numpy as np
 
-from systemds.scuro.modality.modality import Modality
 from systemds.scuro.representations import utils
 
 
@@ -48,21 +47,28 @@ class Aggregation:
         "sum": _sum_agg.__func__,
     }
 
-    def __init__(self, aggregation_function="mean", pad_modality=False):
+    def __init__(self, aggregation_function="mean", pad_modality=False, 
params=None):
+        if params is not None:
+            aggregation_function = params["aggregation_function"]
+            pad_modality = params["pad_modality"]
+
         if aggregation_function not in self._aggregation_function.keys():
             raise ValueError("Invalid aggregation function")
+
         self._aggregation_func = 
self._aggregation_function[aggregation_function]
         self.name = "Aggregation"
         self.pad_modality = pad_modality
 
+        self.parameters = {
+            "aggregation_function": aggregation_function,
+            "pad_modality": pad_modality,
+        }
+
     def execute(self, modality):
-        aggregated_modality = Modality(
-            modality.modality_type, modality.modality_id, modality.metadata
-        )
-        aggregated_modality.data = []
+        data = []
         max_len = 0
         for i, instance in enumerate(modality.data):
-            aggregated_modality.data.append([])
+            data.append([])
             if isinstance(instance, np.ndarray):
                 aggregated_data = self._aggregation_func(instance)
             else:
@@ -70,22 +76,22 @@ class Aggregation:
                 for entry in instance:
                     aggregated_data.append(self._aggregation_func(entry))
             max_len = max(max_len, len(aggregated_data))
-            aggregated_modality.data[i] = aggregated_data
+            data[i] = aggregated_data
 
         if self.pad_modality:
-            for i, instance in enumerate(aggregated_modality.data):
+            for i, instance in enumerate(data):
                 if isinstance(instance, np.ndarray):
                     if len(instance) < max_len:
                         padded_data = np.zeros(max_len, dtype=instance.dtype)
                         padded_data[: len(instance)] = instance
-                        aggregated_modality.data[i] = padded_data
+                        data[i] = padded_data
                 else:
                     padded_data = []
                     for entry in instance:
                         padded_data.append(utils.pad_sequences(entry, max_len))
-                    aggregated_modality.data[i] = padded_data
+                    data[i] = padded_data
 
-        return aggregated_modality
+        return data
 
     def transform(self, modality):
         return self.execute(modality)
diff --git a/src/main/python/systemds/scuro/aligner/alignment_strategy.py 
b/src/main/python/systemds/scuro/representations/aggregated_representation.py
similarity index 59%
rename from src/main/python/systemds/scuro/aligner/alignment_strategy.py
rename to 
src/main/python/systemds/scuro/representations/aggregated_representation.py
index 698a6d0d98..46e6b8bed2 100644
--- a/src/main/python/systemds/scuro/aligner/alignment_strategy.py
+++ 
b/src/main/python/systemds/scuro/representations/aggregated_representation.py
@@ -18,23 +18,18 @@
 # under the License.
 #
 # -------------------------------------------------------------
-from aligner.similarity_measures import Measure
+from systemds.scuro.modality.transformed import TransformedModality
+from systemds.scuro.representations.representation import Representation
 
 
-class AlignmentStrategy:
-    def __init__(self):
-        pass
+class AggregatedRepresentation(Representation):
+    def __init__(self, aggregation):
+        super().__init__("AggregatedRepresentation", aggregation.parameters)
+        self.aggregation = aggregation
 
-    def align_chunk(self, chunk_a, chunk_b, similarity_measure: Measure):
-        raise "Not implemented error"
-
-
-class ChunkedCrossCorrelation(AlignmentStrategy):
-    def __init__(self):
-        super().__init__()
-
-    def align_chunk(self, chunk_a, chunk_b, similarity_measure: Measure):
-        raise "Not implemented error"
-
-
-# TODO: Add additional alignment methods
+    def transform(self, modality):
+        aggregated_modality = TransformedModality(
+            modality.modality_type, self.name, modality.modality_id, 
modality.metadata
+        )
+        aggregated_modality.data = self.aggregation.execute(modality)
+        return aggregated_modality
diff --git a/src/main/python/systemds/scuro/representations/average.py 
b/src/main/python/systemds/scuro/representations/average.py
index db44050e9e..4c6b0e1787 100644
--- a/src/main/python/systemds/scuro/representations/average.py
+++ b/src/main/python/systemds/scuro/representations/average.py
@@ -27,8 +27,10 @@ from systemds.scuro.modality.modality import Modality
 from systemds.scuro.representations.utils import pad_sequences
 
 from systemds.scuro.representations.fusion import Fusion
+from systemds.scuro.drsearch.operator_registry import register_fusion_operator
 
 
+@register_fusion_operator()
 class Average(Fusion):
     def __init__(self):
         """
@@ -37,6 +39,9 @@ class Average(Fusion):
         super().__init__("Average")
 
     def transform(self, modalities: List[Modality]):
+        for modality in modalities:
+            modality.flatten()
+
         max_emb_size = self.get_max_embedding_size(modalities)
 
         padded_modalities = []
diff --git a/src/main/python/systemds/scuro/representations/bert.py 
b/src/main/python/systemds/scuro/representations/bert.py
index 6395d0b9e6..802d7e3d0b 100644
--- a/src/main/python/systemds/scuro/representations/bert.py
+++ b/src/main/python/systemds/scuro/representations/bert.py
@@ -19,16 +19,16 @@
 #
 # -------------------------------------------------------------
 
-import numpy as np
-
 from systemds.scuro.modality.transformed import TransformedModality
 from systemds.scuro.representations.unimodal import UnimodalRepresentation
 import torch
 from transformers import BertTokenizer, BertModel
 from systemds.scuro.representations.utils import save_embeddings
 from systemds.scuro.modality.type import ModalityType
+from systemds.scuro.drsearch.operator_registry import register_representation
 
 
+@register_representation(ModalityType.TEXT)
 class Bert(UnimodalRepresentation):
     def __init__(self, model_name="bert", output_file=None):
         parameters = {"model_name": "bert"}
@@ -49,7 +49,7 @@ class Bert(UnimodalRepresentation):
         model = BertModel.from_pretrained(model_name)
 
         embeddings = self.create_embeddings(modality.data, model, tokenizer)
-        embeddings = [embeddings[i : i + 1] for i in 
range(embeddings.shape[0])]
+
         if self.output_file is not None:
             save_embeddings(embeddings, self.output_file)
 
@@ -65,7 +65,6 @@ class Bert(UnimodalRepresentation):
                 outputs = model(**inputs)
 
                 cls_embedding = outputs.last_hidden_state[:, 0, 
:].squeeze().numpy()
-                embeddings.append(cls_embedding)
+                embeddings.append(cls_embedding.reshape(1, -1))
 
-        embeddings = np.array(embeddings)
-        return embeddings.reshape((embeddings.shape[0], embeddings.shape[-1]))
+        return embeddings
diff --git a/src/main/python/systemds/scuro/representations/bow.py 
b/src/main/python/systemds/scuro/representations/bow.py
index 52fddc7d3f..e2bc94041f 100644
--- a/src/main/python/systemds/scuro/representations/bow.py
+++ b/src/main/python/systemds/scuro/representations/bow.py
@@ -26,8 +26,10 @@ from systemds.scuro.representations.unimodal import 
UnimodalRepresentation
 from systemds.scuro.representations.utils import save_embeddings
 
 from systemds.scuro.modality.type import ModalityType
+from systemds.scuro.drsearch.operator_registry import register_representation
 
 
+@register_representation(ModalityType.TEXT)
 class BoW(UnimodalRepresentation):
     def __init__(self, ngram_range=2, min_df=2, output_file=None):
         parameters = {"ngram_range": [ngram_range], "min_df": [min_df]}
diff --git a/src/main/python/systemds/scuro/representations/concatenation.py 
b/src/main/python/systemds/scuro/representations/concatenation.py
index fd9293d399..1265563b6c 100644
--- a/src/main/python/systemds/scuro/representations/concatenation.py
+++ b/src/main/python/systemds/scuro/representations/concatenation.py
@@ -28,7 +28,10 @@ from systemds.scuro.representations.utils import 
pad_sequences
 
 from systemds.scuro.representations.fusion import Fusion
 
+from systemds.scuro.drsearch.operator_registry import register_fusion_operator
 
+
+@register_fusion_operator()
 class Concatenation(Fusion):
     def __init__(self, padding=True):
         """
diff --git a/src/main/python/systemds/scuro/representations/context.py 
b/src/main/python/systemds/scuro/representations/context.py
index 4cbcf54f8e..54f22633cc 100644
--- a/src/main/python/systemds/scuro/representations/context.py
+++ b/src/main/python/systemds/scuro/representations/context.py
@@ -19,7 +19,6 @@
 #
 # -------------------------------------------------------------
 import abc
-from typing import List
 
 from systemds.scuro.modality.modality import Modality
 from systemds.scuro.representations.representation import Representation
diff --git a/src/main/python/systemds/scuro/representations/glove.py 
b/src/main/python/systemds/scuro/representations/glove.py
index 7bb586dc99..66a6847a94 100644
--- a/src/main/python/systemds/scuro/representations/glove.py
+++ b/src/main/python/systemds/scuro/representations/glove.py
@@ -23,8 +23,9 @@ from gensim.utils import tokenize
 
 
 from systemds.scuro.representations.unimodal import UnimodalRepresentation
-from systemds.scuro.representations.utils import read_data_from_file, 
save_embeddings
+from systemds.scuro.representations.utils import save_embeddings
 from systemds.scuro.modality.type import ModalityType
+from systemds.scuro.drsearch.operator_registry import register_representation
 
 
 def load_glove_embeddings(file_path):
@@ -38,6 +39,7 @@ def load_glove_embeddings(file_path):
     return embeddings
 
 
+# @register_representation(ModalityType.TEXT)
 class GloVe(UnimodalRepresentation):
     def __init__(self, glove_path, output_file=None):
         super().__init__("GloVe", ModalityType.TEXT)
diff --git a/src/main/python/systemds/scuro/representations/lstm.py 
b/src/main/python/systemds/scuro/representations/lstm.py
index 6f06e762a5..a82a1e2500 100644
--- a/src/main/python/systemds/scuro/representations/lstm.py
+++ b/src/main/python/systemds/scuro/representations/lstm.py
@@ -28,7 +28,10 @@ import numpy as np
 from systemds.scuro.modality.modality import Modality
 from systemds.scuro.representations.fusion import Fusion
 
+from systemds.scuro.drsearch.operator_registry import register_fusion_operator
 
+
+@register_fusion_operator()
 class LSTM(Fusion):
     def __init__(self, width=128, depth=1, dropout_rate=0.1):
         """
diff --git a/src/main/python/systemds/scuro/representations/max.py 
b/src/main/python/systemds/scuro/representations/max.py
index 194b20801e..5a787dcf0c 100644
--- a/src/main/python/systemds/scuro/representations/max.py
+++ b/src/main/python/systemds/scuro/representations/max.py
@@ -28,7 +28,10 @@ from systemds.scuro.representations.utils import 
pad_sequences
 
 from systemds.scuro.representations.fusion import Fusion
 
+from systemds.scuro.drsearch.operator_registry import register_fusion_operator
 
+
+@register_fusion_operator()
 class RowMax(Fusion):
     def __init__(self, split=4):
         """
diff --git a/src/main/python/systemds/scuro/representations/mel_spectrogram.py 
b/src/main/python/systemds/scuro/representations/mel_spectrogram.py
index dfff4f3b7e..4095ceead0 100644
--- a/src/main/python/systemds/scuro/representations/mel_spectrogram.py
+++ b/src/main/python/systemds/scuro/representations/mel_spectrogram.py
@@ -25,8 +25,10 @@ from systemds.scuro.modality.type import ModalityType
 from systemds.scuro.modality.transformed import TransformedModality
 
 from systemds.scuro.representations.unimodal import UnimodalRepresentation
+from systemds.scuro.drsearch.operator_registry import register_representation
 
 
+@register_representation(ModalityType.AUDIO)
 class MelSpectrogram(UnimodalRepresentation):
     def __init__(self, n_mels=128, hop_length=512, n_fft=2048):
         parameters = {
@@ -45,8 +47,15 @@ class MelSpectrogram(UnimodalRepresentation):
         )
         result = []
         max_length = 0
-        for sample in modality.data:
-            S = librosa.feature.melspectrogram(y=sample, sr=22050)
+        for i, sample in enumerate(modality.data):
+            sr = list(modality.metadata.values())[i]["frequency"]
+            S = librosa.feature.melspectrogram(
+                y=sample,
+                sr=sr,
+                n_mels=self.n_mels,
+                hop_length=self.hop_length,
+                n_fft=self.n_fft,
+            )
             S_dB = librosa.power_to_db(S, ref=np.max)
             if S_dB.shape[-1] > max_length:
                 max_length = S_dB.shape[-1]
diff --git a/src/main/python/systemds/scuro/representations/mel_spectrogram.py 
b/src/main/python/systemds/scuro/representations/mfcc.py
similarity index 60%
copy from src/main/python/systemds/scuro/representations/mel_spectrogram.py
copy to src/main/python/systemds/scuro/representations/mfcc.py
index dfff4f3b7e..75cc00d62d 100644
--- a/src/main/python/systemds/scuro/representations/mel_spectrogram.py
+++ b/src/main/python/systemds/scuro/representations/mfcc.py
@@ -25,19 +25,23 @@ from systemds.scuro.modality.type import ModalityType
 from systemds.scuro.modality.transformed import TransformedModality
 
 from systemds.scuro.representations.unimodal import UnimodalRepresentation
+from systemds.scuro.drsearch.operator_registry import register_representation
 
 
-class MelSpectrogram(UnimodalRepresentation):
-    def __init__(self, n_mels=128, hop_length=512, n_fft=2048):
+@register_representation(ModalityType.AUDIO)
+class MFCC(UnimodalRepresentation):
+    def __init__(self, n_mfcc=12, dct_type=2, n_mels=128, hop_length=512):
         parameters = {
-            "n_mels": [20, 32, 64, 128],
+            "n_mfcc": [x for x in range(10, 26)],
+            "dct_type": [1, 2, 3],
             "hop_length": [256, 512, 1024, 2048],
-            "n_fft": [1024, 2048, 4096],
-        }
-        super().__init__("MelSpectrogram", ModalityType.TIMESERIES, parameters)
+            "n_mels": [20, 32, 64, 128],
+        }  # TODO
+        super().__init__("MFCC", ModalityType.TIMESERIES, parameters)
+        self.n_mfcc = n_mfcc
+        self.dct_type = dct_type
         self.n_mels = n_mels
         self.hop_length = hop_length
-        self.n_fft = n_fft
 
     def transform(self, modality):
         transformed_modality = TransformedModality(
@@ -45,12 +49,20 @@ class MelSpectrogram(UnimodalRepresentation):
         )
         result = []
         max_length = 0
-        for sample in modality.data:
-            S = librosa.feature.melspectrogram(y=sample, sr=22050)
-            S_dB = librosa.power_to_db(S, ref=np.max)
-            if S_dB.shape[-1] > max_length:
-                max_length = S_dB.shape[-1]
-            result.append(S_dB.T)
+        for i, sample in enumerate(modality.data):
+            sr = list(modality.metadata.values())[i]["frequency"]
+            mfcc = librosa.feature.mfcc(
+                y=sample,
+                sr=sr,
+                n_mfcc=self.n_mfcc,
+                dct_type=self.dct_type,
+                hop_length=self.hop_length,
+                n_mels=self.n_mels,
+            )
+            mfcc = (mfcc - np.mean(mfcc)) / np.std(mfcc)
+            if mfcc.shape[-1] > max_length:  # TODO: check if this needs to be 
done
+                max_length = mfcc.shape[-1]
+            result.append(mfcc.T)
 
         transformed_modality.data = result
         return transformed_modality
diff --git a/src/main/python/systemds/scuro/representations/multiplication.py 
b/src/main/python/systemds/scuro/representations/multiplication.py
index 2934fe5b3c..8d1e7f8c90 100644
--- a/src/main/python/systemds/scuro/representations/multiplication.py
+++ b/src/main/python/systemds/scuro/representations/multiplication.py
@@ -28,7 +28,10 @@ from systemds.scuro.representations.utils import 
pad_sequences
 
 from systemds.scuro.representations.fusion import Fusion
 
+from systemds.scuro.drsearch.operator_registry import register_fusion_operator
 
+
+@register_fusion_operator()
 class Multiplication(Fusion):
     def __init__(self):
         """
diff --git a/src/main/python/systemds/scuro/representations/optical_flow.py 
b/src/main/python/systemds/scuro/representations/optical_flow.py
new file mode 100644
index 0000000000..1fb922d7a3
--- /dev/null
+++ b/src/main/python/systemds/scuro/representations/optical_flow.py
@@ -0,0 +1,79 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+import cv2
+
+from systemds.scuro.modality.transformed import TransformedModality
+from systemds.scuro.representations.unimodal import UnimodalRepresentation
+from typing import Callable, Dict, Tuple, Any
+import torch.utils.data
+import torch
+import torchvision.models as models
+import numpy as np
+from systemds.scuro.modality.type import ModalityType
+from systemds.scuro.drsearch.operator_registry import register_representation
+
+from systemds.scuro.utils.torch_dataset import CustomDataset
+
+if torch.backends.mps.is_available():
+    DEVICE = torch.device("mps")
+# elif torch.cuda.is_available():
+#     DEVICE = torch.device("cuda")
+else:
+    DEVICE = torch.device("cpu")
+
+
+# @register_representation([ModalityType.VIDEO])
+class OpticalFlow(UnimodalRepresentation):
+    def __init__(self):
+        parameters = {}
+        super().__init__("OpticalFlow", ModalityType.TIMESERIES, parameters)
+
+    def transform(self, modality):
+        transformed_modality = TransformedModality(
+            self.output_modality_type,
+            "opticalFlow",
+            modality.modality_id,
+            modality.metadata,
+        )
+
+        for video_id, instance in enumerate(modality.data):
+            transformed_modality.data.append([])
+
+            previous_gray = cv2.cvtColor(instance[0], cv2.COLOR_BGR2GRAY)
+            for frame_id in range(1, len(instance)):
+                gray = cv2.cvtColor(instance[frame_id], cv2.COLOR_BGR2GRAY)
+
+                flow = cv2.calcOpticalFlowFarneback(
+                    previous_gray,
+                    gray,
+                    None,
+                    pyr_scale=0.5,
+                    levels=3,
+                    winsize=15,
+                    iterations=3,
+                    poly_n=5,
+                    poly_sigma=1.1,
+                    flags=0,
+                )
+
+                transformed_modality.data[video_id].append(flow)
+        transformed_modality.update_metadata()
+        return transformed_modality
diff --git a/src/main/python/systemds/scuro/representations/resnet.py 
b/src/main/python/systemds/scuro/representations/resnet.py
index 60eed9ea12..68771eccdd 100644
--- a/src/main/python/systemds/scuro/representations/resnet.py
+++ b/src/main/python/systemds/scuro/representations/resnet.py
@@ -18,14 +18,14 @@
 # under the License.
 #
 # -------------------------------------------------------------
-
+from systemds.scuro.utils.torch_dataset import CustomDataset
 from systemds.scuro.modality.transformed import TransformedModality
 from systemds.scuro.representations.unimodal import UnimodalRepresentation
 from typing import Callable, Dict, Tuple, Any
+from systemds.scuro.drsearch.operator_registry import register_representation
 import torch.utils.data
 import torch
 import torchvision.models as models
-import torchvision.transforms as transforms
 import numpy as np
 from systemds.scuro.modality.type import ModalityType
 
@@ -37,17 +37,19 @@ else:
     DEVICE = torch.device("cpu")
 
 
+@register_representation(
+    [ModalityType.IMAGE, ModalityType.VIDEO, ModalityType.TIMESERIES]
+)
 class ResNet(UnimodalRepresentation):
     def __init__(self, layer="avgpool", model_name="ResNet18", 
output_file=None):
         self.model_name = model_name
         parameters = self._get_parameters()
         super().__init__(
             "ResNet", ModalityType.TIMESERIES, parameters
-        )  # TODO: TIMESERIES only for videos - images would be handled as 
EMBEDDIGN
+        )  # TODO: TIMESERIES only for videos - images would be handled as 
EMBEDDING
 
         self.output_file = output_file
         self.layer_name = layer
-        self.model = model_name
         self.model.eval()
         for param in self.model.parameters():
             param.requires_grad = False
@@ -59,29 +61,30 @@ class ResNet(UnimodalRepresentation):
         self.model.fc = Identity()
 
     @property
-    def model(self):
-        return self._model
-
-    @model.setter
-    def model(self, model):
-        if model == "ResNet18":
-            self._model = 
models.resnet18(weights=models.ResNet18_Weights.DEFAULT).to(
+    def model_name(self):
+        return self._model_name
+
+    @model_name.setter
+    def model_name(self, model_name):
+        self._model_name = model_name
+        if model_name == "ResNet18":
+            self.model = 
models.resnet18(weights=models.ResNet18_Weights.DEFAULT).to(
                 DEVICE
             )
-        elif model == "ResNet34":
-            self._model = 
models.resnet34(weights=models.ResNet34_Weights.DEFAULT).to(
+        elif model_name == "ResNet34":
+            self.model = 
models.resnet34(weights=models.ResNet34_Weights.DEFAULT).to(
                 DEVICE
             )
-        elif model == "ResNet50":
-            self._model = 
models.resnet50(weights=models.ResNet50_Weights.DEFAULT).to(
+        elif model_name == "ResNet50":
+            self.model = 
models.resnet50(weights=models.ResNet50_Weights.DEFAULT).to(
                 DEVICE
             )
-        elif model == "ResNet101":
-            self._model = 
models.resnet101(weights=models.ResNet101_Weights.DEFAULT).to(
+        elif model_name == "ResNet101":
+            self.model = 
models.resnet101(weights=models.ResNet101_Weights.DEFAULT).to(
                 DEVICE
             )
-        elif model == "ResNet152":
-            self._model = 
models.resnet152(weights=models.ResNet152_Weights.DEFAULT).to(
+        elif model_name == "ResNet152":
+            self.model = 
models.resnet152(weights=models.ResNet152_Weights.DEFAULT).to(
                 DEVICE
             )
         else:
@@ -107,20 +110,7 @@ class ResNet(UnimodalRepresentation):
         return parameters
 
     def transform(self, modality):
-
-        t = transforms.Compose(
-            [
-                transforms.ToPILImage(),
-                transforms.Resize(256),
-                transforms.CenterCrop(224),
-                transforms.ToTensor(),
-                transforms.Normalize(
-                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
-                ),
-            ]
-        )
-
-        dataset = ResNetDataset(modality.data, t)
+        dataset = CustomDataset(modality.data)
         embeddings = {}
 
         res5c_output = None
@@ -168,31 +158,3 @@ class ResNet(UnimodalRepresentation):
         transformed_modality.data = list(embeddings.values())
 
         return transformed_modality
-
-
-class ResNetDataset(torch.utils.data.Dataset):
-    def __init__(self, data: str, tf: Callable = None):
-        self.data = data
-        self.tf = tf
-
-    def __getitem__(self, index) -> Dict[str, object]:
-        data = self.data[index]
-        if type(data) is np.ndarray:
-            output = torch.empty((1, 3, 224, 224))
-            d = torch.tensor(data)
-            d = d.repeat(3, 1, 1)
-            output[0] = self.tf(d)
-        else:
-            output = torch.empty((len(data), 3, 224, 224))
-
-            for i, d in enumerate(data):
-                if data[0].ndim < 3:
-                    d = torch.tensor(d)
-                    d = d.repeat(3, 1, 1)
-
-                output[i] = self.tf(d)
-
-        return {"id": index, "data": output}
-
-    def __len__(self) -> int:
-        return len(self.data)
diff --git a/src/main/python/systemds/scuro/representations/rowmax.py 
b/src/main/python/systemds/scuro/representations/rowmax.py
deleted file mode 100644
index 3152782026..0000000000
--- a/src/main/python/systemds/scuro/representations/rowmax.py
+++ /dev/null
@@ -1,78 +0,0 @@
-# -------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-# -------------------------------------------------------------
-import itertools
-from typing import List
-
-import numpy as np
-
-from systemds.scuro.modality.modality import Modality
-from systemds.scuro.representations.utils import pad_sequences
-
-from systemds.scuro.representations.fusion import Fusion
-
-
-class RowMax(Fusion):
-    def __init__(self, split=1):
-        """
-        Combines modalities by computing the outer product of a modality 
combination and
-        taking the row max
-        """
-        super().__init__("RowMax")
-        self.split = split
-
-    def transform(self, modalities: List[Modality]):
-        if len(modalities) < 2:
-            return np.array(modalities)
-
-        max_emb_size = self.get_max_embedding_size(modalities)
-
-        padded_modalities = []
-        for modality in modalities:
-            d = pad_sequences(modality.data, maxlen=max_emb_size, 
dtype="float32")
-            padded_modalities.append(d)
-
-        split_rows = int(len(modalities[0].data) / self.split)
-
-        data = []
-
-        for combination in itertools.combinations(padded_modalities, 2):
-            combined = None
-            for i in range(0, self.split):
-                start = split_rows * i
-                end = (
-                    split_rows * (i + 1)
-                    if i < (self.split - 1)
-                    else len(modalities[0].data)
-                )
-                m = np.einsum(
-                    "bi,bo->bio", combination[0][start:end], 
combination[1][start:end]
-                )
-                m = m.max(axis=2)
-                if combined is None:
-                    combined = m
-                else:
-                    combined = np.concatenate((combined, m), axis=0)
-            data.append(combined)
-
-        data = np.stack(data)
-        data = data.max(axis=0)
-
-        return np.array(data)
diff --git a/src/main/python/systemds/scuro/representations/mel_spectrogram.py 
b/src/main/python/systemds/scuro/representations/spectrogram.py
similarity index 72%
copy from src/main/python/systemds/scuro/representations/mel_spectrogram.py
copy to src/main/python/systemds/scuro/representations/spectrogram.py
index dfff4f3b7e..b5558b1b26 100644
--- a/src/main/python/systemds/scuro/representations/mel_spectrogram.py
+++ b/src/main/python/systemds/scuro/representations/spectrogram.py
@@ -25,17 +25,14 @@ from systemds.scuro.modality.type import ModalityType
 from systemds.scuro.modality.transformed import TransformedModality
 
 from systemds.scuro.representations.unimodal import UnimodalRepresentation
+from systemds.scuro.drsearch.operator_registry import register_representation
 
 
-class MelSpectrogram(UnimodalRepresentation):
-    def __init__(self, n_mels=128, hop_length=512, n_fft=2048):
-        parameters = {
-            "n_mels": [20, 32, 64, 128],
-            "hop_length": [256, 512, 1024, 2048],
-            "n_fft": [1024, 2048, 4096],
-        }
-        super().__init__("MelSpectrogram", ModalityType.TIMESERIES, parameters)
-        self.n_mels = n_mels
+@register_representation(ModalityType.AUDIO)
+class Spectrogram(UnimodalRepresentation):
+    def __init__(self, hop_length=512, n_fft=2048):
+        parameters = {"hop_length": [256, 512, 1024, 2048], "n_fft": [1024, 
2048, 4096]}
+        super().__init__("Spectrogram", ModalityType.TIMESERIES, parameters)
         self.hop_length = hop_length
         self.n_fft = n_fft
 
@@ -45,9 +42,11 @@ class MelSpectrogram(UnimodalRepresentation):
         )
         result = []
         max_length = 0
-        for sample in modality.data:
-            S = librosa.feature.melspectrogram(y=sample, sr=22050)
-            S_dB = librosa.power_to_db(S, ref=np.max)
+        for i, sample in enumerate(modality.data):
+            spectrogram = librosa.stft(
+                y=sample, hop_length=self.hop_length, n_fft=self.n_fft
+            )
+            S_dB = librosa.amplitude_to_db(np.abs(spectrogram))
             if S_dB.shape[-1] > max_length:
                 max_length = S_dB.shape[-1]
             result.append(S_dB.T)
diff --git a/src/main/python/systemds/scuro/representations/sum.py 
b/src/main/python/systemds/scuro/representations/sum.py
index 0608338a0f..46d93f2eda 100644
--- a/src/main/python/systemds/scuro/representations/sum.py
+++ b/src/main/python/systemds/scuro/representations/sum.py
@@ -27,7 +27,10 @@ from systemds.scuro.representations.utils import 
pad_sequences
 
 from systemds.scuro.representations.fusion import Fusion
 
+from systemds.scuro.drsearch.operator_registry import register_fusion_operator
 
+
+@register_fusion_operator()
 class Sum(Fusion):
     def __init__(self):
         """
diff --git 
a/src/main/python/systemds/scuro/representations/swin_video_transformer.py 
b/src/main/python/systemds/scuro/representations/swin_video_transformer.py
new file mode 100644
index 0000000000..19b2fd05c4
--- /dev/null
+++ b/src/main/python/systemds/scuro/representations/swin_video_transformer.py
@@ -0,0 +1,111 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+# from torchvision.models.video.swin_transformer import swin3d_t
+
+from systemds.scuro.modality.transformed import TransformedModality
+from systemds.scuro.representations.unimodal import UnimodalRepresentation
+from typing import Callable, Dict, Tuple, Any
+import torch.utils.data
+import torch
+import torchvision.models as models
+import numpy as np
+from systemds.scuro.modality.type import ModalityType
+from systemds.scuro.drsearch.operator_registry import register_representation
+
+from systemds.scuro.utils.torch_dataset import CustomDataset
+
+if torch.backends.mps.is_available():
+    DEVICE = torch.device("mps")
+# elif torch.cuda.is_available():
+#     DEVICE = torch.device("cuda")
+else:
+    DEVICE = torch.device("cpu")
+
+
+# @register_representation([ModalityType.VIDEO])
+class SwinVideoTransformer(UnimodalRepresentation):
+    def __init__(self, layer_name="avgpool"):
+        parameters = {
+            "layer_name": [
+                "features",
+                "features.1",
+                "features.2",
+                "features.3",
+                "features.4",
+                "features.5",
+                "features.6",
+                "avgpool",
+            ],
+        }
+        super().__init__("SwinVideoTransformer", ModalityType.TIMESERIES, 
parameters)
+        self.layer_name = layer_name
+        # self.model = 
swin3d_t(weights=models.video.Swin3D_T_Weights).to(DEVICE)
+        self.model.eval()
+        for param in self.model.parameters():
+            param.requires_grad = False
+
+    def transform(self, modality):
+        # model = swin3d_t(weights=models.video.Swin3D_T_Weights)
+
+        embeddings = {}
+        swin_output = None
+
+        def get_features(name_):
+            def hook(
+                _module: torch.nn.Module, input_: Tuple[torch.Tensor], output: 
Any
+            ):
+                nonlocal swin_output
+                swin_output = output
+
+            return hook
+
+        if self.layer_name:
+            for name, layer in self.model.named_modules():
+                if name == self.layer_name:
+                    layer.register_forward_hook(get_features(name))
+                    break
+        dataset = CustomDataset(modality.data)
+
+        for instance in dataset:
+            video_id = instance["id"]
+            frames = instance["data"].to(DEVICE)
+            embeddings[video_id] = []
+
+            frames = frames.unsqueeze(0).permute(0, 2, 1, 3, 4)
+
+            _ = self.model(frames)
+            values = swin_output
+            pooled = torch.nn.functional.adaptive_avg_pool2d(values, (1, 1))
+
+            embeddings[video_id].extend(torch.flatten(pooled, 
1).detach().cpu().numpy())
+
+            embeddings[video_id] = np.array(embeddings[video_id])
+
+        transformed_modality = TransformedModality(
+            self.output_modality_type,
+            "swinVideoTransformer",
+            modality.modality_id,
+            modality.metadata,
+        )
+
+        transformed_modality.data = list(embeddings.values())
+
+        return transformed_modality
diff --git a/src/main/python/systemds/scuro/representations/tfidf.py 
b/src/main/python/systemds/scuro/representations/tfidf.py
index 30a6655150..c17527b476 100644
--- a/src/main/python/systemds/scuro/representations/tfidf.py
+++ b/src/main/python/systemds/scuro/representations/tfidf.py
@@ -26,8 +26,10 @@ from systemds.scuro.representations.unimodal import 
UnimodalRepresentation
 from systemds.scuro.representations.utils import save_embeddings
 
 from systemds.scuro.modality.type import ModalityType
+from systemds.scuro.drsearch.operator_registry import register_representation
 
 
+@register_representation(ModalityType.TEXT)
 class TfIdf(UnimodalRepresentation):
     def __init__(self, min_df=2, output_file=None):
         parameters = {"min_df": [min_df]}
diff --git a/src/main/python/systemds/scuro/representations/mel_spectrogram.py 
b/src/main/python/systemds/scuro/representations/wav2vec.py
similarity index 50%
copy from src/main/python/systemds/scuro/representations/mel_spectrogram.py
copy to src/main/python/systemds/scuro/representations/wav2vec.py
index dfff4f3b7e..bf251b101c 100644
--- a/src/main/python/systemds/scuro/representations/mel_spectrogram.py
+++ b/src/main/python/systemds/scuro/representations/wav2vec.py
@@ -18,39 +18,51 @@
 # under the License.
 #
 # -------------------------------------------------------------
-import librosa
 import numpy as np
-
+from transformers import Wav2Vec2Processor, Wav2Vec2Model
+import librosa
+import torch
 from systemds.scuro.modality.type import ModalityType
 from systemds.scuro.modality.transformed import TransformedModality
 
 from systemds.scuro.representations.unimodal import UnimodalRepresentation
+from systemds.scuro.drsearch.operator_registry import register_representation
+
+import warnings
 
+warnings.filterwarnings("ignore", message="Some weights of")
 
-class MelSpectrogram(UnimodalRepresentation):
-    def __init__(self, n_mels=128, hop_length=512, n_fft=2048):
-        parameters = {
-            "n_mels": [20, 32, 64, 128],
-            "hop_length": [256, 512, 1024, 2048],
-            "n_fft": [1024, 2048, 4096],
-        }
-        super().__init__("MelSpectrogram", ModalityType.TIMESERIES, parameters)
-        self.n_mels = n_mels
-        self.hop_length = hop_length
-        self.n_fft = n_fft
+
+@register_representation(ModalityType.AUDIO)
+class Wav2Vec(UnimodalRepresentation):
+    def __init__(self):
+        super().__init__("Wav2Vec", ModalityType.TIMESERIES, {})
+        self.processor = Wav2Vec2Processor.from_pretrained(
+            "facebook/wav2vec2-base-960h"
+        )
+        self.model = Wav2Vec2Model.from_pretrained(
+            "facebook/wav2vec2-base-960h"
+        ).float()
 
     def transform(self, modality):
         transformed_modality = TransformedModality(
             self.output_modality_type, self, modality.modality_id, 
modality.metadata
         )
+
         result = []
-        max_length = 0
-        for sample in modality.data:
-            S = librosa.feature.melspectrogram(y=sample, sr=22050)
-            S_dB = librosa.power_to_db(S, ref=np.max)
-            if S_dB.shape[-1] > max_length:
-                max_length = S_dB.shape[-1]
-            result.append(S_dB.T)
+        for i, sample in enumerate(modality.data):
+            sr = list(modality.metadata.values())[i]["frequency"]
+            audio_resampled = librosa.resample(sample, orig_sr=sr, 
target_sr=16000)
+            input = self.processor(
+                audio_resampled, sampling_rate=16000, return_tensors="pt", 
padding=True
+            )
+            input.input_values = input.input_values.float()
+            input.data["input_values"] = input.data["input_values"].float()
+            with torch.no_grad():
+                outputs = self.model(**input)
+                features = outputs.extract_features
+                # TODO: check how to get intermediate representations
+            result.append(torch.flatten(features.mean(dim=1), 
1).detach().cpu().numpy())
 
         transformed_modality.data = result
         return transformed_modality
diff --git a/src/main/python/systemds/scuro/representations/window.py 
b/src/main/python/systemds/scuro/representations/window.py
index 264d40ca42..bff63729c7 100644
--- a/src/main/python/systemds/scuro/representations/window.py
+++ b/src/main/python/systemds/scuro/representations/window.py
@@ -23,12 +23,12 @@ import math
 
 from systemds.scuro.modality.type import DataLayout
 
-# from systemds.scuro.drsearch.operator_registry import 
register_context_operator
+from systemds.scuro.drsearch.operator_registry import register_context_operator
 from systemds.scuro.representations.aggregate import Aggregation
 from systemds.scuro.representations.context import Context
 
 
-# @register_context_operator()
+@register_context_operator()
 class WindowAggregation(Context):
     def __init__(self, window_size=10, aggregation_function="mean"):
         parameters = {
@@ -65,6 +65,8 @@ class WindowAggregation(Context):
         return windowed_data
 
     def window_aggregate_single_level(self, instance, new_length):
+        if isinstance(instance, str):
+            return instance
         num_cols = instance.shape[1] if instance.ndim > 1 else 1
         result = np.empty((new_length, num_cols))
         for i in range(0, new_length):
diff --git a/src/main/python/systemds/scuro/representations/word2vec.py 
b/src/main/python/systemds/scuro/representations/word2vec.py
index 929dbd4415..e1d1669d9b 100644
--- a/src/main/python/systemds/scuro/representations/word2vec.py
+++ b/src/main/python/systemds/scuro/representations/word2vec.py
@@ -26,10 +26,9 @@ from gensim.models import Word2Vec
 from gensim.utils import tokenize
 
 from systemds.scuro.modality.type import ModalityType
+from systemds.scuro.drsearch.operator_registry import register_representation
 import nltk
 
-nltk.download("punkt_tab")
-
 
 def get_embedding(sentence, model):
     vectors = []
@@ -40,6 +39,7 @@ def get_embedding(sentence, model):
     return np.mean(vectors, axis=0) if vectors else np.zeros(model.vector_size)
 
 
+@register_representation(ModalityType.TEXT)
 class W2V(UnimodalRepresentation):
     def __init__(self, vector_size=3, min_count=2, window=2, output_file=None):
         parameters = {
@@ -71,5 +71,5 @@ class W2V(UnimodalRepresentation):
 
         if self.output_file is not None:
             save_embeddings(np.array(embeddings), self.output_file)
-        transformed_modality.data = np.array(embeddings)
+        transformed_modality.data = embeddings
         return transformed_modality
diff --git a/src/main/python/systemds/scuro/representations/resnet.py 
b/src/main/python/systemds/scuro/representations/x3d.py
similarity index 50%
copy from src/main/python/systemds/scuro/representations/resnet.py
copy to src/main/python/systemds/scuro/representations/x3d.py
index 60eed9ea12..bb5d1ec5ed 100644
--- a/src/main/python/systemds/scuro/representations/resnet.py
+++ b/src/main/python/systemds/scuro/representations/x3d.py
@@ -18,36 +18,36 @@
 # under the License.
 #
 # -------------------------------------------------------------
-
+from systemds.scuro.utils.torch_dataset import CustomDataset
 from systemds.scuro.modality.transformed import TransformedModality
 from systemds.scuro.representations.unimodal import UnimodalRepresentation
 from typing import Callable, Dict, Tuple, Any
 import torch.utils.data
 import torch
+from torchvision.models.video import r3d_18, s3d
 import torchvision.models as models
 import torchvision.transforms as transforms
 import numpy as np
 from systemds.scuro.modality.type import ModalityType
+from systemds.scuro.drsearch.operator_registry import register_representation
 
 if torch.backends.mps.is_available():
     DEVICE = torch.device("mps")
-elif torch.cuda.is_available():
-    DEVICE = torch.device("cuda")
+# elif torch.cuda.is_available():
+#     DEVICE = torch.device("cuda")
 else:
     DEVICE = torch.device("cpu")
 
 
-class ResNet(UnimodalRepresentation):
-    def __init__(self, layer="avgpool", model_name="ResNet18", 
output_file=None):
+# @register_representation([ModalityType.VIDEO])
+class X3D(UnimodalRepresentation):
+    def __init__(self, layer="avgpool", model_name="r3d", output_file=None):
         self.model_name = model_name
         parameters = self._get_parameters()
-        super().__init__(
-            "ResNet", ModalityType.TIMESERIES, parameters
-        )  # TODO: TIMESERIES only for videos - images would be handled as 
EMBEDDIGN
+        super().__init__("X3D", ModalityType.TIMESERIES, parameters)
 
         self.output_file = output_file
         self.layer_name = layer
-        self.model = model_name
         self.model.eval()
         for param in self.model.parameters():
             param.requires_grad = False
@@ -59,37 +59,22 @@ class ResNet(UnimodalRepresentation):
         self.model.fc = Identity()
 
     @property
-    def model(self):
-        return self._model
-
-    @model.setter
-    def model(self, model):
-        if model == "ResNet18":
-            self._model = 
models.resnet18(weights=models.ResNet18_Weights.DEFAULT).to(
-                DEVICE
-            )
-        elif model == "ResNet34":
-            self._model = 
models.resnet34(weights=models.ResNet34_Weights.DEFAULT).to(
-                DEVICE
-            )
-        elif model == "ResNet50":
-            self._model = 
models.resnet50(weights=models.ResNet50_Weights.DEFAULT).to(
-                DEVICE
-            )
-        elif model == "ResNet101":
-            self._model = 
models.resnet101(weights=models.ResNet101_Weights.DEFAULT).to(
-                DEVICE
-            )
-        elif model == "ResNet152":
-            self._model = 
models.resnet152(weights=models.ResNet152_Weights.DEFAULT).to(
-                DEVICE
-            )
+    def model_name(self):
+        return self._model_name
+
+    @model_name.setter
+    def model_name(self, model_name):
+        self._model_name = model_name
+        if model_name == "r3d":
+            self.model = r3d_18(pretrained=True).to(DEVICE)
+        elif model_name == "s3d":
+            self.model = 
s3d(weights=models.video.S3D_Weights.DEFAULT).to(DEVICE)
         else:
             raise NotImplementedError
 
     def _get_parameters(self, high_level=True):
         parameters = {"model_name": [], "layer_name": []}
-        for m in ["ResNet18", "ResNet34", "ResNet50", "ResNet101", 
"ResNet152"]:
+        for m in ["r3d", "s3d"]:
             parameters["model_name"].append(m)
 
         if high_level:
@@ -107,20 +92,7 @@ class ResNet(UnimodalRepresentation):
         return parameters
 
     def transform(self, modality):
-
-        t = transforms.Compose(
-            [
-                transforms.ToPILImage(),
-                transforms.Resize(256),
-                transforms.CenterCrop(224),
-                transforms.ToTensor(),
-                transforms.Normalize(
-                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
-                ),
-            ]
-        )
-
-        dataset = ResNetDataset(modality.data, t)
+        dataset = CustomDataset(modality.data)
         embeddings = {}
 
         res5c_output = None
@@ -140,59 +112,24 @@ class ResNet(UnimodalRepresentation):
                     layer.register_forward_hook(get_features(name))
                     break
 
-        for instance in torch.utils.data.DataLoader(dataset):
-            video_id = instance["id"][0]
-            frames = instance["data"][0].to(DEVICE)
+        for instance in dataset:
+            video_id = instance["id"]
+            frames = instance["data"].to(DEVICE)
             embeddings[video_id] = []
-            batch_size = 64
-
-            for start_index in range(0, len(frames), batch_size):
-                end_index = min(start_index + batch_size, len(frames))
-                frame_ids_range = range(start_index, end_index)
-                frame_batch = frames[frame_ids_range]
 
-                _ = self.model(frame_batch)
-                values = res5c_output
-                pooled = torch.nn.functional.adaptive_avg_pool2d(values, (1, 
1))
+            frames = frames.unsqueeze(0).permute(0, 2, 1, 3, 4)
+            _ = self.model(frames)
+            values = res5c_output
+            pooled = torch.nn.functional.adaptive_avg_pool2d(values, (1, 1))
 
-                embeddings[video_id].extend(
-                    torch.flatten(pooled, 1).detach().cpu().numpy()
-                )
+            embeddings[video_id].extend(torch.flatten(pooled, 
1).detach().cpu().numpy())
 
             embeddings[video_id] = np.array(embeddings[video_id])
 
         transformed_modality = TransformedModality(
-            self.output_modality_type, "resnet", modality.modality_id, 
modality.metadata
+            self.output_modality_type, "x3d", modality.modality_id, 
modality.metadata
         )
 
         transformed_modality.data = list(embeddings.values())
 
         return transformed_modality
-
-
-class ResNetDataset(torch.utils.data.Dataset):
-    def __init__(self, data: str, tf: Callable = None):
-        self.data = data
-        self.tf = tf
-
-    def __getitem__(self, index) -> Dict[str, object]:
-        data = self.data[index]
-        if type(data) is np.ndarray:
-            output = torch.empty((1, 3, 224, 224))
-            d = torch.tensor(data)
-            d = d.repeat(3, 1, 1)
-            output[0] = self.tf(d)
-        else:
-            output = torch.empty((len(data), 3, 224, 224))
-
-            for i, d in enumerate(data):
-                if data[0].ndim < 3:
-                    d = torch.tensor(d)
-                    d = d.repeat(3, 1, 1)
-
-                output[i] = self.tf(d)
-
-        return {"id": index, "data": output}
-
-    def __len__(self) -> int:
-        return len(self.data)
diff --git a/src/main/python/systemds/scuro/utils/schema_helpers.py 
b/src/main/python/systemds/scuro/utils/schema_helpers.py
index a88e81f716..28af476cca 100644
--- a/src/main/python/systemds/scuro/utils/schema_helpers.py
+++ b/src/main/python/systemds/scuro/utils/schema_helpers.py
@@ -18,7 +18,6 @@
 # under the License.
 #
 # -------------------------------------------------------------
-import math
 import numpy as np
 
 
diff --git a/src/main/python/systemds/scuro/utils/torch_dataset.py 
b/src/main/python/systemds/scuro/utils/torch_dataset.py
new file mode 100644
index 0000000000..a0f3d88b6a
--- /dev/null
+++ b/src/main/python/systemds/scuro/utils/torch_dataset.py
@@ -0,0 +1,63 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+from typing import Dict
+
+import numpy as np
+import torch
+import torchvision.transforms as transforms
+
+
+class CustomDataset(torch.utils.data.Dataset):
+    def __init__(self, data):
+        self.data = data
+        self.tf = transforms.Compose(
+            [
+                transforms.ToPILImage(),
+                transforms.Resize(256),
+                transforms.CenterCrop(224),
+                transforms.ToTensor(),
+                transforms.Normalize(
+                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
+                ),
+            ]
+        )
+
+    def __getitem__(self, index) -> Dict[str, object]:
+        data = self.data[index]
+        if type(data) is np.ndarray:
+            output = torch.empty((1, 3, 224, 224))
+            d = torch.tensor(data)
+            d = d.repeat(3, 1, 1)
+            output[0] = self.tf(d)
+        else:
+            output = torch.empty((len(data), 3, 224, 224))
+
+            for i, d in enumerate(data):
+                if data[0].ndim < 3:
+                    d = torch.tensor(d)
+                    d = d.repeat(3, 1, 1)
+
+                output[i] = self.tf(d)
+
+        return {"id": index, "data": output}
+
+    def __len__(self) -> int:
+        return len(self.data)
diff --git a/src/main/python/tests/scuro/data_generator.py 
b/src/main/python/tests/scuro/data_generator.py
index 48ff208e43..e31887ff83 100644
--- a/src/main/python/tests/scuro/data_generator.py
+++ b/src/main/python/tests/scuro/data_generator.py
@@ -26,13 +26,11 @@ from scipy.io.wavfile import write
 import random
 import os
 
-from systemds.scuro import (
-    VideoLoader,
-    AudioLoader,
-    TextLoader,
-    UnimodalModality,
-    TransformedModality,
-)
+from systemds.scuro.dataloader.video_loader import VideoLoader
+from systemds.scuro.dataloader.audio_loader import AudioLoader
+from systemds.scuro.dataloader.text_loader import TextLoader
+from systemds.scuro.modality.unimodal_modality import UnimodalModality
+from systemds.scuro.modality.transformed import TransformedModality
 from systemds.scuro.modality.type import ModalityType
 
 
diff --git a/src/main/python/tests/scuro/test_dr_search.py 
b/src/main/python/tests/scuro/test_dr_search.py
index 0959c246e0..521ff3f468 100644
--- a/src/main/python/tests/scuro/test_dr_search.py
+++ b/src/main/python/tests/scuro/test_dr_search.py
@@ -29,8 +29,8 @@ from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import MinMaxScaler
 
 from systemds.scuro.modality.type import ModalityType
-from systemds.scuro.aligner.dr_search import DRSearch
-from systemds.scuro.aligner.task import Task
+from systemds.scuro.drsearch.dr_search import DRSearch
+from systemds.scuro.drsearch.task import Task
 from systemds.scuro.models.model import Model
 from systemds.scuro.representations.average import Average
 from systemds.scuro.representations.bert import Bert
diff --git a/src/main/python/tests/scuro/test_multimodal_fusion.py 
b/src/main/python/tests/scuro/test_multimodal_fusion.py
new file mode 100644
index 0000000000..8456279c3d
--- /dev/null
+++ b/src/main/python/tests/scuro/test_multimodal_fusion.py
@@ -0,0 +1,202 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+
+import shutil
+import unittest
+
+import numpy as np
+from sklearn import svm
+from sklearn.metrics import classification_report
+from sklearn.model_selection import train_test_split
+
+from systemds.scuro.representations.concatenation import Concatenation
+from systemds.scuro.representations.average import Average
+from systemds.scuro.drsearch.fusion_optimizer import FusionOptimizer
+from systemds.scuro.drsearch.operator_registry import Registry
+from systemds.scuro.models.model import Model
+from systemds.scuro.drsearch.task import Task
+from systemds.scuro.drsearch.unimodal_representation_optimizer import (
+    UnimodalRepresentationOptimizer,
+)
+
+from systemds.scuro.representations.spectrogram import Spectrogram
+from systemds.scuro.representations.word2vec import W2V
+from systemds.scuro.modality.unimodal_modality import UnimodalModality
+from systemds.scuro.representations.resnet import ResNet
+from tests.scuro.data_generator import setup_data
+
+from systemds.scuro.dataloader.audio_loader import AudioLoader
+from systemds.scuro.dataloader.video_loader import VideoLoader
+from systemds.scuro.dataloader.text_loader import TextLoader
+from systemds.scuro.modality.type import ModalityType
+
+from unittest.mock import patch
+
+
+class TestSVM(Model):
+    def __init__(self):
+        super().__init__("TestSVM")
+
+    def fit(self, X, y, X_test, y_test):
+        if X.ndim > 2:
+            X = X.reshape(X.shape[0], -1)
+        self.clf = svm.SVC(C=1, gamma="scale", kernel="rbf", verbose=False)
+        self.clf = self.clf.fit(X, np.array(y))
+        y_pred = self.clf.predict(X)
+
+        return classification_report(
+            y, y_pred, output_dict=True, digits=3, zero_division=1
+        )["accuracy"]
+
+    def test(self, test_X: np.ndarray, test_y: np.ndarray):
+        if test_X.ndim > 2:
+            test_X = test_X.reshape(test_X.shape[0], -1)
+        y_pred = self.clf.predict(np.array(test_X))  # noqa
+
+        return classification_report(
+            np.array(test_y), y_pred, output_dict=True, digits=3, 
zero_division=1
+        )["accuracy"]
+
+
+class TestCNN(Model):
+    def __init__(self):
+        super().__init__("TestCNN")
+
+    def fit(self, X, y, X_test, y_test):
+        if X.ndim > 2:
+            X = X.reshape(X.shape[0], -1)
+        self.clf = svm.SVC(C=1, gamma="scale", kernel="rbf", verbose=False)
+        self.clf = self.clf.fit(X, np.array(y))
+        y_pred = self.clf.predict(X)
+
+        return classification_report(
+            y, y_pred, output_dict=True, digits=3, zero_division=1
+        )["accuracy"]
+
+    def test(self, test_X: np.ndarray, test_y: np.ndarray):
+        if test_X.ndim > 2:
+            test_X = test_X.reshape(test_X.shape[0], -1)
+        y_pred = self.clf.predict(np.array(test_X))  # noqa
+
+        return classification_report(
+            np.array(test_y), y_pred, output_dict=True, digits=3, 
zero_division=1
+        )["accuracy"]
+
+
+class TestMultimodalRepresentationOptimizer(unittest.TestCase):
+    test_file_path = None
+    data_generator = None
+    num_instances = 0
+
+    @classmethod
+    def setUpClass(cls):
+        cls.test_file_path = "fusion_optimizer_test_data"
+
+        cls.num_instances = 10
+        cls.mods = [ModalityType.VIDEO, ModalityType.AUDIO, ModalityType.TEXT]
+
+        cls.data_generator = setup_data(cls.mods, cls.num_instances, 
cls.test_file_path)
+        split = train_test_split(
+            cls.data_generator.indices,
+            cls.data_generator.labels,
+            test_size=0.2,
+            random_state=42,
+        )
+        cls.train_indizes, cls.val_indizes = [int(i) for i in split[0]], [
+            int(i) for i in split[1]
+        ]
+
+        cls.tasks = [
+            Task(
+                "UnimodalRepresentationTask1",
+                TestSVM(),
+                cls.data_generator.labels,
+                cls.train_indizes,
+                cls.val_indizes,
+            ),
+            Task(
+                "UnimodalRepresentationTask2",
+                TestCNN(),
+                cls.data_generator.labels,
+                cls.train_indizes,
+                cls.val_indizes,
+            ),
+        ]
+
+    @classmethod
+    def tearDownClass(cls):
+        shutil.rmtree(cls.test_file_path)
+
+    def test_multimodal_fusion(self):
+        task = Task(
+            "UnimodalRepresentationTask1",
+            TestSVM(),
+            self.data_generator.labels,
+            self.train_indizes,
+            self.val_indizes,
+        )
+        audio_data_loader = AudioLoader(
+            self.data_generator.get_modality_path(ModalityType.AUDIO),
+            self.data_generator.indices,
+        )
+        audio = UnimodalModality(audio_data_loader)
+
+        text_data_loader = TextLoader(
+            self.data_generator.get_modality_path(ModalityType.TEXT),
+            self.data_generator.indices,
+        )
+        text = UnimodalModality(text_data_loader)
+
+        video_data_loader = VideoLoader(
+            self.data_generator.get_modality_path(ModalityType.VIDEO),
+            self.data_generator.indices,
+        )
+        video = UnimodalModality(video_data_loader)
+
+        with patch.object(
+            Registry,
+            "_representations",
+            {
+                ModalityType.TEXT: [W2V],
+                ModalityType.AUDIO: [Spectrogram],
+                ModalityType.TIMESERIES: [ResNet],
+                ModalityType.VIDEO: [ResNet],
+                ModalityType.EMBEDDING: [],
+            },
+        ):
+            registry = Registry()
+            registry._fusion_operators = [Average, Concatenation]
+            unimodal_optimizer = UnimodalRepresentationOptimizer(
+                [text, audio, video], [task], max_chain_depth=2
+            )
+            unimodal_optimizer.optimize()
+
+            multimodal_optimizer = FusionOptimizer(
+                [audio, text, video],
+                task,
+                unimodal_optimizer.optimization_results,
+                unimodal_optimizer.cache,
+                2,
+                2,
+                debug=False,
+            )
+            multimodal_optimizer.optimize()
diff --git a/src/main/python/tests/scuro/test_multimodal_join.py 
b/src/main/python/tests/scuro/test_multimodal_join.py
index 8388829f30..a5e3a7caf9 100644
--- a/src/main/python/tests/scuro/test_multimodal_join.py
+++ b/src/main/python/tests/scuro/test_multimodal_join.py
@@ -24,8 +24,6 @@ import shutil
 import unittest
 
 from systemds.scuro.modality.joined import JoinCondition
-from systemds.scuro.representations.aggregate import Aggregation
-from systemds.scuro.representations.window import WindowAggregation
 from systemds.scuro.modality.unimodal_modality import UnimodalModality
 from systemds.scuro.representations.mel_spectrogram import MelSpectrogram
 from systemds.scuro.representations.resnet import ResNet
diff --git a/src/main/python/tests/scuro/test_operator_registry.py 
b/src/main/python/tests/scuro/test_operator_registry.py
new file mode 100644
index 0000000000..aaecde2991
--- /dev/null
+++ b/src/main/python/tests/scuro/test_operator_registry.py
@@ -0,0 +1,87 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+import unittest
+
+from systemds.scuro.representations.mfcc import MFCC
+from systemds.scuro.representations.wav2vec import Wav2Vec
+from systemds.scuro.representations.window import WindowAggregation
+from systemds.scuro.representations.bow import BoW
+from systemds.scuro.representations.word2vec import W2V
+from systemds.scuro.representations.tfidf import TfIdf
+from systemds.scuro.drsearch.operator_registry import Registry
+from systemds.scuro.modality.type import ModalityType
+from systemds.scuro.representations.average import Average
+from systemds.scuro.representations.bert import Bert
+from systemds.scuro.representations.concatenation import Concatenation
+from systemds.scuro.representations.lstm import LSTM
+from systemds.scuro.representations.max import RowMax
+from systemds.scuro.representations.mel_spectrogram import MelSpectrogram
+from systemds.scuro.representations.spectrogram import Spectrogram
+from systemds.scuro.representations.multiplication import Multiplication
+from systemds.scuro.representations.resnet import ResNet
+from systemds.scuro.representations.sum import Sum
+
+
+class TestOperatorRegistry(unittest.TestCase):
+    def test_audio_representations_in_registry(self):
+        registry = Registry()
+        for representation in [Spectrogram, MelSpectrogram, Wav2Vec, MFCC]:
+            assert representation in registry.get_representations(
+                ModalityType.AUDIO
+            ), f"{representation} not in registry"
+
+    def test_video_representations_in_registry(self):
+        registry = Registry()
+        assert registry.get_representations(ModalityType.VIDEO) == [ResNet]
+
+    def test_timeseries_representations_in_registry(self):
+        registry = Registry()
+        assert registry.get_representations(ModalityType.TIMESERIES) == 
[ResNet]
+
+    def test_text_representations_in_registry(self):
+        registry = Registry()
+        for representation in [BoW, TfIdf, W2V, Bert]:
+            assert representation in registry.get_representations(
+                ModalityType.TEXT
+            ), f"{representation} not in registry"
+
+    def test_context_operator_in_registry(self):
+        registry = Registry()
+        assert registry.get_context_operators() == [WindowAggregation]
+
+    # def test_fusion_operator_in_registry(self):
+    #     registry = Registry()
+    #     for fusion_operator in [
+    #         # RowMax,
+    #         Sum,
+    #         Average,
+    #         Concatenation,
+    #         LSTM,
+    #         Multiplication,
+    #     ]:
+    #         assert (
+    #             fusion_operator in registry.get_fusion_operators()
+    #         ), f"{fusion_operator} not in registry"
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/src/main/python/tests/scuro/test_unimodal_optimizer.py 
b/src/main/python/tests/scuro/test_unimodal_optimizer.py
new file mode 100644
index 0000000000..bfc52f0103
--- /dev/null
+++ b/src/main/python/tests/scuro/test_unimodal_optimizer.py
@@ -0,0 +1,203 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+
+import shutil
+import unittest
+
+import numpy as np
+from sklearn import svm
+from sklearn.metrics import classification_report
+from sklearn.model_selection import train_test_split
+
+from systemds.scuro.drsearch.operator_registry import Registry
+from systemds.scuro.models.model import Model
+from systemds.scuro.drsearch.task import Task
+from systemds.scuro.drsearch.unimodal_representation_optimizer import (
+    UnimodalRepresentationOptimizer,
+)
+
+from systemds.scuro.representations.spectrogram import Spectrogram
+from systemds.scuro.representations.word2vec import W2V
+from systemds.scuro.modality.unimodal_modality import UnimodalModality
+from systemds.scuro.representations.resnet import ResNet
+from tests.scuro.data_generator import setup_data
+
+from systemds.scuro.dataloader.audio_loader import AudioLoader
+from systemds.scuro.dataloader.video_loader import VideoLoader
+from systemds.scuro.dataloader.text_loader import TextLoader
+from systemds.scuro.modality.type import ModalityType
+
+
+class TestSVM(Model):
+    def __init__(self):
+        super().__init__("TestSVM")
+
+    def fit(self, X, y, X_test, y_test):
+        if X.ndim > 2:
+            X = X.reshape(X.shape[0], -1)
+        self.clf = svm.SVC(C=1, gamma="scale", kernel="rbf", verbose=False)
+        self.clf = self.clf.fit(X, np.array(y))
+        y_pred = self.clf.predict(X)
+
+        return classification_report(
+            y, y_pred, output_dict=True, digits=3, zero_division=1
+        )["accuracy"]
+
+    def test(self, test_X: np.ndarray, test_y: np.ndarray):
+        if test_X.ndim > 2:
+            test_X = test_X.reshape(test_X.shape[0], -1)
+        y_pred = self.clf.predict(np.array(test_X))  # noqa
+
+        return classification_report(
+            np.array(test_y), y_pred, output_dict=True, digits=3, 
zero_division=1
+        )["accuracy"]
+
+
+class TestCNN(Model):
+    def __init__(self):
+        super().__init__("TestCNN")
+
+    def fit(self, X, y, X_test, y_test):
+        if X.ndim > 2:
+            X = X.reshape(X.shape[0], -1)
+        self.clf = svm.SVC(C=1, gamma="scale", kernel="rbf", verbose=False)
+        self.clf = self.clf.fit(X, np.array(y))
+        y_pred = self.clf.predict(X)
+
+        return classification_report(
+            y, y_pred, output_dict=True, digits=3, zero_division=1
+        )["accuracy"]
+
+    def test(self, test_X: np.ndarray, test_y: np.ndarray):
+        if test_X.ndim > 2:
+            test_X = test_X.reshape(test_X.shape[0], -1)
+        y_pred = self.clf.predict(np.array(test_X))  # noqa
+
+        return classification_report(
+            np.array(test_y), y_pred, output_dict=True, digits=3, 
zero_division=1
+        )["accuracy"]
+
+
+from unittest.mock import patch
+
+
+class TestUnimodalRepresentationOptimizer(unittest.TestCase):
+    test_file_path = None
+    data_generator = None
+    num_instances = 0
+
+    @classmethod
+    def setUpClass(cls):
+        cls.test_file_path = "unimodal_optimizer_test_data"
+
+        cls.num_instances = 10
+        cls.mods = [ModalityType.VIDEO, ModalityType.AUDIO, ModalityType.TEXT]
+
+        cls.data_generator = setup_data(cls.mods, cls.num_instances, 
cls.test_file_path)
+        split = train_test_split(
+            cls.data_generator.indices,
+            cls.data_generator.labels,
+            test_size=0.2,
+            random_state=42,
+        )
+        cls.train_indizes, cls.val_indizes = [int(i) for i in split[0]], [
+            int(i) for i in split[1]
+        ]
+
+        cls.tasks = [
+            Task(
+                "UnimodalRepresentationTask1",
+                TestSVM(),
+                cls.data_generator.labels,
+                cls.train_indizes,
+                cls.val_indizes,
+            ),
+            Task(
+                "UnimodalRepresentationTask2",
+                TestCNN(),
+                cls.data_generator.labels,
+                cls.train_indizes,
+                cls.val_indizes,
+            ),
+        ]
+
+    @classmethod
+    def tearDownClass(cls):
+        shutil.rmtree(cls.test_file_path)
+
+    def test_unimodal_optimizer_for_audio_modality(self):
+        audio_data_loader = AudioLoader(
+            self.data_generator.get_modality_path(ModalityType.AUDIO),
+            self.data_generator.indices,
+        )
+        audio = UnimodalModality(audio_data_loader)
+
+        self.optimize_unimodal_representation_for_modality(audio)
+
+    def test_unimodal_optimizer_for_text_modality(self):
+        text_data_loader = TextLoader(
+            self.data_generator.get_modality_path(ModalityType.TEXT),
+            self.data_generator.indices,
+        )
+        text = UnimodalModality(text_data_loader)
+        self.optimize_unimodal_representation_for_modality(text)
+
+    def test_unimodal_optimizer_for_video_modality(self):
+        video_data_loader = VideoLoader(
+            self.data_generator.get_modality_path(ModalityType.VIDEO),
+            self.data_generator.indices,
+        )
+        video = UnimodalModality(video_data_loader)
+        self.optimize_unimodal_representation_for_modality(video)
+
+    def optimize_unimodal_representation_for_modality(self, modality):
+        with patch.object(
+            Registry,
+            "_representations",
+            {
+                ModalityType.TEXT: [W2V],
+                ModalityType.AUDIO: [Spectrogram],
+                ModalityType.TIMESERIES: [ResNet],
+                ModalityType.VIDEO: [ResNet],
+                ModalityType.EMBEDDING: [],
+            },
+        ):
+            registry = Registry()
+
+            unimodal_optimizer = UnimodalRepresentationOptimizer(
+                [modality], self.tasks, max_chain_depth=2
+            )
+            unimodal_optimizer.optimize()
+
+            assert (
+                list(unimodal_optimizer.optimization_results.keys())[0]
+                == modality.modality_id
+            )
+            assert 
len(list(unimodal_optimizer.optimization_results.values())[0]) == 2
+            assert (
+                len(
+                    unimodal_optimizer.get_k_best_results(modality, 1, 
self.tasks[0])[
+                        0
+                    ].operator_chain
+                )
+                >= 1
+            )

(systemds) branch main updated: [SYSTEMDS-3887] Create representation optimizer

Reply via email to