This is an automated email from the ASF dual-hosted git repository.
cdionysio pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 496a22f9d3 [SYSTEMDS-3835] Scuro window aggregation operator
496a22f9d3 is described below
commit 496a22f9d3e82910927fac6e8c7bfeb275388664
Author: Christina Dionysio <[email protected]>
AuthorDate: Wed May 14 14:57:11 2025 +0200
[SYSTEMDS-3835] Scuro window aggregation operator
This patch adds a window aggregation operator with mean, min, max, and sum
aggregation functions.
The window aggregation is applied to the individual modalities and can
handle multiple subtypes of modalities.
This PR also adds tests to verify the correctness of the operator.
Closes #2225
---
.github/workflows/python.yml | 5 +-
.../systemds/scuro/dataloader/audio_loader.py | 15 +-
.../systemds/scuro/dataloader/base_loader.py | 12 +-
.../systemds/scuro/dataloader/json_loader.py | 9 +-
.../systemds/scuro/dataloader/text_loader.py | 7 +-
.../systemds/scuro/dataloader/video_loader.py | 20 ++-
src/main/python/systemds/scuro/modality/joined.py | 33 +++--
.../systemds/scuro/modality/joined_transformed.py | 7 +
.../python/systemds/scuro/modality/modality.py | 108 ++++++++-------
.../modality_identifier.py} | 27 ++--
.../python/systemds/scuro/modality/transformed.py | 36 +++--
src/main/python/systemds/scuro/modality/type.py | 153 +++++++++++++++++++--
.../systemds/scuro/modality/unimodal_modality.py | 46 +++++--
.../systemds/scuro/representations/aggregate.py | 80 ++++++++---
.../python/systemds/scuro/representations/bert.py | 11 +-
.../python/systemds/scuro/representations/bow.py | 10 +-
.../representations/{unimodal.py => context.py} | 29 ++--
.../systemds/scuro/representations/fusion.py | 22 ++-
.../python/systemds/scuro/representations/glove.py | 3 +-
.../scuro/representations/mel_spectrogram.py | 25 ++--
.../scuro/representations/representation.py | 18 ++-
.../systemds/scuro/representations/resnet.py | 32 ++++-
.../python/systemds/scuro/representations/tfidf.py | 15 +-
.../systemds/scuro/representations/unimodal.py | 12 +-
.../systemds/scuro/representations/window.py | 74 +++++++---
.../systemds/scuro/representations/word2vec.py | 18 ++-
src/main/python/tests/scuro/data_generator.py | 48 ++++++-
src/main/python/tests/scuro/test_data_loaders.py | 26 ++--
src/main/python/tests/scuro/test_dr_search.py | 4 +-
.../python/tests/scuro/test_multimodal_join.py | 11 +-
.../tests/scuro/test_unimodal_representations.py | 11 +-
.../python/tests/scuro/test_window_operations.py | 106 ++++++++++++++
32 files changed, 773 insertions(+), 260 deletions(-)
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 4e5baa9d81..cea222a4a7 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -116,8 +116,9 @@ jobs:
h5py \
gensim \
black \
- opt-einsum
-
+ opt-einsum \
+ nltk
+
- name: Build Python Package
run: |
cd src/main/python
diff --git a/src/main/python/systemds/scuro/dataloader/audio_loader.py
b/src/main/python/systemds/scuro/dataloader/audio_loader.py
index f7319fe191..a6a164b4fb 100644
--- a/src/main/python/systemds/scuro/dataloader/audio_loader.py
+++ b/src/main/python/systemds/scuro/dataloader/audio_loader.py
@@ -22,23 +22,18 @@ from typing import List, Optional, Union
import librosa
from systemds.scuro.dataloader.base_loader import BaseLoader
-from systemds.scuro.utils.schema_helpers import create_timestamps
+from systemds.scuro.modality.type import ModalityType
class AudioLoader(BaseLoader):
def __init__(
- self,
- source_path: str,
- indices: List[str],
- chunk_size: Optional[int] = None,
+ self, source_path: str, indices: List[str], chunk_size: Optional[int]
= None
):
- super().__init__(source_path, indices, chunk_size)
+ super().__init__(source_path, indices, chunk_size, ModalityType.AUDIO)
def extract(self, file: str, index: Optional[Union[str, List[str]]] =
None):
self.file_sanity_check(file)
audio, sr = librosa.load(file)
- self.metadata[file] = {"sample_rate": sr, "length": audio.shape[0]}
- self.metadata[file]["timestamp"] = create_timestamps(
- self.metadata[file]["sample_rate"], self.metadata[file]["length"]
- )
+ self.metadata[file] = self.modality_type.create_audio_metadata(sr,
audio)
+
self.data.append(audio)
diff --git a/src/main/python/systemds/scuro/dataloader/base_loader.py
b/src/main/python/systemds/scuro/dataloader/base_loader.py
index 5cdf63f584..ea2b25bbb4 100644
--- a/src/main/python/systemds/scuro/dataloader/base_loader.py
+++ b/src/main/python/systemds/scuro/dataloader/base_loader.py
@@ -25,7 +25,11 @@ from typing import List, Optional, Union
class BaseLoader(ABC):
def __init__(
- self, source_path: str, indices: List[str], chunk_size: Optional[int]
= None
+ self,
+ source_path: str,
+ indices: List[str],
+ chunk_size: Optional[int] = None,
+ modality_type=None,
):
"""
Base class to load raw data for a given list of indices and stores
them in the data object
@@ -40,6 +44,7 @@ class BaseLoader(ABC):
) # TODO: check what the index should be for storing the metadata
(file_name, counter, ...)
self.source_path = source_path
self.indices = indices
+ self.modality_type = modality_type
self._next_chunk = 0
self._num_chunks = 1
self._chunk_size = None
@@ -64,6 +69,11 @@ class BaseLoader(ABC):
def next_chunk(self):
return self._next_chunk
+ def reset(self):
+ self._next_chunk = 0
+ self.data = []
+ self.metadata = {}
+
def load(self):
"""
Takes care of loading the raw data either chunk wise (if chunk size is
defined) or all at once
diff --git a/src/main/python/systemds/scuro/dataloader/json_loader.py
b/src/main/python/systemds/scuro/dataloader/json_loader.py
index ac37545188..edef7f205b 100644
--- a/src/main/python/systemds/scuro/dataloader/json_loader.py
+++ b/src/main/python/systemds/scuro/dataloader/json_loader.py
@@ -20,6 +20,7 @@
# -------------------------------------------------------------
import json
+from systemds.scuro.modality.type import ModalityType
from systemds.scuro.dataloader.base_loader import BaseLoader
from typing import Optional, List, Union
@@ -32,7 +33,7 @@ class JSONLoader(BaseLoader):
field: str,
chunk_size: Optional[int] = None,
):
- super().__init__(source_path, indices, chunk_size)
+ super().__init__(source_path, indices, chunk_size, ModalityType.TEXT)
self.field = field
def extract(self, file: str, index: Optional[Union[str, List[str]]] =
None):
@@ -40,4 +41,8 @@ class JSONLoader(BaseLoader):
with open(file) as f:
json_file = json.load(f)
for idx in index:
- self.data.append(json_file[idx][self.field])
+ sentence = json_file[idx][self.field]
+ self.data.append(sentence)
+ self.metadata[idx] = self.modality_type.create_text_metadata(
+ len(sentence), sentence
+ )
diff --git a/src/main/python/systemds/scuro/dataloader/text_loader.py
b/src/main/python/systemds/scuro/dataloader/text_loader.py
index bf34cf85c7..3f87155147 100644
--- a/src/main/python/systemds/scuro/dataloader/text_loader.py
+++ b/src/main/python/systemds/scuro/dataloader/text_loader.py
@@ -20,6 +20,7 @@
# -------------------------------------------------------------
from systemds.scuro.dataloader.base_loader import BaseLoader
from typing import Optional, Pattern, List, Union
+from systemds.scuro.modality.type import ModalityType
import re
@@ -31,7 +32,7 @@ class TextLoader(BaseLoader):
chunk_size: Optional[int] = None,
prefix: Optional[Pattern[str]] = None,
):
- super().__init__(source_path, indices, chunk_size)
+ super().__init__(source_path, indices, chunk_size, ModalityType.TEXT)
self.prefix = prefix
def extract(self, file: str, index: Optional[Union[str, List[str]]] =
None):
@@ -41,5 +42,7 @@ class TextLoader(BaseLoader):
if self.prefix:
line = re.sub(self.prefix, "", line)
line = line.replace("\n", "")
- self.metadata[file] = {"length": len(line.split())}
+ self.metadata[file] = self.modality_type.create_text_metadata(
+ len(line.split()), line
+ )
self.data.append(line)
diff --git a/src/main/python/systemds/scuro/dataloader/video_loader.py
b/src/main/python/systemds/scuro/dataloader/video_loader.py
index 807a43b21c..333960e698 100644
--- a/src/main/python/systemds/scuro/dataloader/video_loader.py
+++ b/src/main/python/systemds/scuro/dataloader/video_loader.py
@@ -23,8 +23,8 @@ from typing import List, Optional, Union
import numpy as np
from systemds.scuro.dataloader.base_loader import BaseLoader
-from systemds.scuro.utils.schema_helpers import create_timestamps
import cv2
+from systemds.scuro.modality.type import ModalityType
class VideoLoader(BaseLoader):
@@ -34,7 +34,7 @@ class VideoLoader(BaseLoader):
indices: List[str],
chunk_size: Optional[int] = None,
):
- super().__init__(source_path, indices, chunk_size)
+ super().__init__(source_path, indices, chunk_size, ModalityType.VIDEO)
def extract(self, file: str, index: Optional[Union[str, List[str]]] =
None):
self.file_sanity_check(file)
@@ -43,16 +43,14 @@ class VideoLoader(BaseLoader):
if not cap.isOpened():
raise f"Could not read video at path: {file}"
- self.metadata[file] = {
- "fps": cap.get(cv2.CAP_PROP_FPS),
- "length": int(cap.get(cv2.CAP_PROP_FRAME_COUNT)),
- "width": int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
- "height": int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
- "num_channels": 3,
- }
+ fps = cap.get(cv2.CAP_PROP_FPS)
+ length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+ width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+ height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+ num_channels = 3
- self.metadata[file]["timestamp"] = create_timestamps(
- self.metadata[file]["fps"], self.metadata[file]["length"]
+ self.metadata[file] = self.modality_type.create_video_metadata(
+ fps, length, width, height, num_channels
)
frames = []
diff --git a/src/main/python/systemds/scuro/modality/joined.py
b/src/main/python/systemds/scuro/modality/joined.py
index acdf4fb94f..c1aa26abf6 100644
--- a/src/main/python/systemds/scuro/modality/joined.py
+++ b/src/main/python/systemds/scuro/modality/joined.py
@@ -104,7 +104,7 @@ class JoinedModality(Modality):
self.joined_right.data[i - starting_idx].append([])
right = np.array([])
if self.condition.join_type == "<":
- while c < len(idx_2) and idx_2[c] < nextIdx[j]:
+ while c < len(idx_2) - 1 and idx_2[c] < nextIdx[j]:
if right.size == 0:
right = self.right_modality.data[i][c]
if right.ndim == 1:
@@ -125,7 +125,7 @@ class JoinedModality(Modality):
)
c = c + 1
else:
- while c < len(idx_2) and idx_2[c] <= idx_1[j]:
+ while c < len(idx_2) - 1 and idx_2[c] <= idx_1[j]:
if idx_2[c] == idx_1[j]:
right.append(self.right_modality.data[i][c])
c = c + 1
@@ -141,18 +141,17 @@ class JoinedModality(Modality):
self.joined_right.data[i - starting_idx][j] = right
- def apply_representation(self, representation, aggregation):
+ def apply_representation(self, representation, aggregation=None):
self.aggregation = aggregation
if self.chunked_execution:
return self._handle_chunked_execution(representation)
- elif self.left_type.__name__.__contains__("Unimodal"):
- self.left_modality.extract_raw_data()
- if self.left_type == self.right_type:
- self.right_modality.extract_raw_data()
- elif self.right_type.__name__.__contains__("Unimodal"):
- self.right_modality.extract_raw_data()
+ # elif self.left_type.__name__.__contains__("Unimodal"):
+ # self.left_modality.extract_raw_data()
+ # if self.left_type == self.right_type:
+ # self.right_modality.extract_raw_data()
+ # elif self.right_type.__name__.__contains__("Unimodal") and not
self.right_modality.has_data():
+ # self.right_modality.extract_raw_data()
- self.execute()
left_transformed = self._apply_representation(
self.left_modality, representation
)
@@ -263,12 +262,12 @@ class JoinedModality(Modality):
def _apply_representation(self, modality, representation):
transformed = representation.transform(modality)
- if self.aggregation:
- aggregated_data_left = self.aggregation.window(transformed)
- transformed = Modality(
- transformed.modality_type,
- transformed.metadata,
- )
- transformed.data = aggregated_data_left
+ # if self.aggregation:
+ # aggregated_data_left = self.aggregation.execute(transformed)
+ # transformed = Modality(
+ # transformed.modality_type,
+ # transformed.metadata,
+ # )
+ # transformed.data = aggregated_data_left
return transformed
diff --git a/src/main/python/systemds/scuro/modality/joined_transformed.py
b/src/main/python/systemds/scuro/modality/joined_transformed.py
index e2b53671aa..a0ab8c4ce9 100644
--- a/src/main/python/systemds/scuro/modality/joined_transformed.py
+++ b/src/main/python/systemds/scuro/modality/joined_transformed.py
@@ -25,6 +25,7 @@ import numpy as np
from systemds.scuro.modality.modality import Modality
from systemds.scuro.representations.utils import pad_sequences
+from systemds.scuro.representations.window import WindowAggregation
class JoinedTransformedModality(Modality):
@@ -68,3 +69,9 @@ class JoinedTransformedModality(Modality):
self.data[i] = np.array(r)
self.data = pad_sequences(self.data)
return self
+
+ def window(self, window_size, aggregation):
+ w = WindowAggregation(window_size, aggregation)
+ self.left_modality.data = w.execute(self.left_modality)
+ self.right_modality.data = w.execute(self.right_modality)
+ return self
diff --git a/src/main/python/systemds/scuro/modality/modality.py
b/src/main/python/systemds/scuro/modality/modality.py
index cce26eee01..c110a24eba 100644
--- a/src/main/python/systemds/scuro/modality/modality.py
+++ b/src/main/python/systemds/scuro/modality/modality.py
@@ -18,28 +18,42 @@
# under the License.
#
# -------------------------------------------------------------
+from copy import deepcopy
from typing import List
import numpy as np
-from systemds.scuro.modality.type import ModalityType
+from systemds.scuro.modality.type import ModalityType, DataLayout
+from systemds.scuro.representations import utils
class Modality:
- def __init__(self, modalityType: ModalityType, metadata=None):
+ def __init__(self, modalityType: ModalityType, modality_id=-1,
metadata={}):
"""
Parent class of the different Modalities (unimodal & multimodal)
:param modality_type: Type of the modality
"""
self.modality_type = modalityType
self.schema = modalityType.get_schema()
+ self.metadata = metadata
self.data = []
self.data_type = None
self.cost = None
self.shape = None
- self.dataIndex = None
- self.metadata = metadata
+ self.modality_id = modality_id
+
+ @property
+ def data(self):
+ return self._data
+
+ @data.setter
+ def data(self, value):
+ """
+ This method ensures that the data layout in the metadata is updated
when the data changes
+ """
+ self._data = value
+ self.update_metadata()
def get_modality_names(self) -> List[str]:
"""
@@ -50,64 +64,66 @@ class Modality:
]
def copy_from_instance(self):
+ """
+ Create a copy of the modality instance
+ """
return type(self)(self.modality_type, self.metadata)
def update_metadata(self):
- md_copy = self.metadata
+ """
+ Updates the metadata of the modality (i.e.: updates timestamps)
+ """
+ if (
+ not self.has_metadata()
+ or not self.has_data()
+ or len(self.data) < len(self.metadata)
+ ):
+ return
+
+ md_copy = deepcopy(self.metadata)
self.metadata = {}
for i, (md_k, md_v) in enumerate(md_copy.items()):
updated_md = self.modality_type.update_metadata(md_v, self.data[i])
self.metadata[md_k] = updated_md
- def get_metadata_at_position(self, position: int):
- return self.metadata[self.dataIndex][position]
-
- def flatten(self):
+ def flatten(self, padding=True):
+ """
+ Flattens modality data by row-wise concatenation
+ Prerequisite for some ML-models
+ """
+ max_len = 0
for num_instance, instance in enumerate(self.data):
if type(instance) is np.ndarray:
self.data[num_instance] = instance.flatten()
- elif type(instance) is list:
+ elif isinstance(instance, List):
self.data[num_instance] = np.array(
[item for sublist in instance for item in sublist]
- )
-
+ ).flatten()
+ max_len = max(max_len, len(self.data[num_instance]))
+
+ if padding:
+ for i, instance in enumerate(self.data):
+ if isinstance(instance, np.ndarray):
+ if len(instance) < max_len:
+ padded_data = np.zeros(max_len, dtype=instance.dtype)
+ padded_data[: len(instance)] = instance
+ self.data[i] = padded_data
+ else:
+ padded_data = []
+ for entry in instance:
+ padded_data.append(utils.pad_sequences(entry, max_len))
+ self.data[i] = padded_data
self.data = np.array(self.data)
return self
def get_data_layout(self):
- if not self.data:
- return self.data
-
- if isinstance(self.data[0], list):
- return "list_of_lists_of_numpy_array"
- elif isinstance(self.data[0], np.ndarray):
- return "list_of_numpy_array"
-
- def get_data_shape(self):
- layout = self.get_data_layout()
- if not layout:
- return None
-
- if layout == "list_of_lists_of_numpy_array":
- return self.data[0][0].shape
- elif layout == "list_of_numpy_array":
- return self.data[0].shape
-
- def get_data_dtype(self):
- layout = self.get_data_layout()
- if not layout:
- return None
-
- if layout == "list_of_lists_of_numpy_array":
- return self.data[0][0].dtype
- elif layout == "list_of_numpy_array":
- return self.data[0].dtype
-
- def update_data_layout(self):
- if not self.data:
- return
+ if self.has_metadata():
+ return
list(self.metadata.values())[0]["data_layout"]["representation"]
+
+ return None
- self.schema["data_layout"]["representation"] = self.get_data_layout()
+ def has_data(self):
+ return self.data is not None and len(self.data) != 0
- self.shape = self.get_data_shape()
- self.schema["data_layout"]["type"] = self.get_data_dtype()
+ def has_metadata(self):
+ return self.metadata is not None and self.metadata != {}
diff --git a/src/main/python/systemds/scuro/representations/unimodal.py
b/src/main/python/systemds/scuro/modality/modality_identifier.py
similarity index 69%
copy from src/main/python/systemds/scuro/representations/unimodal.py
copy to src/main/python/systemds/scuro/modality/modality_identifier.py
index c56d611a74..95668c6e58 100644
--- a/src/main/python/systemds/scuro/representations/unimodal.py
+++ b/src/main/python/systemds/scuro/modality/modality_identifier.py
@@ -18,21 +18,24 @@
# under the License.
#
# -------------------------------------------------------------
+import os
+import pickle
+from typing import List, Dict, Any, Union
+import tempfile
from systemds.scuro.representations.representation import Representation
-class UnimodalRepresentation(Representation):
- def __init__(self, name):
- """
- Parent class for all unimodal representation types
- :param name: name of the representation
- """
- super().__init__(name)
+class ModalityIdentifier:
+ """ """
- def transform(self, data):
- raise f"Not implemented for {self.name}"
+ _instance = None
+ id = -1
+ def __new__(cls):
+ if not cls._instance:
+ cls._instance = super().__new__(cls)
+ return cls._instance
-class PixelRepresentation(UnimodalRepresentation):
- def __init__(self):
- super().__init__("Pixel")
+ def new_id(self): # TODO: make threadsafe when parallelizing
+ self.id += 1
+ return self.id
diff --git a/src/main/python/systemds/scuro/modality/transformed.py
b/src/main/python/systemds/scuro/modality/transformed.py
index 64bfba0819..2b4b049ef4 100644
--- a/src/main/python/systemds/scuro/modality/transformed.py
+++ b/src/main/python/systemds/scuro/modality/transformed.py
@@ -21,6 +21,7 @@
from functools import reduce
from operator import or_
+from systemds.scuro.modality.type import ModalityType
from systemds.scuro.modality.joined import JoinedModality
from systemds.scuro.modality.modality import Modality
from systemds.scuro.representations.window import WindowAggregation
@@ -28,25 +29,26 @@ from systemds.scuro.representations.window import
WindowAggregation
class TransformedModality(Modality):
- def __init__(self, modality_type, transformation, metadata):
+ def __init__(self, modality_type, transformation, modality_id, metadata):
"""
Parent class of the different Modalities (unimodal & multimodal)
:param modality_type: Type of the original modality(ies)
:param transformation: Representation to be applied on the modality
"""
- super().__init__(modality_type, metadata)
+ super().__init__(modality_type, modality_id, metadata)
self.transformation = transformation
- self.data = []
def copy_from_instance(self):
- return type(self)(self.modality_type, self.transformation,
self.metadata)
+ return type(self)(
+ self.modality_type, self.transformation, self.modality_id,
self.metadata
+ )
def join(self, right, join_condition):
chunked_execution = False
if type(right).__name__.__contains__("Unimodal"):
if right.data_loader.chunk_size:
chunked_execution = True
- elif right.data is None or len(right.data) == 0:
+ elif not right.has_data():
right.extract_raw_data()
joined_modality = JoinedModality(
@@ -59,24 +61,29 @@ class TransformedModality(Modality):
if not chunked_execution:
joined_modality.execute(0)
+ joined_modality.joined_right.update_metadata()
return joined_modality
- def window(self, windowSize, aggregationFunction, fieldName=None):
+ def window(self, windowSize, aggregation):
transformed_modality = TransformedModality(
- self.modality_type, "window", self.metadata
+ self.modality_type, "window", self.modality_id, self.metadata
)
- w = WindowAggregation(windowSize, aggregationFunction)
- transformed_modality.data = w.window(self)
+ w = WindowAggregation(windowSize, aggregation)
+ transformed_modality.data = w.execute(self)
return transformed_modality
- def apply_representation(self, representation, aggregation):
- new_modality = representation.transform(self)
+ def context(self, context_operator):
+ transformed_modality = TransformedModality(
+ self.modality_type, context_operator.name, self.modality_id,
self.metadata
+ )
- if aggregation:
- new_modality.data = aggregation.window(new_modality)
+ transformed_modality.data = context_operator.execute(self)
+ return transformed_modality
+ def apply_representation(self, representation):
+ new_modality = representation.transform(self)
new_modality.update_metadata()
return new_modality
@@ -87,8 +94,9 @@ class TransformedModality(Modality):
:param fusion_method: The fusion method to be used to combine
modalities
"""
fused_modality = TransformedModality(
- reduce(or_, (o.modality_type for o in other), self.modality_type),
+ ModalityType.EMBEDDING,
fusion_method,
+ self.modality_id,
self.metadata,
)
modalities = [self]
diff --git a/src/main/python/systemds/scuro/modality/type.py
b/src/main/python/systemds/scuro/modality/type.py
index 197ad23c54..4b59c263d6 100644
--- a/src/main/python/systemds/scuro/modality/type.py
+++ b/src/main/python/systemds/scuro/modality/type.py
@@ -18,7 +18,10 @@
# under the License.
#
# -------------------------------------------------------------
-from enum import Flag, auto
+from enum import Flag, auto, Enum
+from copy import deepcopy
+import numpy as np
+
from systemds.scuro.utils.schema_helpers import (
calculate_new_frequency,
create_timestamps,
@@ -28,25 +31,40 @@ from systemds.scuro.utils.schema_helpers import (
# TODO: needs a way to define if data comes from a dataset with multiple
instances or is like a streaming scenario where we only have one instance
# right now it is a list of instances (if only one instance the list would
contain only a single item)
class ModalitySchemas:
- TEXT_SCHEMA = {"type": "string", "length": "int"}
+ BASE_SCHEMA = {"data_layout": {"type": "?", "representation": "?",
"shape": "?"}}
- AUDIO_SCHEMA = {
+ TEMPORAL_BASE_SCHEMA = {
+ **BASE_SCHEMA,
"timestamp": "array",
- "data_layout": {"type": "?", "representation": "?"},
- "sample_rate": "integer",
"length": "integer",
+ "frequency": "float",
}
+ TEXT_SCHEMA = {**BASE_SCHEMA, "length": "int"}
+
+ EMBEDDING_SCHEMA = {**BASE_SCHEMA, "length": "int"}
+
+ AUDIO_SCHEMA = {**TEMPORAL_BASE_SCHEMA, "frequency": "integer"}
+
VIDEO_SCHEMA = {
- "timestamp": "array",
- "data_layout": {"type": "?", "representation": "?"},
- "fps": "integer",
- "length": "integer",
+ **TEMPORAL_BASE_SCHEMA,
"width": "integer",
"height": "integer",
"num_channels": "integer",
}
+ IMAGE_SCHEMA = {
+ **BASE_SCHEMA,
+ "width": "integer",
+ "height": "integer",
+ "num_channels": "integer",
+ }
+
+ TIMESERIES_SCHEMA = {
+ **TEMPORAL_BASE_SCHEMA,
+ "num_columns": "integer",
+ }
+
_metadata_handlers = {}
@classmethod
@@ -67,10 +85,31 @@ class ModalitySchemas:
@classmethod
def update_metadata(cls, name, md, data):
+ md = cls.update_base_metadata(md, data)
mdHandler = cls._metadata_handlers.get(name)
if mdHandler:
return mdHandler(md, data)
+ return md
+
+ @classmethod
+ def update_base_metadata(cls, md, data, data_is_single_instance=True):
+ data_layout = DataLayout.get_data_layout(data, data_is_single_instance)
+
+ dtype = np.nan
+ shape = None
+ if data_layout is DataLayout.SINGLE_LEVEL:
+ dtype = data.dtype
+ shape = data.shape
+ elif data_layout is DataLayout.NESTED_LEVEL:
+ shape = data[0].shape
+ dtype = data[0].dtype
+
+ md["data_layout"].update(
+ {"representation": data_layout, "type": dtype, "shape": shape}
+ )
+ return md
+
def extract_data(self, data, index):
if self.get("data_layout").get("representation") == "list_array":
return data[index]
@@ -80,11 +119,11 @@ class ModalitySchemas:
@ModalitySchemas.register_metadata_handler("AUDIO")
def handle_audio_metadata(md, data):
- new_frequency = calculate_new_frequency(len(data), md["length"],
md["sample_rate"])
+ new_frequency = calculate_new_frequency(len(data), md["length"],
md["frequency"])
md.update(
{
"length": len(data),
- "sample_rate": new_frequency,
+ "frequency": new_frequency,
"timestamp": create_timestamps(new_frequency, len(data)),
}
)
@@ -93,24 +132,112 @@ def handle_audio_metadata(md, data):
@ModalitySchemas.register_metadata_handler("VIDEO")
def handle_video_metadata(md, data):
- new_frequency = calculate_new_frequency(len(data), md["length"], md["fps"])
+ new_frequency = calculate_new_frequency(len(data), md["length"],
md["frequency"])
+ md.update(
+ {
+ "length": len(data),
+ "frequency": new_frequency,
+ "timestamp": create_timestamps(new_frequency, len(data)),
+ }
+ )
+ return md
+
+
[email protected]_metadata_handler("IMAGE")
+def handle_image_metadata(md, data):
+ md.update(
+ {
+ "width": data.shape[1] if isinstance(data, np.ndarray) else 1,
+ "height": data.shape[0] if isinstance(data, np.ndarray) else
len(data),
+ "num_channels": 1, # if data.ndim <= 2 else data.shape[2],
+ }
+ )
+ return md
+
+
[email protected]_metadata_handler("TIMESERIES")
+def handle_timeseries_metadata(md, data):
+ new_frequency = calculate_new_frequency(len(data), md["length"],
md["frequency"])
md.update(
{
"length": len(data),
- "fps": new_frequency,
+ "num_columns": (
+ data.shape[1] if isinstance(data, np.ndarray) and data.ndim >
1 else 1
+ ),
+ "frequency": new_frequency,
"timestamp": create_timestamps(new_frequency, len(data)),
}
)
return md
[email protected]_metadata_handler("TEXT")
+def handle_text_metadata(md, data):
+ md.update({"length": len(data)})
+ return md
+
+
class ModalityType(Flag):
TEXT = auto()
AUDIO = auto()
VIDEO = auto()
+ IMAGE = auto()
+ TIMESERIES = auto()
+ EMBEDDING = auto()
def get_schema(self):
return ModalitySchemas.get(self.name)
def update_metadata(self, md, data):
return ModalitySchemas.update_metadata(self.name, md, data)
+
+ def create_audio_metadata(self, sampling_rate, data):
+ md = deepcopy(self.get_schema())
+ md = ModalitySchemas.update_base_metadata(md, data, True)
+ md["frequency"] = sampling_rate
+ md["length"] = data.shape[0]
+ md["timestamp"] = create_timestamps(sampling_rate, md["length"])
+ return md
+
+ def create_text_metadata(self, length, data):
+ md = deepcopy(self.get_schema())
+ md["data_layout"]["representation"] = DataLayout.SINGLE_LEVEL
+ md["data_layout"]["shape"] = (1, length)
+ md["data_layout"]["type"] = str
+ md["length"] = length
+ return md
+
+ def create_video_metadata(self, frequency, length, width, height,
num_channels):
+ md = deepcopy(self.get_schema())
+ md["frequency"] = frequency
+ md["length"] = length
+ md["width"] = width
+ md["height"] = height
+ md["num_channels"] = num_channels
+ md["timestamp"] = create_timestamps(frequency, length)
+ md["data_layout"]["representation"] = DataLayout.NESTED_LEVEL
+ md["data_layout"]["type"] = float
+ md["data_layout"]["shape"] = (width, height, num_channels)
+
+ return md
+
+
+class DataLayout(Enum):
+ SINGLE_LEVEL = 1
+ NESTED_LEVEL = 2
+
+ @classmethod
+ def get_data_layout(cls, data, data_is_single_instance):
+ if data is None or len(data) == 0:
+ return None
+
+ if data_is_single_instance:
+ if isinstance(data, list):
+ return DataLayout.NESTED_LEVEL
+ elif isinstance(data, np.ndarray):
+ return DataLayout.SINGLE_LEVEL
+
+ if isinstance(data[0], list):
+ return DataLayout.NESTED_LEVEL
+ elif isinstance(data[0], np.ndarray):
+ return DataLayout.SINGLE_LEVEL
diff --git a/src/main/python/systemds/scuro/modality/unimodal_modality.py
b/src/main/python/systemds/scuro/modality/unimodal_modality.py
index ae33b6605b..6173237e0a 100644
--- a/src/main/python/systemds/scuro/modality/unimodal_modality.py
+++ b/src/main/python/systemds/scuro/modality/unimodal_modality.py
@@ -27,21 +27,22 @@ from systemds.scuro.modality.modality import Modality
from systemds.scuro.modality.joined import JoinedModality
from systemds.scuro.modality.transformed import TransformedModality
from systemds.scuro.modality.type import ModalityType
+from systemds.scuro.modality.modality_identifier import ModalityIdentifier
class UnimodalModality(Modality):
- def __init__(self, data_loader: BaseLoader, modality_type: ModalityType):
+ def __init__(self, data_loader: BaseLoader):
"""
- This class represents a unimodal modality.
+ This class represents an unimodal modality.
:param data_loader: Defines how the raw data should be loaded
:param modality_type: Type of the modality
"""
- super().__init__(modality_type, None)
+ super().__init__(data_loader.modality_type,
ModalityIdentifier().new_id(), None)
self.data_loader = data_loader
def copy_from_instance(self):
- new_instance = type(self)(self.data_loader, self.modality_type)
+ new_instance = type(self)(self.data_loader)
if self.metadata:
new_instance.metadata = self.metadata.copy()
return new_instance
@@ -73,20 +74,46 @@ class UnimodalModality(Modality):
self.data_loader.chunk_size is not None,
)
+ if self.data_loader.chunk_size is None:
+ self.extract_raw_data()
+ joined_modality.execute(0)
+ joined_modality.joined_right.update_metadata()
+
return joined_modality
- def apply_representation(self, representation, aggregation=None):
+ def context(self, context_operator):
+ if not self.has_data():
+ self.extract_raw_data()
+
+ transformed_modality = TransformedModality(
+ self.modality_type, context_operator.name, self.modality_id,
self.metadata
+ )
+
+ transformed_modality.data = context_operator.execute(self)
+ return transformed_modality
+
+ def aggregate(self, aggregation_function):
+ if self.data is None:
+ raise Exception("Data is None")
+
+ def apply_representations(self, representations):
+ # TODO
+ pass
+
+ def apply_representation(self, representation):
new_modality = TransformedModality(
- self.modality_type, representation.name,
self.data_loader.metadata.copy()
+ self.modality_type,
+ representation.name,
+ self.modality_id,
+ self.data_loader.metadata.copy(),
)
new_modality.data = []
if self.data_loader.chunk_size:
+ self.data_loader.reset()
while self.data_loader.next_chunk < self.data_loader.num_chunks:
self.extract_raw_data()
transformed_chunk = representation.transform(self)
- if aggregation:
- transformed_chunk.data =
aggregation.window(transformed_chunk)
new_modality.data.extend(transformed_chunk.data)
new_modality.metadata.update(transformed_chunk.metadata)
else:
@@ -94,8 +121,5 @@ class UnimodalModality(Modality):
self.extract_raw_data()
new_modality = representation.transform(self)
- if aggregation:
- new_modality.data = aggregation.window(new_modality)
-
new_modality.update_metadata()
return new_modality
diff --git a/src/main/python/systemds/scuro/representations/aggregate.py
b/src/main/python/systemds/scuro/representations/aggregate.py
index 7c8d1c68d1..4b4545ef47 100644
--- a/src/main/python/systemds/scuro/representations/aggregate.py
+++ b/src/main/python/systemds/scuro/representations/aggregate.py
@@ -21,31 +21,77 @@
import numpy as np
from systemds.scuro.modality.modality import Modality
+from systemds.scuro.representations import utils
-# TODO: make this a Representation and add a fusion method that fuses two
modalities with each other
+class Aggregation:
+ @staticmethod
+ def _mean_agg(data):
+ return np.mean(data, axis=0)
+ @staticmethod
+ def _max_agg(data):
+ return np.max(data, axis=0)
-class Aggregation:
- def __init__(self, aggregation_function, field_name):
- self.aggregation_function = aggregation_function
- self.field_name = field_name
+ @staticmethod
+ def _min_agg(data):
+ return np.min(data, axis=0)
+
+ @staticmethod
+ def _sum_agg(data):
+ return np.sum(data, axis=0)
+
+ _aggregation_function = {
+ "mean": _mean_agg.__func__,
+ "max": _max_agg.__func__,
+ "min": _min_agg.__func__,
+ "sum": _sum_agg.__func__,
+ }
+
+ def __init__(self, aggregation_function="mean", pad_modality=False):
+ if aggregation_function not in self._aggregation_function.keys():
+ raise ValueError("Invalid aggregation function")
+ self._aggregation_func =
self._aggregation_function[aggregation_function]
+ self.name = "Aggregation"
+ self.pad_modality = pad_modality
- def aggregate(self, modality):
- aggregated_modality = Modality(modality.modality_type,
modality.metadata)
+ def execute(self, modality):
+ aggregated_modality = Modality(
+ modality.modality_type, modality.modality_id, modality.metadata
+ )
aggregated_modality.data = []
+ max_len = 0
for i, instance in enumerate(modality.data):
aggregated_modality.data.append([])
- for j, entry in enumerate(instance):
- if self.aggregation_function == "sum":
- aggregated_modality.data[i].append(np.sum(entry, axis=0))
- elif self.aggregation_function == "mean":
- aggregated_modality.data[i].append(np.mean(entry, axis=0))
- elif self.aggregation_function == "min":
- aggregated_modality.data[i].append(np.min(entry, axis=0))
- elif self.aggregation_function == "max":
- aggregated_modality.data[i].append(np.max(entry, axis=0))
+ if isinstance(instance, np.ndarray):
+ aggregated_data = self._aggregation_func(instance)
+ else:
+ aggregated_data = []
+ for entry in instance:
+ aggregated_data.append(self._aggregation_func(entry))
+ max_len = max(max_len, len(aggregated_data))
+ aggregated_modality.data[i] = aggregated_data
+
+ if self.pad_modality:
+ for i, instance in enumerate(aggregated_modality.data):
+ if isinstance(instance, np.ndarray):
+ if len(instance) < max_len:
+ padded_data = np.zeros(max_len, dtype=instance.dtype)
+ padded_data[: len(instance)] = instance
+ aggregated_modality.data[i] = padded_data
else:
- raise ValueError("Invalid aggregation function")
+ padded_data = []
+ for entry in instance:
+ padded_data.append(utils.pad_sequences(entry, max_len))
+ aggregated_modality.data[i] = padded_data
return aggregated_modality
+
+ def transform(self, modality):
+ return self.execute(modality)
+
+ def aggregate_instance(self, instance):
+ return self._aggregation_func(instance)
+
+ def get_aggregation_functions(self):
+ return self._aggregation_function.keys()
diff --git a/src/main/python/systemds/scuro/representations/bert.py
b/src/main/python/systemds/scuro/representations/bert.py
index bfaaa22642..6395d0b9e6 100644
--- a/src/main/python/systemds/scuro/representations/bert.py
+++ b/src/main/python/systemds/scuro/representations/bert.py
@@ -26,17 +26,20 @@ from systemds.scuro.representations.unimodal import
UnimodalRepresentation
import torch
from transformers import BertTokenizer, BertModel
from systemds.scuro.representations.utils import save_embeddings
+from systemds.scuro.modality.type import ModalityType
class Bert(UnimodalRepresentation):
- def __init__(self, output_file=None):
- super().__init__("Bert")
+ def __init__(self, model_name="bert", output_file=None):
+ parameters = {"model_name": "bert"}
+ self.model_name = model_name
+ super().__init__("Bert", ModalityType.EMBEDDING, parameters)
self.output_file = output_file
def transform(self, modality):
transformed_modality = TransformedModality(
- modality.modality_type, self, modality.metadata
+ modality.modality_type, self, modality.modality_id,
modality.metadata
)
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(
@@ -46,7 +49,7 @@ class Bert(UnimodalRepresentation):
model = BertModel.from_pretrained(model_name)
embeddings = self.create_embeddings(modality.data, model, tokenizer)
-
+ embeddings = [embeddings[i : i + 1] for i in
range(embeddings.shape[0])]
if self.output_file is not None:
save_embeddings(embeddings, self.output_file)
diff --git a/src/main/python/systemds/scuro/representations/bow.py
b/src/main/python/systemds/scuro/representations/bow.py
index f16f6ec04d..52fddc7d3f 100644
--- a/src/main/python/systemds/scuro/representations/bow.py
+++ b/src/main/python/systemds/scuro/representations/bow.py
@@ -25,23 +25,27 @@ from systemds.scuro.modality.transformed import
TransformedModality
from systemds.scuro.representations.unimodal import UnimodalRepresentation
from systemds.scuro.representations.utils import save_embeddings
+from systemds.scuro.modality.type import ModalityType
+
class BoW(UnimodalRepresentation):
- def __init__(self, ngram_range, min_df, output_file=None):
- super().__init__("BoW")
+ def __init__(self, ngram_range=2, min_df=2, output_file=None):
+ parameters = {"ngram_range": [ngram_range], "min_df": [min_df]}
+ super().__init__("BoW", ModalityType.EMBEDDING, parameters)
self.ngram_range = ngram_range
self.min_df = min_df
self.output_file = output_file
def transform(self, modality):
transformed_modality = TransformedModality(
- modality.modality_type, self, modality.metadata
+ modality.modality_type, self, modality.modality_id,
modality.metadata
)
vectorizer = CountVectorizer(
ngram_range=(1, self.ngram_range), min_df=self.min_df
)
X = vectorizer.fit_transform(modality.data).toarray()
+ X = [X[i : i + 1] for i in range(X.shape[0])]
if self.output_file is not None:
save_embeddings(X, self.output_file)
diff --git a/src/main/python/systemds/scuro/representations/unimodal.py
b/src/main/python/systemds/scuro/representations/context.py
similarity index 60%
copy from src/main/python/systemds/scuro/representations/unimodal.py
copy to src/main/python/systemds/scuro/representations/context.py
index c56d611a74..4cbcf54f8e 100644
--- a/src/main/python/systemds/scuro/representations/unimodal.py
+++ b/src/main/python/systemds/scuro/representations/context.py
@@ -18,21 +18,26 @@
# under the License.
#
# -------------------------------------------------------------
+import abc
+from typing import List
+
+from systemds.scuro.modality.modality import Modality
from systemds.scuro.representations.representation import Representation
-class UnimodalRepresentation(Representation):
- def __init__(self, name):
+class Context(Representation):
+ def __init__(self, name, parameters=None):
"""
- Parent class for all unimodal representation types
- :param name: name of the representation
+ Parent class for different context operations
+ :param name: Name of the context operator
"""
- super().__init__(name)
-
- def transform(self, data):
- raise f"Not implemented for {self.name}"
-
+ super().__init__(name, parameters)
-class PixelRepresentation(UnimodalRepresentation):
- def __init__(self):
- super().__init__("Pixel")
+ @abc.abstractmethod
+ def execute(self, modality: Modality):
+ """
+ Implemented for every child class and creates a contextualized
representation for a given modality
+ :param modality: modality to use
+ :return: contextualized data
+ """
+ raise f"Not implemented for Context Operator: {self.name}"
diff --git a/src/main/python/systemds/scuro/representations/fusion.py
b/src/main/python/systemds/scuro/representations/fusion.py
index 623979dd05..773452371b 100644
--- a/src/main/python/systemds/scuro/representations/fusion.py
+++ b/src/main/python/systemds/scuro/representations/fusion.py
@@ -20,17 +20,19 @@
# -------------------------------------------------------------
from typing import List
+import numpy as np
+
from systemds.scuro.modality.modality import Modality
from systemds.scuro.representations.representation import Representation
class Fusion(Representation):
- def __init__(self, name):
+ def __init__(self, name, parameters=None):
"""
Parent class for different multimodal fusion types
:param name: Name of the fusion type
"""
- super().__init__(name)
+ super().__init__(name, parameters)
def transform(self, modalities: List[Modality]):
"""
@@ -47,10 +49,20 @@ class Fusion(Representation):
:param modalities: List of modalities
:return: maximum embedding size
"""
- max_size = modalities[0].data.shape[1]
+ if isinstance(modalities[0].data[0], list):
+ max_size = modalities[0].data[0][0].shape[1]
+ elif isinstance(modalities[0].data, np.ndarray):
+ max_size = modalities[0].data.shape[1]
+ else:
+ max_size = modalities[0].data[0].shape[1]
for idx in range(1, len(modalities)):
- curr_shape = modalities[idx].data.shape
- if len(modalities[idx - 1].data) != curr_shape[0]:
+ if isinstance(modalities[idx].data[0], list):
+ curr_shape = modalities[idx].data[0][0].shape
+ elif isinstance(modalities[idx].data, np.ndarray):
+ curr_shape = modalities[idx].data.shape
+ else:
+ curr_shape = modalities[idx].data[0].shape
+ if len(modalities[idx - 1].data) != len(modalities[idx].data):
raise f"Modality sizes don't match!"
elif curr_shape[1] > max_size:
max_size = curr_shape[1]
diff --git a/src/main/python/systemds/scuro/representations/glove.py
b/src/main/python/systemds/scuro/representations/glove.py
index 767fc8d375..7bb586dc99 100644
--- a/src/main/python/systemds/scuro/representations/glove.py
+++ b/src/main/python/systemds/scuro/representations/glove.py
@@ -24,6 +24,7 @@ from gensim.utils import tokenize
from systemds.scuro.representations.unimodal import UnimodalRepresentation
from systemds.scuro.representations.utils import read_data_from_file,
save_embeddings
+from systemds.scuro.modality.type import ModalityType
def load_glove_embeddings(file_path):
@@ -39,7 +40,7 @@ def load_glove_embeddings(file_path):
class GloVe(UnimodalRepresentation):
def __init__(self, glove_path, output_file=None):
- super().__init__("GloVe")
+ super().__init__("GloVe", ModalityType.TEXT)
self.glove_path = glove_path
self.output_file = output_file
diff --git a/src/main/python/systemds/scuro/representations/mel_spectrogram.py
b/src/main/python/systemds/scuro/representations/mel_spectrogram.py
index 483ea181b8..dfff4f3b7e 100644
--- a/src/main/python/systemds/scuro/representations/mel_spectrogram.py
+++ b/src/main/python/systemds/scuro/representations/mel_spectrogram.py
@@ -21,19 +21,27 @@
import librosa
import numpy as np
+from systemds.scuro.modality.type import ModalityType
from systemds.scuro.modality.transformed import TransformedModality
-# import matplotlib.pyplot as plt
from systemds.scuro.representations.unimodal import UnimodalRepresentation
class MelSpectrogram(UnimodalRepresentation):
- def __init__(self):
- super().__init__("MelSpectrogram")
+ def __init__(self, n_mels=128, hop_length=512, n_fft=2048):
+ parameters = {
+ "n_mels": [20, 32, 64, 128],
+ "hop_length": [256, 512, 1024, 2048],
+ "n_fft": [1024, 2048, 4096],
+ }
+ super().__init__("MelSpectrogram", ModalityType.TIMESERIES, parameters)
+ self.n_mels = n_mels
+ self.hop_length = hop_length
+ self.n_fft = n_fft
def transform(self, modality):
transformed_modality = TransformedModality(
- modality.modality_type, self, modality.metadata
+ self.output_modality_type, self, modality.modality_id,
modality.metadata
)
result = []
max_length = 0
@@ -46,12 +54,3 @@ class MelSpectrogram(UnimodalRepresentation):
transformed_modality.data = result
return transformed_modality
-
- # def plot_spectrogram(self, spectrogram):
- # plt.figure(figsize=(10, 4))
- # librosa.display.specshow(
- # spectrogram, x_axis="time", y_axis="mel", sr=22050,
cmap="viridis"
- # )
- # plt.colorbar(format="%+2.0f dB")
- # plt.title("Mel Spectrogram")
- # plt.savefig("spectrogram.jpg")
diff --git a/src/main/python/systemds/scuro/representations/representation.py
b/src/main/python/systemds/scuro/representations/representation.py
index 92967ed9c5..a9f283b6fe 100644
--- a/src/main/python/systemds/scuro/representations/representation.py
+++ b/src/main/python/systemds/scuro/representations/representation.py
@@ -18,8 +18,24 @@
# under the License.
#
# -------------------------------------------------------------
+from abc import abstractmethod
class Representation:
- def __init__(self, name):
+ def __init__(self, name, parameters):
self.name = name
+ self._parameters = parameters
+
+ @property
+ def parameters(self):
+ return self._parameters
+
+ def get_current_parameters(self):
+ current_params = {}
+ for parameter in self.parameters.keys():
+ current_params[parameter] = getattr(self, parameter)
+ return current_params
+
+ def set_parameters(self, parameters):
+ for parameter in parameters:
+ setattr(self, parameter, parameters[parameter])
diff --git a/src/main/python/systemds/scuro/representations/resnet.py
b/src/main/python/systemds/scuro/representations/resnet.py
index ff63e6766b..60eed9ea12 100644
--- a/src/main/python/systemds/scuro/representations/resnet.py
+++ b/src/main/python/systemds/scuro/representations/resnet.py
@@ -27,6 +27,7 @@ import torch
import torchvision.models as models
import torchvision.transforms as transforms
import numpy as np
+from systemds.scuro.modality.type import ModalityType
if torch.backends.mps.is_available():
DEVICE = torch.device("mps")
@@ -38,7 +39,11 @@ else:
class ResNet(UnimodalRepresentation):
def __init__(self, layer="avgpool", model_name="ResNet18",
output_file=None):
- super().__init__("ResNet")
+ self.model_name = model_name
+ parameters = self._get_parameters()
+ super().__init__(
+ "ResNet", ModalityType.TIMESERIES, parameters
+ ) # TODO: TIMESERIES only for videos - images would be handled as
EMBEDDING
self.output_file = output_file
self.layer_name = layer
@@ -82,6 +87,25 @@ class ResNet(UnimodalRepresentation):
else:
raise NotImplementedError
+ def _get_parameters(self, high_level=True):
+ parameters = {"model_name": [], "layer_name": []}
+ for m in ["ResNet18", "ResNet34", "ResNet50", "ResNet101",
"ResNet152"]:
+ parameters["model_name"].append(m)
+
+ if high_level:
+ parameters["layer_name"] = [
+ "conv1",
+ "layer1",
+ "layer2",
+ "layer3",
+ "layer4",
+ "avgpool",
+ ]
+ else:
+ for name, layer in self.model.named_modules():
+ parameters["layer_name"].append(name)
+ return parameters
+
def transform(self, modality):
t = transforms.Compose(
@@ -135,11 +159,13 @@ class ResNet(UnimodalRepresentation):
torch.flatten(pooled, 1).detach().cpu().numpy()
)
+ embeddings[video_id] = np.array(embeddings[video_id])
+
transformed_modality = TransformedModality(
- modality.modality_type, "resnet", modality.metadata
+ self.output_modality_type, "resnet", modality.modality_id,
modality.metadata
)
+
transformed_modality.data = list(embeddings.values())
- transformed_modality.update_data_layout()
return transformed_modality
diff --git a/src/main/python/systemds/scuro/representations/tfidf.py
b/src/main/python/systemds/scuro/representations/tfidf.py
index 02cfb927c7..30a6655150 100644
--- a/src/main/python/systemds/scuro/representations/tfidf.py
+++ b/src/main/python/systemds/scuro/representations/tfidf.py
@@ -18,27 +18,32 @@
# under the License.
#
# -------------------------------------------------------------
+import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from systemds.scuro.modality.transformed import TransformedModality
from systemds.scuro.representations.unimodal import UnimodalRepresentation
-from systemds.scuro.representations.utils import read_data_from_file,
save_embeddings
+from systemds.scuro.representations.utils import save_embeddings
+
+from systemds.scuro.modality.type import ModalityType
class TfIdf(UnimodalRepresentation):
- def __init__(self, min_df, output_file=None):
- super().__init__("TF-IDF")
+ def __init__(self, min_df=2, output_file=None):
+ parameters = {"min_df": [min_df]}
+ super().__init__("TF-IDF", ModalityType.EMBEDDING, parameters)
self.min_df = min_df
self.output_file = output_file
def transform(self, modality):
transformed_modality = TransformedModality(
- modality.modality_type, self, modality.metadata
+ modality.modality_type, self, modality.modality_id,
modality.metadata
)
+
vectorizer = TfidfVectorizer(min_df=self.min_df)
X = vectorizer.fit_transform(modality.data)
- X = X.toarray()
+ X = [np.array(x).reshape(1, -1) for x in X.toarray()]
if self.output_file is not None:
save_embeddings(X, self.output_file)
diff --git a/src/main/python/systemds/scuro/representations/unimodal.py
b/src/main/python/systemds/scuro/representations/unimodal.py
index c56d611a74..559eec1401 100644
--- a/src/main/python/systemds/scuro/representations/unimodal.py
+++ b/src/main/python/systemds/scuro/representations/unimodal.py
@@ -18,17 +18,25 @@
# under the License.
#
# -------------------------------------------------------------
+import abc
+
from systemds.scuro.representations.representation import Representation
class UnimodalRepresentation(Representation):
- def __init__(self, name):
+ def __init__(self, name: str, output_modality_type, parameters=None):
"""
Parent class for all unimodal representation types
:param name: name of the representation
+ :param parameters: parameters of the representation; name of the
parameter and
+ possible parameter values
"""
- super().__init__(name)
+ super().__init__(name, parameters)
+ self.output_modality_type = output_modality_type
+ if parameters is None:
+ parameters = {}
+ @abc.abstractmethod
def transform(self, data):
raise f"Not implemented for {self.name}"
diff --git a/src/main/python/systemds/scuro/representations/window.py
b/src/main/python/systemds/scuro/representations/window.py
index af0301d0e3..264d40ca42 100644
--- a/src/main/python/systemds/scuro/representations/window.py
+++ b/src/main/python/systemds/scuro/representations/window.py
@@ -21,29 +21,67 @@
import numpy as np
import math
+from systemds.scuro.modality.type import DataLayout
-# TODO: move this into the aggregation class and add an aggregate() and a
window(window_size) function there so they can use the same functionality.
-class WindowAggregation:
- def __init__(self, window_size, aggregation_function):
+# from systemds.scuro.drsearch.operator_registry import
register_context_operator
+from systemds.scuro.representations.aggregate import Aggregation
+from systemds.scuro.representations.context import Context
+
+
+# @register_context_operator()
+class WindowAggregation(Context):
+ def __init__(self, window_size=10, aggregation_function="mean"):
+ parameters = {
+ "window_size": [window_size],
+ "aggregation_function":
list(Aggregation().get_aggregation_functions()),
+ } # TODO: window_size should be dynamic and adapted to the shape of
the data
+ super().__init__("WindowAggregation", parameters)
self.window_size = window_size
self.aggregation_function = aggregation_function
- def window(self, modality):
- # data is a 2d array
+ @property
+ def aggregation_function(self):
+ return self._aggregation_function
+
+ @aggregation_function.setter
+ def aggregation_function(self, value):
+ self._aggregation_function = Aggregation(value)
+
+ def execute(self, modality):
windowed_data = []
for instance in modality.data:
- window_length = math.ceil(len(instance) / self.window_size)
- result = [[] for _ in range(0, window_length)]
- # if modality.schema["data_layout"]["representation"] ==
"list_of_lists_of_numpy_array":
- data = np.stack(instance)
- for i in range(0, window_length):
- result[i] = np.mean(
- data[
- i * self.window_size : i * self.window_size +
self.window_size
- ],
- axis=0,
- ) # TODO: add actual aggregation function here
-
- windowed_data.append(result)
+ new_length = math.ceil(len(instance) / self.window_size)
+ if modality.get_data_layout() == DataLayout.SINGLE_LEVEL:
+ windowed_instance = self.window_aggregate_single_level(
+ instance, new_length
+ )
+ else:
+ windowed_instance = self.window_aggregate_nested_level(
+ instance, new_length
+ )
+
+ windowed_data.append(windowed_instance)
return windowed_data
+
+ def window_aggregate_single_level(self, instance, new_length):
+ num_cols = instance.shape[1] if instance.ndim > 1 else 1
+ result = np.empty((new_length, num_cols))
+ for i in range(0, new_length):
+ result[i] = self.aggregation_function.aggregate_instance(
+ instance[i * self.window_size : i * self.window_size +
self.window_size]
+ )
+
+ if num_cols == 1:
+ result = result.reshape(-1)
+ return result
+
+ def window_aggregate_nested_level(self, instance, new_length):
+ result = [[] for _ in range(0, new_length)]
+ data = np.stack(instance)
+ for i in range(0, new_length):
+ result[i] = self.aggregation_function.aggregate_instance(
+ data[i * self.window_size : i * self.window_size +
self.window_size]
+ )
+
+ return result
diff --git a/src/main/python/systemds/scuro/representations/word2vec.py
b/src/main/python/systemds/scuro/representations/word2vec.py
index b68a9fd3eb..929dbd4415 100644
--- a/src/main/python/systemds/scuro/representations/word2vec.py
+++ b/src/main/python/systemds/scuro/representations/word2vec.py
@@ -25,6 +25,11 @@ from systemds.scuro.representations.utils import
save_embeddings
from gensim.models import Word2Vec
from gensim.utils import tokenize
+from systemds.scuro.modality.type import ModalityType
+import nltk
+
+nltk.download("punkt_tab")
+
def get_embedding(sentence, model):
vectors = []
@@ -36,8 +41,13 @@ def get_embedding(sentence, model):
class W2V(UnimodalRepresentation):
- def __init__(self, vector_size, min_count, window, output_file=None):
- super().__init__("Word2Vec")
+ def __init__(self, vector_size=3, min_count=2, window=2, output_file=None):
+ parameters = {
+ "vector_size": [vector_size],
+ "min_count": [min_count],
+ "window": [window],
+ } # TODO
+ super().__init__("Word2Vec", ModalityType.EMBEDDING, parameters)
self.vector_size = vector_size
self.min_count = min_count
self.window = window
@@ -45,7 +55,7 @@ class W2V(UnimodalRepresentation):
def transform(self, modality):
transformed_modality = TransformedModality(
- modality.modality_type, self, modality.metadata
+ modality.modality_type, self, modality.modality_id,
modality.metadata
)
t = [list(tokenize(s.lower())) for s in modality.data]
model = Word2Vec(
@@ -57,7 +67,7 @@ class W2V(UnimodalRepresentation):
embeddings = []
for sentences in modality.data:
tokens = list(tokenize(sentences.lower()))
- embeddings.append(get_embedding(tokens, model))
+ embeddings.append(np.array(get_embedding(tokens,
model)).reshape(1, -1))
if self.output_file is not None:
save_embeddings(np.array(embeddings), self.output_file)
diff --git a/src/main/python/tests/scuro/data_generator.py
b/src/main/python/tests/scuro/data_generator.py
index ec0783df9c..48ff208e43 100644
--- a/src/main/python/tests/scuro/data_generator.py
+++ b/src/main/python/tests/scuro/data_generator.py
@@ -26,10 +26,54 @@ from scipy.io.wavfile import write
import random
import os
-from systemds.scuro import VideoLoader, AudioLoader, TextLoader,
UnimodalModality
+from systemds.scuro import (
+ VideoLoader,
+ AudioLoader,
+ TextLoader,
+ UnimodalModality,
+ TransformedModality,
+)
from systemds.scuro.modality.type import ModalityType
+class ModalityRandomDataGenerator:
+
+ def __init__(self):
+ self._modality_id = 0
+
+ def create1DModality(
+ self,
+ num_instances,
+ num_features,
+ modality_type,
+ ):
+ data = np.random.rand(num_instances, num_features)
+ # TODO: write a dummy method to create the same metadata for all
instances to avoid the for loop
+ metadata = {}
+ for i in range(num_instances):
+ if modality_type == ModalityType.AUDIO:
+ metadata[i] = modality_type.create_audio_metadata(
+ num_features / 10, data[i]
+ )
+ elif modality_type == ModalityType.TEXT:
+ metadata[i] = modality_type.create_text_metadata(
+ num_features / 10, data[i]
+ )
+ elif modality_type == ModalityType.VIDEO:
+ metadata[i] = modality_type.create_video_metadata(
+ num_features / 30, 10, 0, 0, 1
+ )
+ else:
+ raise NotImplementedError
+
+ tf_modality = TransformedModality(
+ modality_type, "test_transformation", self._modality_id, metadata
+ )
+ tf_modality.data = data
+ self._modality_id += 1
+ return tf_modality
+
+
def setup_data(modalities, num_instances, path):
if os.path.isdir(path):
shutil.rmtree(path)
@@ -51,7 +95,7 @@ def setup_data(modalities, num_instances, path):
else:
raise "Modality not supported in DataGenerator"
- modalities_to_create.append(UnimodalModality(data_loader, modality))
+ modalities_to_create.append(UnimodalModality(data_loader))
data_generator = TestDataGenerator(modalities_to_create, path)
data_generator.create_multimodal_data(num_instances)
diff --git a/src/main/python/tests/scuro/test_data_loaders.py
b/src/main/python/tests/scuro/test_data_loaders.py
index 4ca77b205d..85da2919a0 100644
--- a/src/main/python/tests/scuro/test_data_loaders.py
+++ b/src/main/python/tests/scuro/test_data_loaders.py
@@ -22,6 +22,8 @@
import os
import shutil
import unittest
+import numpy as np
+
from systemds.scuro.modality.unimodal_modality import UnimodalModality
from systemds.scuro.representations.bert import Bert
from systemds.scuro.representations.mel_spectrogram import MelSpectrogram
@@ -72,40 +74,32 @@ class TestDataLoaders(unittest.TestCase):
self.data_generator.get_modality_path(ModalityType.AUDIO),
self.data_generator.indices,
)
- audio = UnimodalModality(
- audio_data_loader, ModalityType.AUDIO
- ).apply_representation(MelSpectrogram())
+ audio = UnimodalModality(audio_data_loader).apply_representation(
+ MelSpectrogram()
+ )
for i in range(0, self.num_instances):
- assert round(sum(sum(self.audio_ref.data[i])), 4) == round(
- sum(sum(audio.data[i])), 4
- )
+ np.testing.assert_almost_equal(self.audio_ref.data[i],
audio.data[i])
def test_load_video_data_from_file(self):
video_data_loader = VideoLoader(
self.data_generator.get_modality_path(ModalityType.VIDEO),
self.data_generator.indices,
)
- video = UnimodalModality(
- video_data_loader, ModalityType.VIDEO
- ).apply_representation(ResNet())
+ video =
UnimodalModality(video_data_loader).apply_representation(ResNet())
for i in range(0, self.num_instances):
- assert round(sum(sum(self.video_ref.data[i])), 4) == round(
- sum(sum(video.data[i])), 4
- )
+ np.testing.assert_almost_equal(self.video_ref.data[i],
video.data[i])
def test_load_text_data_from_file(self):
text_data_loader = TextLoader(
self.data_generator.get_modality_path(ModalityType.TEXT),
self.data_generator.indices,
)
- text = UnimodalModality(
- text_data_loader, ModalityType.TEXT
- ).apply_representation(Bert())
+ text = UnimodalModality(text_data_loader).apply_representation(Bert())
for i in range(0, self.num_instances):
- assert round(sum(self.text_ref.data[i]), 4) ==
round(sum(text.data[i]), 4)
+ np.testing.assert_almost_equal(self.text_ref.data[i], text.data[i])
if __name__ == "__main__":
diff --git a/src/main/python/tests/scuro/test_dr_search.py
b/src/main/python/tests/scuro/test_dr_search.py
index f2ba9d2d79..0959c246e0 100644
--- a/src/main/python/tests/scuro/test_dr_search.py
+++ b/src/main/python/tests/scuro/test_dr_search.py
@@ -43,6 +43,7 @@ from systemds.scuro.representations.resnet import ResNet
from systemds.scuro.representations.sum import Sum
from tests.scuro.data_generator import setup_data
+
import warnings
warnings.filterwarnings("always")
@@ -70,6 +71,7 @@ class TestSVM(Model):
def scale_data(data, train_indizes):
+ data = np.array(data).reshape(len(data), -1)
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(data[train_indizes])
return scaler.transform(data)
@@ -111,7 +113,7 @@ class TestDataLoaders(unittest.TestCase):
cls.resnet = (
cls.data_generator.modalities_by_type[ModalityType.VIDEO]
.apply_representation(ResNet())
- .window(10, "avg")
+ .window(10, "mean")
.flatten()
)
cls.mods = [cls.bert, cls.mel_spe, cls.resnet]
diff --git a/src/main/python/tests/scuro/test_multimodal_join.py
b/src/main/python/tests/scuro/test_multimodal_join.py
index c48f5f56b2..8388829f30 100644
--- a/src/main/python/tests/scuro/test_multimodal_join.py
+++ b/src/main/python/tests/scuro/test_multimodal_join.py
@@ -24,6 +24,7 @@ import shutil
import unittest
from systemds.scuro.modality.joined import JoinCondition
+from systemds.scuro.representations.aggregate import Aggregation
from systemds.scuro.representations.window import WindowAggregation
from systemds.scuro.modality.unimodal_modality import UnimodalModality
from systemds.scuro.representations.mel_spectrogram import MelSpectrogram
@@ -97,14 +98,14 @@ class TestMultimodalJoin(unittest.TestCase):
self.data_generator.indices,
chunk_size=l_chunk_size,
)
- video = UnimodalModality(video_data_loader, ModalityType.VIDEO)
+ video = UnimodalModality(video_data_loader)
audio_data_loader = AudioLoader(
self.data_generator.get_modality_path(ModalityType.AUDIO),
self.data_generator.indices,
r_chunk_size,
)
- audio = UnimodalModality(audio_data_loader, ModalityType.AUDIO)
+ audio = UnimodalModality(audio_data_loader)
mel_audio = audio.apply_representation(MelSpectrogram())
@@ -115,10 +116,8 @@ class TestMultimodalJoin(unittest.TestCase):
left_modality.join(
right_modality, JoinCondition("timestamp", "timestamp", "<")
)
- .apply_representation(
- ResNet(layer="layer1.0.conv2", model_name="ResNet50"),
- WindowAggregation(window_size=window_size,
aggregation_function="mean"),
- )
+ .apply_representation(ResNet(layer="layer1.0.conv2",
model_name="ResNet50"))
+ .window(window_size, "mean")
.combine("concat")
)
diff --git a/src/main/python/tests/scuro/test_unimodal_representations.py
b/src/main/python/tests/scuro/test_unimodal_representations.py
index d566830697..ac167e8fbf 100644
--- a/src/main/python/tests/scuro/test_unimodal_representations.py
+++ b/src/main/python/tests/scuro/test_unimodal_representations.py
@@ -46,7 +46,6 @@ class TestUnimodalRepresentations(unittest.TestCase):
video = None
data_generator = None
num_instances = 0
- indizes = []
@classmethod
def setUpClass(cls):
@@ -69,7 +68,7 @@ class TestUnimodalRepresentations(unittest.TestCase):
self.data_generator.get_modality_path(ModalityType.AUDIO),
self.data_generator.indices,
)
- audio = UnimodalModality(audio_data_loader, ModalityType.AUDIO)
+ audio = UnimodalModality(audio_data_loader)
for representation in audio_representations:
r = audio.apply_representation(representation)
@@ -82,20 +81,19 @@ class TestUnimodalRepresentations(unittest.TestCase):
self.data_generator.get_modality_path(ModalityType.VIDEO),
self.data_generator.indices,
)
- video = UnimodalModality(video_data_loader, ModalityType.VIDEO)
+ video = UnimodalModality(video_data_loader)
for representation in video_representations:
r = video.apply_representation(representation)
assert r.data is not None
assert len(r.data) == self.num_instances
def test_text_representations(self):
- # TODO: check params fro BOW, W2V, TfIdf
test_representations = [BoW(2, 2), W2V(5, 2, 2), TfIdf(2), Bert()]
text_data_loader = TextLoader(
self.data_generator.get_modality_path(ModalityType.TEXT),
self.data_generator.indices,
)
- text = UnimodalModality(text_data_loader, ModalityType.TEXT)
+ text = UnimodalModality(text_data_loader)
for representation in test_representations:
r = text.apply_representation(representation)
@@ -109,11 +107,12 @@ class TestUnimodalRepresentations(unittest.TestCase):
self.data_generator.indices,
chunk_size=2,
)
- video = UnimodalModality(video_data_loader, ModalityType.VIDEO)
+ video = UnimodalModality(video_data_loader)
for representation in video_representations:
r = video.apply_representation(representation)
assert r.data is not None
assert len(r.data) == self.num_instances
+ assert len(r.metadata) == self.num_instances
if __name__ == "__main__":
diff --git a/src/main/python/tests/scuro/test_window_operations.py
b/src/main/python/tests/scuro/test_window_operations.py
new file mode 100644
index 0000000000..d7210ddb6d
--- /dev/null
+++ b/src/main/python/tests/scuro/test_window_operations.py
@@ -0,0 +1,106 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+import unittest
+import math
+
+import numpy as np
+
+from tests.scuro.data_generator import ModalityRandomDataGenerator
+from systemds.scuro.modality.type import ModalityType
+
+
+class TestWindowOperations(unittest.TestCase):
+ @classmethod
+ def setUpClass(cls):
+ cls.num_instances = 40
+ cls.data_generator = ModalityRandomDataGenerator()
+ cls.aggregations = ["mean", "sum", "max", "min"]
+
+ def test_window_operations_on_audio_representations(self):
+ window_size = 10
+ self.run_window_operations_for_modality(ModalityType.AUDIO,
window_size)
+
+ def test_window_operations_on_video_representations(self):
+ window_size = 10
+ self.run_window_operations_for_modality(ModalityType.VIDEO,
window_size)
+
+ def test_window_operations_on_text_representations(self):
+ window_size = 10
+
+ self.run_window_operations_for_modality(ModalityType.TEXT, window_size)
+
+ def run_window_operations_for_modality(self, modality_type, window_size):
+ r = self.data_generator.create1DModality(40, 100, modality_type)
+ for aggregation in self.aggregations:
+ windowed_modality = r.window(window_size, aggregation)
+
+ self.verify_window_operation(aggregation, r, windowed_modality,
window_size)
+
+ def verify_window_operation(
+ self, aggregation, modality, windowed_modality, window_size
+ ):
+ assert windowed_modality.data is not None
+ assert len(windowed_modality.data) == self.num_instances
+
+ for i, instance in enumerate(windowed_modality.data):
+ # assert (
+ #
list(windowed_modality.metadata.values())[i]["data_layout"]["shape"][0]
+ # ==
list(modality.metadata.values())[i]["data_layout"]["shape"][0]
+ # )
+ assert len(instance) == math.ceil(len(modality.data[i]) /
window_size)
+ for j in range(0, len(instance)):
+ if aggregation == "mean":
+ np.testing.assert_almost_equal(
+ instance[j],
+ np.mean(
+ modality.data[i][j * window_size : (j + 1) *
window_size],
+ axis=0,
+ ),
+ )
+ elif aggregation == "sum":
+ np.testing.assert_almost_equal(
+ instance[j],
+ np.sum(
+ modality.data[i][j * window_size : (j + 1) *
window_size],
+ axis=0,
+ ),
+ )
+ elif aggregation == "max":
+ np.testing.assert_almost_equal(
+ instance[j],
+ np.max(
+ modality.data[i][j * window_size : (j + 1) *
window_size],
+ axis=0,
+ ),
+ )
+ elif aggregation == "min":
+ np.testing.assert_almost_equal(
+ instance[j],
+ np.min(
+ modality.data[i][j * window_size : (j + 1) *
window_size],
+ axis=0,
+ ),
+ )
+
+
# Allow running this test module directly (outside a pytest/CI harness).
if __name__ == "__main__":
    unittest.main()