This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new 95c74be188 [MINOR] add correct python dependencies
95c74be188 is described below

commit 95c74be188ca35de762380525197881d2a045ff1
Author: Christina Dionysio <[email protected]>
AuthorDate: Tue Sep 24 15:35:51 2024 +0200

    [MINOR] add correct python dependencies
    
    This commit fixes the python dependencies to support scuro.
    
    Closes #2117
---
 .github/workflows/python.yml                              | 15 ++++++++++++++-
 src/main/python/systemds/scuro/representations/average.py |  4 ++--
 .../systemds/scuro/representations/concatenation.py       |  4 ++--
 src/main/python/systemds/scuro/representations/max.py     |  4 ++--
 .../systemds/scuro/representations/mel_spectrogram.py     |  4 ++--
 .../systemds/scuro/representations/multiplication.py      |  6 +++---
 src/main/python/systemds/scuro/representations/rowmax.py  |  4 ++--
 src/main/python/systemds/scuro/representations/sum.py     |  6 +++---
 src/main/python/systemds/scuro/representations/utils.py   | 13 +++++++++++++
 9 files changed, 43 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 217e2c157d..79b2277b08 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -100,7 +100,20 @@ jobs:
         pip install --upgrade pip 
         pip install --upgrade pip
         pip install wheel
-        pip install numpy py4j scipy scikit-learn keras requests pandas unittest-parallel
+        pip install \
+          numpy \
+          py4j \
+          scipy \
+          scikit-learn \
+          requests \
+          pandas \
+          unittest-parallel \
+          torchvision \
+          transformers \
+          opencv-python \
+          torch \
+          librosa \
+          h5py
 
     - name: Build Python Package
       run: |
diff --git a/src/main/python/systemds/scuro/representations/average.py b/src/main/python/systemds/scuro/representations/average.py
index 11ce431566..708812d21b 100644
--- a/src/main/python/systemds/scuro/representations/average.py
+++ b/src/main/python/systemds/scuro/representations/average.py
@@ -24,7 +24,7 @@ from typing import List
 import numpy as np
 
 from systemds.scuro.modality.modality import Modality
-from keras.api.preprocessing.sequence import pad_sequences
+from systemds.scuro.representations.utils import pad_sequences
 
 from systemds.scuro.representations.fusion import Fusion
 
@@ -41,7 +41,7 @@ class Average(Fusion):
 
         padded_modalities = []
         for modality in modalities:
-            d = pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32', padding='post')
+            d = pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32')
             padded_modalities.append(d)
 
         data = padded_modalities[0]
diff --git a/src/main/python/systemds/scuro/representations/concatenation.py b/src/main/python/systemds/scuro/representations/concatenation.py
index 81b6fe33fc..a61ab69fce 100644
--- a/src/main/python/systemds/scuro/representations/concatenation.py
+++ b/src/main/python/systemds/scuro/representations/concatenation.py
@@ -24,7 +24,7 @@ from typing import List
 import numpy as np
 
 from systemds.scuro.modality.modality import Modality
-from keras.api.preprocessing.sequence import pad_sequences
+from systemds.scuro.representations.utils import pad_sequences
 
 from systemds.scuro.representations.fusion import Fusion
 
@@ -51,7 +51,7 @@ class Concatenation(Fusion):
 
         for modality in modalities:
             if self.padding:
-                data = np.concatenate([data, pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32', padding='post')], axis=-1)
+                data = np.concatenate([data, pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32')], axis=-1)
             else:
                 data = np.concatenate([data, modality.data], axis=-1)
 
diff --git a/src/main/python/systemds/scuro/representations/max.py b/src/main/python/systemds/scuro/representations/max.py
index 2f58581cb8..50038d5463 100644
--- a/src/main/python/systemds/scuro/representations/max.py
+++ b/src/main/python/systemds/scuro/representations/max.py
@@ -24,7 +24,7 @@ from typing import List
 import numpy as np
 
 from systemds.scuro.modality.modality import Modality
-from keras.preprocessing.sequence import pad_sequences
+from systemds.scuro.representations.utils import pad_sequences
 
 from systemds.scuro.representations.fusion import Fusion
 
@@ -46,7 +46,7 @@ class RowMax(Fusion):
         
         padded_modalities = []
         for modality in modalities:
-            d = pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32', padding='post')
+            d = pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32')
             padded_modalities.append(d)
         
         split_rows = int(len(modalities[0].data) / self.split)
diff --git a/src/main/python/systemds/scuro/representations/mel_spectrogram.py b/src/main/python/systemds/scuro/representations/mel_spectrogram.py
index 395b2977a0..15d4277c2c 100644
--- a/src/main/python/systemds/scuro/representations/mel_spectrogram.py
+++ b/src/main/python/systemds/scuro/representations/mel_spectrogram.py
@@ -24,7 +24,7 @@ import pickle
 
 import librosa
 import numpy as np
-from keras.src.utils import pad_sequences
+from systemds.scuro.representations.utils import pad_sequences
 
 from systemds.scuro.representations.unimodal import UnimodalRepresentation
 
@@ -51,7 +51,7 @@ class MelSpectrogram(UnimodalRepresentation):
         
         r = []
         for elem in result:
-            d = pad_sequences(elem, maxlen=max_length, dtype='float32', padding='post')
+            d = pad_sequences(elem, maxlen=max_length, dtype='float32')
             r.append(d)
         
         np_array_r = np.array(r) if not self.avg else np.mean(np.array(r), axis=1)
diff --git a/src/main/python/systemds/scuro/representations/multiplication.py b/src/main/python/systemds/scuro/representations/multiplication.py
index 2b3ae64eac..032ae70fe4 100644
--- a/src/main/python/systemds/scuro/representations/multiplication.py
+++ b/src/main/python/systemds/scuro/representations/multiplication.py
@@ -24,7 +24,7 @@ from typing import List
 import numpy as np
 
 from systemds.scuro.modality.modality import Modality
-from keras.preprocessing.sequence import pad_sequences
+from systemds.scuro.representations.utils import pad_sequences
 
 from systemds.scuro.representations.fusion import Fusion
 
@@ -39,10 +39,10 @@ class Multiplication(Fusion):
     def fuse(self, modalities: List[Modality], train_indices=None):
         max_emb_size = self.get_max_embedding_size(modalities)
         
-        data = pad_sequences(modalities[0].data, maxlen=max_emb_size, dtype='float32', padding='post')
+        data = pad_sequences(modalities[0].data, maxlen=max_emb_size, dtype='float32')
         
         for m in range(1, len(modalities)):
             # scaled = self.scale_data(modalities[m].data, train_indices)
-            data = np.multiply(data, pad_sequences(modalities[m].data, maxlen=max_emb_size, dtype='float32', padding='post'))
+            data = np.multiply(data, pad_sequences(modalities[m].data, maxlen=max_emb_size, dtype='float32'))
         
         return data
diff --git a/src/main/python/systemds/scuro/representations/rowmax.py b/src/main/python/systemds/scuro/representations/rowmax.py
index c4184687a1..b06839b334 100644
--- a/src/main/python/systemds/scuro/representations/rowmax.py
+++ b/src/main/python/systemds/scuro/representations/rowmax.py
@@ -24,7 +24,7 @@ from typing import List
 import numpy as np
 
 from modality.modality import Modality
-from keras.api.preprocessing.sequence import pad_sequences
+from systemds.scuro.representations.utils import pad_sequences
 
 from representations.fusion import Fusion
 
@@ -47,7 +47,7 @@ class RowMax(Fusion):
         padded_modalities = []
         for modality in modalities:
             scaled = self.scale_data(modality.data, train_indices)
-            d = pad_sequences(scaled, maxlen=max_emb_size, dtype='float32', padding='post')
+            d = pad_sequences(scaled, maxlen=max_emb_size, dtype='float32')
             padded_modalities.append(d)
 
         split_rows = int(len(modalities[0].data) / self.split)
diff --git a/src/main/python/systemds/scuro/representations/sum.py b/src/main/python/systemds/scuro/representations/sum.py
index 9c75606627..1f1740c548 100644
--- a/src/main/python/systemds/scuro/representations/sum.py
+++ b/src/main/python/systemds/scuro/representations/sum.py
@@ -23,7 +23,7 @@ from typing import List
 
 
 from systemds.scuro.modality.modality import Modality
-from keras.preprocessing.sequence import pad_sequences
+from systemds.scuro.representations.utils import pad_sequences
 
 from systemds.scuro.representations.fusion import Fusion
 
@@ -38,9 +38,9 @@ class Sum(Fusion):
     def fuse(self, modalities: List[Modality]):
         max_emb_size = self.get_max_embedding_size(modalities)
 
-        data = pad_sequences(modalities[0].data, maxlen=max_emb_size, dtype='float32', padding='post')
+        data = pad_sequences(modalities[0].data, maxlen=max_emb_size, dtype='float32')
         
         for m in range(1, len(modalities)):
-            data += pad_sequences(modalities[m].data, maxlen=max_emb_size, dtype='float32', padding='post')
+            data += pad_sequences(modalities[m].data, maxlen=max_emb_size, dtype='float32')
         
         return data
diff --git a/src/main/python/systemds/scuro/representations/utils.py b/src/main/python/systemds/scuro/representations/utils.py
index d611cd9c71..720c3386d7 100644
--- a/src/main/python/systemds/scuro/representations/utils.py
+++ b/src/main/python/systemds/scuro/representations/utils.py
@@ -93,3 +93,16 @@ class JSON(UnimodalRepresentation):
     def parse_all(self, filepath, indices):
         with open(filepath) as file:
             return json.load(file)
+
+
+def pad_sequences(sequences, maxlen=None, dtype='float32', value=0):
+    if maxlen is None:
+        maxlen = max([len(seq) for seq in sequences])
+
+    result = np.full((len(sequences), maxlen), value, dtype=dtype)
+
+    for i, seq in enumerate(sequences):
+        data = seq[:maxlen]
+        result[i, :len(data)] = data
+
+    return result

Reply via email to