This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 436562767b [SYSTEMDS-3535] Scalable Linear Algebra Benchmark (SLAB)
436562767b is described below
commit 436562767b5e98d231fb714f01a561b3f19c6d0f
Author: ReneEnjilian <[email protected]>
AuthorDate: Sun Aug 18 19:01:39 2024 +0200
[SYSTEMDS-3535] Scalable Linear Algebra Benchmark (SLAB)
Closes #2055.
---
scripts/perftest/slab/data/gen_dense_data.py | 68 ++++
scripts/perftest/slab/data/gen_sparse_data.py | 77 +++++
.../distributed/run_distributed_ml_algorithms.sh | 89 +++++
...HeteroscedasticityRobustStandardErrorsDistr.dml | 56 ++++
.../distributed/slabLogisticRegressionDistr.dml | 56 ++++
.../slabNonNegativeMatrixFactorizationDistr.dml | 55 +++
.../slabOrdinaryLeastSquaresRegressionDistr.dml | 41 +++
.../slab/mlAlgorithms/distributed/slabPCADistr.dml | 57 ++++
.../native/run_native_ml_algorithms.sh | 74 ++++
.../slab/mlAlgorithms/native/slabLinearRegCG.dml | 193 +++++++++++
.../slab/mlAlgorithms/native/slabMultiLogitReg.dml | 373 +++++++++++++++++++++
.../slab/mlAlgorithms/native/slabNativePCA.dml | 130 +++++++
.../single_node_dense/run_single_node_dense_ml.sh | 68 ++++
.../slabHeteroscedasticityRobustStandardErrors.dml | 55 +++
.../single_node_dense/slabLogisticRegression.dml | 55 +++
.../slabNonNegativeMatrixFactorization.dml | 54 +++
.../slabOrdinaryLeastSquaresRegression.dml | 40 +++
.../mlAlgorithms/single_node_dense/slabPCA.dml | 56 ++++
.../run_distributed_matrix_sparse.sh | 102 ++++++
.../distributed_sparse/slabFrobeniusNormSparse.dml | 35 ++
.../distributed_sparse/slabGramMatrixSparse.dml | 35 ++
.../slabMatrixAdditionSparse.dml | 37 ++
.../distributed_sparse/slabMatrixMultSparse.dml | 37 ++
.../slabMatrixVectorMultSparse.dml | 36 ++
.../distributed_sparse/slabTransposeSparse.dml | 34 ++
.../run_single_node_matrix_dense.sh | 64 ++++
.../single_node_dense/slabFrobeniusNorm.dml | 29 ++
.../operators/single_node_dense/slabGramMatrix.dml | 29 ++
.../single_node_dense/slabMatrixAddition.dml | 34 ++
.../operators/single_node_dense/slabMatrixMult.dml | 32 ++
.../single_node_dense/slabMatrixVectorMult.dml | 33 ++
.../operators/single_node_dense/slabTranspose.dml | 30 ++
.../perftest/slab/pipeline/run_slab_pipeline.sh | 78 +++++
.../slab/pipeline/slabMultiplicationChain.dml | 38 +++
scripts/perftest/slab/pipeline/slabSVD.dml | 38 +++
scripts/perftest/slab/slabUtils.dml | 49 +++
src/main/java/org/apache/sysds/api/DMLScript.java | 2 +-
37 files changed, 2368 insertions(+), 1 deletion(-)
diff --git a/scripts/perftest/slab/data/gen_dense_data.py
b/scripts/perftest/slab/data/gen_dense_data.py
new file mode 100644
index 0000000000..82d0e7db91
--- /dev/null
+++ b/scripts/perftest/slab/data/gen_dense_data.py
@@ -0,0 +1,68 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+import os
+import numpy as np
+import pandas as pd
+
+def gen_data_dense(rows, cols, path, chunk_size=10000):
+ """
+ Generate a dense matrix and save it to a CSV file.
+
+ Parameters:
+ rows (int): Number of rows.
+ cols (int): Number of columns.
+ path (str): Path to save the generated matrix.
+ chunk_size (int): Number of rows per chunk to generate and save.
+ """
+ with open(path, 'w') as f:
+ for start_row in range(0, rows, chunk_size):
+ end_row = min(start_row + chunk_size, rows)
+ chunk_rows = end_row - start_row
+
+ # Generate a dense matrix with random values
+ chunk_matrix = np.random.random((chunk_rows, cols))
+
+ # Save the chunk to the CSV file
+ np.savetxt(f, chunk_matrix, delimiter=',')
+ # np.savetxt(f, chunk_matrix, delimiter=',', fmt='%.10f')
+ print(f"Saved chunk {start_row} to {end_row} to {path}")
+
+def main():
+ # Hardcoded parameters
+ dense_gb = 0.0001
+
+ current_directory = os.getcwd()
+ target_directory = os.path.abspath(os.path.join(current_directory,
'../../../../src/test/resources/datasets/slab/dense'))
+ os.makedirs(target_directory, exist_ok=True)
+
+ k = int(np.ceil((dense_gb * 1e9) / float(8 * 100)))
+
+ # Paths for saving the matrices
+ mpath_tall = os.path.join(target_directory, 'M_dense_tall.csv')
+ mpath_wide = os.path.join(target_directory, 'M_dense_wide.csv')
+
+ # Generate and save dense matrices
+ gen_data_dense(k, 100, mpath_tall)
+ gen_data_dense(100, k, mpath_wide)
+
+if __name__ == "__main__":
+ main()
diff --git a/scripts/perftest/slab/data/gen_sparse_data.py
b/scripts/perftest/slab/data/gen_sparse_data.py
new file mode 100644
index 0000000000..3279a46f14
--- /dev/null
+++ b/scripts/perftest/slab/data/gen_sparse_data.py
@@ -0,0 +1,77 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+import os
+import numpy as np
+import pandas as pd
+
+def gen_data_sparse(rows, cols, density, path, chunk_size=10000):
+ """
+ Generate a sparse matrix with given density and save it to a CSV file in a
dense format.
+
+ Parameters:
+ rows (int): Number of rows.
+ cols (int): Number of columns.
+ density (float): Fraction of non-zero elements.
+ path (str): Path to save the generated matrix.
+ chunk_size (int): Number of rows per chunk to generate and save.
+ """
+ with open(path, 'w') as f:
+ for start_row in range(0, rows, chunk_size):
+ end_row = min(start_row + chunk_size, rows)
+ chunk_rows = end_row - start_row
+
+ chunk_matrix = np.zeros((chunk_rows, cols))
+
+ n_nonzero = int(density * chunk_rows * cols)
+ nonzero_indices = (np.random.randint(chunk_rows, size=n_nonzero),
np.random.randint(cols, size=n_nonzero))
+ chunk_matrix[nonzero_indices] = np.random.random(n_nonzero)
+
+ np.savetxt(f, chunk_matrix, delimiter=',')
+ #np.savetxt(f, chunk_matrix, delimiter=',', fmt='%.10f')
+
+ print(f"Saved chunk {start_row} to {end_row} to {path}")
+
+def main():
+ # Hardcoded parameters
+ sparse_gb = 0.0001
+ sparsity_values = [0.0001, 0.001, 0.01, 0.1]
+
+ current_directory = os.getcwd()
+ target_directory = os.path.abspath(os.path.join(current_directory,
'../../../../src/test/resources/datasets/slab/sparse'))
+ os.makedirs(target_directory, exist_ok=True)
+
+
+ for sr in sparsity_values:
+ stub = str(sr).replace('.', '_')
+ stub = "sparsity_"+stub
+ k = int(np.ceil((sparse_gb * 1e9) / float(8 * 100)))
+
+ # Paths for saving the matrices
+ mpath_tall = os.path.join(target_directory, f'M_{stub}_tall.csv')
+ mpath_wide = os.path.join(target_directory, f'M_{stub}_wide.csv')
+
+ # Generate and save sparse matrices
+ gen_data_sparse(k, 100, sr, mpath_tall)
+ gen_data_sparse(100, k, sr, mpath_wide)
+
+if __name__ == "__main__":
+ main()
diff --git
a/scripts/perftest/slab/mlAlgorithms/distributed/run_distributed_ml_algorithms.sh
b/scripts/perftest/slab/mlAlgorithms/distributed/run_distributed_ml_algorithms.sh
new file mode 100755
index 0000000000..29f5c35877
--- /dev/null
+++
b/scripts/perftest/slab/mlAlgorithms/distributed/run_distributed_ml_algorithms.sh
@@ -0,0 +1,89 @@
+#!/usr/bin/env bash
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Ensure script is run from the 'distributed' directory: all dataset and
+# DML paths below are relative to it.
+# "$PWD" is quoted so a working directory containing spaces is handled.
+if [ "$(basename "$PWD")" != "distributed" ]; then
+    echo "Please execute scripts from directory 'distributed'"
+    exit 1
+fi
+
+
+# Set up the output directory
+OUTPUT_DIR="output"
+mkdir -p $OUTPUT_DIR
+
+# Define datasets
+DATASET_PATH_DENSE="../../../../../src/test/resources/datasets/slab/dense"
+DATASET_PATH_SPARSE="../../../../../src/test/resources/datasets/slab/sparse"
+DENSE_DATASETS=("M_dense_tall.csv" "M_dense_wide.csv")
+SPARSE_DATASETS=("M_sparsity_0_0001_tall.csv" "M_sparsity_0_0001_wide.csv"
"M_sparsity_0_001_tall.csv" "M_sparsity_0_001_wide.csv"
"M_sparsity_0_01_tall.csv" "M_sparsity_0_01_wide.csv" "M_sparsity_0_1_tall.csv"
"M_sparsity_0_1_wide.csv")
+
+# Define DML files and corresponding output files
+DML_FILES=("slabHeteroscedasticityRobustStandardErrorsDistr.dml"
"slabLogisticRegressionDistr.dml" "slabNonNegativeMatrixFactorizationDistr.dml"
"slabOrdinaryLeastSquaresRegressionDistr.dml" "slabPCADistr.dml")
+OUTPUT_FILES=("slabHeteroscedasticityRobustStandardErrorsDistr_stats.txt"
"slabLogisticRegressionDistr_stats.txt"
"slabNonNegativeMatrixFactorizationDistr_stats.txt"
"slabOrdinaryLeastSquaresRegressionDistr_stats.txt" "slabPCADistr_stats.txt")
+
+# Function to run DML script and handle errors
+#
+# Arguments:
+#   $1  DML script to execute
+#   $2  argument string forwarded to the script after -args
+#   $3  sparsity label written to the report
+#   $4  shape label written to the report
+#   $5  report file the result is appended to
+run_dml() {
+    local DML_FILE=$1
+    local ARGS=$2
+    local SPARSITY=$3
+    local SHAPE=$4
+    local OUTPUT_FILE=$5
+    local TEMP_FILE
+
+    # Run the DML script with -exec spark and -stats flag, and capture the output
+    TEMP_FILE=$(mktemp)
+    # $ARGS is deliberately left unquoted so that a string holding several
+    # arguments is still split into separate words.
+    if systemds "$DML_FILE" -exec spark -args $ARGS -stats > "$TEMP_FILE" 2>&1; then
+        # Write the sparsity, shape, and SystemDS Statistics section to the output file
+        echo "Sparsity: $SPARSITY, Shape: $SHAPE" >> "$OUTPUT_FILE"
+        awk '/SystemDS Statistics:/{flag=1}flag' "$TEMP_FILE" >> "$OUTPUT_FILE"
+        rm "$TEMP_FILE"
+    else
+        # Keep the captured log on failure: the message below points the
+        # user at it, so deleting it would discard the only diagnostics.
+        echo "An error occurred while executing ${DML_FILE} with arguments ${ARGS}. Check ${TEMP_FILE} for details." >> "$OUTPUT_FILE"
+    fi
+    echo -e "\n\n\n\n" >> "$OUTPUT_FILE" # Add empty lines for separation
+}
+
+# Iterate over each DML file and run it against every dense and sparse
+# dataset. All expansions are quoted so that an empty SHAPE/SPARSITY or a
+# path with spaces cannot shift the positional arguments of run_dml.
+for index in "${!DML_FILES[@]}"; do
+    DML_FILE=${DML_FILES[$index]}
+    OUTPUT_FILE=${OUTPUT_DIR}/${OUTPUT_FILES[$index]}
+
+    # Clear the output file before writing
+    > "$OUTPUT_FILE"
+
+    # Run with dense datasets
+    for DATASET in "${DENSE_DATASETS[@]}"; do
+        # The shape label is taken from the dataset file name
+        SHAPE=$(echo "$DATASET" | grep -oP '(tall|wide)')
+        SPARSITY="dense"
+        run_dml "$DML_FILE" "${DATASET_PATH_DENSE}/${DATASET}" "$SPARSITY" "$SHAPE" "$OUTPUT_FILE"
+        echo "Execution of ${DML_FILE} with dataset ${DATASET} completed. Statistics appended to ${OUTPUT_FILE}"
+    done
+
+    # Run with sparse datasets
+    for DATASET in "${SPARSE_DATASETS[@]}"; do
+        SHAPE=$(echo "$DATASET" | grep -oP '(tall|wide)')
+        # The file name encodes the density as e.g. 0_001
+        SPARSITY=$(echo "$DATASET" | grep -oP '0_\d+')
+        SPARSITY=${SPARSITY//_/\.} # Replace underscore with dot
+        run_dml "$DML_FILE" "${DATASET_PATH_SPARSE}/${DATASET}" "$SPARSITY" "$SHAPE" "$OUTPUT_FILE"
+        echo "Execution of ${DML_FILE} with dataset ${DATASET} completed. Statistics appended to ${OUTPUT_FILE}"
+    done
+done
diff --git
a/scripts/perftest/slab/mlAlgorithms/distributed/slabHeteroscedasticityRobustStandardErrorsDistr.dml
b/scripts/perftest/slab/mlAlgorithms/distributed/slabHeteroscedasticityRobustStandardErrorsDistr.dml
new file mode 100644
index 0000000000..ef0044f5c3
--- /dev/null
+++
b/scripts/perftest/slab/mlAlgorithms/distributed/slabHeteroscedasticityRobustStandardErrorsDistr.dml
@@ -0,0 +1,56 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Driver: reads a CSV matrix, fits a regression once and then repeats the
+# robust_se computation five times.
+source("../../slabUtils.dml") as utils
+
+# $1 is the path of the CSV file holding the input matrix X
+dataPath = $1
+X = read(dataPath, format="csv")
+# y is 1 where the random draw exceeds 0.80 and 0 otherwise
+rvect = rand(rows=nrow(X), cols=1, pdf='uniform')
+y = rvect > 0.80
+# Print the sums of X and y
+# NOTE(review): presumably this forces X and y to be computed before the
+# repeated section -- confirm
+p = sum( X )
+q = sum( y )
+print(p)
+print(q)
+# Fit the coefficients once and derive the squared residuals
+b = reg(X,y)
+y_hat = X %*% b
+r2 = (y - y_hat)^2
+
+# Five repetitions of the measured computation
+# NOTE(review): printRandElements lives in slabUtils.dml, not visible here;
+# presumably it prints 10 elements of tmp -- confirm
+for(ix in 1:5) {
+ tmp = robust_se(X, r2)
+ utils::printRandElements(tmp, 10)
+}
+
+# Least-squares fit via the normal equations: solves (t(X) X) b = t(X) y.
+#   X - feature matrix
+#   y - response column vector
+#   b - returned coefficient vector
+reg = function(matrix[double] X, matrix[double] y)
+ return (matrix[double] b) {
+ b = solve(t(X) %*% X, t(X) %*% y)
+}
+
+
+# Computes XTX_INV (t(X) S X) XTX_INV, where S = diag(r2) and XTX_INV is
+# assembled from the SVD factors of t(X) X.
+#   X  - feature matrix
+#   r2 - column vector placed on the diagonal of S (the caller passes the
+#        squared residuals)
+#   se - returned matrix
+robust_se = function(matrix[double] X,
+ matrix[double] r2)
+ return (matrix[double] se) {
+ # NOTE: SVD is cheap since XTX is small!
+ [U, H, V] = svd(t(X) %*% X)
+ # Take the diagonal of H and put its element-wise reciprocal back on a diagonal
+ h = diag(H)
+ XTX_INV = U %*% diag(h^-1) %*% t(V)
+ # diag(r2) expands the vector into a square matrix with r2 on the diagonal
+ S = diag(r2)
+ se = XTX_INV %*% (t(X) %*% S %*% X) %*% XTX_INV
+}
diff --git
a/scripts/perftest/slab/mlAlgorithms/distributed/slabLogisticRegressionDistr.dml
b/scripts/perftest/slab/mlAlgorithms/distributed/slabLogisticRegressionDistr.dml
new file mode 100644
index 0000000000..265317ff64
--- /dev/null
+++
b/scripts/perftest/slab/mlAlgorithms/distributed/slabLogisticRegressionDistr.dml
@@ -0,0 +1,56 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Driver: reads a CSV matrix, builds a 0/1 label vector and runs logit
+# five times.
+source("../../slabUtils.dml") as utils
+
+# $1 is the path of the CSV file holding the input matrix X
+dataPath = $1
+X = read(dataPath, format="csv")
+# y is 1 where the random draw exceeds 0.80 and 0 otherwise
+rvect = rand(rows=nrow(X), cols=1, pdf='uniform')
+y = rvect > 0.80
+# Print the sums of X and y
+# NOTE(review): presumably this forces X and y to be computed before the
+# repeated section -- confirm
+p = sum( X )
+q = sum( y )
+print(p)
+print(q)
+
+# Five repetitions, each running 10 iterations of logit
+# NOTE(review): printRandElements lives in slabUtils.dml, not visible here;
+# presumably it prints 10 elements of tmp -- confirm
+for(ix in 1:5){
+ tmp = logit(X, y, 10)
+ utils::printRandElements(tmp, 10)
+}
+
+# Logistic regression fitted with a fixed number of gradient steps.
+#   X          - feature matrix
+#   y          - label column vector
+#   iterations - number of update steps to perform
+#   w          - returned weight vector (one row per column of X)
+logit = function(matrix[double] X,
+ matrix[double] y,
+ Integer iterations)
+ return (matrix[double] w) {
+
+ N = nrow(X)
+ # Weights start at zero
+ w = matrix(0, rows=ncol(X), cols=1)
+ iteration = 0
+ stepSize = 10
+
+ while (iteration < iterations) {
+ xb = X %*% w
+ # Difference between the sigmoid of the scores and the labels
+ delta = 1/(1+exp(-xb)) - y
+ # The step size is halved before every update, so the first step uses 5
+ stepSize = stepSize / 2
+ w = w - ((stepSize * t(X) %*% delta)/N)
+
+ iteration = iteration + 1
+ }
+}
diff --git
a/scripts/perftest/slab/mlAlgorithms/distributed/slabNonNegativeMatrixFactorizationDistr.dml
b/scripts/perftest/slab/mlAlgorithms/distributed/slabNonNegativeMatrixFactorizationDistr.dml
new file mode 100644
index 0000000000..f844695cb3
--- /dev/null
+++
b/scripts/perftest/slab/mlAlgorithms/distributed/slabNonNegativeMatrixFactorizationDistr.dml
@@ -0,0 +1,55 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Driver: reads a CSV matrix and runs gnmf five times.
+# NOTE(review): the utils namespace is imported but not used in this script
+source("../../slabUtils.dml") as utils
+
+# $1 is the path of the CSV file holding the input matrix X
+dataPath = $1
+X = read(dataPath, format="csv")
+# Unlike the sibling scripts, rvect and y are 1x1 here and y is only summed
+rvect = rand(rows=1, cols=1)
+y = rvect > 0.80
+# Print the sums of X and y
+# NOTE(review): presumably this forces X to be read before the repeated
+# section -- confirm
+p = sum( X )
+q = sum( y )
+print(p)
+print(q)
+
+
+# Five repetitions; gnmf is called with r = 10 and iterations = 10
+for(ix in 1:5){
+ tmp = gnmf(X, 10, 10)
+ print(tmp)
+}
+
+# Non-negative matrix factorization of X into W (nrow(X) x r) and
+# H (r x ncol(X)) using multiplicative updates.
+#   X          - matrix to factorize
+#   r          - number of columns of W / rows of H
+#   iterations - NOTE(review): not used by the body; the update loop below
+#                always runs 3 times -- confirm whether this is intended
+#   iteration  - returned value; always 0, W and H are not returned
+gnmf = function(matrix[double] X, Integer r, Integer iterations)
+ return (integer iteration) {
+
+ # Random starting factors
+ W = rand(rows = nrow(X), cols = r, pdf = 'uniform')
+ H = rand(rows = r, cols = ncol(X), pdf = 'uniform')
+
+ # Alternate element-wise multiplicative updates of W and H
+ for (i in 1:3) {
+ W = W * ((X %*% t(H)) / (W %*% (H %*% t(H))))
+ H = H * ((t(W) %*% X) / ((t(W) %*% W) %*% H))
+ }
+ # Print one entry of each factor when both are positive
+ # NOTE(review): presumably this keeps W and H from being treated as
+ # unused results -- confirm
+ if ((as.scalar(W[1,1]) > 0) & (as.scalar(H[1,1]) > 0)) {
+ print(as.scalar(H[1,1]))
+ print(as.scalar(W[1,1]))
+ }
+
+ iteration = 0
+}
diff --git
a/scripts/perftest/slab/mlAlgorithms/distributed/slabOrdinaryLeastSquaresRegressionDistr.dml
b/scripts/perftest/slab/mlAlgorithms/distributed/slabOrdinaryLeastSquaresRegressionDistr.dml
new file mode 100644
index 0000000000..d6607a5e0d
--- /dev/null
+++
b/scripts/perftest/slab/mlAlgorithms/distributed/slabOrdinaryLeastSquaresRegressionDistr.dml
@@ -0,0 +1,41 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Driver: reads a CSV matrix, builds a 0/1 response vector and runs reg
+# five times.
+source("../../slabUtils.dml") as utils
+
+# $1 is the path of the CSV file holding the input matrix X
+dataPath = $1
+X = read(dataPath, format="csv")
+# y is 1 where the random draw exceeds 0.80 and 0 otherwise
+rvect = rand(rows=nrow(X), cols=1, pdf='uniform')
+y = rvect > 0.80
+# Print the sums of X and y
+# NOTE(review): presumably this forces X and y to be computed before the
+# repeated section -- confirm
+p = sum( X )
+q = sum( y )
+print(p)
+print(q)
+
+# Five repetitions of the measured computation
+# NOTE(review): printRandElements lives in slabUtils.dml, not visible here;
+# presumably it prints 10 elements of tmp -- confirm
+for(ix in 1:5){
+ tmp = reg(X, y)
+ utils::printRandElements(tmp, 10)
+}
+
+# Least-squares fit via the normal equations: solves (t(X) X) b = t(X) y.
+#   X - feature matrix
+#   y - response column vector
+#   b - returned coefficient vector
+reg = function(matrix[double] X, matrix[double] y)
+ return (matrix[double] b) {
+ b = solve(t(X) %*% X, t(X) %*% y)
+}
diff --git a/scripts/perftest/slab/mlAlgorithms/distributed/slabPCADistr.dml
b/scripts/perftest/slab/mlAlgorithms/distributed/slabPCADistr.dml
new file mode 100644
index 0000000000..72c9415d8b
--- /dev/null
+++ b/scripts/perftest/slab/mlAlgorithms/distributed/slabPCADistr.dml
@@ -0,0 +1,57 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Driver: reads a CSV matrix and runs pca with k = 5 five times.
+source("../../slabUtils.dml") as utils
+
+# $1 is the path of the CSV file holding the input matrix X
+dataPath = $1
+X = read(dataPath, format="csv")
+# y is built as in the sibling scripts but is only summed here, not passed to pca
+rvect = rand(rows=nrow(X), cols=1, pdf='uniform')
+y = rvect > 0.80
+# Print the sums of X and y
+# NOTE(review): presumably this forces X to be read before the repeated
+# section -- confirm
+p = sum( X )
+q = sum( y )
+print(p)
+print(q)
+
+# Five repetitions of the measured computation
+# NOTE(review): printRandElements lives in slabUtils.dml, not visible here;
+# presumably it prints 10 elements of tmp -- confirm
+for(ix in 1:5){
+ tmp = pca(X, 5)
+ utils::printRandElements(tmp, 10)
+}
+
+# Principal component analysis: projects the column-centered X onto the
+# eigenvectors of its covariance matrix with the k largest eigenvalues.
+#   X   - input matrix
+#   k   - number of components to keep
+#   PRJ - returned projection with k columns
+pca = function(matrix[double] X, Integer k)
+ return (matrix[double] PRJ) {
+ N = nrow(X)
+ K = ncol(X)
+ # Center every column, then form the K x K covariance matrix
+ XS = X - colMeans(X)
+ S = (1/(N-1)) * (t(XS) %*% XS)
+ [eigvals, eigvects] = eigen(S)
+
+ # Thanks to the Sysml implementation for this helpful bit
+ # of code to sort the eigenvectors
+ # order() yields the positions of the eigenvalues in decreasing order;
+ # table() turns them into a K x K permutation matrix
+ eigssorted = order(target=eigvals, by=1,
+ decreasing=TRUE,
+ index.return=TRUE)
+ diagmat = table(seq(1, K), eigssorted)
+ eigvals = diagmat %*% eigvals
+ eigvects = eigvects %*% diagmat
+ # Keep the first k eigenvectors after reordering
+ eigvects = eigvects[, 1:k]
+
+ PRJ = XS %*% eigvects
+}
diff --git
a/scripts/perftest/slab/mlAlgorithms/native/run_native_ml_algorithms.sh
b/scripts/perftest/slab/mlAlgorithms/native/run_native_ml_algorithms.sh
new file mode 100755
index 0000000000..f45bcddcb5
--- /dev/null
+++ b/scripts/perftest/slab/mlAlgorithms/native/run_native_ml_algorithms.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Ensure script is run from the 'native' directory: the DML scripts and the
+# output directory below are addressed relative to it.
+# "$PWD" is quoted so a working directory containing spaces is handled.
+if [ "$(basename "$PWD")" != "native" ]; then
+    echo "Please execute scripts from directory 'native'"
+    exit 1
+fi
+
+
+
+# Set up the output directory
+OUTPUT_DIR="output"
+mkdir -p $OUTPUT_DIR
+
+# Define row numbers for the DML scripts
+ROW_NUMBERS=("1000" "10000" "100000" "1000000")
+
+# Define DML files and corresponding output files
+DML_FILES=("slabLinearRegCG.dml" "slabMultiLogitReg.dml" "slabNativePCA.dml")
+OUTPUT_FILES=("slabLinearRegCG_stats.txt" "slabMultiLogitReg_stats.txt"
"slabNativePCA_stats.txt")
+
+# Function to run DML script and handle errors
+run_dml() {
+ local DML_FILE=$1
+ local ARGS=$2
+ local OUTPUT_FILE=$3
+
+ # Run the DML script with -stats flag and capture the output
+ TEMP_FILE=$(mktemp)
+ if systemds $DML_FILE -args $ARGS -stats > $TEMP_FILE 2>&1; then
+ # Write the number of rows and SystemDS Statistics section to the output
file
+ echo "Number of rows: $ARGS" >> $OUTPUT_FILE
+ awk '/SystemDS Statistics:/{flag=1}flag' $TEMP_FILE >> $OUTPUT_FILE
+ else
+ echo "An error occurred while executing ${DML_FILE} with arguments
${ARGS}. Check ${TEMP_FILE} for details." >> $OUTPUT_FILE
+ fi
+ echo -e "\n\n\n\n" >> $OUTPUT_FILE # Add empty lines for separation
+ rm $TEMP_FILE
+}
+
+# Iterate over each DML file and run it once per configured row count.
+# All expansions are quoted so that unexpected whitespace cannot shift the
+# positional arguments of run_dml.
+for index in "${!DML_FILES[@]}"; do
+    DML_FILE=${DML_FILES[$index]}
+    OUTPUT_FILE=${OUTPUT_DIR}/${OUTPUT_FILES[$index]}
+
+    # Clear the output file before writing
+    > "$OUTPUT_FILE"
+
+    # Iterate over each row number and execute the DML file
+    for ROW in "${ROW_NUMBERS[@]}"; do
+        run_dml "$DML_FILE" "$ROW" "$OUTPUT_FILE"
+        echo "Execution of ${DML_FILE} with ${ROW} rows completed. Statistics appended to ${OUTPUT_FILE}"
+    done
+done
diff --git a/scripts/perftest/slab/mlAlgorithms/native/slabLinearRegCG.dml
b/scripts/perftest/slab/mlAlgorithms/native/slabLinearRegCG.dml
new file mode 100644
index 0000000000..c04519b60e
--- /dev/null
+++ b/scripts/perftest/slab/mlAlgorithms/native/slabLinearRegCG.dml
@@ -0,0 +1,193 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# THIS SCRIPT SOLVES LINEAR REGRESSION USING THE CONJUGATE GRADIENT ALGORITHM
+#
+
+
+
+# Parameters
+intercept_status = 2; # 0: no intercept, 1: add intercept, 2: add
intercept, shift & rescale
+tolerance = 0.000001; # Tolerance for convergence
+max_iteration = 100; # Maximum number of iterations
+regularization = 0.000001; # Regularization constant
+
+print ("BEGIN LINEAR REGRESSION SCRIPT");
+
+# Generate data internally
+n = $1; # number of rows
+m = 100; # number of columns
+X = rand(rows=n, cols=m, min=0, max=1, sparsity=0.9, seed=42)
+y = rand(rows=n, cols=1, min=0, max=1, sparsity=0.9, seed=24)
+
+sum_x = sum(X)
+sum_y = sum(y)
+for (ix in 1:5) {
+
+
+ ones_n = matrix(1, rows = n, cols = 1)
+ zero_cell = matrix(0, rows = 1, cols = 1)
+
+  # Introduce the intercept, shift and rescale the columns of X if needed
+  m_ext = m
+  if (intercept_status == 1 | intercept_status == 2) {
+    # Add the intercept column only while X still has its original m columns.
+    # This code runs once per repetition of the enclosing loop; appending
+    # unconditionally would grow X by one ones-column on every pass, so each
+    # repetition would solve a different, larger problem.
+    if (ncol(X) == m) {
+      X = cbind(X, ones_n)
+    }
+    m_ext = ncol(X)
+  }
+
+ scale_lambda = matrix(1, rows = m_ext, cols = 1)
+ if (intercept_status == 1 | intercept_status == 2) {
+ scale_lambda[m_ext, 1] = 0
+ }
+
+ if (intercept_status == 2) { # scale-&-shift X columns to mean 0,
variance 1
+ avg_X_cols = t(colSums(X)) / n
+ var_X_cols = (t(colSums(X ^ 2)) - n * (avg_X_cols ^ 2)) / (n - 1)
+ is_unsafe = (var_X_cols <= 0)
+ scale_X = 1.0 / sqrt(var_X_cols * (1 - is_unsafe) + is_unsafe)
+ scale_X[m_ext, 1] = 1
+ shift_X = -avg_X_cols * scale_X
+ shift_X[m_ext, 1] = 0
+ } else {
+ scale_X = matrix(1, rows = m_ext, cols = 1)
+ shift_X = matrix(0, rows = m_ext, cols = 1)
+ }
+
+ lambda = scale_lambda * regularization
+ beta_unscaled = matrix(0, rows = m_ext, cols = 1)
+
+ if (max_iteration == 0) {
+ max_iteration = m_ext
+ }
+ i = 0
+
+ # BEGIN THE CONJUGATE GRADIENT ALGORITHM
+ print ("Running the CG algorithm...")
+
+ r = -t(X) %*% y
+
+ if (intercept_status == 2) {
+ r = scale_X * r + shift_X %*% r[m_ext, ]
+ }
+
+ p = -r
+ norm_r2 = sum(r ^ 2)
+ norm_r2_initial = norm_r2
+ norm_r2_target = norm_r2_initial * tolerance ^ 2
+ print ("||r|| initial value = " + sqrt(norm_r2_initial) + ", target value
= " + sqrt(norm_r2_target))
+
+ while (i < max_iteration & norm_r2 > norm_r2_target) {
+ if (intercept_status == 2) {
+ ssX_p = scale_X * p
+ ssX_p[m_ext, ] = ssX_p[m_ext, ] + t(shift_X) %*% p
+ } else {
+ ssX_p = p
+ }
+
+ q = t(X) %*% (X %*% ssX_p)
+
+ if (intercept_status == 2) {
+ q = scale_X * q + shift_X %*% q[m_ext, ]
+ }
+
+ q = q + lambda * p
+ a = norm_r2 / sum(p * q)
+ beta_unscaled = beta_unscaled + a * p
+ r = r + a * q
+ old_norm_r2 = norm_r2
+ norm_r2 = sum(r ^ 2)
+ p = -r + (norm_r2 / old_norm_r2) * p
+ i = i + 1
+ print ("Iteration " + i + ": ||r|| / ||r init|| = " + sqrt(norm_r2 /
norm_r2_initial))
+ }
+
+ if (i >= max_iteration) {
+ print ("Warning: the maximum number of iterations has been reached.")
+ }
+ print ("The CG algorithm is done.")
+ # END THE CONJUGATE GRADIENT ALGORITHM
+
+ if (intercept_status == 2) {
+ beta = scale_X * beta_unscaled
+ beta[m_ext, ] = beta[m_ext, ] + t(shift_X) %*% beta_unscaled
+ } else {
+ beta = beta_unscaled
+ }
+
+ print ("Computing the statistics...")
+
+ avg_tot = sum(y) / n
+ ss_tot = sum(y ^ 2)
+ ss_avg_tot = ss_tot - n * avg_tot ^ 2
+ var_tot = ss_avg_tot / (n - 1)
+ y_residual = y - X %*% beta
+ avg_res = sum(y_residual) / n
+ ss_res = sum(y_residual ^ 2)
+ ss_avg_res = ss_res - n * avg_res ^ 2
+
+ R2 = 1 - ss_res / ss_avg_tot
+ if (n > m_ext) {
+ dispersion = ss_res / (n - m_ext)
+ adjusted_R2 = 1 - dispersion / (ss_avg_tot / (n - 1))
+ } else {
+ dispersion = 0.0 / 0.0
+ adjusted_R2 = 0.0 / 0.0
+ }
+
+ R2_nobias = 1 - ss_avg_res / ss_avg_tot
+ deg_freedom = n - m - 1
+ if (deg_freedom > 0) {
+ var_res = ss_avg_res / deg_freedom
+ adjusted_R2_nobias = 1 - var_res / (ss_avg_tot / (n - 1))
+ } else {
+ var_res = 0.0 / 0.0
+ adjusted_R2_nobias = 0.0 / 0.0
+ print ("Warning: zero or negative number of degrees of freedom.")
+ }
+
+ R2_vs_0 = 1 - ss_res / ss_tot
+ if (n > m) {
+ adjusted_R2_vs_0 = 1 - (ss_res / (n - m)) / (ss_tot / n)
+ } else {
+ adjusted_R2_vs_0 = 0.0 / 0.0
+ }
+
+ str = "AVG_TOT_Y," + avg_tot; #
Average of the response value Y
+ str = append (str, "STDEV_TOT_Y," + sqrt (var_tot)); #
Standard Deviation of the response value Y
+ str = append (str, "AVG_RES_Y," + avg_res); #
Average of the residual Y - pred(Y|X), i.e. residual bias
+ str = append (str, "STDEV_RES_Y," + sqrt (var_res)); #
Standard Deviation of the residual Y - pred(Y|X)
+ str = append (str, "DISPERSION," + dispersion); #
GLM-style dispersion, i.e. residual sum of squares / # d.f.
+ str = append (str, "R2," + R2); # R^2 of
residual with bias included vs. total average
+ str = append (str, "ADJUSTED_R2," + adjusted_R2); #
Adjusted R^2 of residual with bias included vs. total average
+ str = append (str, "R2_NOBIAS," + R2_nobias); # R^2 of
residual with bias subtracted vs. total average
+ str = append (str, "ADJUSTED_R2_NOBIAS," + adjusted_R2_nobias); #
Adjusted R^2 of residual with bias subtracted vs. total average
+ if (intercept_status == 0) {
+ str = append (str, "R2_VS_0," + R2_vs_0); # R^2 of
residual with bias included vs. zero constant
+ str = append (str, "ADJUSTED_R2_VS_0," + adjusted_R2_vs_0); #
Adjusted R^2 of residual with bias included vs. zero constant
+ }
+
+ print (str);
+
+
+}
+
+
diff --git a/scripts/perftest/slab/mlAlgorithms/native/slabMultiLogitReg.dml
b/scripts/perftest/slab/mlAlgorithms/native/slabMultiLogitReg.dml
new file mode 100644
index 0000000000..cde11b5ea1
--- /dev/null
+++ b/scripts/perftest/slab/mlAlgorithms/native/slabMultiLogitReg.dml
@@ -0,0 +1,373 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Solves Multinomial Logistic Regression using Trust Region methods.
+# (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and
Keerthi, JMLR 9 (2008) 627-650)
+
+# INPUT PARAMETERS:
+#
--------------------------------------------------------------------------------------------
+# NAME TYPE DEFAULT MEANING
+#
--------------------------------------------------------------------------------------------
+# X String --- Location to read the matrix of feature vectors
+# Y String --- Location to read the matrix with category labels
+# B String --- Location to store estimated regression parameters (the
betas)
+# Log String " " Location to write per-iteration variables for
log/debugging purposes
+# icpt Int 0 Intercept presence, shifting and rescaling X columns:
+# 0 = no intercept, no shifting, no rescaling;
+# 1 = add intercept, but neither shift nor rescale X;
+# 2 = add intercept, shift & rescale X columns to mean =
0, variance = 1
+# reg Double 0.0 regularization parameter (lambda = 1/C); intercept is
not regularized
+# tol Double 0.000001 tolerance ("epsilon")
+# moi Int 100 max. number of outer (Newton) iterations
+# mii Int 0 max. number of inner (conjugate gradient) iterations,
0 = no max
+# fmt String "text" Matrix output format, usually "text" or "csv" (for
matrices only)
+#
--------------------------------------------------------------------------------------------
+# The largest label represents the baseline category; if label -1 or 0 is
present, then it is
+# the baseline label (and it is converted to the largest label).
+#
+# The Log file, when requested, contains the following per-iteration variables
in CSV format,
+# each line containing triple (NAME, ITERATION, VALUE) with ITERATION = 0 for
initial values:
+#
+# NAME MEANING
+#
-------------------------------------------------------------------------------------------
+# LINEAR_TERM_MIN The minimum value of X %*% B, used to check for
overflows
+# LINEAR_TERM_MAX The maximum value of X %*% B, used to check for
overflows
+# NUM_CG_ITERS Number of inner (Conj.Gradient) iterations in this
outer iteration
+# IS_TRUST_REACHED 1 = trust region boundary was reached, 0 = otherwise
+# POINT_STEP_NORM L2-norm of iteration step from old point (i.e. matrix
B) to new point
+# OBJECTIVE The loss function we minimize (negative regularized
log-likelihood)
+# OBJ_DROP_REAL Reduction in the objective during this iteration,
actual value
+# OBJ_DROP_PRED Reduction in the objective predicted by a quadratic
approximation
+# OBJ_DROP_RATIO Actual-to-predicted reduction ratio, used to update
the trust region
+# IS_POINT_UPDATED 1 = new point accepted; 0 = new point rejected, old
point restored
+# GRADIENT_NORM L2-norm of the loss function gradient (omitted if
point is rejected)
+# TRUST_DELTA Updated trust region size, the "delta"
+#
-------------------------------------------------------------------------------------------
+#
+# Script invocation example:
+# hadoop jar SystemML.jar -f MultiLogReg.dml -nvargs icpt=2 reg=1.0
tol=0.000001 moi=100 mii=20
+# X=INPUT_DIR/X123 Y=INPUT_DIR/Y123 B=OUTPUT_DIR/B123 fmt=csv
Log=OUTPUT_DIR/log
+
+
+
+# Optional named arguments and their defaults.
+# fileLog: target of the per-iteration log; the default " " disables logging.
+# fmtB: only referenced by the commented-out write() of B_out near the end of this script.
+fileLog = ifdef($Log, " ")
+fmtB = ifdef($fmt, "text")
+
+intercept_status = ifdef($icpt, 0) # $icpt = 0
+regularization = ifdef($reg, 0.0) # $reg = 0.0
+tol = ifdef($tol, 0.000001) # $tol = 0.000001
+maxiter = ifdef($moi, 100) # $moi = 100
+maxinneriter = ifdef($mii, 0) # $mii = 0
+tol = as.double(tol)
+
+print("BEGIN MULTINOMIAL LOGISTIC REGRESSION SCRIPT")
+
+# Generate data internally: this benchmark variant does not read the X / Y
+# locations documented in the header; the only required input is the
+# positional argument $1 (number of rows).
+n = $1 # number of rows
+m = 100 # number of columns
+X = rand(rows=n, cols=m, min=0, max=1, sparsity=0.9, seed=42)
+# Label vector: values drawn from the range [1, 3]; rand() is not restricted to
+# integers here (TODO confirm how table() maps them to categories). With
+# sparsity=0.9 some entries are presumably left at 0; labels <= 0 are
+# converted to the baseline category inside the benchmark loop.
+Y_vec = rand(rows=n, cols=1, min=1, max=3, sparsity=0.9, seed=24)
+
+# force a pass over the data
+sum_x = sum(X)
+sum_y = sum(Y_vec)
+
+for (ix in 1:5) {
+
+
+ eta0 = 0.0001
+ eta1 = 0.25
+ eta2 = 0.75
+ sigma1 = 0.25
+ sigma2 = 0.5
+ sigma3 = 4.0
+ psi = 0.1
+
+ N = nrow(X)
+ D = ncol(X)
+
+ # Introduce the intercept column if requested. X is created outside the
+ # benchmark loop and is overwritten here, so the column may only be
+ # appended in the first repetition (ix == 1); otherwise every repetition
+ # would add one more all-ones column and silently change D. In later
+ # repetitions "D = ncol(X)" above already counts the intercept column.
+ if (ix == 1 & (intercept_status == 1 | intercept_status == 2)) {
+   X = cbind(X, matrix(1, rows=N, cols=1))
+   D = ncol(X)
+ }
+
+ scale_lambda = matrix(1, rows=D, cols=1)
+ if (intercept_status == 1 | intercept_status == 2) {
+ scale_lambda[D, 1] = 0
+ }
+
+ if (intercept_status == 2) { # scale-&-shift X columns to mean 0,
variance 1
+ avg_X_cols = t(colSums(X)) / N
+ var_X_cols = (t(colSums(X ^ 2)) - N * (avg_X_cols ^ 2)) / (N - 1)
+ is_unsafe = var_X_cols <= 0
+ scale_X = 1.0 / sqrt(var_X_cols * (1 - is_unsafe) + is_unsafe)
+ scale_X[D, 1] = 1
+ shift_X = -avg_X_cols * scale_X
+ shift_X[D, 1] = 0
+ rowSums_X_sq = (X ^ 2) %*% (scale_X ^ 2) + X %*% (2 * scale_X *
shift_X) + sum(shift_X ^ 2)
+ } else {
+ scale_X = matrix(1, rows=D, cols=1)
+ shift_X = matrix(0, rows=D, cols=1)
+ rowSums_X_sq = rowSums(X ^ 2)
+ }
+
+ # Henceforth we replace "X" with "X %*% (SHIFT/SCALE TRANSFORM)" and
rowSums(X ^ 2)
+ # with "rowSums_X_sq" in order to preserve the sparsity of X under shift
and scale.
+ # The transform is then associatively applied to the other side of the
expression,
+ # and is rewritten via "scale_X" and "shift_X" as follows:
+ #
+ # ssX_A = (SHIFT/SCALE TRANSFORM) %*% A --- is rewritten as:
+ # ssX_A = diag(scale_X) %*% A;
+ # ssX_A[D, ] = ssX_A[D, ] + t(shift_X) %*% A;
+ #
+ # tssX_A = t(SHIFT/SCALE TRANSFORM) %*% A --- is rewritten as:
+ # tssX_A = diag(scale_X) %*% A + shift_X %*% A[D, ];
+
+ # Convert "Y_vec" into indicator matrix:
+ max_y = max(Y_vec)
+ if (min(Y_vec) <= 0) {
+ # Category labels "0", "-1" etc. are converted into the largest label
+ Y_vec = Y_vec + (-Y_vec + max_y + 1) * (Y_vec <= 0)
+ max_y = max_y + 1
+ }
+ Y = table(seq(1, N, 1), Y_vec, N, max_y)
+ K = ncol(Y) - 1 # The number of non-baseline categories
+
+ lambda = (scale_lambda %*% matrix(1, rows=1, cols=K)) * regularization
+ delta = 0.5 * sqrt(D) / max(sqrt(rowSums_X_sq))
+
+ B = matrix(0, rows=D, cols=K) ### LT = X %*% (SHIFT/SCALE TRANSFORM)
%*% B;
+ ### LT = cbind(LT, matrix(0, rows=N,
cols=1));
+ ### LT = LT - rowMaxs(LT) %*%
matrix(1, rows=1, cols=K+1);
+ P = matrix(1, rows=N, cols=K+1) ### exp_LT = exp(LT);
+ P = P / (K + 1) ### P = exp_LT / (rowSums(exp_LT) %*%
matrix(1, rows=1, cols=K+1));
+ obj = N * log(K + 1) ### obj = - sum(Y * LT) +
sum(log(rowSums(exp_LT))) + 0.5 * sum(lambda * (B_new ^ 2));
+
+ Grad = t(X) %*% (P[, 1:K] - Y[, 1:K])
+ if (intercept_status == 2) {
+ Grad = diag(scale_X) %*% Grad + shift_X %*% Grad[D, ]
+ }
+ Grad = Grad + lambda * B
+ norm_Grad = sqrt(sum(Grad ^ 2))
+ norm_Grad_initial = norm_Grad
+
+ if (maxinneriter == 0) {
+ maxinneriter = D * K
+ }
+ iter = 1
+
+ # boolean for convergence check
+ converge = (norm_Grad < tol) | (iter > maxiter)
+
+ print("-- Initially: Objective = " + obj + ", Gradient Norm = " +
norm_Grad + ", Trust Delta = " + delta)
+
+ if (fileLog != " ") {
+ log_str = "OBJECTIVE,0," + obj
+ log_str = append(log_str, "GRADIENT_NORM,0," + norm_Grad)
+ log_str = append(log_str, "TRUST_DELTA,0," + delta)
+ } else {
+ log_str = " "
+ }
+
+ while (!converge) {
+ # SOLVE TRUST REGION SUB-PROBLEM
+ S = matrix(0, rows=D, cols=K)
+ R = -Grad
+ V = R
+ delta2 = delta ^ 2
+ inneriter = 1
+ norm_R2 = sum(R ^ 2)
+ innerconverge = (sqrt(norm_R2) <= psi * norm_Grad)
+ is_trust_boundary_reached = 0
+
+ # Conjugate-gradient iterations for the trust-region sub-problem.
+ # S is the accumulated step, R the current residual, V the search direction;
+ # the loop stops when the residual is small relative to the gradient norm,
+ # when the step hits the trust-region boundary, or after maxinneriter steps.
+ while (!innerconverge) {
+ # Apply the shift/scale transform to V (identity unless icpt == 2).
+ if (intercept_status == 2) {
+ ssX_V = diag(scale_X) %*% V
+ ssX_V[D, ] = ssX_V[D, ] + t(shift_X) %*% V
+ } else {
+ ssX_V = V
+ }
+ # HV: product of the regularized Hessian with the direction V, computed
+ # from the current class probabilities P without forming the Hessian.
+ Q = P[, 1:K] * (X %*% ssX_V)
+ HV = t(X) %*% (Q - P[, 1:K] * (rowSums(Q) %*% matrix(1, rows=1,
cols=K)))
+ if (intercept_status == 2) {
+ HV = diag(scale_X) %*% HV + shift_X %*% HV[D, ]
+ }
+ HV = HV + lambda * V
+ # Unconstrained CG step length along V and the resulting candidate step.
+ alpha = norm_R2 / sum(V * HV)
+ Snew = S + alpha * V
+ norm_Snew2 = sum(Snew ^ 2)
+ if (norm_Snew2 <= delta2) {
+ # Candidate stays inside the trust region: accept it and do the usual
+ # CG residual / direction update.
+ S = Snew
+ R = R - alpha * HV
+ old_norm_R2 = norm_R2
+ norm_R2 = sum(R ^ 2)
+ V = R + (norm_R2 / old_norm_R2) * V
+ innerconverge = (sqrt(norm_R2) <= psi * norm_Grad)
+ } else {
+ # Candidate would leave the trust region: pick alpha >= 0 such that
+ # sum((S + alpha * V) ^ 2) == delta2, i.e. stop exactly on the boundary.
+ is_trust_boundary_reached = 1
+ sv = sum(S * V)
+ v2 = sum(V ^ 2)
+ s2 = sum(S ^ 2)
+ rad = sqrt(sv ^ 2 + v2 * (delta2 - s2))
+ # Both branches are the same root of the quadratic, written in the form
+ # that avoids subtracting two nearly equal numbers.
+ if (sv >= 0) {
+ alpha = (delta2 - s2) / (sv + rad)
+ } else {
+ alpha = (rad - sv) / v2
+ }
+ S = S + alpha * V
+ R = R - alpha * HV
+ innerconverge = TRUE
+ }
+ inneriter = inneriter + 1
+ innerconverge = innerconverge | (inneriter > maxinneriter)
+ }
+
+ # END TRUST REGION SUB-PROBLEM
+
+ # compute rho, update B, obtain delta
+ gs = sum(S * Grad)
+ qk = -0.5 * (gs - sum(S * R))
+ B_new = B + S
+ if (intercept_status == 2) {
+ ssX_B_new = diag(scale_X) %*% B_new
+ ssX_B_new[D, ] = ssX_B_new[D, ] + t(shift_X) %*% B_new
+ } else {
+ ssX_B_new = B_new
+ }
+
+ LT = cbind((X %*% ssX_B_new), matrix(0, rows=N, cols=1))
+ if (fileLog != " ") {
+ log_str = append(log_str, "LINEAR_TERM_MIN," + iter + "," +
min(LT))
+ log_str = append(log_str, "LINEAR_TERM_MAX," + iter + "," +
max(LT))
+ }
+ LT = LT - rowMaxs(LT) %*% matrix(1, rows=1, cols=K+1)
+ exp_LT = exp(LT)
+ P_new = exp_LT / (rowSums(exp_LT) %*% matrix(1, rows=1, cols=K+1))
+ obj_new = -sum(Y * LT) + sum(log(rowSums(exp_LT))) + 0.5 * sum(lambda
* (B_new ^ 2))
+
+ # Consider updating LT in the inner loop
+ # Consider the big "obj" and "obj_new" rounding-off their small
difference below:
+
+ actred = (obj - obj_new)
+
+ rho = actred / qk
+ is_rho_accepted = (rho > eta0)
+ snorm = sqrt(sum(S ^ 2))
+
+ if (fileLog != " ") {
+ log_str = append(log_str, "NUM_CG_ITERS," + iter + "," +
(inneriter - 1))
+ log_str = append(log_str, "IS_TRUST_REACHED," + iter + "," +
is_trust_boundary_reached)
+ log_str = append(log_str, "POINT_STEP_NORM," + iter + "," + snorm)
+ log_str = append(log_str, "OBJECTIVE," + iter + "," + obj_new)
+ log_str = append(log_str, "OBJ_DROP_REAL," + iter + "," + actred)
+ log_str = append(log_str, "OBJ_DROP_PRED," + iter + "," + qk)
+ log_str = append(log_str, "OBJ_DROP_RATIO," + iter + "," + rho)
+ }
+
+ if (iter == 1) {
+ delta = min(delta, snorm)
+ }
+
+ alpha2 = obj_new - obj - gs
+ if (alpha2 <= 0) {
+ alpha = sigma3
+ } else {
+ alpha = max(sigma1, -0.5 * gs / alpha2)
+ }
+
+ if (rho < eta0) {
+ delta = min(max(alpha, sigma1) * snorm, sigma2 * delta)
+ } else {
+ if (rho < eta1) {
+ delta = max(sigma1 * delta, min(alpha * snorm, sigma2 * delta))
+ } else {
+ if (rho < eta2) {
+ delta = max(sigma1 * delta, min(alpha * snorm, sigma3 *
delta))
+ } else {
+ delta = max(delta, min(alpha * snorm, sigma3 * delta))
+ }
+ }
+ }
+
+ if (is_trust_boundary_reached == 1) {
+ print("-- Outer Iteration " + iter + ": Had " + (inneriter - 1) +
" CG iterations, trust bound REACHED")
+ } else {
+ print("-- Outer Iteration " + iter + ": Had " + (inneriter - 1) +
" CG iterations")
+ }
+ print(" -- Obj.Reduction: Actual = " + actred + ", Predicted = " +
qk +
+ " (A/P: " + (round(10000.0 * rho) / 10000.0) + "), Trust Delta
= " + delta)
+
+ if (is_rho_accepted) {
+ B = B_new
+ P = P_new
+ Grad = t(X) %*% (P[, 1:K] - Y[, 1:K])
+ if (intercept_status == 2) {
+ Grad = diag(scale_X) %*% Grad + shift_X %*% Grad[D, ]
+ }
+ Grad = Grad + lambda * B
+ norm_Grad = sqrt(sum(Grad ^ 2))
+ obj = obj_new
+ print(" -- New Objective = " + obj + ", Beta Change Norm = " +
snorm + ", Gradient Norm = " + norm_Grad)
+ if (fileLog != " ") {
+ log_str = append(log_str, "IS_POINT_UPDATED," + iter + ",1")
+ log_str = append(log_str, "GRADIENT_NORM," + iter + "," +
norm_Grad)
+ }
+ } else {
+ if (fileLog != " ") {
+ log_str = append(log_str, "IS_POINT_UPDATED," + iter + ",0")
+ }
+ }
+
+ if (fileLog != " ") {
+ log_str = append(log_str, "TRUST_DELTA," + iter + "," + delta)
+ }
+
+ iter = iter + 1
+ converge = ((norm_Grad < (tol * norm_Grad_initial)) | (iter > maxiter)
|
+ ((is_trust_boundary_reached == 0) & (abs(actred) < (abs(obj) +
abs(obj_new)) * 0.00000000000001)))
+ if (converge) {
+ print("Termination / Convergence condition satisfied.")
+ } else {
+ print(" ")
+ }
+ }
+
+ if (intercept_status == 2) {
+ B_out = diag(scale_X) %*% B
+ B_out[D, ] = B_out[D, ] + t(shift_X) %*% B
+ } else {
+ B_out = B
+ }
+ # write(B_out, fileB, format=fmtB)
+
+ if (sum_x > 0.0) {
+ print(as.scalar(B[1, 1]))
+ }
+
+
+}
+
+if (fileLog != " ") {
+ write(log_str, fileLog)
+}
+
+
diff --git a/scripts/perftest/slab/mlAlgorithms/native/slabNativePCA.dml
b/scripts/perftest/slab/mlAlgorithms/native/slabNativePCA.dml
new file mode 100644
index 0000000000..3f0da1848f
--- /dev/null
+++ b/scripts/perftest/slab/mlAlgorithms/native/slabNativePCA.dml
@@ -0,0 +1,130 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# This script performs Principal Component Analysis (PCA) on the given input
data.
+#
+# INPUT PARAMETERS:
+#
---------------------------------------------------------------------------------------------
+# NAME TYPE DEFAULT MEANING
+#
---------------------------------------------------------------------------------------------
+# INPUT String --- Location to read the matrix A of feature vectors
+# K Int --- Indicates dimension of the new vector space
constructed from eigen vectors
+# CENTER Int 0 Indicates whether or not to center data
+# SCALE Int 0 Indicates whether or not to scale data
+# OFMT String --- Output data format
+# PROJDATA Int 0 This argument indicates if the data should be
projected or not
+# MODEL String --- Location to already existing model: eigenvectors and
eigenvalues
+# OUTPUT String / Location to write output matrices (covariance matrix,
new basis vectors,
+# and data projected onto new basis vectors)
+# hadoop jar SystemML.jar -f PCA.dml -nvargs INPUT=INPUT_DIR/pca-1000x1000
+# OUTPUT=OUTPUT_DIR/pca-1000x1000-model PROJDATA=1 CENTER=1 SCALE=1
+#
---------------------------------------------------------------------------------------------
+
+# Generate data internally instead of reading from a file
+n = $1 # number of rows
+m = 100 # number of columns
+A = rand(rows=n, cols=m, min=0, max=1, sparsity=0.9, seed=42)
+
+
+
+sum_A = sum(A)
+for (ix in 1:5) {
+
+
+ K = ifdef($K, ncol(A))
+ ofmt = ifdef($OFMT, "CSV")
+ projectData = ifdef($PROJDATA, 1)
+ model = ifdef($MODEL, "")
+ center = ifdef($CENTER, 0)
+ scale = ifdef($SCALE, 0)
+ output = ifdef($OUTPUT, "/")
+
+ evec_dominant = matrix(0, cols=1, rows=1)
+
+ if (model != "") {
+ pass = 1.0
+ # reuse existing model to project data
+ #evec_dominant = read(model+"/dominant.eigen.vectors")
+ } else {
+ if (model == "") {
+ model = output
+ }
+
+ N = nrow(A)
+ D = ncol(A)
+
+ # perform z-scoring (centering and scaling)
+ # NOTE(review): A is created before the "for (ix in 1:5)" benchmark loop and
+ # is overwritten below, so with CENTER=1 or SCALE=1 repetitions 2..5 operate
+ # on the output of the previous repetition, not on the original data.
+ if (center == 1) {
+ cm = colMeans(A)
+ A = A - cm
+ }
+ if (scale == 1) {
+ # Column sums of squares; only turned into a sample variance (division by
+ # N - 1) when centering is enabled as well.
+ cvars = colSums(A^2)
+ if (center == 1) {
+ # A has already been centered above, so cm is recomputed on centered data.
+ cm = colMeans(A)
+ cvars = (cvars - N * (cm^2)) / (N - 1)
+ }
+ Azscored = A / sqrt(cvars)
+ A = Azscored
+ }
+
+ # co-variance matrix
+ mu = colSums(A) / N
+ C = (t(A) %*% A) / (N - 1) - (N / (N - 1)) * t(mu) %*% mu
+
+ # compute eigen vectors and values
+ [evalues, evectors] = eigen(C)
+
+ decreasing_Idx = order(target=evalues, by=1, decreasing=TRUE, index.return=TRUE)
+ # permutation matrix with diagmat[i, decreasing_Idx[i]] = 1
+ diagmat = table(seq(1, D), decreasing_Idx)
+ # sorts eigenvalues by decreasing order: row i picks evalues[decreasing_Idx[i]]
+ evalues = diagmat %*% evalues
+ # sorts eigenvectors column-wise in the order of decreasing eigenvalues:
+ # column i has to become evectors[, decreasing_Idx[i]], which is a
+ # right-multiplication by t(diagmat). Multiplying by diagmat itself applies
+ # the inverse permutation and only gives the same result when the
+ # permutation is its own inverse (e.g. a plain reversal).
+ evectors = evectors %*% t(diagmat)
+
+ # select K dominant eigen vectors
+ nvec = ncol(evectors)
+
+ eval_dominant = evalues[1:K, 1]
+ evec_dominant = evectors[, 1:K]
+
+ # the square root of eigenvalues
+ eval_stdev_dominant = sqrt(eval_dominant)
+
+ #write(eval_stdev_dominant,
model+"/dominant.eigen.standard.deviations", format=ofmt)
+ #write(eval_dominant, model+"/dominant.eigen.values", format=ofmt)
+ #write(evec_dominant, model+"/dominant.eigen.vectors", format=ofmt)
+ }
+ if (projectData == 1 | model != "") {
+ # Construct new data set by treating computed dominant eigenvectors as
the basis vectors
+ newA = A %*% evec_dominant
+ sum_newA = sum(newA)
+ if (sum_newA > 0) {
+ print(sum_newA)
+ }
+ #write(newA, output+"/projected.data", format=ofmt)
+ }
+
+
+}
+
+
diff --git
a/scripts/perftest/slab/mlAlgorithms/single_node_dense/run_single_node_dense_ml.sh
b/scripts/perftest/slab/mlAlgorithms/single_node_dense/run_single_node_dense_ml.sh
new file mode 100755
index 0000000000..0d527da0d2
--- /dev/null
+++
b/scripts/perftest/slab/mlAlgorithms/single_node_dense/run_single_node_dense_ml.sh
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+
+# Set up the output directory
+OUTPUT_DIR="output"
+mkdir -p $OUTPUT_DIR
+
+# Define row numbers for the DML scripts
+ROW_NUMBERS=("1000" "10000" "100000" "1000000")
+
+# Define DML files and corresponding output files
+DML_FILES=("slabLogisticRegression.dml"
"slabOrdinaryLeastSquaresRegression.dml"
"slabHeteroscedasticityRobustStandardErrors.dml"
"slabNonNegativeMatrixFactorization.dml" "slabPCA.dml")
+OUTPUT_FILES=("slabLogisticRegression_stats.txt"
"slabOrdinaryLeastSquaresRegression_stats.txt"
"slabHeteroscedasticityRobustStandardErrors_stats.txt"
"slabNonNegativeMatrixFactorization_stats.txt" "slabPCA_stats.txt")
+
+# Function to run DML script and handle errors
+run_dml() {
+ local DML_FILE=$1
+ local ARGS=$2
+ local OUTPUT_FILE=$3
+
+ # Run the DML script with -stats flag and capture the output
+ TEMP_FILE=$(mktemp)
+ if systemds $DML_FILE -args $ARGS -stats > $TEMP_FILE 2>&1; then
+ # Write the number of rows and SystemDS Statistics section to the output
file
+ echo "Number of rows: $ARGS" >> $OUTPUT_FILE
+ awk '/SystemDS Statistics:/{flag=1}flag' $TEMP_FILE >> $OUTPUT_FILE
+ else
+ echo "An error occurred while executing ${DML_FILE} with arguments
${ARGS}. Check ${TEMP_FILE} for details." >> $OUTPUT_FILE
+ fi
+ echo -e "\n\n\n\n" >> $OUTPUT_FILE # Add empty lines for separation
+ rm $TEMP_FILE
+}
+
+# Run every benchmark script once per configured row count
+for i in "${!DML_FILES[@]}"; do
+  script=${DML_FILES[$i]}
+  stats_file=${OUTPUT_DIR}/${OUTPUT_FILES[$i]}
+
+  # Start each script with an empty statistics file
+  : > "$stats_file"
+
+  # One SystemDS run per row count, results appended to the same file
+  for rows in "${ROW_NUMBERS[@]}"; do
+    run_dml "$script" "$rows" "$stats_file"
+    echo "Execution of ${script} with ${rows} rows completed. Statistics appended to ${stats_file}"
+  done
+done
diff --git
a/scripts/perftest/slab/mlAlgorithms/single_node_dense/slabHeteroscedasticityRobustStandardErrors.dml
b/scripts/perftest/slab/mlAlgorithms/single_node_dense/slabHeteroscedasticityRobustStandardErrors.dml
new file mode 100644
index 0000000000..4c012f2eef
--- /dev/null
+++
b/scripts/perftest/slab/mlAlgorithms/single_node_dense/slabHeteroscedasticityRobustStandardErrors.dml
@@ -0,0 +1,55 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("../../slabUtils.dml") as utils
+
+# Synthetic workload: $1 rows with 100 features and a 0/1 response that is 1
+# wherever the random draw exceeds 0.80.
+X = rand(rows=$1, cols=100)
+y = rand(rows=$1, cols=1, pdf='uniform') > 0.80
+
+# Touch both inputs once before the measured section.
+print(sum(X))
+print(sum(y))
+
+# Fit the regression once and derive the squared residuals.
+beta = reg(X, y)
+r2 = (y - X %*% beta)^2
+
+# Five repetitions of the benchmarked kernel; the result is passed to the
+# utility printer so that it has to be computed.
+for (ix in 1:5) {
+  se = robust_se(X, r2)
+  utils::printRandElements(se, 10)
+}
+
+reg = function(matrix[double] X, matrix[double] y)
+    return (matrix[double] b) {
+    # Least-squares coefficients from the normal equations (X'X) b = X'y.
+    XtX = t(X) %*% X
+    Xty = t(X) %*% y
+    b = solve(XtX, Xty)
+}
+
+
+robust_se = function(matrix[double] X,
+                     matrix[double] r2)
+    return (matrix[double] se) {
+    # Sandwich estimator (X'X)^-1 (X' diag(r2) X) (X'X)^-1, where r2 holds one
+    # squared residual per row of X.
+    # The SVD is taken of the small Gram matrix X'X, not of X itself.
+    gram = t(X) %*% X
+    [U, Sigma, V] = svd(gram)
+    sv = diag(Sigma)
+    bread = U %*% diag(sv^-1) %*% t(V)
+    weights = diag(r2)
+    se = bread %*% (t(X) %*% weights %*% X) %*% bread
+}
diff --git
a/scripts/perftest/slab/mlAlgorithms/single_node_dense/slabLogisticRegression.dml
b/scripts/perftest/slab/mlAlgorithms/single_node_dense/slabLogisticRegression.dml
new file mode 100644
index 0000000000..a568f7a0b6
--- /dev/null
+++
b/scripts/perftest/slab/mlAlgorithms/single_node_dense/slabLogisticRegression.dml
@@ -0,0 +1,55 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("../../slabUtils.dml") as utils
+
+# Synthetic workload: $1 rows with 100 features and a 0/1 response that is 1
+# wherever the random draw exceeds 0.80.
+X = rand(rows=$1, cols=100)
+y = rand(rows=$1, cols=1, pdf='uniform') > 0.80
+
+# Touch both inputs once before the measured section.
+print(sum(X))
+print(sum(y))
+
+# Five repetitions of a 10-iteration logistic regression fit.
+for (ix in 1:5) {
+  w = logit(X, y, 10)
+  utils::printRandElements(w, 10)
+}
+
+logit = function(matrix[double] X,
+                 matrix[double] y,
+                 Integer iterations)
+    return (matrix[double] w) {
+    # Logistic regression by plain gradient descent.
+    #   X           feature matrix, one row per observation
+    #   y           response column, one entry per row of X
+    #   iterations  number of gradient steps
+    # Returns the weight column w (one entry per column of X), starting from 0.
+    # The step size starts at 10 and is halved before every update.
+    w = matrix(0, rows=ncol(X), cols=1)
+    N = nrow(X)
+    stepSize = 10
+    remaining = iterations
+
+    while (remaining > 0) {
+        sigmoid = 1/(1+exp(-(X %*% w)))
+        delta = sigmoid - y
+        stepSize = stepSize / 2
+        w = w - ((stepSize * t(X) %*% delta)/N)
+        remaining = remaining - 1
+    }
+}
diff --git
a/scripts/perftest/slab/mlAlgorithms/single_node_dense/slabNonNegativeMatrixFactorization.dml
b/scripts/perftest/slab/mlAlgorithms/single_node_dense/slabNonNegativeMatrixFactorization.dml
new file mode 100644
index 0000000000..2c32cb9081
--- /dev/null
+++
b/scripts/perftest/slab/mlAlgorithms/single_node_dense/slabNonNegativeMatrixFactorization.dml
@@ -0,0 +1,54 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("../../slabUtils.dml") as utils
+
+# Synthetic workload: $1 rows with 100 columns.
+X = rand(rows=$1, cols=100)
+# 1x1 indicator; it is only summed and printed below.
+y = rand(rows=1, cols=1) > 0.80
+
+# Touch the inputs once before the measured section.
+print(sum(X))
+print(sum(y))
+
+# Five repetitions of the factorization; the returned value is printed.
+for (ix in 1:5) {
+  status = gnmf(X, 10, 10)
+  print(status)
+}
+
+gnmf = function(matrix[double] X, Integer r, Integer iterations)
+    return (integer iteration) {
+    # Non-negative matrix factorization X ~ W %*% H with multiplicative updates.
+    #   X           matrix to factorize
+    #   r           rank, i.e. number of columns of W and rows of H
+    #   iterations  number of multiplicative update rounds
+    # Returns 0 in all cases; the factors themselves are not returned, two of
+    # their entries are printed instead so that they have to be computed.
+
+    W = rand(rows = nrow(X), cols = r, pdf = 'uniform')
+    H = rand(rows = r, cols = ncol(X), pdf = 'uniform')
+
+    # Honor the requested number of rounds (previously fixed to 3 regardless
+    # of the argument). A while loop is used so that iterations <= 0 performs
+    # no update at all.
+    round = 0
+    while (round < iterations) {
+        W = W * ((X %*% t(H)) / (W %*% (H %*% t(H))))
+        H = H * ((t(W) %*% X) / ((t(W) %*% W) %*% H))
+        round = round + 1
+    }
+    if ((as.scalar(W[1,1]) > 0) & (as.scalar(H[1,1]) > 0)) {
+        print(as.scalar(H[1,1]))
+        print(as.scalar(W[1,1]))
+    }
+
+    iteration = 0
+}
diff --git
a/scripts/perftest/slab/mlAlgorithms/single_node_dense/slabOrdinaryLeastSquaresRegression.dml
b/scripts/perftest/slab/mlAlgorithms/single_node_dense/slabOrdinaryLeastSquaresRegression.dml
new file mode 100644
index 0000000000..eee7aa7dc5
--- /dev/null
+++
b/scripts/perftest/slab/mlAlgorithms/single_node_dense/slabOrdinaryLeastSquaresRegression.dml
@@ -0,0 +1,40 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("../../slabUtils.dml") as utils
+
+# Synthetic workload: $1 rows with 100 features and a 0/1 response that is 1
+# wherever the random draw exceeds 0.80.
+X = rand(rows=$1, cols=100)
+y = rand(rows=$1, cols=1, pdf='uniform') > 0.80
+
+# Touch both inputs once before the measured section.
+print(sum(X))
+print(sum(y))
+
+# Five repetitions of the least-squares fit.
+for (ix in 1:5) {
+  b = reg(X, y)
+  utils::printRandElements(b, 10)
+}
+
+reg = function(matrix[double] X, matrix[double] y)
+    return (matrix[double] b) {
+    # Least-squares coefficients from the normal equations (X'X) b = X'y.
+    XtX = t(X) %*% X
+    Xty = t(X) %*% y
+    b = solve(XtX, Xty)
+}
diff --git a/scripts/perftest/slab/mlAlgorithms/single_node_dense/slabPCA.dml
b/scripts/perftest/slab/mlAlgorithms/single_node_dense/slabPCA.dml
new file mode 100644
index 0000000000..7058073409
--- /dev/null
+++ b/scripts/perftest/slab/mlAlgorithms/single_node_dense/slabPCA.dml
@@ -0,0 +1,56 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("../../slabUtils.dml") as utils
+
+# Synthetic workload: $1 rows with 100 features. The 0/1 indicator y is not
+# used by the PCA itself; it is only summed and printed.
+X = rand(rows=$1, cols=100)
+y = rand(rows=$1, cols=1, pdf='uniform') > 0.80
+
+# Touch both inputs once before the measured section.
+print(sum(X))
+print(sum(y))
+
+# Five repetitions of a projection onto 5 components.
+for (ix in 1:5) {
+  prj = pca(X, 5)
+  utils::printRandElements(prj, 10)
+}
+
+pca = function(matrix[double] X, Integer k)
+    return (matrix[double] PRJ) {
+    # Project the column-centered data onto its k leading principal components.
+    #   X  data matrix, one row per observation
+    #   k  number of components to keep (1 <= k <= ncol(X))
+    # Returns PRJ with nrow(X) rows and k columns.
+    N = nrow(X)
+    K = ncol(X)
+    XS = X - colMeans(X)
+    S = (1/(N-1)) * (t(XS) %*% XS)
+    [eigvals, eigvects] = eigen(S)
+
+    # Sorting idiom adapted from the SystemML PCA script: eigssorted[i] is the
+    # index of the i-th largest eigenvalue and diagmat[i, eigssorted[i]] = 1.
+    eigssorted = order(target=eigvals, by=1,
+                       decreasing=TRUE,
+                       index.return=TRUE)
+    diagmat = table(seq(1, K), eigssorted)
+    # Column i of the sorted basis must be eigvects[, eigssorted[i]], which is
+    # a right-multiplication by t(diagmat). Multiplying by diagmat itself
+    # applies the inverse permutation and is only equivalent when the
+    # permutation is its own inverse (e.g. a plain reversal).
+    # The sorted eigenvalues are not needed for the projection and are
+    # therefore no longer computed.
+    eigvects = eigvects %*% t(diagmat)
+    eigvects = eigvects[, 1:k]
+
+    PRJ = XS %*% eigvects
+}
diff --git
a/scripts/perftest/slab/operators/distributed_sparse/run_distributed_matrix_sparse.sh
b/scripts/perftest/slab/operators/distributed_sparse/run_distributed_matrix_sparse.sh
new file mode 100755
index 0000000000..acc6397d9c
--- /dev/null
+++
b/scripts/perftest/slab/operators/distributed_sparse/run_distributed_matrix_sparse.sh
@@ -0,0 +1,102 @@
+#!/usr/bin/env bash
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Ensure script is run from the 'distributed_sparse' directory: the DML files
+# and DATASET_PATH below are resolved relative to the working directory
+if [ "$(basename "$PWD")" != "distributed_sparse" ]; then
+  echo "Please execute scripts from directory 'distributed_sparse'"
+  exit 1
+fi
+
+
+# Set up the output directory
+OUTPUT_DIR="output"
+mkdir -p "$OUTPUT_DIR"
+
+# Define datasets
+SPARSITY=("0_0001" "0_001" "0_01" "0_1")
+SHAPES=("tall" "wide")
+
+# Define DML files and corresponding output files
+DML_FILES=("slabFrobeniusNormSparse.dml" "slabGramMatrixSparse.dml" "slabMatrixAdditionSparse.dml" "slabMatrixMultSparse.dml" "slabMatrixVectorMultSparse.dml" "slabTransposeSparse.dml")
+OUTPUT_FILES=("slabFrobeniusNormSparse_stats.txt" "slabGramMatrixSparse_stats.txt" "slabMatrixAdditionSparse_stats.txt" "slabMatrixMultSparse_stats.txt" "slabMatrixVectorMultSparse_stats.txt" "slabTransposeSparse_stats.txt")
+
+# Base path to datasets
+DATASET_PATH="../../../../../src/test/resources/datasets/slab/sparse"
+
+# Iterate over each DML file
+for index in "${!DML_FILES[@]}"; do
+  DML_FILE=${DML_FILES[$index]}
+  OUTPUT_FILE=${OUTPUT_DIR}/${OUTPUT_FILES[$index]}
+
+  # Clear the output file before writing
+  > "$OUTPUT_FILE"
+
+  for SPARSE in "${SPARSITY[@]}"; do
+    for SHAPE in "${SHAPES[@]}"; do
+      CSV_FILE="${DATASET_PATH}/M_sparsity_${SPARSE}_${SHAPE}.csv"
+
+      if [ "$DML_FILE" == "slabMatrixMultSparse.dml" ]; then
+        # Matrix multiplication takes two inputs: the matrix of the current
+        # shape and, as second operand, the matrix of the opposite shape
+        if [ "$SHAPE" == "tall" ]; then
+          OTHER_SHAPE="wide"
+        else
+          OTHER_SHAPE="tall"
+        fi
+        DML_ARGS=("$CSV_FILE" "${DATASET_PATH}/M_sparsity_${SPARSE}_${OTHER_SHAPE}.csv")
+        INPUT_DESC="${DML_ARGS[0]} and ${DML_ARGS[1]}"
+      else
+        # All other scripts take a single input matrix
+        DML_ARGS=("$CSV_FILE")
+        INPUT_DESC="$CSV_FILE"
+      fi
+
+      # Run the DML script with -stats flag and capture the output
+      TEMP_FILE=$(mktemp)
+      systemds "$DML_FILE" -exec spark -args "${DML_ARGS[@]}" -stats > "$TEMP_FILE" 2>&1
+
+      # Write the sparsity and shape and SystemDS Statistics section to the output file
+      echo "Sparsity: ${SPARSE//_/\.}, Shape: $SHAPE" >> "$OUTPUT_FILE"
+      awk '/SystemDS Statistics:/{flag=1}flag' "$TEMP_FILE" >> "$OUTPUT_FILE"
+      echo -e "\n\n\n\n" >> "$OUTPUT_FILE"  # Add empty lines for separation
+
+      # Clean up temporary file
+      rm "$TEMP_FILE"
+
+      echo "Execution of ${DML_FILE} with ${INPUT_DESC} completed. Statistics appended to ${OUTPUT_FILE}"
+    done
+  done
+done
diff --git
a/scripts/perftest/slab/operators/distributed_sparse/slabFrobeniusNormSparse.dml
b/scripts/perftest/slab/operators/distributed_sparse/slabFrobeniusNormSparse.dml
new file mode 100644
index 0000000000..ddd9d15cc5
--- /dev/null
+++
b/scripts/perftest/slab/operators/distributed_sparse/slabFrobeniusNormSparse.dml
@@ -0,0 +1,35 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("../../slabUtils.dml") as utils
+
+#dataPath = "../../../../../src/test/resources/datasets/slab/sparse/M_sparsity_0_1_tall.csv"
+# Benchmark: Frobenius norm of the matrix read from the CSV file passed as $1.
+dataPath = $1
+M = read(dataPath, format="csv")
+# Sum and print the input once before the repeated runs; K also gates the prints below.
+K = sum( M )
+print(K)
+
+# Repeat the operation five times.
+for (ix in 1:5) {
+ R = sqrt(sum(M^2))
+ # The result is printed only when the input sum is positive.
+ if(K > 0.0) {
+ print(R)
+ }
+}
diff --git
a/scripts/perftest/slab/operators/distributed_sparse/slabGramMatrixSparse.dml
b/scripts/perftest/slab/operators/distributed_sparse/slabGramMatrixSparse.dml
new file mode 100644
index 0000000000..21571aa8b4
--- /dev/null
+++
b/scripts/perftest/slab/operators/distributed_sparse/slabGramMatrixSparse.dml
@@ -0,0 +1,35 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("../../slabUtils.dml") as utils
+
+# Benchmark: Gram matrix t(M) %*% M of the matrix read from the CSV file passed as $1.
+dataPath = $1
+
+M = read(dataPath, format="csv")
+# Sum and print the input once before the repeated runs; K also gates the prints below.
+K = sum( M )
+print(K)
+
+# Repeat the operation five times.
+for (ix in 1:5) {
+ R = t(M) %*% M
+ # Ten random elements of the result are printed only when the input sum is positive.
+ if(K > 0.0) {
+ utils::printRandElements(R, 10)
+ }
+}
diff --git
a/scripts/perftest/slab/operators/distributed_sparse/slabMatrixAdditionSparse.dml
b/scripts/perftest/slab/operators/distributed_sparse/slabMatrixAdditionSparse.dml
new file mode 100644
index 0000000000..d141cd6ea8
--- /dev/null
+++
b/scripts/perftest/slab/operators/distributed_sparse/slabMatrixAdditionSparse.dml
@@ -0,0 +1,37 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("../../slabUtils.dml") as utils
+
+# Benchmark: element-wise addition of two matrices read from the CSV file passed as $1.
+dataPath = $1
+M = read(dataPath, format="csv")
+# Sum and print the input once before the repeated runs; K also gates the prints below.
+K = sum( M )
+print(K)
+# The second operand is read from the same file as M.
+N = read(dataPath, format="csv")
+print(sum(N))
+
+# Repeat the operation five times.
+for (ix in 1:5) {
+ R = M + N
+ # Ten random elements of the result are printed only when the input sum is positive.
+ if(K > 0.0) {
+ utils::printRandElements(R, 10)
+ }
+}
+
diff --git
a/scripts/perftest/slab/operators/distributed_sparse/slabMatrixMultSparse.dml
b/scripts/perftest/slab/operators/distributed_sparse/slabMatrixMultSparse.dml
new file mode 100644
index 0000000000..32f9a3eff7
--- /dev/null
+++
b/scripts/perftest/slab/operators/distributed_sparse/slabMatrixMultSparse.dml
@@ -0,0 +1,37 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("../../slabUtils.dml") as utils
+
+# Benchmark: matrix product M %*% N; M is read from $1 and N from $2 (both CSV).
+dataPathM = $1
+dataPathN = $2
+M = read(dataPathM, format="csv")
+# Sum and print both inputs once before the repeated runs; K also gates the prints below.
+K = sum( M )
+print(K)
+N = read(dataPathN, format="csv")
+print(sum(N))
+
+# Repeat the operation five times.
+for (ix in 1:5) {
+ R = M %*% N
+ # Ten random elements of the result are printed only when sum(M) is positive.
+ if(K > 0.0) {
+ utils::printRandElements(R, 10)
+ }
+}
diff --git
a/scripts/perftest/slab/operators/distributed_sparse/slabMatrixVectorMultSparse.dml
b/scripts/perftest/slab/operators/distributed_sparse/slabMatrixVectorMultSparse.dml
new file mode 100644
index 0000000000..205158368f
--- /dev/null
+++
b/scripts/perftest/slab/operators/distributed_sparse/slabMatrixVectorMultSparse.dml
@@ -0,0 +1,36 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("../../slabUtils.dml") as utils
+
+# Benchmark: matrix-vector product of the matrix read from $1 (CSV) with a random vector.
+dataPath = $1
+M = read(dataPath, format="csv")
+# Sum and print the input once before the repeated runs; K also gates the prints below.
+K = sum( M )
+print(K)
+# Random column vector with one entry per column of M.
+w = rand(rows=ncol(M), cols=1)
+print(sum(w))
+
+# Repeat the operation five times.
+for (ix in 1:5) {
+ R = M %*% w
+ # Ten random elements of the result are printed only when sum(M) is positive.
+ if(K > 0.0) {
+ utils::printRandElements(R, 10)
+ }
+}
diff --git
a/scripts/perftest/slab/operators/distributed_sparse/slabTransposeSparse.dml
b/scripts/perftest/slab/operators/distributed_sparse/slabTransposeSparse.dml
new file mode 100644
index 0000000000..ad0970047b
--- /dev/null
+++ b/scripts/perftest/slab/operators/distributed_sparse/slabTransposeSparse.dml
@@ -0,0 +1,34 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("../../slabUtils.dml") as utils
+
+# Benchmark: transpose of the matrix read from the CSV file passed as $1.
+dataPath = $1
+M = read(dataPath, format="csv")
+# Sum and print the input once before the repeated runs; K also gates the prints below.
+K = sum( M )
+print(K)
+
+# Repeat the operation five times.
+for (ix in 1:5) {
+ R = t(M)
+ # Ten random elements of the result are printed only when the input sum is positive.
+ if(K > 0.0) {
+ utils::printRandElements(R, 10)
+ }
+}
diff --git
a/scripts/perftest/slab/operators/single_node_dense/run_single_node_matrix_dense.sh
b/scripts/perftest/slab/operators/single_node_dense/run_single_node_matrix_dense.sh
new file mode 100755
index 0000000000..27e4475961
--- /dev/null
+++
b/scripts/perftest/slab/operators/single_node_dense/run_single_node_matrix_dense.sh
@@ -0,0 +1,64 @@
+#!/usr/bin/env bash
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Ensure script is run from the 'single_node_dense' directory: the DML files
+# below are referenced by bare file name. "$PWD" is quoted so that a path
+# containing spaces is not split into several arguments to basename.
+if [ "$(basename "$PWD")" != "single_node_dense" ]; then
+  echo "Please execute scripts from directory 'single_node_dense'"
+  exit 1
+fi
+
+
+# Set up the output directory
+OUTPUT_DIR="output"
+mkdir -p "$OUTPUT_DIR"
+
+# List of row numbers for testing
+ROW_NUMBERS=("2500000" "5000000" "10000000" "20000000")
+
+# List of DML files and corresponding output files
+DML_FILES=("slabFrobeniusNorm.dml" "slabGramMatrix.dml" "slabMatrixAddition.dml" "slabMatrixMult.dml" "slabMatrixVectorMult.dml" "slabTranspose.dml")
+OUTPUT_FILES=("slabFrobeniusNorm_stats.txt" "slabGramMatrix_stats.txt" "slabMatrixAddition_stats.txt" "slabMatrixMult_stats.txt" "slabMatrixVectorMult_stats.txt" "slabTranspose_stats.txt")
+
+# Iterate over each DML file and execute it with different row numbers
+for index in "${!DML_FILES[@]}"; do
+  DML_FILE=${DML_FILES[$index]}
+  OUTPUT_FILE=${OUTPUT_DIR}/${OUTPUT_FILES[$index]}
+
+  # Clear the output file before writing
+  > "$OUTPUT_FILE"
+
+  for ROW in "${ROW_NUMBERS[@]}"; do
+    # Run the DML script with -stats flag and capture the output
+    TEMP_FILE=$(mktemp)
+    systemds "$DML_FILE" -args "$ROW" -stats > "$TEMP_FILE" 2>&1
+
+    # Write the number of rows and SystemDS Statistics section to the output file
+    echo "Number of rows: $ROW" >> "$OUTPUT_FILE"
+    awk '/SystemDS Statistics:/{flag=1}flag' "$TEMP_FILE" >> "$OUTPUT_FILE"
+    echo -e "\n\n\n\n" >> "$OUTPUT_FILE"  # Add empty lines for separation
+
+    # Clean up temporary file
+    rm "$TEMP_FILE"
+
+    echo "Execution of ${DML_FILE} with ${ROW} rows completed. Statistics appended to ${OUTPUT_FILE}"
+  done
+done
diff --git
a/scripts/perftest/slab/operators/single_node_dense/slabFrobeniusNorm.dml
b/scripts/perftest/slab/operators/single_node_dense/slabFrobeniusNorm.dml
new file mode 100644
index 0000000000..acacd9a7b3
--- /dev/null
+++ b/scripts/perftest/slab/operators/single_node_dense/slabFrobeniusNorm.dml
@@ -0,0 +1,29 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("../../slabUtils.dml") as utils
+
+# Benchmark: Frobenius norm of a random matrix with $1 rows and 100 columns.
+M = rand(rows = $1, cols = 100, pdf = 'uniform')
+# k is not referenced again in this script.
+k = sum(M)
+# Repeat the operation five times and print each result.
+for (ix in 1:5) {
+ R = sqrt(sum(M^2))
+ print(R)
+}
diff --git
a/scripts/perftest/slab/operators/single_node_dense/slabGramMatrix.dml
b/scripts/perftest/slab/operators/single_node_dense/slabGramMatrix.dml
new file mode 100644
index 0000000000..7c3a061525
--- /dev/null
+++ b/scripts/perftest/slab/operators/single_node_dense/slabGramMatrix.dml
@@ -0,0 +1,29 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("../../slabUtils.dml") as utils
+
+# Benchmark: Gram matrix t(M) %*% M of a random matrix with $1 rows and 100 columns.
+M = rand(rows = $1, cols = 100, pdf = 'uniform')
+# k is not referenced again in this script.
+k = sum(M)
+# Repeat the operation five times; print 10 random elements of each result.
+for (ix in 1:5) {
+ R = t(M) %*% M
+ utils::printRandElements(R,10)
+}
diff --git
a/scripts/perftest/slab/operators/single_node_dense/slabMatrixAddition.dml
b/scripts/perftest/slab/operators/single_node_dense/slabMatrixAddition.dml
new file mode 100644
index 0000000000..1d6489ec44
--- /dev/null
+++ b/scripts/perftest/slab/operators/single_node_dense/slabMatrixAddition.dml
@@ -0,0 +1,34 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+source("../../slabUtils.dml") as utils
+
+# Benchmark: element-wise addition of two random matrices, each with $1 rows and 100 columns.
+M = rand(rows = $1, cols = 100, pdf = 'uniform')
+X = rand(rows = $1, cols = 100, pdf = 'uniform')
+# Only M is summed here; k is not referenced again in this script.
+k = sum(M)
+
+# Repeat the operation five times; print 10 random elements of each result.
+for (ix in 1:5) {
+ R = M + X
+ utils::printRandElements(R,10)
+}
+
+
diff --git
a/scripts/perftest/slab/operators/single_node_dense/slabMatrixMult.dml
b/scripts/perftest/slab/operators/single_node_dense/slabMatrixMult.dml
new file mode 100644
index 0000000000..555caae4ed
--- /dev/null
+++ b/scripts/perftest/slab/operators/single_node_dense/slabMatrixMult.dml
@@ -0,0 +1,32 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+source("../../slabUtils.dml") as utils
+
+# Benchmark: matrix product of two random matrices.
+# M has $1 rows and 100 columns; N has 100 rows and $1 columns.
+M = rand(rows = $1, cols = 100, pdf = 'uniform')
+N = rand(rows = 100, cols = $1, pdf = 'uniform')
+# Only M is summed here; k is not referenced again in this script.
+k = sum(M)
+
+# Repeat the operation five times; print 10 random elements of each result.
+# NOTE(review): with these shapes R has $1 x $1 cells -- confirm this is
+# intended for the row counts the runner script passes in.
+for (ix in 1:5) {
+ R = M %*% N
+ utils::printRandElements(R,10)
+}
diff --git
a/scripts/perftest/slab/operators/single_node_dense/slabMatrixVectorMult.dml
b/scripts/perftest/slab/operators/single_node_dense/slabMatrixVectorMult.dml
new file mode 100644
index 0000000000..e2cec6a7e7
--- /dev/null
+++ b/scripts/perftest/slab/operators/single_node_dense/slabMatrixVectorMult.dml
@@ -0,0 +1,33 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("../../slabUtils.dml") as utils
+
+
+# Benchmark: product of a random matrix ($1 rows, 100 columns) with a random 100 x 1 vector.
+M = rand(rows = $1, cols = 100, pdf = 'uniform')
+w = rand(rows = 100, cols = 1, pdf = 'uniform')
+# Only M is summed here; k is not referenced again in this script.
+k = sum(M)
+
+# Repeat the operation five times; print 10 random elements of each result.
+for (ix in 1:5) {
+ R = M %*% w
+ utils::printRandElements(R,10)
+}
+
diff --git
a/scripts/perftest/slab/operators/single_node_dense/slabTranspose.dml
b/scripts/perftest/slab/operators/single_node_dense/slabTranspose.dml
new file mode 100644
index 0000000000..add5c94578
--- /dev/null
+++ b/scripts/perftest/slab/operators/single_node_dense/slabTranspose.dml
@@ -0,0 +1,30 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("../../slabUtils.dml") as utils
+
+# Benchmark: transpose of a random matrix with $1 rows and 100 columns.
+M = rand(rows = $1, cols = 100, pdf = 'uniform')
+# k is not referenced again in this script.
+k = sum(M)
+
+# Repeat the operation five times; print 10 random elements of each result.
+for (ix in 1:5) {
+ R = t(M)
+ utils::printRandElements(R,10)
+}
diff --git a/scripts/perftest/slab/pipeline/run_slab_pipeline.sh
b/scripts/perftest/slab/pipeline/run_slab_pipeline.sh
new file mode 100755
index 0000000000..9de2ecf2f8
--- /dev/null
+++ b/scripts/perftest/slab/pipeline/run_slab_pipeline.sh
@@ -0,0 +1,78 @@
+#!/usr/bin/env bash
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Ensure script is run from the 'pipeline' directory: the DML files and
+# DATASET_PATH below are resolved relative to the working directory
+if [ "$(basename "$PWD")" != "pipeline" ]; then
+  echo "Please execute scripts from directory 'pipeline'"
+  exit 1
+fi
+
+
+# Set up the output directory
+OUTPUT_DIR="output"
+mkdir -p "$OUTPUT_DIR"
+
+# Define row numbers for slabMultiplicationChain.dml
+ROW_NUMBERS=("1000" "10000" "100000" "1000000")
+MULTIPLICATION_CHAIN_FILE="slabMultiplicationChain.dml"
+MULTIPLICATION_CHAIN_OUTPUT="${OUTPUT_DIR}/slabMultiplicationChain_stats.txt"
+
+# Clear the output file before writing
+> "$MULTIPLICATION_CHAIN_OUTPUT"
+
+# Iterate over each row number and execute slabMultiplicationChain.dml
+for ROW in "${ROW_NUMBERS[@]}"; do
+  TEMP_FILE=$(mktemp)
+  if systemds "$MULTIPLICATION_CHAIN_FILE" -exec spark -args "$ROW" -stats > "$TEMP_FILE" 2>&1; then
+    echo "Number of rows: $ROW" >> "$MULTIPLICATION_CHAIN_OUTPUT"
+    awk '/SystemDS Statistics:/{flag=1}flag' "$TEMP_FILE" >> "$MULTIPLICATION_CHAIN_OUTPUT"
+    # The captured log is only needed for failed runs
+    rm "$TEMP_FILE"
+    echo "Execution of ${MULTIPLICATION_CHAIN_FILE} with ${ROW} rows completed. Statistics appended to ${MULTIPLICATION_CHAIN_OUTPUT}"
+  else
+    # Keep the temporary file: the message below points the user at it
+    echo "An error occurred while executing ${MULTIPLICATION_CHAIN_FILE} with rows ${ROW}. Check ${TEMP_FILE} for details." >> "$MULTIPLICATION_CHAIN_OUTPUT"
+    echo "Execution of ${MULTIPLICATION_CHAIN_FILE} with ${ROW} rows failed. Log kept in ${TEMP_FILE}" >&2
+  fi
+  echo -e "\n\n\n\n" >> "$MULTIPLICATION_CHAIN_OUTPUT"  # Add empty lines for separation
+done
+
+# Define datasets for slabSVD.dml
+DATASET_PATH="../../../../src/test/resources/datasets/slab/dense"
+DATASETS=("M_dense_tall.csv" "M_dense_wide.csv")
+SVD_FILE="slabSVD.dml"
+SVD_OUTPUT="${OUTPUT_DIR}/slabSVD_stats.txt"
+
+# Clear the output file before writing
+> "$SVD_OUTPUT"
+
+# Iterate over each dataset and execute slabSVD.dml
+for DATASET in "${DATASETS[@]}"; do
+  # Derive the shape label from the file name with a shell pattern match
+  # (avoids 'grep -P', which is a GNU-only extension)
+  case "$DATASET" in
+    *tall*) SHAPE="tall" ;;
+    *wide*) SHAPE="wide" ;;
+    *)      SHAPE="" ;;
+  esac
+  TEMP_FILE=$(mktemp)
+  if systemds "$SVD_FILE" -exec spark -args "${DATASET_PATH}/${DATASET}" -stats > "$TEMP_FILE" 2>&1; then
+    echo "Shape: $SHAPE" >> "$SVD_OUTPUT"
+    awk '/SystemDS Statistics:/{flag=1}flag' "$TEMP_FILE" >> "$SVD_OUTPUT"
+    # The captured log is only needed for failed runs
+    rm "$TEMP_FILE"
+    echo "Execution of ${SVD_FILE} with dataset ${DATASET} completed. Statistics appended to ${SVD_OUTPUT}"
+  else
+    # Keep the temporary file: the message below points the user at it
+    echo "An error occurred while executing ${SVD_FILE} with dataset ${DATASET}. Check ${TEMP_FILE} for details." >> "$SVD_OUTPUT"
+    echo "Execution of ${SVD_FILE} with dataset ${DATASET} failed. Log kept in ${TEMP_FILE}" >&2
+  fi
+  echo -e "\n\n\n\n" >> "$SVD_OUTPUT"  # Add empty lines for separation
+done
diff --git a/scripts/perftest/slab/pipeline/slabMultiplicationChain.dml
b/scripts/perftest/slab/pipeline/slabMultiplicationChain.dml
new file mode 100644
index 0000000000..5da116bbce
--- /dev/null
+++ b/scripts/perftest/slab/pipeline/slabMultiplicationChain.dml
@@ -0,0 +1,38 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("../slabUtils.dml") as utils
+
+# Benchmark: three-factor multiplication chain t %*% u %*% v on random vectors.
+# t and v are $1 x 1 column vectors; u is a 1 x $1 row vector.
+t = rand(rows=$1, cols=1, pdf = 'uniform')
+u = rand(rows=1, cols=$1, pdf = 'uniform')
+v = rand(rows=$1, cols=1, pdf = 'uniform')
+
+# Sum each input once before the repeated runs; the sums gate the print below.
+q = sum(t)
+r = sum(u)
+s = sum(v)
+
+# Repeat the chain five times.
+for(ix in 1:5) {
+ res = t %*% u %*% v
+ # The first element of the result is printed only when all three input sums are non-zero.
+ if ((q != 0) & (r != 0) & (s != 0)) {
+ print(as.scalar(res[1,1]))
+ }
+}
+
diff --git a/scripts/perftest/slab/pipeline/slabSVD.dml
b/scripts/perftest/slab/pipeline/slabSVD.dml
new file mode 100644
index 0000000000..36b262c57b
--- /dev/null
+++ b/scripts/perftest/slab/pipeline/slabSVD.dml
@@ -0,0 +1,38 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("../slabUtils.dml") as utils
+
+# Benchmark: singular value decomposition of the matrix read from the CSV file passed as $1.
+dataPath = $1
+M = read(dataPath, format="csv")
+# Sum and print the input once before the repeated runs; K also gates the prints below.
+K = sum(M)
+print(K)
+
+# Repeat the decomposition five times.
+for(ix in 1:5){
+ [U,D,V] = svd( M )
+ # The first element of each factor is printed only when the input sum is positive.
+ if(K > 0){
+ print(as.scalar(U[1,1]))
+ print(as.scalar(D[1,1]))
+ print(as.scalar(V[1,1]))
+ }
+}
+
+
diff --git a/scripts/perftest/slab/slabUtils.dml
b/scripts/perftest/slab/slabUtils.dml
new file mode 100644
index 0000000000..5f1659efc1
--- /dev/null
+++ b/scripts/perftest/slab/slabUtils.dml
@@ -0,0 +1,49 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+# Builds an nObs x nCol matrix of clustered points: each row holds a cluster id
+# sampled with replacement from 1..nClust, multiplied by `sep` in every column,
+# plus Gaussian noise scaled by `sd` (`sd` used to be accepted but ignored).
+genClusters = function(Integer nClust, Integer nObs, Integer nCol,
+ Double sd, Double sep) return (matrix[double] X) {
+ print("Hi from utils")
+ X = sample(nClust, nObs, TRUE) %*% matrix(sep, rows = 1, cols = nCol)
+ X = X + sd * rand(rows = nObs, cols = nCol, pdf = 'normal')
+ print(sum(X))
+}
+
+# Returns a rows x cols matrix filled by rand(pdf='uniform'); prints the matrix sum as a side effect.
+allocMatrix = function(Integer rows, Integer cols) return (matrix[double] X) {
+  X = rand(rows=rows, cols=cols, pdf='uniform'); print(sum(X))
+}
+
+# Prints `numel` cells of M, each taken at a randomly drawn (row, column) position.
+# The row is drawn from [1, nrow(M)] and the column from [1, ncol(M)] via rand(pdf="uniform"),
+# then converted with as.integer (NOTE(review): confirm rounding vs. truncation if
+# even coverage of the last row/column matters).
+printRandElements = function(matrix[double] M, Integer numel) {
+  for (k in 1:numel) {
+    i = as.integer(as.scalar(rand(rows=1,cols=1,min=1,max=nrow(M),pdf="uniform")))
+    j = as.integer(as.scalar(rand(rows=1,cols=1,min=1,max=ncol(M),pdf="uniform")))
+    print(as.scalar(M[i,j]))
+  }
+}
diff --git a/src/main/java/org/apache/sysds/api/DMLScript.java
b/src/main/java/org/apache/sysds/api/DMLScript.java
index cd86426a42..cd70760ea7 100644
--- a/src/main/java/org/apache/sysds/api/DMLScript.java
+++ b/src/main/java/org/apache/sysds/api/DMLScript.java
@@ -156,7 +156,7 @@ public class DMLScript
// flag that indicates whether or not to suppress any prints to stdout
public static boolean _suppressPrint2Stdout = false;
//set default local spark configuration - used for local testing
- public static boolean USE_LOCAL_SPARK_CONFIG = false;
+ public static boolean USE_LOCAL_SPARK_CONFIG = false;
public static boolean _activeAM = false;
/**
* If true, allow DMLProgram to be generated while not halting due to
validation errors/warnings