This is an automated email from the ASF dual-hosted git repository.
pingsutw pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/submarine.git
The following commit(s) were added to refs/heads/master by this push:
new 8b3e463 SUBMARINE-1164. Update example python code
8b3e463 is described below
commit 8b3e46312dbac627bde98a9d859a36bc516285e2
Author: jeff-901 <[email protected]>
AuthorDate: Sat Jan 1 21:31:06 2022 +0800
SUBMARINE-1164. Update example python code
### What is this PR for?
Change ModelsClient in the example Python code to the Submarine client, because
the mlflow package will be removed.
Also, some bugs were fixed:
1. A bug in the chart on the experiment info UI.
2. A bug in the default timestamp value for log_metric.
3. An incorrect image name in post.sh under quickstart.
### What type of PR is it?
Refactoring
### Todos
### What is the Jira issue?
https://issues.apache.org/jira/browse/SUBMARINE-1164
### How should this be tested?
### Screenshots (if appropriate)
### Questions:
* Do the license files need updating? No
* Are there breaking changes for older versions? No
* Does this need new documentation? No
Author: jeff-901 <[email protected]>
Signed-off-by: Kevin <[email protected]>
Closes #856 from jeff-901/SUBMARINE-1164 and squashes the following commits:
dc63b578 [jeff-901] checkstyle
de559f40 [jeff-901] update example
---
.../mnist-pytorch/DDP/mnist_distributed.py | 22 +++++-----
.../MirroredStrategy/mnist_keras_distributed.py | 26 +++++------
.../mnist_keras_distributed.py | 16 +++----
.../mnist_keras_distributed.py | 16 +++----
dev-support/examples/nn-pytorch/model.py | 7 +--
dev-support/examples/nn-pytorch/readme.md | 51 ----------------------
dev-support/examples/quickstart/post.sh | 2 +-
dev-support/examples/quickstart/train.py | 9 ++--
dev-support/examples/tracking/readme.md | 2 +-
dev-support/examples/tracking/tracking.py | 14 +++---
.../pysubmarine/submarine/tracking/client.py | 6 +--
.../pysubmarine/submarine/tracking/fluent.py | 13 +++++-
.../experiment-info/charts/charts.component.ts | 4 +-
13 files changed, 67 insertions(+), 121 deletions(-)
diff --git a/dev-support/examples/mnist-pytorch/DDP/mnist_distributed.py
b/dev-support/examples/mnist-pytorch/DDP/mnist_distributed.py
index 76161bc..110f314 100644
--- a/dev-support/examples/mnist-pytorch/DDP/mnist_distributed.py
+++ b/dev-support/examples/mnist-pytorch/DDP/mnist_distributed.py
@@ -28,7 +28,7 @@ import torch.optim as optim
from tensorboardX import SummaryWriter
from torchvision import datasets, transforms
-from submarine import ModelsClient
+import submarine
WORLD_SIZE = int(os.environ.get("WORLD_SIZE", 1))
rank = int(os.environ.get("RANK", 0))
@@ -55,7 +55,7 @@ class Net(nn.Module):
return F.log_softmax(x, dim=1)
-def train(args, model, device, train_loader, optimizer, epoch, writer,
periscope):
+def train(args, model, device, train_loader, optimizer, epoch, writer):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
@@ -76,10 +76,10 @@ def train(args, model, device, train_loader, optimizer,
epoch, writer, periscope
)
niter = epoch * len(train_loader) + batch_idx
writer.add_scalar("loss", loss.item(), niter)
- periscope.log_metric("loss", loss.item(), niter)
+ submarine.log_metric("loss", loss.item(), niter)
-def test(args, model, device, test_loader, writer, epoch, periscope):
+def test(args, model, device, test_loader, writer, epoch):
model.eval()
test_loss = 0
correct = 0
@@ -94,7 +94,7 @@ def test(args, model, device, test_loader, writer, epoch,
periscope):
test_loss /= len(test_loader.dataset)
print("\naccuracy={:.4f}\n".format(float(correct) /
len(test_loader.dataset)))
writer.add_scalar("accuracy", float(correct) / len(test_loader.dataset),
epoch)
- periscope.log_metric("accuracy", float(correct) /
len(test_loader.dataset), epoch)
+ submarine.log_metric("accuracy", float(correct) /
len(test_loader.dataset), epoch)
def should_distribute():
@@ -219,13 +219,11 @@ if __name__ == "__main__":
model = Distributor(model)
optimizer = optim.SGD(model.parameters(), lr=args.lr,
momentum=args.momentum)
- periscope = ModelsClient()
- with periscope.start() as run:
- periscope.log_param("learning_rate", args.lr)
- periscope.log_param("batch_size", args.batch_size)
- for epoch in range(1, args.epochs + 1):
- train(args, model, device, train_loader, optimizer, epoch, writer,
periscope)
- test(args, model, device, test_loader, writer, epoch, periscope)
+ submarine.log_param("learning_rate", args.lr)
+ submarine.log_param("batch_size", args.batch_size)
+ for epoch in range(1, args.epochs + 1):
+ train(args, model, device, train_loader, optimizer, epoch, writer)
+ test(args, model, device, test_loader, writer, epoch)
if args.save_model:
torch.save(model.state_dict(), "mnist_cnn.pt")
diff --git
a/dev-support/examples/mnist-tensorflow/MirroredStrategy/mnist_keras_distributed.py
b/dev-support/examples/mnist-tensorflow/MirroredStrategy/mnist_keras_distributed.py
index 7cc71b4..eabf9bd 100644
---
a/dev-support/examples/mnist-tensorflow/MirroredStrategy/mnist_keras_distributed.py
+++
b/dev-support/examples/mnist-tensorflow/MirroredStrategy/mnist_keras_distributed.py
@@ -20,7 +20,7 @@ import os
import tensorflow as tf
import tensorflow_datasets as tfds
-from submarine import ModelsClient
+import submarine
datasets, info = tfds.load(name="mnist", with_info=True, as_supervised=True)
mnist_train, mnist_test = datasets["train"], datasets["test"]
@@ -89,7 +89,7 @@ def decay(epoch):
class PrintLR(tf.keras.callbacks.Callback):
def on_epoch_end(self, epoch, logs=None):
print("\nLearning rate for epoch {} is {}".format(epoch + 1,
model.optimizer.lr.numpy()))
- modelClient.log_metric("lr", model.optimizer.lr.numpy())
+ submarine.log_metric("lr", model.optimizer.lr.numpy())
# Put all the callbacks together.
@@ -101,18 +101,16 @@ callbacks = [
]
if __name__ == "__main__":
- modelClient = ModelsClient()
- with modelClient.start() as run:
- EPOCHS = 5
- hist = model.fit(train_dataset, epochs=EPOCHS, callbacks=callbacks)
- for i in range(EPOCHS):
- modelClient.log_metric("val_loss", hist.history["loss"][i])
- modelClient.log_metric("Val_accuracy", hist.history["accuracy"][i])
- model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
- eval_loss, eval_acc = model.evaluate(eval_dataset)
- print("Eval loss: {}, Eval accuracy: {}".format(eval_loss, eval_acc))
- modelClient.log_param("loss", eval_loss)
- modelClient.log_param("acc", eval_acc)
+ EPOCHS = 5
+ hist = model.fit(train_dataset, epochs=EPOCHS, callbacks=callbacks)
+ for i in range(EPOCHS):
+ submarine.log_metric("val_loss", hist.history["loss"][i], i)
+ submarine.log_metric("Val_accuracy", hist.history["accuracy"][i], i)
+ model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
+ eval_loss, eval_acc = model.evaluate(eval_dataset)
+ print("Eval loss: {}, Eval accuracy: {}".format(eval_loss, eval_acc))
+ submarine.log_param("loss", eval_loss)
+ submarine.log_param("acc", eval_acc)
"""Reference:
https://www.tensorflow.org/api_docs/python/tf/distribute/MirroredStrategy
diff --git
a/dev-support/examples/mnist-tensorflow/MultiWorkerMirroredStrategy/mnist_keras_distributed.py
b/dev-support/examples/mnist-tensorflow/MultiWorkerMirroredStrategy/mnist_keras_distributed.py
index 21df78b..d535377 100644
---
a/dev-support/examples/mnist-tensorflow/MultiWorkerMirroredStrategy/mnist_keras_distributed.py
+++
b/dev-support/examples/mnist-tensorflow/MultiWorkerMirroredStrategy/mnist_keras_distributed.py
@@ -20,7 +20,7 @@ import os
import tensorflow as tf
import tensorflow_datasets as tfds
-from submarine import ModelsClient
+import submarine
BUFFER_SIZE = 10000
BATCH_SIZE = 32
@@ -93,15 +93,11 @@ with strategy.scope():
# attention: x=train_datasets_no_auto_shard , not x = train_datasets
if __name__ == "__main__":
- modelClient = ModelsClient()
- with modelClient.start() as run:
- EPOCHS = 5
- hist = multi_worker_model.fit(
- x=train_datasets_no_auto_shard, epochs=EPOCHS, steps_per_epoch=5
- )
- for i in range(EPOCHS):
- modelClient.log_metric("val_loss", hist.history["loss"][i])
- modelClient.log_metric("Val_accuracy", hist.history["accuracy"][i])
+ EPOCHS = 5
+ hist = multi_worker_model.fit(x=train_datasets_no_auto_shard,
epochs=EPOCHS, steps_per_epoch=5)
+ for i in range(EPOCHS):
+ submarine.log_metric("val_loss", hist.history["loss"][i], i)
+ submarine.log_metric("Val_accuracy", hist.history["accuracy"][i], i)
"""Reference
diff --git
a/dev-support/examples/mnist-tensorflow/ParameterServerStrategy/mnist_keras_distributed.py
b/dev-support/examples/mnist-tensorflow/ParameterServerStrategy/mnist_keras_distributed.py
index b65a584..d1e14db 100644
---
a/dev-support/examples/mnist-tensorflow/ParameterServerStrategy/mnist_keras_distributed.py
+++
b/dev-support/examples/mnist-tensorflow/ParameterServerStrategy/mnist_keras_distributed.py
@@ -19,7 +19,7 @@ import os
import tensorflow as tf
-from submarine import ModelsClient
+import submarine
print(tf.__version__)
@@ -74,14 +74,12 @@ checkpoint_dir = "./training_checkpoints"
model.fit(dc, epochs=5, steps_per_epoch=20, callbacks=callbacks)
if __name__ == "__main__":
- modelClient = ModelsClient()
- with modelClient.start() as run:
- EPOCHS = 5
- hist = model.fit(dc, epochs=EPOCHS, steps_per_epoch=20,
callbacks=callbacks)
- for i in range(EPOCHS):
- modelClient.log_metric("val_loss", hist.history["loss"][i])
- modelClient.log_metric("Val_accuracy", hist.history["accuracy"][i])
- model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
+ EPOCHS = 5
+ hist = model.fit(dc, epochs=EPOCHS, steps_per_epoch=20,
callbacks=callbacks)
+ for i in range(EPOCHS):
+ submarine.log_metric("val_loss", hist.history["loss"][i], i)
+ submarine.log_metric("Val_accuracy", hist.history["accuracy"][i], i)
+ model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
"""
Reference:
diff --git a/dev-support/examples/nn-pytorch/model.py
b/dev-support/examples/nn-pytorch/model.py
index 48558a3..f236281 100644
--- a/dev-support/examples/nn-pytorch/model.py
+++ b/dev-support/examples/nn-pytorch/model.py
@@ -16,7 +16,7 @@
"""
import torch
-from submarine import ModelsClient
+import submarine
class LinearNNModel(torch.nn.Module):
@@ -30,11 +30,12 @@ class LinearNNModel(torch.nn.Module):
if __name__ == "__main__":
- client = ModelsClient()
net = LinearNNModel()
- client.save_model(
+ submarine.save_model(
model_type="pytorch",
model=net,
artifact_path="pytorch-nn-model",
registered_model_name="simple-nn-model",
+ input_dim=[2],
+ output_dim=[1],
)
diff --git a/dev-support/examples/nn-pytorch/readme.md
b/dev-support/examples/nn-pytorch/readme.md
index ea999fc..5e64177 100644
--- a/dev-support/examples/nn-pytorch/readme.md
+++ b/dev-support/examples/nn-pytorch/readme.md
@@ -17,54 +17,3 @@ This is an easy example of saving a pytorch linear model to
model registry.
```bash
./dev-support/examples/nn-pytorch/post.sh
```
-
-## Serve the model by Serve API
-
-1. Make sure the model is saved in the model registry (viewed on MLflow UI)
-2. Call serve API to create serve resource
-
-- Request
- ```
- curl -X POST -H "Content-Type: application/json" -d '
- {
- "modelName":"simple-nn-model",
- "modelVersion":"1",
- "namespace":"default"
- }
- ' http://127.0.0.1:32080/api/v1/experiment/serve
- ```
-- Response
- ```
- {
- "status": "OK",
- "code": 200,
- "success": true,
- "message": null,
- "result": {
- "url": "/serve/simple-nn-model-1"
- },
- "attributes": {}
- }
- ```
-
-3. Send data to inference
-
-- Request
- ```
- curl -d '{"data":[[-1, -1]]}' -H 'Content-Type: application/json;
format=pandas-split' -X POST
http://127.0.0.1:32080/serve/simple-nn-model-1/invocations
- ```
-- Response
- ```
- [{"0": -0.5663654804229736}]
- ```
-
-4. Call serve API to delete serve resource
-
-- Request
- ```
- curl -X DELETE
http://0.0.0.0:32080/api/v1/experiment/serve?modelName=simple-nn-model&modelVersion=1&namespace=default
- ```
-- Response
- ```
-
{"status":"OK","code":200,"success":true,"message":null,"result":{"url":"/serve/simple-nn-model-1"},"attributes":{}}
- ```
diff --git a/dev-support/examples/quickstart/post.sh
b/dev-support/examples/quickstart/post.sh
index d645939..62aa6cb 100755
--- a/dev-support/examples/quickstart/post.sh
+++ b/dev-support/examples/quickstart/post.sh
@@ -26,7 +26,7 @@ curl -X POST -H "Content-Type: application/json" -d '
}
},
"environment": {
- "image": "quickstart:0.7.0-SNAPSHOT"
+ "image": "apache/submarine:quickstart-0.7.0-SNAPSHOT"
},
"spec": {
"Worker": {
diff --git a/dev-support/examples/quickstart/train.py
b/dev-support/examples/quickstart/train.py
index c9476bb..2b4069a 100644
--- a/dev-support/examples/quickstart/train.py
+++ b/dev-support/examples/quickstart/train.py
@@ -20,7 +20,7 @@ import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import layers, models
-from submarine import ModelsClient
+import submarine
def make_datasets_unbatched():
@@ -77,12 +77,11 @@ def main():
def on_epoch_end(self, epoch, logs=None):
# monitor the loss and accuracy
print(logs)
- modelClient.log_metrics({"loss": logs["loss"], "accuracy":
logs["accuracy"]}, epoch)
+ submarine.log_metric("loss", logs["loss"], epoch)
+ submarine.log_metric("accuracy", logs["accuracy"], epoch)
- with modelClient.start():
- multi_worker_model.fit(ds_train, epochs=10, steps_per_epoch=70,
callbacks=[MyCallback()])
+ multi_worker_model.fit(ds_train, epochs=10, steps_per_epoch=70,
callbacks=[MyCallback()])
if __name__ == "__main__":
- modelClient = ModelsClient()
main()
diff --git a/dev-support/examples/tracking/readme.md
b/dev-support/examples/tracking/readme.md
index e452a16..10d3bec 100644
--- a/dev-support/examples/tracking/readme.md
+++ b/dev-support/examples/tracking/readme.md
@@ -5,7 +5,7 @@ This is an easy example of how to track the metric and
paramater in submarine-sd
For example, you start an experiment with 3 workers. Suppose the experiment is
assigned with an ID `experiment_12345678`, and the operator launches 3 pods
with worker_id `worker-0`, `worker-1` and `worker-2` respectively.
-The logging of `worker-i` will be directed to `experiment_12345678` /
`worker-i` in the mlflow server
+The logging of `worker-i` will be directed to `experiment_12345678` /
`worker-i` in the submarine server
## How to execute
diff --git a/dev-support/examples/tracking/tracking.py
b/dev-support/examples/tracking/tracking.py
index 4f84251..0bb8a43 100644
--- a/dev-support/examples/tracking/tracking.py
+++ b/dev-support/examples/tracking/tracking.py
@@ -18,13 +18,11 @@
import random
import time
-from submarine import ModelsClient
+import submarine
if __name__ == "__main__":
- modelClient = ModelsClient()
- with modelClient.start() as run:
- modelClient.log_param("learning_rate", random.random())
- for i in range(100):
- time.sleep(1)
- modelClient.log_metric("mse", random.random() * 100, i)
- modelClient.log_metric("acc", random.random(), i)
+ submarine.log_param("learning_rate", random.random())
+ for i in range(100):
+ time.sleep(1)
+ submarine.log_metric("mse", random.random() * 100, i)
+ submarine.log_metric("acc", random.random(), i)
diff --git a/submarine-sdk/pysubmarine/submarine/tracking/client.py
b/submarine-sdk/pysubmarine/submarine/tracking/client.py
index 25ed6b7..f563254 100644
--- a/submarine-sdk/pysubmarine/submarine/tracking/client.py
+++ b/submarine-sdk/pysubmarine/submarine/tracking/client.py
@@ -16,7 +16,7 @@ import json
import os
import re
import tempfile
-import time
+from datetime import datetime
from typing import Any, Dict
import submarine
@@ -61,7 +61,7 @@ class SubmarineClient(object):
key: str,
value: float,
worker_index: str,
- timestamp: int = None,
+ timestamp: datetime = None,
step: int = None,
) -> None:
"""
@@ -75,7 +75,7 @@ class SubmarineClient(object):
:param timestamp: Time when this metric was calculated. Defaults to
the current system time.
:param step: Training step (iteration) at which was the metric
calculated. Defaults to 0.
"""
- timestamp = timestamp if timestamp is not None else int(time.time())
+ timestamp = timestamp if timestamp is not None else datetime.now()
step = step if step is not None else 0
validate_metric(key, value, timestamp, step)
metric = Metric(key, value, worker_index, timestamp, step)
diff --git a/submarine-sdk/pysubmarine/submarine/tracking/fluent.py
b/submarine-sdk/pysubmarine/submarine/tracking/fluent.py
index aabe7ed..dce8cfe 100644
--- a/submarine-sdk/pysubmarine/submarine/tracking/fluent.py
+++ b/submarine-sdk/pysubmarine/submarine/tracking/fluent.py
@@ -54,7 +54,14 @@ def log_metric(key, value, step=None):
SubmarineClient().log_metric(job_id, key, value, worker_index,
datetime.now(), step or 0)
-def save_model(model_type: str, model, artifact_path: str,
registered_model_name: str = None):
+def save_model(
+ model_type: str,
+ model,
+ artifact_path: str,
+ registered_model_name: str = None,
+ input_dim: list = None,
+ output_dim: list = None,
+):
"""
Save a model into the minio pod.
:param model_type: The type of the model.
@@ -63,4 +70,6 @@ def save_model(model_type: str, model, artifact_path: str,
registered_model_name
:param registered_model_name: If none None, register model into the model
registry with
this name. If None, the model only be saved
in minio pod.
"""
- SubmarineClient().save_model(model_type, model, artifact_path,
registered_model_name)
+ SubmarineClient().save_model(
+ model_type, model, artifact_path, registered_model_name, input_dim,
output_dim
+ )
diff --git
a/submarine-workbench/workbench-web/src/app/pages/workbench/experiment/experiment-info/charts/charts.component.ts
b/submarine-workbench/workbench-web/src/app/pages/workbench/experiment/experiment-info/charts/charts.component.ts
index 6c0828b..ebd18aa 100644
---
a/submarine-workbench/workbench-web/src/app/pages/workbench/experiment/experiment-info/charts/charts.component.ts
+++
b/submarine-workbench/workbench-web/src/app/pages/workbench/experiment/experiment-info/charts/charts.component.ts
@@ -81,8 +81,8 @@ export class ChartsComponent implements OnInit {
metrics = [];
}
key = data.key;
- const d = new Date(0);
- d.setUTCMilliseconds(data.timestamp);
+ data.timestamp = data.timestamp.replace(" ", "T")
+ const d = new Date(data.timestamp);
const metric = { name: d, value: data.value };
metrics.push(metric);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]