This is an automated email from the ASF dual-hosted git repository.
liuxun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/submarine.git
The following commit(s) were added to refs/heads/master by this push:
new f9b5eff SUBMARINE-401. [SDK] Add Factorization Machines model
f9b5eff is described below
commit f9b5eff9879916a7d6fe985f00ddc5fd89b4e652
Author: pingsutw <[email protected]>
AuthorDate: Tue Mar 3 17:28:04 2020 +0800
SUBMARINE-401. [SDK] Add Factorization Machines model
### What is this PR for?
Add TensorFlow implementation of Factorization Machines model
### What type of PR is it?
[Feature]
### Todos
* [ ] - Task
### What is the Jira issue?
https://issues.apache.org/jira/browse/SUBMARINE-401
### How should this be tested?
https://travis-ci.org/pingsutw/hadoop-submarine/builds/659560320
### Screenshots (if appropriate)
### Questions:
* Do the license files need updating? No
* Are there breaking changes for older versions? No
* Does this need documentation? No
Author: pingsutw <[email protected]>
Closes #208 from pingsutw/SUBMARINE-401 and squashes the following commits:
2edabb5 [pingsutw] SUBMARINE-401. [SDK] Add Factorization Machines model
---
.../example/{deepfm => }/data/te.libsvm | 0
.../example/{deepfm => }/data/tr.libsvm | 0
.../example/{deepfm => }/data/va.libsvm | 0
submarine-sdk/pysubmarine/example/deepfm/README.md | 8 ++-
.../pysubmarine/example/deepfm/deepfm.json | 6 +-
.../pysubmarine/example/deepfm/run_deepfm.py | 18 +++---
.../pysubmarine/example/{deepfm => fm}/README.md | 32 ++++------
.../example/{deepfm/deepfm.json => fm/fm.json} | 10 +---
.../{deepfm/deepfm.json => fm/fm_distributed.json} | 14 ++---
.../example/{deepfm/run_deepfm.py => fm/run_fm.py} | 22 ++++---
.../pysubmarine/submarine/ml/model/__init__.py | 3 +-
.../pysubmarine/submarine/ml/model/deepfm.py | 63 ++-----------------
submarine-sdk/pysubmarine/submarine/ml/model/fm.py | 70 ++++++++++++++++++++++
.../pysubmarine/submarine/ml/optimizer.py | 45 ++++++++++++++
.../pysubmarine/submarine/utils/tf_utils.py | 67 ++++++++++++++++++++-
.../tests/ml/model/{test_deepfm.py => conftest.py} | 13 ++--
.../pysubmarine/tests/ml/model/test_deepfm.py | 36 +----------
.../__init__.py => tests/ml/model/test_fm.py} | 12 +++-
.../run_deepfm.py => tests/ml/test_optimizer.py} | 25 ++++----
19 files changed, 269 insertions(+), 175 deletions(-)
diff --git a/submarine-sdk/pysubmarine/example/deepfm/data/te.libsvm
b/submarine-sdk/pysubmarine/example/data/te.libsvm
similarity index 100%
rename from submarine-sdk/pysubmarine/example/deepfm/data/te.libsvm
rename to submarine-sdk/pysubmarine/example/data/te.libsvm
diff --git a/submarine-sdk/pysubmarine/example/deepfm/data/tr.libsvm
b/submarine-sdk/pysubmarine/example/data/tr.libsvm
similarity index 100%
rename from submarine-sdk/pysubmarine/example/deepfm/data/tr.libsvm
rename to submarine-sdk/pysubmarine/example/data/tr.libsvm
diff --git a/submarine-sdk/pysubmarine/example/deepfm/data/va.libsvm
b/submarine-sdk/pysubmarine/example/data/va.libsvm
similarity index 100%
rename from submarine-sdk/pysubmarine/example/deepfm/data/va.libsvm
rename to submarine-sdk/pysubmarine/example/data/va.libsvm
diff --git a/submarine-sdk/pysubmarine/example/deepfm/README.md
b/submarine-sdk/pysubmarine/example/deepfm/README.md
index 8f510ee..185d137 100644
--- a/submarine-sdk/pysubmarine/example/deepfm/README.md
+++ b/submarine-sdk/pysubmarine/example/deepfm/README.md
@@ -23,9 +23,13 @@ metrics, save model path, resources. e.g.
[deepfm.json](./deepfm.json)
```
python ./submarine/submarine-sdk/setup.py install
```
-3. Run DeepFM
+3. Train
```
-python run_deepfm.py -conf=deepfm.json
+python run_deepfm.py -conf=deepfm.json -task_type train
+```
+4. Evaluate
+```
+python run_deepfm.py -conf=deepfm.json -task_type evaluate
```
### Running DeepFM on Submarine
1. Upload data to a shared file system like hdfs, s3.
diff --git a/submarine-sdk/pysubmarine/example/deepfm/deepfm.json
b/submarine-sdk/pysubmarine/example/deepfm/deepfm.json
index 1befdc7..57aa9bf 100644
--- a/submarine-sdk/pysubmarine/example/deepfm/deepfm.json
+++ b/submarine-sdk/pysubmarine/example/deepfm/deepfm.json
@@ -1,8 +1,8 @@
{
"input": {
- "train_data": ["./data/tr.libsvm"],
- "valid_data": ["./data/va.libsvm"],
- "test_data": ["./data/te.libsvm"],
+ "train_data": ["../data/tr.libsvm"],
+ "valid_data": ["../data/va.libsvm"],
+ "test_data": ["../data/te.libsvm"],
"type": "libsvm"
},
"output": {
diff --git a/submarine-sdk/pysubmarine/example/deepfm/run_deepfm.py
b/submarine-sdk/pysubmarine/example/deepfm/run_deepfm.py
index e878abb..4c5be91 100644
--- a/submarine-sdk/pysubmarine/example/deepfm/run_deepfm.py
+++ b/submarine-sdk/pysubmarine/example/deepfm/run_deepfm.py
@@ -18,13 +18,17 @@ import argparse
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("-conf", help="a JSON configuration file for DeepFM",
type=str)
+ parser.add_argument("-task_type", default='train',
+ help="train or evaluate, by default is train")
args = parser.parse_args()
-
json_path = args.conf
+ task_type = args.task_type
+
model = DeepFM(json_path=json_path)
- # Training
- model.train()
- # Evaluate
- result = model.evaluate()
- print("Model metrics : ", result)
+
+ if task_type == 'train':  # any value other than 'train'/'evaluate' falls through and does nothing
+ model.train()
+ if task_type == 'evaluate':  # evaluation is the only task that prints a result
+ result = model.evaluate()
+ print("Model metrics : ", result)
diff --git a/submarine-sdk/pysubmarine/example/deepfm/README.md
b/submarine-sdk/pysubmarine/example/fm/README.md
similarity index 55%
copy from submarine-sdk/pysubmarine/example/deepfm/README.md
copy to submarine-sdk/pysubmarine/example/fm/README.md
index 8f510ee..0aca2bf 100644
--- a/submarine-sdk/pysubmarine/example/deepfm/README.md
+++ b/submarine-sdk/pysubmarine/example/fm/README.md
@@ -11,26 +11,22 @@
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
-### Running Examples
-To run the examples here, you need to:
-- Build a Python virtual environment with pysubmarine installed
-- Install Submarine 0.3.0+
-### Running DeepFM on a local machine
+### Running FM on a local machine
1. Create a JSON configuration file containing train,valid and test data,
model parameters,
-metrics, save model path, resources. e.g. [deepfm.json](./deepfm.json)
+metrics, save model path, resources. e.g. [fm.json](./fm.json)
-2. Install submarine python bindings by setup.py:
+2. Train
```
-python ./submarine/submarine-sdk/setup.py install
+python run_fm.py -conf=fm.json -task_type train
```
-3. Run DeepFM
+3. Evaluate
```
-python run_deepfm.py -conf=deepfm.json
+python run_fm.py -conf=fm.json -task_type evaluate
```
-### Running DeepFM on Submarine
+### Running FM on Submarine
1. Upload data to a shared file system like hdfs, s3.
-2. Create a JSON configuration file for distributed training. e.g.
[deepfm_distributed.json](./deepfm_distributed.json)
+2. Create a JSON configuration file for distributed training. e.g.
[fm_distributed.json](./fm_distributed.json)
3. Submit Job
```
@@ -38,7 +34,7 @@ SUBMARINE_VERSION=0.4.0
SUBMARINE_HADOOP_VERSION=2.9
java -cp $(${HADOOP_COMMON_HOME}/bin/hadoop classpath
--glob):submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar:${HADOOP_CONF_PATH}
\
- org.apache.submarine.client.cli.Cli job run --name deepfm-job-001 \
+ org.apache.submarine.client.cli.Cli job run --name fm-job-001 \
--framework tensorflow \
--verbose \
--input_path "" \
@@ -46,12 +42,8 @@ java -cp $(${HADOOP_COMMON_HOME}/bin/hadoop classpath
--glob):submarine-all-${SU
--worker_resources memory=4G,vcores=4 \
--num_ps 1 \
--ps_resources memory=4G,vcores=4 \
- --worker_launch_cmd "myvenv.zip/venv/bin/python run_deepfm.py
-conf=deepfm_distributed.json" \
- --ps_launch_cmd "myvenv.zip/venv/bin/python run_deepfm.py
-conf=deepfm_distributed.json" \
+ --worker_launch_cmd "myvenv.zip/venv/bin/python run_fm.py
-conf=fm_distributed.json" \
+ --ps_launch_cmd "myvenv.zip/venv/bin/python run_fm.py
-conf=fm_distributed.json" \
--insecure \
- --conf
tony.containers.resources=myvenv.zip#archive,submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar,deepfm_distributed.json,run_deepfm.py
\
- --conf tony.chief.instances=1 \
- --conf tony.chief.memory=4G \
- --conf tony.chief.vcores=4 \
- --conf tony.chief.command="myvenv.zip/venv/bin/python run_deepfm.py
-conf=deepfm_distributed.json"
+ --conf
tony.containers.resources=myvenv.zip#archive,submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar,fm_distributed.json,run_fm.py
```
diff --git a/submarine-sdk/pysubmarine/example/deepfm/deepfm.json
b/submarine-sdk/pysubmarine/example/fm/fm.json
similarity index 66%
copy from submarine-sdk/pysubmarine/example/deepfm/deepfm.json
copy to submarine-sdk/pysubmarine/example/fm/fm.json
index 1befdc7..b71e30d 100644
--- a/submarine-sdk/pysubmarine/example/deepfm/deepfm.json
+++ b/submarine-sdk/pysubmarine/example/fm/fm.json
@@ -1,8 +1,8 @@
{
"input": {
- "train_data": ["./data/tr.libsvm"],
- "valid_data": ["./data/va.libsvm"],
- "test_data": ["./data/te.libsvm"],
+ "train_data": ["../data/tr.libsvm"],
+ "valid_data": ["../data/va.libsvm"],
+ "test_data": ["../data/te.libsvm"],
"type": "libsvm"
},
"output": {
@@ -16,11 +16,7 @@
"feature_size": 117581,
"embedding_size": 256,
"learning_rate": 0.0005,
- "batch_norm_decay": 0.9,
"l2_reg": 0.0001,
- "deep_layers": [400, 400, 400],
- "dropout": [0.3, 0.3, 0.3],
- "batch_norm": false,
"optimizer": "adam",
"log_steps": 10,
"seed": 77,
diff --git a/submarine-sdk/pysubmarine/example/deepfm/deepfm.json
b/submarine-sdk/pysubmarine/example/fm/fm_distributed.json
similarity index 58%
copy from submarine-sdk/pysubmarine/example/deepfm/deepfm.json
copy to submarine-sdk/pysubmarine/example/fm/fm_distributed.json
index 1befdc7..3edf06e 100644
--- a/submarine-sdk/pysubmarine/example/deepfm/deepfm.json
+++ b/submarine-sdk/pysubmarine/example/fm/fm_distributed.json
@@ -1,12 +1,12 @@
{
"input": {
- "train_data": ["./data/tr.libsvm"],
- "valid_data": ["./data/va.libsvm"],
- "test_data": ["./data/te.libsvm"],
+ "train_data": ["hdfs:///user/submarine/data/tr.libsvm"],
+ "valid_data": ["hdfs:///user/submarine/data/va.libsvm"],
+ "test_data": ["hdfs:///user/submarine/data/te.libsvm"],
"type": "libsvm"
},
"output": {
- "save_model_dir": "./experiment",
+ "save_model_dir": "hdfs:///user/submarine/fm",
"metric": "auc"
},
"training": {
@@ -16,15 +16,11 @@
"feature_size": 117581,
"embedding_size": 256,
"learning_rate": 0.0005,
- "batch_norm_decay": 0.9,
"l2_reg": 0.0001,
- "deep_layers": [400, 400, 400],
- "dropout": [0.3, 0.3, 0.3],
- "batch_norm": false,
"optimizer": "adam",
"log_steps": 10,
"seed": 77,
- "mode": "local"
+ "mode": "distributed"
},
"resource": {
"num_cpu": 4,
diff --git a/submarine-sdk/pysubmarine/example/deepfm/run_deepfm.py
b/submarine-sdk/pysubmarine/example/fm/run_fm.py
similarity index 70%
copy from submarine-sdk/pysubmarine/example/deepfm/run_deepfm.py
copy to submarine-sdk/pysubmarine/example/fm/run_fm.py
index e878abb..5a73336 100644
--- a/submarine-sdk/pysubmarine/example/deepfm/run_deepfm.py
+++ b/submarine-sdk/pysubmarine/example/fm/run_fm.py
@@ -13,18 +13,22 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from submarine.ml.model import DeepFM
+from submarine.ml.model import FM
import argparse
if __name__ == '__main__':
parser = argparse.ArgumentParser()
- parser.add_argument("-conf", help="a JSON configuration file for DeepFM",
type=str)
+ parser.add_argument("-conf", help="a JSON configuration file for FM",
type=str)
+ parser.add_argument("-task_type", default='train',
+ help="train or evaluate, by default is train")
args = parser.parse_args()
-
json_path = args.conf
- model = DeepFM(json_path=json_path)
- # Training
- model.train()
- # Evaluate
- result = model.evaluate()
- print("Model metrics : ", result)
+ task_type = args.task_type
+
+ model = FM(json_path=json_path)  # FM is constructed from the JSON config path
+
+ if task_type == 'train':  # any value other than 'train'/'evaluate' falls through and does nothing
+ model.train()
+ if task_type == 'evaluate':  # evaluation is the only task that prints a result
+ result = model.evaluate()
+ print("Model metrics : ", result)
diff --git a/submarine-sdk/pysubmarine/submarine/ml/model/__init__.py
b/submarine-sdk/pysubmarine/submarine/ml/model/__init__.py
index 3fb4935..cf6064d 100644
--- a/submarine-sdk/pysubmarine/submarine/ml/model/__init__.py
+++ b/submarine-sdk/pysubmarine/submarine/ml/model/__init__.py
@@ -14,5 +14,6 @@
# limitations under the License.
from .deepfm import DeepFM
+from .fm import FM
-__all__ = ["DeepFM"]
+__all__ = ["DeepFM", "FM"]
diff --git a/submarine-sdk/pysubmarine/submarine/ml/model/deepfm.py
b/submarine-sdk/pysubmarine/submarine/ml/model/deepfm.py
index 1f69ccb..1cbc1b7 100644
--- a/submarine-sdk/pysubmarine/submarine/ml/model/deepfm.py
+++ b/submarine-sdk/pysubmarine/submarine/ml/model/deepfm.py
@@ -29,6 +29,7 @@ import logging
import tensorflow as tf
import numpy as np
from submarine.ml.model.base_tf_model import BaseTFModel
+from submarine.utils.tf_utils import get_estimator_spec
logger = logging.getLogger(__name__)
@@ -50,12 +51,9 @@ class DeepFM(BaseTFModel):
feature_size = params["training"]["feature_size"]
embedding_size = params["training"]["embedding_size"]
l2_reg = params["training"]["l2_reg"]
- learning_rate = params["training"]["learning_rate"]
batch_norm = params["training"]["batch_norm"]
batch_norm_decay = params["training"]["batch_norm_decay"]
- optimizer = params["training"]["optimizer"]
seed = params["training"]["seed"]
- metric = params['output']['metric']
layers = params["training"]["deep_layers"]
dropout = params["training"]["dropout"]
@@ -115,59 +113,6 @@ class DeepFM(BaseTFModel):
with tf.variable_scope("DeepFM-out"):
y_bias = fm_bias * tf.ones_like(y_d, dtype=tf.float32)
- y = y_bias + y_w + y_v + y_d
- pred = tf.sigmoid(y)
-
- predictions = {"probabilities": pred}
- export_outputs = {
-
tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
- tf.estimator.export.PredictOutput(predictions)}
- # Provide an estimator spec for `ModeKeys.PREDICT`
- if mode == tf.estimator.ModeKeys.PREDICT:
- return tf.estimator.EstimatorSpec(
- mode=mode,
- predictions=predictions,
- export_outputs=export_outputs)
-
- with tf.name_scope("Loss"):
- loss = tf.reduce_mean(
- tf.nn.sigmoid_cross_entropy_with_logits(logits=y,
labels=labels)) + \
- l2_reg * tf.nn.l2_loss(fm_weight) + l2_reg *
tf.nn.l2_loss(fm_vector)
-
- # Provide an estimator spec for `ModeKeys.EVAL`
- eval_metric_ops = {}
- if metric == 'auc':
- eval_metric_ops['auc'] = tf.metrics.auc(labels, pred)
- else:
- raise TypeError("Can not find loss_type :",
params['training']['loss_type'])
-
- if mode == tf.estimator.ModeKeys.EVAL:
- return tf.estimator.EstimatorSpec(
- mode=mode,
- predictions=predictions,
- loss=loss,
- eval_metric_ops=eval_metric_ops)
-
- with tf.name_scope("Optimizer"):
- if optimizer == 'adam':
- op = tf.train.AdamOptimizer(learning_rate=learning_rate,
- beta1=0.9, beta2=0.999,
epsilon=1e-8)
- elif optimizer == 'adagrad':
- op = tf.train.AdagradOptimizer(
- learning_rate=learning_rate,
initial_accumulator_value=1e-8)
- elif optimizer == 'momentum':
- op = tf.train.MomentumOptimizer(learning_rate=learning_rate,
momentum=0.95)
- elif optimizer == 'ftrl':
- op = tf.train.FtrlOptimizer(learning_rate)
- else:
- raise TypeError("Can not find optimizer :", optimizer)
-
- train_op = op.minimize(loss, global_step=tf.train.get_global_step())
-
- # Provide an estimator spec for `ModeKeys.TRAIN` modes
- if mode == tf.estimator.ModeKeys.TRAIN:
- return tf.estimator.EstimatorSpec(
- mode=mode,
- predictions=predictions,
- loss=loss,
- train_op=train_op)
+ logit = y_bias + y_w + y_v + y_d
+
+ return get_estimator_spec(logit, labels, mode, params, [fm_vector,
fm_weight])
diff --git a/submarine-sdk/pysubmarine/submarine/ml/model/fm.py
b/submarine-sdk/pysubmarine/submarine/ml/model/fm.py
new file mode 100644
index 0000000..fed9524
--- /dev/null
+++ b/submarine-sdk/pysubmarine/submarine/ml/model/fm.py
@@ -0,0 +1,70 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+TensorFlow implementation of FM
+
+Reference:
+[1] Factorization Machines,
+ Steffen Rendle, ICDM 2010
+"""
+
+import logging
+import tensorflow as tf
+import numpy as np
+from submarine.ml.model.base_tf_model import BaseTFModel
+from submarine.utils.tf_utils import get_estimator_spec
+
+logger = logging.getLogger(__name__)
+
+
+class FM(BaseTFModel):  # factorization machine: bias + first-order + second-order terms
+ def model_fn(self, features, labels, mode, params):  # builds the FM logit and hands it to get_estimator_spec
+ field_size = params["training"]["field_size"]  # width that feat_ids / feat_vals are reshaped to
+ feature_size = params["training"]["feature_size"]  # number of rows in fm_weight and fm_vector
+ embedding_size = params["training"]["embedding_size"]  # number of columns in fm_vector
+ seed = params["training"]["seed"]
+
+ np.random.seed(seed)  # NumPy and TensorFlow are seeded from the same configured value
+ tf.set_random_seed(seed)
+
+ fm_bias = tf.get_variable(name='fm_bias', shape=[1],  # single bias term, initialised to 0.0
+ initializer=tf.constant_initializer(0.0))
+ fm_weight = tf.get_variable(name='fm_weight', shape=[feature_size],  # one first-order weight per feature id
+ initializer=tf.glorot_normal_initializer())
+ fm_vector = tf.get_variable(name='fm_vector', shape=[feature_size,
embedding_size],
+ initializer=tf.glorot_normal_initializer())
+
+ with tf.variable_scope("Feature"):
+ feat_ids = features['feat_ids']
+ feat_ids = tf.reshape(feat_ids, shape=[-1, field_size])  # -> [-1, field_size]
+ feat_vals = features['feat_vals']
+ feat_vals = tf.reshape(feat_vals, shape=[-1, field_size])  # same layout as feat_ids
+
+ with tf.variable_scope("First_order"):
+ feat_weights = tf.nn.embedding_lookup(fm_weight, feat_ids)  # weight of each feature id
+ y_w = tf.reduce_sum(tf.multiply(feat_weights, feat_vals), 1)  # weighted sum over axis 1
+
+ with tf.variable_scope("Second_order"):
+ embeddings = tf.nn.embedding_lookup(fm_vector, feat_ids)  # latent vector of each feature id
+ feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1])  # trailing axis so values broadcast over vector columns
+ embeddings = tf.multiply(embeddings, feat_vals)
+ sum_square = tf.square(tf.reduce_sum(embeddings, 1))  # (sum of vectors) squared
+ square_sum = tf.reduce_sum(tf.square(embeddings), 1)  # sum of squared vectors
+ y_v = 0.5 * tf.reduce_sum(tf.subtract(sum_square, square_sum), 1)  # half the difference, summed over axis 1
+
+ y = fm_bias + y_w + y_v  # logit = bias + first-order + second-order
+
+ return get_estimator_spec(y, labels, mode, params, [fm_vector,
fm_weight])
diff --git a/submarine-sdk/pysubmarine/submarine/ml/optimizer.py
b/submarine-sdk/pysubmarine/submarine/ml/optimizer.py
new file mode 100644
index 0000000..6719e10
--- /dev/null
+++ b/submarine-sdk/pysubmarine/submarine/ml/optimizer.py
@@ -0,0 +1,45 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import tensorflow as tf
+
+logger = logging.getLogger(__name__)
+
+
+class OptimizerKey(object):
+ """Lower-case key strings that `get_optimizer` compares its argument against."""
+ ADAM = 'adam'  # -> tf.train.AdamOptimizer
+ ADAGRAD = 'adagrad'  # -> tf.train.AdagradOptimizer
+ MOMENTUM = 'momentum'  # -> tf.train.MomentumOptimizer
+ FTRL = 'ftrl'  # -> tf.train.FtrlOptimizer
+
+
+def get_optimizer(optimizer_key, learning_rate):
+    """Return the tf.train optimizer selected by `optimizer_key` (case-insensitive)."""
+    name = optimizer_key.lower()
+    # Lazy factories: only the optimizer that was asked for is ever constructed.
+    factories = {
+        OptimizerKey.ADAM: lambda: tf.train.AdamOptimizer(
+            learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8),
+        OptimizerKey.ADAGRAD: lambda: tf.train.AdagradOptimizer(
+            learning_rate=learning_rate, initial_accumulator_value=1e-8),
+        OptimizerKey.MOMENTUM: lambda: tf.train.MomentumOptimizer(
+            learning_rate=learning_rate, momentum=0.95),
+        OptimizerKey.FTRL: lambda: tf.train.FtrlOptimizer(learning_rate),
+    }
+    if name not in factories:
+        raise ValueError("Invalid optimizer_key :", name)
+    return factories[name]()
diff --git a/submarine-sdk/pysubmarine/submarine/utils/tf_utils.py
b/submarine-sdk/pysubmarine/submarine/utils/tf_utils.py
index 3e8fc53..3d42628 100644
--- a/submarine-sdk/pysubmarine/submarine/utils/tf_utils.py
+++ b/submarine-sdk/pysubmarine/submarine/utils/tf_utils.py
@@ -13,9 +13,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import tensorflow as tf
-import json
import os
+import json
+import tensorflow as tf
+from submarine.ml.optimizer import get_optimizer
def _get_session_config_from_env_var(params):
@@ -69,3 +70,65 @@ def get_tf_config(params):
else:
raise ValueError("mode should be local or distributed")
return tf_config
+
+
+def get_estimator_spec(logit, labels, mode, params, weights):
+    """
+    Build the `EstimatorSpec` that a model_fn returns for a sigmoid output.
+    :param logit: logits `Tensor` to be used.
+    :param labels: Labels `Tensor`, or `dict` of same.
+    :param mode: Estimator's `ModeKeys`.
+    :param params: dict of hyperparameters; reads params["training"] keys "l2_reg",
+     "learning_rate", "optimizer" and params["output"]["metric"].
+    :param weights: a list of weights whose L2 penalties are summed into the loss
+    :return: `tf.estimator.EstimatorSpec` for PREDICT, EVAL or TRAIN; None for any other mode
+    """
+    l2_reg = params["training"]["l2_reg"]
+    learning_rate = params["training"]["learning_rate"]
+    optimizer = params["training"]["optimizer"]
+    metric = params['output']['metric']
+
+    output = tf.sigmoid(logit)
+    predictions = {"probabilities": output}
+    export_outputs = {
+        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
+            tf.estimator.export.PredictOutput(predictions)}
+    # PREDICT needs neither labels nor a loss, so return before building them
+    if mode == tf.estimator.ModeKeys.PREDICT:
+        return tf.estimator.EstimatorSpec(
+            mode=mode,
+            predictions=predictions,
+            export_outputs=export_outputs)
+
+    with tf.name_scope("Loss"):
+        l2 = 0
+        for weight in weights:
+            l2 += l2_reg * tf.nn.l2_loss(weight)  # accumulate; plain `=` kept only the last weight
+        loss = tf.reduce_mean(
+            tf.nn.sigmoid_cross_entropy_with_logits(logits=logit, labels=labels)) + l2
+
+    # The metric is validated for both EVAL and TRAIN; only 'auc' is supported
+    eval_metric_ops = {}
+    if metric == 'auc':
+        eval_metric_ops['auc'] = tf.metrics.auc(labels, output)
+    else:
+        raise TypeError("Invalid metric :", metric)
+
+    if mode == tf.estimator.ModeKeys.EVAL:
+        return tf.estimator.EstimatorSpec(
+            mode=mode,
+            predictions=predictions,
+            loss=loss,
+            eval_metric_ops=eval_metric_ops)
+
+    with tf.name_scope("Train"):
+        op = get_optimizer(optimizer, learning_rate)
+        train_op = op.minimize(loss, global_step=tf.train.get_global_step())
+
+    # Provide an estimator spec for `ModeKeys.TRAIN` modes
+    if mode == tf.estimator.ModeKeys.TRAIN:
+        return tf.estimator.EstimatorSpec(
+            mode=mode,
+            predictions=predictions,
+            loss=loss,
+            train_op=train_op)
diff --git a/submarine-sdk/pysubmarine/tests/ml/model/test_deepfm.py
b/submarine-sdk/pysubmarine/tests/ml/model/conftest.py
similarity index 92%
copy from submarine-sdk/pysubmarine/tests/ml/model/test_deepfm.py
copy to submarine-sdk/pysubmarine/tests/ml/model/conftest.py
index 21ec17a..4623088 100644
--- a/submarine-sdk/pysubmarine/tests/ml/model/test_deepfm.py
+++ b/submarine-sdk/pysubmarine/tests/ml/model/conftest.py
@@ -13,8 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-
-from submarine.ml.model import DeepFM
+import pytest
import os
LIBSVM_DATA = """1 1:0 2:0.051495 3:0.5 4:0.1 5:0.113437 6:0.874 7:0.01 8:0.08
9:0.028 10:0
@@ -28,7 +27,8 @@ LIBSVM_DATA = """1 1:0 2:0.051495 3:0.5 4:0.1 5:0.113437
6:0.874 7:0.01 8:0.08 9
"""
-def test_run_deepfm(tmpdir):
[email protected]
+def get_model_param(tmpdir):
data_file = os.path.join(tmpdir, "libsvm.txt")
save_model_dir = os.path.join(tmpdir, "experiment")
with open(data_file, "wt") as writer:
@@ -52,7 +52,6 @@ def test_run_deepfm(tmpdir):
"feature_size": 1000
}
}
- model = DeepFM(model_params=params)
- model.train()
- model.evaluate()
- model.predict()
+
+ yield params
+ os.remove(data_file)
diff --git a/submarine-sdk/pysubmarine/tests/ml/model/test_deepfm.py
b/submarine-sdk/pysubmarine/tests/ml/model/test_deepfm.py
index 21ec17a..c997c54 100644
--- a/submarine-sdk/pysubmarine/tests/ml/model/test_deepfm.py
+++ b/submarine-sdk/pysubmarine/tests/ml/model/test_deepfm.py
@@ -15,43 +15,11 @@
from submarine.ml.model import DeepFM
-import os
-LIBSVM_DATA = """1 1:0 2:0.051495 3:0.5 4:0.1 5:0.113437 6:0.874 7:0.01 8:0.08
9:0.028 10:0
-1 1:1.35 2:0.031561 3:0.45 4:0.56 5:0.000031 6:0.056 7:0.27 8:0.58 9:0.056
10:0.166667
-1 1:0.05 2:0.004983 3:0.19 4:0.14 5:0.000016 6:0.006 7:0.01 8:0.14 9:0.014
10:0.166667
-1 1:0.2 2:0.004983 3:0 4:0.12 5:0.016422 6:0.268 7:0.04 8:0.7 9:0.144
10:0.166667
-1 1:0 2:0.051495 3:0.5 4:0.1 5:0.113437 6:0.874 7:0.01 8:0.08 9:0.028 10:0
-1 1:1.35 2:0.031561 3:0.45 4:0.56 5:0.000031 6:0.056 7:0.27 8:0.58 9:0.056
10:0.166667
-1 1:0.05 2:0.004983 3:0.19 4:0.14 5:0.000016 6:0.006 7:0.01 8:0.14 9:0.014
10:0.166667
-1 1:0.2 2:0.004983 3:0 4:0.12 5:0.016422 6:0.268 7:0.04 8:0.7 9:0.144
10:0.166667
-"""
+def test_run_deepfm(get_model_param):
+ params = get_model_param
-def test_run_deepfm(tmpdir):
- data_file = os.path.join(tmpdir, "libsvm.txt")
- save_model_dir = os.path.join(tmpdir, "experiment")
- with open(data_file, "wt") as writer:
- writer.write(LIBSVM_DATA)
-
- params = {
- "input": {
- "train_data": data_file,
- "valid_data": data_file,
- "test_data": data_file,
- "type": "libsvm"
- },
- "output": {
- "save_model_dir": save_model_dir,
- "metric": "auc"
- },
- "training": {
- "batch_size": 256,
- "num_epochs": 1,
- "field_size": 10,
- "feature_size": 1000
- }
- }
model = DeepFM(model_params=params)
model.train()
model.evaluate()
diff --git a/submarine-sdk/pysubmarine/submarine/ml/model/__init__.py
b/submarine-sdk/pysubmarine/tests/ml/model/test_fm.py
similarity index 79%
copy from submarine-sdk/pysubmarine/submarine/ml/model/__init__.py
copy to submarine-sdk/pysubmarine/tests/ml/model/test_fm.py
index 3fb4935..9b83b1f 100644
--- a/submarine-sdk/pysubmarine/submarine/ml/model/__init__.py
+++ b/submarine-sdk/pysubmarine/tests/ml/model/test_fm.py
@@ -13,6 +13,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from .deepfm import DeepFM
-__all__ = ["DeepFM"]
+from submarine.ml.model import FM
+
+
+def test_run_fm(get_model_param):
+    """Smoke test: FM trains, evaluates and predicts with the fixture's params."""
+
+    fm = FM(model_params=get_model_param)
+    fm.train()
+    fm.evaluate()
+    fm.predict()
diff --git a/submarine-sdk/pysubmarine/example/deepfm/run_deepfm.py
b/submarine-sdk/pysubmarine/tests/ml/test_optimizer.py
similarity index 60%
copy from submarine-sdk/pysubmarine/example/deepfm/run_deepfm.py
copy to submarine-sdk/pysubmarine/tests/ml/test_optimizer.py
index e878abb..a51b992 100644
--- a/submarine-sdk/pysubmarine/example/deepfm/run_deepfm.py
+++ b/submarine-sdk/pysubmarine/tests/ml/test_optimizer.py
@@ -13,18 +13,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from submarine.ml.model import DeepFM
-import argparse
+import pytest
+from submarine.ml.optimizer import get_optimizer
-if __name__ == '__main__':
- parser = argparse.ArgumentParser()
- parser.add_argument("-conf", help="a JSON configuration file for DeepFM",
type=str)
- args = parser.parse_args()
- json_path = args.conf
- model = DeepFM(json_path=json_path)
- # Training
- model.train()
- # Evaluate
- result = model.evaluate()
- print("Model metrics : ", result)
+def test_get_optimizer():
+    """Every supported key builds an optimizer; an unknown key raises ValueError."""
+    supported = ('adam', 'adagrad', 'momentum', 'ftrl')
+    unsupported = ('adddam',)
+
+    for key in supported:
+        get_optimizer(optimizer_key=key, learning_rate=0.3)
+    for key in unsupported:
+        with pytest.raises(ValueError, match="Invalid optimizer_key :"):
+            get_optimizer(optimizer_key=key, learning_rate=0.3)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]