This is an automated email from the ASF dual-hosted git repository.

haibin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new 9158352  Add contrib.rand_zipfian (#9747)
9158352 is described below

commit 9158352ec3061fe75af0c0d18770c2a2fd56595a
Author: Haibin Lin <linhaibin.e...@gmail.com>
AuthorDate: Fri Feb 23 21:53:07 2018 +0800

    Add contrib.rand_zipfian (#9747)
    
    * draft
    
    * move to contrib
    
    * rename op
    
    * CR comments
    
    * Update contrib.py
    
    * Update contrib.py
    
    * Update random.py
    
    * update example in the doc
    
    * update example in symbol doc
    
    * CR comments
    
    * update op name
    
    * update op name
    
    * update op name in test
    
    * update test
    
    * Update contrib.py
---
 python/mxnet/ndarray/contrib.py      | 74 +++++++++++++++++++++++++++++++++++-
 python/mxnet/symbol/contrib.py       | 69 ++++++++++++++++++++++++++++++++-
 tests/python/unittest/test_random.py | 30 +++++++++++++++
 3 files changed, 171 insertions(+), 2 deletions(-)

diff --git a/python/mxnet/ndarray/contrib.py b/python/mxnet/ndarray/contrib.py
index 0a3fe48..ba402e6 100644
--- a/python/mxnet/ndarray/contrib.py
+++ b/python/mxnet/ndarray/contrib.py
@@ -18,9 +18,81 @@
 # coding: utf-8
 # pylint: disable=wildcard-import, unused-wildcard-import
 """Contrib NDArray API of MXNet."""
+import math
+from ..context import current_context
+from ..random import uniform
 try:
     from .gen_contrib import *
 except ImportError:
     pass
 
-__all__ = []
+__all__ = ["rand_zipfian"]
+
+# pylint: disable=line-too-long
+def rand_zipfian(true_classes, num_sampled, range_max, ctx=None):
+    """Draw random samples from an approximately log-uniform or Zipfian 
distribution.
+
+    This operation randomly samples *num_sampled* candidates the range of 
integers [0, range_max).
+    The elements of sampled_candidates are drawn with replacement from the 
base distribution.
+
+    The base distribution for this operator is an approximately log-uniform or 
Zipfian distribution:
+
+    P(class) = (log(class + 2) - log(class + 1)) / log(range_max + 1)
+
+    This sampler is useful when the true classes approximately follow such a 
distribution.
+    For example, if the classes represent words in a lexicon sorted in 
decreasing order of \
+    frequency. If your classes are not ordered by decreasing frequency, do not 
use this op.
+
+    Additionaly, it also returns the number of times each of the \
+    true classes and the sampled classes is expected to occur.
+
+    Parameters
+    ----------
+    true_classes : NDArray
+        A 1-D NDArray of the target classes.
+    num_sampled: int
+        The number of classes to randomly sample.
+    range_max: int
+        The number of possible classes.
+    ctx : Context
+        Device context of output. Default is current context. Overridden by
+        `mu.context` when `mu` is an NDArray.
+
+    Returns
+    -------
+    samples: NDArray
+        The sampled candidate classes in 1-D `int64` dtype.
+    expected_count_true: NDArray
+        The expected count for true classes in 1-D `float64` dtype.
+    expected_count_sample: NDArray
+        The expected count for sampled candidates in 1-D `float64` dtype.
+
+    Examples
+    --------
+    >>> true_cls = mx.nd.array([3])
+    >>> samples, exp_count_true, exp_count_sample = 
mx.nd.contrib.rand_zipfian(true_cls, 4, 5)
+    >>> samples
+    [1 3 3 3]
+    <NDArray 4 @cpu(0)>
+    >>> exp_count_true
+    [ 0.12453879]
+    <NDArray 1 @cpu(0)>
+    >>> exp_count_sample
+    [ 0.22629439  0.12453879  0.12453879  0.12453879]
+    <NDArray 4 @cpu(0)>
+    """
+    if ctx is None:
+        ctx = current_context()
+    log_range = math.log(range_max + 1)
+    rand = uniform(0, log_range, shape=(num_sampled,), dtype='float64', 
ctx=ctx)
+    # make sure sampled_classes are in the range of [0, range_max)
+    sampled_classes = (rand.exp() - 1).astype('int64') % range_max
+
+    true_cls = true_classes.as_in_context(ctx).astype('float64')
+    expected_count_true = ((true_cls + 2.0) / (true_cls + 1.0)).log() / 
log_range * num_sampled
+    # cast sampled classes to fp64 to avoid interget division
+    sampled_cls_fp64 = sampled_classes.astype('float64')
+    expected_prob_sampled = ((sampled_cls_fp64 + 2.0) / (sampled_cls_fp64 + 
1.0)).log() / log_range
+    expected_count_sampled = expected_prob_sampled * num_sampled
+    return sampled_classes, expected_count_true, expected_count_sampled
+# pylint: enable=line-too-long
diff --git a/python/mxnet/symbol/contrib.py b/python/mxnet/symbol/contrib.py
index 13feb78..83e90e6 100644
--- a/python/mxnet/symbol/contrib.py
+++ b/python/mxnet/symbol/contrib.py
@@ -18,9 +18,76 @@
 # coding: utf-8
 # pylint: disable=wildcard-import, unused-wildcard-import
 """Contrib Symbol API of MXNet."""
+import math
+from .random import uniform
+from .symbol import Symbol
 try:
     from .gen_contrib import *
 except ImportError:
     pass
 
-__all__ = []
+__all__ = ["rand_zipfian"]
+
+def rand_zipfian(true_classes, num_sampled, range_max):
+    """Draw random samples from an approximately log-uniform or Zipfian 
distribution.
+
+    This operation randomly samples *num_sampled* candidates the range of 
integers [0, range_max).
+    The elements of sampled_candidates are drawn with replacement from the 
base distribution.
+
+    The base distribution for this operator is an approximately log-uniform or 
Zipfian distribution:
+
+    P(class) = (log(class + 2) - log(class + 1)) / log(range_max + 1)
+
+    This sampler is useful when the true classes approximately follow such a 
distribution.
+    For example, if the classes represent words in a lexicon sorted in 
decreasing order of \
+    frequency. If your classes are not ordered by decreasing frequency, do not 
use this op.
+
+    Additionaly, it also returns the number of times each of the \
+    true classes and the sampled classes is expected to occur.
+
+    Parameters
+    ----------
+    true_classes : Symbol
+        The target classes in 1-D.
+    num_sampled: int
+        The number of classes to randomly sample.
+    range_max: int
+        The number of possible classes.
+
+    Returns
+    -------
+    samples: Symbol
+        The sampled candidate classes in 1-D `int64` dtype.
+    expected_count_true: Symbol
+        The expected count for true classes in 1-D `float64` dtype.
+    expected_count_sample: Symbol
+        The expected count for sampled candidates in 1-D `float64` dtype.
+
+    Examples
+    --------
+    >>> true_cls = mx.nd.array([3])
+    >>> samples, exp_count_true, exp_count_sample = 
mx.nd.contrib.rand_zipfian(true_cls, 4, 5)
+    >>> samples
+    [1 3 3 3]
+    <NDArray 4 @cpu(0)>
+    >>> exp_count_true
+    [ 0.12453879]
+    <NDArray 1 @cpu(0)>
+    >>> exp_count_sample
+    [ 0.22629439  0.12453879  0.12453879  0.12453879]
+    <NDArray 4 @cpu(0)>
+    """
+    assert(isinstance(true_classes, Symbol)), "unexpected type %s" % 
type(true_classes)
+    log_range = math.log(range_max + 1)
+    rand = uniform(0, log_range, shape=(num_sampled,), dtype='float64')
+    # make sure sampled_classes are in the range of [0, range_max)
+    sampled_classes = (rand.exp() - 1).astype('int64') % range_max
+
+    true_classes = true_classes.astype('float64')
+    expected_prob_true = ((true_classes + 2.0) / (true_classes + 1.0)).log() / 
log_range
+    expected_count_true = expected_prob_true * num_sampled
+    # cast sampled classes to fp64 to avoid interget division
+    sampled_cls_fp64 = sampled_classes.astype('float64')
+    expected_prob_sampled = ((sampled_cls_fp64 + 2.0) / (sampled_cls_fp64 + 
1.0)).log() / log_range
+    expected_count_sampled = expected_prob_sampled * num_sampled
+    return sampled_classes, expected_count_true, expected_count_sampled
diff --git a/tests/python/unittest/test_random.py 
b/tests/python/unittest/test_random.py
index 6b77fc1..f042f57 100644
--- a/tests/python/unittest/test_random.py
+++ b/tests/python/unittest/test_random.py
@@ -521,6 +521,36 @@ def test_with_random_seed():
         for j in range(i+1, num_seeds):
             check_data(data[i],data[j])
 
+@with_seed()
+def test_zipfian_generator():
+    # dummy true classes
+    num_true = 5
+    num_sampled = 1000
+    range_max = 20
+
+    def compute_expected_prob():
+        # P(class) = (log(class + 2) - log(class + 1)) / log(range_max + 1)
+        classes = mx.nd.arange(0, range_max)
+        expected_counts = ((classes + 2).log() - (classes + 1).log()) / 
np.log(range_max + 1)
+        return expected_counts
+
+    exp_cnt = compute_expected_prob() * num_sampled
+
+    # test ndarray
+    true_classes = mx.nd.random.uniform(0, range_max, 
shape=(num_true,)).astype('int32')
+    sampled_classes, exp_cnt_true, exp_cnt_sampled = 
mx.nd.contrib.rand_zipfian(true_classes, num_sampled, range_max)
+    mx.test_utils.assert_almost_equal(exp_cnt_sampled.asnumpy(), 
exp_cnt[sampled_classes].asnumpy(), rtol=1e-1, atol=1e-2)
+    mx.test_utils.assert_almost_equal(exp_cnt_true.asnumpy(), 
exp_cnt[true_classes].asnumpy(), rtol=1e-1, atol=1e-2)
+
+    # test symbol
+    true_classes_var = mx.sym.var('true_classes')
+    outputs = mx.sym.contrib.rand_zipfian(true_classes_var, num_sampled, 
range_max)
+    outputs = mx.sym.Group(outputs)
+    executor = outputs.bind(mx.context.current_context(), {'true_classes' : 
true_classes})
+    executor.forward()
+    sampled_classes, exp_cnt_true, exp_cnt_sampled = executor.outputs
+    mx.test_utils.assert_almost_equal(exp_cnt_sampled.asnumpy(), 
exp_cnt[sampled_classes].asnumpy(), rtol=1e-1, atol=1e-2)
+    mx.test_utils.assert_almost_equal(exp_cnt_true.asnumpy(), 
exp_cnt[true_classes].asnumpy(), rtol=1e-1, atol=1e-2)
 
 if __name__ == '__main__':
     import nose

-- 
To stop receiving notification emails like this one, please contact
hai...@apache.org.

Reply via email to