This is an automated email from the ASF dual-hosted git repository.
zhasheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 80b7715 Update documentation for count_tokens_from_str (#11800)
80b7715 is described below
commit 80b7715328b76292375e9d6ab3d475cb5f1c2c60
Author: Vandana Kannan <[email protected]>
AuthorDate: Wed Jul 18 13:53:21 2018 -0700
Update documentation for count_tokens_from_str (#11800)
---
python/mxnet/contrib/text/utils.py | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/python/mxnet/contrib/text/utils.py
b/python/mxnet/contrib/text/utils.py
index cd8ce5b..88ed759 100644
--- a/python/mxnet/contrib/text/utils.py
+++ b/python/mxnet/contrib/text/utils.py
@@ -29,11 +29,14 @@ def count_tokens_from_str(source_str, token_delim=' ',
seq_delim='\n',
to_lower=False, counter_to_update=None):
"""Counts tokens in the specified string.
- For token_delim='<td>' and seq_delim='<sd>', a specified string of two
sequences of tokens may
- look like::
+ For token_delim=\'<td>\' and seq_delim=\'<sd>\', a specified string of two
sequences of
+ tokens may look like::
<td>token1<td>token2<td>token3<td><sd><td>token4<td>token5<td><sd>
+ <td> and <sd> are regular expressions. Make use of \\\\ to allow special
characters as
+ delimiters. The list of
+ special characters can be found at
https://docs.python.org/3/library/re.html.
Parameters
----------
@@ -63,6 +66,11 @@ def count_tokens_from_str(source_str, token_delim=' ',
seq_delim='\n',
>>> source_str = ' Life is great ! \\n life is good . \\n'
>>> count_tokens_from_str(source_str, ' ', '\\n', True)
Counter({'!': 1, '.': 1, 'good': 1, 'great': 1, 'is': 2, 'life': 2})
+
+
+ >>> source_str = '*Life*is*great*!*\\n*life*is*good*.*\\n'
+ >>> count_tokens_from_str(source_str, '\\*', '\\n', True)
+ Counter({'is': 2, 'life': 2, '!': 1, 'great': 1, 'good': 1, '.': 1})
"""
source_str = filter(None,