Peng Yu wrote:
For example, the long string is 'abcabc' and the given string is
'abc', then 'abc' appears 2 times in 'abcabc'. Currently, I am calling
'find()' multiple times to figure out how many times a given string
appears in a long string. I'm wondering if there is a function in
python which can directly return this information.
re.findall?
>>> patt = re.compile('abc')
>>> len(patt.findall('abcabc'))
2
To count several substrings at once (non-overlapping matches only), here is a sketch, tested only as far as the doctests below show:
8<----------------------------------------------------------------------
import re
from collections import defaultdict
def count(text, *args):
    """
    Count non-overlapping occurrences of each given substring in ``text``.

    ``text`` is scanned once from left to right. Every substring in ``args``
    is matched literally (regex metacharacters are escaped). Characters
    consumed by one match are not reused, so a substring that only occurs
    overlapping an already-counted match is not counted. When several
    substrings could match at the same position, the longest one is taken.

    Returns a ``defaultdict(int)`` keyed by the matched substring; a
    substring that never matched reads as 0. If no substrings are given,
    the result is empty.

    >>> ret = count('abcabc', 'abc')
    >>> ret['abc']
    2
    >>> ret = count('xabcxabcx', 'abc', 'x')
    >>> ret['abc']
    2
    >>> ret['x']
    3
    >>> ret = count('abcabc', 'abc', 'cab')
    >>> ret['abc']
    2
    >>> ret['cab']
    0
    >>> ret = count('abcabc', 'abc', 'ab')
    >>> ret['abc']
    2
    >>> ret['ab']
    0
    """
    result = defaultdict(int)
    # Reverse lexicographic order puts a string ahead of any of its own
    # prefixes, so the regex alternation tries 'abc' before 'ab'.
    # sorted() is used because map() returns an iterator on Python 3,
    # which has no .sort()/.reverse() methods.
    alternatives = sorted(map(re.escape, args), reverse=True)
    if not alternatives:
        # An empty pattern would match the empty string at every position.
        return result
    pattern = re.compile('|'.join(alternatives))
    # finditer yields the same non-overlapping matches that sub() would
    # visit, without building a replacement string that is thrown away.
    for match in pattern.finditer(text):
        result[match.group(0)] += 1
    return result
# When executed as a script, run the doctests embedded in this module.
if __name__ == '__main__':
    from doctest import testmod
    testmod()
8<----------------------------------------------------------------------
--
http://mail.python.org/mailman/listinfo/python-list