Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package python-emoji for openSUSE:Factory checked in at 2022-01-16 23:18:24 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-emoji (Old) and /work/SRC/openSUSE:Factory/.python-emoji.new.1892 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-emoji" Sun Jan 16 23:18:24 2022 rev:13 rq:946783 version:1.6.3 Changes: -------- --- /work/SRC/openSUSE:Factory/python-emoji/python-emoji.changes 2021-10-15 23:05:13.766151611 +0200 +++ /work/SRC/openSUSE:Factory/.python-emoji.new.1892/python-emoji.changes 2022-01-16 23:19:21.886380425 +0100 @@ -1,0 +2,7 @@ +Sat Jan 15 15:47:10 UTC 2022 - Matthias Bach <[email protected]> - 1.6.3 + +- Update to 1.6.3 + * Added support for counting unique emojis + * Improved performance of demojize() + +------------------------------------------------------------------- Old: ---- emoji-1.6.1.tar.gz New: ---- emoji-1.6.3.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-emoji.spec ++++++ --- /var/tmp/diff_new_pack.0nzieh/_old 2022-01-16 23:19:22.334380645 +0100 +++ /var/tmp/diff_new_pack.0nzieh/_new 2022-01-16 23:19:22.342380649 +0100 @@ -1,7 +1,7 @@ # # spec file for package python-emoji # -# Copyright (c) 2021 SUSE LLC +# Copyright (c) 2022 SUSE LLC # Copyright (c) 2021 Matthias Bach <[email protected]> # # All modifications and additions to the file contributed by third parties @@ -18,7 +18,7 @@ Name: python-emoji -Version: 1.6.1 +Version: 1.6.3 Release: 0 Summary: Emoji for Python License: BSD-3-Clause ++++++ emoji-1.6.1.tar.gz -> emoji-1.6.3.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/emoji-1.6.1/CHANGES.md new/emoji-1.6.3/CHANGES.md --- old/emoji-1.6.1/CHANGES.md 2021-10-13 14:21:53.000000000 +0200 +++ new/emoji-1.6.3/CHANGES.md 2022-01-15 14:31:07.000000000 +0100 @@ -1,6 +1,16 @@ emoji ===== +1.6.3 +----- +* Added support for counting unique emojis + +1.6.2 +----- +* Improve performance of demojize() +* Added more tests +* Added warning when someone uses any other language than 'en' with use_aliases=True in emojize() + 1.6.1 ----- * Allow multiple aliases @@ -14,7 +24,6 @@ * emoji.version(string) method added * Included 'variant' in the dict of dicts - 1.5.0 ----- * Emojis of English version updated to the Emoji Charts v14.0 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/emoji-1.6.1/PKG-INFO new/emoji-1.6.3/PKG-INFO --- old/emoji-1.6.1/PKG-INFO 2021-10-13 14:23:08.380595400 +0200 +++ new/emoji-1.6.3/PKG-INFO 2022-01-15 14:38:34.890504100 +0100 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: emoji -Version: 1.6.1 +Version: 1.6.3 Summary: Emoji for Python Home-page: https://github.com/carpedm20/emoji/ Author: Taehoon Kim, Kevin Wurster and Tahir Jalilov diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/emoji-1.6.1/emoji/__init__.py new/emoji-1.6.3/emoji/__init__.py --- old/emoji-1.6.1/emoji/__init__.py 2021-10-13 14:21:53.000000000 +0200 +++ new/emoji-1.6.3/emoji/__init__.py 2022-01-15 14:29:11.000000000 +0100 @@ -30,7 +30,7 @@ 'EMOJI_ALIAS_UNICODE_ENGLISH', 'UNICODE_EMOJI_ALIAS_ENGLISH', 'EMOJI_DATA', ] -__version__ = '1.6.1' +__version__ = '1.6.3' __author__ = 'Taehoon Kim, Kevin Wurster and Tahir Jalilov' __email__ = '[email protected]' # and [email protected], [email protected] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/emoji-1.6.1/emoji/core.py new/emoji-1.6.3/emoji/core.py --- old/emoji-1.6.1/emoji/core.py 2021-10-13 14:21:53.000000000 +0200 +++ new/emoji-1.6.3/emoji/core.py 2022-01-15 14:22:32.000000000 +0100 @@ -11,6 +11,7 @@ import re import sys +import warnings from emoji import unicode_codes @@ -23,6 +24,7 @@ PY2 = sys.version_info[0] == 2 _EMOJI_REGEXP = None +_SEARCH_TREE = None _DEFAULT_DELIMITER = ':' @@ -52,7 +54,8 @@ :param use_aliases: (optional) Enable emoji aliases. See ``emoji.UNICODE_EMOJI_ALIAS``. :param delimiters: (optional) Use delimiters other than _DEFAULT_DELIMITER :param variant: (optional) Choose variation selector between "base"(None), VS-15 ("text_type") and VS-16 ("emoji_type") - :param language: Choose language of emoji name + :param language: Choose language of emoji name: language code 'es', 'de', etc. or 'alias' + to use English aliases :param version: (optional) Max version. If set to an Emoji Version, all emoji above this version will be ignored. :param handle_version: (optional) Replace the emoji above ``version`` @@ -74,20 +77,22 @@ :raises ValueError: if ``variant`` is neither None, 'text_type' or 'emoji_type' """ - EMOJI_UNICODE = unicode_codes.EMOJI_UNICODE[language] + + if use_aliases or language == 'alias': + if language not in ('en', 'alias'): + warnings.warn("use_aliases=True is only supported for language='en'. " + "It is recommended to use emojize(string, language='alias') instead", stacklevel=2) + use_aliases = True + language = 'en' + + EMOJI_UNICODE = unicode_codes.EMOJI_ALIAS_UNICODE_ENGLISH if use_aliases else unicode_codes.EMOJI_UNICODE[language] pattern = re.compile(u'(%s[\\w\\-&.?????????()!#*+????,/]+%s)' % delimiters, flags=re.UNICODE) def replace(match): - mg = match.group(1).replace(delimiters[0], _DEFAULT_DELIMITER).replace( - delimiters[1], _DEFAULT_DELIMITER - ) - if use_aliases: - emj = unicode_codes.EMOJI_ALIAS_UNICODE_ENGLISH.get(mg) - else: - emj = EMOJI_UNICODE.get(mg) - + mg = match.group(1)[len(delimiters[0]):-len(delimiters[1])] + emj = EMOJI_UNICODE.get(_DEFAULT_DELIMITER + mg + _DEFAULT_DELIMITER) if emj is None: - return mg + return match.group(1) if version is not None: if emj in unicode_codes.EMOJI_DATA and unicode_codes.EMOJI_DATA[emj]['E'] > version: @@ -134,7 +139,8 @@ :param string: String contains unicode characters. MUST BE UNICODE. :param use_aliases: (optional) Return emoji aliases. See ``emoji.UNICODE_EMOJI_ALIAS``. :param delimiters: (optional) User delimiters other than ``_DEFAULT_DELIMITER`` - :param language: (optional) Choose language of emoji name + :param language: Choose language of emoji name: language code 'es', 'de', etc. or 'alias' + to use English aliases :param version: (optional) Max version. If set to an Emoji Version, all emoji above this version will be removed. :param handle_version: (optional) Replace the emoji above ``version`` @@ -156,24 +162,60 @@ """ - codes_dict = unicode_codes.UNICODE_EMOJI_ALIAS_ENGLISH if use_aliases else unicode_codes.UNICODE_EMOJI[language] - - def replace(match): - emj = match.group(0) - val = codes_dict.get(emj) - if val is None: - return emj - if version is not None: - if emj in unicode_codes.EMOJI_DATA and unicode_codes.EMOJI_DATA[emj]['E'] > version: - if callable(handle_version): - return handle_version(emj, unicode_codes.EMOJI_DATA[emj]) - elif handle_version is not None: - return str(handle_version) + if language == 'alias': + language = 'en' + use_aliases = True + elif use_aliases and language != 'en': + warnings.warn("use_aliases=True is only supported for language='en'. " + "It is recommended to use demojize(string, language='alias') instead", stacklevel=2) + language = 'en' + + tree = _get_search_tree() + result = [] + i = 0 + length = len(string) + while i < length: + consumed = False + char = string[i] + if char in tree: + j = i + 1 + sub_tree = tree[char] + while j < length and string[j] in sub_tree: + sub_tree = sub_tree[string[j]] + j += 1 + if 'data' in sub_tree: + emj_data = sub_tree['data'] + code_points = string[i:j] + replace_str = None + if version is not None and emj_data['E'] > version: + if callable(handle_version): + emj_data = emj_data.copy() + emj_data['match_start'] = i + emj_data['match_end'] = j + replace_str = handle_version(code_points, emj_data) + elif handle_version is not None: + replace_str = str(handle_version) + else: + replace_str = None + elif language in emj_data: + if use_aliases and 'alias' in emj_data: + replace_str = delimiters[0] + emj_data['alias'][0][1:-1] + delimiters[1] + else: + replace_str = delimiters[0] + emj_data[language][1:-1] + delimiters[1] else: - return '' - return delimiters[0] + val[1:-1] + delimiters[1] + # The emoji exists, but it is not translated, so we keep the emoji + replace_str = code_points - return get_emoji_regexp().sub(replace, string).replace(u'\ufe0e', '').replace(u'\ufe0f', '') + i = j - 1 + consumed = True + if replace_str: + result.append(replace_str) + + if not consumed and char != u'\ufe0e' and char != u'\ufe0f': + result.append(char) + i += 1 + + return "".join(result) def replace_emoji(string, replace='', language=None, version=-1): @@ -189,20 +231,17 @@ :param language: (optional) Parameter is no longer used """ - if version <= 0 and not callable(replace): - return get_emoji_regexp().sub(replace, string).replace(u'\ufe0e', '').replace(u'\ufe0f', '') - - def replace_fct(match): - emj = match.group(0) - - if emj in unicode_codes.EMOJI_DATA and unicode_codes.EMOJI_DATA[emj]['E'] > version: + if version > -1: + def f(emj, emj_data): + if emj_data['E'] <= version: + return emj # Do not replace emj if callable(replace): - return replace(emj, unicode_codes.EMOJI_DATA[emj]) - else: - return str(replace) - return emj + return replace(emj, emj_data) + return str(replace) - return get_emoji_regexp().sub(replace_fct, string).replace(u'\ufe0e', '').replace(u'\ufe0f', '') + return demojize(string, use_aliases=False, language='en', version=-1, handle_version=f) + else: + return demojize(string, use_aliases=False, language='en', version=-1, handle_version=replace) def get_emoji_regexp(language=None): @@ -232,12 +271,14 @@ """ _entities = [] - for match in get_emoji_regexp().finditer(string): + def f(emj, emj_data): _entities.append({ - 'location': match.start(), - 'emoji': match.group(), + 'location': emj_data['match_start'], + 'emoji': emj, }) + demojize(string, use_aliases=False, language='en', + version=-1, handle_version=f) return _entities @@ -252,8 +293,13 @@ return distinct_list -def emoji_count(string): - """Returns the count of emojis in a string.""" +def emoji_count(string, unique=False): + """Returns the count of emojis in a string. + + :param unique: (optional) True if count only unique emojis + """ + if unique: + return len(distinct_emoji_lis(string)) return len(emoji_lis(string)) @@ -285,6 +331,7 @@ # Try to find first emoji in string version = [] + def f(e, emoji_data): version.append(emoji_data['E']) return '' @@ -300,3 +347,66 @@ return version[0] raise ValueError("No emoji found in string") + + +def _get_search_tree(): + """ + Generate a search tree for demojize() + Example of a search tree:: + + EMOJI_DATA = + {'a': {'en': ':Apple:'}, + 'b': {'en': ':Bus:'}, + 'ba': {'en': ':Bat:'}, + 'band': {'en': ':Beatles:'}, + 'bandit': {'en': ':Outlaw:'}, + 'bank': {'en': ':BankOfEngland:'}, + 'bb': {'en': ':BB-gun:'}, + 'c': {'en': ':Car:'}} + + _SEARCH_TREE = + {'a': {'data': {'en': ':Apple:'}}, + 'b': {'a': {'data': {'en': ':Bat:'}, + 'n': {'d': {'data': {'en': ':Beatles:'}, + 'i': {'t': {'data': {'en': ':Outlaw:'}}}}, + 'k': {'data': {'en': ':BankOfEngland:'}}}}, + 'b': {'data': {'en': ':BB-gun:'}}, + 'data': {'en': ':Bus:'}}, + 'c': {'data': {'en': ':Car:'}}} + + _SEARCH_TREE + / | ??? + / | ??? + a b c + | / | ??? | + | / | ??? | + :Apple: ba :Bus: bb :Car: + / ??? | + / ??? | + :Bat: ban :BB-gun: + / ??? + / ??? + band bank + / ??? | + / ??? | + bandi :Beatles: :BankOfEngland: + | + bandit + | + :Outlaw: + + + """ + global _SEARCH_TREE + if _SEARCH_TREE is None: + _SEARCH_TREE = {} + for emj in unicode_codes.EMOJI_DATA: + sub_tree = _SEARCH_TREE + lastidx = len(emj) - 1 + for i, char in enumerate(emj): + if char not in sub_tree: + sub_tree[char] = {} + sub_tree = sub_tree[char] + if i == lastidx: + sub_tree['data'] = unicode_codes.EMOJI_DATA[emj] + return _SEARCH_TREE diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/emoji-1.6.1/emoji.egg-info/PKG-INFO new/emoji-1.6.3/emoji.egg-info/PKG-INFO --- old/emoji-1.6.1/emoji.egg-info/PKG-INFO 2021-10-13 14:23:08.000000000 +0200 +++ new/emoji-1.6.3/emoji.egg-info/PKG-INFO 2022-01-15 14:38:34.000000000 +0100 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: emoji -Version: 1.6.1 +Version: 1.6.3 Summary: Emoji for Python Home-page: https://github.com/carpedm20/emoji/ Author: Taehoon Kim, Kevin Wurster and Tahir Jalilov diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/emoji-1.6.1/tests/test_core.py new/emoji-1.6.3/tests/test_core.py --- old/emoji-1.6.1/tests/test_core.py 2021-10-13 14:21:53.000000000 +0200 +++ new/emoji-1.6.3/tests/test_core.py 2022-01-15 14:22:32.000000000 +0100 @@ -5,11 +5,17 @@ from __future__ import unicode_literals +import random import re import emoji import pytest +def ascii(s): + # return escaped Code points \U000AB123 + return s.encode("unicode-escape").decode() + + def test_emojize_name_only(): for lang_code, emoji_pack in emoji.EMOJI_UNICODE.items(): for name in emoji_pack.keys(): @@ -36,29 +42,38 @@ expected = emoji.emojize(actual, False) assert expected == actual, '%s != %s' % (expected, actual) + def test_emojize_languages(): for lang_code, emoji_pack in emoji.EMOJI_UNICODE.items(): for name, emj in emoji_pack.items(): assert emoji.emojize(name, language=lang_code) == emj + def test_demojize_languages(): - for lang_code, emoji_pack in emoji.UNICODE_EMOJI.items(): - for emj, name in emoji_pack.items(): + for lang_code, emoji_pack in emoji.EMOJI_UNICODE.items(): + for name, emj in emoji_pack.items(): assert emoji.demojize(emj, language=lang_code) == name def test_emojize_variant(): - remove_variant = lambda s: re.sub(u'[\ufe0e\ufe0f]$', '', s) + def remove_variant(s): return re.sub(u'[\ufe0e\ufe0f]$', '', s) - assert emoji.emojize(':Taurus:', variant=None) == emoji.EMOJI_UNICODE['en'][':Taurus:'] - assert emoji.emojize(':Taurus:', variant=None) == emoji.emojize(':Taurus:') - assert emoji.emojize(':Taurus:', variant='text_type') == remove_variant(emoji.EMOJI_UNICODE['en'][':Taurus:']) + u'\ufe0e' - assert emoji.emojize(':Taurus:', variant='emoji_type') == remove_variant(emoji.EMOJI_UNICODE['en'][':Taurus:']) + u'\ufe0f' - - assert emoji.emojize(':admission_tickets:', variant=None) == emoji.EMOJI_UNICODE['en'][':admission_tickets:'] - assert emoji.emojize(':admission_tickets:', variant=None) == emoji.emojize(':admission_tickets:') - assert emoji.emojize(':admission_tickets:', variant='text_type') == remove_variant(emoji.EMOJI_UNICODE['en'][':admission_tickets:']) + u'\ufe0e' - assert emoji.emojize(':admission_tickets:', variant='emoji_type') == remove_variant(emoji.EMOJI_UNICODE['en'][':admission_tickets:']) + u'\ufe0f' + assert emoji.emojize( + ':Taurus:', variant=None) == emoji.EMOJI_UNICODE['en'][':Taurus:'] + assert emoji.emojize(':Taurus:', variant=None) == emoji.emojize(':Taurus:') + assert emoji.emojize(':Taurus:', variant='text_type') == remove_variant( + emoji.EMOJI_UNICODE['en'][':Taurus:']) + u'\ufe0e' + assert emoji.emojize(':Taurus:', variant='emoji_type') == remove_variant( + emoji.EMOJI_UNICODE['en'][':Taurus:']) + u'\ufe0f' + + assert emoji.emojize( + ':admission_tickets:', variant=None) == emoji.EMOJI_UNICODE['en'][':admission_tickets:'] + assert emoji.emojize(':admission_tickets:', variant=None) == emoji.emojize( + ':admission_tickets:') + assert emoji.emojize(':admission_tickets:', variant='text_type') == remove_variant( + emoji.EMOJI_UNICODE['en'][':admission_tickets:']) + u'\ufe0e' + assert emoji.emojize(':admission_tickets:', variant='emoji_type') == remove_variant( + emoji.EMOJI_UNICODE['en'][':admission_tickets:']) + u'\ufe0f' with pytest.raises(ValueError): emoji.emojize(':admission_tickets:', variant=False) @@ -70,21 +85,23 @@ emoji.emojize(':admission_tickets:', variant='wrong') assert emoji.emojize(":football:", use_aliases=False) == ':football:' - assert emoji.emojize(":football:", variant="text_type", use_aliases=False) == ':football:' - assert emoji.emojize(":football:", use_aliases=True) == u'\U0001F3C8' - assert emoji.emojize(":football:", variant="emoji_type", use_aliases=True) == u'\U0001F3C8' + assert emoji.emojize(":football:", variant="text_type", + use_aliases=False) == ':football:' + assert emoji.emojize(":football:", use_aliases=True) == u'\U0001F3C8' + assert emoji.emojize(":football:", variant="emoji_type", + use_aliases=True) == u'\U0001F3C8' def test_demojize_removes_variant(): # demojize should remove all variant indicators \ufe0e and \ufe0f from the string text = "".join([emoji.emojize(':Taurus:', variant='text_type'), - emoji.emojize(':Taurus:', variant='emoji_type'), - emoji.emojize(':admission_tickets:', variant='text_type'), - emoji.emojize(':admission_tickets:', variant='emoji_type'), - emoji.emojize(':alien:', variant='text_type'), - emoji.emojize(':alien:', variant='emoji_type'), - emoji.emojize(':atom_symbol:', variant='text_type'), - emoji.emojize(':atom_symbol:', variant='emoji_type')]) + emoji.emojize(':Taurus:', variant='emoji_type'), + emoji.emojize(':admission_tickets:', variant='text_type'), + emoji.emojize(':admission_tickets:', variant='emoji_type'), + emoji.emojize(':alien:', variant='text_type'), + emoji.emojize(':alien:', variant='emoji_type'), + emoji.emojize(':atom_symbol:', variant='text_type'), + emoji.emojize(':atom_symbol:', variant='emoji_type')]) for lang_code in emoji.UNICODE_EMOJI: result = emoji.demojize(text, language=lang_code) @@ -96,32 +113,99 @@ string = '__---___--Invalid__--__-Name' assert emoji.emojize(string, False) == string + string = ':: baby:: :_: : : : : :-: :+:' + assert emoji.emojize(string, False) == string + def test_alias(): # When use_aliases=False aliases should be passed through untouched assert emoji.emojize(':soccer:', use_aliases=False) == ':soccer:' assert emoji.emojize(':soccer:', use_aliases=True) == u'\U000026BD' assert emoji.emojize(':football:', use_aliases=False) == ':football:' - assert emoji.emojize(':football:', use_aliases=True) == u'\U0001F3C8' + assert emoji.emojize(':football:', use_aliases=True) == u'\U0001F3C8' # Multiple aliases for one emoji: - assert emoji.emojize(':thumbsup:', use_aliases=True) == emoji.emojize(':+1:', use_aliases=True) - assert emoji.emojize(':thumbsup:', use_aliases=True) == emoji.emojize(':thumbs_up:', use_aliases=True) + assert emoji.emojize(':thumbsup:', use_aliases=True) == emoji.emojize( + ':+1:', use_aliases=True) + assert emoji.emojize(':thumbsup:', use_aliases=True) == emoji.emojize( + ':thumbs_up:', use_aliases=True) assert emoji.emojize(':thumbsup:', use_aliases=True) == u'\U0001f44d' + thumbsup = u'\U0001f44d' + assert emoji.demojize(thumbsup, use_aliases=True) != thumbsup + assert emoji.demojize(thumbsup, use_aliases=True) != ':thumbs_up:' + assert emoji.demojize(thumbsup, use_aliases=True) != emoji.demojize( + thumbsup, use_aliases=False) + + thailand = u'????????' + assert emoji.demojize(thailand, use_aliases=True) != thailand + assert emoji.demojize(thailand, use_aliases=True) != ':Thailand:' + assert emoji.demojize(thailand, use_aliases=True) != emoji.demojize( + thailand, use_aliases=False) + assert emoji.demojize(thailand, use_aliases=True, version=1.0) != emoji.demojize( + thailand, use_aliases=True) + + # No alias + for emj, emoji_data in emoji.EMOJI_DATA.items(): + if emoji_data['status'] != emoji.STATUS['fully_qualified']: + continue + if 'alias' not in emoji_data: + assert emoji.emojize(emoji_data['en'], use_aliases=True) != emoji_data['en'] + assert emoji.demojize(emj, use_aliases=True) == emoji_data['en'] + + # language='alias' + assert emoji.emojize(':flag_for_Thailand:', use_aliases=True, language="en") == thailand + assert emoji.emojize(':flag_for_Thailand:', language="alias") == thailand + assert emoji.emojize(':flag_for_Thailand:', language="alias", use_aliases=True) == thailand + assert emoji.demojize(thailand, use_aliases=True, language="en") == ':flag_for_Thailand:' + assert emoji.demojize(thailand, language="alias") ==':flag_for_Thailand:' + assert emoji.demojize(thailand, language="alias", use_aliases=True) ==':flag_for_Thailand:' + def test_invalid_alias(): # Invalid aliases should be passed through untouched assert emoji.emojize(':tester:', use_aliases=True) == ':tester:' assert emoji.emojize(':footbal:', use_aliases=True) == ':footbal:' assert emoji.emojize(':socer:', use_aliases=True) == ':socer:' - emoji.emojize(':socer:', use_aliases=True, variant="text_type") == ':socer:' + assert emoji.emojize(':socer:', use_aliases=True, + variant="text_type") == ':socer:' + + +def test_alias_wrong_language(): + # Alias with wrong languages + thailand = u'????????' + with pytest.warns(UserWarning) as w: + emoji.emojize(':flag_for_Thailand:', use_aliases=True, language="es") + with pytest.warns(UserWarning) as w: + assert emoji.emojize(':flag_for_Thailand:', use_aliases=True, language="de") == thailand + with pytest.warns(UserWarning) as w: + assert emoji.emojize(':flag_for_Thailand:', use_aliases=True, language="es") == thailand + assert emoji.emojize(':flag_for_Thailand:', use_aliases=False, language="es") == ':flag_for_Thailand:' + assert emoji.emojize(':flag_for_Thailand:', use_aliases=True, language="en") == thailand + assert emoji.emojize(':flag_for_Thailand:', use_aliases=False, language="alias") == thailand + assert emoji.emojize(':flag_for_Thailand:', use_aliases=True, language="alias") == thailand + + with pytest.warns(UserWarning) as w: + emoji.demojize(thailand, use_aliases=True, language="es") + with pytest.warns(UserWarning) as w: + assert emoji.demojize(thailand, use_aliases=True, language="es") == ':flag_for_Thailand:' + assert emoji.demojize(thailand, use_aliases=False, language="es") == ':bandera_tailandia:' + assert emoji.demojize(thailand, use_aliases=True, language="en") == ':flag_for_Thailand:' + assert emoji.demojize(thailand, use_aliases=False, language="alias") == ':flag_for_Thailand:' + assert emoji.demojize(thailand, use_aliases=True, language="alias") == ':flag_for_Thailand:' def test_demojize_name_only(): - for name in emoji.EMOJI_UNICODE.keys(): - oneway = emoji.emojize(name, False) - roundtrip = emoji.demojize(oneway) - assert name == roundtrip, '%s != %s' % (name, roundtrip) + for emj, item in emoji.EMOJI_DATA.items(): + if item['status'] != emoji.STATUS['fully_qualified']: + continue + for lang_code in emoji.UNICODE_EMOJI: + if not lang_code in item: + continue + name = item[lang_code] + oneway = emoji.emojize(name, use_aliases=False, language=lang_code) + assert oneway == emj + roundtrip = emoji.demojize(oneway, language=lang_code) + assert name == roundtrip, '%s != %s' % (name, roundtrip) def test_demojize_complicated_string(): @@ -131,11 +215,34 @@ assert constructed == destructed, '%s != %s' % (constructed, destructed) +def test_demojize_delimiters(): + for e in [u'\U000026BD', u'\U0001f44d', u'\U0001F3C8']: + for d in [(":", ":"), ("a", "b"), ("!", "!!"), ("123", "456"), (u"????", u"????")]: + s = emoji.demojize(e, delimiters=d) + assert s.startswith(d[0]) + assert s.endswith(d[1]) + + text = u"Example of a text with an emoji%sin a sentence" + for e in [u'\U000026BD', u'\U0001f44d', u'\U0001F3C8']: + for d in [(":", ":"), ("!", "-!-"), ("-", "-"), (":", "::"), ("::", "::"), (u"????", u"????")]: + text_with_unicode = text % e + demojized_text = emoji.demojize(text_with_unicode, delimiters=d) + assert text_with_unicode != demojized_text + assert e not in demojized_text + assert emoji.emojize(demojized_text, delimiters=d) == text_with_unicode + text_with_emoji = text % emoji.demojize(e, delimiters=d) + assert demojized_text == text_with_emoji + assert emoji.emojize(text_with_emoji, delimiters=d) == text_with_unicode + + def test_emoji_lis(): - assert emoji.emoji_lis('Hi, I am fine. ????') == [{'location': 15, 'emoji': '????'}] + assert emoji.emoji_lis('Hi, I am ???? test')[0]['location'] == 9 assert emoji.emoji_lis('Hi') == [] - if len('Hello ????????????') < 10: # skip this test on python with UCS-2 as the string length/positions are different - assert emoji.emoji_lis('Hello ????????????') == [{'emoji': '????????', 'location': 6}, {'emoji': '????', 'location': 8}] + if len('Hello ????????????') < 10: # skip these tests on python with UCS-2 as the string length/positions are different + assert emoji.emoji_lis('Hi, I am fine. ????') == [ + {'location': 15, 'emoji': '????'}] + assert emoji.emoji_lis('Hello ????????????') == [ + {'emoji': '????????', 'location': 6}, {'emoji': '????', 'location': 8}] def test_distinct_emoji_lis(): @@ -149,6 +256,7 @@ assert emoji.emoji_count('Hi, I am fine. ????') == 1 assert emoji.emoji_count('Hi') == 0 assert emoji.emoji_count('Hello ????????????') == 2 + assert emoji.emoji_count('Hello ????????????????????', unique=True) == 2 def test_replace_emoji(): @@ -167,3 +275,159 @@ assert emoji.is_emoji('????') assert not emoji.is_emoji('H') assert emoji.is_emoji('????????') + + +def test_long_emoji(): + assert emoji.demojize('This is \U0001F9D1\U0001F3FC\U0000200D\U0001F37C example text') == 'This is :person_feeding_baby_medium-light_skin_tone: example text' + assert emoji.demojize('This is \U0001f468\U0001f3ff\u200d\u2764\ufe0f\u200d\U0001f468\U0001f3ff example text \U0001F469\U0001F3FB\U0000200D\U0001F91D\U0000200D\U0001F468\U0001F3FF') == 'This is :couple_with_heart_man_man_dark_skin_tone: example text :woman_and_man_holding_hands_light_skin_tone_dark_skin_tone:' + assert emoji.demojize('This is \U0001f468\U0001f3ff\u200d\u2764\ufe0f\u200d\U0001f468\U0001f3ff\U0001f468\U0001f3ff\u200d\u2764\ufe0f\u200d\U0001f48b\u200d\U0001f468\U0001f3ff example text \U0001F469\U0001F3FB\U0000200D\U0001F91D\U0000200D\U0001F468\U0001F3FF') == 'This is :couple_with_heart_man_man_dark_skin_tone::kiss_man_man_dark_skin_tone: example text :woman_and_man_holding_hands_light_skin_tone_dark_skin_tone:' + assert emoji.demojize('\U0001F46B\U0001F3FB This is \U0001f468\U0001f3ff\U0001f468\U0001f3ff\u200d\u2764\ufe0f\u200d\U0001f468\U0001f3ff\U0001f468\U0001f3ff\u200d\u2764\ufe0f\u200d\U0001f48b\u200d\U0001f468\U0001f3ff example text \U0001F469\U0001F3FB\U0000200D\U0001F91D\U0000200D\U0001F468\U0001F3FF') == ':woman_and_man_holding_hands_light_skin_tone: This is :man_dark_skin_tone::couple_with_heart_man_man_dark_skin_tone::kiss_man_man_dark_skin_tone: example text :woman_and_man_holding_hands_light_skin_tone_dark_skin_tone:' + assert emoji.demojize('\U0001F46B\U0001F3FB\U0001F46B\U0001F3FB\U0001F469\U0001F3FB\U0000200D\U0001F91D\U0000200D\U0001F468\U0001F3FF\U0001FAF1\U0001F3FD\U0001FAF1\U0001F3FD\U0000200D\U0001FAF2\U0001F3FF') == ':woman_and_man_holding_hands_light_skin_tone::woman_and_man_holding_hands_light_skin_tone::woman_and_man_holding_hands_light_skin_tone_dark_skin_tone::rightwards_hand_medium_skin_tone::handshake_medium_skin_tone_dark_skin_tone:' + s = ":crossed_fingers_medium-light_skin_tone::crossed_fingers::crossed_fingers_dark_skin_tone:" + assert emoji.demojize(emoji.demojize(s)) == s + + +def test_untranslated(): + for emj, item in emoji.EMOJI_DATA.items(): + if item['status'] != emoji.STATUS['fully_qualified']: + continue + if 'es' not in item: + # untranslated + value = emoji.emojize(item['en'], language='en') + roundtrip = emoji.demojize(value, language='es') + assert roundtrip == value, '%s != %s (from %s)' % (ascii(roundtrip), ascii(value), item['en']) + else: + # translated + value = emoji.emojize(item['en'], language='en') + roundtrip = emoji.demojize(value, language='es') + assert roundtrip == item['es'], '%s != %s' % (roundtrip, item['es']) + + +def test_text(): + UCS2 = len('Hello ????????????') > 9 # don't break up characters on python with UCS-2 + + text = u"""Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +Excepteur sint occaecat in reprehenderit in cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +Str???? pchn???? ko???? w quiz g??d??b vel fax myj??. +H??j bly gom vandt fr??k sexquiz p?? wc. +?????????? ???? ?????? ???????? ???????????? ?????????????????????? ??????????, ???? ?????????? ??????. +???? ?????? ?????? ?? ???????? ???????????? ?????????????? ??????????????. +?????? ???????? ?????? ???????? ?????? ?????????? ??? ???????? ???????? ?????????? ?????? ???????????? +?????? ?????? ?????? ?????? ???????? ??????, ?????? +?????????????????? ?????? ??????????????? ???????????? ??????????????? ???????????????????????? ?????? ???????????? ???????????? ?????? ????????????????????? ???????????? ???????????? ?????????????????????????????? ??????????????? ?????????????????????, ????????????????????? ?????? ?????????????????? ???????????? ?????? ???????????? ?????????????????? ????????? +????????????????????? ??????????????? ????????????????????? ??????????????? ????????????????????? ??????????????? ????????????????????? ??????????????? +????????????????????????????????? +Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +""" + + def add_random_emoji(text, lst, select=lambda emj_data: emj_data['en']): + + text = text + + emoji_list = [] + text_with_unicode = u"" + text_with_placeholder = u"" + for i in range(0, len(text), 10): + while True: + emj, emj_data = random.choice(lst) + placeholder = select(emj_data) + if placeholder: + break + + if UCS2: + j = text.find(u" ", i, i + 10) + if j == -1: + continue + else: + j = random.randint(i, i + 10) + + text_with_unicode += text[i:j] + text_with_unicode += emj + text_with_unicode += text[j:i + 10] + + text_with_placeholder += text[i:j] + text_with_placeholder += placeholder + text_with_placeholder += text[j:i + 10] + + emoji_list.append(emj) + + return text_with_unicode, text_with_placeholder, emoji_list + + def clean(s): + return s.replace(u'\u200d', '').replace(u'\ufe0f', '') + + all_emoji_list = list(emoji.EMOJI_DATA.items()) + qualified_emoji_list = list((emj, item) for emj, item in emoji.EMOJI_DATA.items() if item['status'] == emoji.STATUS['fully_qualified']) + + # qualified emoji + text_with_unicode, text_with_placeholder, emoji_list = add_random_emoji(text, qualified_emoji_list) + assert emoji.demojize(text_with_unicode) == text_with_placeholder + assert emoji.emojize(text_with_placeholder) == text_with_unicode + if not UCS2: + assert emoji.replace_emoji(text_with_unicode, u'') == text + assert set(emoji.distinct_emoji_lis(text_with_unicode)) == set(emoji_list) + for i, lis in enumerate(emoji.emoji_lis(text_with_unicode)): + assert lis['emoji'] == emoji_list[i] + + # qualified emoji from "es" + selector = lambda emoji_data: emoji_data["es"] if "es" in emoji_data else False + text_with_unicode, text_with_placeholder, emoji_list = add_random_emoji(text, qualified_emoji_list, selector) + assert emoji.demojize(text_with_unicode, language="es") == text_with_placeholder + assert emoji.emojize(text_with_placeholder, language="es") == text_with_unicode + if not UCS2: + assert emoji.replace_emoji(text_with_unicode, u'') == text + assert set(emoji.distinct_emoji_lis(text_with_unicode)) == set(emoji_list) + for i, lis in enumerate(emoji.emoji_lis(text_with_unicode)): + assert lis['emoji'] == emoji_list[i] + + # qualified emoji from "alias" + selector = lambda emoji_data: emoji_data["alias"][0] if "alias" in emoji_data else False + text_with_unicode, text_with_placeholder, emoji_list = add_random_emoji(text, qualified_emoji_list, selector) + assert emoji.demojize(text_with_unicode, use_aliases=True) == text_with_placeholder + assert emoji.emojize(text_with_placeholder, use_aliases=True) == text_with_unicode + if not UCS2: + assert emoji.replace_emoji(text_with_unicode, u'') == text + assert set(emoji.distinct_emoji_lis(text_with_unicode)) == set(emoji_list) + for i, lis in enumerate(emoji.emoji_lis(text_with_unicode)): + assert lis['emoji'] == emoji_list[i] + + # all emoji + text_with_unicode, text_with_placeholder, emoji_list = add_random_emoji(text, all_emoji_list) + assert emoji.demojize(text_with_unicode) == text_with_placeholder + assert clean(emoji.emojize(text_with_placeholder)) == clean(text_with_unicode) + if not UCS2: + assert emoji.replace_emoji(text_with_unicode, u'') == text + assert set(emoji.distinct_emoji_lis(text_with_unicode)) == set(emoji_list) + for i, lis in enumerate(emoji.emoji_lis(text_with_unicode)): + assert lis['emoji'] == emoji_list[i] + + +def test_text_multiple_times(): + # Run test_text() multiple times because it relies on a random text + for i in range(100): + test_text() + + +def test_invalid_chars(): + invalidchar = u"\U0001F20F" + assert emoji.demojize(invalidchar) == invalidchar, "%r != %r" % (ascii(emoji.demojize(invalidchar)), ascii(invalidchar)) + assert emoji.demojize(invalidchar) == invalidchar, "%r != %r" % (ascii(emoji.demojize(invalidchar)), ascii(invalidchar)) + + invalidchar = u"u\2302 ???" + assert emoji.demojize(invalidchar) == invalidchar, "%r != %r" % (ascii(emoji.demojize(invalidchar)), ascii(invalidchar)) + assert emoji.demojize(invalidchar) == invalidchar, "%r != %r" % (ascii(emoji.demojize(invalidchar)), ascii(invalidchar)) + + +def test_combine_with_component(): + text = u"Example of a combined emoji%sin a sentence" + + combined = emoji.emojize(text % u":woman_dark_skin_tone:") + seperated = emoji.emojize(text % u":woman::dark_skin_tone:") + assert combined == seperated, "%r != %r" % (ascii(combined), ascii(seperated)) + + combined = emoji.emojize(text % u":woman_dark_skin_tone_white_hair:") + seperated = emoji.emojize(text % u":woman::dark_skin_tone:\u200d:white_hair:") + assert combined == seperated, "%r != %r" % (ascii(combined), ascii(seperated)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/emoji-1.6.1/tests/test_dict.py new/emoji-1.6.3/tests/test_dict.py --- old/emoji-1.6.1/tests/test_dict.py 2021-10-04 22:12:50.000000000 +0200 +++ new/emoji-1.6.3/tests/test_dict.py 2021-12-06 12:09:55.000000000 +0100 @@ -8,6 +8,8 @@ import emoji _all_languages = None + + def all_languages(): """List of all language keys in EMOJI_DATA""" @@ -48,6 +50,7 @@ assert name not in seen seen[name] = 0 + def test_duplicate_names(): """Check that there are no duplicate names in the fully_qualified except for differnt variants""" for lang in all_languages():
