Skip to content

Commit b3658d3

Browse files
authored
Add replace colons support for zwj joined emojis with skintones (#7)
Zero-width-joined emojis would 'just work' if you appended the skin tone after them, e.g. 👨:skin-tone-3:. However, this breaks down for multi-codepoint emojis with skin tones, e.g. :male-technologist::skin-tone-3:, which is actually 👨:skin-tone-3:💻 (the skin tone modifier must follow the first code point, not the end of the sequence). This PR handles these cases explicitly, by utilising the skin-variation data from the emoji source while retaining the previous functionality.
1 parent 88bdd04 commit b3658d3

3 files changed

Lines changed: 34 additions & 16 deletions

File tree

emoji_data_python/replacement.py

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import re
2+
from typing import cast
23

34

4-
def replace_colons(text: str, strip: bool=False) -> str:
5+
def replace_colons(text: str, strip: bool = False) -> str:
56
"""Parses a string with colon encoded emoji and renders found emoji.
67
Unknown emoji are left as is unless `strip` is set to `True`
78
@@ -11,23 +12,29 @@ def replace_colons(text: str, strip: bool=False) -> str:
1112
>>> emoji_data_python.replace_colons('Hello world ! :wave::skin-tone-3: :earth_africa: :exclamation:')
1213
'Hello world ! 👋🏼 🌍 ❗'
1314
"""
14-
from emoji_data_python import emoji_short_names
15+
from emoji_data_python import emoji_short_names, EmojiChar
1516

1617
def emoji_repl(matchobj) -> str:
17-
match = matchobj.group(0)
18-
codes = match.split(':')
19-
res = ''
20-
for code in codes:
21-
if len(code) > 0:
22-
try:
23-
res += emoji_short_names.get(code.replace('-', '_')).char
24-
except AttributeError:
25-
if not strip:
26-
res += f':{code}:'
27-
28-
return res
29-
30-
return re.sub(r'\:[a-zA-Z0-9-_+]+\:(\:skin-tone-[2-6]\:)?', emoji_repl, text)
18+
emoji_match = matchobj.group(1)
19+
base_emoji = emoji_short_names.get(emoji_match.strip(':').replace('-', '_'))
20+
21+
if matchobj.lastindex == 2:
22+
skin_tone_match = matchobj.group(2)
23+
skin_tone = cast(EmojiChar, emoji_short_names.get(skin_tone_match.strip(':')))
24+
25+
if base_emoji is None:
26+
return f'{emoji_match if strip is False else ""}{skin_tone.char}'
27+
28+
emoji_with_skin_tone = base_emoji.skin_variations.get(skin_tone.unified)
29+
if emoji_with_skin_tone is None:
30+
return f'{base_emoji.char}{skin_tone.char}'
31+
return emoji_with_skin_tone.char
32+
33+
if base_emoji is None:
34+
return f'{emoji_match if strip is False else ""}'
35+
return base_emoji.char
36+
37+
return re.sub(r'(\:[a-zA-Z0-9-_+]+\:)(\:skin-tone-[2-6]\:)?', emoji_repl, text)
3138

3239

3340
def get_emoji_regex():

tests/test_conversion.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ def test_longer_unified(self):
1212

1313
def test_multiple_unified_to_char(self):
1414
self.assertEqual('👨‍🌾', unified_to_char('1F468-200D-1F33E'))
15+
self.assertEqual('👩🏼‍🌾', unified_to_char('1F469-1F3FC-200D-1F33E'))
1516
self.assertEqual('🇳🇬', unified_to_char('1F1F3-1F1EC'))
1617
self.assertEqual('\U0001F1F3\U0001F1EC', unified_to_char('1F1F3-1F1EC'))
1718
self.assertEqual('4⃣', unified_to_char('0034-20E3'))
@@ -24,6 +25,7 @@ def test_longer_char_to_unified(self):
2425
self.assertEqual('1F1E6', char_to_unified('\U0001F1E6'))
2526

2627
def test_multiple_char_to_unified(self):
28+
self.assertEqual('1F469-1F3FC-200D-1F33E', char_to_unified('👩🏼‍🌾'))
2729
self.assertEqual('1F468-200D-1F33E', char_to_unified('👨‍🌾'))
2830
self.assertEqual('1F1F3-1F1EC', char_to_unified('🇳🇬'))
2931
self.assertEqual('1F1F3-1F1EC', char_to_unified('\U0001F1F3\U0001F1EC'))

tests/test_replacement.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ def test_replace_colons(self):
1111
def test_skin_tone(self):
1212
self.assertEqual('👋🏼', replace_colons(':wave::skin-tone-3:'))
1313

14+
def test_skin_tone_appended_to_emoji_with_no_skin_tone(self):
15+
self.assertEqual('💩🏼', replace_colons(':poop::skin-tone-3:'))
16+
1417
def test_underscore_hyphenated_codes(self):
1518
self.assertEqual('😙', replace_colons(':kissing_smiling_eyes:'))
1619
self.assertEqual('😘', replace_colons(':kissing-heart:'))
@@ -24,11 +27,17 @@ def test_zwj_emoji(self):
2427
self.assertEqual('👨‍👩‍👦', replace_colons(':man-woman-boy:'))
2528
self.assertEqual('👨‍🌾', replace_colons(':male-farmer:'))
2629

30+
def test_zwj_emoji_skin_tone(self):
31+
"""This tests zwj emojis that also have a skin tone"""
32+
self.assertEqual('👨🏼‍🌾', replace_colons(':male-farmer::skin-tone-3:'))
33+
2734
def test_unknown_code(self):
2835
self.assertEqual('💩💩 :poo:🏼', replace_colons(':hankey::poop: :poo::skin-tone-3:'))
36+
self.assertEqual('💩:poo: 🐶 :poo:', replace_colons(':poop::poo: :dog: :poo:'))
2937

3038
def test_strip_unknown_code(self):
3139
self.assertEqual('💩💩 🏼', replace_colons(':hankey::poop: :poo::skin-tone-3:', strip=True))
40+
self.assertEqual('💩 🐶 ', replace_colons(':poop::poo: :dog: :poo:', strip=True))
3241

3342
def test_multiline_sentence(self):
3443
self.assertEqual("""

0 commit comments

Comments
 (0)