Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 33 additions & 23 deletions linkedin2username.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,8 @@ def split_name(name):
Some people have funny names. We assume the most important names are:
first name, last name, and the name right before the last name (if they have one)
"""
# Split on spaces and dashes (included repeated)
parsed = re.split(r'[\s-]+', name)
# Split on whitespace only; hyphens are part of compound names (e.g. "Jean-Charles")
parsed = re.split(r'\s+', name)

# Iterate and remove empty strings
parsed = [part for part in parsed if part]
Expand All @@ -170,61 +170,71 @@ def split_name(name):

return split_name

@staticmethod
def _hyphen_variants(name_part):
    """Return the full part plus each non-empty sub-part if hyphenated.

    Handles cases like 'davidson-smith' -> ['davidson-smith', 'davidson', 'smith']
    so callers can generate usernames for both the compound form and each component.

    A part without a hyphen is returned unchanged as a single-element list.
    Leading, trailing or doubled hyphens ('smith-', '-smith', 'a--b') would make
    str.split produce empty strings; those are dropped so callers never build a
    username from an empty component or index into an empty string.
    """
    if '-' not in name_part:
        return [name_part]
    return [name_part] + [piece for piece in name_part.split('-') if piece]

def f_last(self):
    """jsmith

    Build "<first initial><last>" usernames. The last name contributes its
    compound form plus every hyphen-separated component; when a second name
    is present it is expanded the same way.

    Returns:
        set of username strings.
    """
    initial = self.name['first'][0]
    parts = self._hyphen_variants(self.name['last'])
    if self.name['second']:
        # Build a new list rather than extending the helper's return value.
        parts = parts + self._hyphen_variants(self.name['second'])
    # Skip empty components (e.g. from a stray trailing hyphen).
    return {initial + part for part in parts if part}

def f_dot_last(self):
    """j.smith

    Build "<first initial>.<last>" usernames. The last name contributes its
    compound form plus every hyphen-separated component; when a second name
    is present it is expanded the same way.

    Returns:
        set of username strings.
    """
    prefix = self.name['first'][0] + '.'
    parts = self._hyphen_variants(self.name['last'])
    if self.name['second']:
        # Build a new list rather than extending the helper's return value.
        parts = parts + self._hyphen_variants(self.name['second'])
    # Skip empty components (e.g. from a stray trailing hyphen).
    return {prefix + part for part in parts if part}

def last_f(self):
    """smithj

    Build "<last><first initial>" usernames. The last name contributes its
    compound form plus every hyphen-separated component; when a second name
    is present it is expanded the same way.

    Returns:
        set of username strings.
    """
    initial = self.name['first'][0]
    parts = self._hyphen_variants(self.name['last'])
    if self.name['second']:
        # Build a new list rather than extending the helper's return value.
        parts = parts + self._hyphen_variants(self.name['second'])
    # Skip empty components (e.g. from a stray trailing hyphen).
    return {part + initial for part in parts if part}

def first_dot_last(self):
    """john.smith

    Build "<first>.<last>" usernames. The first name is used as-is (a
    hyphenated first name stays compound); the last name contributes its
    compound form plus every hyphen-separated component, and a second name,
    when present, is expanded the same way.

    Returns:
        set of username strings.
    """
    prefix = self.name['first'] + '.'
    parts = self._hyphen_variants(self.name['last'])
    if self.name['second']:
        # Build a new list rather than extending the helper's return value.
        parts = parts + self._hyphen_variants(self.name['second'])
    # Skip empty components (e.g. from a stray trailing hyphen).
    return {prefix + part for part in parts if part}

def first_l(self):
    """johns

    Build "<first><last initial>" usernames. The initial is taken from the
    compound last name and from each of its hyphen-separated components; a
    second name, when present, is expanded the same way.

    Returns:
        set of username strings.
    """
    first = self.name['first']
    parts = self._hyphen_variants(self.name['last'])
    if self.name['second']:
        # Build a new list rather than extending the helper's return value.
        parts = parts + self._hyphen_variants(self.name['second'])
    # An empty component (e.g. from a stray trailing hyphen) has no initial;
    # skipping it avoids an IndexError on part[0].
    return {first + part[0] for part in parts if part}

def first(self):
    """john

    Return a one-element set holding the first name exactly as stored in
    self.name['first'].
    """
    return {self.name['first']}


Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "linkedin2username"
version = "0.29"
version = "0.30"
requires-python = ">=3.10"
dependencies = [
"requests>=2.34.2",
Expand Down
138 changes: 107 additions & 31 deletions tests/test_linkedin2username.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,140 +8,206 @@
2: "John Davidson-Smith",
3: "John-Paul Smith-Robinson",
4: "José Gonzáles",
5: "🙂 Emoji Folks 🙂"
5: "🙂 Emoji Folks 🙂",
6: "Jean-Charles Martin",
7: "Madonna Wayne Gacey", # 3-word name: exercises the 'second' field in mutations
}


def test_f_last():
    """f_last() returns '<first initial><last>' for every last/second name variant."""
    expected = {
        1: {"jsmith"},
        # Hyphenated last: compound form + each part
        2: {"jdavidson-smith", "jdavidson", "jsmith"},
        # Hyphenated first and last: compound last + each last part;
        # only the initial of the compound first name is used
        3: {"jsmith-robinson", "jsmith", "jrobinson"},
        4: {"jgonzales"},
        5: {"efolks"},
        # Compound hyphenated first name contributes only its initial here
        6: {"jmartin"},
        # 3-word name: second field produces an extra variant using the middle name
        7: {"mgacey", "mwayne"},
    }
    for key, usernames in expected.items():
        mutator = NameMutator(TEST_NAMES[key])
        assert mutator.f_last() == usernames, TEST_NAMES[key]


def test_f_dot_last():
    """f_dot_last() returns '<first initial>.<last>' for every last/second name variant."""
    expected = {
        1: {"j.smith"},
        # Hyphenated last: compound form + each part
        2: {"j.davidson-smith", "j.davidson", "j.smith"},
        3: {"j.smith-robinson", "j.smith", "j.robinson"},
        4: {"j.gonzales"},
        5: {"e.folks"},
        6: {"j.martin"},
        # 3-word name: the middle name yields an extra variant
        7: {"m.gacey", "m.wayne"},
    }
    for key, usernames in expected.items():
        mutator = NameMutator(TEST_NAMES[key])
        assert mutator.f_dot_last() == usernames, TEST_NAMES[key]


def test_last_f():
    """last_f() returns '<last><first initial>' for every last/second name variant."""
    expected = {
        1: {"smithj"},
        # Hyphenated last: compound form + each part
        2: {"davidson-smithj", "davidsonj", "smithj"},
        3: {"smith-robinsonj", "smithj", "robinsonj"},
        4: {"gonzalesj"},
        5: {"folkse"},
        6: {"martinj"},
        # 3-word name: the middle name yields an extra variant
        7: {"gaceym", "waynem"},
    }
    for key, usernames in expected.items():
        mutator = NameMutator(TEST_NAMES[key])
        assert mutator.last_f() == usernames, TEST_NAMES[key]


def test_first_dot_last():
    """first_dot_last() returns '<first>.<last>' for every last/second name variant."""
    expected = {
        1: {"john.smith"},
        # Hyphenated last: compound form + each part
        2: {"john.davidson-smith", "john.davidson", "john.smith"},
        # Compound first name is preserved intact; last name variants are expanded
        3: {"john-paul.smith-robinson", "john-paul.smith", "john-paul.robinson"},
        4: {"jose.gonzales"},
        5: {"emoji.folks"},
        # The core fix for issue #82: compound first name generates the correct username
        6: {"jean-charles.martin"},
        # 3-word name: the middle name yields an extra variant
        7: {"madonna.gacey", "madonna.wayne"},
    }
    for key, usernames in expected.items():
        mutator = NameMutator(TEST_NAMES[key])
        assert mutator.first_dot_last() == usernames, TEST_NAMES[key]


def test_first_l():
    """first_l() returns '<first><last initial>' for every last/second name variant."""
    expected = {
        1: {"johns"},
        # davidson-smith[0]='d', davidson[0]='d' (dup), smith[0]='s'
        2: {"johnd", "johns"},
        # smith-robinson[0]='s', smith[0]='s' (dup), robinson[0]='r'
        3: {"john-pauls", "john-paulr"},
        4: {"joseg"},
        5: {"emojif"},
        6: {"jean-charlesm"},
        # 3-word name: the middle name's initial yields an extra variant
        7: {"madonnag", "madonnaw"},
    }
    for key, usernames in expected.items():
        mutator = NameMutator(TEST_NAMES[key])
        assert mutator.first_l() == usernames, TEST_NAMES[key]


def test_first():
    """first() returns only the first name token, with hyphens preserved."""
    expected = {
        1: {"john"},
        2: {"john"},
        # Compound first name is preserved intact
        3: {"john-paul"},
        4: {"jose"},
        5: {"emoji"},
        6: {"jean-charles"},
        # 3-word name: first() always returns only the first token regardless of second/last
        7: {"madonna"},
    }
    for key, usernames in expected.items():
        mutator = NameMutator(TEST_NAMES[key])
        assert mutator.first() == usernames, TEST_NAMES[key]


def test_hyphen_variants():
    """_hyphen_variants() returns the whole part first, then each hyphen-separated piece."""
    variants = NameMutator._hyphen_variants
    # No hyphen: the part comes back alone
    assert variants("smith") == ["smith"]
    # One hyphen: compound form followed by both components
    assert variants("davidson-smith") == ["davidson-smith", "davidson", "smith"]
    # Several hyphens: every component is listed, in order
    assert variants("a-b-c") == ["a-b-c", "a", "b", "c"]


def test_clean_name():
Expand Down Expand Up @@ -170,6 +236,16 @@ def test_split_name():
name = "brian warner is marilyn manson"
assert mutator.split_name(name) == {"first": "brian", "second": "marilyn", "last": "manson"}

# Hyphens within a name segment are preserved (not treated as word separators)
name = "jean-charles martin"
assert mutator.split_name(name) == {"first": "jean-charles", "second": "", "last": "martin"}

name = "john davidson-smith"
assert mutator.split_name(name) == {"first": "john", "second": "", "last": "davidson-smith"}

name = "john-paul smith-robinson"
assert mutator.split_name(name) == {"first": "john-paul", "second": "", "last": "smith-robinson"}


def test_find_employees():
with open("tests/mock-employee-response", "r") as infile:
Expand Down
Loading