class UnionFind:
    """Disjoint-set forest over the integers ``0 .. n - 1``.

    ``find`` applies path compression and ``union`` merges by size.
    """

    def __init__(self, n: int) -> None:
        # parent[x] == x marks x as the representative (root) of its set.
        self.parent = list(range(n))
        # size[r] is only maintained for elements that are currently roots.
        self.size = [1] * n

    def find(self, x: int) -> int:
        """Return the root of the set containing ``x``.

        Every node visited on the way up is re-pointed directly at the root.
        """
        if x != self.parent[x]:
            self.parent[x] = self.find(self.parent[x])
        return self.parent[x]

    def union(self, x: int, y: int) -> int:
        """Merge the sets containing ``x`` and ``y``.

        Returns:
            The root of the merged set. The same kind of value is returned
            whether or not the two elements were already in one set.
        """
        root_x = self.find(x)
        root_y = self.find(y)

        if root_x == root_y:
            return root_x

        # Attach the smaller tree under the larger one.
        if self.size[root_x] < self.size[root_y]:
            root_x, root_y = root_y, root_x

        self.parent[root_y] = root_x
        self.size[root_x] += self.size[root_y]
        return root_x


class Solution:
    def accountsMerge(self, accounts: list[list[str]]) -> list[list[str]]:
        """Merge accounts that share at least one email address.

        Accounts are first grouped by name, then a union-find is run inside
        each name group, so accounts with different names are never merged.

        Args:
            accounts: Rows of the form ``[name, email, email, ...]``.

        Returns:
            One row per merged account: the name followed by that account's
            distinct emails in sorted order. Rows are grouped by name in
            first-appearance order.
        """

        def merge_accounts_with_same_name(
            name: str, indexes: list[int]
        ) -> list[list[str]]:
            # "rank" is the position of an account inside ``indexes``; the
            # union-find works on ranks so it can be sized per name group.
            union_find = UnionFind(len(indexes))
            email_to_rank: dict[str, int] = {}
            for rank, index in enumerate(indexes):
                for email in accounts[index][1:]:
                    # The first account seen with an email owns it; a first
                    # sighting unions the rank with itself, which is a no-op.
                    union_find.union(rank, email_to_rank.setdefault(email, rank))

            root_to_emails: dict[int, set[str]] = {}
            for rank, index in enumerate(indexes):
                root = union_find.find(rank)
                root_to_emails.setdefault(root, set()).update(accounts[index][1:])

            return [[name, *sorted(emails)] for emails in root_to_emails.values()]

        name_to_indexes: dict[str, list[int]] = {}
        for index, account in enumerate(accounts):
            name_to_indexes.setdefault(account[0], []).append(index)

        accounts_merged: list[list[str]] = []
        for name, indexes in name_to_indexes.items():
            accounts_merged.extend(merge_accounts_with_same_name(name, indexes))

        return accounts_merged
class Solution:
    def accountsMerge(self, accounts: list[list[str]]) -> list[list[str]]:
        """Merge accounts that share an email, using an iterative DFS.

        Each account contributes a star graph: its first email (the "hub") is
        linked to every other email of that account. Connected components of
        the resulting graph are the merged accounts.

        Args:
            accounts: Rows of the form ``[name, email, email, ...]``. Accounts
                linked by a shared email are assumed to carry the same name;
                the name of the first such account in input order is used.

        Returns:
            One row per merged account: the name followed by the component's
            emails in sorted order. A row that has a name but no emails is
            returned unchanged as ``[name]``.
        """
        email_to_neighbors: dict[str, list[str]] = {}
        for _, *emails in accounts:
            if not emails:
                continue
            hub, *rest = emails
            # Register the hub even when it has no neighbours so that every
            # email is a key of the adjacency map.
            hub_neighbors = email_to_neighbors.setdefault(hub, [])
            for email in rest:
                hub_neighbors.append(email)
                email_to_neighbors.setdefault(email, []).append(hub)

        visited: set[str] = set()

        def collect_component(email_start: str) -> list[str]:
            """Return every email reachable from ``email_start``, each once."""
            visited.add(email_start)
            stack = [email_start]
            component = [email_start]
            while stack:
                for neighbor in email_to_neighbors[stack.pop()]:
                    # Marking on push keeps each email on the stack, and in
                    # ``component``, at most once.
                    if neighbor not in visited:
                        visited.add(neighbor)
                        stack.append(neighbor)
                        component.append(neighbor)
            return component

        accounts_merged: list[list[str]] = []
        for name, *emails in accounts:
            if not emails:
                accounts_merged.append([name])
            elif emails[0] not in visited:
                component = collect_component(emails[0])
                accounts_merged.append([name, *sorted(component)])

        return accounts_merged
class UnionFind:
    """Disjoint-set forest over the integers ``0 .. n - 1``.

    ``find`` applies path compression and ``union`` merges by size.
    """

    def __init__(self, n: int) -> None:
        # parent[x] == x marks x as the representative (root) of its set.
        self.parent = list(range(n))
        # size[r] is only maintained for elements that are currently roots.
        self.size = [1] * n

    def find(self, x: int) -> int:
        """Return the root of the set containing ``x``, compressing the path."""
        if x != self.parent[x]:
            self.parent[x] = self.find(self.parent[x])
        return self.parent[x]

    def union(self, x: int, y: int) -> int:
        """Merge the sets containing ``x`` and ``y`` and return the merged root.

        The root is returned on every path, including when the two elements
        were already in the same set.
        """
        root_x = self.find(x)
        root_y = self.find(y)

        if root_x == root_y:
            return root_x

        # Attach the smaller tree under the larger one.
        if self.size[root_x] < self.size[root_y]:
            root_x, root_y = root_y, root_x

        self.parent[root_y] = root_x
        self.size[root_x] += self.size[root_y]
        return root_x


class Solution:
    def accountsMerge(self, accounts: list[list[str]]) -> list[list[str]]:
        """Merge accounts that share at least one email address.

        Accounts are the union-find elements; two accounts are unioned when
        an email of one was already seen in the other.

        Args:
            accounts: Rows of the form ``[name, email, email, ...]``.

        Returns:
            One row per merged account: the name of the group's root account
            followed by the group's distinct emails in sorted order.
        """
        union_find = UnionFind(len(accounts))
        email_to_account_index: dict[str, int] = {}
        for account_index, (_, *emails) in enumerate(accounts):
            for email in emails:
                if email in email_to_account_index:
                    union_find.union(account_index, email_to_account_index[email])
                else:
                    # The first account seen with an email owns it.
                    email_to_account_index[email] = account_index

        # Grouping happens only after all unions, so every root is final.
        root_to_emails: dict[int, set[str]] = {}
        for account_index, (_, *emails) in enumerate(accounts):
            root = union_find.find(account_index)
            root_to_emails.setdefault(root, set()).update(emails)

        return [
            [accounts[root][0], *sorted(emails)]
            for root, emails in root_to_emails.items()
        ]