-
Notifications
You must be signed in to change notification settings - Fork 13
PPR C++ Tracer Bullet #556
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
mkolodner-sc
wants to merge
12
commits into
mkolodner-sc/cpp-infrastructure
Choose a base branch
from
mkolodner-sc/cpp_ppr_tracer
base: mkolodner-sc/cpp-infrastructure
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
87dc95b
Initial commit
mkolodner-sc a231796
small precision fix
mkolodner-sc a19db88
Optimize
mkolodner-sc fed3815
Add explanatory comments to ppr_forward_push.cpp for C++ newcomers
mkolodner-sc 906df01
Apply clang-format to ppr_forward_push.cpp
mkolodner-sc dd118ef
Move PPR C++ to gigl/csrc following PyTorch csrc conventions
mkolodner-sc c66a6e5
Update
mkolodner-sc 0a43cff
Merge branch 'mkolodner-sc/cpp-infrastructure' into mkolodner-sc/cpp_…
mkolodner-sc 6e63172
Update
mkolodner-sc 643470c
Merge branch 'mkolodner-sc/cpp-infrastructure' into mkolodner-sc/cpp_…
mkolodner-sc dff6c86
Fix merge conflicts
mkolodner-sc c16dd9d
Fix type check and remove unused etypes from num_sampled_edges
mkolodner-sc File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
try:
    # Re-export the compiled extension's entry point so callers can simply
    # `from <this package> import PPRForwardPushState`.
    from gigl.csrc.sampling.ppr_forward_push import PPRForwardPushState
except ImportError as e:
    # The extension is built out-of-band; replace the raw loader error with
    # an actionable message while chaining the original for debugging.
    raise ImportError(
        "PPR C++ extension not compiled. "
        "Run `make build_cpp_extensions` from the GiGL root to build it."
    ) from e

__all__ = ["PPRForwardPushState"]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,247 @@ | ||
| #include "ppr_forward_push.h" | ||
|
|
||
| PPRForwardPushState::PPRForwardPushState( | ||
| torch::Tensor seed_nodes, int32_t seed_node_type_id, double alpha, | ||
| double requeue_threshold_factor, | ||
| std::vector<std::vector<int32_t>> node_type_to_edge_type_ids, | ||
| std::vector<int32_t> edge_type_to_dst_ntype_id, std::vector<torch::Tensor> degree_tensors) | ||
| : alpha_(alpha), | ||
| one_minus_alpha_(1.0 - alpha), | ||
| requeue_threshold_factor_(requeue_threshold_factor), | ||
| // std::move transfers ownership of each vector into the member variable | ||
| // without copying its contents — equivalent to Python's list hand-off | ||
| // when you no longer need the original. | ||
| node_type_to_edge_type_ids_(std::move(node_type_to_edge_type_ids)), | ||
| edge_type_to_dst_ntype_id_(std::move(edge_type_to_dst_ntype_id)), | ||
| degree_tensors_(std::move(degree_tensors)) { | ||
| TORCH_CHECK(seed_nodes.dim() == 1, "seed_nodes must be 1D"); | ||
| batch_size_ = static_cast<int32_t>(seed_nodes.size(0)); | ||
| num_node_types_ = static_cast<int32_t>(node_type_to_edge_type_ids_.size()); | ||
|
|
||
| // Allocate per-seed, per-node-type tables. | ||
| // .assign(n, val) fills a vector with n copies of val — like [val] * n in Python. | ||
| ppr_scores_.assign(batch_size_, | ||
| std::vector<std::unordered_map<int32_t, double>>(num_node_types_)); | ||
| residuals_.assign(batch_size_, | ||
| std::vector<std::unordered_map<int32_t, double>>(num_node_types_)); | ||
| queue_.assign(batch_size_, std::vector<std::unordered_set<int32_t>>(num_node_types_)); | ||
| queued_nodes_.assign(batch_size_, | ||
| std::vector<std::unordered_set<int32_t>>(num_node_types_)); | ||
|
|
||
| // accessor<dtype, ndim>() returns a typed view into the tensor's data that | ||
| // supports [i] indexing with bounds checking in debug builds. | ||
| auto acc = seed_nodes.accessor<int64_t, 1>(); | ||
| num_nodes_in_queue_ = batch_size_; | ||
| for (int32_t i = 0; i < batch_size_; ++i) { | ||
| int32_t seed = static_cast<int32_t>(acc[i]); | ||
| // PPR initialisation: each seed starts with residual = alpha (the | ||
| // restart probability). The first push will move alpha into ppr_score | ||
| // and distribute (1-alpha)*alpha to the seed's neighbors. | ||
| residuals_[i][seed_node_type_id][seed] = alpha_; | ||
| queue_[i][seed_node_type_id].insert(seed); | ||
| } | ||
| } | ||
|
|
||
| std::optional<std::unordered_map<int32_t, torch::Tensor>> PPRForwardPushState::drain_queue() { | ||
| if (num_nodes_in_queue_ == 0) { | ||
| return std::nullopt; | ||
| } | ||
|
|
||
| // Reset the snapshot from the previous iteration. | ||
| for (int32_t s = 0; s < batch_size_; ++s) | ||
| for (auto& qs : queued_nodes_[s]) | ||
| qs.clear(); | ||
|
|
||
| // nodes_to_lookup[eid] = set of node IDs that need a neighbor fetch for | ||
| // edge type eid this round. Using a set deduplicates nodes that appear | ||
| // in multiple seeds' queues: we only fetch each (node, etype) pair once. | ||
| std::unordered_map<int32_t, std::unordered_set<int32_t>> nodes_to_lookup; | ||
|
|
||
| for (int32_t s = 0; s < batch_size_; ++s) { | ||
| for (int32_t nt = 0; nt < num_node_types_; ++nt) { | ||
| if (queue_[s][nt].empty()) | ||
| continue; | ||
|
|
||
| // Move the live queue into the snapshot (no data copy — O(1)). | ||
| queued_nodes_[s][nt] = std::move(queue_[s][nt]); | ||
| queue_[s][nt].clear(); | ||
| num_nodes_in_queue_ -= static_cast<int32_t>(queued_nodes_[s][nt].size()); | ||
|
|
||
| for (int32_t node_id : queued_nodes_[s][nt]) { | ||
| for (int32_t eid : node_type_to_edge_type_ids_[nt]) { | ||
| if (neighbor_cache_.find(pack_key(node_id, eid)) == neighbor_cache_.end()) { | ||
| nodes_to_lookup[eid].insert(node_id); | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| std::unordered_map<int32_t, torch::Tensor> result; | ||
| for (auto& [eid, node_set] : nodes_to_lookup) { | ||
| std::vector<int64_t> ids(node_set.begin(), node_set.end()); | ||
| result[eid] = torch::tensor(ids, torch::kLong); | ||
| } | ||
| return result; | ||
| } | ||
|
|
||
| void PPRForwardPushState::push_residuals( | ||
| const std::unordered_map<int32_t, std::tuple<torch::Tensor, torch::Tensor, torch::Tensor>>& | ||
| fetched_by_etype_id) { | ||
| // Step 1: Unpack the input map into a C++ map keyed by pack_key(node_id, etype_id) | ||
| // for fast lookup during the residual-push loop below. | ||
| std::unordered_map<uint64_t, std::vector<int32_t>> fetched; | ||
| for (const auto& [eid, tup] : fetched_by_etype_id) { | ||
| const auto& node_ids_t = std::get<0>(tup); | ||
| const auto& flat_nbrs_t = std::get<1>(tup); | ||
| const auto& counts_t = std::get<2>(tup); | ||
|
|
||
| // accessor<int64_t, 1>() gives a bounds-checked, typed 1-D view into | ||
| // each tensor's data — equivalent to iterating over a NumPy array. | ||
| auto node_acc = node_ids_t.accessor<int64_t, 1>(); | ||
| auto nbr_acc = flat_nbrs_t.accessor<int64_t, 1>(); | ||
| auto cnt_acc = counts_t.accessor<int64_t, 1>(); | ||
|
|
||
| // Walk the flat neighbor list, slicing out each node's neighbors using | ||
| // the running offset into the concatenated flat buffer. | ||
| int64_t offset = 0; | ||
| for (int64_t i = 0; i < node_ids_t.size(0); ++i) { | ||
| int32_t nid = static_cast<int32_t>(node_acc[i]); | ||
| int64_t count = cnt_acc[i]; | ||
| std::vector<int32_t> nbrs(count); | ||
| for (int64_t j = 0; j < count; ++j) | ||
| nbrs[j] = static_cast<int32_t>(nbr_acc[offset + j]); | ||
| fetched[pack_key(nid, eid)] = std::move(nbrs); | ||
| offset += count; | ||
| } | ||
| } | ||
|
|
||
| // Step 2: For every node that was in the queue (captured in queued_nodes_ | ||
| // by drain_queue()), apply one PPR push step: | ||
| // a. Absorb residual into the PPR score. | ||
| // b. Distribute (1-alpha) * residual equally to each neighbor. | ||
| // c. Enqueue any neighbor whose residual now exceeds the requeue threshold. | ||
| for (int32_t s = 0; s < batch_size_; ++s) { | ||
| for (int32_t nt = 0; nt < num_node_types_; ++nt) { | ||
| if (queued_nodes_[s][nt].empty()) | ||
| continue; | ||
|
|
||
| for (int32_t src : queued_nodes_[s][nt]) { | ||
| auto& src_res = residuals_[s][nt]; | ||
| auto it = src_res.find(src); | ||
| double res = (it != src_res.end()) ? it->second : 0.0; | ||
|
|
||
| // a. Absorb: move residual into the PPR score. | ||
| ppr_scores_[s][nt][src] += res; | ||
| src_res[src] = 0.0; | ||
|
|
||
| int32_t total_deg = get_total_degree(src, nt); | ||
| // Destination-only nodes absorb residual but do not push further. | ||
| if (total_deg == 0) | ||
| continue; | ||
|
|
||
| // b. Distribute: each neighbor receives an equal share. | ||
| double res_per_nbr = one_minus_alpha_ * res / static_cast<double>(total_deg); | ||
|
|
||
| for (int32_t eid : node_type_to_edge_type_ids_[nt]) { | ||
| // Invariant: fetched and neighbor_cache_ are mutually exclusive for | ||
| // any given (node, etype) key within one iteration. drain_queue() | ||
| // only requests a fetch for nodes absent from neighbor_cache_, so a | ||
| // key is in at most one of the two. | ||
| const std::vector<int32_t>* nbr_list = nullptr; | ||
| auto fi = fetched.find(pack_key(src, eid)); | ||
| if (fi != fetched.end()) { | ||
| nbr_list = &fi->second; | ||
| } else { | ||
| auto ci = neighbor_cache_.find(pack_key(src, eid)); | ||
| if (ci != neighbor_cache_.end()) | ||
| nbr_list = &ci->second; | ||
| } | ||
| if (!nbr_list || nbr_list->empty()) | ||
| continue; | ||
|
|
||
| int32_t dst_nt = edge_type_to_dst_ntype_id_[eid]; | ||
|
|
||
| // c. Accumulate residual for each neighbor and re-enqueue if threshold | ||
| // exceeded. | ||
| for (int32_t nbr : *nbr_list) { | ||
| residuals_[s][dst_nt][nbr] += res_per_nbr; | ||
|
|
||
| double threshold = requeue_threshold_factor_ * | ||
| static_cast<double>(get_total_degree(nbr, dst_nt)); | ||
|
|
||
| if (queue_[s][dst_nt].find(nbr) == queue_[s][dst_nt].end() && | ||
| residuals_[s][dst_nt][nbr] >= threshold) { | ||
| queue_[s][dst_nt].insert(nbr); | ||
| ++num_nodes_in_queue_; | ||
|
|
||
| // Promote neighbor lists to the persistent cache: this node will | ||
| // be processed next iteration, so caching avoids a re-fetch. | ||
| for (int32_t peid : node_type_to_edge_type_ids_[dst_nt]) { | ||
| uint64_t pk = pack_key(nbr, peid); | ||
| if (neighbor_cache_.find(pk) == neighbor_cache_.end()) { | ||
| auto pfi = fetched.find(pk); | ||
| if (pfi != fetched.end()) | ||
| neighbor_cache_[pk] = pfi->second; | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| std::unordered_map<int32_t, std::tuple<torch::Tensor, torch::Tensor, torch::Tensor>> | ||
| PPRForwardPushState::extract_top_k(int32_t max_ppr_nodes) { | ||
| std::unordered_set<int32_t> active; | ||
| for (int32_t s = 0; s < batch_size_; ++s) | ||
| for (int32_t nt = 0; nt < num_node_types_; ++nt) | ||
| if (!ppr_scores_[s][nt].empty()) | ||
| active.insert(nt); | ||
|
|
||
| std::unordered_map<int32_t, std::tuple<torch::Tensor, torch::Tensor, torch::Tensor>> result; | ||
| for (int32_t nt : active) { | ||
| std::vector<int64_t> flat_ids; | ||
| std::vector<float> flat_weights; | ||
| std::vector<int64_t> valid_counts; | ||
|
|
||
| for (int32_t s = 0; s < batch_size_; ++s) { | ||
| const auto& scores = ppr_scores_[s][nt]; | ||
| int32_t k = std::min(max_ppr_nodes, static_cast<int32_t>(scores.size())); | ||
| if (k > 0) { | ||
| std::vector<std::pair<int32_t, double>> items(scores.begin(), scores.end()); | ||
| std::partial_sort( | ||
| items.begin(), items.begin() + k, items.end(), | ||
| [](const auto& a, const auto& b) { return a.second > b.second; }); | ||
|
|
||
| for (int32_t i = 0; i < k; ++i) { | ||
| flat_ids.push_back(static_cast<int64_t>(items[i].first)); | ||
| // Cast to float32 for output; internal scores stay double to | ||
| // avoid accumulated rounding errors in the push loop. | ||
| flat_weights.push_back(static_cast<float>(items[i].second)); | ||
| } | ||
| } | ||
| valid_counts.push_back(static_cast<int64_t>(k)); | ||
| } | ||
|
|
||
| result[nt] = {torch::tensor(flat_ids, torch::kLong), | ||
| torch::tensor(flat_weights, torch::kFloat), | ||
| torch::tensor(valid_counts, torch::kLong)}; | ||
| } | ||
| return result; | ||
| } | ||
|
|
||
| int32_t PPRForwardPushState::get_total_degree(int32_t node_id, int32_t ntype_id) const { | ||
| if (ntype_id >= static_cast<int32_t>(degree_tensors_.size())) | ||
| return 0; | ||
| const auto& t = degree_tensors_[ntype_id]; | ||
| if (t.numel() == 0) | ||
| return 0; | ||
| TORCH_CHECK(node_id < static_cast<int32_t>(t.size(0)), "Node ID ", node_id, | ||
| " out of range for degree tensor of ntype_id ", ntype_id, " (size=", t.size(0), | ||
| "). This indicates corrupted graph data or a sampler bug."); | ||
| // data_ptr<int32_t>() returns a raw C pointer to the tensor's int32 data buffer. | ||
| return t.data_ptr<int32_t>()[node_id]; | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,121 @@ | ||
| #pragma once | ||
|
|
||
| #include <torch/torch.h> | ||
|
|
||
| #include <algorithm> // std::partial_sort, std::min | ||
| #include <cstdint> // Fixed-width integer types: int32_t, int64_t, uint32_t, uint64_t | ||
| #include <optional> // std::optional for nullable return values | ||
| #include <tuple> // std::tuple for multi-value returns | ||
| #include <unordered_map> // std::unordered_map — like Python dict, O(1) average lookup | ||
| #include <unordered_set> // std::unordered_set — like Python set, O(1) average lookup | ||
| #include <vector> // std::vector — like Python list, contiguous in memory | ||
|
|
||
// Pack (node_id, etype_id) into one 64-bit hash-map key — a single uint64_t
// hashes more cheaply than a pair (std::unordered_map has no built-in pair
// hash).
//
// Bit layout: node_id in bits 63..32, etype_id in bits 31..0.
//
// Each input is routed through uint32_t before widening: a negative int32_t
// (e.g. -1 = 0xFFFFFFFF) would otherwise sign-extend to 64 bits and corrupt
// the other half's bits when shifted/OR'ed. The uint32_t cast keeps the raw
// bit pattern with no sign extension.
static inline uint64_t pack_key(int32_t node_id, int32_t etype_id) {
  const uint64_t hi = static_cast<uint32_t>(node_id);
  const uint64_t lo = static_cast<uint32_t>(etype_id);
  return (hi << 32) | lo;
}
|
|
||
| // C++ kernel for the PPR Forward Push algorithm (Andersen et al., 2006). | ||
| // | ||
| // All hot-loop state (scores, residuals, queue, neighbor cache) lives inside | ||
| // this object. The distributed neighbor fetch is kept in Python because it | ||
| // involves async RPC calls that C++ cannot drive directly. | ||
| // | ||
| // Owned state: ppr_scores, residuals, queue, queued_nodes, neighbor_cache. | ||
| // Python retains ownership of: the distributed neighbor fetch (_batch_fetch_neighbors). | ||
| // | ||
| // Typical call sequence per batch: | ||
| // 1. PPRForwardPushState(seed_nodes, ...) — init per-seed residuals / queue | ||
| // while True: | ||
| // 2. drain_queue() — drain queue → nodes needing lookup | ||
| // 3. <Python: _batch_fetch_neighbors(...)> — distributed RPC fetch (stays in Python) | ||
| // 4. push_residuals(fetched_by_etype_id) — push residuals, update queue | ||
| // 5. extract_top_k(max_ppr_nodes) — top-k selection per seed per node type | ||
| class PPRForwardPushState { | ||
| public: | ||
| PPRForwardPushState(torch::Tensor seed_nodes, int32_t seed_node_type_id, double alpha, | ||
| double requeue_threshold_factor, | ||
| std::vector<std::vector<int32_t>> node_type_to_edge_type_ids, | ||
| std::vector<int32_t> edge_type_to_dst_ntype_id, | ||
| std::vector<torch::Tensor> degree_tensors); | ||
|
|
||
| // Drain all queued nodes and return {etype_id: tensor[node_ids]} for batch | ||
| // neighbor lookup. Also snapshots the drained nodes into queued_nodes_ for | ||
| // use by push_residuals(). | ||
| // | ||
| // Return value semantics: | ||
| // - std::nullopt → queue was already empty; convergence achieved; stop the loop. | ||
| // - empty map → nodes were drained but all were cached; call push_residuals({}). | ||
| // - non-empty map → {etype_id → 1-D int64 tensor of node IDs} needing neighbor lookup. | ||
| std::optional<std::unordered_map<int32_t, torch::Tensor>> drain_queue(); | ||
|
|
||
| // Push residuals to neighbors given the fetched neighbor data. | ||
| // | ||
| // fetched_by_etype_id: {etype_id: (node_ids_tensor, flat_nbrs_tensor, counts_tensor)} | ||
| // - node_ids_tensor: [N] int64 — source node IDs fetched for this edge type | ||
| // - flat_nbrs_tensor: [sum(counts)] int64 — all neighbor lists concatenated flat | ||
| // - counts_tensor: [N] int64 — neighbor count for each source node | ||
| void push_residuals(const std::unordered_map< | ||
| int32_t, std::tuple<torch::Tensor, torch::Tensor, torch::Tensor>>& | ||
| fetched_by_etype_id); | ||
|
|
||
| // Extract top-k PPR nodes per seed per node type. | ||
| // | ||
| // Returns {ntype_id: (flat_ids_tensor, flat_weights_tensor, valid_counts_tensor)}. | ||
| // Only node types that received any PPR score are included in the output. | ||
| // | ||
| // Output layout for a batch of B seeds: | ||
| // flat_ids[0 : valid_counts[0]] → top-k nodes for seed 0 | ||
| // flat_ids[valid_counts[0] : valid_counts[0]+valid_counts[1]] → top-k for seed 1 | ||
| // ... | ||
| std::unordered_map<int32_t, std::tuple<torch::Tensor, torch::Tensor, torch::Tensor>> | ||
| extract_top_k(int32_t max_ppr_nodes); | ||
|
|
||
| private: | ||
| // Look up the total (across all edge types) out-degree of a node. | ||
| // Returns 0 for destination-only node types (no outgoing edges). | ||
| int32_t get_total_degree(int32_t node_id, int32_t ntype_id) const; | ||
|
|
||
| // ------------------------------------------------------------------------- | ||
| // Scalar algorithm parameters | ||
| // ------------------------------------------------------------------------- | ||
| double alpha_; // Restart probability | ||
| double one_minus_alpha_; // 1 - alpha, precomputed to avoid repeated subtraction | ||
| double requeue_threshold_factor_; // alpha * eps; multiplied by degree to get per-node threshold | ||
|
|
||
| int32_t batch_size_; // Number of seeds in the current batch | ||
| int32_t num_node_types_; // Total number of node types (homo + hetero) | ||
| int32_t num_nodes_in_queue_{0}; // Running count of nodes across all seeds / types | ||
|
|
||
| // ------------------------------------------------------------------------- | ||
| // Graph structure (read-only after construction) | ||
| // ------------------------------------------------------------------------- | ||
| std::vector<std::vector<int32_t>> node_type_to_edge_type_ids_; | ||
| std::vector<int32_t> edge_type_to_dst_ntype_id_; | ||
| std::vector<torch::Tensor> degree_tensors_; | ||
|
|
||
| // ------------------------------------------------------------------------- | ||
| // Per-seed, per-node-type PPR state (indexed [seed_idx][ntype_id]) | ||
| // ------------------------------------------------------------------------- | ||
| std::vector<std::vector<std::unordered_map<int32_t, double>>> ppr_scores_; | ||
| std::vector<std::vector<std::unordered_map<int32_t, double>>> residuals_; | ||
| std::vector<std::vector<std::unordered_set<int32_t>>> queue_; | ||
| std::vector<std::vector<std::unordered_set<int32_t>>> queued_nodes_; | ||
|
|
||
| // ------------------------------------------------------------------------- | ||
| // Neighbor cache | ||
| // ------------------------------------------------------------------------- | ||
| std::unordered_map<uint64_t, std::vector<int32_t>> neighbor_cache_; | ||
| }; |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note to self:
Look into std::map -> hashmap
Look into unordered set