From 2479202fb0856ccc8c79c583588da0da38a8a319 Mon Sep 17 00:00:00 2001 From: Soutaro Matsumoto Date: Thu, 21 May 2026 17:07:19 +0900 Subject: [PATCH 1/4] Update Cargo.toml --- rust/Cargo.lock | 15 +++++++++++---- rust/ruby-rbs/Cargo.toml | 1 + 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/rust/Cargo.lock b/rust/Cargo.lock index eb97bc3e3..d2d0d79f5 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -92,15 +92,15 @@ checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" [[package]] name = "hashbrown" -version = "0.15.5" +version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" [[package]] name = "indexmap" -version = "2.10.0" +version = "2.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" dependencies = [ "equivalent", "hashbrown", @@ -229,6 +229,7 @@ dependencies = [ "ruby-rbs-sys", "serde", "serde_yaml", + "xxhash-rust", ] [[package]] @@ -376,3 +377,9 @@ name = "windows_x86_64_msvc" version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + +[[package]] +name = "xxhash-rust" +version = "0.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" diff --git a/rust/ruby-rbs/Cargo.toml b/rust/ruby-rbs/Cargo.toml index 6358cc107..0ba4dd411 100644 --- a/rust/ruby-rbs/Cargo.toml +++ b/rust/ruby-rbs/Cargo.toml @@ -16,6 +16,7 @@ include = [ [dependencies] ruby-rbs-sys = { version = "0.3", path = "../ruby-rbs-sys" } +xxhash-rust = { version = "0.8", features = ["xxh3"] } [build-dependencies] serde = { version = "1.0", features = ["derive"] } From 7807b0c7fc6d8da6f988688690132c7de5f7fa0f Mon Sep 17 00:00:00 2001 From: Soutaro Matsumoto Date: Thu, 21 May 2026 17:08:54 +0900 Subject: [PATCH 2/4] Add ids module --- rust/ruby-rbs/src/ids.rs | 174 +++++++++++++++++++++++++++++++++++++++ rust/ruby-rbs/src/lib.rs | 1 + 2 files changed, 175 insertions(+) create mode 100644 rust/ruby-rbs/src/ids.rs diff --git a/rust/ruby-rbs/src/ids.rs b/rust/ruby-rbs/src/ids.rs new file mode 100644 index 000000000..aef034499 --- /dev/null +++ b/rust/ruby-rbs/src/ids.rs @@ -0,0 +1,174 @@ +//! Content-addressed, type-tagged 64-bit IDs. +//! +//! Different ID domains (interned strings, type names, ...) share the same +//! underlying `NonZeroU64` representation but are distinguished at the +//! type level via a phantom tag, so a `SymbolId` cannot be silently used +//! where a `TypeName` is expected. +//! +//! The value of an ID is the 64-bit xxh3 hash of its content (with `0` +//! folded to `1` to keep the niche optimization on `Option>`). +//! Because IDs are derived from content, two independently-built +//! interners that see the same value assign the same ID — merging is +//! just a `HashMap` union, no remap walk needed. +//! +//! ``` +//! use ruby_rbs::ids::{Id, SymbolId}; +//! enum OtherTag {} +//! type OtherId = Id; +//! +//! fn takes_symbol(_: SymbolId) {} +//! // takes_symbol(OtherId::from_hash(0)); // compile error +//! ``` + +use std::cmp::Ordering; +use std::hash::{Hash, Hasher}; +use std::marker::PhantomData; +use std::num::NonZeroU64; + +/// A 64-bit content-addressed ID tagged with a domain marker `T`. +/// +/// The tag is a zero-sized type parameter — typically an uninhabited enum — +/// used only to distinguish ID domains at the type level. +pub struct Id { + raw: NonZeroU64, + _tag: PhantomData T>, +} + +impl Id { + /// Wrap a 64-bit hash as an `Id`. A hash of `0` is folded to `1` + /// so the representation stays non-zero (enabling niche optimization + /// in `Option>`). Collisions on the folded value are vanishingly + /// rare in 2^64 space. + #[must_use] + pub fn from_hash(h: u64) -> Self { + let raw = NonZeroU64::new(h).unwrap_or(NonZeroU64::new(1).unwrap()); + Self { + raw, + _tag: PhantomData, + } + } + + /// Wrap a pre-validated non-zero value. + #[must_use] + pub fn from_raw(raw: NonZeroU64) -> Self { + Self { + raw, + _tag: PhantomData, + } + } + + /// The underlying non-zero 64-bit value. + #[must_use] + pub fn raw(self) -> NonZeroU64 { + self.raw + } + + /// The underlying 64-bit value as a plain integer. + #[must_use] + pub fn get(self) -> u64 { + self.raw.get() + } +} + +// Manual trait impls — `#[derive(...)]` on a struct with `PhantomData` +// would add unnecessary bounds on `T`, but `Id` is always copyable, +// hashable, etc. regardless of the tag. + +impl Copy for Id {} + +impl Clone for Id { + fn clone(&self) -> Self { + *self + } +} + +impl PartialEq for Id { + fn eq(&self, other: &Self) -> bool { + self.raw == other.raw + } +} + +impl Eq for Id {} + +impl PartialOrd for Id { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for Id { + fn cmp(&self, other: &Self) -> Ordering { + self.raw.cmp(&other.raw) + } +} + +impl Hash for Id { + fn hash(&self, state: &mut H) { + self.raw.hash(state); + } +} + +impl std::fmt::Debug for Id { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let tag = std::any::type_name::() + .rsplit("::") + .next() + .unwrap_or("Id"); + write!(f, "{}({:#018x})", tag, self.raw.get()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + enum FooTag {} + enum BarTag {} + type FooId = Id; + type BarId = Id; + + #[test] + fn from_hash_preserves_nonzero_value() { + let id = FooId::from_hash(42); + assert_eq!(id.get(), 42); + } + + #[test] + fn from_hash_folds_zero_to_one() { + let id = FooId::from_hash(0); + assert_eq!(id.get(), 1); + } + + #[test] + fn option_is_niche_optimized() { + assert_eq!( + std::mem::size_of::>(), + std::mem::size_of::(), + ); + assert_eq!(std::mem::size_of::(), 8); + } + + #[test] + fn equality_and_ordering() { + let a = FooId::from_hash(10); + let b = FooId::from_hash(10); + let c = FooId::from_hash(20); + assert_eq!(a, b); + assert_ne!(a, c); + assert!(a < c); + } + + #[test] + fn different_tags_are_distinct_types() { + let _foo = FooId::from_hash(1); + let _bar = BarId::from_hash(1); + // The following would not compile: + // let _: FooId = _bar; + } + + #[test] + fn debug_shows_tag_name_and_hex() { + let foo = FooId::from_hash(0xDEAD_BEEF); + assert_eq!(format!("{foo:?}"), "FooTag(0x00000000deadbeef)"); + } +} diff --git a/rust/ruby-rbs/src/lib.rs b/rust/ruby-rbs/src/lib.rs index 492bc84b4..6790f3f42 100644 --- a/rust/ruby-rbs/src/lib.rs +++ b/rust/ruby-rbs/src/lib.rs @@ -1 +1,2 @@ +pub mod ids; pub mod node; From fac08cd4e98adeb654c92e15720df297b88896c6 Mon Sep 17 00:00:00 2001 From: Soutaro Matsumoto Date: Thu, 21 May 2026 17:09:23 +0900 Subject: [PATCH 3/4] Add StringInterner --- rust/ruby-rbs/src/ids.rs | 7 ++ rust/ruby-rbs/src/interner.rs | 214 ++++++++++++++++++++++++++++++++++ rust/ruby-rbs/src/lib.rs | 1 + 3 files changed, 222 insertions(+) create mode 100644 rust/ruby-rbs/src/interner.rs diff --git a/rust/ruby-rbs/src/ids.rs b/rust/ruby-rbs/src/ids.rs index aef034499..8f6854f63 100644 --- a/rust/ruby-rbs/src/ids.rs +++ b/rust/ruby-rbs/src/ids.rs @@ -118,6 +118,13 @@ impl std::fmt::Debug for Id { } } +/// Tag for IDs produced by the string interner. +pub enum SymbolTag {} + +/// Identifier for an interned string. Content-addressed: `xxh3_64` of the +/// string bytes. +pub type SymbolId = Id; + #[cfg(test)] mod tests { use super::*; diff --git a/rust/ruby-rbs/src/interner.rs b/rust/ruby-rbs/src/interner.rs new file mode 100644 index 000000000..16f0a9785 --- /dev/null +++ b/rust/ruby-rbs/src/interner.rs @@ -0,0 +1,214 @@ +//! Content-addressed string interner producing [`SymbolId`]s. +//! +//! Each [`SymbolId`] is the `xxh3_64` hash of the interned bytes, so the +//! same string always produces the same `SymbolId` — regardless of which +//! [`StringInterner`] (and therefore which thread) it was interned in. To merge +//! per-thread interners into one, just take the union of their backing +//! maps; no `Remap` walk is needed. +//! +//! ``` +//! use ruby_rbs::interner::StringInterner; +//! +//! let mut a = StringInterner::new(); +//! let mut b = StringInterner::new(); +//! let a_string = a.intern("String"); +//! let a_int = a.intern("Integer"); +//! let b_string = b.intern("String"); +//! let b_array = b.intern("Array"); +//! +//! // Same content ⇒ same id across independent interners. +//! assert_eq!(a_string, b_string); +//! +//! let mut global = StringInterner::new(); +//! global.merge(a); +//! global.merge(b); +//! +//! assert_eq!(global.resolve(a_int), "Integer"); +//! assert_eq!(global.resolve(b_array), "Array"); +//! ``` + +use crate::ids::SymbolId; +use std::collections::HashMap; +use xxhash_rust::xxh3::xxh3_64; + +/// Interns strings and assigns each the content-addressed [`SymbolId`] +/// `xxh3_64(s.as_bytes())`. +/// +/// One `StringInterner` per thread during parallel work, then [`merge`] them all +/// into a single destination `StringInterner` for the final shared view. +/// +/// [`merge`]: Self::merge +#[derive(Default)] +pub struct StringInterner { + map: HashMap>, +} + +impl StringInterner { + #[must_use] + pub fn new() -> Self { + Self::default() + } + + /// Returns the content-addressed [`SymbolId`] for `s`, allocating + /// storage only when `s` is new to this interner. + pub fn intern(&mut self, s: &str) -> SymbolId { + let id = SymbolId::from_hash(xxh3_64(s.as_bytes())); + self.map.entry(id).or_insert_with(|| Box::::from(s)); + id + } + + /// Returns the string previously interned for `id`. + /// + /// # Panics + /// If `id` was not issued by this interner (or one merged into it). + #[must_use] + pub fn resolve(&self, id: SymbolId) -> &str { + &self.map[&id] + } + + /// Returns the string for `id`, or `None` if it was never interned here. + #[must_use] + pub fn try_resolve(&self, id: SymbolId) -> Option<&str> { + self.map.get(&id).map(|s| &**s) + } + + /// Returns the number of interned strings. + #[must_use] + pub fn len(&self) -> usize { + self.map.len() + } + + #[must_use] + pub fn is_empty(&self) -> bool { + self.map.is_empty() + } + + /// Move every entry from `other` into `self`. Because IDs are + /// content-addressed, entries already present in `self` are kept; new + /// ones are absorbed without reallocating their `Box` storage. + pub fn merge(&mut self, other: StringInterner) { + for (id, boxed) in other.map { + self.map.entry(id).or_insert(boxed); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn intern_is_stable_within_one_interner() { + let mut i = StringInterner::new(); + let a1 = i.intern("String"); + let a2 = i.intern("String"); + let b = i.intern("Integer"); + assert_eq!(a1, a2); + assert_ne!(a1, b); + assert_eq!(i.resolve(a1), "String"); + assert_eq!(i.resolve(b), "Integer"); + assert_eq!(i.len(), 2); + } + + #[test] + fn same_string_yields_same_id_across_interners() { + let mut a = StringInterner::new(); + let mut b = StringInterner::new(); + assert_eq!(a.intern("String"), b.intern("String")); + assert_eq!(a.intern(""), b.intern("")); + } + + #[test] + fn merge_unions_entries() { + let mut local = StringInterner::new(); + let l_string = local.intern("String"); + let l_array = local.intern("Array"); + + let mut global = StringInterner::new(); + let g_string = global.intern("String"); + let g_int = global.intern("Integer"); + + // Same content ⇒ same id, even before merge. + assert_eq!(l_string, g_string); + + global.merge(local); + + assert_eq!(global.resolve(l_string), "String"); + assert_eq!(global.resolve(l_array), "Array"); + assert_eq!(global.resolve(g_int), "Integer"); + assert_eq!(global.len(), 3); + } + + #[test] + fn merge_into_empty_global() { + let mut local = StringInterner::new(); + let a = local.intern("Foo"); + let b = local.intern("Bar"); + + let mut global = StringInterner::new(); + global.merge(local); + + assert_eq!(global.resolve(a), "Foo"); + assert_eq!(global.resolve(b), "Bar"); + assert_eq!(global.len(), 2); + } + + #[test] + fn interner_is_send() { + fn assert_send() {} + assert_send::(); + } + + #[test] + fn parallel_intern_then_merge() { + // Each thread interns some strings with overlap. Because IDs are + // content-addressed, no remap step is needed. + let inputs: Vec> = vec![ + vec!["String", "Integer", "Foo"], + vec!["String", "Array", "Bar"], + vec!["Integer", "Array", "Baz"], + ]; + + let handles: Vec<_> = inputs + .into_iter() + .map(|words| { + std::thread::spawn(move || { + let mut interner = StringInterner::new(); + let ids: Vec = words.iter().map(|w| interner.intern(w)).collect(); + (interner, ids, words) + }) + }) + .collect(); + + let per_thread: Vec<_> = handles.into_iter().map(|h| h.join().unwrap()).collect(); + + let mut global = StringInterner::new(); + let mut translated: Vec<(Vec, Vec<&'static str>)> = Vec::new(); + for (local, ids, words) in per_thread { + global.merge(local); + translated.push((ids, words)); + } + + // Every id resolves to its original string in the global interner. + for (ids, words) in &translated { + for (id, word) in ids.iter().zip(words.iter()) { + assert_eq!(global.resolve(*id), *word); + } + } + + // The same string always yields the same SymbolId across threads. + let mut expected = std::collections::HashMap::<&str, SymbolId>::new(); + for (ids, words) in &translated { + for (id, word) in ids.iter().zip(words.iter()) { + if let Some(&prev) = expected.get(*word) { + assert_eq!(prev, *id, "{word} got different ids"); + } else { + expected.insert(*word, *id); + } + } + } + + // Unique strings across all threads: String, Integer, Foo, Array, Bar, Baz = 6. + assert_eq!(global.len(), 6); + } +} diff --git a/rust/ruby-rbs/src/lib.rs b/rust/ruby-rbs/src/lib.rs index 6790f3f42..a874e3a66 100644 --- a/rust/ruby-rbs/src/lib.rs +++ b/rust/ruby-rbs/src/lib.rs @@ -1,2 +1,3 @@ pub mod ids; +pub mod interner; pub mod node; From b0d2358ce4009fa04ec51adec07d3f86ea29f2d0 Mon Sep 17 00:00:00 2001 From: Soutaro Matsumoto Date: Thu, 21 May 2026 17:09:32 +0900 Subject: [PATCH 4/4] Add TypeName --- rust/ruby-rbs/src/ids.rs | 7 + rust/ruby-rbs/src/lib.rs | 1 + rust/ruby-rbs/src/type_name.rs | 725 +++++++++++++++++++++++++++++++++ 3 files changed, 733 insertions(+) create mode 100644 rust/ruby-rbs/src/type_name.rs diff --git a/rust/ruby-rbs/src/ids.rs b/rust/ruby-rbs/src/ids.rs index 8f6854f63..a72678b4e 100644 --- a/rust/ruby-rbs/src/ids.rs +++ b/rust/ruby-rbs/src/ids.rs @@ -125,6 +125,13 @@ pub enum SymbolTag {} /// string bytes. pub type SymbolId = Id; +/// Tag for IDs produced by the type-name interner. +pub enum TypeNameTag {} + +/// Identifier for an interned type name. Content-addressed: derived from +/// the parent type name's hash and the last segment's hash. +pub type TypeName = Id; + #[cfg(test)] mod tests { use super::*; diff --git a/rust/ruby-rbs/src/lib.rs b/rust/ruby-rbs/src/lib.rs index a874e3a66..d63bbfa3c 100644 --- a/rust/ruby-rbs/src/lib.rs +++ b/rust/ruby-rbs/src/lib.rs @@ -1,3 +1,4 @@ pub mod ids; pub mod interner; pub mod node; +pub mod type_name; diff --git a/rust/ruby-rbs/src/type_name.rs b/rust/ruby-rbs/src/type_name.rs new file mode 100644 index 000000000..eadb497d9 --- /dev/null +++ b/rust/ruby-rbs/src/type_name.rs @@ -0,0 +1,725 @@ +//! Content-addressed flyweight type names. +//! +//! Unlike the Ruby implementation, which distinguishes `RBS::Namespace` +//! (a path of class names plus an `absolute` flag) from `RBS::TypeName` +//! (a Namespace plus a trailing name), this module folds both into a +//! single [`TypeName`]: +//! +//! - An empty path is a namespace root (either `::` or `""`). +//! - A non-empty path's last segment is what Ruby calls the trailing +//! "name", and its [`Kind`] is derived from that segment's first +//! character. +//! +//! Each [`TypeName`] is a 64-bit content-addressed id derived from its +//! parent's id and its last segment's [`SymbolId`]. Because the recipe is +//! deterministic, two independently-built [`TypeNameInterner`]s assign the +//! same id to the same logical name — merging is just a `HashMap` union. +//! Two pre-interned roots cover the absolute / relative split via fixed +//! sentinel hashes. +//! +//! ``` +//! use ruby_rbs::interner::StringInterner; +//! use ruby_rbs::type_name::{Kind, TypeNameInterner}; +//! +//! let mut strings = StringInterner::new(); +//! let mut names = TypeNameInterner::new(); +//! +//! let foo = names.parse(&mut strings, "::RBS::Foo"); +//! let foo_again = names.parse(&mut strings, "::RBS::Foo"); +//! assert_eq!(foo, foo_again); // flyweighted +//! assert_eq!(names.kind(foo, &strings), Some(Kind::Class)); +//! assert_eq!(names.display(foo, &strings), "::RBS::Foo"); +//! ``` + +use crate::ids::{SymbolId, TypeName}; +use crate::interner::StringInterner; +use std::collections::HashMap; +use xxhash_rust::xxh3::xxh3_64; + +/// Fixed sentinel hash for the absolute (`::`) namespace root. Chosen so +/// no realistic `xxh3_64` content collision is expected; any value would +/// do as long as it differs from `RELATIVE_ROOT_HASH`. +const ABSOLUTE_ROOT_HASH: u64 = 0xA850_1075_0001_0001; + +/// Fixed sentinel hash for the relative (`""`) namespace root. +const RELATIVE_ROOT_HASH: u64 = 0x8E1A_71FE_0001_0001; + +fn child_hash(parent: TypeName, segment: SymbolId) -> u64 { + let mut buf = [0u8; 16]; + buf[..8].copy_from_slice(&parent.get().to_le_bytes()); + buf[8..].copy_from_slice(&segment.get().to_le_bytes()); + xxh3_64(&buf) +} + +/// Kind of a [`TypeName`], derived from the first character of its last +/// segment. `None` is returned for an empty type name (a namespace root). +#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] +pub enum Kind { + Class, + Alias, + Interface, +} + +/// An entry rooted in the absolute namespace (`::`). +#[derive(Copy, Clone, Debug)] +struct AbsoluteTypeNameEntry { + /// `None` for the absolute root. + parent: Option, + /// `None` for the absolute root. + segment: Option, +} + +/// An entry rooted in the relative namespace (`""`). +#[derive(Copy, Clone, Debug)] +struct RelativeTypeNameEntry { + /// `None` for the relative root. + parent: Option, + /// `None` for the relative root. + segment: Option, +} + +#[derive(Copy, Clone, Debug)] +enum Entry { + Absolute(AbsoluteTypeNameEntry), + Relative(RelativeTypeNameEntry), +} + +impl Entry { + fn parent(self) -> Option { + match self { + Self::Absolute(e) => e.parent, + Self::Relative(e) => e.parent, + } + } + + fn segment(self) -> Option { + match self { + Self::Absolute(e) => e.segment, + Self::Relative(e) => e.segment, + } + } + + fn is_absolute(self) -> bool { + matches!(self, Self::Absolute(_)) + } +} + +/// Interner that flyweights [`TypeName`]s with content-addressed ids. +/// +/// Build new names by walking down from a root: +/// +/// ``` +/// use ruby_rbs::interner::StringInterner; +/// use ruby_rbs::type_name::TypeNameInterner; +/// +/// let mut strings = StringInterner::new(); +/// let mut names = TypeNameInterner::new(); +/// let rbs = strings.intern("RBS"); +/// let foo = strings.intern("Foo"); +/// +/// let abs = names.absolute_root(); +/// let n1 = names.append(abs, rbs); +/// let n2 = names.append(n1, foo); +/// assert_eq!(names.display(n2, &strings), "::RBS::Foo"); +/// ``` +pub struct TypeNameInterner { + entries: HashMap, + relative_root: TypeName, + absolute_root: TypeName, +} + +impl Default for TypeNameInterner { + fn default() -> Self { + let relative_root = TypeName::from_hash(RELATIVE_ROOT_HASH); + let absolute_root = TypeName::from_hash(ABSOLUTE_ROOT_HASH); + let mut entries = HashMap::new(); + entries.insert( + relative_root, + Entry::Relative(RelativeTypeNameEntry { + parent: None, + segment: None, + }), + ); + entries.insert( + absolute_root, + Entry::Absolute(AbsoluteTypeNameEntry { + parent: None, + segment: None, + }), + ); + Self { + entries, + relative_root, + absolute_root, + } + } +} + +impl TypeNameInterner { + #[must_use] + pub fn new() -> Self { + Self::default() + } + + /// The empty relative type name (`""` — `Namespace.empty` in Ruby). + #[must_use] + pub fn relative_root(&self) -> TypeName { + self.relative_root + } + + /// The empty absolute type name (`"::"` — `Namespace.root` in Ruby). + #[must_use] + pub fn absolute_root(&self) -> TypeName { + self.absolute_root + } + + /// Returns [`absolute_root`] when `absolute` is true, else + /// [`relative_root`]. + /// + /// [`absolute_root`]: Self::absolute_root + /// [`relative_root`]: Self::relative_root + #[must_use] + pub fn root(&self, absolute: bool) -> TypeName { + if absolute { + self.absolute_root + } else { + self.relative_root + } + } + + /// Returns the type name `parent::segment`. Content-addressed: + /// identical inputs return the same [`TypeName`] across any + /// [`TypeNameInterner`]. + pub fn append(&mut self, parent: TypeName, segment: SymbolId) -> TypeName { + let id = TypeName::from_hash(child_hash(parent, segment)); + if self.entries.contains_key(&id) { + return id; + } + let parent_entry = self + .entries + .get(&parent) + .copied() + .expect("parent TypeName must be interned"); + let entry = if parent_entry.is_absolute() { + Entry::Absolute(AbsoluteTypeNameEntry { + parent: Some(parent), + segment: Some(segment), + }) + } else { + Entry::Relative(RelativeTypeNameEntry { + parent: Some(parent), + segment: Some(segment), + }) + }; + self.entries.insert(id, entry); + id + } + + /// Builds a type name by appending each `segment` in order to `base`. + pub fn extend(&mut self, base: TypeName, segments: I) -> TypeName + where + I: IntoIterator, + { + segments.into_iter().fold(base, |p, s| self.append(p, s)) + } + + /// Returns the parent of `name`, or `None` if `name` is one of the + /// two roots. + #[must_use] + pub fn parent(&self, name: TypeName) -> Option { + self.entries[&name].parent() + } + + /// Returns the last segment of `name`, or `None` if `name` is one of + /// the two roots. + #[must_use] + pub fn last_segment(&self, name: TypeName) -> Option { + self.entries[&name].segment() + } + + #[must_use] + pub fn is_absolute(&self, name: TypeName) -> bool { + self.entries[&name].is_absolute() + } + + /// True for an empty path (the relative or absolute root). + #[must_use] + pub fn is_root(&self, name: TypeName) -> bool { + self.entries[&name].parent().is_none() + } + + /// Number of segments in `name`. + #[must_use] + pub fn depth(&self, name: TypeName) -> usize { + let mut depth = 0; + let mut cur = name; + while let Some(parent) = self.parent(cur) { + depth += 1; + cur = parent; + } + depth + } + + /// Returns the segments of `name` from root to leaf. + #[must_use] + pub fn segments(&self, name: TypeName) -> Vec { + let mut buf = Vec::with_capacity(self.depth(name)); + let mut cur = name; + while let (Some(parent), Some(seg)) = (self.parent(cur), self.last_segment(cur)) { + buf.push(seg); + cur = parent; + } + buf.reverse(); + buf + } + + /// Returns the same type name with `absolute = true`, sharing the path. + pub fn to_absolute(&mut self, name: TypeName) -> TypeName { + if self.is_absolute(name) { + return name; + } + let segs = self.segments(name); + self.extend(self.absolute_root, segs) + } + + /// Returns the same type name with `absolute = false`, sharing the path. + pub fn to_relative(&mut self, name: TypeName) -> TypeName { + if !self.is_absolute(name) { + return name; + } + let segs = self.segments(name); + self.extend(self.relative_root, segs) + } + + /// Ruby `TypeName#+` semantics: if `tail` is absolute, return `tail`; + /// otherwise concatenate `head`'s segments + `tail`'s segments under + /// `head`'s absolute flag. + pub fn concat(&mut self, head: TypeName, tail: TypeName) -> TypeName { + if self.is_absolute(tail) { + return tail; + } + let tail_segs = self.segments(tail); + self.extend(head, tail_segs) + } + + /// Kind of the trailing segment. `None` for roots. + #[must_use] + pub fn kind(&self, name: TypeName, strings: &StringInterner) -> Option { + let seg = self.last_segment(name)?; + let bytes = strings.resolve(seg).as_bytes(); + let first = *bytes.first()?; + Some(if first == b'_' { + Kind::Interface + } else if first.is_ascii_uppercase() { + Kind::Class + } else { + Kind::Alias + }) + } + + /// Render `name` in the canonical RBS string form + /// (e.g. `::RBS::Foo`, `Foo::bar`). + #[must_use] + pub fn display(&self, name: TypeName, strings: &StringInterner) -> String { + let segs = self.segments(name); + let absolute = self.is_absolute(name); + let mut s = String::new(); + if absolute { + s.push_str("::"); + } + for (i, seg) in segs.iter().enumerate() { + if i > 0 { + s.push_str("::"); + } + s.push_str(strings.resolve(*seg)); + } + s + } + + /// Parse an RBS type-name string into a [`TypeName`], interning any + /// new segments into `strings`. + /// + /// Empty `source` returns the relative root; `"::"` returns the + /// absolute root. + pub fn parse(&mut self, strings: &mut StringInterner, source: &str) -> TypeName { + let absolute = source.starts_with("::"); + let trimmed = source.strip_prefix("::").unwrap_or(source); + let mut current = self.root(absolute); + for part in trimmed.split("::") { + if part.is_empty() { + continue; + } + let seg = strings.intern(part); + current = self.append(current, seg); + } + current + } + + /// Move every entry from `other` into `self`. Because IDs are + /// content-addressed, the two interners' roots and any shared paths + /// already have the same ids; this is a plain `HashMap` union. + pub fn merge(&mut self, other: TypeNameInterner) { + for (id, entry) in other.entries { + self.entries.entry(id).or_insert(entry); + } + } + + /// Refine `name` to an [`AbsoluteTypeName`] if it is absolute. + #[must_use] + pub fn try_as_absolute(&self, name: TypeName) -> Option { + self.is_absolute(name).then_some(AbsoluteTypeName(name)) + } + + /// Refine `name` to an [`AbsoluteClassTypeName`] if it is absolute and + /// its last segment denotes a class. + #[must_use] + pub fn try_as_absolute_class( + &self, + name: TypeName, + strings: &StringInterner, + ) -> Option { + (self.is_absolute(name) && self.kind(name, strings) == Some(Kind::Class)) + .then_some(AbsoluteClassTypeName(name)) + } + + /// Refine `name` to an [`AbsoluteAliasTypeName`] if it is absolute and + /// its last segment denotes an alias. + #[must_use] + pub fn try_as_absolute_alias( + &self, + name: TypeName, + strings: &StringInterner, + ) -> Option { + (self.is_absolute(name) && self.kind(name, strings) == Some(Kind::Alias)) + .then_some(AbsoluteAliasTypeName(name)) + } + + /// Refine `name` to an [`AbsoluteInterfaceTypeName`] if it is absolute + /// and its last segment denotes an interface. + #[must_use] + pub fn try_as_absolute_interface( + &self, + name: TypeName, + strings: &StringInterner, + ) -> Option { + (self.is_absolute(name) && self.kind(name, strings) == Some(Kind::Interface)) + .then_some(AbsoluteInterfaceTypeName(name)) + } +} + +/// A [`TypeName`] guaranteed to be absolute. +/// +/// Construct via [`TypeNameInterner::try_as_absolute`]; widen back to a +/// plain [`TypeName`] via [`From`]. +#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] +pub struct AbsoluteTypeName(TypeName); + +impl AbsoluteTypeName { + #[must_use] + pub fn as_type_name(self) -> TypeName { + self.0 + } +} + +impl From for TypeName { + fn from(value: AbsoluteTypeName) -> Self { + value.0 + } +} + +/// A [`TypeName`] guaranteed to be absolute and of [`Kind::Class`]. +#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] +pub struct AbsoluteClassTypeName(TypeName); + +impl AbsoluteClassTypeName { + #[must_use] + pub fn as_type_name(self) -> TypeName { + self.0 + } +} + +impl From for TypeName { + fn from(value: AbsoluteClassTypeName) -> Self { + value.0 + } +} + +impl From for AbsoluteTypeName { + fn from(value: AbsoluteClassTypeName) -> Self { + AbsoluteTypeName(value.0) + } +} + +/// A [`TypeName`] guaranteed to be absolute and of [`Kind::Alias`]. +#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] +pub struct AbsoluteAliasTypeName(TypeName); + +impl AbsoluteAliasTypeName { + #[must_use] + pub fn as_type_name(self) -> TypeName { + self.0 + } +} + +impl From for TypeName { + fn from(value: AbsoluteAliasTypeName) -> Self { + value.0 + } +} + +impl From for AbsoluteTypeName { + fn from(value: AbsoluteAliasTypeName) -> Self { + AbsoluteTypeName(value.0) + } +} + +/// A [`TypeName`] guaranteed to be absolute and of [`Kind::Interface`]. +#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] +pub struct AbsoluteInterfaceTypeName(TypeName); + +impl AbsoluteInterfaceTypeName { + #[must_use] + pub fn as_type_name(self) -> TypeName { + self.0 + } +} + +impl From for TypeName { + fn from(value: AbsoluteInterfaceTypeName) -> Self { + value.0 + } +} + +impl From for AbsoluteTypeName { + fn from(value: AbsoluteInterfaceTypeName) -> Self { + AbsoluteTypeName(value.0) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn setup() -> (StringInterner, TypeNameInterner) { + (StringInterner::new(), TypeNameInterner::new()) + } + + #[test] + fn roots_are_distinct_and_stable() { + let (_, names) = setup(); + let rel = names.relative_root(); + let abs = names.absolute_root(); + assert_ne!(rel, abs); + assert!(names.is_root(rel)); + assert!(names.is_root(abs)); + assert!(!names.is_absolute(rel)); + assert!(names.is_absolute(abs)); + assert_eq!(names.depth(rel), 0); + assert_eq!(names.depth(abs), 0); + } + + #[test] + fn roots_are_identical_across_interners() { + let a = TypeNameInterner::new(); + let b = TypeNameInterner::new(); + assert_eq!(a.absolute_root(), b.absolute_root()); + assert_eq!(a.relative_root(), b.relative_root()); + } + + #[test] + fn append_flyweights_identical_paths() { + let (mut s, mut t) = setup(); + let rbs = s.intern("RBS"); + let foo = s.intern("Foo"); + let abs = t.absolute_root(); + let a = t.append(abs, rbs); + let a2 = t.append(abs, rbs); + assert_eq!(a, a2); + let ab = t.append(a, foo); + let ab2 = t.append(a, foo); + assert_eq!(ab, ab2); + // Different absoluteness ⇒ different ids + let rel = t.relative_root(); + let r = t.append(rel, rbs); + assert_ne!(a, r); + } + + #[test] + fn same_path_yields_same_id_across_interners() { + let mut sa = StringInterner::new(); + let mut sb = StringInterner::new(); + let mut ta = TypeNameInterner::new(); + let mut tb = TypeNameInterner::new(); + + let a = ta.parse(&mut sa, "::RBS::Foo"); + let b = tb.parse(&mut sb, "::RBS::Foo"); + assert_eq!(a, b); + } + + #[test] + fn parse_and_display_round_trip() { + let (mut s, mut t) = setup(); + for src in ["::RBS::Foo", "Foo::Bar", "::Foo", "Foo", "::", ""] { + let id = t.parse(&mut s, src); + assert_eq!(t.display(id, &s), src); + } + } + + #[test] + fn parse_dedups_against_append_path() { + let (mut s, mut t) = setup(); + let parsed = t.parse(&mut s, "::RBS::Foo"); + let rbs = s.intern("RBS"); + let foo = s.intern("Foo"); + let abs = t.absolute_root(); + let built_rbs = t.append(abs, rbs); + let built = t.append(built_rbs, foo); + assert_eq!(parsed, built); + } + + #[test] + fn segments_and_parent() { + let (mut s, mut t) = setup(); + let id = t.parse(&mut s, "::A::B::C"); + let segs = t.segments(id); + assert_eq!(segs.len(), 3); + assert_eq!(s.resolve(segs[0]), "A"); + assert_eq!(s.resolve(segs[1]), "B"); + assert_eq!(s.resolve(segs[2]), "C"); + assert_eq!(t.depth(id), 3); + + let parent = t.parent(id).unwrap(); + assert_eq!(t.display(parent, &s), "::A::B"); + let grand = t.parent(parent).unwrap(); + assert_eq!(t.display(grand, &s), "::A"); + let root = t.parent(grand).unwrap(); + assert_eq!(root, t.absolute_root()); + assert!(t.parent(root).is_none()); + } + + #[test] + fn kind_is_derived_from_last_segment() { + let (mut s, mut t) = setup(); + let cls = t.parse(&mut s, "::RBS::Foo"); + let als = t.parse(&mut s, "::RBS::foo"); + let iface = t.parse(&mut s, "::RBS::_Foo"); + let root = t.absolute_root(); + assert_eq!(t.kind(cls, &s), Some(Kind::Class)); + assert_eq!(t.kind(als, &s), Some(Kind::Alias)); + assert_eq!(t.kind(iface, &s), Some(Kind::Interface)); + assert_eq!(t.kind(root, &s), None); + } + + #[test] + fn to_absolute_to_relative() { + let (mut s, mut t) = setup(); + let rel = t.parse(&mut s, "A::B"); + let abs = t.to_absolute(rel); + assert!(t.is_absolute(abs)); + assert_eq!(t.display(abs, &s), "::A::B"); + let back = t.to_relative(abs); + assert_eq!(back, rel); + // Idempotent + assert_eq!(t.to_absolute(abs), abs); + assert_eq!(t.to_relative(rel), rel); + } + + #[test] + fn concat_follows_ruby_plus_semantics() { + let (mut s, mut t) = setup(); + let head = t.parse(&mut s, "::RBS"); + let tail_rel = t.parse(&mut s, "Foo::Bar"); + let tail_abs = t.parse(&mut s, "::Other"); + + // Relative tail concatenates under head's absoluteness + let joined = t.concat(head, tail_rel); + assert_eq!(t.display(joined, &s), "::RBS::Foo::Bar"); + + // Absolute tail short-circuits + let joined2 = t.concat(head, tail_abs); + assert_eq!(joined2, tail_abs); + assert_eq!(t.display(joined2, &s), "::Other"); + } + + #[test] + fn merge_unions_entries() { + let mut sa = StringInterner::new(); + let mut sb = StringInterner::new(); + let mut ta = TypeNameInterner::new(); + let mut tb = TypeNameInterner::new(); + + let a1 = ta.parse(&mut sa, "::RBS::Foo"); + let a2 = ta.parse(&mut sa, "::Only::In::A"); + let b1 = tb.parse(&mut sb, "::RBS::Foo"); + let b2 = tb.parse(&mut sb, "::Only::In::B"); + + // Same content ⇒ same id, even before merge. + assert_eq!(a1, b1); + + // Merge strings + names from b into a. + sa.merge(sb); + ta.merge(tb); + + // All ids resolve in the merged interners. + assert_eq!(ta.display(a2, &sa), "::Only::In::A"); + assert_eq!(ta.display(b2, &sa), "::Only::In::B"); + } + + #[test] + fn typename_id_size() { + assert_eq!(std::mem::size_of::(), 8); + assert_eq!(std::mem::size_of::>(), 8); + } + + #[test] + fn refined_typename_constructors() { + let (mut s, mut t) = setup(); + let abs_class = t.parse(&mut s, "::RBS::Foo"); + let rel_class = t.parse(&mut s, "RBS::Foo"); + let abs_alias = t.parse(&mut s, "::RBS::foo"); + let abs_iface = t.parse(&mut s, "::RBS::_Foo"); + let abs_root = t.absolute_root(); + + assert!(t.try_as_absolute(abs_class).is_some()); + assert!(t.try_as_absolute(rel_class).is_none()); + + assert!(t.try_as_absolute_class(abs_class, &s).is_some()); + assert!(t.try_as_absolute_class(rel_class, &s).is_none()); + assert!(t.try_as_absolute_class(abs_alias, &s).is_none()); + assert!(t.try_as_absolute_class(abs_iface, &s).is_none()); + assert!(t.try_as_absolute_class(abs_root, &s).is_none()); + + assert!(t.try_as_absolute_alias(abs_alias, &s).is_some()); + assert!(t.try_as_absolute_alias(abs_class, &s).is_none()); + + assert!(t.try_as_absolute_interface(abs_iface, &s).is_some()); + assert!(t.try_as_absolute_interface(abs_class, &s).is_none()); + } + + #[test] + fn refined_typename_widening() { + let (mut s, mut t) = setup(); + let abs_class = t.parse(&mut s, "::RBS::Foo"); + + let class: AbsoluteClassTypeName = t.try_as_absolute_class(abs_class, &s).unwrap(); + // Widening to AbsoluteTypeName + let absolute: AbsoluteTypeName = class.into(); + assert_eq!(absolute.as_type_name(), abs_class); + // Widening to TypeName + let tn: TypeName = class.into(); + assert_eq!(tn, abs_class); + // From AbsoluteTypeName to TypeName + let tn2: TypeName = absolute.into(); + assert_eq!(tn2, abs_class); + } + + #[test] + fn refined_typename_sizes_match_typename() { + // All refinements are zero-cost wrappers + assert_eq!(std::mem::size_of::(), 8); + assert_eq!(std::mem::size_of::(), 8); + assert_eq!(std::mem::size_of::(), 8); + assert_eq!(std::mem::size_of::(), 8); + assert_eq!(std::mem::size_of::>(), 8); + } +}