|
| 1 | +use std::collections::{HashMap, HashSet, VecDeque}; |
| 2 | +use std::path::PathBuf; |
| 3 | +use std::sync::Arc; |
| 4 | + |
| 5 | +use chumsky::container::Container; |
| 6 | + |
| 7 | +use crate::error::{Error, ErrorCollector, RichError, Span}; |
| 8 | +use crate::parse::{self, ParseFromStrWithErrors}; |
| 9 | +use crate::resolution::{CanonPath, DependencyMap, SourceFile}; |
| 10 | + |
| 11 | +/// Represents a single, isolated file in the SimplicityHL project. |
| 12 | +/// In this architecture, a file and a module are the exact same thing. |
| 13 | +#[derive(Debug, Clone)] |
| 14 | +struct Module { |
| 15 | + source: SourceFile, |
| 16 | + /// The completely parsed program for this specific file. |
| 17 | + /// it contains all the functions, aliases, and imports defined inside the file. |
| 18 | + parsed_program: parse::Program, |
| 19 | +} |
| 20 | + |
| 21 | +/// An Intermediate Representation that helps transform isolated files into a global program. |
| 22 | +/// |
| 23 | +/// While an AST only understands a single file, the `DependencyGraph` links multiple |
| 24 | +/// ASTs together into a Directed Acyclic Graph (DAG). This DAG is then used to build |
| 25 | +/// one convenient `Program` struct for the semantic analyzer can easily process. |
| 26 | +/// |
| 27 | +/// This structure provides the global context necessary to solve high-level compiler |
| 28 | +/// problems, including: |
| 29 | +/// * **Cross-Module Resolution:** Allowing the compiler to traverse edges and verify |
| 30 | +/// that imported symbols, functions, and types actually exist in other files. |
| 31 | +/// * **Topological Sorting:** Guaranteeing that modules are analyzed and compiled in |
| 32 | +/// the strictly correct mathematical order (e.g., analyzing module `B` before module |
| 33 | +/// `A` if `A` depends on `B`). |
| 34 | +/// * **Cycle Detection:** Preventing infinite compiler loops by ensuring no circular |
| 35 | +/// imports exist before heavy semantic processing begins. |
| 36 | +pub struct DependencyGraph { |
| 37 | + /// Implements the Arena Pattern to act as the sole, centralized owner of all parsed modules. |
| 38 | + /// |
| 39 | + /// In C++ or Java, a graph would typically link dependencies using direct memory |
| 40 | + /// pointers (e.g., `List<Module*>`). In Rust, doing this requires either |
| 41 | + /// lifetimes or performance-heavy reference counting (`Rc<RefCell<T>>`). |
| 42 | + /// |
| 43 | + /// Using a flat `Vec` as a memory arena is the idiomatic Rust solution. |
| 44 | + modules: Vec<Module>, |
| 45 | + |
| 46 | + /// The configuration environment. |
| 47 | + /// Used to resolve external library dependencies and invoke their associated functions. |
| 48 | + dependency_map: Arc<DependencyMap>, |
| 49 | + |
| 50 | + /// Fast lookup: `CanonPath` -> Module ID. |
| 51 | + /// A reverse index mapping absolute file paths to their internal IDs. |
| 52 | + /// This solves the duplication problem, ensuring each file is only parsed once. |
| 53 | + lookup: HashMap<CanonPath, usize>, |
| 54 | + |
| 55 | + /// Fast lookup: Module ID -> `CanonPath`. |
| 56 | + /// A direct index mapping internal IDs back to their absolute file paths. |
| 57 | + /// This serves as the exact inverse of the `lookup` map. |
| 58 | + paths: Vec<CanonPath>, |
| 59 | + |
| 60 | + /// The Adjacency List: Defines the Directed acyclic Graph (DAG) of imports. |
| 61 | + /// |
| 62 | + /// The Key (`usize`) is the ID of a "Parent" module (the file doing the importing). |
| 63 | + /// The Value (`Vec<usize>`) is a list of IDs of the "Child" modules it relies on. |
| 64 | + /// |
| 65 | + /// Example: If `main.simf` (ID: 0) has `use lib::math;` (ID: 1) and `use lib::io;` (ID: 2), |
| 66 | + /// this map will contain: `{ 0: [1, 2] }`. |
| 67 | + dependencies: HashMap<usize, Vec<usize>>, |
| 68 | +} |
| 69 | + |
| 70 | +impl DependencyGraph { |
| 71 | + /// Initializes a new `ProjectGraph` by parsing the root program and discovering all dependencies. |
| 72 | + /// |
| 73 | + /// Performs a BFS to recursively parse `use` statements, |
| 74 | + /// building a DAG of the project's modules. |
| 75 | + /// |
| 76 | + /// # Arguments |
| 77 | + /// |
| 78 | + /// * `root_source` - The `SourceFile` representing the entry point of the project. |
| 79 | + /// * `dependency_map` - The context-aware mapping rules used to resolve external imports. |
| 80 | + /// * `root_program` - A reference to the already-parsed AST of the root file. |
| 81 | + /// * `handler` - The diagnostics collector used to record resolution and parsing errors. |
| 82 | + /// |
| 83 | + /// # Returns |
| 84 | + /// |
| 85 | + /// * `Ok(Some(Self))` - If the entire project graph was successfully resolved and parsed. |
| 86 | + /// * `Ok(None)` - If the graph traversal completed, but one or more modules contained |
| 87 | + /// errors (which have been safely logged into the `handler`). |
| 88 | + /// |
| 89 | + /// # Errors |
| 90 | + /// |
| 91 | + /// This function will return an `Err(String)` only for critical internal compiler errors |
| 92 | + /// (e.g., if a provided `SourceFile` is unexpectedly missing its underlying file path). |
| 93 | + pub fn new( |
| 94 | + root_source: SourceFile, |
| 95 | + dependency_map: Arc<DependencyMap>, |
| 96 | + root_program: &parse::Program, |
| 97 | + handler: &mut ErrorCollector, |
| 98 | + ) -> Result<Option<Self>, String> { |
| 99 | + let root_name = if let Some(root_name) = root_source.name() { |
| 100 | + CanonPath::canonicalize(root_name)? |
| 101 | + } else { |
| 102 | + return Err( |
| 103 | + "The root_source variable inside the ProjectGraph::new() function has no name" |
| 104 | + .to_string(), |
| 105 | + ); |
| 106 | + }; |
| 107 | + |
| 108 | + let mut graph = Self { |
| 109 | + modules: vec![Module { |
| 110 | + source: root_source, |
| 111 | + parsed_program: root_program.clone(), |
| 112 | + }], |
| 113 | + dependency_map, |
| 114 | + lookup: HashMap::new(), |
| 115 | + paths: vec![root_name.clone()], |
| 116 | + dependencies: HashMap::new(), |
| 117 | + }; |
| 118 | + |
| 119 | + let root_id = 0; |
| 120 | + graph.lookup.insert(root_name, root_id); |
| 121 | + graph.dependencies.insert(root_id, Vec::new()); |
| 122 | + |
| 123 | + let mut queue = VecDeque::new(); |
| 124 | + queue.push_back(root_id); |
| 125 | + |
| 126 | + // Prevent errors in the checked files from being doubled in the `load_and_parse_dependencies` function. |
| 127 | + let mut inalid_imports = HashSet::new(); |
| 128 | + |
| 129 | + while let Some(curr_id) = queue.pop_front() { |
| 130 | + let Some(current_module) = graph.modules.get(curr_id) else { |
| 131 | + return Err(format!( |
| 132 | + "Internal Driver Error: Module ID {} is in the queue but missing from the graph.modules.", |
| 133 | + curr_id |
| 134 | + )); |
| 135 | + }; |
| 136 | + |
| 137 | + // We need this to report errors inside THIS file. |
| 138 | + let importer_source = current_module.source.clone(); |
| 139 | + |
| 140 | + let importer_source_name = if let Some(name) = importer_source.name() { |
| 141 | + CanonPath::canonicalize(name)? |
| 142 | + } else { |
| 143 | + return Err(format!( |
| 144 | + "The {:?} variable inside the DependencyGraph::new() function has no name", |
| 145 | + importer_source |
| 146 | + )); |
| 147 | + }; |
| 148 | + |
| 149 | + // PHASE 1: Immutably read from the graph |
| 150 | + let valid_imports = Self::resolve_imports( |
| 151 | + ¤t_module.parsed_program, |
| 152 | + &importer_source, |
| 153 | + importer_source_name, |
| 154 | + &graph.dependency_map, |
| 155 | + handler, |
| 156 | + ); |
| 157 | + |
| 158 | + // PHASE 2: Mutate the graph |
| 159 | + graph.load_and_parse_dependencies( |
| 160 | + curr_id, |
| 161 | + valid_imports, |
| 162 | + &mut inalid_imports, |
| 163 | + &importer_source, |
| 164 | + handler, |
| 165 | + &mut queue, |
| 166 | + ); |
| 167 | + } |
| 168 | + |
| 169 | + // TODO: Consider getting rid of the 'String' error here and changing it to a more appropriate error |
| 170 | + // (e.g. 'Result<Self, ErrorCollector>') after resolving https://github.com/BlockstreamResearch/SimplicityHL/issues/270. |
| 171 | + Ok((!handler.has_errors()).then_some(graph)) |
| 172 | + } |
| 173 | + |
| 174 | + /// This helper cleanly encapsulates the process of loading source text, parsing it |
| 175 | + /// into an `parse::Program`, and combining them so the compiler can easily work with the file. |
| 176 | + /// If the file is missing or contains syntax errors, it logs the diagnostic to the |
| 177 | + /// `ErrorCollector` and safely returns `None`. |
| 178 | + fn parse_and_get_program( |
| 179 | + path: &CanonPath, |
| 180 | + importer_source: SourceFile, |
| 181 | + span: Span, |
| 182 | + handler: &mut ErrorCollector, |
| 183 | + ) -> Option<Module> { |
| 184 | + let Ok(content) = std::fs::read_to_string(path.as_path()) else { |
| 185 | + let err = RichError::new(Error::FileNotFound(PathBuf::from(path.as_path())), span) |
| 186 | + .with_source(importer_source.clone()); |
| 187 | + |
| 188 | + handler.push(err); |
| 189 | + return None; |
| 190 | + }; |
| 191 | + |
| 192 | + let mut error_handler = ErrorCollector::new(); |
| 193 | + let dep_source_file = SourceFile::new(path.as_path(), Arc::from(content.clone())); |
| 194 | + |
| 195 | + let ast = parse::Program::parse_from_str_with_errors(&dep_source_file, &mut error_handler); |
| 196 | + |
| 197 | + if error_handler.has_errors() { |
| 198 | + handler.extend_with_handler(dep_source_file, &error_handler); |
| 199 | + None |
| 200 | + } else { |
| 201 | + ast.map(|parsed_program| Module { |
| 202 | + source: dep_source_file.clone(), |
| 203 | + parsed_program, |
| 204 | + }) |
| 205 | + } |
| 206 | + } |
| 207 | + |
| 208 | + /// PHASE 1 OF GRAPH CONSTRUCTION: Resolves all imports inside a single `parse::Program`. |
| 209 | + /// Note: This is a specialized helper function designed exclusively for the `DependencyGraph::new()` constructor. |
| 210 | + fn resolve_imports( |
| 211 | + current_program: &parse::Program, |
| 212 | + importer_source: &SourceFile, |
| 213 | + importer_source_name: CanonPath, |
| 214 | + dependency_map: &DependencyMap, |
| 215 | + handler: &mut ErrorCollector, |
| 216 | + ) -> Vec<(CanonPath, Span)> { |
| 217 | + let mut valid_imports = Vec::new(); |
| 218 | + |
| 219 | + for elem in current_program.items() { |
| 220 | + let parse::Item::Use(use_decl) = elem else { |
| 221 | + continue; |
| 222 | + }; |
| 223 | + |
| 224 | + match dependency_map.resolve_path(importer_source_name.clone(), use_decl) { |
| 225 | + Ok(path) => valid_imports.push((path, *use_decl.span())), |
| 226 | + Err(err) => handler.push(err.with_source(importer_source.clone())), |
| 227 | + } |
| 228 | + } |
| 229 | + |
| 230 | + valid_imports |
| 231 | + } |
| 232 | + |
| 233 | + /// PHASE 2 OF GRAPH CONSTRUCTION: Loads, parses, and registers new dependencies. |
| 234 | + /// Note: This is a specialized helper function designed exclusively for the `DependencyGraph::new()` constructor. |
| 235 | + fn load_and_parse_dependencies( |
| 236 | + &mut self, |
| 237 | + curr_id: usize, |
| 238 | + valid_imports: Vec<(CanonPath, Span)>, |
| 239 | + inalid_imports: &mut HashSet<CanonPath>, |
| 240 | + importer_source: &SourceFile, |
| 241 | + handler: &mut ErrorCollector, |
| 242 | + queue: &mut VecDeque<usize>, |
| 243 | + ) { |
| 244 | + for (path, import_span) in valid_imports { |
| 245 | + if inalid_imports.contains(&path) { |
| 246 | + continue; |
| 247 | + } |
| 248 | + |
| 249 | + if let Some(&existing_id) = self.lookup.get(&path) { |
| 250 | + let deps = self.dependencies.entry(curr_id).or_default(); |
| 251 | + if !deps.contains(&existing_id) { |
| 252 | + deps.push(existing_id); |
| 253 | + } |
| 254 | + continue; |
| 255 | + } |
| 256 | + |
| 257 | + let Some(module) = |
| 258 | + Self::parse_and_get_program(&path, importer_source.clone(), import_span, handler) |
| 259 | + else { |
| 260 | + inalid_imports.push(path); |
| 261 | + continue; |
| 262 | + }; |
| 263 | + |
| 264 | + let last_ind = self.modules.len(); |
| 265 | + self.modules.push(module); |
| 266 | + |
| 267 | + self.lookup.insert(path.clone(), last_ind); |
| 268 | + self.paths.push(path.clone()); |
| 269 | + self.dependencies.entry(curr_id).or_default().push(last_ind); |
| 270 | + |
| 271 | + queue.push_back(last_ind); |
| 272 | + } |
| 273 | + } |
| 274 | +} |
| 275 | + |
#[cfg(test)]
mod tests {
    use super::*;
    use crate::resolution::tests::canon;
    use crate::test_utils::TempWorkspace;

    /// Verifies that a simple import chain (main -> a -> b) registers the modules in
    /// BFS order and records the expected edges in the adjacency list.
    #[test]
    fn test_new_bfs_traversal_state() {
        // Arrange: a workspace with `main.simf` importing from `a`, which imports from `b`.
        let ws = TempWorkspace::new("bfs_state");
        let mut handler = ErrorCollector::new();

        let workspace = canon(&ws.create_dir("workspace"));

        let dir_a = canon(&ws.create_dir("workspace/a"));
        let dir_b = canon(&ws.create_dir("workspace/b"));

        let main_content = "use a::mock_file::mock_item;";
        let a_content = "use b::mock_file::mock_item;";
        let b_content = "";

        let main_file = canon(&ws.create_file("workspace/main.simf", main_content));
        let a_file = canon(&ws.create_file("workspace/a/mock_file.simf", a_content));
        let b_file = canon(&ws.create_file("workspace/b/mock_file.simf", b_content));

        let mut map = DependencyMap::new();

        map.insert(workspace.clone(), "a".to_string(), dir_a)
            .unwrap();
        map.insert(workspace.clone(), "b".to_string(), dir_b)
            .unwrap();
        let map = Arc::new(map);

        let main_source = SourceFile::new(main_file.as_path(), Arc::from(main_content));
        let main_program_option =
            parse::Program::parse_from_str_with_errors(&main_source, &mut handler);

        // Panic instead of calling `std::process::exit`: exiting would terminate the
        // whole test harness without reporting this test as failed.
        let Some(main_program) = main_program_option else {
            panic!("Parser Error in Test Setup: {}", handler);
        };

        // Act
        let graph_option = DependencyGraph::new(main_source, map, &main_program, &mut handler)
            .expect("DependencyGraph::new should not hit an internal error");

        let Some(graph) = graph_option else {
            panic!("DependencyGraph Error: {}", handler);
        };

        // Assert: Size checks
        assert_eq!(graph.modules.len(), 3);
        assert_eq!(graph.paths.len(), 3);

        // Assert: Ensure BFS assigned the IDs in the exact correct order
        let main_id = *graph.lookup.get(&main_file).unwrap();
        let a_id = *graph.lookup.get(&a_file).unwrap();
        let b_id = *graph.lookup.get(&b_file).unwrap();

        assert_eq!(main_id, 0);
        assert_eq!(a_id, 1);
        assert_eq!(b_id, 2);

        // Assert: Ensure the Adjacency List (dependencies map) linked them correctly
        assert_eq!(
            *graph.dependencies.get(&main_id).unwrap(),
            vec![a_id],
            "Main depends on A"
        );
        assert_eq!(
            *graph.dependencies.get(&a_id).unwrap(),
            vec![b_id],
            "A depends on B"
        );
        assert!(
            !graph.dependencies.contains_key(&b_id),
            "B depends on nothing"
        );
    }
}
0 commit comments