Skip to content

Commit ae3ed68

Browse files
committed
feat: implement DependencyGraph inside driver.rs file and change parse trait to the SourceFile
1 parent f609987 commit ae3ed68

5 files changed

Lines changed: 396 additions & 18 deletions

File tree

src/driver/mod.rs

Lines changed: 358 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,358 @@
1+
use std::collections::{HashMap, HashSet, VecDeque};
2+
use std::path::PathBuf;
3+
use std::sync::Arc;
4+
5+
use chumsky::container::Container;
6+
7+
use crate::error::{Error, ErrorCollector, RichError, Span};
8+
use crate::parse::{self, ParseFromStrWithErrors};
9+
use crate::resolution::{CanonPath, DependencyMap, SourceFile};
10+
11+
/// Represents a single, isolated file in the SimplicityHL project.
12+
/// In this architecture, a file and a module are the exact same thing.
13+
#[derive(Debug, Clone)]
14+
struct Module {
15+
source: SourceFile,
16+
/// The completely parsed program for this specific file.
17+
/// it contains all the functions, aliases, and imports defined inside the file.
18+
parsed_program: parse::Program,
19+
}
20+
21+
/// An Intermediate Representation that helps transform isolated files into a global program.
22+
///
23+
/// While an AST only understands a single file, the `DependencyGraph` links multiple
24+
/// ASTs together into a Directed Acyclic Graph (DAG). This DAG is then used to build
25+
/// one convenient `Program` struct for the semantic analyzer can easily process.
26+
///
27+
/// This structure provides the global context necessary to solve high-level compiler
28+
/// problems, including:
29+
/// * **Cross-Module Resolution:** Allowing the compiler to traverse edges and verify
30+
/// that imported symbols, functions, and types actually exist in other files.
31+
/// * **Topological Sorting:** Guaranteeing that modules are analyzed and compiled in
32+
/// the strictly correct mathematical order (e.g., analyzing module `B` before module
33+
/// `A` if `A` depends on `B`).
34+
/// * **Cycle Detection:** Preventing infinite compiler loops by ensuring no circular
35+
/// imports exist before heavy semantic processing begins.
36+
pub struct DependencyGraph {
37+
/// Implements the Arena Pattern to act as the sole, centralized owner of all parsed modules.
38+
///
39+
/// In C++ or Java, a graph would typically link dependencies using direct memory
40+
/// pointers (e.g., `List<Module*>`). In Rust, doing this requires either
41+
/// lifetimes or performance-heavy reference counting (`Rc<RefCell<T>>`).
42+
///
43+
/// Using a flat `Vec` as a memory arena is the idiomatic Rust solution.
44+
modules: Vec<Module>,
45+
46+
/// The configuration environment.
47+
/// Used to resolve external library dependencies and invoke their associated functions.
48+
dependency_map: Arc<DependencyMap>,
49+
50+
/// Fast lookup: `CanonPath` -> Module ID.
51+
/// A reverse index mapping absolute file paths to their internal IDs.
52+
/// This solves the duplication problem, ensuring each file is only parsed once.
53+
lookup: HashMap<CanonPath, usize>,
54+
55+
/// Fast lookup: Module ID -> `CanonPath`.
56+
/// A direct index mapping internal IDs back to their absolute file paths.
57+
/// This serves as the exact inverse of the `lookup` map.
58+
paths: Vec<CanonPath>,
59+
60+
/// The Adjacency List: Defines the Directed acyclic Graph (DAG) of imports.
61+
///
62+
/// The Key (`usize`) is the ID of a "Parent" module (the file doing the importing).
63+
/// The Value (`Vec<usize>`) is a list of IDs of the "Child" modules it relies on.
64+
///
65+
/// Example: If `main.simf` (ID: 0) has `use lib::math;` (ID: 1) and `use lib::io;` (ID: 2),
66+
/// this map will contain: `{ 0: [1, 2] }`.
67+
dependencies: HashMap<usize, Vec<usize>>,
68+
}
69+
70+
impl DependencyGraph {
71+
/// Initializes a new `ProjectGraph` by parsing the root program and discovering all dependencies.
72+
///
73+
/// Performs a BFS to recursively parse `use` statements,
74+
/// building a DAG of the project's modules.
75+
///
76+
/// # Arguments
77+
///
78+
/// * `root_source` - The `SourceFile` representing the entry point of the project.
79+
/// * `dependency_map` - The context-aware mapping rules used to resolve external imports.
80+
/// * `root_program` - A reference to the already-parsed AST of the root file.
81+
/// * `handler` - The diagnostics collector used to record resolution and parsing errors.
82+
///
83+
/// # Returns
84+
///
85+
/// * `Ok(Some(Self))` - If the entire project graph was successfully resolved and parsed.
86+
/// * `Ok(None)` - If the graph traversal completed, but one or more modules contained
87+
/// errors (which have been safely logged into the `handler`).
88+
///
89+
/// # Errors
90+
///
91+
/// This function will return an `Err(String)` only for critical internal compiler errors
92+
/// (e.g., if a provided `SourceFile` is unexpectedly missing its underlying file path).
93+
pub fn new(
94+
root_source: SourceFile,
95+
dependency_map: Arc<DependencyMap>,
96+
root_program: &parse::Program,
97+
handler: &mut ErrorCollector,
98+
) -> Result<Option<Self>, String> {
99+
let root_name = if let Some(root_name) = root_source.name() {
100+
CanonPath::canonicalize(root_name)?
101+
} else {
102+
return Err(
103+
"The root_source variable inside the ProjectGraph::new() function has no name"
104+
.to_string(),
105+
);
106+
};
107+
108+
let mut graph = Self {
109+
modules: vec![Module {
110+
source: root_source,
111+
parsed_program: root_program.clone(),
112+
}],
113+
dependency_map,
114+
lookup: HashMap::new(),
115+
paths: vec![root_name.clone()],
116+
dependencies: HashMap::new(),
117+
};
118+
119+
let root_id = 0;
120+
graph.lookup.insert(root_name, root_id);
121+
graph.dependencies.insert(root_id, Vec::new());
122+
123+
let mut queue = VecDeque::new();
124+
queue.push_back(root_id);
125+
126+
// Prevent errors in the checked files from being doubled in the `load_and_parse_dependencies` function.
127+
let mut inalid_imports = HashSet::new();
128+
129+
while let Some(curr_id) = queue.pop_front() {
130+
let Some(current_module) = graph.modules.get(curr_id) else {
131+
return Err(format!(
132+
"Internal Driver Error: Module ID {} is in the queue but missing from the graph.modules.",
133+
curr_id
134+
));
135+
};
136+
137+
// We need this to report errors inside THIS file.
138+
let importer_source = current_module.source.clone();
139+
140+
let importer_source_name = if let Some(name) = importer_source.name() {
141+
CanonPath::canonicalize(name)?
142+
} else {
143+
return Err(format!(
144+
"The {:?} variable inside the DependencyGraph::new() function has no name",
145+
importer_source
146+
));
147+
};
148+
149+
// PHASE 1: Immutably read from the graph
150+
let valid_imports = Self::resolve_imports(
151+
&current_module.parsed_program,
152+
&importer_source,
153+
importer_source_name,
154+
&graph.dependency_map,
155+
handler,
156+
);
157+
158+
// PHASE 2: Mutate the graph
159+
graph.load_and_parse_dependencies(
160+
curr_id,
161+
valid_imports,
162+
&mut inalid_imports,
163+
&importer_source,
164+
handler,
165+
&mut queue,
166+
);
167+
}
168+
169+
// TODO: Consider getting rid of the 'String' error here and changing it to a more appropriate error
170+
// (e.g. 'Result<Self, ErrorCollector>') after resolving https://github.com/BlockstreamResearch/SimplicityHL/issues/270.
171+
Ok((!handler.has_errors()).then_some(graph))
172+
}
173+
174+
/// This helper cleanly encapsulates the process of loading source text, parsing it
175+
/// into an `parse::Program`, and combining them so the compiler can easily work with the file.
176+
/// If the file is missing or contains syntax errors, it logs the diagnostic to the
177+
/// `ErrorCollector` and safely returns `None`.
178+
fn parse_and_get_program(
179+
path: &CanonPath,
180+
importer_source: SourceFile,
181+
span: Span,
182+
handler: &mut ErrorCollector,
183+
) -> Option<Module> {
184+
let Ok(content) = std::fs::read_to_string(path.as_path()) else {
185+
let err = RichError::new(Error::FileNotFound(PathBuf::from(path.as_path())), span)
186+
.with_source(importer_source.clone());
187+
188+
handler.push(err);
189+
return None;
190+
};
191+
192+
let mut error_handler = ErrorCollector::new();
193+
let dep_source_file = SourceFile::new(path.as_path(), Arc::from(content.clone()));
194+
195+
let ast = parse::Program::parse_from_str_with_errors(&dep_source_file, &mut error_handler);
196+
197+
if error_handler.has_errors() {
198+
handler.extend_with_handler(dep_source_file, &error_handler);
199+
None
200+
} else {
201+
ast.map(|parsed_program| Module {
202+
source: dep_source_file.clone(),
203+
parsed_program,
204+
})
205+
}
206+
}
207+
208+
/// PHASE 1 OF GRAPH CONSTRUCTION: Resolves all imports inside a single `parse::Program`.
209+
/// Note: This is a specialized helper function designed exclusively for the `DependencyGraph::new()` constructor.
210+
fn resolve_imports(
211+
current_program: &parse::Program,
212+
importer_source: &SourceFile,
213+
importer_source_name: CanonPath,
214+
dependency_map: &DependencyMap,
215+
handler: &mut ErrorCollector,
216+
) -> Vec<(CanonPath, Span)> {
217+
let mut valid_imports = Vec::new();
218+
219+
for elem in current_program.items() {
220+
let parse::Item::Use(use_decl) = elem else {
221+
continue;
222+
};
223+
224+
match dependency_map.resolve_path(importer_source_name.clone(), use_decl) {
225+
Ok(path) => valid_imports.push((path, *use_decl.span())),
226+
Err(err) => handler.push(err.with_source(importer_source.clone())),
227+
}
228+
}
229+
230+
valid_imports
231+
}
232+
233+
/// PHASE 2 OF GRAPH CONSTRUCTION: Loads, parses, and registers new dependencies.
234+
/// Note: This is a specialized helper function designed exclusively for the `DependencyGraph::new()` constructor.
235+
fn load_and_parse_dependencies(
236+
&mut self,
237+
curr_id: usize,
238+
valid_imports: Vec<(CanonPath, Span)>,
239+
inalid_imports: &mut HashSet<CanonPath>,
240+
importer_source: &SourceFile,
241+
handler: &mut ErrorCollector,
242+
queue: &mut VecDeque<usize>,
243+
) {
244+
for (path, import_span) in valid_imports {
245+
if inalid_imports.contains(&path) {
246+
continue;
247+
}
248+
249+
if let Some(&existing_id) = self.lookup.get(&path) {
250+
let deps = self.dependencies.entry(curr_id).or_default();
251+
if !deps.contains(&existing_id) {
252+
deps.push(existing_id);
253+
}
254+
continue;
255+
}
256+
257+
let Some(module) =
258+
Self::parse_and_get_program(&path, importer_source.clone(), import_span, handler)
259+
else {
260+
inalid_imports.push(path);
261+
continue;
262+
};
263+
264+
let last_ind = self.modules.len();
265+
self.modules.push(module);
266+
267+
self.lookup.insert(path.clone(), last_ind);
268+
self.paths.push(path.clone());
269+
self.dependencies.entry(curr_id).or_default().push(last_ind);
270+
271+
queue.push_back(last_ind);
272+
}
273+
}
274+
}
275+
276+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::resolution::tests::canon;
    use crate::test_utils::TempWorkspace;

    /// Verifies that a simple import chain (main -> a -> b) is registered in BFS order
    /// and that the adjacency list links the modules correctly.
    #[test]
    fn test_new_bfs_traversal_state() {
        let ws = TempWorkspace::new("bfs_state");
        let mut handler = ErrorCollector::new();

        let workspace = canon(&ws.create_dir("workspace"));

        let dir_a = canon(&ws.create_dir("workspace/a"));
        let dir_b = canon(&ws.create_dir("workspace/b"));

        let main_content = "use a::mock_file::mock_item;";
        let a_content = "use b::mock_file::mock_item;";
        let b_content = "";

        let main_file = canon(&ws.create_file("workspace/main.simf", main_content));
        let a_file = canon(&ws.create_file("workspace/a/mock_file.simf", a_content));
        let b_file = canon(&ws.create_file("workspace/b/mock_file.simf", b_content));

        let mut map = DependencyMap::new();

        map.insert(workspace.clone(), "a".to_string(), dir_a)
            .unwrap();
        map.insert(workspace.clone(), "b".to_string(), dir_b)
            .unwrap();
        let map = Arc::new(map);

        let main_source = SourceFile::new(main_file.as_path(), Arc::from(main_content));
        let main_program_option =
            parse::Program::parse_from_str_with_errors(&main_source, &mut handler);

        // Fail only this test on a setup error. Exiting the process here would take
        // the whole test harness down with it.
        let Some(main_program) = main_program_option else {
            panic!("Parser Error in Test Setup: {}", handler);
        };

        // Act
        let graph_option =
            DependencyGraph::new(main_source, map, &main_program, &mut handler).unwrap();

        let Some(graph) = graph_option else {
            panic!("DependencyGraph Error: {}", handler);
        };

        // Assert: Size checks
        assert_eq!(graph.modules.len(), 3);
        assert_eq!(graph.paths.len(), 3);

        // Assert: Ensure BFS assigned the IDs in the exact correct order
        let main_id = *graph.lookup.get(&main_file).unwrap();
        let a_id = *graph.lookup.get(&a_file).unwrap();
        let b_id = *graph.lookup.get(&b_file).unwrap();

        assert_eq!(main_id, 0);
        assert_eq!(a_id, 1);
        assert_eq!(b_id, 2);

        // Assert: Ensure the Adjacency List (dependencies map) linked them correctly
        assert_eq!(
            *graph.dependencies.get(&main_id).unwrap(),
            vec![a_id],
            "Main depends on A"
        );
        assert_eq!(
            *graph.dependencies.get(&a_id).unwrap(),
            vec![b_id],
            "A depends on B"
        );
        assert!(
            !graph.dependencies.contains_key(&b_id),
            "B depends on nothing"
        );
    }
}

src/error.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,7 @@ impl ErrorCollector {
421421
Self { errors: Vec::new() }
422422
}
423423

424-
/// Exten existing errors with specific `RichError`.
424+
/// Extend existing errors with specific `RichError`.
425425
/// We assume that `RichError` contains `SourceFile`.
426426
pub fn push(&mut self, error: RichError) {
427427
self.errors.push(error);
@@ -437,6 +437,11 @@ impl ErrorCollector {
437437
self.errors.extend(new_errors);
438438
}
439439

440+
/// The same idea applies to the `extend()` function.
441+
pub fn extend_with_handler(&mut self, source: SourceFile, handler: &ErrorCollector) {
442+
self.extend(source, handler.errors.iter().cloned());
443+
}
444+
440445
/// Returns the errors collected so far as a slice.
pub fn get(&self) -> &[RichError] {
    self.errors.as_slice()
}

0 commit comments

Comments
 (0)