-
Notifications
You must be signed in to change notification settings - Fork 2k
Expand file tree
/
Copy pathsimple.rs
More file actions
196 lines (179 loc) · 7.17 KB
/
simple.rs
File metadata and controls
196 lines (179 loc) · 7.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
use crate::{file_paths, trap};
use globset::{GlobBuilder, GlobSetBuilder};
use rayon::prelude::*;
use std::fs::File;
use std::io::BufRead;
use std::path::{Path, PathBuf};
use crate::diagnostics;
use crate::node_types;
/// Describes one language that an [`Extractor`] can process.
pub struct LanguageSpec {
/// Prefix handed to the node-types reader and to the extractor for this
/// language.
pub prefix: &'static str,
/// The tree-sitter language passed to the extractor when a file matches.
pub ts_language: tree_sitter::Language,
/// Node-types description for this language, passed to
/// `node_types::read_node_types_str` to build the schema.
// NOTE(review): presumably the contents of tree-sitter's `node-types.json` — confirm.
pub node_types: &'static str,
/// Glob patterns selecting the files that belong to this language. They are
/// matched against the file name only (not the full path).
pub file_globs: Vec<String>,
}
/// Configuration for a multi-language extraction run; see [`Extractor::run`].
pub struct Extractor {
/// Prefix used to create the diagnostic loggers for this run.
pub prefix: String,
/// Languages to extract; a file is processed by every language with a
/// matching glob.
pub languages: Vec<LanguageSpec>,
/// Directory under which TRAP files are written.
pub trap_dir: PathBuf,
/// Directory under which copies of the extracted source files are stored.
pub source_archive_dir: PathBuf,
/// Files that are read line by line; each line is the path of a file to
/// extract.
pub file_lists: Vec<PathBuf>,
// Typically constructed via `trap::Compression::from_env`.
// Keeping the `Result` here allows us to report the error using our
// diagnostics system without exposing that system to consumers. On `Err`,
// `run` emits a warning diagnostic and falls back to gzip.
pub trap_compression: Result<trap::Compression, String>,
}
impl Extractor {
    /// Extracts every file named in `self.file_lists`.
    ///
    /// Each line of each file list is treated as a path. The path is
    /// canonicalized, its file name is matched against the globs of all
    /// configured languages, and the file is extracted once per matching
    /// language into a single shared TRAP writer. The file is then copied into
    /// the source archive and its TRAP file is written. Files are processed in
    /// parallel on the global rayon thread pool. Finally an `extras` TRAP file
    /// holding the empty location is written.
    ///
    /// Configuration problems (thread count, TRAP compression) are reported as
    /// warning diagnostics and replaced by defaults (1 thread, gzip).
    ///
    /// # Errors
    ///
    /// Returns an error if the global thread pool cannot be built, a file list
    /// cannot be opened or read, a node-types description or the path
    /// transformer cannot be loaded, or reading, archiving or writing TRAP for
    /// any listed file fails.
    ///
    /// # Panics
    ///
    /// Panics if a language glob is invalid, if the combined glob set cannot be
    /// built, or if a computed archive path has no parent directory.
    pub fn run(&self) -> std::io::Result<()> {
        tracing::info!("Extraction started");
        let diagnostics = diagnostics::DiagnosticLoggers::new(&self.prefix);
        let mut main_thread_logger = diagnostics.logger();

        // A bad thread-count setting is not fatal: warn and run single-threaded.
        let num_threads = match crate::options::num_threads() {
            Ok(num) => num,
            Err(e) => {
                main_thread_logger.write(
                    main_thread_logger
                        .new_entry("configuration-error", "Configuration error")
                        .message(
                            "{}; defaulting to 1 thread.",
                            &[diagnostics::MessageArg::Code(&e)],
                        )
                        .severity(diagnostics::Severity::Warning),
                );
                1
            }
        };
        tracing::info!(
            "Using {} {}",
            num_threads,
            if num_threads == 1 {
                "thread"
            } else {
                "threads"
            }
        );

        // Likewise, an invalid compression setting falls back to gzip.
        let trap_compression = match &self.trap_compression {
            Ok(x) => *x,
            Err(e) => {
                main_thread_logger.write(
                    main_thread_logger
                        .new_entry("configuration-error", "Configuration error")
                        .message("{}; using gzip.", &[diagnostics::MessageArg::Code(e)])
                        .severity(diagnostics::Severity::Warning),
                );
                trap::Compression::Gzip
            }
        };
        // The main-thread logger is not needed past this point; per-file
        // loggers are created inside the parallel loop below.
        drop(main_thread_logger);

        // Report a thread-pool failure as an error rather than panicking.
        rayon::ThreadPoolBuilder::new()
            .num_threads(num_threads)
            .build_global()
            .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e.to_string()))?;

        // Open every file list up front, keeping the offending path in the
        // error so that a failure remains easy to diagnose.
        let file_lists = self
            .file_lists
            .iter()
            .map(|file_list| {
                File::open(file_list).map_err(|e| {
                    std::io::Error::new(
                        e.kind(),
                        format!("Unable to open file list at {file_list:?}: {e}"),
                    )
                })
            })
            .collect::<std::io::Result<Vec<File>>>()?;

        // One schema per language, indexed in parallel with `self.languages`.
        let mut schemas = vec![];
        for lang in &self.languages {
            let schema = node_types::read_node_types_str(lang.prefix, lang.node_types)?;
            schemas.push(schema);
        }

        // Construct a single globset containing all language globs,
        // and a mapping from glob index to language index.
        let (globset, glob_language_mapping) = {
            let mut builder = GlobSetBuilder::new();
            let mut glob_lang_mapping = vec![];
            for (i, lang) in self.languages.iter().enumerate() {
                for glob_str in &lang.file_globs {
                    let glob = GlobBuilder::new(glob_str)
                        .literal_separator(true)
                        .build()
                        .expect("invalid glob");
                    builder.add(glob);
                    glob_lang_mapping.push(i);
                }
            }
            (
                builder.build().expect("failed to build globset"),
                glob_lang_mapping,
            )
        };

        let path_transformer = file_paths::load_path_transformer()?;

        // Gather all listed paths before fanning out to the thread pool.
        let lines = file_lists
            .iter()
            .flat_map(|file_list| std::io::BufReader::new(file_list).lines())
            .collect::<std::io::Result<Vec<String>>>()?;

        lines
            .par_iter()
            .try_for_each(|line| -> std::io::Result<()> {
                let mut diagnostics_writer = diagnostics.logger();
                let path = PathBuf::from(line).canonicalize()?;
                let src_archive_file = crate::file_paths::path_for(
                    &self.source_archive_dir,
                    &path,
                    "",
                    path_transformer.as_ref(),
                );
                let source = std::fs::read(&path)?;
                let mut trap_writer = trap::Writer::new();

                let filename = match path.file_name() {
                    Some(filename) => filename,
                    None => {
                        tracing::error!(?path, "No file name found, skipping file.");
                        return Ok(());
                    }
                };
                let matches = globset.matches(filename);
                if matches.is_empty() {
                    tracing::error!(?path, "No matching language found, skipping file.");
                    return Ok(());
                }

                // Several globs may map to the same language; extract each
                // language at most once per file.
                let mut languages_processed = vec![false; self.languages.len()];
                for m in matches {
                    let i = glob_language_mapping[m];
                    if languages_processed[i] {
                        continue;
                    }
                    languages_processed[i] = true;
                    let lang = &self.languages[i];
                    crate::extractor::extract(
                        &lang.ts_language,
                        lang.prefix,
                        &schemas[i],
                        &mut diagnostics_writer,
                        &mut trap_writer,
                        None,
                        &path,
                        &source,
                        &[],
                    );
                }

                // All matching languages share one TRAP writer, so archive the
                // source and write the TRAP file once, after the last language.
                std::fs::create_dir_all(src_archive_file.parent().unwrap())?;
                std::fs::copy(&path, &src_archive_file)?;
                write_trap(&self.trap_dir, &path, &trap_writer, trap_compression)?;
                Ok(())
            })?;

        // Write the file-independent `extras` TRAP file.
        let path = PathBuf::from("extras");
        let mut trap_writer = trap::Writer::new();
        crate::extractor::populate_empty_location(&mut trap_writer);
        let res = write_trap(&self.trap_dir, &path, &trap_writer, trap_compression);
        tracing::info!("Extraction complete");
        res
    }
}
/// Writes the contents of `trap_writer` to the TRAP file for `path`.
///
/// The destination is computed from `trap_dir`, `path` and the file extension
/// of `trap_compression`; missing parent directories are created first.
///
/// # Errors
///
/// Returns any I/O error from creating the directories or writing the file.
fn write_trap(
    trap_dir: &Path,
    path: &Path,
    trap_writer: &trap::Writer,
    trap_compression: trap::Compression,
) -> std::io::Result<()> {
    let extension = trap_compression.extension();
    let destination = crate::file_paths::path_for(trap_dir, path, extension, None);
    let parent_dir = destination.parent().unwrap();
    std::fs::create_dir_all(parent_dir)?;
    trap_writer.write_to_file(&destination, trap_compression)
}