@@ -14,7 +14,6 @@ use crate::{
1414use clru:: CLruCache ;
1515use rayon:: prelude:: * ;
1616use smartstring:: alias:: String as SmartString ;
17- use std:: io:: prelude:: * ;
1817use std:: ops:: Bound :: Included ;
1918use std:: path:: { Path , PathBuf } ;
2019use std:: string:: ToString ;
@@ -23,6 +22,7 @@ use std::{
2322 sync:: { Arc , Mutex } ,
2423} ;
2524use std:: { collections:: BTreeMap , num:: NonZeroUsize } ;
25+ use std:: { collections:: BTreeSet , io:: prelude:: * } ;
2626use update:: { GraphUpdate , UpdateEvent } ;
2727
2828pub const ANNIS_NS : & str = "annis" ;
@@ -113,6 +113,66 @@ fn component_path<CT: ComponentType>(
113113 }
114114}
115115
116+ /// List all the components that belong to corpus in the given directory.
117+ pub fn find_components_from_disk < CT : ComponentType , P : AsRef < Path > > (
118+ location : P ,
119+ ) -> Result < BTreeSet < Component < CT > > > {
120+ let mut result = BTreeSet :: new ( ) ;
121+ // for all component types
122+ for c in CT :: all_component_types ( ) . into_iter ( ) {
123+ let cpath = PathBuf :: from ( location. as_ref ( ) )
124+ . join ( "gs" )
125+ . join ( c. to_string ( ) ) ;
126+
127+ if cpath. is_dir ( ) {
128+ // get all the namespaces/layers
129+ for layer in cpath. read_dir ( ) ? {
130+ let layer = layer?;
131+ if layer. path ( ) . is_dir ( ) {
132+ // try to load the component with the empty name
133+ let layer_file_name = layer. file_name ( ) ;
134+ let layer_name_from_file = layer_file_name. to_string_lossy ( ) ;
135+ let layer_name: SmartString = if layer_name_from_file == DEFAULT_EMPTY_LAYER {
136+ SmartString :: default ( )
137+ } else {
138+ layer_name_from_file. into ( )
139+ } ;
140+ let empty_name_component =
141+ Component :: new ( c. clone ( ) , layer_name. clone ( ) , SmartString :: default ( ) ) ;
142+ {
143+ let cfg_file = PathBuf :: from ( location. as_ref ( ) )
144+ . join ( component_to_relative_path ( & empty_name_component) )
145+ . join ( "impl.cfg" ) ;
146+
147+ if cfg_file. is_file ( ) {
148+ result. insert ( empty_name_component. clone ( ) ) ;
149+ debug ! ( "Registered component {}" , empty_name_component) ;
150+ }
151+ }
152+ // also load all named components
153+ for name in layer. path ( ) . read_dir ( ) ? {
154+ let name = name?;
155+ let named_component = Component :: new (
156+ c. clone ( ) ,
157+ layer_name. clone ( ) ,
158+ name. file_name ( ) . to_string_lossy ( ) . into ( ) ,
159+ ) ;
160+ let cfg_file = PathBuf :: from ( location. as_ref ( ) )
161+ . join ( component_to_relative_path ( & named_component) )
162+ . join ( "impl.cfg" ) ;
163+
164+ if cfg_file. is_file ( ) {
165+ result. insert ( named_component. clone ( ) ) ;
166+ debug ! ( "Registered component {}" , named_component) ;
167+ }
168+ }
169+ }
170+ }
171+ }
172+ } // end for all components
173+ Ok ( result)
174+ }
175+
116176impl < CT : ComponentType > Graph < CT > {
117177 /// Create a new and empty instance without any location on the disk.
118178 pub fn new ( disk_based : bool ) -> Result < Self > {
@@ -233,7 +293,10 @@ impl<CT: ComponentType> Graph<CT> {
233293
234294 let logfile_exists = log_path. exists ( ) && log_path. is_file ( ) ;
235295
236- self . find_components_from_disk ( & dir2load) ?;
296+ self . components = find_components_from_disk ( & dir2load) ?
297+ . into_iter ( )
298+ . map ( |c| ( c, None ) )
299+ . collect ( ) ;
237300
238301 // If backup is active or a write log exists, always a pre-load to get the complete corpus.
239302 if logfile_exists | load_from_backup {
@@ -287,63 +350,6 @@ impl<CT: ComponentType> Graph<CT> {
287350 Ok ( ( ) )
288351 }
289352
290- fn find_components_from_disk ( & mut self , location : & Path ) -> Result < ( ) > {
291- self . components . clear ( ) ;
292-
293- // for all component types
294- for c in CT :: all_component_types ( ) . into_iter ( ) {
295- let cpath = PathBuf :: from ( location) . join ( "gs" ) . join ( c. to_string ( ) ) ;
296-
297- if cpath. is_dir ( ) {
298- // get all the namespaces/layers
299- for layer in cpath. read_dir ( ) ? {
300- let layer = layer?;
301- if layer. path ( ) . is_dir ( ) {
302- // try to load the component with the empty name
303- let layer_file_name = layer. file_name ( ) ;
304- let layer_name_from_file = layer_file_name. to_string_lossy ( ) ;
305- let layer_name: SmartString = if layer_name_from_file == DEFAULT_EMPTY_LAYER
306- {
307- SmartString :: default ( )
308- } else {
309- layer_name_from_file. into ( )
310- } ;
311- let empty_name_component =
312- Component :: new ( c. clone ( ) , layer_name. clone ( ) , SmartString :: default ( ) ) ;
313- {
314- let cfg_file = PathBuf :: from ( location)
315- . join ( component_to_relative_path ( & empty_name_component) )
316- . join ( "impl.cfg" ) ;
317-
318- if cfg_file. is_file ( ) {
319- self . components . insert ( empty_name_component. clone ( ) , None ) ;
320- debug ! ( "Registered component {}" , empty_name_component) ;
321- }
322- }
323- // also load all named components
324- for name in layer. path ( ) . read_dir ( ) ? {
325- let name = name?;
326- let named_component = Component :: new (
327- c. clone ( ) ,
328- layer_name. clone ( ) ,
329- name. file_name ( ) . to_string_lossy ( ) . into ( ) ,
330- ) ;
331- let cfg_file = PathBuf :: from ( location)
332- . join ( component_to_relative_path ( & named_component) )
333- . join ( "impl.cfg" ) ;
334-
335- if cfg_file. is_file ( ) {
336- self . components . insert ( named_component. clone ( ) , None ) ;
337- debug ! ( "Registered component {}" , named_component) ;
338- }
339- }
340- }
341- }
342- }
343- } // end for all components
344- Ok ( ( ) )
345- }
346-
347353 fn internal_save ( & self , location : & Path ) -> Result < ( ) > {
348354 let location = PathBuf :: from ( location) ;
349355
@@ -376,7 +382,7 @@ impl<CT: ComponentType> Graph<CT> {
376382
377383 fn get_cached_node_id_from_name (
378384 & self ,
379- node_name : Cow < String > ,
385+ node_name : Cow < str > ,
380386 cache : & mut CLruCache < String , Option < NodeID > > ,
381387 ) -> Result < Option < NodeID > > {
382388 if let Some ( id) = cache. get ( node_name. as_ref ( ) ) {
0 commit comments