@@ -73,7 +73,7 @@ def save_element(self, e, node=None):
7373 fmt = e .format ()
7474
7575 if not node :
76- curr_node = URIRef (odmlns + str (e .id ))
76+ curr_node = URIRef (odmlns + unicode (e .id ))
7777 else :
7878 curr_node = node
7979
@@ -88,6 +88,11 @@ def save_element(self, e, node=None):
8888 if isinstance (fmt , Document .__class__ ):
8989 self .g .add ((self .hub_root , odmlns .hasDocument , curr_node ))
9090
91+ # If available, add the document's file name to the document node
92+ # so we can identify where the data came from.
93+ if hasattr (e , "_origin_file_name" ):
94+ self .g .add ((curr_node , odmlns .hasFileName , Literal (e ._origin_file_name )))
95+
9196 for k in fmt .rdf_map_keys :
9297 if k == 'id' :
9398 continue
@@ -101,7 +106,7 @@ def save_element(self, e, node=None):
101106 self .g .add ((curr_node , fmt .rdf_map (k ), terminology_node ))
102107 else :
103108 # adding terminology to the hub and to link with the doc
104- node = URIRef (odmlns + str (uuid .uuid4 ()))
109+ node = URIRef (odmlns + unicode (uuid .uuid4 ()))
105110 self .g .add ((node , RDF .type , URIRef (terminology_url )))
106111 self .g .add ((self .hub_root , odmlns .hasTerminology , node ))
107112 self .g .add ((curr_node , fmt .rdf_map (k ), node ))
@@ -111,20 +116,20 @@ def save_element(self, e, node=None):
111116 k == 'sections' and len (getattr (e , k )) > 0 :
112117 sections = getattr (e , k )
113118 for s in sections :
114- node = URIRef (odmlns + str (s .id ))
119+ node = URIRef (odmlns + unicode (s .id ))
115120 self .g .add ((curr_node , fmt .rdf_map (k ), node ))
116121 self .save_element (s , node )
117122 elif isinstance (fmt , Section .__class__ ) and \
118123 k == 'properties' and len (getattr (e , k )) > 0 :
119124 properties = getattr (e , k )
120125 for p in properties :
121- node = URIRef (odmlns + str (p .id ))
126+ node = URIRef (odmlns + unicode (p .id ))
122127 self .g .add ((curr_node , fmt .rdf_map (k ), node ))
123128 self .save_element (p , node )
124129 elif isinstance (fmt , Property .__class__ ) and \
125130 k == 'value' and len (getattr (e , k )) > 0 :
126131 values = getattr (e , k )
127- seq = URIRef (odmlns + str (uuid .uuid4 ()))
132+ seq = URIRef (odmlns + unicode (uuid .uuid4 ()))
128133 self .g .add ((seq , RDF .type , RDF .Seq ))
129134 self .g .add ((curr_node , fmt .rdf_map (k ), seq ))
130135 # rdflib so far does not respect RDF:li item order
@@ -133,15 +138,15 @@ def save_element(self, e, node=None):
133138 # this should be reversed to RDF:li again!
134139 # see https://github.com/RDFLib/rdflib/issues/280
135140 # -- keep until supported
136- # bag = URIRef(odmlns + str (uuid.uuid4()))
141+ # bag = URIRef(odmlns + unicode (uuid.uuid4()))
137142 # self.g.add((bag, RDF.type, RDF.Bag))
138143 # self.g.add((curr_node, fmt.rdf_map(k), bag))
139144 # for v in values:
140145 # self.g.add((bag, RDF.li, Literal(v)))
141146
142147 counter = 1
143148 for v in values :
144- pred = "%s_%s" % (str (RDF ), counter )
149+ pred = "%s_%s" % (unicode (RDF ), counter )
145150 self .g .add ((seq , URIRef (pred ), Literal (v )))
146151 counter = counter + 1
147152
@@ -222,7 +227,11 @@ def to_odml(self):
222227
def from_file(self, filename, doc_format):
    """
    Parse an RDF file and return the odML documents it yields.

    The file is loaded into a fresh graph, which is stored on ``self.g``,
    and then converted via ``self.to_odml()``. Every item of that result
    is tagged with the base name of the source file in its
    ``_origin_file_name`` attribute; ``save_element`` writes that
    attribute out as ``odmlns.hasFileName`` when it is present.

    :param filename: location of the RDF file; passed to the graph
                     parser as ``source``.
    :param doc_format: RDF serialization format; passed to the graph
                       parser as ``format``.
    :return: the result of ``self.to_odml()``, with each item tagged.
    """
    graph = Graph()
    self.g = graph.parse(source=filename, format=doc_format)

    documents = self.to_odml()
    for document in documents:
        # Remember where the data came from (file name only, no directory).
        document._origin_file_name = os.path.basename(filename)

    return documents
226235
227236 def from_string (self , file , doc_format ):
228237 self .g = Graph ().parse (source = StringIO (file ), format = doc_format )
@@ -242,7 +251,7 @@ def parse_document(self, doc_uri):
242251 doc_attrs [attr [0 ]] = doc_uri .split ("#" , 1 )[1 ]
243252 else :
244253 if len (elems ) > 0 :
245- doc_attrs [attr [0 ]] = str (elems [0 ].toPython ())
254+ doc_attrs [attr [0 ]] = unicode (elems [0 ].toPython ())
246255
247256 return {'Document' : doc_attrs , 'odml-version' : FORMAT_VERSION }
248257
@@ -264,7 +273,7 @@ def parse_section(self, sec_uri):
264273 sec_attrs [attr [0 ]] = sec_uri .split ("#" , 1 )[1 ]
265274 else :
266275 if len (elems ) > 0 :
267- sec_attrs [attr [0 ]] = str (elems [0 ].toPython ())
276+ sec_attrs [attr [0 ]] = unicode (elems [0 ].toPython ())
268277 self ._check_mandatory_attrs (sec_attrs )
269278 return sec_attrs
270279
@@ -293,7 +302,7 @@ def parse_property(self, prop_uri):
293302 prop_attrs [attr [0 ]] = prop_uri .split ("#" , 1 )[1 ]
294303 else :
295304 if len (elems ) > 0 :
296- prop_attrs [attr [0 ]] = str (elems [0 ].toPython ())
305+ prop_attrs [attr [0 ]] = unicode (elems [0 ].toPython ())
297306 self ._check_mandatory_attrs (prop_attrs )
298307 return prop_attrs
299308
0 commit comments