Skip to content

Commit 024cc85

Browse files
author
Thomas Hanke
committed
Redesign: now queries all types in both documents and the created widgets; also a single-HTML-page UI
1 parent 102642d commit 024cc85

6 files changed

Lines changed: 847 additions & 130 deletions

File tree

Dockerfile

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,6 @@ RUN pip install --no-cache-dir --upgrade -r requirements.txt
1414

1515
ADD . /src
1616
WORKDIR /src
17-
# get ontologies
18-
RUN curl https://raw.githubusercontent.com/Mat-O-Lab/MSEO/main/MSEO_mid.ttl > ./ontologies/mseo.ttl
19-
RUN curl https://raw.githubusercontent.com/CommonCoreOntology/CommonCoreOntologies/master/cco-merged/MergedAllCoreOntology-v1.3-2021-03-01.ttl > ./ontologies/cco.ttl
20-
RUN curl https://raw.githubusercontent.com/iofoundry/ontology/master/core/Core.rdf > ./ontologies/iof.rdf
2117
ENV PYTHONDONTWRITEBYTECODE 1
2218
# Turns off buffering for easier container logging
2319
ENV PYTHONUNBUFFERED 1

app.py

Lines changed: 165 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,33 @@
2626

2727
setting = settings.Setting()
2828

29+
# Namespace prefixes for abbreviating IRIs.
# Maps a namespace IRI to the short prefix shown in the UI.
NAMESPACE_PREFIXES = {
    'http://www.w3.org/ns/csvw#': 'csvw',
    'http://www.w3.org/ns/oa#': 'oa',
    'http://www.w3.org/ns/prov#': 'prov',
    'http://www.w3.org/2002/07/owl#': 'owl',
    'http://www.w3.org/1999/02/22-rdf-syntax-ns#': 'rdf',
    'http://www.w3.org/2000/01/rdf-schema#': 'rdfs',
    'http://purl.obolibrary.org/obo/': 'obo',
    'https://spec.industrialontologies.org/ontology/core/Core/': 'iof',
    'http://qudt.org/schema/qudt/': 'qudt',
    'https://purl.matolab.org/mseo/mid/': 'mseo'
}


def abbreviate_iri(iri):
    """Abbreviate an IRI using the known namespace prefixes.

    Args:
        iri: The IRI to shorten. May be ``None`` or empty.

    Returns:
        ``None`` if ``iri`` is falsy. If the IRI starts with a namespace from
        ``NAMESPACE_PREFIXES``, ``"<prefix>#<local part>"``. Otherwise the
        text after the last ``/`` or ``#``, ignoring trailing separators. If
        the IRI consists of separators only, it is returned unchanged.
    """
    if not iri:
        return None
    for namespace, prefix in NAMESPACE_PREFIXES.items():
        if iri.startswith(namespace):
            # NOTE(review): '#' is used as separator instead of the usual
            # CURIE ':'; kept as-is because consumers of this label may
            # depend on the current format - confirm before changing.
            return prefix + '#' + iri[len(namespace):]
    # No known prefix: fall back to the last path segment or fragment.
    # Trailing separators are dropped first so that e.g. ".../Thing/" still
    # yields "Thing" instead of an empty label.
    trimmed = iri.rstrip('/#')
    if not trimmed:
        return iri
    cut = max(trimmed.rfind('/'), trimmed.rfind('#'))
    return trimmed[cut + 1:]
55+
2956

3057
config_name = os.environ.get("APP_MODE") or "development"
3158
middleware = [
@@ -75,16 +102,6 @@
75102
app.mount("/static/", StaticFiles(directory="static", html=True), name="static")
76103
templates = Jinja2Templates(directory="templates")
77104

78-
print(os.environ.get("APP_MODE", "production"))
79-
if os.environ.get("APP_MODE", "production") == "development":
80-
print("fetching methods form MSEO repo")
81-
app.methods_dict = {
82-
"DIN_EN_ISO_527": "https://github.com/Mat-O-Lab/MSEO/raw/main/methods/DIN_EN_ISO_527-3.drawio.ttl"
83-
}
84-
else:
85-
app.methods_dict = maptomethod.get_methods()
86-
87-
88105
# flash integration like flask flash
89106
def flash(request: Request, message: Any, category: str = "info") -> None:
90107
if "_messages" not in request.session:
@@ -102,7 +119,6 @@ def get_flashed_messages(request: Request):
102119
@app.get("/", response_class=HTMLResponse, include_in_schema=False)
103120
async def index(request: Request):
104121
start_form = await forms.StartForm.from_formdata(request)
105-
start_form.method_sel.choices = [(v, k) for k, v in app.methods_dict.items()]
106122
# request.session.clear()
107123
return templates.TemplateResponse(
108124
"index.html",
@@ -128,7 +144,6 @@ async def create_mapper(request: Request):
128144
start_form = await forms.StartForm.from_formdata(request)
129145
logging.debug(start_form.data)
130146

131-
start_form.method_sel.choices = [(v, k) for k, v in app.methods_dict.items()]
132147
mapping_form = ""
133148
logging.info("create mapping")
134149
if await start_form.validate_on_submit():
@@ -142,55 +157,86 @@ async def create_mapper(request: Request):
142157
data_url = start_form.data_url.data
143158
request.session["data_url"] = data_url
144159

145-
# if url to method graph provided use it if not use select widget
160+
# Use method_url from form or use placeholder
146161
if start_form.method_url.data:
147162
method_url = start_form.method_url.data
148163
else:
149-
method_url = start_form.method_sel.data
164+
method_url = start_form.method_url.render_kw["placeholder"]
165+
flash(
166+
request,
167+
"URL Method File empty: using placeholder value for demonstration",
168+
"info",
169+
)
150170
request.session["method_url"] = method_url
151171
request.session["use_template_rowwise"] = start_form.use_template_rowwise.data
152-
# entrys from advanced form
153-
mapping_subject_class_uris = (
154-
start_form.advanced.data_subject_super_class_uris.data
155-
)
156-
request.session["mapping_subject_class_uris"] = mapping_subject_class_uris
172+
# entrys from advanced form - parse comma-separated values
173+
data_subject_types_str = start_form.advanced.data_subject_types.data or ""
174+
mapping_subject_types = [
175+
uri.strip() for uri in data_subject_types_str.split(",") if uri.strip()
176+
]
177+
request.session["mapping_subject_types"] = mapping_subject_types
178+
157179
mapping_predicate_uri = start_form.advanced.mapping_predicate_uri.data
158180
request.session["mapping_predicate_uri"] = mapping_predicate_uri
159-
mapping_object_class_uris = (
160-
start_form.advanced.method_object_super_class_uris.data
161-
)
162-
request.session["mapping_object_class_uris"] = mapping_object_class_uris
181+
182+
method_object_types_str = start_form.advanced.method_object_types.data or ""
183+
mapping_object_types = [
184+
uri.strip() for uri in method_object_types_str.split(",") if uri.strip()
185+
]
186+
request.session["mapping_object_types"] = mapping_object_types
163187

164188
try:
165189
mapper = maptomethod.Mapper(
166190
data_url=data_url,
167191
method_url=method_url,
168192
use_template_rowwise=request.session["use_template_rowwise"],
169193
mapping_predicate_uri=URIRef(mapping_predicate_uri),
170-
data_subject_super_class_uris=[
171-
URIRef(uri) for uri in mapping_subject_class_uris
194+
data_subject_types=[
195+
URIRef(uri) for uri in mapping_subject_types
172196
],
173-
method_object_super_class_uris=[
174-
URIRef(uri) for uri in mapping_object_class_uris
197+
method_object_types=[
198+
URIRef(uri) for uri in mapping_object_types
175199
],
176200
authorization=authorization,
177201
)
202+
# Store subjects and objects in session for later use in /map
203+
request.session["subjects"] = mapper.subjects
204+
request.session["objects"] = mapper.objects
178205
flash(request, str(mapper), "info")
179206
# flash(request, str(mapper.subjects), "info")
180207
except Exception as err:
181208
flash(request, str(err), "error")
182209
# print(mapper.objects.keys())
183210
# only named instances in the data can be mapped
184211
else:
185-
info_choices = [
186-
(id, value["text"])
187-
for id, value in mapper.subjects.items()
188-
if "text" in value.keys()
189-
]
212+
# Create choices with type information
213+
info_choices = []
214+
for id, value in mapper.subjects.items():
215+
if "text" in value.keys():
216+
text = value["text"]
217+
entity_type = value.get("type")
218+
if entity_type:
219+
type_abbrev = abbreviate_iri(entity_type)
220+
choice_text = f"{text} ({type_abbrev})"
221+
else:
222+
choice_text = text
223+
info_choices.append((id, choice_text))
190224
info_choices.insert(0, (None, "None"))
191-
select_forms = forms.get_select_entries(mapper.objects.keys(), info_choices)
225+
226+
# Pass both objects dict and mapper.objects for type info
227+
select_forms = forms.get_select_entries(
228+
mapper.objects,
229+
info_choices,
230+
abbreviate_iri
231+
)
192232
mapping_form = await forms.MappingFormList.from_formdata(request)
193233
mapping_form.assignments.entries = select_forms
234+
235+
# Populate form fields from session to ensure badges persist
236+
start_form.advanced.data_subject_types.data = ",".join(mapping_subject_types)
237+
start_form.advanced.method_object_types.data = ",".join(mapping_object_types)
238+
start_form.advanced.mapping_predicate_uri.data = mapping_predicate_uri
239+
194240
request.session["auth"] = authorization
195241
logging.debug("session: {}".format(request.session))
196242

@@ -220,17 +266,32 @@ async def map(request: Request):
220266
formdata = await request.form()
221267
data_url = request.session.get("data_url", None)
222268
method_url = request.session.get("method_url", None)
223-
method_sel = request.session.get("method_url", None)
224-
subjects = request.session.get("subjects", None)
225-
objects = request.session.get("objects", None)
226269
use_template_rowwise = request.session.get("use_template_rowwise", False)
227-
mapping_subject_class_uris = request.session.get("mapping_subject_class_uris", None)
270+
mapping_subject_types = request.session.get("mapping_subject_types", [])
228271
mapping_predicate_uri = request.session.get("mapping_predicate_uri", None)
229-
mapping_object_class_uris = request.session.get("mapping_object_class_uris", None)
272+
mapping_object_types = request.session.get("mapping_object_types", [])
273+
274+
# Re-query entities to ensure we have complete data (including "property" field)
275+
# instead of relying on potentially incomplete session storage
276+
subjects, _ = maptomethod.query_entities(
277+
data_url,
278+
[URIRef(uri) for uri in mapping_subject_types],
279+
authorization
280+
)
281+
objects, _ = maptomethod.query_entities(
282+
method_url,
283+
[URIRef(uri) for uri in mapping_object_types],
284+
authorization
285+
)
286+
287+
# Create form and populate with session data
230288
start_form = forms.StartForm(
231-
request, data_url=data_url, method_url=method_url, method_sel=method_sel
289+
request, data_url=data_url, method_url=method_url
232290
)
233-
start_form.method_sel.choices = [(v, k) for k, v in app.methods_dict.items()]
291+
# Populate advanced fields with session data
292+
start_form.advanced.data_subject_types.data = ",".join(mapping_subject_types)
293+
start_form.advanced.method_object_types.data = ",".join(mapping_object_types)
294+
start_form.advanced.mapping_predicate_uri.data = mapping_predicate_uri
234295
# entrys from advanced form
235296

236297
result = ""
@@ -240,19 +301,21 @@ async def map(request: Request):
240301
select_dict = dict(formdata)
241302
maplist = [(k, v) for k, v in select_dict.items() if v != "None"]
242303
logging.info("Creating mapping file for mapping list: {}".format(maplist))
304+
logging.info("Session mapping_subject_types: {}".format(mapping_subject_types))
305+
logging.info("Session mapping_object_types: {}".format(mapping_object_types))
306+
logging.info("Re-queried subjects: {}".format(subjects))
307+
logging.info("Re-queried objects: {}".format(objects))
243308
request.session["maplist"] = maplist
244-
logging.debug("subjects: {}".format(subjects))
245-
logging.debug("objects: {}".format(objects))
246309
with maptomethod.Mapper(
247310
data_url=data_url,
248311
method_url=method_url,
249312
use_template_rowwise=use_template_rowwise,
250313
mapping_predicate_uri=URIRef(mapping_predicate_uri),
251-
data_subject_super_class_uris=[
252-
URIRef(uri) for uri in mapping_subject_class_uris
314+
data_subject_types=[
315+
URIRef(uri) for uri in mapping_subject_types
253316
],
254-
method_object_super_class_uris=[
255-
URIRef(uri) for uri in mapping_object_class_uris
317+
method_object_types=[
318+
URIRef(uri) for uri in mapping_object_types
256319
],
257320
maplist=maplist,
258321
subjects=subjects,
@@ -302,43 +365,76 @@ class Config:
302365
}
303366

304367

305-
@app.post("/api/entities")
306-
def query_entities(request: QueryRequest, req: Request):
307-
authorization = req.headers.get("Authorization", None)
308-
# translate urls in entity_classes list to URIRef objects
309-
request.entity_classes = [URIRef(str(url)) for url in request.entity_classes]
310-
return maptomethod.query_entities(
311-
str(request.url), request.entity_classes, authorization
312-
)
368+
@app.get("/api/types")
def get_types(
    url: str = "https://github.com/Mat-O-Lab/CSVToCSVW/raw/main/examples/example-metadata.json",
    req: Request = None
):
    """Get all unique rdf:type values from a semantic document.

    Args:
        url: URL to the semantic document (defaults to example data file).
        req: Incoming request; only its ``Authorization`` header is read and
            forwarded to the lookup. May be ``None``, in which case no
            authorization is passed.

    Returns:
        Whatever ``maptomethod.get_all_types`` returns (described by the
        original author as a JSON array of type IRIs).

    Raises:
        HTTPException: Status 500 with the original error message as detail
            when the lookup raises.
    """
    authorization = req.headers.get("Authorization", None) if req else None
    try:
        return maptomethod.get_all_types(url, authorization)
    except Exception as err:
        # Chain the cause so the original traceback is preserved for logs.
        raise HTTPException(status_code=500, detail=str(err)) from err
387+
388+
389+
@app.get("/api/entities")
def query_entities(
    url: str = "https://github.com/Mat-O-Lab/CSVToCSVW/raw/main/examples/example-metadata.json",
    types: str = "http://www.w3.org/ns/oa#Annotation,http://www.w3.org/ns/csvw#Column",
    req: Request = None
):
    """Get entities of specified types from a semantic document.

    Args:
        url: URL to the semantic document (defaults to example data file).
        types: Comma-separated list of type URIs (defaults to Annotation and
            Column). Surrounding whitespace and empty items are ignored.
        req: Incoming request; only its ``Authorization`` header is read and
            forwarded to the query. May be ``None``, in which case no
            authorization is passed.

    Returns:
        Dict with key ``"entities"`` (first value returned by
        ``maptomethod.query_entities``) and key ``"base_namespace"`` (second
        value).

    Raises:
        HTTPException: Status 500 with the original error message as detail
            when the query raises, matching the ``/api/types`` endpoint.
    """
    authorization = req.headers.get("Authorization", None) if req else None
    # Parse comma-separated types and convert to URIRef objects
    type_list = [URIRef(uri.strip()) for uri in types.split(",") if uri.strip()]
    try:
        entities, base_ns = maptomethod.query_entities(url, type_list, authorization)
    except Exception as err:
        # Report the failure reason to the client instead of an opaque 500.
        raise HTTPException(status_code=500, detail=str(err)) from err
    return {"entities": entities, "base_namespace": base_ns}
313409

314410

315411
class MappingRequest(BaseModel):
316412
data_url: AnyUrl = Field(
317413
"", title="Datas Graph Url", description="Url to data metadata to use."
318414
)
319-
method_url: AnyUrl = Field(
320-
"", title="Method Graph Url", description="Url to knowledge graph to use."
415+
template_url: AnyUrl = Field(
416+
"", title="Template Graph Url", description="Url to knowledge graph to use."
321417
)
322418
use_template_rowwise: Optional[bool] = Field(
323419
False,
324420
title="Use Template Rowwise",
325-
description="If to duplicate the Method Graph for each row.",
421+
description="If to duplicate the Template Graph for each row.",
326422
omit_default=True,
327423
)
328-
data_super_classes: List = Field(
424+
data_types: List = Field(
329425
[maptomethod.OA.Annotation, maptomethod.CSVW.Column],
330-
title="Subject Super Classes",
331-
description="List of subject super classes to query for mapping partners in data.",
426+
title="Data Subject Types",
427+
description="List of entity types to query for mapping partners in data.",
332428
)
333429
predicate: AnyUrl = Field(
334430
maptomethod.ContentToBearingRelation,
335431
title="predicate property",
336-
description="Predicate Property to connect data to method entities.",
432+
description="Predicate Property to connect data to template entities.",
337433
)
338-
method_super_classes: List = Field(
434+
template_types: List = Field(
339435
[maptomethod.InformtionContentEntity, maptomethod.TemporalRegionClass],
340-
title="Object Super Classes",
341-
description="List of object super classes to query for mapping partners in method graph.",
436+
title="Template Object Types",
437+
description="List of entity types to query for mapping partners in template graph.",
342438
)
343439
map: dict = Field(
344440
title="Map Dict",
@@ -380,14 +476,14 @@ def mapping(request: MappingRequest, req: Request) -> StreamingResponse:
380476
try:
381477
result = maptomethod.Mapper(
382478
str(request.data_url),
383-
str(request.method_url),
384-
method_object_super_class_uris=[
385-
URIRef(str(uri)) for uri in request.method_super_classes
479+
str(request.template_url),
480+
method_object_types=[
481+
URIRef(str(uri)) for uri in request.template_types
386482
],
387483
mapping_predicate_uri=URIRef(str(request.predicate)),
388484
use_template_rowwise=request.use_template_rowwise,
389-
data_subject_super_class_uris=[
390-
URIRef(str(uri)) for uri in request.data_super_classes
485+
data_subject_types=[
486+
URIRef(str(uri)) for uri in request.data_types
391487
],
392488
maplist=request.map.items(),
393489
authorization=authorization,

0 commit comments

Comments
 (0)