From 8ae6b4590d0b541833cd6d2c7fecb94f7149c2b6 Mon Sep 17 00:00:00 2001 From: yelenacox Date: Thu, 18 Jun 2026 12:42:32 -0500 Subject: [PATCH 1/9] search-dragon script to expand enums --- src/tweaver/__init__.py | 0 src/tweaver/weaver.py | 170 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 161 insertions(+), 9 deletions(-) create mode 100644 src/tweaver/__init__.py diff --git a/src/tweaver/__init__.py b/src/tweaver/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/tweaver/weaver.py b/src/tweaver/weaver.py index 976ea03..a05cf2a 100644 --- a/src/tweaver/weaver.py +++ b/src/tweaver/weaver.py @@ -1,6 +1,12 @@ +import argparse +import csv +import io import logging +import subprocess import sys -from argparse import ArgumentParser # , FileType +from pathlib import Path + +import yaml # Rich Logging if rich is installed if sys.stderr.isatty(): @@ -35,10 +41,136 @@ def init_logging(loglevel: str | None = None): ) +def parsed_csv(csv_text: str) -> dict: + """Parse dragon_search CSV output into permissible_values object for enum yaml file.""" + reader = csv.DictReader(io.StringIO(csv_text)) + permissible_values = {} + for row in reader: + code = row["descendant_code"] + if code.lower() == "no results": + print(f"No descendants found for {row['parent_code']}") + continue + permissible_values[code] = { + "text": code, + "description": row.get("description", ""), + "title": row.get("display", ""), + } + return permissible_values + + +class IndentedDumper(yaml.Dumper): + def increase_indent(self, flow=False, indentless=False): + return super().increase_indent(flow=flow, indentless=False) + + +def expand( + local_filepath: Path | None = None, + output_filepath: Path | None = None, + iri: str | None = None, +): + """Extract Enums from a monolithic LinkML model into individual YAML files + Args: + local_filepath: The file containing the monolithic linkml model + output_filepath: The directory where the enum YAMLs are to be written + iri: Optional 
iri if a specific iri is desired other than the iri derived programmatically + Returns: + list of enum names + """ + if output_filepath is None: + output_filepath = Path("output") + + output_filepath.mkdir(parents=True, exist_ok=True) + enum_count = 0 + expanded_count = 0 + for enum_file in local_filepath.glob("Enum*.yaml"): + raw_enum = enum_file.read_text() + parsed = yaml.safe_load(raw_enum) + + enums = parsed.get("enums", {}) + for name, enum in enums.items(): + expanded_enum = output_filepath / f"{name}.yaml" + + has_permissible = ( + "permissible_values" in (enum) and enum["permissible_values"] + ) + + has_reachable = enum.get("reachable_from") or {} + has_ontology = has_reachable.get("source_ontology") + has_nodes = has_reachable.get("source_nodes") + has_direct = has_reachable.get("is_direct") + + endpoint = "-c" if has_direct else "-d" + + if has_permissible or not has_ontology: + expanded_enum.write_text(raw_enum) + logging.info(f"Copied {name} (does not require expansion)") + enum_count += 1 + expanded_count += 1 + continue + + if not has_ontology: + continue + ontology = has_ontology.split(":")[1] + if not has_nodes: + continue + + all_permissible_values = {} + node_failed = False + for node in has_nodes: + cmd = [ + "dragon_search", + "-ak", + str(node), + "-o", + str(ontology), + "-f", + str(expanded_enum), + str(endpoint), + "-s", + "0", + ] + if has_reachable.get("include_self"): + cmd.append("-p") + if iri: + cmd.extend(["-i", str(iri)]) + + result = subprocess.run( + cmd, + capture_output=True, + text=True, + ) + enum_count += 1 + if result.returncode != 0: + logging.error(f"Failed for {name}: {result.stdout}") + logging.error(f"Failed for {name}: {result.stderr}") + node_failed = True + else: + parsed_nodes = parsed_csv(expanded_enum.read_text()) + all_permissible_values.update(parsed_nodes) + logging.info(f"Expanded enumeration: {name}") + + if all_permissible_values: + parsed["enums"][name]["permissible_values"] = all_permissible_values +
expanded_enum.write_text( + yaml.dump( + parsed, + Dumper=IndentedDumper, + default_flow_style=False, + sort_keys=False, + allow_unicode=True, + ) + ) + if not node_failed: + expanded_count += 1 + + if expanded_count != enum_count: + logging.error(f"{enum_count - expanded_count} failed to be expanded.") + + def exec(args: list[str] | None = None): - parser = ArgumentParser( - prog="term-weaver", - description="""Materializing Enumerations since 2026""", + + parser = argparse.ArgumentParser( + description="Expand enums from a monolithic LinkML model" ) parser.add_argument( "-log", @@ -47,12 +179,32 @@ def exec(args: list[str] | None = None): default="INFO", help="Logging level tolerated (default is INFO)", ) + parser.add_argument( + "-s", + "--source", + required=True, + type=Path, + help="The source file containing the enumerations to be expanded", + ) + parser.add_argument( + "-o", + "--output", + required=False, + type=Path, + help="The directory where expanded output YAML files will be written", + ) + parser.add_argument( + "-i", + "--iri", + required=False, + default=None, + help="Optional iri for the parent code to pull descendants.", + ) - args = parser.parse_args(args) + args = parser.parse_args() # Initialize the logger with whatever the user requested init_logging(args.log_level) - logging.info(f"You have chose to use: {args}") - logging.warn(f"Hello") - logging.error("world") - logging.debug("Goodbye") + enums = expand( + local_filepath=args.source, output_filepath=args.output, iri=args.iri + ) From 6b6c946fa01db1f94960171ac69ce04c6c7f897b Mon Sep 17 00:00:00 2001 From: yelenacox Date: Thu, 18 Jun 2026 12:43:37 -0500 Subject: [PATCH 2/9] Modifying README --- README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README.md b/README.md index 13ba176..8b16633 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ Materialize LinkML enumerations + ## Install For using on a local machine, it is recommended to add the dev dependencies: @@ 
-10,3 +11,14 @@ pip install -e ".[dev]" ``` This enables rich output which can be helpful. + +## Running the script +`tweaver -s {path/to/source/files} -o {path/to/output/directory}` + +## [LinkML properties](https://linkml.io/linkml-model/latest/docs/ReachabilityQuery/) currently supported +- source_ontology +- source_nodes +- relationship_types + - only supporting rdfs:subClassOf +- is_direct +- include_self From 581cb87af2abc8ca55f567b625138d8c7d7f672e Mon Sep 17 00:00:00 2001 From: yelenacox Date: Thu, 18 Jun 2026 13:11:22 -0500 Subject: [PATCH 3/9] Adding dependency --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 206fa97..e3df151 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,8 @@ dependencies = [ "requests", "linkml", "PyYAML", - "jinja2"] + "jinja2", + "search-dragon@git+https://github.com/NIH-NCPI/search-dragon.git@yc/fd-3693"] dynamic = ["version"] [project.optional-dependencies] From ab3c337345bdb034ec6df92053bdfa8676367d51 Mon Sep 17 00:00:00 2001 From: yelenacox Date: Thu, 18 Jun 2026 15:53:28 -0500 Subject: [PATCH 4/9] Modifying dependency --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e3df151..380a1f2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ dependencies = [ "linkml", "PyYAML", "jinja2", - "search-dragon@git+https://github.com/NIH-NCPI/search-dragon.git@yc/fd-3693"] + "search-dragon@git+https://github.com/NIH-NCPI/search-dragon.git"] dynamic = ["version"] [project.optional-dependencies] From c1cfa0c3edcd7891b276050cd5fc97c200d768e5 Mon Sep 17 00:00:00 2001 From: yelenacox Date: Mon, 22 Jun 2026 14:18:15 -0500 Subject: [PATCH 5/9] Adding script to update imports property in model file --- src/tweaver/weaver.py | 51 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/src/tweaver/weaver.py 
b/src/tweaver/weaver.py index a05cf2a..41c6e6e 100644 --- a/src/tweaver/weaver.py +++ b/src/tweaver/weaver.py @@ -41,14 +41,15 @@ def init_logging(loglevel: str | None = None): ) -def parsed_csv(csv_text: str) -> dict: +def parsed_csv(csv_text: str, endpoint: str) -> dict: """Parse dragon_search CSV output into permissible_values object for enum yaml file.""" reader = csv.DictReader(io.StringIO(csv_text)) permissible_values = {} + argument = "children" if endpoint == "-c" else "descendants" for row in reader: code = row["descendant_code"] if code.lower() == "no results": - print(f"No descendants found for {row['parent_code']}") + print(f"No {argument} found for {row['parent_code']}") continue permissible_values[code] = { "text": code, @@ -82,12 +83,14 @@ def expand( output_filepath.mkdir(parents=True, exist_ok=True) enum_count = 0 expanded_count = 0 + enum_names = [] for enum_file in local_filepath.glob("Enum*.yaml"): raw_enum = enum_file.read_text() parsed = yaml.safe_load(raw_enum) enums = parsed.get("enums", {}) for name, enum in enums.items(): + enum_names.append(name) expanded_enum = output_filepath / f"{name}.yaml" has_permissible = ( @@ -145,7 +148,7 @@ def expand( logging.error(f"Failed for {name}: {result.stderr}") node_failed = True else: - parsed_nodes = parsed_csv(expanded_enum.read_text()) + parsed_nodes = parsed_csv(expanded_enum.read_text(), endpoint) all_permissible_values.update(parsed_nodes) logging.info(f"Expanded enumeration: {name}") @@ -165,6 +168,35 @@ def expand( if expanded_count != enum_count: logging.error(f"{enum_count - expanded_count} failed to be expanded.") + return enum_names + + +def update_imports(enum_list: list[str], model_filepath: Path): + """ + Writes the name of each enum to "imports" property in model file. + + Opens file containing the master LinkML model and gets the data under the 'imports' key. + Appends the name of each extracted enumeration to any imports that may already exist, if it is not already there. 
+ Writes the file with enum updates to the same filepath. + """ + with model_filepath.open() as imports: + imports_parsed = yaml.safe_load(imports) + + existing_imports = imports_parsed.get("imports", []) + updated_imports = existing_imports + [ + n for n in enum_list if n not in existing_imports + ] + imports_parsed["imports"] = updated_imports + + with model_filepath.open("w") as f: + yaml.dump( + imports_parsed, + f, + sort_keys=False, + Dumper=IndentedDumper, + indent=2, + default_flow_style=False, + ) def exec(args: list[str] | None = None): @@ -186,6 +218,13 @@ def exec(args: list[str] | None = None): type=Path, help="The source file containing the enumerations to be expanded", ) + parser.add_argument( + "-m", + "--model", + required=False, + type=Path, + help="The path of the model YAML file", + ) parser.add_argument( "-o", "--output", @@ -206,5 +245,9 @@ def exec(args: list[str] | None = None): init_logging(args.log_level) enums = expand( - local_filepath=args.source, output_filepath=args.output, iri=args.iri + local_filepath=args.source, + output_filepath=args.output, + iri=args.iri, ) + + update_imports(enum_list=enums, model_filepath=args.model) From f511739adc93c514baa0fffd386933d3f0debc1e Mon Sep 17 00:00:00 2001 From: yelenacox Date: Mon, 22 Jun 2026 16:01:23 -0500 Subject: [PATCH 6/9] Modifying version --- pyproject.toml | 2 +- src/tweaver/__init__.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index be226f4..0ef0a04 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ dev = [ ] [tool.setuptools_scm] -version_file = "src/tweaver/_version.py" +#version_file = "src/tweaver/_version.py" [tool.setuptools] include-package-data = true diff --git a/src/tweaver/__init__.py b/src/tweaver/__init__.py index e69de29..5633ae7 100644 --- a/src/tweaver/__init__.py +++ b/src/tweaver/__init__.py @@ -0,0 +1,7 @@ +from importlib.metadata import PackageNotFoundError, version + +try: + __version__ 
= version("tweaver") +except PackageNotFoundError: + # package is not installed + __version__ = "0.0.0" From f02ad0b231cbe011953139ab64b3fc696ad1e8bf Mon Sep 17 00:00:00 2001 From: yelenacox Date: Mon, 22 Jun 2026 16:04:58 -0500 Subject: [PATCH 7/9] Modifying required python version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0ef0a04..5f49a12 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" name = "term-weaver" description = "Enumeration Materialization" readme = "README.md" -requires-python = ">=3.10" +requires-python = ">=3.12" classifiers = [ "Programming Language :: Python :: 3", ] From 563ec62cb7198cde5ef10c004231f9d6306f6487 Mon Sep 17 00:00:00 2001 From: Eric Torstenson Date: Mon, 22 Jun 2026 16:34:55 -0500 Subject: [PATCH 8/9] The version() expects the project name defined inside the pyproject.toml file --- src/tweaver/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tweaver/__init__.py b/src/tweaver/__init__.py index 5633ae7..d1aa6dc 100644 --- a/src/tweaver/__init__.py +++ b/src/tweaver/__init__.py @@ -1,7 +1,7 @@ from importlib.metadata import PackageNotFoundError, version try: - __version__ = version("tweaver") + __version__ = version("term-weaver") except PackageNotFoundError: # package is not installed __version__ = "0.0.0" From 4080358ec9c62925f64a8ac56998c1117b876d0a Mon Sep 17 00:00:00 2001 From: yelenacox Date: Tue, 23 Jun 2026 11:09:14 -0500 Subject: [PATCH 9/9] Pulling version from __init__.py --- src/tweaver/_version.py | 24 ------------------------ src/tweaver/weaver.py | 9 +++++++++ 2 files changed, 9 insertions(+), 24 deletions(-) delete mode 100644 src/tweaver/_version.py diff --git a/src/tweaver/_version.py b/src/tweaver/_version.py deleted file mode 100644 index 68f437f..0000000 --- a/src/tweaver/_version.py +++ /dev/null @@ -1,24 +0,0 @@ -# file generated by vcs-versioning 
-# don't change, don't track in version control -from __future__ import annotations - -__all__ = [ - "__version__", - "__version_tuple__", - "version", - "version_tuple", - "__commit_id__", - "commit_id", -] - -version: str -__version__: str -__version_tuple__: tuple[int | str, ...] -version_tuple: tuple[int | str, ...] -commit_id: str | None -__commit_id__: str | None - -__version__ = version = '0.1.dev2+gd224cbbae.d20260513' -__version_tuple__ = version_tuple = (0, 1, 'dev2', 'gd224cbbae.d20260513') - -__commit_id__ = commit_id = 'gd224cbbae' diff --git a/src/tweaver/weaver.py b/src/tweaver/weaver.py index 41c6e6e..fa52356 100644 --- a/src/tweaver/weaver.py +++ b/src/tweaver/weaver.py @@ -8,6 +8,8 @@ import yaml +from tweaver.__init__ import __version__ + # Rich Logging if rich is installed if sys.stderr.isatty(): from rich.console import Console @@ -239,6 +241,13 @@ def exec(args: list[str] | None = None): default=None, help="Optional iri for the parent code to pull descendants.", ) + parser.add_argument( + "-v", + "--version", + action="version", + version=f"{__version__}", + help="Pulls the version from the __init__.py file", + ) args = parser.parse_args() # Initialize the logger with whatever the user requested