From 910c30853522e96382206bb33889bf824f7b1a52 Mon Sep 17 00:00:00 2001 From: Ville Laitila Date: Tue, 16 Jun 2026 23:35:37 +0300 Subject: [PATCH] feat(cli): add model comparison CLI and correct comparison docs Add a shell entry point for comparing two sgraph models: python -m sgraph.cli.compare MODEL_A MODEL_B [options] Modeled on the existing sgraph.cypher CLI (argparse, status to stderr). Options: -f/--format {text,json}, -o/--output FILE, --rename-detection, --exclude-attrs a,b,c. Output is either the human-readable printCompareInfos() summary (text) or pretty-printed JSON listing added, removed and changed elements and dependencies. Exit codes follow git diff conventions so the command is usable as a change gate in scripts/CI: 0 no differences 1 differences found 2 error (bad path, parse failure, usage error) Also correct the comparison documentation, which described an API that does not exist (compare() returning a dict, a calculateSimilarity() method, and an Example 9 that called .get() on the result). The docs now match the real ModelCompare API: compare()/compareModels() return an SGraph, and getCompareInfos()/printCompareInfos() extract structured results. Implementation is split into a testable run(argv) -> int core and a thin main(); 7 tests cover both output formats, the three exit codes, --exclude-attrs and --rename-detection. 
--- README.md | 22 +++++ docs/api-reference.md | 91 +++++++++++++++++++-- docs/examples.md | 69 +++++++++------- src/sgraph/cli/compare.py | 146 ++++++++++++++++++++++++++++++++++ tests/cli/test_compare_cli.py | 116 +++++++++++++++++++++++++++ 5 files changed, 408 insertions(+), 36 deletions(-) create mode 100644 src/sgraph/cli/compare.py create mode 100644 tests/cli/test_compare_cli.py diff --git a/README.md b/README.md index dedbe84..5fad2c1 100644 --- a/README.md +++ b/README.md @@ -159,6 +159,28 @@ python -m sgraph.cypher model.xml.zip -f dot 'MATCH (a)-[r]->(b) RETURN a, r, b' See the [Cypher documentation](https://softagram.github.io/sgraph/cypher.html) for full details and query examples. +### Comparing models + +Two models can be compared to see what was added, removed, or changed: + +```python +from sgraph.compare.modelcompare import ModelCompare + +mc = ModelCompare() +compare_model = mc.compare('old_model.xml', 'new_model.xml') # returns an SGraph +mc.printCompareInfos(compare_model) +``` + +A CLI is also available (exit codes follow `diff` / `git diff --exit-code`: `0` = no differences, `1` = differences, `2` = error): + +```bash +python -m sgraph.cli.compare old_model.xml new_model.xml # human-readable summary +python -m sgraph.cli.compare old_model.xml new_model.xml -f json # machine-readable JSON +python -m sgraph.cli.compare old_model.xml new_model.xml --rename-detection +``` + +See the [API reference](https://softagram.github.io/sgraph/api-reference.html#comparison) for the full comparison API. + ## Current utilization [Softagram](https://github.com/softagram) uses it for building up the information model about the analyzed software. 
diff --git a/docs/api-reference.md b/docs/api-reference.md index 759e2b5..6691eef 100644 --- a/docs/api-reference.md +++ b/docs/api-reference.md @@ -380,16 +380,59 @@ filtering = SGraphFiltering(model) from sgraph.compare.modelcompare import ModelCompare comparer = ModelCompare() -result = comparer.compare('old_model.xml', 'new_model.xml') +# Returns a *compare model* (an SGraph), not a plain dict. +compare_model = comparer.compare('old_model.xml', 'new_model.xml') ``` -#### Methods +`compare()` / `compareModels()` return a new `SGraph` (the "compare model") in +which differences are annotated as element/association attributes (`compare`, +`_only_in`, `_changed_dep`, `_change_count`, `_attr_diff`, ...). Use the +`getCompareInfos()` / `printCompareInfos()` helpers, or the individual +extractors below, to turn that compare model into structured results. + +#### Building the compare model + +##### `compare(path1: str, path2: str, exclude_attrs: set[str] | None = None) -> SGraph` +Loads two models from XML (or zipped XML) file paths and compares them. + +##### `compareModels(model1: SGraph, model2: SGraph, rename_detection: bool = False, exclude_attrs: set[str] | None = None) -> SGraph` +Compares two already-loaded in-memory models. + +`exclude_attrs` is a set of attribute names to ignore during comparison. The +preset `SLIDING_WINDOW_ATTRS` (from `sgraph.compare.compareutils`) suppresses +time-windowed metric noise (author/commit/bug counts, `last_modified`, etc.): + +```python +from sgraph.compare.compareutils import SLIDING_WINDOW_ATTRS -##### `compare(old_model: str, new_model: str) -> Dict` -Compares two models and returns differences. +compare_model = comparer.compare('a.xml', 'b.xml', exclude_attrs=SLIDING_WINDOW_ATTRS) +``` + +#### Reading the results + +##### `getCompareInfos(compare_model: SGraph) -> tuple` +Returns a 6-tuple: +`(new_deps, removed_deps, changed_elems, new_elems, removed_elems, attr_changes)`. 
+ +##### `printCompareInfos(compare_model: SGraph) -> tuple` +Prints a human-readable summary and returns the same 6-tuple as `getCompareInfos()`. -##### `calculateSimilarity(old_model: str, new_model: str) -> float` -Calculates similarity score between models. +The tuple elements are: + +| Field | Shape | Meaning | +|-------|-------|---------| +| `new_deps` | `list[(SElementAssociation, int)]` | Added dependencies with dependency length, longest first | +| `removed_deps` | `list[(SElementAssociation, int)]` | Removed dependencies with dependency length, longest first | +| `changed_elems` | `list[(SElement, int)]` | Elements with a change count, highest first | +| `new_elems` | `list[(str, SElement)]` | Added elements as `("parent/name", element)` | +| `removed_elems` | `list[(str, SElement)]` | Removed elements as `("parent/name", element)` | +| `attr_changes` | `list[(SElement, str)]` | Elements whose attributes changed, with the diff string | + +Individual extractors are also available: `newAndRemovedElems()`, +`newAndRemovedDependenciesLists()`, `elemsWithChanges()`, +`elemsWithAttrChanges()`, `uniqueConnectionsCreated()`, +`uniqueConnectionsRemoved()`, `externalChanges()`, and +`getElementsWithAttrDiff(compare_model, attribute)`. ## CLI Tools @@ -420,6 +463,42 @@ Options: - `--type TYPE` - Filter by element type - `--output FILE` - Output file path +### compare + +Compare two models and report the differences (added/removed/changed elements +and dependencies). `MODEL_A` is the "before"/old model, `MODEL_B` the +"after"/new model. 
+ +```bash +# Human-readable summary +python -m sgraph.cli.compare old_model.xml new_model.xml + +# Machine-readable, pretty-printed JSON +python -m sgraph.cli.compare old_model.xml new_model.xml -f json +``` + +Options: +- `-f, --format {text,json}` - Output format (default: `text`; `text` reuses `ModelCompare.printCompareInfos()`) +- `-o, --output FILE` - Write output to a file instead of stdout +- `--rename-detection` - Detect renamed elements (collapses an add+remove into a single changed element annotated with `old_name`) +- `--exclude-attrs a,b,c` - Comma-separated attribute names to ignore during comparison + +Exit codes follow `diff` / `git diff --exit-code` conventions: + +| Code | Meaning | +|------|---------| +| `0` | Models are equivalent (no differences) | +| `1` | Differences were found | +| `2` | Error (bad path, parse failure, or usage error) | + +This makes it usable as a change gate in scripts/CI: + +```bash +if ! python -m sgraph.cli.compare before.xml after.xml -f json -o diff.json; then + echo "Model changed — see diff.json" +fi +``` + ## Exceptions ### SElementMergedException diff --git a/docs/examples.md b/docs/examples.md index e57771f..54d39b2 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -463,47 +463,56 @@ def create_interactive_visualization(model_path): ```python from sgraph.compare.modelcompare import ModelCompare -from sgraph.modelapi import ModelApi def compare_model_versions(old_model_path, new_model_path): """ - Compare two versions of a model to track changes + Compare two versions of a model to track changes. + + ModelCompare.compare() returns a *compare model* (an SGraph) with the + differences annotated as attributes. getCompareInfos() turns that compare + model into structured lists. """ comparer = ModelCompare() - comparison = comparer.compare(old_model_path, new_model_path) - + compare_model = comparer.compare(old_model_path, new_model_path) + + # getCompareInfos returns a 6-tuple. 
Note the order: + # (new_deps, removed_deps, changed_elems, new_elems, removed_elems, attr_changes) + (new_deps, removed_deps, changed_elems, + new_elems, removed_elems, attr_changes) = comparer.getCompareInfos(compare_model) + print("=== Model Evolution Analysis ===") print(f"Old model: {old_model_path}") print(f"New model: {new_model_path}") print("=" * 40) - - # Analyze changes - added_elements = comparison.get('added_elements', []) - removed_elements = comparison.get('removed_elements', []) - modified_elements = comparison.get('modified_elements', []) - - print(f"šŸ“ˆ Added elements: {len(added_elements)}") - for elem in added_elements[:5]: - print(f" + {elem}") - if len(added_elements) > 5: - print(f" ... and {len(added_elements) - 5} more") - - print(f"\nšŸ“‰ Removed elements: {len(removed_elements)}") - for elem in removed_elements[:5]: - print(f" - {elem}") - if len(removed_elements) > 5: - print(f" ... and {len(removed_elements) - 5} more") - - print(f"\nšŸ”„ Modified elements: {len(modified_elements)}") - for elem in modified_elements[:5]: - print(f" ~ {elem}") - if len(modified_elements) > 5: - print(f" ... and {len(modified_elements) - 5} more") - - return comparison + + # new_elems / removed_elems are lists of ("parent/name", SElement) tuples. + print(f"📈 Added elements: {len(new_elems)}") + for label, elem in new_elems[:5]: + print(f" + {elem.getPath()}") + if len(new_elems) > 5: + print(f" ... and {len(new_elems) - 5} more") + + print(f"\n📉 Removed elements: {len(removed_elems)}") + for label, elem in removed_elems[:5]: + print(f" - {elem.getPath()}") + if len(removed_elems) > 5: + print(f" ... and {len(removed_elems) - 5} more") + + # changed_elems is a list of (SElement, change_count) tuples. + print(f"\n🔄 Changed elements: {len(changed_elems)}") + for elem, change_count in changed_elems[:5]: + print(f" ~ {elem.getPath()} ({change_count} changes)") + if len(changed_elems) > 5: + print(f" ... 
and {len(changed_elems) - 5} more") + + # new_deps / removed_deps are lists of (SElementAssociation, length) tuples. + print(f"\n🔗 Added dependencies: {len(new_deps)}," + f" removed: {len(removed_deps)}") + + return compare_model # Usage -# evolution = compare_model_versions('v1.0_model.xml', 'v2.0_model.xml') +# compare_model = compare_model_versions('v1.0_model.xml', 'v2.0_model.xml') ``` ### Example 10: Custom Metrics Calculation diff --git a/src/sgraph/cli/compare.py b/src/sgraph/cli/compare.py new file mode 100644 index 0000000..5249157 --- /dev/null +++ b/src/sgraph/cli/compare.py @@ -0,0 +1,146 @@ +from __future__ import annotations + +import argparse +import contextlib +import json +import sys + +from sgraph import SGraph +from sgraph.compare.modelcompare import ModelCompare + +""" +Compare two sgraph models and report the differences. + +Usage: + python -m sgraph.cli.compare MODEL_A MODEL_B [options] + + MODEL_A is the "before"/old model, MODEL_B the "after"/new model + (paths to .xml or .xml.zip files). 
+ +Exit codes (git-diff style): + 0 models are equivalent (no differences) + 1 differences were found + 2 an error occurred (bad path, parse failure, usage error) +""" + + +def _build_payload(model_a: str, model_b: str, infos) -> dict: + """Turn ModelCompare.getCompareInfos() output into a JSON-friendly dict.""" + (new_deps, removed_deps, changed_elems, new_elems, removed_elems, + attr_changes) = infos + + def dep_entry(item): + association, length = item + return { + 'from': association.fromElement.getPath(), + 'to': association.toElement.getPath(), + 'deptype': association.deptype, + 'length': length, + } + + def changed_entry(item): + elem, change_count = item + entry = {'path': elem.getPath(), 'change_count': int(change_count)} + if elem.attrs.get('renamed') == 'true' and 'old_name' in elem.attrs: + entry['old_name'] = elem.attrs['old_name'] + return entry + + payload = { + 'model_a': model_a, + 'model_b': model_b, + 'new_elements': [{'path': e.getPath()} for _, e in new_elems], + 'removed_elements': [{'path': e.getPath()} for _, e in removed_elems], + 'changed_elements': [changed_entry(c) for c in changed_elems], + 'new_dependencies': [dep_entry(d) for d in new_deps], + 'removed_dependencies': [dep_entry(d) for d in removed_deps], + # Drop entries whose diff is empty: they carry no information (e.g. an + # element whose only differing attribute was excluded via --exclude-attrs). 
+ 'attr_changes': [{'path': e.getPath(), 'diff': d} + for e, d in attr_changes if d], + } + payload['summary'] = { + 'new_elements': len(payload['new_elements']), + 'removed_elements': len(payload['removed_elements']), + 'changed_elements': len(payload['changed_elements']), + 'new_dependencies': len(payload['new_dependencies']), + 'removed_dependencies': len(payload['removed_dependencies']), + 'attr_changes': len(payload['attr_changes']), + } + return payload + + +def _write_output(payload: dict, compare_model: SGraph, mc: ModelCompare, + output_format: str, output: str | None): + stream = open(output, 'w') if output else sys.stdout + try: + if output_format == 'json': + stream.write(json.dumps(payload, indent=2) + '\n') + else: + # Reuse the library's human-readable summary printer. + with contextlib.redirect_stdout(stream): + mc.printCompareInfos(compare_model) + finally: + if output: + stream.close() + + +def _parse_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser( + prog='python -m sgraph.cli.compare', + description='Compare two sgraph models and report the differences.') + parser.add_argument('model_a', + help='Path to the "before"/old model (.xml or .xml.zip)') + parser.add_argument('model_b', + help='Path to the "after"/new model (.xml or .xml.zip)') + parser.add_argument('-f', '--format', default='text', choices=['text', 'json'], + help='Output format (default: text)') + parser.add_argument('-o', '--output', default=None, metavar='FILE', + help='Write output to FILE instead of stdout') + parser.add_argument('--rename-detection', action='store_true', + help='Detect renamed elements (collapses an add+remove ' + 'into a single changed element annotated with old_name)') + parser.add_argument('--exclude-attrs', default=None, metavar='a,b,c', + help='Comma-separated attribute names to ignore during ' + 'comparison') + return parser.parse_args(argv) + + +def run(argv: list[str]) -> int: + """Run the comparison. 
Returns the process exit code (0/1/2).""" + args = _parse_args(argv) + + exclude_attrs = None + if args.exclude_attrs: + exclude_attrs = {name.strip() for name in args.exclude_attrs.split(',') + if name.strip()} + + mc = ModelCompare() + try: + if args.rename_detection: + # rename_detection lives on compareModels, so load the models first. + model1 = SGraph.parse_xml_or_zipped_xml(args.model_a) + model2 = SGraph.parse_xml_or_zipped_xml(args.model_b) + compare_model = mc.compareModels(model1, model2, rename_detection=True, + exclude_attrs=exclude_attrs) + else: + compare_model = mc.compare(args.model_a, args.model_b, + exclude_attrs=exclude_attrs) + except Exception as e: # noqa: BLE001 - surface any load/parse failure as exit 2 + print(f'Error: {e}', file=sys.stderr) + return 2 + + infos = mc.getCompareInfos(compare_model) + payload = _build_payload(args.model_a, args.model_b, infos) + has_diff = any(payload['summary'].values()) + + _write_output(payload, compare_model, mc, args.format, args.output) + + return 1 if has_diff else 0 + + +def main(): + sys.exit(run(sys.argv[1:])) + + +if __name__ == '__main__': + main() diff --git a/tests/cli/test_compare_cli.py b/tests/cli/test_compare_cli.py new file mode 100644 index 0000000..288538d --- /dev/null +++ b/tests/cli/test_compare_cli.py @@ -0,0 +1,116 @@ +"""Tests for the `python -m sgraph.cli.compare` command-line entry point.""" +import json + +from sgraph import SGraph, SElement +from sgraph.cli import compare as compare_cli + +MODEL_A = 'tests/modelfile.xml' +MODEL_B = 'tests/modelfile_direct_indirect.xml' + + +def _write_file_model(path, attrs): + """Write a tiny one-file model with the given attrs on /proj/src/file.py.""" + m = SGraph(SElement(None, '')) + e = m.createOrGetElementFromPath('/proj/src/file.py') + for k, v in attrs.items(): + e.addAttribute(k, v) + m.to_xml(path) + + +def _write_single_child(path, name): + """Write a model whose only leaf is /p/src/.""" + m = SGraph(SElement(None, '')) + 
m.createOrGetElementFromPath('/p/src/' + name) + m.to_xml(path) + + +def test_text_output_prints_summary_and_exits_one_on_difference(capsys): + code = compare_cli.run([MODEL_A, MODEL_B]) + out = capsys.readouterr().out + assert code == 1 # git-diff style: differences found + assert 'New elements' in out + assert 'Removed elements' in out + + +def test_json_output_reports_correct_counts(capsys): + code = compare_cli.run([MODEL_A, MODEL_B, '-f', 'json']) + out = capsys.readouterr().out + assert code == 1 + data = json.loads(out) + s = data['summary'] + assert s['new_elements'] == 6 + assert s['removed_elements'] == 26 + assert s['new_dependencies'] == 4 + assert s['removed_dependencies'] == 6 + assert s['changed_elements'] == 2 + # summary counts must match the list lengths + assert len(data['new_elements']) == s['new_elements'] + assert len(data['removed_dependencies']) == s['removed_dependencies'] + # dependency entries carry from/to/deptype/length + dep = data['removed_dependencies'][0] + assert {'from', 'to', 'deptype', 'length'} <= set(dep) + assert data['model_a'] == MODEL_A and data['model_b'] == MODEL_B + + +def test_identical_models_exit_zero_with_empty_summary(capsys): + code = compare_cli.run([MODEL_A, MODEL_A, '-f', 'json']) + out = capsys.readouterr().out + assert code == 0 + data = json.loads(out) + assert all(v == 0 for v in data['summary'].values()) + + +def test_missing_file_exits_two(capsys): + code = compare_cli.run(['tests/does_not_exist_xyz.xml', MODEL_B]) + err = capsys.readouterr().err + assert code == 2 + assert 'rror' in err # "Error: ..." 
+ + +def test_exclude_attrs_suppresses_attribute(tmp_path, capsys): + a = str(tmp_path / 'a.xml') + b = str(tmp_path / 'b.xml') + _write_file_model(a, {'hash': 'same', 'commit_count_30': '5'}) + _write_file_model(b, {'hash': 'same', 'commit_count_30': '15'}) + + # Without exclude: the attribute change is reported, exit 1 + code = compare_cli.run([a, b, '-f', 'json']) + data = json.loads(capsys.readouterr().out) + assert code == 1 + diffs = ' '.join(e['diff'] for e in data['attr_changes']) + assert 'commit_count_30' in diffs + + # With exclude: no meaningful change remains, exit 0 and no attr_changes + code = compare_cli.run([a, b, '-f', 'json', '--exclude-attrs', 'commit_count_30']) + data = json.loads(capsys.readouterr().out) + assert code == 0 + assert data['attr_changes'] == [] + + +def test_rename_detection_collapses_add_remove_into_change(tmp_path, capsys): + a = str(tmp_path / 'a.xml') + b = str(tmp_path / 'b.xml') + _write_single_child(a, 'alpha.py') + _write_single_child(b, 'beta.py') + + # Without rename detection: alpha removed, beta added + compare_cli.run([a, b, '-f', 'json']) + data = json.loads(capsys.readouterr().out) + assert any(e['path'].endswith('alpha.py') for e in data['removed_elements']) + assert any(e['path'].endswith('beta.py') for e in data['new_elements']) + + # With rename detection: collapsed into a changed element carrying old_name + compare_cli.run([a, b, '-f', 'json', '--rename-detection']) + data = json.loads(capsys.readouterr().out) + assert data['removed_elements'] == [] + assert data['new_elements'] == [] + assert any(e.get('old_name') == 'alpha.py' for e in data['changed_elements']) + + +def test_output_flag_writes_to_file_not_stdout(tmp_path, capsys): + out_file = tmp_path / 'out.json' + code = compare_cli.run([MODEL_A, MODEL_B, '-f', 'json', '-o', str(out_file)]) + assert code == 1 + assert capsys.readouterr().out.strip() == '' # nothing on stdout + data = json.loads(out_file.read_text()) + assert 
data['summary']['new_elements'] == 6