# leadforge/pyproject.toml (leadforge-dev/leadforge, main branch)

# PEP 517 build configuration: the package is built with the setuptools
# backend.  ``wheel`` is deliberately not listed — the setuptools backend
# requests it on its own when a wheel build needs it, and naming it here
# would also force it onto source-distribution builds that do not use it.
[build-system]
requires = ["setuptools>=68"]
build-backend = "setuptools.build_meta"

# Core package metadata (PEP 621).
[project]
name = "leadforge"
version = "1.0.0"
description = "Opinionated framework for generating synthetic CRM and GTM datasets from simulated commercial worlds"
readme = "README.md"
license = { text = "MIT" }
requires-python = ">=3.11"
authors = [{ name = "leadforge contributors" }]
keywords = ["synthetic data", "CRM", "lead scoring", "machine learning", "simulation"]
# Trove classifiers; the listed Python minors start at the
# ``requires-python`` floor above.
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Intended Audience :: Education",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
# Runtime dependencies, installed with every ``pip install leadforge``.
dependencies = [
    # Plain ``typer`` rather than ``typer[all]``: from 0.12 onwards the
    # ``typer`` distribution already pulls in its optional CLI niceties and
    # no longer defines an ``all`` extra, so requesting it only makes pip
    # warn that the extra is not provided.
    "typer>=0.12",
    "pyyaml>=6.0",
    "pandas>=2.0",
    "pyarrow>=14.0",
    "networkx>=3.2",
    "numpy>=1.26",
]

# Optional dependency groups ("extras").  Entries inside each extra are
# listed alphabetically.
[project.optional-dependencies]
# Development toolchain: test runner, linter, type checker, and the
# libraries CI's "test" job needs (that job installs only this extra).
dev = [
    # PR 7.2: render_*_html() in the preview-page renderers
    # (scripts/preview_{kaggle,hf}_page.py) calls into markdown-it-py at
    # test time.  Because CI's "test" job installs only [dev], the
    # dependency is listed here as well as in [publish] so the tests do not
    # ImportError midway.  pytest.importorskip would also work, but the
    # rendering tests are that PR's primary coverage — gating them off
    # would defeat the purpose.
    "markdown-it-py>=3.0",
    "matplotlib>=3.7",
    "mypy>=1.10",
    "pre-commit>=3.7",
    "pytest>=8.0",
    "ruff>=0.4",
    # Capped below 1.9: scikit-learn 1.9.0 changed the
    # HistGradientBoostingClassifier defaults in a way that improves the
    # flat GBM more than the engineered one.  The headline
    # GBM(eng)−GBM(flat) lift in NB02 then goes non-positive, which breaks
    # the G13.1 CI gate.  The cap stays while the notebook targets are
    # recalibrated or the engineered features are updated to restore
    # positive lift on 1.9.
    # See: https://github.com/leadforge-dev/leadforge/issues/114
    "scikit-learn>=1.3,<1.9",
    "types-pyyaml>=6.0",
]
scripts = [
    "matplotlib>=3.7",
    "scikit-learn>=1.3,<1.9",  # keep in sync with [dev] pin above
]
# Extra for the platform release packagers
# (``pip install -e ".[publish]"``).  It enables the gated
# ``load_dataset()`` / Kaggle-CLI smoke tests that verify G11.3 (Kaggle
# package) and G12.3 / G12.4 (HF load_dataset round-trip) while keeping
# the heavy SDKs out of the default dev install.  ``markdown-it-py`` was
# added by PR 7.2 for the local Kaggle / HF preview pages
# (``scripts/preview_{kaggle,hf}_page.py``); it follows the same
# publish-extra posture, i.e. a missing import raises a clean error that
# points at this extra.
publish = [
    "datasets>=2.14",
    "kaggle>=1.6",
    "markdown-it-py>=3.0",
]
# Extra for executing the public release notebooks
# (``pip install -e ".[notebooks]"``).  It backs the G13.1 acceptance
# gate: the CI ``notebooks`` job nbclient-executes
# ``release/notebooks/*.ipynb`` end-to-end against a freshly built public
# bundle and asserts that the notebooks reproduce validation_report.md
# metrics within ±0.05 (G13.2) and never load instructor artefacts
# (G13.3).
notebooks = [
    # Supplies the ``python3`` kernelspec looked up by
    # ``nbclient.NotebookClient(..., kernel_name="python3")``.  Without it
    # CI fails with ``NoSuchKernel: No such kernel named python3``: the
    # GitHub-hosted runner has no kernelspecs registered out of the box
    # (local dev environments usually do).
    "ipykernel>=6.0",
    "matplotlib>=3.7",
    "nbclient>=0.10",
    "nbformat>=5.10",
    "scikit-learn>=1.3,<1.9",  # keep in sync with [dev] pin above
]

# Console-script entry point: installing the package creates a
# ``leadforge`` command that invokes the ``app`` object defined in
# ``leadforge.cli.main``.
[project.scripts]
leadforge = "leadforge.cli.main:app"

# Package discovery: search from the repository root and include only
# packages whose dotted name matches ``leadforge*``.
[tool.setuptools.packages.find]
include = ["leadforge*"]
where = ["."]

# Non-Python files shipped with the ``leadforge`` package: every YAML
# file (either extension) at any depth below it.
[tool.setuptools.package-data]
leadforge = [
    "**/*.yaml",
    "**/*.yml",
]

# Ruff: target Python 3.11 syntax and allow lines up to 100 columns.
[tool.ruff]
line-length = 100
target-version = "py311"

[tool.ruff.lint]
select = [
    "E",   # pycodestyle errors
    "F",   # Pyflakes
    "I",   # isort
    "N",   # pep8-naming
    "W",   # pycodestyle warnings
    "UP",  # pyupgrade
    "B",   # flake8-bugbear
    "C4",  # flake8-comprehensions
    "PT",  # flake8-pytest-style
    "S",   # flake8-bandit
]

[tool.ruff.lint.per-file-ignores]
"tests/**/*" = ["S101", "S108"]
# ``assert`` is used on purpose in the release notebooks for contract
# checks (path discipline, snapshot-safe joins, G13.2 tolerance gate).
# It is the conventional notebook idiom, and these are precisely the
# cells that should fail loudly on a regression.
"release/notebooks/**/*.ipynb" = ["S101"]
# The notebook builders emit dedented heredocs whose lines end up as
# markdown tables and print-statement output inside the notebook.  Line
# length there is a property of the rendered cell rather than of the .py
# source, so the 100-column limit is the wrong yardstick.
"scripts/build_release_notebook_*.py" = ["E501"]
# The preview-page scripts (PR 7.2) hold inlined CSS and multi-attribute
# HTML strings inside f-string templates.  The rendered HTML is the
# product, so wrapping the source CSS at 100 columns is just line noise.
"scripts/preview_kaggle_page.py" = ["E501"]
"scripts/preview_hf_page.py" = ["E501"]
# _preview_common is plain Python (no inline HTML / CSS), so E501 stays
# enabled for it.

# Global mypy settings, checked against Python 3.11 semantics.
[tool.mypy]
python_version = "3.11"

# Every function must be fully annotated, and bodies are type-checked
# even where annotations are missing.
disallow_untyped_defs = true
disallow_incomplete_defs = true
check_untyped_defs = true

# A ``None`` default does not silently make a parameter Optional.
no_implicit_optional = true

# Extra diagnostics: returning ``Any`` from a typed function, and
# configuration sections that end up unused.
warn_return_any = true
warn_unused_configs = true

# Third-party libraries for which mypy is told not to fail when it cannot
# resolve the import (the package and all of its submodules).
[[tool.mypy.overrides]]
module = [
    "pandas",
    "pandas.*",
    "pyarrow",
    "pyarrow.*",
    "networkx",
    "networkx.*",
    "sklearn",
    "sklearn.*",
    "matplotlib",
    "matplotlib.*",
]
ignore_missing_imports = true

# The Anthropic SDK is imported lazily inside ``build_anthropic_client``
# (PR 7.1), which lets the LLM critique module import cleanly when the SDK
# is absent.  CI's type-check job does not install ``anthropic``, so this
# override keeps mypy from failing on the missing import stub.  The
# runtime contract is enforced by tests through the ``LLMCritiqueClient``
# protocol.
[[tool.mypy.overrides]]
module = ["anthropic", "anthropic.*"]
ignore_missing_imports = true

# When pytest is invoked without explicit paths, collect tests from the
# ``tests`` directory only.
[tool.pytest.ini_options]
testpaths = ["tests"]