Skip to content

Commit ed088f0

Browse files
committed
Add an option for fitting on the whole view instead of only the wrk split
1 parent 79332eb commit ed088f0

2 files changed

Lines changed: 17 additions & 1 deletion

File tree

src/pasteur/kedro/pipelines/main.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,8 +124,18 @@ def generate_pipelines(
124124
)
125125

126126
# Create view transform pipeline that can run as part of ingest
127+
if view.fit_global:
128+
pipe_fit = create_fit_pipeline(view, all_types, modules, 'view')
129+
+ create_transform_pipeline(
130+
view,
131+
'view',
132+
all_types,
133+
)
134+
else:
135+
pipe_fit = create_fit_pipeline(view, all_types, modules, wrk_split)
136+
127137
pipe_transform = (
128-
create_fit_pipeline(view, all_types, modules, wrk_split)
138+
pipe_fit
129139
+ create_transform_pipeline(
130140
view,
131141
wrk_split,

src/pasteur/view.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,12 @@ class View(Module):
167167
"""
168168
trn_deps: dict[str, list[str]] = {}
169169
parameters: dict[str, Any] | str | None = None
170+
171+
""" If true, transformers and encoders for this view will be fit on the global
172+
dataset. Resolves encoding errors that stem from sampling the partial view.
173+
When true, subsampling the view is not possible during transformation and
174+
encoding, which may add significant overhead."""
175+
fit_global: bool = False
170176

171177
def __init__(self, **_) -> None:
172178
pass

0 commit comments

Comments
 (0)