DataKitchen
diff --git a/‎testgen/commands/queries/execute_tests_query.py‎
Lines changed: 9 additions & 4 deletions b/‎testgen/commands/queries/execute_tests_query.py‎
Lines changed: 9 additions & 4 deletions
diff --git a/‎testgen/commands/queries/profiling_query.py‎
Lines changed: 3 additions & 1 deletion b/‎testgen/commands/queries/profiling_query.py‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎testgen/commands/run_profiling.py‎
Lines changed: 19 additions & 0 deletions b/‎testgen/commands/run_profiling.py‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎testgen/commands/test_thresholds_prediction.py‎
Lines changed: 7 additions & 2 deletions b/‎testgen/commands/test_thresholds_prediction.py‎
Lines changed: 7 additions & 2 deletions
diff --git a/‎testgen/common/models/data_column.py‎
Lines changed: 31 additions & 0 deletions b/‎testgen/common/models/data_column.py‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎testgen/common/models/table_group.py‎
Lines changed: 5 additions & 0 deletions b/‎testgen/common/models/table_group.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎testgen/common/pii_masking.py‎
Lines changed: 96 additions & 0 deletions b/‎testgen/common/pii_masking.py‎
Lines changed: 96 additions & 0 deletions
diff --git a/‎testgen/mcp/exceptions.py‎
Lines changed: 44 additions & 0 deletions b/‎testgen/mcp/exceptions.py‎
Lines changed: 44 additions & 0 deletions
diff --git a/‎testgen/mcp/permissions.py‎
Lines changed: 7 additions & 10 deletions b/‎testgen/mcp/permissions.py‎
Lines changed: 7 additions & 10 deletions
@@ -326,16 +326,21 @@ def _get_params(self, test_def: TestExecutionDef | None = None) -> dict:
             # Freshness exclusion params — computed per test at execution time
             if test_def.test_type == "Freshness_Trend" and test_def.baseline_sum:
                 sched = get_schedule_params(test_def.prediction)
-                has_exclusions = self._exclude_weekends or sched.excluded_days or sched.window_start is not None
+                # Once the schedule is active (excluded_days derived from active_days),
+                # it supersedes exclude_weekends as the single source of truth for
+                # day exclusion — avoids conflicts where a detection day (e.g. Saturday)
+                # is active per schedule but excluded per exclude_weekends.
+                effective_exclude_weekends = False if sched.excluded_days else self._exclude_weekends
+                has_exclusions = effective_exclude_weekends or sched.excluded_days or sched.window_start is not None
                 if has_exclusions:
                     last_update = pd.Timestamp(test_def.baseline_sum)
-                    excluded = int(count_excluded_minutes(
-                        last_update, self.run_date, self._exclude_weekends, self._holiday_dates,
+                    excluded = round(count_excluded_minutes(
+                        last_update, self.run_date, effective_exclude_weekends, self._holiday_dates,
                         tz=self._schedule_tz, excluded_days=sched.excluded_days,
                         window_start=sched.window_start, window_end=sched.window_end,
                     ))
                     is_excl = 1 if is_excluded_day(
-                        pd.Timestamp(self.run_date), self._exclude_weekends, self._holiday_dates,
+                        pd.Timestamp(self.run_date), effective_exclude_weekends, self._holiday_dates,
                         tz=self._schedule_tz, excluded_days=sched.excluded_days,
                         window_start=sched.window_start, window_end=sched.window_end,
                     ) else 0
 
@@ -167,8 +167,10 @@ def update_profiling_results(self) -> list[tuple[str, dict]]:
             self._get_query("functional_datatype.sql"),
             self._get_query("functional_tabletype_stage.sql"),
             self._get_query("functional_tabletype_update.sql"),
-            self._get_query("pii_flag.sql"),
         ]
+        if self.table_group.profile_flag_pii:
+            queries.append(self._get_query("pii_flag.sql"))
+            queries.append(self._get_query("pii_flag_update.sql"))
         if self.table_group.profile_flag_cdes:
             queries.append(self._get_query("cde_flagger_query.sql"))
         return queries
 
@@ -28,6 +28,7 @@
 from testgen.common.mixpanel_service import MixpanelService
 from testgen.common.models import get_current_session, with_database_session
 from testgen.common.models.connection import Connection
+from testgen.common.models.data_column import DataColumnChars
 from testgen.common.models.profiling_run import ProfilingRun
 from testgen.common.models.table_group import TableGroup
 from testgen.common.models.test_suite import TestSuite
@@ -85,6 +86,8 @@ def run_profiling(table_group_id: str | UUID, username: str | None = None, run_d
     LOG.info(f"Profiling run: {profiling_run.id}, Table group: {table_group.table_groups_name}, Connection: {connection.connection_name}")
     try:
         data_chars = run_data_chars_refresh(connection, table_group, profiling_run.profiling_starttime)
+        if table_group.profile_exclude_xde:
+            data_chars = _exclude_xde_columns(data_chars, table_group.id)
         distinct_tables = {(column.table_name, column.record_ct) for column in data_chars}
 
         profiling_run.set_progress("data_chars", "Completed")
@@ -144,6 +147,22 @@ def run_profiling(table_group_id: str | UUID, username: str | None = None, run_d
     """
 
 
+def _exclude_xde_columns(data_chars: list[ColumnChars], table_group_id: UUID) -> list[ColumnChars]:
+    """Drop columns flagged as excluded data elements (XDE) from a profiling column list.
+
+    Selects the ``data_column_chars`` rows of the table group whose
+    ``excluded_data_element`` is true and removes every entry of ``data_chars``
+    whose ``(table_name, column_name)`` pair matches one of them.
+
+    Args:
+        data_chars: Column characteristics collected for the profiling run.
+        table_group_id: Table group whose XDE flags are consulted.
+
+    Returns:
+        ``data_chars`` itself when the table group has no XDE rows, otherwise a
+        new list without the flagged columns.
+    """
+    flagged_rows = DataColumnChars.select_where(
+        DataColumnChars.table_groups_id == table_group_id,
+        DataColumnChars.excluded_data_element.is_(True),
+    )
+    if not flagged_rows:
+        return data_chars
+
+    # Matching is done on the (table, column) pair only.
+    xde_keys = {(row.table_name, row.column_name) for row in flagged_rows}
+    filtered = []
+    for column in data_chars:
+        if (column.table_name, column.column_name) in xde_keys:
+            continue
+        filtered.append(column)
+
+    # Only log when at least one column was actually removed.
+    if len(data_chars) > len(filtered):
+        LOG.info(f"Excluding {len(data_chars) - len(filtered)} XDE columns from profiling")
+    return filtered
+
+
 def _run_column_profiling(sql_generator: ProfilingSQL, data_chars: list[ColumnChars]) -> None:
     profiling_run = sql_generator.profiling_run
     profiling_run.set_progress("col_profiling", "Running")
 
@@ -213,6 +213,11 @@ def compute_freshness_threshold(
     if schedule.stage == "active":
         excluded_days = frozenset(range(7)) - schedule.active_days if schedule.active_days else None
 
+        # Once the schedule is active, excluded_days is the single source of truth
+        # for day exclusion — it supersedes exclude_weekends, which was the user's
+        # initial hint before enough data was available for schedule inference.
+        schedule_exclude_weekends = False if excluded_days else exclude_weekends
+
         # For sub-daily schedules, apply window exclusion for overnight gaps
         has_window = (
             schedule.frequency == "sub_daily"
@@ -228,7 +233,7 @@ def compute_freshness_threshold(
                     upper_percentile=upper_percentile,
                     floor_multiplier=floor_multiplier,
                     lower_percentile=lower_percentile,
-                    exclude_weekends=exclude_weekends,
+                    exclude_weekends=schedule_exclude_weekends,
                     holiday_codes=holiday_codes,
                     tz=schedule_tz,
                     staleness_factor=staleness_factor,
@@ -246,7 +251,7 @@ def compute_freshness_threshold(
             holiday_dates = resolve_holiday_dates(holiday_codes, history.index) if holiday_codes else None
             schedule_upper = minutes_to_next_deadline(
                 result.last_update, schedule,
-                exclude_weekends, holiday_dates, schedule_tz,
+                schedule_exclude_weekends, holiday_dates, schedule_tz,
                 deadline_buffer, excluded_days=excluded_days,
             )
             if schedule_upper is not None:
 
@@ -0,0 +1,31 @@
+from uuid import UUID, uuid4
+
+from sqlalchemy import Boolean, Column, ForeignKey, String
+from sqlalchemy.dialects import postgresql
+
+from testgen.common.models.entity import Entity
+
+
+class DataColumnChars(Entity):
+    """ORM mapping for a subset of the ``data_column_chars`` table.
+
+    Only the identifying columns plus the ``excluded_data_element`` and
+    ``pii_flag`` markers are mapped; the remaining table columns are listed
+    in the comment at the bottom of the class and are left unmapped.
+    """
+
+    __tablename__ = "data_column_chars"
+
+    # Exposed as ``id`` on the model but stored in the ``column_id`` database column.
+    id: UUID = Column("column_id", postgresql.UUID(as_uuid=True), primary_key=True, default=uuid4)
+    table_groups_id: UUID = Column(postgresql.UUID(as_uuid=True), ForeignKey("table_groups.id"))
+    schema_name: str = Column(String)
+    table_name: str = Column(String)
+    column_name: str = Column(String)
+    # Both markers are nullable, so NULL must be handled alongside set values.
+    excluded_data_element: bool | None = Column(Boolean, nullable=True)
+    pii_flag: str | None = Column(String(50), nullable=True)
+
+    # NOTE(review): presumably read by Entity query helpers as the default sort order — confirm.
+    _default_order_by = (id,)
+
+    # Unmapped columns: table_id, ordinal_position, general_type, column_type,
+    # db_data_type, functional_data_type, description, critical_data_element,
+    # data_source, source_system, source_process, business_domain,
+    # stakeholder_group, transform_level, aggregation_level, data_product,
+    # add_date, last_mod_date, drop_date, test_ct, last_test_date,
+    # tests_last_run, tests_7_days_prior, tests_30_days_prior,
+    # fails_last_run, fails_7_days_prior, fails_30_days_prior,
+    # warnings_last_run, warnings_7_days_prior, warnings_30_days_prior,
+    # last_complete_profile_run_id, valid_profile_issue_ct,
+    # valid_test_issue_ct, dq_score_profiling, dq_score_testing
@@ -28,6 +28,9 @@ class TableGroupMinimal(EntityMinimal):
     profile_use_sampling: bool
     profiling_delay_days: str
     monitor_test_suite_id: UUID | None
+    profile_flag_cdes: bool
+    profile_flag_pii: bool
+    profile_exclude_xde: bool
     last_complete_profile_run_id: UUID | None
 
 
@@ -112,6 +115,8 @@ class TableGroup(Entity):
     profile_sample_min_count: int = Column(BigInteger, default=100000)
     profiling_delay_days: str = Column(String, default="0")
     profile_flag_cdes: bool = Column(Boolean, default=True)
+    profile_flag_pii: bool = Column(Boolean, default=True)
+    profile_exclude_xde: bool = Column(Boolean, default=True)
     profile_do_pair_rules: bool = Column(YNString, default="N")
     profile_pair_rule_pct: int = Column(Integer, default=95)
     include_in_dashboard: bool = Column(Boolean, default=True)
 
@@ -0,0 +1,96 @@
+"""PII masking utilities for redacting sensitive data in the UI."""
+import pandas as pd
+
+from testgen.ui.services.database_service import fetch_all_from_db
+
+PII_REDACTED = "[PII Redacted]"
+
+PROFILING_PII_FIELDS = (
+    "top_freq_values", "min_text", "max_text",
+    "min_value", "min_value_over_0", "max_value",
+    "min_date", "max_date",
+)
+
+
+def get_pii_columns(table_group_id: str, schema: str | None = None, table_name: str | None = None) -> set[str]:
+    """Return the names of columns whose ``pii_flag`` is set in ``data_column_chars``.
+
+    Args:
+        table_group_id: Table group to search.
+        schema: When truthy, restrict the lookup to this schema.
+        table_name: When truthy, restrict the lookup to this table.
+
+    Returns:
+        The set of matching column names. Names are not qualified by table.
+    """
+    # All three bind values are always supplied; the optional filters are only
+    # spliced into the SQL text when their argument is truthy.
+    bind_values: dict = {
+        "table_group_id": table_group_id,
+        "schema": schema,
+        "table_name": table_name,
+    }
+
+    query = f"""
+    SELECT column_name
+    FROM data_column_chars
+    WHERE table_groups_id = :table_group_id
+        AND pii_flag IS NOT NULL
+        {"AND schema_name = :schema" if schema else ""}
+        {"AND table_name = :table_name" if table_name else ""}
+    """
+
+    return {row.column_name for row in fetch_all_from_db(query, bind_values)}
+
+
+def mask_source_data_pii(df: pd.DataFrame, pii_columns: set[str]) -> None:
+    """Overwrite every value in the PII columns of ``df`` with ``PII_REDACTED``, in place.
+
+    Column names are compared case-insensitively, since the names in
+    ``pii_columns`` may differ in case from the DataFrame's column labels.
+
+    Args:
+        df: DataFrame to mask. Nothing happens when it is empty.
+        pii_columns: Names of the PII columns. Nothing happens when it is empty.
+    """
+    if df.empty or not pii_columns:
+        return
+    # Lower-case the PII names once so each DataFrame column needs a single set
+    # lookup instead of a scan over every PII name.
+    pii_lower = {name.lower() for name in pii_columns}
+    for df_col in df.columns:
+        if df_col.lower() in pii_lower:
+            df[df_col] = PII_REDACTED
+
+
+def mask_hygiene_detail(data: pd.DataFrame | list[dict], pii_columns: set[str] | None = None) -> None:
+    """Redact hygiene issue detail for PII columns where detail_redactable is true.
+
+    The input is modified in place; nothing is returned.
+
+    Accepts:
+    - DataFrame with detail_redactable, pii_flag, and detail columns (hygiene issues grid/export).
+      ``pii_columns`` is not consulted in this form; a row is PII when its pii_flag is not null.
+    - List of issue dicts, each with detail_redactable and either pii_flag or column_name
+      (when a non-empty pii_columns set is provided, matches column_name against it
+      case-insensitively; otherwise the issue's own pii_flag decides)
+    """
+    if isinstance(data, pd.DataFrame):
+        # A frame without the detail_redactable column is left untouched.
+        if data.empty or "detail_redactable" not in data.columns:
+            return
+        pii_mask = data["detail_redactable"].fillna(False) & data["pii_flag"].notna()
+        data.loc[pii_mask, "detail"] = PII_REDACTED
+        return
+
+    if not data:
+        return
+    pii_lower = {c.lower() for c in pii_columns} if pii_columns else None
+    for issue in data:
+        if not issue.get("detail_redactable"):
+            continue
+        if pii_lower is not None:
+            # The key can be present with a None value, in which case the
+            # ``.get`` default is not used; coerce to "" so ``.lower()`` is safe.
+            column_name = issue.get("column_name") or ""
+            if column_name.lower() in pii_lower:
+                issue["detail"] = PII_REDACTED
+        elif issue.get("pii_flag"):
+            issue["detail"] = PII_REDACTED
+
+
+def mask_profiling_pii(data: pd.DataFrame | dict, pii_columns: set[str]) -> None:
+    """Mask profiling fields for PII columns. Accepts a DataFrame or a single-row dict.
+
+    The input is modified in place: every field listed in ``PROFILING_PII_FIELDS``
+    that is present is replaced with ``PII_REDACTED`` for PII columns. Column
+    names are compared case-insensitively.
+
+    Args:
+        data: Either a dict describing one column, or a DataFrame with a
+            ``column_name`` column and one row per profiled column.
+        pii_columns: Names of the PII columns. Nothing happens when it is empty.
+    """
+    if isinstance(data, dict):
+        if not pii_columns:
+            return
+        column_name = data.get("column_name")
+        # A dict with a column_name is masked only when that column is PII;
+        # a dict without one (missing or falsy) is masked unconditionally.
+        if column_name and column_name.lower() not in {c.lower() for c in pii_columns}:
+            return
+        for field in PROFILING_PII_FIELDS:
+            if field in data:
+                data[field] = PII_REDACTED
+        return
+
+    if data.empty or not pii_columns:
+        return
+    pii_lower = {c.lower() for c in pii_columns}
+    mask = data["column_name"].str.lower().isin(pii_lower)
+    # Nothing to redact: leave the frame untouched so numeric/date fields keep
+    # their dtypes instead of being cast to object for no reason.
+    if not mask.any():
+        return
+    for field in PROFILING_PII_FIELDS:
+        if field in data.columns:
+            # The placeholder is a string, so the column must be able to hold
+            # arbitrary objects before the masked rows are assigned.
+            if data[field].dtype != object:
+                data[field] = data[field].astype(object)
+            data.loc[mask, field] = PII_REDACTED
@@ -0,0 +1,44 @@
+"""MCP exception hierarchy and error boundary.
+
+``MCPUserError`` (and its subclasses) carry safe, user-facing messages.
+``mcp_error_handler`` is a decorator that catches them and converts them to
+text, while neutralising unexpected exceptions.
+"""
+
+import functools
+import logging
+
+LOG = logging.getLogger("testgen")
+
+
+class MCPUserError(Exception):
+    """Safe, user-facing error for MCP tools, prompts, and resources.
+
+    The message passed to the exception must be safe to show to the client:
+    ``mcp_error_handler`` returns ``str(e)`` verbatim as the response text.
+    All other exceptions are treated as unexpected: their traceback is
+    logged and a neutral message is returned to the client.
+    """
+
+
+class MCPPermissionDenied(MCPUserError):
+    """Raised when access is denied due to insufficient project permissions.
+
+    As an ``MCPUserError`` subclass, its message is returned to the client
+    as-is by ``mcp_error_handler``.
+    """
+
+
+def mcp_error_handler(fn):
+    """Wrap an MCP handler (tool, resource, or prompt) with safe error handling.
+
+    - ``MCPUserError`` (including ``MCPPermissionDenied``) → ``str(e)`` as the response.
+    - Any other exception → traceback logged, neutral message returned.
+
+    Both plain functions and ``async def`` handlers are supported. A coroutine
+    function gets an ``async`` wrapper so that exceptions raised while the
+    coroutine is awaited are caught here as well, instead of escaping to the caller.
+
+    Args:
+        fn: The handler to protect.
+
+    Returns:
+        A wrapper with the same call signature; it is a coroutine function
+        exactly when ``fn`` is one.
+    """
+    import inspect
+
+    if inspect.iscoroutinefunction(fn):
+
+        @functools.wraps(fn)
+        async def async_wrapper(*args, **kwargs):
+            try:
+                # Await inside the try block: the handler body only runs here.
+                return await fn(*args, **kwargs)
+            except MCPUserError as e:
+                return str(e)
+            except Exception:
+                LOG.exception("Unhandled error in MCP handler '%s'", fn.__name__)
+                return "An unexpected error occurred."
+
+        return async_wrapper
+
+    @functools.wraps(fn)
+    def wrapper(*args, **kwargs):
+        try:
+            return fn(*args, **kwargs)
+        except MCPUserError as e:
+            return str(e)
+        except Exception:
+            # Deliberate top-level boundary: log the traceback, hide details from the client.
+            LOG.exception("Unhandled error in MCP handler '%s'", fn.__name__)
+            return "An unexpected error occurred."
+
+    return wrapper
@@ -7,6 +7,7 @@
 
 from testgen.common.models.project_membership import ProjectMembership
 from testgen.common.models.user import User
+from testgen.mcp.exceptions import MCPPermissionDenied
 from testgen.utils.plugins import PluginHook
 
 _NOT_SET = object()
@@ -17,10 +18,6 @@
 )
 
 
-class MCPPermissionDenied(Exception):
-    """Raised by ProjectPermissions when access is denied. Caught by the decorator."""
-
-
 @dataclass(frozen=True, slots=True)
 class ProjectPermissions:
     memberships: dict[str, str]  # {project_code: role}
@@ -105,9 +102,9 @@ def mcp_permission(permission: str) -> Callable:
     permission, and stores it in a ContextVar. The tool retrieves the value
     via ``get_project_permissions()``.
 
-    If the user has no projects with the required permission, returns an
-    early denial message. Catches MCPPermissionDenied raised by tool code
-    and returns str(e) as the tool response.
+    Raises ``MCPPermissionDenied`` if the user has no projects with the required
+    permission. Other ``MCPPermissionDenied`` exceptions from tool code propagate
+    through — the ``safe_tool`` error boundary handles conversion to text.
     """
 
     def decorator(fn: Callable) -> Callable:
@@ -116,12 +113,12 @@ def wrapper(*args, **kwargs):
             user = get_current_mcp_user()
             perms = _compute_project_permissions(user, permission)
             if not perms.allowed_codes:
-                return "Your role does not include the necessary permission for this operation on any project."
+                raise MCPPermissionDenied(
+                    "Your role does not include the necessary permission for this operation on any project."
+                )
             tok = _mcp_project_permissions.set(perms)
             try:
                 return fn(*args, **kwargs)
-            except MCPPermissionDenied as e:
-                return str(e)
             finally:
                 _mcp_project_permissions.reset(tok)