From e38438f831a5a7492df85440b6257fdfcc111f3f Mon Sep 17 00:00:00 2001 From: Rajandeep Date: Tue, 2 Jun 2026 11:29:34 -0700 Subject: [PATCH] 33652 - Update Auto Pruning Logic for Frozen Corps --- data-tool/.corps.env.sample | 1 + data-tool/flows/common/colin_queries.py | 26 +++++++++---------- data-tool/flows/common/query_utils.py | 1 - data-tool/flows/config.py | 1 + .../flows/refresh_extract_subset_flow.py | 9 ++++--- 5 files changed, 20 insertions(+), 18 deletions(-) diff --git a/data-tool/.corps.env.sample b/data-tool/.corps.env.sample index 32e0106624..311577d948 100644 --- a/data-tool/.corps.env.sample +++ b/data-tool/.corps.env.sample @@ -12,6 +12,7 @@ APP_SETTINGS=dev ## LOCAL_MIGR_TEST ## ------------------------------------------------------------------------------------------------------------ DATA_LOAD_ENV=local_migr_test +TARGET_CONNECTION= SKIP_IF_RUNNING_HANDLER_ENABLED=False # Source database diff --git a/data-tool/flows/common/colin_queries.py b/data-tool/flows/common/colin_queries.py index 466bcdcef5..a1cec9ea61 100644 --- a/data-tool/flows/common/colin_queries.py +++ b/data-tool/flows/common/colin_queries.py @@ -47,19 +47,19 @@ def get_updated_identifiers(timestamp: str, corp_list: str, chunk_size: int) -> JOIN corp_list c ON c.corp_num = e.corp_num WHERE e.event_timestmp > TIMESTAMP '{timestamp}' - INTERVAL '2' HOUR - AND NOT ( - EXISTS ( - SELECT 1 - FROM corporation c2 - WHERE c2.corp_num = e.corp_num - AND c2.corp_frozen_typ_cd = 'C' - ) - AND EXISTS ( - SELECT 1 - FROM corp_early_adopters cea - WHERE cea.corp_num = e.corp_num - ) - ) + -- AND NOT ( + -- EXISTS ( + -- SELECT 1 + -- FROM corporation c2 + -- WHERE c2.corp_num = e.corp_num + -- AND c2.corp_frozen_typ_cd = 'C' + -- ) + -- AND EXISTS ( + -- SELECT 1 + -- FROM corp_early_adopters cea + -- WHERE cea.corp_num = e.corp_num + -- ) + -- ) ) SELECT le.EVENT_ID, le.corp_num, le.event_typ_cd, diff --git a/data-tool/flows/common/query_utils.py b/data-tool/flows/common/query_utils.py index c57df10925..158b38fd76 100644 --- a/data-tool/flows/common/query_utils.py +++ b/data-tool/flows/common/query_utils.py @@ -67,7 +67,6 @@ def get_candidates_not_matching_saf_criteria_query(updated_corp_nums: list) -> s AND in_dissolution = false AND migrated <> 'Y' AND has_password = true - AND has_officers = false AND meets_main_criteria = true AND has_3rd_party = false AND admin_email IS NOT NULL diff --git a/data-tool/flows/config.py b/data-tool/flows/config.py index ea1239cd24..e1db15fe59 100644 --- a/data-tool/flows/config.py +++ b/data-tool/flows/config.py @@ -77,6 +77,7 @@ class _Config(): # pylint: disable=too-few-public-methods """ DATA_LOAD_ENV = os.getenv('DATA_LOAD_ENV', '') + TARGET_CONNECTION = os.getenv('TARGET_CONNECTION', 'ctst_pg') CORP_NAME_SUFFIX = os.getenv('CORP_NAME_SUFFIX', '') UPDATE_ENTITY = os.getenv('UPDATE_ENTITY', 'False') == 'True' AFFILIATE_ENTITY = os.getenv('AFFILIATE_ENTITY', 'False') == 'True' diff --git a/data-tool/flows/refresh_extract_subset_flow.py b/data-tool/flows/refresh_extract_subset_flow.py index fc2fba6821..6f9dd76498 100644 --- a/data-tool/flows/refresh_extract_subset_flow.py +++ b/data-tool/flows/refresh_extract_subset_flow.py @@ -16,6 +16,7 @@ from common.init_utils import colin_oracle_init, get_config from common.query_utils import corpnum_to_oracle_ids, get_cutoff_timestamp_query, get_fallout_corp_nums, prune_candidates_from_account, prune_candidates_from_batch, prune_candidates_from_cp +_DEFAULT_TARGET_CONNECTION = get_named_config().TARGET_CONNECTION _REPO_ROOT = Path(__file__).resolve().parents[2] _SCRIPT_PATH = _REPO_ROOT / 'data-tool' / 'scripts' / 'generate_cprd_subset_extract.py' _GENERATED_DIR = _REPO_ROOT / 'data-tool' / 'scripts' / 'generated' @@ -148,7 +149,7 @@ def run_cprd_subset_extract_generator( pg_disable_method: str, out: str | None, include_cp: bool = False, - target_connection: str = 'ctst_pg', + target_connection: str = _DEFAULT_TARGET_CONNECTION, prefix_numeric_bc: bool = False, ) -> subprocess.CompletedProcess: """ @@ -241,7 +242,7 @@ def extract_pull_flow( refresh_views: bool = True, reset_extract_postgres: bool = True, include_cp: bool = False, - target_connection: str = 'ctst_pg', + target_connection: str = _DEFAULT_TARGET_CONNECTION, ) -> None: """ Generate files @@ -259,7 +260,7 @@ def extract_pull_flow( # Get Identifiers feed_path: Path | None = None if mode == 'refresh': - updated_rows = get_updated_identifiers_colin(cutoff_timestamp=cutoff, mig_batch_id=1, colin_oracle_engine=colin_oracle_engine, chunk_size=chunk_size) + updated_rows = get_updated_identifiers_colin(cutoff_timestamp=cutoff, mig_batch_id=config.MIG_BATCH_IDS, colin_oracle_engine=colin_oracle_engine, chunk_size=chunk_size) print(f'Colin updated identifiers : {len(updated_rows)} rows') _GENERATED_DIR.mkdir(parents=True, exist_ok=True) feed_path = _GENERATED_DIR / f'refresh_corp_feed_{os.getpid()}.tmp' @@ -334,5 +335,5 @@ def extract_pull_flow( p.add_argument('--refresh-views', action='store_false') p.add_argument('--dbschemacli-cmd', default='dbschemacli') p.add_argument('--reset-extract-postgres', action='store_false') - p.add_argument('--target-connection', default='ctst_pg') + p.add_argument('--target-connection', default=_DEFAULT_TARGET_CONNECTION) extract_pull_flow(**vars(p.parse_args()))