Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,10 @@ import {

import { collect, parseBooleanArg } from '../lib/args';
import { emitStatsdMetrics } from '../lib/metrics';
import { AppConfig, AuthLogger } from '../../lib/types';
import { AccountEventsManager } from '../../lib/account-events';
import { AppConfig, AuthFirestore, AuthLogger } from '../../lib/types';
import appConfig from '../../config';
import { setupFirestore } from '../../lib/firestore-db';
import { requestForGlean } from '../../lib/inactive-accounts';
import initLog from '../../lib/log';
import initRedis from '../../lib/redis';
Expand All @@ -65,7 +67,6 @@ import {
hasActiveRefreshToken,
hasActiveSessionToken,
IsActiveFnBuilder,
setDateToUTC,
buildExclusionsTempTableQuery,
} from './lib';

Expand All @@ -75,7 +76,7 @@ const statsd = new StatsD({ ...config.statsd });
// {{{ constants and defaults

const defaultDaysTilFirstEmail = 0;
const defaultResultsLImit = 500000;
const defaultBatchSize = 1000;
const defaultConcurrency = 100;
const twoYearsAgo = () => {
const x = new Date();
Expand All @@ -94,16 +95,13 @@ const init = async () => {
program
.description(
'Starts the inactive account deletion process by enqueuing the first email\n' +
'notification for inactive accounts. This script allows segmenting the\n' +
'accounts to search by account creation date. It also optionally accepts a\n' +
'date at or after when an account is active in order to be excluded.\n\n' +
'For example, to start the inactive deletion process on accounts created\n' +
'between 2015-01-01 and 2015-01-31 where the account is not active after\n' +
'2024-10-31:\n' +
' enqueue-inactive-account-deletions.ts \\\n' +
' --start-date 2015-01-01 \\\n' +
' --end-date 2015-12-31 \\\n' +
' --active-by-date 2024-10-31'
'notification for the oldest N inactive accounts that have not yet been\n' +
'notified. Designed to be invoked on a schedule (e.g. a Kubernetes CronJob).\n\n' +
'Each run picks up to --batch-size candidates with createdAt < two-years-ago,\n' +
'ordered by createdAt ASC, then filters out any that have already received an\n' +
'inactiveAccountFirstWarning email. No state is persisted between runs; the\n' +
'cursor advances implicitly as old accounts are deleted ~60 days after\n' +
'notification.'
)
.option(
'--dry-run [true|false]',
Expand All @@ -129,36 +127,20 @@ const init = async () => {
parseBooleanArg,
false
)
.option(
'--active-by-date [date]',
'An account is considered active if it has any activity at or after this date. Optional. Defaults to two years ago from script execution time.',
Date.parse
)
.option(
'--start-date [date]',
'Start of date range of account creation date, inclusive. Optional. Defaults to 2012-03-12.',
Date.parse,
'2012-03-12'
)
.option(
'--end-date [date]',
'End of date range of account creation date, inclusive. Defaults to a day before active by date.',
Date.parse
)
.option(
'--days-til-first-email [float]',
'The amount of time from now until the first email is sent, in days. Defaults to 0. Max allowed (GCP limit) is 30.',
parseFloat
)
.option(
'--results-limit [number]',
'The number of results per accounts DB query. Defaults to 500000.',
'--batch-size [number]',
`The max number of candidate accounts to consider per run. Defaults to ${defaultBatchSize}.`,
(x) => parseInt(x),
defaultResultsLImit
defaultBatchSize
Comment on lines +136 to +139
)
.option(
'--output-path [path]',
'File path to write the list of UIDs from MySQL. Optional. Defaults to CWD and filename based on the end date.'
'File path to write the list of UIDs from MySQL. Optional. Defaults to CWD and filename based on the run timestamp.'
)
.option(
'--bq-dataset <string>',
Expand All @@ -185,23 +167,20 @@ const init = async () => {
throw new Error('BigQuery dataset ID is required.');
}

const startDate = setDateToUTC(program.startDate);
const endDate = program.endDate
? setDateToUTC(program.endDate)
: twoYearsAgo();
const activeByDate = program.activeByDate
? setDateToUTC(program.activeByDate)
: twoYearsAgo();
const startDateTimestamp = startDate.valueOf();
const endDateTimestamp = endDate.valueOf() + 86400000; // next day for < comparisons
const activeByDateTimestamp = activeByDate.valueOf();

if (endDateTimestamp <= startDateTimestamp) {
throw new Error(
'The end date must be on the same day or later than the start date.'
);
const batchSize = program.batchSize;
if (!Number.isInteger(batchSize) || batchSize <= 0) {
throw new Error('--batch-size must be a positive integer.');
}

const endDate = twoYearsAgo();
const activeByDate = twoYearsAgo();
// No lower bound on createdAt — ORDER BY createdAt ASC + LIMIT batchSize
// naturally starts at the oldest still-existing candidate. As accounts age
// out via deletion, the cursor advances implicitly.
const startDateTimestamp = 0;
const endDateTimestamp = endDate.valueOf();
const activeByDateTimestamp = activeByDate.valueOf();

const daysTilFirstEmail =
program.daysTilFirstEmail !== undefined
? program.daysTilFirstEmail
Expand All @@ -214,24 +193,19 @@ const init = async () => {
}

const msTilFirstEmail = daysTilFirstEmail * 86400000;
const runTag = `${Date.now()}`;
const mysqlResCsvPath =
program.outputPath ||
path.join(
process.cwd(),
`mysql-inactive-account-uids-${endDate
.toISOString()
.substring(0, 10)}.csv`
);
path.join(process.cwd(), `mysql-inactive-account-uids-${runTag}.csv`);

// /arguments }}}

console.log(`Save inactive account UIDs: ${program.saveUids}`);
console.log(`Enqueue emails: ${program.enqueueEmails}`);
console.log(`Start date: ${startDate.toISOString()}`);
console.log(`End date: ${endDate.toISOString()}`);
console.log(`Batch size: ${batchSize}`);
console.log(`End date (createdAt <): ${endDate.toISOString()}`);
console.log(`Active by date: ${activeByDate.toISOString()}`);
console.log(`Days 'til first email: ${daysTilFirstEmail}`);
console.log(`Per MySQL query results limit: ${program.resultsLimit}`);

if (program.dryRun) {
console.log(
Expand Down Expand Up @@ -295,20 +269,16 @@ const init = async () => {

Container.set(AppConfig, config);
Container.set(AuthLogger, log);
Container.set(StatsD, statsd);
if (!Container.has(AuthFirestore)) {
Container.set(AuthFirestore, setupFirestore(config));
}
const accountEventsManager = new AccountEventsManager();
Container.set(AccountEventsManager, accountEventsManager);

// /dependencies }}}

const postfixTableName = (prefix: string) =>
`${prefix}_${startDate
.toISOString()
.substring(0, 10)
.replaceAll('-', '')}_${endDate
.toISOString()
.substring(0, 10)
.replaceAll('-', '')}_${activeByDate
.toISOString()
.substring(0, 10)
.replaceAll('-', '')}`;
const postfixTableName = (prefix: string) => `${prefix}_${runTag}`;

// {{{ build exclusions temp table in BQ and start a session

Expand Down Expand Up @@ -368,45 +338,23 @@ const init = async () => {

// {{{ write MySQL results to CSV and load into BQ temp table

const accountQueryBuilder = () =>
accountWhereAndOrderByQueryBuilder(
startDateTimestamp,
endDateTimestamp,
activeByDateTimestamp
)
.select('accounts.uid')
.limit(program.resultsLimit);

let hasMaxResultsCount = true;
let totalRowsReturned = 0;
let inactiveCandidateUids: string[] = [];

while (hasMaxResultsCount) {
const accountsQuery = accountQueryBuilder();
accountsQuery.offset(totalRowsReturned);

const accounts = await emitStatsdMetrics(
async () => await accountsQuery,
'accounts.inactive.sql-query',
statsd
)();

debugLog(`MySQL results count: ${accounts.length}`);

if (!accounts.length) {
hasMaxResultsCount = false;
break;
}

inactiveCandidateUids = inactiveCandidateUids.concat(
accounts.map((x) => x.uid)
);
const accountsQuery = accountWhereAndOrderByQueryBuilder(
startDateTimestamp,
endDateTimestamp,
activeByDateTimestamp
)
.select('accounts.uid')
.limit(batchSize);
Comment on lines +341 to +347

const accounts = await emitStatsdMetrics(
async () => await accountsQuery,
'accounts.inactive.sql-query',
statsd
)();

hasMaxResultsCount = accounts.length === program.resultsLimit;
totalRowsReturned += accounts.length;
}
const inactiveCandidateUids: string[] = accounts.map((x) => x.uid);

debugLog(`MySQL total rows returned: ${totalRowsReturned}`);
debugLog(`MySQL results count: ${inactiveCandidateUids.length}`);

const inactivesMySqlResultsTableName = await saveUidsToBqTable(
inactiveCandidateUids,
Expand Down Expand Up @@ -580,6 +528,19 @@ const init = async () => {
await queue.onSizeLessThan(concurrency * 5);

queue.add(async () => {
const priorEvents = await accountEventsManager.findEmailEvents(
uid,
'emailSent',
'inactiveAccountFirstWarning',
0,
Date.now()
Comment on lines +535 to +536
);
Comment on lines +531 to +537
if (priorEvents && priorEvents.length > 0) {
statsd.increment('accounts.inactive.cron.skipped-already-notified');
debugLog(`Skipping ${uid}: first-warning email already sent.`);
return;
Comment on lines +538 to +541
}

// @TODO this function could be abstracted and moved to InactiveAccountsManager
const taskPayload: InactiveAccountEmailTaskPayloadParam = {
uid,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,11 @@ describe('enqueue inactive account deletions script', () => {
const diff = Math.abs(now.valueOf() - nowish.valueOf());
expect(diff).toBeLessThanOrEqual(1000);

const startDateString = getOutputValue(outputLines, 'Start date');
expect(startDateString?.startsWith('2012-03-12')).toBe(true);
const batchSizeString = getOutputValue(outputLines, 'Batch size');
expect(batchSizeString).toBe('1000');

const daysTilFirstEmailString = getOutputValue(outputLines, "Days 'til");
expect(daysTilFirstEmailString).toBe('0');

const dbResultsLimitString = getOutputValue(outputLines, 'Per MySQL query');
expect(dbResultsLimitString).toBe('500000');
});

it('requires an BQ dataset id', async () => {
Expand All @@ -66,22 +63,19 @@ describe('enqueue inactive account deletions script', () => {
await exec(cmd.join(' '), execOptions);
});

it('requires the end date to be the same or later than the start date', async () => {
it('rejects a non-positive --batch-size', async () => {
try {
const cmd = [
...command,
'--end-date 2020-12-22',
'--start-date 2021-12-22',
'--batch-size 0',
'--bq-dataset fxa-dev.inactives-testo',
];
await exec(cmd.join(' '), execOptions);
throw new Error(
'Expected script to fail with end date before start date'
);
throw new Error('Expected script to fail with non-positive batch size');
} catch (err: any) {
expect(err.code).toBe(1);
expect(err.stderr).toContain(
'The end date must be on the same day or later than the start date.'
'--batch-size must be a positive integer.'
);
}
});
Expand Down