Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions functions/src/bills/BillProcessor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,22 @@ export type BillUpdates = Map<string, DocUpdate<Bill>>

/** Base class for jobs that need to process all bills. */
export default abstract class BillProcessor {
protected court: number | string = currentGeneralCourt
protected bills!: any[]
protected billIds!: string[]
protected committees!: Committee[]
protected members!: Member[]
protected cities!: City[]

/**
 * Runs a processor once against an explicitly chosen general court.
 *
 * Constructs `ProcessorClass` with no arguments, replaces the instance's
 * `court` with the given value, and waits for `run()` to finish.
 *
 * @param ProcessorClass Concrete processor to instantiate.
 * @param court General court the processor should read from and write to.
 */
static async runForCourt(
  ProcessorClass: { new (): BillProcessor },
  court: number | string
): Promise<void> {
  const processor = new ProcessorClass()
  // Point the instance at the requested court before any work starts.
  processor.court = court
  await processor.run()
}

static pubsub(
Processor: { new (args?: any): BillProcessor },
topic: string,
Expand Down Expand Up @@ -56,7 +66,7 @@ export default abstract class BillProcessor {
abstract process(): Promise<void>

billPath(id?: string) {
return `/generalCourts/${currentGeneralCourt}/bills${id ? `/${id}` : ""}`
return `/generalCourts/${this.court}/bills${id ? `/${id}` : ""}`
}

protected async writeBills(updates: BillUpdates) {
Expand All @@ -77,15 +87,15 @@ export default abstract class BillProcessor {
.then(snap => snap.docs.map(d => d.data()))
this.billIds = this.bills.map(b => b.id)
this.cities = await db
.collection(`/generalCourts/${currentGeneralCourt}/cities`)
.collection(`/generalCourts/${this.court}/cities`)
.get()
.then(this.load(City))
this.committees = await db
.collection(`/generalCourts/${currentGeneralCourt}/committees`)
.collection(`/generalCourts/${this.court}/committees`)
.get()
.then(this.load(Committee))
this.members = await db
.collection(`/generalCourts/${currentGeneralCourt}/members`)
.collection(`/generalCourts/${this.court}/members`)
.get()
.then(this.load(Member))
}
Expand Down
3 changes: 3 additions & 0 deletions functions/src/bills/updateBillReferences.ts
Original file line number Diff line number Diff line change
Expand Up @@ -218,5 +218,8 @@ class UpdateBillReferences extends BillProcessor {
}
}

/**
 * Runs the bill-reference update once for the given general court by
 * delegating to `BillProcessor.runForCourt`.
 *
 * @param court General court to process.
 */
export const runUpdateBillReferences = (court: number | string) => {
  return BillProcessor.runForCourt(UpdateBillReferences, court)
}

export const updateBillReferences =
BillProcessor.scheduled(UpdateBillReferences)
40 changes: 20 additions & 20 deletions functions/src/committees/updateCommitteeRosters.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,29 +5,29 @@ import { Member } from "../members/types"
import { Committee } from "./types"
import { currentGeneralCourt } from "../shared"

/** Updates the list of members in each committee. */
export const updateCommitteeRosters = runWith({ timeoutSeconds: 120 })
.pubsub.schedule("every 24 hours")
.onRun(async () => {
const members = await db
.collection(`/generalCourts/${currentGeneralCourt}/members`)
.get()
.then(c => c.docs.map(d => d.data()).filter(Member.guard))
const rosters = computeRosters(members)
export async function runUpdateCommitteeRosters(court: number | string) {
const members = await db
.collection(`/generalCourts/${court}/members`)
.get()
.then(c => c.docs.map(d => d.data()).filter(Member.guard))
const rosters = computeRosters(members)

const writer = db.bulkWriter()
rosters.forEach((roster, id) => {
const update: DocUpdate<Committee> = {
members: roster.map(m => ({ id: m.id, name: m.content.Name }))
}
writer.set(
db.doc(`/generalCourts/${currentGeneralCourt}/committees/${id}`),
update,
{ merge: true }
)
const writer = db.bulkWriter()
rosters.forEach((roster, id) => {
const update: DocUpdate<Committee> = {
members: roster.map(m => ({ id: m.id, name: m.content.Name }))
}
writer.set(db.doc(`/generalCourts/${court}/committees/${id}`), update, {
merge: true
})
await writer.close()
})
await writer.close()
}

/** Updates the list of members in each committee. */
export const updateCommitteeRosters = runWith({ timeoutSeconds: 120 })
.pubsub.schedule("every 24 hours")
.onRun(() => runUpdateCommitteeRosters(currentGeneralCourt))

function computeRosters(members: Member[]) {
const rosters = new Map<string, Member[]>()
Expand Down
19 changes: 15 additions & 4 deletions functions/src/events/scrapeEvents.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ abstract class EventScraper<ListItem, Event extends BaseEvent> {
abstract listEvents(): Promise<ListItem[]>
abstract getEvent(item: ListItem): Promise<Event>

private async run() {
private async run(skipCutoff = false) {
const list = await this.listEvents().catch(logFetchError("event list"))

if (!list) return
Expand All @@ -67,7 +67,11 @@ abstract class EventScraper<ListItem, Event extends BaseEvent> {
event = await this.getEvent(item).catch(logFetchError("event", id))

if (!event) continue
if (event.startsAt.toMillis() < upcomingOrRecentCutoff.toMillis()) break
if (
!skipCutoff &&
event.startsAt.toMillis() < upcomingOrRecentCutoff.toMillis()
)
break

writer.set(db.doc(`/events/${event.id}`), event, { merge: true })

Expand All @@ -77,6 +81,10 @@ abstract class EventScraper<ListItem, Event extends BaseEvent> {
await writer.close()
}

/**
 * Runs the scraper with the start-time cutoff check disabled, so the loop
 * does not stop at the first event that starts before the cutoff.
 */
async runForBackfill() {
  const skipCutoff = true
  return this.run(skipCutoff)
}

/** Parse the event start time in the time zone of the API. */
getEventStart(content: { EventDate: string; StartTime: string }) {
const { year, month, day } = DateTime.fromISO(content.EventDate, {
Expand Down Expand Up @@ -128,10 +136,11 @@ class SpecialEventsScraper extends EventScraper<
}

class SessionScraper extends EventScraper<SessionContent, Session> {
private court = currentGeneralCourt
private court: number

constructor() {
constructor(court: number = currentGeneralCourt) {
super("every 60 minutes", 120)
this.court = court
}

async listEvents() {
Expand Down Expand Up @@ -517,4 +526,6 @@ export const scrapeSingleHearingv2 = onCall(

export const scrapeSpecialEvents = new SpecialEventsScraper().function
export const scrapeSessions = new SessionScraper().function
/**
 * Scrapes sessions for a specific general court in backfill mode.
 *
 * Builds a `SessionScraper` for `court` and invokes its backfill run
 * directly.
 *
 * @param court General court whose sessions should be scraped.
 */
export const scrapeSessionsForCourt = (court: number) => {
  const scraper = new SessionScraper(court)
  return scraper.runForBackfill()
}
export const scrapeHearings = new HearingScraper().function
36 changes: 17 additions & 19 deletions functions/src/members/createMemberSearchIndex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,24 @@ import { pubsub } from "firebase-functions"
import { db } from "../firebase"
import { currentGeneralCourt } from "../shared"

/**
 * Rebuilds the member search index document for one general court.
 *
 * Reads every document under `/generalCourts/{court}/members`, takes each
 * member's `content` with the sponsored and cosponsored bill lists removed,
 * sorts the result by `Name`, and writes the House and Senate members to
 * `/generalCourts/{court}/indexes/memberSearch`.
 *
 * @param court General court whose members are indexed.
 */
export async function runCreateMemberSearchIndex(
  court: number | string
): Promise<void> {
  const members = await db.collection(`/generalCourts/${court}/members`).get()

  const index = members.docs
    // Missing documents and documents without content are dropped here, so
    // no placeholder objects with an undefined Name reach the sort below.
    .map(d => (d.exists ? d.data().content : undefined))
    .filter(Boolean)
    // Strip out sponsored and cosponsored bills for size
    .map(({ SponsoredBills, CoSponsoredBills, ...member }) => member)
    // Three-way comparison: returning 0 for equal names keeps the comparator
    // consistent, which Array.prototype.sort requires for a defined order.
    .sort((m1, m2) => {
      if (m1.Name < m2.Name) return -1
      if (m1.Name > m2.Name) return 1
      return 0
    })

  await db.doc(`/generalCourts/${court}/indexes/memberSearch`).set({
    representatives: index.filter(d => d.Branch === "House"),
    senators: index.filter(d => d.Branch === "Senate")
  })
}

/** Create a document that aggregates all legislative members for easier
* searching on the client. */
export const createMemberSearchIndex = pubsub
.schedule("every 24 hours")
.onRun(async () => {
const members = await db
.collection(`/generalCourts/${currentGeneralCourt}/members`)
.get()

const index = members.docs
.map(d => (d.exists && d.data().content) ?? {})
.filter(Boolean)
// Strip out sponsored and cosponsored bills for size
.map(({ SponsoredBills, CoSponsoredBills, ...member }) => member)
.sort((m1, m2) => (m1.Name < m2.Name ? -1 : 1))

await db
.doc(`/generalCourts/${currentGeneralCourt}/indexes/memberSearch`)
.set({
representatives: index.filter(d => d.Branch === "House"),
senators: index.filter(d => d.Branch === "Senate")
})
})
.onRun(() => runCreateMemberSearchIndex(currentGeneralCourt))
88 changes: 58 additions & 30 deletions functions/src/scraper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ import { currentGeneralCourt } from "./shared"
/** Shape of a scraper batch document: one unit of fetch work for a single
 * general court. */
type Batch = {
/** General court the resources belong to. It is part of each resource's
 * Firestore path and is passed to the resource fetcher. */
court: number
/** Ids of the resources to fetch; the batch handler processes them one at a
 * time, in order. */
ids: string[]
/** When true, unexpected (non-Axios) errors are logged and the batch
* continues with the next id instead of failing the whole invocation.
* Used by the per-court backfill script so one bad id doesn't sink a
* historical-court run. Defaults to false for the scheduled prod path. */
resilient?: boolean
}

/** List all ids of the resources to scrape. Falsey values will be filtered out.
Expand Down Expand Up @@ -117,41 +122,64 @@ export function createScraper<T>({
const fetchBatch = runWith({ timeoutSeconds: fetchBatchTimeout })
.firestore.document(`/scrapers/${resourceName}/batches/{batchId}`)
.onCreate(async snap => {
const batch = snap.data() as Batch,
court = batch.court,
writer = db.bulkWriter()
try {
const batch = snap.data() as Batch,
court = batch.court,
writer = db.bulkWriter()

for (const id of batch.ids) {
try {
const path = `/generalCourts/${court}/${resourceName}/${id}`
const current = await db.doc(path).get()
const resource = await fetchResource(court, id, current.data())

writer.set(
db.doc(path),
{
...resource,
fetchedAt: Timestamp.now(),
lastFetch: FieldValue.delete(),
id,
court
},
{ merge: true }
)
} catch (e) {
if (axios.isAxiosError(e)) {
if (!missingResource(e)) {
logger.warn(
`Could not fetch resource ${resourceName}/${id}: ${e.message}`
)
}
} else if (batch.resilient) {
logger.error(`Unexpected error fetching ${resourceName}/${id}`, e)
} else {
throw e
}
}
}

for (const id of batch.ids) {
try {
const path = `/generalCourts/${court}/${resourceName}/${id}`
const current = await db.doc(path).get()
const resource = await fetchResource(court, id, current.data())

writer.set(
db.doc(path),
{
...resource,
fetchedAt: Timestamp.now(),
lastFetch: FieldValue.delete(),
id,
court
},
{ merge: true }
)
await writer.close()
} catch (e) {
if (axios.isAxiosError(e)) {
if (!missingResource(e)) {
logger.warn(
`Could not fetch resource ${resourceName}/${id}: ${e.message}`
)
}
} else {
throw e
}
logger.error(`bulkWriter.close failed for ${resourceName} batch`, e)
}
} finally {
logger.info(
`Attempting to delete ${resourceName} batch doc ${snap.ref.path}`
)
await snap.ref
.delete()
.then(() =>
logger.info(`Deleted ${resourceName} batch doc ${snap.ref.path}`)
)
.catch(e =>
logger.warn(
`Failed to delete ${resourceName} batch doc ${snap.ref.path}`,
e
)
)
}

await writer.close()
})

return { startBatches, fetchBatch }
Expand Down
18 changes: 18 additions & 0 deletions functions/src/shared/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,24 @@ export const generalCourts: Record<number, GeneralCourt | undefined> = {
Number: 192,
FirstYear: 2021,
SecondYear: 2022
},
191: {
Name: "191st (2019 - 2020)",
Number: 191,
FirstYear: 2019,
SecondYear: 2020
},
190: {
Name: "190th (2017 - 2018)",
Number: 190,
FirstYear: 2017,
SecondYear: 2018
},
189: {
Name: "189th (2015 - 2016)",
Number: 189,
FirstYear: 2015,
SecondYear: 2016
}
}

Expand Down
4 changes: 4 additions & 0 deletions scripts/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,10 @@ yarn firebase-admin run-script backfillBallotQuestionTestimonyCounts --env prod

<!-- TODO: document -->

#### `runScrapersByCourt`

<!-- TODO: document -->

#### `seedActiveTopicSubscriptions`

<!-- TODO: document -->
Expand Down
Loading
Loading