|
| 1 | +# Copyright 2026 Google LLC |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | + |
| 15 | +import base64 |
| 16 | +import json |
| 17 | +import logging |
| 18 | +import os |
| 19 | +import croniter |
| 20 | +from datetime import datetime, timezone |
| 21 | +from google.auth.transport.requests import Request |
| 22 | +from google.oauth2 import id_token |
| 23 | +from google.cloud import storage |
| 24 | +from google.cloud.workflows import executions_v1 |
| 25 | +import requests |
| 26 | + |
| 27 | +logging.getLogger().setLevel(logging.INFO) |
| 28 | + |
| 29 | +PROJECT_ID = os.environ.get('PROJECT_ID') |
| 30 | +LOCATION = os.environ.get('LOCATION') |
| 31 | +GCS_BUCKET_ID = os.environ.get('GCS_BUCKET_ID') |
| 32 | +INGESTION_HELPER_URL = f"https://{LOCATION}-{PROJECT_ID}.cloudfunctions.net/spanner-ingestion-helper" |
| 33 | +WORKFLOW_ID = 'spanner-ingestion-workflow' |
| 34 | + |
def invoke_ingestion_workflow(import_name: str):
    """Triggers the graph ingestion workflows.

    Args:
        import_name: The name of the import.
    """
    # Only the portion after the final ':' is passed to the workflow.
    short_name = import_name.split(':')[-1]
    workflow_args = {"importList": [short_name]}

    logging.info(f"Invoking {WORKFLOW_ID} for {import_name}")
    workflow_parent = (
        f"projects/{PROJECT_ID}/locations/{LOCATION}/workflows/{WORKFLOW_ID}")
    execution = executions_v1.Execution(argument=json.dumps(workflow_args))
    client = executions_v1.ExecutionsClient()
    response = client.create_execution(parent=workflow_parent,
                                       execution=execution)
    logging.info(
        f"Triggered workflow {WORKFLOW_ID} for {import_name}. Execution ID: {response.name}"
    )
| 52 | + |
| 53 | + |
def update_import_status(import_name,
                         import_status,
                         import_version,
                         graph_path,
                         job_id,
                         cron_schedule=None):
    """Updates the status for the specified import job.

    Posts an authenticated status-update request to the ingestion helper
    Cloud Function. When a cron schedule is supplied, the next refresh
    timestamp is computed and included as well (best-effort: a bad
    schedule is logged and skipped, not raised).

    Args:
        import_name: The name of the import.
        import_status: The new status of the import.
        import_version: The version of the import.
        graph_path: The graph path for the import.
        job_id: The job ID associated with the import.
        cron_schedule: The cron schedule for the import (optional).
    """
    logging.info(f"Updating {import_name} status: {import_status}")
    # Import names use ':' separators; GCS paths use '/'.
    version_path = import_name.replace(':', '/')
    latest_version = f"gs://{GCS_BUCKET_ID}/{version_path}/{import_version}"
    request = {
        'actionType': 'update_import_status',
        'importName': import_name,
        'status': import_status,
        'job_id': job_id,
        'latestVersion': latest_version,
        'graphPath': graph_path,
    }
    if cron_schedule:
        try:
            schedule = croniter.croniter(cron_schedule,
                                         datetime.now(timezone.utc))
            request['nextRefresh'] = schedule.get_next(datetime).isoformat()
        except croniter.CroniterError as e:
            logging.error(
                f"Error calculating next refresh from schedule '{cron_schedule}': {e}"
            )
    logging.info(f"Update request: {request}")
    # Authenticate to the helper function with an OIDC identity token.
    token = id_token.fetch_id_token(Request(), INGESTION_HELPER_URL)
    response = requests.post(INGESTION_HELPER_URL,
                             json=request,
                             headers={'Authorization': f'Bearer {token}'})
    response.raise_for_status()
    logging.info(f"Updated status for {import_name}")
| 100 | + |
| 101 | + |
def parse_message(request) -> dict:
    """Processes the incoming Pub/Sub message.

    Args:
        request: The flask request object.

    Returns:
        A dictionary containing the Pub/Sub message envelope, or None if
        the request is not a valid Pub/Sub push payload.
    """
    request_json = request.get_json(silent=True)
    if not request_json or 'message' not in request_json:
        logging.error('Invalid Pub/Sub message format')
        return None

    pubsub_message = request_json['message']
    logging.info(f"Received Pub/Sub message: {pubsub_message}")
    try:
        # Decoded payload is logged for observability only; the raw
        # envelope dict is returned regardless (best-effort decode).
        data_bytes = base64.b64decode(pubsub_message["data"])
        notification_json = data_bytes.decode("utf-8")
        logging.info(f"Notification content: {notification_json}")
    except (KeyError, ValueError) as e:
        # KeyError: missing 'data' field. ValueError covers both
        # binascii.Error (malformed base64) and UnicodeDecodeError
        # (non-UTF-8 payload) — narrower than the previous broad
        # `except Exception`, which could mask programming errors.
        logging.error(f"Error decoding message data: {e}")

    return pubsub_message
| 126 | + |
| 127 | + |
def check_duplicate(message_id: str):
    """Checks for duplicate messages using a GCS file.

    Creates an empty marker object named after the message ID with
    if_generation_match=0, which makes the upload fail atomically if the
    object already exists — so object creation doubles as a dedupe lock.

    Args:
        message_id: The ID of the message to check.

    Returns:
        True if the message is a duplicate, False otherwise.
    """
    if not message_id:
        # No ID to deduplicate on; treat as a first delivery.
        return False
    logging.info(f"Checking for existing message: {message_id}")
    storage_client = storage.Client()
    bucket = storage_client.bucket(GCS_BUCKET_ID)
    blob = bucket.blob(f"google3/transfers/{message_id}")
    try:
        # if_generation_match=0 asserts the object does not yet exist.
        blob.upload_from_string("", if_generation_match=0)
    except Exception as e:
        # NOTE(review): ideally this would catch only
        # google.api_core.exceptions.PreconditionFailed (HTTP 412); as
        # written, a transient network/auth failure is also reported as a
        # duplicate. Kept broad to avoid a new dependency, but the reason
        # is now logged instead of being silently swallowed.
        logging.info(f"Marking message {message_id} as duplicate: {e}")
        return True
    return False
0 commit comments