diff --git a/.github/workflows/CERT_ecse_site_list.yml b/.github/workflows/CERT_ecse_site_list.yml
new file mode 100644
index 000000000..83c6d9053
--- /dev/null
+++ b/.github/workflows/CERT_ecse_site_list.yml
@@ -0,0 +1,47 @@
+name: CERT-ecse-site-list
+on:
+  push:
+    branches:
+      - 'cert'
+    paths:
+      - 'pipe/ecse/site-list.json'
+  workflow_dispatch: {} # Allows trigger of workflow from web interface
+
+permissions:
+  contents: read
+
+# Workflow-level concurrency: update-dag and site-list workflows share 'operations-queue' (parenthesized because && binds tighter than ||)
+concurrency:
+  group: ${{
+    (contains(github.workflow, 'update-dag') ||
+    contains(github.workflow, 'site-list')) && 'operations-queue'
+    || github.workflow
+    }}
+  cancel-in-progress: false # Queue them up, don't cancel
+
+jobs:
+  put_files:
+    runs-on: arc-neon-gke
+    #runs-on: ubuntu-latest
+    env:
+      PACHD_ADDRESS: grpcs://pachyderm-cert.transitions-nonprod.gcp.neoninternal.org:443
+      PACH_TOKEN: ${{ secrets.RepoOwnerPachydermCert }}
+      REPO: ecse_site_list # Pachyderm repo
+      BRANCH: master
+      IN_PATHS: 'pipe/ecse/site-list.json' # Comma-separated list (no spaces) to one or more paths or directories. Length must match OUT_PATHS. If directory, all files in directory will be placed in pachyderm at corresponding entry of OUT_PATHS.
+      OUT_PATHS: 'site-list.json' # Comma-separated list (no spaces) of corresponding path(s) to place the file(s) in Pachyderm. Must be same length as IN_PATHS. If corresponding entry in IN_PATHS is a file, specify to the file. If corresponding entry in IN_PATHS is a directory, specify to the directory.
+    steps:
+      - uses: actions/checkout@v4
+      - run: ls -la
+
+      - name: Put file
+        uses: ./.github/actions/put-files
+        with:
+          pachd_address: ${{ env.PACHD_ADDRESS }}
+          pach_token: ${{ env.PACH_TOKEN }}
+          repo_name: ${{ env.REPO }}
+          branch_name: ${{ env.BRANCH }}
+          in_paths: ${{ env.IN_PATHS }}
+          out_paths: ${{ env.OUT_PATHS }}
+
+
diff --git a/.github/workflows/CERT_ecse_update_dag.yml b/.github/workflows/CERT_ecse_update_dag.yml
new file mode 100644
index 000000000..cb6c4627e
--- /dev/null
+++ b/.github/workflows/CERT_ecse_update_dag.yml
@@ -0,0 +1,73 @@
+name: CERT-ecse-update-dag
+on:
+  push:
+    branches:
+      - 'none'
+    paths:
+      - 'pipe/ecse/*.yaml'
+      - 'pipe/ecse/pipe_list_ecse.txt'
+  workflow_dispatch: {} # Allows trigger of workflow from web interface
+
+permissions:
+  contents: read
+
+# Workflow-level concurrency: update-dag and site-list workflows share 'operations-queue' (parenthesized because && binds tighter than ||)
+concurrency:
+  group: ${{
+    (contains(github.workflow, 'update-dag') ||
+    contains(github.workflow, 'site-list')) && 'operations-queue'
+    || github.workflow
+    }}
+  cancel-in-progress: false # Queue them up, don't cancel
+
+jobs:
+  # -------------------------------------------------------------
+  # Using GitHub's API is not supported for push events
+  # -------------------------------------------------------------
+  #
+  # ----------------------------------------------------------------------------------------------
+  # Using local .git history
+  # ----------------------------------------------------------------------------------------------
+  # Event `push`: Compare the preceding remote commit -> to the current commit of the main branch
+  # ----------------------------------------------------------------------------------------------
+
+  changed_files:
+    runs-on: ubuntu-latest # windows-latest || macos-latest
+    outputs:
+      # Use this changed_file_list if you plan to use get-changed-files-action
+      changed_file_list: ${{ steps.changed-files-action.outputs.changed_file_list }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0 # OR "2" -> To retrieve the preceding commit.
+
+      # Using get-changed-files-action
+      - name: Get changed files action
+        id: changed-files-action
+        uses: ./.github/actions/get-changed-files
+
+  update_pipelines:
+    needs: changed_files
+    runs-on: arc-neon-gke
+    #runs-on: ubuntu-latest
+    env:
+      PACHD_ADDRESS: grpcs://pachyderm-cert.transitions-nonprod.gcp.neoninternal.org:443
+      PACH_TOKEN: ${{ secrets.RepoOwnerPachydermCert }}
+      PATHS: 'pipe/ecse=pipe_list_ecse.txt' # Format: '='. Separate multiple with comma (e.g. 'pipe/pqs1=pipe_list_pqs1.txt,pipe/parWaterSurface=pipe_list_parWaterSurface.txt'). Order matters.
+      TRANSACTION: True
+      UPDATE_SCOPE: changed # 'all' or 'changed'. If not specified, all will be updated. 'changed' will update/create any changed/non-existent pipelines.
+      CHANGED_FILES: ${{needs.changed_files.outputs.changed_file_list}}
+    steps:
+      - uses: actions/checkout@v4
+      - run: ls -la
+
+      - name: Update pipelines
+        uses: ./.github/actions/update-pipelines
+        with:
+          pachd_address: ${{ env.PACHD_ADDRESS }}
+          pach_token: ${{ env.PACH_TOKEN }}
+          paths: ${{ env.PATHS }}
+          transaction: ${{ env.TRANSACTION }}
+          update_scope: ${{ env.UPDATE_SCOPE }}
+          changed_files: ${{ env.CHANGED_FILES }}
+
diff --git a/.github/workflows/DEV_ecse_site_list.yml b/.github/workflows/DEV_ecse_site_list.yml
new file mode 100644
index 000000000..d5d8612da
--- /dev/null
+++ b/.github/workflows/DEV_ecse_site_list.yml
@@ -0,0 +1,47 @@
+name: DEV-ecse-site-list
+on:
+  push:
+    branches:
+      - 'master'
+    paths:
+      - 'pipe/ecse/site-list.json'
+  workflow_dispatch: {} # Allows trigger of workflow from web interface
+
+permissions:
+  contents: read
+
+# Workflow-level concurrency: update-dag and site-list workflows share 'operations-queue' (parenthesized because && binds tighter than ||)
+concurrency:
+  group: ${{
+    (contains(github.workflow, 'update-dag') ||
+    contains(github.workflow, 'site-list')) && 'operations-queue'
+    || github.workflow
+    }}
+  cancel-in-progress: false # Queue them up, don't cancel
+
+jobs:
+  put_files:
+    runs-on: arc-neon-gke
+    #runs-on: ubuntu-latest
+    env:
+      PACHD_ADDRESS: grpcs://pachyderm-dev.transitions-nonprod.gcp.neoninternal.org:443
+      PACH_TOKEN: ${{ secrets.RepoOwnerPachydermDev }}
+      REPO: ecse_site_list # Pachyderm repo
+      BRANCH: master
+      IN_PATHS: 'pipe/ecse/site-list.json' # Comma-separated list (no spaces) to one or more paths or directories. Length must match OUT_PATHS. If directory, all files in directory will be placed in pachyderm at corresponding entry of OUT_PATHS.
+      OUT_PATHS: 'site-list.json' # Comma-separated list (no spaces) of corresponding path(s) to place the file(s) in Pachyderm. Must be same length as IN_PATHS. If corresponding entry in IN_PATHS is a file, specify to the file. If corresponding entry in IN_PATHS is a directory, specify to the directory.
+    steps:
+      - uses: actions/checkout@v4
+      - run: ls -la
+
+      - name: Put file
+        uses: ./.github/actions/put-files
+        with:
+          pachd_address: ${{ env.PACHD_ADDRESS }}
+          pach_token: ${{ env.PACH_TOKEN }}
+          repo_name: ${{ env.REPO }}
+          branch_name: ${{ env.BRANCH }}
+          in_paths: ${{ env.IN_PATHS }}
+          out_paths: ${{ env.OUT_PATHS }}
+
+
diff --git a/.github/workflows/DEV_ecse_update_dag.yml b/.github/workflows/DEV_ecse_update_dag.yml
new file mode 100644
index 000000000..f66313dcb
--- /dev/null
+++ b/.github/workflows/DEV_ecse_update_dag.yml
@@ -0,0 +1,73 @@
+name: DEV-ecse-update-dag
+on:
+  push:
+    branches:
+      - 'none'
+    paths:
+      - 'pipe/ecse/*.yaml'
+      - 'pipe/ecse/pipe_list_ecse.txt'
+  workflow_dispatch: {} # Allows trigger of workflow from web interface
+
+permissions:
+  contents: read
+
+# Workflow-level concurrency: update-dag and site-list workflows share 'operations-queue' (parenthesized because && binds tighter than ||)
+concurrency:
+  group: ${{
+    (contains(github.workflow, 'update-dag') ||
+    contains(github.workflow, 'site-list')) && 'operations-queue'
+    || github.workflow
+    }}
+  cancel-in-progress: false # Queue them up, don't cancel
+
+jobs:
+  # -------------------------------------------------------------
+  # Using GitHub's API is not supported for push events
+  # -------------------------------------------------------------
+  #
+  # ----------------------------------------------------------------------------------------------
+  # Using local .git history
+  # ----------------------------------------------------------------------------------------------
+  # Event `push`: Compare the preceding remote commit -> to the current commit of the main branch
+  # ----------------------------------------------------------------------------------------------
+
+  changed_files:
+    runs-on: ubuntu-latest # windows-latest || macos-latest
+    outputs:
+      # Use this changed_file_list if you plan to use get-changed-files-action
+      changed_file_list: ${{ steps.changed-files-action.outputs.changed_file_list }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0 # OR "2" -> To retrieve the preceding commit.
+
+      # Using get-changed-files-action
+      - name: Get changed files action
+        id: changed-files-action
+        uses: ./.github/actions/get-changed-files
+
+  update_pipelines:
+    needs: changed_files
+    runs-on: arc-neon-gke
+    #runs-on: ubuntu-latest
+    env:
+      PACHD_ADDRESS: grpcs://pachyderm-dev.transitions-nonprod.gcp.neoninternal.org:443
+      PACH_TOKEN: ${{ secrets.RepoOwnerPachydermDev }}
+      PATHS: 'pipe/ecse=pipe_list_ecse.txt' # Format: '='. Separate multiple with comma (e.g. 'pipe/pqs1=pipe_list_pqs1.txt,pipe/parWaterSurface=pipe_list_parWaterSurface.txt'). Order matters.
+      TRANSACTION: True
+      UPDATE_SCOPE: changed # 'all' or 'changed'. If not specified, all will be updated. 'changed' will update/create any changed/non-existent pipelines.
+      CHANGED_FILES: ${{needs.changed_files.outputs.changed_file_list}}
+    steps:
+      - uses: actions/checkout@v4
+      - run: ls -la
+
+      - name: Update pipelines
+        uses: ./.github/actions/update-pipelines
+        with:
+          pachd_address: ${{ env.PACHD_ADDRESS }}
+          pach_token: ${{ env.PACH_TOKEN }}
+          paths: ${{ env.PATHS }}
+          transaction: ${{ env.TRANSACTION }}
+          update_scope: ${{ env.UPDATE_SCOPE }}
+          changed_files: ${{ env.CHANGED_FILES }}
+
diff --git a/.github/workflows/PROD_ecse_site_list.yml b/.github/workflows/PROD_ecse_site_list.yml
new file mode 100644
index 000000000..e60673602
--- /dev/null
+++ b/.github/workflows/PROD_ecse_site_list.yml
@@ -0,0 +1,47 @@
+name: PROD-ecse-site-list
+on:
+  push:
+    branches:
+      - 'prod'
+    paths:
+      - 'pipe/ecse/site-list.json'
+  workflow_dispatch: {} # Allows trigger of workflow from web interface
+
+permissions:
+  contents: read
+
+# Workflow-level concurrency: update-dag and site-list workflows share 'operations-queue' (parenthesized because && binds tighter than ||)
+concurrency:
+  group: ${{
+    (contains(github.workflow, 'update-dag') ||
+    contains(github.workflow, 'site-list')) && 'operations-queue'
+    || github.workflow
+    }}
+  cancel-in-progress: false # Queue them up, don't cancel
+
+jobs:
+  put_files:
+    runs-on: arc-neon-gke
+    #runs-on: ubuntu-latest
+    env:
+      PACHD_ADDRESS: grpcs://pachyderm-prod.transitions.gcp.neoninternal.org:443
+      PACH_TOKEN: ${{ secrets.RepoOwnerPachydermProd }}
+      REPO: ecse_site_list # Pachyderm repo
+      BRANCH: master
+      IN_PATHS: 'pipe/ecse/site-list.json' # Comma-separated list (no spaces) to one or more paths or directories. Length must match OUT_PATHS. If directory, all files in directory will be placed in pachyderm at corresponding entry of OUT_PATHS.
+      OUT_PATHS: 'site-list.json' # Comma-separated list (no spaces) of corresponding path(s) to place the file(s) in Pachyderm. Must be same length as IN_PATHS. If corresponding entry in IN_PATHS is a file, specify to the file. If corresponding entry in IN_PATHS is a directory, specify to the directory.
+    steps:
+      - uses: actions/checkout@v4
+      - run: ls -la
+
+      - name: Put file
+        uses: ./.github/actions/put-files
+        with:
+          pachd_address: ${{ env.PACHD_ADDRESS }}
+          pach_token: ${{ env.PACH_TOKEN }}
+          repo_name: ${{ env.REPO }}
+          branch_name: ${{ env.BRANCH }}
+          in_paths: ${{ env.IN_PATHS }}
+          out_paths: ${{ env.OUT_PATHS }}
+
+
diff --git a/.github/workflows/PROD_ecse_update_dag.yml b/.github/workflows/PROD_ecse_update_dag.yml
new file mode 100644
index 000000000..ceea9126c
--- /dev/null
+++ b/.github/workflows/PROD_ecse_update_dag.yml
@@ -0,0 +1,73 @@
+name: PROD-ecse-update-dag
+on:
+  push:
+    branches:
+      - 'prod'
+    paths:
+      - 'pipe/ecse/*.yaml'
+      - 'pipe/ecse/pipe_list_ecse.txt'
+  workflow_dispatch: {} # Allows trigger of workflow from web interface
+
+permissions:
+  contents: read
+
+# Workflow-level concurrency: update-dag and site-list workflows share 'operations-queue' (parenthesized because && binds tighter than ||)
+concurrency:
+  group: ${{
+    (contains(github.workflow, 'update-dag') ||
+    contains(github.workflow, 'site-list')) && 'operations-queue'
+    || github.workflow
+    }}
+  cancel-in-progress: false # Queue them up, don't cancel
+
+jobs:
+  # -------------------------------------------------------------
+  # Using GitHub's API is not supported for push events
+  # -------------------------------------------------------------
+  #
+  # ----------------------------------------------------------------------------------------------
+  # Using local .git history
+  # ----------------------------------------------------------------------------------------------
+  # Event `push`: Compare the preceding remote commit -> to the current commit of the main branch
+  # ----------------------------------------------------------------------------------------------
+
+  changed_files:
+    runs-on: ubuntu-latest # windows-latest || macos-latest
+    outputs:
+      # Use this changed_file_list if you plan to use get-changed-files-action
+      changed_file_list: ${{ steps.changed-files-action.outputs.changed_file_list }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0 # OR "2" -> To retrieve the preceding commit.
+
+      # Using get-changed-files-action
+      - name: Get changed files action
+        id: changed-files-action
+        uses: ./.github/actions/get-changed-files
+
+  update_pipelines:
+    needs: changed_files
+    runs-on: arc-neon-gke
+    #runs-on: ubuntu-latest
+    env:
+      PACHD_ADDRESS: grpcs://pachyderm-prod.transitions.gcp.neoninternal.org:443
+      PACH_TOKEN: ${{ secrets.RepoOwnerPachydermProd }}
+      PATHS: 'pipe/ecse=pipe_list_ecse.txt' # Format: '='. Separate multiple with comma (e.g. 'pipe/pqs1=pipe_list_pqs1.txt,pipe/parWaterSurface=pipe_list_parWaterSurface.txt'). Order matters.
+      TRANSACTION: True
+      UPDATE_SCOPE: changed # 'all' or 'changed'. If not specified, all will be updated. 'changed' will update/create any changed/non-existent pipelines.
+      CHANGED_FILES: ${{needs.changed_files.outputs.changed_file_list}}
+    steps:
+      - uses: actions/checkout@v4
+      - run: ls -la
+
+      - name: Update pipelines
+        uses: ./.github/actions/update-pipelines
+        with:
+          pachd_address: ${{ env.PACHD_ADDRESS }}
+          pach_token: ${{ env.PACH_TOKEN }}
+          paths: ${{ env.PATHS }}
+          transaction: ${{ env.TRANSACTION }}
+          update_scope: ${{ env.UPDATE_SCOPE }}
+          changed_files: ${{ env.CHANGED_FILES }}
+
diff --git a/pipe/ecse/ecse_cron_daily_and_date_control.yaml b/pipe/ecse/ecse_cron_daily_and_date_control.yaml index 4dc572d45..20ab8caac 100644 --- a/pipe/ecse/ecse_cron_daily_and_date_control.yaml +++ b/pipe/ecse/ecse_cron_daily_and_date_control.yaml @@ -11,7 +11,7 @@ transform: # kafka_start_date in the site-list file is the first full day from which data began streaming via Kafka # END_DATE can be set or unset (remove line entirely to unset). If unset, end date will be yesterday. 
OUT_PATH: /pfs/out - START_DATE: "2025-07-01" # Inclusive + START_DATE: "2026-05-08" # Inclusive #END_DATE: "2025-06-30" # Inclusive SOURCE_TYPE: "ecse" stdin: diff --git a/pipe/ecse/ecse_event_data_source_kafka.yaml b/pipe/ecse/ecse_event_data_source_kafka.yaml index 8e36ed87f..a14fbd8ce 100644 --- a/pipe/ecse/ecse_event_data_source_kafka.yaml +++ b/pipe/ecse/ecse_event_data_source_kafka.yaml @@ -153,9 +153,6 @@ transform: rm -rf "${outdir}" done - # Update the airflow triggering table - ./update-trigger-table.py -s $site -S $SOURCE_TYPE -D "$OUT_PATH/$SOURCE_TYPE/$site" - # set +x # Uncomment for troubleshooting rm -rf $linkdir fi diff --git a/pipe/ecse/pipe_list_ecse.txt b/pipe/ecse/pipe_list_ecse.txt new file mode 100644 index 000000000..61ce1c81e --- /dev/null +++ b/pipe/ecse/pipe_list_ecse.txt @@ -0,0 +1,2 @@ +ecse_cron_daily_and_date_control.yaml +ecse_event_data_source_kafka.yaml