From b07028c8ceae333b296c4569d512bda1d6b7e43c Mon Sep 17 00:00:00 2001
From: Ruifeng Zheng
Date: Fri, 29 May 2026 15:00:33 +0000
Subject: [PATCH] [INFRA] Build full docs on master and deploy via workflow_run
 in pages.yml
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In build_and_test.yml:
- For master (inputs.branch == ''), always build the full documentation
  site without SKIP_PYTHONDOC / SKIP_RDOC change detection, so the
  deployed site is always complete.
- Upload docs/_site as artifact docs-site-<branch>-<sha> (retention: 1 day).

In pages.yml:
- Replace the self-contained build with a workflow_run trigger that fires
  after the "Build" workflow completes on master.
- The deploy job prints the triggering commit title, downloads the
  artifact produced by CI, and publishes it to GitHub Pages — no build
  tooling needed.
- Keep the deployment restricted to apache/spark, as it was before, so
  that forks running "Build" on their own master do not try to deploy.
- Grant `actions: read` so that the artifact of the triggering run can be
  downloaded with the job token.
- Pass the commit title and SHA to the shell through `env` instead of
  expanding them inside the script; titles may contain quotes or backticks.
- Select the artifact by a SHA-based pattern, because the branch part of
  its name is empty when inputs.branch == ''.

Generated-by: Claude Code (https://claude.ai/code)
---
 .github/workflows/build_and_test.yml | 24 ++-------
 .github/workflows/pages.yml          | 81 ++++++++++------------------
 2 files changed, 31 insertions(+), 74 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 66c3f89ebbab..fa62c3a854a5 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -1371,34 +1371,16 @@ jobs:
       run: |
         # We need this link to make sure `python3` points to `python3.12` which contains the prerequisite packages.
         ln -s "$(which python3.12)" "/usr/local/bin/python3"
-        # Build docs first with SKIP_API to ensure they are buildable without requiring any
-        # language docs to be built beforehand.
-        cd docs; SKIP_ERRORDOC=1 SKIP_API=1 bundle exec jekyll build; cd ..
-        if [ -f "./dev/is-changed.py" ]; then
-          # Skip PySpark and SparkR docs while keeping Scala/Java/SQL docs
-          pyspark_modules=`cd dev && python3.12 -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark')))"`
-          if [ `./dev/is-changed.py -m $pyspark_modules` = false ]; then export SKIP_PYTHONDOC=1; fi
-          if [ `./dev/is-changed.py -m sparkr` = false ]; then export SKIP_RDOC=1; fi
-        fi
         export PYSPARK_DRIVER_PYTHON=python3.12
         export PYSPARK_PYTHON=python3.12
-        # Print the values of environment variables `SKIP_ERRORDOC`, `SKIP_SCALADOC`, `SKIP_PYTHONDOC`, `SKIP_RDOC` and `SKIP_SQLDOC`
-        echo "SKIP_ERRORDOC: $SKIP_ERRORDOC"
-        echo "SKIP_SCALADOC: $SKIP_SCALADOC"
-        echo "SKIP_PYTHONDOC: $SKIP_PYTHONDOC"
-        echo "SKIP_RDOC: $SKIP_RDOC"
-        echo "SKIP_SQLDOC: $SKIP_SQLDOC"
+        # Always build the full documentation site for GitHub Pages deployment.
         cd docs
         bundle exec jekyll build
-    - name: Tar documentation
-      if: github.repository != 'apache/spark'
-      run: tar cjf site.tar.bz2 docs/_site
     - name: Upload documentation
-      if: github.repository != 'apache/spark'
       uses: actions/upload-artifact@v7
       with:
-        name: site
-        path: site.tar.bz2
+        name: docs-site-${{ inputs.branch }}-${{ needs.precondition.outputs.head_sha }}
+        path: docs/_site
         retention-days: 1
 
   # Any TPC-DS related updates on this job need to be applied to tpcds-1g-gen job of benchmark.yml as well
diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
index fdf15337cf67..d2822b9fc2a0 100644
--- a/.github/workflows/pages.yml
+++ b/.github/workflows/pages.yml
@@ -20,79 +20,54 @@ name: GitHub Pages deployment
 
 
 on:
-  push:
-    branches:
-      - master
+  workflow_run:
+    workflows: ["Build"]
+    branches: [master]
+    types: [completed]
 
 concurrency:
   group: 'docs preview'
   cancel-in-progress: false
 
 jobs:
-  docs:
-    name: Build and deploy documentation
+  deploy:
+    name: Deploy Documentation to GitHub Pages
+    # Deploy only from the canonical repository (forks run "Build" on their own
+    # master as well) and only when the triggering build succeeded.
+    if: github.repository == 'apache/spark' && github.event.workflow_run.conclusion == 'success'
     runs-on: ubuntu-latest
     permissions:
+      actions: read  # Needed to download the artifact of the triggering "Build" run
       id-token: write
       pages: write
     environment:
       name: github-pages # https://github.com/actions/deploy-pages/issues/271
-    env:
-      SPARK_TESTING: 1 # Reduce some noise in the logs
-      RELEASE_VERSION: 'In-Progress'
-    if: github.repository == 'apache/spark'
     steps:
-      - name: Checkout Spark repository
-        uses: actions/checkout@v6
-        with:
-          repository: apache/spark
-          ref: 'master'
-      - name: Install Java 17
-        uses: actions/setup-java@v5
-        with:
-          distribution: zulu
-          java-version: 17
-      - name: Install Python 3.11
-        uses: actions/setup-python@v6
-        with:
-          python-version: '3.11'
-          architecture: x64
-          cache: 'pip'
-      - name: Install Python dependencies
+      - name: Print triggering commit
+        # Hand event data to the shell through the environment instead of expanding
+        # it inside the script: commit titles may contain quotes, backticks or $(...).
+        env:
+          DISPLAY_TITLE: ${{ github.event.workflow_run.display_title }}
+          HEAD_SHA: ${{ github.event.workflow_run.head_sha }}
         run: |
-          pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
-            ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.23.2' pyarrow 'pandas==2.3.3' 'plotly>=4.8' 'docutils<0.18.0' \
-            'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'ruff==0.14.8' \
-            'pandas-stubs==1.2.0.53' 'grpcio==1.76.0' 'grpcio-status==1.76.0' 'protobuf==6.33.5' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
-            'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
-      - name: Install Ruby for documentation generation
-        uses: ruby/setup-ruby@4dc28cf14d77b0afa6832d9765ac422dbf0dfedd # v1
+          echo "Title: $DISPLAY_TITLE"
+          echo "SHA: $HEAD_SHA"
+      - name: Download documentation site
+        uses: actions/download-artifact@v8
         with:
-          ruby-version: '3.3'
-          bundler-cache: true
-      - name: Install Pandoc
-        run: |
-          sudo apt-get update -y
-          sudo apt-get install pandoc
-      - name: Install dependencies for documentation generation
-        run: |
-          cd docs
-          gem install bundler -v 2.4.22 -n /usr/local/bin
-          bundle install --retry=100
-      - name: Run documentation build
-        run: |
-          sed -i".tmp1" 's/SPARK_VERSION:.*$/SPARK_VERSION: '"$RELEASE_VERSION"'/g' docs/_config.yml
-          sed -i".tmp2" 's/SPARK_VERSION_SHORT:.*$/SPARK_VERSION_SHORT: '"$RELEASE_VERSION"'/g' docs/_config.yml
-          sed -i".tmp3" "s/'facetFilters':.*$/'facetFilters': [\"version:$RELEASE_VERSION\"]/g" docs/_config.yml
-          sed -i".tmp4" 's/__version__: str = .*$/__version__: str = "'"$RELEASE_VERSION"'"/' python/pyspark/version.py
-          cd docs
-          SKIP_RDOC=1 bundle exec jekyll build
+          # The branch part of the artifact name comes from `inputs.branch` in
+          # build_and_test.yml, which may be empty for master, so match on the SHA.
+          pattern: docs-site-*-${{ github.event.workflow_run.head_sha }}
+          merge-multiple: true
+          path: docs/_site
+          run-id: ${{ github.event.workflow_run.id }}
+          github-token: ${{ secrets.GITHUB_TOKEN }}
       - name: Setup Pages
         uses: actions/configure-pages@v6
       - name: Upload artifact
         uses: actions/upload-pages-artifact@v5
         with:
-          path: 'docs/_site'
+          path: docs/_site
       - name: Deploy to GitHub Pages
         id: deployment
         uses: actions/deploy-pages@v5