Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 3 additions & 21 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1371,34 +1371,16 @@ jobs:
run: |
# We need this link to make sure `python3` points to `python3.12` which contains the prerequisite packages.
ln -s "$(which python3.12)" "/usr/local/bin/python3"
# Build docs first with SKIP_API to ensure they are buildable without requiring any
# language docs to be built beforehand.
cd docs; SKIP_ERRORDOC=1 SKIP_API=1 bundle exec jekyll build; cd ..
if [ -f "./dev/is-changed.py" ]; then
# Skip PySpark and SparkR docs while keeping Scala/Java/SQL docs
pyspark_modules=`cd dev && python3.12 -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark')))"`
if [ `./dev/is-changed.py -m $pyspark_modules` = false ]; then export SKIP_PYTHONDOC=1; fi
if [ `./dev/is-changed.py -m sparkr` = false ]; then export SKIP_RDOC=1; fi
fi
export PYSPARK_DRIVER_PYTHON=python3.12
export PYSPARK_PYTHON=python3.12
# Print the values of environment variables `SKIP_ERRORDOC`, `SKIP_SCALADOC`, `SKIP_PYTHONDOC`, `SKIP_RDOC` and `SKIP_SQLDOC`
echo "SKIP_ERRORDOC: $SKIP_ERRORDOC"
echo "SKIP_SCALADOC: $SKIP_SCALADOC"
echo "SKIP_PYTHONDOC: $SKIP_PYTHONDOC"
echo "SKIP_RDOC: $SKIP_RDOC"
echo "SKIP_SQLDOC: $SKIP_SQLDOC"
# Always build the full documentation site for GitHub Pages deployment.
cd docs
bundle exec jekyll build
- name: Tar documentation
if: github.repository != 'apache/spark'
run: tar cjf site.tar.bz2 docs/_site
- name: Upload documentation
if: github.repository != 'apache/spark'
uses: actions/upload-artifact@v7
with:
name: site
path: site.tar.bz2
name: docs-site-${{ inputs.branch }}-${{ needs.precondition.outputs.head_sha }}
path: docs/_site
retention-days: 1

# Any TPC-DS related updates on this job need to be applied to tpcds-1g-gen job of benchmark.yml as well
Expand Down
70 changes: 17 additions & 53 deletions .github/workflows/pages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,79 +20,43 @@
name: GitHub Pages deployment

on:
push:
branches:
- master
workflow_run:
workflows: ["Build"]
branches: [master]
types: [completed]

concurrency:
group: 'docs preview'
cancel-in-progress: false

jobs:
docs:
name: Build and deploy documentation
deploy:
name: Deploy Documentation to GitHub Pages
if: github.event.workflow_run.conclusion == 'success'
runs-on: ubuntu-latest
permissions:
id-token: write
pages: write
environment:
name: github-pages # https://github.com/actions/deploy-pages/issues/271
env:
SPARK_TESTING: 1 # Reduce some noise in the logs
RELEASE_VERSION: 'In-Progress'
if: github.repository == 'apache/spark'
steps:
- name: Checkout Spark repository
uses: actions/checkout@v6
with:
repository: apache/spark
ref: 'master'
- name: Install Java 17
uses: actions/setup-java@v5
with:
distribution: zulu
java-version: 17
- name: Install Python 3.11
uses: actions/setup-python@v6
with:
python-version: '3.11'
architecture: x64
cache: 'pip'
- name: Install Python dependencies
- name: Print triggering commit
run: |
pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.23.2' pyarrow 'pandas==2.3.3' 'plotly>=4.8' 'docutils<0.18.0' \
'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'ruff==0.14.8' \
'pandas-stubs==1.2.0.53' 'grpcio==1.76.0' 'grpcio-status==1.76.0' 'protobuf==6.33.5' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
- name: Install Ruby for documentation generation
uses: ruby/setup-ruby@4dc28cf14d77b0afa6832d9765ac422dbf0dfedd # v1
echo "Title: ${{ github.event.workflow_run.display_title }}"
echo "SHA: ${{ github.event.workflow_run.head_sha }}"
- name: Download documentation site
uses: actions/download-artifact@v8
with:
ruby-version: '3.3'
bundler-cache: true
- name: Install Pandoc
run: |
sudo apt-get update -y
sudo apt-get install pandoc
- name: Install dependencies for documentation generation
run: |
cd docs
gem install bundler -v 2.4.22 -n /usr/local/bin
bundle install --retry=100
- name: Run documentation build
run: |
sed -i".tmp1" 's/SPARK_VERSION:.*$/SPARK_VERSION: '"$RELEASE_VERSION"'/g' docs/_config.yml
sed -i".tmp2" 's/SPARK_VERSION_SHORT:.*$/SPARK_VERSION_SHORT: '"$RELEASE_VERSION"'/g' docs/_config.yml
sed -i".tmp3" "s/'facetFilters':.*$/'facetFilters': [\"version:$RELEASE_VERSION\"]/g" docs/_config.yml
sed -i".tmp4" 's/__version__: str = .*$/__version__: str = "'"$RELEASE_VERSION"'"/' python/pyspark/version.py
cd docs
SKIP_RDOC=1 bundle exec jekyll build
name: docs-site-${{ github.event.workflow_run.head_branch }}-${{ github.event.workflow_run.head_sha }}
path: docs/_site
run-id: ${{ github.event.workflow_run.id }}
github-token: ${{ secrets.GITHUB_TOKEN }}
- name: Setup Pages
uses: actions/configure-pages@v6
- name: Upload artifact
uses: actions/upload-pages-artifact@v5
with:
path: 'docs/_site'
path: docs/_site
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v5