tutorials/.jenkins/build.sh at 68c67e093fc99944b16dbc551f0cf6d3280482a1 · pytorch/tutorials · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
#!/bin/bash

set -ex

export BUCKET_NAME=pytorch-tutorial-build-pull-request

# Nightly and stable builds may run at the same time, so each one gets its
# own prefix to keep them from conflicting. Start from "stable" and switch
# to "nightly" only when USE_NIGHTLY is 1 (unset counts as 0).
BUILD_PREFIX="stable"
if [ "${USE_NIGHTLY:-0}" -eq 1 ]; then
  BUILD_PREFIX="nightly"
fi
export BUILD_PREFIX

# spacy depends on click, which needs the locale to be set
export LC_ALL=C.UTF-8 LANG=C.UTF-8

# Absolute path of the directory that contains this script
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null && pwd)"

# Update root certificates by installing new libgnutls30
# NOTE(review): no command in this script performs the step named above;
# confirm whether it is still needed or whether the heading can be dropped.

# Install pandoc (does not install from pypi)
sudo apt-get update
sudo apt-get install -y pandoc

# NS: Path to python runtime should already be part of docker container
# export PATH=/opt/conda/bin:$PATH

# When USE_NIGHTLY is 1 (unset counts as 0), swap the torch packages for the
# pinned torch==2.10.0 build served from the PyTorch "test" wheel index
# (whl/test/cu130), then print what ended up installed.
if [ "${USE_NIGHTLY:-0}" -eq 1 ]; then
  # Remove any existing torch/torchvision/torchaudio first
  sudo pip uninstall -y torch torchvision torchaudio
  pip3 install torch==2.10.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu130
  # Record the installed torch package metadata in the build log
  pip show torch
fi

# Commented-out commands kept for reference (none of the lines below run):
# Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html
# Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed).
# sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata
#pip3 install torch==2.10.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu130

# Install the English spaCy model (en_core_web_sm 3.4.0) for the Translation
# with TorchText tutorial.
# NOTE(review): the original heading said "two language tokenizers", but only
# this one model is installed here; confirm whether a second one is expected.
pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.0/en_core_web_sm-3.4.0-py3-none-any.whl

# NOTE(review): presumably installs the AWS CLI v2 used by the `awsv2`
# wrapper; confirm against the awscliv2 documentation.
awsv2 -i
# Set the S3 multipart threshold of the default profile to 5120MB
awsv2 configure set default.s3.multipart_threshold 5120MB

#######################################
# Delete generated files that do not belong to the tutorials listed in
# FILES_TO_RUN. Every path found under the given directories whose name ends
# with the suffix is removed unless its basename, with the suffix stripped,
# appears as a whole word in the space-separated FILES_TO_RUN list.
# Globals:   FILES_TO_RUN (read)
# Arguments: $1   - filename suffix including the leading dot, e.g. ".html"
#            $2.. - directories to search
# Outputs:   writes "removing <path>" to stdout for every deleted path
# Returns:   status of the last command run in the loop; a failing `rm`
#            aborts the script when `set -e` is active
#######################################
remove_unassigned_outputs() {
  local suffix=$1
  shift
  local path stem
  # NUL-delimited find output keeps paths that contain whitespace or glob
  # characters intact. The exit status of find itself is not checked, so a
  # search directory that does not exist only produces a message on stderr.
  while IFS= read -r -d '' path; do
    stem=$(basename -- "${path}" "${suffix}")
    # Literal whole-word lookup: both sides are padded with spaces so that
    # "foo" does not match "foo_bar".
    if [[ " ${FILES_TO_RUN} " != *" ${stem} "* ]]; then
      echo "removing ${path}"
      rm -- "${path}"
    fi
  done < <(find "$@" -name "*${suffix}" -print0)
}

# Decide how to build, based on $JOB_TYPE:
#   worker  - execute only the tutorials assigned to this worker and
#             (when UPLOAD is 1) upload the result to S3
#   manager - build no-plot HTML, merge in every worker's output, upload the
#             result and, for master/main, push it to gh-pages
#   other   - plain `make docs`
if [[ "${JOB_TYPE}" == "worker" ]]; then
  # Step 1: Determine which tutorials this worker should execute.
  # FILES_TO_RUN is read by conf.py to set sphinx_gallery's filename_pattern,
  # so only the assigned tutorials have their code executed.
  # IMPORTANT NOTE: We assume that each tutorial has a UNIQUE filename.
  FILES_TO_RUN=$(python .jenkins/get_files_to_run.py)
  echo "FILES_TO_RUN: " "${FILES_TO_RUN}"
  # Files to run must be accessible to subprocesses (at least to `download_data.py`)
  export FILES_TO_RUN

  # Step 3: Run `make docs` to generate HTML files and static files for these tutorials
  make docs

  # Step 3.1: Run the post-processing script:
  python .jenkins/post_process_notebooks.py

  # Step 4: If any of the generated files are not related to the tutorial
  # files we want to run, then we remove them. Tracing is switched off while
  # pruning and switched back on afterwards.
  set +x
  remove_unassigned_outputs .html \
    docs/beginner docs/intermediate docs/advanced docs/recipes docs/prototype
  remove_unassigned_outputs .rst \
    docs/beginner docs/intermediate docs/advanced docs/recipes docs/prototype
  remove_unassigned_outputs .py docs/_downloads
  remove_unassigned_outputs .ipynb docs/_downloads
  # NOTE(review): unlike the other lists, this one has no "prototype"
  # directory; kept as-is, confirm whether that is intentional.
  remove_unassigned_outputs .rst.txt \
    docs/_sources/beginner docs/_sources/intermediate docs/_sources/advanced \
    docs/_sources/recipes
  remove_unassigned_outputs .doctree \
    docs/.doctrees/beginner docs/.doctrees/intermediate \
    docs/.doctrees/advanced docs/.doctrees/recipes docs/.doctrees/prototype
  set -x

  # Step 5: Remove INVISIBLE_CODE_BLOCK from .html/.rst.txt/.ipynb/.py files
  bash "${DIR}/remove_invisible_code_block_batch.sh" docs
  python .jenkins/validate_tutorials_built.py

  # Step 6: Copy generated files to S3, tag with commit ID
  if [ "${UPLOAD:-0}" -eq 1 ]; then
    7z a "worker_${WORKER_ID}.7z" docs
    awsv2 s3 cp "worker_${WORKER_ID}.7z" "s3://${BUCKET_NAME}/${BUILD_PREFIX}/${COMMIT_ID}/worker_${WORKER_ID}.7z"
  fi
elif [[ "${JOB_TYPE}" == "manager" ]]; then
  # Step 1: Generate no-plot HTML pages for all tutorials
  make html-noplot
  cp -r _build/html docs

  # Step 2: Wait for all workers to finish
  # Don't actually need to do this because gha will wait

  # Step 3: Download generated with-plot HTML files and static files from S3, merge into one folder
  mkdir -p docs_with_plot/docs
  # Worker ids run from 1 to NUM_WORKERS inclusive
  for ((worker_id = 1; worker_id <= NUM_WORKERS; worker_id++)); do
    awsv2 s3 cp "s3://${BUCKET_NAME}/${BUILD_PREFIX}/${COMMIT_ID}/worker_${worker_id}.7z" "worker_${worker_id}.7z"
    7z x "worker_${worker_id}.7z" "-oworker_${worker_id}"
    yes | cp -R "worker_${worker_id}"/docs/* docs_with_plot/docs
  done

  # Step 4: Copy all generated files into docs
  rsync -av docs_with_plot/docs/ docs

  # Step 5: Remove INVISIBLE_CODE_BLOCK from .html/.rst.txt/.ipynb/.py files
  bash "${DIR}/remove_invisible_code_block_batch.sh" docs
  python .jenkins/validate_tutorials_built.py

  # Step 5.1: Run post-processing script on .ipynb files:
  python .jenkins/post_process_notebooks.py

  # Step 6: Copy generated HTML files and static files to S3
  7z a manager.7z docs
  awsv2 s3 cp manager.7z "s3://${BUCKET_NAME}/${BUILD_PREFIX}/${COMMIT_ID}/manager.7z"

  # Step 7: push new HTML files and static files to gh-pages
  if [[ "$COMMIT_SOURCE" == "refs/heads/master" || "$COMMIT_SOURCE" == "refs/heads/main" ]]; then
    git clone https://github.com/pytorch/tutorials.git -b gh-pages gh-pages
    # Clean up directories that contain tutorials

    for dir in beginner intermediate prototype recipes advanced distributed vision text audio; do
      rm -rf "gh-pages/$dir"
    done

    cp -r docs/* gh-pages/
    pushd gh-pages
    # DANGER! DO NOT REMOVE THE `set +x` SETTING HERE!
    # With tracing on, the next command (including the token embedded in the
    # URL) would be printed to the build log.
    set +x
    git remote set-url origin https://pytorchbot:${GITHUB_PYTORCHBOT_TOKEN}@github.com/pytorch/tutorials.git
    set -x
    # `|| true` keeps a non-zero exit from `git add` / `git commit`
    # (e.g. nothing to commit) from aborting the script.
    git add -f -A || true
    git config user.email "soumith+bot@pytorch.org"
    git config user.name "pytorchbot"
    git commit -m "Automated tutorials push" || true
    git status
    git push origin gh-pages
  fi
else
  make docs
fi