-
Notifications
You must be signed in to change notification settings - Fork 4.4k
Expand file tree
/
Copy pathbuild.sh
More file actions
executable file
·174 lines (148 loc) · 6.71 KB
/
build.sh
File metadata and controls
executable file
·174 lines (148 loc) · 6.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
#!/bin/bash
# Common setup for the tutorial build: shell options, S3 naming, locale and
# the location of this script.

# -e: stop at the first failing command; -x: trace every command as it runs.
set -ex

# Name of the S3 bucket that build archives are copied to and from.
export BUCKET_NAME=pytorch-tutorial-build-pull-request

# Nightly and stable builds use different prefixes so that they do not
# conflict when both run simultaneously. Start from "stable" and switch to
# "nightly" only when USE_NIGHTLY is 1.
BUILD_PREFIX="stable"
if [ "${USE_NIGHTLY:-0}" -eq 1 ]; then
  BUILD_PREFIX="nightly"
fi
export BUILD_PREFIX

# UTF-8 locale, required by the click dependency in spacy.
export LC_ALL=C.UTF-8 LANG=C.UTF-8

# Absolute path of the directory containing this script.
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null && pwd)"
# Install the system and Python prerequisites for the build.

# pandoc is installed through apt because it does not install from PyPI.
sudo apt-get update
sudo apt-get install -y pandoc

# NS: Path to python runtime should already be part of docker container
# export PATH=/opt/conda/bin:$PATH

# When USE_NIGHTLY is 1, uninstall the current torch packages and install the
# pinned torch build from the PyTorch "test" wheel index, then print what was
# installed.
torch_index_url=https://download.pytorch.org/whl/test/cu130
if [ "${USE_NIGHTLY:-0}" -eq 1 ]; then
  sudo pip uninstall -y torch torchvision torchaudio
  pip3 install torch==2.10.0 torchvision torchaudio --index-url "${torch_index_url}"
  pip show torch
fi

# Disabled reference commands, kept for manual use (update the versions and
# URLs as needed before enabling any of them):
#   Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html
#   sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata
#   pip3 install torch==2.10.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu130

# Install the spaCy English model (en_core_web_sm) used as a tokenizer by the
# Translation with TorchText tutorial.
spacy_model_whl=https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.0/en_core_web_sm-3.4.0-py3-none-any.whl
pip install "${spacy_model_whl}"

# NOTE(review): `awsv2 -i` presumably installs the AWS CLI v2 binary through
# the awsv2 wrapper - confirm against the wrapper's documentation.
awsv2 -i
# Set the S3 multipart threshold for the default profile to 5120MB.
awsv2 configure set default.s3.multipart_threshold 5120MB
#######################################
# Delete generated files that do not belong to the tutorials listed in
# FILES_TO_RUN. A file is kept only when its basename, with the given suffix
# stripped, appears as a whole space-separated word in FILES_TO_RUN.
# Globals:   FILES_TO_RUN (read)
# Arguments: $1   - filename suffix to match and strip, e.g. ".html"
#            $2.. - directories to search recursively
# Outputs:   writes "removing <path>" to stdout for every deleted file
#######################################
remove_unassigned_outputs() {
  local suffix=$1
  shift
  local path stem
  # find output is NUL-delimited and every expansion is quoted, so paths that
  # contain whitespace or glob characters are handled as a single path.
  # find runs inside a process substitution: a missing search directory only
  # produces a find error message and does not stop the script.
  while IFS= read -r -d '' path; do
    stem=$(basename "${path}" "${suffix}")
    # Padding both sides with spaces makes this a whole-word comparison.
    if [[ " ${FILES_TO_RUN} " != *" ${stem} "* ]]; then
      echo "removing ${path}"
      rm "${path}"
    fi
  done < <(find "$@" -name "*${suffix}" -print0)
}

# Decide whether to parallelize tutorial builds, based on $JOB_TYPE:
#   worker  - build only the tutorials assigned to this worker
#   manager - build no-plot pages, merge the workers' output and publish
#   other   - plain `make docs`
if [[ "${JOB_TYPE}" == "worker" ]]; then
  # Step 1: Determine which tutorials this worker should execute.
  # FILES_TO_RUN is read by conf.py to set sphinx_gallery's filename_pattern,
  # so only the assigned tutorials have their code executed.
  # IMPORTANT NOTE: We assume that each tutorial has a UNIQUE filename.
  FILES_TO_RUN=$(python .jenkins/get_files_to_run.py)
  echo "FILES_TO_RUN: " "${FILES_TO_RUN}"
  # FILES_TO_RUN must be visible to subprocesses (at least `download_data.py`).
  export FILES_TO_RUN

  # Step 2: Run `make docs` to generate HTML files and static files for these
  # tutorials.
  make docs

  # Step 2.1: Run the post-processing script.
  python .jenkins/post_process_notebooks.py

  # Step 3: Remove every generated file that is not related to the tutorials
  # this worker was asked to run. Tracing is switched off while pruning so the
  # log only contains the "removing ..." lines.
  set +x
  remove_unassigned_outputs .html \
    docs/beginner docs/intermediate docs/advanced docs/recipes docs/prototype
  remove_unassigned_outputs .rst \
    docs/beginner docs/intermediate docs/advanced docs/recipes docs/prototype
  remove_unassigned_outputs .py docs/_downloads
  remove_unassigned_outputs .ipynb docs/_downloads
  remove_unassigned_outputs .rst.txt \
    docs/_sources/beginner docs/_sources/intermediate docs/_sources/advanced \
    docs/_sources/recipes
  remove_unassigned_outputs .doctree \
    docs/.doctrees/beginner docs/.doctrees/intermediate \
    docs/.doctrees/advanced docs/.doctrees/recipes docs/.doctrees/prototype
  set -x

  # Step 4: Remove INVISIBLE_CODE_BLOCK from .html/.rst.txt/.ipynb/.py files,
  # then run the tutorial validation script.
  bash "${DIR}/remove_invisible_code_block_batch.sh" docs
  python .jenkins/validate_tutorials_built.py

  # Step 5: When UPLOAD is 1, archive docs and copy the archive to S3 under
  # the build prefix and commit ID.
  if [ "${UPLOAD:-0}" -eq 1 ]; then
    7z a "worker_${WORKER_ID}.7z" docs
    awsv2 s3 cp "worker_${WORKER_ID}.7z" \
      "s3://${BUCKET_NAME}/${BUILD_PREFIX}/${COMMIT_ID}/worker_${WORKER_ID}.7z"
  fi
elif [[ "${JOB_TYPE}" == "manager" ]]; then
  # Step 1: Generate no-plot HTML pages for all tutorials.
  make html-noplot
  cp -r _build/html docs

  # Step 2: Wait for all workers to finish.
  # Don't actually need to do this because gha will wait.

  # Step 3: Download the with-plot HTML files and static files that each
  # worker uploaded to S3, and merge them into one folder.
  mkdir -p docs_with_plot/docs
  for ((worker_id = 1; worker_id <= NUM_WORKERS; worker_id++)); do
    awsv2 s3 cp \
      "s3://${BUCKET_NAME}/${BUILD_PREFIX}/${COMMIT_ID}/worker_${worker_id}.7z" \
      "worker_${worker_id}.7z"
    7z x "worker_${worker_id}.7z" "-oworker_${worker_id}"
    # `yes` answers any prompt cp may raise. NOTE: this pipeline relies on
    # `pipefail` being off, since `yes` is terminated once cp exits.
    yes | cp -R "worker_${worker_id}"/docs/* docs_with_plot/docs
  done

  # Step 4: Copy all generated files into docs.
  rsync -av docs_with_plot/docs/ docs

  # Step 5: Remove INVISIBLE_CODE_BLOCK from .html/.rst.txt/.ipynb/.py files,
  # then run the tutorial validation script.
  bash "${DIR}/remove_invisible_code_block_batch.sh" docs
  python .jenkins/validate_tutorials_built.py

  # Step 5.1: Run post-processing script on .ipynb files.
  python .jenkins/post_process_notebooks.py

  # Step 6: Copy generated HTML files and static files to S3.
  7z a manager.7z docs
  awsv2 s3 cp manager.7z \
    "s3://${BUCKET_NAME}/${BUILD_PREFIX}/${COMMIT_ID}/manager.7z"

  # Step 7: Push new HTML files and static files to gh-pages, only for builds
  # of the master/main branch.
  if [[ "${COMMIT_SOURCE}" == "refs/heads/master" || "${COMMIT_SOURCE}" == "refs/heads/main" ]]; then
    git clone https://github.com/pytorch/tutorials.git -b gh-pages gh-pages

    # Clean up directories that contain tutorials.
    for dir in beginner intermediate prototype recipes advanced distributed vision text audio; do
      rm -rf "gh-pages/${dir}"
    done

    cp -r docs/* gh-pages/
    pushd gh-pages
    # DANGER! DO NOT REMOVE THE `set +x` SETTING HERE!
    # With tracing on, the next command would print the bot token in the log.
    set +x
    git remote set-url origin "https://pytorchbot:${GITHUB_PYTORCHBOT_TOKEN}@github.com/pytorch/tutorials.git"
    set -x
    # add/commit are best-effort (e.g. nothing to commit) and must not abort
    # the script before the push.
    git add -f -A || true
    git config user.email "soumith+bot@pytorch.org"
    git config user.name "pytorchbot"
    git commit -m "Automated tutorials push" || true
    git status
    git push origin gh-pages
  fi
else
  make docs
fi