forked from harvard-edge/cs249r_book
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path.pre-commit-config.yaml
More file actions
327 lines (292 loc) · 12.6 KB
/
.pre-commit-config.yaml
File metadata and controls
327 lines (292 loc) · 12.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
repos:
# =============================================================================
# PHASE 1: AUTO-FORMATTERS (Fix basic formatting issues first)
# =============================================================================
# Upstream whitespace fixers. Both hooks are limited to Quarto sources under
# quarto/contents/ and run with verbose output turned off.
- repo: https://github.com/pre-commit/pre-commit-hooks
  rev: v5.0.0
  hooks:
  - id: trailing-whitespace
    name: 'Trim trailing whitespace'
    verbose: false
    files: '^quarto/contents/.*\.qmd$'
  - id: end-of-file-fixer
    name: 'Fix end of file newlines'
    verbose: false
    files: '^quarto/contents/.*\.qmd$'
# --- Content Formatters ---
# Markdown formatter applied to the Quarto chapter sources. The
# mdformat-frontmatter package is installed into the hook environment with it.
- repo: https://github.com/executablebooks/mdformat
  rev: '0.7.9'
  hooks:
  - id: mdformat
    name: 'Format quarto markdown'
    files: '^quarto/contents/.*\.qmd$'
    pass_filenames: true
    additional_dependencies:
    - mdformat-frontmatter
# Normalises .bib files under quarto/contents/. The flags are passed to
# bibtex-tidy in the order listed below.
- repo: https://github.com/FlamingTempura/bibtex-tidy
  rev: v1.14.0
  hooks:
  - id: bibtex-tidy
    name: 'Tidy bibtex files'
    files: '^quarto/contents/.*\.bib$'
    args:
    - "--align=space"
    - "--curly"
    - "--sort=key"
    - "--sort-fields"
    - "--duplicates=key"
    - "--remove-empty-fields"
    - "--space=2"
    - "--trailing-commas"
    - "--escape"
    - "--wrap=100"
    - "--blank-lines"
# Project-local formatters. Each hook invokes a script from tools/scripts/ and
# receives the matching staged filenames as arguments.
- repo: local
  hooks:
  # --- Content Formatting ---
  - id: collapse-extra-blank-lines
    name: 'Collapse extra blank lines'
    language: python
    entry: python tools/scripts/content/format_blank_lines.py
    additional_dependencies: []
    files: '^quarto/contents/.*\.qmd$'
    pass_filenames: true
  # Black is installed into this hook's environment (version 23.0.0 or newer).
  - id: format-python-in-qmd
    name: 'Format Python code blocks (Black, 70 chars)'
    language: python
    entry: python tools/scripts/content/format_python_in_qmd.py
    additional_dependencies:
    - "black>=23.0.0"
    files: '^quarto/contents/.*\.qmd$'
    pass_filenames: true
  # Runs in --fix mode. Unlike its siblings, the file pattern matches every
  # .qmd in the repository, not only those under quarto/contents/.
  - id: check-list-formatting
    name: 'Fix markdown list formatting (require blank line before lists)'
    description: 'Ensure bullet lists are preceded by blank lines for proper markdown rendering'
    language: python
    entry: python tools/scripts/utilities/check_list_formatting.py --fix
    additional_dependencies: []
    files: '\.qmd$'
    pass_filenames: true
# =============================================================================
# PHASE 2: BASIC VALIDATORS (Structure and syntax)
# =============================================================================
# Structure and syntax validators: JSON syntax, required project files, and
# YAML linting.
- repo: local
  hooks:
  # --- JSON Validation ---
  - id: validate-json
    name: "Validate JSON files"
    description: "Validate all JSON files have correct syntax using Python's built-in json module"
    entry: python tools/scripts/utilities/validate_json.py
    language: python
    files: '\.json$'
    pass_filenames: true
  # --- Project Structure Check ---
  # Runs on every commit (empty `files` pattern, no filenames passed) and fails
  # if quarto/_quarto.yml or quarto/index.qmd is missing.
  - id: check-project-structure
    name: "Check required project files exist"
    description: "Ensure required project structure files exist"
    entry: bash -c 'if [ ! -f "quarto/_quarto.yml" ]; then echo "❌ Missing quarto/_quarto.yml"; exit 1; fi; if [ ! -f "quarto/index.qmd" ]; then echo "❌ Missing quarto/index.qmd"; exit 1; fi; echo "✅ Structure check passed"'
    language: system
    pass_filenames: false
    files: ''
  # --- YAML Validation ---
  # Uses the yamllint executable already on PATH (language: system).
  - id: yamllint
    name: "Validate YAML files"
    description: "Validate all YAML files with custom config"
    entry: yamllint
    language: system
    args:
    - "--config-file=.yamllint"
    files: '\.(yml|yaml)$'
    # pre-commit applies this pattern with re.search against the repo-relative
    # path. The previous form wrapped the alternatives in ^( ... )$, which
    # required e.g. "node_modules/" to be the ENTIRE path, so nothing was ever
    # excluded. This form matches any path that has one of these directories
    # as a component. The old `\.pyc$` alternative is dropped because `files`
    # already restricts the hook to .yml/.yaml paths.
    exclude: |
      (?x)(^|/)(
          node_modules|
          \.git|
          _site|
          _book|
          \.venv|
          __pycache__
      )/
# =============================================================================
# PHASE 3: CONTENT VALIDATORS (After formatting is complete)
# =============================================================================
# Spell check for .qmd sources. The ignore list is read from
# config/linting/.codespell-ignore.
- repo: https://github.com/codespell-project/codespell
  rev: v2.3.0
  hooks:
  - id: codespell
    name: 'Check for common misspellings'
    files: '\.qmd$'
    exclude: '.venv|_book|_site|node_modules|images'
    args:
    - "--ignore-words"
    - "config/linting/.codespell-ignore"
# Content validators that run after the formatters: label, citation, footnote,
# header, phrase, table, and part-key checks.
- repo: local
  hooks:
  # --- Structural & Reference Validation ---
  # Always runs (empty `files` pattern); the directory to scan is hard-coded
  # in the entry instead of being derived from staged filenames.
  - id: check-unreferenced-labels
    name: "Check for unreferenced labels"
    entry: python ./tools/scripts/content/check_unreferenced_labels.py ./quarto/contents/core
    language: python
    additional_dependencies: []
    pass_filenames: false
    files: ''
  - id: check-duplicate-labels
    name: "Check for duplicate labels"
    description: "Ensure all figure, table, and listing labels are unique across the book"
    entry: python tools/scripts/content/check_duplicate_labels.py
    args:
    - "-d"
    - "quarto/contents/"
    - "--figures"
    - "--tables"
    - "--listings"
    - "--quiet"
    - "--strict"
    language: python
    additional_dependencies: []
    pass_filenames: false
    files: '^quarto/contents/.*\.qmd$'
  - id: validate-citations
    name: "Validate citation references in .qmd files"
    description: "Ensure all @key citations have corresponding entries in .bib files"
    entry: python tools/scripts/content/validate_citations.py --quiet
    language: python
    additional_dependencies: []
    pass_filenames: true
    files: '^quarto/contents/.*\.qmd$'
  - id: validate-footnotes
    name: "Validate footnote references and definitions"
    description: "Ensure all footnote references have definitions and all definitions are used"
    entry: python tools/scripts/content/footnote_cleanup.py -d quarto/contents/ --validate
    language: python
    additional_dependencies: []
    pass_filenames: false
    files: '^quarto/contents/.*\.qmd$'
  - id: check-forbidden-footnotes
    name: "Check for footnotes in tables/captions/divs"
    description: "Prevent footnotes in tables, captions, and div blocks that break Quarto builds"
    entry: python tools/scripts/content/check_forbidden_footnotes.py -d quarto/contents/
    language: python
    additional_dependencies: []
    pass_filenames: false
    files: '^quarto/contents/.*\.qmd$'
  # Searches every *.qmd below the repository root (not just staged files) for
  # a "#" header marker that follows a period on the same line.
  - id: header-inline-check
    name: "Detect inline-style Markdown headers"
    entry: bash -c 'find . -name "*.qmd" -exec grep -nE "^[^#\n]*\\.[#]{1,6} " {} + && exit 1 || exit 0'
    language: system
    pass_filenames: false
    files: '^quarto/contents/.*\.qmd$'
  # The trailing "--" is required: with `bash -c '<script>' a b c`, bash binds
  # the first extra argument to $0, so without a placeholder the first staged
  # filename was dropped from "$@" and never scanned.
  - id: grep-forbidden-phrases
    name: "Check for forbidden words"
    entry: bash -c 'grep --color=always -n -E "Retry" "$@" && exit 1 || exit 0' --
    language: system
    pass_filenames: true
    files: '^quarto/contents/.*\.qmd$'
  # Fails when a "## Purpose" heading line does not contain ".unnumbered".
  - id: check-purpose-unnumbered
    name: "Ensure Purpose sections are unnumbered"
    description: "Ensure all ## Purpose sections have {.unnumbered} attribute"
    entry: bash -c 'grep -n "^## Purpose" "$@" | grep -v "\.unnumbered" && exit 1 || exit 0' --
    language: system
    pass_filenames: true
    files: '^quarto/contents/.*\.qmd$'
  # --- Table Formatting Validation ---
  - id: check-table-formatting
    name: "Check table formatting (alignment, bolding, spacing)"
    description: "Ensure all tables have proper bolding, alignment, and spacing"
    entry: python tools/scripts/content/format_tables.py --check -d quarto/contents/
    language: python
    additional_dependencies: []
    pass_filenames: false
    files: '^quarto/contents/.*\.qmd$'
  # --- Part Key Validation ---
  - id: validate-part-keys
    name: "Validate part keys in .qmd files"
    entry: python tools/scripts/utilities/validate_part_keys.py
    language: python
    additional_dependencies:
    - pyyaml
    pass_filenames: false
    files: ''
# =============================================================================
# PHASE 4: ASSET VALIDATORS (Images and external resources)
# =============================================================================
# Asset validators: image files, image references, and SVG policy.
- repo: local
  hooks:
  # --- Image Validation ---
  - id: validate-images
    name: "Validate image files"
    entry: python tools/scripts/images/manage_images.py
    language: python
    additional_dependencies:
    - pillow
    - rich
    pass_filenames: true
    files: '^quarto/contents/.*\.(png|jpg|jpeg|gif)$'
  - id: validate-external-images
    name: "Check for external images in Quarto files"
    description: "Ensure all images are local for build reliability"
    entry: python tools/scripts/images/manage_external_images.py --validate quarto/contents/
    language: python
    additional_dependencies:
    - requests
    pass_filenames: false
    files: '^quarto/contents/.*\.qmd$'
  - id: validate-image-references
    name: "Check that all image references exist on disk"
    description: "Ensure all referenced images exist on disk"
    entry: python tools/scripts/images/validate_image_references.py -d quarto/contents/ --quiet
    language: python
    additional_dependencies: []
    pass_filenames: false
    files: '^quarto/contents/.*\.qmd$'
  # Always exits 1 when invoked, i.e. whenever a staged .svg is not excluded by
  # the mediabag pattern. The trailing "--" fills bash's $0 slot so that every
  # offending file, including the first, appears in the "Found SVG files" list.
  - id: prevent-svg-files
    name: "Prevent manual SVG files from being committed"
    description: "Manual SVG files cause compatibility issues - convert to PNG instead"
    entry: bash -c 'echo "❌ Manual SVG files are not allowed. Please convert to PNG format using:"; echo " magick file.svg file.png (recommended)"; echo " convert file.svg file.png (deprecated but works)"; echo ""; echo "Or use the conversion helper tool:"; echo " python tools/scripts/utilities/convert_svg_to_png.py file.svg"; echo ""; echo "Found SVG files:"; for file in "$@"; do echo " - $file"; done; exit 1' --
    language: system
    files: '\.svg$'
    exclude: |
      (?x)^(
          .*_files/mediabag/.*\.svg$
      )$
  # Strips the control bytes 0x00-0x08, 0x0B, 0x0C and 0x0E-0x1F in place from
  # the SVG files passed to the hook.
  - id: sanitize-svgs
    name: "Remove control chars from existing SVGs"
    description: "Clean up control characters in generated/existing SVG files"
    entry: bash -c 'perl -pi -e '\''s/[\x00-\x08\x0B\x0C\x0E-\x1F]//g'\'' "$@"' --
    language: system
    files: '\.svg$'
# =============================================================================
# PHASE 5: SYSTEM & WORKFLOW CHECKS (Final validation)
# =============================================================================
# Final system-level checks. Neither hook receives filenames.
- repo: local
  hooks:
  # --- Auto-cleanup with Book Binder ---
  # Restricted to the pre-commit stage; the empty `files` pattern means it is
  # not filtered by path.
  - id: auto-cleanup-artifacts
    name: 'Auto-cleanup build artifacts (Book Binder)'
    language: python
    entry: python tools/scripts/maintenance/cleanup_build_artifacts.py
    additional_dependencies:
    - rich
    files: ""
    pass_filenames: false
    stages:
    - pre-commit
  # --- Locked File Check (macOS specific) ---
  # Lists files under quarto/contents/ with `ls -lO` and fails if any line
  # contains the " uchg " flag; otherwise exits 0.
  - id: check-locked-files
    name: 'Detect locked files (uchg flag on macOS)'
    language: system
    entry: bash -c 'find quarto/contents/ -type f -exec ls -lO {} + | grep -q " uchg " && { echo "❌ Locked files detected (uchg). Please unlock them before commit."; exit 1; } || exit 0'
    pass_filenames: false
# --- Workflow File Check ---
# - id: check-workflow-changes
# name: "Check for workflow file changes"
# entry: tools/scripts/check_workflow_changes.sh
# language: system
# pass_filenames: false
# files: ''
# description: "Warn about workflow file changes that may cause publish issues"
# =============================================================================
# DISABLED/COMMENTED HOOKS
# =============================================================================
# - repo: https://github.com/igorshubovych/markdownlint-cli
# rev: v0.45.0
# hooks:
# - id: markdownlint
# name: "Lint quarto markdown"
# types: [text]
# files: ^book/contents/.*\.qmd$
# args: ["--quiet", "-c", "config/linting/.mdlintconfig.yml"]
# entry: bash -c 'markdownlint "$@" || true'
# - id: check-section-ids
# name: "Check section IDs"
# entry: python tools/scripts/content/manage_section_ids.py -d book/contents/ --verify --yes
# language: python
# additional_dependencies: [nltk>=3.8]
# pass_filenames: false
# files: ^book/contents/.*\.qmd$