1+ #!/usr/bin/env python3
2+ """
3+ fix_header_rules.py
4+
5+ Normalize "rule" lines made of repeated '#', '=' or '-' in a single text file.
6+
7+ Modes:
8+ - check: print normalized file to stdout, violations to stderr, exit non-zero on violations.
9+ - fix: print normalized file to stdout.
10+
11+ Target specification:
12+ - Global targets (mutually exclusive):
13+ --len N -> set repeat length N for all rule types
14+ --col N -> set target end column N for all rule types (visual column)
15+ - Per-type overrides:
16+ --len-hash N or --col-hash N for '#'
17+ --len-equal N or --col-equal N for '='
18+ --len-dash N or --col-dash N for '-'
19+
20+ If no target provided at all, lengths are inferred as the maximum seen per type (length semantics).
21+
22+ Comment safety (default):
23+ - By default only lines that are comments are modified, and lines inside Python strings are skipped.
24+ Use --include-non-comments to also modify non-comment rule-like lines.
25+
26+ reST titles:
27+ - --from-title makes underline/overline lengths match the title text length (length semantics).
28+ """
29+ from __future__ import annotations
30+ import argparse , re , sys , io , tokenize
31+ from pathlib import Path
32+ from typing import List , Tuple , Optional , Dict , Literal
33+
# A "rule" line is recognised in one of two shapes. Both allow leading
# indentation and trailing blanks, and both need at least three identical
# characters drawn from '#', '=' and '-'.
#
#   1) Comment-prefixed: indent, a single '#', one or more blanks, the run.
#      e.g. "# =======", "# ----------", "# #######"
#   2) Pure run: indent followed directly by the run.
#      e.g. "=====", "-----", "#####"
#
# In both patterns the `char` group holds the first character of the run and
# the `run` group holds the remaining repetitions (so the full run length is
# len(run) + 1).
_INDENT_PART = r'^(?P<indent>[ \t]*)'
_RUN_TO_EOL_PART = r'(?P<char>[#=\-])(?P<run>(?P=char){2,})[ \t]*$'

RULE_COMMENT_RE = re.compile(
    _INDENT_PART + r'#(?P<prefix_space>[ \t]+)' + _RUN_TO_EOL_PART
)
RULE_PURE_RE = re.compile(_INDENT_PART + _RUN_TO_EOL_PART)

# A per-character target is either a run length or a visual end column.
SpecMode = Literal["length", "column"]
# Maps each rule character to its (mode, value) target, or None for "no target".
Spec = Dict[str, Optional[tuple[SpecMode, int]]]
49+
def detect_rule(line: str) -> Optional[Tuple[str, str, str, int]]:
    """
    Classify `line` as a rule line.

    The comment-prefixed pattern is tried first, then the pure-run pattern.

    Returns None when neither pattern matches, otherwise a 4-tuple
    (indent, prefix, char, run_len):
      indent:  the leading whitespace
      prefix:  "" for a pure run, or '#' plus the blanks that precede the run
      char:    the repeated character, one of '#', '=' or '-'
      run_len: how many times `char` is repeated
    """
    for pattern in (RULE_COMMENT_RE, RULE_PURE_RE):
        found = pattern.match(line)
        if found is None:
            continue
        parts = found.groupdict()
        # Only the comment-prefixed pattern defines `prefix_space`.
        blanks = parts.get("prefix_space")
        marker = "" if blanks is None else "#" + blanks
        # `run` excludes the first character, which `char` captured.
        return parts["indent"], marker, parts["char"], 1 + len(parts["run"])
    return None
73+
def infer_max_lengths(lines: List[str]) -> Dict[str, int]:
    """
    Return the longest run seen for each rule character.

    Every line recognised by `detect_rule` is counted. The result always has
    the keys '#', '=' and '-'; a character with no rule lines maps to 0.
    """
    longest = dict.fromkeys("#=-", 0)
    for found in filter(None, map(detect_rule, lines)):
        char, count = found[2], found[3]
        longest[char] = max(longest[char], count)
    return longest
83+
def visual_start_col(indent: str, prefix: str, tabsize: int) -> int:
    """
    Return the 1-based visual column of the first run character.

    `indent` and the optional comment `prefix` are concatenated and their
    tabs expanded to `tabsize`-wide stops; the run starts one column after
    that expanded text.
    """
    leading = indent + prefix
    expanded_width = len(leading.expandtabs(tabsize))
    return 1 + expanded_width
90+
def is_probably_python(path: Path, text: str) -> bool:
    """
    Guess whether the file is Python source.

    True when `path` has the suffix ".py", or when the first line of `text`
    starts with "#!" and contains "python" (case-insensitive).
    """
    if path.suffix == ".py":
        return True
    all_lines = text.splitlines(True)
    if not all_lines:
        return False
    shebang = all_lines[0]
    return shebang.startswith("#!") and "python" in shebang.lower()
96+
def classify_python_tokens(text: str) -> tuple[set[int], set[int]]:
    """
    Tokenize `text` as Python and report where comments and strings are.

    Returns (comment_lines, in_string_lines), both sets of 1-based line
    numbers:
      comment_lines:   lines on which a COMMENT token starts
      in_string_lines: every line covered by a string literal, including
                       all lines of a multi-line literal

    Tokenization is best effort: if the tokenizer rejects the source, the
    lines classified up to that point are returned.
    """
    comment_lines: set[int] = set()
    in_string_lines: set[int] = set()

    # Since Python 3.12 (PEP 701) an f-string is no longer one STRING token;
    # it is a FSTRING_START ... FSTRING_END sequence (TSTRING_* for template
    # strings on newer versions). Look the token types up dynamically so the
    # function keeps working on interpreters that do not define them.
    openers = {
        getattr(tokenize, name)
        for name in ("FSTRING_START", "TSTRING_START")
        if hasattr(tokenize, name)
    }
    closers = {
        getattr(tokenize, name)
        for name in ("FSTRING_END", "TSTRING_END")
        if hasattr(tokenize, name)
    }
    # Start rows of the f-strings currently open; a stack because
    # replacement fields may themselves contain f-strings.
    open_rows: list[int] = []

    try:
        for tok in tokenize.generate_tokens(io.StringIO(text).readline):
            srow, erow = tok.start[0], tok.end[0]
            if tok.type == tokenize.COMMENT:
                comment_lines.add(srow)
            elif tok.type == tokenize.STRING:
                in_string_lines.update(range(srow, erow + 1))
            elif tok.type in openers:
                open_rows.append(srow)
            elif tok.type in closers and open_rows:
                in_string_lines.update(range(open_rows.pop(), erow + 1))
    except (tokenize.TokenError, SyntaxError):
        # Incomplete or invalid source: keep whatever was classified so far.
        pass
    return comment_lines, in_string_lines
112+
def build_spec_from_args(args) -> Spec:
    """
    Turn parsed command-line options into a per-character target spec.

    `args` must expose the attributes len, col, len_hash, len_equal,
    len_dash, col_hash, col_equal and col_dash (None when not given).

    A universal --len / --col seeds all three characters (--col wins if both
    are somehow set); a per-type option then overrides its own character.
    Characters with no applicable option map to None.

    If a character has both a --len-* and a --col-* option, the conflicts are
    printed to stderr and the process exits with status 2.
    """
    kinds = (("#", "hash"), ("=", "equal"), ("-", "dash"))

    clashes = [
        f"'{char}': both --len-{name} and --col-{name}"
        for char, name in kinds
        if getattr(args, f"len_{name}") is not None
        and getattr(args, f"col_{name}") is not None
    ]
    if clashes:
        print("Error: Conflicting per-type options:\n " + "\n ".join(clashes), file=sys.stderr)
        sys.exit(2)

    if args.col is not None:
        universal = ("column", args.col)
    elif args.len is not None:
        universal = ("length", args.len)
    else:
        universal = None

    spec: Spec = {char: universal for char, _ in kinds}
    for char, name in kinds:
        # At most one of the two is set here; conflicts were rejected above.
        wanted_len = getattr(args, f"len_{name}")
        wanted_col = getattr(args, f"col_{name}")
        if wanted_len is not None:
            spec[char] = ("length", wanted_len)
        if wanted_col is not None:
            spec[char] = ("column", wanted_col)
    return spec
139+
def normalize_rules(
    text: str,
    mode: str,
    spec: Spec,
    from_title: bool,
    tabsize: int,
    path: Path,
    only_comments: bool,
    is_python: bool,
    py_comment_lines: set[int],
    py_in_string_lines: set[int],
    debug: bool = False,
) -> Tuple[str, int, List[str]]:
    """
    Rewrite every eligible rule line in `text` to its target run length.

    Parameters:
      text:               full file content
      mode:               "check" collects one message per deviating line;
                          any other value collects none
      spec:               per-character target, (mode, value) or None
      from_title:         derive the length from an adjacent title line
      tabsize:            tab width used for visual column arithmetic
      path:               only used to label messages and debug output
      only_comments:      skip rule lines that are not comments
      is_python:          use the two line-number sets below
      py_comment_lines:   1-based lines holding a Python comment token
      py_in_string_lines: 1-based lines covered by a Python string literal
      debug:              print the computation for changed lines to stderr

    Target precedence for a rule line: explicit `spec` entry, then the title
    length (when `from_title` is set and a title is found), then the longest
    run of that character in the file (only when `spec` holds no target at
    all), otherwise the line is kept as is.

    Returns (new_text, count, msgs): the rewritten text, the number of rule
    lines whose length differed from the target, and the check messages.
    Changed lines are rebuilt as indent + prefix + run, so their trailing
    blanks are dropped; all other lines and all line terminators are
    preserved verbatim.
    """
    # Split on real newline sequences only and remember each terminator.
    # str.splitlines() would also break on form feeds, vertical tabs and the
    # Unicode separators, which both rewrites those characters on output and
    # makes idx + 1 disagree with the tokenizer's line numbers.
    pieces = re.split(r"(\r\n|\r|\n)", text)
    lines: List[str] = pieces[0::2]
    endings: List[str] = pieces[1::2] + [""]  # final segment has no terminator
    n = len(lines)

    msgs: List[str] = []
    changes_or_viol = 0
    out: List[str] = lines[:]

    # With no target at all, fall back to the longest run seen per character.
    # Kept separate from `spec` so that a title length can take precedence.
    inferred: Dict[str, int] = {}
    if all(v is None for v in spec.values()):
        inferred = infer_max_lengths(lines)

    def prev_nonempty(i: int) -> int:
        """Index of the nearest non-blank line above `i`, or -1."""
        j = i - 1
        while j >= 0 and lines[j].strip() == "":
            j -= 1
        return j

    def next_nonempty(i: int) -> int:
        """Index of the nearest non-blank line below `i`, or `n`."""
        j = i + 1
        while j < n and lines[j].strip() == "":
            j += 1
        return j

    def find_title_len(idx_: int) -> Optional[int]:
        """Length of the title that the rule at `idx_` belongs to, if any."""
        # Underline: the nearest non-blank line above is ordinary text.
        above = prev_nonempty(idx_)
        if above >= 0 and not detect_rule(lines[above]):
            return len(lines[above].rstrip("\n "))
        # Overline: rule / title / rule of the same character. Both following
        # lines must exist; a title at the end of the file has no underline.
        title_idx = next_nonempty(idx_)
        under_idx = next_nonempty(title_idx) if title_idx < n else n
        if under_idx < n:
            over = detect_rule(lines[idx_])
            under = detect_rule(lines[under_idx])
            if over and under and over[2] == under[2]:
                return len(lines[title_idx].rstrip("\n "))
        return None

    for idx, line in enumerate(lines):
        info = detect_rule(line)
        if not info:
            continue

        indent, prefix, ch, run_len = info

        # Skip string literals in Python
        if is_python and (idx + 1) in py_in_string_lines:
            continue

        # Only-comments filter: the line must start with '#', and for Python
        # the tokenizer must also have seen a comment on it.
        if only_comments:
            if not line.lstrip().startswith("#"):
                continue
            if is_python and (idx + 1) not in py_comment_lines:
                continue

        start_col = visual_start_col(indent, prefix, tabsize)
        explicit = spec.get(ch)
        title_len = None
        if explicit is None and from_title:
            title_len = find_title_len(idx)

        if explicit is not None:
            kind, value = explicit
            if kind == "length":
                target_len = max(1, int(value))
                end_col = start_col + target_len - 1
                msg_descr = f"{ch * target_len} (len {run_len} -> {target_len} )"
            else:
                end_col = int(value)
                # A column left of the run start still yields one character.
                target_len = max(1, end_col - start_col + 1)
                msg_descr = f"{ch * target_len} (end col -> {end_col} )"
        elif title_len is not None and title_len > 0:
            target_len = title_len
            end_col = start_col + target_len - 1
            msg_descr = f"{ch * target_len} (len from title -> {target_len} )"
        elif inferred.get(ch, 0) > 0:
            target_len = inferred[ch]
            end_col = start_col + target_len - 1
            msg_descr = f"{ch * target_len} (len {run_len} -> {target_len} )"
        else:
            target_len = run_len
            end_col = start_col + target_len - 1
            msg_descr = None

        if target_len == run_len:
            continue

        if debug:
            print(
                f"DBG {path} :{idx + 1} ch={ch} prefix={'yes' if prefix else 'no'} "
                f"start_col={start_col} -> end_col={end_col} target_len={target_len} ",
                file=sys.stderr,
            )

        changes_or_viol += 1
        if mode == "check" and msg_descr:
            msgs.append(f"{path} :{idx + 1} : {ch * run_len} -> expected {msg_descr} ")
        # Rebuild without separators so the run starts exactly at start_col
        # and a second pass over the output changes nothing.
        out[idx] = f"{indent}{prefix}{ch * target_len}"

    new_text = "".join(body + end for body, end in zip(out, endings))
    return new_text, changes_or_viol, msgs
252+
def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser used by `main`."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", choices=["check", "fix"], default="check")
    parser.add_argument("--debug", action="store_true", help="Print calculation details to stderr for modified lines.")

    # Universal targets: at most one of the two may be given.
    universal = parser.add_mutually_exclusive_group()
    universal.add_argument("--len", type=int, help="Universal length for all rule types.")
    universal.add_argument("--col", type=int, help="Universal end column (visual) for all rule types.")

    # Per-type targets: all length options first, then all column options.
    per_type = (("hash", "#"), ("equal", "="), ("dash", "-"))
    for name, char in per_type:
        parser.add_argument(f"--len-{name}", type=int, help=f"Length for '{char}' rules.")
    for name, char in per_type:
        parser.add_argument(f"--col-{name}", type=int, help=f"End column for '{char}' rules (visual).")

    parser.add_argument("--tabsize", type=int, default=8, help="Tab width for visual column calculation.")
    parser.add_argument("--from-title", action="store_true", help="Match rule length to title text length (reST-style).")

    # The default must be registered before the two flags that share the
    # `only_comments` destination, so that neither flag resets it.
    parser.set_defaults(only_comments=True)
    scope = parser.add_mutually_exclusive_group()
    scope.add_argument("--only-comments", dest="only_comments", action="store_true",
                       help="Modify only comment rule lines (default).")
    scope.add_argument("--include-non-comments", dest="only_comments", action="store_false",
                       help="Also modify non-comment rule-like lines.")

    parser.add_argument("file", help="Single input file to process.")
    return parser


def main():
    """
    Command-line entry point.

    Reads the input file as UTF-8, normalizes its rule lines and always
    writes the normalized text to stdout. In "check" mode the collected
    messages go to stderr and the process exits with status 1 when at least
    one rule line deviated from its target.
    """
    args = _build_arg_parser().parse_args()

    target = Path(args.file)
    source = target.read_text(encoding="utf-8")

    spec = build_spec_from_args(args)

    # Token classification is only needed when restricting edits to comments
    # in a Python file; otherwise both sets stay empty.
    looks_python = is_probably_python(target, source)
    comment_rows, string_rows = set(), set()
    if looks_python and args.only_comments:
        comment_rows, string_rows = classify_python_tokens(source)

    new_text, count, msgs = normalize_rules(
        text=source,
        mode=args.mode,
        spec=spec,
        from_title=args.from_title,
        tabsize=args.tabsize,
        path=target,
        only_comments=args.only_comments,
        is_python=looks_python,
        py_comment_lines=comment_rows,
        py_in_string_lines=string_rows,
        debug=args.debug,
    )

    # The normalized content goes to stdout in every mode.
    print(new_text, end="")

    if args.mode != "check":
        return
    for message in msgs:
        print(message, file=sys.stderr)
    if count > 0:
        print(f"\n {count} rule line(s) inconsistent.", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
0 commit comments