@@ -141,6 +141,113 @@ def std_bootstrap(x: np.ndarray,
141141 return float (means .std (ddof = 1 ))
142142
143143
144+ # ─────────────────────────────────────────────────────────────────────────────
145+ # 5. Blocking method
146+ # ─────────────────────────────────────────────────────────────────────────────
147+
148+ def blocking_analysis (x : np .ndarray , n_blocks_range = None ) -> dict :
149+ """
150+ Blocking (or 'binning') analysis of a time series.
151+
152+ The series of N values is split into n_b non-overlapping blocks of
153+ equal size b = floor(N / n_b). The standard deviation of the mean is
154+ estimated from the n_b block averages:
155+
156+ sigma_block(n_b) = std( {y_k} ) / sqrt(n_b)
157+
158+ where y_k = (1/b) * sum_{i in block k} x_i.
159+
160+ For block sizes b >> tau_int the block means are approximately i.i.d.,
161+ so sigma_block(n_b) reaches a plateau equal to the true sigma_mean.
162+ For b << tau_int correlations are still present and sigma_block is
163+ underestimated (biased low).
164+
165+ The plateau onset gives a second, independent estimate of tau_int:
166+ tau_int ≈ b_plateau / 2
167+
168+ Parameters
169+ ----------
170+ x : time series
171+ n_blocks_range : iterable of n_b values to sweep; defaults to
172+ all divisors of N from 4 up to N//4
173+
174+ Returns
175+ -------
176+ dict with keys
177+ n_blocks : array of n_b values tried
178+ block_sizes : corresponding block sizes b = N // n_b
179+ sigma_block : sigma_mean estimate at each n_b
180+ sigma_err : statistical uncertainty on sigma_block
181+ plateau_idx : index where the plateau is first detected
182+ plateau_value : sigma_mean at the plateau
183+ tau_from_block: tau_int estimated from plateau onset
184+ """
185+ N = len (x )
186+
187+ if n_blocks_range is None :
188+ # Use all n_b such that b = N//n_b gives at least 4 blocks
189+ # and at most N//4 blocks (so b >= 4)
190+ bs = np .unique (np .round (np .geomspace (4 , N // 4 , 80 )).astype (int ))
191+ # Convert block sizes -> number of blocks
192+ n_blocks_range = np .unique ((N // bs ).astype (int ))
193+ n_blocks_range = n_blocks_range [n_blocks_range >= 4 ]
194+
195+ n_blocks_arr = np .asarray (sorted (set (int (v ) for v in n_blocks_range
196+ if 4 <= v <= N // 4 )), dtype = int )
197+
198+ sigma_block = np .zeros (len (n_blocks_arr ))
199+ sigma_err = np .zeros (len (n_blocks_arr ))
200+
201+ for idx , n_b in enumerate (n_blocks_arr ):
202+ b = N // n_b # block size (discard remainder)
203+ blocks = x [:n_b * b ].reshape (n_b , b ) # shape (n_b, b)
204+ y = blocks .mean (axis = 1 ) # block means
205+ s = y .std (ddof = 1 ) / np .sqrt (n_b ) # sigma_mean from blocks
206+ sigma_block [idx ] = s
207+ # Uncertainty on s itself: Var(s) ≈ s² / (2*(n_b-1))
208+ sigma_err [idx ] = s / np .sqrt (2 * (n_b - 1 ))
209+
210+ block_sizes = N // n_blocks_arr
211+
212+ # ── Detect plateau ─────────────────────────────────────────────────────
213+ # The plateau is where sigma_block stops rising as b increases.
214+ # Strategy: find the block size at which the relative change in sigma_block
215+ # (smoothed over a window) first falls below a threshold and stays there.
216+ # We work with sigma_block as a function of increasing b (decreasing n_b),
217+ # so we reverse the arrays.
218+ sb_by_b = sigma_block [::- 1 ] # indexed by increasing b
219+ bs_sorted = block_sizes [::- 1 ]
220+
221+ window = max (3 , len (sb_by_b ) // 10 )
222+ smoothed = np .convolve (sb_by_b , np .ones (window )/ window , mode = 'valid' )
223+ rel_change = np .abs (np .diff (smoothed ) / (smoothed [:- 1 ] + 1e-30 ))
224+ threshold = 0.05 # < 5% change → plateau
225+
226+ plateau_idx_rev = len (sb_by_b ) - 1 # default: last point (largest b)
227+ for i in range (len (rel_change ) - window ):
228+ if np .all (rel_change [i :i + window ] < threshold ):
229+ plateau_idx_rev = i + window // 2
230+ break
231+
232+ # Map back to original (decreasing b) indexing
233+ plateau_idx = len (sigma_block ) - 1 - plateau_idx_rev
234+ plateau_idx = int (np .clip (plateau_idx , 0 , len (sigma_block ) - 1 ))
235+ plateau_value = float (np .median (
236+ sigma_block [max (0 , plateau_idx - 3 ): plateau_idx + 4 ]))
237+ b_plateau = int (block_sizes [plateau_idx ])
238+ tau_from_block = b_plateau / 2.0
239+
240+ return dict (
241+ n_blocks = n_blocks_arr ,
242+ block_sizes = block_sizes ,
243+ sigma_block = sigma_block ,
244+ sigma_err = sigma_err ,
245+ plateau_idx = plateau_idx ,
246+ plateau_value = plateau_value ,
247+ tau_from_block = tau_from_block ,
248+ )
249+
250+
144251# ─────────────────────────────────────────────────────────────────────────────
145252# 5. Plots
146253# ─────────────────────────────────────────────────────────────────────────────
@@ -175,6 +282,89 @@ def reset_style():
175282 plt .rcParams .update (plt .rcParamsDefault )
176283
177284
285+ def plot_blocking (blk_result : dict , sigma_naive : float , sigma_corr : float ,
286+ sigma_boot : float , N : int , tau_int : float ,
287+ savename = 'blocking.png' ):
288+ """
289+ Two-panel blocking-analysis figure.
290+
291+ Left — sigma_block vs block size b (x-axis = b, log scale).
292+ Shows the rise from underestimated (b << tau_int) to the
293+ plateau (b >> tau_int) with error bars and reference lines.
294+
295+ Right — sigma_block vs number of blocks n_b (x-axis = n_b, log scale).
296+ Useful for seeing how many independent blocks are needed.
297+ """
298+ apply_dark_style ()
299+
300+ bs = blk_result ['block_sizes' ]
301+ nb = blk_result ['n_blocks' ]
302+ sig = blk_result ['sigma_block' ]
303+ err = blk_result ['sigma_err' ]
304+ pidx = blk_result ['plateau_idx' ]
305+ plat = blk_result ['plateau_value' ]
306+ b_pl = bs [pidx ]
307+ tau_b = blk_result ['tau_from_block' ]
308+
309+ fig , (ax1 , ax2 ) = plt .subplots (1 , 2 , figsize = (16 , 5 ), facecolor = BG )
310+
311+ # ── Left: sigma vs block size ──────────────────────────────────────
312+ ax1 .errorbar (bs , sig , yerr = err , fmt = 'o-' , color = ACCENT ,
313+ ms = 4 , lw = 1.6 , elinewidth = 1.0 , capsize = 3 , alpha = 0.85 ,
314+ label = '$\\ sigma_{\\ rm block}(b)$' )
315+ ax1 .axhline (plat , color = GREEN , lw = 2.0 , ls = '--' ,
316+ label = f'Plateau { plat :.6f} ' )
317+ ax1 .axhline (sigma_naive , color = WARM , lw = 1.5 , ls = ':' ,
318+ label = f'Naive { sigma_naive :.6f} ' )
319+ ax1 .axhline (sigma_corr , color = PURPLE , lw = 1.5 , ls = '-.' ,
320+ label = f'Cov-corr { sigma_corr :.6f} ' )
321+ ax1 .axhline (sigma_boot , color = '#f48fb1' , lw = 1.5 , ls = (0 ,(3 ,1 ,1 ,1 )),
322+ label = f'Bootstrap { sigma_boot :.6f} ' )
323+ ax1 .axvline (b_pl , color = GREEN , lw = 1.2 , ls = '--' , alpha = 0.5 ,
324+ label = f'Plateau onset $b={ b_pl } $' )
325+ ax1 .axvspan (1 , 2 * tau_int , color = WARM , alpha = 0.07 ,
326+ label = f'$b < 2\\ tau_{{\\ rm int}}={ 2 * tau_int :.1f} $ (biased)' )
327+ ax1 .set_xscale ('log' )
328+ ax1 .set_xlabel ('Block size $b$' , fontsize = 13 )
329+ ax1 .set_ylabel ('$\\ sigma_{\\ bar{x}}^{\\ rm block}(b)$' , fontsize = 13 )
330+ ax1 .set_title ('Blocking Analysis: $\\ sigma$ vs Block Size' ,
331+ fontweight = 'bold' , color = TEXT )
332+ ax1 .legend (fontsize = 8.5 , loc = 'lower right' )
333+ ax1 .grid (True , alpha = 0.35 , which = 'both' )
334+
335+ # ── Right: sigma vs number of blocks ──────────────────────────────
336+ ax2 .errorbar (nb , sig , yerr = err , fmt = 's-' , color = ACCENT ,
337+ ms = 4 , lw = 1.6 , elinewidth = 1.0 , capsize = 3 , alpha = 0.85 ,
338+ label = '$\\ sigma_{\\ rm block}(n_b)$' )
339+ ax2 .axhline (plat , color = GREEN , lw = 2.0 , ls = '--' ,
340+ label = f'Plateau { plat :.6f} ' )
341+ ax2 .axhline (sigma_naive , color = WARM , lw = 1.5 , ls = ':' ,
342+ label = f'Naive { sigma_naive :.6f} ' )
343+ nb_pl = nb [pidx ]
344+ ax2 .axvline (nb_pl , color = GREEN , lw = 1.2 , ls = '--' , alpha = 0.5 ,
345+ label = f'Plateau at $n_b={ nb_pl } $' )
346+ ax2 .set_xscale ('log' )
347+ ax2 .invert_xaxis () # large n_b (small b) on left → mirrors left panel
348+ ax2 .set_xlabel ('Number of blocks $n_b$ (log, reversed)' , fontsize = 13 )
349+ ax2 .set_ylabel ('$\\ sigma_{\\ bar{x}}^{\\ rm block}(n_b)$' , fontsize = 13 )
350+ ax2 .set_title ('Blocking Analysis: $\\ sigma$ vs $n_b$' ,
351+ fontweight = 'bold' , color = TEXT )
352+ ax2 .legend (fontsize = 9 , loc = 'lower left' )
353+ ax2 .grid (True , alpha = 0.35 , which = 'both' )
354+
355+ plt .suptitle (
356+ f'Blocking Analysis ($N={ N } $, '
357+ f'$\\ hat{{\\ tau}}_{{\\ rm int}}={ tau_int :.3f} $, '
358+ f'$\\ tau_{{\\ rm block}}\\ approx{ tau_b :.1f} $)' ,
359+ fontsize = 14 , fontweight = 'bold' , color = TEXT , y = 1.01 )
360+ plt .tight_layout ()
361+ plt .savefig (savename , dpi = 150 , bbox_inches = 'tight' , facecolor = BG )
362+ plt .close ()
363+ reset_style ()
364+ print (f'Saved { savename } ' )
365+
366+
367+
178368def plot_acf (acf , tau_int , window , N , savename = 'acf.png' ):
179369 """Plot the ACF with confidence band and tau_int window marker."""
180370 apply_dark_style ()
@@ -381,16 +571,24 @@ def plot_bootstrap_distribution(x, n_boot=3000, block_size=None,
381571 block = max (1 , int (2 * tau ))
382572 sigma_boot = std_bootstrap (x , n_boot = 3000 , block_size = block , seed = 0 )
383573
574+ blk = blocking_analysis (x )
575+ sigma_block_plateau = blk ['plateau_value' ]
576+ tau_from_block = blk ['tau_from_block' ]
577+
384578 print (f'N = { N } ' )
385579 print (f'Sample mean = { x .mean ():.6f} (true = 0.5)' )
386580 print (f'tau_int (Sokal) = { tau :.4f} (window W={ W } )' )
581+ print (f'tau_int (blocking) = { tau_from_block :.4f} ' )
387582 print (f'Naive sigma_mean = { sigma_naive :.6f} ' )
388583 print (f'Covariance sigma_mean = { sigma_corr :.6f} ' )
389584 print (f'Bootstrap sigma_mean = { sigma_boot :.6f} ' )
585+ print (f'Blocking sigma_mean = { sigma_block_plateau :.6f} ' )
390586 print (f'Exact sigma_mean (iid) = { 1 / (np .sqrt (12 )* np .sqrt (N )):.6f} ' )
391587
392588 plot_sample_and_running_mean (x , savename = 'fig_sample.png' )
393589 plot_acf (acf , tau , W , N , savename = 'fig_acf.png' )
394590 plot_bootstrap_distribution (x , savename = 'fig_bootstrap.png' )
591+ plot_blocking (blk , sigma_naive , sigma_corr , sigma_boot , N , tau ,
592+ savename = 'fig_blocking.png' )
395593 plot_std_comparison ([500 , 1000 , 5000 , 10000 , 50000 ],
396594 savename = 'fig_std_comparison.png' )
0 commit comments