CompPhysics
diff --git a/‎doc/src/week9/programs/acf_rng.py‎
Lines changed: 198 additions & 0 deletions b/‎doc/src/week9/programs/acf_rng.py‎
Lines changed: 198 additions & 0 deletions
@@ -141,6 +141,113 @@ def std_bootstrap(x: np.ndarray,
     return float(means.std(ddof=1))
 
 
+# ─────────────────────────────────────────────────────────────────────────────
+# 5. Blocking method
+# ─────────────────────────────────────────────────────────────────────────────
+
+def blocking_analysis(x: np.ndarray, n_blocks_range=None) -> dict:
+    """
+    Blocking (or 'binning') analysis of a time series.
+
+    The series of N values is split into n_b non-overlapping blocks of
+    equal size b = floor(N / n_b).  The standard deviation of the mean is
+    estimated from the n_b block averages:
+
+        sigma_block(n_b) = std( {y_k} ) / sqrt(n_b)
+
+    where y_k = (1/b) * sum_{i in block k} x_i.
+
+    For block sizes b >> tau_int the block means are approximately i.i.d.,
+    so sigma_block(n_b) reaches a plateau equal to the true sigma_mean.
+    For b << tau_int correlations are still present and sigma_block is
+    underestimated (biased low).
+
+    The plateau onset gives a second, independent estimate of tau_int:
+        tau_int ≈ b_plateau / 2
+
+    Parameters
+    ----------
+    x              : time series
+    n_blocks_range : iterable of n_b values to sweep; defaults to
+                     all divisors of N from 4 up to N//4
+
+    Returns
+    -------
+    dict with keys
+        n_blocks      : array of n_b values tried
+        block_sizes   : corresponding block sizes b = N // n_b
+        sigma_block   : sigma_mean estimate at each n_b
+        sigma_err     : statistical uncertainty on sigma_block
+        plateau_idx   : index where the plateau is first detected
+        plateau_value : sigma_mean at the plateau
+        tau_from_block: tau_int estimated from plateau onset
+    """
+    N = len(x)
+
+    if n_blocks_range is None:
+        # Use all n_b such that b = N//n_b gives at least 4 blocks
+        # and at most N//4 blocks (so b >= 4)
+        bs = np.unique(np.round(np.geomspace(4, N // 4, 80)).astype(int))
+        # Convert block sizes -> number of blocks
+        n_blocks_range = np.unique((N // bs).astype(int))
+        n_blocks_range = n_blocks_range[n_blocks_range >= 4]
+
+    n_blocks_arr = np.asarray(sorted(set(int(v) for v in n_blocks_range
+                                          if 4 <= v <= N // 4)), dtype=int)
+
+    sigma_block = np.zeros(len(n_blocks_arr))
+    sigma_err   = np.zeros(len(n_blocks_arr))
+
+    for idx, n_b in enumerate(n_blocks_arr):
+        b      = N // n_b                          # block size (discard remainder)
+        blocks = x[:n_b * b].reshape(n_b, b)      # shape (n_b, b)
+        y      = blocks.mean(axis=1)               # block means
+        s      = y.std(ddof=1) / np.sqrt(n_b)     # sigma_mean from blocks
+        sigma_block[idx] = s
+        # Uncertainty on s itself: Var(s) ≈ s² / (2*(n_b-1))
+        sigma_err[idx]   = s / np.sqrt(2 * (n_b - 1))
+
+    block_sizes = N // n_blocks_arr
+
+    # ── Detect plateau ─────────────────────────────────────────────────────
+    # The plateau is where sigma_block stops rising as b increases.
+    # Strategy: find the block size at which the relative change in sigma_block
+    # (smoothed over a window) first falls below a threshold and stays there.
+    # We work with sigma_block as a function of increasing b (decreasing n_b),
+    # so we reverse the arrays.
+    sb_by_b   = sigma_block[::-1]   # indexed by increasing b
+    bs_sorted = block_sizes[::-1]
+
+    window    = max(3, len(sb_by_b) // 10)
+    smoothed  = np.convolve(sb_by_b, np.ones(window)/window, mode='valid')
+    rel_change = np.abs(np.diff(smoothed) / (smoothed[:-1] + 1e-30))
+    threshold  = 0.05   # < 5% change → plateau
+
+    plateau_idx_rev = len(sb_by_b) - 1   # default: last point (largest b)
+    for i in range(len(rel_change) - window):
+        if np.all(rel_change[i:i+window] < threshold):
+            plateau_idx_rev = i + window // 2
+            break
+
+    # Map back to original (decreasing b) indexing
+    plateau_idx   = len(sigma_block) - 1 - plateau_idx_rev
+    plateau_idx   = int(np.clip(plateau_idx, 0, len(sigma_block) - 1))
+    plateau_value = float(np.median(
+        sigma_block[max(0, plateau_idx - 3): plateau_idx + 4]))
+    b_plateau     = int(block_sizes[plateau_idx])
+    tau_from_block = b_plateau / 2.0
+
+    return dict(
+        n_blocks      = n_blocks_arr,
+        block_sizes   = block_sizes,
+        sigma_block   = sigma_block,
+        sigma_err     = sigma_err,
+        plateau_idx   = plateau_idx,
+        plateau_value = plateau_value,
+        tau_from_block= tau_from_block,
+    )
+
+
 # ─────────────────────────────────────────────────────────────────────────────
 # 5. Plots
 # ─────────────────────────────────────────────────────────────────────────────
@@ -175,6 +282,89 @@ def reset_style():
     plt.rcParams.update(plt.rcParamsDefault)
 
 
+def plot_blocking(blk_result: dict, sigma_naive: float, sigma_corr: float,
+                   sigma_boot: float, N: int, tau_int: float,
+                   savename='blocking.png'):
+    """
+    Two-panel blocking-analysis figure.
+
+    Left  — sigma_block vs block size b (x-axis = b, log scale).
+             Shows the rise from underestimated (b << tau_int) to the
+             plateau (b >> tau_int) with error bars and reference lines.
+
+    Right — sigma_block vs number of blocks n_b (x-axis = n_b, log scale).
+             Useful for seeing how many independent blocks are needed.
+    """
+    apply_dark_style()
+
+    bs   = blk_result['block_sizes']
+    nb   = blk_result['n_blocks']
+    sig  = blk_result['sigma_block']
+    err  = blk_result['sigma_err']
+    pidx = blk_result['plateau_idx']
+    plat = blk_result['plateau_value']
+    b_pl = bs[pidx]
+    tau_b= blk_result['tau_from_block']
+
+    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 5), facecolor=BG)
+
+    # ── Left: sigma vs block size ──────────────────────────────────────
+    ax1.errorbar(bs, sig, yerr=err, fmt='o-', color=ACCENT,
+                 ms=4, lw=1.6, elinewidth=1.0, capsize=3, alpha=0.85,
+                 label='$\\sigma_{\\rm block}(b)$')
+    ax1.axhline(plat, color=GREEN, lw=2.0, ls='--',
+                label=f'Plateau  {plat:.6f}')
+    ax1.axhline(sigma_naive, color=WARM, lw=1.5, ls=':',
+                label=f'Naive    {sigma_naive:.6f}')
+    ax1.axhline(sigma_corr,  color=PURPLE, lw=1.5, ls='-.',
+                label=f'Cov-corr {sigma_corr:.6f}')
+    ax1.axhline(sigma_boot,  color='#f48fb1', lw=1.5, ls=(0,(3,1,1,1)),
+                label=f'Bootstrap {sigma_boot:.6f}')
+    ax1.axvline(b_pl, color=GREEN, lw=1.2, ls='--', alpha=0.5,
+                label=f'Plateau onset  $b={b_pl}$')
+    ax1.axvspan(1, 2*tau_int, color=WARM, alpha=0.07,
+                label=f'$b < 2\\tau_{{\\rm int}}={2*tau_int:.1f}$  (biased)')
+    ax1.set_xscale('log')
+    ax1.set_xlabel('Block size  $b$', fontsize=13)
+    ax1.set_ylabel('$\\sigma_{\\bar{x}}^{\\rm block}(b)$', fontsize=13)
+    ax1.set_title('Blocking Analysis: $\\sigma$ vs Block Size',
+                  fontweight='bold', color=TEXT)
+    ax1.legend(fontsize=8.5, loc='lower right')
+    ax1.grid(True, alpha=0.35, which='both')
+
+    # ── Right: sigma vs number of blocks ──────────────────────────────
+    ax2.errorbar(nb, sig, yerr=err, fmt='s-', color=ACCENT,
+                 ms=4, lw=1.6, elinewidth=1.0, capsize=3, alpha=0.85,
+                 label='$\\sigma_{\\rm block}(n_b)$')
+    ax2.axhline(plat, color=GREEN, lw=2.0, ls='--',
+                label=f'Plateau  {plat:.6f}')
+    ax2.axhline(sigma_naive, color=WARM, lw=1.5, ls=':',
+                label=f'Naive  {sigma_naive:.6f}')
+    nb_pl = nb[pidx]
+    ax2.axvline(nb_pl, color=GREEN, lw=1.2, ls='--', alpha=0.5,
+                label=f'Plateau at $n_b={nb_pl}$')
+    ax2.set_xscale('log')
+    ax2.invert_xaxis()   # large n_b (small b) on left → mirrors left panel
+    ax2.set_xlabel('Number of blocks  $n_b$  (log, reversed)', fontsize=13)
+    ax2.set_ylabel('$\\sigma_{\\bar{x}}^{\\rm block}(n_b)$', fontsize=13)
+    ax2.set_title('Blocking Analysis: $\\sigma$ vs $n_b$',
+                  fontweight='bold', color=TEXT)
+    ax2.legend(fontsize=9, loc='lower left')
+    ax2.grid(True, alpha=0.35, which='both')
+
+    plt.suptitle(
+        f'Blocking Analysis  ($N={N}$,  '
+        f'$\\hat{{\\tau}}_{{\\rm int}}={tau_int:.3f}$,  '
+        f'$\\tau_{{\\rm block}}\\approx{tau_b:.1f}$)',
+        fontsize=14, fontweight='bold', color=TEXT, y=1.01)
+    plt.tight_layout()
+    plt.savefig(savename, dpi=150, bbox_inches='tight', facecolor=BG)
+    plt.close()
+    reset_style()
+    print(f'Saved {savename}')
+
+
+
 def plot_acf(acf, tau_int, window, N, savename='acf.png'):
     """Plot the ACF with confidence band and tau_int window marker."""
     apply_dark_style()
@@ -381,16 +571,24 @@ def plot_bootstrap_distribution(x, n_boot=3000, block_size=None,
     block        = max(1, int(2 * tau))
     sigma_boot   = std_bootstrap(x, n_boot=3000, block_size=block, seed=0)
 
+    blk = blocking_analysis(x)
+    sigma_block_plateau = blk['plateau_value']
+    tau_from_block      = blk['tau_from_block']
+
     print(f'N                       = {N}')
     print(f'Sample mean             = {x.mean():.6f}  (true = 0.5)')
     print(f'tau_int (Sokal)         = {tau:.4f}  (window W={W})')
+    print(f'tau_int (blocking)      = {tau_from_block:.4f}')
     print(f'Naive sigma_mean        = {sigma_naive:.6f}')
     print(f'Covariance sigma_mean   = {sigma_corr:.6f}')
     print(f'Bootstrap sigma_mean    = {sigma_boot:.6f}')
+    print(f'Blocking sigma_mean     = {sigma_block_plateau:.6f}')
     print(f'Exact sigma_mean (iid)  = {1/(np.sqrt(12)*np.sqrt(N)):.6f}')
 
     plot_sample_and_running_mean(x, savename='fig_sample.png')
     plot_acf(acf, tau, W, N, savename='fig_acf.png')
     plot_bootstrap_distribution(x, savename='fig_bootstrap.png')
+    plot_blocking(blk, sigma_naive, sigma_corr, sigma_boot, N, tau,
+                  savename='fig_blocking.png')
     plot_std_comparison([500, 1000, 5000, 10000, 50000],
                          savename='fig_std_comparison.png')