CMacM
diff --git a/‎.codex‎ b/‎.codex‎
diff --git a/‎benchmarks/benchmarks.py‎
Lines changed: 129 additions & 1 deletion b/‎benchmarks/benchmarks.py‎
Lines changed: 129 additions & 1 deletion
diff --git a/‎notebooks/dev/optimise_c.ipynb‎
Lines changed: 414 additions & 0 deletions b/‎notebooks/dev/optimise_c.ipynb‎
Lines changed: 414 additions & 0 deletions
diff --git a/‎notebooks/dev/test_render_size.ipynb‎
Lines changed: 367 additions & 0 deletions b/‎notebooks/dev/test_render_size.ipynb‎
Lines changed: 367 additions & 0 deletions
diff --git a/‎notebooks/ia-analysis/test_models.ipynb‎
Lines changed: 51 additions & 11 deletions b/‎notebooks/ia-analysis/test_models.ipynb‎
Lines changed: 51 additions & 11 deletions
diff --git a/‎notebooks/ia-analysis/test_non-affine_response.ipynb‎
Lines changed: 7 additions & 15 deletions b/‎notebooks/ia-analysis/test_non-affine_response.ipynb‎
Lines changed: 7 additions & 15 deletions
diff --git a/‎setup.py‎
Lines changed: 1 addition & 1 deletion b/‎setup.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/batsim/.nfs00000000749b68410000042e‎
226 KB b/‎src/batsim/.nfs00000000749b68410000042e‎
226 KB
diff --git a/‎src/batsim/.nfs00000000749b684500000440‎
242 KB b/‎src/batsim/.nfs00000000749b684500000440‎
242 KB
diff --git a/‎src/batsim/_gsinterface.cpp‎
Lines changed: 21 additions & 16 deletions b/‎src/batsim/_gsinterface.cpp‎
Lines changed: 21 additions & 16 deletions
@@ -1,7 +1,11 @@
 """Benchmarks to check computation time and memory usage for batsim."""
 import batsim.stamp as batstamp
 import batsim.transforms as batforms
+import batsim
+import contextlib
 import galsim
+import io
+import numpy as np
 import time
 
 def time_shear_speed(nn=64, scale=0.2):
@@ -74,7 +78,131 @@ def time_ia_speed(nn=128, scale=0.1):
     aff_time = aff_end - aff_start
 
     return {'IA time' : ia_time, 'Lens time' : aff_time}
-    
+
+
+def _parse_simulate_profile_logs(log_lines):
+    """Parse profile log lines into ``{"timings": ..., "stats": ...}``.
+
+    For every line, everything up to and including the first ``"] "`` is
+    dropped.  A message starting with ``"stats "`` is read as
+    whitespace-separated ``key=value`` tokens (tokens without ``=`` are
+    ignored); each value becomes an ``int`` if possible, else a ``float``,
+    else stays a string.  Any other message containing ``=`` is read as one
+    ``key=value`` timing: a trailing ``"s"`` suffix is ignored when
+    converting the value to ``float``, and a value that is not numeric is
+    stored unchanged.
+    """
+    stats = {}
+    timings = {}
+    for line in log_lines:
+        msg = line.split("] ", 1)[-1]
+        if msg.startswith("stats "):
+            for token in msg[6:].split():
+                key, sep, value = token.partition("=")
+                if not sep:
+                    continue
+                try:
+                    stats[key] = int(value)
+                except ValueError:
+                    try:
+                        stats[key] = float(value)
+                    except ValueError:
+                        stats[key] = value
+        elif "=" in msg:
+            key, _, value = msg.partition("=")
+            # Strip the suffix only for the numeric attempt.
+            number = value[:-1] if value.endswith("s") else value
+            try:
+                timings[key] = float(number)
+            except ValueError:
+                # Not a duration: keep the text intact instead of storing it
+                # with its final "s" chopped off (e.g. "gauss" -> "gaus").
+                timings[key] = value
+    return {"timings": timings, "stats": stats}
+
+
+def _extract_parametric_profile_info(cosmos_catalog, catalog_index, gal_obj):
+    """Collect best-effort metadata describing one catalog galaxy.
+
+    The result always holds ``catalog_index`` (as ``int``) and
+    ``gsobject_type`` (the class name of ``gal_obj``).  ``flux`` and
+    ``nyquist_scale`` are added as floats when ``gal_obj`` exposes them and
+    the conversion succeeds.  If ``cosmos_catalog`` has a ``param_cat``,
+    selected columns of row ``catalog_index`` are copied in as well; any
+    column whose lookup fails is left out.
+    """
+    summary = {
+        "catalog_index": int(catalog_index),
+        "gsobject_type": type(gal_obj).__name__,
+    }
+
+    for name in ("flux", "nyquist_scale"):
+        if not hasattr(gal_obj, name):
+            continue
+        try:
+            summary[name] = float(getattr(gal_obj, name))
+        except Exception:
+            # Best effort: a value that cannot be read as a float is skipped.
+            pass
+
+    table = getattr(cosmos_catalog, "param_cat", None)
+    if table is None:
+        return summary
+
+    base_columns = ("mag_auto", "flux_radius", "zphot")
+    extra_columns = ("use_bulgefit", "viable_sersic")
+    if hasattr(table, "colnames"):
+        # The base columns are requested unconditionally here; a missing one
+        # is dropped by the per-column except below.
+        selected = list(base_columns) + [c for c in extra_columns if c in table.colnames]
+    elif hasattr(table, "dtype") and table.dtype.names:
+        selected = [c for c in base_columns + extra_columns if c in table.dtype.names]
+    else:
+        selected = []
+    if not selected:
+        return summary
+
+    record = table[int(catalog_index)]
+    for column in selected:
+        try:
+            cell = record[column]
+            # Unwrap scalar wrappers that expose .item() into plain values.
+            if hasattr(cell, "item"):
+                cell = cell.item()
+            summary[column] = cell
+        except Exception:
+            pass
+    return summary
+
+
+def benchmark_parametric_cosmos_profiles(
+    n_galaxies=5,
+    ngrid=128,
+    pix_scale=0.2,
+    psf_obj=None,
+    draw_method="auto",
+    truncate_ratio=1.0,
+    maximum_num_grids=4096,
+    force_ngrid=False,
+    seed=1234,
+    cosmos_catalog=None,
+):
+    """Run a lightweight per-galaxy benchmark using parametric COSMOS profiles.
+
+    ``n_galaxies`` catalog indices are drawn with
+    ``np.random.RandomState(seed)``, with replacement only when more galaxies
+    are requested than the catalog holds.  Each galaxy is rendered through
+    ``batsim.simulate_galaxy(..., profile=True)`` while stdout is captured;
+    the captured ``[simulate_galaxy]`` lines are parsed and then re-printed
+    together with a one-line summary per galaxy.
+
+    ``cosmos_catalog`` defaults to ``galsim.COSMOSCatalog()`` when it is
+    ``None``.  ``ngrid``, ``pix_scale``, ``psf_obj``, ``truncate_ratio``,
+    ``maximum_num_grids``, ``draw_method`` and ``force_ngrid`` are forwarded
+    to ``batsim.simulate_galaxy`` unchanged.
+
+    Returns a list of dictionaries containing profile metadata, parsed
+    `simulate_galaxy(profile=True)` logs, and end-to-end elapsed time.
+    """
+    # Compare against None explicitly: the catalog supports len(), so an empty
+    # catalog is falsy and `or` would silently swap in the default catalog.
+    if cosmos_catalog is None:
+        cosmos_catalog = galsim.COSMOSCatalog()
+    n_catalog = len(cosmos_catalog)
+    rng = np.random.RandomState(seed)
+    indices = rng.choice(n_catalog, size=n_galaxies, replace=(n_galaxies > n_catalog))
+
+    records = []
+    for i, idx in enumerate(indices):
+        gal = cosmos_catalog.makeGalaxy(index=int(idx), gal_type="parametric")
+        profile_info = _extract_parametric_profile_info(cosmos_catalog, idx, gal)
+
+        # Everything simulate_galaxy prints is captured so the profile lines
+        # can be parsed; the timer brackets only the simulate_galaxy call.
+        log_buf = io.StringIO()
+        t0 = time.perf_counter()
+        with contextlib.redirect_stdout(log_buf):
+            image = batsim.simulate_galaxy(
+                ngrid=ngrid,
+                pix_scale=pix_scale,
+                gal_obj=gal,
+                psf_obj=psf_obj,
+                truncate_ratio=truncate_ratio,
+                maximum_num_grids=maximum_num_grids,
+                draw_method=draw_method,
+                force_ngrid=force_ngrid,
+                profile=True,
+            )
+        elapsed_s = time.perf_counter() - t0
+
+        # Only lines tagged by simulate_galaxy are kept; other captured
+        # output is discarded.
+        profile_logs = [line for line in log_buf.getvalue().splitlines() if line.startswith("[simulate_galaxy]")]
+        parsed_logs = _parse_simulate_profile_logs(profile_logs)
+        record = {
+            "galaxy_number": i,
+            "profile": profile_info,
+            "logger": parsed_logs,
+            "elapsed_s": elapsed_s,
+            "image_shape": tuple(image.shape),
+            "image_sum": float(np.sum(image)),
+        }
+        records.append(record)
+
+        print(
+            f"[benchmark_parametric_cosmos_profiles] i={i} idx={int(idx)} "
+            f"nn={parsed_logs['stats'].get('nn')} downsample_ratio={parsed_logs['stats'].get('downsample_ratio')} "
+            f"elapsed_s={elapsed_s:.4e}"
+        )
+        print(f"[benchmark_parametric_cosmos_profiles] profile={profile_info}")
+        for line in profile_logs:
+            print(line)
+    return records
 
 if __name__ == "__main__":
     time_shear_speed()
 
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "id": "3295c8e5",
    "metadata": {},
    "outputs": [],
@@ -89,6 +89,7 @@
     "                phi = np.radians(0),\n",
     "                clip_radius=5 # clip the transform at 5*hlr to prevent edge effects\n",
     "            )\n",
+    "\n",
     "            gal_img = batsim.simulate_galaxy(\n",
     "                ngrid=nn,\n",
     "                pix_scale=scale,\n",
@@ -100,16 +101,15 @@
     "\n",
     "            # Apply lensing shear directly\n",
     "            gal_img = galsim.Image(gal_img, scale=scale)\n",
-    "            gal = galsim.InterpolatedImage(gal_img, scale=scale)\n",
     "        else:\n",
     "            # convolve, shift, and draw the galaxy\n",
     "            gal = gal.shift(0.5*scale, 0.5*scale) # shift the galaxy to center\n",
     "\n",
-    "        # Apply lensing shear\n",
-    "        gal = gal.shear(g1=lens_shear, g2=0.0)\n",
+    "            # Apply lensing shear\n",
+    "            gal = gal.shear(g1=lens_shear, g2=0.0)\n",
     "\n",
-    "        # Convolve after both IA and lensing\n",
-    "        gal = galsim.Convolve([gal, psf])\n",
+    "            # Convolve after both IA and lensing\n",
+    "            gal = galsim.Convolve([gal, psf])\n",
     "\n",
     "        # Set the subimage in the stamp\n",
     "        gal_img = gal.drawImage(nx=nn, ny=nn, scale=scale).array\n",
@@ -119,14 +119,6 @@
     "\n",
     "    return stamp"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d33c0296",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
@@ -145,7 +137,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.19"
+   "version": "3.10.20"
   }
  },
  "nbformat": 4,
 
@@ -68,7 +68,7 @@ def uniq(xs):
     include_dirs=[pybind11.get_include()],
     libraries=["galsim"],
     language="c++",
-    extra_compile_args=["-std=c++11", "-fopenmp", "-O3"],
+    extra_compile_args=["-std=c++17", "-fopenmp", "-O3"],
     extra_link_args=["-flto", "-fopenmp"],
 )
 
 
@@ -7,32 +7,33 @@
 #include <fftw3.h>
 #include <cmath>
 
-
 namespace py = pybind11;
 
 py::array_t<double> getFluxVec(
     const double scale,
     const galsim::SBProfile& gsobj,
     const py::array_t<double>& xy_coords
 ){
+    // Evaluate gsobj.xValue at each (x, y) column of xy_coords, multiply by
+    // the pixel area scale*scale, and return the values as a (dim, dim) array.
+    // The shape check stays: xy is an unchecked accessor and row 1 is read
+    // below, so a first dimension other than 2 would read out of bounds.
     if (xy_coords.ndim() != 2 || xy_coords.shape(0) != 2) {
         throw std::runtime_error("xy_coords must be a 2D array with shape (2, n)");
     }
 
     auto xy = xy_coords.unchecked<2>();
     const int n_points = xy_coords.shape(1);
-    std::vector<double> fluxes(n_points);
-
+    // Only the first dim*dim points are evaluated; they fill the square output.
+    const int dim = std::sqrt(n_points);
+    const int n_used = dim * dim;
+    auto result = py::array_t<double>({dim, dim});
+    auto out = result.mutable_data();
     double area = scale * scale;
-    #pragma omp parallel for
-    for(int i = 0; i < n_points; ++i) {
-        fluxes[i] = gsobj.xValue(
+
+    // Pre-warm GalSim's internal cache with a single serial call
+    // before entering the parallel region. Skipped for empty input so
+    // xy(0, 0) is never read from a zero-length array.
+    if (n_used > 0) {
+        gsobj.xValue(galsim::Position<double>(xy(0, 0), xy(1, 0)));
+    }
+
+    #pragma omp parallel for schedule(static)
+    for(int i = 0; i < n_used; ++i) {
+        out[i] = gsobj.xValue(
             galsim::Position<double>(xy(0, i), xy(1, i))
         ) * area;
     }
 
-    int dim = std::sqrt(n_points);
-    return py::array_t<double>({dim, dim}, fluxes.data());
+    return result;
 }
 
 // Utility function to generate rfftfreq
@@ -85,8 +86,8 @@ py::array_t<double> convolvePsf(
     int dim2 = dim / downsample_ratio;
 
     // Frequency grids for the down sampled signal
-    const auto x_freqs2 = rfftfreq(dim2, scale2 / M_PI / 2.0);
-    const auto y_freqs2 = fftfreq(dim2, scale2 / M_PI / 2.0);
+    const auto x_freqs2 = rfftfreq(dim2, scale2);
+    const auto y_freqs2 = fftfreq(dim2, scale2);
 
     // Allocate FFTW arrays with pointers
     double* in = static_cast<double*>(info.ptr);
@@ -97,6 +98,10 @@ py::array_t<double> convolvePsf(
     fftw_plan p_forward = fftw_plan_dft_r2c_2d(dim, dim, in, out, FFTW_ESTIMATE);
     fftw_execute(p_forward);
 
+    // Pre-warm GalSim's internal cache with a single serial call
+    // before entering the parallel region
+    gsobj.kValue(galsim::Position<double>(x_freqs2[0], y_freqs2[0]));
+
     // Process FFT result using gsobj
     #pragma omp parallel for
     for (int y2 = 0; y2 < dim2; ++y2) {
@@ -108,8 +113,8 @@ py::array_t<double> convolvePsf(
             std::complex<double> fft_val(out[index][0], out[index][1]);
             std::complex<double> result = fft_val * gsobj.kValue(
                 galsim::Position<double>(
-                    x_freqs2[x2],
-                    y_freqs2[y2]
+                    2.0 * M_PI * x_freqs2[x2],
+                    2.0 * M_PI * y_freqs2[y2]
                 )
             );
             out2[index2][0] = result.real();
Original file line number	Diff line number	Diff line change
`@@ -68,7 +68,7 @@ def uniq(xs):`
`68`	`68`	`include_dirs=[pybind11.get_include()],`
`69`	`69`	`libraries=["galsim"],`
`70`	`70`	`language="c++",`
`71`		`- extra_compile_args=["-std=c++11", "-fopenmp", "-O3"],`
	`71`	`+ extra_compile_args=["-std=c++17", "-fopenmp", "-O3"],`
`72`	`72`	`extra_link_args=["-flto", "-fopenmp"],`
`73`	`73`	`)`
`74`	`74`