Fix docs

cboulay · cboulay · commit 23ac9e018746 · 2026-02-11T19:02:18.000-05:00
diff --git a/docs/source/_templates/autosummary/module.rst b/docs/source/_templates/autosummary/module.rst
@@ -1,6 +1,9 @@
 {{ fullname | escape | underline}}
 
 .. automodule:: {{ fullname }}
+   :no-members:
+   :no-undoc-members:
+   :no-special-members:
 
    {% block attributes %}
    {% if attributes %}
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -37,6 +37,7 @@
 templates_path = ["_templates"]
 source_suffix = [".rst"]
 exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
+suppress_warnings = ["myst.xref_missing"]
 
 # The toctree master document
 master_doc = "index"
diff --git a/src/ezmsg/learn/dim_reduce/adaptive_decomp.py b/src/ezmsg/learn/dim_reduce/adaptive_decomp.py
@@ -227,11 +227,11 @@ class MiniBatchNMFSettings(AdaptiveDecompSettings):
 
     beta_loss: typing.Union[str, float] = "frobenius"
     """
-    'frobenius', 'kullback-leibler', 'itakura-saito'
+    'frobenius', 'kullback-leibler', 'itakura-saito'.
     Note that values different from 'frobenius'
-        (or 2) and 'kullback-leibler' (or 1) lead to significantly slower
-        fits. Note that for `beta_loss <= 0` (or 'itakura-saito'), the input
-        matrix `X` cannot contain zeros.
+    (or 2) and 'kullback-leibler' (or 1) lead to significantly slower
+    fits. Note that for ``beta_loss <= 0`` (or 'itakura-saito'), the input
+    matrix ``X`` cannot contain zeros.
     """
 
     tol: float = 1e-4
diff --git a/src/ezmsg/learn/linear_model/sgd.py b/src/ezmsg/learn/linear_model/sgd.py
@@ -5,5 +5,5 @@
     SGDDecoderSettings as SGDDecoderSettings,
 )
 from ..process.sgd import (
-    sgd_decoder as sgd_decoder,
+    SGDDecoderTransformer as SGDDecoderTransformer,
 )
diff --git a/src/ezmsg/learn/model/mlp.py b/src/ezmsg/learn/model/mlp.py
@@ -25,6 +25,7 @@ def __init__(
     ):
         """
         Initialize the MLP model.
+
         Args:
             input_size (int): The size of the input features.
             hidden_size (int | list[int]): The sizes of the hidden layers. If a list, num_layers must be None or the
diff --git a/src/ezmsg/learn/model/rnn.py b/src/ezmsg/learn/model/rnn.py
@@ -37,9 +37,7 @@ def __init__(
         rnn_klass_str = rnn_type.upper().split("-")[0]
         if rnn_klass_str not in ["GRU", "LSTM", "RNN"]:
             raise ValueError(f"Unrecognized rnn_type: {rnn_type}")
-        rnn_klass = {"GRU": torch.nn.GRU, "LSTM": torch.nn.LSTM, "RNN": torch.nn.RNN}[
-            rnn_klass_str
-        ]
+        rnn_klass = {"GRU": torch.nn.GRU, "LSTM": torch.nn.LSTM, "RNN": torch.nn.RNN}[rnn_klass_str]
         rnn_kwargs = {}
         if rnn_klass_str == "RNN":
             rnn_kwargs["nonlinearity"] = rnn_type.lower().split("-")[-1]
@@ -57,16 +55,11 @@ def __init__(
         if isinstance(output_size, int):
             output_size = {"output": output_size}
         self.heads = torch.nn.ModuleDict(
-            {
-                name: torch.nn.Linear(hidden_size, size)
-                for name, size in output_size.items()
-            }
+            {name: torch.nn.Linear(hidden_size, size) for name, size in output_size.items()}
         )
 
     @classmethod
-    def infer_config_from_state_dict(
-        cls, state_dict: dict, rnn_type: str = "GRU"
-    ) -> dict[str, int | float]:
+    def infer_config_from_state_dict(cls, state_dict: dict, rnn_type: str = "GRU") -> dict[str, int | float]:
         """
         This method is specific to each processor.
 
@@ -88,8 +81,7 @@ def infer_config_from_state_dict(
             # Infer input_size from linear_embeddings.weight (shape: [input_size, input_size])
             "input_size": state_dict["linear_embeddings.weight"].shape[1],
             # Infer hidden_size from rnn.weight_ih_l0 (shape: [hidden_size * 3, input_size])
-            "hidden_size": state_dict["rnn.weight_ih_l0"].shape[0]
-            // cls._get_gate_count(rnn_type),
+            "hidden_size": state_dict["rnn.weight_ih_l0"].shape[0] // cls._get_gate_count(rnn_type),
             # Infer num_layers by counting rnn layers in state_dict (e.g., weight_ih_l<k>)
             "num_layers": sum(1 for key in state_dict if "rnn.weight_ih_l" in key),
             "output_size": output_size,
@@ -134,27 +126,25 @@ def forward(
     ) -> tuple[dict[str, torch.Tensor], torch.Tensor | tuple]:
         """
         Forward pass through the RNN model.
+
         Args:
             x (torch.Tensor): Input tensor of shape (batch, seq_len, input_size).
             input_lens (Optional[torch.Tensor]): Optional tensor of lengths for each sequence in the batch.
                 If provided, sequences will be packed before passing through the RNN.
             hx (Optional[torch.Tensor | tuple[torch.Tensor, torch.Tensor]]): Optional initial hidden state for the RNN.
+
         Returns:
             tuple[dict[str, torch.Tensor], torch.Tensor | tuple]:
-                A dictionary mapping head names to output tensors of shape (batch, seq_len, output_size).
-                If the RNN is LSTM, the second element is the hidden state (h_n, c_n) or just h_n if GRU.
+            A dictionary mapping head names to output tensors of shape (batch, seq_len, output_size).
+            The second element is the hidden state: (h_n, c_n) if the RNN is an LSTM, or just h_n for GRU and RNN.
         """
         x = self.linear_embeddings(x)
         x = self.dropout_input(x)
         total_length = x.shape[1]
         if input_lens is not None:
-            x = torch.nn.utils.rnn.pack_padded_sequence(
-                x, input_lens, batch_first=True, enforce_sorted=False
-            )
+            x = torch.nn.utils.rnn.pack_padded_sequence(x, input_lens, batch_first=True, enforce_sorted=False)
         x_out, hx_out = self.rnn(x, hx)
         if input_lens is not None:
-            x_out, _ = torch.nn.utils.rnn.pad_packed_sequence(
-                x_out, batch_first=True, total_length=total_length
-            )
+            x_out, _ = torch.nn.utils.rnn.pad_packed_sequence(x_out, batch_first=True, total_length=total_length)
         x_out = self.output_dropout(x_out)
         return {name: head(x_out) for name, head in self.heads.items()}, hx_out
diff --git a/src/ezmsg/learn/model/transformer.py b/src/ezmsg/learn/model/transformer.py
@@ -49,9 +49,7 @@ def __init__(
         else:
             autoregressive_size = list(output_size.values())[0]
         if isinstance(output_size, dict):
-            autoregressive_size = output_size.get(
-                autoregressive_head, autoregressive_size
-            )
+            autoregressive_size = output_size.get(autoregressive_head, autoregressive_size)
         self.start_token = torch.nn.Parameter(torch.zeros(1, 1, autoregressive_size))
         self.output_to_hidden = torch.nn.Linear(autoregressive_size, hidden_size)
 
@@ -86,10 +84,7 @@ def __init__(
         if isinstance(output_size, int):
             output_size = {"output": output_size}
         self.heads = torch.nn.ModuleDict(
-            {
-                name: torch.nn.Linear(hidden_size, out_dim)
-                for name, out_dim in output_size.items()
-            }
+            {name: torch.nn.Linear(hidden_size, out_dim) for name, out_dim in output_size.items()}
         )
 
     @classmethod
@@ -108,13 +103,9 @@ def infer_config_from_state_dict(cls, state_dict: dict) -> dict[str, int | float
             "hidden_size": state_dict["input_proj.weight"].shape[0],
             "output_size": output_size,
             # Infer encoder_layers from transformer layers in state_dict
-            "encoder_layers": len(
-                [k for k in state_dict if k.startswith("encoder.layers")]
-            ),
+            "encoder_layers": len([k for k in state_dict if k.startswith("encoder.layers")]),
             # Infer decoder_layers from transformer decoder layers in state_dict
-            "decoder_layers": len(
-                {k.split(".")[2] for k in state_dict if k.startswith("decoder.layers")}
-            )
+            "decoder_layers": len({k.split(".")[2] for k in state_dict if k.startswith("decoder.layers")})
             if any(k.startswith("decoder.layers") for k in state_dict)
             else 0,
         }
@@ -129,20 +120,22 @@ def forward(
     ) -> dict[str, torch.Tensor]:
         """
         Forward pass through the transformer model.
+
         Args:
             src (torch.Tensor): Input tensor of shape (batch, seq_len, input_size).
             tgt (Optional[torch.Tensor]): Target tensor for decoder, shape (batch, seq_len, input_size).
-                Required if `decoder_layers > 0`. In training, this can be the ground-truth target sequence
+                Required if ``decoder_layers > 0``. In training, this can be the ground-truth target sequence
                 (i.e. teacher forcing). During inference, this is constructed autoregressively.
             src_mask (Optional[torch.Tensor]): Optional attention mask for the encoder input. Should be broadcastable
                 to shape (batch, seq_len, seq_len) or (seq_len, seq_len).
             tgt_mask (Optional[torch.Tensor]): Optional attention mask for the decoder input. Used to enforce causal
                 decoding (i.e. autoregressive generation) during training or inference.
             start_pos (int): Starting offset for positional embeddings. Used for streaming inference to maintain
                 correct positional indices. Default is 0.
+
         Returns:
-            dict[str, torch.Tensor]: Dictionary of output tensors each output head, each with shape (batch, seq_len,
-                output_size).
+            dict[str, torch.Tensor]: Dictionary of output tensors, one per output head, each with shape
+            (batch, seq_len, output_size).
         """
         B, T, _ = src.shape
         device = src.device
@@ -158,9 +151,7 @@ def forward(
             if tgt is None:
                 tgt = self.start_token.expand(B, -1, -1).to(device)
             tgt_proj = self.output_to_hidden(tgt)
-            tgt_pos_ids = torch.arange(tgt.shape[1], device=device).expand(
-                B, tgt.shape[1]
-            )
+            tgt_pos_ids = torch.arange(tgt.shape[1], device=device).expand(B, tgt.shape[1])
             tgt_proj = tgt_proj + self.pos_embedding(tgt_pos_ids)
             tgt_proj = self.dropout(tgt_proj)
             out = self.decoder(
diff --git a/src/ezmsg/learn/process/refit_kalman.py b/src/ezmsg/learn/process/refit_kalman.py
@@ -31,16 +31,15 @@ class RefitKalmanFilterSettings(ez.Settings):
 
     This class defines the configuration parameters for the Refit Kalman filter processor.
     The RefitKalmanFilter is designed for online processing and playback.
-
-    Attributes:
-        checkpoint_path: Path to saved model parameters (optional).
-            If provided, loads pre-trained parameters instead of learning from data.
-        steady_state: Whether to use steady-state Kalman filter.
-            If True, uses pre-computed Kalman gain; if False, updates dynamically.
     """
 
     checkpoint_path: str | None = None
+    """Path to saved model parameters. If provided, loads pre-trained parameters instead of learning from data."""
+
     steady_state: bool = False
+    """Whether to use steady-state Kalman filter. If True, uses pre-computed Kalman gain;
+    if False, updates dynamically."""
+
     velocity_indices: tuple[int, int] = (2, 3)
 
 
@@ -51,28 +50,31 @@ class RefitKalmanFilterState:
 
     This class manages the persistent state of the Refit Kalman filter processor,
     including the model instance, current state estimates, and data buffers for refitting.
-
-    Attributes:
-        model: The RefitKalmanFilter model instance.
-        x: Current state estimate (n_states,).
-        P: Current state covariance matrix (n_states x n_states).
-        buffer_neural: Buffer for storing neural activity data for refitting.
-        buffer_state: Buffer for storing state estimates for refitting.
-        buffer_cursor_positions: Buffer for storing cursor positions for refitting.
-        buffer_target_positions: Buffer for storing target positions for refitting.
-        buffer_hold_flags: Buffer for storing hold flags for refitting.
-        current_position: Current cursor position estimate (2,).
     """
 
     model: RefitKalmanFilter | None = None
+    """The RefitKalmanFilter model instance."""
+
     x: object | None = None  # Array API; namespace matches source data.
+    """Current state estimate (n_states,)."""
+
     P: object | None = None  # Array API; namespace matches source data.
+    """Current state covariance matrix (n_states x n_states)."""
 
     buffer_neural: list | None = None
+    """Buffer for storing neural activity data for refitting."""
+
     buffer_state: list | None = None
+    """Buffer for storing state estimates for refitting."""
+
     buffer_cursor_positions: list | None = None
+    """Buffer for storing cursor positions for refitting."""
+
     buffer_target_positions: list | None = None
+    """Buffer for storing target positions for refitting."""
+
     buffer_hold_flags: list | None = None
+    """Buffer for storing hold flags for refitting."""
 
 
 class RefitKalmanFilterProcessor(
@@ -382,10 +384,8 @@ def refit_model(self):
         Refit the observation model (H, Q) using buffered measurements and contextual data.
 
         This method updates the model's understanding of the neural-to-state mapping
-        by calculating a new observation matrix and noise covariance, based on:
-            - Logged neural data
-            - Cursor state estimates
-            - Hold flags and target positions
+        by calculating a new observation matrix and noise covariance, based on
+        logged neural data, cursor state estimates, hold flags, and target positions.
 
         Args:
             velocity_indices (tuple): Indices in the state vector corresponding to velocity components.
diff --git a/src/ezmsg/learn/process/sklearn.py b/src/ezmsg/learn/process/sklearn.py
@@ -55,33 +55,17 @@ class SklearnModelProcessor(BaseAdaptiveTransformer[SklearnModelSettings, AxisAr
 
     The processor expects and outputs `AxisArray` messages with a `"ch"` (channel) axis.
 
-    Settings:
-    ---------
-    model_class : str
-        Full path to the sklearn or River model class to use.
-        Example: "sklearn.linear_model.SGDClassifier" or "river.linear_model.LogisticRegression"
-
-    model_kwargs : dict[str, typing.Any], optional
-        Additional keyword arguments passed to the model constructor.
-
-    checkpoint_path : str, optional
-        Path to a pickle file to load a previously saved model. If provided, the model will
-        be restored from this path at startup.
-
-    partial_fit_classes : np.ndarray, optional
-        For classifiers that require all class labels to be specified during `partial_fit`.
-
-    Example:
-    -----------------------------
-    ```python
-    processor = SklearnModelProcessor(
-        settings=SklearnModelSettings(
-            model_class='sklearn.linear_model.SGDClassifier',
-            model_kwargs={'loss': 'log_loss'},
-            partial_fit_classes=np.array([0, 1]),
+    See :class:`SklearnModelSettings` for configuration options.
+
+    Example::
+
+        processor = SklearnModelProcessor(
+            settings=SklearnModelSettings(
+                model_class='sklearn.linear_model.SGDClassifier',
+                model_kwargs={'loss': 'log_loss'},
+                partial_fit_classes=np.array([0, 1]),
+            )
         )
-    )
-    ```
     """
 
     def _init_model(self) -> None:
@@ -224,17 +208,15 @@ class SklearnModelUnit(BaseAdaptiveTransformerUnit[SklearnModelSettings, AxisArr
     in an ezmsg graph-based system. It takes in `AxisArray` inputs and outputs predictions
     in the same format, optionally performing training via `partial_fit` or `fit`.
 
-    Example:
-    --------
-    ```python
-    unit = SklearnModelUnit(
-        settings=SklearnModelSettings(
-            model_class='sklearn.linear_model.SGDClassifier',
-            model_kwargs={'loss': 'log_loss'},
-            partial_fit_classes=np.array([0, 1]),
+    Example::
+
+        unit = SklearnModelUnit(
+            settings=SklearnModelSettings(
+                model_class='sklearn.linear_model.SGDClassifier',
+                model_kwargs={'loss': 'log_loss'},
+                partial_fit_classes=np.array([0, 1]),
+            )
         )
-    )
-    ```
     """
 
     SETTINGS = SklearnModelSettings

Original file line number	Diff line number	Diff line change
`@@ -5,5 +5,5 @@`
`5`	`5`	`SGDDecoderSettings as SGDDecoderSettings,`
`6`	`6`	`)`
`7`	`7`	`from ..process.sgd import (`
`8`		`- sgd_decoder as sgd_decoder,`
	`8`	`+ SGDDecoderTransformer as SGDDecoderTransformer,`
`9`	`9`	`)`