Skip to content

Commit 2165b5a

Browse files
Arm backend: Mark composable quantizer APIs experimental (#18318)
Signed-off-by: Sebastian Larsson <sebastian.larsson@arm.com>
1 parent 38b40bc commit 2165b5a

3 files changed

Lines changed: 135 additions & 30 deletions

File tree

backends/arm/quantizer/arm_quantizer.py

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
SharedQspecQuantizer,
7272
)
7373
from executorch.backends.arm.vgf import VgfCompileSpec
74+
from executorch.exir._warnings import experimental
7475
from torch.fx import GraphModule, Node
7576
from torchao.quantization.pt2e import (
7677
FakeQuantize,
@@ -441,14 +442,26 @@ def _for_each_filtered_node(
441442

442443

443444
class TOSAQuantizer(Quantizer):
444-
"""Manage quantization annotations for TOSA-compatible backends."""
445+
"""Manage quantization annotations for TOSA-compatible backends.
446+
447+
.. warning::
448+
Setting ``use_composable_quantizer=True`` enables an experimental API
449+
surface that may change without notice.
450+
451+
"""
445452

446453
def __init__(
447454
self,
448455
compile_spec_or_tosa_spec,
449456
use_composable_quantizer: bool = False,
450457
) -> None:
451-
"""Create a TOSA quantizer from a TOSA spec or Arm compile spec."""
458+
"""Create a TOSA quantizer from a TOSA spec or Arm compile spec.
459+
460+
.. warning::
461+
Setting ``use_composable_quantizer=True`` enables an experimental
462+
API surface that may change without notice.
463+
464+
"""
452465
self.use_composable_quantizer = use_composable_quantizer
453466
self.quantizer: _TOSAQuantizerV1 | _TOSAQuantizerV2
454467
if use_composable_quantizer:
@@ -606,6 +619,10 @@ def set_io(
606619
self.quantizer.set_io(quantization_config)
607620
return self
608621

622+
@experimental(
623+
"This API is experimental and may change without notice. "
624+
"It is only available when use_composable_quantizer=True."
625+
)
609626
def add_quantizer(self, quantizer: Quantizer) -> TOSAQuantizer:
610627
"""Insert a quantizer with highest precedence."""
611628
if self.use_composable_quantizer:
@@ -614,6 +631,10 @@ def add_quantizer(self, quantizer: Quantizer) -> TOSAQuantizer:
614631
"add_quantizer is only supported in the composable quantizer implementation."
615632
)
616633

634+
@experimental(
635+
"This API is experimental and may change without notice. "
636+
"It is only available when use_composable_quantizer=True."
637+
)
617638
def set_node_finder(
618639
self, quantization_config: Optional[QuantizationConfig], node_finder: NodeFinder
619640
) -> TOSAQuantizer:
@@ -631,6 +652,10 @@ def set_node_finder(
631652
"set_node_finder is only supported in the composable quantizer implementation."
632653
)
633654

655+
@experimental(
656+
"This API is experimental and may change without notice. "
657+
"It is only available when use_composable_quantizer=True."
658+
)
634659
def set_node_target(
635660
self, node_target: OpOverload, quantization_config: Optional[QuantizationConfig]
636661
) -> TOSAQuantizer:
@@ -641,6 +666,10 @@ def set_node_target(
641666
"set_node_target is only supported in the composable quantizer implementation."
642667
)
643668

669+
@experimental(
670+
"This API is experimental and may change without notice. "
671+
"It is only available when use_composable_quantizer=True."
672+
)
644673
def set_node_name(
645674
self, node_name: str, quantization_config: Optional[QuantizationConfig]
646675
) -> TOSAQuantizer:
@@ -1167,6 +1196,10 @@ def set_io(
11671196
class EthosUQuantizer(TOSAQuantizer):
11681197
"""Quantizer supported by the Arm Ethos-U backend.
11691198
1199+
.. warning::
1200+
Setting ``use_composable_quantizer=True`` enables an experimental API
1201+
surface that may change without notice.
1202+
11701203
Args:
11711204
compile_spec (EthosUCompileSpec): Backend compile specification for
11721205
Ethos-U targets.
@@ -1185,6 +1218,10 @@ def __init__(
11851218
class VgfQuantizer(TOSAQuantizer):
11861219
"""Quantizer supported by the Arm Vgf backend.
11871220
1221+
.. warning::
1222+
Setting ``use_composable_quantizer=True`` enables an experimental API
1223+
surface that may change without notice.
1224+
11881225
Args:
11891226
compile_spec (VgfCompileSpec): Backend compile specification for Vgf
11901227
targets.

docs/source/backends/arm-ethos-u/arm-ethos-u-quantization.md

Lines changed: 48 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,23 @@ The Arm Ethos-U delegate supports the following quantization schemes:
1616
### Quantization API
1717

1818
```python
19-
class EthosUQuantizer(compile_spec: 'EthosUCompileSpec') -> 'None'
19+
class EthosUQuantizer(compile_spec: 'EthosUCompileSpec', use_composable_quantizer: 'bool' = False) -> 'None'
2020
```
2121
Quantizer supported by the Arm Ethos-U backend.
2222

23+
.. warning::
24+
Setting ``use_composable_quantizer=True`` enables an experimental API
25+
surface that may change without notice.
26+
2327
Args:
2428
- **compile_spec (EthosUCompileSpec)**: Backend compile specification for
2529
Ethos-U targets.
30+
- **use_composable_quantizer (bool)**: Whether to use the composable quantizer implementation. See https://github.com/pytorch/executorch/issues/17701 for details.
31+
32+
```python
33+
def EthosUQuantizer.add_quantizer(self, quantizer: 'Quantizer') -> 'TOSAQuantizer':
34+
```
35+
Insert a quantizer with highest precedence.
2636

2737
```python
2838
def EthosUQuantizer.quantize_with_submodules(self, model: 'GraphModule', calibration_samples: 'list[tuple]', is_qat: 'bool' = False, fold_quantize: 'bool' = True):
@@ -48,22 +58,24 @@ Returns:
4858
- **GraphModule**: The quantized model.
4959

5060
```python
51-
def EthosUQuantizer.set_global(self, quantization_config: 'QuantizationConfig | None') -> 'TOSAQuantizer':
61+
def EthosUQuantizer.set_global(self, quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
5262
```
5363
Set quantization_config for submodules not matched by other filters.
5464

5565
Args:
56-
- **quantization_config (QuantizationConfig)**: Configuration to apply to
57-
modules that are not captured by name or type filters.
66+
- **quantization_config (Optional[QuantizationConfig])**: Configuration to
67+
apply to modules that are not captured by name or type filters.
68+
``None`` indicates no quantization.
5869

5970
```python
60-
def EthosUQuantizer.set_io(self, quantization_config: 'QuantizationConfig') -> 'TOSAQuantizer':
71+
def EthosUQuantizer.set_io(self, quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
6172
```
6273
Set quantization_config for input and output nodes.
6374

6475
Args:
65-
- **quantization_config (QuantizationConfig)**: Configuration describing
66-
activation quantization for model inputs and outputs.
76+
- **quantization_config (Optional[QuantizationConfig])**: Configuration
77+
describing activation quantization for model inputs and outputs.
78+
``None`` indicates no quantization.
6779

6880
```python
6981
def EthosUQuantizer.set_module_name(self, module_name: 'str', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
@@ -75,29 +87,51 @@ patterns for that submodule with the provided quantization_config.
7587

7688
Args:
7789
- **module_name (str)**: Fully qualified module name to configure.
78-
- **quantization_config (QuantizationConfig)**: Configuration applied to
79-
the named submodule.
90+
- **quantization_config (Optional[QuantizationConfig])**: Configuration
91+
applied to the named submodule. ``None`` indicates no
92+
quantization.
8093

8194
```python
8295
def EthosUQuantizer.set_module_type(self, module_type: 'Callable', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
8396
```
8497
Set quantization_config for submodules with a given module type.
8598

86-
For example, calling set_module_type(Sub) quantizes supported patterns
87-
in each Sub instance with the provided quantization_config.
99+
For example, calling set_module_type(Softmax) quantizes supported
100+
patterns in each Softmax instance with the provided quantization_config.
88101

89102
Args:
90103
- **module_type (Callable)**: Type whose submodules should use the
91104
provided quantization configuration.
92-
- **quantization_config (QuantizationConfig)**: Configuration to apply to
93-
submodules of the given type.
105+
- **quantization_config (Optional[QuantizationConfig])**: Configuration to
106+
apply to submodules of the given type. ``None`` indicates no
107+
quantization.
108+
109+
```python
110+
def EthosUQuantizer.set_node_finder(self, quantization_config: 'Optional[QuantizationConfig]', node_finder: 'NodeFinder') -> 'TOSAQuantizer':
111+
```
112+
Set quantization_config for nodes matched by a custom NodeFinder.
113+
114+
Args:
115+
- **quantization_config (Optional[QuantizationConfig])**: Configuration
116+
describing quantization settings for nodes matched by the provided
117+
NodeFinder. ``None`` indicates no quantization.
118+
119+
```python
120+
def EthosUQuantizer.set_node_name(self, node_name: 'str', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
121+
```
122+
Set quantization config for a specific node name.
123+
124+
```python
125+
def EthosUQuantizer.set_node_target(self, node_target: 'OpOverload', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
126+
```
127+
Set quantization config for a specific operator target.
94128

95129
```python
96130
def EthosUQuantizer.transform_for_annotation(self, model: 'GraphModule') -> 'GraphModule':
97131
```
98132
Transform the graph to prepare it for quantization annotation.
99133

100-
Currently transforms scalar values to tensor attributes.
134+
Decomposes all operators where required to get correct quantization parameters.
101135

102136
Args:
103137
- **model (GraphModule)**: Model whose graph will be transformed.

docs/source/backends/arm-vgf/arm-vgf-quantization.md

Lines changed: 48 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,23 @@ setting using the `set_module_name` or `set_module_type` methods.
3535
### Quantization API
3636

3737
```python
38-
class VgfQuantizer(compile_spec: 'VgfCompileSpec') -> 'None'
38+
class VgfQuantizer(compile_spec: 'VgfCompileSpec', use_composable_quantizer: 'bool' = False) -> 'None'
3939
```
4040
Quantizer supported by the Arm Vgf backend.
4141

42+
.. warning::
43+
Setting ``use_composable_quantizer=True`` enables an experimental API
44+
surface that may change without notice.
45+
4246
Args:
4347
- **compile_spec (VgfCompileSpec)**: Backend compile specification for Vgf
4448
targets.
49+
- **use_composable_quantizer (bool)**: Whether to use the composable quantizer implementation. See https://github.com/pytorch/executorch/issues/17701 for details.
50+
51+
```python
52+
def VgfQuantizer.add_quantizer(self, quantizer: 'Quantizer') -> 'TOSAQuantizer':
53+
```
54+
Insert a quantizer with highest precedence.
4555

4656
```python
4757
def VgfQuantizer.quantize_with_submodules(self, model: 'GraphModule', calibration_samples: 'list[tuple]', is_qat: 'bool' = False, fold_quantize: 'bool' = True):
@@ -67,22 +77,24 @@ Returns:
6777
- **GraphModule**: The quantized model.
6878

6979
```python
70-
def VgfQuantizer.set_global(self, quantization_config: 'QuantizationConfig | None') -> 'TOSAQuantizer':
80+
def VgfQuantizer.set_global(self, quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
7181
```
7282
Set quantization_config for submodules not matched by other filters.
7383

7484
Args:
75-
- **quantization_config (QuantizationConfig)**: Configuration to apply to
76-
modules that are not captured by name or type filters.
85+
- **quantization_config (Optional[QuantizationConfig])**: Configuration to
86+
apply to modules that are not captured by name or type filters.
87+
``None`` indicates no quantization.
7788

7889
```python
79-
def VgfQuantizer.set_io(self, quantization_config: 'QuantizationConfig') -> 'TOSAQuantizer':
90+
def VgfQuantizer.set_io(self, quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
8091
```
8192
Set quantization_config for input and output nodes.
8293

8394
Args:
84-
- **quantization_config (QuantizationConfig)**: Configuration describing
85-
activation quantization for model inputs and outputs.
95+
- **quantization_config (Optional[QuantizationConfig])**: Configuration
96+
describing activation quantization for model inputs and outputs.
97+
``None`` indicates no quantization.
8698

8799
```python
88100
def VgfQuantizer.set_module_name(self, module_name: 'str', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
@@ -94,29 +106,51 @@ patterns for that submodule with the provided quantization_config.
94106

95107
Args:
96108
- **module_name (str)**: Fully qualified module name to configure.
97-
- **quantization_config (QuantizationConfig)**: Configuration applied to
98-
the named submodule.
109+
- **quantization_config (Optional[QuantizationConfig])**: Configuration
110+
applied to the named submodule. ``None`` indicates no
111+
quantization.
99112

100113
```python
101114
def VgfQuantizer.set_module_type(self, module_type: 'Callable', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
102115
```
103116
Set quantization_config for submodules with a given module type.
104117

105-
For example, calling set_module_type(Sub) quantizes supported patterns
106-
in each Sub instance with the provided quantization_config.
118+
For example, calling set_module_type(Softmax) quantizes supported
119+
patterns in each Softmax instance with the provided quantization_config.
107120

108121
Args:
109122
- **module_type (Callable)**: Type whose submodules should use the
110123
provided quantization configuration.
111-
- **quantization_config (QuantizationConfig)**: Configuration to apply to
112-
submodules of the given type.
124+
- **quantization_config (Optional[QuantizationConfig])**: Configuration to
125+
apply to submodules of the given type. ``None`` indicates no
126+
quantization.
127+
128+
```python
129+
def VgfQuantizer.set_node_finder(self, quantization_config: 'Optional[QuantizationConfig]', node_finder: 'NodeFinder') -> 'TOSAQuantizer':
130+
```
131+
Set quantization_config for nodes matched by a custom NodeFinder.
132+
133+
Args:
134+
- **quantization_config (Optional[QuantizationConfig])**: Configuration
135+
describing quantization settings for nodes matched by the provided
136+
NodeFinder. ``None`` indicates no quantization.
137+
138+
```python
139+
def VgfQuantizer.set_node_name(self, node_name: 'str', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
140+
```
141+
Set quantization config for a specific node name.
142+
143+
```python
144+
def VgfQuantizer.set_node_target(self, node_target: 'OpOverload', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
145+
```
146+
Set quantization config for a specific operator target.
113147

114148
```python
115149
def VgfQuantizer.transform_for_annotation(self, model: 'GraphModule') -> 'GraphModule':
116150
```
117151
Transform the graph to prepare it for quantization annotation.
118152

119-
Currently transforms scalar values to tensor attributes.
153+
Decomposes all operators where required to get correct quantization parameters.
120154

121155
Args:
122156
- **model (GraphModule)**: Model whose graph will be transformed.

0 commit comments

Comments
 (0)