Merge pull request #372 from jondea/fix-perf-drop-after-torchao-api-change

nSircombe · web-flow · commit 15949e026469 · 2025-09-25T16:50:21.000+01:00
pytorch: fix perf issue from torchao API change
diff --git a/ML-Frameworks/pytorch-aarch64/CHANGELOG.md b/ML-Frameworks/pytorch-aarch64/CHANGELOG.md
@@ -20,6 +20,7 @@ where `YY` is the year, and `MM` the month of the increment.
   - KLEIDIAI_HASH to bd2e6ae060014035e25bf4986be682762c446c2d, v1.14 from main.
 - Update torchvision from 0.23.0 to a nightly build, 0.25.0.dev20250923
 - Change of flag name in `./build.sh` from `--force` to `--fresh`
+- Add `intx_packing_format="opaque_aten_kleidiai"` to `Int8DynamicActivationIntxWeightConfig` in the example scripts to fix a performance drop caused by a torchao API change
 
 ### Removed
 - Removes WIP ComputeLibrary patch https://review.mlplatform.org/c/ml/ComputeLibrary/+/12818/1.
diff --git a/ML-Frameworks/pytorch-aarch64/examples/llama_vision_instruct.py b/ML-Frameworks/pytorch-aarch64/examples/llama_vision_instruct.py
@@ -52,6 +52,7 @@ def main(args):
                 weight_mapping_type=MappingType.SYMMETRIC_NO_CLIPPING_ERR, # MappingType.SYMMETRIC can also be used but increases error
                 layout=layout,
                 weight_dtype=torch.int4,
+                intx_packing_format="opaque_aten_kleidiai",
             ),
         )
 
diff --git a/ML-Frameworks/pytorch-aarch64/examples/transformers_llm_text_gen.py b/ML-Frameworks/pytorch-aarch64/examples/transformers_llm_text_gen.py
@@ -113,6 +113,7 @@ def get_quantized_model(args):
         weight_mapping_type=mapping_type,
         layout=layout,
         weight_dtype=torch.int4,
+        intx_packing_format="opaque_aten_kleidiai",
     )
 
     print("Quantization config:")

Original file line number	Diff line number	Diff line change
`@@ -52,6 +52,7 @@ def main(args):`
`52`	`52`	`weight_mapping_type=MappingType.SYMMETRIC_NO_CLIPPING_ERR, # MappingType.SYMMETRIC can also be used but increases error`
`53`	`53`	`layout=layout,`
`54`	`54`	`weight_dtype=torch.int4,`
	`55`	`+ intx_packing_format="opaque_aten_kleidiai",`
`55`	`56`	`),`
`56`	`57`	`)`
`57`	`58`
Original file line number	Diff line number	Diff line change
`@@ -113,6 +113,7 @@ def get_quantized_model(args):`
`113`	`113`	`weight_mapping_type=mapping_type,`
`114`	`114`	`layout=layout,`
`115`	`115`	`weight_dtype=torch.int4,`
	`116`	`+ intx_packing_format="opaque_aten_kleidiai",`
`116`	`117`	`)`
`117`	`118`
`118`	`119`	`print("Quantization config:")`