Commit 95928c1

Merge pull request #323 from nSircombe/github/april_updates

Update examples and CHANGELOG for r25.04

2 parents e9a7cfe + d0ff4c1

5 files changed: 29 additions & 18 deletions
ML-Frameworks/pytorch-aarch64/CHANGELOG.md

13 additions & 0 deletions

```diff
@@ -7,6 +7,17 @@ where `YY` is the year, and `MM` the month of the increment.
 
 ## [unreleased]
 
+### Added
+
+### Changed
+
+### Removed
+
+### Fixed
+
+## [r25.04] 2025-04-16
+https://github.com/ARM-software/Tool-Solutions/tree/r25.04
+
 ### Added
 - Work in progress oneDNN patch, [Enable jit conv for 128](https://github.com/uxlfoundation/oneDNN/pull/3022) with ~30% speed up for backward convolutions
 - Add `--wheel-only` flag for only building the torch wheel
@@ -25,6 +36,8 @@ where `YY` is the year, and `MM` the month of the increment.
 ### Removed
 - Removes WIP patches which have now landed in the upstream nightly PyTorch builds.
 - Removes `--tags --force` from git clone command, and adds `--depth=1` to speedup the checkout.
+- Temporarily removes `--compile` option from some examples due to an issue with
+  https://github.com/pytorch/pytorch/pull/147151; the compile path does not work as expected in these cases.
 
 ### Fixed
```
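The shallow-checkout change noted in the Removed section above can be sketched as follows. This is a minimal illustration, not code from the repository; `clone_args` is a hypothetical helper name:

```python
def clone_args(repo_url, dest, shallow=True):
    """Build a git clone command line.

    With shallow=True only the latest commit is fetched (--depth=1),
    which is what speeds up the checkout; the previous --tags --force
    arguments are no longer passed.
    """
    args = ["git", "clone"]
    if shallow:
        args.append("--depth=1")
    args += [repo_url, dest]
    return args

# clone_args("https://github.com/pytorch/pytorch.git", "pytorch")
# -> ['git', 'clone', '--depth=1', 'https://github.com/pytorch/pytorch.git', 'pytorch']
```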

ML-Frameworks/pytorch-aarch64/examples/README.md

2 additions & 8 deletions

````diff
@@ -201,7 +201,7 @@ The script [torchchat_llm_text_gen.py](torchchat_llm_text_gen.py) demonstrates h
 To run inference using torchchat call:
 
 ```
-LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libtcmalloc.so.4 TORCHINDUCTOR_CPP_WRAPPER=1 TORCHINDUCTOR_FREEZING=1 OMP_NUM_THREADS=16 python torchchat_llm_text_gen.py --compile
+LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libtcmalloc.so.4 TORCHINDUCTOR_CPP_WRAPPER=1 TORCHINDUCTOR_FREEZING=1 OMP_NUM_THREADS=16 python torchchat_llm_text_gen.py
 ```
 
 #### Command-Line Options
@@ -212,9 +212,6 @@ LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libtcmalloc.so.4 TORCHINDUCTOR_CPP_WRAPPE
 `--max-new-tokens`
 Description: Max new tokens to generate.
 
-`--compile`
-Description: Whether to compile the model (default: `False`).
-
 `--model`
 Description: Model alias. (Default: `"llama2"` )
 
@@ -227,7 +224,7 @@ The script [transformers_llm_text_gen.py](transformers_llm_text_gen.py) demonstr
 To run inference using transformers call:
 
 ```
-LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libtcmalloc.so.4 TORCHINDUCTOR_CPP_WRAPPER=1 TORCHINDUCTOR_FREEZING=1 OMP_NUM_THREADS=16 python transformers_llm_text_gen.py --compile
+LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libtcmalloc.so.4 TORCHINDUCTOR_CPP_WRAPPER=1 TORCHINDUCTOR_FREEZING=1 OMP_NUM_THREADS=16 python transformers_llm_text_gen.py
 ```
 
 #### Command-Line Options
@@ -238,9 +235,6 @@ LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libtcmalloc.so.4 TORCHINDUCTOR_CPP_WRAPPE
 `--max-new-tokens`
 Description: Max new tokens to generate.
 
-`--compile`
-Description: Whether to compile the model (default: `False`).
-
 `--model`
 Description: Local Path to model repo or huggingface model id. (Default: `"meta-llama/Llama-2-7b-hf"` )
 
````
ML-Frameworks/pytorch-aarch64/examples/torchchat_llm_text_gen.py

0 additions & 4 deletions

```diff
@@ -31,8 +31,6 @@ def main(args):
         "python3", torchchat_path, "generate", args.model,
         "--quantize", str(args.quant_config),
         "--prompt", prompt,
-        "--compile" if args.compile else "",
-        "--compile-prefill" if args.compile else "",
         "--max-autotune", "--max-new-tokens", str(args.max_new_tokens)
     ]
     command = [arg for arg in command if arg]
@@ -47,8 +45,6 @@ def main(args):
                         help='Path to json file for quantization config')
     parser.add_argument('--max-new-tokens', type=int,
                         default=64, help='New tokens to generate at decode.')
-    parser.add_argument('--compile', action='store_true',
-                        help='Whether to compile the model.')
     parser.add_argument('--model', type=str, default="llama2",
                         help='Torchchat supported model alias')
     parser.add_argument('--prompt', type=str, default="In a distant world where magic and technology coexist, "
```
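The script above builds its subprocess command by inserting empty strings for disabled flags and filtering them out afterwards (the `command = [arg for arg in command if arg]` line kept by this diff). A stand-alone sketch of that pattern, using hypothetical script and flag names rather than the real torchchat invocation:

```python
def build_command(model, prompt, max_new_tokens, compile_model=False):
    # Optional flags become "" when disabled; the final list comprehension
    # drops every falsy entry, mirroring the example script's approach.
    command = [
        "python3", "generate.py", model,
        "--prompt", prompt,
        "--compile" if compile_model else "",
        "--max-new-tokens", str(max_new_tokens),
    ]
    return [arg for arg in command if arg]

# build_command("llama2", "hi", 64)
# -> ['python3', 'generate.py', 'llama2', '--prompt', 'hi', '--max-new-tokens', '64']
```

With `compile_model=False` (the default after this commit), no empty string reaches the subprocess argument list.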

ML-Frameworks/pytorch-aarch64/examples/transformers_llm_text_gen.py

0 additions & 6 deletions

```diff
@@ -130,10 +130,6 @@ def get_quantized_model(args):
         print("Quantizing model to 4 bit ..")
         quantize_model(model, "cpu", args.quant_config)
     model = model.eval()
-    if args.compile:
-        model.generation_config.cache_implementation = "static"
-        model.forward = torch.compile(
-            model.forward, backend='inductor', dynamic=True, fullgraph=True)
     return model, tokenizer, config
 
 
@@ -197,8 +193,6 @@ def main(args):
                         "gen_ai_utils/quant_configs/aarch64_cpu_channelwise.json", help='Path to json file for quantization config')
     parser.add_argument('--max-new-tokens', type=int,
                         default=64, help='New tokens to generate at decode.')
-    parser.add_argument('--compile', action='store_true',
-                        help='Whether to compile the model.')
     parser.add_argument('--model', type=Path, default=Path("meta-llama/Llama-2-7b-hf"),
                         help='Hugging Face model ID or Cloned model repository with model files')
     parser.add_argument('--prompt', type=str, default="In a distant world where magic and technology coexist, "
```
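Dropping `parser.add_argument('--compile', ...)` means the parsed namespace no longer carries a `compile` attribute, which is why the `if args.compile:` block in `get_quantized_model` must go in the same commit: any leftover reference to `args.compile` would raise `AttributeError`. A minimal sketch of that behaviour, assuming a stripped-down parser rather than the full example script:

```python
import argparse

# Parser as it looks after this commit: the --compile flag is not registered.
parser = argparse.ArgumentParser()
parser.add_argument('--max-new-tokens', type=int, default=64)
args = parser.parse_args([])

# The namespace has no 'compile' attribute, so code guarded by
# `if args.compile:` cannot be left behind when the flag is removed.
assert not hasattr(args, 'compile')
assert args.max_new_tokens == 64
```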

ML-Frameworks/tensorflow-aarch64/CHANGELOG.md

14 additions & 0 deletions

```diff
@@ -15,6 +15,20 @@ where `YY` is the year, and `MM` the month of the increment.
 
 ### Fixed
 
+## [r25.04] 2025-04-16
+https://github.com/ARM-software/Tool-Solutions/tree/r25.04
+
+### Added
+- Enables patching of build outside of Bazel build.
+- default num_threads to max for acl_threadpool, see www.github.com/tensorflow/uxlfoundation/oneDNN/2958
+
+### Changed
+- Updates TensorFlow build to use oneDNN 3.7 + ACL 24.12, see www.github.com/tensorflow/tensorflow/pull/84975
+
+### Removed
+
+### Fixed
+
 ## [r25.03.1] 2025-03-26
 https://github.com/ARM-software/Tool-Solutions/tree/r25.03.1
 
```