Merge pull request #365 from jondea/remove-more-deps

nSircombe · web-flow · commit 544175e11955 · 2025-09-24T08:53:09.000+01:00
Remove more deps
diff --git a/ML-Frameworks/pytorch-aarch64/CHANGELOG.md b/ML-Frameworks/pytorch-aarch64/CHANGELOG.md
@@ -18,6 +18,9 @@ where `YY` is the year, and `MM` the month of the increment.
   - Delete unused submodules to avoid fetching.
   - Replace google-perftools bundle with specific libtcmalloc-minimal4 and add symlink so that you can still use it with `LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libtcmalloc.so.4`.
   - Remove unused git apt package.
+  - Remove outdated versions of pip/setuptools/wheel from image after pip updates itself.
+  - Remove manual install of transformers in `transformers_llm_text_gen.py`; we now use a single version everywhere.
+  - Remove unused pip packages: datasets, omegaconf, psutil, tiktoken, torchaudio, torchdata and torchtune.
 
 ### Fixed
 
diff --git a/ML-Frameworks/pytorch-aarch64/Dockerfile b/ML-Frameworks/pytorch-aarch64/Dockerfile
@@ -28,8 +28,6 @@ ENV TORCH_WHEEL=$TORCH_WHEEL
 ARG TORCH_AO_WHEEL
 ENV TORCH_AO_WHEEL=$TORCH_AO_WHEEL
 
-# TODO make sure Openblas is 0.3.28
-# And mlcommons is 4.1
 RUN if ! [ "$(arch)" = "aarch64" ] ; then exit 1; fi
 
 RUN apt-get update && apt-get install -y \
@@ -67,8 +65,13 @@ RUN echo 'export PATH="$HOME/.local/bin:$PATH"' >>  /etc/bash.bashrc
 WORKDIR /home/$DOCKER_USER
 USER $DOCKER_USER
 
+# Update to newer pip/setuptools/wheel (setuptools >= 70.0.0 due to CVE-2024-6345
+# and CVE-2025-47273, wheel >= 0.38.0 due to CVE-2022-40898) and delete old system
+# version (we essentially use apt:python3-pip to bootstrap pip)
+RUN pip install --upgrade pip~=25.2 setuptools~=78.1.1 wheel~=0.45.1 \
+    && sudo rm -r /usr/lib/python3/dist-packages/
+
 # Base requirements for examples, excluding torch and torch*
-RUN pip install --upgrade pip
 COPY requirements.txt ./
 RUN pip install -r requirements.txt
 
@@ -86,13 +89,8 @@ COPY $TORCH_AO_WHEEL /home/$DOCKER_USER/
 # will need to manually add their dependencies. We don't use the nightly
 # versions which correspond to our torch build because they can disappear,
 # and we usually don't need features from the nightlies.
-# Note: torchvision is currently pinned to a nightly build, this can be updated
-# at the next vision release, and the `--extra-index-url` removed.
 RUN pip install \
-    torchaudio~=2.6.0 \
-    torchdata~=0.11.0 \
     torchvision~=0.23.0 \
-    torchtune~=0.5.0 \
     --no-deps
 
 # We need --no-deps because the torch version won't match the versions on torch*
diff --git a/ML-Frameworks/pytorch-aarch64/examples/gen_ai_utils/setup_local_packages.py b/ML-Frameworks/pytorch-aarch64/examples/gen_ai_utils/setup_local_packages.py
diff --git a/ML-Frameworks/pytorch-aarch64/examples/transformers_llm_text_gen.py b/ML-Frameworks/pytorch-aarch64/examples/transformers_llm_text_gen.py
@@ -37,14 +37,6 @@
 from torchao.quantization.granularity import PerGroup, PerAxis
 from torchao.quantization.quant_primitives import MappingType
 
-# This script requires a fairly recent version of transformers
-
-gen_ai_utils = os.getcwd() + "/gen_ai_utils/"
-local_packages = gen_ai_utils + "/genai_local_packages"
-install_script = gen_ai_utils + "/setup_local_packages.py"
-subprocess.run([sys.executable, install_script, local_packages, "transformers==4.47.1"])
-sys.path.insert(0, local_packages)
-
 from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer, TextStreamer
 from torch.profiler import profile, ProfilerActivity, tensorboard_trace_handler
 
@@ -95,7 +87,7 @@ def load_model_components(model_folder_path):
 
 
 def get_quantized_model(args):
-    model_name = f"{args.model.name}"
+    model_name = f"{args.model}"
     print("Running model ", model_name)
     config, tokenizer, model = load_model_components(args.model)
     if model is None:
@@ -189,7 +181,7 @@ def eval_quantized_output(quantized_model, tokenizer, input_tensor, max_min_toke
 
 
 def main(args):
-    name_string = f"{args.model.name}"
+    name_string = f"{args.model}"
     quantized_model_, tokenizer_, config_ = get_quantized_model(args)
     input_tensor = tokenizer_.encode(args.prompt, return_tensors="pt")
     eval_quantized_output(
@@ -223,8 +215,8 @@ def main(args):
     )
     parser.add_argument(
         "--model",
-        type=Path,
-        default=Path("TinyLlama/TinyLlama-1.1B-Chat-v1.0"),
+        type=str,
+        default="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
         help="Hugging Face model ID or Cloned model repository with model files",
     )
     parser.add_argument(
diff --git a/ML-Frameworks/pytorch-aarch64/requirements.txt b/ML-Frameworks/pytorch-aarch64/requirements.txt
@@ -3,7 +3,6 @@
 # PyTorch channel and our own built torch wheel (see Dockerfile for more info)
 certifi==2024.8.30
 charset-normalizer~=3.4.0
-datasets~=3.4.1
 expecttest==0.3.0 # From unit tests
 filelock~=3.16.1
 fsspec==2024.9.0
@@ -14,29 +13,23 @@ MarkupSafe~=3.0.1
 mpmath~=1.3.0
 networkx~=3.4.1
 numpy~=2.1.2
-omegaconf~=2.3.0
 opencv-python-headless~=4.10.0.84
 packaging~=24.1
 pandas~=2.2.3
 pillow~=11.0.0
 protobuf==5.29.5      # GenAI models dependency
-psutil~=7.0.0
 pyaml~=24.9.0
 python-dateutil~=2.9.0.post0
 pytz==2024.2
 PyYAML~=6.0.2
 regex==2024.9.11
 requests~=2.32.3
 safetensors~=0.4.5
-setuptools~=78.1.1    # >= 70.0.0 due to CVE-2024-6345 and CVE-2025-47273
 six~=1.16.0
 sympy~=1.13.1
-tiktoken~=0.9.0
 tokenizers~=0.21.0
 tqdm~=4.66.5
 transformers~=4.55.2  # >= 4.50.0 due to CVE-2025-2099
 typing_extensions~=4.12.2
 tzdata==2024.2
 urllib3~=2.2.3
-wheel~=0.38.0         # >= 0.38.0 due to CVE-2022-40898
-