Merged

38 commits
2f69e4e
Add Makefile for Docker build and run management
xiu-cs Apr 6, 2026
530a886
Add Dockerfile for FMPose3D environment setup
xiu-cs Apr 6, 2026
43aef6e
Ensure checkpoints are downloaded only when HRNet is requested in gen…
xiu-cs Apr 6, 2026
bb79602
Update FMPose3D_test.sh to allow optional local model weights
xiu-cs Apr 6, 2026
d66a0a0
Update vis_in_the_wild.sh to allow optional local model weights
xiu-cs Apr 6, 2026
c43b46b
Enhance model weight loading in vis_in_the_wild.py to support automat…
xiu-cs Apr 6, 2026
74d2dab
Enhance FMPose3D_main.py to support automatic downloading of model we…
xiu-cs Apr 7, 2026
30eb7fc
Refactor model weight loading in vis_in_the_wild.py to utilize resolv…
xiu-cs Apr 7, 2026
53aefde
Refactor model weight loading in FMPose3D_main.py to utilize resolve_…
xiu-cs Apr 7, 2026
1ff5d01
Refactor FMPose3D to import Hugging Face repository ID from utils. Th…
xiu-cs Apr 7, 2026
8768cc4
Add utility for resolving and downloading FMPose3D model weights
xiu-cs Apr 7, 2026
ce1c4f0
Add __init__.py file to utils directory for FMPose3D
xiu-cs Apr 7, 2026
e122e51
add header
xiu-cs Apr 7, 2026
3d1253a
add header
xiu-cs Apr 7, 2026
ec10769
Update test for SuperAnimalPrediction to handle ImportError for missi…
xiu-cs Apr 7, 2026
6254cc8
Fix typo: num_peroson -> num_person in gen_kpts.py and call sites
Copilot Apr 7, 2026
d6e1fdb
Remove build_production target from Makefile and update help message …
xiu-cs Apr 7, 2026
3bb81e5
Fix typos in comments for clarity in lifter3d.py, correcting "matrics…
xiu-cs Apr 7, 2026
f1631c9
Remove temporary weights validation sentinel from fmpose3d.py, stream…
xiu-cs Apr 7, 2026
962f395
Fix typo in comment of bbox.py, correcting "corrdinates" to "coordina…
xiu-cs Apr 7, 2026
09ce42f
Fix typo in comment of darknet.py, correcting "anotation" to "annotat…
xiu-cs Apr 7, 2026
94b5100
Fix typos in comments for clarity in human_detector.py, correcting "a…
xiu-cs Apr 7, 2026
b2151a6
Remove unused import of plot_keypoint and write from gen_kpts.py, str…
xiu-cs Apr 7, 2026
ef803e7
Fix typos in comments for clarity in utils.py, correcting "origial" t…
xiu-cs Apr 7, 2026
bdd95b1
Fix typo in animal3d_dataset.py, correcting "hight" to "height" for i…
xiu-cs Apr 7, 2026
6107a7a
Fix typos in comments for clarity in utils.py, correcting "origial" t…
xiu-cs Apr 7, 2026
2b1626d
Remove unused functions plot_keypoint, write, and load_json from util…
xiu-cs Apr 7, 2026
c944d25
Add testpaths configuration to pytest in pyproject.toml for improved …
xiu-cs Apr 7, 2026
e185185
Update Dockerfile to install FMPose3D from GitHub instead of PyPI, en…
xiu-cs Apr 7, 2026
11ab3d3
Update HF_REPO_ID
xiu-cs Apr 7, 2026
2f5f771
Update README.md to clarify pre-trained model setup, highlighting aut…
xiu-cs Apr 7, 2026
52c2497
fix test_hugginface.py: change huggingface repo ID (use centralized c…
deruyter92 Apr 8, 2026
89ff316
Refactor vis_in_the_wild.py: remove redundant import of resolve_weigh…
xiu-cs Apr 8, 2026
9f46de8
Update Dockerfile
xiu-cs Apr 9, 2026
478abcd
Update Makefile
xiu-cs Apr 9, 2026
8098412
Update Makefile
xiu-cs Apr 9, 2026
1eb00b4
Update Makefile
xiu-cs Apr 9, 2026
8b402c3
Refactor FMPose3D_main.py: move import of resolve_weights_path to the…
xiu-cs Apr 9, 2026
67 changes: 67 additions & 0 deletions Dockerfile
@@ -0,0 +1,67 @@
FROM pytorch/pytorch:2.4.1-cuda12.4-cudnn9-runtime

ARG USERNAME=fmpose3d
ARG USER_UID=1000
ARG USER_GID=1000
RUN groupadd ${USERNAME} --gid ${USER_GID}
RUN useradd -m -s /bin/bash -g ${USERNAME} -u ${USER_UID} ${USERNAME}
Comment on lines +6 to +7
Collaborator
Just to be safe, you could change this to the suggestion below, since the current command may fail if the user already exists (e.g. in the parent image). But I don't expect that this is a realistic scenario, so you can also keep yours for better readability.

Suggested change
RUN groupadd ${USERNAME} --gid ${USER_GID}
RUN useradd -m -s /bin/bash -g ${USERNAME} -u ${USER_UID} ${USERNAME}
RUN set -eux; \
if getent group "${USER_GID}" >/dev/null; then \
GROUP_NAME="$(getent group "${USER_GID}" | cut -d: -f1)"; \
else \
groupadd --gid "${USER_GID}" "${USERNAME}"; \
GROUP_NAME="${USERNAME}"; \
fi; \
if id -u "${USER_UID}" >/dev/null 2>&1; then \
EXISTING_USER="$(getent passwd "${USER_UID}" | cut -d: -f1)"; \
usermod -l "${USERNAME}" "${EXISTING_USER}" || true; \
usermod -d "/home/${USERNAME}" -m "${USERNAME}" || true; \
usermod -g "${GROUP_NAME}" "${USERNAME}" || true; \
else \
useradd -m -s /bin/bash -u "${USER_UID}" -g "${GROUP_NAME}" "${USERNAME}"; \
fi

RUN mkdir /app /logs /data

ENV DEBIAN_FRONTEND=noninteractive
SHELL ["/bin/bash", "-c"]
RUN apt-get update -y && apt-get install -yy --no-install-recommends \
vim zsh tmux wget curl htop git sudo ssh git-lfs \
python3 python3-pip \
libgl1-mesa-glx libglib2.0-0 \
ffmpeg \
&& apt-get -y autoclean \
&& apt-get -y autoremove \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean

ENV PATH="/opt/conda/bin:${PATH}"

# Initialize conda for root just in case and fix symlinks
RUN ln -fs /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh

# --- Install FMPose3D from GitHub ---
RUN python -m pip install --no-cache-dir --upgrade pip && \
git clone --depth 1 https://github.com/AdaptiveMotorControlLab/FMPose3D.git /tmp/fmpose3d && \
python -m pip install --no-cache-dir "/tmp/fmpose3d[animals,viz]" gdown && \
rm -rf /tmp/fmpose3d
Comment on lines +28 to +31
Collaborator
Since Docker is often used for reproducibility, I think it would be best if you pin a specific version here, instead of cloning at HEAD.

Collaborator Author

Good suggestion! Actually, I pinned a specific version in the first iteration, but this PR changes some functions inside the package, so the currently released package version does not work with the training code.


# Allow non-root user to download DLC model weights at runtime
RUN DLC_MODELZOO_DIR="$(python -c "import site, pathlib; print(pathlib.Path(site.getsitepackages()[0]) / 'deeplabcut' / 'modelzoo')")" \
&& mkdir -p "${DLC_MODELZOO_DIR}/checkpoints" \
&& chown -R "${USERNAME}:${USERNAME}" "${DLC_MODELZOO_DIR}"

# Set your user as owner of the home directory before switching
RUN chown -R ${USERNAME}:${USERNAME} /home/${USERNAME}

ENV NVIDIA_DRIVER_CAPABILITIES=all

# --- SWITCH TO USER ---
USER ${USERNAME}
ENV HOME=/home/${USERNAME}
WORKDIR ${HOME}

# Ensure dotfiles exist
RUN touch ${HOME}/.bashrc ${HOME}/.zshrc && \
chmod 644 ${HOME}/.bashrc ${HOME}/.zshrc

# Install Oh My Zsh and plugins (MUST come before conda init, since OMZ overwrites .zshrc)
RUN sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" --unattended \
&& git clone https://github.com/zsh-users/zsh-autosuggestions ~/.oh-my-zsh/custom/plugins/zsh-autosuggestions \
&& git clone https://github.com/zsh-users/zsh-syntax-highlighting ~/.oh-my-zsh/custom/plugins/zsh-syntax-highlighting \
&& sed -i 's/^plugins=(.*)$/plugins=(git zsh-autosuggestions zsh-syntax-highlighting)/' ~/.zshrc \
&& echo "export PATH=\$PATH:${HOME}/.local/bin" >> ~/.zshrc

# Initialize conda AFTER Oh My Zsh (so conda init block is not overwritten)
RUN /opt/conda/bin/conda init bash && /opt/conda/bin/conda init zsh

# Add conda activation to bashrc and zshrc
RUN echo "conda activate base" >> ${HOME}/.bashrc && \
echo "conda activate base" >> ${HOME}/.zshrc

SHELL ["/bin/zsh", "-c"]
CMD ["zsh"]
66 changes: 66 additions & 0 deletions Makefile
@@ -0,0 +1,66 @@
# [USER: ADJUST PATH]
PROJECT_NAME := fmpose3d
#######
# RUN #
#######

# for local server
IMG_NAME := fmpose3d
IMG_TAG := v0.1
DOCKERFILE := Dockerfile
# Linux/macOS: use host UID/GID; Windows fallback to 1000
HOST_UID ?= $(shell sh -c 'id -u 2>/dev/null || echo 1000')
HOST_GID ?= $(shell sh -c 'id -g 2>/dev/null || echo 1000')
BUILD_ARGS := \
--build-arg USERNAME=fmpose3d \
--build-arg USER_GID=$(HOST_GID) \
--build-arg USER_UID=$(HOST_UID)

build:
docker build $(BUILD_ARGS) \
-t $(IMG_NAME):$(IMG_TAG) -f $(DOCKERFILE) .

build-clean:
docker build --no-cache $(BUILD_ARGS) \
-t $(IMG_NAME):$(IMG_TAG) -f $(DOCKERFILE) .


CONTAINER_NAME := fmpose3d_dev1
# [USER: ADJUST] Mount the project root into the container
HOST_SRC := $(shell pwd)
DOCKER_SRC := /fmpose3d
VOLUMES := \
--volume "$(HOST_SRC):$(DOCKER_SRC)"


run:
docker run -it --gpus all --shm-size=64g --name $(CONTAINER_NAME) -w $(DOCKER_SRC) $(VOLUMES) $(IMG_NAME):$(IMG_TAG)

exec:
docker exec -it -w $(DOCKER_SRC) $(CONTAINER_NAME) /bin/zsh

exec_bash:
docker exec -it -w $(DOCKER_SRC) $(CONTAINER_NAME) /bin/bash

stop:
docker stop $(CONTAINER_NAME)

rm:
docker rm $(CONTAINER_NAME)

# Help message
help:
@echo "Available targets:"
@echo " build - Build Docker image for local server."
@echo " run - Run a Docker container with GPU."
@echo " exec - Attach to running container (zsh)."
@echo " exec_bash - Attach to running container (bash)."
@echo " stop - Stop the running container."
@echo " rm - Remove the stopped container."
@echo " help - Show this help message."
@echo ""
@echo "Usage:"
@echo " 1. make build"
@echo " 2. make run"
@echo " 3. Inside container: pip install -e '.[animals,viz]'"
@echo " 4. Inside container: sh scripts/FMPose3D_train.sh"
Comment on lines +65 to +66
Collaborator
The Dockerfile already installs fmpose3d, right?

Suggested change
@echo " 3. Inside container: pip install -e '.[animals,viz]'"
@echo " 4. Inside container: sh scripts/FMPose3D_train.sh"
@echo " 3. Inside container: sh scripts/FMPose3D_train.sh"

7 changes: 4 additions & 3 deletions README.md
@@ -51,8 +51,9 @@ pip install "fmpose3d[animals]"

This visualization script is designed for single-frame based model, allowing you to easily run 3D human pose estimation on any single image.

Before testing, make sure you have the pre-trained model ready.
You may either use the model trained by your own or download ours from [here](https://drive.google.com/drive/folders/1235_UgUQXYZtjprBOv2ZJJHY2KOAS_6p?usp=sharing) and place it in the `./pre_trained_models` directory.
Pre-trained weights are downloaded automatically from [Hugging Face](https://huggingface.co/MLAdaptiveIntelligence/FMPose3D) the first time you run inference, so no manual setup is needed.

Alternatively, you can use your own trained weights or download ours from [Google Drive](https://drive.google.com/drive/folders/1aRZ6t_6IxSfM1nCTFOUXcYVaOk-5koGA?usp=sharing), place them in the `./pre_trained_models` directory, and set `model_weights_path` in the shell script (e.g. `demo/vis_in_the_wild.sh`).

Next, put your test images into folder `demo/images`. Then run the visualization script:
```bash
@@ -93,7 +94,7 @@ sh ./scripts/FMPose3D_train.sh

### Inference

First, download the folder with pre-trained model from [here](https://drive.google.com/drive/folders/1235_UgUQXYZtjprBOv2ZJJHY2KOAS_6p?usp=sharing) and place it in the './pre_trained_models' directory.
Pre-trained weights are fetched automatically from Hugging Face on the first run. You can also use local weights by setting `model_weights_path` in the shell script (see [Demos](#testing-on-in-the-wild-images-humans) above for details).

To run inference on Human3.6M:

8 changes: 5 additions & 3 deletions demo/vis_in_the_wild.py
@@ -19,6 +19,7 @@
from fmpose3d.lib.checkpoint.download_checkpoints import ensure_checkpoints
ensure_checkpoints()

from fmpose3d.utils.weights import resolve_weights_path
from fmpose3d.lib.preprocess import h36m_coco_format, revise_kpts
from fmpose3d.lib.hrnet.gen_kpts import gen_video_kpts as hrnet_pose
from fmpose3d.common.arguments import opts as parse_args
@@ -113,7 +114,7 @@ def show3Dpose(vals, ax):
def get_pose2D(path, output_dir, type):

print('\nGenerating 2D pose...')
keypoints, scores = hrnet_pose(path, det_dim=416, num_peroson=1, gen_output=True, type=type)
keypoints, scores = hrnet_pose(path, det_dim=416, num_person=1, gen_output=True, type=type)
keypoints, scores, valid_frames = h36m_coco_format(keypoints, scores)
re_kpts = revise_kpts(keypoints, scores, valid_frames)
print('Generating 2D pose successful!')
@@ -278,8 +279,9 @@ def get_pose3D(path, output_dir, type='image'):

# if args.reload:
model_dict = model['CFM'].state_dict()
model_path = args.model_weights_path
print(model_path)
model_path = resolve_weights_path(args.model_weights_path, args.model_type)

print(f"Loading weights from: {model_path}")
pre_dict = torch.load(model_path, map_location=device, weights_only=True)
for name, key in model_dict.items():
model_dict[name] = pre_dict[name]
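The change above routes the configured path through `resolve_weights_path(args.model_weights_path, args.model_type)` before `torch.load`. A minimal sketch of that precedence — prefer an explicit local file, otherwise fall back to a Hub download (the function name, signature, and filename scheme here are assumptions for illustration, not the package's actual API):

```python
from pathlib import Path
from typing import Callable, Optional


def resolve_weights(local_path: str, model_type: str,
                    downloader: Optional[Callable[[str], str]] = None) -> str:
    """Return a usable weights path: an explicit local file wins,
    otherwise fall back to downloading (e.g. via hf_hub_download)."""
    if local_path:
        p = Path(local_path)
        if not p.is_file():
            raise FileNotFoundError(f"weights not found: {p}")
        return str(p)
    if downloader is None:
        raise ValueError("no local weights given and no downloader configured")
    # hypothetical filename convention derived from the model type
    return downloader(f"{model_type}.pth")


# usage with a stand-in downloader instead of a real Hub call
path = resolve_weights("", "fmpose3d_humans", downloader=lambda f: f"/cache/{f}")
```

With this shape, an empty `model_weights_path` in the shell script cleanly triggers the automatic download, which matches the behavior the README diff describes.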
6 changes: 5 additions & 1 deletion demo/vis_in_the_wild.sh
@@ -6,7 +6,11 @@ batch_size=1
sh_file='vis_in_the_wild.sh'

model_type='fmpose3d_humans'
model_weights_path='../pre_trained_models/fmpose3d_h36m/FMpose3D_pretrained_weights.pth'

# By default, weights are automatically downloaded from Hugging Face Hub.
# To use local weights instead, uncomment the line below:
# model_weights_path='../pre_trained_models/fmpose3d_h36m/FMpose3D_pretrained_weights.pth'
model_weights_path=''

target_path='./images/' # folder containing multiple images
# target_path='./images/xx.png' # single image
4 changes: 2 additions & 2 deletions fmpose3d/animals/common/animal3d_dataset.py
@@ -38,9 +38,9 @@ def __getitem__(self, item):
else:
keypoint_2d = np.array(data.get("keypoint_2d", []), dtype=np.float32)
# normalize 2D keypoints
hight = np.array(data["height"])
height = np.array(data["height"])
width = np.array(data["width"])
keypoint_2d = normalize_screen_coordinates(keypoint_2d[..., :2], width, hight)
keypoint_2d = normalize_screen_coordinates(keypoint_2d[..., :2], width, height)

# build 3D keypoints; append ones; fallback to zeros if missing
if "keypoint_3d" in data and data["keypoint_3d"] is not None:
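The corrected lines pass `width` and `height` to `normalize_screen_coordinates`. A sketch of the usual convention for that helper — the VideoPose3D-style mapping to roughly [-1, 1] with aspect ratio preserved; this is an assumption about the helper, not code copied from the package:

```python
import numpy as np


def normalize_screen_coordinates(X, w, h):
    """Map pixel coordinates into ~[-1, 1], keeping the aspect ratio:
    x -> x/w*2 - 1, y -> y/w*2 - h/w (VideoPose3D convention, assumed)."""
    assert X.shape[-1] == 2
    return X / w * 2 - np.array([1, h / w], dtype=X.dtype)


kp = np.array([[0.0, 0.0], [640.0, 480.0], [320.0, 240.0]], dtype=np.float32)
out = normalize_screen_coordinates(kp, w=640, h=480)
# corners map to (-1, -0.75) and (1, 0.75); the image center maps to (0, 0)
```

Swapping `height` for the misspelled `hight` is purely cosmetic here, but it keeps the argument order (`width, height`) unambiguous at the call site.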
6 changes: 3 additions & 3 deletions fmpose3d/animals/common/lifter3d.py
@@ -306,7 +306,7 @@ def triangulate_3d_batch(points_2d_batch, cameras):
num_cams, num_frames, num_joints, _ = points_2d_batch.shape # (6, num_frames, 23, 2)

print("num_cams,num_frames,num_joinits", points_2d_batch.shape)
# **1. compute projection matrics (6, 3, 4)**
# **1. compute projection matrices (6, 3, 4)**
proj_matrices = np.array(
[cam["intrinsic_matrix"] @ np.hstack((cam["R"], cam["T"])) for cam in cameras]
) # numpy array (6, 3, 4)
@@ -361,7 +361,7 @@ def triangulate_3d(points_2d, cameras):
for i, cam in enumerate(cameras):
K, dist, R, T = cam["intrinsic_matrix"], cam["distortion_coeffs"], cam["R"], cam["T"]

P = K @ np.hstack((R, T)) # projectio matrix
P = K @ np.hstack((R, T)) # projection matrix
# print("Projection Matrix P:\n", P)

proj_matrices.append(P)
@@ -628,7 +628,7 @@ def main():
# reprojected_2d = project_3d_to_2d(points_3d, cameras[i])
# # visualize_2d_on_video(video_files[i], frame_number, points_2d_frame[i], reprojected_2d, output_path)

# # test on trangulate 3D batch
# # test on triangulate 3D batch
# left_frame_id = 10
# right_frame_id = 60

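As the fixed comments say, `triangulate_3d` builds per-camera projection matrices `P = K @ [R | T]` and solves for the 3D point. A self-contained two-view DLT sketch of the same idea (synthetic cameras, not the package's code):

```python
import numpy as np


def triangulate_point(points_2d, proj_matrices):
    """DLT triangulation: stack u*P[2]-P[0] and v*P[2]-P[1] per view,
    then take the SVD null vector as the homogeneous 3D point."""
    rows = []
    for (u, v), P in zip(points_2d, proj_matrices):
        rows.append(u * P[2] - P[0])
        rows.append(v * P[2] - P[1])
    _, _, vt = np.linalg.svd(np.asarray(rows))
    X = vt[-1]
    return X[:3] / X[3]


def project(P, X):
    x = P @ np.append(X, 1.0)
    return x[:2] / x[2]


# two synthetic cameras, P = K @ [R | T] as in triangulate_3d
K = np.array([[1000.0, 0, 320], [0, 1000.0, 240], [0, 0, 1]])
P1 = K @ np.hstack((np.eye(3), np.zeros((3, 1))))
P2 = K @ np.hstack((np.eye(3), np.array([[-1.0], [0.0], [0.0]])))

X_true = np.array([0.2, -0.1, 5.0])
X_hat = triangulate_point([project(P1, X_true), project(P2, X_true)], [P1, P2])
# with noise-free projections, X_hat recovers X_true
```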
4 changes: 2 additions & 2 deletions fmpose3d/animals/common/utils.py
@@ -354,7 +354,7 @@ def save_top_N_models(

def back_to_ori_uv(cropped_uv, bb_box):
"""
for cropped uv, back to origial uv to help do the uvd->xyz operation
for cropped uv, back to original uv to help do the uvd->xyz operation
:return:
"""
N, T, V, _ = cropped_uv.size()
@@ -423,7 +423,7 @@ def project_to_2d(X, camera_params):

Arguments:
X -- 3D points in *camera space* to transform (N, *, 3)
camera_params -- intrinsic parameteres (N, 2+2+3+2=9)
camera_params -- intrinsic parameters (N, 2+2+3+2=9)
"""
assert X.shape[-1] == 3 # B,J,3
assert len(camera_params.shape) == 2 # camera_params:[B,1,9]
4 changes: 2 additions & 2 deletions fmpose3d/common/utils.py
@@ -378,7 +378,7 @@ def save_top_N_models(

def back_to_ori_uv(cropped_uv, bb_box):
"""
for cropped uv, back to origial uv to help do the uvd->xyz operation
for cropped uv, back to original uv to help do the uvd->xyz operation
:return:
"""
N, T, V, _ = cropped_uv.size()
@@ -453,7 +453,7 @@ def project_to_2d(X, camera_params):

Arguments:
X -- 3D points in *camera space* to transform (N, *, 3)
camera_params -- intrinsic parameteres (N, 2+2+3+2=9)
camera_params -- intrinsic parameters (N, 2+2+3+2=9)
"""
assert X.shape[-1] == 3 # B,J,3
assert len(camera_params.shape) == 2 # camera_params:[B,1,9]
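The corrected docstring describes a 9-value intrinsic vector (2+2+3+2 = 9, presumably focal length, principal point, radial and tangential distortion). A distortion-free sketch of the linear part of such a projection — the parameter split is an assumption read off the docstring, not the package's implementation:

```python
import numpy as np


def project_to_2d_linear(X, f, c):
    """Pinhole projection of camera-space points: perspective divide,
    then scale by focal length and shift by the principal point.
    Ignores the radial/tangential terms the full 9-parameter version models."""
    assert X.shape[-1] == 3
    xy = X[..., :2] / X[..., 2:3]  # perspective divide by depth
    return xy * f + c


X = np.array([[0.0, 0.0, 2.0], [0.5, -0.25, 2.0]])
f = np.array([1000.0, 1000.0])   # focal length (fx, fy)
c = np.array([320.0, 240.0])     # principal point (cx, cy)
uv = project_to_2d_linear(X, f, c)
# a point on the optical axis, (0, 0, 2), lands at the principal point (320, 240)
```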
5 changes: 1 addition & 4 deletions fmpose3d/inference_api/fmpose3d.py
@@ -34,8 +34,7 @@
ProgressCallback = Callable[[int, int], None]


#: HuggingFace repository hosting the official FMPose3D checkpoints.
_HF_REPO_ID: str = "deruyter92/fmpose_temp"
from fmpose3d.utils.weights import HF_REPO_ID as _HF_REPO_ID
Collaborator
Good fix, thanks! Maybe just move it to the import block with the other `from fmpose3d ...` imports.


# Default camera-to-world rotation quaternion (from the demo script).
_DEFAULT_CAM_ROTATION = np.array(
@@ -759,8 +758,6 @@ class _IngestedInput:
# ---------------------------------------------------------------------------


# FIXME @deruyter92: THIS IS TEMPORARY UNTIL WE DOWNLOAD THE WEIGHTS FROM HUGGINGFACE
SKIP_WEIGHTS_VALIDATION = object() # sentinel value to indicate that the weights should not be validated

class FMPose3DInference:
"""High-level, two-step inference API for FMPose3D.
22 changes: 12 additions & 10 deletions fmpose3d/lib/hrnet/gen_kpts.py
@@ -24,7 +24,7 @@
import cv2
import copy

from fmpose3d.lib.hrnet.lib.utils.utilitys import plot_keypoint, PreProcess, write, load_json
from fmpose3d.lib.hrnet.lib.utils.utilitys import PreProcess
from fmpose3d.lib.hrnet.lib.config import cfg, update_config
from fmpose3d.lib.hrnet.lib.utils.transforms import *
from fmpose3d.lib.hrnet.lib.utils.inference import get_final_preds
@@ -34,7 +34,6 @@

# Auto-download checkpoints if missing and get checkpoint paths
from fmpose3d.lib.checkpoint.download_checkpoints import ensure_checkpoints, get_checkpoint_path
ensure_checkpoints()

# Loading human detector model
from fmpose3d.lib.yolov3.human_detector import load_model as yolo_model
@@ -100,7 +99,7 @@ def model_load(config):

return model

def gen_from_image(args, frame, people_sort, human_model, pose_model, det_dim=416, num_peroson=1, gen_output=False):
def gen_from_image(args, frame, people_sort, human_model, pose_model, det_dim=416, num_person=1, gen_output=False):

bboxs, scores = yolo_det(frame, human_model, reso=det_dim, confidence=args.thred_score)
if bboxs is None or not bboxs.any():
@@ -118,7 +117,7 @@ def gen_from_image(args, frame, people_sort, human_model, pose_model, det_dim=41
if people_track.shape[0] == 1:
people_track_ = people_track[-1, :-1].reshape(1, 4)
elif people_track.shape[0] >= 2:
people_track_ = people_track[-num_peroson:, :-1].reshape(num_peroson, 4)
people_track_ = people_track[-num_person:, :-1].reshape(num_person, 4)
people_track_ = people_track_[::-1]
else:
return [], []
@@ -130,7 +129,7 @@ def gen_from_image(args, frame, people_sort, human_model, pose_model, det_dim=41

with torch.no_grad():
# bbox is coordinate location
inputs, origin_img, center, scale = PreProcess(frame, track_bboxs, cfg, num_peroson)
inputs, origin_img, center, scale = PreProcess(frame, track_bboxs, cfg, num_person)

inputs = inputs[:, [2, 1, 0]]

@@ -141,8 +140,8 @@ def gen_from_image(args, frame, people_sort, human_model, pose_model, det_dim=41
# compute coordinate
preds, maxvals = get_final_preds(cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

kpts = np.zeros((num_peroson, 17, 2), dtype=np.float32)
scores = np.zeros((num_peroson, 17), dtype=np.float32)
kpts = np.zeros((num_person, 17, 2), dtype=np.float32)
scores = np.zeros((num_person, 17), dtype=np.float32)
for i, kpt in enumerate(preds):
kpts[i] = kpt

@@ -152,7 +151,10 @@ def gen_from_image(args, frame, people_sort, human_model, pose_model, det_dim=41
return kpts, scores


def gen_video_kpts(path, det_dim=416, num_peroson=1, gen_output=False, type='image'):
def gen_video_kpts(path, det_dim=416, num_person=1, gen_output=False, type='image'):
# Ensure checkpoints are downloaded only when HRNet is actually requested
ensure_checkpoints()

# Updating configuration
args1 = parse_args()
reset_config(args1)
@@ -166,7 +168,7 @@ def gen_video_kpts(path, det_dim=416, num_peroson=1, gen_output=False, type='ima
scores_result = []
if type == "image":
frame = cv2.imread(path)
kpts, scores = gen_from_image(args1, frame, people_sort, human_model, pose_model, det_dim=det_dim, num_peroson=num_peroson, gen_output=gen_output)
kpts, scores = gen_from_image(args1, frame, people_sort, human_model, pose_model, det_dim=det_dim, num_person=num_person, gen_output=gen_output)
kpts_result.append(kpts)
scores_result.append(scores)

@@ -178,7 +180,7 @@ def gen_video_kpts(path, det_dim=416, num_peroson=1, gen_output=False, type='ima
if not ret:
continue

kpts, scores = gen_from_image(args1, frame, people_sort, human_model, pose_model, det_dim=det_dim, num_peroson=num_peroson, gen_output=gen_output)
kpts, scores = gen_from_image(args1, frame, people_sort, human_model, pose_model, det_dim=det_dim, num_person=num_person, gen_output=gen_output)
kpts_result.append(kpts)
scores_result.append(scores)
