diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..85e0dd0 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,67 @@ +FROM pytorch/pytorch:2.4.1-cuda12.4-cudnn9-runtime + +ARG USERNAME=fmpose3d +ARG USER_UID=1000 +ARG USER_GID=1000 +RUN groupadd ${USERNAME} --gid ${USER_GID} +RUN useradd -m -s /bin/bash -g ${USERNAME} -u ${USER_UID} ${USERNAME} +RUN mkdir /app /logs /data + +ENV DEBIAN_FRONTEND=noninteractive +SHELL ["/bin/bash", "-c"] +RUN apt-get update -y && apt-get install -yy --no-install-recommends \ + vim zsh tmux wget curl htop git sudo ssh git-lfs \ + python3 python3-pip \ + libgl1-mesa-glx libglib2.0-0 \ + ffmpeg \ + && apt-get -y autoclean \ + && apt-get -y autoremove \ + && rm -rf /var/lib/apt/lists/* \ + && apt-get clean + +ENV PATH="/opt/conda/bin:${PATH}" + +# Initialize conda for root just in case and fix symlinks +RUN ln -fs /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh + +# --- Install FMPose3D from GitHub --- +RUN python -m pip install --no-cache-dir --upgrade pip && \ + git clone --depth 1 https://github.com/AdaptiveMotorControlLab/FMPose3D.git /tmp/fmpose3d && \ + python -m pip install --no-cache-dir "/tmp/fmpose3d[animals,viz]" gdown && \ + rm -rf /tmp/fmpose3d + +# Allow non-root user to download DLC model weights at runtime +RUN DLC_MODELZOO_DIR="$(python -c "import site, pathlib; print(pathlib.Path(site.getsitepackages()[0]) / 'deeplabcut' / 'modelzoo')")" \ + && mkdir -p "${DLC_MODELZOO_DIR}/checkpoints" \ + && chown -R "${USERNAME}:${USERNAME}" "${DLC_MODELZOO_DIR}" + +# Set your user as owner of the home directory before switching +RUN chown -R ${USERNAME}:${USERNAME} /home/${USERNAME} + +ENV NVIDIA_DRIVER_CAPABILITIES=all + +# --- SWITCH TO USER --- +USER ${USERNAME} +ENV HOME=/home/${USERNAME} +WORKDIR ${HOME} + +# Ensure dotfiles exist +RUN touch ${HOME}/.bashrc ${HOME}/.zshrc && \ + chmod 644 ${HOME}/.bashrc ${HOME}/.zshrc + +# Install Oh My Zsh and plugins (MUST come before conda init, since OMZ overwrites 
.zshrc) +RUN sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" --unattended \ + && git clone https://github.com/zsh-users/zsh-autosuggestions ~/.oh-my-zsh/custom/plugins/zsh-autosuggestions \ + && git clone https://github.com/zsh-users/zsh-syntax-highlighting ~/.oh-my-zsh/custom/plugins/zsh-syntax-highlighting \ + && sed -i 's/^plugins=(.*)$/plugins=(git zsh-autosuggestions zsh-syntax-highlighting)/' ~/.zshrc \ + && echo "export PATH=\$PATH:${HOME}/.local/bin" >> ~/.zshrc + +# Initialize conda AFTER Oh My Zsh (so conda init block is not overwritten) +RUN /opt/conda/bin/conda init bash && /opt/conda/bin/conda init zsh + +# Add conda activation to bashrc and zshrc +RUN echo "conda activate base" >> ${HOME}/.bashrc && \ + echo "conda activate base" >> ${HOME}/.zshrc + +SHELL ["/bin/zsh", "-c"] +CMD ["zsh"] diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..1c07b32 --- /dev/null +++ b/Makefile @@ -0,0 +1,66 @@ +# [USER: ADJUST PATH] +PROJECT_NAME := fmpose3d +####### +# RUN # +####### + +# for local server +IMG_NAME := fmpose3d +IMG_TAG := v0.1 +DOCKERFILE := Dockerfile +# Linux/macOS: use host UID/GID; Windows fallback to 1000 +HOST_UID ?= $(shell sh -c 'id -u 2>/dev/null || echo 1000') +HOST_GID ?= $(shell sh -c 'id -g 2>/dev/null || echo 1000') +BUILD_ARGS := \ + --build-arg USERNAME=fmpose3d \ + --build-arg USER_GID=$(HOST_GID) \ + --build-arg USER_UID=$(HOST_UID) + +build: + docker build $(BUILD_ARGS) \ + -t $(IMG_NAME):$(IMG_TAG) -f $(DOCKERFILE) . + +build-clean: + docker build --no-cache $(BUILD_ARGS) \ + -t $(IMG_NAME):$(IMG_TAG) -f $(DOCKERFILE) . 
+ + +CONTAINER_NAME := fmpose3d_dev1 +# [USER: ADJUST] Mount the project root into the container +HOST_SRC := $(shell pwd) +DOCKER_SRC := /fmpose3d +VOLUMES := \ + --volume "$(HOST_SRC):$(DOCKER_SRC)" + + +run: + docker run -it --gpus all --shm-size=64g --name $(CONTAINER_NAME) -w $(DOCKER_SRC) $(VOLUMES) $(IMG_NAME):$(IMG_TAG) + +exec: + docker exec -it -w $(DOCKER_SRC) $(CONTAINER_NAME) /bin/zsh + +exec_bash: + docker exec -it -w $(DOCKER_SRC) $(CONTAINER_NAME) /bin/bash + +stop: + docker stop $(CONTAINER_NAME) + +rm: + docker rm $(CONTAINER_NAME) + +# Help message +help: + @echo "Available targets:" + @echo " build - Build Docker image for local server." + @echo " run - Run a Docker container with GPU." + @echo " exec - Attach to running container (zsh)." + @echo " exec_bash - Attach to running container (bash)." + @echo " stop - Stop the running container." + @echo " rm - Remove the stopped container." + @echo " help - Show this help message." + @echo "" + @echo "Usage:" + @echo " 1. make build" + @echo " 2. make run" + @echo " 3. Inside container: pip install -e '.[animals,viz]'" + @echo " 4. Inside container: sh scripts/FMPose3D_train.sh" diff --git a/README.md b/README.md index b421862..1005e6a 100644 --- a/README.md +++ b/README.md @@ -51,8 +51,9 @@ pip install "fmpose3d[animals]" This visualization script is designed for single-frame based model, allowing you to easily run 3D human pose estimation on any single image. -Before testing, make sure you have the pre-trained model ready. -You may either use the model trained by your own or download ours from [here](https://drive.google.com/drive/folders/1235_UgUQXYZtjprBOv2ZJJHY2KOAS_6p?usp=sharing) and place it in the `./pre_trained_models` directory. +Pre-trained weights are downloaded automatically from [Hugging Face](https://huggingface.co/MLAdaptiveIntelligence/FMPose3D) the first time you run inference, so no manual setup is needed. 
+ +Alternatively, you can use your own trained weights or download ours from [Google Drive](https://drive.google.com/drive/folders/1aRZ6t_6IxSfM1nCTFOUXcYVaOk-5koGA?usp=sharing), place them in the `./pre_trained_models` directory, and set `model_weights_path` in the shell script (e.g. `demo/vis_in_the_wild.sh`). Next, put your test images into folder `demo/images`. Then run the visualization script: ```bash @@ -93,7 +94,7 @@ sh ./scripts/FMPose3D_train.sh ### Inference -First, download the folder with pre-trained model from [here](https://drive.google.com/drive/folders/1235_UgUQXYZtjprBOv2ZJJHY2KOAS_6p?usp=sharing) and place it in the './pre_trained_models' directory. +Pre-trained weights are fetched automatically from Hugging Face on the first run. You can also use local weights by setting `model_weights_path` in the shell script (see [Demos](#testing-on-in-the-wild-images-humans) above for details). To run inference on Human3.6M: diff --git a/demo/vis_in_the_wild.py b/demo/vis_in_the_wild.py index 90b2953..8f88819 100755 --- a/demo/vis_in_the_wild.py +++ b/demo/vis_in_the_wild.py @@ -19,6 +19,7 @@ from fmpose3d.lib.checkpoint.download_checkpoints import ensure_checkpoints ensure_checkpoints() +from fmpose3d.utils.weights import resolve_weights_path from fmpose3d.lib.preprocess import h36m_coco_format, revise_kpts from fmpose3d.lib.hrnet.gen_kpts import gen_video_kpts as hrnet_pose from fmpose3d.common.arguments import opts as parse_args @@ -113,7 +114,7 @@ def show3Dpose(vals, ax): def get_pose2D(path, output_dir, type): print('\nGenerating 2D pose...') - keypoints, scores = hrnet_pose(path, det_dim=416, num_peroson=1, gen_output=True, type=type) + keypoints, scores = hrnet_pose(path, det_dim=416, num_person=1, gen_output=True, type=type) keypoints, scores, valid_frames = h36m_coco_format(keypoints, scores) re_kpts = revise_kpts(keypoints, scores, valid_frames) print('Generating 2D pose successful!') @@ -278,8 +279,9 @@ def get_pose3D(path, output_dir, 
type='image'): # if args.reload: model_dict = model['CFM'].state_dict() - model_path = args.model_weights_path - print(model_path) + model_path = resolve_weights_path(args.model_weights_path, args.model_type) + + print(f"Loading weights from: {model_path}") pre_dict = torch.load(model_path, map_location=device, weights_only=True) for name, key in model_dict.items(): model_dict[name] = pre_dict[name] diff --git a/demo/vis_in_the_wild.sh b/demo/vis_in_the_wild.sh index df88474..426f547 100755 --- a/demo/vis_in_the_wild.sh +++ b/demo/vis_in_the_wild.sh @@ -6,7 +6,11 @@ batch_size=1 sh_file='vis_in_the_wild.sh' model_type='fmpose3d_humans' -model_weights_path='../pre_trained_models/fmpose3d_h36m/FMpose3D_pretrained_weights.pth' + +# By default, weights are automatically downloaded from Hugging Face Hub. +# To use local weights instead, uncomment the line below: +# model_weights_path='../pre_trained_models/fmpose3d_h36m/FMpose3D_pretrained_weights.pth' +model_weights_path='' target_path='./images/' # folder containing multiple images # target_path='./images/xx.png' # single image diff --git a/fmpose3d/animals/common/animal3d_dataset.py b/fmpose3d/animals/common/animal3d_dataset.py index 15c9891..c69482b 100644 --- a/fmpose3d/animals/common/animal3d_dataset.py +++ b/fmpose3d/animals/common/animal3d_dataset.py @@ -38,9 +38,9 @@ def __getitem__(self, item): else: keypoint_2d = np.array(data.get("keypoint_2d", []), dtype=np.float32) # normalize 2D keypoints - hight = np.array(data["height"]) + height = np.array(data["height"]) width = np.array(data["width"]) - keypoint_2d = normalize_screen_coordinates(keypoint_2d[..., :2], width, hight) + keypoint_2d = normalize_screen_coordinates(keypoint_2d[..., :2], width, height) # build 3D keypoints; append ones; fallback to zeros if missing if "keypoint_3d" in data and data["keypoint_3d"] is not None: diff --git a/fmpose3d/animals/common/lifter3d.py b/fmpose3d/animals/common/lifter3d.py index 4911a5a..afebffb 100644 --- 
a/fmpose3d/animals/common/lifter3d.py +++ b/fmpose3d/animals/common/lifter3d.py @@ -306,7 +306,7 @@ def triangulate_3d_batch(points_2d_batch, cameras): num_cams, num_frames, num_joints, _ = points_2d_batch.shape # (6, num_frames, 23, 2) print("num_cams,num_frames,num_joinits", points_2d_batch.shape) - # **1. compute projection matrics (6, 3, 4)** + # **1. compute projection matrices (6, 3, 4)** proj_matrices = np.array( [cam["intrinsic_matrix"] @ np.hstack((cam["R"], cam["T"])) for cam in cameras] ) # numpy array (6, 3, 4) @@ -361,7 +361,7 @@ def triangulate_3d(points_2d, cameras): for i, cam in enumerate(cameras): K, dist, R, T = cam["intrinsic_matrix"], cam["distortion_coeffs"], cam["R"], cam["T"] - P = K @ np.hstack((R, T)) # projectio matrix + P = K @ np.hstack((R, T)) # projection matrix # print("Projection Matrix P:\n", P) proj_matrices.append(P) @@ -628,7 +628,7 @@ def main(): # reprojected_2d = project_3d_to_2d(points_3d, cameras[i]) # # visualize_2d_on_video(video_files[i], frame_number, points_2d_frame[i], reprojected_2d, output_path) - # # test on trangulate 3D batch + # # test on triangulate 3D batch # left_frame_id = 10 # right_frame_id = 60 diff --git a/fmpose3d/animals/common/utils.py b/fmpose3d/animals/common/utils.py index cdafd8c..9872dde 100755 --- a/fmpose3d/animals/common/utils.py +++ b/fmpose3d/animals/common/utils.py @@ -354,7 +354,7 @@ def save_top_N_models( def back_to_ori_uv(cropped_uv, bb_box): """ - for cropped uv, back to origial uv to help do the uvd->xyz operation + for cropped uv, back to original uv to help do the uvd->xyz operation :return: """ N, T, V, _ = cropped_uv.size() @@ -423,7 +423,7 @@ def project_to_2d(X, camera_params): Arguments: X -- 3D points in *camera space* to transform (N, *, 3) - camera_params -- intrinsic parameteres (N, 2+2+3+2=9) + camera_params -- intrinsic parameters (N, 2+2+3+2=9) """ assert X.shape[-1] == 3 # B,J,3 assert len(camera_params.shape) == 2 # camera_params:[B,1,9] diff --git 
a/fmpose3d/common/utils.py b/fmpose3d/common/utils.py index 549ef2b..3fef672 100755 --- a/fmpose3d/common/utils.py +++ b/fmpose3d/common/utils.py @@ -378,7 +378,7 @@ def save_top_N_models( def back_to_ori_uv(cropped_uv, bb_box): """ - for cropped uv, back to origial uv to help do the uvd->xyz operation + for cropped uv, back to original uv to help do the uvd->xyz operation :return: """ N, T, V, _ = cropped_uv.size() @@ -453,7 +453,7 @@ def project_to_2d(X, camera_params): Arguments: X -- 3D points in *camera space* to transform (N, *, 3) - camera_params -- intrinsic parameteres (N, 2+2+3+2=9) + camera_params -- intrinsic parameters (N, 2+2+3+2=9) """ assert X.shape[-1] == 3 # B,J,3 assert len(camera_params.shape) == 2 # camera_params:[B,1,9] diff --git a/fmpose3d/inference_api/fmpose3d.py b/fmpose3d/inference_api/fmpose3d.py index 07a3417..970a5e3 100644 --- a/fmpose3d/inference_api/fmpose3d.py +++ b/fmpose3d/inference_api/fmpose3d.py @@ -34,8 +34,7 @@ ProgressCallback = Callable[[int, int], None] -#: HuggingFace repository hosting the official FMPose3D checkpoints. -_HF_REPO_ID: str = "deruyter92/fmpose_temp" +from fmpose3d.utils.weights import HF_REPO_ID as _HF_REPO_ID # Default camera-to-world rotation quaternion (from the demo script). _DEFAULT_CAM_ROTATION = np.array( @@ -759,8 +758,6 @@ class _IngestedInput: # --------------------------------------------------------------------------- -# FIXME @deruyter92: THIS IS TEMPORARY UNTIL WE DOWNLOAD THE WEIGHTS FROM HUGGINGFACE -SKIP_WEIGHTS_VALIDATION = object() # sentinel value to indicate that the weights should not be validated class FMPose3DInference: """High-level, two-step inference API for FMPose3D. 
diff --git a/fmpose3d/lib/hrnet/gen_kpts.py b/fmpose3d/lib/hrnet/gen_kpts.py index a75e704..0049997 100755 --- a/fmpose3d/lib/hrnet/gen_kpts.py +++ b/fmpose3d/lib/hrnet/gen_kpts.py @@ -24,7 +24,7 @@ import cv2 import copy -from fmpose3d.lib.hrnet.lib.utils.utilitys import plot_keypoint, PreProcess, write, load_json +from fmpose3d.lib.hrnet.lib.utils.utilitys import PreProcess from fmpose3d.lib.hrnet.lib.config import cfg, update_config from fmpose3d.lib.hrnet.lib.utils.transforms import * from fmpose3d.lib.hrnet.lib.utils.inference import get_final_preds @@ -34,7 +34,6 @@ # Auto-download checkpoints if missing and get checkpoint paths from fmpose3d.lib.checkpoint.download_checkpoints import ensure_checkpoints, get_checkpoint_path -ensure_checkpoints() # Loading human detector model from fmpose3d.lib.yolov3.human_detector import load_model as yolo_model @@ -100,7 +99,7 @@ def model_load(config): return model -def gen_from_image(args, frame, people_sort, human_model, pose_model, det_dim=416, num_peroson=1, gen_output=False): +def gen_from_image(args, frame, people_sort, human_model, pose_model, det_dim=416, num_person=1, gen_output=False): bboxs, scores = yolo_det(frame, human_model, reso=det_dim, confidence=args.thred_score) if bboxs is None or not bboxs.any(): @@ -118,7 +117,7 @@ def gen_from_image(args, frame, people_sort, human_model, pose_model, det_dim=41 if people_track.shape[0] == 1: people_track_ = people_track[-1, :-1].reshape(1, 4) elif people_track.shape[0] >= 2: - people_track_ = people_track[-num_peroson:, :-1].reshape(num_peroson, 4) + people_track_ = people_track[-num_person:, :-1].reshape(num_person, 4) people_track_ = people_track_[::-1] else: return [], [] @@ -130,7 +129,7 @@ def gen_from_image(args, frame, people_sort, human_model, pose_model, det_dim=41 with torch.no_grad(): # bbox is coordinate location - inputs, origin_img, center, scale = PreProcess(frame, track_bboxs, cfg, num_peroson) + inputs, origin_img, center, scale = PreProcess(frame, 
track_bboxs, cfg, num_person) inputs = inputs[:, [2, 1, 0]] @@ -141,8 +140,8 @@ def gen_from_image(args, frame, people_sort, human_model, pose_model, det_dim=41 # compute coordinate preds, maxvals = get_final_preds(cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale)) - kpts = np.zeros((num_peroson, 17, 2), dtype=np.float32) - scores = np.zeros((num_peroson, 17), dtype=np.float32) + kpts = np.zeros((num_person, 17, 2), dtype=np.float32) + scores = np.zeros((num_person, 17), dtype=np.float32) for i, kpt in enumerate(preds): kpts[i] = kpt @@ -152,7 +151,10 @@ def gen_from_image(args, frame, people_sort, human_model, pose_model, det_dim=41 return kpts, scores -def gen_video_kpts(path, det_dim=416, num_peroson=1, gen_output=False, type='image'): +def gen_video_kpts(path, det_dim=416, num_person=1, gen_output=False, type='image'): + # Ensure checkpoints are downloaded only when HRNet is actually requested + ensure_checkpoints() + # Updating configuration args1 = parse_args() reset_config(args1) @@ -166,7 +168,7 @@ def gen_video_kpts(path, det_dim=416, num_peroson=1, gen_output=False, type='ima scores_result = [] if type == "image": frame = cv2.imread(path) - kpts, scores = gen_from_image(args1, frame, people_sort, human_model, pose_model, det_dim=det_dim, num_peroson=num_peroson, gen_output=gen_output) + kpts, scores = gen_from_image(args1, frame, people_sort, human_model, pose_model, det_dim=det_dim, num_person=num_person, gen_output=gen_output) kpts_result.append(kpts) scores_result.append(scores) @@ -178,7 +180,7 @@ def gen_video_kpts(path, det_dim=416, num_peroson=1, gen_output=False, type='ima if not ret: continue - kpts, scores = gen_from_image(args1, frame, people_sort, human_model, pose_model, det_dim=det_dim, num_peroson=num_peroson, gen_output=gen_output) + kpts, scores = gen_from_image(args1, frame, people_sort, human_model, pose_model, det_dim=det_dim, num_person=num_person, gen_output=gen_output) kpts_result.append(kpts) 
scores_result.append(scores) diff --git a/fmpose3d/lib/hrnet/lib/utils/utilitys.py b/fmpose3d/lib/hrnet/lib/utils/utilitys.py index fde7b3d..ba587ff 100755 --- a/fmpose3d/lib/hrnet/lib/utils/utilitys.py +++ b/fmpose3d/lib/hrnet/lib/utils/utilitys.py @@ -30,83 +30,6 @@ [170, 0, 255], [255, 0, 255]] -def plot_keypoint(image, coordinates, confidence, keypoint_thresh=0.3): - # USE cv2 - joint_visible = confidence[:, :, 0] > keypoint_thresh - coordinates = coco_h36m(coordinates) - for i in range(coordinates.shape[0]): - pts = coordinates[i] - - for joint in pts: - cv2.circle(image, (int(joint[0]), int(joint[1])), 8, (255, 255, 255), 1) - - for color_i, jp in zip(colors, h36m_pairs): - if joint_visible[i, jp[0]] and joint_visible[i, jp[1]]: - pt0 = pts[jp, 0] - pt1 = pts[jp, 1] - pt0_0, pt0_1, pt1_0, pt1_1 = int(pt0[0]), int(pt0[1]), int(pt1[0]), int(pt1[1]) - - cv2.line(image, (pt0_0, pt1_0), (pt0_1, pt1_1), color_i, 6) - # cv2.circle(image,(pt0_0, pt0_1), 2, color_i, thickness=-1) - # cv2.circle(image,(pt1_0, pt1_1), 2, color_i, thickness=-1) - return image - - -def write(x, img): - x = [int(i) for i in x] - c1 = tuple(x[0:2]) - c2 = tuple(x[2:4]) - - color = [0, 97, 255] - label = 'People {}'.format(x[-1]) - cv2.rectangle(img, c1, c2, color, 2) - t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0] - c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4 - cv2.rectangle(img, c1, c2, [0, 128, 255], -1) - cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1) - return img - - -def load_json(file_path): - with open(file_path, 'r') as fr: - video_info = json.load(fr) - - label = video_info['label'] - label_index = video_info['label_index'] - - num_frames = video_info['data'][-1]['frame_index'] - keypoints = np.zeros((2, num_frames, 17, 2), dtype=np.float32) # (M, T, N, 2) - scores = np.zeros((2, num_frames, 17), dtype=np.float32) # (M, T, N) - - for frame_info in video_info['data']: - frame_index = 
frame_info['frame_index'] - - for index, skeleton_info in enumerate(frame_info['skeleton']): - pose = skeleton_info['pose'] - score = skeleton_info['score'] - bbox = skeleton_info['bbox'] - - if len(bbox) == 0 or index+1 > 2: - continue - - pose = np.asarray(pose, dtype=np.float32) - score = np.asarray(score, dtype=np.float32) - score = score.reshape(-1) - - keypoints[index, frame_index-1] = pose - scores[index, frame_index-1] = score - - new_kpts = [] - for i in range(keypoints.shape[0]): - kps = keypoints[i] - if np.sum(kps) != 0.: - new_kpts.append(kps) - - new_kpts = np.asarray(new_kpts, dtype=np.float32) - scores = np.asarray(scores, dtype=np.float32) - scores = scores[:, :, :, np.newaxis] - return new_kpts, scores, label, label_index - def box_to_center_scale(box, model_image_width, model_image_height): """convert a box to center,scale information required for pose transformation diff --git a/fmpose3d/lib/yolov3/bbox.py b/fmpose3d/lib/yolov3/bbox.py index 60373d5..aef9ee4 100755 --- a/fmpose3d/lib/yolov3/bbox.py +++ b/fmpose3d/lib/yolov3/bbox.py @@ -66,7 +66,7 @@ def bbox_iou(box1, box2): b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3] b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3] - # get the corrdinates of the intersection rectangle + # get the coordinates of the intersection rectangle inter_rect_x1 = torch.max(b1_x1, b2_x1) inter_rect_y1 = torch.max(b1_y1, b2_y1) inter_rect_x2 = torch.min(b1_x2, b2_x2) diff --git a/fmpose3d/lib/yolov3/darknet.py b/fmpose3d/lib/yolov3/darknet.py index a5e6e7d..177dd85 100755 --- a/fmpose3d/lib/yolov3/darknet.py +++ b/fmpose3d/lib/yolov3/darknet.py @@ -216,7 +216,7 @@ def create_modules(blocks): except: end = 0 - # Positive anotation + # Positive annotation if start > 0: start = start - index diff --git a/fmpose3d/lib/yolov3/human_detector.py b/fmpose3d/lib/yolov3/human_detector.py index 5f9cdab..b669d77 100755 --- a/fmpose3d/lib/yolov3/human_detector.py +++ 
b/fmpose3d/lib/yolov3/human_detector.py @@ -15,7 +15,6 @@ import os import sys import random -import pickle as pkl import argparse from fmpose3d.lib.yolov3.util import * @@ -63,7 +62,7 @@ def write(x, img, colors): def arg_parse(): """" - Parse arguements to the detect module + Parse arguments to the detect module """ parser = argparse.ArgumentParser(description='YOLO v3 Cam Demo') @@ -104,7 +103,7 @@ def load_model(args=None, CUDA=None, inp_dim=416): assert inp_dim % 32 == 0 assert inp_dim > 32 - # If there's a GPU availible, put the model on GPU + # If there's a GPU available, put the model on GPU if CUDA: model.cuda() diff --git a/fmpose3d/utils/__init__.py b/fmpose3d/utils/__init__.py new file mode 100644 index 0000000..adfc683 --- /dev/null +++ b/fmpose3d/utils/__init__.py @@ -0,0 +1,8 @@ +""" +FMPose3D: monocular 3D Pose Estimation via Flow Matching + +Official implementation of the paper: +"FMPose3D: monocular 3D Pose Estimation via Flow Matching" +by Ti Wang, Xiaohang Yu, and Mackenzie Weygandt Mathis +Licensed under Apache 2.0 +""" diff --git a/fmpose3d/utils/weights.py b/fmpose3d/utils/weights.py new file mode 100644 index 0000000..941e481 --- /dev/null +++ b/fmpose3d/utils/weights.py @@ -0,0 +1,49 @@ +""" +FMPose3D: monocular 3D Pose Estimation via Flow Matching + +Official implementation of the paper: +"FMPose3D: monocular 3D Pose Estimation via Flow Matching" +by Ti Wang, Xiaohang Yu, and Mackenzie Weygandt Mathis +Licensed under Apache 2.0 +""" + +"""Shared helpers for resolving / downloading FMPose3D model weights.""" + +HF_REPO_ID: str = "MLAdaptiveIntelligence/FMPose3D" + + +def resolve_weights_path(model_weights_path: str, model_type: str) -> str: + """Return a local weights path, downloading from Hugging Face Hub if needed. + + Parameters + ---------- + model_weights_path : str + User-supplied local path. If falsy the weights are fetched from the + Hugging Face Hub automatically. 
+    model_type : str
+        Model variant name used to derive the remote filename
+        (e.g. ``"fmpose3d_humans"`` -> ``fmpose3d_humans.pth``).
+
+    Returns
+    -------
+    str
+        Absolute path to the weight file on disk.
+    """
+    if model_weights_path:
+        return model_weights_path
+
+    try:
+        from huggingface_hub import hf_hub_download
+    except ImportError:
+        raise ImportError(
+            "huggingface_hub is required to download model weights. "
+            "Install it with: pip install huggingface_hub\n"
+            "Or download the weights manually and pass the local path."
+        ) from None
+
+    filename = f"{model_type}.pth"
+    print(
+        f"No local weights path specified. "
+        f"Downloading '{filename}' from Hugging Face ({HF_REPO_ID})..."
+    )
+    return hf_hub_download(repo_id=HF_REPO_ID, filename=filename)
diff --git a/pyproject.toml b/pyproject.toml
index b40c122..e7df467 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -68,6 +68,7 @@ profile = "black"
 line_length = 100
 
 [tool.pytest.ini_options]
+testpaths = ["tests"]
 markers = [
     "functional: marks tests that require pretrained weights (deselect with '-m \"not functional\"')",
     "network: marks tests that may need internet access on first run (deselect with '-m \"not network\"')",
diff --git a/scripts/FMPose3D_main.py b/scripts/FMPose3D_main.py
index beb88da..172a482 100644
--- a/scripts/FMPose3D_main.py
+++ b/scripts/FMPose3D_main.py
@@ -18,6 +18,7 @@ import torch.optim as optim
 from tqdm import tqdm
 
+from fmpose3d.utils.weights import resolve_weights_path
 from fmpose3d.common import opts, Human36mDataset, Fusion
 from fmpose3d.common.utils import *
 
@@ -268,7 +269,11 @@ def print_error_action(action_error_sum, is_train):
         args.checkpoint = "./checkpoint/" + folder_name
     elif args.train == False:
         # create a new folder for the test results
-        args.previous_dir = os.path.dirname(args.model_weights_path)
+        if args.model_weights_path:
+            args.previous_dir = os.path.dirname(args.model_weights_path)
+        else:
+            # HuggingFace-downloaded weights: no local dir, use ./checkpoint/ 
+ args.previous_dir = "./checkpoint" args.checkpoint = os.path.join(args.previous_dir, folder_name) if not os.path.exists(args.checkpoint): @@ -337,8 +342,9 @@ def print_error_action(action_error_sum, is_train): if args.reload: model_dict = model["CFM"].state_dict() - model_path = args.model_weights_path - print(model_path) + model_path = resolve_weights_path(args.model_weights_path, args.model_type) + + print(f"Loading weights from: {model_path}") pre_dict = torch.load(model_path, map_location=device, weights_only=True) for name, key in model_dict.items(): model_dict[name] = pre_dict[name] diff --git a/scripts/FMPose3D_test.sh b/scripts/FMPose3D_test.sh index afc1f86..a1cadf7 100755 --- a/scripts/FMPose3D_test.sh +++ b/scripts/FMPose3D_test.sh @@ -11,7 +11,11 @@ exp_temp=0.005 folder_name=test_s${eval_multi_steps}_${mode}_h${num_hypothesis_list}_$(date +%Y%m%d_%H%M%S) model_type='fmpose3d_humans' -model_weights_path='./pre_trained_models/fmpose3d_h36m/FMpose3D_pretrained_weights.pth' + +# By default, weights are automatically downloaded from Hugging Face Hub. 
+# To use local weights instead, uncomment the line below: +# model_weights_path='./pre_trained_models/fmpose3d_h36m/FMpose3D_pretrained_weights.pth' +model_weights_path='' #Test python3 scripts/FMPose3D_main.py \ diff --git a/tests/fmpose3d_api/test_fmpose3d.py b/tests/fmpose3d_api/test_fmpose3d.py index 1e0b29e..f4d4a02 100644 --- a/tests/fmpose3d_api/test_fmpose3d.py +++ b/tests/fmpose3d_api/test_fmpose3d.py @@ -747,8 +747,8 @@ def test_predict_raises_clear_error_without_deeplabcut(self): frames = np.random.randint(0, 255, (1, 64, 64, 3), dtype=np.uint8) with patch( - "fmpose3d.inference_api.fmpose3d.importlib.util.find_spec", - return_value=None, + "fmpose3d.inference_api.fmpose3d._require_superanimal_analyze_images", + side_effect=ImportError('pip install "fmpose3d[animals]"'), ): with pytest.raises(ImportError, match=r"fmpose3d\[animals\]"): estimator.predict(frames) diff --git a/tests/fmpose3d_api/test_huggingface.py b/tests/fmpose3d_api/test_huggingface.py index 39139b3..01217dc 100644 --- a/tests/fmpose3d_api/test_huggingface.py +++ b/tests/fmpose3d_api/test_huggingface.py @@ -21,6 +21,7 @@ import pytest from fmpose3d.inference_api.fmpose3d import FMPose3DInference +from fmpose3d.utils.weights import HF_REPO_ID class TestDownloadModelWeights: @@ -37,7 +38,7 @@ def test_calls_hf_hub_download_humans(self): api._download_model_weights() mock_dl.assert_called_once_with( - repo_id="deruyter92/fmpose_temp", + repo_id=HF_REPO_ID, filename="fmpose3d_humans.pth", ) assert api.model_weights_path == "/fake/cache/fmpose3d_humans.pth" @@ -52,7 +53,7 @@ def test_calls_hf_hub_download_animals(self): api._download_model_weights() mock_dl.assert_called_once_with( - repo_id="deruyter92/fmpose_temp", + repo_id=HF_REPO_ID, filename="fmpose3d_animals.pth", ) assert api.model_weights_path == "/fake/cache/fmpose3d_animals.pth" diff --git a/tests/test_demo_human.py b/tests/test_demo_human.py index ae8b71a..02d3133 100644 --- a/tests/test_demo_human.py +++ 
b/tests/test_demo_human.py @@ -38,7 +38,7 @@ def test_2d_pose_estimation(test_image_path, test_output_dir): from fmpose3d.lib.preprocess import h36m_coco_format, revise_kpts # Run 2D pose estimation - keypoints, scores = hrnet_pose(test_image_path, det_dim=416, num_peroson=1, gen_output=True, type='image') + keypoints, scores = hrnet_pose(test_image_path, det_dim=416, num_person=1, gen_output=True, type='image') # Check output shapes assert keypoints is not None, "Keypoints should not be None"