- Model downloads.
- Inference test.
- Participant classification evaluations.
- Room readiness for chairs, tables, whiteboard, and blinds evaluations.
- GPTQ, AWQ, and pre-quantization.
- LoRA fine-tuning.
pip install -r requirements.txt
export HF_HOME=/mnt/shared/$USER/hf_cache
export HUGGINGFACE_HUB_CACHE=/mnt/shared/$USER/hf_cache
export MODEL_ROOT=/mnt/shared/$USER/models

Download the two dataset folders from Google Drive and place them next to this repo.
The commands below assume they are named:
../participant_classification/
../room_readiness/
Before downloading models or launching GPU jobs, validate the full repo wiring:
bash pipelines/dry_run_all.sh

This checks Python syntax, shell syntax, config/model keys, download targets,
pipeline commands, quantization CLIs, and fine-tuning CLIs without running large
downloads or model inference. It does not require a .git directory, so it also
works from the GitHub source zip.
If a Hugging Face model requires authentication:
export HF_TOKEN=hf_...

bash model_download/download_models.sh core
bash model_download/download_models.sh qwen35
bash model_download/download_models.sh optional
bash model_download/download_models.sh all

Edit benchmark/benchmark_config.yaml if model paths differ from your machine.
`all` covers the full model set wired in this repo: SmolVLM2, InternVL,
Qwen3-VL, ZwZ, Qwen3.5, Gemma, Phi-3.5-Vision, Idefics3, LLaVA, Molmo,
MiniCPM-V, and NVILA 2B/8B.
python single_image_inference/run_single_image.py \
--image single_image_inference/001.jpg \
--models qwen3vl_4b qwen35_2b smolvlm internvl \
--output single_image_inference/outputs/example.json

Expected dataset next to this repo:
../participant_classification/
├── annotations.json
└── images/
Run:
MODELS="qwen3vl_4b qwen35_2b" \
bash pipelines/participant_classification/run.sh

Smoke run:
LIMIT_CROPS=10 MODELS="qwen3vl_4b" \
bash pipelines/participant_classification/run.sh

Expected dataset next to this repo:
../room_readiness/
├── manifest.csv
├── images/
└── metadata/
Run:
MODELS="qwen3vl_4b qwen35_2b" \
bash pipelines/room_readiness/run.sh

Smoke run:
LIMIT_PER_TASK=5 MODELS="qwen3vl_4b" \
bash pipelines/room_readiness/run.sh

MODELS="qwen3vl_4b qwen3vl_8b smolvlm internvl nvila_2b phi35_vision" \
bash pipelines/run_all.sh

GPTQ:
python quantizations/quantize.py \
--models qwen3vl_4b qwen3vl_8b qwen35_0_8b qwen35_2b qwen35_4b \
--methods gptq \
--calibration-size 128

AWQ:
python quantizations/quantize.py \
--models qwen3vl_4b qwen3vl_8b qwen35_0_8b qwen35_2b qwen35_4b \
--methods awq \
--calibration-size 128

Pre-quantization / lower-precision export:
python quantizations/pre_quantization/export_finetuned_qwen.py \
--source fine_tuning/checkpoints/qwen3vl_4b_room_readiness_lora/final \
--model-family qwen3vl \
--precision float16 \
--output-dir /mnt/shared/$USER/models/qwen3vl_4b_room_readiness_lora-fp16

Room-readiness LoRA:
python fine_tuning/prepare_room_readiness_sft.py \
--dataset ../room_readiness \
--out-dir fine_tuning/data/room_readiness_sft
python fine_tuning/train_room_readiness_lora.py \
--model-family qwen3vl \
--model-path /mnt/shared/$USER/models/Qwen3-VL-4B-Instruct \
--output-dir fine_tuning/checkpoints/qwen3vl_4b_room_readiness_lora

Participant/person-box LoRA:
python fine_tuning/train_participant_lora.py \
--model-family qwen3vl \
--model-path /mnt/shared/$USER/models/Qwen3-VL-4B-Instruct \
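--output-dir fine_tuning/checkpoints/qwen3vl_4b_participant_lora

This block goes from setup through model download and all-model eval commands. It begins by cloning the repo and changing into it, so start from the directory that holds the two dataset folders above; if you are already at the repo root, skip the clone and cd lines.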
# End-to-end run: clone the repo, install dependencies, download every model,
# then run the single-image test and both evaluation pipelines.
# Target shell: bash (uses [[ ]], arrays, and pipefail).

# Strict mode comes first so a failed clone or cd stops the run instead of
# letting the remaining commands execute in the wrong directory.
set -euo pipefail

# Reuse an existing checkout so the block can be re-run without the clone failing.
if [[ ! -d sp26-logitech ]]; then
  git clone https://github.com/mlberkeley/sp26-logitech
fi
cd sp26-logitech

python -m pip install -r requirements.txt

# Cache and model locations are per-user paths under /mnt/shared.
: "${USER:?USER must be set to build the /mnt/shared paths}"
export HF_HOME="/mnt/shared/${USER}/hf_cache"
export HUGGINGFACE_HUB_CACHE="/mnt/shared/${USER}/hf_cache"
export MODEL_ROOT="/mnt/shared/${USER}/models"
# export HF_TOKEN=hf_...

# The dataset folders sit next to the checkout, one level above the repo root.
export PARTICIPANT_DATASET_DIR="${PWD}/../participant_classification"
export PARTICIPANT_DATASET_JSON="${PARTICIPANT_DATASET_DIR}/annotations.json"
export ROOM_READINESS_DATASET="${PWD}/../room_readiness"

bash model_download/download_models.sh all

# Space-separated model keys; exported so the pipeline scripts below inherit it.
export MODELS="\
smolvlm internvl internvl_int8 internvl_int4 \
qwen3vl_2b qwen3vl_2b_int8 qwen3vl_2b_int4 \
qwen3vl_4b qwen3vl_4b_int8 qwen3vl_4b_int4 \
qwen3vl_8b qwen3vl_8b_int8 qwen3vl_8b_int4 \
zwz_2b \
qwen35_0_8b qwen35_0_8b_int8 qwen35_0_8b_int4 \
qwen35_2b qwen35_2b_int8 qwen35_2b_int4 \
qwen35_4b qwen35_4b_int8 qwen35_4b_int4 \
gemma_e2b_4bit gemma_e2b_8bit_hf gemma_e4b_4bit \
phi35_vision phi35_vision_int8 phi35_vision_int4 \
idefics3 idefics3_int8 idefics3_int4 \
llava llava_int8 llava_int4 \
molmo molmo_int8 molmo_int4 \
minicpmv minicpmv_int8 minicpmv_int4 \
nvila_2b nvila_2b_int8 nvila_2b_int4 \
nvila_8b nvila_8b_int8 nvila_8b_int4"

# Validate the repo wiring before any GPU work starts.
bash pipelines/dry_run_all.sh

# Split MODELS into one argument per key without exposing it to glob expansion.
read -r -a model_keys <<<"$MODELS"
python single_image_inference/run_single_image.py \
  --image single_image_inference/001.jpg \
  --models "${model_keys[@]}" \
  --output single_image_inference/outputs/all_models_single_image.json

bash pipelines/participant_classification/run.sh
bash pipelines/room_readiness/run.sh