diff --git a/point_transformer_v3/external/pointcept_minimal/.github/workflows/formatter.yml b/point_transformer_v3/external/pointcept_minimal/.github/workflows/formatter.yml new file mode 100644 index 0000000..a95391b --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/.github/workflows/formatter.yml @@ -0,0 +1,20 @@ +name: Formatter + +on: + workflow_dispatch: + push: + branches: + - main + pull_request: + types: [opened, reopened, synchronize] + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + formatter: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: psf/black@stable diff --git a/point_transformer_v3/external/pointcept_minimal/.gitignore b/point_transformer_v3/external/pointcept_minimal/.gitignore new file mode 100644 index 0000000..59c3884 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/.gitignore @@ -0,0 +1,16 @@ +image/ +__pycache__ +**/build/ +**/*.egg-info/ +**/dist/ +*.so +exp +weights +data +*log* +outputs/ +.vscode +.idea +*/.DS_Store +**/*.out +Dockerfile diff --git a/point_transformer_v3/external/pointcept_minimal/LICENSE b/point_transformer_v3/external/pointcept_minimal/LICENSE new file mode 100644 index 0000000..ee1fac1 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Pointcept + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions 
of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/point_transformer_v3/external/pointcept_minimal/README.md b/point_transformer_v3/external/pointcept_minimal/README.md new file mode 100644 index 0000000..a05adff --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/README.md @@ -0,0 +1,53 @@ +# Pointcept Minimal + +A minimal version of [Pointcept](https://github.com/Pointcept/Pointcept) for comparing official PTv3 and fVDB PTv3 training results on ScanNet. + +## Environment Setup + +1. **fVDB-core**: Follow fVDB's installation instructions and install `env/dev_environment.yml`. Tested with fVDB commit `51cec3d3e90d7d571e22862d17ae9051cbd13afd`. +2. Activate the environment and install dependencies: + ```bash + conda activate fvdb + pip install -r requirements_pointceptminimal.txt + ``` + +## Dataset Preparation + +Follow the [Pointcept scannet dataset preparation](https://github.com/Pointcept/Pointcept?tab=readme-ov-file#scannet-v2) to download ScanNet and prepare the dataset in the correct location. + +## Training Configurations + +### With Convolution (CPE) +Both configurations train PTv3 on ScanNet with convolution enabled. Training loss curves will diverge. 
+ +**fVDB PTv3:** +```bash +CUDA_VISIBLE_DEVICES=4,5,6,7 sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v3m1-0-fvdb-test-4g-conv -n semseg-pt-v3m1-0-fvdb-test-4g-conv +``` + +**Official PTv3:** +```bash +CUDA_VISIBLE_DEVICES=4,5,6,7 sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v3m1-0-test-4g-conv -n semseg-pt-v3m1-0-test-4g-conv +``` + +### Without Convolution (CPE) +Both configurations train PTv3 on ScanNet without convolution. Training loss curves are identical. + +**fVDB PTv3:** +```bash +CUDA_VISIBLE_DEVICES=4,5,6,7 sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v3m1-0-fvdb-test-4g -n semseg-pt-v3m1-0-fvdb-test-4g +``` + +**Official PTv3:** +```bash +CUDA_VISIBLE_DEVICES=0,1,2,3 sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v3m1-0-test-4g -n semseg-pt-v3m1-0-test-4g +``` + +## Weights & Biases (Optional) + +To enable W&B logging, set the following in your config file: +```python +enable_wandb = True +wandb_project = "your_project" +wandb_key = "your_key" +``` diff --git a/point_transformer_v3/external/pointcept_minimal/README_pointcept.md b/point_transformer_v3/external/pointcept_minimal/README_pointcept.md new file mode 100644 index 0000000..333dddf --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/README_pointcept.md @@ -0,0 +1,988 @@ +

+ + + + + + pointcept + +
+ +

+ +[![Formatter](https://github.com/pointcept/pointcept/actions/workflows/formatter.yml/badge.svg)](https://github.com/pointcept/pointcept/actions/workflows/formatter.yml) + +**Pointcept** is a powerful and flexible codebase for point cloud perception research. It is also an official implementation of the following papers: +- 🚀 **Concerto: Joint 2D-3D Self-Supervised Learning Emerges Spatial Representations** +*Yujia Zhang, Xiaoyang Wu, Yixing Lao, Chengyao Wang, Zhuotao Tian, Naiyan Wang, Hengshuang Zhao* +Conference on Neural Information Processing Systems (**NeurIPS**) 2025 +[ Pretrain ] [Concerto] - [ [Project](https://pointcept.github.io/Concerto/) ] [ [Bib](https://xywu.me/research/concerto/bib.txt) ] [ [HF Demo](https://huggingface.co/spaces/Pointcept/Concerto) ] [ [Inference](https://github.com/Pointcept/Concerto) ] [ [Weight](https://huggingface.co/Pointcept/Concerto) ] → [here](#concerto) + + +- **Sonata: Self-Supervised Learning of Reliable Point Representations** +*Xiaoyang Wu, Daniel DeTone, Duncan Frost, Tianwei Shen, Chris Xie, Nan Yang, Jakob Engel, Richard Newcombe, Hengshuang Zhao, Julian Straub* +IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2025 - Highlight +[ Pretrain ] [Sonata] - [ [Project](https://xywu.me/sonata/) ] [ [arXiv](https://arxiv.org/abs/2503.16429) ] [ [Bib](https://xywu.me/research/sonata/bib.txt) ] [ [Demo](https://github.com/facebookresearch/sonata) ] [ [Weight](https://huggingface.co/facebook/sonata) ] → [here](#sonata) + + +- **Point Transformer V3: Simpler, Faster, Stronger** +*Xiaoyang Wu, Li Jiang, Peng-Shuai Wang, Zhijian Liu, Xihui Liu, Yu Qiao, Wanli Ouyang, Tong He, Hengshuang Zhao* +IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2024 - Oral +[ Backbone ] [PTv3] - [ [arXiv](https://arxiv.org/abs/2312.10035) ] [ [Bib](https://xywu.me/research/ptv3/bib.txt) ] [ [Project](https://github.com/Pointcept/PointTransformerV3) ] → 
[here](https://github.com/Pointcept/PointTransformerV3) + + +- **OA-CNNs: Omni-Adaptive Sparse CNNs for 3D Semantic Segmentation** +*Bohao Peng, Xiaoyang Wu, Li Jiang, Yukang Chen, Hengshuang Zhao, Zhuotao Tian, Jiaya Jia* +IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2024 +[ Backbone ] [ OA-CNNs ] - [ [arXiv](https://arxiv.org/abs/2403.14418) ] [ [Bib](https://xywu.me/research/oacnns/bib.txt) ] → [here](#oa-cnns) + + +- **Towards Large-scale 3D Representation Learning with Multi-dataset Point Prompt Training** +*Xiaoyang Wu, Zhuotao Tian, Xin Wen, Bohao Peng, Xihui Liu, Kaicheng Yu, Hengshuang Zhao* +IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2024 +[ Pretrain ] [PPT] - [ [arXiv](https://arxiv.org/abs/2308.09718) ] [ [Bib](https://xywu.me/research/ppt/bib.txt) ] → [here](#point-prompt-training-ppt) + + +- **Masked Scene Contrast: A Scalable Framework for Unsupervised 3D Representation Learning** +*Xiaoyang Wu, Xin Wen, Xihui Liu, Hengshuang Zhao* +IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2023 +[ Pretrain ] [ MSC ] - [ [arXiv](https://arxiv.org/abs/2303.14191) ] [ [Bib](https://xywu.me/research/msc/bib.txt) ] → [here](#masked-scene-contrast-msc) + + +- **Learning Context-aware Classifier for Semantic Segmentation** (3D Part) +*Zhuotao Tian, Jiequan Cui, Li Jiang, Xiaojuan Qi, Xin Lai, Yixin Chen, Shu Liu, Jiaya Jia* +AAAI Conference on Artificial Intelligence (**AAAI**) 2023 - Oral +[ SemSeg ] [ CAC ] - [ [arXiv](https://arxiv.org/abs/2303.11633) ] [ [Bib](https://xywu.me/research/cac/bib.txt) ] [ [2D Part](https://github.com/tianzhuotao/CAC) ] → [here](#context-aware-classifier) + + +- **Point Transformer V2: Grouped Vector Attention and Partition-based Pooling** +*Xiaoyang Wu, Yixing Lao, Li Jiang, Xihui Liu, Hengshuang Zhao* +Conference on Neural Information Processing Systems (**NeurIPS**) 2022 +[ Backbone ] [ PTv2 ] - [ [arXiv](https://arxiv.org/abs/2210.05666) ] [ 
[Bib](https://xywu.me/research/ptv2/bib.txt) ] → [here](#point-transformers) + + +- **Point Transformer** +*Hengshuang Zhao, Li Jiang, Jiaya Jia, Philip Torr, Vladlen Koltun* +IEEE International Conference on Computer Vision (**ICCV**) 2021 - Oral +[ Backbone ] [ PTv1 ] - [ [arXiv](https://arxiv.org/abs/2012.09164) ] [ [Bib](https://hszhao.github.io/papers/iccv21_pointtransformer_bib.txt) ] → [here](#point-transformers) + +Additionally, **Pointcept** integrates the following excellent work (contain above): +Backbone: +[MinkUNet](https://github.com/NVIDIA/MinkowskiEngine) ([here](#sparseunet)), +[SpUNet](https://github.com/traveller59/spconv) ([here](#sparseunet)), +[SPVCNN](https://github.com/mit-han-lab/spvnas) ([here](#spvcnn)), +[OACNNs](https://arxiv.org/abs/2403.14418) ([here](#oa-cnns)), +[PTv1](https://arxiv.org/abs/2012.09164) ([here](#point-transformers)), +[PTv2](https://arxiv.org/abs/2210.05666) ([here](#point-transformers)), +[PTv3](https://arxiv.org/abs/2312.10035) ([here](#point-transformers)), +[StratifiedFormer](https://github.com/dvlab-research/Stratified-Transformer) ([here](#stratified-transformer)), +[OctFormer](https://github.com/octree-nn/octformer) ([here](#octformer)), +[Swin3D](https://github.com/microsoft/Swin3D) ([here](#swin3d)); +Semantic Segmentation: +[Mix3d](https://github.com/kumuji/mix3d) ([here](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet/semseg-spunet-v1m1-0-base.py#L5)), +[CAC](https://arxiv.org/abs/2303.11633) ([here](#context-aware-classifier)); +Instance Segmentation: +[PointGroup](https://github.com/dvlab-research/PointGroup) ([here](#pointgroup)); +Pre-training: +[PointContrast](https://github.com/facebookresearch/PointContrast) ([here](#pointcontrast)), +[Contrastive Scene Contexts](https://github.com/facebookresearch/ContrastiveSceneContexts) ([here](#contrastive-scene-contexts)), +[Masked Scene Contrast](https://arxiv.org/abs/2303.14191) ([here](#masked-scene-contrast-msc)), +[Point Prompt 
Training](https://arxiv.org/abs/2308.09718) ([here](#point-prompt-training-ppt)), +[Sonata](https://arxiv.org/abs/2503.16429) ([here](#sonata)), +[Concerto](https://pointcept.github.io/Concerto/) ([here](#concerto)); +Datasets: +[ScanNet](http://www.scan-net.org/) ([here](#scannet-v2)), +[ScanNet200](http://www.scan-net.org/) ([here](#scannet-v2)), +[ScanNet++](https://kaldir.vc.in.tum.de/scannetpp/) ([here](#scannet)), +[S3DIS](https://docs.google.com/forms/d/e/1FAIpQLScDimvNMCGhy_rmBA2gHfDu3naktRm6A8BPwAWWDv-Uhm6Shw/viewform?c=0&w=1) ([here](#s3dis)), +[ArkitScene](https://github.com/apple/ARKitScenes) ([here](#arkitscenes)), +[HM3D](https://github.com/facebookresearch/habitat-matterport3d-dataset/) ([here](#habitat---matterport-3d-hm3d)), +[Matterport3D](https://niessner.github.io/Matterport/) ([here](#matterport3d)), +[Structured3D](https://structured3d-dataset.org/) ([here](#structured3d)), +[SemanticKITTI](http://www.semantic-kitti.org/) ([here](#semantickitti)), +[nuScenes](https://www.nuscenes.org/nuscenes) ([here](#nuscenes)), +[ModelNet40](https://modelnet.cs.princeton.edu/) ([here](#modelnet)), +[Waymo](https://waymo.com/open/) ([here](#waymo)). + + +## Highlights +- *Apr 2025* 🚀: We now support `wandb`, check the [Quick Start](#quick-start) training section for more information. (Thanks @Streakfull for his contribution!) +- *Mar 2025* 🚀: **Sonata** is accepted by CVPR 2025 and selected as one of the **Highlight** presentations (3.0% submissions)! We release the pre-training **[code](#sonata)** along with Pointcept v1.6.0 and provide an easy-to-use pre-trained model for inference, tuning, and visualization in our project **[repository](https://github.com/facebookresearch/sonata)** hosted by Meta. +- *May 2024*: In v1.5.2, we redesigned the default structure for each dataset for better performance. Please **re-preprocess** datasets or **download** our preprocessed datasets from **[here](https://huggingface.co/Pointcept)**. 
+- *Apr 2024*: **PTv3** is selected as one of the 90 **Oral** papers (3.3% accepted papers, 0.78% submissions) by CVPR'24! +- *Mar 2024*: We release code for **OA-CNNs**, accepted by CVPR'24. Issues related to **OA-CNNs** can be directed to @Pbihao. +- *Feb 2024*: **PTv3** and **PPT** are accepted by CVPR'24, another **two** papers by our Pointcept team have also been accepted by CVPR'24 🎉🎉🎉. We will make them publicly available soon! +- *Dec 2023*: **PTv3** is released on arXiv, and the code is available in Pointcept. PTv3 is an efficient backbone model that achieves SOTA performances across indoor and outdoor scenarios. +- *Aug 2023*: **PPT** is released on arXiv. PPT presents a multi-dataset pre-training framework that achieves SOTA performance in both **indoor** and **outdoor** scenarios. It is compatible with various existing pre-training frameworks and backbones. A **pre-release** version of the code is accessible; for those interested, please feel free to contact me directly for access. +- *Mar 2023*: We released our codebase, **Pointcept**, a highly potent tool for point cloud representation learning and perception. We welcome new work to join the _Pointcept_ family and highly recommend reading [Quick Start](#quick-start) before starting your trial. +- *Feb 2023*: **MSC** and **CeCo** accepted by CVPR 2023. _MSC_ is a highly efficient and effective pretraining framework that facilitates cross-dataset large-scale pretraining, while _CeCo_ is a segmentation method specifically designed for long-tail datasets. Both approaches are compatible with all existing backbone models in our codebase, and we will soon make the code available for public use. +- *Jan 2023*: **CAC**, oral work of AAAI 2023, has expanded its 3D result with the incorporation of Pointcept. This addition will allow CAC to serve as a pluggable segmentor within our codebase. +- *Sep 2022*: **PTv2** accepted by NeurIPS 2022. It is a continuation of the Point Transformer. 
The proposed GVA theory can apply to most existing attention mechanisms, while Grid Pooling is also a practical addition to existing pooling methods. + +## Citation +If you find _Pointcept_ useful to your research, please cite our work as encouragement. (੭ˊ꒳​ˋ)੭✧ +``` +@misc{pointcept2023, + title={Pointcept: A Codebase for Point Cloud Perception Research}, + author={Pointcept Contributors}, + howpublished = {\url{https://github.com/Pointcept/Pointcept}}, + year={2023} +} +``` + +## Overview + +- [Installation](#installation) +- [Data Preparation](#data-preparation) +- [Quick Start](#quick-start) +- [Model Zoo](#model-zoo) +- [Acknowledgement](#acknowledgement) + +## Installation + +### Requirements +- Ubuntu: 18.04 and above. +- CUDA: 11.3 and above. +- PyTorch: 1.10.0 and above. + +### Conda Environment +- **Method 1**: Utilize conda `environment.yml` to create a new environment with one line of code: + ```bash + # Create and activate conda environment named as 'pointcept-torch2.5.0-cu12.4' + # cuda: 12.4, pytorch: 2.5.0 + + # run `unset CUDA_PATH` if you have installed cuda in your local environment + conda env create -f environment.yml --verbose + conda activate pointcept-torch2.5.0-cu12.4 + ``` + +- **Method 2**: Use our pre-built Docker image and refer to the supported tags [here](https://hub.docker.com/repository/docker/pointcept/pointcept/general). Quickly verify the Docker image on your local machine with the following command: + ```bash + docker run --gpus all -it --rm pointcept/pointcept:v1.6.0-pytorch2.5.0-cuda12.4-cudnn9-devel bash + git clone https://github.com/facebookresearch/sonata + cd sonata + export PYTHONPATH=./ && python demo/0_pca.py + # Ignore the GUI error, we cannot expect a container to have its GUI, right? 
+ ``` + +- **Method 3**: Manually create a conda environment: + ```bash + conda create -n pointcept python=3.10 -y + conda activate pointcept + + # (Optional) If no CUDA installed + conda install nvidia/label/cuda-12.4.1::cuda conda-forge::cudnn conda-forge::gcc=13.2 conda-forge::gxx=13.2 -y + + conda install ninja -y + # Choose version you want here: https://pytorch.org/get-started/previous-versions/ + conda install pytorch==2.5.0 torchvision==0.20.0 torchaudio==2.5.0 pytorch-cuda=12.4 -c pytorch -y + conda install h5py pyyaml -c anaconda -y + conda install sharedarray tensorboard tensorboardx wandb yapf addict einops scipy plyfile termcolor timm -c conda-forge -y + conda install pytorch-cluster pytorch-scatter pytorch-sparse -c pyg -y + pip install torch-geometric + + # spconv (SparseUNet) + # refer https://github.com/traveller59/spconv + pip install spconv-cu124 + + # PPT (clip) + pip install ftfy regex tqdm + pip install git+https://github.com/openai/CLIP.git + + # PTv1 & PTv2 or precise eval + cd libs/pointops + # usual + python setup.py install + # docker & multi GPU arch + TORCH_CUDA_ARCH_LIST="ARCH LIST" python setup.py install + # e.g. 7.5: RTX 3000; 8.0: a100 More available in: https://developer.nvidia.com/cuda-gpus + TORCH_CUDA_ARCH_LIST="7.5 8.0" python setup.py install + cd ../.. + + # Open3D (visualization, optional) + pip install open3d + ``` + +## Data Preparation + +### ScanNet v2 + +The preprocessing supports semantic and instance segmentation for `ScanNet20`, `ScanNet200`, and `ScanNet Data Efficient`. +- Download the [ScanNet](http://www.scan-net.org/) v2 dataset. +- Run preprocessing code for raw ScanNet as follows: + + ```bash + # RAW_SCANNET_DIR: the directory of downloaded ScanNet v2 raw dataset. + # PROCESSED_SCANNET_DIR: the directory of the processed ScanNet dataset (output dir). 
+ python pointcept/datasets/preprocessing/scannet/preprocess_scannet.py --dataset_root ${RAW_SCANNET_DIR} --output_root ${PROCESSED_SCANNET_DIR} + ``` +- (Optional) Download ScanNet Data Efficient files: + ```bash + # download-scannet.py is the official download script + # or follow instructions here: https://kaldir.vc.in.tum.de/scannet_benchmark/data_efficient/documentation#download + python download-scannet.py --data_efficient -o ${RAW_SCANNET_DIR} + # unzip downloads + cd ${RAW_SCANNET_DIR}/tasks + unzip limited-annotation-points.zip + unzip limited-reconstruction-scenes.zip + # copy files to processed dataset folder + mkdir ${PROCESSED_SCANNET_DIR}/tasks + cp -r ${RAW_SCANNET_DIR}/tasks/points ${PROCESSED_SCANNET_DIR}/tasks + cp -r ${RAW_SCANNET_DIR}/tasks/scenes ${PROCESSED_SCANNET_DIR}/tasks + ``` +- (Alternative) Our preprocess data can be directly downloaded [[here](https://huggingface.co/datasets/Pointcept/scannet-compressed)], please agree the official license before download it. + +- Link processed dataset to codebase: + ```bash + # PROCESSED_SCANNET_DIR: the directory of the processed ScanNet dataset. + mkdir data + ln -s ${PROCESSED_SCANNET_DIR} ${CODEBASE_DIR}/data/scannet + ``` + +### ScanNet++ +- Download the [ScanNet++](https://kaldir.vc.in.tum.de/scannetpp/) dataset. +- Run preprocessing code for raw ScanNet++ as follows: + ```bash + # RAW_SCANNETPP_DIR: the directory of downloaded ScanNet++ raw dataset. + # PROCESSED_SCANNETPP_DIR: the directory of the processed ScanNet++ dataset (output dir). + # NUM_WORKERS: the number of workers for parallel preprocessing. + python pointcept/datasets/preprocessing/scannetpp/preprocess_scannetpp.py --dataset_root ${RAW_SCANNETPP_DIR} --output_root ${PROCESSED_SCANNETPP_DIR} --num_workers ${NUM_WORKERS} + ``` +- Sampling and chunking large point cloud data in train/val split as follows (only used for training): + ```bash + # PROCESSED_SCANNETPP_DIR: the directory of the processed ScanNet++ dataset (output dir). 
+ # NUM_WORKERS: the number of workers for parallel preprocessing. + python pointcept/datasets/preprocessing/sampling_chunking_data.py --dataset_root ${PROCESSED_SCANNETPP_DIR} --grid_size 0.01 --chunk_range 6 6 --chunk_stride 3 3 --split train --num_workers ${NUM_WORKERS} + python pointcept/datasets/preprocessing/sampling_chunking_data.py --dataset_root ${PROCESSED_SCANNETPP_DIR} --grid_size 0.01 --chunk_range 6 6 --chunk_stride 3 3 --split val --num_workers ${NUM_WORKERS} + ``` +- Link processed dataset to codebase: + ```bash + # PROCESSED_SCANNETPP_DIR: the directory of the processed ScanNet dataset. + mkdir data + ln -s ${PROCESSED_SCANNETPP_DIR} ${CODEBASE_DIR}/data/scannetpp + ``` + +### S3DIS + +- Download S3DIS data by filling this [Google form](https://docs.google.com/forms/d/e/1FAIpQLScDimvNMCGhy_rmBA2gHfDu3naktRm6A8BPwAWWDv-Uhm6Shw/viewform?c=0&w=1). Download the `Stanford3dDataset_v1.2.zip` file and unzip it. +- Fix error in `Area_5/office_19/Annotations/ceiling` Line 323474 (103.0๏ฟฝ0000 => 103.000000). +- (Optional) Download Full 2D-3D S3DIS dataset (no XYZ) from [here](https://github.com/alexsax/2D-3D-Semantics) for parsing normal. +- Run preprocessing code for S3DIS as follows: + + ```bash + # S3DIS_DIR: the directory of downloaded Stanford3dDataset_v1.2 dataset. + # RAW_S3DIS_DIR: the directory of Stanford2d3dDataset_noXYZ dataset. (optional, for parsing normal) + # PROCESSED_S3DIS_DIR: the directory of processed S3DIS dataset (output dir). 
+ + # S3DIS without aligned angle + python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} + # S3DIS with aligned angle + python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} --align_angle + # S3DIS with normal vector (recommended, normal is helpful) + python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} --raw_root ${RAW_S3DIS_DIR} --parse_normal + python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} --raw_root ${RAW_S3DIS_DIR} --align_angle --parse_normal + ``` + +- (Alternative) Our preprocess data can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/s3dis-compressed +)] (with normal vector and aligned angle), please agree with the official license before downloading it. + +- Link processed dataset to codebase. + ```bash + # PROCESSED_S3DIS_DIR: the directory of processed S3DIS dataset. + mkdir data + ln -s ${PROCESSED_S3DIS_DIR} ${CODEBASE_DIR}/data/s3dis + ``` + + +### ArkitScenes + +- Download ArkitScenes 3DOD split with the following commands: + ```bash + # RAW_AS_DIR: the directory of downloaded Raw ArkitScenes dataset. + git clone https://github.com/apple/ARKitScenes.git + cd ARKitScenes + python download_data.py 3dod --download_dir $RAW_AS_DIR --video_id_csv threedod/3dod_train_val_splits.csv + ``` +- Run preprocessing code for ArkitScenes as follows: + ```bash + # RAW_AS_DIR: the directory of downloaded ArkitScenes dataset. + # PROCESSED_AS_DIR: the directory of processed ArkitScenes dataset (output dir). + # NUM_WORKERS: Number for workers for preprocessing, default same as cpu count (might OOM). 
+ cd $POINTCEPT_DIR + export PYTHONPATH=./ + python pointcept/datasets/preprocessing/arkitscenes/preprocess_arkitscenes_mesh.py --dataset_root $RAW_AS_DIR --output_root $PROCESSED_AS_DIR --num_workers $NUM_WORKERS + ``` + +- (Alternative) Our preprocess data can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/arkitscenes-compressed +)] please read and agree the official [license](https://github.com/apple/ARKitScenes?tab=License-1-ov-file#readme) before download it. (Unzip with the following command: + `find ./ -name '*.tar.gz' | xargs -n 1 -P 8 -I {} sh -c 'tar -xzvf {}'`) + +- Link processed dataset to codebase. + ```bash + # PROCESSED_AR_DIR: the directory of processed ArkitScenes dataset (output dir). + mkdir data + ln -s ${PROCESSED_AR_DIR} ${CODEBASE_DIR}/data/arkitscenes + ``` + +### Habitat - Matterport 3D (HM3D) + +- Download HM3D `hm3d-train-glb-v0.2.tar` and `hm3d-val-glb-v0.2.tar` with instuction [here](https://github.com/facebookresearch/habitat-sim/blob/main/DATASETS.md#habitat-matterport-3d-research-dataset-hm3d) and unzip them. +- Run preprocessing code for HM3D as follows: + ```bash + # RAW_HM_DIR: the directory of downloaded HM3D dataset. + # PROCESSED_HM_DIR: the directory of processed HM3D dataset (output dir). + # NUM_WORKERS: Number for workers for preprocessing, default same as cpu count (might OOM). + export PYTHONPATH=./ + python pointcept/datasets/preprocessing/hm3d/preprocess_hm3d.py --dataset_root $RAW_HM_DIR --output_root $PROCESSED_HM_DIR --density 0.02 --num_workers $NUM_WORKERS + ``` + +- (Alternative) Our preprocess data can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/hm3d-compressed +)] please read and agree the official [license](https://matterport.com/legal/matterport-end-user-license-agreement-academic-use-model-data) before download it. 
(Unzip with the following command: + `find ./ -name '*.tar.gz' | xargs -n 1 -P 4 -I {} sh -c 'tar -xzvf {}'`) + +- Link processed dataset to codebase. + ```bash + # PROCESSED_HM_DIR: the directory of processed HM3D dataset (output dir). + mkdir data + ln -s ${PROCESSED_HM_DIR} ${CODEBASE_DIR}/data/hm3d + ``` + +### Matterport3D +- Follow [this page](https://niessner.github.io/Matterport/#download) to request access to the dataset. +- Download the "region_segmentation" type, which represents the division of a scene into individual rooms. + ```bash + # download-mp.py is the official download script + # MATTERPORT3D_DIR: the directory of downloaded Matterport3D dataset. + python download-mp.py -o ${MATTERPORT3D_DIR} --type region_segmentations + ``` +- Unzip the region_segmentations data + ```bash + # MATTERPORT3D_DIR: the directory of downloaded Matterport3D dataset. + python pointcept/datasets/preprocessing/matterport3d/unzip_matterport3d_region_segmentation.py --dataset_root ${MATTERPORT3D_DIR} + ``` +- Run preprocessing code for Matterport3D as follows: + ```bash + # MATTERPORT3D_DIR: the directory of downloaded Matterport3D dataset. + # PROCESSED_MATTERPORT3D_DIR: the directory of processed Matterport3D dataset (output dir). + # NUM_WORKERS: the number of workers for this preprocessing. + python pointcept/datasets/preprocessing/matterport3d/preprocess_matterport3d_mesh.py --dataset_root ${MATTERPORT3D_DIR} --output_root ${PROCESSED_MATTERPORT3D_DIR} --num_workers ${NUM_WORKERS} + ``` +- Link processed dataset to codebase. + ```bash + # PROCESSED_MATTERPORT3D_DIR: the directory of processed Matterport3D dataset (output dir). + mkdir data + ln -s ${PROCESSED_MATTERPORT3D_DIR} ${CODEBASE_DIR}/data/matterport3d + ``` + +Following the instruction of [OpenRooms](https://github.com/ViLab-UCSD/OpenRooms), we remapped Matterport3D's categories to ScanNet 20 semantic categories with the addition of a ceiling category. 
+* (Alternative) Our preprocess data can also be downloaded [here](https://huggingface.co/datasets/Pointcept/matterport3d-compressed), please agree the official license before download it. + + +### Structured3D + +- Download Structured3D panorama related and perspective (full) related zip files by filling this [Google form](https://docs.google.com/forms/d/e/1FAIpQLSc0qtvh4vHSoZaW6UvlXYy79MbcGdZfICjh4_t4bYofQIVIdw/viewform?pli=1) (no need to unzip them). +- Organize all downloaded zip file in one folder (`${STRUCT3D_DIR}`). +- Run preprocessing code for Structured3D as follows: + ```bash + # STRUCT3D_DIR: the directory of downloaded Structured3D dataset. + # PROCESSED_STRUCT3D_DIR: the directory of processed Structured3D dataset (output dir). + # NUM_WORKERS: Number for workers for preprocessing, default same as cpu count (might OOM). + export PYTHONPATH=./ + python pointcept/datasets/preprocessing/structured3d/preprocess_structured3d.py --dataset_root ${STRUCT3D_DIR} --output_root ${PROCESSED_STRUCT3D_DIR} --num_workers ${NUM_WORKERS} --grid_size 0.01 --fuse_prsp --fuse_pano + ``` +Following the instruction of [Swin3D](https://arxiv.org/abs/2304.06906), we keep 25 categories with frequencies of more than 0.001, out of the original 40 categories. + +[//]: # (- (Alternative) Our preprocess data can also be downloaded [[here]()], please agree the official license before download it.) + +- (Alternative) Our preprocess data can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/structured3d-compressed +)] (with perspective views and panorama view, 471.7G after unzipping), please agree the official license before download it. (Unzip with the following command: + `find ./ -name '*.tar.gz' | xargs -n 1 -P 15 -I {} sh -c 'tar -xzvf {}'`) + +- Link processed dataset to codebase. + ```bash + # PROCESSED_STRUCT3D_DIR: the directory of processed Structured3D dataset (output dir). 
+ mkdir data + ln -s ${PROCESSED_STRUCT3D_DIR} ${CODEBASE_DIR}/data/structured3d + ``` + +### SemanticKITTI +- Download [SemanticKITTI](http://www.semantic-kitti.org/dataset.html#download) dataset. +- Link dataset to codebase. + ```bash + # SEMANTIC_KITTI_DIR: the directory of SemanticKITTI dataset. + # |- SEMANTIC_KITTI_DIR + # |- dataset + # |- sequences + # |- 00 + # |- 01 + # |- ... + + mkdir -p data + ln -s ${SEMANTIC_KITTI_DIR} ${CODEBASE_DIR}/data/semantic_kitti + ``` + +### nuScenes +- Download the official [nuScenes](https://www.nuscenes.org/nuscenes#download) dataset (with Lidar Segmentation) and organize the downloaded files as follows: + ```bash + NUSCENES_DIR + │── samples + │── sweeps + │── lidarseg + ... + │── v1.0-trainval + │── v1.0-test + ``` +- Run information preprocessing code (modified from OpenPCDet) for nuScenes as follows: + ```bash + # NUSCENES_DIR: the directory of downloaded nuScenes dataset. + # PROCESSED_NUSCENES_DIR: the directory of processed nuScenes dataset (output dir). + # MAX_SWEEPS: Max number of sweeps. Default: 10. + pip install nuscenes-devkit pyquaternion + python pointcept/datasets/preprocessing/nuscenes/preprocess_nuscenes_info.py --dataset_root ${NUSCENES_DIR} --output_root ${PROCESSED_NUSCENES_DIR} --max_sweeps ${MAX_SWEEPS} --with_camera + ``` +- (Alternative) Our preprocess nuScenes information data can also be downloaded [[here]( +https://huggingface.co/datasets/Pointcept/nuscenes-compressed)] (only processed information, still need to download raw dataset and link to the folder), please agree the official license before download it. + +- Link raw dataset to processed NuScene dataset folder: + ```bash + # NUSCENES_DIR: the directory of downloaded nuScenes dataset. + # PROCESSED_NUSCENES_DIR: the directory of processed nuScenes dataset (output dir). 
+ ln -s ${NUSCENES_DIR} ${PROCESSED_NUSCENES_DIR}/raw + ``` + then the processed nuscenes folder is organized as follows: + ```bash + nuscene + |── raw + │── samples + │── sweeps + │── lidarseg + ... + │── v1.0-trainval + │── v1.0-test + |── info + ``` + +- Link processed dataset to codebase. + ```bash + # PROCESSED_NUSCENES_DIR: the directory of processed nuScenes dataset (output dir). + mkdir data + ln -s ${PROCESSED_NUSCENES_DIR} ${CODEBASE_DIR}/data/nuscenes + ``` + +### Waymo +- Download the official [Waymo](https://waymo.com/open/download/) dataset (v1.4.3) and organize the downloaded files as follows: + ```bash + WAYMO_RAW_DIR + │── training + │── validation + │── testing + ``` +- Install the following dependency: + ```bash + # If shows "No matching distribution found", download whl directly from PyPI and install the package. + conda create -n waymo python=3.10 -y + conda activate waymo + pip install waymo-open-dataset-tf-2-12-0 + ``` +- Run the preprocessing code as follows: + ```bash + # WAYMO_DIR: the directory of the downloaded Waymo dataset. + # PROCESSED_WAYMO_DIR: the directory of the processed Waymo dataset (output dir). + # NUM_WORKERS: num workers for preprocessing + python pointcept/datasets/preprocessing/waymo/preprocess_waymo.py --dataset_root ${WAYMO_DIR} --output_root ${PROCESSED_WAYMO_DIR} --splits training validation --num_workers ${NUM_WORKERS} + ``` + +- Link processed dataset to the codebase. + ```bash + # PROCESSED_WAYMO_DIR: the directory of the processed Waymo dataset (output dir). + mkdir data + ln -s ${PROCESSED_WAYMO_DIR} ${CODEBASE_DIR}/data/waymo + ``` + +### ModelNet +- Download [modelnet40_normal_resampled.zip](https://huggingface.co/datasets/Pointcept/modelnet40_normal_resampled-compressed) and unzip +- Link dataset to the codebase. 
+ ```bash + mkdir -p data + ln -s ${MODELNET_DIR} ${CODEBASE_DIR}/data/modelnet40_normal_resampled + ``` + +## Quick Start + +### Training +**Train from scratch.** The training processing is based on configs in `configs` folder. +The training script will generate an experiment folder in `exp` folder and backup essential code in the experiment folder. +Training config, log, tensorboard, and checkpoints will also be saved into the experiment folder during the training process. +```bash +export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES} +# Script (Recommended) +sh scripts/train.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -c ${CONFIG_NAME} -n ${EXP_NAME} +# Direct +export PYTHONPATH=./ +python tools/train.py --config-file ${CONFIG_PATH} --num-gpus ${NUM_GPU} --options save_path=${SAVE_PATH} +``` + +For example: +```bash +# By script (Recommended) +# -p is default set as python and can be ignored +sh scripts/train.sh -p python -d scannet -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base +# Direct +export PYTHONPATH=./ +python tools/train.py --config-file configs/scannet/semseg-pt-v2m2-0-base.py --options save_path=exp/scannet/semseg-pt-v2m2-0-base +``` +**Resume training from checkpoint.** If the training process is interrupted by accident, the following script can resume training from a given checkpoint. +```bash +export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES} +# Script (Recommended) +# simply add "-r true" +sh scripts/train.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -c ${CONFIG_NAME} -n ${EXP_NAME} -r true +# Direct +export PYTHONPATH=./ +python tools/train.py --config-file ${CONFIG_PATH} --num-gpus ${NUM_GPU} --options save_path=${SAVE_PATH} resume=True weight=${CHECKPOINT_PATH} +``` +**Weights and Biases.** +Pointcept by default enables both `tensorboard` and `wandb`. There are some usage notes related to `wandb`: +1. Disable by set `enable_wandb=False`; +2. 
Sync with `wandb` remote server by running `wandb login` in the terminal or setting `wandb_key=YOUR_WANDB_KEY` in config. +3. The project name is "Pointcept" by default; customize it to your research project name by setting `wandb_project=YOUR_PROJECT_NAME` (e.g. Sonata-Dev, PointTransformerV3-Dev) + +### Testing +During training, model evaluation is performed on point clouds after grid sampling (voxelization), providing an initial assessment of model performance. ~~However, to obtain precise evaluation results, testing is **essential**~~ *(now we automatically run the testing process after training with the `PreciseEvaluator` hook)*. The testing process involves subsampling a dense point cloud into a sequence of voxelized point clouds, ensuring comprehensive coverage of all points. These sub-results are then predicted and collected to form a complete prediction of the entire point cloud. This approach yields higher evaluation results compared to simply mapping/interpolating the prediction. In addition, our testing code supports TTA (test time augmentation) testing, which further enhances the stability of evaluation performance. 
+ +```bash +# By script (Based on experiment folder created by training script) +sh scripts/test.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -n ${EXP_NAME} -w ${CHECKPOINT_NAME} +# Direct +export PYTHONPATH=./ +python tools/test.py --config-file ${CONFIG_PATH} --num-gpus ${NUM_GPU} --options save_path=${SAVE_PATH} weight=${CHECKPOINT_PATH} +``` +For example: +```bash +# By script (Based on experiment folder created by training script) +# -p is default set as python and can be ignored +# -w is default set as model_best and can be ignored +sh scripts/test.sh -p python -d scannet -n semseg-pt-v2m2-0-base -w model_best +# Direct +export PYTHONPATH=./ +python tools/test.py --config-file configs/scannet/semseg-pt-v2m2-0-base.py --options save_path=exp/scannet/semseg-pt-v2m2-0-base weight=exp/scannet/semseg-pt-v2m2-0-base/model/model_best.pth +``` + +The TTA can be disabled by replace `data.test.test_cfg.aug_transform = [...]` with: + +```python +data = dict( + train = dict(...), + val = dict(...), + test = dict( + ..., + test_cfg = dict( + ..., + aug_transform = [ + [dict(type="RandomRotateTargetAngle", angle=[0], axis="z", center=[0, 0, 0], p=1)] + ] + ) + ) +) +``` + +### Offset +`Offset` is the separator of point clouds in batch data, and it is similar to the concept of `Batch` in PyG. +A visual illustration of batch and offset is as follows: +

+ + + + + + pointcept + +
+ +

+ +## Model Zoo +### 1. Backbones and Semantic Segmentation +#### SparseUNet + +_Pointcept_ provides `SparseUNet` implemented by `SpConv` and `MinkowskiEngine`. The SpConv version is recommended since SpConv is easy to install and faster than MinkowskiEngine. Meanwhile, SpConv is also widely applied in outdoor perception. + +- **SpConv (recommend)** + +The SpConv version `SparseUNet` in the codebase was fully rewrite from `MinkowskiEngine` version, example running script is as follows: + +```bash +# ScanNet val +sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base +# ScanNet200 +sh scripts/train.sh -g 4 -d scannet200 -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base +# S3DIS +sh scripts/train.sh -g 4 -d s3dis -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base +# S3DIS (with normal) +sh scripts/train.sh -g 4 -d s3dis -c semseg-spunet-v1m1-0-cn-base -n semseg-spunet-v1m1-0-cn-base +# SemanticKITTI +sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base +# nuScenes +sh scripts/train.sh -g 4 -d nuscenes -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base +# ModelNet40 +sh scripts/train.sh -g 2 -d modelnet40 -c cls-spunet-v1m1-0-base -n cls-spunet-v1m1-0-base + +# ScanNet Data Efficient +sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la20 -n semseg-spunet-v1m1-2-efficient-la20 +sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la50 -n semseg-spunet-v1m1-2-efficient-la50 +sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la100 -n semseg-spunet-v1m1-2-efficient-la100 +sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la200 -n semseg-spunet-v1m1-2-efficient-la200 +sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr1 -n semseg-spunet-v1m1-2-efficient-lr1 +sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr5 -n semseg-spunet-v1m1-2-efficient-lr5 
+sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr10 -n semseg-spunet-v1m1-2-efficient-lr10 +sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr20 -n semseg-spunet-v1m1-2-efficient-lr20 + +# Profile model run time +sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-0-enable-profiler -n semseg-spunet-v1m1-0-enable-profiler +``` + +- **MinkowskiEngine** + +The MinkowskiEngine version `SparseUNet` in the codebase was modified from the original MinkowskiEngine repo, and example running scripts are as follows: +1. Install MinkowskiEngine, refer to https://github.com/NVIDIA/MinkowskiEngine +2. Training with the following example scripts: +```bash +# Uncomment "# from .sparse_unet import *" in "pointcept/models/__init__.py" +# Uncomment "# from .mink_unet import *" in "pointcept/models/sparse_unet/__init__.py" +# ScanNet +sh scripts/train.sh -g 4 -d scannet -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base +# ScanNet200 +sh scripts/train.sh -g 4 -d scannet200 -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base +# S3DIS +sh scripts/train.sh -g 4 -d s3dis -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base +# SemanticKITTI +sh scripts/train.sh -g 2 -d semantic_kitti -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base +``` + +#### OA-CNNs +Introducing Omni-Adaptive 3D CNNs (**OA-CNNs**), a family of networks that integrates a lightweight module to greatly enhance the adaptivity of sparse CNNs at minimal computational cost. Without any self-attention modules, **OA-CNNs** favorably surpass point transformers in terms of accuracy in both indoor and outdoor scenes, with much less latency and memory cost. For issues related to **OA-CNNs**, please @Pbihao. 
+```bash +# ScanNet +sh scripts/train.sh -g 4 -d scannet -c semseg-oacnns-v1m1-0-base -n semseg-oacnns-v1m1-0-base +``` + +#### Point Transformers +- **PTv3** + +[PTv3](https://arxiv.org/abs/2312.10035) is an efficient backbone model that achieves SOTA performances across indoor and outdoor scenarios. The full PTv3 relies on FlashAttention, while FlashAttention relies on CUDA 11.6 and above; make sure your local Pointcept environment satisfies the requirements. + +If you can not upgrade your local environment to satisfy the requirements (CUDA >= 11.6), then you can disable FlashAttention by setting the model parameter `enable_flash` to `False` and reducing the `enc_patch_size` and `dec_patch_size` to a lower level (e.g. 128). + +Enabling FlashAttention forcibly disables RPE and reduces the attention precision to fp16. If you require these features, please disable `enable_flash` and adjust `enable_rpe`, `upcast_attention` and `upcast_softmax`. + +Detailed instructions and experiment records (containing weights) are available on the [project repository](https://github.com/Pointcept/PointTransformerV3). Example running scripts are as follows: +```bash +# Scratched ScanNet +sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base +# PPT joint training (ScanNet + Structured3D) and evaluate in ScanNet +sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-1-ppt-extreme -n semseg-pt-v3m1-1-ppt-extreme + +# Scratched ScanNet200 +sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base +# Fine-tuning from PPT joint training (ScanNet + Structured3D) with ScanNet200 +# PTV3_PPT_WEIGHT_PATH: Path to model weight trained by PPT multi-dataset joint training +# e.g. 
exp/scannet/semseg-pt-v3m1-1-ppt-extreme/model/model_best.pth +sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v3m1-1-ppt-ft -n semseg-pt-v3m1-1-ppt-ft -w ${PTV3_PPT_WEIGHT_PATH} + +# Scratched ScanNet++ +sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base +# Scratched ScanNet++ test +sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v3m1-1-submit -n semseg-pt-v3m1-1-submit + + +# Scratched S3DIS +sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base +# an example of disabling flash_attention and enabling rpe. +sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v3m1-0-rpe -n semseg-pt-v3m1-0-rpe +# PPT joint training (ScanNet + S3DIS + Structured3D) and evaluate in S3DIS +sh scripts/train.sh -g 8 -d s3dis -c semseg-pt-v3m1-1-ppt-extreme -n semseg-pt-v3m1-1-ppt-extreme +# S3DIS 6-fold cross validation +# 1. The default configs are evaluated on Area_5, modify the "data.train.split", "data.val.split", and "data.test.split" to make the config evaluated on Area_1 ~ Area_6 respectively. +# 2. Train and evaluate the model on each split of areas and gather result files located in "exp/s3dis/EXP_NAME/result/Area_x.pth" in one single folder, noted as RECORD_FOLDER. +# 3. Run the following script to get S3DIS 6-fold cross validation performance: +export PYTHONPATH=./ +python tools/test_s3dis_6fold.py --record_root ${RECORD_FOLDER} + +# Scratched nuScenes +sh scripts/train.sh -g 4 -d nuscenes -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base +# Scratched Waymo +sh scripts/train.sh -g 4 -d waymo -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base + +# More configs and exp records for PTv3 will be available soon. 
+``` + +Indoor semantic segmentation +| Model | Benchmark | Additional Data | Num GPUs | Val mIoU | Config | Tensorboard | Exp Record | +| :---: | :---: |:---------------:| :---: | :---: | :---: | :---: | :---: | +| PTv3 | ScanNet | ✗ | 4 | 77.6% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/scannet-semseg-pt-v3m1-0-base) | +| PTv3 + PPT | ScanNet | ✓ | 8 | 78.5% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet/semseg-pt-v3m1-1-ppt-extreme.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/scannet-semseg-pt-v3m1-1-ppt-extreme) | +| PTv3 | ScanNet200 | ✗ | 4 | 35.3% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet200/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) |[link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/scannet200-semseg-pt-v3m1-0-base)| +| PTv3 | S3DIS (Area5) | ✗ | 4 | 73.6% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/s3dis/semseg-pt-v3m1-0-rpe.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/s3dis-semseg-pt-v3m1-0-rpe) | +| PTv3 + PPT | S3DIS (Area5) | ✓ | 8 | 75.4% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/s3dis/semseg-pt-v3m1-1-ppt-extreme.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/s3dis-semseg-pt-v3m1-1-ppt-extreme) | +_**\*Released model weights are trained for v1.5.1, weights for v1.5.2 and later is still ongoing.**_ + +- **PTv2 mode2** + +The original PTv2 was trained on 4 * RTX a6000 (48G 
memory). Even enabling AMP, the memory cost of the original PTv2 is slightly larger than 24G. Considering GPUs with 24G memory are much more accessible, I tuned the PTv2 on the latest Pointcept and made it runnable on 4 * RTX 3090 machines. + +`PTv2 Mode2` enables AMP and disables _Position Encoding Multiplier_ & _Grouped Linear_. During our further research, we found that precise coordinates are not necessary for point cloud understanding (Replacing precise coordinates with grid coordinates doesn't influence the performance. Also, SparseUNet is an example). As for Grouped Linear, my implementation of Grouped Linear seems to cost more memory than the Linear layer provided by PyTorch. Benefiting from the codebase and better parameter tuning, we also relieve the overfitting problem. The reproducing performance is even better than the results reported in our paper. + +Example running scripts are as follows: + +```bash +# ptv2m2: PTv2 mode2, disable PEM & Grouped Linear, GPU memory cost < 24G (recommend) +# ScanNet +sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base +sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m2-3-lovasz -n semseg-pt-v2m2-3-lovasz + +# ScanNet test +sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m2-1-submit -n semseg-pt-v2m2-1-submit +# ScanNet200 +sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base +# ScanNet++ +sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base +# ScanNet++ test +sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v2m2-1-submit -n semseg-pt-v2m2-1-submit +# S3DIS +sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base +# SemanticKITTI +sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base +# nuScenes +sh scripts/train.sh -g 4 -d nuscenes -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base +``` + +- **PTv2 mode1** + +`PTv2 mode1` is the original PTv2 
we reported in our paper, example running scripts are as follows: + +```bash +# ptv2m1: PTv2 mode1, Original PTv2, GPU memory cost > 24G +# ScanNet +sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m1-0-base -n semseg-pt-v2m1-0-base +# ScanNet200 +sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v2m1-0-base -n semseg-pt-v2m1-0-base +# S3DIS +sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v2m1-0-base -n semseg-pt-v2m1-0-base +``` + +- **PTv1** + +The original PTv1 is also available in our Pointcept codebase. I haven't run PTv1 for a long time, but I have ensured that the example running script works well. + +```bash +# ScanNet +sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v1-0-base -n semseg-pt-v1-0-base +# ScanNet200 +sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v1-0-base -n semseg-pt-v1-0-base +# S3DIS +sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v1-0-base -n semseg-pt-v1-0-base +``` + + +#### Stratified Transformer +1. Additional requirements: +```bash +pip install torch-points3d +# Fix dependency conflict caused by installing torch-points3d +pip uninstall SharedArray +pip install SharedArray==3.2.1 + +cd libs/pointops2 +python setup.py install +cd ../.. +``` +2. Uncomment `# from .stratified_transformer import *` in `pointcept/models/__init__.py`. +3. Refer to [Optional Installation](installation) to install the dependencies. +4. Training with the following example scripts: +```bash +# stv1m1: Stratified Transformer mode1, Modified from the original Stratified Transformer code. +# stv1m2: Stratified Transformer mode2, My rewrite version (recommended). 
+ +# ScanNet +sh scripts/train.sh -g 4 -d scannet -c semseg-st-v1m2-0-refined -n semseg-st-v1m2-0-refined +sh scripts/train.sh -g 4 -d scannet -c semseg-st-v1m1-0-origin -n semseg-st-v1m1-0-origin +# ScanNet200 +sh scripts/train.sh -g 4 -d scannet200 -c semseg-st-v1m2-0-refined -n semseg-st-v1m2-0-refined +# S3DIS +sh scripts/train.sh -g 4 -d s3dis -c semseg-st-v1m2-0-refined -n semseg-st-v1m2-0-refined +``` + +#### SPVCNN +`SPVCNN` is a baseline model of [SPVNAS](https://github.com/mit-han-lab/spvnas); it is also a practical baseline for outdoor datasets. +1. Install torchsparse: +```bash +# refer to https://github.com/mit-han-lab/torchsparse +# install method without sudo apt install +conda install google-sparsehash -c bioconda +export C_INCLUDE_PATH=${CONDA_PREFIX}/include:$C_INCLUDE_PATH +export CPLUS_INCLUDE_PATH=${CONDA_PREFIX}/include:$CPLUS_INCLUDE_PATH +pip install --upgrade git+https://github.com/mit-han-lab/torchsparse.git +``` +2. Training with the following example scripts: +```bash +# SemanticKITTI +sh scripts/train.sh -g 2 -d semantic_kitti -c semseg-spvcnn-v1m1-0-base -n semseg-spvcnn-v1m1-0-base +``` + +#### OctFormer +OctFormer from _OctFormer: Octree-based Transformers for 3D Point Clouds_. +1. Additional requirements: +```bash +cd libs +git clone https://github.com/octree-nn/dwconv.git +pip install ./dwconv +pip install ocnn +``` +2. Uncomment `# from .octformer import *` in `pointcept/models/__init__.py`. +3. Training with the following example scripts: +```bash +# ScanNet +sh scripts/train.sh -g 4 -d scannet -c semseg-octformer-v1m1-0-base -n semseg-octformer-v1m1-0-base +``` + +#### Swin3D +Swin3D from _Swin3D: A Pretrained Transformer Backbone for 3D Indoor Scene Understanding_. +1. Additional requirements: +```bash +# 1. Install MinkEngine v0.5.4, follow readme in https://github.com/NVIDIA/MinkowskiEngine; +# 2. 
Install Swin3D, mainly for cuda operation: +cd libs +git clone https://github.com/microsoft/Swin3D.git +cd Swin3D +pip install ./ +``` +2. Uncomment `# from .swin3d import *` in `pointcept/models/__init__.py`. +3. Pre-Training with the following example scripts (Structured3D preprocessing refer [here](#structured3d)): +```bash +# Structured3D + Swin-S +sh scripts/train.sh -g 4 -d structured3d -c semseg-swin3d-v1m1-0-small -n semseg-swin3d-v1m1-0-small +# Structured3D + Swin-L +sh scripts/train.sh -g 4 -d structured3d -c semseg-swin3d-v1m1-1-large -n semseg-swin3d-v1m1-1-large + +# Addition +# Structured3D + SpUNet +sh scripts/train.sh -g 4 -d structured3d -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base +# Structured3D + PTv2 +sh scripts/train.sh -g 4 -d structured3d -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base +``` +4. Fine-tuning with the following example scripts: +```bash +# ScanNet + Swin-S +sh scripts/train.sh -g 4 -d scannet -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-0-small -n semseg-swin3d-v1m1-0-small +# ScanNet + Swin-L +sh scripts/train.sh -g 4 -d scannet -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-1-large -n semseg-swin3d-v1m1-1-large + +# S3DIS + Swin-S (here we provide config support S3DIS normal vector) +sh scripts/train.sh -g 4 -d s3dis -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-0-small -n semseg-swin3d-v1m1-0-small +# S3DIS + Swin-L (here we provide config support S3DIS normal vector) +sh scripts/train.sh -g 4 -d s3dis -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-1-large -n semseg-swin3d-v1m1-1-large +``` + +#### Context-Aware Classifier +`Context-Aware Classifier` is a segmentor that can further boost the performance of each backbone, as a replacement for `Default Segmentor`. 
Training with the following example scripts: +```bash +# ScanNet +sh scripts/train.sh -g 4 -d scannet -c semseg-cac-v1m1-0-spunet-base -n semseg-cac-v1m1-0-spunet-base +sh scripts/train.sh -g 4 -d scannet -c semseg-cac-v1m1-1-spunet-lovasz -n semseg-cac-v1m1-1-spunet-lovasz +sh scripts/train.sh -g 4 -d scannet -c semseg-cac-v1m1-2-ptv2-lovasz -n semseg-cac-v1m1-2-ptv2-lovasz + +# ScanNet200 +sh scripts/train.sh -g 4 -d scannet200 -c semseg-cac-v1m1-0-spunet-base -n semseg-cac-v1m1-0-spunet-base +sh scripts/train.sh -g 4 -d scannet200 -c semseg-cac-v1m1-1-spunet-lovasz -n semseg-cac-v1m1-1-spunet-lovasz +sh scripts/train.sh -g 4 -d scannet200 -c semseg-cac-v1m1-2-ptv2-lovasz -n semseg-cac-v1m1-2-ptv2-lovasz +``` + + +### 2. Instance Segmentation +#### PointGroup +[PointGroup](https://github.com/dvlab-research/PointGroup) is a baseline framework for point cloud instance segmentation. +1. Additional requirements: +```bash +conda install -c bioconda google-sparsehash +cd libs/pointgroup_ops +python setup.py install --include_dirs=${CONDA_PREFIX}/include +cd ../.. +``` +2. Uncomment `# from .point_group import *` in `pointcept/models/__init__.py`. +3. Training with the following example scripts: +```bash +# ScanNet +sh scripts/train.sh -g 4 -d scannet -c insseg-pointgroup-v1m1-0-spunet-base -n insseg-pointgroup-v1m1-0-spunet-base +# S3DIS +sh scripts/train.sh -g 4 -d s3dis -c insseg-pointgroup-v1m1-0-spunet-base -n insseg-pointgroup-v1m1-0-spunet-base +``` + +### 3. Pre-training +#### Concerto +Follow the instructions [here](https://github.com/Pointcept/Pointcept/tree/main/pointcept/models/concerto). + +#### Sonata +Follow the instructions [here](https://github.com/Pointcept/Pointcept/tree/main/pointcept/models/sonata). + +#### Masked Scene Contrast (MSC) +1. Pre-training with the following example scripts: +```bash +# ScanNet +sh scripts/train.sh -g 8 -d scannet -c pretrain-msc-v1m1-0-spunet-base -n pretrain-msc-v1m1-0-spunet-base +``` + +2. 
Fine-tuning with the following example scripts: +enable PointGroup ([here](#pointgroup)) before fine-tuning on instance segmentation task. +```bash +# ScanNet20 Semantic Segmentation +sh scripts/train.sh -g 8 -d scannet -w exp/scannet/pretrain-msc-v1m1-0-spunet-base/model/model_last.pth -c semseg-spunet-v1m1-4-ft -n semseg-msc-v1m1-0f-spunet-base +# ScanNet20 Instance Segmentation (enable PointGroup before running the script) +sh scripts/train.sh -g 4 -d scannet -w exp/scannet/pretrain-msc-v1m1-0-spunet-base/model/model_last.pth -c insseg-pointgroup-v1m1-0-spunet-base -n insseg-msc-v1m1-0f-pointgroup-spunet-base +``` +3. Example log and weight: [[Pretrain](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/wuxy_connect_hku_hk/EYvNV4XUJ_5Mlk-g15RelN4BW_P8lVBfC_zhjC_BlBDARg?e=UoGFWH)] [[Semseg](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/wuxy_connect_hku_hk/EQkDiv5xkOFKgCpGiGtAlLwBon7i8W6my3TIbGVxuiTttQ?e=tQFnbr)] + +#### Point Prompt Training (PPT) +PPT presents a multi-dataset pre-training framework, and it is compatible with various existing pre-training frameworks and backbones. +1. 
PPT supervised joint training with the following example scripts: +```bash +# ScanNet + Structured3d, validate on ScanNet (S3DIS might cause long data time, w/o S3DIS for a quick validation) >= 3090 * 8 +sh scripts/train.sh -g 8 -d scannet -c semseg-ppt-v1m1-0-sc-st-spunet -n semseg-ppt-v1m1-0-sc-st-spunet +sh scripts/train.sh -g 8 -d scannet -c semseg-ppt-v1m1-1-sc-st-spunet-submit -n semseg-ppt-v1m1-1-sc-st-spunet-submit +# ScanNet + S3DIS + Structured3d, validate on S3DIS (>= a100 * 8) +sh scripts/train.sh -g 8 -d s3dis -c semseg-ppt-v1m1-0-s3-sc-st-spunet -n semseg-ppt-v1m1-0-s3-sc-st-spunet +# SemanticKITTI + nuScenes + Waymo, validate on SemanticKITTI (bs12 >= 3090 * 4 >= 3090 * 8, v1m1-0 is still on tuning) +sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-ppt-v1m1-0-nu-sk-wa-spunet -n semseg-ppt-v1m1-0-nu-sk-wa-spunet +sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-ppt-v1m2-0-sk-nu-wa-spunet -n semseg-ppt-v1m2-0-sk-nu-wa-spunet +sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-ppt-v1m2-1-sk-nu-wa-spunet-submit -n semseg-ppt-v1m2-1-sk-nu-wa-spunet-submit +# SemanticKITTI + nuScenes + Waymo, validate on nuScenes (bs12 >= 3090 * 4; bs24 >= 3090 * 8, v1m1-0 is still on tuning)) +sh scripts/train.sh -g 4 -d nuscenes -c semseg-ppt-v1m1-0-nu-sk-wa-spunet -n semseg-ppt-v1m1-0-nu-sk-wa-spunet +sh scripts/train.sh -g 4 -d nuscenes -c semseg-ppt-v1m2-0-nu-sk-wa-spunet -n semseg-ppt-v1m2-0-nu-sk-wa-spunet +sh scripts/train.sh -g 4 -d nuscenes -c semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit -n semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit +``` + +#### PointContrast +1. Preprocess and link ScanNet-Pair dataset (pair-wise matching with ScanNet raw RGB-D frame, ~1.5T): +```bash +# RAW_SCANNET_DIR: the directory of downloaded ScanNet v2 raw dataset. +# PROCESSED_SCANNET_PAIR_DIR: the directory of processed ScanNet pair dataset (output dir). 
+python pointcept/datasets/preprocessing/scannet/scannet_pair/preprocess.py --dataset_root ${RAW_SCANNET_DIR} --output_root ${PROCESSED_SCANNET_PAIR_DIR} +ln -s ${PROCESSED_SCANNET_PAIR_DIR} ${CODEBASE_DIR}/data/scannet +``` +2. Pre-training with the following example scripts: +```bash +# ScanNet +sh scripts/train.sh -g 8 -d scannet -c pretrain-msc-v1m1-1-spunet-pointcontrast -n pretrain-msc-v1m1-1-spunet-pointcontrast +``` +3. For fine-tuning, refer to [MSC](#masked-scene-contrast-msc). + +#### Contrastive Scene Contexts +1. Preprocess and link ScanNet-Pair dataset (refer to [PointContrast](#pointcontrast)). +2. Pre-training with the following example scripts: +```bash +# ScanNet +sh scripts/train.sh -g 8 -d scannet -c pretrain-msc-v1m2-0-spunet-csc -n pretrain-msc-v1m2-0-spunet-csc +``` +3. For fine-tuning, refer to [MSC](#masked-scene-contrast-msc). + +## Acknowledgement +_Pointcept_ is designed by [Xiaoyang](https://xywu.me/), named by [Yixing](https://github.com/yxlao) and the logo is created by [Yuechen](https://julianjuaner.github.io/). It is derived from [Hengshuang](https://hszhao.github.io/)'s [Semseg](https://github.com/hszhao/semseg) and inspired by several repos, e.g., [MinkowskiEngine](https://github.com/NVIDIA/MinkowskiEngine), [pointnet2](https://github.com/charlesq34/pointnet2), [mmcv](https://github.com/open-mmlab/mmcv/tree/master/mmcv), and [Detectron2](https://github.com/facebookresearch/detectron2). 
\ No newline at end of file diff --git a/point_transformer_v3/external/pointcept_minimal/configs/_base_/dataset/scannet.py b/point_transformer_v3/external/pointcept_minimal/configs/_base_/dataset/scannet.py new file mode 100644 index 0000000..eeb9488 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/_base_/dataset/scannet.py @@ -0,0 +1,26 @@ +class_names = [ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", +] + +data = dict( + names=class_names, +) diff --git a/point_transformer_v3/external/pointcept_minimal/configs/_base_/default_runtime.py b/point_transformer_v3/external/pointcept_minimal/configs/_base_/default_runtime.py new file mode 100644 index 0000000..32786d1 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/_base_/default_runtime.py @@ -0,0 +1,48 @@ +weight = None # path to model weight +resume = False # whether to resume training process +evaluate = True # evaluate after each epoch training process +test_only = False # test process + +seed = None # train process will init a random seed and record +save_path = "exp/default" +num_worker = 16 # total worker in all gpu +batch_size = 16 # total batch size in all gpu +gradient_accumulation_steps = 1 # total steps to accumulate gradients for +batch_size_val = None # auto adapt to bs 1 for each gpu +batch_size_test = None # auto adapt to bs 1 for each gpu +epoch = 100 # total epoch, data loop = epoch // eval_epoch +eval_epoch = 100 # sche total eval & checkpoint epoch +clip_grad = None # disable with None, enable with a float + +sync_bn = False +enable_amp = False +amp_dtype = "float16" +empty_cache = False +empty_cache_per_epoch = False +find_unused_parameters = False + +enable_wandb = False +wandb_project = "pointcept" # custom your project name e.g. 
Sonata, PTv3 +wandb_key = None # wandb token, default is None. If None, login with `wandb login` in your terminal + +mix_prob = 0 +param_dicts = None # example: param_dicts = [dict(keyword="block", lr_scale=0.1)] + +memory_snapshot_enabled = False + +# hook +hooks = [ + dict(type="CheckpointLoader"), + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + dict(type="CheckpointSaver", save_freq=None), + dict(type="PreciseEvaluator", test_last=False), +] + +# Trainer +train = dict(type="DefaultTrainer") + +# Tester +test = dict(type="SemSegTester", verbose=True) diff --git a/point_transformer_v3/external/pointcept_minimal/configs/_base_/fvdb_runtime.py b/point_transformer_v3/external/pointcept_minimal/configs/_base_/fvdb_runtime.py new file mode 100644 index 0000000..049e382 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/_base_/fvdb_runtime.py @@ -0,0 +1,55 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + +""" +Runtime configuration for FVDB-based Point Transformer V3 models. 
+""" + +from __future__ import annotations + +weight = None # path to model weight +resume = False # whether to resume training process +evaluate = True # evaluate after each epoch training process +test_only = False # test process + +seed = None # train process will init a random seed and record +save_path = "exp/default" +num_worker = 16 # total worker in all gpu +batch_size = 16 # total batch size in all gpu +gradient_accumulation_steps = 1 # total steps to accumulate gradients for +batch_size_val = None # auto adapt to bs 1 for each gpu +batch_size_test = None # auto adapt to bs 1 for each gpu +epoch = 100 # total epoch, data loop = epoch // eval_epoch +eval_epoch = 100 # sche total eval & checkpoint epoch +clip_grad = None # disable with None, enable with a float + +sync_bn = False +enable_amp = False +amp_dtype = "float16" +empty_cache = False +empty_cache_per_epoch = False +find_unused_parameters = False + +enable_wandb = True +wandb_project = "pointcept" # custom your project name e.g. Sonata, PTv3 +wandb_key = None # wandb token, default is None. 
If None, login with `wandb login` in your terminal + +mix_prob = 0 +param_dicts = None # example: param_dicts = [dict(keyword="block", lr_scale=0.1)] + +# hook +hooks = [ + dict(type="CheckpointLoader"), + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + dict(type="CheckpointSaver", save_freq=None), + dict(type="PreciseEvaluator", test_last=False), +] + +# Trainer +train = dict(type="DefaultTrainer") + +# Tester +test = dict(type="SemSegTester", verbose=True) diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-base.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-base.py new file mode 100644 index 0000000..6f35823 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-base.py @@ -0,0 +1,317 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3m1", + in_channels=6, + order=("z", "z-trans", "hilbert", "hilbert-trans"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.3, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + enc_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + 
pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # 
dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + 
center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate repro_fvdb +# sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test-1g-conv.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test-1g-conv.py new file mode 100644 index 0000000..285c5b3 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test-1g-conv.py @@ -0,0 +1,333 @@ +# Copyright 
Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 1 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = False +# init_params_to_one = True # Initialize all model parameters to 1 before training +# disable_backward = True # Disable backward pass and optimizer step for debugging + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3fvdb", + in_channels=6, + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_heads=(2, 4, 8, 16, 32), + dec_depths=(2, 2, 2, 2), + dec_channels=(256, 128, 64, 64), + dec_num_heads=(16, 8, 4, 4), + # enc_depths=(6,), + # enc_channels=(64, ), + # enc_num_heads=(2, ), + # dec_depths=(), + # dec_channels=(), + # dec_num_heads=(), + patch_size=1024, + drop_path=0.3, + proj_drop=0.0, + no_conv_in_cpe=False, + # order_type=("z", "z-trans", "hilbert", "hilbert-trans"), + order_type=("z",), # trailing comma required: ("z") is a plain str, not a 1-tuple + enable_batch_norm=False, + qk_scale=None, + shuffle_orders=True, + embedding_mode="linear", + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 20 +eval_epoch = 20 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = True +wandb_project = "fvdb18" +wandb_key = None # never commit API tokens; authenticate via `wandb login` or the WANDB_API_KEY environment variable + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="ModelHook"), +
dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + # dict(type="CheckpointSaver", save_freq=None), + # dict(type="PreciseEvaluator", test_last=False), +] + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", 
"grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + 
type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# sh scripts/train.sh -g 1 -d scannet -c semseg-pt-v3m1-0-fvdb-test-1g-conv -n semseg-pt-v3m1-0-fvdb-test-1g-conv diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test-1g-mem.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test-1g-mem.py new file mode 100644 index 0000000..22f2536 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test-1g-mem.py @@ -0,0 +1,340 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 4 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = False +# init_params_to_one = True # Initialize all model parameters to 1 before training +# disable_backward = 
True # Disable backward pass and optimizer step for debugging + +memory_snapshot = True +memory_snapshot_interval = 20 +memory_snapshot_max_entries = 100000 + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3fvdb", + in_channels=6, + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_heads=(2, 4, 8, 16, 32), + dec_depths=(2, 2, 2, 2), + dec_channels=(256, 128, 64, 64), + dec_num_heads=(16, 8, 4, 4), + # enc_depths=(10,), + # enc_channels=(64, ), + # enc_num_heads=(2, ), + # dec_depths=(), + # dec_channels=(), + # dec_num_heads=(), + patch_size=1024, + drop_path=0.0, + proj_drop=0.0, + no_conv_in_cpe=True, + order_type=("z", "z-trans", "hilbert", "hilbert-trans"), + # order_type=("hilbert-trans",), # single order: keep the trailing comma so this stays a tuple + enable_batch_norm=True, + qk_scale=None, + shuffle_orders=True, + embedding_mode="linear", + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 1 +eval_epoch = 1 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = False +# enable_wandb = True +# wandb_project = "fvdb18" +# wandb_key = None # token redacted; authenticate via `wandb login` instead of committing a key + +hooks = [ + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + # dict(type="CheckpointLoader"), + # dict(type="SemSegEvaluator"), + # dict(type="CheckpointSaver", save_freq=None), + # dict(type="PreciseEvaluator", test_last=False), +] + +data = dict( +
num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="PointMultipleSample", multiple=1024, mode="first"), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + 
transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + 
type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# CUDA_VISIBLE_DEVICES=0 sh scripts/train.sh -g 1 -d scannet -c semseg-pt-v3m1-0-fvdb-test-1g-mem -n semseg-pt-v3m1-0-fvdb-test-1g-mem +# rsync -avx --include='*/' --include='*.pickle' --exclude='*' nvpark:/home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal/exp/scannet/semseg-pt-v3m1-0-fvdb-test-1g-mem ./ diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test-1g.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test-1g.py new file mode 100644 index 0000000..bd1dd0f --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test-1g.py @@ -0,0 +1,334 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 1 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = False +# init_params_to_one = True # Initialize all model 
parameters to 1 before training +# disable_backward = True # Disable backward pass and optimizer step for debugging + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3fvdb", + in_channels=6, + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_heads=(2, 4, 8, 16, 32), + dec_depths=(2, 2, 2, 2), + dec_channels=(256, 128, 64, 64), + dec_num_heads=(16, 8, 4, 4), + # enc_depths=(10,), + # enc_channels=(64, ), + # enc_num_heads=(2, ), + # dec_depths=(), + # dec_channels=(), + # dec_num_heads=(), + patch_size=1024, + drop_path=0.0, + proj_drop=0.0, + no_conv_in_cpe=True, + order_type=("z", "z-trans", "hilbert", "hilbert-trans"), + # order_type=("hilbert-trans",), # single order: keep the trailing comma so this stays a tuple + enable_batch_norm=True, + qk_scale=None, + shuffle_orders=True, + embedding_mode="linear", + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 20 +eval_epoch = 20 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = True +wandb_project = "fvdb18" +wandb_key = None # never commit API tokens; authenticate via `wandb login` or the WANDB_API_KEY environment variable + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + # dict(type="CheckpointSaver", save_freq=None), + # dict(type="PreciseEvaluator", test_last=False), +] + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", +
"cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="PointMultipleSample", multiple=1024, mode="first"), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", 
keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, 
+ ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# sh scripts/train.sh -g 1 -d scannet -c semseg-pt-v3m1-0-fvdb-test-1g -n semseg-pt-v3m1-0-fvdb-test-1g diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test-4g-conv.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test-4g-conv.py new file mode 100644 index 0000000..19d7f7e --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test-4g-conv.py @@ -0,0 +1,334 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 16 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = False +# init_params_to_one = True # Initialize all model parameters to 1 before training +# disable_backward = True # Disable backward pass and optimizer step for debugging + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3fvdb", + in_channels=6, + 
enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_heads=(2, 4, 8, 16, 32), + dec_depths=(2, 2, 2, 2), + dec_channels=(256, 128, 64, 64), + dec_num_heads=(16, 8, 4, 4), + # enc_depths=(10,), + # enc_channels=(64, ), + # enc_num_heads=(2, ), + # dec_depths=(), + # dec_channels=(), + # dec_num_heads=(), + patch_size=1024, + drop_path=0.0, + proj_drop=0.0, + no_conv_in_cpe=False, + order_type=("z", "z-trans", "hilbert", "hilbert-trans"), + # order_type=("hilbert-trans",), # single order: keep the trailing comma so this stays a tuple + enable_batch_norm=True, + qk_scale=None, + shuffle_orders=True, + embedding_mode="linear", + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 200 +eval_epoch = 40 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = True +wandb_project = "fvdb18" +wandb_key = None # never commit API tokens; authenticate via `wandb login` or the WANDB_API_KEY environment variable + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + # dict(type="CheckpointSaver", save_freq=None), + # dict(type="PreciseEvaluator", test_last=False), +] + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", +
data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="PointMultipleSample", multiple=1024, mode="first"), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + 
type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + 
axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# CUDA_VISIBLE_DEVICES=4,5,6,7 sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v3m1-0-fvdb-test-4g-conv -n semseg-pt-v3m1-0-fvdb-test-4g-conv diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test-4g-prof.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test-4g-prof.py new file mode 100644 index 0000000..d5deaf5 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test-4g-prof.py @@ -0,0 +1,335 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 16 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = False +# init_params_to_one = True # Initialize all model parameters to 1 before training +# disable_backward = True # Disable backward pass and optimizer step for debugging + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3fvdb", + in_channels=6, + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_heads=(2, 4, 8, 16, 32), + dec_depths=(2, 2, 2, 2), + dec_channels=(256, 128, 64, 64), + dec_num_heads=(16, 8, 4, 4), + # enc_depths=(10,), + # enc_channels=(64, ), + # 
enc_num_heads=(2, ), + # dec_depths=(), + # dec_channels=(), + # dec_num_heads=(), + patch_size=1024, + drop_path=0.0, + proj_drop=0.0, + no_conv_in_cpe=True, + order_type=("z", "z-trans", "hilbert", "hilbert-trans"), + # order_type=("hilbert-trans"), + enable_batch_norm=True, + qk_scale=None, + shuffle_orders=True, + embedding_mode="linear", + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 1 +eval_epoch = 1 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = False +# enable_wandb = True +# wandb_project = "fvdb18" +# wandb_key = None # key redacted: authenticate via "wandb login" or the WANDB_API_KEY env var + +hooks = [ + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + # dict(type="CheckpointLoader"), + # dict(type="SemSegEvaluator"), + # dict(type="CheckpointSaver", save_freq=None), + # dict(type="PreciseEvaluator", test_last=False), +] + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), 
center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="PointMultipleSample", multiple=1024, mode="first"), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + 
transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", 
scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# CUDA_VISIBLE_DEVICES=0,1,2,3 nsys profile --trace=nvtx,cuda -o semseg-pt-v3m1-0-fvdb-test-4g-prof --force-overwrite true sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v3m1-0-fvdb-test-4g-prof -n semseg-pt-v3m1-0-fvdb-test-4g-prof diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test-4g.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test-4g.py new file mode 100644 index 0000000..fa5baf7 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test-4g.py @@ -0,0 +1,334 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 16 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = False +# init_params_to_one = True # Initialize all model parameters to 1 before training +# disable_backward = True # Disable backward pass and optimizer step for debugging + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3fvdb", + in_channels=6, + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_heads=(2, 4, 8, 16, 32), + dec_depths=(2, 2, 2, 2), + dec_channels=(256, 128, 64, 64), + dec_num_heads=(16, 8, 4, 4), + # enc_depths=(10,), + # enc_channels=(64, ), + # enc_num_heads=(2, ), + # dec_depths=(), + # dec_channels=(), + # dec_num_heads=(), + patch_size=1024, + drop_path=0.0, + proj_drop=0.0, + 
no_conv_in_cpe=True, + order_type=("z", "z-trans", "hilbert", "hilbert-trans"), + # order_type=("hilbert-trans"), + enable_batch_norm=True, + qk_scale=None, + shuffle_orders=True, + embedding_mode="linear", + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 200 +eval_epoch = 40 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = True +wandb_project = "fvdb19" +wandb_key = "f4d9183db0ec1720f1cf902c9d866024936d944c" + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + # dict(type="CheckpointSaver", save_freq=None), + # dict(type="PreciseEvaluator", test_last=False), +] + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], 
axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="PointMultipleSample", multiple=1024, mode="first"), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + 
grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 
1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# CUDA_VISIBLE_DEVICES=4,5,6,7 sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v3m1-0-fvdb-test-4g -n semseg-pt-v3m1-0-fvdb-test-4g diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test-1g-conv.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test-1g-conv.py new file mode 100644 index 0000000..15aa575 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test-1g-conv.py @@ -0,0 +1,351 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 1 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = False +# init_params_to_one = True # Initialize all model parameters to 1 before training +# disable_backward = True # Disable backward pass and optimizer step for debugging + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3m1", + in_channels=6, + # order=("z", "z-trans", "hilbert", "hilbert-trans"), + order=("z"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + # stride=(), + # enc_depths=(6,), + # enc_channels=(64,), + # enc_num_head=(2, ), + # enc_patch_size=(1024, ), + # dec_depths=(), + # dec_channels=(), + # dec_num_head=(), + # dec_patch_size=(), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.3, + 
shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + enc_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + enable_batch_norm=False, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 20 +eval_epoch = 20 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = True +wandb_project = "fvdb18" +wandb_key = "f4d9183db0ec1720f1cf902c9d866024936d944c" + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + # dict(type="CheckpointSaver", save_freq=None), + # dict(type="PreciseEvaluator", test_last=False), +] + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", 
p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + 
test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + 
center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# sh scripts/train.sh -g 1 -d scannet -c semseg-pt-v3m1-0-test-1g-conv -n semseg-pt-v3m1-0-test-1g-conv diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test-1g-mem.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test-1g-mem.py new file mode 100644 index 0000000..f365823 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test-1g-mem.py @@ -0,0 +1,357 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 4 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = False +# init_params_to_one = True # Initialize all model parameters to 1 before training +# disable_backward = True # Disable backward pass and optimizer step for debugging + +seed = 42 +memory_snapshot = True +memory_snapshot_interval = 20 +memory_snapshot_max_entries = 100000 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3m1", + in_channels=6, + order=("z", "z-trans", "hilbert", "hilbert-trans"), + # order=("z-trans"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + # stride=(), + # enc_depths=(10,), + # enc_channels=(64,), + # enc_num_head=(2, ), + # enc_patch_size=(1024, ), + # dec_depths=(), + # dec_channels=(), + # 
dec_num_head=(), + # dec_patch_size=(), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.0, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + enc_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + enable_batch_norm=True, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 1 +eval_epoch = 1 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = False +# enable_wandb = True +# wandb_project = "fvdb18" +# wandb_key = None # key redacted: authenticate via "wandb login" or the WANDB_API_KEY env var + +hooks = [ + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + # dict(type="CheckpointLoader"), + # dict(type="SemSegEvaluator"), + # dict(type="CheckpointSaver", save_freq=None), + # dict(type="PreciseEvaluator", test_last=False), +] + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + 
type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="PointMultipleSample", multiple=1024, mode="first"), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + 
feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + 
[ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# CUDA_VISIBLE_DEVICES=0 sh scripts/train.sh -g 1 -d scannet -c semseg-pt-v3m1-0-test-1g-mem -n semseg-pt-v3m1-0-test-1g-mem +# rsync -avx --include='*/' --include='*.pickle' --exclude='*' nvpark:/home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal/exp/scannet/semseg-pt-v3m1-0-test-1g-mem ./ diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test-1g.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test-1g.py new file mode 100644 index 0000000..bcba12f --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test-1g.py @@ -0,0 +1,352 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 1 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = False +# init_params_to_one = True # Initialize all model parameters to 1 before training +# disable_backward = True # Disable backward pass and optimizer step for debugging + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3m1", + in_channels=6, + order=("z", "z-trans", "hilbert", "hilbert-trans"), + # order=("z-trans"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + 
enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + # stride=(), + # enc_depths=(10,), + # enc_channels=(64,), + # enc_num_head=(2, ), + # enc_patch_size=(1024, ), + # dec_depths=(), + # dec_channels=(), + # dec_num_head=(), + # dec_patch_size=(), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.0, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + enc_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + enable_batch_norm=True, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 20 +eval_epoch = 20 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = True +wandb_project = "fvdb18" +wandb_key = "f4d9183db0ec1720f1cf902c9d866024936d944c" + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + # dict(type="CheckpointSaver", save_freq=None), + # dict(type="PreciseEvaluator", test_last=False), +] + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + 
"window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="PointMultipleSample", multiple=1024, mode="first"), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + 
type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 
0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# sh scripts/train.sh -g 1 -d scannet -c semseg-pt-v3m1-0-test-1g -n semseg-pt-v3m1-0-test-1g diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test-4g-conv.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test-4g-conv.py new file mode 100644 index 0000000..58b81ec --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test-4g-conv.py @@ -0,0 +1,353 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 16 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = False +# init_params_to_one = True # Initialize all model parameters to 1 before training +# disable_backward = True # Disable backward pass and optimizer step for debugging + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3m1", + in_channels=6, + order=("z", "z-trans", "hilbert", "hilbert-trans"), + # order=("z-trans"), + 
stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + # stride=(), + # enc_depths=(10,), + # enc_channels=(64,), + # enc_num_head=(2, ), + # enc_patch_size=(1024, ), + # dec_depths=(), + # dec_channels=(), + # dec_num_head=(), + # dec_patch_size=(), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.0, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + enc_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + enable_batch_norm=True, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + no_conv_in_cpe=False, + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 200 +eval_epoch = 40 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = True +wandb_project = "fvdb18" +wandb_key = "f4d9183db0ec1720f1cf902c9d866024936d944c" + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + # dict(type="CheckpointSaver", save_freq=None), + # dict(type="PreciseEvaluator", test_last=False), +] + 
+data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="PointMultipleSample", multiple=1024, mode="first"), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + 
data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ 
+ dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# CUDA_VISIBLE_DEVICES=0,1,2,3 sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v3m1-0-test-4g-conv -n semseg-pt-v3m1-0-test-4g-conv diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test-4g-prof.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test-4g-prof.py new file mode 100644 index 0000000..b47e530 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test-4g-prof.py @@ -0,0 +1,353 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 16 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = False +# init_params_to_one = True # Initialize all model parameters to 1 before training +# disable_backward = True # Disable backward pass and optimizer step for debugging + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + 
num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3m1", + in_channels=6, + order=("z", "z-trans", "hilbert", "hilbert-trans"), + # order=("z-trans"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + # stride=(), + # enc_depths=(10,), + # enc_channels=(64,), + # enc_num_head=(2, ), + # enc_patch_size=(1024, ), + # dec_depths=(), + # dec_channels=(), + # dec_num_head=(), + # dec_patch_size=(), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.0, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + enc_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + enable_batch_norm=True, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 1 +eval_epoch = 1 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = False +# enable_wandb = True +# wandb_project = "fvdb18" +# wandb_key = "f4d9183db0ec1720f1cf902c9d866024936d944c" + +hooks = [ + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + 
dict(type="InformationWriter"), + # dict(type="CheckpointLoader"), + # dict(type="SemSegEvaluator"), + # dict(type="CheckpointSaver", save_freq=None), + # dict(type="PreciseEvaluator", test_last=False), +] + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="PointMultipleSample", multiple=1024, mode="first"), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + 
dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + 
dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# CUDA_VISIBLE_DEVICES=0,1,2,3 nsys profile --trace=nvtx,cuda -o semseg-pt-v3m1-0-test-4g-prof --force-overwrite true sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v3m1-0-test-4g-prof -n semseg-pt-v3m1-0-test-4g-prof diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test-4g.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test-4g.py new file mode 100644 index 0000000..daa6b0c --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test-4g.py @@ -0,0 +1,353 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 16 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 
+empty_cache = False +enable_amp = False +# init_params_to_one = True # Initialize all model parameters to 1 before training +# disable_backward = True # Disable backward pass and optimizer step for debugging + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3m1", + in_channels=6, + order=("z", "z-trans", "hilbert", "hilbert-trans"), + # order=("z-trans"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + # stride=(), + # enc_depths=(10,), + # enc_channels=(64,), + # enc_num_head=(2, ), + # enc_patch_size=(1024, ), + # dec_depths=(), + # dec_channels=(), + # dec_num_head=(), + # dec_patch_size=(), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.0, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + enc_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + enable_batch_norm=True, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + no_conv_in_cpe=True, + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 200 +eval_epoch = 40 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = 
"/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = True +wandb_project = "fvdb19" +wandb_key = "f4d9183db0ec1720f1cf902c9d866024936d944c" + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + # dict(type="CheckpointSaver", save_freq=None), + # dict(type="PreciseEvaluator", test_last=False), +] + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + 
), + # dict(type="PointMultipleSample", multiple=1024, mode="first"), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + 
# Reference PTv3 ("PT-v3m1") semantic-segmentation config for ScanNet.
# Runtime defaults are inherited from the shared base file listed in `_base_`.
_base_ = ["../_base_/default_runtime.py"]

# ---- misc custom setting ----
batch_size = 12  # bs: total bs in all gpus
num_worker = 24
mix_prob = 0.8
empty_cache = False
enable_amp = True

# ---- model settings ----
model = dict(
    type="DefaultSegmentorV2",
    num_classes=20,
    backbone_out_channels=64,
    backbone=dict(
        type="PT-v3m1",
        in_channels=6,
        order=("z", "z-trans", "hilbert", "hilbert-trans"),
        stride=(2, 2, 2, 2),
        # encoder: five stages
        enc_depths=(2, 2, 2, 6, 2),
        enc_channels=(32, 64, 128, 256, 512),
        enc_num_head=(2, 4, 8, 16, 32),
        enc_patch_size=(1024, 1024, 1024, 1024, 1024),
        # decoder: four stages
        dec_depths=(2, 2, 2, 2),
        dec_channels=(64, 64, 128, 256),
        dec_num_head=(4, 4, 8, 16),
        dec_patch_size=(1024, 1024, 1024, 1024),
        # attention / regularisation switches
        mlp_ratio=4,
        qkv_bias=True,
        qk_scale=None,
        attn_drop=0.0,
        proj_drop=0.0,
        drop_path=0.3,
        shuffle_orders=True,
        pre_norm=True,
        enable_rpe=False,
        enable_flash=True,
        upcast_attention=False,
        upcast_softmax=False,
        enc_mode=False,
        # pdnorm_* switches
        pdnorm_bn=False,
        pdnorm_ln=False,
        pdnorm_decouple=True,
        pdnorm_adaptive=False,
        pdnorm_affine=True,
        pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
    ),
    # Both losses carry weight 1.0 and skip label -1.
    criteria=[
        dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
        dict(
            type="LovaszLoss",
            mode="multiclass",
            loss_weight=1.0,
            ignore_index=-1,
        ),
    ],
)

# ---- scheduler settings ----
epoch = 800
optimizer = dict(
    type="AdamW",
    lr=0.006,
    weight_decay=0.05,
)
scheduler = dict(
    type="OneCycleLR",
    # second entry equals the lr given to the "block" keyword group below
    max_lr=[0.006, 0.0006],
    pct_start=0.05,
    anneal_strategy="cos",
    div_factor=10.0,
    final_div_factor=1000.0,
)
param_dicts = [dict(keyword="block", lr=0.0006)]

# ---- dataset settings ----
dataset_type = "ScanNetDataset"
data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet"


data = dict(
    num_classes=20,
    ignore_index=-1,
    # Class names are runtime strings and are kept verbatim.
    names=[
        "wall",
        "floor",
        "cabinet",
        "bed",
        "chair",
        "sofa",
        "table",
        "door",
        "window",
        "bookshelf",
        "picture",
        "counter",
        "desk",
        "curtain",
        "refridgerator",
        "shower curtain",
        "toilet",
        "sink",
        "bathtub",
        "otherfurniture",
    ],
    train=dict(
        type=dataset_type,
        split="train",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(
                type="RandomDropout",
                dropout_ratio=0.2,
                dropout_application_ratio=0.2,
            ),
            # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
            dict(
                type="RandomRotate",
                angle=[-1, 1],
                axis="z",
                center=[0, 0, 0],
                p=0.5,
            ),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
            dict(type="RandomScale", scale=[0.9, 1.1]),
            # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
            dict(type="RandomFlip", p=0.5),
            dict(type="RandomJitter", sigma=0.005, clip=0.02),
            dict(
                type="ElasticDistortion",
                distortion_params=[[0.2, 0.4], [0.8, 1.6]],
            ),
            dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
            dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
            dict(type="ChromaticJitter", p=0.95, std=0.05),
            # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
            # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
            ),
            dict(type="SphereCrop", point_max=102400, mode="random"),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            # dict(type="ShufflePoint"),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment"),
                feat_keys=("color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    val=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            # keep the labels under a second key before grid sampling
            dict(type="Copy", keys_dict={"segment": "origin_segment"}),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
                return_inverse=True,
            ),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=(
                    "coord",
                    "grid_coord",
                    "segment",
                    "origin_segment",
                    "inverse",
                ),
                feat_keys=("color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    test=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(type="NormalizeColor"),
        ],
        test_mode=True,
        test_cfg=dict(
            voxelize=dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="test",
                return_grid_coord=True,
            ),
            crop=None,
            post_transform=[
                dict(type="CenterShift", apply_z=False),
                dict(type="ToTensor"),
                dict(
                    type="Collect",
                    keys=("coord", "grid_coord", "index"),
                    feat_keys=("color", "normal"),
                ),
            ],
            # 13 augmentation pipelines, in this order:
            #   4 x z-rotation only (angle factors 0, 1/2, 1, 3/2),
            #   4 x the same rotations followed by a fixed 0.95 scale,
            #   4 x the same rotations followed by a fixed 1.05 scale,
            #   1 x flip.
            # Comprehension variables stay local, so no extra module-level
            # names are created.
            aug_transform=(
                [
                    [
                        dict(
                            type="RandomRotateTargetAngle",
                            angle=[angle],
                            axis="z",
                            center=[0, 0, 0],
                            p=1,
                        )
                    ]
                    for angle in (0, 1 / 2, 1, 3 / 2)
                ]
                + [
                    [
                        dict(
                            type="RandomRotateTargetAngle",
                            angle=[angle],
                            axis="z",
                            center=[0, 0, 0],
                            p=1,
                        ),
                        dict(type="RandomScale", scale=[scale, scale]),
                    ]
                    for scale in (0.95, 1.05)
                    for angle in (0, 1 / 2, 1, 3 / 2)
                ]
                + [[dict(type="RandomFlip", p=1)]]
            ),
        ),
    ),
)

# conda activate repro_fvdb
# sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
# Copyright Contributors to the OpenVDB Project
# SPDX-License-Identifier: Apache-2.0

# Single-GPU debug config: fVDB PTv3 backbone ("PT-v3fvdb") on ScanNet
# semantic segmentation, with no_conv_in_cpe=True, batch norm enabled and a
# single "hilbert" ordering.
# NOTE(review): loaders for this config style usually export every
# module-level name as a config key, so no helper variables or imports are
# introduced here - confirm before adding any.

_base_ = ["../_base_/default_runtime.py"]

# misc custom setting
batch_size = 1  # bs: total bs in all gpus
num_worker = 24
mix_prob = 0
empty_cache = False
enable_amp = False
# init_params_to_one = True  # Initialize all model parameters to 1 before training
# disable_backward = True  # Disable backward pass and optimizer step for debugging

seed = 42

# model settings
model = dict(
    type="DefaultSegmentorV2",
    num_classes=20,
    backbone_out_channels=64,
    backbone=dict(
        type="PT-v3fvdb",
        in_channels=6,
        enc_depths=(2, 2, 2, 6, 2),
        enc_channels=(32, 64, 128, 256, 512),
        enc_num_heads=(2, 4, 8, 16, 32),
        dec_depths=(2, 2, 2, 2),
        dec_channels=(256, 128, 64, 64),
        dec_num_heads=(16, 8, 4, 4),
        # Single-stage alternative kept for debugging:
        # enc_depths=(6,),
        # enc_channels=(64, ),
        # enc_num_heads=(2, ),
        # dec_depths=(),
        # dec_channels=(),
        # dec_num_heads=(),
        patch_size=1024,
        drop_path=0.0,
        proj_drop=0.0,
        no_conv_in_cpe=True,
        # order_type=("z", "z-trans", "hilbert", "hilbert-trans"),
        # One-element tuple: without the trailing comma, ("hilbert") is just
        # the string "hilbert", and iterating it yields single characters.
        order_type=("hilbert",),
        enable_batch_norm=True,
        qk_scale=None,
        shuffle_orders=True,
        embedding_mode="linear",
    ),
    # Both losses carry weight 1.0 and skip label -1.
    criteria=[
        dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
        dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
    ],
)

# scheduler settings
epoch = 20
eval_epoch = 20
optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
scheduler = dict(
    type="OneCycleLR",
    # second entry equals the lr given to the "block" keyword group below
    max_lr=[0.006, 0.0006],
    pct_start=0.05,
    anneal_strategy="cos",
    div_factor=10.0,
    final_div_factor=1000.0,
)
param_dicts = [dict(keyword="block", lr=0.0006)]

# dataset settings
dataset_type = "ScanNetDataset"
data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet"

# Weights & Biases logging.
# SECURITY: never commit an API token. The token that used to be hard-coded
# here must be treated as leaked and revoked. With None, W&B falls back to
# the WANDB_API_KEY environment variable or the credentials stored by
# `wandb login`.
# NOTE(review): confirm the trainer forwards None unchanged to wandb.
enable_wandb = True
wandb_project = "fvdb18"
wandb_key = None

hooks = [
    dict(type="CheckpointLoader"),
    dict(type="ModelHook"),
    dict(type="IterationTimer", warmup_iter=2),
    dict(type="InformationWriter"),
    dict(type="SemSegEvaluator"),
    # dict(type="CheckpointSaver", save_freq=None),
    # dict(type="PreciseEvaluator", test_last=False),
]

data = dict(
    num_classes=20,
    ignore_index=-1,
    # Class names are runtime strings and are kept verbatim.
    names=[
        "wall",
        "floor",
        "cabinet",
        "bed",
        "chair",
        "sofa",
        "table",
        "door",
        "window",
        "bookshelf",
        "picture",
        "counter",
        "desk",
        "curtain",
        "refridgerator",
        "shower curtain",
        "toilet",
        "sink",
        "bathtub",
        "otherfurniture",
    ],
    train=dict(
        type=dataset_type,
        split="train",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(
                type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
            ),
            # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
            dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
            dict(type="RandomScale", scale=[0.9, 1.1]),
            # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
            dict(type="RandomFlip", p=0.5),
            dict(type="RandomJitter", sigma=0.005, clip=0.02),
            dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
            dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
            dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
            dict(type="ChromaticJitter", p=0.95, std=0.05),
            # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
            # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
            ),
            # multiple=1024 equals the backbone patch_size above
            dict(type="PointMultipleSample", multiple=1024, mode="first"),
            dict(type="SphereCrop", point_max=102400, mode="random"),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            # dict(type="ShufflePoint"),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment"),
                feat_keys=("color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    val=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            # keep the labels under a second key before grid sampling
            dict(type="Copy", keys_dict={"segment": "origin_segment"}),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
                return_inverse=True,
            ),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"),
                feat_keys=("color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    test=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(type="NormalizeColor"),
        ],
        test_mode=True,
        test_cfg=dict(
            voxelize=dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="test",
                return_grid_coord=True,
            ),
            crop=None,
            post_transform=[
                dict(type="CenterShift", apply_z=False),
                dict(type="ToTensor"),
                dict(
                    type="Collect",
                    keys=("coord", "grid_coord", "index"),
                    feat_keys=("color", "normal"),
                ),
            ],
            # 13 augmentation pipelines, in this order:
            #   4 x z-rotation only (angle factors 0, 1/2, 1, 3/2),
            #   4 x the same rotations followed by a fixed 0.95 scale,
            #   4 x the same rotations followed by a fixed 1.05 scale,
            #   1 x flip.
            # Comprehension variables stay local, so no extra module-level
            # names are created.
            aug_transform=(
                [
                    [
                        dict(
                            type="RandomRotateTargetAngle",
                            angle=[angle],
                            axis="z",
                            center=[0, 0, 0],
                            p=1,
                        )
                    ]
                    for angle in (0, 1 / 2, 1, 3 / 2)
                ]
                + [
                    [
                        dict(
                            type="RandomRotateTargetAngle",
                            angle=[angle],
                            axis="z",
                            center=[0, 0, 0],
                            p=1,
                        ),
                        dict(type="RandomScale", scale=[scale, scale]),
                    ]
                    for scale in (0.95, 1.05)
                    for angle in (0, 1 / 2, 1, 3 / 2)
                ]
                + [[dict(type="RandomFlip", p=1)]]
            ),
        ),
    ),
)

# conda activate chptv3
# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal
# sh scripts/train.sh -g 1 -d scannet -c semseg-pt-v3m1-0-fvdb-test-1g-2 -n semseg-pt-v3m1-0-fvdb-test-1g-2
# Copyright Contributors to the OpenVDB Project
# SPDX-License-Identifier: Apache-2.0

# Single-GPU debug config: fVDB PTv3 backbone ("PT-v3fvdb") on ScanNet
# semantic segmentation, with no_conv_in_cpe=False, batch norm disabled and a
# single "z" ordering.
# NOTE(review): loaders for this config style usually export every
# module-level name as a config key, so no helper variables or imports are
# introduced here - confirm before adding any.

_base_ = ["../_base_/default_runtime.py"]

# misc custom setting
batch_size = 1  # bs: total bs in all gpus
num_worker = 24
mix_prob = 0
empty_cache = False
enable_amp = False
# init_params_to_one = True  # Initialize all model parameters to 1 before training
# disable_backward = True  # Disable backward pass and optimizer step for debugging

seed = 42

# model settings
model = dict(
    type="DefaultSegmentorV2",
    num_classes=20,
    backbone_out_channels=64,
    backbone=dict(
        type="PT-v3fvdb",
        in_channels=6,
        enc_depths=(2, 2, 2, 6, 2),
        enc_channels=(32, 64, 128, 256, 512),
        enc_num_heads=(2, 4, 8, 16, 32),
        dec_depths=(2, 2, 2, 2),
        dec_channels=(256, 128, 64, 64),
        dec_num_heads=(16, 8, 4, 4),
        # Single-stage alternative kept for debugging:
        # enc_depths=(6,),
        # enc_channels=(64, ),
        # enc_num_heads=(2, ),
        # dec_depths=(),
        # dec_channels=(),
        # dec_num_heads=(),
        patch_size=1024,
        drop_path=0.3,
        proj_drop=0.0,
        no_conv_in_cpe=False,
        # order_type=("z", "z-trans", "hilbert", "hilbert-trans"),
        # One-element tuple: without the trailing comma, ("z") is just the
        # string "z" rather than a tuple of orderings.
        order_type=("z",),
        enable_batch_norm=False,
        qk_scale=None,
        shuffle_orders=True,
        embedding_mode="linear",
    ),
    # Both losses carry weight 1.0 and skip label -1.
    criteria=[
        dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
        dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
    ],
)

# scheduler settings
epoch = 20
eval_epoch = 20
optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
scheduler = dict(
    type="OneCycleLR",
    # second entry equals the lr given to the "block" keyword group below
    max_lr=[0.006, 0.0006],
    pct_start=0.05,
    anneal_strategy="cos",
    div_factor=10.0,
    final_div_factor=1000.0,
)
param_dicts = [dict(keyword="block", lr=0.0006)]

# dataset settings
dataset_type = "ScanNetDataset"
data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet"

# Weights & Biases logging.
# SECURITY: never commit an API token. The token that used to be hard-coded
# here must be treated as leaked and revoked. With None, W&B falls back to
# the WANDB_API_KEY environment variable or the credentials stored by
# `wandb login`.
# NOTE(review): confirm the trainer forwards None unchanged to wandb.
enable_wandb = True
wandb_project = "fvdb18"
wandb_key = None

hooks = [
    dict(type="CheckpointLoader"),
    dict(type="ModelHook"),
    dict(type="IterationTimer", warmup_iter=2),
    dict(type="InformationWriter"),
    dict(type="SemSegEvaluator"),
    # dict(type="CheckpointSaver", save_freq=None),
    # dict(type="PreciseEvaluator", test_last=False),
]

data = dict(
    num_classes=20,
    ignore_index=-1,
    # Class names are runtime strings and are kept verbatim.
    names=[
        "wall",
        "floor",
        "cabinet",
        "bed",
        "chair",
        "sofa",
        "table",
        "door",
        "window",
        "bookshelf",
        "picture",
        "counter",
        "desk",
        "curtain",
        "refridgerator",
        "shower curtain",
        "toilet",
        "sink",
        "bathtub",
        "otherfurniture",
    ],
    train=dict(
        type=dataset_type,
        split="train",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(
                type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
            ),
            # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
            dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
            dict(type="RandomScale", scale=[0.9, 1.1]),
            # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
            dict(type="RandomFlip", p=0.5),
            dict(type="RandomJitter", sigma=0.005, clip=0.02),
            dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
            dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
            dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
            dict(type="ChromaticJitter", p=0.95, std=0.05),
            # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
            # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
            ),
            dict(type="SphereCrop", point_max=102400, mode="random"),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            # dict(type="ShufflePoint"),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment"),
                feat_keys=("color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    val=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            # keep the labels under a second key before grid sampling
            dict(type="Copy", keys_dict={"segment": "origin_segment"}),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
                return_inverse=True,
            ),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"),
                feat_keys=("color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    test=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(type="NormalizeColor"),
        ],
        test_mode=True,
        test_cfg=dict(
            voxelize=dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="test",
                return_grid_coord=True,
            ),
            crop=None,
            post_transform=[
                dict(type="CenterShift", apply_z=False),
                dict(type="ToTensor"),
                dict(
                    type="Collect",
                    keys=("coord", "grid_coord", "index"),
                    feat_keys=("color", "normal"),
                ),
            ],
            # 13 augmentation pipelines, in this order:
            #   4 x z-rotation only (angle factors 0, 1/2, 1, 3/2),
            #   4 x the same rotations followed by a fixed 0.95 scale,
            #   4 x the same rotations followed by a fixed 1.05 scale,
            #   1 x flip.
            # Comprehension variables stay local, so no extra module-level
            # names are created.
            aug_transform=(
                [
                    [
                        dict(
                            type="RandomRotateTargetAngle",
                            angle=[angle],
                            axis="z",
                            center=[0, 0, 0],
                            p=1,
                        )
                    ]
                    for angle in (0, 1 / 2, 1, 3 / 2)
                ]
                + [
                    [
                        dict(
                            type="RandomRotateTargetAngle",
                            angle=[angle],
                            axis="z",
                            center=[0, 0, 0],
                            p=1,
                        ),
                        dict(type="RandomScale", scale=[scale, scale]),
                    ]
                    for scale in (0.95, 1.05)
                    for angle in (0, 1 / 2, 1, 3 / 2)
                ]
                + [[dict(type="RandomFlip", p=1)]]
            ),
        ),
    ),
)

# conda activate chptv3
# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal
# sh scripts/train.sh -g 1 -d scannet -c semseg-pt-v3m1-0-fvdb-test-1g-conv -n semseg-pt-v3m1-0-fvdb-test-1g-conv
# Copyright Contributors to the OpenVDB Project
# SPDX-License-Identifier: Apache-2.0

# Single-GPU forward-only debug config (disable_backward=True): fVDB PTv3
# backbone ("PT-v3fvdb") on ScanNet semantic segmentation, with
# no_conv_in_cpe=False, batch norm disabled and a single "z" ordering.
# NOTE(review): loaders for this config style usually export every
# module-level name as a config key, so no helper variables or imports are
# introduced here - confirm before adding any.

_base_ = ["../_base_/default_runtime.py"]

# misc custom setting
batch_size = 1  # bs: total bs in all gpus
num_worker = 24
mix_prob = 0
empty_cache = False
enable_amp = False
# init_params_to_one = True  # Initialize all model parameters to 1 before training
disable_backward = True  # Disable backward pass and optimizer step for debugging

seed = 42

# model settings
model = dict(
    type="DefaultSegmentorV2",
    num_classes=20,
    backbone_out_channels=64,
    backbone=dict(
        type="PT-v3fvdb",
        in_channels=6,
        enc_depths=(2, 2, 2, 6, 2),
        enc_channels=(32, 64, 128, 256, 512),
        enc_num_heads=(2, 4, 8, 16, 32),
        dec_depths=(2, 2, 2, 2),
        dec_channels=(256, 128, 64, 64),
        dec_num_heads=(16, 8, 4, 4),
        # Single-stage alternative kept for debugging:
        # enc_depths=(6,),
        # enc_channels=(64, ),
        # enc_num_heads=(2, ),
        # dec_depths=(),
        # dec_channels=(),
        # dec_num_heads=(),
        patch_size=1024,
        # drop_path=0.3,
        drop_path=0.0,
        proj_drop=0.0,
        no_conv_in_cpe=False,
        # order_type=("z", "z-trans", "hilbert", "hilbert-trans"),
        # One-element tuple: without the trailing comma, ("z") is just the
        # string "z" rather than a tuple of orderings.
        order_type=("z",),
        enable_batch_norm=False,
        qk_scale=None,
        shuffle_orders=True,
        embedding_mode="linear",
    ),
    # Both losses carry weight 1.0 and skip label -1.
    criteria=[
        dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
        dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
    ],
)

# scheduler settings
epoch = 20
eval_epoch = 20
optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
scheduler = dict(
    type="OneCycleLR",
    # second entry equals the lr given to the "block" keyword group below
    max_lr=[0.006, 0.0006],
    pct_start=0.05,
    anneal_strategy="cos",
    div_factor=10.0,
    final_div_factor=1000.0,
)
param_dicts = [dict(keyword="block", lr=0.0006)]

# dataset settings
dataset_type = "ScanNetDataset"
data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet"

# Weights & Biases logging.
# SECURITY: never commit an API token. The token that used to be hard-coded
# here must be treated as leaked and revoked. With None, W&B falls back to
# the WANDB_API_KEY environment variable or the credentials stored by
# `wandb login`.
# NOTE(review): confirm the trainer forwards None unchanged to wandb.
enable_wandb = True
wandb_project = "fvdb18"
wandb_key = None

hooks = [
    dict(type="CheckpointLoader"),
    dict(type="ModelHook"),
    dict(type="IterationTimer", warmup_iter=2),
    dict(type="InformationWriter"),
    dict(type="SemSegEvaluator"),
    # dict(type="CheckpointSaver", save_freq=None),
    # dict(type="PreciseEvaluator", test_last=False),
]

data = dict(
    num_classes=20,
    ignore_index=-1,
    # Class names are runtime strings and are kept verbatim.
    names=[
        "wall",
        "floor",
        "cabinet",
        "bed",
        "chair",
        "sofa",
        "table",
        "door",
        "window",
        "bookshelf",
        "picture",
        "counter",
        "desk",
        "curtain",
        "refridgerator",
        "shower curtain",
        "toilet",
        "sink",
        "bathtub",
        "otherfurniture",
    ],
    train=dict(
        type=dataset_type,
        split="train",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(
                type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
            ),
            # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
            dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
            dict(type="RandomScale", scale=[0.9, 1.1]),
            # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
            dict(type="RandomFlip", p=0.5),
            dict(type="RandomJitter", sigma=0.005, clip=0.02),
            dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
            dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
            dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
            dict(type="ChromaticJitter", p=0.95, std=0.05),
            # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
            # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
            ),
            dict(type="SphereCrop", point_max=102400, mode="random"),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            # dict(type="ShufflePoint"),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment"),
                feat_keys=("color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    val=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            # keep the labels under a second key before grid sampling
            dict(type="Copy", keys_dict={"segment": "origin_segment"}),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
                return_inverse=True,
            ),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"),
                feat_keys=("color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    test=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(type="NormalizeColor"),
        ],
        test_mode=True,
        test_cfg=dict(
            voxelize=dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="test",
                return_grid_coord=True,
            ),
            crop=None,
            post_transform=[
                dict(type="CenterShift", apply_z=False),
                dict(type="ToTensor"),
                dict(
                    type="Collect",
                    keys=("coord", "grid_coord", "index"),
                    feat_keys=("color", "normal"),
                ),
            ],
            # 13 augmentation pipelines, in this order:
            #   4 x z-rotation only (angle factors 0, 1/2, 1, 3/2),
            #   4 x the same rotations followed by a fixed 0.95 scale,
            #   4 x the same rotations followed by a fixed 1.05 scale,
            #   1 x flip.
            # Comprehension variables stay local, so no extra module-level
            # names are created.
            aug_transform=(
                [
                    [
                        dict(
                            type="RandomRotateTargetAngle",
                            angle=[angle],
                            axis="z",
                            center=[0, 0, 0],
                            p=1,
                        )
                    ]
                    for angle in (0, 1 / 2, 1, 3 / 2)
                ]
                + [
                    [
                        dict(
                            type="RandomRotateTargetAngle",
                            angle=[angle],
                            axis="z",
                            center=[0, 0, 0],
                            p=1,
                        ),
                        dict(type="RandomScale", scale=[scale, scale]),
                    ]
                    for scale in (0.95, 1.05)
                    for angle in (0, 1 / 2, 1, 3 / 2)
                ]
                + [[dict(type="RandomFlip", p=1)]]
            ),
        ),
    ),
)

# conda activate chptv3
# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal
# sh scripts/train.sh -g 1 -d scannet -c semseg-pt-v3m1-0-fvdb-test-1g-fw -n semseg-pt-v3m1-0-fvdb-test-1g-fw
# Copyright Contributors to the OpenVDB Project
# SPDX-License-Identifier: Apache-2.0

# Settings header of the single-GPU forward-only debug config
# (disable_backward=True): fVDB PTv3 backbone ("PT-v3fvdb") on ScanNet with
# no_conv_in_cpe=True, batch norm enabled and all four orderings.
# NOTE(review): loaders for this config style usually export every
# module-level name as a config key, so no helper variables or imports are
# introduced here - confirm before adding any.

_base_ = ["../_base_/default_runtime.py"]

# misc custom setting
batch_size = 1  # bs: total bs in all gpus
num_worker = 24
mix_prob = 0
empty_cache = False
enable_amp = False
# init_params_to_one = True  # Initialize all model parameters to 1 before training
disable_backward = True  # Disable backward pass and optimizer step for debugging

seed = 42

# model settings
model = dict(
    type="DefaultSegmentorV2",
    num_classes=20,
    backbone_out_channels=64,
    backbone=dict(
        type="PT-v3fvdb",
        in_channels=6,
        enc_depths=(2, 2, 2, 6, 2),
        enc_channels=(32, 64, 128, 256, 512),
        enc_num_heads=(2, 4, 8, 16, 32),
        dec_depths=(2, 2, 2, 2),
        dec_channels=(256, 128, 64, 64),
        dec_num_heads=(16, 8, 4, 4),
        # Single-stage alternative kept for debugging:
        # enc_depths=(10,),
        # enc_channels=(64, ),
        # enc_num_heads=(2, ),
        # dec_depths=(),
        # dec_channels=(),
        # dec_num_heads=(),
        patch_size=1024,
        drop_path=0.0,
        proj_drop=0.0,
        no_conv_in_cpe=True,
        order_type=("z", "z-trans", "hilbert", "hilbert-trans"),
        # Single-order alternative; the trailing comma keeps it a tuple:
        # order_type=("hilbert-trans",),
        enable_batch_norm=True,
        qk_scale=None,
        shuffle_orders=True,
        embedding_mode="linear",
    ),
    # Both losses carry weight 1.0 and skip label -1.
    criteria=[
        dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
        dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
    ],
)

# scheduler settings
epoch = 20
eval_epoch = 20
optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
scheduler = dict(
    type="OneCycleLR",
    # second entry equals the lr given to the "block" keyword group below
    max_lr=[0.006, 0.0006],
    pct_start=0.05,
    anneal_strategy="cos",
    div_factor=10.0,
    final_div_factor=1000.0,
)
param_dicts = [dict(keyword="block", lr=0.0006)]

# dataset settings
dataset_type = "ScanNetDataset"
data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet"

# Weights & Biases logging.
# SECURITY: never commit an API token. The token that used to be hard-coded
# here must be treated as leaked and revoked. With None, W&B falls back to
# the WANDB_API_KEY environment variable or the credentials stored by
# `wandb login`.
# NOTE(review): confirm the trainer forwards None unchanged to wandb.
enable_wandb = True
wandb_project = "fvdb18"
wandb_key = None

hooks = [
    dict(type="CheckpointLoader"),
    dict(type="ModelHook"),
    dict(type="IterationTimer", warmup_iter=2),
    dict(type="InformationWriter"),
    dict(type="SemSegEvaluator"),
    # dict(type="CheckpointSaver", save_freq=None),
    # dict(type="PreciseEvaluator", test_last=False),
]
dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), 
+ dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# sh scripts/train.sh -g 1 -d scannet -c semseg-pt-v3m1-0-fvdb-test-1g -n semseg-pt-v3m1-0-fvdb-test-1g diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-fvdb-test-4g-2.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-fvdb-test-4g-2.py new file mode 100644 index 0000000..74b21c4 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-fvdb-test-4g-2.py @@ -0,0 +1,334 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom 
setting +batch_size = 16 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = False +# init_params_to_one = True # Initialize all model parameters to 1 before training +# disable_backward = True # Disable backward pass and optimizer step for debugging + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3fvdb", + in_channels=6, + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_heads=(2, 4, 8, 16, 32), + dec_depths=(2, 2, 2, 2), + dec_channels=(256, 128, 64, 64), + dec_num_heads=(16, 8, 4, 4), + # enc_depths=(10,), + # enc_channels=(64, ), + # enc_num_heads=(2, ), + # dec_depths=(), + # dec_channels=(), + # dec_num_heads=(), + patch_size=1024, + drop_path=0.0, + proj_drop=0.0, + no_conv_in_cpe=True, + order_type=("z", "z-trans", "hilbert", "hilbert-trans"), + # order_type=("hilbert-trans"), + enable_batch_norm=True, + qk_scale=None, + shuffle_orders=True, + embedding_mode="linear", + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 200 +eval_epoch = 40 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = True +wandb_project = "fvdb18" +wandb_key = None # NOTE(review): API key removed from version control; authenticate via `wandb login` or the WANDB_API_KEY env var (assumes the trainer tolerates None - confirm) and revoke the leaked key + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + #
dict(type="CheckpointSaver", save_freq=None), + # dict(type="PreciseEvaluator", test_last=False), +] + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="PointMultipleSample", multiple=1024, mode="first"), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", 
"normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + 
axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# CUDA_VISIBLE_DEVICES=4,5,6,7 sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v3m1-0-fvdb-test-4g-2 -n semseg-pt-v3m1-0-fvdb-test-4g-2 diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-fvdb-test-4g-conv.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-fvdb-test-4g-conv.py new file mode 100644 index 0000000..bd31501 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-fvdb-test-4g-conv.py @@ -0,0 +1,334 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 16 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = False +# init_params_to_one = True # Initialize all model parameters to 1 before training +# disable_backward = 
True # Disable backward pass and optimizer step for debugging + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3fvdb", + in_channels=6, + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_heads=(2, 4, 8, 16, 32), + dec_depths=(2, 2, 2, 2), + dec_channels=(256, 128, 64, 64), + dec_num_heads=(16, 8, 4, 4), + # enc_depths=(10,), + # enc_channels=(64, ), + # enc_num_heads=(2, ), + # dec_depths=(), + # dec_channels=(), + # dec_num_heads=(), + patch_size=1024, + drop_path=0.0, + proj_drop=0.0, + no_conv_in_cpe=False, + order_type=("z", "z-trans", "hilbert", "hilbert-trans"), + # order_type=("hilbert-trans"), + enable_batch_norm=True, + qk_scale=None, + shuffle_orders=True, + embedding_mode="linear", + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 200 +eval_epoch = 40 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = True +wandb_project = "fvdb18" +wandb_key = None # NOTE(review): API key removed from version control; authenticate via `wandb login` or the WANDB_API_KEY env var (assumes the trainer tolerates None - confirm) and revoke the leaked key + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + # dict(type="CheckpointSaver", save_freq=None), + # dict(type="PreciseEvaluator", test_last=False), +] + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", +
"door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="PointMultipleSample", multiple=1024, mode="first"), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + 
type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 
0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# CUDA_VISIBLE_DEVICES=4,5,6,7 sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v3m1-0-fvdb-test-4g-conv -n semseg-pt-v3m1-0-fvdb-test-4g-conv diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-fvdb-test-4g-spconv.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-fvdb-test-4g-spconv.py new file mode 100644 index 0000000..aaf38e9 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-fvdb-test-4g-spconv.py @@ -0,0 +1,334 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 16 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = False +# init_params_to_one = True # Initialize all model parameters to 1 before training +# disable_backward = True # Disable backward pass and optimizer step for debugging + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3fvdb", + 
in_channels=6, + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_heads=(2, 4, 8, 16, 32), + dec_depths=(2, 2, 2, 2), + dec_channels=(256, 128, 64, 64), + dec_num_heads=(16, 8, 4, 4), + # enc_depths=(10,), + # enc_channels=(64, ), + # enc_num_heads=(2, ), + # dec_depths=(), + # dec_channels=(), + # dec_num_heads=(), + patch_size=1024, + drop_path=0.0, + proj_drop=0.0, + no_conv_in_cpe=False, + order_type=("z", "z-trans", "hilbert", "hilbert-trans"), + # order_type=("hilbert-trans"), + enable_batch_norm=True, + qk_scale=None, + shuffle_orders=True, + embedding_mode="linear", + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 200 +eval_epoch = 40 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = True +wandb_project = "fvdb18" +wandb_key = None # NOTE(review): API key removed from version control; authenticate via `wandb login` or the WANDB_API_KEY env var (assumes the trainer tolerates None - confirm) and revoke the leaked key + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + # dict(type="CheckpointSaver", save_freq=None), + # dict(type="PreciseEvaluator", test_last=False), +] + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, +
split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="PointMultipleSample", multiple=1024, mode="first"), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), 
+ dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], 
+ axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# CUDA_VISIBLE_DEVICES=4,5,6,7 sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v3m1-0-fvdb-test-4g-spconv -n semseg-pt-v3m1-0-fvdb-test-4g-spconv diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-fvdb-test-4g.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-fvdb-test-4g.py new file mode 100644 index 0000000..3a38b94 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-fvdb-test-4g.py @@ -0,0 +1,334 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 16 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = False +# init_params_to_one = True # Initialize all model parameters to 1 before training +# disable_backward = True # Disable backward pass and optimizer step for debugging + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3fvdb", + in_channels=6, + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_heads=(2, 4, 8, 16, 32), + dec_depths=(2, 2, 2, 2), + dec_channels=(256, 128, 64, 64), + dec_num_heads=(16, 8, 4, 4), + # enc_depths=(10,), + # enc_channels=(64, 
), + # enc_num_heads=(2, ), + # dec_depths=(), + # dec_channels=(), + # dec_num_heads=(), + patch_size=1024, + drop_path=0.0, + proj_drop=0.0, + no_conv_in_cpe=True, + order_type=("z", "z-trans", "hilbert", "hilbert-trans"), + # order_type=("hilbert-trans"), + enable_batch_norm=True, + qk_scale=None, + shuffle_orders=True, + embedding_mode="linear", + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 200 +eval_epoch = 40 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = True +wandb_project = "fvdb18" +wandb_key = None # NOTE(review): API key removed from version control; authenticate via `wandb login` or the WANDB_API_KEY env var (assumes the trainer tolerates None - confirm) and revoke the leaked key + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + # dict(type="CheckpointSaver", save_freq=None), + # dict(type="PreciseEvaluator", test_last=False), +] + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0],
axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="PointMultipleSample", multiple=1024, mode="first"), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + 
dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], 
+ [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# CUDA_VISIBLE_DEVICES=4,5,6,7 sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v3m1-0-fvdb-test-4g -n semseg-pt-v3m1-0-fvdb-test-4g diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-fvdb-test-fw.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-fvdb-test-fw.py new file mode 100644 index 0000000..c26e50b --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-fvdb-test-fw.py @@ -0,0 +1,333 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 8 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = False +init_params_to_one = True # Initialize all model parameters to 1 before training +disable_backward = True # Disable backward pass and optimizer step for debugging + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3fvdb", + in_channels=6, + # enc_depths=(2, 2, 2, 6, 2), + # enc_channels=(32, 64, 128, 256, 512), + # enc_num_heads=(2, 4, 8, 16, 32), + # dec_depths=(2, 2, 2, 2), + # dec_channels=(256, 128, 64, 64), + # dec_num_heads=(16, 8, 4, 4), + enc_depths=(6,), + enc_channels=(64,), + enc_num_heads=(2,), + dec_depths=(), + dec_channels=(), + dec_num_heads=(), + patch_size=1024, + drop_path=0.3, + proj_drop=0.0, + no_conv_in_cpe=True, + # order_type=("z", "z-trans", "hilbert", "hilbert-trans"), + order_type=("z"), + enable_batch_norm=True, + 
qk_scale=None, + shuffle_orders=True, + embedding_mode="linear", + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 5 +eval_epoch = 5 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = True +wandb_project = "fvdb16" +wandb_key = None # credential removed: never commit W&B API keys; authenticate via `wandb login` or WANDB_API_KEY (TODO confirm trainer accepts None; revoke the old key) + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + # dict(type="CheckpointSaver", save_freq=None), + # dict(type="PreciseEvaluator", test_last=False), +] + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + #
dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + 
keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# sh scripts/train.sh -g 8 -d 
scannet -c semseg-pt-v3m1-0-fvdb-test-fw -n semseg-pt-v3m1-0-fvdb-test-fw diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-fvdb-test.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-fvdb-test.py new file mode 100644 index 0000000..fcc418b --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-fvdb-test.py @@ -0,0 +1,333 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 8 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = False +init_params_to_one = True # Initialize all model parameters to 1 before training +# disable_backward = True # Disable backward pass and optimizer step for debugging + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3fvdb", + in_channels=6, + # enc_depths=(2, 2, 2, 6, 2), + # enc_channels=(32, 64, 128, 256, 512), + # enc_num_heads=(2, 4, 8, 16, 32), + # dec_depths=(2, 2, 2, 2), + # dec_channels=(256, 128, 64, 64), + # dec_num_heads=(16, 8, 4, 4), + enc_depths=(6,), + enc_channels=(64, ), + enc_num_heads=(2, ), + dec_depths=(), + dec_channels=(), + dec_num_heads=(), + patch_size=1024, + drop_path=0.3, + proj_drop=0.0, + no_conv_in_cpe=True, + # order_type=("z", "z-trans", "hilbert", "hilbert-trans"), + order_type=("z"), + enable_batch_norm=True, + qk_scale=None, + shuffle_orders=True, + embedding_mode="linear", + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 5 +eval_epoch = 5 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + 
max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = True +wandb_project = "fvdb17" +wandb_key = None # credential removed: never commit W&B API keys; authenticate via `wandb login` or WANDB_API_KEY (TODO confirm trainer accepts None; revoke the old key) + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + # dict(type="CheckpointSaver", save_freq=None), + # dict(type="PreciseEvaluator", test_last=False), +] + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + #
dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + 
center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-0-fvdb-test -n semseg-pt-v3m1-0-fvdb-test diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-fvdb-vdb-test.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-fvdb-vdb-test.py new file mode 100644 index 0000000..dfadf34 --- /dev/null +++ 
b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-fvdb-vdb-test.py @@ -0,0 +1,315 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 8 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = False + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3fvdb", + in_channels=6, + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_heads=(2, 4, 8, 16, 32), + dec_depths=(2, 2, 2, 2), + dec_channels=(256, 128, 64, 64), + dec_num_heads=(16, 8, 4, 4), + patch_size=1024, + drop_path=0.3, + proj_drop=0.0, + no_conv_in_cpe=False, + # order_type=("z", "z-trans", "hilbert", "hilbert-trans"), + order_type=("vdb"), + enable_batch_norm=True, + qk_scale=None, + shuffle_orders=True, + embedding_mode="linear", + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 5 +eval_epoch = 5 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = True +wandb_project = "fvdb12" +wandb_key = None # credential removed: never commit W&B API keys; authenticate via `wandb login` or WANDB_API_KEY (TODO confirm trainer accepts None; revoke the old key) + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", +
"refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", 
apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 
1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-0-fvdb-vdb-test -n semseg-pt-v3m1-0-fvdb-vdb-test diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-test-1g-2.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-test-1g-2.py new file mode 100644 index 0000000..a921826 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-test-1g-2.py @@ -0,0 +1,352 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 1 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = False +# init_params_to_one = True # Initialize all model parameters to 1 before training +# disable_backward = True # Disable backward pass and optimizer step for debugging + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3m1", + in_channels=6, + # order=("z", "z-trans", "hilbert", "hilbert-trans"), + order=("hilbert-trans"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + 
enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + # stride=(), + # enc_depths=(6,), + # enc_channels=(64,), + # enc_num_head=(2, ), + # enc_patch_size=(1024, ), + # dec_depths=(), + # dec_channels=(), + # dec_num_head=(), + # dec_patch_size=(), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.0, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + enc_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + enable_batch_norm=True, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 20 +eval_epoch = 20 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = True +wandb_project = "fvdb18" +wandb_key = None # credential removed: never commit W&B API keys; authenticate via `wandb login` or WANDB_API_KEY (TODO confirm trainer accepts None; revoke the old key) + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + # dict(type="CheckpointSaver", save_freq=None), + # dict(type="PreciseEvaluator", test_last=False), +] + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", +
"window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="PointMultipleSample", multiple=1024, mode="first"), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + 
type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 
0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# sh scripts/train.sh -g 1 -d scannet -c semseg-pt-v3m1-0-test-1g-2 -n semseg-pt-v3m1-0-test-1g-2 diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-test-1g-conv.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-test-1g-conv.py new file mode 100644 index 0000000..15aa575 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-test-1g-conv.py @@ -0,0 +1,351 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 1 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = False +# init_params_to_one = True # Initialize all model parameters to 1 before training +# disable_backward = True # Disable backward pass and optimizer step for debugging + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3m1", + in_channels=6, + # order=("z", "z-trans", "hilbert", "hilbert-trans"), + 
order=("z"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + # stride=(), + # enc_depths=(6,), + # enc_channels=(64,), + # enc_num_head=(2, ), + # enc_patch_size=(1024, ), + # dec_depths=(), + # dec_channels=(), + # dec_num_head=(), + # dec_patch_size=(), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.3, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + enc_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + enable_batch_norm=False, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 20 +eval_epoch = 20 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = True +wandb_project = "fvdb18" +wandb_key = None # credential removed: never commit W&B API keys; authenticate via `wandb login` or WANDB_API_KEY (TODO confirm trainer accepts None; revoke the old key) + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + # dict(type="CheckpointSaver", save_freq=None), + # dict(type="PreciseEvaluator", test_last=False), +] + +data =
dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + 
dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + 
center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# sh scripts/train.sh -g 1 -d scannet -c semseg-pt-v3m1-0-test-1g-conv -n semseg-pt-v3m1-0-test-1g-conv diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-test-1g-fw.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-test-1g-fw.py new file mode 100644 index 0000000..be4845f --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-test-1g-fw.py @@ -0,0 +1,352 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 1 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = False +# init_params_to_one = True # Initialize all model parameters to 1 before training +disable_backward = True # Disable backward pass and optimizer step for debugging + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3m1", + 
in_channels=6, + # order=("z", "z-trans", "hilbert", "hilbert-trans"), + order=("z",), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + # stride=(), + # enc_depths=(6,), + # enc_channels=(64,), + # enc_num_head=(2, ), + # enc_patch_size=(1024, ), + # dec_depths=(), + # dec_channels=(), + # dec_num_head=(), + # dec_patch_size=(), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + # drop_path=0.3, + drop_path=0.0, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + enc_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + enable_batch_norm=False, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 20 +eval_epoch = 20 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = True +wandb_project = "fvdb18" +wandb_key = None # secret removed: never commit API keys; authenticate via the WANDB_API_KEY env var or `wandb login` + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + # 
dict(type="CheckpointSaver", save_freq=None), + # dict(type="PreciseEvaluator", test_last=False), +] + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + 
type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + 
dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# sh scripts/train.sh -g 1 -d scannet -c semseg-pt-v3m1-0-test-1g-fw -n semseg-pt-v3m1-0-test-1g-fw diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-test-1g.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-test-1g.py new file mode 100644 index 0000000..bcba12f --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-test-1g.py @@ -0,0 +1,352 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 1 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = False +# init_params_to_one = True # Initialize all model parameters to 1 before training +# disable_backward = True # Disable backward pass and optimizer step for debugging + +seed = 42 + +# model settings +model = dict( + 
type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3m1", + in_channels=6, + order=("z", "z-trans", "hilbert", "hilbert-trans"), + # order=("z-trans"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + # stride=(), + # enc_depths=(10,), + # enc_channels=(64,), + # enc_num_head=(2, ), + # enc_patch_size=(1024, ), + # dec_depths=(), + # dec_channels=(), + # dec_num_head=(), + # dec_patch_size=(), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.0, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + enc_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + enable_batch_norm=True, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 20 +eval_epoch = 20 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = True +wandb_project = "fvdb18" +wandb_key = None # secret removed: never commit API keys; authenticate via the WANDB_API_KEY env var or `wandb login` + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="ModelHook"), + dict(type="IterationTimer", 
warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + # dict(type="CheckpointSaver", save_freq=None), + # dict(type="PreciseEvaluator", test_last=False), +] + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="PointMultipleSample", multiple=1024, mode="first"), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + 
type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", 
scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# sh scripts/train.sh -g 1 -d scannet -c semseg-pt-v3m1-0-test-1g -n semseg-pt-v3m1-0-test-1g diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-test-4g-2.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-test-4g-2.py new file mode 100644 index 0000000..9d741a3 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-test-4g-2.py @@ -0,0 +1,352 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 16 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = False +# init_params_to_one = True # Initialize all model parameters to 1 before training +# 
disable_backward = True # Disable backward pass and optimizer step for debugging + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3m1", + in_channels=6, + order=("z", "z-trans", "hilbert", "hilbert-trans"), + # order=("z-trans"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + # stride=(), + # enc_depths=(10,), + # enc_channels=(64,), + # enc_num_head=(2, ), + # enc_patch_size=(1024, ), + # dec_depths=(), + # dec_channels=(), + # dec_num_head=(), + # dec_patch_size=(), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.0, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + enc_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + enable_batch_norm=True, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 200 +eval_epoch = 40 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = True +wandb_project = "fvdb18" +wandb_key = 
None # secret removed: never commit API keys; authenticate via the WANDB_API_KEY env var or `wandb login` + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + # dict(type="CheckpointSaver", save_freq=None), + # dict(type="PreciseEvaluator", test_last=False), +] + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="PointMultipleSample", multiple=1024, mode="first"), + dict(type="SphereCrop", point_max=102400, 
mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 
0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# CUDA_VISIBLE_DEVICES=0,1,2,3 sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v3m1-0-test-4g-2 -n semseg-pt-v3m1-0-test-4g-2 diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-test-4g-conv.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-test-4g-conv.py new file mode 100644 index 0000000..cc1b595 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-test-4g-conv.py @@ -0,0 +1,352 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 
16 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = False +# init_params_to_one = True # Initialize all model parameters to 1 before training +# disable_backward = True # Disable backward pass and optimizer step for debugging + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3m1", + in_channels=6, + order=("z", "z-trans", "hilbert", "hilbert-trans"), + # order=("z-trans"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + # stride=(), + # enc_depths=(10,), + # enc_channels=(64,), + # enc_num_head=(2, ), + # enc_patch_size=(1024, ), + # dec_depths=(), + # dec_channels=(), + # dec_num_head=(), + # dec_patch_size=(), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.0, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + enc_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + enable_batch_norm=True, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 200 +eval_epoch = 40 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings 
+dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + +enable_wandb = True +wandb_project = "fvdb18" +wandb_key = None # secret removed: never commit API keys; authenticate via the WANDB_API_KEY env var or `wandb login` + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + # dict(type="CheckpointSaver", save_freq=None), + # dict(type="PreciseEvaluator", test_last=False), +] + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + 
hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="PointMultipleSample", multiple=1024, mode="first"), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + 
center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate chptv3 +# cd /home/hexuz/chptv3/fvdb-examples/point_transformer_v3/external/pointcept_minimal +# CUDA_VISIBLE_DEVICES=0,1,2,3 sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v3m1-0-test-4g-conv -n semseg-pt-v3m1-0-test-4g-conv diff --git a/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-test-4g.py b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-test-4g.py new file mode 100644 index 0000000..cbef102 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/configs/scannet_backup/semseg-pt-v3m1-0-test-4g.py @@ -0,0 +1,352 @@ +# 
# Copyright Contributors to the OpenVDB Project
# SPDX-License-Identifier: Apache-2.0
#
# PTv3 (PT-v3m1) semantic segmentation on ScanNet, full encoder/decoder, 4-GPU run.
#
# Usage (adjust the environment name and checkout path to your machine):
#   conda activate chptv3
#   cd <repo>/point_transformer_v3/external/pointcept_minimal
#   CUDA_VISIBLE_DEVICES=0,1,2,3 sh scripts/train.sh -g 4 -d scannet \
#       -c semseg-pt-v3m1-0-test-4g -n semseg-pt-v3m1-0-test-4g

_base_ = ["../_base_/default_runtime.py"]

# misc custom setting
batch_size = 16  # bs: total bs in all gpus
num_worker = 24
mix_prob = 0
empty_cache = False
enable_amp = False
# init_params_to_one = True  # Initialize all model parameters to 1 before training
# disable_backward = True  # Disable backward pass and optimizer step for debugging

seed = 42

# model settings
model = dict(
    type="DefaultSegmentorV2",
    num_classes=20,
    backbone_out_channels=64,
    backbone=dict(
        type="PT-v3m1",
        in_channels=6,
        order=("z", "z-trans", "hilbert", "hilbert-trans"),
        stride=(2, 2, 2, 2),
        enc_depths=(2, 2, 2, 6, 2),
        enc_channels=(32, 64, 128, 256, 512),
        enc_num_head=(2, 4, 8, 16, 32),
        enc_patch_size=(1024, 1024, 1024, 1024, 1024),
        dec_depths=(2, 2, 2, 2),
        dec_channels=(64, 64, 128, 256),
        dec_num_head=(4, 4, 8, 16),
        dec_patch_size=(1024, 1024, 1024, 1024),
        mlp_ratio=4,
        qkv_bias=True,
        qk_scale=None,
        attn_drop=0.0,
        proj_drop=0.0,
        drop_path=0.0,
        shuffle_orders=True,
        pre_norm=True,
        enable_rpe=False,
        enable_flash=True,
        upcast_attention=False,
        upcast_softmax=False,
        enc_mode=False,
        pdnorm_bn=False,
        pdnorm_ln=False,
        pdnorm_decouple=True,
        pdnorm_adaptive=False,
        pdnorm_affine=True,
        enable_batch_norm=True,
        pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
    ),
    criteria=[
        dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
        dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
    ],
)

# scheduler settings
epoch = 200
eval_epoch = 40
optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
scheduler = dict(
    type="OneCycleLR",
    # NOTE(review): presumably one max_lr per parameter group (default group,
    # then the "block" group from param_dicts) - confirm against the builder.
    max_lr=[0.006, 0.0006],
    pct_start=0.05,
    anneal_strategy="cos",
    div_factor=10.0,
    final_div_factor=1000.0,
)
param_dicts = [dict(keyword="block", lr=0.0006)]

# dataset settings
dataset_type = "ScanNetDataset"
# NOTE: machine-specific path; point this at your own ScanNet copy.
data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet"

enable_wandb = True
wandb_project = "fvdb18"
# SECURITY: never commit a W&B API key. The key that used to be hard-coded
# here must be revoked. With None, authentication is expected to come from
# `wandb login` or the WANDB_API_KEY environment variable
# (TODO confirm how the trainer consumes `wandb_key`).
wandb_key = None

hooks = [
    dict(type="CheckpointLoader"),
    dict(type="ModelHook"),
    dict(type="IterationTimer", warmup_iter=2),
    dict(type="InformationWriter"),
    dict(type="SemSegEvaluator"),
    # dict(type="CheckpointSaver", save_freq=None),
    # dict(type="PreciseEvaluator", test_last=False),
]

data = dict(
    num_classes=20,
    ignore_index=-1,
    # Class names are runtime strings; spelling is kept exactly as upstream.
    names=[
        "wall",
        "floor",
        "cabinet",
        "bed",
        "chair",
        "sofa",
        "table",
        "door",
        "window",
        "bookshelf",
        "picture",
        "counter",
        "desk",
        "curtain",
        "refridgerator",
        "shower curtain",
        "toilet",
        "sink",
        "bathtub",
        "otherfurniture",
    ],
    train=dict(
        type=dataset_type,
        split="train",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(
                type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
            ),
            dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
            dict(type="RandomScale", scale=[0.9, 1.1]),
            dict(type="RandomFlip", p=0.5),
            dict(type="RandomJitter", sigma=0.005, clip=0.02),
            dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
            dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
            dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
            dict(type="ChromaticJitter", p=0.95, std=0.05),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
            ),
            dict(type="PointMultipleSample", multiple=1024, mode="first"),
            dict(type="SphereCrop", point_max=102400, mode="random"),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment"),
                feat_keys=("color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    val=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(type="Copy", keys_dict={"segment": "origin_segment"}),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
                return_inverse=True,
            ),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"),
                feat_keys=("color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    test=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(type="NormalizeColor"),
        ],
        test_mode=True,
        test_cfg=dict(
            voxelize=dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="test",
                return_grid_coord=True,
            ),
            crop=None,
            post_transform=[
                dict(type="CenterShift", apply_z=False),
                dict(type="ToTensor"),
                dict(
                    type="Collect",
                    keys=("coord", "grid_coord", "index"),
                    feat_keys=("color", "normal"),
                ),
            ],
            # Test-time augmentation: four z-rotations for each of
            # {no scaling, 0.95, 1.05}, followed by one flip-only pass.
            # Generated instead of hand-copied; the resulting 13 entries and
            # their order are identical to the previous explicit list.
            aug_transform=[
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[angle],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    )
                ]
                + (
                    []
                    if scale is None
                    else [dict(type="RandomScale", scale=[scale, scale])]
                )
                for scale in (None, 0.95, 1.05)
                for angle in (0, 1 / 2, 1, 3 / 2)
            ]
            + [[dict(type="RandomFlip", p=1)]],
        ),
    ),
)
# Copyright Contributors to the OpenVDB Project
# SPDX-License-Identifier: Apache-2.0
#
# Forward-only debugging config: single-stage PTv3 encoder on ScanNet with
# all parameters initialised to 1 and the backward pass disabled.
#
# Usage (adjust the environment name and checkout path to your machine):
#   conda activate chptv3
#   cd <repo>/point_transformer_v3/external/pointcept_minimal
#   sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-0-test-fw -n semseg-pt-v3m1-0-test-fw

_base_ = ["../_base_/default_runtime.py"]

# misc custom setting
batch_size = 8  # bs: total bs in all gpus
num_worker = 24
mix_prob = 0
empty_cache = False
enable_amp = False
init_params_to_one = True  # Initialize all model parameters to 1 before training
disable_backward = True  # Disable backward pass and optimizer step for debugging

seed = 42

# model settings
model = dict(
    type="DefaultSegmentorV2",
    num_classes=20,
    backbone_out_channels=64,
    backbone=dict(
        type="PT-v3m1",
        in_channels=6,
        # Was `("z")`, which is just the string "z"; a real 1-tuple matches the
        # tuple form used by the full configs (("z", "z-trans", ...)).
        order=("z",),
        # Single encoder stage, no decoder (the full model uses 5 stages / 4 strides).
        stride=(),
        enc_depths=(6,),
        enc_channels=(64,),
        enc_num_head=(2,),
        enc_patch_size=(1024,),
        dec_depths=(),
        dec_channels=(),
        dec_num_head=(),
        dec_patch_size=(),
        mlp_ratio=4,
        qkv_bias=True,
        qk_scale=None,
        attn_drop=0.0,
        proj_drop=0.0,
        drop_path=0.3,
        shuffle_orders=True,
        pre_norm=True,
        enable_rpe=False,
        enable_flash=True,
        upcast_attention=False,
        upcast_softmax=False,
        enc_mode=False,
        pdnorm_bn=False,
        pdnorm_ln=False,
        pdnorm_decouple=True,
        pdnorm_adaptive=False,
        pdnorm_affine=True,
        pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
    ),
    criteria=[
        dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
        dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
    ],
)

# scheduler settings
epoch = 5
eval_epoch = 5
optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
scheduler = dict(
    type="OneCycleLR",
    max_lr=[0.006, 0.0006],
    pct_start=0.05,
    anneal_strategy="cos",
    div_factor=10.0,
    final_div_factor=1000.0,
)
param_dicts = [dict(keyword="block", lr=0.0006)]

# dataset settings
dataset_type = "ScanNetDataset"
# NOTE: machine-specific path; point this at your own ScanNet copy.
data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet"

enable_wandb = True
wandb_project = "fvdb16"
# SECURITY: never commit a W&B API key. The key that used to be hard-coded
# here must be revoked. With None, authentication is expected to come from
# `wandb login` or the WANDB_API_KEY environment variable
# (TODO confirm how the trainer consumes `wandb_key`).
wandb_key = None

hooks = [
    dict(type="CheckpointLoader"),
    dict(type="ModelHook"),
    dict(type="IterationTimer", warmup_iter=2),
    dict(type="InformationWriter"),
    dict(type="SemSegEvaluator"),
    # dict(type="CheckpointSaver", save_freq=None),
    # dict(type="PreciseEvaluator", test_last=False),
]

data = dict(
    num_classes=20,
    ignore_index=-1,
    # Class names are runtime strings; spelling is kept exactly as upstream.
    names=[
        "wall",
        "floor",
        "cabinet",
        "bed",
        "chair",
        "sofa",
        "table",
        "door",
        "window",
        "bookshelf",
        "picture",
        "counter",
        "desk",
        "curtain",
        "refridgerator",
        "shower curtain",
        "toilet",
        "sink",
        "bathtub",
        "otherfurniture",
    ],
    train=dict(
        type=dataset_type,
        split="train",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(
                type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
            ),
            dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
            dict(type="RandomScale", scale=[0.9, 1.1]),
            dict(type="RandomFlip", p=0.5),
            dict(type="RandomJitter", sigma=0.005, clip=0.02),
            dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
            dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
            dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
            dict(type="ChromaticJitter", p=0.95, std=0.05),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
            ),
            dict(type="SphereCrop", point_max=102400, mode="random"),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment"),
                feat_keys=("color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    val=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(type="Copy", keys_dict={"segment": "origin_segment"}),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
                return_inverse=True,
            ),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"),
                feat_keys=("color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    test=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(type="NormalizeColor"),
        ],
        test_mode=True,
        test_cfg=dict(
            voxelize=dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="test",
                return_grid_coord=True,
            ),
            crop=None,
            post_transform=[
                dict(type="CenterShift", apply_z=False),
                dict(type="ToTensor"),
                dict(
                    type="Collect",
                    keys=("coord", "grid_coord", "index"),
                    feat_keys=("color", "normal"),
                ),
            ],
            # Test-time augmentation: four z-rotations for each of
            # {no scaling, 0.95, 1.05}, followed by one flip-only pass.
            # Generated instead of hand-copied; the resulting 13 entries and
            # their order are identical to the previous explicit list.
            aug_transform=[
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[angle],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    )
                ]
                + (
                    []
                    if scale is None
                    else [dict(type="RandomScale", scale=[scale, scale])]
                )
                for scale in (None, 0.95, 1.05)
                for angle in (0, 1 / 2, 1, 3 / 2)
            ]
            + [[dict(type="RandomFlip", p=1)]],
        ),
    ),
)
# Copyright Contributors to the OpenVDB Project
# SPDX-License-Identifier: Apache-2.0
#
# Short debugging run: single-stage PTv3 encoder on ScanNet with all
# parameters initialised to 1 (backward pass left enabled).
#
# Usage (adjust the environment name and checkout path to your machine):
#   conda activate chptv3
#   cd <repo>/point_transformer_v3/external/pointcept_minimal
#   sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-0-test -n semseg-pt-v3m1-0-test

_base_ = ["../_base_/default_runtime.py"]

# misc custom setting
batch_size = 8  # bs: total bs in all gpus
num_worker = 24
mix_prob = 0
empty_cache = False
enable_amp = False
init_params_to_one = True  # Initialize all model parameters to 1 before training
# disable_backward = True  # Disable backward pass and optimizer step for debugging

seed = 42

# model settings
model = dict(
    type="DefaultSegmentorV2",
    num_classes=20,
    backbone_out_channels=64,
    backbone=dict(
        type="PT-v3m1",
        in_channels=6,
        # Was `("z")`, which is just the string "z"; a real 1-tuple matches the
        # tuple form used by the full configs (("z", "z-trans", ...)).
        order=("z",),
        # Single encoder stage, no decoder (the full model uses 5 stages / 4 strides).
        stride=(),
        enc_depths=(6,),
        enc_channels=(64,),
        enc_num_head=(2,),
        enc_patch_size=(1024,),
        dec_depths=(),
        dec_channels=(),
        dec_num_head=(),
        dec_patch_size=(),
        mlp_ratio=4,
        qkv_bias=True,
        qk_scale=None,
        attn_drop=0.0,
        proj_drop=0.0,
        drop_path=0.3,
        shuffle_orders=True,
        pre_norm=True,
        enable_rpe=False,
        enable_flash=True,
        upcast_attention=False,
        upcast_softmax=False,
        enc_mode=False,
        pdnorm_bn=False,
        pdnorm_ln=False,
        pdnorm_decouple=True,
        pdnorm_adaptive=False,
        pdnorm_affine=True,
        pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
    ),
    criteria=[
        dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
        dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
    ],
)

# scheduler settings
epoch = 5
eval_epoch = 5
optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
scheduler = dict(
    type="OneCycleLR",
    max_lr=[0.006, 0.0006],
    pct_start=0.05,
    anneal_strategy="cos",
    div_factor=10.0,
    final_div_factor=1000.0,
)
param_dicts = [dict(keyword="block", lr=0.0006)]

# dataset settings
dataset_type = "ScanNetDataset"
# NOTE: machine-specific path; point this at your own ScanNet copy.
data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet"

enable_wandb = True
wandb_project = "fvdb17"
# SECURITY: never commit a W&B API key. The key that used to be hard-coded
# here must be revoked. With None, authentication is expected to come from
# `wandb login` or the WANDB_API_KEY environment variable
# (TODO confirm how the trainer consumes `wandb_key`).
wandb_key = None

hooks = [
    dict(type="CheckpointLoader"),
    dict(type="ModelHook"),
    dict(type="IterationTimer", warmup_iter=2),
    dict(type="InformationWriter"),
    dict(type="SemSegEvaluator"),
    # dict(type="CheckpointSaver", save_freq=None),
    # dict(type="PreciseEvaluator", test_last=False),
]

data = dict(
    num_classes=20,
    ignore_index=-1,
    # Class names are runtime strings; spelling is kept exactly as upstream.
    names=[
        "wall",
        "floor",
        "cabinet",
        "bed",
        "chair",
        "sofa",
        "table",
        "door",
        "window",
        "bookshelf",
        "picture",
        "counter",
        "desk",
        "curtain",
        "refridgerator",
        "shower curtain",
        "toilet",
        "sink",
        "bathtub",
        "otherfurniture",
    ],
    train=dict(
        type=dataset_type,
        split="train",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(
                type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
            ),
            dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
            dict(type="RandomScale", scale=[0.9, 1.1]),
            dict(type="RandomFlip", p=0.5),
            dict(type="RandomJitter", sigma=0.005, clip=0.02),
            dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
            dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
            dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
            dict(type="ChromaticJitter", p=0.95, std=0.05),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
            ),
            dict(type="SphereCrop", point_max=102400, mode="random"),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment"),
                feat_keys=("color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    val=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(type="Copy", keys_dict={"segment": "origin_segment"}),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
                return_inverse=True,
            ),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"),
                feat_keys=("color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    test=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(type="NormalizeColor"),
        ],
        test_mode=True,
        test_cfg=dict(
            voxelize=dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="test",
                return_grid_coord=True,
            ),
            crop=None,
            post_transform=[
                dict(type="CenterShift", apply_z=False),
                dict(type="ToTensor"),
                dict(
                    type="Collect",
                    keys=("coord", "grid_coord", "index"),
                    feat_keys=("color", "normal"),
                ),
            ],
            # Test-time augmentation: four z-rotations for each of
            # {no scaling, 0.95, 1.05}, followed by one flip-only pass.
            # Generated instead of hand-copied; the resulting 13 entries and
            # their order are identical to the previous explicit list.
            aug_transform=[
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[angle],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    )
                ]
                + (
                    []
                    if scale is None
                    else [dict(type="RandomScale", scale=[scale, scale])]
                )
                for scale in (None, 0.95, 1.05)
                for angle in (0, 1 / 2, 1, 3 / 2)
            ]
            + [[dict(type="RandomFlip", p=1)]],
        ),
    ),
)
b/point_transformer_v3/external/pointcept_minimal/environment.yml @@ -0,0 +1,52 @@ +name: pointcept-torch2.5.0-cu12.4 +channels: + - pytorch + - nvidia/label/cuda-12.4.1 + - nvidia + - bioconda + - conda-forge + - defaults +dependencies: + - python=3.10 + - pip + - cuda + - conda-forge::cudnn + - gcc=13.2 + - gxx=13.2 + - pytorch=2.5.0 + - torchvision=0.20.0 + - torchaudio=2.5.0 + - pytorch-cuda=12.4 + - ninja + - google-sparsehash + - h5py + - pyyaml + - tensorboard + - tensorboardx + - wandb + - yapf + - addict + - einops + - scipy + - plyfile + - termcolor + - timm + - ftfy + - regex + - tqdm + - matplotlib + - black + - open3d + - pip: + - --find-links https://data.pyg.org/whl/torch-2.5.0+cu124.html + - torch-cluster + - torch-scatter + - torch-sparse + - torch-geometric + - spconv-cu124 + - peft #for lora finetune + - git+https://github.com/octree-nn/ocnn-pytorch.git + - git+https://github.com/openai/CLIP.git + - git+https://github.com/Dao-AILab/flash-attention.git + - ./libs/pointops + - ./libs/pointgroup_ops \ No newline at end of file diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/__init__.py b/point_transformer_v3/external/pointcept_minimal/pointcept/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/__init__.py b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/__init__.py new file mode 100644 index 0000000..9341b8f --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/__init__.py @@ -0,0 +1,9 @@ +from .defaults import DefaultDataset, DefaultImagePointDataset, ConcatDataset +from .builder import build_dataset +from .utils import point_collate_fn, collate_fn + +# indoor scene +from .scannet import ScanNetDataset, ScanNet200Dataset + +# dataloader +from .dataloader import MultiDatasetDataloader diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/builder.py 
b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/builder.py new file mode 100644 index 0000000..1fa5f0e --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/builder.py @@ -0,0 +1,15 @@ +""" +Dataset Builder + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +from pointcept.utils.registry import Registry + +DATASETS = Registry("datasets") + + +def build_dataset(cfg): + """Build datasets.""" + return DATASETS.build(cfg) diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/dataloader.py b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/dataloader.py new file mode 100644 index 0000000..a3c8e1d --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/dataloader.py @@ -0,0 +1,112 @@ +from functools import partial +import weakref +import torch +import torch.utils.data + +import pointcept.utils.comm as comm +from pointcept.datasets.utils import point_collate_fn +from pointcept.datasets import ConcatDataset +from pointcept.utils.env import set_seed + + +class MultiDatasetDummySampler: + def __init__(self): + self.dataloader = None + + def set_epoch(self, epoch): + if comm.get_world_size() > 1: + for dataloader in self.dataloader.dataloaders: + dataloader.sampler.set_epoch(epoch) + return + + +class MultiDatasetDataloader: + """ + Multiple Datasets Dataloader, batch data from a same dataset and mix up ratio determined by loop of each sub dataset. + The overall length is determined by the main dataset (first) and loop of concat dataset. 
+ """ + + def __init__( + self, + concat_dataset: ConcatDataset, + batch_size_per_gpu: int, + num_worker_per_gpu: int, + mix_prob=0, + seed=None, + ): + self.datasets = concat_dataset.datasets + self.ratios = [dataset.loop for dataset in self.datasets] + # reset data loop, original loop serve as ratios + for dataset in self.datasets: + dataset.loop = 1 + # determine union training epoch by main dataset + self.datasets[0].loop = concat_dataset.loop + # build sub-dataloaders + num_workers = num_worker_per_gpu // len(self.datasets) + self.dataloaders = [] + for dataset_id, dataset in enumerate(self.datasets): + if comm.get_world_size() > 1: + sampler = torch.utils.data.distributed.DistributedSampler(dataset) + else: + sampler = None + + init_fn = ( + partial( + self._worker_init_fn, + dataset_id=dataset_id, + num_workers=num_workers, + num_datasets=len(self.datasets), + rank=comm.get_rank(), + seed=seed, + ) + if seed is not None + else None + ) + self.dataloaders.append( + torch.utils.data.DataLoader( + dataset, + batch_size=batch_size_per_gpu, + shuffle=(sampler is None), + num_workers=num_worker_per_gpu, + sampler=sampler, + collate_fn=partial(point_collate_fn, mix_prob=mix_prob), + pin_memory=True, + worker_init_fn=init_fn, + drop_last=True, + persistent_workers=True, + ) + ) + self.sampler = MultiDatasetDummySampler() + self.sampler.dataloader = weakref.proxy(self) + + def __iter__(self): + iterator = [iter(dataloader) for dataloader in self.dataloaders] + while True: + for i in range(len(self.ratios)): + for _ in range(self.ratios[i]): + try: + batch = next(iterator[i]) + except StopIteration: + if i == 0: + return + else: + iterator[i] = iter(self.dataloaders[i]) + batch = next(iterator[i]) + yield batch + + def __len__(self): + main_data_loader_length = len(self.dataloaders[0]) + return ( + main_data_loader_length // self.ratios[0] * sum(self.ratios) + + main_data_loader_length % self.ratios[0] + ) + + @staticmethod + def _worker_init_fn(worker_id, 
@DATASETS.register_module()
class DefaultDataset(Dataset):
    """Dataset of per-sample folders holding ``<asset>.npy`` files.

    A split is either a JSON file (listing sample paths relative to
    ``data_root``) or a sub-folder of ``data_root`` whose entries are the
    samples. In test mode every item is expanded into a list of augmented,
    voxelized and optionally cropped fragments.
    """

    # Asset stems that ``get_data`` is willing to load from a sample folder.
    VALID_ASSETS = [
        "coord",
        "color",
        "normal",
        "strength",
        "segment",
        "instance",
        "pose",
    ]

    def __init__(
        self,
        split="train",
        data_root="data/dataset",
        transform=None,
        test_mode=False,
        test_cfg=None,
        cache=False,
        ignore_index=-1,
        loop=1,
    ):
        super(DefaultDataset, self).__init__()
        self.data_root = data_root
        self.split = split
        self.transform = Compose(transform)
        self.cache = cache
        self.ignore_index = ignore_index
        # Test mode always iterates the data exactly once.
        self.loop = 1 if test_mode else loop
        self.test_mode = test_mode
        self.test_cfg = test_cfg if test_mode else None

        if test_mode:
            cfg = self.test_cfg
            self.test_voxelize = TRANSFORMS.build(cfg.voxelize)
            self.test_crop = TRANSFORMS.build(cfg.crop) if cfg.crop else None
            self.post_transform = Compose(cfg.post_transform)
            self.aug_transform = [Compose(aug) for aug in cfg.aug_transform]

        self.data_list = self.get_data_list()
        get_root_logger().info(
            "Totally {} x {} samples in {} {} set.".format(
                len(self.data_list), self.loop, os.path.basename(self.data_root), split
            )
        )

    def get_data_list(self):
        """Return the sample paths of every configured split.

        Raises:
            NotImplementedError: if ``self.split`` is neither a string nor a
                sequence.
        """
        if isinstance(self.split, str):
            splits = [self.split]
        elif isinstance(self.split, Sequence):
            splits = self.split
        else:
            raise NotImplementedError

        samples = []
        for split in splits:
            split_path = os.path.join(self.data_root, split)
            if os.path.isfile(split_path):
                # A file split is a JSON list of paths relative to data_root.
                with open(split_path) as f:
                    samples.extend(
                        os.path.join(self.data_root, item) for item in json.load(f)
                    )
            else:
                # A folder split: every entry inside it is one sample.
                samples.extend(glob.glob(os.path.join(split_path, "*")))
        return samples

    def get_data(self, idx):
        """Load the sample at ``idx`` (wrapped modulo the list length).

        With ``cache`` enabled the sample comes from
        ``shared_dict("pointcept-<name>")`` instead of the ``.npy`` files.
        """
        sample_dir = self.data_list[idx % len(self.data_list)]
        name = self.get_data_name(idx)
        split = self.get_split_name(idx)
        if self.cache:
            return shared_dict(f"pointcept-{name}")

        data_dict = {}
        for filename in os.listdir(sample_dir):
            if not filename.endswith(".npy"):
                continue
            key = filename[:-4]
            if key not in self.VALID_ASSETS:
                continue
            data_dict[key] = np.load(os.path.join(sample_dir, filename))
        data_dict["name"] = name
        data_dict["split"] = split

        # Geometric / appearance attributes are kept as float32.
        for key in ("coord", "color", "normal"):
            if key in data_dict:
                data_dict[key] = data_dict[key].astype(np.float32)

        # Labels become flat int32 arrays; a missing label is filled with -1
        # for every point.
        for key in ("segment", "instance"):
            if key in data_dict:
                data_dict[key] = data_dict[key].reshape([-1]).astype(np.int32)
            else:
                data_dict[key] = np.full(
                    data_dict["coord"].shape[0], -1, dtype=np.int32
                )
        return data_dict

    def get_data_name(self, idx):
        """Return the base name of the sample path at ``idx``."""
        sample_dir = self.data_list[idx % len(self.data_list)]
        return os.path.basename(sample_dir)

    def get_split_name(self, idx):
        """Return the name of the folder that contains the sample at ``idx``."""
        sample_dir = self.data_list[idx % len(self.data_list)]
        return os.path.basename(os.path.dirname(sample_dir))

    def prepare_train_data(self, idx):
        """Load a sample and run the configured transform on it."""
        return self.transform(self.get_data(idx))

    def prepare_test_data(self, idx):
        """Build the test-time record for ``idx``.

        The result holds ``segment`` and ``name`` (plus ``origin_segment`` and
        ``inverse`` when the transform produced them) and ``fragment_list``,
        the post-transformed fragments of every augmented copy.
        """
        data_dict = self.transform(self.get_data(idx))
        result_dict = dict(segment=data_dict.pop("segment"), name=data_dict.pop("name"))
        if "origin_segment" in data_dict:
            assert "inverse" in data_dict
            result_dict["origin_segment"] = data_dict.pop("origin_segment")
            result_dict["inverse"] = data_dict.pop("inverse")

        # Stage 1: every augmentation works on its own deep copy.
        augmented = [aug(deepcopy(data_dict)) for aug in self.aug_transform]

        # Stage 2: voxelize each augmented copy, then optionally crop each part.
        fragment_list = []
        for data in augmented:
            if self.test_voxelize is None:
                data["index"] = np.arange(data["coord"].shape[0])
                parts = [data]
            else:
                parts = self.test_voxelize(data)
            for part in parts:
                if self.test_crop is None:
                    fragment_list.append(part)
                else:
                    fragment_list.extend(self.test_crop(part))

        # Stage 3: post-transform every fragment.
        result_dict["fragment_list"] = [
            self.post_transform(fragment) for fragment in fragment_list
        ]
        return result_dict

    def __getitem__(self, idx):
        prepare = self.prepare_test_data if self.test_mode else self.prepare_train_data
        return prepare(idx)

    def __len__(self):
        return self.loop * len(self.data_list)
_alignment + ): + h, w = size + h0, w0 = size0 + left, top, right, bottom = crop_size + crop_h = bottom - top + crop_w = right - left + mask_crop = ( + (correspondence[:, 1] >= top) + & (correspondence[:, 1] < bottom) + & (correspondence[:, 0] >= left) + & (correspondence[:, 0] < right) + ) + correspondence = correspondence[mask_crop] + correspondence[:, 1] -= top + correspondence[:, 0] -= left + correspondence[:, 1] = (correspondence[:, 1] * h / crop_h // _alignment).astype( + np.int32 + ) + correspondence[:, 0] = (correspondence[:, 0] * w / crop_w // _alignment).astype( + np.int32 + ) + correspondence = correspondence[:, [1, 0, 2]] + correspondence = np.unique(correspondence, axis=0) + return correspondence + + def get_data_list(self): + split_list = {} + if isinstance(self.split, str): + data_path = os.path.join(self.data_root, "splits", f"{self.split}.json") + with open(data_path, "r", encoding="utf-8") as file: + data_list = json.load(file) + split_list[self.split] = list(data_list.keys()) + elif isinstance(self.split, Sequence): + data_list = {} + for split in self.split: + data_path = os.path.join(self.data_root, "splits", f"{split}.json") + with open(data_path, "r", encoding="utf-8") as file: + data_split_dict = json.load(file) + data_list.update(data_split_dict) + split_list[split] = list(data_split_dict.keys()) + else: + raise NotImplementedError + return data_list, split_list + + def get_data_name(self, idx): + return self.data_name[idx % len(self.data_name)] + + def get_split_name(self, idx): + for split, names in self.split_list.items(): + if self.data_name[idx % len(self.data_name)] in names: + return split + return None + + def get_data(self, idx): + data_dict = {} + name = self.get_data_name(idx) + split = self.get_split_name(idx) + data_dict["name"] = name + data_dict["split"] = split + data_path = self.data_list[name] + + pointclouds_path = data_path["pointclouds"] + assets = os.listdir(pointclouds_path) + for asset in assets: + if not 
asset.endswith(".npy"): + continue + if asset[:-4] not in self.PC_VALID_ASSETS: + continue + data_dict[asset[:-4]] = np.load(os.path.join(pointclouds_path, asset)) + imgs_path = data_path["images"] + imgs = [Image.open(asset) for asset in imgs_path] + img_width, img_height = imgs[0].size + div_w = img_width // self.patch_w + div_h = img_height // self.patch_h + div_min = max(min(div_w, div_h), 1) + crop_img_width = div_min * self.patch_w + crop_img_height = div_min * self.patch_h + left = int((img_width - crop_img_width) / 2) + top = int((img_height - crop_img_height) / 2) + right = int((img_width + crop_img_width) / 2) + bottom = int((img_height + crop_img_height) / 2) + imgs = [img.crop((left, top, right, bottom)) for img in imgs] + imgs = [self.transform_img(img) for img in imgs] + if len(imgs) > 0: + imgs_list = torch.stack(imgs) + data_dict["images"] = imgs_list.float() + else: + data_dict["images"] = torch.empty( + (0, 3, self.patch_h * self.patch_size, self.patch_w * self.patch_size) + ) + data_dict["img_num"] = np.array([data_dict["images"].shape[0]], dtype=np.int32) + + correspondences_path = data_path["correspondences"] + correspondence_infos = np.ones( + (data_dict["coord"].shape[0], len(correspondences_path), 2), dtype=np.int32 + ) * (-1) + for asset_id, asset in enumerate(correspondences_path): + correspondence_info = np.load(asset).astype(np.int32) + if np.array_equal(correspondence_info, -np.ones((1, 3))): + continue + correspondence_info = self.resize_correspondence_info( + correspondence_info, + (self.patch_h * self.patch_size, self.patch_w * self.patch_size), + (img_height, img_width), + (left, top, right, bottom), + self.patch_size, + ) + correspondence_infos[correspondence_info[:, -1], asset_id, :] = ( + correspondence_info[:, :-1] + ) + data_dict["correspondence"] = correspondence_infos # .reshape(-1, 2) + + if "coord" in data_dict.keys(): + data_dict["coord"] = data_dict["coord"].astype(np.float32) + + if "color" in data_dict.keys(): + 
data_dict["color"] = data_dict["color"].astype(np.float32) + + if "normal" in data_dict.keys(): + data_dict["normal"] = data_dict["normal"].astype(np.float32) + + if "segment" in data_dict.keys(): + data_dict["segment"] = data_dict["segment"].reshape([-1]).astype(np.int32) + else: + data_dict["segment"] = ( + np.ones(data_dict["coord"].shape[0], dtype=np.int32) * -1 + ) + + if "instance" in data_dict.keys(): + data_dict["instance"] = data_dict["instance"].reshape([-1]).astype(np.int32) + else: + data_dict["instance"] = ( + np.ones(data_dict["coord"].shape[0], dtype=np.int32) * -1 + ) + return data_dict + + def prepare_train_data(self, idx): + # load data + data_dict = self.get_data(idx) + data_dict = self.transform(data_dict) + return data_dict + + def prepare_test_data(self, idx): + # load data + data_dict = self.get_data(idx) + data_dict = self.transform(data_dict) + result_dict = dict(segment=data_dict.pop("segment"), name=data_dict.pop("name")) + if "origin_segment" in data_dict: + assert "inverse" in data_dict + result_dict["origin_segment"] = data_dict.pop("origin_segment") + result_dict["inverse"] = data_dict.pop("inverse") + + data_dict_list = [] + for aug in self.aug_transform: + data_dict_list.append(aug(deepcopy(data_dict))) + + fragment_list = [] + for data in data_dict_list: + if self.test_voxelize is not None: + data_part_list = self.test_voxelize(data) + else: + data["index"] = np.arange(data["coord"].shape[0]) + data_part_list = [data] + for data_part in data_part_list: + if self.test_crop is not None: + data_part = self.test_crop(data_part) + else: + data_part = [data_part] + fragment_list += data_part + + for i in range(len(fragment_list)): + fragment_list[i] = self.post_transform(fragment_list[i]) + result_dict["fragment_list"] = fragment_list + return result_dict + + def __getitem__(self, idx): + if self.test_mode: + return self.prepare_test_data(idx) + else: + return self.prepare_train_data(idx) + + def __len__(self): + return 
@DATASETS.register_module()
class ConcatDataset(Dataset):
    """Concatenation of several datasets built from their configs.

    Items are addressed through a flat list of
    ``(dataset index, index inside that dataset)`` pairs.
    """

    def __init__(self, datasets, loop=1):
        super(ConcatDataset, self).__init__()
        self.datasets = [build_dataset(cfg) for cfg in datasets]
        self.loop = loop
        self.data_list = self.get_data_list()
        get_root_logger().info(
            "Totally {} x {} samples in the concat set.".format(
                len(self.data_list), self.loop
            )
        )

    def get_data_list(self):
        """Return the flat ``(dataset index, item index)`` lookup list."""
        pairs = []
        for dataset_idx, dataset in enumerate(self.datasets):
            size = len(dataset)
            pairs.extend(
                zip(np.full(size, dataset_idx, dtype=int), np.arange(size))
            )
        return pairs

    def _locate(self, idx):
        # Indices wrap around the lookup list so ``loop`` can repeat the data.
        return self.data_list[idx % len(self.data_list)]

    def get_data(self, idx):
        """Fetch the item at ``idx`` from the sub-dataset it belongs to."""
        dataset_idx, data_idx = self._locate(idx)
        return self.datasets[dataset_idx][data_idx]

    def get_data_name(self, idx):
        """Return the name reported by the owning sub-dataset."""
        dataset_idx, data_idx = self._locate(idx)
        return self.datasets[dataset_idx].get_data_name(data_idx)

    def __getitem__(self, idx):
        return self.get_data(idx)

    def __len__(self):
        return self.loop * len(self.data_list)
+""" + +import warnings + +warnings.filterwarnings("ignore", category=DeprecationWarning) + +import os +import camtools as ct +import open3d as o3d +from scipy.spatial import cKDTree +import struct +import zlib +import imageio +import cv2 +import argparse +import glob +import json +import plyfile +import numpy as np +import pandas as pd +import multiprocessing as mp +from pathlib import Path + +# Load external constants +import sys + +sys.path.append("pointcept/datasets/preprocessing/scannet/meta_data") +from scannet200_constants import VALID_CLASS_IDS_200, VALID_CLASS_IDS_20 + +CLOUD_FILE_PFIX = "_vh_clean_2" +SEGMENTS_FILE_PFIX = ".0.010000.segs.json" +AGGREGATIONS_FILE_PFIX = ".aggregation.json" +CLASS_IDS200 = VALID_CLASS_IDS_200 +CLASS_IDS20 = VALID_CLASS_IDS_20 +IGNORE_INDEX = -1 + +COMPRESSION_TYPE_COLOR = {-1: "unknown", 0: "raw", 1: "png", 2: "jpeg"} +COMPRESSION_TYPE_DEPTH = { + -1: "unknown", + 0: "raw_ushort", + 1: "zlib_ushort", + 2: "occi_ushort", +} + + +class RGBDFrame: + def load(self, file_handle): + self.camera_to_world = np.asarray( + struct.unpack("f" * 16, file_handle.read(16 * 4)), dtype=np.float32 + ).reshape(4, 4) + self.timestamp_color = struct.unpack("Q", file_handle.read(8))[0] + self.timestamp_depth = struct.unpack("Q", file_handle.read(8))[0] + self.color_size_bytes = struct.unpack("Q", file_handle.read(8))[0] + self.depth_size_bytes = struct.unpack("Q", file_handle.read(8))[0] + self.color_data = b"".join( + struct.unpack( + "c" * self.color_size_bytes, file_handle.read(self.color_size_bytes) + ) + ) + self.depth_data = b"".join( + struct.unpack( + "c" * self.depth_size_bytes, file_handle.read(self.depth_size_bytes) + ) + ) + + def decompress_depth(self, compression_type): + if compression_type == "zlib_ushort": + return self.decompress_depth_zlib() + else: + raise + + def decompress_depth_zlib(self): + return zlib.decompress(self.depth_data) + + def decompress_color(self, compression_type): + if compression_type == "jpeg": + return 
self.decompress_color_jpeg() + else: + raise + + def decompress_color_jpeg(self): + return imageio.imread(self.color_data) + + +class SensorData: + def __init__(self, filename): + self.version = 4 + self.load(filename) + + def load(self, filename): + with open(filename, "rb") as f: + version = struct.unpack("I", f.read(4))[0] + assert self.version == version + strlen = struct.unpack("Q", f.read(8))[0] + self.sensor_name = b"".join(struct.unpack("c" * strlen, f.read(strlen))) + self.intrinsic_color = np.asarray( + struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 + ).reshape(4, 4) + self.extrinsic_color = np.asarray( + struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 + ).reshape(4, 4) + self.intrinsic_depth = np.asarray( + struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 + ).reshape(4, 4) + self.extrinsic_depth = np.asarray( + struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 + ).reshape(4, 4) + self.color_compression_type = COMPRESSION_TYPE_COLOR[ + struct.unpack("i", f.read(4))[0] + ] + self.depth_compression_type = COMPRESSION_TYPE_DEPTH[ + struct.unpack("i", f.read(4))[0] + ] + self.color_width = struct.unpack("I", f.read(4))[0] + self.color_height = struct.unpack("I", f.read(4))[0] + self.depth_width = struct.unpack("I", f.read(4))[0] + self.depth_height = struct.unpack("I", f.read(4))[0] + self.depth_shift = struct.unpack("f", f.read(4))[0] + num_frames = struct.unpack("Q", f.read(8))[0] + self.frames = [] + for i in range(num_frames): + frame = RGBDFrame() + frame.load(f) + self.frames.append(frame) + + def export_depth_images(self, output_path, image_size=None, frame_skip=1): + if not os.path.exists(output_path): + os.makedirs(output_path) + print( + "exporting", len(self.frames) // frame_skip, " depth frames to", output_path + ) + for f in range(0, len(self.frames), frame_skip): + if os.path.exists((os.path.join(output_path, str(f) + ".png"))): + continue + if f % 100 == 0: + print( + "exporting", + f, + "th depth frames 
to", + os.path.join(output_path, str(f) + ".png"), + ) + + depth_data = self.frames[f].decompress_depth(self.depth_compression_type) + depth = np.fromstring(depth_data, dtype=np.uint16).reshape( + self.depth_height, self.depth_width + ) + if image_size is not None: + depth = cv2.resize( + depth, + (image_size[1], image_size[0]), + interpolation=cv2.INTER_NEAREST, + ) + imageio.imwrite(os.path.join(output_path, str(f) + ".png"), depth) + + def export_color_images(self, output_path, image_size=None, frame_skip=1): + if not os.path.exists(output_path): + os.makedirs(output_path) + print( + "exporting", len(self.frames) // frame_skip, "color frames to", output_path + ) + for f in range(0, len(self.frames), frame_skip): + if os.path.exists((os.path.join(output_path, str(f) + ".png"))): + continue + if f % 100 == 0: + print( + "exporting", + f, + "th color frames to", + os.path.join(output_path, str(f) + ".png"), + ) + color = self.frames[f].decompress_color(self.color_compression_type) + if image_size is not None: + color = cv2.resize( + color, + (image_size[1], image_size[0]), + interpolation=cv2.INTER_NEAREST, + ) + # imageio.imwrite(os.path.join(output_path, str(f) + '.jpg'), color) + imageio.imwrite(os.path.join(output_path, str(f) + ".png"), color) + + def save_mat_to_file(self, matrix, filename): + with open(filename, "w") as f: + for line in matrix: + np.savetxt(f, line[np.newaxis], fmt="%f") + + def export_poses(self, output_path, frame_skip=1): + if not os.path.exists(output_path): + os.makedirs(output_path) + print( + "exporting", len(self.frames) // frame_skip, "camera poses to", output_path + ) + for f in range(0, len(self.frames), frame_skip): + np.save( + os.path.join(output_path, str(f) + ".npy"), + self.frames[f].camera_to_world, + ) + + def export_intrinsics(self, output_path): + if not os.path.exists(output_path): + os.makedirs(output_path) + print("exporting camera intrinsics to", output_path) + np.save(os.path.join(output_path, "intrinsic.npy"), 
def read_plymesh(filepath):
    """Read a PLY mesh and return ``(vertices, faces)`` as numpy arrays.

    ``vertices`` holds the values of the ``vertex`` element, ``faces`` the
    stacked ``vertex_indices`` of the ``face`` element. Returns ``None`` when
    the file contains no elements.
    """
    with open(filepath, "rb") as ply_file:
        mesh = plyfile.PlyData.read(ply_file)

    if not mesh.elements:
        return None

    vertex_table = pd.DataFrame(mesh["vertex"].data)
    face_indices = mesh["face"].data["vertex_indices"]
    return vertex_table.values, np.stack(face_indices, axis=0)
def correspondenceSave(
    mesh, scene_dir, coords_gt, output_dir, img_size, max_dist=0.01
):
    """Save pixel-to-point correspondences for every exported frame of a scene.

    For each pose in ``<scene_dir>/pose`` the mesh is ray-cast via
    ``correspondenceGet``; every resulting 3D point is matched to its nearest
    neighbour in ``coords_gt`` and kept when closer than ``max_dist``. One
    ``<frame>.npy`` per pose is written to ``output_dir`` with rows of
    ``(pixel..., point index)``. Frames without any usable ray get a single
    row of ``-1``.

    Args:
        mesh: mesh handed through to ``correspondenceGet``.
        scene_dir: folder with ``color``, ``pose`` and ``intrinsic`` data.
        coords_gt: point coordinates the ray-cast points are matched against.
        output_dir: destination folder; created when missing.
        img_size: ``(height, width)`` handed through to ``correspondenceGet``.
        max_dist: maximum nearest-neighbour distance for a match.
    """
    os.makedirs(output_dir, exist_ok=True)
    scene_dir = Path(scene_dir)

    # Output files are named after the colour frames, in numeric order.
    index_gt = sorted(
        (
            img_name.split(".")[0]
            for img_name in os.listdir(str(scene_dir / "color"))
            if img_name.endswith(".png")
        ),
        key=int,
    )

    Ts_path = str(scene_dir / "pose")
    Ts_files = sorted(
        (f for f in os.listdir(Ts_path) if f.endswith(".npy")),
        key=lambda x: int(x.split(".")[0]),
    )
    print(f"total pose num:{len(Ts_files)}")
    Ts = np.stack([np.load(os.path.join(Ts_path, f)) for f in Ts_files])

    # All frames share one intrinsic matrix; only its upper-left 3x3 is used.
    K = np.load(str(scene_dir / "intrinsic" / "intrinsic.npy"))[:3, :3]

    # The KD-tree depends only on coords_gt, so it is built once, and only
    # if some frame actually needs it.
    tree = None
    for i, T in enumerate(Ts):
        coord_dict = correspondenceGet(mesh, K, T, img_size, coords_gt)
        if coord_dict is None:
            correspondences = -np.ones((1, 3))
        else:
            if tree is None:
                tree = cKDTree(coords_gt)
            dis, idx = tree.query(coord_dict["coord"], k=1)
            close = dis < max_dist
            correspondences = np.hstack(
                (coord_dict["pixel"][close], idx[close].reshape(-1, 1))
            )
        np.save(Path(output_dir) / "{}.npy".format(index_gt[i]), correspondences)
os.path.join(pointclouds_root, "val", f"{scene_id}") + im_output_path = os.path.join(im_output_path, "val", f"{scene_id}") + split_name = "val" + else: + pc_output_path = os.path.join(pc_output_path, "test", f"{scene_id}") + pc_input_path = os.path.join(pointclouds_root, "test", f"{scene_id}") + im_output_path = os.path.join(im_output_path, "test", f"{scene_id}") + split_name = "test" + + print(f"Processing: {scene_id} in {split_name}") + + if parse_pointclouds: + vertices, faces = read_plymesh(mesh_path) + coords = vertices[:, :3] + colors = vertices[:, 3:6] + save_dict = dict( + coord=coords.astype(np.float32), + color=colors.astype(np.uint8), + ) + # Parse Normals + if parse_normals: + save_dict["normal"] = vertex_normal(coords, faces).astype(np.float32) + + # Load segments file + if split_name != "test": + with open(segments_file) as f: + segments = json.load(f) + seg_indices = np.array(segments["segIndices"]) + + # Load Aggregations file + with open(aggregations_file) as f: + aggregation = json.load(f) + seg_groups = np.array(aggregation["segGroups"]) + + # Generate new labels + semantic_gt20 = np.ones((vertices.shape[0]), dtype=np.int16) * IGNORE_INDEX + semantic_gt200 = np.ones((vertices.shape[0]), dtype=np.int16) * IGNORE_INDEX + instance_ids = np.ones((vertices.shape[0]), dtype=np.int16) * IGNORE_INDEX + for group in seg_groups: + point_idx, label_id20, label_id200 = point_indices_from_group( + seg_indices, group, labels_pd + ) + + semantic_gt20[point_idx] = label_id20 + semantic_gt200[point_idx] = label_id200 + instance_ids[point_idx] = group["id"] + + semantic_gt20 = semantic_gt20.astype(int) + semantic_gt200 = semantic_gt200.astype(int) + instance_ids = instance_ids.astype(int) + + save_dict["segment20"] = semantic_gt20 + save_dict["segment200"] = semantic_gt200 + save_dict["instance"] = instance_ids + + # Concatenate with original cloud + processed_vertices = np.hstack((semantic_gt200, instance_ids)) + + if np.any(np.isnan(processed_vertices)) or not 
if __name__ == "__main__":
    # Command-line entry point: each invocation handles one shard
    # (``--thread_id``) out of ``--num_workers`` equal shards of the scenes.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset_root",
        required=True,
        help="Path to the ScanNet dataset containing scene folders",
    )
    parser.add_argument(
        "--output_root",
        required=True,
        help="Output path where train/val folders will be located",
    )
    parser.add_argument(
        "--pointclouds_root",
        default="data/scannet",
        type=str,
        help="Input path where previous pointclouds folder located",
    )
    parser.add_argument(
        "--frame_gap", default=75, type=int, help="Frame gap for processing"
    )
    # Boolean switches, registered in the order they appear in --help.
    for flag, description in (
        ("--parse_pointclouds", "Whether parse point clouds"),
        ("--parse_normals", "Whether parse point normals"),
        ("--parse_depths", "Whether parse depths"),
    ):
        parser.add_argument(flag, action="store_true", help=description)
    parser.add_argument(
        "--num_workers",
        default=mp.cpu_count(),
        type=int,
        help="Num workers for preprocessing.",
    )
    parser.add_argument(
        "--thread_id",
        default=0,
        type=int,
        help="Thread id for parallel processing",
    )
    config = parser.parse_args()
    meta_root = Path("pointcept/datasets/preprocessing/scannet") / "meta_data"

    # Label map from raw ScanNet categories to benchmark ids.
    labels_pd = pd.read_csv(
        meta_root / "scannetv2-labels.combined.tsv",
        sep="\t",
        header=0,
    )

    # Scene ids of the train and val splits, one per line.
    with open(meta_root / "scannetv2_train.txt") as train_file:
        train_scenes = train_file.read().splitlines()
    with open(meta_root / "scannetv2_val.txt") as val_file:
        val_scenes = val_file.read().splitlines()

    # Pick this invocation's shard of the sorted scene folders.
    scene_paths = sorted(glob.glob(config.dataset_root + "/scans*/scene*"))
    scene_paths_ = np.array_split(scene_paths, config.num_workers)[config.thread_id]

    print("Processing scenes...")
    for scene_path in scene_paths_:
        handle_process(
            scene_path,
            config.output_root,
            config.pointclouds_root,
            labels_pd,
            train_scenes,
            val_scenes,
            frame_gap=config.frame_gap,
            parse_pointclouds=config.parse_pointclouds,
            parse_normals=config.parse_normals,
            export_depth_images=config.parse_depths,
        )
import os
import json
import argparse


def get_splits_paths(dataset_path, target_root="data/scannet", images_per_sample=4):
    """Write ``splits/{train,val,test}.json`` index files for a dataset folder.

    For every scene directory found under ``<dataset_path>/images/<split>``,
    the ``.png`` files in its ``color`` sub-folder are sorted by the integer
    in front of the first dot of the file name and grouped into consecutive
    runs of ``images_per_sample``. Each run becomes one JSON entry keyed
    ``"<scene>_<run index>"`` holding:

    * ``"pointclouds"``: ``<split>/<scene>`` re-rooted under ``target_root``
    * ``"images"``: the colour image paths of the run
    * ``"correspondences"``: the same file names with a ``.npy`` extension,
      located in the scene's ``correspondence`` sub-folder

    All stored paths have the ``dataset_path`` prefix swapped for
    ``target_root``.

    Args:
        dataset_path: Root folder containing ``images/<split>/<scene>/color``.
        target_root: Prefix substituted for ``dataset_path`` in every stored
            path. The default reproduces the previously hard-coded value.
        images_per_sample: Number of images per JSON entry (previously a
            hard-coded 4). The last entry of a scene may hold fewer.

    Raises:
        ValueError: If ``images_per_sample`` is smaller than 1.
        FileNotFoundError: If an expected split or ``color`` folder is missing.
    """
    if images_per_sample < 1:
        raise ValueError("images_per_sample must be >= 1")

    splits = ["train", "val", "test"]
    im_path = os.path.join(dataset_path, "images")
    split_path = os.path.join(dataset_path, "splits")
    os.makedirs(split_path, exist_ok=True)

    def to_target(path):
        # Re-root via relpath instead of str.replace: replace() substitutes
        # every occurrence of the prefix text and breaks on a trailing
        # separator ("root/" -> "data/scannettrain") or on short prefixes
        # such as "." (which also hits the dot in "0.png").
        return os.path.join(target_root, os.path.relpath(path, dataset_path))

    for split in splits:
        im_split_path = os.path.join(im_path, split)
        pc_split_path = to_target(os.path.join(dataset_path, split))
        # Sorted so the JSON content does not depend on directory scan order.
        scene_names = sorted(
            entry.name for entry in os.scandir(im_split_path) if entry.is_dir()
        )
        split_dict = {}
        for name in scene_names:
            color_dir = os.path.join(im_split_path, name, "color")
            corr_dir = os.path.join(im_split_path, name, "correspondence")
            png_files = sorted(
                (f for f in os.listdir(color_dir) if f.endswith(".png")),
                key=lambda x: int(x.split(".")[0]),
            )
            image_paths = [to_target(os.path.join(color_dir, f)) for f in png_files]
            corr_paths = [
                to_target(os.path.join(corr_dir, f[: -len(".png")] + ".npy"))
                for f in png_files
            ]
            for start in range(0, len(image_paths), images_per_sample):
                stop = start + images_per_sample
                split_dict[f"{name}_{start // images_per_sample}"] = {
                    "pointclouds": os.path.join(pc_split_path, name),
                    "images": image_paths[start:stop],
                    "correspondences": corr_paths[start:stop],
                }
        with open(os.path.join(split_path, f"{split}.json"), "w") as f:
            json.dump(split_dict, f, indent=4)


# Example usage
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset_root",
        required=True,
        help="Path to the ScanNet dataset containing scene folders",
    )
    config = parser.parse_args()
    get_splits_paths(config.dataset_root)
import os
import argparse
import numpy as np
import multiprocessing as mp
from concurrent.futures import ProcessPoolExecutor
from itertools import repeat
from pathlib import Path


def chunking_scene(
    name,
    dataset_root,
    split,
    grid_size=None,
    chunk_range=(6, 6),
    chunk_stride=(3, 3),
    chunk_minimum_size=10000,
):
    """Cut one scene into overlapping x/y chunks and save each chunk.

    Every ``*.npy`` file in ``<dataset_root>/<split>/<name>`` is loaded into a
    dict keyed by the file stem. Chunk membership is decided on ``coord``
    shifted so that its per-axis minimum is zero. The saved arrays are the
    loaded ones indexed by the chunk mask, so saved coordinates are not
    shifted.

    Chunks are written to
    ``<dataset_root>/<split>[_grid..mm]_chunk{rx}x{ry}_stride{sx}x{sy}/<name>_<k>/``
    with ``k`` counting only the chunks that were kept.

    Args:
        name: Scene folder name inside the split.
        dataset_root: Dataset root directory (``str`` or ``Path``).
        split: Split folder name to read from.
        grid_size: If given, keep only the first point of every occupied grid
            cell of this edge length before chunking.
        chunk_range: ``(x, y)`` extent of one chunk.
        chunk_stride: ``(x, y)`` step between neighbouring chunk origins.
        chunk_minimum_size: Chunks with fewer points than this are skipped.

    Raises:
        KeyError: If the scene folder has no ``coord.npy``.
    """
    print(f"Chunking scene {name} in {split} split")
    dataset_root = Path(dataset_root)
    scene_path = dataset_root / split / name
    data_dict = {
        asset[:-4]: np.load(scene_path / asset)
        for asset in os.listdir(scene_path)
        if asset.endswith(".npy")
    }
    coord = data_dict["coord"] - data_dict["coord"].min(axis=0)

    if grid_size is not None:
        grid_coord = np.floor(coord / grid_size).astype(int)
        _, idx = np.unique(grid_coord, axis=0, return_index=True)
        coord = coord[idx]
        for key in data_dict:
            data_dict[key] = data_dict[key][idx]

    bev_range = coord.max(axis=0)[:2]
    # Each axis uses its own extent, stride and range. The y origins were
    # previously derived from the x values, which dropped chunks for scenes
    # longer in y than in x.
    x, y = np.meshgrid(
        np.arange(0, bev_range[0] + chunk_stride[0] - chunk_range[0], chunk_stride[0]),
        np.arange(0, bev_range[1] + chunk_stride[1] - chunk_range[1], chunk_stride[1]),
        indexing="ij",
    )
    chunks = np.concatenate([x.reshape([-1, 1]), y.reshape([-1, 1])], axis=-1)

    # The output split folder name does not depend on the chunk; build it once.
    chunk_split_name = f"{split}_"
    if grid_size is not None:
        chunk_split_name += f"grid{grid_size * 100:.0f}mm_"
    chunk_split_name += (
        f"chunk{chunk_range[0]}x{chunk_range[1]}_"
        f"stride{chunk_stride[0]}x{chunk_stride[1]}"
    )

    chunk_idx = 0
    for chunk in chunks:
        mask = (
            (coord[:, 0] >= chunk[0])
            & (coord[:, 0] < chunk[0] + chunk_range[0])
            & (coord[:, 1] >= chunk[1])
            & (coord[:, 1] < chunk[1] + chunk_range[1])
        )
        if np.sum(mask) < chunk_minimum_size:
            continue

        chunk_save_path = dataset_root / chunk_split_name / f"{name}_{chunk_idx}"
        chunk_save_path.mkdir(parents=True, exist_ok=True)
        for key in data_dict:
            np.save(chunk_save_path / f"{key}.npy", data_dict[key][mask])
        chunk_idx += 1


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset_root",
        required=True,
        help="Path to the Pointcept processed ScanNet++ dataset.",
    )
    parser.add_argument(
        "--split",
        required=True,
        default="train",
        type=str,
        help="Split need to process.",
    )
    parser.add_argument(
        "--grid_size",
        default=None,
        type=float,
        help="Grid size for initial grid sampling",
    )
    parser.add_argument(
        "--chunk_range",
        default=[6, 6],
        type=int,
        nargs="+",
        help="Range of each chunk, e.g. --chunk_range 6 6",
    )
    parser.add_argument(
        "--chunk_stride",
        default=[3, 3],
        type=int,
        nargs="+",
        help="Stride of each chunk, e.g. --chunk_stride 3 3",
    )
    parser.add_argument(
        "--chunk_minimum_size",
        default=10000,
        type=int,
        help="Minimum number of points in each chunk",
    )
    parser.add_argument(
        "--num_workers",
        default=mp.cpu_count(),
        type=int,
        help="Num workers for preprocessing.",
    )

    config = parser.parse_args()
    config.dataset_root = Path(config.dataset_root)
    data_list = os.listdir(config.dataset_root / config.split)

    print("Processing scenes...")
    # The context manager shuts the pool down even if a worker raises.
    with ProcessPoolExecutor(max_workers=config.num_workers) as pool:
        _ = list(
            pool.map(
                chunking_scene,
                data_list,
                repeat(config.dataset_root),
                repeat(config.split),
                repeat(config.grid_size),
                repeat(config.chunk_range),
                repeat(config.chunk_stride),
                repeat(config.chunk_minimum_size),
            )
        )
class RGBDFrame:
    """One frame record read sequentially from an open binary stream.

    The constructor consumes, in this order: 16 native floats reshaped to a
    4x4 ``camera_to_world`` matrix, four native unsigned 64-bit integers
    (colour timestamp, depth timestamp, colour payload size, depth payload
    size), then the colour payload and the depth payload. The stream is left
    positioned directly after the depth payload.
    """

    def __init__(self, file_handle):
        """Parse the next frame from ``file_handle``.

        Args:
            file_handle: Binary file-like object positioned at the start of a
                frame record.

        Raises:
            struct.error: If the stream ends before the record is complete.
        """
        self.camera_to_world = np.asarray(
            struct.unpack("f" * 16, file_handle.read(16 * 4)), dtype=np.float32
        ).reshape(4, 4)
        self.timestamp_color = struct.unpack("Q", file_handle.read(8))[0]
        self.timestamp_depth = struct.unpack("Q", file_handle.read(8))[0]
        self.color_size_bytes = struct.unpack("Q", file_handle.read(8))[0]
        self.depth_size_bytes = struct.unpack("Q", file_handle.read(8))[0]
        self.color_data = self._read_exact(file_handle, self.color_size_bytes)
        self.depth_data = self._read_exact(file_handle, self.depth_size_bytes)

    @staticmethod
    def _read_exact(file_handle, size):
        """Return exactly ``size`` bytes from ``file_handle``.

        A direct read replaces the per-byte ``struct.unpack("c" * size)`` plus
        ``join`` round trip, which produced the same bytes but created one
        Python object per byte. A short read still raises ``struct.error``.
        """
        data = file_handle.read(size)
        if len(data) != size:
            raise struct.error(
                f"expected {size} bytes of frame payload, got {len(data)}"
            )
        return data

    def decompress_depth(self, compression_type):
        """Return the decompressed depth payload as ``bytes``.

        Raises:
            NotImplementedError: For any type other than ``"zlib_ushort"``.
        """
        if compression_type == "zlib_ushort":
            return self.decompress_depth_zlib()
        # A bare ``raise`` with no active exception only produced an opaque
        # RuntimeError. NotImplementedError is a RuntimeError subclass, so
        # existing handlers keep working while the message names the cause.
        raise NotImplementedError(
            f"Unsupported depth compression type: {compression_type!r}"
        )

    def decompress_depth_zlib(self):
        """Return the zlib-decompressed depth payload."""
        return zlib.decompress(self.depth_data)

    def decompress_color(self, compression_type):
        """Return the decoded colour image.

        Raises:
            NotImplementedError: For any type other than ``"jpeg"``.
        """
        if compression_type == "jpeg":
            return self.decompress_color_jpeg()
        raise NotImplementedError(
            f"Unsupported color compression type: {compression_type!r}"
        )

    def decompress_color_jpeg(self):
        """Decode the colour payload with ``imageio.v2.imread``."""
        return imageio.v2.imread(self.color_data)
def ray_distance_to_z_depth(ray_depth, K):
    """Convert per-pixel distances along the viewing ray into z-depth.

    For pixel ``(u, v)`` the normalised ray direction is
    ``((u - cx) / fx, (v - cy) / fy, 1)``, so dividing the ray length by the
    norm of that vector gives the z component.

    Args:
        ray_depth: 2-D array of shape ``(height, width)`` with ray distances.
        K: Intrinsic matrix; only ``K[0, 0]``, ``K[1, 1]``, ``K[0, 2]`` and
            ``K[1, 2]`` are read.

    Returns:
        Array with the same shape as ``ray_depth`` holding z-depth values.
    """
    height, width = ray_depth.shape

    u = np.arange(width)
    v = np.arange(height)
    u_grid, v_grid = np.meshgrid(u, v)

    fx = K[0, 0]
    fy = K[1, 1]
    cx = K[0, 2]
    cy = K[1, 2]

    u_norm = (u_grid - cx) / fx
    v_norm = (v_grid - cy) / fy

    norm_square = u_norm**2 + v_norm**2

    z_depth = ray_depth / np.sqrt(norm_square + 1)
    return z_depth


def center_crop(image, crop_ratio=1.0, patch_size=None):
    """Return the centred crop of a 2-D or 3-D image array.

    Args:
        image: Array of shape ``(H, W)`` or ``(H, W, C)``.
        crop_ratio: Fraction of each spatial side to keep; must lie in
            ``(0, 1]``.
        patch_size: If given, the cropped height and width are rounded down
            to a multiple of this value.

    Returns:
        A slice of ``image`` covering the centred crop window.

    Raises:
        ValueError: If ``image`` is neither 2-D nor 3-D, or if ``crop_ratio``
            is outside ``(0, 1]``.
    """
    if len(image.shape) not in (2, 3):
        raise ValueError("Invalid image shape")
    # A ratio above 1 makes the crop larger than the image; the start offset
    # then goes negative and slicing silently returns a wrong region.
    if not 0 < crop_ratio <= 1:
        raise ValueError(f"crop_ratio must be in (0, 1], got {crop_ratio}")

    height, width = image.shape[:2]
    if patch_size is not None:
        crop_h = int(height * crop_ratio // patch_size * patch_size)
        crop_w = int(width * crop_ratio // patch_size * patch_size)
    else:
        crop_h = int(height * crop_ratio)
        crop_w = int(width * crop_ratio)

    # Split the removed margin evenly between both sides.
    start_h = (height - crop_h) // 2
    start_w = (width - crop_w) // 2

    return image[start_h : start_h + crop_h, start_w : start_w + crop_w]
# Script entry point: extract DINOv2 features for every scene in a scene list.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset_root",
        required=True,
        help="Path to the ScanNet dataset containing scene folders",
    )
    parser.add_argument(
        "--output_root",
        required=True,
        help="Output path where train/val folders will be located",
    )
    parser.add_argument(
        "--scene_list",
        required=True,
        help="Path to scene list need to process",
    )
    # Without ``type=`` argparse hands over command-line values as strings,
    # which breaks the numeric use downstream (e.g. ``i % frame_skip``).
    parser.add_argument(
        "--frame_skip",
        default=10,
        type=int,
        help="Frame skip for processing",
    )
    parser.add_argument(
        "--grid_size",
        default=0.08,
        type=float,
        help="Grid size for sampling",
    )
    parser.add_argument(
        "--crop_ratio",
        default=0.95,
        type=float,
        help="Crop ratio for center crop",
    )

    args = parser.parse_args()
    # loadtxt returns a 0-d array for a single-line file, which cannot be
    # iterated; atleast_1d keeps one-scene lists working.
    scene_list = np.atleast_1d(np.loadtxt(args.scene_list, dtype=str))

    # The split and the raw scan folder are inferred from the list path.
    if "train" in args.scene_list:
        split = "train"
        folder = "scans"
    elif "val" in args.scene_list:
        split = "val"
        folder = "scans"
    else:
        split = "test"
        folder = "scans_test"

    os.makedirs(Path(args.output_root) / split, exist_ok=True)

    device = torch.device("cuda")
    model = torch.hub.load("facebookresearch/dinov2", "dinov2_vitg14").to(device)
    model.eval()
    for scene in scene_list:
        parsing_scene(
            scene_path=Path(args.dataset_root) / folder / scene,
            output_root=args.output_root,
            split=split,
            frame_skip=args.frame_skip,
            grid_size=args.grid_size,
            crop_ratio=args.crop_ratio,
            model=model,
            device="cuda",
        )
# Script entry point: run the external mesh segmentor on every processed scene
# and store the resulting segment indices under the "partition" key of the
# scene's saved data dict.
if __name__ == "__main__":
    import os.path
    import shlex
    import subprocess

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset_root",
        required=True,
        help="Path to the ScanNet dataset containing scene folders",
    )
    parser.add_argument(
        "--processed_root",
        required=True,
        help="Path to the processed ScanNet dataset, add partition to test data dict",
    )
    parser.add_argument(
        "--segmentor_root",
        required=True,
        help="Path to Felzenswalb and Huttenlocher's Graph Based Image Segmentation binary",
    )
    parser.add_argument(
        "--split",
        default="test",
        choices=["test", "val"],
        help="Split to process. [test / val]",
    )
    config = parser.parse_args()
    raw_split = "scans_test" if config.split == "test" else "scans"

    scene_list = glob.glob(os.path.join(config.processed_root, config.split, "*.pth"))
    tmp_root = os.path.join(config.processed_root, "tmp")
    os.makedirs(tmp_root, exist_ok=True)

    # shlex.split keeps values like "segmentator --flag" working as they did
    # under the shell, while the scene path is passed as a single argument.
    segmentor_cmd = shlex.split(config.segmentor_root)

    for scene in scene_list:
        scene_name = os.path.basename(scene).split(".")[0]
        raw_scene = os.path.join(
            config.dataset_root,
            raw_split,
            scene_name,
            f"{scene_name}_vh_clean_2.ply",
        )
        tmp_scene = os.path.join(tmp_root, f"{scene_name}_vh_clean_2.ply")
        # Swap only the trailing extension; str.replace would rewrite every
        # ".ply" occurring anywhere in the path.
        partition_file = tmp_scene[: -len(".ply")] + ".0.010000.segs.json"
        try:
            # copy original scene to tmp folder
            shutil.copy(raw_scene, tmp_scene)
            # Run the segmentor without a shell, so paths with spaces or shell
            # metacharacters are safe and a non-zero exit status is reported.
            result = subprocess.run(
                [*segmentor_cmd, tmp_scene],
                capture_output=True,
                text=True,
                check=True,
            )
            print(result.stdout)
            # load partition file
            with open(partition_file) as f:
                partition = json.load(f)["segIndices"]
            # NOTE(review): torch.load unpickles the file; only run this on
            # processed data you produced yourself.
            data_dict = torch.load(scene)
            data_dict["partition"] = partition
            torch.save(data_dict, scene)
        finally:
            # clean tmp even when the segmentor or loading fails
            for leftover in (partition_file, tmp_scene):
                if os.path.exists(leftover):
                    os.remove(leftover)
        print(f"Adding partition information to {scene_name}")

    os.rmdir(tmp_root)
a/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/classes_ObjClassification-ShapeNetCore55.txt b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/classes_ObjClassification-ShapeNetCore55.txt new file mode 100644 index 0000000..e53f5bc --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/classes_ObjClassification-ShapeNetCore55.txt @@ -0,0 +1,17 @@ +1 trash +3 basket +4 bathtub +5 bed +9 shelf +13 cabinet +18 chair +20 keyboard +22 tv +30 lamp +31 laptop +35 microwave +39 pillow +42 printer +47 sofa +48 stove +49 table diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/classes_SemVoxLabel-nyu40id.txt b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/classes_SemVoxLabel-nyu40id.txt new file mode 100644 index 0000000..48e2287 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/classes_SemVoxLabel-nyu40id.txt @@ -0,0 +1,20 @@ +1 wall +2 floor +3 cabinet +4 bed +5 chair +6 sofa +7 table +8 door +9 window +10 bookshelf +11 picture +12 counter +14 desk +16 curtain +24 refridgerator +28 shower curtain +33 toilet +34 sink +36 bathtub +39 otherfurniture \ No newline at end of file diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_constants.py b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_constants.py new file mode 100644 index 0000000..0404fd6 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_constants.py @@ -0,0 +1,704 @@ +# ScanNet Benchmark constants +VALID_CLASS_IDS_20 = ( + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, 
+ 10, + 11, + 12, + 14, + 16, + 24, + 28, + 33, + 34, + 36, + 39, +) + +CLASS_LABELS_20 = ( + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refrigerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", +) + +SCANNET_COLOR_MAP_20 = { + 0: (0.0, 0.0, 0.0), + 1: (174.0, 199.0, 232.0), + 2: (152.0, 223.0, 138.0), + 3: (31.0, 119.0, 180.0), + 4: (255.0, 187.0, 120.0), + 5: (188.0, 189.0, 34.0), + 6: (140.0, 86.0, 75.0), + 7: (255.0, 152.0, 150.0), + 8: (214.0, 39.0, 40.0), + 9: (197.0, 176.0, 213.0), + 10: (148.0, 103.0, 189.0), + 11: (196.0, 156.0, 148.0), + 12: (23.0, 190.0, 207.0), + 14: (247.0, 182.0, 210.0), + 15: (66.0, 188.0, 102.0), + 16: (219.0, 219.0, 141.0), + 17: (140.0, 57.0, 197.0), + 18: (202.0, 185.0, 52.0), + 19: (51.0, 176.0, 203.0), + 20: (200.0, 54.0, 131.0), + 21: (92.0, 193.0, 61.0), + 22: (78.0, 71.0, 183.0), + 23: (172.0, 114.0, 82.0), + 24: (255.0, 127.0, 14.0), + 25: (91.0, 163.0, 138.0), + 26: (153.0, 98.0, 156.0), + 27: (140.0, 153.0, 101.0), + 28: (158.0, 218.0, 229.0), + 29: (100.0, 125.0, 154.0), + 30: (178.0, 127.0, 135.0), + 32: (146.0, 111.0, 194.0), + 33: (44.0, 160.0, 44.0), + 34: (112.0, 128.0, 144.0), + 35: (96.0, 207.0, 209.0), + 36: (227.0, 119.0, 194.0), + 37: (213.0, 92.0, 176.0), + 38: (94.0, 106.0, 211.0), + 39: (82.0, 84.0, 163.0), + 40: (100.0, 85.0, 144.0), +} + +# ScanNet200 Benchmark constants +VALID_CLASS_IDS_200 = ( + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 21, + 22, + 23, + 24, + 26, + 27, + 28, + 29, + 31, + 32, + 33, + 34, + 35, + 36, + 38, + 39, + 40, + 41, + 42, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 54, + 55, + 56, + 57, + 58, + 59, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 82, + 84, + 86, + 87, + 88, + 89, + 90, + 93, + 95, 
+ 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 110, + 112, + 115, + 116, + 118, + 120, + 121, + 122, + 125, + 128, + 130, + 131, + 132, + 134, + 136, + 138, + 139, + 140, + 141, + 145, + 148, + 154, + 155, + 156, + 157, + 159, + 161, + 163, + 165, + 166, + 168, + 169, + 170, + 177, + 180, + 185, + 188, + 191, + 193, + 195, + 202, + 208, + 213, + 214, + 221, + 229, + 230, + 232, + 233, + 242, + 250, + 261, + 264, + 276, + 283, + 286, + 300, + 304, + 312, + 323, + 325, + 331, + 342, + 356, + 370, + 392, + 395, + 399, + 408, + 417, + 488, + 540, + 562, + 570, + 572, + 581, + 609, + 748, + 776, + 1156, + 1163, + 1164, + 1165, + 1166, + 1167, + 1168, + 1169, + 1170, + 1171, + 1172, + 1173, + 1174, + 1175, + 1176, + 1178, + 1179, + 1180, + 1181, + 1182, + 1183, + 1184, + 1185, + 1186, + 1187, + 1188, + 1189, + 1190, + 1191, +) + +CLASS_LABELS_200 = ( + "wall", + "chair", + "floor", + "table", + "door", + "couch", + "cabinet", + "shelf", + "desk", + "office chair", + "bed", + "pillow", + "sink", + "picture", + "window", + "toilet", + "bookshelf", + "monitor", + "curtain", + "book", + "armchair", + "coffee table", + "box", + "refrigerator", + "lamp", + "kitchen cabinet", + "towel", + "clothes", + "tv", + "nightstand", + "counter", + "dresser", + "stool", + "cushion", + "plant", + "ceiling", + "bathtub", + "end table", + "dining table", + "keyboard", + "bag", + "backpack", + "toilet paper", + "printer", + "tv stand", + "whiteboard", + "blanket", + "shower curtain", + "trash can", + "closet", + "stairs", + "microwave", + "stove", + "shoe", + "computer tower", + "bottle", + "bin", + "ottoman", + "bench", + "board", + "washing machine", + "mirror", + "copier", + "basket", + "sofa chair", + "file cabinet", + "fan", + "laptop", + "shower", + "paper", + "person", + "paper towel dispenser", + "oven", + "blinds", + "rack", + "plate", + "blackboard", + "piano", + "suitcase", + "rail", + "radiator", + "recycling bin", + "container", + "wardrobe", + 
"soap dispenser", + "telephone", + "bucket", + "clock", + "stand", + "light", + "laundry basket", + "pipe", + "clothes dryer", + "guitar", + "toilet paper holder", + "seat", + "speaker", + "column", + "bicycle", + "ladder", + "bathroom stall", + "shower wall", + "cup", + "jacket", + "storage bin", + "coffee maker", + "dishwasher", + "paper towel roll", + "machine", + "mat", + "windowsill", + "bar", + "toaster", + "bulletin board", + "ironing board", + "fireplace", + "soap dish", + "kitchen counter", + "doorframe", + "toilet paper dispenser", + "mini fridge", + "fire extinguisher", + "ball", + "hat", + "shower curtain rod", + "water cooler", + "paper cutter", + "tray", + "shower door", + "pillar", + "ledge", + "toaster oven", + "mouse", + "toilet seat cover dispenser", + "furniture", + "cart", + "storage container", + "scale", + "tissue box", + "light switch", + "crate", + "power outlet", + "decoration", + "sign", + "projector", + "closet door", + "vacuum cleaner", + "candle", + "plunger", + "stuffed animal", + "headphones", + "dish rack", + "broom", + "guitar case", + "range hood", + "dustpan", + "hair dryer", + "water bottle", + "handicap bar", + "purse", + "vent", + "shower floor", + "water pitcher", + "mailbox", + "bowl", + "paper bag", + "alarm clock", + "music stand", + "projector screen", + "divider", + "laundry detergent", + "bathroom counter", + "object", + "bathroom vanity", + "closet wall", + "laundry hamper", + "bathroom stall door", + "ceiling light", + "trash bin", + "dumbbell", + "stair rail", + "tube", + "bathroom cabinet", + "cd case", + "closet rod", + "coffee kettle", + "structure", + "shower head", + "keyboard piano", + "case of water bottles", + "coat rack", + "storage organizer", + "folded chair", + "fire alarm", + "power strip", + "calendar", + "poster", + "potted plant", + "luggage", + "mattress", +) + +SCANNET_COLOR_MAP_200 = { + 0: (0.0, 0.0, 0.0), + 1: (174.0, 199.0, 232.0), + 2: (188.0, 189.0, 34.0), + 3: (152.0, 223.0, 138.0), + 4: 
(255.0, 152.0, 150.0), + 5: (214.0, 39.0, 40.0), + 6: (91.0, 135.0, 229.0), + 7: (31.0, 119.0, 180.0), + 8: (229.0, 91.0, 104.0), + 9: (247.0, 182.0, 210.0), + 10: (91.0, 229.0, 110.0), + 11: (255.0, 187.0, 120.0), + 13: (141.0, 91.0, 229.0), + 14: (112.0, 128.0, 144.0), + 15: (196.0, 156.0, 148.0), + 16: (197.0, 176.0, 213.0), + 17: (44.0, 160.0, 44.0), + 18: (148.0, 103.0, 189.0), + 19: (229.0, 91.0, 223.0), + 21: (219.0, 219.0, 141.0), + 22: (192.0, 229.0, 91.0), + 23: (88.0, 218.0, 137.0), + 24: (58.0, 98.0, 137.0), + 26: (177.0, 82.0, 239.0), + 27: (255.0, 127.0, 14.0), + 28: (237.0, 204.0, 37.0), + 29: (41.0, 206.0, 32.0), + 31: (62.0, 143.0, 148.0), + 32: (34.0, 14.0, 130.0), + 33: (143.0, 45.0, 115.0), + 34: (137.0, 63.0, 14.0), + 35: (23.0, 190.0, 207.0), + 36: (16.0, 212.0, 139.0), + 38: (90.0, 119.0, 201.0), + 39: (125.0, 30.0, 141.0), + 40: (150.0, 53.0, 56.0), + 41: (186.0, 197.0, 62.0), + 42: (227.0, 119.0, 194.0), + 44: (38.0, 100.0, 128.0), + 45: (120.0, 31.0, 243.0), + 46: (154.0, 59.0, 103.0), + 47: (169.0, 137.0, 78.0), + 48: (143.0, 245.0, 111.0), + 49: (37.0, 230.0, 205.0), + 50: (14.0, 16.0, 155.0), + 51: (196.0, 51.0, 182.0), + 52: (237.0, 80.0, 38.0), + 54: (138.0, 175.0, 62.0), + 55: (158.0, 218.0, 229.0), + 56: (38.0, 96.0, 167.0), + 57: (190.0, 77.0, 246.0), + 58: (208.0, 49.0, 84.0), + 59: (208.0, 193.0, 72.0), + 62: (55.0, 220.0, 57.0), + 63: (10.0, 125.0, 140.0), + 64: (76.0, 38.0, 202.0), + 65: (191.0, 28.0, 135.0), + 66: (211.0, 120.0, 42.0), + 67: (118.0, 174.0, 76.0), + 68: (17.0, 242.0, 171.0), + 69: (20.0, 65.0, 247.0), + 70: (208.0, 61.0, 222.0), + 71: (162.0, 62.0, 60.0), + 72: (210.0, 235.0, 62.0), + 73: (45.0, 152.0, 72.0), + 74: (35.0, 107.0, 149.0), + 75: (160.0, 89.0, 237.0), + 76: (227.0, 56.0, 125.0), + 77: (169.0, 143.0, 81.0), + 78: (42.0, 143.0, 20.0), + 79: (25.0, 160.0, 151.0), + 80: (82.0, 75.0, 227.0), + 82: (253.0, 59.0, 222.0), + 84: (240.0, 130.0, 89.0), + 86: (123.0, 172.0, 47.0), + 87: (71.0, 194.0, 133.0), + 
88: (24.0, 94.0, 205.0), + 89: (134.0, 16.0, 179.0), + 90: (159.0, 32.0, 52.0), + 93: (213.0, 208.0, 88.0), + 95: (64.0, 158.0, 70.0), + 96: (18.0, 163.0, 194.0), + 97: (65.0, 29.0, 153.0), + 98: (177.0, 10.0, 109.0), + 99: (152.0, 83.0, 7.0), + 100: (83.0, 175.0, 30.0), + 101: (18.0, 199.0, 153.0), + 102: (61.0, 81.0, 208.0), + 103: (213.0, 85.0, 216.0), + 104: (170.0, 53.0, 42.0), + 105: (161.0, 192.0, 38.0), + 106: (23.0, 241.0, 91.0), + 107: (12.0, 103.0, 170.0), + 110: (151.0, 41.0, 245.0), + 112: (133.0, 51.0, 80.0), + 115: (184.0, 162.0, 91.0), + 116: (50.0, 138.0, 38.0), + 118: (31.0, 237.0, 236.0), + 120: (39.0, 19.0, 208.0), + 121: (223.0, 27.0, 180.0), + 122: (254.0, 141.0, 85.0), + 125: (97.0, 144.0, 39.0), + 128: (106.0, 231.0, 176.0), + 130: (12.0, 61.0, 162.0), + 131: (124.0, 66.0, 140.0), + 132: (137.0, 66.0, 73.0), + 134: (250.0, 253.0, 26.0), + 136: (55.0, 191.0, 73.0), + 138: (60.0, 126.0, 146.0), + 139: (153.0, 108.0, 234.0), + 140: (184.0, 58.0, 125.0), + 141: (135.0, 84.0, 14.0), + 145: (139.0, 248.0, 91.0), + 148: (53.0, 200.0, 172.0), + 154: (63.0, 69.0, 134.0), + 155: (190.0, 75.0, 186.0), + 156: (127.0, 63.0, 52.0), + 157: (141.0, 182.0, 25.0), + 159: (56.0, 144.0, 89.0), + 161: (64.0, 160.0, 250.0), + 163: (182.0, 86.0, 245.0), + 165: (139.0, 18.0, 53.0), + 166: (134.0, 120.0, 54.0), + 168: (49.0, 165.0, 42.0), + 169: (51.0, 128.0, 133.0), + 170: (44.0, 21.0, 163.0), + 177: (232.0, 93.0, 193.0), + 180: (176.0, 102.0, 54.0), + 185: (116.0, 217.0, 17.0), + 188: (54.0, 209.0, 150.0), + 191: (60.0, 99.0, 204.0), + 193: (129.0, 43.0, 144.0), + 195: (252.0, 100.0, 106.0), + 202: (187.0, 196.0, 73.0), + 208: (13.0, 158.0, 40.0), + 213: (52.0, 122.0, 152.0), + 214: (128.0, 76.0, 202.0), + 221: (187.0, 50.0, 115.0), + 229: (180.0, 141.0, 71.0), + 230: (77.0, 208.0, 35.0), + 232: (72.0, 183.0, 168.0), + 233: (97.0, 99.0, 203.0), + 242: (172.0, 22.0, 158.0), + 250: (155.0, 64.0, 40.0), + 261: (118.0, 159.0, 30.0), + 264: (69.0, 252.0, 148.0), + 276: 
(45.0, 103.0, 173.0), + 283: (111.0, 38.0, 149.0), + 286: (184.0, 9.0, 49.0), + 300: (188.0, 174.0, 67.0), + 304: (53.0, 206.0, 53.0), + 312: (97.0, 235.0, 252.0), + 323: (66.0, 32.0, 182.0), + 325: (236.0, 114.0, 195.0), + 331: (241.0, 154.0, 83.0), + 342: (133.0, 240.0, 52.0), + 356: (16.0, 205.0, 144.0), + 370: (75.0, 101.0, 198.0), + 392: (237.0, 95.0, 251.0), + 395: (191.0, 52.0, 49.0), + 399: (227.0, 254.0, 54.0), + 408: (49.0, 206.0, 87.0), + 417: (48.0, 113.0, 150.0), + 488: (125.0, 73.0, 182.0), + 540: (229.0, 32.0, 114.0), + 562: (158.0, 119.0, 28.0), + 570: (60.0, 205.0, 27.0), + 572: (18.0, 215.0, 201.0), + 581: (79.0, 76.0, 153.0), + 609: (134.0, 13.0, 116.0), + 748: (192.0, 97.0, 63.0), + 776: (108.0, 163.0, 18.0), + 1156: (95.0, 220.0, 156.0), + 1163: (98.0, 141.0, 208.0), + 1164: (144.0, 19.0, 193.0), + 1165: (166.0, 36.0, 57.0), + 1166: (212.0, 202.0, 34.0), + 1167: (23.0, 206.0, 34.0), + 1168: (91.0, 211.0, 236.0), + 1169: (79.0, 55.0, 137.0), + 1170: (182.0, 19.0, 117.0), + 1171: (134.0, 76.0, 14.0), + 1172: (87.0, 185.0, 28.0), + 1173: (82.0, 224.0, 187.0), + 1174: (92.0, 110.0, 214.0), + 1175: (168.0, 80.0, 171.0), + 1176: (197.0, 63.0, 51.0), + 1178: (175.0, 199.0, 77.0), + 1179: (62.0, 180.0, 98.0), + 1180: (8.0, 91.0, 150.0), + 1181: (77.0, 15.0, 130.0), + 1182: (154.0, 65.0, 96.0), + 1183: (197.0, 152.0, 11.0), + 1184: (59.0, 155.0, 45.0), + 1185: (12.0, 147.0, 145.0), + 1186: (54.0, 35.0, 219.0), + 1187: (210.0, 73.0, 181.0), + 1188: (221.0, 124.0, 77.0), + 1189: (149.0, 214.0, 66.0), + 1190: (72.0, 185.0, 134.0), + 1191: (42.0, 94.0, 198.0), +} + +# For instance segmentation the non-object categories +VALID_PANOPTIC_IDS = (1, 3) + +CLASS_LABELS_PANOPTIC = ("wall", "floor") diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_splits.py b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_splits.py new file mode 
100644 index 0000000..39ccc3c --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_splits.py @@ -0,0 +1,625 @@ +# This file contains the HEAD - COMMON - TAIL split category ids for ScanNet 200 + +HEAD_CATS_SCANNET_200 = [ + "tv stand", + "curtain", + "blinds", + "shower curtain", + "bookshelf", + "tv", + "kitchen cabinet", + "pillow", + "lamp", + "dresser", + "monitor", + "object", + "ceiling", + "board", + "stove", + "closet wall", + "couch", + "office chair", + "kitchen counter", + "shower", + "closet", + "doorframe", + "sofa chair", + "mailbox", + "nightstand", + "washing machine", + "picture", + "book", + "sink", + "recycling bin", + "table", + "backpack", + "shower wall", + "toilet", + "copier", + "counter", + "stool", + "refrigerator", + "window", + "file cabinet", + "chair", + "wall", + "plant", + "coffee table", + "stairs", + "armchair", + "cabinet", + "bathroom vanity", + "bathroom stall", + "mirror", + "blackboard", + "trash can", + "stair rail", + "box", + "towel", + "door", + "clothes", + "whiteboard", + "bed", + "floor", + "bathtub", + "desk", + "wardrobe", + "clothes dryer", + "radiator", + "shelf", +] +COMMON_CATS_SCANNET_200 = [ + "cushion", + "end table", + "dining table", + "keyboard", + "bag", + "toilet paper", + "printer", + "blanket", + "microwave", + "shoe", + "computer tower", + "bottle", + "bin", + "ottoman", + "bench", + "basket", + "fan", + "laptop", + "person", + "paper towel dispenser", + "oven", + "rack", + "piano", + "suitcase", + "rail", + "container", + "telephone", + "stand", + "light", + "laundry basket", + "pipe", + "seat", + "column", + "bicycle", + "ladder", + "jacket", + "storage bin", + "coffee maker", + "dishwasher", + "machine", + "mat", + "windowsill", + "bulletin board", + "fireplace", + "mini fridge", + "water cooler", + "shower door", + "pillar", + "ledge", + "furniture", + "cart", + "decoration", + "closet door", + "vacuum cleaner", + "dish 
rack", + "range hood", + "projector screen", + "divider", + "bathroom counter", + "laundry hamper", + "bathroom stall door", + "ceiling light", + "trash bin", + "bathroom cabinet", + "structure", + "storage organizer", + "potted plant", + "mattress", +] +TAIL_CATS_SCANNET_200 = [ + "paper", + "plate", + "soap dispenser", + "bucket", + "clock", + "guitar", + "toilet paper holder", + "speaker", + "cup", + "paper towel roll", + "bar", + "toaster", + "ironing board", + "soap dish", + "toilet paper dispenser", + "fire extinguisher", + "ball", + "hat", + "shower curtain rod", + "paper cutter", + "tray", + "toaster oven", + "mouse", + "toilet seat cover dispenser", + "storage container", + "scale", + "tissue box", + "light switch", + "crate", + "power outlet", + "sign", + "projector", + "candle", + "plunger", + "stuffed animal", + "headphones", + "broom", + "guitar case", + "dustpan", + "hair dryer", + "water bottle", + "handicap bar", + "purse", + "vent", + "shower floor", + "water pitcher", + "bowl", + "paper bag", + "alarm clock", + "music stand", + "laundry detergent", + "dumbbell", + "tube", + "cd case", + "closet rod", + "coffee kettle", + "shower head", + "keyboard piano", + "case of water bottles", + "coat rack", + "folded chair", + "fire alarm", + "power strip", + "calendar", + "poster", + "luggage", +] + + +# Given the different size of the official train and val sets, not all ScanNet200 categories are present in the validation set. 
+# Here we list of categories with labels and IDs present in both train and validation set, and the remaining categories those are present in train, but not in val +# We dont evaluate on unseen validation categories in this benchmark + +VALID_CLASS_IDS_200_VALIDATION = ( + "wall", + "chair", + "floor", + "table", + "door", + "couch", + "cabinet", + "shelf", + "desk", + "office chair", + "bed", + "pillow", + "sink", + "picture", + "window", + "toilet", + "bookshelf", + "monitor", + "curtain", + "book", + "armchair", + "coffee table", + "box", + "refrigerator", + "lamp", + "kitchen cabinet", + "towel", + "clothes", + "tv", + "nightstand", + "counter", + "dresser", + "stool", + "cushion", + "plant", + "ceiling", + "bathtub", + "end table", + "dining table", + "keyboard", + "bag", + "backpack", + "toilet paper", + "printer", + "tv stand", + "whiteboard", + "blanket", + "shower curtain", + "trash can", + "closet", + "stairs", + "microwave", + "stove", + "shoe", + "computer tower", + "bottle", + "bin", + "ottoman", + "bench", + "board", + "washing machine", + "mirror", + "copier", + "basket", + "sofa chair", + "file cabinet", + "fan", + "laptop", + "shower", + "paper", + "person", + "paper towel dispenser", + "oven", + "blinds", + "rack", + "plate", + "blackboard", + "piano", + "suitcase", + "rail", + "radiator", + "recycling bin", + "container", + "wardrobe", + "soap dispenser", + "telephone", + "bucket", + "clock", + "stand", + "light", + "laundry basket", + "pipe", + "clothes dryer", + "guitar", + "toilet paper holder", + "seat", + "speaker", + "column", + "ladder", + "bathroom stall", + "shower wall", + "cup", + "jacket", + "storage bin", + "coffee maker", + "dishwasher", + "paper towel roll", + "machine", + "mat", + "windowsill", + "bar", + "toaster", + "bulletin board", + "ironing board", + "fireplace", + "soap dish", + "kitchen counter", + "doorframe", + "toilet paper dispenser", + "mini fridge", + "fire extinguisher", + "ball", + "hat", + "shower curtain rod", + 
"water cooler", + "paper cutter", + "tray", + "shower door", + "pillar", + "ledge", + "toaster oven", + "mouse", + "toilet seat cover dispenser", + "furniture", + "cart", + "scale", + "tissue box", + "light switch", + "crate", + "power outlet", + "decoration", + "sign", + "projector", + "closet door", + "vacuum cleaner", + "plunger", + "stuffed animal", + "headphones", + "dish rack", + "broom", + "range hood", + "dustpan", + "hair dryer", + "water bottle", + "handicap bar", + "vent", + "shower floor", + "water pitcher", + "mailbox", + "bowl", + "paper bag", + "projector screen", + "divider", + "laundry detergent", + "bathroom counter", + "object", + "bathroom vanity", + "closet wall", + "laundry hamper", + "bathroom stall door", + "ceiling light", + "trash bin", + "dumbbell", + "stair rail", + "tube", + "bathroom cabinet", + "closet rod", + "coffee kettle", + "shower head", + "keyboard piano", + "case of water bottles", + "coat rack", + "folded chair", + "fire alarm", + "power strip", + "calendar", + "poster", + "potted plant", + "mattress", +) + +CLASS_LABELS_200_VALIDATION = ( + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 21, + 22, + 23, + 24, + 26, + 27, + 28, + 29, + 31, + 32, + 33, + 34, + 35, + 36, + 38, + 39, + 40, + 41, + 42, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 54, + 55, + 56, + 57, + 58, + 59, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 82, + 84, + 86, + 87, + 88, + 89, + 90, + 93, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 110, + 112, + 115, + 116, + 118, + 120, + 122, + 125, + 128, + 130, + 131, + 132, + 134, + 136, + 138, + 139, + 140, + 141, + 145, + 148, + 154, + 155, + 156, + 157, + 159, + 161, + 163, + 165, + 166, + 168, + 169, + 170, + 177, + 180, + 185, + 188, + 191, + 193, + 195, + 202, + 208, + 213, + 214, + 229, + 230, + 232, + 233, + 242, + 250, + 261, + 
264, + 276, + 283, + 300, + 304, + 312, + 323, + 325, + 342, + 356, + 370, + 392, + 395, + 408, + 417, + 488, + 540, + 562, + 570, + 609, + 748, + 776, + 1156, + 1163, + 1164, + 1165, + 1166, + 1167, + 1168, + 1169, + 1170, + 1171, + 1172, + 1173, + 1175, + 1176, + 1179, + 1180, + 1181, + 1182, + 1184, + 1185, + 1186, + 1187, + 1188, + 1189, + 1191, +) + +VALID_CLASS_IDS_200_TRAIN_ONLY = ( + "bicycle", + "storage container", + "candle", + "guitar case", + "purse", + "alarm clock", + "music stand", + "cd case", + "structure", + "storage organizer", + "luggage", +) + +CLASS_LABELS_200_TRAIN_ONLY = ( + 121, + 221, + 286, + 331, + 399, + 572, + 581, + 1174, + 1178, + 1183, + 1190, +) diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet_means.npz b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet_means.npz new file mode 100644 index 0000000..e57647c Binary files /dev/null and b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet_means.npz differ diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv1_test.txt b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv1_test.txt new file mode 100644 index 0000000..b9e7d92 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv1_test.txt @@ -0,0 +1,312 @@ +scene0568_00 +scene0568_01 +scene0568_02 +scene0304_00 +scene0488_00 +scene0488_01 +scene0412_00 +scene0412_01 +scene0217_00 +scene0019_00 +scene0019_01 +scene0414_00 +scene0575_00 +scene0575_01 +scene0575_02 +scene0426_00 +scene0426_01 +scene0426_02 +scene0426_03 +scene0549_00 +scene0549_01 +scene0578_00 +scene0578_01 +scene0578_02 +scene0665_00 +scene0665_01 +scene0050_00 +scene0050_01 +scene0050_02 
+scene0257_00 +scene0025_00 +scene0025_01 +scene0025_02 +scene0583_00 +scene0583_01 +scene0583_02 +scene0701_00 +scene0701_01 +scene0701_02 +scene0580_00 +scene0580_01 +scene0565_00 +scene0169_00 +scene0169_01 +scene0655_00 +scene0655_01 +scene0655_02 +scene0063_00 +scene0221_00 +scene0221_01 +scene0591_00 +scene0591_01 +scene0591_02 +scene0678_00 +scene0678_01 +scene0678_02 +scene0462_00 +scene0427_00 +scene0595_00 +scene0193_00 +scene0193_01 +scene0164_00 +scene0164_01 +scene0164_02 +scene0164_03 +scene0598_00 +scene0598_01 +scene0598_02 +scene0599_00 +scene0599_01 +scene0599_02 +scene0328_00 +scene0300_00 +scene0300_01 +scene0354_00 +scene0458_00 +scene0458_01 +scene0423_00 +scene0423_01 +scene0423_02 +scene0307_00 +scene0307_01 +scene0307_02 +scene0606_00 +scene0606_01 +scene0606_02 +scene0432_00 +scene0432_01 +scene0608_00 +scene0608_01 +scene0608_02 +scene0651_00 +scene0651_01 +scene0651_02 +scene0430_00 +scene0430_01 +scene0689_00 +scene0357_00 +scene0357_01 +scene0574_00 +scene0574_01 +scene0574_02 +scene0329_00 +scene0329_01 +scene0329_02 +scene0153_00 +scene0153_01 +scene0616_00 +scene0616_01 +scene0671_00 +scene0671_01 +scene0618_00 +scene0382_00 +scene0382_01 +scene0490_00 +scene0621_00 +scene0607_00 +scene0607_01 +scene0149_00 +scene0695_00 +scene0695_01 +scene0695_02 +scene0695_03 +scene0389_00 +scene0377_00 +scene0377_01 +scene0377_02 +scene0342_00 +scene0139_00 +scene0629_00 +scene0629_01 +scene0629_02 +scene0496_00 +scene0633_00 +scene0633_01 +scene0518_00 +scene0652_00 +scene0406_00 +scene0406_01 +scene0406_02 +scene0144_00 +scene0144_01 +scene0494_00 +scene0278_00 +scene0278_01 +scene0316_00 +scene0609_00 +scene0609_01 +scene0609_02 +scene0609_03 +scene0084_00 +scene0084_01 +scene0084_02 +scene0696_00 +scene0696_01 +scene0696_02 +scene0351_00 +scene0351_01 +scene0643_00 +scene0644_00 +scene0645_00 +scene0645_01 +scene0645_02 +scene0081_00 +scene0081_01 +scene0081_02 +scene0647_00 +scene0647_01 +scene0535_00 +scene0353_00 +scene0353_01 
+scene0353_02 +scene0559_00 +scene0559_01 +scene0559_02 +scene0593_00 +scene0593_01 +scene0246_00 +scene0653_00 +scene0653_01 +scene0064_00 +scene0064_01 +scene0356_00 +scene0356_01 +scene0356_02 +scene0030_00 +scene0030_01 +scene0030_02 +scene0222_00 +scene0222_01 +scene0338_00 +scene0338_01 +scene0338_02 +scene0378_00 +scene0378_01 +scene0378_02 +scene0660_00 +scene0553_00 +scene0553_01 +scene0553_02 +scene0527_00 +scene0663_00 +scene0663_01 +scene0663_02 +scene0664_00 +scene0664_01 +scene0664_02 +scene0334_00 +scene0334_01 +scene0334_02 +scene0046_00 +scene0046_01 +scene0046_02 +scene0203_00 +scene0203_01 +scene0203_02 +scene0088_00 +scene0088_01 +scene0088_02 +scene0088_03 +scene0086_00 +scene0086_01 +scene0086_02 +scene0670_00 +scene0670_01 +scene0256_00 +scene0256_01 +scene0256_02 +scene0249_00 +scene0441_00 +scene0658_00 +scene0704_00 +scene0704_01 +scene0187_00 +scene0187_01 +scene0131_00 +scene0131_01 +scene0131_02 +scene0207_00 +scene0207_01 +scene0207_02 +scene0461_00 +scene0011_00 +scene0011_01 +scene0343_00 +scene0251_00 +scene0077_00 +scene0077_01 +scene0684_00 +scene0684_01 +scene0550_00 +scene0686_00 +scene0686_01 +scene0686_02 +scene0208_00 +scene0500_00 +scene0500_01 +scene0552_00 +scene0552_01 +scene0648_00 +scene0648_01 +scene0435_00 +scene0435_01 +scene0435_02 +scene0435_03 +scene0690_00 +scene0690_01 +scene0693_00 +scene0693_01 +scene0693_02 +scene0700_00 +scene0700_01 +scene0700_02 +scene0699_00 +scene0231_00 +scene0231_01 +scene0231_02 +scene0697_00 +scene0697_01 +scene0697_02 +scene0697_03 +scene0474_00 +scene0474_01 +scene0474_02 +scene0474_03 +scene0474_04 +scene0474_05 +scene0355_00 +scene0355_01 +scene0146_00 +scene0146_01 +scene0146_02 +scene0196_00 +scene0702_00 +scene0702_01 +scene0702_02 +scene0314_00 +scene0277_00 +scene0277_01 +scene0277_02 +scene0095_00 +scene0095_01 +scene0015_00 +scene0100_00 +scene0100_01 +scene0100_02 +scene0558_00 +scene0558_01 +scene0558_02 +scene0685_00 +scene0685_01 +scene0685_02 diff --git 
a/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv1_train.txt b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv1_train.txt new file mode 100644 index 0000000..7520948 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv1_train.txt @@ -0,0 +1,1045 @@ +scene0191_00 +scene0191_01 +scene0191_02 +scene0119_00 +scene0230_00 +scene0528_00 +scene0528_01 +scene0705_00 +scene0705_01 +scene0705_02 +scene0415_00 +scene0415_01 +scene0415_02 +scene0007_00 +scene0141_00 +scene0141_01 +scene0141_02 +scene0515_00 +scene0515_01 +scene0515_02 +scene0447_00 +scene0447_01 +scene0447_02 +scene0531_00 +scene0503_00 +scene0285_00 +scene0069_00 +scene0584_00 +scene0584_01 +scene0584_02 +scene0581_00 +scene0581_01 +scene0581_02 +scene0620_00 +scene0620_01 +scene0263_00 +scene0263_01 +scene0481_00 +scene0481_01 +scene0020_00 +scene0020_01 +scene0291_00 +scene0291_01 +scene0291_02 +scene0469_00 +scene0469_01 +scene0469_02 +scene0659_00 +scene0659_01 +scene0024_00 +scene0024_01 +scene0024_02 +scene0564_00 +scene0117_00 +scene0027_00 +scene0027_01 +scene0027_02 +scene0028_00 +scene0330_00 +scene0418_00 +scene0418_01 +scene0418_02 +scene0233_00 +scene0233_01 +scene0673_00 +scene0673_01 +scene0673_02 +scene0673_03 +scene0673_04 +scene0673_05 +scene0585_00 +scene0585_01 +scene0362_00 +scene0362_01 +scene0362_02 +scene0362_03 +scene0035_00 +scene0035_01 +scene0358_00 +scene0358_01 +scene0358_02 +scene0037_00 +scene0194_00 +scene0321_00 +scene0293_00 +scene0293_01 +scene0623_00 +scene0623_01 +scene0592_00 +scene0592_01 +scene0569_00 +scene0569_01 +scene0413_00 +scene0313_00 +scene0313_01 +scene0313_02 +scene0480_00 +scene0480_01 +scene0401_00 +scene0517_00 +scene0517_01 +scene0517_02 +scene0032_00 +scene0032_01 +scene0613_00 +scene0613_01 +scene0613_02 +scene0306_00 +scene0306_01 +scene0052_00 +scene0052_01 
+scene0052_02 +scene0053_00 +scene0444_00 +scene0444_01 +scene0055_00 +scene0055_01 +scene0055_02 +scene0560_00 +scene0589_00 +scene0589_01 +scene0589_02 +scene0610_00 +scene0610_01 +scene0610_02 +scene0364_00 +scene0364_01 +scene0383_00 +scene0383_01 +scene0383_02 +scene0006_00 +scene0006_01 +scene0006_02 +scene0275_00 +scene0451_00 +scene0451_01 +scene0451_02 +scene0451_03 +scene0451_04 +scene0451_05 +scene0135_00 +scene0065_00 +scene0065_01 +scene0065_02 +scene0104_00 +scene0674_00 +scene0674_01 +scene0448_00 +scene0448_01 +scene0448_02 +scene0502_00 +scene0502_01 +scene0502_02 +scene0440_00 +scene0440_01 +scene0440_02 +scene0071_00 +scene0072_00 +scene0072_01 +scene0072_02 +scene0509_00 +scene0509_01 +scene0509_02 +scene0649_00 +scene0649_01 +scene0602_00 +scene0694_00 +scene0694_01 +scene0101_00 +scene0101_01 +scene0101_02 +scene0101_03 +scene0101_04 +scene0101_05 +scene0218_00 +scene0218_01 +scene0579_00 +scene0579_01 +scene0579_02 +scene0039_00 +scene0039_01 +scene0493_00 +scene0493_01 +scene0242_00 +scene0242_01 +scene0242_02 +scene0083_00 +scene0083_01 +scene0127_00 +scene0127_01 +scene0662_00 +scene0662_01 +scene0662_02 +scene0018_00 +scene0087_00 +scene0087_01 +scene0087_02 +scene0332_00 +scene0332_01 +scene0332_02 +scene0628_00 +scene0628_01 +scene0628_02 +scene0134_00 +scene0134_01 +scene0134_02 +scene0238_00 +scene0238_01 +scene0092_00 +scene0092_01 +scene0092_02 +scene0092_03 +scene0092_04 +scene0022_00 +scene0022_01 +scene0467_00 +scene0392_00 +scene0392_01 +scene0392_02 +scene0424_00 +scene0424_01 +scene0424_02 +scene0646_00 +scene0646_01 +scene0646_02 +scene0098_00 +scene0098_01 +scene0044_00 +scene0044_01 +scene0044_02 +scene0510_00 +scene0510_01 +scene0510_02 +scene0571_00 +scene0571_01 +scene0166_00 +scene0166_01 +scene0166_02 +scene0563_00 +scene0172_00 +scene0172_01 +scene0388_00 +scene0388_01 +scene0215_00 +scene0215_01 +scene0252_00 +scene0287_00 +scene0668_00 +scene0572_00 +scene0572_01 +scene0572_02 +scene0026_00 +scene0224_00 
+scene0113_00 +scene0113_01 +scene0551_00 +scene0381_00 +scene0381_01 +scene0381_02 +scene0371_00 +scene0371_01 +scene0460_00 +scene0118_00 +scene0118_01 +scene0118_02 +scene0417_00 +scene0008_00 +scene0634_00 +scene0521_00 +scene0123_00 +scene0123_01 +scene0123_02 +scene0045_00 +scene0045_01 +scene0511_00 +scene0511_01 +scene0114_00 +scene0114_01 +scene0114_02 +scene0070_00 +scene0029_00 +scene0029_01 +scene0029_02 +scene0129_00 +scene0103_00 +scene0103_01 +scene0002_00 +scene0002_01 +scene0132_00 +scene0132_01 +scene0132_02 +scene0124_00 +scene0124_01 +scene0143_00 +scene0143_01 +scene0143_02 +scene0604_00 +scene0604_01 +scene0604_02 +scene0507_00 +scene0105_00 +scene0105_01 +scene0105_02 +scene0428_00 +scene0428_01 +scene0311_00 +scene0140_00 +scene0140_01 +scene0182_00 +scene0182_01 +scene0182_02 +scene0142_00 +scene0142_01 +scene0399_00 +scene0399_01 +scene0012_00 +scene0012_01 +scene0012_02 +scene0060_00 +scene0060_01 +scene0370_00 +scene0370_01 +scene0370_02 +scene0310_00 +scene0310_01 +scene0310_02 +scene0661_00 +scene0650_00 +scene0152_00 +scene0152_01 +scene0152_02 +scene0158_00 +scene0158_01 +scene0158_02 +scene0482_00 +scene0482_01 +scene0600_00 +scene0600_01 +scene0600_02 +scene0393_00 +scene0393_01 +scene0393_02 +scene0562_00 +scene0174_00 +scene0174_01 +scene0157_00 +scene0157_01 +scene0161_00 +scene0161_01 +scene0161_02 +scene0159_00 +scene0254_00 +scene0254_01 +scene0115_00 +scene0115_01 +scene0115_02 +scene0162_00 +scene0163_00 +scene0163_01 +scene0523_00 +scene0523_01 +scene0523_02 +scene0459_00 +scene0459_01 +scene0175_00 +scene0085_00 +scene0085_01 +scene0279_00 +scene0279_01 +scene0279_02 +scene0201_00 +scene0201_01 +scene0201_02 +scene0283_00 +scene0456_00 +scene0456_01 +scene0429_00 +scene0043_00 +scene0043_01 +scene0419_00 +scene0419_01 +scene0419_02 +scene0368_00 +scene0368_01 +scene0348_00 +scene0348_01 +scene0348_02 +scene0442_00 +scene0178_00 +scene0380_00 +scene0380_01 +scene0380_02 +scene0165_00 +scene0165_01 +scene0165_02 
+scene0181_00 +scene0181_01 +scene0181_02 +scene0181_03 +scene0333_00 +scene0614_00 +scene0614_01 +scene0614_02 +scene0404_00 +scene0404_01 +scene0404_02 +scene0185_00 +scene0126_00 +scene0126_01 +scene0126_02 +scene0519_00 +scene0236_00 +scene0236_01 +scene0189_00 +scene0075_00 +scene0267_00 +scene0192_00 +scene0192_01 +scene0192_02 +scene0281_00 +scene0420_00 +scene0420_01 +scene0420_02 +scene0195_00 +scene0195_01 +scene0195_02 +scene0597_00 +scene0597_01 +scene0597_02 +scene0041_00 +scene0041_01 +scene0111_00 +scene0111_01 +scene0111_02 +scene0666_00 +scene0666_01 +scene0666_02 +scene0200_00 +scene0200_01 +scene0200_02 +scene0536_00 +scene0536_01 +scene0536_02 +scene0390_00 +scene0280_00 +scene0280_01 +scene0280_02 +scene0344_00 +scene0344_01 +scene0205_00 +scene0205_01 +scene0205_02 +scene0484_00 +scene0484_01 +scene0009_00 +scene0009_01 +scene0009_02 +scene0302_00 +scene0302_01 +scene0209_00 +scene0209_01 +scene0209_02 +scene0210_00 +scene0210_01 +scene0395_00 +scene0395_01 +scene0395_02 +scene0683_00 +scene0601_00 +scene0601_01 +scene0214_00 +scene0214_01 +scene0214_02 +scene0477_00 +scene0477_01 +scene0439_00 +scene0439_01 +scene0468_00 +scene0468_01 +scene0468_02 +scene0546_00 +scene0466_00 +scene0466_01 +scene0220_00 +scene0220_01 +scene0220_02 +scene0122_00 +scene0122_01 +scene0130_00 +scene0110_00 +scene0110_01 +scene0110_02 +scene0327_00 +scene0156_00 +scene0266_00 +scene0266_01 +scene0001_00 +scene0001_01 +scene0228_00 +scene0199_00 +scene0219_00 +scene0464_00 +scene0232_00 +scene0232_01 +scene0232_02 +scene0299_00 +scene0299_01 +scene0530_00 +scene0363_00 +scene0453_00 +scene0453_01 +scene0570_00 +scene0570_01 +scene0570_02 +scene0183_00 +scene0239_00 +scene0239_01 +scene0239_02 +scene0373_00 +scene0373_01 +scene0241_00 +scene0241_01 +scene0241_02 +scene0188_00 +scene0622_00 +scene0622_01 +scene0244_00 +scene0244_01 +scene0691_00 +scene0691_01 +scene0206_00 +scene0206_01 +scene0206_02 +scene0247_00 +scene0247_01 +scene0061_00 +scene0061_01 
+scene0082_00 +scene0250_00 +scene0250_01 +scene0250_02 +scene0501_00 +scene0501_01 +scene0501_02 +scene0320_00 +scene0320_01 +scene0320_02 +scene0320_03 +scene0631_00 +scene0631_01 +scene0631_02 +scene0255_00 +scene0255_01 +scene0255_02 +scene0047_00 +scene0265_00 +scene0265_01 +scene0265_02 +scene0004_00 +scene0336_00 +scene0336_01 +scene0058_00 +scene0058_01 +scene0260_00 +scene0260_01 +scene0260_02 +scene0243_00 +scene0603_00 +scene0603_01 +scene0093_00 +scene0093_01 +scene0093_02 +scene0109_00 +scene0109_01 +scene0434_00 +scene0434_01 +scene0434_02 +scene0290_00 +scene0627_00 +scene0627_01 +scene0470_00 +scene0470_01 +scene0137_00 +scene0137_01 +scene0137_02 +scene0270_00 +scene0270_01 +scene0270_02 +scene0271_00 +scene0271_01 +scene0504_00 +scene0274_00 +scene0274_01 +scene0274_02 +scene0036_00 +scene0036_01 +scene0276_00 +scene0276_01 +scene0272_00 +scene0272_01 +scene0499_00 +scene0698_00 +scene0698_01 +scene0051_00 +scene0051_01 +scene0051_02 +scene0051_03 +scene0108_00 +scene0245_00 +scene0369_00 +scene0369_01 +scene0369_02 +scene0284_00 +scene0289_00 +scene0289_01 +scene0286_00 +scene0286_01 +scene0286_02 +scene0286_03 +scene0031_00 +scene0031_01 +scene0031_02 +scene0545_00 +scene0545_01 +scene0545_02 +scene0557_00 +scene0557_01 +scene0557_02 +scene0533_00 +scene0533_01 +scene0116_00 +scene0116_01 +scene0116_02 +scene0611_00 +scene0611_01 +scene0688_00 +scene0294_00 +scene0294_01 +scene0294_02 +scene0295_00 +scene0295_01 +scene0296_00 +scene0296_01 +scene0596_00 +scene0596_01 +scene0596_02 +scene0532_00 +scene0532_01 +scene0637_00 +scene0638_00 +scene0121_00 +scene0121_01 +scene0121_02 +scene0040_00 +scene0040_01 +scene0197_00 +scene0197_01 +scene0197_02 +scene0410_00 +scene0410_01 +scene0305_00 +scene0305_01 +scene0615_00 +scene0615_01 +scene0703_00 +scene0703_01 +scene0555_00 +scene0297_00 +scene0297_01 +scene0297_02 +scene0582_00 +scene0582_01 +scene0582_02 +scene0023_00 +scene0094_00 +scene0013_00 +scene0013_01 +scene0013_02 +scene0136_00 
+scene0136_01 +scene0136_02 +scene0407_00 +scene0407_01 +scene0062_00 +scene0062_01 +scene0062_02 +scene0386_00 +scene0318_00 +scene0554_00 +scene0554_01 +scene0497_00 +scene0213_00 +scene0258_00 +scene0323_00 +scene0323_01 +scene0324_00 +scene0324_01 +scene0016_00 +scene0016_01 +scene0016_02 +scene0681_00 +scene0398_00 +scene0398_01 +scene0227_00 +scene0090_00 +scene0066_00 +scene0262_00 +scene0262_01 +scene0155_00 +scene0155_01 +scene0155_02 +scene0352_00 +scene0352_01 +scene0352_02 +scene0038_00 +scene0038_01 +scene0038_02 +scene0335_00 +scene0335_01 +scene0335_02 +scene0261_00 +scene0261_01 +scene0261_02 +scene0261_03 +scene0640_00 +scene0640_01 +scene0640_02 +scene0080_00 +scene0080_01 +scene0080_02 +scene0403_00 +scene0403_01 +scene0282_00 +scene0282_01 +scene0282_02 +scene0682_00 +scene0173_00 +scene0173_01 +scene0173_02 +scene0522_00 +scene0687_00 +scene0345_00 +scene0345_01 +scene0612_00 +scene0612_01 +scene0411_00 +scene0411_01 +scene0411_02 +scene0625_00 +scene0625_01 +scene0211_00 +scene0211_01 +scene0211_02 +scene0211_03 +scene0676_00 +scene0676_01 +scene0179_00 +scene0498_00 +scene0498_01 +scene0498_02 +scene0547_00 +scene0547_01 +scene0547_02 +scene0269_00 +scene0269_01 +scene0269_02 +scene0366_00 +scene0680_00 +scene0680_01 +scene0588_00 +scene0588_01 +scene0588_02 +scene0588_03 +scene0346_00 +scene0346_01 +scene0359_00 +scene0359_01 +scene0014_00 +scene0120_00 +scene0120_01 +scene0212_00 +scene0212_01 +scene0212_02 +scene0176_00 +scene0049_00 +scene0259_00 +scene0259_01 +scene0586_00 +scene0586_01 +scene0586_02 +scene0309_00 +scene0309_01 +scene0125_00 +scene0455_00 +scene0177_00 +scene0177_01 +scene0177_02 +scene0326_00 +scene0372_00 +scene0171_00 +scene0171_01 +scene0374_00 +scene0654_00 +scene0654_01 +scene0445_00 +scene0445_01 +scene0475_00 +scene0475_01 +scene0475_02 +scene0349_00 +scene0349_01 +scene0234_00 +scene0669_00 +scene0669_01 +scene0375_00 +scene0375_01 +scene0375_02 +scene0387_00 +scene0387_01 +scene0387_02 +scene0312_00 
+scene0312_01 +scene0312_02 +scene0384_00 +scene0385_00 +scene0385_01 +scene0385_02 +scene0000_00 +scene0000_01 +scene0000_02 +scene0376_00 +scene0376_01 +scene0376_02 +scene0301_00 +scene0301_01 +scene0301_02 +scene0322_00 +scene0542_00 +scene0079_00 +scene0079_01 +scene0099_00 +scene0099_01 +scene0476_00 +scene0476_01 +scene0476_02 +scene0394_00 +scene0394_01 +scene0147_00 +scene0147_01 +scene0067_00 +scene0067_01 +scene0067_02 +scene0397_00 +scene0397_01 +scene0337_00 +scene0337_01 +scene0337_02 +scene0431_00 +scene0223_00 +scene0223_01 +scene0223_02 +scene0010_00 +scene0010_01 +scene0402_00 +scene0268_00 +scene0268_01 +scene0268_02 +scene0679_00 +scene0679_01 +scene0405_00 +scene0128_00 +scene0408_00 +scene0408_01 +scene0190_00 +scene0107_00 +scene0076_00 +scene0167_00 +scene0361_00 +scene0361_01 +scene0361_02 +scene0216_00 +scene0202_00 +scene0303_00 +scene0303_01 +scene0303_02 +scene0446_00 +scene0446_01 +scene0089_00 +scene0089_01 +scene0089_02 +scene0360_00 +scene0150_00 +scene0150_01 +scene0150_02 +scene0421_00 +scene0421_01 +scene0421_02 +scene0454_00 +scene0626_00 +scene0626_01 +scene0626_02 +scene0186_00 +scene0186_01 +scene0538_00 +scene0479_00 +scene0479_01 +scene0479_02 +scene0656_00 +scene0656_01 +scene0656_02 +scene0656_03 +scene0525_00 +scene0525_01 +scene0525_02 +scene0308_00 +scene0396_00 +scene0396_01 +scene0396_02 +scene0624_00 +scene0292_00 +scene0292_01 +scene0632_00 +scene0253_00 +scene0021_00 +scene0325_00 +scene0325_01 +scene0437_00 +scene0437_01 +scene0438_00 +scene0590_00 +scene0590_01 +scene0400_00 +scene0400_01 +scene0541_00 +scene0541_01 +scene0541_02 +scene0677_00 +scene0677_01 +scene0677_02 +scene0443_00 +scene0315_00 +scene0288_00 +scene0288_01 +scene0288_02 +scene0422_00 +scene0672_00 +scene0672_01 +scene0184_00 +scene0449_00 +scene0449_01 +scene0449_02 +scene0048_00 +scene0048_01 +scene0138_00 +scene0452_00 +scene0452_01 +scene0452_02 +scene0667_00 +scene0667_01 +scene0667_02 +scene0463_00 +scene0463_01 +scene0078_00 
+scene0078_01 +scene0078_02 +scene0636_00 +scene0457_00 +scene0457_01 +scene0457_02 +scene0465_00 +scene0465_01 +scene0577_00 +scene0151_00 +scene0151_01 +scene0339_00 +scene0573_00 +scene0573_01 +scene0154_00 +scene0096_00 +scene0096_01 +scene0096_02 +scene0235_00 +scene0168_00 +scene0168_01 +scene0168_02 +scene0594_00 +scene0587_00 +scene0587_01 +scene0587_02 +scene0587_03 +scene0229_00 +scene0229_01 +scene0229_02 +scene0512_00 +scene0106_00 +scene0106_01 +scene0106_02 +scene0472_00 +scene0472_01 +scene0472_02 +scene0489_00 +scene0489_01 +scene0489_02 +scene0425_00 +scene0425_01 +scene0641_00 +scene0526_00 +scene0526_01 +scene0317_00 +scene0317_01 +scene0544_00 +scene0017_00 +scene0017_01 +scene0017_02 +scene0042_00 +scene0042_01 +scene0042_02 +scene0576_00 +scene0576_01 +scene0576_02 +scene0347_00 +scene0347_01 +scene0347_02 +scene0436_00 +scene0226_00 +scene0226_01 +scene0485_00 +scene0486_00 +scene0487_00 +scene0487_01 +scene0619_00 +scene0097_00 +scene0367_00 +scene0367_01 +scene0491_00 +scene0492_00 +scene0492_01 +scene0005_00 +scene0005_01 +scene0543_00 +scene0543_01 +scene0543_02 +scene0657_00 +scene0341_00 +scene0341_01 diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv1_val.txt b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv1_val.txt new file mode 100644 index 0000000..965ff25 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv1_val.txt @@ -0,0 +1,156 @@ +scene0534_00 +scene0534_01 +scene0319_00 +scene0273_00 +scene0273_01 +scene0225_00 +scene0198_00 +scene0003_00 +scene0003_01 +scene0003_02 +scene0409_00 +scene0409_01 +scene0331_00 +scene0331_01 +scene0505_00 +scene0505_01 +scene0505_02 +scene0505_03 +scene0505_04 +scene0506_00 +scene0057_00 +scene0057_01 +scene0074_00 +scene0074_01 +scene0074_02 +scene0091_00 +scene0112_00 +scene0112_01 
+scene0112_02 +scene0240_00 +scene0102_00 +scene0102_01 +scene0513_00 +scene0514_00 +scene0514_01 +scene0537_00 +scene0516_00 +scene0516_01 +scene0495_00 +scene0617_00 +scene0133_00 +scene0520_00 +scene0520_01 +scene0635_00 +scene0635_01 +scene0054_00 +scene0473_00 +scene0473_01 +scene0524_00 +scene0524_01 +scene0379_00 +scene0471_00 +scene0471_01 +scene0471_02 +scene0566_00 +scene0248_00 +scene0248_01 +scene0248_02 +scene0529_00 +scene0529_01 +scene0529_02 +scene0391_00 +scene0264_00 +scene0264_01 +scene0264_02 +scene0675_00 +scene0675_01 +scene0350_00 +scene0350_01 +scene0350_02 +scene0450_00 +scene0068_00 +scene0068_01 +scene0237_00 +scene0237_01 +scene0365_00 +scene0365_01 +scene0365_02 +scene0605_00 +scene0605_01 +scene0539_00 +scene0539_01 +scene0539_02 +scene0540_00 +scene0540_01 +scene0540_02 +scene0170_00 +scene0170_01 +scene0170_02 +scene0433_00 +scene0340_00 +scene0340_01 +scene0340_02 +scene0160_00 +scene0160_01 +scene0160_02 +scene0160_03 +scene0160_04 +scene0059_00 +scene0059_01 +scene0059_02 +scene0056_00 +scene0056_01 +scene0478_00 +scene0478_01 +scene0548_00 +scene0548_01 +scene0548_02 +scene0204_00 +scene0204_01 +scene0204_02 +scene0033_00 +scene0145_00 +scene0483_00 +scene0508_00 +scene0508_01 +scene0508_02 +scene0180_00 +scene0148_00 +scene0556_00 +scene0556_01 +scene0416_00 +scene0416_01 +scene0416_02 +scene0416_03 +scene0416_04 +scene0073_00 +scene0073_01 +scene0073_02 +scene0073_03 +scene0034_00 +scene0034_01 +scene0034_02 +scene0639_00 +scene0561_00 +scene0561_01 +scene0298_00 +scene0692_00 +scene0692_01 +scene0692_02 +scene0692_03 +scene0692_04 +scene0642_00 +scene0642_01 +scene0642_02 +scene0642_03 +scene0630_00 +scene0630_01 +scene0630_02 +scene0630_03 +scene0630_04 +scene0630_05 +scene0630_06 +scene0706_00 +scene0567_00 +scene0567_01 diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2-labels-old.combined.tsv 
b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2-labels-old.combined.tsv new file mode 100644 index 0000000..05c006e --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2-labels-old.combined.tsv @@ -0,0 +1,608 @@ +id raw_category category count nyu40id eigen13id nyuClass nyu40class eigen13class ModelNet40 ModelNet10 ShapeNetCore55 synsetoffset wnsynsetid wnsynsetkey mpcat40 mpcat40index +1 wall wall 8277 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +2 chair chair 4646 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +22 books book 1678 23 2 book books Books n02870526 book.n.11 objects 39 +3 floor floor 1553 2 5 floor floor Floor n03365592 floor.n.01 floor 2 +5 door door 1483 8 12 door door Wall door n03221720 door.n.01 door 4 +1163 object object 1313 40 7 otherprop Objects objects 39 +16 window window 1209 9 13 window window Window n04587648 window.n.01 window 9 +4 table table 1170 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5 +56 trash can trash can 1090 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39 +13 pillow pillow 937 18 7 pillow pillow Objects pillow 3938244 n03938244 pillow.n.01 cushion 8 +15 picture picture 862 11 8 picture picture Picture n03931044 picture.n.01 picture 6 +41 ceiling ceiling 806 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 ceiling 17 +26 box box 775 29 7 box box Objects n02883344 box.n.01 objects 39 +161 doorframe doorframe 768 8 12 door door Wall door doorframe.n.01 door 4 +19 monitor monitor 765 40 7 monitor otherprop Objects monitor monitor tv or monitor 3211117 n03782190 monitor.n.04 objects 39 +7 cabinet cabinet 731 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +9 desk desk 680 14 10 desk desk Table desk desk table 4379243 n03179701 desk.n.01 table 5 +8 
shelf shelf 641 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31 +10 office chair office chair 595 5 4 chair chair Chair chair chair chair 3001627 n04373704 swivel_chair.n.01 chair 3 +31 towel towel 570 27 7 towel towel Objects n04459362 towel.n.01 towel 20 +6 couch couch 502 6 9 sofa sofa Sofa sofa sofa sofa 4256520 n04256520 sofa.n.01 sofa 10 +14 sink sink 488 34 7 sink sink Objects sink n04223580 sink.n.01 sink 15 +48 backpack backpack 479 40 7 backpack otherprop Objects n02769748 backpack.n.01 objects 39 +28 lamp lamp 419 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28 +11 bed bed 370 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11 +18 bookshelf bookshelf 360 10 6 bookshelf bookshelf Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31 +71 mirror mirror 349 19 7 mirror mirror Objects n03773035 mirror.n.01 mirror 21 +21 curtain curtain 347 16 13 curtain curtain Window curtain n03151077 curtain.n.01 curtain 12 +40 plant plant 331 40 7 plant otherprop Objects plant n00017222 plant.n.02 plant 14 +52 whiteboard whiteboard 327 30 7 whiteboard whiteboard Objects n03211616 display_panel.n.01 board_panel 35 +96 radiator radiator 322 39 6 radiator otherfurniture Furniture n04041069 radiator.n.02 misc 40 +22 book book 318 23 2 book books Books n02870526 book.n.11 objects 39 +29 kitchen cabinet kitchen cabinet 310 3 6 cabinet cabinet Furniture n02933112 cabinet.n.01 cabinet 7 +49 toilet paper toilet paper 291 40 7 toilet paper otherprop Objects n15075141 toilet_tissue.n.01 objects 39 +29 kitchen cabinets kitchen cabinet 289 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +23 armchair armchair 281 5 4 chair chair Chair chair chair chair 3001627 n02738535 armchair.n.01 chair 3 +63 shoes shoe 272 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38 +24 coffee table coffee table 258 7 10 coffee table table Table table table table 
4379243 n03063968 coffee_table.n.01 table 5 +17 toilet toilet 256 33 7 toilet toilet Objects toilet toilet n04446276 toilet.n.01 toilet 18 +47 bag bag 252 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39 +32 clothes clothes 248 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38 +46 keyboard keyboard 246 40 7 keyboard otherprop Objects keyboard computer keyboard 3085013 n03085013 computer_keyboard.n.01 objects 39 +65 bottle bottle 226 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +97 recycling bin recycling bin 225 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39 +34 nightstand nightstand 224 32 6 night stand night stand Furniture night_stand night_stand n03015254 chest_of_drawers.n.01 chest_of_drawers 13 +38 stool stool 221 40 7 stool otherprop Objects stool n04326896 stool.n.01 stool 19 +33 tv tv 219 25 11 television television TV tv or monitor 3211117 n03211117 display.n.06 tv_monitor 22 +75 file cabinet file cabinet 217 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +36 dresser dresser 213 17 6 dresser dresser Furniture dresser dresser n03015254 chest_of_drawers.n.01 chest_of_drawers 13 +64 computer tower computer tower 203 40 7 computer otherprop Objects n03082979 computer.n.01 objects 39 +32 clothing clothes 165 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38 +101 telephone telephone 164 40 7 telephone otherprop Objects telephone 4401088 n04401088 telephone.n.01 objects 39 +130 cup cup 157 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39 +27 refrigerator refrigerator 154 24 6 refridgerator refridgerator Furniture n04070727 refrigerator.n.01 appliances 37 +44 end table end table 147 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5 +131 jacket jacket 146 40 7 jacket otherprop Objects n03589791 jacket.n.01 clothes 38 +55 shower curtain shower 
curtain 144 28 7 shower curtain shower curtain Objects curtain n04209239 shower_curtain.n.01 curtain 12 +42 bathtub bathtub 144 36 7 bathtub bathtub Objects bathtub bathtub tub 2808440 n02808440 bathtub.n.01 bathtub 25 +59 microwave microwave 141 40 7 microwave otherprop Objects microwave 3761084 n03761084 microwave.n.02 appliances 37 +159 kitchen counter kitchen counter 140 12 6 counter counter Furniture table table table 4379243 n03116530 counter.n.01 counter 26 +74 sofa chair sofa chair 129 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +82 paper towel dispenser paper towel dispenser 129 40 7 paper towel dispenser otherprop Objects objects 39 +1164 bathroom vanity bathroom vanity 126 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 table 5 +93 suitcase suitcase 118 40 7 luggage otherprop Objects n02773838 bag.n.06 objects 39 +77 laptop laptop 111 40 7 laptop otherprop Objects laptop laptop 3642806 n03642806 laptop.n.01 objects 39 +67 ottoman ottoman 111 39 6 ottoman otherfurniture Furniture stool n03380724 footstool.n.01 stool 19 +128 shower walls shower wall 109 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +50 printer printer 106 40 7 printer otherprop Objects printer 4004475 n04004475 printer.n.03 appliances 37 +35 counter counter 104 12 6 counter counter Furniture table table table 4379243 n03116530 counter.n.01 counter 26 +69 board board 100 38 7 board otherstructure Objects board_panel 35 +100 soap dispenser soap dispenser 99 40 7 otherprop Objects n04254120 soap_dispenser.n.01 objects 39 +62 stove stove 95 38 7 stove otherstructure Objects stove 4330267 n04330267 stove.n.02 appliances 37 +105 light light 93 38 7 light otherstructure Objects n03665366 light.n.02 lighting 28 +1165 closet wall closet wall 90 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +165 mini fridge mini fridge 87 24 6 refridgerator refridgerator Furniture n03273913 electric_refrigerator.n.01 appliances 37 +7 cabinets cabinet 79 3 6 
cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +5 doors door 76 8 12 door door Wall door n03221720 door.n.01 door 4 +76 fan fan 75 40 7 fan otherprop Objects n03320046 fan.n.01 misc 40 +230 tissue box tissue box 73 40 7 tissue box otherprop Objects n02883344 box.n.01 objects 39 +54 blanket blanket 72 40 7 blanket otherprop Objects n02849154 blanket.n.01 objects 39 +125 bathroom stall bathroom stall 71 38 7 otherstructure Objects n02873839 booth.n.02 misc 40 +72 copier copier 70 40 7 otherprop Objects n03257586 duplicator.n.01 appliances 37 +68 bench bench 66 39 6 bench otherfurniture Furniture bench bench 2828884 n02828884 bench.n.01 seating 34 +145 bar bar 66 38 7 bar otherstructure Objects n02788689 bar.n.03 misc 40 +157 soap dish soap dish 65 40 7 soap dish otherprop Objects n04254009 soap_dish.n.01 objects 39 +1166 laundry hamper laundry hamper 65 40 7 laundry basket otherprop Objects objects 39 +132 storage bin storage bin 63 40 7 storage bin otherprop Objects objects 39 +1167 bathroom stall door bathroom stall door 62 8 12 door door Wall door n03221720 door.n.01 door 4 +232 light switch light switch 61 38 7 light switch otherstructure Objects n04372370 switch.n.01 misc 40 +134 coffee maker coffee maker 61 40 7 otherprop Objects n03063338 coffee_maker.n.01 appliances 37 +51 tv stand tv stand 61 39 6 tv stand otherfurniture Furniture tv_stand n03290653 entertainment_center.n.01 furniture 36 +250 decoration decoration 60 40 7 otherprop Objects n03169390 decoration.n.01 misc 40 +1168 ceiling light ceiling light 59 38 7 light otherstructure Objects n03665366 light.n.02 lighting 28 +342 range hood range hood 59 38 7 range hood otherstructure Objects range_hood n04053677 range_hood.n.01 misc 40 +89 blackboard blackboard 58 38 7 blackboard otherstructure Objects n02846511 blackboard.n.01 board_panel 35 +103 clock clock 58 40 7 clock otherprop Objects clock 3046257 n03046257 clock.n.01 objects 39 +99 wardrobe closet wardrobe 54 39 6 
wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36 +95 rail rail 53 38 7 railing otherstructure Objects n04047401 railing.n.01 railing 30 +154 bulletin board bulletin board 53 38 7 board otherstructure Objects n03211616 display_panel.n.01 board_panel 35 +140 mat mat 52 20 5 floor mat floor mat Floor n03727837 mat.n.01 floor 2 +1169 trash bin trash bin 52 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39 +193 ledge ledge 51 38 7 otherstructure Objects n09337253 ledge.n.01 misc 40 +116 seat seat 49 39 6 furniture otherfurniture Furniture n04161981 seat.n.03 furniture 36 +202 mouse mouse 49 40 7 mouse otherprop Objects n03793489 mouse.n.04 objects 39 +73 basket basket 48 40 7 basket otherprop Objects basket 2801938 n02801938 basket.n.01 objects 39 +78 shower shower 48 38 7 otherstructure Objects n04208936 shower.n.01 shower 23 +1170 dumbbell dumbbell 48 40 7 otherprop Objects n03255030 dumbbell.n.01 objects 39 +79 paper paper 46 26 7 paper paper Objects n14974264 paper.n.01 objects 39 +80 person person 46 31 7 person person Objects person n05217688 person.n.02 misc 40 +141 windowsill windowsill 45 38 7 otherstructure Objects n04590263 windowsill.n.01 window 9 +57 closet closet 45 39 6 wardrobe otherfurniture Furniture wardrobe misc 40 +102 bucket bucket 45 40 7 bucket otherprop Objects n02909870 bucket.n.01 misc 40 +261 sign sign 44 40 7 sign otherprop Objects n04217882 signboard.n.01 objects 39 +118 speaker speaker 43 40 7 speaker otherprop Objects speaker 3691459 n03691459 loudspeaker.n.01 objects 39 +136 dishwasher dishwasher 43 38 7 dishwasher otherstructure Objects dishwasher 3207941 n03207941 dishwasher.n.01 appliances 37 +98 container container 43 40 7 container otherprop Objects n03094503 container.n.01 objects 39 +1171 stair rail stair rail 42 38 7 banister otherstructure Objects n02788148 bannister.n.02 railing 30 +170 shower curtain rod shower curtain rod 42 40 7 otherprop Objects 
curtain 12 +1172 tube tube 41 40 7 otherprop Objects misc 40 +1173 bathroom cabinet bathroom cabinet 39 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +79 papers paper 39 26 7 paper paper Objects n14974264 paper.n.01 objects 39 +221 storage container storage container 39 40 7 container otherprop Objects objects 39 +570 paper bag paper bag 39 37 7 bag bag Objects n04122825 sack.n.01 objects 39 +138 paper towel roll paper towel roll 39 40 7 paper towel otherprop Objects n03887697 paper_towel.n.01 towel 20 +168 ball ball 39 40 7 ball otherprop Objects objects 39 +276 closet doors closet door 38 8 12 door door Wall door n03221720 door.n.01 door 4 +106 laundry basket laundry basket 37 40 7 laundry basket otherprop Objects basket 2801938 n03050864 clothes_hamper.n.01 objects 39 +214 cart cart 37 40 7 cart otherprop Objects n03484083 handcart.n.01 shelving 31 +276 closet door closet door 35 8 12 door door Wall door n03221720 door.n.01 door 4 +323 dish rack dish rack 35 40 7 dish rack otherprop Objects n03207630 dish_rack.n.01 objects 39 +58 stairs stairs 35 38 7 stairs otherstructure Objects n04298308 stairway.n.01 stairs 16 +86 blinds blinds 35 13 13 blinds blinds Window n02851099 blind.n.03 blinds 32 +2 stack of chairs chair 35 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +399 purse purse 34 40 7 purse otherprop Objects n02774152 bag.n.04 objects 39 +121 bicycle bicycle 33 40 7 bicycle otherprop Objects bicycle 2834778 n02834778 bicycle.n.01 objects 39 +185 tray tray 32 40 7 tray otherprop Objects n04476259 tray.n.01 objects 39 +300 plunger plunger 30 40 7 otherprop Objects n03970156 plunger.n.03 objects 39 +180 paper cutter paper cutter 30 40 7 paper cutter otherprop Objects n03886940 paper_cutter.n.01 objects 39 +163 toilet paper dispenser toilet paper dispenser 29 40 7 otherprop Objects objects 39 +26 boxes box 29 29 7 box box Objects n02883344 box.n.01 objects 39 +66 bin bin 28 40 7 bin otherprop 
Objects n02839910 bin.n.01 objects 39 +208 toilet seat cover dispenser toilet seat cover dispenser 28 40 7 otherprop Objects objects 39 +112 guitar guitar 28 40 7 guitar otherprop Objects guitar guitar 3467517 n03467517 guitar.n.01 objects 39 +540 mailboxes mailbox 28 29 7 box box Objects mailbox 3710193 n03710193 mailbox.n.01 misc 40 +395 handicap bar handicap bar 27 38 7 bar otherstructure Objects misc 40 +166 fire extinguisher fire extinguisher 27 40 7 fire extinguisher otherprop Objects n03345837 fire_extinguisher.n.01 misc 40 +122 ladder ladder 27 39 6 ladder otherfurniture Furniture stairs n03632277 ladder.n.01 stairs 16 +120 column column 26 38 7 column otherstructure Objects n03074380 column.n.06 column 24 +107 pipe pipe 25 40 7 pipe otherprop Objects n03944672 pipe.n.02 misc 40 +283 vacuum cleaner vacuum cleaner 25 40 7 otherprop Objects n04517823 vacuum.n.04 objects 39 +88 plate plate 24 40 7 plate otherprop Objects n03959485 plate.n.04 objects 39 +90 piano piano 24 39 6 piano otherfurniture Furniture piano piano 3928116 n03928116 piano.n.01 furniture 36 +177 water cooler water cooler 24 39 6 water cooler otherfurniture Furniture n04559166 water_cooler.n.01 misc 40 +1174 cd case cd case 24 40 7 otherprop Objects objects 39 +562 bowl bowl 24 40 7 bowl otherprop Objects bowl bowl 2880940 n02880940 bowl.n.03 objects 39 +1175 closet rod closet rod 24 40 7 otherprop Objects n04100174 rod.n.01 misc 40 +1156 bathroom counter bathroom counter 24 12 6 counter counter Furniture table table table 4379243 n03116530 counter.n.01 counter 26 +84 oven oven 23 38 7 oven otherstructure Objects n03862676 oven.n.01 appliances 37 +104 stand stand 23 39 6 stand otherfurniture Furniture table table table 4379243 n04301000 stand.n.04 table 5 +229 scale scale 23 40 7 scale otherprop Objects n04141975 scale.n.07 objects 39 +70 washing machine washing machine 23 39 6 washing machine otherfurniture Furniture washing_machine 4554684 n04554684 washer.n.03 appliances 37 +325 broom 
broom 22 40 7 broom otherprop Objects n02906734 broom.n.01 objects 39 +169 hat hat 22 40 7 hat otherprop Objects n03497657 hat.n.01 clothes 38 +128 shower wall shower wall 22 1 12 wall wall Wall n04208936 shower.n.01 wall 1 +331 guitar case guitar case 21 40 7 guitar case otherprop Objects objects 39 +87 rack rack 21 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31 +488 water pitcher water pitcher 21 40 7 pitcher otherprop Objects n03950228 pitcher.n.02 objects 39 +776 laundry detergent laundry detergent 21 40 7 otherprop Objects objects 39 +370 hair dryer hair dryer 21 40 7 hair dryer otherprop Objects n03483316 hand_blower.n.01 objects 39 +191 pillar pillar 21 38 7 column otherstructure Objects n03073977 column.n.07 column 24 +748 divider divider 20 40 7 otherprop Objects wall 1 +242 power outlet power outlet 19 40 7 otherprop Objects misc 40 +45 dining table dining table 19 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5 +417 shower floor shower floor 19 2 5 floor floor Floor n04208936 shower.n.01 floor 2 +70 washing machines washing machine 19 39 6 washing machine otherfurniture Furniture washing_machine 4554684 n04554684 washer.n.03 appliances 37 +188 shower door shower door 19 8 12 door door Wall door n04208936 shower.n.01 door 4 +1176 coffee kettle coffee kettle 18 40 7 pot otherprop Objects n03612814 kettle.n.01 objects 39 +1177 wardrobe cabinet wardrobe 18 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36 +1178 structure structure 18 38 7 otherstructure Objects misc 40 +18 bookshelves bookshelf 17 10 6 bookshelf bookshelf Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31 +110 clothes dryer clothes dryer 17 39 6 otherfurniture Furniture n03251766 dryer.n.01 appliances 37 +148 toaster toaster 17 40 7 toaster otherprop Objects n04442312 toaster.n.02 appliances 37 +63 shoe shoe 17 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38 +155 ironing 
board ironing board 16 39 6 ironing board otherfurniture Furniture n03586090 ironing_board.n.01 objects 39 +572 alarm clock alarm clock 16 40 7 alarm clock otherprop Objects clock 3046257 n02694662 alarm_clock.n.01 objects 39 +1179 shower head shower head 15 38 7 otherstructure Objects shower 23 +28 lamp base lamp 15 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28 +392 water bottle water bottle 15 40 7 bottle otherprop Objects bottle bottle 2876657 n04557648 water_bottle.n.01 objects 39 +1180 keyboard piano keyboard piano 15 39 6 piano otherfurniture Furniture piano piano 3928116 n03928116 piano.n.01 furniture 36 +609 projector screen projector screen 15 38 7 projector screen otherstructure Objects misc 40 +1181 case of water bottles case of water bottles 15 40 7 otherprop Objects objects 39 +195 toaster oven toaster oven 14 40 7 toaster oven otherprop Objects n04442441 toaster_oven.n.01 appliances 37 +581 music stand music stand 14 39 6 music stand otherfurniture Furniture n03801760 music_stand.n.01 furniture 36 +58 staircase stairs 14 38 7 stairs otherstructure Objects n04298308 stairway.n.01 stairs 16 +1182 coat rack coat rack 14 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 3 +1183 storage organizer storage organizer 14 40 7 otherprop Objects shelving 3 +139 machine machine 14 40 7 machine otherprop Objects n03699975 machine.n.01 appliances 37 +1184 folded chair folded chair 14 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +1185 fire alarm fire alarm 14 40 7 otherprop Objects n03343737 fire_alarm.n.02 misc 40 +156 fireplace fireplace 13 38 7 fireplace otherstructure Objects n03346455 fireplace.n.01 fireplace 27 +408 vent vent 13 40 7 otherprop Objects n04526241 vent.n.01 misc 40 +213 furniture furniture 13 39 6 furniture otherfurniture Furniture n03405725 furniture.n.01 furniture 36 +1186 power strip power strip 13 40 7 otherprop Objects objects 39 +1187 calendar calendar 13 40 7 otherprop 
Objects objects 39 +1188 poster poster 13 11 8 picture picture Picture n03931044 picture.n.01 picture 6 +115 toilet paper holder toilet paper holder 13 40 7 toilet paper holder otherprop Objects objects 39 +1189 potted plant potted plant 12 40 7 plant otherprop Objects plant n00017222 plant.n.02 plant 14 +304 stuffed animal stuffed animal 12 40 7 stuffed animal otherprop Objects n04399382 teddy.n.01 objects 39 +1190 luggage luggage 12 40 7 luggage otherprop Objects n02774630 baggage.n.01 objects 39 +21 curtains curtain 12 16 13 curtain curtain Window curtain n03151077 curtain.n.01 curtain 12 +312 headphones headphones 12 40 7 otherprop Objects n03261776 earphone.n.01 objects 39 +233 crate crate 12 39 6 crate otherfurniture Furniture n03127925 crate.n.01 objects 39 +286 candle candle 12 40 7 candle otherprop Objects lamp n02948072 candle.n.01 objects 39 +264 projector projector 12 40 7 projector otherprop Objects n04009552 projector.n.02 objects 39 +110 clothes dryers clothes dryer 12 39 6 otherfurniture Furniture n03251766 dryer.n.01 appliances 37 +1191 mattress mattress 12 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11 +356 dustpan dustpan 12 40 7 otherprop Objects n03259009 dustpan.n.02 objects 39 +25 drawer drawer 11 39 6 drawer otherfurniture Furniture n03233905 drawer.n.01 furniture 36 +750 rod rod 11 40 7 otherprop Objects pistol 3948459 n03427202 gat.n.01 misc 40 +269 globe globe 11 40 7 globe otherprop Objects objects 39 +307 footrest footrest 11 39 6 foot rest otherfurniture Furniture stool n03380724 footstool.n.01 stool 19 +410 piano bench piano bench 11 39 6 piano bench otherfurniture Furniture bench bench 2828884 n02828884 bench.n.01 seating 34 +730 breakfast bar breakfast bar 11 38 7 bar otherstructure Objects counter 26 +216 step stool step stool 11 40 7 step stool otherprop Objects stool n04315713 step_stool.n.01 stool 19 +1192 hand rail hand rail 11 38 7 railing otherstructure Objects railing 30 +119 vending machine vending machine 11 
40 7 machine otherprop Objects n04525305 vending_machine.n.01 appliances 37 +682 ceiling fan ceiling fan 11 40 7 fan otherprop Objects n03320046 fan.n.01 misc 40 +434 swiffer swiffer 11 40 7 otherprop Objects objects 39 +126 foosball table foosball table 11 39 6 foosball table otherfurniture Furniture table table table 4379243 n04379243 table.n.02 table 5 +919 jar jar 11 40 7 jar otherprop Objects jar 3593526 n03593526 jar.n.01 objects 39 +85 footstool footstool 11 39 6 ottoman otherfurniture Furniture stool n03380724 footstool.n.01 stool 19 +1193 folded table folded table 10 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5 +108 round table round table 10 7 10 table table Table table table table 4379243 n04114554 round_table.n.02 table 5 +135 hamper hamper 10 40 7 basket otherprop Objects basket 2801938 n03482405 hamper.n.02 objects 39 +1194 poster tube poster tube 10 40 7 otherprop Objects objects 39 +432 case case 10 40 7 case otherprop Objects objects 39 +53 carpet carpet 10 40 7 rug otherprop Objects n04118021 rug.n.01 floor 2 +1195 thermostat thermostat 10 40 7 otherprop Objects n04422875 thermostat.n.01 misc 40 +111 coat coat 10 40 7 jacket otherprop Objects n03057021 coat.n.01 clothes 38 +305 water fountain water fountain 10 38 7 water fountain otherstructure Objects n03241335 drinking_fountain.n.01 misc 40 +1125 smoke detector smoke detector 10 40 7 otherprop Objects misc 40 +13 pillows pillow 9 18 7 pillow pillow Objects pillow 3938244 n03938244 pillow.n.01 cushion 8 +1196 flip flops flip flops 9 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38 +1197 cloth cloth 9 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38 +1198 banner banner 9 40 7 otherprop Objects n02788021 banner.n.01 misc 40 +1199 clothes hanger clothes hanger 9 40 7 otherprop Objects n03057920 coat_hanger.n.01 objects 39 +1200 whiteboard eraser whiteboard eraser 9 40 7 otherprop Objects objects 39 +378 iron iron 9 40 7 otherprop Objects 
n03584829 iron.n.04 objects 39 +591 instrument case instrument case 9 40 7 case otherprop Objects objects 39 +49 toilet paper rolls toilet paper 9 40 7 toilet paper otherprop Objects n15075141 toilet_tissue.n.01 objects 39 +92 soap soap 9 40 7 soap otherprop Objects n04253437 soap.n.01 objects 39 +1098 block block 9 40 7 otherprop Objects misc 40 +291 wall hanging wall hanging 8 40 7 otherprop Objects n03491178 hanging.n.01 picture 6 +1063 kitchen island kitchen island 8 38 7 kitchen island otherstructure Objects n03620600 kitchen_island.n.01 counter 26 +107 pipes pipe 8 38 7 otherstructure Objects misc 40 +1135 toothbrush toothbrush 8 40 7 toothbrush otherprop Objects n04453156 toothbrush.n.01 objects 39 +189 shirt shirt 8 40 7 otherprop Objects n04197391 shirt.n.01 clothes 38 +245 cutting board cutting board 8 40 7 cutting board otherprop Objects n03025513 chopping_board.n.01 objects 39 +194 vase vase 8 40 7 vase otherprop Objects vase jar 3593526 n04522168 vase.n.01 objects 39 +1201 shower control valve shower control valve 8 38 7 otherstructure Objects n04208936 shower.n.01 shower 23 +386 exercise machine exercise machine 8 40 7 machine otherprop Objects gym_equipment 33 +1202 compost bin compost bin 8 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39 +857 shorts shorts 8 40 7 shorts otherprop Objects clothes 38 +452 tire tire 8 40 7 otherprop Objects n04440749 tire.n.01 objects 39 +1203 teddy bear teddy bear 7 40 7 stuffed animal otherprop Objects n04399382 teddy.n.01 objects 39 +346 bathrobe bathrobe 7 40 7 otherprop Objects n02807616 bathrobe.n.01 clothes 38 +152 handrail handrail 7 38 7 railing otherstructure Objects n02788148 bannister.n.02 railing 30 +83 faucet faucet 7 40 7 faucet otherprop Objects faucet 3325088 n03325088 faucet.n.01 misc 40 +1204 pantry wall pantry wall 7 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +726 thermos thermos 7 40 7 flask otherprop Objects bottle bottle 2876657 n04422727 
thermos.n.01 objects 39 +61 rug rug 7 40 7 rug otherprop Objects n04118021 rug.n.01 floor 2 +39 couch cushions cushion 7 18 7 pillow pillow Objects n03151500 cushion.n.03 cushion 8 +1117 tripod tripod 7 39 6 stand otherfurniture Furniture n04485082 tripod.n.01 objects 39 +540 mailbox mailbox 7 29 7 box box Objects mailbox 3710193 n03710193 mailbox.n.01 misc 40 +1205 tupperware tupperware 7 40 7 otherprop Objects objects 39 +415 shoe rack shoe rack 7 40 7 shoe rack otherprop Objects shelving 31 +31 towels towel 6 27 7 towel towel Objects n04459362 towel.n.01 towel 20 +1206 beer bottles beer bottle 6 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +153 treadmill treadmill 6 39 6 treadmill otherfurniture Furniture n04477387 treadmill.n.01 gym_equipment 33 +1207 salt salt 6 40 7 otherprop Objects objects 39 +129 chest chest 6 39 6 chest otherfurniture Furniture dresser dresser chest_of_drawers 13 +220 dispenser dispenser 6 40 7 otherprop Objects n03210683 dispenser.n.01 objects 39 +1208 mirror doors mirror door 6 8 12 door door Wall door n03221720 door.n.01 door 4 +231 remote remote 6 40 7 otherprop Objects remote_control 4074963 n04074963 remote_control.n.01 objects 39 +1209 folded ladder folded ladder 6 39 6 ladder otherfurniture Furniture stairs n03632277 ladder.n.01 misc 40 +39 cushion cushion 6 18 7 pillow pillow Objects n03151500 cushion.n.03 cushion 8 +1210 carton carton 6 40 7 otherprop Objects objects 39 +117 step step 6 38 7 otherstructure Objects n04314914 step.n.04 misc 40 +822 drying rack drying rack 6 39 6 drying rack otherfurniture Furniture shelving 31 +238 slippers slipper 6 40 7 shoe otherprop Objects n04241394 slipper.n.01 clothes 38 +143 pool table pool table 6 39 6 pool table otherfurniture Furniture table table table 4379243 n03982430 pool_table.n.01 table 5 +1211 soda stream soda stream 6 40 7 otherprop Objects objects 39 +228 toilet brush toilet brush 6 40 7 toilet brush otherprop Objects objects 39 +494 loft 
bed loft bed 6 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11 +226 cooking pot cooking pot 6 40 7 pot otherprop Objects objects 39 +91 heater heater 6 39 6 heater otherfurniture Furniture n03508101 heater.n.01 misc 40 +1072 messenger bag messenger bag 6 37 7 bag bag Objects objects 39 +435 stapler stapler 6 40 7 stapler otherprop Objects n04303497 stapler.n.01 objects 39 +1165 closet walls closet wall 5 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +345 scanner scanner 5 40 7 otherprop Objects appliances 37 +893 elliptical machine elliptical machine 5 40 7 machine otherprop Objects gym_equipment 33 +621 kettle kettle 5 40 7 pot otherprop Objects n03612814 kettle.n.01 objects 39 +1212 metronome metronome 5 40 7 otherprop Objects n03757604 metronome.n.01 objects 39 +297 dumbell dumbell 5 40 7 otherprop Objects objects 39 +1213 music book music book 5 23 2 book books Books n02870526 book.n.11 objects 39 +1214 rice cooker rice cooker 5 40 7 otherprop Objects objects 39 +1215 dart board dart board 5 38 7 board otherstructure Objects n03162940 dartboard.n.01 objects 39 +529 sewing machine sewing machine 5 40 7 sewing machine otherprop Objects n04179913 sewing_machine.n.01 objects 39 +1216 grab bar grab bar 5 38 7 railing otherstructure Objects railing 30 +1217 flowerpot flowerpot 5 40 7 vase otherprop Objects vase jar 3593526 n04522168 vase.n.01 objects 39 +1218 painting painting 5 11 8 picture picture Picture n03931044 picture.n.01 picture 6 +1219 railing railing 5 38 7 railing otherstructure Objects n04047401 railing.n.01 railing 30 +1220 stair stair 5 38 7 stairs otherstructure Objects stairs n04314914 step.n.04 stairs 16 +525 toolbox toolbox 5 39 6 chest otherfurniture Furniture n04452615 toolbox.n.01 objects 39 +204 nerf gun nerf gun 5 40 7 otherprop Objects objects 39 +693 binders binder 5 40 7 binder otherprop Objects objects 39 +179 desk lamp desk lamp 5 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28 +1221 quadcopter 
quadcopter 5 40 7 otherprop Objects objects 39 +1222 pitcher pitcher 5 40 7 pitcher otherprop Objects n03950228 pitcher.n.02 objects 39 +1223 hanging hanging 5 40 7 otherprop Objects misc 40 +1224 mail mail 5 40 7 otherprop Objects misc 40 +1225 closet ceiling closet ceiling 5 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 ceiling 17 +1226 hoverboard hoverboard 5 40 7 otherprop Objects objects 39 +1227 beanbag chair beanbag chair 5 39 6 bean bag otherfurniture Furniture n02816656 beanbag.n.01 chair 3 +571 water heater water heater 5 40 7 water heater otherprop Objects n04560113 water_heater.n.01 misc 40 +1228 spray bottle spray bottle 5 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +556 rope rope 5 40 7 rope otherprop Objects n04108268 rope.n.01 objects 39 +280 plastic container plastic container 5 40 7 container otherprop Objects objects 39 +1229 soap bottle soap bottle 5 40 7 soap otherprop Objects objects 39 +1230 ikea bag ikea bag 4 37 7 bag bag Objects 2773838 n02773838 bag.n.06 objects 39 +1231 sleeping bag sleeping bag 4 40 7 otherprop Objects n04235860 sleeping_bag.n.01 objects 39 +1232 duffel bag duffel bag 4 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39 +746 frying pan frying pan 4 40 7 frying pan otherprop Objects n03400231 frying_pan.n.01 objects 39 +1233 oven mitt oven mitt 4 40 7 otherprop Objects objects 39 +1234 pot pot 4 40 7 pot otherprop Objects n04235860 sleeping_bag.n.01 objects 39 +144 hand dryer hand dryer 4 40 7 otherprop Objects objects 39 +282 dollhouse dollhouse 4 39 6 doll house otherfurniture Furniture n03219483 dollhouse.n.01 objects 39 +167 shampoo bottle shampoo bottle 4 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +1235 hair brush hair brush 4 40 7 otherprop Objects n02908217 brush.n.02 objects 39 +1236 tennis racket tennis racket 4 40 7 otherprop Objects n04409806 tennis_racket.n.01 objects 39 +1237 display case display case 4 
40 7 case otherprop Objects objects 39 +234 ping pong table ping pong table 4 39 6 ping pong table otherfurniture Furniture table table table 4379243 n04379243 table.n.02 table 5 +563 boiler boiler 4 40 7 otherprop Objects misc 40 +1238 bag of coffee beans bag of coffee beans 4 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39 +1239 bananas banana 4 40 7 otherprop Objects n00021265 food.n.01 objects 39 +1240 carseat carseat 4 40 7 otherprop Objects misc 40 +366 helmet helmet 4 40 7 otherprop Objects helmet 3513137 n03513137 helmet.n.02 clothes 38 +816 umbrella umbrella 4 40 7 umbrella otherprop Objects n04507155 umbrella.n.01 objects 39 +1241 coffee box coffee box 4 40 7 otherprop Objects objects 39 +719 envelope envelope 4 40 7 envelope otherprop Objects n03291819 envelope.n.01 objects 39 +284 wet floor sign wet floor sign 4 40 7 sign otherprop Objects misc 40 +1242 clothing rack clothing rack 4 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31 +247 controller controller 4 40 7 otherprop Objects n03096960 control.n.09 objects 39 +1243 bath walls bathroom wall 4 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +1244 podium podium 4 39 6 otherfurniture Furniture n03159640 dais.n.01 furniture 36 +1245 storage box storage box 4 29 7 box box Objects n02883344 box.n.01 objects 39 +1246 dolly dolly 4 40 7 otherprop Objects misc 40 +1247 shampoo shampoo 3 40 7 otherprop Objects n04183516 shampoo.n.01 objects 39 +592 paper tray paper tray 3 40 7 paper tray otherprop Objects objects 39 +385 cabinet door cabinet door 3 8 12 door door Wall door door 4 +1248 changing station changing station 3 40 7 otherprop Objects misc 40 +1249 poster printer poster printer 3 40 7 printer otherprop Objects printer 4004475 n04004475 printer.n.03 appliances 37 +133 screen screen 3 40 7 otherprop Objects n03151077 curtain.n.01 curtain 12 +301 soap bar soap bar 3 38 7 bar otherstructure Objects objects 39 +1250 crutches crutches 3 40 7 otherprop Objects 
n03141823 crutch.n.01 objects 39 +379 studio light studio light 3 38 7 light otherstructure Objects lighting 28 +130 stack of cups cup 3 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39 +1251 toilet flush button toilet flush button 3 40 7 otherprop Objects objects 39 +450 trunk trunk 3 40 7 otherprop Objects misc 40 +1252 grocery bag grocery bag 3 37 7 bag bag Objects suitcase 2773838 n03461288 grocery_bag.n.01 objects 39 +316 plastic bin plastic bin 3 40 7 bin otherprop Objects objects 39 +1253 pizza box pizza box 3 29 7 box box Objects objects 39 +385 cabinet doors cabinet door 3 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 door 4 +1254 legs legs 3 31 7 person person Objects person n05217688 person.n.02 misc 40 +461 car car 3 40 7 car otherprop Objects car car 2958343 n02958343 car.n.01 misc 40 +1255 shaving cream shaving cream 3 40 7 otherprop Objects n04186051 shaving_cream.n.01 objects 39 +1256 luggage stand luggage stand 3 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31 +599 shredder shredder 3 40 7 otherprop Objects n04210120 shredder.n.01 objects 39 +281 statue statue 3 40 7 sculpture otherprop Objects n04306847 statue.n.01 misc 40 +1257 urinal urinal 3 33 7 toilet toilet Objects toilet toilet n04515991 urinal.n.01 toilet 18 +1258 hose hose 3 40 7 otherprop Objects n03539875 hose.n.03 misc 40 +1259 bike pump bike pump 3 40 7 otherprop Objects objects 39 +319 coatrack coatrack 3 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 31 +1260 bear bear 3 40 7 otherprop Objects objects 39 +28 wall lamp lamp 3 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28 +1261 humidifier humidifier 3 40 7 otherprop Objects objects 39 +546 toothpaste toothpaste 3 40 7 toothpaste otherprop Objects objects 39 +1262 mouthwash bottle mouthwash bottle 3 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +1263 poster cutter poster cutter 3 40 7 
otherprop Objects objects 39 +1264 golf bag golf bag 3 37 7 bag bag Objects suitcase 2773838 n03445617 golf_bag.n.01 objects 39 +1265 food container food container 3 40 7 container otherprop Objects n03094503 container.n.01 objects 39 +1266 camera camera 3 40 7 otherprop Objects objects 39 +28 table lamp lamp 3 35 7 lamp lamp Objects lamp lamp 3636649 n04380533 table_lamp.n.01 lighting 28 +1267 yoga mat yoga mat 3 20 5 floor mat floor mat Floor n03727837 mat.n.01 floor 2 +1268 card card 3 40 7 otherprop Objects objects 39 +1269 mug mug 3 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39 +188 shower doors shower door 3 38 7 otherstructure Objects n04208936 shower.n.01 door 4 +689 cardboard cardboard 3 40 7 otherprop Objects objects 39 +1270 rack stand rack stand 3 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31 +1271 boxes of paper boxes of paper 3 29 7 box box Objects n02883344 box.n.01 objects 39 +1272 flag flag 3 40 7 otherprop Objects misc 40 +354 futon futon 3 39 6 mattress otherfurniture Furniture n03408444 futon.n.01 sofa 10 +339 magazine magazine 3 40 7 magazine otherprop Objects n06595351 magazine.n.01 objects 39 +1009 exit sign exit sign 3 40 7 exit sign otherprop Objects misc 40 +1273 rolled poster rolled poster 3 40 7 otherprop Objects objects 39 +1274 wheel wheel 3 40 7 otherprop Objects objects 39 +15 pictures picture 3 11 8 picture picture Picture n03931044 picture.n.01 picture 6 +1275 blackboard eraser blackboard eraser 3 40 7 eraser otherprop Objects n03294833 eraser.n.01 objects 39 +361 organizer organizer 3 40 7 otherprop Objects n03918737 personal_digital_assistant.n.01 objects 39 +1276 doll doll 3 40 7 toy otherprop Objects n03219135 doll.n.01 objects 39 +326 book rack book rack 3 39 6 bookrack otherfurniture Furniture objects 39 +1277 laundry bag laundry bag 3 40 7 laundry basket otherprop Objects basket 2801938 n03050864 clothes_hamper.n.01 objects 39 +1278 sponge sponge 3 40 7 otherprop 
Objects n01906749 sponge.n.04 objects 39 +116 seating seat 3 39 6 furniture otherfurniture Furniture n04161981 seat.n.03 furniture 36 +1184 folded chairs folded chair 2 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +1279 lotion bottle lotion bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +212 can can 2 40 7 can otherprop Objects can 2946921 n02946921 can.n.01 objects 39 +1280 lunch box lunch box 2 40 7 otherprop Objects objects 39 +1281 food display food display 2 40 7 otherprop Objects misc 40 +794 storage shelf storage shelf 2 40 7 otherprop Objects shelving 31 +1282 sliding wood door sliding wood door 2 40 7 otherprop Objects door 4 +955 pants pants 2 40 7 otherprop Objects n04489008 trouser.n.01 clothes 38 +387 wood wood 2 40 7 otherprop Objects misc 40 +69 boards board 2 38 7 board otherstructure Objects board_panel 35 +65 bottles bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +523 washcloth washcloth 2 40 7 otherprop Objects n04554523 washcloth.n.01 towel 20 +389 workbench workbench 2 39 6 bench otherfurniture Furniture bench table 4379243 n04600486 workbench.n.01 table 5 +29 open kitchen cabinet kitchen cabinet 2 3 6 cabinet cabinet Furniture n02933112 cabinet.n.01 cabinet 7 +1283 organizer shelf organizer shelf 2 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31 +146 frame frame 2 38 7 otherstructure Objects misc 40 +130 cups cup 2 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39 +372 exercise ball exercise ball 2 40 7 ball otherprop Objects n04285146 sports_equipment.n.01 gym_equipment 33 +289 easel easel 2 39 6 stand otherfurniture Furniture n03262809 easel.n.01 furniture 36 +440 garbage bag garbage bag 2 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39 +321 roomba roomba 2 40 7 otherprop Objects objects 39 +976 garage door garage door 2 
38 7 garage door otherstructure Objects door door 4 +1256 luggage rack luggage stand 2 39 6 stand otherfurniture Furniture n04038440 shelving 31 +1284 bike lock bike lock 2 40 7 otherprop Objects objects 39 +1285 briefcase briefcase 2 40 7 otherprop Objects n02900705 briefcase.n.01 objects 39 +357 hand towel hand towel 2 27 7 towel towel Objects n03490006 hand_towel.n.01 towel 20 +1286 bath products bath product 2 40 7 otherprop Objects objects 39 +1287 star star 2 40 7 otherprop Objects n09444783 star.n.03 misc 40 +365 map map 2 40 7 map otherprop Objects n03720163 map.n.01 misc 40 +1288 coffee bean bag coffee bean bag 2 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39 +81 headboard headboard 2 39 6 headboard otherfurniture Furniture n03502200 headboard.n.01 bed 11 +1289 ipad ipad 2 40 7 otherprop Objects objects 39 +1290 display rack display rack 2 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31 +948 traffic cone traffic cone 2 40 7 cone otherprop Objects cone objects 39 +174 toiletry toiletry 2 40 7 otherprop Objects n04447443 toiletry.n.01 objects 39 +1028 canopy canopy 2 40 7 otherprop Objects misc 40 +1291 massage chair massage chair 2 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +1292 paper organizer paper organizer 2 40 7 otherprop Objects objects 39 +1005 barricade barricade 2 40 7 otherprop Objects misc 40 +235 platform platform 2 38 7 otherstructure Objects misc 40 +1293 cap cap 2 40 7 hat otherprop Objects n03497657 hat.n.01 clothes 38 +1294 dumbbell plates dumbbell plates 2 40 7 otherprop Objects objects 39 +1295 elevator elevator 2 38 7 otherstructure Objects misc 40 +1296 cooking pan cooking pan 2 40 7 pan otherprop Objects n03880531 pan.n.01 objects 39 +1297 trash bag trash bag 2 37 7 bag bag Objects objects 39 +1298 santa santa 2 40 7 otherprop Objects misc 40 +1299 jewelry box jewelry box 2 29 7 box box Objects n02883344 box.n.01 objects 39 +1300 boat boat 2 40 7 otherprop 
Objects misc 40 +1301 sock sock 2 21 7 clothes clothes Objects n04254777 sock.n.01 clothes 38 +1051 kinect kinect 2 40 7 kinect otherprop Objects objects 39 +566 crib crib 2 39 6 crib otherfurniture Furniture furniture 36 +1302 plastic storage bin plastic storage bin 2 40 7 container otherprop Objects n03094503 container.n.01 objects 39 +1062 cooler cooler 2 24 6 refridgerator refridgerator Furniture n03102654 cooler.n.01 appliances 37 +1303 kitchen apron kitchen apron 2 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38 +1304 dishwashing soap bottle dishwashing soap bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +1305 xbox controller xbox controller 2 40 7 otherprop Objects objects 39 +1306 banana holder banana holder 2 40 7 otherprop Objects objects 39 +298 ping pong paddle ping pong paddle 2 40 7 otherprop Objects table 5 +1307 airplane airplane 2 40 7 otherprop Objects misc 40 +1308 conditioner bottle conditioner bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +1309 tea kettle tea kettle 2 40 7 tea kettle otherprop Objects n04397768 teakettle.n.01 objects 39 +43 bedframe bedframe 2 39 6 otherfurniture Furniture n02822579 bedstead.n.01 bed 11 +1310 wood beam wood beam 2 38 7 otherstructure Objects beam 29 +593 toilet paper package toilet paper package 2 40 7 otherprop Objects objects 39 +1311 wall mounted coat rack wall mounted coat rack 2 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 31 +1312 film light film light 2 40 7 otherprop Objects lighting 28 +749 ceiling lamp ceiling lamp 1 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28 +623 chain chain 1 40 7 otherprop Objects chair 3 +1313 sofa sofa 1 6 9 sofa sofa Sofa sofa sofa sofa 4256520 n04256520 sofa.n.01 sofa 10 +99 closet wardrobe wardrobe 1 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36 +265 sweater sweater 1 40 7 otherprop Objects 
n04370048 sweater.n.01 clothes 38 +1314 kitchen mixer kitchen mixer 1 40 7 otherprop Objects appliances 37 +99 wardrobe wardrobe 1 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36 +1315 water softener water softener 1 40 7 otherprop Objects misc 40 +448 banister banister 1 38 7 banister otherstructure Objects n02788148 bannister.n.02 railing 30 +257 trolley trolley 1 40 7 trolley otherprop Objects n04335435 streetcar.n.01 misc 40 +1316 pantry shelf pantry shelf 1 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31 +786 sofa bed sofa bed 1 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11 +801 loofa loofa 1 40 7 otherprop Objects objects 39 +972 shower faucet handle shower faucet handle 1 40 7 handle otherprop Objects shower 23 +1317 toy piano toy piano 1 40 7 toy otherprop Objects n03964744 plaything.n.01 objects 39 +1318 fish fish 1 40 7 otherprop Objects n02512053 fish.n.01 objects 39 +75 file cabinets file cabinet 1 3 6 cabinet cabinet Furniture cabinet 2933112 n03337140 file.n.03 cabinet 7 +657 cat litter box cat litter box 1 29 7 box box Objects objects 39 +561 electric panel electric panel 1 40 7 otherprop Objects misc 40 +93 suitcases suitcase 1 40 7 luggage otherprop Objects n02774630 baggage.n.01 objects 39 +513 curtain rod curtain rod 1 38 7 curtain rod otherstructure Objects curtain 12 +411 bunk bed bunk bed 1 39 6 bunk bed otherfurniture Furniture bed bed bed 2818832 n02920259 bunk_bed.n.01 bed 11 +1122 chandelier chandelier 1 38 7 chandelier otherstructure Objects n03005285 chandelier.n.01 lighting 28 +922 tape tape 1 40 7 tape otherprop Objects objects 39 +88 plates plate 1 40 7 otherprop Objects n03959485 plate.n.04 objects 39 +518 alarm alarm 1 40 7 alarm otherprop Objects clock 3046257 n02694662 alarm_clock.n.01 objects 39 +814 fire hose fire hose 1 40 7 otherprop Objects n03346004 fire_hose.n.01 misc 40 +1319 toy dinosaur toy dinosaur 1 40 7 toy otherprop 
Objects n03964744 plaything.n.01 objects 39 +1320 cone cone 1 40 7 otherprop Objects objects 39 +649 glass doors glass door 1 8 12 door door Wall door n03221720 door.n.01 door 4 +607 hatrack hatrack 1 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 31 +819 subwoofer subwoofer 1 40 7 speaker otherprop Objects speaker 3691459 n04349401 subwoofer.n.01 objects 39 +1321 fire sprinkler fire sprinkler 1 40 7 otherprop Objects misc 40 +1322 trash cabinet trash cabinet 1 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +1204 pantry walls pantry wall 1 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +227 photo photo 1 40 7 photo otherprop Objects n03925226 photograph.n.01 picture 6 +817 barrier barrier 1 40 7 otherprop Objects n02796623 barrier.n.01 misc 40 +130 stacks of cups cup 1 40 7 otherprop Objects n03147509 cup.n.01 objects 39 +712 beachball beachball 1 40 7 ball otherprop Objects n02814224 beach_ball.n.01 objects 39 +1323 folded boxes folded boxes 1 40 7 otherprop Objects objects 39 +1324 contact lens solution bottle contact lens solution bottle 1 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +673 covered box covered box 1 29 7 box box Objects objects 39 +459 folder folder 1 40 7 folder otherprop Objects n03376279 folder.n.02 objects 39 +643 mail trays mail tray 1 40 7 mail tray otherprop Objects objects 39 +238 slipper slipper 1 40 7 otherprop Objects n04241394 slipper.n.01 clothes 38 +765 magazine rack magazine rack 1 39 6 stand otherfurniture Furniture n03704549 magazine_rack.n.01 shelving 31 +1008 sticker sticker 1 40 7 sticker otherprop Objects n07272545 gummed_label.n.01 objects 39 +225 lotion lotion 1 40 7 otherprop Objects n03690938 lotion.n.01 objects 39 +1083 buddha buddha 1 40 7 otherprop Objects objects 39 +813 file organizer file organizer 1 40 7 otherprop Objects objects 39 +138 paper towel rolls paper towel roll 1 40 7 paper towel otherprop Objects n03887697 paper_towel.n.01 
towel 20 +1145 night lamp night lamp 1 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28 +796 fuse box fuse box 1 40 7 otherprop Objects misc 40 +1325 knife block knife block 1 40 7 otherprop Objects objects 39 +363 furnace furnace 1 39 6 furnace otherfurniture Furniture n03404449 furnace.n.01 +1174 cd cases cd case 1 40 7 otherprop Objects objects 39 +38 stools stool 1 40 7 stool otherprop Objects stool n04326896 stool.n.01 stool 19 +1326 hand sanitzer dispenser hand sanitzer dispenser 1 40 7 otherprop Objects n04254120 soap_dispenser.n.01 objects 39 +997 teapot teapot 1 40 7 tea pot otherprop Objects n04398044 teapot.n.01 objects 39 +1327 pen holder pen holder 1 40 7 otherprop Objects objects 39 +1328 tray rack tray rack 1 40 7 otherprop Objects objects 39 +1329 wig wig 1 40 7 otherprop Objects n04584207 wig.n.01 objects 39 +182 switch switch 1 40 7 otherprop Objects n04372370 switch.n.01 misc 40 +280 plastic containers plastic container 1 40 7 container otherprop Objects n03094503 container.n.01 objects 39 +1330 night light night light 1 40 7 otherprop Objects lighting 28 +1331 notepad notepad 1 40 7 otherprop Objects objects 39 +1332 mail bin mail bin 1 40 7 otherprop Objects misc 40 +1333 elevator button elevator button 1 40 7 otherprop Objects misc 40 +939 gaming wheel gaming wheel 1 40 7 otherprop Objects objects 39 +1334 drum set drum set 1 40 7 otherprop Objects objects 39 +480 cosmetic bag cosmetic bag 1 37 7 bag bag Objects objects 39 +907 coffee mug coffee mug 1 40 7 vessel otherprop Objects cup or mug 3797390 n03063599 coffee_mug.n.01 objects 39 +1335 closet shelf closet shelf 1 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31 +1336 baby mobile baby mobile 1 40 7 otherprop Objects objects 39 +829 diaper bin diaper bin 1 40 7 bin otherprop Objects objects 39 +947 door wall door wall 1 1 12 wall wall Wall wall 1 +1116 stepstool stepstool 1 40 7 step stool otherprop Objects objects 39 
+599 paper shredder shredder 1 40 7 otherprop Objects n04210120 shredder.n.01 objects 39 +733 dress rack dress rack 1 40 7 otherprop Objects n03238762 dress_rack.n.01 misc 40 +123 cover cover 1 40 7 blanket otherprop Objects objects 39 +506 shopping bag shopping bag 1 37 7 bag bag Objects n04204081 shopping_bag.n.01 objects 39 +569 sliding door sliding door 1 8 12 door door Wall door n04239074 sliding_door.n.01 door 4 +1337 exercise bike exercise bike 1 40 7 machine otherprop Objects n04210120 shredder.n.01 gym_equipment 33 +1338 recliner chair recliner chair 1 5 4 chair chair Chair chair chair chair 3001627 n03238762 dress_rack.n.01 chair 3 +1314 kitchenaid mixer kitchen mixer 1 40 7 otherprop Objects appliances 37 +1339 soda can soda can 1 40 7 can otherprop Objects can 2946921 n02946921 can.n.01 objects 39 +1340 stovetop stovetop 1 38 7 stove otherstructure Objects stove 4330267 n04330267 stove.n.02 appliances 37 +851 stepladder stepladder 1 39 6 ladder otherfurniture Furniture stairs n04315599 step_ladder.n.01 stairs 16 +142 tap tap 1 40 7 faucet otherprop Objects faucet 3325088 n04559451 water_faucet.n.01 objects 39 +436 cable cable 1 40 7 cables otherprop Objects objects 39 +1341 baby changing station baby changing station 1 39 6 otherfurniture Furniture furniture 36 +1342 costume costume 1 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38 +885 rocking chair rocking chair 1 5 4 chair chair Chair chair chair chair 3001627 n04099969 rocking_chair.n.01 chair 3 +693 binder binder 1 40 7 binder otherprop Objects objects 39 +815 media center media center 1 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +401 towel rack towel rack 1 40 7 otherprop Objects n04459773 towel_rack.n.01 misc 40 +1343 medal medal 1 40 7 otherprop Objects objects 39 +1184 stack of folded chairs folded chair 1 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +1344 telescope telescope 1 40 7 otherprop Objects n04403638 
telescope.n.01 objects 39 +1345 closet doorframe closet doorframe 1 8 12 door door Wall door door 4 +160 glass glass 1 38 7 glass otherstructure Objects n03438257 glass.n.02 misc 40 +1126 baseball cap baseball cap 1 40 7 otherprop Objects cap 2954340 n02799323 baseball_cap.n.01 clothes 38 +1346 battery disposal jar battery disposal jar 1 40 7 jar otherprop Objects jar 3593526 n03593526 jar.n.01 objects 39 +332 mop mop 1 40 7 otherprop Objects n04367480 swab.n.02 objects 39 +397 tank tank 1 40 7 otherprop Objects objects 39 +643 mail tray mail tray 1 40 7 mail tray otherprop Objects objects 39 +551 centerpiece centerpiece 1 40 7 centerpiece otherprop Objects n02994419 centerpiece.n.02 objects 39 +1163 stick stick 1 40 7 stick otherprop Objects objects 39 +1347 closet floor closet floor 1 2 5 floor floor Floor n03365592 floor.n.01 floor 2 +1348 dryer sheets dryer sheets 1 40 7 otherprop Objects objects 39 +803 bycicle bycicle 1 40 7 otherprop Objects misc 40 +484 flower stand flower stand 1 39 6 stand otherfurniture Furniture furniture 36 +1349 air mattress air mattress 1 4 1 bed bed Bed bed bed bed 2818832 n02690809 air_mattress.n.01 bed 11 +1350 clip clip 1 40 7 otherprop Objects objects 39 +222 side table side table 1 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5 +1253 pizza boxes pizza box 1 29 7 box box Objects n02883344 box.n.01 objects 39 +1351 display display 1 39 7 otherfurniture Furniture n03211117 display.n.06 misc 40 +1352 postcard postcard 1 40 7 otherprop Objects objects 39 +828 display sign display sign 1 40 7 sign otherprop Objects misc 40 +1353 paper towel paper towel 1 40 7 paper towel otherprop Objects n03887697 paper_towel.n.01 towel 20 +612 boots boot 1 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38 +1354 tennis racket bag tennis racket bag 1 40 7 otherprop Objects objects 39 +1355 air hockey table air hockey table 1 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5 +1301 
socks sock 1 21 7 clothes clothes Objects n04254777 sock.n.01 clothes 38 +1356 food bag food bag 1 37 7 bag bag Objects objects 39 +1199 clothes hangers clothes hanger 1 40 7 otherprop Objects n03057920 coat_hanger.n.01 misc 40 +1357 starbucks cup starbucks cup 1 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39 diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2-labels.combined.tsv b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2-labels.combined.tsv new file mode 100644 index 0000000..cff61b1 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2-labels.combined.tsv @@ -0,0 +1,608 @@ +id raw_category category count nyu40id eigen13id nyuClass nyu40class eigen13class ModelNet40 ModelNet10 ShapeNetCore55 synsetoffset wnsynsetid wnsynsetkey mpcat40 mpcat40index +1 wall wall 8277 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +2 chair chair 4646 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +22 books book 1678 23 2 book books Books n02870526 book.n.11 objects 39 +3 floor floor 1553 2 5 floor floor Floor n03365592 floor.n.01 floor 2 +5 door door 1483 8 12 door door Wall door n03221720 door.n.01 door 4 +1163 object object 1313 40 7 otherprop Objects objects 39 +16 window window 1209 9 13 window window Window n04587648 window.n.01 window 9 +4 table table 1170 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5 +56 trash can trash can 1090 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39 +13 pillow pillow 937 18 7 pillow pillow Objects pillow 3938244 n03938244 pillow.n.01 cushion 8 +15 picture picture 862 11 8 picture picture Picture n03931044 picture.n.01 picture 6 +41 ceiling ceiling 806 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 
ceiling 17 +26 box box 775 29 7 box box Objects n02883344 box.n.01 objects 39 +161 doorframe doorframe 768 8 12 door door Wall door doorframe.n.01 door 4 +19 monitor monitor 765 40 7 monitor otherprop Objects monitor monitor tv or monitor 3211117 n03782190 monitor.n.04 objects 39 +7 cabinet cabinet 731 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +9 desk desk 680 14 10 desk desk Table desk desk table 4379243 n03179701 desk.n.01 table 5 +8 shelf shelf 641 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31 +10 office chair office chair 595 5 4 chair chair Chair chair chair chair 3001627 n04373704 swivel_chair.n.01 chair 3 +31 towel towel 570 27 7 towel towel Objects n04459362 towel.n.01 towel 20 +6 couch couch 502 6 9 sofa sofa Sofa sofa sofa sofa 4256520 n04256520 sofa.n.01 sofa 10 +14 sink sink 488 34 7 sink sink Objects sink n04223580 sink.n.01 sink 15 +48 backpack backpack 479 40 7 backpack otherprop Objects n02769748 backpack.n.01 objects 39 +28 lamp lamp 419 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28 +11 bed bed 370 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11 +18 bookshelf bookshelf 360 10 6 bookshelf bookshelf Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31 +71 mirror mirror 349 19 7 mirror mirror Objects n03773035 mirror.n.01 mirror 21 +21 curtain curtain 347 16 13 curtain curtain Window curtain n03151077 curtain.n.01 curtain 12 +40 plant plant 331 40 7 plant otherprop Objects plant n00017222 plant.n.02 plant 14 +52 whiteboard whiteboard 327 30 7 whiteboard whiteboard Objects n03211616 display_panel.n.01 board_panel 35 +96 radiator radiator 322 39 6 radiator otherfurniture Furniture n04041069 radiator.n.02 misc 40 +22 book book 318 23 2 book books Books n02870526 book.n.11 objects 39 +29 kitchen cabinet kitchen cabinet 310 3 6 cabinet cabinet Furniture n02933112 cabinet.n.01 cabinet 7 +49 toilet paper toilet 
paper 291 40 7 toilet paper otherprop Objects n15075141 toilet_tissue.n.01 objects 39 +29 kitchen cabinets kitchen cabinet 289 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +23 armchair armchair 281 5 4 chair chair Chair chair chair chair 3001627 n02738535 armchair.n.01 chair 3 +63 shoes shoe 272 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38 +24 coffee table coffee table 258 7 10 coffee table table Table table table table 4379243 n03063968 coffee_table.n.01 table 5 +17 toilet toilet 256 33 7 toilet toilet Objects toilet toilet n04446276 toilet.n.01 toilet 18 +47 bag bag 252 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39 +32 clothes clothes 248 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38 +46 keyboard keyboard 246 40 7 keyboard otherprop Objects keyboard computer keyboard 3085013 n03085013 computer_keyboard.n.01 objects 39 +65 bottle bottle 226 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +97 recycling bin recycling bin 225 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39 +34 nightstand nightstand 224 32 6 night stand night stand Furniture night_stand night_stand n03015254 chest_of_drawers.n.01 chest_of_drawers 13 +38 stool stool 221 40 7 stool otherprop Objects stool n04326896 stool.n.01 stool 19 +33 tv tv 219 25 11 television television TV tv or monitor 3211117 n03211117 display.n.06 tv_monitor 22 +75 file cabinet file cabinet 217 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +36 dresser dresser 213 17 6 dresser dresser Furniture dresser dresser n03015254 chest_of_drawers.n.01 chest_of_drawers 13 +64 computer tower computer tower 203 40 7 computer otherprop Objects n03082979 computer.n.01 objects 39 +32 clothing clothes 165 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38 +101 telephone telephone 164 40 7 telephone otherprop Objects telephone 4401088 
n04401088 telephone.n.01 objects 39 +130 cup cup 157 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39 +27 refrigerator refrigerator 154 24 6 refridgerator refridgerator Furniture n04070727 refrigerator.n.01 appliances 37 +44 end table end table 147 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5 +131 jacket jacket 146 40 7 jacket otherprop Objects n03589791 jacket.n.01 clothes 38 +55 shower curtain shower curtain 144 28 7 shower curtain shower curtain Objects curtain n04209239 shower_curtain.n.01 curtain 12 +42 bathtub bathtub 144 36 7 bathtub bathtub Objects bathtub bathtub tub 2808440 n02808440 bathtub.n.01 bathtub 25 +59 microwave microwave 141 40 7 microwave otherprop Objects microwave 3761084 n03761084 microwave.n.02 appliances 37 +159 kitchen counter kitchen counter 140 12 6 counter counter Furniture table table table 4379243 n03116530 counter.n.01 counter 26 +74 sofa chair sofa chair 129 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +82 paper towel dispenser paper towel dispenser 129 40 7 paper towel dispenser otherprop Objects objects 39 +1164 bathroom vanity bathroom vanity 126 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 table 5 +93 suitcase suitcase 118 40 7 luggage otherprop Objects n02773838 bag.n.06 objects 39 +77 laptop laptop 111 40 7 laptop otherprop Objects laptop laptop 3642806 n03642806 laptop.n.01 objects 39 +67 ottoman ottoman 111 39 6 ottoman otherfurniture Furniture stool n03380724 footstool.n.01 stool 19 +128 shower walls shower wall 109 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +50 printer printer 106 40 7 printer otherprop Objects printer 4004475 n04004475 printer.n.03 appliances 37 +35 counter counter 104 12 6 counter counter Furniture table table table 4379243 n03116530 counter.n.01 counter 26 +69 board board 100 38 7 board otherstructure Objects board_panel 35 +100 soap dispenser soap dispenser 99 40 7 otherprop 
Objects n04254120 soap_dispenser.n.01 objects 39 +62 stove stove 95 38 7 stove otherstructure Objects stove 4330267 n04330267 stove.n.02 appliances 37 +105 light light 93 38 7 light otherstructure Objects n03665366 light.n.02 lighting 28 +1165 closet wall closet wall 90 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +165 mini fridge mini fridge 87 24 6 refridgerator refridgerator Furniture n03273913 electric_refrigerator.n.01 appliances 37 +7 cabinets cabinet 79 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +5 doors door 76 8 12 door door Wall door n03221720 door.n.01 door 4 +76 fan fan 75 40 7 fan otherprop Objects n03320046 fan.n.01 misc 40 +230 tissue box tissue box 73 40 7 tissue box otherprop Objects n02883344 box.n.01 objects 39 +54 blanket blanket 72 40 7 blanket otherprop Objects n02849154 blanket.n.01 objects 39 +125 bathroom stall bathroom stall 71 38 7 otherstructure Objects n02873839 booth.n.02 misc 40 +72 copier copier 70 40 7 otherprop Objects n03257586 duplicator.n.01 appliances 37 +68 bench bench 66 39 6 bench otherfurniture Furniture bench bench 2828884 n02828884 bench.n.01 seating 34 +145 bar bar 66 38 7 bar otherstructure Objects n02788689 bar.n.03 misc 40 +157 soap dish soap dish 65 40 7 soap dish otherprop Objects n04254009 soap_dish.n.01 objects 39 +1166 laundry hamper laundry hamper 65 40 7 laundry basket otherprop Objects objects 39 +132 storage bin storage bin 63 40 7 storage bin otherprop Objects objects 39 +1167 bathroom stall door bathroom stall door 62 8 12 door door Wall door n03221720 door.n.01 door 4 +232 light switch light switch 61 38 7 light switch otherstructure Objects n04372370 switch.n.01 misc 40 +134 coffee maker coffee maker 61 40 7 otherprop Objects n03063338 coffee_maker.n.01 appliances 37 +51 tv stand tv stand 61 39 6 tv stand otherfurniture Furniture tv_stand n03290653 entertainment_center.n.01 furniture 36 +250 decoration decoration 60 40 7 otherprop Objects n03169390 decoration.n.01 
misc 40 +1168 ceiling light ceiling light 59 38 7 light otherstructure Objects n03665366 light.n.02 lighting 28 +342 range hood range hood 59 38 7 range hood otherstructure Objects range_hood n04053677 range_hood.n.01 misc 40 +89 blackboard blackboard 58 38 7 blackboard otherstructure Objects n02846511 blackboard.n.01 board_panel 35 +103 clock clock 58 40 7 clock otherprop Objects clock 3046257 n03046257 clock.n.01 objects 39 +99 wardrobe closet wardrobe 54 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36 +95 rail rail 53 38 7 railing otherstructure Objects n04047401 railing.n.01 railing 30 +154 bulletin board bulletin board 53 38 7 board otherstructure Objects n03211616 display_panel.n.01 board_panel 35 +140 mat mat 52 20 5 floor mat floor mat Floor n03727837 mat.n.01 floor 2 +1169 trash bin trash bin 52 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39 +193 ledge ledge 51 38 7 otherstructure Objects n09337253 ledge.n.01 misc 40 +116 seat seat 49 39 6 furniture otherfurniture Furniture n04161981 seat.n.03 furniture 36 +202 mouse mouse 49 40 7 mouse otherprop Objects n03793489 mouse.n.04 objects 39 +73 basket basket 48 40 7 basket otherprop Objects basket 2801938 n02801938 basket.n.01 objects 39 +78 shower shower 48 38 7 otherstructure Objects n04208936 shower.n.01 shower 23 +1170 dumbbell dumbbell 48 40 7 otherprop Objects n03255030 dumbbell.n.01 objects 39 +79 paper paper 46 26 7 paper paper Objects n14974264 paper.n.01 objects 39 +80 person person 46 31 7 person person Objects person n05217688 person.n.02 misc 40 +141 windowsill windowsill 45 38 7 otherstructure Objects n04590263 windowsill.n.01 window 9 +57 closet closet 45 39 6 wardrobe otherfurniture Furniture wardrobe misc 40 +102 bucket bucket 45 40 7 bucket otherprop Objects n02909870 bucket.n.01 misc 40 +261 sign sign 44 40 7 sign otherprop Objects n04217882 signboard.n.01 objects 39 +118 speaker speaker 43 40 7 speaker 
otherprop Objects speaker 3691459 n03691459 loudspeaker.n.01 objects 39 +136 dishwasher dishwasher 43 38 7 dishwasher otherstructure Objects dishwasher 3207941 n03207941 dishwasher.n.01 appliances 37 +98 container container 43 40 7 container otherprop Objects n03094503 container.n.01 objects 39 +1171 stair rail stair rail 42 38 7 banister otherstructure Objects n02788148 bannister.n.02 railing 30 +170 shower curtain rod shower curtain rod 42 40 7 otherprop Objects curtain 12 +1172 tube tube 41 40 7 otherprop Objects misc 40 +1173 bathroom cabinet bathroom cabinet 39 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +79 papers paper 39 26 7 paper paper Objects n14974264 paper.n.01 objects 39 +221 storage container storage container 39 40 7 container otherprop Objects objects 39 +570 paper bag paper bag 39 37 7 bag bag Objects n04122825 sack.n.01 objects 39 +138 paper towel roll paper towel roll 39 40 7 paper towel otherprop Objects n03887697 paper_towel.n.01 towel 20 +168 ball ball 39 40 7 ball otherprop Objects objects 39 +276 closet doors closet door 38 8 12 door door Wall door n03221720 door.n.01 door 4 +106 laundry basket laundry basket 37 40 7 laundry basket otherprop Objects basket 2801938 n03050864 clothes_hamper.n.01 objects 39 +214 cart cart 37 40 7 cart otherprop Objects n03484083 handcart.n.01 shelving 31 +276 closet door closet door 35 8 12 door door Wall door n03221720 door.n.01 door 4 +323 dish rack dish rack 35 40 7 dish rack otherprop Objects n03207630 dish_rack.n.01 objects 39 +58 stairs stairs 35 38 7 stairs otherstructure Objects n04298308 stairway.n.01 stairs 16 +86 blinds blinds 35 13 13 blinds blinds Window n02851099 blind.n.03 blinds 32 +2 stack of chairs chair 35 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +399 purse purse 34 40 7 purse otherprop Objects n02774152 bag.n.04 objects 39 +121 bicycle bicycle 33 40 7 bicycle otherprop Objects bicycle 2834778 n02834778 bicycle.n.01 
objects 39 +185 tray tray 32 40 7 tray otherprop Objects n04476259 tray.n.01 objects 39 +300 plunger plunger 30 40 7 otherprop Objects n03970156 plunger.n.03 objects 39 +180 paper cutter paper cutter 30 40 7 paper cutter otherprop Objects n03886940 paper_cutter.n.01 objects 39 +163 toilet paper dispenser toilet paper dispenser 29 40 7 otherprop Objects objects 39 +26 boxes box 29 29 7 box box Objects n02883344 box.n.01 objects 39 +66 bin bin 28 40 7 bin otherprop Objects n02839910 bin.n.01 objects 39 +208 toilet seat cover dispenser toilet seat cover dispenser 28 40 7 otherprop Objects objects 39 +112 guitar guitar 28 40 7 guitar otherprop Objects guitar guitar 3467517 n03467517 guitar.n.01 objects 39 +540 mailboxes mailbox 28 29 7 box box Objects mailbox 3710193 n03710193 mailbox.n.01 misc 40 +395 handicap bar handicap bar 27 38 7 bar otherstructure Objects misc 40 +166 fire extinguisher fire extinguisher 27 40 7 fire extinguisher otherprop Objects n03345837 fire_extinguisher.n.01 misc 40 +122 ladder ladder 27 39 6 ladder otherfurniture Furniture stairs n03632277 ladder.n.01 stairs 16 +120 column column 26 38 7 column otherstructure Objects n03074380 column.n.06 column 24 +107 pipe pipe 25 40 7 pipe otherprop Objects n03944672 pipe.n.02 misc 40 +283 vacuum cleaner vacuum cleaner 25 40 7 otherprop Objects n04517823 vacuum.n.04 objects 39 +88 plate plate 24 40 7 plate otherprop Objects n03959485 plate.n.04 objects 39 +90 piano piano 24 39 6 piano otherfurniture Furniture piano piano 3928116 n03928116 piano.n.01 furniture 36 +177 water cooler water cooler 24 39 6 water cooler otherfurniture Furniture n04559166 water_cooler.n.01 misc 40 +1174 cd case cd case 24 40 7 otherprop Objects objects 39 +562 bowl bowl 24 40 7 bowl otherprop Objects bowl bowl 2880940 n02880940 bowl.n.03 objects 39 +1175 closet rod closet rod 24 40 7 otherprop Objects n04100174 rod.n.01 misc 40 +1156 bathroom counter bathroom counter 24 12 6 counter counter Furniture table table table 4379243 
n03116530 counter.n.01 counter 26 +84 oven oven 23 38 7 oven otherstructure Objects n03862676 oven.n.01 appliances 37 +104 stand stand 23 39 6 stand otherfurniture Furniture table table table 4379243 n04301000 stand.n.04 table 5 +229 scale scale 23 40 7 scale otherprop Objects n04141975 scale.n.07 objects 39 +70 washing machine washing machine 23 39 6 washing machine otherfurniture Furniture washing_machine 4554684 n04554684 washer.n.03 appliances 37 +325 broom broom 22 40 7 broom otherprop Objects n02906734 broom.n.01 objects 39 +169 hat hat 22 40 7 hat otherprop Objects n03497657 hat.n.01 clothes 38 +128 shower wall shower wall 22 1 12 wall wall Wall n04208936 shower.n.01 wall 1 +331 guitar case guitar case 21 40 7 guitar case otherprop Objects objects 39 +87 rack rack 21 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31 +488 water pitcher water pitcher 21 40 7 pitcher otherprop Objects n03950228 pitcher.n.02 objects 39 +776 laundry detergent laundry detergent 21 40 7 otherprop Objects objects 39 +370 hair dryer hair dryer 21 40 7 hair dryer otherprop Objects n03483316 hand_blower.n.01 objects 39 +191 pillar pillar 21 38 7 column otherstructure Objects n03073977 column.n.07 column 24 +748 divider divider 20 40 7 otherprop Objects wall 1 +242 power outlet power outlet 19 40 7 otherprop Objects misc 40 +45 dining table dining table 19 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5 +417 shower floor shower floor 19 2 5 floor floor Floor n04208936 shower.n.01 floor 2 +70 washing machines washing machine 19 39 6 washing machine otherfurniture Furniture washing_machine 4554684 n04554684 washer.n.03 appliances 37 +188 shower door shower door 19 8 12 door door Wall door n04208936 shower.n.01 door 4 +1176 coffee kettle coffee kettle 18 40 7 pot otherprop Objects n03612814 kettle.n.01 objects 39 +1177 wardrobe cabinet wardrobe 18 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36 +1178 
structure structure 18 38 7 otherstructure Objects misc 40 +18 bookshelves bookshelf 17 10 6 bookshelf bookshelf Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31 +110 clothes dryer clothes dryer 17 39 6 otherfurniture Furniture n03251766 dryer.n.01 appliances 37 +148 toaster toaster 17 40 7 toaster otherprop Objects n04442312 toaster.n.02 appliances 37 +63 shoe shoe 17 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38 +155 ironing board ironing board 16 39 6 ironing board otherfurniture Furniture n03586090 ironing_board.n.01 objects 39 +572 alarm clock alarm clock 16 40 7 alarm clock otherprop Objects clock 3046257 n02694662 alarm_clock.n.01 objects 39 +1179 shower head shower head 15 38 7 otherstructure Objects shower 23 +28 lamp base lamp 15 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28 +392 water bottle water bottle 15 40 7 bottle otherprop Objects bottle bottle 2876657 n04557648 water_bottle.n.01 objects 39 +1180 keyboard piano keyboard piano 15 39 6 piano otherfurniture Furniture piano piano 3928116 n03928116 piano.n.01 furniture 36 +609 projector screen projector screen 15 38 7 projector screen otherstructure Objects misc 40 +1181 case of water bottles case of water bottles 15 40 7 otherprop Objects objects 39 +195 toaster oven toaster oven 14 40 7 toaster oven otherprop Objects n04442441 toaster_oven.n.01 appliances 37 +581 music stand music stand 14 39 6 music stand otherfurniture Furniture n03801760 music_stand.n.01 furniture 36 +58 staircase stairs 14 38 7 stairs otherstructure Objects n04298308 stairway.n.01 stairs 16 +1182 coat rack coat rack 14 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 3 +1183 storage organizer storage organizer 14 40 7 otherprop Objects shelving 3 +139 machine machine 14 40 7 machine otherprop Objects n03699975 machine.n.01 appliances 37 +1184 folded chair folded chair 14 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +1185 fire 
alarm fire alarm 14 40 7 otherprop Objects n03343737 fire_alarm.n.02 misc 40 +156 fireplace fireplace 13 38 7 fireplace otherstructure Objects n03346455 fireplace.n.01 fireplace 27 +408 vent vent 13 40 7 otherprop Objects n04526241 vent.n.01 misc 40 +213 furniture furniture 13 39 6 furniture otherfurniture Furniture n03405725 furniture.n.01 furniture 36 +1186 power strip power strip 13 40 7 otherprop Objects objects 39 +1187 calendar calendar 13 40 7 otherprop Objects objects 39 +1188 poster poster 13 11 8 picture picture Picture n03931044 picture.n.01 picture 6 +115 toilet paper holder toilet paper holder 13 40 7 toilet paper holder otherprop Objects objects 39 +1189 potted plant potted plant 12 40 7 plant otherprop Objects plant n00017222 plant.n.02 plant 14 +304 stuffed animal stuffed animal 12 40 7 stuffed animal otherprop Objects n04399382 teddy.n.01 objects 39 +1190 luggage luggage 12 40 7 luggage otherprop Objects n02774630 baggage.n.01 objects 39 +21 curtains curtain 12 16 13 curtain curtain Window curtain n03151077 curtain.n.01 curtain 12 +312 headphones headphones 12 40 7 otherprop Objects n03261776 earphone.n.01 objects 39 +233 crate crate 12 39 6 crate otherfurniture Furniture n03127925 crate.n.01 objects 39 +286 candle candle 12 40 7 candle otherprop Objects lamp n02948072 candle.n.01 objects 39 +264 projector projector 12 40 7 projector otherprop Objects n04009552 projector.n.02 objects 39 +110 clothes dryers clothes dryer 12 39 6 otherfurniture Furniture n03251766 dryer.n.01 appliances 37 +1191 mattress mattress 12 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11 +356 dustpan dustpan 12 40 7 otherprop Objects n03259009 dustpan.n.02 objects 39 +25 drawer drawer 11 39 6 drawer otherfurniture Furniture n03233905 drawer.n.01 furniture 36 +750 rod rod 11 40 7 otherprop Objects pistol 3948459 n03427202 gat.n.01 misc 40 +269 globe globe 11 40 7 globe otherprop Objects objects 39 +307 footrest footrest 11 39 6 foot rest otherfurniture Furniture 
stool n03380724 footstool.n.01 stool 19 +410 piano bench piano bench 11 39 6 piano bench otherfurniture Furniture bench bench 2828884 n02828884 bench.n.01 seating 34 +730 breakfast bar breakfast bar 11 38 7 bar otherstructure Objects counter 26 +216 step stool step stool 11 40 7 step stool otherprop Objects stool n04315713 step_stool.n.01 stool 19 +1192 hand rail hand rail 11 38 7 railing otherstructure Objects railing 30 +119 vending machine vending machine 11 40 7 machine otherprop Objects n04525305 vending_machine.n.01 appliances 37 +682 ceiling fan ceiling fan 11 40 7 fan otherprop Objects n03320046 fan.n.01 misc 40 +434 swiffer swiffer 11 40 7 otherprop Objects objects 39 +126 foosball table foosball table 11 39 6 foosball table otherfurniture Furniture table table table 4379243 n04379243 table.n.02 table 5 +919 jar jar 11 40 7 jar otherprop Objects jar 3593526 n03593526 jar.n.01 objects 39 +85 footstool footstool 11 39 6 ottoman otherfurniture Furniture stool n03380724 footstool.n.01 stool 19 +1193 folded table folded table 10 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5 +108 round table round table 10 7 10 table table Table table table table 4379243 n04114554 round_table.n.02 table 5 +135 hamper hamper 10 40 7 basket otherprop Objects basket 2801938 n03482405 hamper.n.02 objects 39 +1194 poster tube poster tube 10 40 7 otherprop Objects objects 39 +432 case case 10 40 7 case otherprop Objects objects 39 +53 carpet carpet 10 40 7 rug otherprop Objects n04118021 rug.n.01 floor 2 +1195 thermostat thermostat 10 40 7 otherprop Objects n04422875 thermostat.n.01 misc 40 +111 coat coat 10 40 7 jacket otherprop Objects n03057021 coat.n.01 clothes 38 +305 water fountain water fountain 10 38 7 water fountain otherstructure Objects n03241335 drinking_fountain.n.01 misc 40 +1125 smoke detector smoke detector 10 40 7 otherprop Objects misc 40 +13 pillows pillow 9 18 7 pillow pillow Objects pillow 3938244 n03938244 pillow.n.01 cushion 8 
+1196 flip flops flip flops 9 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38 +1197 cloth cloth 9 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38 +1198 banner banner 9 40 7 otherprop Objects n02788021 banner.n.01 misc 40 +1199 clothes hanger clothes hanger 9 40 7 otherprop Objects n03057920 coat_hanger.n.01 objects 39 +1200 whiteboard eraser whiteboard eraser 9 40 7 otherprop Objects objects 39 +378 iron iron 9 40 7 otherprop Objects n03584829 iron.n.04 objects 39 +591 instrument case instrument case 9 40 7 case otherprop Objects objects 39 +49 toilet paper rolls toilet paper 9 40 7 toilet paper otherprop Objects n15075141 toilet_tissue.n.01 objects 39 +92 soap soap 9 40 7 soap otherprop Objects n04253437 soap.n.01 objects 39 +1098 block block 9 40 7 otherprop Objects misc 40 +291 wall hanging wall hanging 8 40 7 otherprop Objects n03491178 hanging.n.01 picture 6 +1063 kitchen island kitchen island 8 38 7 kitchen island otherstructure Objects n03620600 kitchen_island.n.01 counter 26 +107 pipes pipe 8 38 7 otherstructure Objects misc 40 +1135 toothbrush toothbrush 8 40 7 toothbrush otherprop Objects n04453156 toothbrush.n.01 objects 39 +189 shirt shirt 8 40 7 otherprop Objects n04197391 shirt.n.01 clothes 38 +245 cutting board cutting board 8 40 7 cutting board otherprop Objects n03025513 chopping_board.n.01 objects 39 +194 vase vase 8 40 7 vase otherprop Objects vase jar 3593526 n04522168 vase.n.01 objects 39 +1201 shower control valve shower control valve 8 38 7 otherstructure Objects n04208936 shower.n.01 shower 23 +386 exercise machine exercise machine 8 40 7 machine otherprop Objects gym_equipment 33 +1202 compost bin compost bin 8 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39 +857 shorts shorts 8 40 7 shorts otherprop Objects clothes 38 +452 tire tire 8 40 7 otherprop Objects n04440749 tire.n.01 objects 39 +1203 teddy bear teddy bear 7 40 7 stuffed animal otherprop Objects n04399382 
teddy.n.01 objects 39 +346 bathrobe bathrobe 7 40 7 otherprop Objects n02807616 bathrobe.n.01 clothes 38 +152 handrail handrail 7 38 7 railing otherstructure Objects n02788148 bannister.n.02 railing 30 +83 faucet faucet 7 40 7 faucet otherprop Objects faucet 3325088 n03325088 faucet.n.01 misc 40 +1204 pantry wall pantry wall 7 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +726 thermos thermos 7 40 7 flask otherprop Objects bottle bottle 2876657 n04422727 thermos.n.01 objects 39 +61 rug rug 7 40 7 rug otherprop Objects n04118021 rug.n.01 floor 2 +39 couch cushions cushion 7 18 7 pillow pillow Objects n03151500 cushion.n.03 cushion 8 +1117 tripod tripod 7 39 6 stand otherfurniture Furniture n04485082 tripod.n.01 objects 39 +540 mailbox mailbox 7 29 7 box box Objects mailbox 3710193 n03710193 mailbox.n.01 misc 40 +1205 tupperware tupperware 7 40 7 otherprop Objects objects 39 +415 shoe rack shoe rack 7 40 7 shoe rack otherprop Objects shelving 31 +31 towels towel 6 27 7 towel towel Objects n04459362 towel.n.01 towel 20 +1206 beer bottles beer bottle 6 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +153 treadmill treadmill 6 39 6 treadmill otherfurniture Furniture n04477387 treadmill.n.01 gym_equipment 33 +1207 salt salt 6 40 7 otherprop Objects objects 39 +129 chest chest 6 39 6 chest otherfurniture Furniture dresser dresser chest_of_drawers 13 +220 dispenser dispenser 6 40 7 otherprop Objects n03210683 dispenser.n.01 objects 39 +1208 mirror doors mirror door 6 8 12 door door Wall door n03221720 door.n.01 door 4 +231 remote remote 6 40 7 otherprop Objects remote_control 4074963 n04074963 remote_control.n.01 objects 39 +1209 folded ladder folded ladder 6 39 6 ladder otherfurniture Furniture stairs n03632277 ladder.n.01 misc 40 +39 cushion cushion 6 18 7 pillow pillow Objects n03151500 cushion.n.03 cushion 8 +1210 carton carton 6 40 7 otherprop Objects objects 39 +117 step step 6 38 7 otherstructure Objects n04314914 step.n.04 misc 
40 +822 drying rack drying rack 6 39 6 drying rack otherfurniture Furniture shelving 31 +238 slippers slipper 6 40 7 shoe otherprop Objects n04241394 slipper.n.01 clothes 38 +143 pool table pool table 6 39 6 pool table otherfurniture Furniture table table table 4379243 n03982430 pool_table.n.01 table 5 +1211 soda stream soda stream 6 40 7 otherprop Objects objects 39 +228 toilet brush toilet brush 6 40 7 toilet brush otherprop Objects objects 39 +494 loft bed loft bed 6 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11 +226 cooking pot cooking pot 6 40 7 pot otherprop Objects objects 39 +91 heater heater 6 39 6 heater otherfurniture Furniture n03508101 heater.n.01 misc 40 +1072 messenger bag messenger bag 6 37 7 bag bag Objects objects 39 +435 stapler stapler 6 40 7 stapler otherprop Objects n04303497 stapler.n.01 objects 39 +1165 closet walls closet wall 5 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +345 scanner scanner 5 40 7 otherprop Objects appliances 37 +893 elliptical machine elliptical machine 5 40 7 machine otherprop Objects gym_equipment 33 +621 kettle kettle 5 40 7 pot otherprop Objects n03612814 kettle.n.01 objects 39 +1212 metronome metronome 5 40 7 otherprop Objects n03757604 metronome.n.01 objects 39 +297 dumbell dumbell 5 40 7 otherprop Objects objects 39 +1213 music book music book 5 23 2 book books Books n02870526 book.n.11 objects 39 +1214 rice cooker rice cooker 5 40 7 otherprop Objects objects 39 +1215 dart board dart board 5 38 7 board otherstructure Objects n03162940 dartboard.n.01 objects 39 +529 sewing machine sewing machine 5 40 7 sewing machine otherprop Objects n04179913 sewing_machine.n.01 objects 39 +1216 grab bar grab bar 5 38 7 railing otherstructure Objects railing 30 +1217 flowerpot flowerpot 5 40 7 vase otherprop Objects vase jar 3593526 n04522168 vase.n.01 objects 39 +1218 painting painting 5 11 8 picture picture Picture n03931044 picture.n.01 picture 6 +1219 railing railing 5 38 7 railing otherstructure Objects 
n04047401 railing.n.01 railing 30 +1220 stair stair 5 38 7 stairs otherstructure Objects stairs n04314914 step.n.04 stairs 16 +525 toolbox toolbox 5 39 6 chest otherfurniture Furniture n04452615 toolbox.n.01 objects 39 +204 nerf gun nerf gun 5 40 7 otherprop Objects objects 39 +693 binders binder 5 40 7 binder otherprop Objects objects 39 +179 desk lamp desk lamp 5 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28 +1221 quadcopter quadcopter 5 40 7 otherprop Objects objects 39 +1222 pitcher pitcher 5 40 7 pitcher otherprop Objects n03950228 pitcher.n.02 objects 39 +1223 hanging hanging 5 40 7 otherprop Objects misc 40 +1224 mail mail 5 40 7 otherprop Objects misc 40 +1225 closet ceiling closet ceiling 5 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 ceiling 17 +1226 hoverboard hoverboard 5 40 7 otherprop Objects objects 39 +1227 beanbag chair beanbag chair 5 39 6 bean bag otherfurniture Furniture n02816656 beanbag.n.01 chair 3 +571 water heater water heater 5 40 7 water heater otherprop Objects n04560113 water_heater.n.01 misc 40 +1228 spray bottle spray bottle 5 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +556 rope rope 5 40 7 rope otherprop Objects n04108268 rope.n.01 objects 39 +280 plastic container plastic container 5 40 7 container otherprop Objects objects 39 +1229 soap bottle soap bottle 5 40 7 soap otherprop Objects objects 39 +1230 ikea bag ikea bag 4 37 7 bag bag Objects 2773838 n02773838 bag.n.06 objects 39 +1231 sleeping bag sleeping bag 4 40 7 otherprop Objects n04235860 sleeping_bag.n.01 objects 39 +1232 duffel bag duffel bag 4 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39 +746 frying pan frying pan 4 40 7 frying pan otherprop Objects n03400231 frying_pan.n.01 objects 39 +1233 oven mitt oven mitt 4 40 7 otherprop Objects objects 39 +1234 pot pot 4 40 7 pot otherprop Objects n04235860 sleeping_bag.n.01 objects 39 +144 hand dryer hand dryer 4 40 7 otherprop 
Objects objects 39 +282 dollhouse dollhouse 4 39 6 doll house otherfurniture Furniture n03219483 dollhouse.n.01 objects 39 +167 shampoo bottle shampoo bottle 4 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +1235 hair brush hair brush 4 40 7 otherprop Objects n02908217 brush.n.02 objects 39 +1236 tennis racket tennis racket 4 40 7 otherprop Objects n04409806 tennis_racket.n.01 objects 39 +1237 display case display case 4 40 7 case otherprop Objects objects 39 +234 ping pong table ping pong table 4 39 6 ping pong table otherfurniture Furniture table table table 4379243 n04379243 table.n.02 table 5 +563 boiler boiler 4 40 7 otherprop Objects misc 40 +1238 bag of coffee beans bag of coffee beans 4 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39 +1239 bananas banana 4 40 7 otherprop Objects n00021265 food.n.01 objects 39 +1240 carseat carseat 4 40 7 otherprop Objects misc 40 +366 helmet helmet 4 40 7 otherprop Objects helmet 3513137 n03513137 helmet.n.02 clothes 38 +816 umbrella umbrella 4 40 7 umbrella otherprop Objects n04507155 umbrella.n.01 objects 39 +1241 coffee box coffee box 4 40 7 otherprop Objects objects 39 +719 envelope envelope 4 40 7 envelope otherprop Objects n03291819 envelope.n.01 objects 39 +284 wet floor sign wet floor sign 4 40 7 sign otherprop Objects misc 40 +1242 clothing rack clothing rack 4 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31 +247 controller controller 4 40 7 otherprop Objects n03096960 control.n.09 objects 39 +1243 bath walls bathroom wall 4 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +1244 podium podium 4 39 6 otherfurniture Furniture n03159640 dais.n.01 furniture 36 +1245 storage box storage box 4 29 7 box box Objects n02883344 box.n.01 objects 39 +1246 dolly dolly 4 40 7 otherprop Objects misc 40 +1247 shampoo shampoo 3 40 7 otherprop Objects n04183516 shampoo.n.01 objects 39 +592 paper tray paper tray 3 40 7 paper tray otherprop Objects objects 39 
+385 cabinet door cabinet door 3 8 12 door door Wall door door 4 +1248 changing station changing station 3 40 7 otherprop Objects misc 40 +1249 poster printer poster printer 3 40 7 printer otherprop Objects printer 4004475 n04004475 printer.n.03 appliances 37 +133 screen screen 3 40 7 otherprop Objects n03151077 curtain.n.01 curtain 12 +301 soap bar soap bar 3 38 7 bar otherstructure Objects objects 39 +1250 crutches crutches 3 40 7 otherprop Objects n03141823 crutch.n.01 objects 39 +379 studio light studio light 3 38 7 light otherstructure Objects lighting 28 +130 stack of cups cup 3 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39 +1251 toilet flush button toilet flush button 3 40 7 otherprop Objects objects 39 +450 trunk trunk 3 40 7 otherprop Objects misc 40 +1252 grocery bag grocery bag 3 37 7 bag bag Objects suitcase 2773838 n03461288 grocery_bag.n.01 objects 39 +316 plastic bin plastic bin 3 40 7 bin otherprop Objects objects 39 +1253 pizza box pizza box 3 29 7 box box Objects objects 39 +385 cabinet doors cabinet door 3 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 door 4 +1254 legs legs 3 31 7 person person Objects person n05217688 person.n.02 misc 40 +461 car car 3 40 7 car otherprop Objects car car 2958343 n02958343 car.n.01 misc 40 +1255 shaving cream shaving cream 3 40 7 otherprop Objects n04186051 shaving_cream.n.01 objects 39 +1256 luggage stand luggage stand 3 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31 +599 shredder shredder 3 40 7 otherprop Objects n04210120 shredder.n.01 objects 39 +281 statue statue 3 40 7 sculpture otherprop Objects n04306847 statue.n.01 misc 40 +1257 urinal urinal 3 33 7 toilet toilet Objects toilet toilet n04515991 urinal.n.01 toilet 18 +1258 hose hose 3 40 7 otherprop Objects n03539875 hose.n.03 misc 40 +1259 bike pump bike pump 3 40 7 otherprop Objects objects 39 +319 coatrack coatrack 3 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 31 
+1260 bear bear 3 40 7 otherprop Objects objects 39 +28 wall lamp lamp 3 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28 +1261 humidifier humidifier 3 40 7 otherprop Objects objects 39 +546 toothpaste toothpaste 3 40 7 toothpaste otherprop Objects objects 39 +1262 mouthwash bottle mouthwash bottle 3 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +1263 poster cutter poster cutter 3 40 7 otherprop Objects objects 39 +1264 golf bag golf bag 3 37 7 bag bag Objects suitcase 2773838 n03445617 golf_bag.n.01 objects 39 +1265 food container food container 3 40 7 container otherprop Objects n03094503 container.n.01 objects 39 +1266 camera camera 3 40 7 otherprop Objects objects 39 +28 table lamp lamp 3 35 7 lamp lamp Objects lamp lamp 3636649 n04380533 table_lamp.n.01 lighting 28 +1267 yoga mat yoga mat 3 20 5 floor mat floor mat Floor n03727837 mat.n.01 floor 2 +1268 card card 3 40 7 otherprop Objects objects 39 +1269 mug mug 3 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39 +188 shower doors shower door 3 38 7 otherstructure Objects n04208936 shower.n.01 door 4 +689 cardboard cardboard 3 40 7 otherprop Objects objects 39 +1270 rack stand rack stand 3 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31 +1271 boxes of paper boxes of paper 3 29 7 box box Objects n02883344 box.n.01 objects 39 +1272 flag flag 3 40 7 otherprop Objects misc 40 +354 futon futon 3 39 6 mattress otherfurniture Furniture n03408444 futon.n.01 sofa 10 +339 magazine magazine 3 40 7 magazine otherprop Objects n06595351 magazine.n.01 objects 39 +1009 exit sign exit sign 3 40 7 exit sign otherprop Objects misc 40 +1273 rolled poster rolled poster 3 40 7 otherprop Objects objects 39 +1274 wheel wheel 3 40 7 otherprop Objects objects 39 +15 pictures picture 3 11 8 picture picture Picture n03931044 picture.n.01 picture 6 +1275 blackboard eraser blackboard eraser 3 40 7 eraser otherprop Objects 
n03294833 eraser.n.01 objects 39 +361 organizer organizer 3 40 7 otherprop Objects n03918737 personal_digital_assistant.n.01 objects 39 +1276 doll doll 3 40 7 toy otherprop Objects n03219135 doll.n.01 objects 39 +326 book rack book rack 3 39 6 bookrack otherfurniture Furniture objects 39 +1277 laundry bag laundry bag 3 40 7 laundry basket otherprop Objects basket 2801938 n03050864 clothes_hamper.n.01 objects 39 +1278 sponge sponge 3 40 7 otherprop Objects n01906749 sponge.n.04 objects 39 +116 seating seat 3 39 6 furniture otherfurniture Furniture n04161981 seat.n.03 furniture 36 +1184 folded chairs folded chair 2 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +1279 lotion bottle lotion bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +212 can can 2 40 7 can otherprop Objects can 2946921 n02946921 can.n.01 objects 39 +1280 lunch box lunch box 2 40 7 otherprop Objects objects 39 +1281 food display food display 2 40 7 otherprop Objects misc 40 +794 storage shelf storage shelf 2 40 7 otherprop Objects shelving 31 +1282 sliding wood door sliding wood door 2 40 7 otherprop Objects door 4 +955 pants pants 2 40 7 otherprop Objects n04489008 trouser.n.01 clothes 38 +387 wood wood 2 40 7 otherprop Objects misc 40 +69 boards board 2 38 7 board otherstructure Objects board_panel 35 +65 bottles bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +523 washcloth washcloth 2 40 7 otherprop Objects n04554523 washcloth.n.01 towel 20 +389 workbench workbench 2 39 6 bench otherfurniture Furniture bench table 4379243 n04600486 workbench.n.01 table 5 +29 open kitchen cabinet kitchen cabinet 2 3 6 cabinet cabinet Furniture n02933112 cabinet.n.01 cabinet 7 +1283 organizer shelf organizer shelf 2 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31 +146 frame frame 2 38 7 otherstructure Objects misc 40 +130 cups cup 2 40 7 cup 
otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39 +372 exercise ball exercise ball 2 40 7 ball otherprop Objects n04285146 sports_equipment.n.01 gym_equipment 33 +289 easel easel 2 39 6 stand otherfurniture Furniture n03262809 easel.n.01 furniture 36 +440 garbage bag garbage bag 2 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39 +321 roomba roomba 2 40 7 otherprop Objects objects 39 +976 garage door garage door 2 38 7 garage door otherstructure Objects door door 4 +1256 luggage rack luggage stand 2 39 6 stand otherfurniture Furniture n04038440 shelving 31 +1284 bike lock bike lock 2 40 7 otherprop Objects objects 39 +1285 briefcase briefcase 2 40 7 otherprop Objects n02900705 briefcase.n.01 objects 39 +357 hand towel hand towel 2 27 7 towel towel Objects n03490006 hand_towel.n.01 towel 20 +1286 bath products bath product 2 40 7 otherprop Objects objects 39 +1287 star star 2 40 7 otherprop Objects n09444783 star.n.03 misc 40 +365 map map 2 40 7 map otherprop Objects n03720163 map.n.01 misc 40 +1288 coffee bean bag coffee bean bag 2 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39 +81 headboard headboard 2 39 6 headboard otherfurniture Furniture n03502200 headboard.n.01 bed 11 +1289 ipad ipad 2 40 7 otherprop Objects objects 39 +1290 display rack display rack 2 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31 +948 traffic cone traffic cone 2 40 7 cone otherprop Objects cone objects 39 +174 toiletry toiletry 2 40 7 otherprop Objects n04447443 toiletry.n.01 objects 39 +1028 canopy canopy 2 40 7 otherprop Objects misc 40 +1291 massage chair massage chair 2 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +1292 paper organizer paper organizer 2 40 7 otherprop Objects objects 39 +1005 barricade barricade 2 40 7 otherprop Objects misc 40 +235 platform platform 2 38 7 otherstructure Objects misc 40 +1293 cap cap 2 40 7 hat otherprop Objects n03497657 hat.n.01 clothes 38 
+1294 dumbbell plates dumbbell plates 2 40 7 otherprop Objects objects 39 +1295 elevator elevator 2 38 7 otherstructure Objects misc 40 +1296 cooking pan cooking pan 2 40 7 pan otherprop Objects n03880531 pan.n.01 objects 39 +1297 trash bag trash bag 2 37 7 bag bag Objects objects 39 +1298 santa santa 2 40 7 otherprop Objects misc 40 +1299 jewelry box jewelry box 2 29 7 box box Objects n02883344 box.n.01 objects 39 +1300 boat boat 2 40 7 otherprop Objects misc 40 +1301 sock sock 2 21 7 clothes clothes Objects n04254777 sock.n.01 clothes 38 +1051 kinect kinect 2 40 7 kinect otherprop Objects objects 39 +566 crib crib 2 39 6 crib otherfurniture Furniture furniture 36 +1302 plastic storage bin plastic storage bin 2 40 7 container otherprop Objects n03094503 container.n.01 objects 39 +1062 cooler cooler 2 24 6 refridgerator refridgerator Furniture n03102654 cooler.n.01 appliances 37 +1303 kitchen apron kitchen apron 2 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38 +1304 dishwashing soap bottle dishwashing soap bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +1305 xbox controller xbox controller 2 40 7 otherprop Objects objects 39 +1306 banana holder banana holder 2 40 7 otherprop Objects objects 39 +298 ping pong paddle ping pong paddle 2 40 7 otherprop Objects table 5 +1307 airplane airplane 2 40 7 otherprop Objects misc 40 +1308 conditioner bottle conditioner bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +1309 tea kettle tea kettle 2 40 7 tea kettle otherprop Objects n04397768 teakettle.n.01 objects 39 +43 bedframe bedframe 2 39 6 otherfurniture Furniture n02822579 bedstead.n.01 bed 11 +1310 wood beam wood beam 2 38 7 otherstructure Objects beam 29 +593 toilet paper package toilet paper package 2 40 7 otherprop Objects objects 39 +1311 wall mounted coat rack wall mounted coat rack 2 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 31 +1312 film light 
film light 2 40 7 otherprop Objects lighting 28 +749 ceiling lamp ceiling lamp 1 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28 +623 chain chain 1 40 7 otherprop Objects chair 3 +1313 sofa sofa 1 6 9 sofa sofa Sofa sofa sofa sofa 4256520 n04256520 sofa.n.01 sofa 10 +99 closet wardrobe wardrobe 1 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36 +265 sweater sweater 1 40 7 otherprop Objects n04370048 sweater.n.01 clothes 38 +1314 kitchen mixer kitchen mixer 1 40 7 otherprop Objects appliances 37 +99 wardrobe wardrobe 1 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36 +1315 water softener water softener 1 40 7 otherprop Objects misc 40 +448 banister banister 1 38 7 banister otherstructure Objects n02788148 bannister.n.02 railing 30 +257 trolley trolley 1 40 7 trolley otherprop Objects n04335435 streetcar.n.01 misc 40 +1316 pantry shelf pantry shelf 1 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31 +786 sofa bed sofa bed 1 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11 +801 loofa loofa 1 40 7 otherprop Objects objects 39 +972 shower faucet handle shower faucet handle 1 40 7 handle otherprop Objects shower 23 +1317 toy piano toy piano 1 40 7 toy otherprop Objects n03964744 plaything.n.01 objects 39 +1318 fish fish 1 40 7 otherprop Objects n02512053 fish.n.01 objects 39 +75 file cabinets file cabinet 1 3 6 cabinet cabinet Furniture cabinet 2933112 n03337140 file.n.03 cabinet 7 +657 cat litter box cat litter box 1 29 7 box box Objects objects 39 +561 electric panel electric panel 1 40 7 otherprop Objects misc 40 +93 suitcases suitcase 1 40 7 luggage otherprop Objects n02774630 baggage.n.01 objects 39 +513 curtain rod curtain rod 1 38 7 curtain rod otherstructure Objects curtain 12 +411 bunk bed bunk bed 1 39 6 bunk bed otherfurniture Furniture bed bed bed 2818832 n02920259 bunk_bed.n.01 bed 11 +1122 chandelier 
chandelier 1 38 7 chandelier otherstructure Objects n03005285 chandelier.n.01 lighting 28 +922 tape tape 1 40 7 tape otherprop Objects objects 39 +88 plates plate 1 40 7 otherprop Objects n03959485 plate.n.04 objects 39 +518 alarm alarm 1 40 7 alarm otherprop Objects clock 3046257 n02694662 alarm_clock.n.01 objects 39 +814 fire hose fire hose 1 40 7 otherprop Objects n03346004 fire_hose.n.01 misc 40 +1319 toy dinosaur toy dinosaur 1 40 7 toy otherprop Objects n03964744 plaything.n.01 objects 39 +1320 cone cone 1 40 7 otherprop Objects objects 39 +649 glass doors glass door 1 8 12 door door Wall door n03221720 door.n.01 door 4 +607 hatrack hatrack 1 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 31 +819 subwoofer subwoofer 1 40 7 speaker otherprop Objects speaker 3691459 n04349401 subwoofer.n.01 objects 39 +1321 fire sprinkler fire sprinkler 1 40 7 otherprop Objects misc 40 +1322 trash cabinet trash cabinet 1 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +1204 pantry walls pantry wall 1 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +227 photo photo 1 40 7 photo otherprop Objects n03925226 photograph.n.01 picture 6 +817 barrier barrier 1 40 7 otherprop Objects n02796623 barrier.n.01 misc 40 +130 stacks of cups cup 1 40 7 otherprop Objects n03147509 cup.n.01 objects 39 +712 beachball beachball 1 40 7 ball otherprop Objects n02814224 beach_ball.n.01 objects 39 +1323 folded boxes folded boxes 1 40 7 otherprop Objects objects 39 +1324 contact lens solution bottle contact lens solution bottle 1 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +673 covered box covered box 1 29 7 box box Objects objects 39 +459 folder folder 1 40 7 folder otherprop Objects n03376279 folder.n.02 objects 39 +643 mail trays mail tray 1 40 7 mail tray otherprop Objects objects 39 +238 slipper slipper 1 40 7 otherprop Objects n04241394 slipper.n.01 clothes 38 +765 magazine rack magazine rack 1 39 6 stand 
otherfurniture Furniture n03704549 magazine_rack.n.01 shelving 31 +1008 sticker sticker 1 40 7 sticker otherprop Objects n07272545 gummed_label.n.01 objects 39 +225 lotion lotion 1 40 7 otherprop Objects n03690938 lotion.n.01 objects 39 +1083 buddha buddha 1 40 7 otherprop Objects objects 39 +813 file organizer file organizer 1 40 7 otherprop Objects objects 39 +138 paper towel rolls paper towel roll 1 40 7 paper towel otherprop Objects n03887697 paper_towel.n.01 towel 20 +1145 night lamp night lamp 1 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28 +796 fuse box fuse box 1 40 7 otherprop Objects misc 40 +1325 knife block knife block 1 40 7 otherprop Objects objects 39 +363 furnace furnace 1 39 6 furnace otherfurniture Furniture n03404449 furnace.n.01 +1174 cd cases cd case 1 40 7 otherprop Objects objects 39 +38 stools stool 1 40 7 stool otherprop Objects stool n04326896 stool.n.01 stool 19 +1326 hand sanitzer dispenser hand sanitzer dispenser 1 40 7 otherprop Objects n04254120 soap_dispenser.n.01 objects 39 +997 teapot teapot 1 40 7 tea pot otherprop Objects n04398044 teapot.n.01 objects 39 +1327 pen holder pen holder 1 40 7 otherprop Objects objects 39 +1328 tray rack tray rack 1 40 7 otherprop Objects objects 39 +1329 wig wig 1 40 7 otherprop Objects n04584207 wig.n.01 objects 39 +182 switch switch 1 40 7 otherprop Objects n04372370 switch.n.01 misc 40 +280 plastic containers plastic container 1 40 7 container otherprop Objects n03094503 container.n.01 objects 39 +1330 night light night light 1 40 7 otherprop Objects lighting 28 +1331 notepad notepad 1 40 7 otherprop Objects objects 39 +1332 mail bin mail bin 1 40 7 otherprop Objects misc 40 +1333 elevator button elevator button 1 40 7 otherprop Objects misc 40 +939 gaming wheel gaming wheel 1 40 7 otherprop Objects objects 39 +1334 drum set drum set 1 40 7 otherprop Objects objects 39 +480 cosmetic bag cosmetic bag 1 37 7 bag bag Objects objects 39 +907 coffee mug coffee mug 1 40 7 
vessel otherprop Objects cup or mug 3797390 n03063599 coffee_mug.n.01 objects 39 +1335 closet shelf closet shelf 1 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31 +1336 baby mobile baby mobile 1 40 7 otherprop Objects objects 39 +829 diaper bin diaper bin 1 40 7 bin otherprop Objects objects 39 +947 door wall door wall 1 1 12 wall wall Wall wall 1 +1116 stepstool stepstool 1 40 7 step stool otherprop Objects objects 39 +599 paper shredder shredder 1 40 7 otherprop Objects n04210120 shredder.n.01 objects 39 +733 dress rack dress rack 1 40 7 otherprop Objects n03238762 dress_rack.n.01 misc 40 +123 cover cover 1 40 7 blanket otherprop Objects objects 39 +506 shopping bag shopping bag 1 37 7 bag bag Objects n04204081 shopping_bag.n.01 objects 39 +569 sliding door sliding door 1 8 12 door door Wall door n04239074 sliding_door.n.01 door 4 +1337 exercise bike exercise bike 1 40 7 machine otherprop Objects n04210120 shredder.n.01 gym_equipment 33 +1338 recliner chair recliner chair 1 5 4 chair chair Chair chair chair chair 3001627 n03238762 dress_rack.n.01 chair 3 +1314 kitchenaid mixer kitchen mixer 1 40 7 otherprop Objects appliances 37 +1339 soda can soda can 1 40 7 can otherprop Objects can 2946921 n02946921 can.n.01 objects 39 +1340 stovetop stovetop 1 38 7 stove otherstructure Objects stove 4330267 n04330267 stove.n.02 appliances 37 +851 stepladder stepladder 1 39 6 ladder otherfurniture Furniture stairs n04315599 step_ladder.n.01 stairs 16 +142 tap tap 1 40 7 faucet otherprop Objects faucet 3325088 n04559451 water_faucet.n.01 objects 39 +436 cable cable 1 40 7 cables otherprop Objects objects 39 +1341 baby changing station baby changing station 1 39 6 otherfurniture Furniture furniture 36 +1342 costume costume 1 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38 +885 rocking chair rocking chair 1 5 4 chair chair Chair chair chair chair 3001627 n04099969 rocking_chair.n.01 chair 3 +693 binder binder 1 40 7 
binder otherprop Objects objects 39 +815 media center media center 1 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +401 towel rack towel rack 1 40 7 otherprop Objects n04459773 towel_rack.n.01 misc 40 +1343 medal medal 1 40 7 otherprop Objects objects 39 +1184 stack of folded chairs folded chair 1 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +1344 telescope telescope 1 40 7 otherprop Objects n04403638 telescope.n.01 objects 39 +1345 closet doorframe closet doorframe 1 8 12 door door Wall door door 4 +160 glass glass 1 38 7 glass otherstructure Objects n03438257 glass.n.02 misc 40 +1126 baseball cap baseball cap 1 40 7 otherprop Objects cap 2954340 n02799323 baseball_cap.n.01 clothes 38 +1346 battery disposal jar battery disposal jar 1 40 7 jar otherprop Objects jar 3593526 n03593526 jar.n.01 objects 39 +332 mop mop 1 40 7 otherprop Objects n04367480 swab.n.02 objects 39 +397 tank tank 1 40 7 otherprop Objects objects 39 +643 mail tray mail tray 1 40 7 mail tray otherprop Objects objects 39 +551 centerpiece centerpiece 1 40 7 centerpiece otherprop Objects n02994419 centerpiece.n.02 objects 39 +1163 object stick 1 40 7 stick otherprop Objects objects 39 +1347 closet floor closet floor 1 2 5 floor floor Floor n03365592 floor.n.01 floor 2 +1348 dryer sheets dryer sheets 1 40 7 otherprop Objects objects 39 +803 bycicle bycicle 1 40 7 otherprop Objects misc 40 +484 flower stand flower stand 1 39 6 stand otherfurniture Furniture furniture 36 +1349 air mattress air mattress 1 4 1 bed bed Bed bed bed bed 2818832 n02690809 air_mattress.n.01 bed 11 +1350 clip clip 1 40 7 otherprop Objects objects 39 +222 side table side table 1 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5 +1253 pizza boxes pizza box 1 29 7 box box Objects n02883344 box.n.01 objects 39 +1351 display display 1 39 7 otherfurniture Furniture n03211117 display.n.06 misc 40 +1352 postcard postcard 1 40 7 otherprop 
Objects objects 39 +828 display sign display sign 1 40 7 sign otherprop Objects misc 40 +1353 paper towel paper towel 1 40 7 paper towel otherprop Objects n03887697 paper_towel.n.01 towel 20 +612 boots boot 1 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38 +1354 tennis racket bag tennis racket bag 1 40 7 otherprop Objects objects 39 +1355 air hockey table air hockey table 1 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5 +1301 socks sock 1 21 7 clothes clothes Objects n04254777 sock.n.01 clothes 38 +1356 food bag food bag 1 37 7 bag bag Objects objects 39 +1199 clothes hangers clothes hanger 1 40 7 otherprop Objects n03057920 coat_hanger.n.01 misc 40 +1357 starbucks cup starbucks cup 1 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39 \ No newline at end of file diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2_test.txt b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2_test.txt new file mode 100644 index 0000000..79d15b0 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2_test.txt @@ -0,0 +1,100 @@ +scene0707_00 +scene0708_00 +scene0709_00 +scene0710_00 +scene0711_00 +scene0712_00 +scene0713_00 +scene0714_00 +scene0715_00 +scene0716_00 +scene0717_00 +scene0718_00 +scene0719_00 +scene0720_00 +scene0721_00 +scene0722_00 +scene0723_00 +scene0724_00 +scene0725_00 +scene0726_00 +scene0727_00 +scene0728_00 +scene0729_00 +scene0730_00 +scene0731_00 +scene0732_00 +scene0733_00 +scene0734_00 +scene0735_00 +scene0736_00 +scene0737_00 +scene0738_00 +scene0739_00 +scene0740_00 +scene0741_00 +scene0742_00 +scene0743_00 +scene0744_00 +scene0745_00 +scene0746_00 +scene0747_00 +scene0748_00 +scene0749_00 +scene0750_00 +scene0751_00 +scene0752_00 +scene0753_00 +scene0754_00 +scene0755_00 +scene0756_00 
+scene0757_00 +scene0758_00 +scene0759_00 +scene0760_00 +scene0761_00 +scene0762_00 +scene0763_00 +scene0764_00 +scene0765_00 +scene0766_00 +scene0767_00 +scene0768_00 +scene0769_00 +scene0770_00 +scene0771_00 +scene0772_00 +scene0773_00 +scene0774_00 +scene0775_00 +scene0776_00 +scene0777_00 +scene0778_00 +scene0779_00 +scene0780_00 +scene0781_00 +scene0782_00 +scene0783_00 +scene0784_00 +scene0785_00 +scene0786_00 +scene0787_00 +scene0788_00 +scene0789_00 +scene0790_00 +scene0791_00 +scene0792_00 +scene0793_00 +scene0794_00 +scene0795_00 +scene0796_00 +scene0797_00 +scene0798_00 +scene0799_00 +scene0800_00 +scene0801_00 +scene0802_00 +scene0803_00 +scene0804_00 +scene0805_00 +scene0806_00 diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2_train.txt b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2_train.txt new file mode 100644 index 0000000..ef625f1 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2_train.txt @@ -0,0 +1,1201 @@ +scene0191_00 +scene0191_01 +scene0191_02 +scene0119_00 +scene0230_00 +scene0528_00 +scene0528_01 +scene0705_00 +scene0705_01 +scene0705_02 +scene0415_00 +scene0415_01 +scene0415_02 +scene0007_00 +scene0141_00 +scene0141_01 +scene0141_02 +scene0515_00 +scene0515_01 +scene0515_02 +scene0447_00 +scene0447_01 +scene0447_02 +scene0531_00 +scene0503_00 +scene0285_00 +scene0069_00 +scene0584_00 +scene0584_01 +scene0584_02 +scene0581_00 +scene0581_01 +scene0581_02 +scene0620_00 +scene0620_01 +scene0263_00 +scene0263_01 +scene0481_00 +scene0481_01 +scene0020_00 +scene0020_01 +scene0291_00 +scene0291_01 +scene0291_02 +scene0469_00 +scene0469_01 +scene0469_02 +scene0659_00 +scene0659_01 +scene0024_00 +scene0024_01 +scene0024_02 +scene0564_00 +scene0117_00 +scene0027_00 +scene0027_01 +scene0027_02 +scene0028_00 +scene0330_00 +scene0418_00 
+scene0418_01 +scene0418_02 +scene0233_00 +scene0233_01 +scene0673_00 +scene0673_01 +scene0673_02 +scene0673_03 +scene0673_04 +scene0673_05 +scene0585_00 +scene0585_01 +scene0362_00 +scene0362_01 +scene0362_02 +scene0362_03 +scene0035_00 +scene0035_01 +scene0358_00 +scene0358_01 +scene0358_02 +scene0037_00 +scene0194_00 +scene0321_00 +scene0293_00 +scene0293_01 +scene0623_00 +scene0623_01 +scene0592_00 +scene0592_01 +scene0569_00 +scene0569_01 +scene0413_00 +scene0313_00 +scene0313_01 +scene0313_02 +scene0480_00 +scene0480_01 +scene0401_00 +scene0517_00 +scene0517_01 +scene0517_02 +scene0032_00 +scene0032_01 +scene0613_00 +scene0613_01 +scene0613_02 +scene0306_00 +scene0306_01 +scene0052_00 +scene0052_01 +scene0052_02 +scene0053_00 +scene0444_00 +scene0444_01 +scene0055_00 +scene0055_01 +scene0055_02 +scene0560_00 +scene0589_00 +scene0589_01 +scene0589_02 +scene0610_00 +scene0610_01 +scene0610_02 +scene0364_00 +scene0364_01 +scene0383_00 +scene0383_01 +scene0383_02 +scene0006_00 +scene0006_01 +scene0006_02 +scene0275_00 +scene0451_00 +scene0451_01 +scene0451_02 +scene0451_03 +scene0451_04 +scene0451_05 +scene0135_00 +scene0065_00 +scene0065_01 +scene0065_02 +scene0104_00 +scene0674_00 +scene0674_01 +scene0448_00 +scene0448_01 +scene0448_02 +scene0502_00 +scene0502_01 +scene0502_02 +scene0440_00 +scene0440_01 +scene0440_02 +scene0071_00 +scene0072_00 +scene0072_01 +scene0072_02 +scene0509_00 +scene0509_01 +scene0509_02 +scene0649_00 +scene0649_01 +scene0602_00 +scene0694_00 +scene0694_01 +scene0101_00 +scene0101_01 +scene0101_02 +scene0101_03 +scene0101_04 +scene0101_05 +scene0218_00 +scene0218_01 +scene0579_00 +scene0579_01 +scene0579_02 +scene0039_00 +scene0039_01 +scene0493_00 +scene0493_01 +scene0242_00 +scene0242_01 +scene0242_02 +scene0083_00 +scene0083_01 +scene0127_00 +scene0127_01 +scene0662_00 +scene0662_01 +scene0662_02 +scene0018_00 +scene0087_00 +scene0087_01 +scene0087_02 +scene0332_00 +scene0332_01 +scene0332_02 +scene0628_00 +scene0628_01 
+scene0628_02 +scene0134_00 +scene0134_01 +scene0134_02 +scene0238_00 +scene0238_01 +scene0092_00 +scene0092_01 +scene0092_02 +scene0092_03 +scene0092_04 +scene0022_00 +scene0022_01 +scene0467_00 +scene0392_00 +scene0392_01 +scene0392_02 +scene0424_00 +scene0424_01 +scene0424_02 +scene0646_00 +scene0646_01 +scene0646_02 +scene0098_00 +scene0098_01 +scene0044_00 +scene0044_01 +scene0044_02 +scene0510_00 +scene0510_01 +scene0510_02 +scene0571_00 +scene0571_01 +scene0166_00 +scene0166_01 +scene0166_02 +scene0563_00 +scene0172_00 +scene0172_01 +scene0388_00 +scene0388_01 +scene0215_00 +scene0215_01 +scene0252_00 +scene0287_00 +scene0668_00 +scene0572_00 +scene0572_01 +scene0572_02 +scene0026_00 +scene0224_00 +scene0113_00 +scene0113_01 +scene0551_00 +scene0381_00 +scene0381_01 +scene0381_02 +scene0371_00 +scene0371_01 +scene0460_00 +scene0118_00 +scene0118_01 +scene0118_02 +scene0417_00 +scene0008_00 +scene0634_00 +scene0521_00 +scene0123_00 +scene0123_01 +scene0123_02 +scene0045_00 +scene0045_01 +scene0511_00 +scene0511_01 +scene0114_00 +scene0114_01 +scene0114_02 +scene0070_00 +scene0029_00 +scene0029_01 +scene0029_02 +scene0129_00 +scene0103_00 +scene0103_01 +scene0002_00 +scene0002_01 +scene0132_00 +scene0132_01 +scene0132_02 +scene0124_00 +scene0124_01 +scene0143_00 +scene0143_01 +scene0143_02 +scene0604_00 +scene0604_01 +scene0604_02 +scene0507_00 +scene0105_00 +scene0105_01 +scene0105_02 +scene0428_00 +scene0428_01 +scene0311_00 +scene0140_00 +scene0140_01 +scene0182_00 +scene0182_01 +scene0182_02 +scene0142_00 +scene0142_01 +scene0399_00 +scene0399_01 +scene0012_00 +scene0012_01 +scene0012_02 +scene0060_00 +scene0060_01 +scene0370_00 +scene0370_01 +scene0370_02 +scene0310_00 +scene0310_01 +scene0310_02 +scene0661_00 +scene0650_00 +scene0152_00 +scene0152_01 +scene0152_02 +scene0158_00 +scene0158_01 +scene0158_02 +scene0482_00 +scene0482_01 +scene0600_00 +scene0600_01 +scene0600_02 +scene0393_00 +scene0393_01 +scene0393_02 +scene0562_00 +scene0174_00 
+scene0174_01 +scene0157_00 +scene0157_01 +scene0161_00 +scene0161_01 +scene0161_02 +scene0159_00 +scene0254_00 +scene0254_01 +scene0115_00 +scene0115_01 +scene0115_02 +scene0162_00 +scene0163_00 +scene0163_01 +scene0523_00 +scene0523_01 +scene0523_02 +scene0459_00 +scene0459_01 +scene0175_00 +scene0085_00 +scene0085_01 +scene0279_00 +scene0279_01 +scene0279_02 +scene0201_00 +scene0201_01 +scene0201_02 +scene0283_00 +scene0456_00 +scene0456_01 +scene0429_00 +scene0043_00 +scene0043_01 +scene0419_00 +scene0419_01 +scene0419_02 +scene0368_00 +scene0368_01 +scene0348_00 +scene0348_01 +scene0348_02 +scene0442_00 +scene0178_00 +scene0380_00 +scene0380_01 +scene0380_02 +scene0165_00 +scene0165_01 +scene0165_02 +scene0181_00 +scene0181_01 +scene0181_02 +scene0181_03 +scene0333_00 +scene0614_00 +scene0614_01 +scene0614_02 +scene0404_00 +scene0404_01 +scene0404_02 +scene0185_00 +scene0126_00 +scene0126_01 +scene0126_02 +scene0519_00 +scene0236_00 +scene0236_01 +scene0189_00 +scene0075_00 +scene0267_00 +scene0192_00 +scene0192_01 +scene0192_02 +scene0281_00 +scene0420_00 +scene0420_01 +scene0420_02 +scene0195_00 +scene0195_01 +scene0195_02 +scene0597_00 +scene0597_01 +scene0597_02 +scene0041_00 +scene0041_01 +scene0111_00 +scene0111_01 +scene0111_02 +scene0666_00 +scene0666_01 +scene0666_02 +scene0200_00 +scene0200_01 +scene0200_02 +scene0536_00 +scene0536_01 +scene0536_02 +scene0390_00 +scene0280_00 +scene0280_01 +scene0280_02 +scene0344_00 +scene0344_01 +scene0205_00 +scene0205_01 +scene0205_02 +scene0484_00 +scene0484_01 +scene0009_00 +scene0009_01 +scene0009_02 +scene0302_00 +scene0302_01 +scene0209_00 +scene0209_01 +scene0209_02 +scene0210_00 +scene0210_01 +scene0395_00 +scene0395_01 +scene0395_02 +scene0683_00 +scene0601_00 +scene0601_01 +scene0214_00 +scene0214_01 +scene0214_02 +scene0477_00 +scene0477_01 +scene0439_00 +scene0439_01 +scene0468_00 +scene0468_01 +scene0468_02 +scene0546_00 +scene0466_00 +scene0466_01 +scene0220_00 +scene0220_01 +scene0220_02 
+scene0122_00 +scene0122_01 +scene0130_00 +scene0110_00 +scene0110_01 +scene0110_02 +scene0327_00 +scene0156_00 +scene0266_00 +scene0266_01 +scene0001_00 +scene0001_01 +scene0228_00 +scene0199_00 +scene0219_00 +scene0464_00 +scene0232_00 +scene0232_01 +scene0232_02 +scene0299_00 +scene0299_01 +scene0530_00 +scene0363_00 +scene0453_00 +scene0453_01 +scene0570_00 +scene0570_01 +scene0570_02 +scene0183_00 +scene0239_00 +scene0239_01 +scene0239_02 +scene0373_00 +scene0373_01 +scene0241_00 +scene0241_01 +scene0241_02 +scene0188_00 +scene0622_00 +scene0622_01 +scene0244_00 +scene0244_01 +scene0691_00 +scene0691_01 +scene0206_00 +scene0206_01 +scene0206_02 +scene0247_00 +scene0247_01 +scene0061_00 +scene0061_01 +scene0082_00 +scene0250_00 +scene0250_01 +scene0250_02 +scene0501_00 +scene0501_01 +scene0501_02 +scene0320_00 +scene0320_01 +scene0320_02 +scene0320_03 +scene0631_00 +scene0631_01 +scene0631_02 +scene0255_00 +scene0255_01 +scene0255_02 +scene0047_00 +scene0265_00 +scene0265_01 +scene0265_02 +scene0004_00 +scene0336_00 +scene0336_01 +scene0058_00 +scene0058_01 +scene0260_00 +scene0260_01 +scene0260_02 +scene0243_00 +scene0603_00 +scene0603_01 +scene0093_00 +scene0093_01 +scene0093_02 +scene0109_00 +scene0109_01 +scene0434_00 +scene0434_01 +scene0434_02 +scene0290_00 +scene0627_00 +scene0627_01 +scene0470_00 +scene0470_01 +scene0137_00 +scene0137_01 +scene0137_02 +scene0270_00 +scene0270_01 +scene0270_02 +scene0271_00 +scene0271_01 +scene0504_00 +scene0274_00 +scene0274_01 +scene0274_02 +scene0036_00 +scene0036_01 +scene0276_00 +scene0276_01 +scene0272_00 +scene0272_01 +scene0499_00 +scene0698_00 +scene0698_01 +scene0051_00 +scene0051_01 +scene0051_02 +scene0051_03 +scene0108_00 +scene0245_00 +scene0369_00 +scene0369_01 +scene0369_02 +scene0284_00 +scene0289_00 +scene0289_01 +scene0286_00 +scene0286_01 +scene0286_02 +scene0286_03 +scene0031_00 +scene0031_01 +scene0031_02 +scene0545_00 +scene0545_01 +scene0545_02 +scene0557_00 +scene0557_01 +scene0557_02 
+scene0533_00 +scene0533_01 +scene0116_00 +scene0116_01 +scene0116_02 +scene0611_00 +scene0611_01 +scene0688_00 +scene0294_00 +scene0294_01 +scene0294_02 +scene0295_00 +scene0295_01 +scene0296_00 +scene0296_01 +scene0596_00 +scene0596_01 +scene0596_02 +scene0532_00 +scene0532_01 +scene0637_00 +scene0638_00 +scene0121_00 +scene0121_01 +scene0121_02 +scene0040_00 +scene0040_01 +scene0197_00 +scene0197_01 +scene0197_02 +scene0410_00 +scene0410_01 +scene0305_00 +scene0305_01 +scene0615_00 +scene0615_01 +scene0703_00 +scene0703_01 +scene0555_00 +scene0297_00 +scene0297_01 +scene0297_02 +scene0582_00 +scene0582_01 +scene0582_02 +scene0023_00 +scene0094_00 +scene0013_00 +scene0013_01 +scene0013_02 +scene0136_00 +scene0136_01 +scene0136_02 +scene0407_00 +scene0407_01 +scene0062_00 +scene0062_01 +scene0062_02 +scene0386_00 +scene0318_00 +scene0554_00 +scene0554_01 +scene0497_00 +scene0213_00 +scene0258_00 +scene0323_00 +scene0323_01 +scene0324_00 +scene0324_01 +scene0016_00 +scene0016_01 +scene0016_02 +scene0681_00 +scene0398_00 +scene0398_01 +scene0227_00 +scene0090_00 +scene0066_00 +scene0262_00 +scene0262_01 +scene0155_00 +scene0155_01 +scene0155_02 +scene0352_00 +scene0352_01 +scene0352_02 +scene0038_00 +scene0038_01 +scene0038_02 +scene0335_00 +scene0335_01 +scene0335_02 +scene0261_00 +scene0261_01 +scene0261_02 +scene0261_03 +scene0640_00 +scene0640_01 +scene0640_02 +scene0080_00 +scene0080_01 +scene0080_02 +scene0403_00 +scene0403_01 +scene0282_00 +scene0282_01 +scene0282_02 +scene0682_00 +scene0173_00 +scene0173_01 +scene0173_02 +scene0522_00 +scene0687_00 +scene0345_00 +scene0345_01 +scene0612_00 +scene0612_01 +scene0411_00 +scene0411_01 +scene0411_02 +scene0625_00 +scene0625_01 +scene0211_00 +scene0211_01 +scene0211_02 +scene0211_03 +scene0676_00 +scene0676_01 +scene0179_00 +scene0498_00 +scene0498_01 +scene0498_02 +scene0547_00 +scene0547_01 +scene0547_02 +scene0269_00 +scene0269_01 +scene0269_02 +scene0366_00 +scene0680_00 +scene0680_01 +scene0588_00 
+scene0588_01 +scene0588_02 +scene0588_03 +scene0346_00 +scene0346_01 +scene0359_00 +scene0359_01 +scene0014_00 +scene0120_00 +scene0120_01 +scene0212_00 +scene0212_01 +scene0212_02 +scene0176_00 +scene0049_00 +scene0259_00 +scene0259_01 +scene0586_00 +scene0586_01 +scene0586_02 +scene0309_00 +scene0309_01 +scene0125_00 +scene0455_00 +scene0177_00 +scene0177_01 +scene0177_02 +scene0326_00 +scene0372_00 +scene0171_00 +scene0171_01 +scene0374_00 +scene0654_00 +scene0654_01 +scene0445_00 +scene0445_01 +scene0475_00 +scene0475_01 +scene0475_02 +scene0349_00 +scene0349_01 +scene0234_00 +scene0669_00 +scene0669_01 +scene0375_00 +scene0375_01 +scene0375_02 +scene0387_00 +scene0387_01 +scene0387_02 +scene0312_00 +scene0312_01 +scene0312_02 +scene0384_00 +scene0385_00 +scene0385_01 +scene0385_02 +scene0000_00 +scene0000_01 +scene0000_02 +scene0376_00 +scene0376_01 +scene0376_02 +scene0301_00 +scene0301_01 +scene0301_02 +scene0322_00 +scene0542_00 +scene0079_00 +scene0079_01 +scene0099_00 +scene0099_01 +scene0476_00 +scene0476_01 +scene0476_02 +scene0394_00 +scene0394_01 +scene0147_00 +scene0147_01 +scene0067_00 +scene0067_01 +scene0067_02 +scene0397_00 +scene0397_01 +scene0337_00 +scene0337_01 +scene0337_02 +scene0431_00 +scene0223_00 +scene0223_01 +scene0223_02 +scene0010_00 +scene0010_01 +scene0402_00 +scene0268_00 +scene0268_01 +scene0268_02 +scene0679_00 +scene0679_01 +scene0405_00 +scene0128_00 +scene0408_00 +scene0408_01 +scene0190_00 +scene0107_00 +scene0076_00 +scene0167_00 +scene0361_00 +scene0361_01 +scene0361_02 +scene0216_00 +scene0202_00 +scene0303_00 +scene0303_01 +scene0303_02 +scene0446_00 +scene0446_01 +scene0089_00 +scene0089_01 +scene0089_02 +scene0360_00 +scene0150_00 +scene0150_01 +scene0150_02 +scene0421_00 +scene0421_01 +scene0421_02 +scene0454_00 +scene0626_00 +scene0626_01 +scene0626_02 +scene0186_00 +scene0186_01 +scene0538_00 +scene0479_00 +scene0479_01 +scene0479_02 +scene0656_00 +scene0656_01 +scene0656_02 +scene0656_03 +scene0525_00 
+scene0525_01 +scene0525_02 +scene0308_00 +scene0396_00 +scene0396_01 +scene0396_02 +scene0624_00 +scene0292_00 +scene0292_01 +scene0632_00 +scene0253_00 +scene0021_00 +scene0325_00 +scene0325_01 +scene0437_00 +scene0437_01 +scene0438_00 +scene0590_00 +scene0590_01 +scene0400_00 +scene0400_01 +scene0541_00 +scene0541_01 +scene0541_02 +scene0677_00 +scene0677_01 +scene0677_02 +scene0443_00 +scene0315_00 +scene0288_00 +scene0288_01 +scene0288_02 +scene0422_00 +scene0672_00 +scene0672_01 +scene0184_00 +scene0449_00 +scene0449_01 +scene0449_02 +scene0048_00 +scene0048_01 +scene0138_00 +scene0452_00 +scene0452_01 +scene0452_02 +scene0667_00 +scene0667_01 +scene0667_02 +scene0463_00 +scene0463_01 +scene0078_00 +scene0078_01 +scene0078_02 +scene0636_00 +scene0457_00 +scene0457_01 +scene0457_02 +scene0465_00 +scene0465_01 +scene0577_00 +scene0151_00 +scene0151_01 +scene0339_00 +scene0573_00 +scene0573_01 +scene0154_00 +scene0096_00 +scene0096_01 +scene0096_02 +scene0235_00 +scene0168_00 +scene0168_01 +scene0168_02 +scene0594_00 +scene0587_00 +scene0587_01 +scene0587_02 +scene0587_03 +scene0229_00 +scene0229_01 +scene0229_02 +scene0512_00 +scene0106_00 +scene0106_01 +scene0106_02 +scene0472_00 +scene0472_01 +scene0472_02 +scene0489_00 +scene0489_01 +scene0489_02 +scene0425_00 +scene0425_01 +scene0641_00 +scene0526_00 +scene0526_01 +scene0317_00 +scene0317_01 +scene0544_00 +scene0017_00 +scene0017_01 +scene0017_02 +scene0042_00 +scene0042_01 +scene0042_02 +scene0576_00 +scene0576_01 +scene0576_02 +scene0347_00 +scene0347_01 +scene0347_02 +scene0436_00 +scene0226_00 +scene0226_01 +scene0485_00 +scene0486_00 +scene0487_00 +scene0487_01 +scene0619_00 +scene0097_00 +scene0367_00 +scene0367_01 +scene0491_00 +scene0492_00 +scene0492_01 +scene0005_00 +scene0005_01 +scene0543_00 +scene0543_01 +scene0543_02 +scene0657_00 +scene0341_00 +scene0341_01 +scene0534_00 +scene0534_01 +scene0319_00 +scene0273_00 +scene0273_01 +scene0225_00 +scene0198_00 +scene0003_00 +scene0003_01 
+scene0003_02 +scene0409_00 +scene0409_01 +scene0331_00 +scene0331_01 +scene0505_00 +scene0505_01 +scene0505_02 +scene0505_03 +scene0505_04 +scene0506_00 +scene0057_00 +scene0057_01 +scene0074_00 +scene0074_01 +scene0074_02 +scene0091_00 +scene0112_00 +scene0112_01 +scene0112_02 +scene0240_00 +scene0102_00 +scene0102_01 +scene0513_00 +scene0514_00 +scene0514_01 +scene0537_00 +scene0516_00 +scene0516_01 +scene0495_00 +scene0617_00 +scene0133_00 +scene0520_00 +scene0520_01 +scene0635_00 +scene0635_01 +scene0054_00 +scene0473_00 +scene0473_01 +scene0524_00 +scene0524_01 +scene0379_00 +scene0471_00 +scene0471_01 +scene0471_02 +scene0566_00 +scene0248_00 +scene0248_01 +scene0248_02 +scene0529_00 +scene0529_01 +scene0529_02 +scene0391_00 +scene0264_00 +scene0264_01 +scene0264_02 +scene0675_00 +scene0675_01 +scene0350_00 +scene0350_01 +scene0350_02 +scene0450_00 +scene0068_00 +scene0068_01 +scene0237_00 +scene0237_01 +scene0365_00 +scene0365_01 +scene0365_02 +scene0605_00 +scene0605_01 +scene0539_00 +scene0539_01 +scene0539_02 +scene0540_00 +scene0540_01 +scene0540_02 +scene0170_00 +scene0170_01 +scene0170_02 +scene0433_00 +scene0340_00 +scene0340_01 +scene0340_02 +scene0160_00 +scene0160_01 +scene0160_02 +scene0160_03 +scene0160_04 +scene0059_00 +scene0059_01 +scene0059_02 +scene0056_00 +scene0056_01 +scene0478_00 +scene0478_01 +scene0548_00 +scene0548_01 +scene0548_02 +scene0204_00 +scene0204_01 +scene0204_02 +scene0033_00 +scene0145_00 +scene0483_00 +scene0508_00 +scene0508_01 +scene0508_02 +scene0180_00 +scene0148_00 +scene0556_00 +scene0556_01 +scene0416_00 +scene0416_01 +scene0416_02 +scene0416_03 +scene0416_04 +scene0073_00 +scene0073_01 +scene0073_02 +scene0073_03 +scene0034_00 +scene0034_01 +scene0034_02 +scene0639_00 +scene0561_00 +scene0561_01 +scene0298_00 +scene0692_00 +scene0692_01 +scene0692_02 +scene0692_03 +scene0692_04 +scene0642_00 +scene0642_01 +scene0642_02 +scene0642_03 +scene0630_00 +scene0630_01 +scene0630_02 +scene0630_03 +scene0630_04 
+scene0630_05 +scene0630_06 +scene0706_00 +scene0567_00 +scene0567_01 diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2_val.txt b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2_val.txt new file mode 100644 index 0000000..b9e7d92 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2_val.txt @@ -0,0 +1,312 @@ +scene0568_00 +scene0568_01 +scene0568_02 +scene0304_00 +scene0488_00 +scene0488_01 +scene0412_00 +scene0412_01 +scene0217_00 +scene0019_00 +scene0019_01 +scene0414_00 +scene0575_00 +scene0575_01 +scene0575_02 +scene0426_00 +scene0426_01 +scene0426_02 +scene0426_03 +scene0549_00 +scene0549_01 +scene0578_00 +scene0578_01 +scene0578_02 +scene0665_00 +scene0665_01 +scene0050_00 +scene0050_01 +scene0050_02 +scene0257_00 +scene0025_00 +scene0025_01 +scene0025_02 +scene0583_00 +scene0583_01 +scene0583_02 +scene0701_00 +scene0701_01 +scene0701_02 +scene0580_00 +scene0580_01 +scene0565_00 +scene0169_00 +scene0169_01 +scene0655_00 +scene0655_01 +scene0655_02 +scene0063_00 +scene0221_00 +scene0221_01 +scene0591_00 +scene0591_01 +scene0591_02 +scene0678_00 +scene0678_01 +scene0678_02 +scene0462_00 +scene0427_00 +scene0595_00 +scene0193_00 +scene0193_01 +scene0164_00 +scene0164_01 +scene0164_02 +scene0164_03 +scene0598_00 +scene0598_01 +scene0598_02 +scene0599_00 +scene0599_01 +scene0599_02 +scene0328_00 +scene0300_00 +scene0300_01 +scene0354_00 +scene0458_00 +scene0458_01 +scene0423_00 +scene0423_01 +scene0423_02 +scene0307_00 +scene0307_01 +scene0307_02 +scene0606_00 +scene0606_01 +scene0606_02 +scene0432_00 +scene0432_01 +scene0608_00 +scene0608_01 +scene0608_02 +scene0651_00 +scene0651_01 +scene0651_02 +scene0430_00 +scene0430_01 +scene0689_00 +scene0357_00 +scene0357_01 +scene0574_00 +scene0574_01 +scene0574_02 +scene0329_00 +scene0329_01 +scene0329_02 
+scene0153_00 +scene0153_01 +scene0616_00 +scene0616_01 +scene0671_00 +scene0671_01 +scene0618_00 +scene0382_00 +scene0382_01 +scene0490_00 +scene0621_00 +scene0607_00 +scene0607_01 +scene0149_00 +scene0695_00 +scene0695_01 +scene0695_02 +scene0695_03 +scene0389_00 +scene0377_00 +scene0377_01 +scene0377_02 +scene0342_00 +scene0139_00 +scene0629_00 +scene0629_01 +scene0629_02 +scene0496_00 +scene0633_00 +scene0633_01 +scene0518_00 +scene0652_00 +scene0406_00 +scene0406_01 +scene0406_02 +scene0144_00 +scene0144_01 +scene0494_00 +scene0278_00 +scene0278_01 +scene0316_00 +scene0609_00 +scene0609_01 +scene0609_02 +scene0609_03 +scene0084_00 +scene0084_01 +scene0084_02 +scene0696_00 +scene0696_01 +scene0696_02 +scene0351_00 +scene0351_01 +scene0643_00 +scene0644_00 +scene0645_00 +scene0645_01 +scene0645_02 +scene0081_00 +scene0081_01 +scene0081_02 +scene0647_00 +scene0647_01 +scene0535_00 +scene0353_00 +scene0353_01 +scene0353_02 +scene0559_00 +scene0559_01 +scene0559_02 +scene0593_00 +scene0593_01 +scene0246_00 +scene0653_00 +scene0653_01 +scene0064_00 +scene0064_01 +scene0356_00 +scene0356_01 +scene0356_02 +scene0030_00 +scene0030_01 +scene0030_02 +scene0222_00 +scene0222_01 +scene0338_00 +scene0338_01 +scene0338_02 +scene0378_00 +scene0378_01 +scene0378_02 +scene0660_00 +scene0553_00 +scene0553_01 +scene0553_02 +scene0527_00 +scene0663_00 +scene0663_01 +scene0663_02 +scene0664_00 +scene0664_01 +scene0664_02 +scene0334_00 +scene0334_01 +scene0334_02 +scene0046_00 +scene0046_01 +scene0046_02 +scene0203_00 +scene0203_01 +scene0203_02 +scene0088_00 +scene0088_01 +scene0088_02 +scene0088_03 +scene0086_00 +scene0086_01 +scene0086_02 +scene0670_00 +scene0670_01 +scene0256_00 +scene0256_01 +scene0256_02 +scene0249_00 +scene0441_00 +scene0658_00 +scene0704_00 +scene0704_01 +scene0187_00 +scene0187_01 +scene0131_00 +scene0131_01 +scene0131_02 +scene0207_00 +scene0207_01 +scene0207_02 +scene0461_00 +scene0011_00 +scene0011_01 +scene0343_00 +scene0251_00 +scene0077_00 
+scene0077_01 +scene0684_00 +scene0684_01 +scene0550_00 +scene0686_00 +scene0686_01 +scene0686_02 +scene0208_00 +scene0500_00 +scene0500_01 +scene0552_00 +scene0552_01 +scene0648_00 +scene0648_01 +scene0435_00 +scene0435_01 +scene0435_02 +scene0435_03 +scene0690_00 +scene0690_01 +scene0693_00 +scene0693_01 +scene0693_02 +scene0700_00 +scene0700_01 +scene0700_02 +scene0699_00 +scene0231_00 +scene0231_01 +scene0231_02 +scene0697_00 +scene0697_01 +scene0697_02 +scene0697_03 +scene0474_00 +scene0474_01 +scene0474_02 +scene0474_03 +scene0474_04 +scene0474_05 +scene0355_00 +scene0355_01 +scene0146_00 +scene0146_01 +scene0146_02 +scene0196_00 +scene0702_00 +scene0702_01 +scene0702_02 +scene0314_00 +scene0277_00 +scene0277_01 +scene0277_02 +scene0095_00 +scene0095_01 +scene0015_00 +scene0100_00 +scene0100_01 +scene0100_02 +scene0558_00 +scene0558_01 +scene0558_02 +scene0685_00 +scene0685_01 +scene0685_02 diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/preprocess_scannet.py b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/preprocess_scannet.py new file mode 100644 index 0000000..549a426 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/preprocessing/scannet/preprocess_scannet.py @@ -0,0 +1,253 @@ +""" +Preprocessing Script for ScanNet 20/200 + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
def read_plymesh(filepath):
    """Load a PLY mesh and return its vertex table and face index array.

    Args:
        filepath: Path of the ``.ply`` file to read.

    Returns:
        A ``(vertices, faces)`` tuple. ``vertices`` has one row per vertex
        with every vertex property as a column; ``faces`` stacks the
        per-face ``vertex_indices`` along axis 0. ``None`` is returned when
        the file contains no elements.
    """
    with open(filepath, "rb") as ply_file:
        mesh = plyfile.PlyData.read(ply_file)
        if not mesh.elements:
            return None
        vertex_table = pd.DataFrame(mesh["vertex"].data).values
        face_indices = np.stack(mesh["face"].data["vertex_indices"], axis=0)
        return vertex_table, face_indices


def point_indices_from_group(seg_indices, group, labels_pd):
    """Resolve one annotated segment group to point indices and class labels.

    Args:
        seg_indices: Array giving the segment id of every point.
        group: Mapping with a ``"segments"`` list of segment ids and a
            ``"label"`` raw category name.
        labels_pd: Label table with ``raw_category``, ``nyu40id`` and ``id``
            columns.

    Returns:
        A ``(point_idx, label_id20, label_id200)`` tuple: the indices of the
        points whose segment id belongs to the group, followed by the
        positions of the group's label inside ``CLASS_IDS20`` and
        ``CLASS_IDS200``. A label that is not part of a benchmark maps to
        ``IGNORE_INDEX`` for that benchmark.
    """
    member_segments = np.array(group["segments"])
    raw_label = group["label"]

    # Rows of the label table describing this raw category (possibly none).
    matching_rows = labels_pd[labels_pd["raw_category"] == raw_label]

    def first_id_or_zero(column):
        # Unknown categories fall back to raw id 0 before the validity check.
        candidates = matching_rows[column]
        return int(candidates.iloc[0]) if len(candidates) > 0 else 0

    nyu40_id = first_id_or_zero("nyu40id")
    scannet_id = first_id_or_zero("id")

    # Translate raw ids into contiguous benchmark indices.
    label_id20 = (
        CLASS_IDS20.index(nyu40_id) if nyu40_id in CLASS_IDS20 else IGNORE_INDEX
    )
    label_id200 = (
        CLASS_IDS200.index(scannet_id)
        if scannet_id in CLASS_IDS200
        else IGNORE_INDEX
    )

    # Points whose segment id is listed in this group.
    in_group = np.isin(seg_indices, member_segments)
    point_idx = np.where(in_group)[0]
    return point_idx, label_id20, label_id200
def face_normal(vertex, face):
    """Compute unit normals and areas for every triangle of a mesh.

    Args:
        vertex: array of vertex positions, one 3D point per row.
        face: integer array with one triangle per row; the first three
            columns index into ``vertex``.

    Returns:
        Tuple ``(nf, area)`` where ``nf`` holds one unit normal per face and
        ``area`` holds the matching triangle areas as a column vector.
    """
    v01 = vertex[face[:, 1]] - vertex[face[:, 0]]
    v02 = vertex[face[:, 2]] - vertex[face[:, 0]]
    vec = np.cross(v01, v02)
    # The epsilon keeps degenerate (zero-area) triangles from dividing by zero.
    length = np.sqrt(np.sum(vec**2, axis=1, keepdims=True)) + 1.0e-8
    nf = vec / length
    area = length * 0.5
    return nf, area


def vertex_normal(vertex, face):
    """Compute area-weighted unit normals for every vertex of a mesh.

    Each vertex normal is the normalized sum of the normals of the faces that
    touch the vertex, weighted by face area. Vertices that belong to no face
    get a zero vector.

    Args:
        vertex: array of vertex positions, one 3D point per row.
        face: integer array with one triangle per row (vertex indices).

    Returns:
        Array with one normal per vertex, in the same row order as ``vertex``.
    """
    nf, area = face_normal(vertex, face)
    weighted = nf * area

    # Accumulate in floating point even when ``vertex`` has an integer dtype,
    # otherwise the fractional contributions would be truncated.
    nv = np.zeros(vertex.shape, dtype=weighted.dtype)
    # Unbuffered scatter-add: every face adds its weighted normal to each of
    # its three corner vertices. This replaces a Python loop over all faces.
    np.add.at(nv, face, weighted[:, np.newaxis, :])

    length = np.sqrt(np.sum(nv**2, axis=1, keepdims=True)) + 1.0e-8
    return nv / length
def _str2bool(value):
    """Convert a command-line token to a real boolean.

    ``argparse`` with ``type=bool`` treats every non-empty string as True, so
    ``--parse_normals False`` would silently enable normals. This converter
    understands the usual spellings instead.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ("1", "true", "t", "yes", "y", "on"):
        return True
    if token in ("", "0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(f"Expected a boolean value, got {value!r}")


if __name__ == "__main__":
    # Command-line entry point: preprocess every ScanNet scene found under
    # --dataset_root and write per-scene .npy assets under --output_root.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset_root",
        required=True,
        help="Path to the ScanNet dataset containing scene folders",
    )
    parser.add_argument(
        "--output_root",
        required=True,
        help="Output path where train/val folders will be located",
    )
    parser.add_argument(
        "--parse_normals",
        default=True,
        type=_str2bool,
        help="Whether parse point normals",
    )
    parser.add_argument(
        "--num_workers",
        default=mp.cpu_count(),
        type=int,
        help="Num workers for preprocessing.",
    )
    config = parser.parse_args()
    meta_root = Path(os.path.dirname(__file__)) / "meta_data"

    # Load label map
    labels_pd = pd.read_csv(
        meta_root / "scannetv2-labels.combined.tsv",
        sep="\t",
        header=0,
    )

    # Load train/val splits
    with open(meta_root / "scannetv2_train.txt") as train_file:
        train_scenes = train_file.read().splitlines()
    with open(meta_root / "scannetv2_val.txt") as val_file:
        val_scenes = val_file.read().splitlines()

    # Create output directories
    for split in ("train", "val", "test"):
        os.makedirs(os.path.join(config.output_root, split), exist_ok=True)

    # Load scene paths
    scene_paths = sorted(
        glob.glob(os.path.join(config.dataset_root, "scans*", "scene*"))
    )

    # Preprocess data.
    print("Processing scenes...")
    # The context manager shuts the worker pool down even if a scene fails;
    # list() drains the iterator so worker exceptions are re-raised here.
    with ProcessPoolExecutor(max_workers=config.num_workers) as pool:
        _ = list(
            pool.map(
                handle_process,
                scene_paths,
                repeat(config.output_root),
                repeat(labels_pd),
                repeat(train_scenes),
                repeat(val_scenes),
                repeat(config.parse_normals),
            )
        )
len(self.data_list)] + name = self.get_data_name(idx) + split = self.get_split_name(idx) + if self.cache: + cache_name = f"pointcept-{name}" + return shared_dict(cache_name) + + data_dict = {} + assets = os.listdir(data_path) + for asset in assets: + if not asset.endswith(".npy"): + continue + if asset[:-4] not in self.VALID_ASSETS: + continue + data_dict[asset[:-4]] = np.load(os.path.join(data_path, asset)) + data_dict["name"] = name + data_dict["split"] = split + data_dict["coord"] = data_dict["coord"].astype(np.float32) + data_dict["color"] = data_dict["color"].astype(np.float32) + data_dict["normal"] = data_dict["normal"].astype(np.float32) + + if "segment20" in data_dict.keys(): + data_dict["segment"] = ( + data_dict.pop("segment20").reshape([-1]).astype(np.int32) + ) + elif "segment200" in data_dict.keys(): + data_dict["segment"] = ( + data_dict.pop("segment200").reshape([-1]).astype(np.int32) + ) + else: + data_dict["segment"] = ( + np.ones(data_dict["coord"].shape[0], dtype=np.int32) * -1 + ) + + if "instance" in data_dict.keys(): + data_dict["instance"] = ( + data_dict.pop("instance").reshape([-1]).astype(np.int32) + ) + else: + data_dict["instance"] = ( + np.ones(data_dict["coord"].shape[0], dtype=np.int32) * -1 + ) + if self.la: + sampled_index = self.la[self.get_data_name(idx)] + mask = np.ones_like(data_dict["segment"], dtype=bool) + mask[sampled_index] = False + data_dict["segment"][mask] = self.ignore_index + data_dict["sampled_index"] = sampled_index + return data_dict + + +@DATASETS.register_module() +class ScanNet200Dataset(ScanNetDataset): + VALID_ASSETS = [ + "coord", + "color", + "normal", + "segment200", + "instance", + ] + class2id = np.array(VALID_CLASS_IDS_200) diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/transform.py b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/transform.py new file mode 100644 index 0000000..de4d2e7 --- /dev/null +++ 
def index_operator(data_dict, index, duplicate=False):
    """Apply ``index`` to every entry listed in ``data_dict["index_valid_keys"]``.

    When the dict has no ``"index_valid_keys"`` entry, a default list is stored
    in it first (configs can customise the list through the "Update"
    transform).

    With ``duplicate=False`` the selected entries are replaced in place and the
    same dict is returned. With ``duplicate=True`` the input entries are left
    as they are and a new dict is returned: indexed entries are sliced, the key
    list is shallow-copied, and every other value is shared by reference.
    """
    valid_keys = data_dict.setdefault(
        "index_valid_keys",
        [
            "coord",
            "color",
            "normal",
            "superpoint",
            "strength",
            "segment",
            "instance",
        ],
    )

    if duplicate:
        selected = {}
        for name, value in data_dict.items():
            if name in valid_keys:
                selected[name] = value[index]
            elif name == "index_valid_keys":
                selected[name] = copy.copy(value)
            else:
                selected[name] = value
        return selected

    for name in valid_keys:
        if name in data_dict:
            data_dict[name] = data_dict[name][index]
    return data_dict
Collect(keys=[coord], feat_keys=[coord, color]) + """ + if offset_keys_dict is None: + offset_keys_dict = dict(offset="coord") + self.keys = keys + self.offset_keys = offset_keys_dict + self.kwargs = kwargs + + def __call__(self, data_dict): + data = dict() + if isinstance(self.keys, str): + self.keys = [self.keys] + for key in self.keys: + data[key] = data_dict[key] + for key, value in self.offset_keys.items(): + data[key] = torch.tensor([data_dict[value].shape[0]]) + for name, keys in self.kwargs.items(): + name = name.replace("_keys", "") + assert isinstance(keys, Sequence) + data[name] = torch.cat([data_dict[key].float() for key in keys], dim=1) + return data + + +@TRANSFORMS.register_module() +class Copy(object): + def __init__(self, keys_dict=None): + if keys_dict is None: + keys_dict = dict(coord="origin_coord", segment="origin_segment") + self.keys_dict = keys_dict + + def __call__(self, data_dict): + for key, value in self.keys_dict.items(): + if isinstance(data_dict[key], np.ndarray): + data_dict[value] = data_dict[key].copy() + elif isinstance(data_dict[key], torch.Tensor): + data_dict[value] = data_dict[key].clone().detach() + else: + data_dict[value] = copy.deepcopy(data_dict[key]) + return data_dict + + +@TRANSFORMS.register_module() +class Update(object): + def __init__(self, keys_dict=None): + if keys_dict is None: + keys_dict = dict() + self.keys_dict = keys_dict + + def __call__(self, data_dict): + for key, value in self.keys_dict.items(): + data_dict[key] = value + return data_dict + + +@TRANSFORMS.register_module() +class ToTensor(object): + def __call__(self, data): + if isinstance(data, torch.Tensor): + return data + elif isinstance(data, str): + # note that str is also a kind of sequence, judgement should before sequence + return data + elif isinstance(data, int): + return torch.LongTensor([data]) + elif isinstance(data, float): + return torch.FloatTensor([data]) + elif isinstance(data, np.ndarray) and np.issubdtype(data.dtype, bool): + 
@TRANSFORMS.register_module()
class NormalizeCoord(object):
    """Center ``coord`` on its centroid and scale it by its largest point norm.

    After the transform the farthest point lies at distance 1 from the origin.
    Dicts without a ``"coord"`` entry are returned unchanged.
    """

    def __call__(self, data_dict):
        if "coord" in data_dict.keys():
            # modified from pointnet2
            centroid = np.mean(data_dict["coord"], axis=0)
            data_dict["coord"] -= centroid
            m = np.max(np.sqrt(np.sum(data_dict["coord"] ** 2, axis=1)))
            # When every point coincides with the centroid (e.g. a single
            # point) the radius is zero; skip the division instead of filling
            # the coordinates with NaN.
            if m > 0:
                data_dict["coord"] = data_dict["coord"] / m
        return data_dict
@TRANSFORMS.register_module()
class RandomDropout(object):
    """Randomly discard a fraction of the points of a sample."""

    def __init__(self, dropout_ratio=0.2, dropout_application_ratio=0.5):
        """
        dropout_ratio: fraction of points removed when the dropout is applied
        dropout_application_ratio: probability that the dropout is applied
        """
        self.dropout_ratio = dropout_ratio
        self.dropout_application_ratio = dropout_application_ratio

    def __call__(self, data_dict):
        if random.random() < self.dropout_application_ratio:
            n = len(data_dict["coord"])
            idx = np.random.choice(n, int(n * (1 - self.dropout_ratio)), replace=False)
            if "sampled_index" in data_dict:
                # for ScanNet data efficient, we need to make sure labeled point is sampled.
                idx = np.unique(np.append(idx, data_dict["sampled_index"]))
                # Mark the labeled points over the original point count, then
                # re-express them as positions inside the kept subset. Sizing
                # the mask from ``n`` avoids requiring a "segment" entry.
                mask = np.zeros(n, dtype=bool)
                mask[data_dict["sampled_index"]] = True
                data_dict["sampled_index"] = np.where(mask[idx])[0]
            data_dict = index_operator(data_dict, idx)
        return data_dict
center + + def __call__(self, data_dict): + if random.random() > self.p: + return data_dict + angle = np.random.choice(self.angle) * np.pi + rot_cos, rot_sin = np.cos(angle), np.sin(angle) + if self.axis == "x": + rot_t = np.array([[1, 0, 0], [0, rot_cos, -rot_sin], [0, rot_sin, rot_cos]]) + elif self.axis == "y": + rot_t = np.array([[rot_cos, 0, rot_sin], [0, 1, 0], [-rot_sin, 0, rot_cos]]) + elif self.axis == "z": + rot_t = np.array([[rot_cos, -rot_sin, 0], [rot_sin, rot_cos, 0], [0, 0, 1]]) + else: + raise NotImplementedError + if "coord" in data_dict.keys(): + if self.center is None: + x_min, y_min, z_min = data_dict["coord"].min(axis=0) + x_max, y_max, z_max = data_dict["coord"].max(axis=0) + center = [(x_min + x_max) / 2, (y_min + y_max) / 2, (z_min + z_max) / 2] + else: + center = self.center + data_dict["coord"] -= center + data_dict["coord"] = np.dot(data_dict["coord"], np.transpose(rot_t)) + data_dict["coord"] += center + if "normal" in data_dict.keys(): + data_dict["normal"] = np.dot(data_dict["normal"], np.transpose(rot_t)) + return data_dict + + +@TRANSFORMS.register_module() +class RandomScale(object): + def __init__(self, scale=None, anisotropic=False): + self.scale = scale if scale is not None else [0.95, 1.05] + self.anisotropic = anisotropic + + def __call__(self, data_dict): + if "coord" in data_dict.keys(): + scale = np.random.uniform( + self.scale[0], self.scale[1], 3 if self.anisotropic else 1 + ) + data_dict["coord"] *= scale + return data_dict + + +@TRANSFORMS.register_module() +class RandomFlip(object): + def __init__(self, p=0.5): + self.p = p + + def __call__(self, data_dict): + if np.random.rand() < self.p: + if "coord" in data_dict.keys(): + data_dict["coord"][:, 0] = -data_dict["coord"][:, 0] + if "normal" in data_dict.keys(): + data_dict["normal"][:, 0] = -data_dict["normal"][:, 0] + if np.random.rand() < self.p: + if "coord" in data_dict.keys(): + data_dict["coord"][:, 1] = -data_dict["coord"][:, 1] + if "normal" in data_dict.keys(): 
@TRANSFORMS.register_module()
class ClipGaussianJitter(object):
    """Add clipped Gaussian noise to ``coord``.

    Noise is drawn from a 3D standard normal distribution, divided by
    ``quantile`` and clipped to [-1, 1], then multiplied by ``scalar``; the
    resulting per-axis offset therefore never exceeds ``scalar`` in magnitude.
    When ``store_jitter`` is set, the applied offsets are also saved under
    ``data_dict["jitter"]``.
    """

    def __init__(self, scalar=0.02, store_jitter=False):
        self.scalar = scalar
        # multivariate_normal requires a 1-D mean vector; the previous
        # ``np.mean(3)`` produced a 0-d scalar and made every call fail.
        self.mean = np.zeros(3)
        self.cov = np.identity(3)
        self.quantile = 1.96
        self.store_jitter = store_jitter

    def __call__(self, data_dict):
        if "coord" in data_dict.keys():
            jitter = np.random.multivariate_normal(
                self.mean, self.cov, data_dict["coord"].shape[0]
            )
            jitter = self.scalar * np.clip(jitter / self.quantile, -1, 1)
            data_dict["coord"] += jitter
            if self.store_jitter:
                data_dict["jitter"] = jitter
        return data_dict
@TRANSFORMS.register_module()
class RandomColorGrayScale(object):
    """Replace ``color`` by its grayscale version with probability ``p``."""

    def __init__(self, p):
        self.p = p

    @staticmethod
    def rgb_to_grayscale(color, num_output_channels=1):
        """Convert colors to grayscale using a weighted sum of R, G and B.

        Args:
            color: array whose last axis holds at least the R, G, B channels.
            num_output_channels: 1 to return a single gray channel, 3 to
                return the gray value repeated to the shape of ``color``.

        Returns:
            Writable array with the dtype of ``color``.

        Raises:
            TypeError: if the last axis of ``color`` has fewer than 3 entries.
            ValueError: if ``num_output_channels`` is neither 1 nor 3.
        """
        if color.shape[-1] < 3:
            raise TypeError(
                "Input color should have at least 3 dimensions, but found {}".format(
                    color.shape[-1]
                )
            )

        if num_output_channels not in (1, 3):
            raise ValueError("num_output_channels should be either 1 or 3")

        r, g, b = color[..., 0], color[..., 1], color[..., 2]
        gray = (0.2989 * r + 0.587 * g + 0.114 * b).astype(color.dtype)
        gray = np.expand_dims(gray, axis=-1)

        if num_output_channels == 3:
            # broadcast_to yields a read-only view; copy it so transforms that
            # later modify the color array in place can still write to it.
            gray = np.broadcast_to(gray, color.shape).copy()

        return gray

    def __call__(self, data_dict):
        # Skip samples without color, like the other color transforms do.
        if "color" in data_dict.keys() and np.random.rand() < self.p:
            data_dict["color"] = self.rgb_to_grayscale(data_dict["color"], 3)
        return data_dict
center=1, bound=(0, float("inf")), clip_first_on_zero=True + ): + if isinstance(value, numbers.Number): + if value < 0: + raise ValueError( + "If {} is a single number, it must be non negative.".format(name) + ) + value = [center - float(value), center + float(value)] + if clip_first_on_zero: + value[0] = max(value[0], 0.0) + elif isinstance(value, (tuple, list)) and len(value) == 2: + if not bound[0] <= value[0] <= value[1] <= bound[1]: + raise ValueError("{} values should be between {}".format(name, bound)) + else: + raise TypeError( + "{} should be a single number or a list/tuple with length 2.".format( + name + ) + ) + + # if value is 0 or (1., 1.) for brightness/contrast/saturation + # or (0., 0.) for hue, do nothing + if value[0] == value[1] == center: + value = None + return value + + @staticmethod + def blend(color1, color2, ratio): + ratio = float(ratio) + bound = 255.0 + return ( + (ratio * color1 + (1.0 - ratio) * color2) + .clip(0, bound) + .astype(color1.dtype) + ) + + @staticmethod + def rgb2hsv(rgb): + r, g, b = rgb[..., 0], rgb[..., 1], rgb[..., 2] + maxc = np.max(rgb, axis=-1) + minc = np.min(rgb, axis=-1) + eqc = maxc == minc + cr = maxc - minc + s = cr / (np.ones_like(maxc) * eqc + maxc * (1 - eqc)) + cr_divisor = np.ones_like(maxc) * eqc + cr * (1 - eqc) + rc = (maxc - r) / cr_divisor + gc = (maxc - g) / cr_divisor + bc = (maxc - b) / cr_divisor + + hr = (maxc == r) * (bc - gc) + hg = ((maxc == g) & (maxc != r)) * (2.0 + rc - bc) + hb = ((maxc != g) & (maxc != r)) * (4.0 + gc - rc) + h = hr + hg + hb + h = (h / 6.0 + 1.0) % 1.0 + return np.stack((h, s, maxc), axis=-1) + + @staticmethod + def hsv2rgb(hsv): + h, s, v = hsv[..., 0], hsv[..., 1], hsv[..., 2] + i = np.floor(h * 6.0) + f = (h * 6.0) - i + i = i.astype(np.int32) + + p = np.clip((v * (1.0 - s)), 0.0, 1.0) + q = np.clip((v * (1.0 - s * f)), 0.0, 1.0) + t = np.clip((v * (1.0 - s * (1.0 - f))), 0.0, 1.0) + i = i % 6 + mask = np.expand_dims(i, axis=-1) == np.arange(6) + + a1 = np.stack((v, 
q, p, p, t, v), axis=-1) + a2 = np.stack((t, v, v, q, p, p), axis=-1) + a3 = np.stack((p, p, t, v, v, q), axis=-1) + a4 = np.stack((a1, a2, a3), axis=-1) + + return np.einsum("...na, ...nab -> ...nb", mask.astype(hsv.dtype), a4) + + def adjust_brightness(self, color, brightness_factor): + if brightness_factor < 0: + raise ValueError( + "brightness_factor ({}) is not non-negative.".format(brightness_factor) + ) + + return self.blend(color, np.zeros_like(color), brightness_factor) + + def adjust_contrast(self, color, contrast_factor): + if contrast_factor < 0: + raise ValueError( + "contrast_factor ({}) is not non-negative.".format(contrast_factor) + ) + mean = np.mean(RandomColorGrayScale.rgb_to_grayscale(color)) + return self.blend(color, mean, contrast_factor) + + def adjust_saturation(self, color, saturation_factor): + if saturation_factor < 0: + raise ValueError( + "saturation_factor ({}) is not non-negative.".format(saturation_factor) + ) + gray = RandomColorGrayScale.rgb_to_grayscale(color) + return self.blend(color, gray, saturation_factor) + + def adjust_hue(self, color, hue_factor): + if not (-0.5 <= hue_factor <= 0.5): + raise ValueError( + "hue_factor ({}) is not in [-0.5, 0.5].".format(hue_factor) + ) + orig_dtype = color.dtype + hsv = self.rgb2hsv(color / 255.0) + h, s, v = hsv[..., 0], hsv[..., 1], hsv[..., 2] + h = (h + hue_factor) % 1.0 + hsv = np.stack((h, s, v), axis=-1) + color_hue_adj = (self.hsv2rgb(hsv) * 255.0).astype(orig_dtype) + return color_hue_adj + + @staticmethod + def get_params(brightness, contrast, saturation, hue): + fn_idx = torch.randperm(4) + b = ( + None + if brightness is None + else np.random.uniform(brightness[0], brightness[1]) + ) + c = None if contrast is None else np.random.uniform(contrast[0], contrast[1]) + s = ( + None + if saturation is None + else np.random.uniform(saturation[0], saturation[1]) + ) + h = None if hue is None else np.random.uniform(hue[0], hue[1]) + return fn_idx, b, c, s, h + + def __call__(self, 
data_dict): + ( + fn_idx, + brightness_factor, + contrast_factor, + saturation_factor, + hue_factor, + ) = self.get_params(self.brightness, self.contrast, self.saturation, self.hue) + + for fn_id in fn_idx: + if ( + fn_id == 0 + and brightness_factor is not None + and np.random.rand() < self.p + ): + data_dict["color"] = self.adjust_brightness( + data_dict["color"], brightness_factor + ) + elif ( + fn_id == 1 and contrast_factor is not None and np.random.rand() < self.p + ): + data_dict["color"] = self.adjust_contrast( + data_dict["color"], contrast_factor + ) + elif ( + fn_id == 2 + and saturation_factor is not None + and np.random.rand() < self.p + ): + data_dict["color"] = self.adjust_saturation( + data_dict["color"], saturation_factor + ) + elif fn_id == 3 and hue_factor is not None and np.random.rand() < self.p: + data_dict["color"] = self.adjust_hue(data_dict["color"], hue_factor) + return data_dict + + +@TRANSFORMS.register_module() +class HueSaturationTranslation(object): + @staticmethod + def rgb_to_hsv(rgb): + # Translated from source of colorsys.rgb_to_hsv + # r,g,b should be a numpy arrays with values between 0 and 255 + # rgb_to_hsv returns an array of floats between 0.0 and 1.0. 
+ rgb = rgb.astype("float") + hsv = np.zeros_like(rgb) + # in case an RGBA array was passed, just copy the A channel + hsv[..., 3:] = rgb[..., 3:] + r, g, b = rgb[..., 0], rgb[..., 1], rgb[..., 2] + maxc = np.max(rgb[..., :3], axis=-1) + minc = np.min(rgb[..., :3], axis=-1) + hsv[..., 2] = maxc + mask = maxc != minc + hsv[mask, 1] = (maxc - minc)[mask] / maxc[mask] + rc = np.zeros_like(r) + gc = np.zeros_like(g) + bc = np.zeros_like(b) + rc[mask] = (maxc - r)[mask] / (maxc - minc)[mask] + gc[mask] = (maxc - g)[mask] / (maxc - minc)[mask] + bc[mask] = (maxc - b)[mask] / (maxc - minc)[mask] + hsv[..., 0] = np.select( + [r == maxc, g == maxc], [bc - gc, 2.0 + rc - bc], default=4.0 + gc - rc + ) + hsv[..., 0] = (hsv[..., 0] / 6.0) % 1.0 + return hsv + + @staticmethod + def hsv_to_rgb(hsv): + # Translated from source of colorsys.hsv_to_rgb + # h,s should be a numpy arrays with values between 0.0 and 1.0 + # v should be a numpy array with values between 0.0 and 255.0 + # hsv_to_rgb returns an array of uints between 0 and 255. 
@TRANSFORMS.register_module()
class ElasticDistortion(object):
    """Warp ``coord`` with smoothed random noise fields.

    ``distortion_params`` is a list of ``[granularity, magnitude]`` pairs that
    are applied one after another; it defaults to
    ``[[0.2, 0.4], [0.8, 1.6]]``. The distortion is applied with probability
    0.95 per call.
    """

    def __init__(self, distortion_params=None):
        self.distortion_params = (
            [[0.2, 0.4], [0.8, 1.6]] if distortion_params is None else distortion_params
        )

    @staticmethod
    def elastic_distortion(coords, granularity, magnitude):
        """
        Apply elastic distortion on sparse coordinate space.
        coords: numpy array of (number of points, 3 spatial dims); it is
            modified in place and also returned
        granularity: size of the noise grid (in same scale[m/cm] as the voxel grid)
        magnitude: noise multiplier
        """
        blurx = np.ones((3, 1, 1, 1)).astype("float32") / 3
        blury = np.ones((1, 3, 1, 1)).astype("float32") / 3
        blurz = np.ones((1, 1, 3, 1)).astype("float32") / 3
        coords_min = coords.min(0)

        # Create Gaussian noise tensor of the size given by granularity.
        noise_dim = ((coords - coords_min).max(0) // granularity).astype(int) + 3
        noise = np.random.randn(*noise_dim, 3).astype(np.float32)

        # Smoothing: two passes of a 3-tap box blur along x, y and z.
        # scipy.ndimage.convolve is the supported entry point; the
        # scipy.ndimage.filters namespace is deprecated.
        for _ in range(2):
            for kernel in (blurx, blury, blurz):
                noise = scipy.ndimage.convolve(noise, kernel, mode="constant", cval=0)

        # Trilinear interpolate noise filters for each spatial dimensions.
        ax = [
            np.linspace(d_min, d_max, d)
            for d_min, d_max, d in zip(
                coords_min - granularity,
                coords_min + granularity * (noise_dim - 2),
                noise_dim,
            )
        ]
        interp = scipy.interpolate.RegularGridInterpolator(
            ax, noise, bounds_error=False, fill_value=0
        )
        coords += interp(coords) * magnitude
        return coords

    def __call__(self, data_dict):
        if "coord" in data_dict.keys() and self.distortion_params is not None:
            if random.random() < 0.95:
                for granularity, magnitude in self.distortion_params:
                    data_dict["coord"] = self.elastic_distortion(
                        data_dict["coord"], granularity, magnitude
                    )
        return data_dict
["train", "test"] + self.mode = mode + self.return_inverse = return_inverse + self.return_grid_coord = return_grid_coord + self.return_min_coord = return_min_coord + self.return_displacement = return_displacement + self.project_displacement = project_displacement + + def __call__(self, data_dict): + assert "coord" in data_dict.keys() + scaled_coord = data_dict["coord"] / np.array(self.grid_size) + grid_coord = np.floor(scaled_coord).astype(int) + min_coord = grid_coord.min(0) + grid_coord -= min_coord + scaled_coord -= min_coord + min_coord = min_coord * np.array(self.grid_size) + key = self.hash(grid_coord) + idx_sort = np.argsort(key) + key_sort = key[idx_sort] + _, inverse, count = np.unique(key_sort, return_inverse=True, return_counts=True) + if self.mode == "train": # train mode + idx_select = ( + np.cumsum(np.insert(count, 0, 0)[0:-1]) + + np.random.randint(0, count.max(), count.size) % count + ) + idx_unique = idx_sort[idx_select] + if "sampled_index" in data_dict: + # for ScanNet data efficient, we need to make sure labeled point is sampled. 
+ idx_unique = np.unique( + np.append(idx_unique, data_dict["sampled_index"]) + ) + mask = np.zeros_like(data_dict["segment"]).astype(bool) + mask[data_dict["sampled_index"]] = True + data_dict["sampled_index"] = np.where(mask[idx_unique])[0] + data_dict = index_operator(data_dict, idx_unique) + if self.return_inverse: + data_dict["inverse"] = np.zeros_like(inverse) + data_dict["inverse"][idx_sort] = inverse + if self.return_grid_coord: + data_dict["grid_coord"] = grid_coord[idx_unique] + if "grid_coord" not in data_dict["index_valid_keys"]: + data_dict["index_valid_keys"].append("grid_coord") + if self.return_min_coord: + data_dict["min_coord"] = min_coord.reshape([1, 3]) + if self.return_displacement: + displacement = ( + scaled_coord - grid_coord - 0.5 + ) # [0, 1] -> [-0.5, 0.5] displacement to center + if self.project_displacement: + displacement = np.sum( + displacement * data_dict["normal"], axis=-1, keepdims=True + ) + data_dict["displacement"] = displacement[idx_unique] + if "displacement" not in data_dict["index_valid_keys"]: + data_dict["index_valid_keys"].append("displacement") + return data_dict + + elif self.mode == "test": # test mode + data_part_list = [] + for i in range(count.max()): + idx_select = np.cumsum(np.insert(count, 0, 0)[0:-1]) + i % count + idx_part = idx_sort[idx_select] + data_part = index_operator(data_dict, idx_part, duplicate=True) + data_part["index"] = idx_part + if self.return_inverse: + data_part["inverse"] = np.zeros_like(inverse) + data_part["inverse"][idx_sort] = inverse + if self.return_grid_coord: + data_part["grid_coord"] = grid_coord[idx_part] + if "grid_coord" not in data_part["index_valid_keys"]: + data_part["index_valid_keys"].append("grid_coord") + if self.return_min_coord: + data_part["min_coord"] = min_coord.reshape([1, 3]) + if self.return_displacement: + displacement = ( + scaled_coord - grid_coord - 0.5 + ) # [0, 1] -> [-0.5, 0.5] displacement to center + if self.project_displacement: + displacement = np.sum( + 
displacement * data_dict["normal"], axis=-1, keepdims=True + ) + data_part["displacement"] = displacement[idx_part] + if "displacement" not in data_part["index_valid_keys"]: + data_part["index_valid_keys"].append("displacement") + data_part_list.append(data_part) + return data_part_list + else: + raise NotImplementedError + + @staticmethod + def ravel_hash_vec(arr): + """ + Ravel the coordinates after subtracting the min coordinates. + """ + assert arr.ndim == 2 + arr = arr.copy() + arr -= arr.min(0) + arr = arr.astype(np.uint64, copy=False) + arr_max = arr.max(0).astype(np.uint64) + 1 + + keys = np.zeros(arr.shape[0], dtype=np.uint64) + # Fortran style indexing + for j in range(arr.shape[1] - 1): + keys += arr[:, j] + keys *= arr_max[j + 1] + keys += arr[:, -1] + return keys + + @staticmethod + def fnv_hash_vec(arr): + """ + FNV64-1A + """ + assert arr.ndim == 2 + # Floor first for negative coordinates + arr = arr.copy() + arr = arr.astype(np.uint64, copy=False) + hashed_arr = np.uint64(14695981039346656037) * np.ones( + arr.shape[0], dtype=np.uint64 + ) + for j in range(arr.shape[1]): + hashed_arr *= np.uint64(1099511628211) + hashed_arr = np.bitwise_xor(hashed_arr, arr[:, j]) + return hashed_arr + + +@TRANSFORMS.register_module() +class SphereCrop(object): + def __init__(self, point_max=80000, sample_rate=None, mode="random"): + self.point_max = point_max + self.sample_rate = sample_rate + assert mode in ["random", "center", "all", "given"] + self.mode = mode + + def __call__(self, data_dict): + point_max = ( + int(self.sample_rate * data_dict["coord"].shape[0]) + if self.sample_rate is not None + else self.point_max + ) + + assert "coord" in data_dict.keys() + if data_dict["coord"].shape[0] > point_max: + if self.mode == "random": + center = data_dict["coord"][ + np.random.randint(data_dict["coord"].shape[0]) + ] + elif self.mode == "center": + center = data_dict["coord"][data_dict["coord"].shape[0] // 2] + elif self.mode == "given": + given_index = 
@TRANSFORMS.register_module()
class PointMultipleSample(object):
    """Sample points to ensure the total number is a multiple of a given value.

    The point count is rounded *down* to the nearest multiple of ``multiple``.
    When fewer than ``multiple`` points are present (the rounded target would
    be zero) or the count is already a multiple, the data dict is returned
    unchanged.
    """

    def __init__(self, multiple=1024, mode="first"):
        """
        Args:
            multiple: Ensure point count is a multiple of this value (default: 1024)
            mode: Sampling strategy - "random" or "first" (default: "first")
        """
        self.multiple = multiple
        assert mode in ["random", "first"]
        self.mode = mode

    def __call__(self, data_dict):
        assert "coord" in data_dict.keys()
        num_points = data_dict["coord"].shape[0]

        # Calculate target number of points (floor to nearest multiple)
        target_points = (num_points // self.multiple) * self.multiple

        if target_points > 0 and target_points != num_points:
            if self.mode == "random":
                # Randomly sample target_points without replacement
                idx = np.random.choice(num_points, target_points, replace=False)
                idx = np.sort(idx)  # keep the original point order
            else:  # mode == "first"
                # Take first target_points
                idx = np.arange(target_points)

            data_dict = index_operator(data_dict, idx)

        return data_dict
+@TRANSFORMS.register_module() +class CropBoundary(object): + def __call__(self, data_dict): + assert "segment" in data_dict + segment = data_dict["segment"].flatten() + mask = (segment != 0) * (segment != 1) + data_dict = index_operator(data_dict, mask) + return data_dict + + +@TRANSFORMS.register_module() +class ContrastiveViewsGenerator(object): + def __init__( + self, + view_keys=("coord", "color", "normal", "origin_coord"), + view_trans_cfg=None, + ): + self.view_keys = view_keys + self.view_trans = Compose(view_trans_cfg) + + def __call__(self, data_dict): + view1_dict = dict() + view2_dict = dict() + for key in self.view_keys: + view1_dict[key] = data_dict[key].copy() + view2_dict[key] = data_dict[key].copy() + view1_dict = self.view_trans(view1_dict) + view2_dict = self.view_trans(view2_dict) + for key, value in view1_dict.items(): + data_dict["view1_" + key] = value + for key, value in view2_dict.items(): + data_dict["view2_" + key] = value + return data_dict + + +@TRANSFORMS.register_module() +class MultiViewGenerator(object): + def __init__( + self, + global_view_num=2, + global_view_scale=(0.4, 1.0), + local_view_num=4, + local_view_scale=(0.1, 0.4), + global_shared_transform=None, + global_transform=None, + local_transform=None, + max_size=65536, + enc2d_max_size=102400, + enc2d_scale=(0.8, 1), + center_height_scale=(0, 1), + shared_global_view=False, + view_keys=("coord", "origin_coord", "color", "normal", "correspondence"), + static_view_keys=("name", "img_num"), + ): + self.global_view_num = global_view_num + self.global_view_scale = global_view_scale + self.local_view_num = local_view_num + self.local_view_scale = local_view_scale + self.global_shared_transform = Compose(global_shared_transform) + self.global_transform = Compose(global_transform) + self.local_transform = Compose(local_transform) + self.max_size = max_size + self.enc2d_max_size = enc2d_max_size + self.enc2d_scale = enc2d_scale + self.center_height_scale = center_height_scale + 
self.shared_global_view = shared_global_view + self.view_keys = view_keys + self.static_view_keys = static_view_keys + assert "coord" in view_keys + + def get_view(self, point, center, scale, if_enc2d=False): + coord = point["coord"] + max_size = min(self.max_size, coord.shape[0]) + enc2d_max_size = min(self.enc2d_max_size, coord.shape[0]) + size = 0 + for _ in range(10): + if if_enc2d: + size = enc2d_max_size + else: + size = int(np.random.uniform(*scale) * max_size) + if size > 0: + break + if size == 0: + size = max(10, scale[-1] * max_size) + assert size > 0 + index = np.argsort(np.sum(np.square(coord - center), axis=-1))[:size] + view = dict(index=index) + for key in point.keys(): + if key in self.view_keys: + view[key] = point[key][index] + if key in self.static_view_keys: + view[key] = point[key] + if "index_valid_keys" in point.keys(): + # inherit index_valid_keys from point + view["index_valid_keys"] = point["index_valid_keys"] + return view + + @staticmethod + def match_point_image(major_view, data_dict): + major_correspondence = major_view["correspondence"].transpose(1, 0, 2) + correspondence = data_dict["correspondence"].transpose(1, 0, 2) + is_all_neg1 = np.any(major_correspondence != np.array([-1, -1]), axis=(1, 2)) + indices = np.where(is_all_neg1)[0] + img_dict = { + "images": data_dict["images"][indices], + "img_num": indices.shape[0], + "major_correspondence": major_correspondence[indices].transpose(1, 0, 2), + "correspondence": correspondence[indices].transpose(1, 0, 2), + } + return img_dict + + def __call__(self, data_dict): + coord = data_dict["coord"] + point = self.global_shared_transform(copy.deepcopy(data_dict)) + z_min = coord[:, 2].min() + z_max = coord[:, 2].max() + z_min_ = z_min + (z_max - z_min) * self.center_height_scale[0] + z_max_ = z_min + (z_max - z_min) * self.center_height_scale[1] + if "correspondence" not in data_dict.keys(): + center_mask = np.logical_and(coord[:, 2] >= z_min_, coord[:, 2] <= z_max_) + major_center = 
coord[np.random.choice(np.where(center_mask)[0])] + major_view = self.get_view(point, major_center, self.global_view_scale) + else: + given_index = data_dict["correspondence"].reshape( + data_dict["correspondence"].shape[0], -1 + ) + given_index = np.all( + given_index != np.ones_like(given_index[0]) * -1, axis=1 + ) + given_coord = data_dict["coord"][given_index] + if given_coord.shape[0] == 0: + center_mask = np.logical_and( + coord[:, 2] >= z_min_, coord[:, 2] <= z_max_ + ) + major_center = coord[np.random.choice(np.where(center_mask)[0])] + else: + major_center = np.mean(given_coord, axis=0) + major_view = self.get_view( + point, major_center, self.global_view_scale, if_enc2d=True + ) + img_dict = self.match_point_image(major_view, data_dict) + major_view["correspondence"] = img_dict["major_correspondence"] + data_dict["correspondence"] = img_dict["correspondence"] + point["correspondence"] = img_dict["correspondence"] + data_dict["img_num"] = img_dict["img_num"] + data_dict["images"] = img_dict["images"] + major_coord = major_view["coord"] + + # get global views: restrict the center of left global view within the major global view + if not self.shared_global_view: + global_views = [ + self.get_view( + point=point, + center=major_coord[np.random.randint(major_coord.shape[0])], + scale=self.global_view_scale, + ) + for _ in range(self.global_view_num - 1) + ] + else: + global_views = [ + {key: value.copy() for key, value in major_view.items()} + for _ in range(self.global_view_num - 1) + ] + + global_views = [major_view] + global_views + + # get local views: restrict the center of local view within the major global view + cover_mask = np.zeros_like(major_view["index"], dtype=bool) + local_views = [] + for i in range(self.local_view_num): + if sum(~cover_mask) == 0: + # reset cover mask if all points are sampled + cover_mask[:] = False + local_view = self.get_view( + point=data_dict, + center=major_coord[np.random.choice(np.where(~cover_mask)[0])], + 
@TRANSFORMS.register_module()
class InstanceParser(object):
    """
    Derive per-instance targets from semantic and instance labels.

    Reads ``coord``, ``segment`` and ``instance`` from the data dict and
    writes back:

    * ``instance``: ids of kept points remapped to ``0..K-1``; points whose
      semantic label is in ``segment_ignore_index`` are set to
      ``instance_ignore_index``. The input array is modified in place.
    * ``instance_centroid``: per-point mean coordinate of the owning instance
      (``instance_ignore_index`` for ignored points).
    * ``bbox``: one row per instance, laid out as
      center(3) + size(3) + theta(1) + class(1).
    """

    def __init__(self, segment_ignore_index=(-1, 0, 1), instance_ignore_index=-1):
        self.segment_ignore_index = segment_ignore_index
        self.instance_ignore_index = instance_ignore_index

    def __call__(self, data_dict):
        coord = data_dict["coord"]
        segment = data_dict["segment"]
        instance = data_dict["instance"]
        # np.isin replaces np.in1d (deprecated in NumPy 2.0); ravel() keeps
        # the flattened 1-D mask that np.in1d used to return.
        mask = ~np.isin(segment, self.segment_ignore_index).ravel()
        # mapping ignored instance to ignore index
        instance[~mask] = self.instance_ignore_index
        # reorder left instance
        unique, inverse = np.unique(instance[mask], return_inverse=True)
        instance_num = len(unique)
        instance[mask] = inverse
        # init instance information
        centroid = np.ones((coord.shape[0], 3)) * self.instance_ignore_index
        bbox = np.ones((instance_num, 8)) * self.instance_ignore_index
        vacancy = [
            index for index in self.segment_ignore_index if index >= 0
        ]  # vacate class index

        for instance_id in range(instance_num):
            mask_ = instance == instance_id
            coord_ = coord[mask_]
            bbox_min = coord_.min(0)
            bbox_max = coord_.max(0)
            bbox_centroid = coord_.mean(0)
            bbox_center = (bbox_max + bbox_min) / 2
            bbox_size = bbox_max - bbox_min
            bbox_theta = np.zeros(1, dtype=coord_.dtype)
            # class of the instance is taken from its first point
            bbox_class = np.array([segment[mask_][0]], dtype=coord_.dtype)
            # shift class index to fill vacate class index caused by segment ignore index
            bbox_class -= np.greater(bbox_class, vacancy).sum()

            centroid[mask_] = bbox_centroid
            bbox[instance_id] = np.concatenate(
                [bbox_center, bbox_size, bbox_theta, bbox_class]
            )  # 3 + 3 + 1 + 1 = 8
        data_dict["instance"] = instance
        data_dict["instance_centroid"] = centroid
        data_dict["bbox"] = bbox
        return data_dict
+ """ + + def __init__( + self, *, p: float = 0.5, radius_min: float = 0.1, radius_max: float = 2.0 + ): + # NOTE: torchvision is applying 1 - probability to return the original image + self.p = p + self.transform = transforms.GaussianBlur( + kernel_size=9, sigma=(radius_min, radius_max) + ) + super().__init__() + + def __call__(self, img): + if np.random.rand() < self.p: + img = self.transform(img) + return img + + +@TRANSFORMS.register_module() +class ImgChromaticJitter(object): + def __init__(self, p=0.95, std=0.005): + self.p = p + self.std = std + + def __call__(self, img): + if np.random.rand() < self.p: + noise = torch.rand(3) + noise *= self.std + noise = noise[:, None, None].expand_as(img) + img += noise + img = torch.clip(img, 0, 1) + return img + + +@TRANSFORMS.register_module() +class ImgPixelContrast(object): + def __init__(self, threshold, p=0.2): + super().__init__() + self.p = p + self.threshold = threshold + + def __call__(self, img): + if np.random.rand() < self.p: + n, h, w = img.shape[0], img.shape[2], img.shape[3] + num_pixels = int(self.threshold * h * w * n) + indices = torch.randint(0, n * h * w, (num_pixels,)) + img = img.permute(0, 2, 3, 1).reshape(-1, 3) + img[indices, :] = 255.0 - img[indices, :] + img = img.reshape(n, h, w, 3).permute(0, 3, 1, 2) + return img + + +IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406) +IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225) + + +@TRANSFORMS.register_module() +class Imgnormalize(object): + def __init__(self, mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD): + super().__init__() + self.normalize = transforms.Normalize(mean=mean, std=std) + + def __call__(self, img): + return self.normalize(img) + + +@TRANSFORMS.register_module() +class ImgRandomHorizontalFlip(object): + def __init__(self, p=0.5): + super().__init__() + self.p = p + self.imgrandomhorizontalflip = transforms.RandomHorizontalFlip(p=p) + + def __call__(self, img): + return self.imgrandomhorizontalflip(img) + + +@TRANSFORMS.register_module() 
@TRANSFORMS.register_module()
class ImgRandomColorJitter(object):
    """
    Apply ``transforms.ColorJitter`` to an image with probability ``p``.

    Args:
        brightness, contrast, saturation, hue: forwarded unchanged to
            ``transforms.ColorJitter``.
        p: probability of applying the jitter; otherwise the image is
            returned untouched.
    """

    def __init__(self, brightness=0.4, contrast=0.4, saturation=0.2, hue=0.1, p=0.8):
        super().__init__()
        self.p = p
        self.colorjitter = transforms.ColorJitter(
            brightness=brightness, contrast=contrast, saturation=saturation, hue=hue
        )

    def __call__(self, img):
        # `p` used to be stored but never consulted, so the jitter was applied
        # unconditionally. Gate it the same way ImgGaussianBlur gates its
        # transform so the parameter does what its name promises.
        if np.random.rand() < self.p:
            img = self.colorjitter(img)
        return img
height=self.crop_h, + width=self.crop_w, + ) + for id, t in enumerate(self.transforms): + point["images"] = t(point["images"]) + correspondence = point["correspondence"] + correspondence_shape = correspondence.shape + correspondence = correspondence.reshape(-1, 2) + mask = ( + (self.crop_start[0] <= correspondence[:, 0]) + & (correspondence[:, 0] < self.crop_start[0] + self.crop_h) + & (self.crop_start[1] <= correspondence[:, 1]) + & (correspondence[:, 1] < self.crop_start[1] + self.crop_w) + ) + correspondence[~mask] = np.array([-1, -1]) + correspondence[mask] -= np.array(self.crop_start) + point["correspondence"] = correspondence.reshape(correspondence_shape) + return point diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/utils.py b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/utils.py new file mode 100644 index 0000000..89e4247 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/datasets/utils.py @@ -0,0 +1,140 @@ +""" +Utils for Datasets + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
def collate_fn(batch):
    """
    Collate function for point clouds which supports dict and list samples;
    'coord' is necessary to determine 'offset'.

    Dispatch is on the type of the first element of ``batch``:

    * ``torch.Tensor``: tensors are concatenated along dim 0.
    * ``str``: returned as a plain list.
    * ``Sequence``: samples are collated field by field; an extra trailing
      field holds the cumulative row count of each sample's first field.
      The input sequences are not modified.
    * ``Mapping``: collated key by key. Keys containing "correspondence" are
      padded with -1 along dim 1 to a common length before concatenation;
      keys containing "offset" are converted to per-sample counts,
      concatenated, and re-accumulated; every other key recurses.
    * anything else: delegated to ``default_collate``.

    Raises:
        TypeError: if ``batch`` is not a ``Sequence``.
    """
    if not isinstance(batch, Sequence):
        # `batch.dtype` was used here before, which itself raised
        # AttributeError for most non-sequence inputs.
        raise TypeError(f"{type(batch)} is not supported.")

    if isinstance(batch[0], torch.Tensor):
        return torch.cat(list(batch))
    elif isinstance(batch[0], str):
        # str is also a kind of Sequence, judgement should before Sequence
        return list(batch)
    elif isinstance(batch[0], Sequence):
        # Build extended copies instead of appending to the caller's lists,
        # so collating the same samples twice does not grow them.
        extended = [
            list(data) + [torch.tensor([data[0].shape[0]])] for data in batch
        ]
        batch = [collate_fn(samples) for samples in zip(*extended)]
        batch[-1] = torch.cumsum(batch[-1], dim=0).int()
        return batch
    elif isinstance(batch[0], Mapping):
        max_img_num = (
            max([d["img_num"] for d in batch]) if "img_num" in batch[0] else None
        )
        collated = {}
        for key in batch[0]:
            values = [d[key] for d in batch]
            if "correspondence" in key:
                # Without "img_num", fall back to the widest dim-1 extent in
                # the batch rather than failing on an unbound name.
                target = (
                    max_img_num
                    if max_img_num is not None
                    else max(v.shape[1] for v in values)
                )
                collated[key] = collate_fn(
                    [
                        F.pad(
                            v.permute(0, 2, 1),
                            (0, target - v.shape[1]),
                            value=-1,
                        ).permute(0, 2, 1)
                        for v in values
                    ]
                )
            elif "offset" in key:
                # offset -> bincount -> concat bincount -> concat offset
                collated[key] = torch.cumsum(
                    collate_fn([v.diff(prepend=torch.tensor([0])) for v in values]),
                    dim=0,
                )
            else:
                collated[key] = collate_fn(values)
        return collated
    else:
        return default_collate(batch)
def gaussian_kernel(dist2: np.ndarray, a: float = 1, c: float = 5) -> np.ndarray:
    """
    Evaluate the unnormalised Gaussian ``a * exp(-dist2 / (2 * c**2))``.

    Args:
        dist2: squared distances (the value is used as-is in the exponent,
            it is not squared again). Any array-like is accepted.
        a: peak amplitude, i.e. the value returned for ``dist2 == 0``.
        c: width parameter of the Gaussian.

    Returns:
        Element-wise kernel values with the same shape as ``dist2``.
    """
    # np.asarray lets plain lists/tuples through; unary minus on a Python
    # list would raise otherwise. Arrays pass through without a copy.
    dist2 = np.asarray(dist2)
    return a * np.exp(-dist2 / (2 * c**2))
def create_ddp_model(model, *, fp16_compression=False, **kwargs):
    """
    Create a DistributedDataParallel model if there are >1 processes.

    Args:
        model: a torch.nn.Module
        fp16_compression: add fp16 compression hooks to the ddp object.
            See more at https://pytorch.org/docs/stable/ddp_comm_hooks.html#torch.distributed.algorithms.ddp_comm_hooks.default_hooks.fp16_compress_hook
        kwargs: other arguments of :module:`torch.nn.parallel.DistributedDataParallel`.

    Returns:
        ``model`` itself when the world size is 1, otherwise the model
        wrapped in ``DistributedDataParallel``.
    """
    if comm.get_world_size() == 1:
        return model
    if "device_ids" not in kwargs:
        local_rank = comm.get_local_rank()
        kwargs["device_ids"] = [local_rank]
        # `output_device` is documented as an int or torch.device, not a
        # list; a one-element list was passed here before.
        # NOTE(review): the nesting of this default under the `device_ids`
        # branch is assumed - confirm against the upstream file.
        if "output_device" not in kwargs:
            kwargs["output_device"] = local_rank
    ddp = DistributedDataParallel(model, **kwargs)
    if fp16_compression:
        from torch.distributed.algorithms.ddp_comm_hooks import default as comm_hooks

        ddp.register_comm_hook(state=None, hook=comm_hooks.fp16_compress_hook)
    return ddp
+ + The seed of each worker equals to num_worker * rank + worker_id + user_seed + + Args: + worker_id (int): Worker id. + num_workers (int): Number of workers. + rank (int): The rank of current process. + seed (int): The random seed to use. + """ + + worker_seed = None if seed is None else num_workers * rank + worker_id + seed + set_seed(worker_seed) + + +def default_argument_parser(epilog=None): + parser = argparse.ArgumentParser( + epilog=epilog + or f""" + Examples: + Run on single machine: + $ {sys.argv[0]} --num-gpus 8 --config-file cfg.yaml + Change some config options: + $ {sys.argv[0]} --config-file cfg.yaml MODEL.WEIGHTS /path/to/weight.pth SOLVER.BASE_LR 0.001 + Run on multiple machines: + (machine0)$ {sys.argv[0]} --machine-rank 0 --num-machines 2 --dist-url [--other-flags] + (machine1)$ {sys.argv[0]} --machine-rank 1 --num-machines 2 --dist-url [--other-flags] + """, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--config-file", default="", metavar="FILE", help="path to config file" + ) + parser.add_argument( + "--num-gpus", type=int, default=1, help="number of gpus *per machine*" + ) + parser.add_argument( + "--num-machines", type=int, default=1, help="total number of machines" + ) + parser.add_argument( + "--machine-rank", + type=int, + default=0, + help="the rank of this machine (unique per machine)", + ) + # PyTorch still may leave orphan processes in multi-gpu training. + # Therefore we use a deterministic way to obtain port, + # so that users are aware of orphan processes by seeing the port occupied. + # port = 2 ** 15 + 2 ** 14 + hash(os.getuid() if sys.platform != "win32" else 1) % 2 ** 14 + parser.add_argument( + "--dist-url", + # default="tcp://127.0.0.1:{}".format(port), + default="auto", + help="initialization URL for pytorch distributed backend. 
def default_config_parser(file_path, options):
    """
    Load a config, apply command-line overrides and prepare the save directory.

    Args:
        file_path: path to an existing config file, or a name of the form
            ``<directory>-<file>`` which is split at the first "-" and
            re-joined as a path.
        options: dict of overrides merged into the config, or ``None``.

    Returns:
        The loaded config. A random seed is filled in when ``cfg.seed`` is
        ``None``, and ``cfg.data.train.loop`` is set to
        ``cfg.epoch // cfg.eval_epoch``.

    Raises:
        FileNotFoundError: if ``file_path`` is neither an existing file nor
            contains a "-" separator.

    Side effects:
        Creates ``<cfg.save_path>/model`` and, unless ``cfg.resume`` is set,
        dumps the config to ``<cfg.save_path>/config.py``.
    """
    # config name protocol: dataset_name/model_name-exp_name
    if os.path.isfile(file_path):
        cfg = Config.fromfile(file_path)
    else:
        prefix, sep, suffix = file_path.partition("-")
        if not sep:
            # str.find() returned -1 here before, which silently produced a
            # mangled path (everything but the last character joined with
            # the whole string) and an unrelated downstream error.
            raise FileNotFoundError(
                f"Config '{file_path}' is not a file and does not follow the "
                "'<directory>-<file>' naming protocol."
            )
        cfg = Config.fromfile(os.path.join(prefix, suffix))

    if options is not None:
        cfg.merge_from_dict(options)

    if cfg.seed is None:
        cfg.seed = get_random_seed()

    cfg.data.train.loop = cfg.epoch // cfg.eval_epoch

    os.makedirs(os.path.join(cfg.save_path, "model"), exist_ok=True)
    if not cfg.resume:
        cfg.dump(os.path.join(cfg.save_path, "config.py"))
    return cfg
def build_hooks(cfg):
    """Instantiate every hook described in ``cfg`` through the HOOKS registry.

    Args:
        cfg: Iterable of hook configs accepted by ``HOOKS.build``.

    Returns:
        list: The built hooks, in the order given.
    """
    return [HOOKS.build(hook_cfg) for hook_cfg in cfg]


class HookBase:
    """
    Base class for hooks that can be registered with :class:`TrainerBase`.

    Every callback is a no-op; subclasses override the ones they need.
    """

    trainer = None  # A weak reference to the trainer object.

    def before_train(self):
        """Called once before training starts."""

    def before_epoch(self):
        """Called before each epoch."""

    def before_step(self):
        """Called before each step."""

    def after_step(self):
        """Called after each step."""

    def after_epoch(self):
        """Called after each epoch."""

    def after_train(self):
        """Called once after training ends."""


@HOOKS.register_module()
class ModelHook(HookBase):
    """Forward trainer callbacks to the model when the model is itself a hook."""

    def before_train(self):
        trainer_model = self.trainer.model
        # With more than one process the hook-capable model is looked up on
        # ``.module``; ``.module`` is only touched in that case.
        if comm.get_world_size() > 1 and isinstance(trainer_model.module, HookBase):
            target = weakref.proxy(trainer_model.module)
        elif isinstance(trainer_model, HookBase):
            target = weakref.proxy(trainer_model)
        else:
            # Model has no callbacks: forward to a no-op hook instead.
            target = HookBase()
        self.model = target
        self.model.trainer = self.trainer
        self.model.before_train()

    def before_epoch(self):
        self.model.before_epoch()

    def before_step(self):
        self.model.before_step()

    def after_step(self):
        self.model.after_step()

    def after_epoch(self):
        self.model.after_epoch()

    def after_train(self):
        self.model.after_train()
+""" + +import numpy as np +import wandb +import torch +import torch.distributed as dist +from uuid import uuid4 + +import pointcept.utils.comm as comm +from pointcept.utils.misc import intersection_and_union_gpu + +from .default import HookBase +from .builder import HOOKS + + +@HOOKS.register_module() +class ClsEvaluator(HookBase): + def after_epoch(self): + if self.trainer.cfg.evaluate: + self.eval() + + def eval(self): + self.trainer.logger.info(">>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>") + self.trainer.model.eval() + for i, input_dict in enumerate(self.trainer.val_loader): + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + with torch.no_grad(): + output_dict = self.trainer.model(input_dict) + output = output_dict["cls_logits"] + loss = output_dict["loss"] + pred = output.max(1)[1] + label = input_dict["category"] + intersection, union, target = intersection_and_union_gpu( + pred, + label, + self.trainer.cfg.data.num_classes, + self.trainer.cfg.data.ignore_index, + ) + if comm.get_world_size() > 1: + dist.all_reduce(intersection), dist.all_reduce(union), dist.all_reduce( + target + ) + intersection, union, target = ( + intersection.cpu().numpy(), + union.cpu().numpy(), + target.cpu().numpy(), + ) + # Here there is no need to sync since sync happened in dist.all_reduce + self.trainer.storage.put_scalar("val_intersection", intersection) + self.trainer.storage.put_scalar("val_union", union) + self.trainer.storage.put_scalar("val_target", target) + self.trainer.storage.put_scalar("val_loss", loss.item()) + self.trainer.logger.info( + "Test: [{iter}/{max_iter}] " + "Loss {loss:.4f} ".format( + iter=i + 1, max_iter=len(self.trainer.val_loader), loss=loss.item() + ) + ) + loss_avg = self.trainer.storage.history("val_loss").avg + intersection = self.trainer.storage.history("val_intersection").total + union = self.trainer.storage.history("val_union").total + target = 
self.trainer.storage.history("val_target").total + iou_class = intersection / (union + 1e-10) + acc_class = intersection / (target + 1e-10) + m_iou = np.mean(iou_class) + m_acc = np.mean(acc_class) + all_acc = sum(intersection) / (sum(target) + 1e-10) + self.trainer.logger.info( + "Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.".format( + m_iou, m_acc, all_acc + ) + ) + for i in range(self.trainer.cfg.data.num_classes): + self.trainer.logger.info( + "Class_{idx}-{name} Result: iou/accuracy {iou:.4f}/{accuracy:.4f}".format( + idx=i, + name=self.trainer.cfg.data.names[i], + iou=iou_class[i], + accuracy=acc_class[i], + ) + ) + current_epoch = self.trainer.epoch + 1 + if self.trainer.writer is not None: + self.trainer.writer.add_scalar("val/loss", loss_avg, current_epoch) + self.trainer.writer.add_scalar("val/mIoU", m_iou, current_epoch) + self.trainer.writer.add_scalar("val/mAcc", m_acc, current_epoch) + self.trainer.writer.add_scalar("val/allAcc", all_acc, current_epoch) + if self.trainer.cfg.enable_wandb: + wandb.log( + { + "Epoch": current_epoch, + "val/loss": loss_avg, + "val/mIoU": m_iou, + "val/mAcc": m_acc, + "val/allAcc": all_acc, + }, + step=wandb.run.step, + ) + self.trainer.logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") + self.trainer.comm_info["current_metric_value"] = all_acc # save for saver + self.trainer.comm_info["current_metric_name"] = "allAcc" # save for saver + + def after_train(self): + self.trainer.logger.info( + "Best {}: {:.4f}".format("allAcc", self.trainer.best_metric_value) + ) + + +@HOOKS.register_module() +class SemSegEvaluator(HookBase): + def __init__(self, write_cls_iou=False): + self.write_cls_iou = write_cls_iou + + def before_train(self): + if self.trainer.writer is not None and self.trainer.cfg.enable_wandb: + wandb.define_metric("val/*", step_metric="Epoch") + + def after_epoch(self): + if self.trainer.cfg.evaluate: + self.eval() + + def eval(self): + self.trainer.logger.info(">>>>>>>>>>>>>>>> Start 
Evaluation >>>>>>>>>>>>>>>>") + self.trainer.model.eval() + for i, input_dict in enumerate(self.trainer.val_loader): + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + with torch.no_grad(): + output_dict = self.trainer.model(input_dict) + output = output_dict["seg_logits"] + loss = output_dict["loss"] + pred = output.max(1)[1] + segment = input_dict["segment"] + if "inverse" in input_dict.keys(): + assert "origin_segment" in input_dict.keys() + pred = pred[input_dict["inverse"]] + segment = input_dict["origin_segment"] + intersection, union, target = intersection_and_union_gpu( + pred, + segment, + self.trainer.cfg.data.num_classes, + self.trainer.cfg.data.ignore_index, + ) + if comm.get_world_size() > 1: + dist.all_reduce(intersection), dist.all_reduce(union), dist.all_reduce( + target + ) + intersection, union, target = ( + intersection.cpu().numpy(), + union.cpu().numpy(), + target.cpu().numpy(), + ) + # Here there is no need to sync since sync happened in dist.all_reduce + self.trainer.storage.put_scalar("val_intersection", intersection) + self.trainer.storage.put_scalar("val_union", union) + self.trainer.storage.put_scalar("val_target", target) + self.trainer.storage.put_scalar("val_loss", loss.item()) + info = "Test: [{iter}/{max_iter}] ".format( + iter=i + 1, max_iter=len(self.trainer.val_loader) + ) + if "origin_coord" in input_dict.keys(): + info = "Interp. 
" + info + self.trainer.logger.info( + info + + "Loss {loss:.4f} ".format( + iter=i + 1, max_iter=len(self.trainer.val_loader), loss=loss.item() + ) + ) + loss_avg = self.trainer.storage.history("val_loss").avg + intersection = self.trainer.storage.history("val_intersection").total + union = self.trainer.storage.history("val_union").total + target = self.trainer.storage.history("val_target").total + iou_class = intersection / (union + 1e-10) + acc_class = intersection / (target + 1e-10) + m_iou = np.mean(iou_class) + m_acc = np.mean(acc_class) + all_acc = sum(intersection) / (sum(target) + 1e-10) + self.trainer.logger.info( + "Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.".format( + m_iou, m_acc, all_acc + ) + ) + for i in range(self.trainer.cfg.data.num_classes): + self.trainer.logger.info( + "Class_{idx}-{name} Result: iou/accuracy {iou:.4f}/{accuracy:.4f}".format( + idx=i, + name=self.trainer.cfg.data.names[i], + iou=iou_class[i], + accuracy=acc_class[i], + ) + ) + current_epoch = self.trainer.epoch + 1 + if self.trainer.writer is not None: + self.trainer.writer.add_scalar("val/loss", loss_avg, current_epoch) + self.trainer.writer.add_scalar("val/mIoU", m_iou, current_epoch) + self.trainer.writer.add_scalar("val/mAcc", m_acc, current_epoch) + self.trainer.writer.add_scalar("val/allAcc", all_acc, current_epoch) + if self.trainer.cfg.enable_wandb: + wandb.log( + { + "Epoch": current_epoch, + "val/loss": loss_avg, + "val/mIoU": m_iou, + "val/mAcc": m_acc, + "val/allAcc": all_acc, + }, + step=wandb.run.step, + ) + if self.write_cls_iou: + for i in range(self.trainer.cfg.data.num_classes): + self.trainer.writer.add_scalar( + f"val/cls_{i}-{self.trainer.cfg.data.names[i]} IoU", + iou_class[i], + current_epoch, + ) + if self.trainer.cfg.enable_wandb: + for i in range(self.trainer.cfg.data.num_classes): + wandb.log( + { + "Epoch": current_epoch, + f"val/cls_{i}-{self.trainer.cfg.data.names[i]} IoU": iou_class[ + i + ], + }, + step=wandb.run.step, + ) + 
self.trainer.logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") + self.trainer.comm_info["current_metric_value"] = m_iou # save for saver + self.trainer.comm_info["current_metric_name"] = "mIoU" # save for saver + + def after_train(self): + self.trainer.logger.info( + "Best {}: {:.4f}".format("mIoU", self.trainer.best_metric_value) + ) + diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/engines/hooks/misc.py b/point_transformer_v3/external/pointcept_minimal/pointcept/engines/hooks/misc.py new file mode 100644 index 0000000..28d9682 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/engines/hooks/misc.py @@ -0,0 +1,553 @@ +""" +Misc Hook + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import sys +import glob +import os +import shutil +import time +import gc +import wandb +import torch +import torch.utils.data +from collections import OrderedDict + +if sys.version_info >= (3, 10): + from collections.abc import Sequence +else: + from collections import Sequence +from pointcept.utils.timer import Timer +from pointcept.utils.comm import is_main_process, synchronize +from pointcept.utils.cache import shared_dict +from pointcept.utils.scheduler import CosineScheduler +import pointcept.utils.comm as comm + +from .default import HookBase +from .builder import HOOKS + + +@HOOKS.register_module() +class IterationTimer(HookBase): + def __init__(self, warmup_iter=1): + self._warmup_iter = warmup_iter + self._start_time = time.perf_counter() + self._iter_timer = Timer() + self._remain_iter = 0 + + def before_train(self): + self._start_time = time.perf_counter() + _remain_epoch = self.trainer.max_epoch - self.trainer.start_epoch + self._remain_iter = _remain_epoch * len(self.trainer.train_loader) + + def before_epoch(self): + self._iter_timer.reset() + + def before_step(self): + data_time = self._iter_timer.seconds() + 
self.trainer.storage.put_scalar("data_time", data_time) + + def after_step(self): + batch_time = self._iter_timer.seconds() + self._iter_timer.reset() + self.trainer.storage.put_scalar("batch_time", batch_time) + self._remain_iter -= 1 + remain_time = self._remain_iter * self.trainer.storage.history("batch_time").avg + t_m, t_s = divmod(remain_time, 60) + t_h, t_m = divmod(t_m, 60) + remain_time = "{:02d}:{:02d}:{:02d}".format(int(t_h), int(t_m), int(t_s)) + if "iter_info" in self.trainer.comm_info.keys(): + info = ( + "Data {data_time_val:.3f} ({data_time_avg:.3f}) " + "Batch {batch_time_val:.3f} ({batch_time_avg:.3f}) " + "Remain {remain_time} ".format( + data_time_val=self.trainer.storage.history("data_time").val, + data_time_avg=self.trainer.storage.history("data_time").avg, + batch_time_val=self.trainer.storage.history("batch_time").val, + batch_time_avg=self.trainer.storage.history("batch_time").avg, + remain_time=remain_time, + ) + ) + self.trainer.comm_info["iter_info"] += info + if self.trainer.comm_info["iter"] <= self._warmup_iter: + self.trainer.storage.history("data_time").reset() + self.trainer.storage.history("batch_time").reset() + + +@HOOKS.register_module() +class InformationWriter(HookBase): + def __init__(self): + self.curr_iter = 0 + self.model_output_keys = [] + + def before_train(self): + self.trainer.comm_info["iter_info"] = "" + self.curr_iter = self.trainer.start_epoch * len(self.trainer.train_loader) + if self.trainer.writer is not None and self.trainer.cfg.enable_wandb: + wandb.define_metric("params/*", step_metric="Iter") + wandb.define_metric("train_batch/*", step_metric="Iter") + wandb.define_metric("train/*", step_metric="Epoch") + + def before_step(self): + self.curr_iter += 1 + info = "Train: [{epoch}/{max_epoch}][{iter}/{max_iter}] ".format( + epoch=self.trainer.epoch + 1, + max_epoch=self.trainer.max_epoch, + iter=self.trainer.comm_info["iter"] + 1, + max_iter=len(self.trainer.train_loader), + ) + 
self.trainer.comm_info["iter_info"] += info + + def after_step(self): + if "model_output_dict" in self.trainer.comm_info.keys(): + model_output_dict = self.trainer.comm_info["model_output_dict"] + self.model_output_keys = model_output_dict.keys() + for key in self.model_output_keys: + self.trainer.storage.put_scalar(key, model_output_dict[key].item()) + + for key in self.model_output_keys: + self.trainer.comm_info["iter_info"] += "{key}: {value:.4f} ".format( + key=key, value=self.trainer.storage.history(key).val + ) + lr = self.trainer.optimizer.state_dict()["param_groups"][0]["lr"] + self.trainer.comm_info["iter_info"] += "Lr: {lr:.5f}".format(lr=lr) + self.trainer.logger.info(self.trainer.comm_info["iter_info"]) + self.trainer.comm_info["iter_info"] = "" # reset iter info + if self.trainer.writer is not None: + self.trainer.writer.add_scalar("params/lr", lr, self.curr_iter) + for key in self.model_output_keys: + self.trainer.writer.add_scalar( + "train_batch/" + key, + self.trainer.storage.history(key).val, + self.curr_iter, + ) + if self.trainer.cfg.enable_wandb: + + wandb.log( + {"Iter": self.curr_iter, "params/lr": lr}, step=self.curr_iter + ) + for key in self.model_output_keys: + wandb.log( + { + "Iter": self.curr_iter, + f"train_batch/{key}": self.trainer.storage.history(key).val, + }, + step=wandb.run.step, + ) + + def after_epoch(self): + epoch_info = "Train result: " + for key in self.model_output_keys: + epoch_info += "{key}: {value:.4f} ".format( + key=key, value=self.trainer.storage.history(key).avg + ) + self.trainer.logger.info(epoch_info) + if self.trainer.writer is not None: + for key in self.model_output_keys: + self.trainer.writer.add_scalar( + "train/" + key, + self.trainer.storage.history(key).avg, + self.trainer.epoch + 1, + ) + + if self.trainer.cfg.enable_wandb: + + for key in self.model_output_keys: + wandb.log( + { + "Epoch": self.trainer.epoch + 1, + f"train/{key}": self.trainer.storage.history(key).avg, + }, + step=wandb.run.step, + ) 
+ + +@HOOKS.register_module() +class CheckpointSaver(HookBase): + def __init__(self, save_freq=None): + self.save_freq = save_freq # None or int, None indicate only save model last + + def after_epoch(self): + if is_main_process(): + is_best = False + if self.trainer.cfg.evaluate: + current_metric_value = self.trainer.comm_info["current_metric_value"] + current_metric_name = self.trainer.comm_info["current_metric_name"] + if current_metric_value > self.trainer.best_metric_value: + self.trainer.best_metric_value = current_metric_value + is_best = True + self.trainer.logger.info( + "Best validation {} updated to: {:.4f}".format( + current_metric_name, current_metric_value + ) + ) + self.trainer.logger.info( + "Currently Best {}: {:.4f}".format( + current_metric_name, self.trainer.best_metric_value + ) + ) + + filename = os.path.join( + self.trainer.cfg.save_path, "model", "model_last.pth" + ) + self.trainer.logger.info("Saving checkpoint to: " + filename) + torch.save( + { + "epoch": self.trainer.epoch + 1, + "state_dict": self.trainer.model.state_dict(), + "optimizer": self.trainer.optimizer.state_dict(), + "scheduler": self.trainer.scheduler.state_dict(), + "scaler": ( + self.trainer.scaler.state_dict() + if self.trainer.cfg.enable_amp + else None + ), + "best_metric_value": self.trainer.best_metric_value, + }, + filename + ".tmp", + ) + os.replace(filename + ".tmp", filename) + if is_best: + shutil.copyfile( + filename, + os.path.join(self.trainer.cfg.save_path, "model", "model_best.pth"), + ) + if self.save_freq and (self.trainer.epoch + 1) % self.save_freq == 0: + shutil.copyfile( + filename, + os.path.join( + self.trainer.cfg.save_path, + "model", + f"epoch_{self.trainer.epoch + 1}.pth", + ), + ) + + +@HOOKS.register_module() +class CheckpointLoader(HookBase): + def __init__(self, keywords="", replacement=None, strict=False): + self.keywords = keywords + self.replacement = replacement if replacement is not None else keywords + self.strict = strict + + def 
before_train(self): + self.trainer.logger.info("=> Loading checkpoint & weight ...") + if self.trainer.cfg.weight and os.path.isfile(self.trainer.cfg.weight): + self.trainer.logger.info(f"Loading weight at: {self.trainer.cfg.weight}") + checkpoint = torch.load( + self.trainer.cfg.weight, + map_location=lambda storage, loc: storage.cuda(), + weights_only=False, + ) + self.trainer.logger.info( + f"Loading layer weights with keyword: {self.keywords}, " + f"replace keyword with: {self.replacement}" + ) + weight = OrderedDict() + for key, value in checkpoint["state_dict"].items(): + if not key.startswith("module."): + key = "module." + key # xxx.xxx -> module.xxx.xxx + # Now all keys contain "module." no matter DDP or not. + if self.keywords in key: + key = key.replace(self.keywords, self.replacement, 1) + if comm.get_world_size() == 1: + key = key[7:] # module.xxx.xxx -> xxx.xxx + weight[key] = value + load_state_info = self.trainer.model.load_state_dict( + weight, strict=self.strict + ) + self.trainer.logger.info(f"Missing keys: {load_state_info[0]}") + if self.trainer.cfg.resume: + self.trainer.logger.info( + f"Resuming train at eval epoch: {checkpoint['epoch']}" + ) + self.trainer.start_epoch = checkpoint["epoch"] + self.trainer.best_metric_value = checkpoint["best_metric_value"] + self.trainer.optimizer.load_state_dict(checkpoint["optimizer"]) + self.trainer.scheduler.load_state_dict(checkpoint["scheduler"]) + if self.trainer.cfg.enable_amp: + self.trainer.scaler.load_state_dict(checkpoint["scaler"]) + else: + self.trainer.logger.info(f"No weight found at: {self.trainer.cfg.weight}") + + +@HOOKS.register_module() +class PreciseEvaluator(HookBase): + def __init__(self, test_last=False): + self.test_last = test_last + + def after_train(self): + from pointcept.engines.test import TESTERS + + self.trainer.logger.info( + ">>>>>>>>>>>>>>>> Start Precise Evaluation >>>>>>>>>>>>>>>>" + ) + torch.cuda.empty_cache() + cfg = self.trainer.cfg + test_cfg = dict(cfg=cfg, 
model=self.trainer.model, **cfg.test) + tester = TESTERS.build(test_cfg) + if self.test_last: + self.trainer.logger.info("=> Testing on model_last ...") + else: + self.trainer.logger.info("=> Testing on model_best ...") + best_path = os.path.join( + self.trainer.cfg.save_path, "model", "model_best.pth" + ) + checkpoint = torch.load(best_path, weights_only=False) + weight = OrderedDict() + for key, value in checkpoint["state_dict"].items(): + if not key.startswith("module."): + key = "module." + key # xxx.xxx -> module.xxx.xxx + # Now all keys contain "module." no matter DDP or not. + if comm.get_world_size() == 1: + key = key[7:] # module.xxx.xxx -> xxx.xxx + weight[key] = value + tester.model.load_state_dict(weight, strict=True) + tester.test() + + +@HOOKS.register_module() +class DataCacheOperator(HookBase): + def __init__(self, data_root, split): + self.data_root = data_root + self.split = split + self.data_list = self.get_data_list() + + def get_data_list(self): + if isinstance(self.split, str): + data_list = glob.glob(os.path.join(self.data_root, self.split)) + elif isinstance(self.split, Sequence): + data_list = [] + for split in self.split: + data_list += glob.glob(os.path.join(self.data_root, split)) + else: + raise NotImplementedError + return data_list + + def get_cache_name(self, data_path): + data_name = data_path.replace(os.path.dirname(self.data_root), "") + return "pointcept" + data_name.replace(os.path.sep, "-") + + def before_train(self): + self.trainer.logger.info( + f"=> Caching dataset: {self.data_root}, split: {self.split} ..." 
+ ) + if is_main_process(): + dataset = self.trainer.train_loader.dataset + for i in range(len(dataset)): + data_dict = dataset[i] + name = data_dict["name"] + shared_dict(f"Pointcept-{name}", data_dict) + synchronize() + + +@HOOKS.register_module() +class RuntimeProfiler(HookBase): + def __init__( + self, + forward=True, + backward=True, + interrupt=False, + warm_up=2, + sort_by="cuda_time_total", + row_limit=30, + ): + self.forward = forward + self.backward = backward + self.interrupt = interrupt + self.warm_up = warm_up + self.sort_by = sort_by + self.row_limit = row_limit + + def before_train(self): + self.trainer.logger.info("Profiling runtime ...") + from torch.profiler import profile, record_function, ProfilerActivity + + for i, input_dict in enumerate(self.trainer.train_loader): + if i == self.warm_up + 1: + break + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + if self.forward: + with profile( + activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], + record_shapes=True, + profile_memory=True, + with_stack=True, + ) as forward_prof: + with record_function("model_inference"): + output_dict = self.trainer.model(input_dict) + else: + output_dict = self.trainer.model(input_dict) + loss = output_dict["loss"] + if self.backward: + with profile( + activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], + record_shapes=True, + profile_memory=True, + with_stack=True, + ) as backward_prof: + with record_function("model_inference"): + loss.backward() + self.trainer.logger.info(f"Profile: [{i + 1}/{self.warm_up + 1}]") + if self.forward: + self.trainer.logger.info( + "Forward profile: \n" + + str( + forward_prof.key_averages().table( + sort_by=self.sort_by, row_limit=self.row_limit + ) + ) + ) + forward_prof.export_chrome_trace( + os.path.join(self.trainer.cfg.save_path, "forward_trace.json") + ) + + if self.backward: + self.trainer.logger.info( + "Backward profile: \n" + 
+ str( + backward_prof.key_averages().table( + sort_by=self.sort_by, row_limit=self.row_limit + ) + ) + ) + backward_prof.export_chrome_trace( + os.path.join(self.trainer.cfg.save_path, "backward_trace.json") + ) + if self.interrupt: + sys.exit(0) + + +@HOOKS.register_module() +class RuntimeProfilerV2(HookBase): + def __init__( + self, + interrupt=False, + wait=1, + warmup=1, + active=10, + repeat=1, + sort_by="cuda_time_total", + row_limit=30, + ): + self.interrupt = interrupt + self.wait = wait + self.warmup = warmup + self.active = active + self.repeat = repeat + self.sort_by = sort_by + self.row_limit = row_limit + + def before_train(self): + self.trainer.logger.info("Profiling runtime ...") + from torch.profiler import ( + profile, + record_function, + ProfilerActivity, + schedule, + tensorboard_trace_handler, + ) + + prof = profile( + activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], + schedule=schedule( + wait=self.wait, + warmup=self.warmup, + active=self.active, + repeat=self.repeat, + ), + on_trace_ready=tensorboard_trace_handler(self.trainer.cfg.save_path), + record_shapes=True, + profile_memory=True, + with_stack=True, + ) + prof.start() + for i, input_dict in enumerate(self.trainer.train_loader): + if i >= (self.wait + self.warmup + self.active) * self.repeat: + break + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + with record_function("model_forward"): + output_dict = self.trainer.model(input_dict) + loss = output_dict["loss"] + with record_function("model_backward"): + loss.backward() + prof.step() + self.trainer.logger.info( + f"Profile: [{i + 1}/{(self.wait + self.warmup + self.active) * self.repeat}]" + ) + self.trainer.logger.info( + "Profile: \n" + + str( + prof.key_averages().table( + sort_by=self.sort_by, row_limit=self.row_limit + ) + ) + ) + prof.stop() + + if self.interrupt: + sys.exit(0) + + +@HOOKS.register_module() +class 
@HOOKS.register_module()
class WeightDecaySchedular(HookBase):
    """Set the optimizer weight decay from a cosine schedule before each step.

    Args:
        base_value (float): ``base_value`` passed to ``CosineScheduler``.
        final_value (float): ``final_value`` passed to ``CosineScheduler``.
    """

    def __init__(
        self,
        base_value=0.04,
        final_value=0.2,
    ):
        self.base_value = base_value
        self.final_value = final_value
        self.scheduler = None

    def before_train(self):
        # Start the schedule at the step matching the trainer's start epoch.
        curr_step = self.trainer.start_epoch * len(self.trainer.train_loader)
        self.scheduler = CosineScheduler(
            base_value=self.base_value,
            final_value=self.final_value,
            total_iters=self.trainer.cfg.scheduler.total_steps,
        )
        self.scheduler.iter = curr_step

    def before_step(self):
        wd = self.scheduler.step()
        for param_group in self.trainer.optimizer.param_groups:
            param_group["weight_decay"] = wd
        if self.trainer.writer is not None:
            self.trainer.writer.add_scalar("params/wd", wd, self.scheduler.iter)


@HOOKS.register_module()
class GarbageHandler(HookBase):
    """Run garbage collection on a fixed step interval during training.

    Args:
        interval (int): Collect every ``interval`` steps within an epoch.
        disable_auto (bool): Turn off automatic collection while training.
        empty_cache (bool): Also call ``torch.cuda.empty_cache()`` on each
            scheduled collection.
    """

    def __init__(self, interval=150, disable_auto=True, empty_cache=False):
        self.interval = interval
        self.disable_auto = disable_auto
        self.empty_cache = empty_cache
        self.iter = 1
        # Whether automatic gc was on before this hook switched it off.
        self._gc_was_enabled = False

    def before_train(self):
        if self.disable_auto:
            self._gc_was_enabled = gc.isenabled()
            gc.disable()
            self.trainer.logger.info("Disable automatic garbage collection")

    def before_epoch(self):
        self.iter = 1

    def after_step(self):
        if self.iter % self.interval == 0:
            gc.collect()
            if self.empty_cache:
                torch.cuda.empty_cache()
            self.trainer.logger.info("Garbage collected")
        self.iter += 1

    def after_train(self):
        gc.collect()
        torch.cuda.empty_cache()
        # Restore automatic collection; previously it stayed disabled for the
        # rest of the process once training had finished.
        if self.disable_auto and self._gc_was_enabled:
            gc.enable()
__all__ = ["DEFAULT_TIMEOUT", "launch"]

DEFAULT_TIMEOUT = timedelta(minutes=60)


def _find_free_port():
    """Return a TCP port number that was free at the time of the call."""
    import socket

    # The context manager closes the socket even if bind() raises.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        # Binding to port 0 will cause the OS to find an available port for us
        sock.bind(("", 0))
        port = sock.getsockname()[1]
    # NOTE: there is still a chance the port could be taken by other processes.
    return port


def launch(
    main_func,
    num_gpus_per_machine,
    num_machines=1,
    machine_rank=0,
    dist_url=None,
    cfg=(),
    timeout=DEFAULT_TIMEOUT,
):
    """
    Launch multi-gpu or distributed training.
    This function must be called on all machines involved in the training.
    It will spawn child processes (defined by ``num_gpus_per_machine``) on each machine.
    Args:
        main_func: a function that will be called by `main_func(*cfg)`
        num_gpus_per_machine (int): number of GPUs per machine
        num_machines (int): the total number of machines
        machine_rank (int): the rank of this machine
        dist_url (str): url to connect to for distributed jobs, including protocol
                       e.g. "tcp://127.0.0.1:8686".
                       Can be set to "auto" to automatically select a free port on localhost
        cfg (tuple): arguments passed to main_func
        timeout (timedelta): timeout of the distributed workers
    """
    world_size = num_machines * num_gpus_per_machine
    if world_size <= 1:
        # Single process: run in place, no process group is created.
        main_func(*cfg)
        return

    if dist_url == "auto":
        assert (
            num_machines == 1
        ), "dist_url=auto not supported in multi-machine jobs."
        port = _find_free_port()
        dist_url = f"tcp://127.0.0.1:{port}"
    # dist_url defaults to None; guard before calling a str method on it.
    if num_machines > 1 and dist_url is not None and dist_url.startswith("file://"):
        logger = logging.getLogger(__name__)
        logger.warning(
            "file:// is not a reliable init_method in multi-machine jobs. Prefer tcp://"
        )

    mp.spawn(
        _distributed_worker,
        nprocs=num_gpus_per_machine,
        args=(
            main_func,
            world_size,
            num_gpus_per_machine,
            machine_rank,
            dist_url,
            cfg,
            timeout,
        ),
        daemon=False,
    )


def _distributed_worker(
    local_rank,
    main_func,
    world_size,
    num_gpus_per_machine,
    machine_rank,
    dist_url,
    cfg,
    timeout=DEFAULT_TIMEOUT,
):
    """Per-process entry point: join the process group, then run ``main_func(*cfg)``.

    Args:
        local_rank (int): Index of this process on its machine; also used as
            the CUDA device index.
        main_func: Callable invoked as ``main_func(*cfg)`` after setup.
        world_size (int): Total number of processes across all machines.
        num_gpus_per_machine (int): Number of processes per machine.
        machine_rank (int): Rank of this machine.
        dist_url (str): ``init_method`` handed to ``init_process_group``.
        cfg (tuple): Arguments passed to ``main_func``.
        timeout (timedelta): Timeout handed to ``init_process_group``.
    """
    assert (
        torch.cuda.is_available()
    ), "cuda is not available. Please check your installation."
    global_rank = machine_rank * num_gpus_per_machine + local_rank
    try:
        dist.init_process_group(
            backend="NCCL",
            init_method=dist_url,
            world_size=world_size,
            rank=global_rank,
            timeout=timeout,
        )
    except Exception:
        logger = logging.getLogger(__name__)
        logger.error("Process group URL: {}".format(dist_url))
        raise

    # Setup the local process group (which contains ranks within the same machine)
    assert comm._LOCAL_PROCESS_GROUP is None
    num_machines = world_size // num_gpus_per_machine
    for i in range(num_machines):
        ranks_on_i = list(
            range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine)
        )
        # new_group is called for every machine's ranks on every process;
        # only this machine's group is kept.
        pg = dist.new_group(ranks_on_i)
        if i == machine_rank:
            comm._LOCAL_PROCESS_GROUP = pg

    assert num_gpus_per_machine <= torch.cuda.device_count()
    torch.cuda.set_device(local_rank)

    # synchronize is needed here to prevent a possible timeout after calling init_process_group
    # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172
    comm.synchronize()

    main_func(*cfg)
class TesterBase:
    """Common scaffolding for testers: logger, model and test dataloader.

    Args:
        cfg: Experiment config; ``save_path``, ``resume``, ``weight``,
            ``model``, ``data.test``, ``batch_size_test_per_gpu`` and
            ``find_unused_parameters`` are read here.
        model: Already-built model to test. When ``None`` the model is built
            from ``cfg.model`` and weights are loaded from ``cfg.weight``.
        test_loader: Ready dataloader. When ``None`` one is built from
            ``cfg.data.test``.
        verbose (bool): Log the save path and full config (only when the
            model is built here).
    """

    def __init__(self, cfg, model=None, test_loader=None, verbose=False) -> None:
        torch.multiprocessing.set_sharing_strategy("file_system")
        self.logger = get_root_logger(
            log_file=os.path.join(cfg.save_path, "test.log"),
            file_mode="a" if cfg.resume else "w",
        )
        self.logger.info("=> Loading config ...")
        self.cfg = cfg
        self.verbose = verbose
        if self.verbose and model is None:
            # if model is not none, trigger tester with trainer, no need to print config
            self.logger.info(f"Save path: {cfg.save_path}")
            self.logger.info(f"Config:\n{cfg.pretty_text}")
        if model is None:
            self.logger.info("=> Building model ...")
            self.model = self.build_model()
        else:
            self.model = model
        if test_loader is None:
            self.logger.info("=> Building test dataset & dataloader ...")
            self.test_loader = self.build_test_loader()
        else:
            self.test_loader = test_loader

    def build_model(self):
        """Build the model from ``cfg.model`` and load ``cfg.weight`` strictly.

        Raises:
            RuntimeError: If ``cfg.weight`` is not an existing file.
        """
        model = build_model(self.cfg.model)
        n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
        self.logger.info(f"Num params: {n_parameters}")
        model = create_ddp_model(
            model.cuda(),
            broadcast_buffers=False,
            find_unused_parameters=self.cfg.find_unused_parameters,
        )
        if os.path.isfile(self.cfg.weight):
            self.logger.info(f"Loading weight at: {self.cfg.weight}")
            # NOTE(review): weights_only=False unpickles arbitrary objects;
            # only load checkpoints from a trusted source.
            checkpoint = torch.load(self.cfg.weight, weights_only=False)
            weight = OrderedDict()
            # Add or strip the "module." key prefix to match the world size.
            for key, value in checkpoint["state_dict"].items():
                if key.startswith("module."):
                    if comm.get_world_size() == 1:
                        key = key[7:]  # module.xxx.xxx -> xxx.xxx
                else:
                    if comm.get_world_size() > 1:
                        key = "module." + key  # xxx.xxx -> module.xxx.xxx
                weight[key] = value
            model.load_state_dict(weight, strict=True)
            self.logger.info(
                "=> Loaded weight '{}' (epoch {})".format(
                    self.cfg.weight, checkpoint["epoch"]
                )
            )
        else:
            raise RuntimeError("=> No checkpoint found at '{}'".format(self.cfg.weight))
        return model

    def build_test_loader(self):
        """Build the test dataloader, with a distributed sampler when world size > 1."""
        test_dataset = build_dataset(self.cfg.data.test)
        if comm.get_world_size() > 1:
            test_sampler = torch.utils.data.distributed.DistributedSampler(test_dataset)
        else:
            test_sampler = None
        test_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=self.cfg.batch_size_test_per_gpu,
            shuffle=False,
            # NOTE(review): the worker count reuses the test batch size rather
            # than a worker setting - confirm this is intended.
            num_workers=self.cfg.batch_size_test_per_gpu,
            pin_memory=True,
            sampler=test_sampler,
            collate_fn=self.__class__.collate_fn,
        )
        return test_loader

    def test(self):
        """Run the evaluation; must be implemented by subclasses."""
        raise NotImplementedError

    @staticmethod
    def collate_fn(batch):
        """Collate ``batch`` with the module-level ``collate_fn``.

        The result is returned; the previous ``raise collate_fn(batch)``
        always failed because the collated batch is not an exception.
        """
        return collate_fn(batch)
self.cfg.data.test.type == "SemanticKITTIDataset" and comm.is_main_process() + ): + make_dirs(os.path.join(save_path, "submit")) + elif self.cfg.data.test.type == "NuScenesDataset" and comm.is_main_process(): + import json + + make_dirs(os.path.join(save_path, "submit", "lidarseg", "test")) + make_dirs(os.path.join(save_path, "submit", "test")) + submission = dict( + meta=dict( + use_camera=False, + use_lidar=True, + use_radar=False, + use_map=False, + use_external=False, + ) + ) + with open( + os.path.join(save_path, "submit", "test", "submission.json"), "w" + ) as f: + json.dump(submission, f, indent=4) + comm.synchronize() + record = {} + # fragment inference + for idx, data_dict in enumerate(self.test_loader): + start = time.time() + data_dict = data_dict[0] # current assume batch size is 1 + fragment_list = data_dict.pop("fragment_list") + segment = data_dict.pop("segment") + data_name = data_dict.pop("name") + pred_save_path = os.path.join(save_path, "{}_pred.npy".format(data_name)) + if os.path.isfile(pred_save_path): + logger.info( + "{}/{}: {}, loaded pred and label.".format( + idx + 1, len(self.test_loader), data_name + ) + ) + pred = np.load(pred_save_path) + if "origin_segment" in data_dict.keys(): + segment = data_dict["origin_segment"] + else: + pred = torch.zeros((segment.size, self.cfg.data.num_classes)).cuda() + for i in range(len(fragment_list)): + fragment_batch_size = 1 + s_i, e_i = i * fragment_batch_size, min( + (i + 1) * fragment_batch_size, len(fragment_list) + ) + input_dict = collate_fn(fragment_list[s_i:e_i]) + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + idx_part = input_dict["index"] + with torch.no_grad(): + pred_part = self.model(input_dict)["seg_logits"] # (n, k) + pred_part = F.softmax(pred_part, -1) + if self.cfg.empty_cache: + torch.cuda.empty_cache() + bs = 0 + for be in input_dict["offset"]: + pred[idx_part[bs:be], :] += 
pred_part[bs:be] + bs = be + + logger.info( + "Test: {}/{}-{data_name}, Batch: {batch_idx}/{batch_num}".format( + idx + 1, + len(self.test_loader), + data_name=data_name, + batch_idx=i, + batch_num=len(fragment_list), + ) + ) + if self.cfg.data.test.type == "ScanNetPPDataset": + pred = pred.topk(3, dim=1)[1].data.cpu().numpy() + else: + pred = pred.max(1)[1].data.cpu().numpy() + if "origin_segment" in data_dict.keys(): + assert "inverse" in data_dict.keys() + pred = pred[data_dict["inverse"]] + segment = data_dict["origin_segment"] + np.save(pred_save_path, pred) + if ( + self.cfg.data.test.type == "ScanNetDataset" + or self.cfg.data.test.type == "ScanNet200Dataset" + ): + np.savetxt( + os.path.join(save_path, "submit", "{}.txt".format(data_name)), + self.test_loader.dataset.class2id[pred].reshape([-1, 1]), + fmt="%d", + ) + elif self.cfg.data.test.type == "ScanNetPPDataset": + np.savetxt( + os.path.join(save_path, "submit", "{}.txt".format(data_name)), + pred.astype(np.int32), + delimiter=",", + fmt="%d", + ) + pred = pred[:, 0] # for mIoU, TODO: support top3 mIoU + elif self.cfg.data.test.type == "SemanticKITTIDataset": + # 00_000000 -> 00, 000000 + sequence_name, frame_name = data_name.split("_") + os.makedirs( + os.path.join( + save_path, "submit", "sequences", sequence_name, "predictions" + ), + exist_ok=True, + ) + submit = pred.astype(np.uint32) + submit = np.vectorize( + self.test_loader.dataset.learning_map_inv.__getitem__ + )(submit).astype(np.uint32) + submit.tofile( + os.path.join( + save_path, + "submit", + "sequences", + sequence_name, + "predictions", + f"{frame_name}.label", + ) + ) + elif self.cfg.data.test.type == "NuScenesDataset": + np.array(pred + 1).astype(np.uint8).tofile( + os.path.join( + save_path, + "submit", + "lidarseg", + "test", + "{}_lidarseg.bin".format(data_name), + ) + ) + + intersection, union, target = intersection_and_union( + pred, segment, self.cfg.data.num_classes, self.cfg.data.ignore_index + ) + 
intersection_meter.update(intersection) + union_meter.update(union) + target_meter.update(target) + record[data_name] = dict( + intersection=intersection, union=union, target=target + ) + + mask = union != 0 + iou_class = intersection / (union + 1e-10) + iou = np.mean(iou_class[mask]) + acc = sum(intersection) / (sum(target) + 1e-10) + + m_iou = np.mean(intersection_meter.sum / (union_meter.sum + 1e-10)) + m_acc = np.mean(intersection_meter.sum / (target_meter.sum + 1e-10)) + + batch_time.update(time.time() - start) + logger.info( + "Test: {} [{}/{}]-{} " + "Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) " + "Accuracy {acc:.4f} ({m_acc:.4f}) " + "mIoU {iou:.4f} ({m_iou:.4f})".format( + data_name, + idx + 1, + len(self.test_loader), + segment.size, + batch_time=batch_time, + acc=acc, + m_acc=m_acc, + iou=iou, + m_iou=m_iou, + ) + ) + + logger.info("Syncing ...") + comm.synchronize() + record_sync = comm.gather(record, dst=0) + + if comm.is_main_process(): + record = {} + for _ in range(len(record_sync)): + r = record_sync.pop() + record.update(r) + del r + intersection = np.sum( + [meters["intersection"] for _, meters in record.items()], axis=0 + ) + union = np.sum([meters["union"] for _, meters in record.items()], axis=0) + target = np.sum([meters["target"] for _, meters in record.items()], axis=0) + + if self.cfg.data.test.type == "S3DISDataset": + torch.save( + dict(intersection=intersection, union=union, target=target), + os.path.join(save_path, f"{self.test_loader.dataset.split}.pth"), + ) + + iou_class = intersection / (union + 1e-10) + accuracy_class = intersection / (target + 1e-10) + mIoU = np.mean(iou_class) + mAcc = np.mean(accuracy_class) + allAcc = sum(intersection) / (sum(target) + 1e-10) + + logger.info( + "Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}".format( + mIoU, mAcc, allAcc + ) + ) + for i in range(self.cfg.data.num_classes): + logger.info( + "Class_{idx} - {name} Result: iou/accuracy {iou:.4f}/{accuracy:.4f}".format( + idx=i, + 
name=self.cfg.data.names[i], + iou=iou_class[i], + accuracy=accuracy_class[i], + ) + ) + logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") + + @staticmethod + def collate_fn(batch): + return batch + + +@TESTERS.register_module() +class DINOSemSegTester(TesterBase): + def test(self): + assert self.test_loader.batch_size == 1 + logger = get_root_logger() + logger.info(">>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>") + + batch_time = AverageMeter() + intersection_meter = AverageMeter() + union_meter = AverageMeter() + target_meter = AverageMeter() + self.model.eval() + + save_path = os.path.join(self.cfg.save_path, "result") + make_dirs(save_path) + # create submit folder only on main process + if ( + self.cfg.data.test.type == "ScanNetDataset" + or self.cfg.data.test.type == "ScanNet200Dataset" + or self.cfg.data.test.type == "ScanNetPPDataset" + ) and comm.is_main_process(): + make_dirs(os.path.join(save_path, "submit")) + elif ( + self.cfg.data.test.type == "SemanticKITTIDataset" and comm.is_main_process() + ): + make_dirs(os.path.join(save_path, "submit")) + elif self.cfg.data.test.type == "NuScenesDataset" and comm.is_main_process(): + import json + + make_dirs(os.path.join(save_path, "submit", "lidarseg", "test")) + make_dirs(os.path.join(save_path, "submit", "test")) + submission = dict( + meta=dict( + use_camera=False, + use_lidar=True, + use_radar=False, + use_map=False, + use_external=False, + ) + ) + with open( + os.path.join(save_path, "submit", "test", "submission.json"), "w" + ) as f: + json.dump(submission, f, indent=4) + comm.synchronize() + record = {} + # fragment inference + for idx, data_dict in enumerate(self.test_loader): + end = time.time() + data_dict = data_dict[0] # current assume batch size is 1 + fragment_list = data_dict.pop("fragment_list") + segment = data_dict.pop("segment") + data_name = data_dict.pop("name") + dino_coord = data_dict.pop("dino_coord").cuda(non_blocking=True) + dino_feat = 
data_dict.pop("dino_feat").cuda(non_blocking=True) + dino_offset = data_dict.pop("dino_offset").cuda(non_blocking=True) + pred_save_path = os.path.join(save_path, "{}_pred.npy".format(data_name)) + if os.path.isfile(pred_save_path): + logger.info( + "{}/{}: {}, loaded pred and label.".format( + idx + 1, len(self.test_loader), data_name + ) + ) + pred = np.load(pred_save_path) + if "origin_segment" in data_dict.keys(): + segment = data_dict["origin_segment"] + else: + pred = torch.zeros((segment.size, self.cfg.data.num_classes)).cuda() + for i in range(len(fragment_list)): + fragment_batch_size = 1 + s_i, e_i = i * fragment_batch_size, min( + (i + 1) * fragment_batch_size, len(fragment_list) + ) + input_dict = collate_fn(fragment_list[s_i:e_i]) + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + input_dict["dino_coord"] = dino_coord + input_dict["dino_feat"] = dino_feat + input_dict["dino_offset"] = dino_offset + idx_part = input_dict["index"] + with torch.no_grad(): + pred_part = self.model(input_dict)["seg_logits"] # (n, k) + pred_part = F.softmax(pred_part, -1) + if self.cfg.empty_cache: + torch.cuda.empty_cache() + bs = 0 + for be in input_dict["offset"]: + pred[idx_part[bs:be], :] += pred_part[bs:be] + bs = be + + logger.info( + "Test: {}/{}-{data_name}, Batch: {batch_idx}/{batch_num}".format( + idx + 1, + len(self.test_loader), + data_name=data_name, + batch_idx=i, + batch_num=len(fragment_list), + ) + ) + if self.cfg.data.test.type == "ScanNetPPDataset": + pred = pred.topk(3, dim=1)[1].data.cpu().numpy() + else: + pred = pred.max(1)[1].data.cpu().numpy() + if "origin_segment" in data_dict.keys(): + assert "inverse" in data_dict.keys() + pred = pred[data_dict["inverse"]] + segment = data_dict["origin_segment"] + np.save(pred_save_path, pred) + if ( + self.cfg.data.test.type == "ScanNetDataset" + or self.cfg.data.test.type == "ScanNet200Dataset" + ): + np.savetxt( + 
os.path.join(save_path, "submit", "{}.txt".format(data_name)), + self.test_loader.dataset.class2id[pred].reshape([-1, 1]), + fmt="%d", + ) + elif self.cfg.data.test.type == "ScanNetPPDataset": + np.savetxt( + os.path.join(save_path, "submit", "{}.txt".format(data_name)), + pred.astype(np.int32), + delimiter=",", + fmt="%d", + ) + pred = pred[:, 0] # for mIoU, TODO: support top3 mIoU + elif self.cfg.data.test.type == "SemanticKITTIDataset": + # 00_000000 -> 00, 000000 + sequence_name, frame_name = data_name.split("_") + os.makedirs( + os.path.join( + save_path, "submit", "sequences", sequence_name, "predictions" + ), + exist_ok=True, + ) + submit = pred.astype(np.uint32) + submit = np.vectorize( + self.test_loader.dataset.learning_map_inv.__getitem__ + )(submit).astype(np.uint32) + submit.tofile( + os.path.join( + save_path, + "submit", + "sequences", + sequence_name, + "predictions", + f"{frame_name}.label", + ) + ) + elif self.cfg.data.test.type == "NuScenesDataset": + np.array(pred + 1).astype(np.uint8).tofile( + os.path.join( + save_path, + "submit", + "lidarseg", + "test", + "{}_lidarseg.bin".format(data_name), + ) + ) + + intersection, union, target = intersection_and_union( + pred, segment, self.cfg.data.num_classes, self.cfg.data.ignore_index + ) + intersection_meter.update(intersection) + union_meter.update(union) + target_meter.update(target) + record[data_name] = dict( + intersection=intersection, union=union, target=target + ) + + mask = union != 0 + iou_class = intersection / (union + 1e-10) + iou = np.mean(iou_class[mask]) + acc = sum(intersection) / (sum(target) + 1e-10) + + m_iou = np.mean(intersection_meter.sum / (union_meter.sum + 1e-10)) + m_acc = np.mean(intersection_meter.sum / (target_meter.sum + 1e-10)) + + batch_time.update(time.time() - end) + logger.info( + "Test: {} [{}/{}]-{} " + "Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) " + "Accuracy {acc:.4f} ({m_acc:.4f}) " + "mIoU {iou:.4f} ({m_iou:.4f})".format( + data_name, + idx + 1, + 
@TESTERS.register_module()
class ClsTester(TesterBase):
    """Tester for classification: a single forward pass per test batch."""

    def test(self):
        """Evaluate the model on the test loader and log the metrics.

        Per batch, the running accuracy of that batch is logged; after the
        loop, mIoU / mAcc / allAcc and the per-class IoU and accuracy are
        logged from the accumulated meters.
        """
        logger = get_root_logger()
        logger.info(">>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>")
        batch_time = AverageMeter()
        intersection_meter = AverageMeter()
        union_meter = AverageMeter()
        target_meter = AverageMeter()
        meters = (intersection_meter, union_meter, target_meter)
        self.model.eval()

        for batch_idx, input_dict in enumerate(self.test_loader):
            # Move every tensor entry of the batch onto the GPU.
            for key, value in input_dict.items():
                if isinstance(value, torch.Tensor):
                    input_dict[key] = value.cuda(non_blocking=True)

            tic = time.time()
            with torch.no_grad():
                logits = self.model(input_dict)["cls_logits"]
            pred = logits.max(1)[1]
            stats = intersection_and_union_gpu(
                pred,
                input_dict["category"],
                self.cfg.data.num_classes,
                self.cfg.data.ignore_index,
            )
            if comm.get_world_size() > 1:
                # Sum the per-class counts across all processes.
                for stat in stats:
                    dist.all_reduce(stat)
            for meter, stat in zip(meters, stats):
                meter.update(stat.cpu().numpy())

            accuracy = sum(intersection_meter.val) / (sum(target_meter.val) + 1e-10)
            batch_time.update(time.time() - tic)

            logger.info(
                "Test: [{}/{}] "
                "Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) "
                "Accuracy {accuracy:.4f} ".format(
                    batch_idx + 1,
                    len(self.test_loader),
                    batch_time=batch_time,
                    accuracy=accuracy,
                )
            )

        inter_sum = intersection_meter.sum
        target_sum = target_meter.sum
        iou_class = inter_sum / (union_meter.sum + 1e-10)
        accuracy_class = inter_sum / (target_sum + 1e-10)
        mIoU = np.mean(iou_class)
        mAcc = np.mean(accuracy_class)
        allAcc = sum(inter_sum) / (sum(target_sum) + 1e-10)
        logger.info(
            "Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.".format(
                mIoU, mAcc, allAcc
            )
        )

        for class_idx in range(self.cfg.data.num_classes):
            logger.info(
                "Class_{idx} - {name} Result: iou/accuracy {iou:.4f}/{accuracy:.4f}".format(
                    idx=class_idx,
                    name=self.cfg.data.names[class_idx],
                    iou=iou_class[class_idx],
                    accuracy=accuracy_class[class_idx],
                )
            )
        logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<")

    @staticmethod
    def collate_fn(batch):
        """Assemble a batch with the module-level ``collate_fn``."""
        return collate_fn(batch)
metric + self.best_idx = 0 + self.best_record = None + self.best_metric = 0 + + def test(self): + for i in range(self.num_repeat): + logger = get_root_logger() + logger.info(f">>>>>>>>>>>>>>>> Start Evaluation {i + 1} >>>>>>>>>>>>>>>>") + record = self.test_once() + if comm.is_main_process(): + if record[self.metric] > self.best_metric: + self.best_record = record + self.best_idx = i + self.best_metric = record[self.metric] + info = f"Current best record is Evaluation {i + 1}: " + for m in self.best_record.keys(): + info += f"{m}: {self.best_record[m]:.4f} " + logger.info(info) + + def test_once(self): + logger = get_root_logger() + batch_time = AverageMeter() + intersection_meter = AverageMeter() + target_meter = AverageMeter() + record = {} + self.model.eval() + + for idx, data_dict in enumerate(self.test_loader): + end = time.time() + data_dict = data_dict[0] # current assume batch size is 1 + voting_list = data_dict.pop("voting_list") + category = data_dict.pop("category") + data_name = data_dict.pop("name") + # pred = torch.zeros([1, self.cfg.data.num_classes]).cuda() + # for i in range(len(voting_list)): + # input_dict = voting_list[i] + # for key in input_dict.keys(): + # if isinstance(input_dict[key], torch.Tensor): + # input_dict[key] = input_dict[key].cuda(non_blocking=True) + # with torch.no_grad(): + # pred += F.softmax(self.model(input_dict)["cls_logits"], -1) + input_dict = collate_fn(voting_list) + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + with torch.no_grad(): + pred = F.softmax(self.model(input_dict)["cls_logits"], -1).sum( + 0, keepdim=True + ) + pred = pred.max(1)[1].cpu().numpy() + intersection, union, target = intersection_and_union( + pred, category, self.cfg.data.num_classes, self.cfg.data.ignore_index + ) + intersection_meter.update(intersection) + target_meter.update(target) + record[data_name] = dict(intersection=intersection, target=target) + 
@TESTERS.register_module()
class PartSegTester(TesterBase):
    """Tester for part segmentation with prediction accumulation per sample."""

    def test(self):
        """Evaluate every sample of the test dataset and log part IoU.

        Each dataset item yields a list of input dicts and a label array.
        The inputs are run in chunks of ``cfg.batch_size_test``; the softmax
        scores of all of them are summed per point before taking the
        arg-max. Part IoU is averaged per sample, accumulated per category,
        and reported as instance mIoU and category mIoU.
        """
        test_dataset = self.test_loader.dataset
        logger = get_root_logger()
        logger.info(">>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>")

        batch_time = AverageMeter()

        num_categories = len(self.test_loader.dataset.categories)
        iou_category, iou_count = np.zeros(num_categories), np.zeros(num_categories)
        self.model.eval()

        save_path = os.path.join(
            self.cfg.save_path, "result", "test_epoch{}".format(self.cfg.test_epoch)
        )
        make_dirs(save_path)

        for idx in range(len(test_dataset)):
            end = time.time()
            data_name = test_dataset.get_data_name(idx)

            data_dict_list, label = test_dataset[idx]
            pred = torch.zeros((label.size, self.cfg.data.num_classes)).cuda()
            batch_num = int(np.ceil(len(data_dict_list) / self.cfg.batch_size_test))
            for i in range(batch_num):
                s_i, e_i = i * self.cfg.batch_size_test, min(
                    (i + 1) * self.cfg.batch_size_test, len(data_dict_list)
                )
                input_dict = collate_fn(data_dict_list[s_i:e_i])
                for key in input_dict.keys():
                    if isinstance(input_dict[key], torch.Tensor):
                        input_dict[key] = input_dict[key].cuda(non_blocking=True)
                with torch.no_grad():
                    pred_part = self.model(input_dict)["cls_logits"]
                    pred_part = F.softmax(pred_part, -1)
                if self.cfg.empty_cache:
                    torch.cuda.empty_cache()
                # One slice of per-point scores for each input in the chunk.
                pred_part = pred_part.reshape(-1, label.size, self.cfg.data.num_classes)
                # Sum the slices into the running scores. Tensors have no
                # ``total`` method; ``sum`` is the reduction needed here.
                pred = pred + pred_part.sum(dim=0)
                logger.info(
                    "Test: {} {}/{}, Batch: {batch_idx}/{batch_num}".format(
                        data_name,
                        idx + 1,
                        len(test_dataset),
                        batch_idx=i,
                        batch_num=batch_num,
                    )
                )
            pred = pred.max(1)[1].data.cpu().numpy()

            category_index = data_dict_list[0]["cls_token"]
            category = self.test_loader.dataset.categories[category_index]
            parts_idx = self.test_loader.dataset.category2part[category]
            parts_iou = np.zeros(len(parts_idx))
            for j, part in enumerate(parts_idx):
                label_is_part = label == part
                pred_is_part = pred == part
                if (np.sum(label_is_part) == 0) and (np.sum(pred_is_part) == 0):
                    # Part absent from both label and prediction: count as perfect.
                    parts_iou[j] = 1.0
                else:
                    inter = label_is_part & pred_is_part
                    union = label_is_part | pred_is_part
                    parts_iou[j] = np.sum(inter) / (np.sum(union) + 1e-10)
            iou_category[category_index] += parts_iou.mean()
            iou_count[category_index] += 1

            batch_time.update(time.time() - end)
            logger.info(
                "Test: {} [{}/{}] "
                "Batch {batch_time.val:.3f} "
                "({batch_time.avg:.3f}) ".format(
                    data_name, idx + 1, len(self.test_loader), batch_time=batch_time
                )
            )

        ins_mIoU = iou_category.sum() / (iou_count.sum() + 1e-10)
        cat_mIoU = (iou_category / (iou_count + 1e-10)).mean()
        logger.info(
            "Val result: ins.mIoU/cat.mIoU {:.4f}/{:.4f}.".format(ins_mIoU, cat_mIoU)
        )
        for i in range(num_categories):
            logger.info(
                "Class_{idx}-{name} Result: iou_cat/num_sample {iou_cat:.4f}/{iou_count:.4f}".format(
                    idx=i,
                    name=self.test_loader.dataset.categories[i],
                    iou_cat=iou_category[i] / (iou_count[i] + 1e-10),
                    iou_count=int(iou_count[i]),
                )
            )
        logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<")

    @staticmethod
    def collate_fn(batch):
        """Assemble a batch with the module-level ``collate_fn``."""
        return collate_fn(batch)
self.max_epoch = 0 + self.max_iter = 0 + self.comm_info = dict() + self.data_iterator: Iterator = enumerate([]) + self.storage: EventStorage + self.writer: SummaryWriter + + def register_hooks(self, hooks) -> None: + hooks = build_hooks(hooks) + for h in hooks: + assert isinstance(h, HookBase) + # To avoid circular reference, hooks and trainer cannot own each other. + # This normally does not matter, but will cause memory leak if the + # involved objects contain __del__: + # See http://engineering.hearsaysocial.com/2013/06/16/circular-references-in-python/ + h.trainer = weakref.proxy(self) + self.hooks.extend(hooks) + + def train(self): + with EventStorage() as self.storage: + # => before train + self.before_train() + for self.epoch in range(self.start_epoch, self.max_epoch): + # => before epoch + self.before_epoch() + # => run_epoch + for ( + self.comm_info["iter"], + self.comm_info["input_dict"], + ) in self.data_iterator: + # => before_step + self.before_step() + # => run_step + self.run_step() + # => after_step + self.after_step() + # => after epoch + self.after_epoch() + # => after train + self.after_train() + + def before_train(self): + for h in self.hooks: + h.before_train() + + def before_epoch(self): + for h in self.hooks: + h.before_epoch() + + def before_step(self): + for h in self.hooks: + h.before_step() + + def run_step(self): + raise NotImplementedError + + def after_step(self): + for h in self.hooks: + h.after_step() + + def after_epoch(self): + for h in self.hooks: + h.after_epoch() + self.storage.reset_histories() + + def after_train(self): + # Sync GPU before running train hooks + comm.synchronize() + for h in self.hooks: + h.after_train() + if comm.is_main_process(): + self.writer.close() + + +@TRAINERS.register_module("DefaultTrainer") +class Trainer(TrainerBase): + def __init__(self, cfg): + super(Trainer, self).__init__() + self.epoch = 0 + self.start_epoch = 0 + self.max_epoch = cfg.eval_epoch + self.best_metric_value = -torch.inf + 
self.logger = get_root_logger( + log_file=os.path.join(cfg.save_path, "train.log"), + file_mode="a" if cfg.resume else "w", + ) + self.logger.info("=> Loading config ...") + self.cfg = cfg + self.logger.info(f"Save path: {cfg.save_path}") + self.logger.info(f"Config:\n{cfg.pretty_text}") + self.logger.info("=> Building model ...") + self.model = self.build_model() + self.logger.info("=> Building writer ...") + self.writer = self.build_writer() + self.logger.info("=> Building train dataset & dataloader ...") + self.train_loader = self.build_train_loader() + self.logger.info("=> Building val dataset & dataloader ...") + self.val_loader = self.build_val_loader() + self.logger.info("=> Building optimize, scheduler, scaler(amp) ...") + self.optimizer = self.build_optimizer() + self.scheduler = self.build_scheduler() + self.scaler = self.build_scaler() + self.logger.info("=> Building hooks ...") + self.register_hooks(self.cfg.hooks) + self._gradient_accumulation_counter = 0 + + # Initialize memory profiling settings + self.memory_snapshot_enabled = getattr(self.cfg, 'memory_snapshot', False) + self.memory_snapshot_interval = getattr(self.cfg, 'memory_snapshot_interval', 100) + self.memory_snapshot_max_entries = getattr(self.cfg, 'memory_snapshot_max_entries', 100000) + + if self.memory_snapshot_enabled and torch.cuda.is_available() and comm.is_main_process(): + self.logger.info(f"Memory Snapshot enabled: will dump every {self.memory_snapshot_interval} iterations") + self.logger.info(f"Memory Snapshot max entries: {self.memory_snapshot_max_entries}") + + # Log if backward pass is disabled for debugging + if getattr(self.cfg, 'disable_backward', False): + self.logger.warning("!!! 
BACKWARD PASS AND OPTIMIZER STEP ARE DISABLED (DEBUG MODE) !!!") + + def train(self): + with EventStorage() as self.storage, ExceptionWriter(): + # => before train + self.before_train() + + self.logger.info(">>>>>>>>>>>>>>>> Start Training >>>>>>>>>>>>>>>>") + for self.epoch in range(self.start_epoch, self.max_epoch): + # => before epoch + if comm.get_world_size() > 1: + self.train_loader.sampler.set_epoch(self.epoch) + self.model.train() + self.data_iterator = enumerate(self.train_loader) + self.before_epoch() + # => run_epoch + for ( + self.comm_info["iter"], + self.comm_info["input_dict"], + ) in self.data_iterator: + # => before_step + self._start_memory_snapshot_for_iteration() + self.before_step() + # => run_step + self.run_step() + # => after_step + self.after_step() + self._dump_memory_snapshot() + # => after epoch + self.after_epoch() + + # => after train + self.after_train() + + def run_step(self): + if version.parse(torch.__version__) >= version.parse("2.4"): + auto_cast = partial(torch.amp.autocast, device_type="cuda") + else: + # deprecated warning + auto_cast = torch.cuda.amp.autocast + + input_dict = self.comm_info["input_dict"] + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + + # Check if backward pass is disabled for debugging + disable_backward = getattr(self.cfg, 'disable_backward', False) + + # Only clear gradients on first accumulation step (if backward is enabled) + if not disable_backward and self._gradient_accumulation_counter == 0: + self.optimizer.zero_grad() + + # Forward pass - disable autograd if backward is disabled + # Use inference_mode (more efficient) or no_grad to prevent graph construction + forward_context = torch.inference_mode() if disable_backward else torch.enable_grad() + + with forward_context, auto_cast( + enabled=self.cfg.enable_amp, dtype=AMP_DTYPE[self.cfg.amp_dtype] + ): + output_dict = self.model(input_dict) + loss = ( + 
output_dict["loss"] / self.cfg.gradient_accumulation_steps + ) # scale loss + + if not disable_backward: + # Backward pass + if self.cfg.enable_amp: + self.scaler.scale(loss).backward() + else: + loss.backward() + self._gradient_accumulation_counter += 1 + + # Perform optimizer step only when enough gradients have accumulated + if self._gradient_accumulation_counter >= self.cfg.gradient_accumulation_steps: + if self.cfg.enable_amp: + self.scaler.unscale_(self.optimizer) + if self.cfg.clip_grad is not None: + torch.nn.utils.clip_grad_norm_( + self.model.parameters(), self.cfg.clip_grad + ) + self.scaler.step(self.optimizer) + + # When enable amp, optimizer.step call are skipped if the loss scaling factor is too large. + # Fix torch warning scheduler step before optimizer step. + scale = self.scaler.get_scale() + self.scaler.update() + if scale <= self.scaler.get_scale(): + self.scheduler.step() + else: + if self.cfg.clip_grad is not None: + torch.nn.utils.clip_grad_norm_( + self.model.parameters(), self.cfg.clip_grad + ) + self.optimizer.step() + self.scheduler.step() + + # Reset grad accumulation counter + self._gradient_accumulation_counter = 0 + + if self.cfg.empty_cache: + torch.cuda.empty_cache() + self.comm_info["model_output_dict"] = output_dict + + def _start_memory_snapshot_for_iteration(self): + """Start recording memory snapshot for this specific iteration if needed.""" + if not self.memory_snapshot_enabled: + return + if not torch.cuda.is_available(): + return + if not comm.is_main_process(): + return + + # Check if we should record this iteration + current_iter = self.comm_info.get("iter", 0) + if current_iter % self.memory_snapshot_interval != 0: + return + + self.logger.info(f"Starting memory snapshot recording for iteration {current_iter}") + torch.cuda.memory._record_memory_history( + max_entries=self.memory_snapshot_max_entries + ) + + def _dump_memory_snapshot(self): + """Dump memory snapshot to disk for this specific iteration.""" + if not 
def after_epoch(self):
    """Finish an epoch: notify hooks, reset storage histories, maybe free cache.

    Each hook's ``after_epoch`` is called in registration order, then
    ``self.storage.reset_histories()`` runs. The CUDA cache is emptied only
    when ``cfg.empty_cache_per_epoch`` is truthy.
    """
    for hook in self.hooks:
        hook.after_epoch()
    self.storage.reset_histories()

    if not self.cfg.empty_cache_per_epoch:
        return
    torch.cuda.empty_cache()
def build_train_loader(self):
    """Build the training ``DataLoader`` from ``cfg.data.train``.

    A ``DistributedSampler`` is used when more than one process is running;
    otherwise the loader shuffles itself. Workers are seeded through
    ``worker_init_fn`` only when ``cfg.seed`` is set. Batches are collated
    with ``point_collate_fn`` using ``cfg.mix_prob``.

    Returns:
        The configured ``torch.utils.data.DataLoader``.
    """
    train_data = build_dataset(self.cfg.data.train)

    if comm.get_world_size() > 1:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_data)
    else:
        train_sampler = None

    init_fn = (
        partial(
            worker_init_fn,
            num_workers=self.cfg.num_worker_per_gpu,
            rank=comm.get_rank(),
            seed=self.cfg.seed,
        )
        if self.cfg.seed is not None
        else None
    )

    num_workers = self.cfg.num_worker_per_gpu
    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=self.cfg.batch_size_per_gpu,
        # shuffle and sampler are mutually exclusive in DataLoader
        shuffle=(train_sampler is None),
        num_workers=num_workers,
        sampler=train_sampler,
        collate_fn=partial(point_collate_fn, mix_prob=self.cfg.mix_prob),
        pin_memory=True,
        worker_init_fn=init_fn,
        drop_last=len(train_data) > self.cfg.batch_size,
        # DataLoader raises ValueError for persistent_workers=True when
        # num_workers == 0, so only request it when workers exist.
        persistent_workers=num_workers > 0,
    )
    return train_loader
def build_model(cfg):
    """Build a model from ``cfg`` through the ``MODELS`` registry.

    The config is deep-copied first and the copy is what gets passed to
    ``MODELS.build``.
    """
    cfg_copy = copy.deepcopy(cfg)
    return MODELS.build(cfg_copy)
@MODELS.register_module()
class DefaultSegmentorV2(nn.Module):
    """Segmentor made of a registry-built backbone and a per-point head.

    The head is ``nn.Linear(backbone_out_channels, num_classes)`` when
    ``num_classes > 0`` and ``nn.Identity`` otherwise. With
    ``freeze_backbone=True`` every backbone parameter has ``requires_grad``
    switched off.
    """

    def __init__(
        self,
        num_classes,
        backbone_out_channels,
        backbone=None,
        criteria=None,
        freeze_backbone=False,
    ):
        super().__init__()
        if num_classes > 0:
            self.seg_head = nn.Linear(backbone_out_channels, num_classes)
        else:
            self.seg_head = nn.Identity()
        self.backbone = build_model(backbone)
        self.criteria = build_criteria(criteria)
        self.freeze_backbone = freeze_backbone
        if self.freeze_backbone:
            for param in self.backbone.parameters():
                param.requires_grad = False

    def forward(self, input_dict, return_point=False):
        """Run backbone and head; return a dict with loss and/or logits.

        Keys of the returned dict:
            * ``"point"``      - only when ``return_point`` is true.
            * ``"loss"``       - in training mode, or whenever ``input_dict``
              contains ``"segment"``.
            * ``"seg_logits"`` - whenever the module is not in training mode.
        """
        point = self.backbone(Point(input_dict))
        # Backbone added after v1.5.0 return Point instead of feat and use DefaultSegmentorV2
        # TODO: remove this part after make all backbone return Point only.
        if isinstance(point, Point):
            # Walk back through the pooling chain, concatenating each level's
            # features (gathered with its inverse index) onto its parent.
            while "pooling_parent" in point.keys():
                assert "pooling_inverse" in point.keys()
                parent_point = point.pop("pooling_parent")
                inverse_index = point.pop("pooling_inverse")
                parent_point.feat = torch.cat(
                    [parent_point.feat, point.feat[inverse_index]], dim=-1
                )
                point = parent_point
            feat = point.feat
        else:
            feat = point

        seg_logits = self.seg_head(feat)

        return_dict = {}
        if return_point:
            # PCA evaluator parse feat and coord in point
            return_dict["point"] = point
        # train, or eval with ground truth available
        if self.training or "segment" in input_dict.keys():
            return_dict["loss"] = self.criteria(seg_logits, input_dict["segment"])
        # eval / test
        if not self.training:
            return_dict["seg_logits"] = seg_logits
        return return_dict
"pooling_inverse" in point.keys() +# inverse = point.pooling_inverse +# parent.feat = torch.cat([parent.feat, point.feat[inverse]], dim=-1) +# point = point_list[0] +# while "pooling_parent" in point.keys(): +# assert "pooling_inverse" in point.keys() +# parent = point.pop("pooling_parent") +# inverse = point.pooling_inverse +# parent.feat = torch.cat([parent.feat, point.feat[inverse]], dim=-1) +# point = parent +# feat = [point.feat] +# else: +# feat = [] +# dino_coord = input_dict["dino_coord"] +# dino_feat = input_dict["dino_feat"] +# dino_offset = input_dict["dino_offset"] +# idx = torch_cluster.knn( +# x=dino_coord, +# y=point.origin_coord, +# batch_x=offset2batch(dino_offset), +# batch_y=offset2batch(point.origin_offset), +# k=1, +# )[1] + +# feat.append(dino_feat[idx]) +# feat = torch.concatenate(feat, dim=-1) +# seg_logits = self.seg_head(feat) +# return_dict = dict() +# if return_point: +# # PCA evaluator parse feat and coord in point +# return_dict["point"] = point +# # train +# if self.training: +# loss = self.criteria(seg_logits, input_dict["segment"]) +# return_dict["loss"] = loss +# # eval +# elif "segment" in input_dict.keys(): +# loss = self.criteria(seg_logits, input_dict["segment"]) +# return_dict["loss"] = loss +# return_dict["seg_logits"] = seg_logits +# # test +# else: +# return_dict["seg_logits"] = seg_logits +# return return_dict + + +# @MODELS.register_module() +# class DefaultClassifier(nn.Module): +# def __init__( +# self, +# backbone=None, +# criteria=None, +# num_classes=40, +# backbone_embed_dim=256, +# ): +# super().__init__() +# self.backbone = build_model(backbone) +# self.criteria = build_criteria(criteria) +# self.num_classes = num_classes +# self.backbone_embed_dim = backbone_embed_dim +# self.cls_head = nn.Sequential( +# nn.Linear(backbone_embed_dim, 256), +# nn.BatchNorm1d(256), +# nn.ReLU(inplace=True), +# nn.Dropout(p=0.5), +# nn.Linear(256, 128), +# nn.BatchNorm1d(128), +# nn.ReLU(inplace=True), +# nn.Dropout(p=0.5), +# 
nn.Linear(128, num_classes), +# ) + +# def forward(self, input_dict): +# point = Point(input_dict) +# point = self.backbone(point) +# # Backbone added after v1.5.0 return Point instead of feat +# # And after v1.5.0 feature aggregation for classification operated in classifier +# # TODO: remove this part after make all backbone return Point only. +# if isinstance(point, Point): +# point.feat = torch_scatter.segment_csr( +# src=point.feat, +# indptr=nn.functional.pad(point.offset, (1, 0)), +# reduce="mean", +# ) +# feat = point.feat +# else: +# feat = point +# cls_logits = self.cls_head(feat) +# if self.training: +# loss = self.criteria(cls_logits, input_dict["category"]) +# return dict(loss=loss) +# elif "category" in input_dict.keys(): +# loss = self.criteria(cls_logits, input_dict["category"]) +# return dict(loss=loss, cls_logits=cls_logits) +# else: +# return dict(cls_logits=cls_logits) diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/models/losses/__init__.py b/point_transformer_v3/external/pointcept_minimal/pointcept/models/losses/__init__.py new file mode 100644 index 0000000..0f4f29c --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/models/losses/__init__.py @@ -0,0 +1,4 @@ +from .builder import build_criteria, LOSSES + +from .misc import CrossEntropyLoss, SmoothCELoss, DiceLoss, FocalLoss, BinaryFocalLoss +from .lovasz import LovaszLoss diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/models/losses/builder.py b/point_transformer_v3/external/pointcept_minimal/pointcept/models/losses/builder.py new file mode 100644 index 0000000..ef642d9 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/models/losses/builder.py @@ -0,0 +1,31 @@ +""" +Criteria Builder + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
class Criteria(object):
    """Callable that sums the losses built from a list of loss configs.

    Each entry of ``cfg`` is built through ``LOSSES.build``. When no loss is
    configured, calling the object returns ``pred`` unchanged.
    """

    def __init__(self, cfg=None):
        if cfg is None:
            cfg = []
        self.cfg = cfg
        self.criteria = [LOSSES.build(cfg=loss_cfg) for loss_cfg in self.cfg]

    def __call__(self, pred, target):
        if not self.criteria:
            # loss computation occur in model
            return pred
        total = 0
        for criterion in self.criteria:
            total += criterion(pred, target)
        return total
1 in paper + """ + p = len(gt_sorted) + gts = gt_sorted.sum() + intersection = gts - gt_sorted.float().cumsum(0) + union = gts + (1 - gt_sorted).float().cumsum(0) + jaccard = 1.0 - intersection / union + if p > 1: # cover 1-pixel case + jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] + return jaccard + + +def _lovasz_hinge(logits, labels, per_image=True, ignore=None): + """ + Binary Lovasz hinge loss + logits: [B, H, W] Logits at each pixel (between -infinity and +infinity) + labels: [B, H, W] Tensor, binary ground truth masks (0 or 1) + per_image: compute the loss per image instead of per batch + ignore: void class id + """ + if per_image: + loss = mean( + _lovasz_hinge_flat( + *_flatten_binary_scores(log.unsqueeze(0), lab.unsqueeze(0), ignore) + ) + for log, lab in zip(logits, labels) + ) + else: + loss = _lovasz_hinge_flat(*_flatten_binary_scores(logits, labels, ignore)) + return loss + + +def _lovasz_hinge_flat(logits, labels): + """Binary Lovasz hinge loss + Args: + logits: [P] Logits at each prediction (between -infinity and +infinity) + labels: [P] Tensor, binary ground truth labels (0 or 1) + """ + if len(labels) == 0: + # only void pixels, the gradients should be 0 + return logits.sum() * 0.0 + signs = 2.0 * labels.float() - 1.0 + errors = 1.0 - logits * signs + errors_sorted, perm = torch.sort(errors, dim=0, descending=True) + perm = perm.data + gt_sorted = labels[perm] + grad = _lovasz_grad(gt_sorted) + loss = torch.dot(F.relu(errors_sorted), grad) + return loss + + +def _flatten_binary_scores(scores, labels, ignore=None): + """Flattens predictions in the batch (binary case) + Remove labels equal to 'ignore' + """ + scores = scores.view(-1) + labels = labels.view(-1) + if ignore is None: + return scores, labels + valid = labels != ignore + vscores = scores[valid] + vlabels = labels[valid] + return vscores, vlabels + + +def _lovasz_softmax( + probas, labels, classes="present", class_seen=None, per_image=False, ignore=None +): + """Multi-class Lovasz-Softmax 
loss + Args: + @param probas: [B, C, H, W] Class probabilities at each prediction (between 0 and 1). + Interpreted as binary (sigmoid) output with outputs of size [B, H, W]. + @param labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1) + @param classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. + @param per_image: compute the loss per image instead of per batch + @param ignore: void class labels + """ + if per_image: + loss = mean( + _lovasz_softmax_flat( + *_flatten_probas(prob.unsqueeze(0), lab.unsqueeze(0), ignore), + classes=classes + ) + for prob, lab in zip(probas, labels) + ) + else: + loss = _lovasz_softmax_flat( + *_flatten_probas(probas, labels, ignore), + classes=classes, + class_seen=class_seen + ) + return loss + + +def _lovasz_softmax_flat(probas, labels, classes="present", class_seen=None): + """Multi-class Lovasz-Softmax loss + Args: + @param probas: [P, C] Class probabilities at each prediction (between 0 and 1) + @param labels: [P] Tensor, ground truth labels (between 0 and C - 1) + @param classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. 
+ """ + if probas.numel() == 0: + # only void pixels, the gradients should be 0 + return probas * 0.0 + C = probas.size(1) + losses = [] + class_to_sum = list(range(C)) if classes in ["all", "present"] else classes + # for c in class_to_sum: + for c in labels.unique(): + if class_seen is None: + fg = (labels == c).type_as(probas) # foreground for class c + if classes == "present" and fg.sum() == 0: + continue + if C == 1: + if len(classes) > 1: + raise ValueError("Sigmoid output possible only with 1 class") + class_pred = probas[:, 0] + else: + class_pred = probas[:, c] + errors = (fg - class_pred).abs() + errors_sorted, perm = torch.sort(errors, 0, descending=True) + perm = perm.data + fg_sorted = fg[perm] + losses.append(torch.dot(errors_sorted, _lovasz_grad(fg_sorted))) + else: + if c in class_seen: + fg = (labels == c).type_as(probas) # foreground for class c + if classes == "present" and fg.sum() == 0: + continue + if C == 1: + if len(classes) > 1: + raise ValueError("Sigmoid output possible only with 1 class") + class_pred = probas[:, 0] + else: + class_pred = probas[:, c] + errors = (fg - class_pred).abs() + errors_sorted, perm = torch.sort(errors, 0, descending=True) + perm = perm.data + fg_sorted = fg[perm] + losses.append(torch.dot(errors_sorted, _lovasz_grad(fg_sorted))) + return mean(losses) + + +def _flatten_probas(probas, labels, ignore=None): + """Flattens predictions in the batch""" + if probas.dim() == 3: + # assumes output of a sigmoid layer + B, H, W = probas.size() + probas = probas.view(B, 1, H, W) + + C = probas.size(1) + probas = torch.movedim(probas, 1, -1) # [B, C, Di, Dj, ...] 
def mean(values, ignore_nan=False, empty=0):
    """Nan-mean compatible with generators.

    Args:
        values: any iterable (lists and generators both work).
        ignore_nan: when true, items for which ``isnan`` is true are dropped.
        empty: value returned for an empty input; the string ``"raise"``
            makes an empty input raise ``ValueError`` instead.

    Returns:
        The single element itself when there is exactly one, otherwise the
        accumulated sum divided by the element count.
    """
    stream = iter(values)
    if ignore_nan:
        stream = filterfalse(isnan, stream)

    nothing = object()
    acc = next(stream, nothing)
    if acc is nothing:
        if empty == "raise":
            raise ValueError("Empty mean")
        return empty

    count = 1
    for item in stream:
        acc += item
        count += 1
    return acc if count == 1 else acc / count
@LOSSES.register_module()
class SmoothCELoss(nn.Module):
    """Cross-entropy against label-smoothed one-hot targets.

    The true class receives weight ``1 - smoothing_ratio`` and the remaining
    ``smoothing_ratio`` is spread evenly over the other ``n_class - 1``
    classes.
    """

    def __init__(self, smoothing_ratio=0.1):
        super(SmoothCELoss, self).__init__()
        self.smoothing_ratio = smoothing_ratio

    def forward(self, pred, target):
        """Return the mean smoothed cross-entropy over finite per-sample losses.

        ``pred`` is indexed as (sample, class) and ``target`` is reshaped to a
        column of class indices for ``scatter``.
        """
        eps = self.smoothing_ratio
        n_class = pred.size(1)
        one_hot = torch.zeros_like(pred).scatter(1, target.view(-1, 1), 1)
        one_hot = one_hot * (1 - eps) + (1 - one_hot) * eps / (n_class - 1)
        log_prb = F.log_softmax(pred, dim=1)
        # Tensor has no ``.total``; the per-sample reduction is ``.sum``.
        loss = -(one_hot * log_prb).sum(dim=1)
        # Drop non-finite per-sample losses before averaging.
        loss = loss[torch.isfinite(loss)].mean()
        return loss
@LOSSES.register_module()
class FocalLoss(nn.Module):
    def __init__(
        self, gamma=2.0, alpha=0.5, reduction="mean", loss_weight=1.0, ignore_index=-1
    ):
        """Focal Loss
        `Focal Loss for Dense Object Detection <https://arxiv.org/abs/1708.02002>`_

        Args:
            gamma (float): Focusing exponent applied to ``1 - p_t``.
            alpha (float | list): Balancing weight for the positive term;
                a list is converted to a tensor in ``forward``.
            reduction (str): ``"mean"`` or ``"sum"``.
            loss_weight (float): Multiplier applied to the reduced loss.
            ignore_index (int): Target value excluded from the loss.
        """
        super(FocalLoss, self).__init__()
        assert reduction in (
            "mean",
            "sum",
        ), "AssertionError: reduction should be 'mean' or 'sum'"
        assert isinstance(
            alpha, (float, list)
        ), "AssertionError: alpha should be of type float"
        assert isinstance(gamma, float), "AssertionError: gamma should be of type float"
        assert isinstance(
            loss_weight, float
        ), "AssertionError: loss_weight should be of type float"
        assert isinstance(ignore_index, int), "ignore_index must be of type int"
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = reduction
        self.loss_weight = loss_weight
        self.ignore_index = ignore_index

    def forward(self, pred, target, **kwargs):
        """Forward function.
        Args:
            pred (torch.Tensor): The prediction with shape (N, C) where C = number of classes.
            target (torch.Tensor): The ground truth class indices, shape (N),
                where each value is 0 <= target[i] <= C - 1 or ``ignore_index``.
        Returns:
            torch.Tensor: The calculated loss
        """
        # [B, C, d_1, d_2, ..., d_k] -> [C, B, d_1, d_2, ..., d_k]
        pred = pred.transpose(0, 1)
        # [C, B, d_1, d_2, ..., d_k] -> [C, N]
        pred = pred.reshape(pred.size(0), -1)
        # [C, N] -> [N, C]
        pred = pred.transpose(0, 1).contiguous()
        # (B, d_1, d_2, ..., d_k) --> (B * d_1 * d_2 * ... * d_k,)
        target = target.view(-1).contiguous()
        assert pred.size(0) == target.size(
            0
        ), "The shape of pred doesn't match the shape of target"
        valid_mask = target != self.ignore_index
        target = target[valid_mask]
        pred = pred[valid_mask]

        if len(target) == 0:
            # Every target is ignored: return a zero that is still a tensor
            # attached to ``pred`` so callers can call ``.backward()`` on it.
            return pred.sum() * 0.0

        num_classes = pred.size(1)
        target = F.one_hot(target, num_classes=num_classes)

        alpha = self.alpha
        if isinstance(alpha, list):
            alpha = pred.new_tensor(alpha)
        pred_sigmoid = pred.sigmoid()
        target = target.type_as(pred)
        one_minus_pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target)
        focal_weight = (alpha * target + (1 - alpha) * (1 - target)) * one_minus_pt.pow(
            self.gamma
        )

        loss = (
            F.binary_cross_entropy_with_logits(pred, target, reduction="none")
            * focal_weight
        )
        if self.reduction == "mean":
            loss = loss.mean()
        elif self.reduction == "sum":
            # Tensor has no ``.total``; the reduction is ``.sum``.
            loss = loss.sum()
        return self.loss_weight * loss
+ """ + super(DiceLoss, self).__init__() + self.smooth = smooth + self.exponent = exponent + self.loss_weight = loss_weight + self.ignore_index = ignore_index + + def forward(self, pred, target, **kwargs): + # [B, C, d_1, d_2, ..., d_k] -> [C, B, d_1, d_2, ..., d_k] + pred = pred.transpose(0, 1) + # [C, B, d_1, d_2, ..., d_k] -> [C, N] + pred = pred.reshape(pred.size(0), -1) + # [C, N] -> [N, C] + pred = pred.transpose(0, 1).contiguous() + # (B, d_1, d_2, ..., d_k) --> (B * d_1 * d_2 * ... * d_k,) + target = target.view(-1).contiguous() + assert pred.size(0) == target.size( + 0 + ), "The shape of pred doesn't match the shape of target" + valid_mask = target != self.ignore_index + target = target[valid_mask] + pred = pred[valid_mask] + + pred = F.softmax(pred, dim=1) + num_classes = pred.shape[1] + target = F.one_hot( + torch.clamp(target.long(), 0, num_classes - 1), num_classes=num_classes + ) + + total_loss = 0 + for i in range(num_classes): + if i != self.ignore_index: + num = torch.sum(torch.mul(pred[:, i], target[:, i])) * 2 + self.smooth + den = ( + torch.sum( + pred[:, i].pow(self.exponent) + target[:, i].pow(self.exponent) + ) + + self.smooth + ) + dice_loss = 1 - num / den + total_loss += dice_loss + loss = total_loss / num_classes + return self.loss_weight * loss diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/models/modules.py b/point_transformer_v3/external/pointcept_minimal/pointcept/models/modules.py new file mode 100644 index 0000000..0ec8fbd --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/models/modules.py @@ -0,0 +1,120 @@ +import sys +import torch.nn as nn +import spconv.pytorch as spconv + +try: + import ocnn +except ImportError: + ocnn = None + +from collections import OrderedDict +from pointcept.models.utils.structure import Point +from pointcept.engines.hooks import HookBase + + +def is_ocnn_module(module): + if ocnn is not None: + ocnn_modules = ( + ocnn.nn.OctreeConv, + 
class PointSequential(PointModule):
    r"""A sequential container.
    Modules are registered in the order they are passed to the constructor;
    a single ``OrderedDict`` of named modules, or keyword modules, are also
    accepted. ``forward`` dispatches on the kind of each child (Point module,
    spconv module, ocnn module or plain PyTorch module).
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        if len(args) == 1 and isinstance(args[0], OrderedDict):
            named_children = args[0].items()
        else:
            # Positional modules are keyed by their position.
            named_children = ((str(pos), mod) for pos, mod in enumerate(args))
        for key, module in named_children:
            self.add_module(key, module)

        for name, module in kwargs.items():
            if sys.version_info < (3, 6):
                raise ValueError("kwargs only supported in py36+")
            if name in self._modules:
                raise ValueError("name exists.")
            self.add_module(name, module)

    def __getitem__(self, idx):
        size = len(self)
        if idx < -size or idx >= size:
            raise IndexError("index {} is out of range".format(idx))
        # Negative indices are resolved by list indexing.
        return list(self._modules.values())[idx]

    def __len__(self):
        return len(self._modules)

    def add(self, module, name=None):
        """Append ``module`` under ``name`` (defaults to the next index)."""
        key = str(len(self._modules)) if name is None else name
        if key in self._modules:
            raise KeyError("name exists")
        self.add_module(key, module)

    def forward(self, input):
        for layer in self._modules.values():
            # Point module
            if isinstance(layer, PointModule):
                input = layer(input)
            # Spconv module
            elif spconv.modules.is_spconv_module(layer):
                if isinstance(input, Point):
                    input.sparse_conv_feat = layer(input.sparse_conv_feat)
                    input.feat = input.sparse_conv_feat.features
                else:
                    input = layer(input)
            # OCNN module
            elif is_ocnn_module(layer):
                if isinstance(input, Point):
                    octree = input.octree
                    octree.features[-1] = layer(
                        input.feat[input.octree_order], input.octree, input.octree.depth
                    )
                    input.feat = input.octree.features[-1][input.octree_inverse]
                else:
                    input = layer(input)
            # PyTorch module
            else:
                if isinstance(input, Point):
                    input.feat = layer(input.feat)
                    # Keep the sparse tensor's features in step with feat.
                    if "sparse_conv_feat" in input.keys():
                        input.sparse_conv_feat = input.sparse_conv_feat.replace_feature(
                            input.feat
                        )
                elif isinstance(input, spconv.SparseConvTensor):
                    # An empty sparse tensor is passed through untouched.
                    if input.indices.shape[0] != 0:
                        input = input.replace_feature(layer(input.features))
                else:
                    input = layer(input)
        return input
def jagged_cumulative_argsort(unsorted_jt: fvdb.JaggedTensor) -> fvdb.JaggedTensor:
    """Argsort each element of a jagged tensor, expressed as flat indices.

    Every element yielded by iterating ``unsorted_jt`` is sorted on its own
    with ``torch.argsort``. The resulting local indices are shifted by the
    number of entries in the preceding elements, so the output indexes
    directly into the flat ``jdata`` of the input.

    Returns:
        ``unsorted_jt.jagged_like`` applied to the flat ``torch.long`` index
        tensor.
    """
    total = unsorted_jt.rshape[0]
    flat_perm = torch.empty(total, dtype=torch.long, device=unsorted_jt.device)
    start = 0
    for element in unsorted_jt:
        count = element.rshape[0]
        if count > 0:
            stop = start + count
            # Local argsort, rebased onto this element's slot in the flat data.
            flat_perm[start:stop] = torch.argsort(element.jdata) + start
            start = stop
    return unsorted_jt.jagged_like(flat_perm)
def space_filling_curve_from_jagged_ijk(jagged_ijk: fvdb.JaggedTensor, curve_type: str) -> fvdb.JaggedTensor:
    """Compute a per-voxel ordering key for the requested curve type.

    Supported ``curve_type`` values (aliases share a row):
        ``"morton"`` / ``"z"``            -> morton_from_jagged_ijk
        ``"morton_zyx"`` / ``"z-trans"``  -> morton_flipped_from_jagged_ijk
        ``"hilbert"``                     -> hilbert_from_jagged_ijk
        ``"hilbert-trans"``               -> hilbert_flipped_from_jagged_ijk
        ``"vdb"`` / ``"identity"``        -> identity_from_jagged_ijk

    Raises:
        ValueError: If ``curve_type`` is none of the names above.
    """
    if curve_type in ("morton", "z"):
        return morton_from_jagged_ijk(jagged_ijk)
    if curve_type in ("morton_zyx", "z-trans"):
        return morton_flipped_from_jagged_ijk(jagged_ijk)
    if curve_type == "hilbert":
        return hilbert_from_jagged_ijk(jagged_ijk)
    if curve_type == "hilbert-trans":
        return hilbert_flipped_from_jagged_ijk(jagged_ijk)
    if curve_type in ("vdb", "identity"):
        return identity_from_jagged_ijk(jagged_ijk)
    raise ValueError(f"Unsupported curve type: {curve_type}")
def inverse_order_features_from_perm(
    orderd_jagged_feats: fvdb.JaggedTensor, permutation: fvdb.JaggedTensor
) -> fvdb.JaggedTensor:
    """Restore reordered jagged features to their original row order.

    ``permutation.jdata`` holds flat indices such that
    ``orderd_jagged_feats.jdata == original_jdata[permutation.jdata]``.
    This function inverts that mapping, so that
    ``output[permutation[j]] == orderd_jagged_feats[j]`` for every row ``j``.

    Rows are selected along dim 0 only, so the features may have any number
    of trailing dimensions, including none.

    Args:
        orderd_jagged_feats: Features in permuted order.
        permutation: Flat integer permutation over the rows of the features.

    Returns:
        ``orderd_jagged_feats.jagged_like`` applied to the restored rows.
    """
    perm = permutation.jdata
    # Scatter positions to build the inverse: inverse_perm[perm[j]] = j.
    inverse_perm = torch.empty_like(perm)
    inverse_perm[perm] = torch.arange(perm.shape[0], device=perm.device, dtype=perm.dtype)
    # Row j of the result is the ordered row that originally came from row j.
    restored_feats = orderd_jagged_feats.jdata.index_select(0, inverse_perm)
    return orderd_jagged_feats.jagged_like(restored_feats)
class FVDBJaggedModule(torch.nn.Module):
    """
    Abstract base for modules mapping one JaggedTensor to one JaggedTensor.

    ``__call__`` is re-declared with explicit annotations so static type
    checkers see the argument and return types at call sites. Concrete
    subclasses supply ``forward``.
    """

    def __call__(self, feats: fvdb.JaggedTensor) -> fvdb.JaggedTensor:
        """Check the input type, then defer to ``torch.nn.Module.__call__``."""
        is_jagged = isinstance(feats, fvdb.JaggedTensor)
        assert is_jagged, "Input jagged_tensor must be a JaggedTensor"
        return super().__call__(feats)

    def forward(self, feats: fvdb.JaggedTensor) -> fvdb.JaggedTensor:
        """Placeholder; always raises NotImplementedError for a valid input."""
        is_jagged = isinstance(feats, fvdb.JaggedTensor)
        assert is_jagged, "Input jagged_tensor must be a JaggedTensor"
        raise NotImplementedError("Subclasses must implement forward()")
def jagged_attention(
    feats: fvdb.JaggedTensor,
    qkv: fvdb.JaggedTensor,
    *,
    hidden_size: int,
    num_heads: int,
    head_dim: int,
    patch_size: int,
    sliding_window_attention: bool,
    scale: float,
) -> fvdb.JaggedTensor:
    """Apply per-grid attention to packed QKV features using flash-attn.

    Three modes, selected by the arguments:

    * ``patch_size <= 0``: no attention is computed; the first
      ``hidden_size`` columns of ``qkv.jdata`` are returned as-is.
    * ``patch_size > 0`` and ``sliding_window_attention``: each grid is
      attended independently with a local window of ``patch_size // 2`` on
      each side.
    * ``patch_size > 0`` otherwise: each grid is cut into consecutive patches
      of ``patch_size`` rows (the last patch of a grid may be shorter) and
      attention runs within each patch via the varlen API, so patches never
      cross grid boundaries.

    Args:
        feats: Supplies the jagged layout (``joffsets``), the output dtype,
            and ``jagged_like`` for wrapping the result.
        qkv: Packed query/key/value rows; ``jdata`` is viewed as
            ``(rows, 3, num_heads, head_dim)``.
        hidden_size: Output feature width (``num_heads * head_dim``).
        num_heads: Number of attention heads.
        head_dim: Width of each head.
        patch_size: Patch length / window span; ``<= 0`` disables attention.
        sliding_window_attention: Choose the sliding-window mode.
        scale: Softmax scale passed to flash-attn.

    Returns:
        A JaggedTensor shaped like ``feats`` holding the attended features,
        cast back to the dtype of ``feats.jdata``.

    Raises:
        RuntimeError: If ``patch_size > 0`` and ``flash_attn`` cannot be
            imported.
    """
    if patch_size > 0:
        try:
            import flash_attn
        except ImportError as err:
            raise RuntimeError(
                "flash_attn is required for jagged_attention with "
                "sliding_window_attention or patch_size > 0. "
                "Install with: pip install flash-attn"
            ) from err

    qkv_j = qkv.jdata
    feats_j = feats.jdata

    if patch_size <= 0:
        # Attention disabled: pass the leading hidden_size columns through.
        return feats.jagged_like(qkv_j[:, :hidden_size].contiguous())

    # Pull the grid boundaries to the host once rather than one .item() sync
    # per boundary.
    offsets = feats.joffsets.to(dtype=torch.int64).tolist()
    grid_ranges = list(zip(offsets[:-1], offsets[1:]))

    if sliding_window_attention:
        window_size = (patch_size // 2, patch_size // 2)
        outputs = []
        for start, end in grid_ranges:
            length = end - start
            if length <= 0:
                continue
            qkv_b = qkv_j[start:end].view(1, length, 3, num_heads, head_dim)
            # NOTE(review): this branch runs in float16 while the patch branch
            # below uses bfloat16 -- confirm whether the difference is intended.
            out_b = cast(
                Any,
                flash_attn.flash_attn_qkvpacked_func(
                    qkv_b.half(), dropout_p=0.0, softmax_scale=scale, window_size=window_size
                ),
            ).reshape(length, hidden_size)
            outputs.append(out_b)
        if outputs:
            feats_out_j = torch.cat(outputs, dim=0)
        else:
            feats_out_j = torch.empty_like(qkv_j[:, :hidden_size])
    else:
        num_voxels = feats_j.shape[0]
        qkv_packed = qkv_j.view(-1, 3, num_heads, head_dim)

        # Sequence lengths: full patches followed by one remainder per grid.
        lengths = []
        for start, end in grid_ranges:
            length = end - start
            if length <= 0:
                continue
            full, rem = divmod(length, patch_size)
            lengths.extend([patch_size] * full)
            if rem > 0:
                lengths.append(rem)

        if not lengths:
            feats_out_j = torch.empty((0, hidden_size), device=qkv_j.device, dtype=feats_j.dtype)
        else:
            cu_seqlens = torch.zeros(len(lengths) + 1, device=qkv.device, dtype=torch.int32)
            cu_seqlens[1:] = torch.as_tensor(lengths, device=qkv.device, dtype=torch.int32).cumsum(dim=0)
            feats_out_j = cast(
                Any,
                flash_attn.flash_attn_varlen_qkvpacked_func(
                    qkv_packed.to(dtype=torch.bfloat16),
                    cu_seqlens,
                    max_seqlen=patch_size,
                    dropout_p=0.0,  # TODO: implement attention dropout in the future. By default, it is 0.
                    softmax_scale=scale,
                ),
            ).reshape(num_voxels, hidden_size)

    return feats.jagged_like(feats_out_j.to(feats_j.dtype))
class RPE(torch.nn.Module):
    """Learned relative position bias looked up from a per-axis table.

    Offsets are clamped to ``[-pos_bnd, pos_bnd]``, so every axis has
    ``rpe_num = 2 * pos_bnd + 1`` table rows. The three axes occupy
    consecutive row bands of a single ``(3 * rpe_num, num_heads)`` table.
    """

    def __init__(self, patch_size, num_heads):
        super().__init__()
        self.patch_size = patch_size
        self.num_heads = num_heads
        # Largest offset with its own table entry; larger ones are clamped.
        self.pos_bnd = int((4 * patch_size) ** (1 / 3) * 2)
        self.rpe_num = 2 * self.pos_bnd + 1
        table = torch.zeros(3 * self.rpe_num, num_heads)
        self.rpe_table = torch.nn.Parameter(table)
        torch.nn.init.trunc_normal_(self.rpe_table, std=0.02)

    def forward(self, coord):
        """Return the per-head bias for integer relative offsets.

        ``coord`` is indexed as a 4-D tensor whose last axis holds the three
        spatial offsets. The per-axis table rows are summed over that axis
        and the head axis is moved to position 1.
        """
        bound = self.pos_bnd
        # Shift each axis into its own band of the shared table.
        axis_band = torch.arange(3, device=coord.device) * self.rpe_num
        table_idx = coord.clamp(-bound, bound) + bound + axis_band
        bias = self.rpe_table.index_select(0, table_idx.reshape(-1))
        bias = bias.view(table_idx.shape + (-1,)).sum(3)
        # (N, K, K, H) -> (N, H, K, K)
        return bias.permute(0, 3, 1, 2)
@torch.no_grad()
def get_padding_and_inverse(self, point):
    """Build (and cache on ``point``) the patch padding index maps.

    Each batch element with more than ``self.patch_size`` points has its
    count rounded up to a multiple of ``self.patch_size``. Smaller elements
    keep their count and form a single short sequence.

    Returns:
        A tuple ``(pad, unpad, cu_seqlens)``:
        ``pad`` has one entry per padded slot, holding the index of the
        original point that fills it; ``unpad`` has one entry per original
        point, holding its slot in the padded layout; ``cu_seqlens`` is an
        int32 tensor of sequence start offsets in the padded layout, spaced
        ``self.patch_size`` apart within each batch element, with the total
        padded length appended.

    The three tensors are stored on ``point`` under the keys ``"pad"``,
    ``"unpad"`` and ``"cu_seqlens_key"`` and reused when all are present.
    NOTE(review): the cache keys do not include the patch size -- confirm
    that every attention layer sharing a ``point`` uses the same patch size.
    """
    pad_key = "pad"
    unpad_key = "unpad"
    cu_seqlens_key = "cu_seqlens_key"
    if (
        pad_key not in point.keys()
        or unpad_key not in point.keys()
        or cu_seqlens_key not in point.keys()
    ):
        offset = point.offset
        bincount = offset2bincount(offset)
        # Per-element count rounded up to the next multiple of patch_size.
        bincount_pad = (
            torch.div(
                bincount + self.patch_size - 1,
                self.patch_size,
                rounding_mode="trunc",
            )
            * self.patch_size
        )
        # only pad point when num of points larger than patch_size
        mask_pad = bincount > self.patch_size
        bincount_pad = ~mask_pad * bincount + mask_pad * bincount_pad
        # Prepend a zero so index i / i + 1 bracket batch element i.
        _offset = nn.functional.pad(offset, (1, 0))
        _offset_pad = nn.functional.pad(torch.cumsum(bincount_pad, dim=0), (1, 0))
        pad = torch.arange(_offset_pad[-1], device=offset.device)
        unpad = torch.arange(_offset[-1], device=offset.device)
        cu_seqlens = []
        for i in range(len(offset)):
            # Shift original indices by the padding accumulated before element i.
            unpad[_offset[i] : _offset[i + 1]] += _offset_pad[i] - _offset[i]
            if bincount[i] != bincount_pad[i]:
                # The last patch is only partly filled: fill its tail slots
                # with the slots at the same positions in the preceding patch.
                pad[
                    _offset_pad[i + 1]
                    - self.patch_size
                    + (bincount[i] % self.patch_size) : _offset_pad[i + 1]
                ] = pad[
                    _offset_pad[i + 1]
                    - 2 * self.patch_size
                    + (bincount[i] % self.patch_size) : _offset_pad[i + 1]
                    - self.patch_size
                ]
            # Convert padded-layout positions back to original point indices.
            pad[_offset_pad[i] : _offset_pad[i + 1]] -= _offset_pad[i] - _offset[i]
            # Sequence starts for this element, one every patch_size slots.
            cu_seqlens.append(
                torch.arange(
                    _offset_pad[i],
                    _offset_pad[i + 1],
                    step=self.patch_size,
                    dtype=torch.int32,
                    device=offset.device,
                )
            )
        point[pad_key] = pad
        point[unpad_key] = unpad
        # Append the total padded length as the final boundary.
        point[cu_seqlens_key] = nn.functional.pad(
            torch.concat(cu_seqlens), (0, 1), value=_offset_pad[-1]
        )
    return point[pad_key], point[unpad_key], point[cu_seqlens_key]
class MLP(nn.Module):
    """Two-layer feed-forward block: Linear -> activation -> dropout -> Linear -> dropout.

    ``hidden_channels`` and ``out_channels`` fall back to ``in_channels``
    when they are omitted (or otherwise falsy).
    """

    def __init__(
        self,
        in_channels,
        hidden_channels=None,
        out_channels=None,
        act_layer=nn.GELU,
        drop=0.0,
    ):
        super().__init__()
        width_out = out_channels or in_channels
        width_hidden = hidden_channels or in_channels
        self.fc1 = nn.Linear(in_channels, width_hidden)
        self.act = act_layer()
        self.fc2 = nn.Linear(width_hidden, width_out)
        # A single dropout module is applied after both the activation and fc2.
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        """Apply the block inside a ``PTV3_MLP`` NVTX range."""
        with NVTXRange("PTV3_MLP"):
            for layer in (self.fc1, self.act, self.drop, self.fc2, self.drop):
                x = layer(x)
        return x
class SerializedPooling(PointModule):
    """Downsample a serialized point cloud by merging points that share a code prefix.

    Serialization codes are right-shifted by ``3 * pooling_depth`` bits, and
    points whose shifted code (in the first serialization order) is equal are
    merged into one output point. Features are projected with a linear layer
    and reduced per cluster with ``reduce``; coordinates are averaged.

    Args:
        in_channels: Input feature width.
        out_channels: Output feature width.
        stride: Pooling stride; must be a power of two (2, 4, 8, ...).
        norm_layer: Optional factory called with ``out_channels``.
        act_layer: Optional activation factory called with no arguments.
        reduce: One of ``"sum"``, ``"mean"``, ``"min"``, ``"max"``.
        shuffle_orders: Randomly permute the serialization orders of the
            pooled output.
        traceable: Record the parent point and cluster assignment on the
            output so a later unpooling step can restore resolution.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        stride=2,
        norm_layer=None,
        act_layer=None,
        reduce="max",
        shuffle_orders=True,
        traceable=True,  # record parent and cluster
    ):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels

        assert stride == 2 ** (math.ceil(stride) - 1).bit_length()  # 2, 4, 8
        # TODO: add support to grid pool (any stride)
        self.stride = stride
        assert reduce in ["sum", "mean", "min", "max"]
        self.reduce = reduce
        self.shuffle_orders = shuffle_orders
        self.traceable = traceable

        self.proj = nn.Linear(in_channels, out_channels)
        # Always define both attributes so forward() can test them against
        # None; previously they only existed when a layer was supplied.
        self.norm = (
            PointSequential(norm_layer(out_channels)) if norm_layer is not None else None
        )
        self.act = PointSequential(act_layer()) if act_layer is not None else None

    def forward(self, point: Point):
        """Pool ``point`` and return the coarser Point.

        Raises:
            AssertionError: If ``point`` has not been serialized.
        """
        with NVTXRange("PTV3_Pooling"):
            # Validate before any serialized attribute is read.
            assert {
                "serialized_code",
                "serialized_order",
                "serialized_inverse",
                "serialized_depth",
            }.issubset(
                point.keys()
            ), "Run point.serialization() point cloud before SerializedPooling"

            pooling_depth = (math.ceil(self.stride) - 1).bit_length()
            if pooling_depth > point.serialized_depth:
                pooling_depth = 0

            # Three bits per depth level are dropped from every code.
            code = point.serialized_code >> (pooling_depth * 3)
            _, cluster, counts = torch.unique(
                code[0],
                sorted=True,
                return_inverse=True,
                return_counts=True,
            )
            # indices of point sorted by cluster, for torch_scatter.segment_csr
            _, indices = torch.sort(cluster)
            # index pointer for sorted point, for torch_scatter.segment_csr
            idx_ptr = torch.cat([counts.new_zeros(1), torch.cumsum(counts, dim=0)])
            # head_indices of each cluster, for reduce attr e.g. code, batch
            head_indices = indices[idx_ptr[:-1]]
            # generate down code, order, inverse
            code = code[:, head_indices]
            order = torch.argsort(code)
            inverse = torch.zeros_like(order).scatter_(
                dim=1,
                index=order,
                src=torch.arange(0, code.shape[1], device=order.device).repeat(
                    code.shape[0], 1
                ),
            )

            # Without shuffling the order bookkeeping is carried over unchanged
            # (when the parent has it); previously this name was unbound on
            # that path.
            new_order_perm = point.order_perm if "order_perm" in point.keys() else None
            if self.shuffle_orders:
                perm = torch.randperm(code.shape[0])
                code = code[perm]
                order = order[perm]
                inverse = inverse[perm]
                new_order_perm = [point.order_perm[i] for i in perm]

            # collect information
            point_dict = Dict(
                feat=torch_scatter.segment_csr(
                    self.proj(point.feat)[indices], idx_ptr, reduce=self.reduce
                ),
                coord=torch_scatter.segment_csr(
                    point.coord[indices], idx_ptr, reduce="mean"
                ),
                grid_coord=point.grid_coord[head_indices] >> pooling_depth,
                serialized_code=code,
                serialized_order=order,
                serialized_inverse=inverse,
                serialized_depth=point.serialized_depth - pooling_depth,
                batch=point.batch[head_indices],
            )
            if new_order_perm is not None:
                point_dict["order_perm"] = new_order_perm

            if "condition" in point.keys():
                point_dict["condition"] = point.condition
            if "context" in point.keys():
                point_dict["context"] = point.context

            if self.traceable:
                point_dict["pooling_inverse"] = cluster
                point_dict["pooling_parent"] = point
            point = Point(point_dict)
            if self.norm is not None:
                point = self.norm(point)
            if self.act is not None:
                point = self.act(point)
            point.sparsify()
        return point
class Embedding(PointModule):
    """Input stem: a linear projection with optional norm and activation.

    The stem is a ``PointSequential`` whose children are named
    ``linear_emb``, then ``norm`` and ``act`` when the corresponding factory
    is given.
    """

    def __init__(
        self,
        in_channels,
        embed_channels,
        norm_layer=None,
        act_layer=None,
    ):
        super().__init__()
        self.in_channels = in_channels
        self.embed_channels = embed_channels

        # TODO: check remove spconv
        projection = nn.Linear(in_channels, embed_channels)
        self.stem = PointSequential(linear_emb=projection)
        if norm_layer is not None:
            self.stem.add(norm_layer(embed_channels), name="norm")
        if act_layer is not None:
            self.stem.add(act_layer(), name="act")

    def forward(self, point: Point):
        """Run the stem on ``point`` inside a ``PTV3_Embedding`` NVTX range."""
        with NVTXRange("PTV3_Embedding"):
            embedded = self.stem(point)
        return embedded
pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + enable_batch_norm=True, + no_conv_in_cpe=False, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ): + super().__init__() + self.num_stages = len(enc_depths) + self.order = [order] if isinstance(order, str) else order + self.enc_mode = enc_mode + self.shuffle_orders = shuffle_orders + + assert self.num_stages == len(stride) + 1 + assert self.num_stages == len(enc_depths) + assert self.num_stages == len(enc_channels) + assert self.num_stages == len(enc_num_head) + assert self.num_stages == len(enc_patch_size) + assert self.enc_mode or self.num_stages == len(dec_depths) + 1 + assert self.enc_mode or self.num_stages == len(dec_channels) + 1 + assert self.enc_mode or self.num_stages == len(dec_num_head) + 1 + assert self.enc_mode or self.num_stages == len(dec_patch_size) + 1 + + # norm layers + if pdnorm_bn: + assert False, "PDNorm is not supported in this minimal pointcept codebase for fvdb." + bn_layer = partial( + PDNorm, + norm_layer=partial( + nn.BatchNorm1d, eps=1e-3, momentum=0.01, affine=pdnorm_affine + ), + conditions=pdnorm_conditions, + decouple=pdnorm_decouple, + adaptive=pdnorm_adaptive, + ) + else: + bn_layer = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01) + if pdnorm_ln: + assert False, "PDNorm is not supported in this minimal pointcept codebase for fvdb." 
+ ln_layer = partial( + PDNorm, + norm_layer=partial(nn.LayerNorm, elementwise_affine=pdnorm_affine), + conditions=pdnorm_conditions, + decouple=pdnorm_decouple, + adaptive=pdnorm_adaptive, + ) + else: + ln_layer = nn.LayerNorm + # activation layers + act_layer = nn.GELU + + self.embedding = Embedding( + in_channels=in_channels, + embed_channels=enc_channels[0], + norm_layer=bn_layer if enable_batch_norm else ln_layer, + act_layer=act_layer, + ) + + # encoder + enc_drop_path = [ + x.item() for x in torch.linspace(0, drop_path, sum(enc_depths)) + ] + self.enc = PointSequential() + for s in range(self.num_stages): + enc_drop_path_ = enc_drop_path[ + sum(enc_depths[:s]) : sum(enc_depths[: s + 1]) + ] + enc = PointSequential() + if s > 0: + enc.add( + SerializedPooling( + in_channels=enc_channels[s - 1], + out_channels=enc_channels[s], + stride=stride[s - 1], + norm_layer=bn_layer if enable_batch_norm else ln_layer, + act_layer=act_layer, + ), + name="down", + ) + for i in range(enc_depths[s]): + enc.add( + Block( + channels=enc_channels[s], + num_heads=enc_num_head[s], + patch_size=enc_patch_size[s], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=proj_drop, + drop_path=enc_drop_path_[i], + norm_layer=ln_layer, + act_layer=act_layer, + pre_norm=pre_norm, + order_index=i % len(self.order), + cpe_indice_key=f"stage{s}", + enable_rpe=enable_rpe, + enable_flash=enable_flash, + upcast_attention=upcast_attention, + upcast_softmax=upcast_softmax, + no_conv_in_cpe=no_conv_in_cpe, + ), + name=f"block{i}", + ) + if len(enc) != 0: + self.enc.add(module=enc, name=f"enc{s}") + + # decoder + if not self.enc_mode: + dec_drop_path = [ + x.item() for x in torch.linspace(0, drop_path, sum(dec_depths)) + ] + self.dec = PointSequential() + dec_channels = list(dec_channels) + [enc_channels[-1]] + for s in reversed(range(self.num_stages - 1)): + dec_drop_path_ = dec_drop_path[ + sum(dec_depths[:s]) : sum(dec_depths[: s + 1]) + ] + 
def forward(self, data_dict):
    """Run the backbone on ``data_dict`` and return the resulting ``Point``.

    The input is wrapped in a ``Point``, serialized with ``self.order`` and
    sparsified, then passed through the embedding, every encoder stage and,
    unless ``self.enc_mode`` is set, every decoder stage. Each stage runs
    inside its own NVTX range so it shows up separately in a profile.
    """
    with NVTXRange("PTV3_Forward"):
        point = Point(data_dict)
        point.serialization(order=self.order, shuffle_orders=self.shuffle_orders)
        point.sparsify()

        point = self.embedding(point)

        # The children of self.enc / self.dec are the per-stage PointSequential
        # containers built in __init__, so a class-name test for "Pooling" /
        # "Unpooling" can never match; every stage gets the stage-level label.
        for s, enc_module in enumerate(self.enc):
            with NVTXRange(f"PTV3_Encoder_{s}"):
                point = enc_module(point)

        if not self.enc_mode:
            for s, dec_module in enumerate(self.dec):
                with NVTXRange(f"PTV3_Decoder_{s}"):
                    point = dec_module(point)
    return point
a/point_transformer_v3/external/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m1_fvdb.py b/point_transformer_v3/external/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m1_fvdb.py new file mode 100644 index 0000000..26c1d3b --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m1_fvdb.py @@ -0,0 +1,218 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + +""" +Point Transformer - V3 Mode1 FVDB Implementation +""" + + +from __future__ import annotations + +import fvdb +import torch +from pointcept.models.builder import MODELS +from pointcept.models.modules import PointModule + +# Import PTV3 FVDB implementation - use relative import since we're in the same package +from .ptv3_fvdb import PTV3, set_global_original_coord_to_voxel_idx, get_global_original_coord_to_voxel_idx + + +def tensor_hash_simple(tensor: torch.Tensor) -> int: + """Simple Python hash - fastest but less robust""" + return hash(tuple(tensor.detach().cpu().flatten().tolist())) + + +def create_grid_from_points( + grid_coord: torch.Tensor, + feat: torch.Tensor, + offset: torch.Tensor, + voxel_size: float, + device: str = "cuda", +) -> tuple[fvdb.GridBatch, fvdb.JaggedTensor, fvdb.JaggedTensor]: + """Create FVDB tensor from ScanNet-like point data with proper batching. 
+ + Args: + grid_coord: Batched grid coordinates [N, 3] + feat: Batched features [N, C] + offset: Tensor indicating batch boundaries [B] + voxel_size: Voxel size for grid creation + device: Device for tensor operations + + Returns: + grid: fvdb.GridBatch + jfeats: fvdb.JaggedTensor with features + original_coord_to_voxel_idx: Mapping from original coords to voxel indices + """ + + offset_list = list(offset.cpu().numpy()) + # Convert offset to individual sample boundaries + if len(offset_list) == 1: + # Single sample case + coords_list = [grid_coord.to(device=device, dtype=torch.int32)] + feats_list = [feat.to(device=device, dtype=torch.float32)] + else: + # Multiple samples case - split using offset + coords_list = [] + feats_list = [] + prev_offset = 0 + for curr_offset in offset_list: + coords_list.append(grid_coord[prev_offset:curr_offset].to(device=device, dtype=torch.int32)) + feats_list.append(feat[prev_offset:curr_offset].to(device=device, dtype=torch.float32)) + prev_offset = curr_offset + + coords_jagged = fvdb.JaggedTensor(coords_list) + + grid = fvdb.GridBatch.from_ijk( + coords_jagged, # TODO: understand world coordinates and voxel coordinates. + # voxel_sizes=[[voxel_size, voxel_size, voxel_size]] * len(coords_list), + # origins=[0.0] * 3, + ) + + feats_jagged = fvdb.JaggedTensor(feats_list) + feats_vdb_order = grid.inject_from_ijk(coords_jagged, feats_jagged) # + original_coord_to_voxel_idx = grid.ijk_to_index(coords_jagged, cumulative=True) + + return grid, feats_vdb_order, original_coord_to_voxel_idx + + +@MODELS.register_module("PT-v3fvdb") +class PointTransformerV3(PointModule): + def __init__( + self, + in_channels: int = 6, + enc_depths: tuple[int, ...] = (2, 2, 2, 2), + enc_channels: tuple[int, ...] = (32, 64, 128, 256), + enc_num_heads: tuple[int, ...] = (1, 1, 1, 1), + dec_depths: tuple[int, ...] = (2, 2, 2), + dec_channels: tuple[int, ...] = (128, 64, 32), + dec_num_heads: tuple[int, ...] 
= (1, 1, 1), + patch_size: int = 1024, + drop_path: float = 0.3, + proj_drop: float = 0.0, + qk_scale: float = 1.0, + enable_batch_norm: bool = False, + embedding_mode: str = "linear", + no_conv_in_cpe: bool = False, + cross_patch_attention: bool = False, + cross_patch_pooling: str = "mean", + sliding_window_attention: bool = False, + pipelined_batch: bool = False, + order_type: str | tuple[str, ...] = ("z", "z-trans"), + shuffle_orders: bool = True, + ): + super().__init__() + + self.pipelined_batch = pipelined_batch + self.order_type = order_type + + self.fvdb_ptv3_model = PTV3( + num_classes=-1, + input_dim=in_channels, + enc_depths=enc_depths, + enc_channels=enc_channels, + enc_num_heads=enc_num_heads, + dec_depths=dec_depths, + dec_channels=dec_channels, + dec_num_heads=dec_num_heads, + patch_size=patch_size, + drop_path=drop_path, + proj_drop=proj_drop, + qk_scale=qk_scale, + enable_batch_norm=enable_batch_norm, + embedding_mode=embedding_mode, + no_conv_in_cpe=no_conv_in_cpe, + # cross_patch_attention=cross_patch_attention, + # cross_patch_pooling=cross_patch_pooling, + sliding_window_attention=sliding_window_attention, + order_type=order_type, + shuffle_orders=shuffle_orders, + ) + + def __call__(self, data_dict: dict) -> torch.Tensor: + """Override __call__ to preserve type hints from forward.""" + return super().__call__(data_dict) + + def forward(self, data_dict: dict) -> torch.Tensor: + + grid_coord = data_dict["grid_coord"] + feat = data_dict["feat"] + offset = data_dict["offset"] + # import pdb; pdb.set_trace() + # print(f"grid_coord.shape: {grid_coord.shape}, feat.shape: {feat.shape}, offset.shape: {offset.shape}") + # exit() + + if self.pipelined_batch and len(offset) > 1: + # Pipelined batch mode: process each point cloud individually + # This mode splits the batch into individual point clouds, processes each + # one separately through the FVDB model, and concatenates the results. + # This can be useful for: + # 1. 
Memory efficiency when individual processing uses less memory + # 2. Debugging to isolate issues with specific point clouds + # 3. Different processing requirements per sample + outputs = [] + prev_offset = 0 + # catted_input_grid_ijk = [] + # catted_input_feat = [] + # catted_original_coord_to_voxel_idx = [] + for curr_offset in offset: + # Extract data for current point cloud + curr_grid_coord = grid_coord[prev_offset:curr_offset] + curr_feat = feat[prev_offset:curr_offset] + curr_num_points = curr_offset - prev_offset + curr_offset_tensor = torch.tensor([curr_num_points], dtype=offset.dtype, device=offset.device) + + # Process single point cloud + grid, jfeats, original_coord_to_voxel_idx = create_grid_from_points( + curr_grid_coord, curr_feat, curr_offset_tensor, voxel_size=0.02 + ) + assert ( + grid.ijk.jdata.shape == curr_grid_coord.shape + ), f"curr_grid_coord.shape: {curr_grid_coord.shape}, grid.ijk.jdata.shape: {grid.ijk.jdata.shape}" # + + # catted_input_grid_ijk.append(grid.ijk.jdata) + # catted_input_feat.append(jfeats.jdata) + # catted_original_coord_to_voxel_idx.append(original_coord_to_voxel_idx.jdata) + # grid shape and feats values match here. + jfeats = self.fvdb_ptv3_model(jfeats, grid) + # feats values does not match here. + + # Get output for this point cloud. + curr_output = jfeats.jdata[original_coord_to_voxel_idx.jdata] + outputs.append(curr_output) + + prev_offset = curr_offset + + # Concatenate all outputs + output = torch.cat(outputs, dim=0) + # import pdb; pdb.set_trace() + + # catted_input_grid_ijk = torch.cat(catted_input_grid_ijk, dim=0) + # catted_input_feat = torch.cat(catted_input_feat, dim=0) + # catted_original_coord_to_voxel_idx = torch.cat(catted_original_coord_to_voxel_idx, dim=0) + + else: + # Standard batch mode (original implementation) + grid, jfeats, original_coord_to_voxel_idx = create_grid_from_points( + grid_coord, feat, offset, voxel_size=0.02 + ) + # TODO: check the downsampling behavior is the same or not? 
+ assert ( + grid_coord.shape == grid.ijk.jdata.shape + ), f"grid_coord.shape: {grid_coord.shape}, grid.ijk.jdata.shape: {grid.ijk.jdata.shape}" # this is not always true, because mix-prob may duplicate points with the same coordinate. + assert ( + grid_coord.shape[0] == original_coord_to_voxel_idx.jdata.shape[0] + ), f"grid_coord.shape: {grid_coord.shape}, original_coord_to_voxel_idx.jdata.shape: {original_coord_to_voxel_idx.jdata.shape}" + + # import pdb; pdb.set_trace() + if torch.is_autocast_enabled(): + with torch.autocast(device_type="cuda", enabled=False): + jfeats = self.fvdb_ptv3_model(jfeats, grid) + else: + # import pdb; pdb.set_trace() + set_global_original_coord_to_voxel_idx(original_coord_to_voxel_idx) + jfeats = self.fvdb_ptv3_model(jfeats, grid) + + output = jfeats.jdata[original_coord_to_voxel_idx.jdata] + # import pdb; pdb.set_trace() + + return output # return logits in torch.tensor format diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m2_sonata.py b/point_transformer_v3/external/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m2_sonata.py new file mode 100644 index 0000000..cc8fee1 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m2_sonata.py @@ -0,0 +1,732 @@ +""" +Point Transformer - V3 Mode2 + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
+""" + +from addict import Dict +import torch +import torch.nn as nn +from torch.nn.init import trunc_normal_ +import spconv.pytorch as spconv +import torch_scatter +from timm.layers import DropPath + +try: + import flash_attn +except ImportError: + flash_attn = None + +from pointcept.models.builder import MODELS +from pointcept.models.utils.misc import offset2bincount +from pointcept.models.utils.structure import Point +from pointcept.models.modules import PointModule, PointSequential + + +class LayerScale(nn.Module): + def __init__( + self, + dim: int, + init_values: float = 1e-5, + inplace: bool = False, + ) -> None: + super().__init__() + self.inplace = inplace + self.gamma = nn.Parameter(init_values * torch.ones(dim)) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return x.mul_(self.gamma) if self.inplace else x * self.gamma + + +class RPE(torch.nn.Module): + def __init__(self, patch_size, num_heads): + super().__init__() + self.patch_size = patch_size + self.num_heads = num_heads + self.pos_bnd = int((4 * patch_size) ** (1 / 3) * 2) + self.rpe_num = 2 * self.pos_bnd + 1 + self.rpe_table = torch.nn.Parameter(torch.zeros(3 * self.rpe_num, num_heads)) + torch.nn.init.trunc_normal_(self.rpe_table, std=0.02) + + def forward(self, coord): + idx = ( + coord.clamp(-self.pos_bnd, self.pos_bnd) # clamp into bnd + + self.pos_bnd # relative position to positive index + + torch.arange(3, device=coord.device) * self.rpe_num # x, y, z stride + ) + out = self.rpe_table.index_select(0, idx.reshape(-1)) + out = out.view(idx.shape + (-1,)).sum(3) + out = out.permute(0, 3, 1, 2) # (N, K, K, H) -> (N, H, K, K) + return out + + +class SerializedAttention(PointModule): + def __init__( + self, + channels, + num_heads, + patch_size, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + order_index=0, + enable_rpe=False, + enable_flash=True, + upcast_attention=True, + upcast_softmax=True, + ): + super().__init__() + assert channels % num_heads == 0 + 
self.channels = channels + self.num_heads = num_heads + self.scale = qk_scale or (channels // num_heads) ** -0.5 + self.order_index = order_index + self.upcast_attention = upcast_attention + self.upcast_softmax = upcast_softmax + self.enable_rpe = enable_rpe + self.enable_flash = enable_flash + if enable_flash: + assert ( + enable_rpe is False + ), "Set enable_rpe to False when enable Flash Attention" + assert ( + upcast_attention is False + ), "Set upcast_attention to False when enable Flash Attention" + assert ( + upcast_softmax is False + ), "Set upcast_softmax to False when enable Flash Attention" + assert flash_attn is not None, "Make sure flash_attn is installed." + self.patch_size = patch_size + self.attn_drop = attn_drop + else: + # when disable flash attention, we still don't want to use mask + # consequently, patch size will auto set to the + # min number of patch_size_max and number of points + self.patch_size_max = patch_size + self.patch_size = 0 + self.attn_drop = torch.nn.Dropout(attn_drop) + + self.qkv = torch.nn.Linear(channels, channels * 3, bias=qkv_bias) + self.proj = torch.nn.Linear(channels, channels) + self.proj_drop = torch.nn.Dropout(proj_drop) + self.softmax = torch.nn.Softmax(dim=-1) + self.rpe = RPE(patch_size, num_heads) if self.enable_rpe else None + + @torch.no_grad() + def get_rel_pos(self, point, order): + K = self.patch_size + rel_pos_key = f"rel_pos_{self.order_index}" + if rel_pos_key not in point.keys(): + grid_coord = point.grid_coord[order] + grid_coord = grid_coord.reshape(-1, K, 3) + point[rel_pos_key] = grid_coord.unsqueeze(2) - grid_coord.unsqueeze(1) + return point[rel_pos_key] + + @torch.no_grad() + def get_padding_and_inverse(self, point): + pad_key = "pad" + unpad_key = "unpad" + cu_seqlens_key = "cu_seqlens_key" + if ( + pad_key not in point.keys() + or unpad_key not in point.keys() + or cu_seqlens_key not in point.keys() + ): + offset = point.offset + bincount = offset2bincount(offset) + bincount_pad = ( + torch.div( 
+ bincount + self.patch_size - 1, + self.patch_size, + rounding_mode="trunc", + ) + * self.patch_size + ) + # only pad point when num of points larger than patch_size + mask_pad = bincount > self.patch_size + bincount_pad = ~mask_pad * bincount + mask_pad * bincount_pad + _offset = nn.functional.pad(offset, (1, 0)) + _offset_pad = nn.functional.pad(torch.cumsum(bincount_pad, dim=0), (1, 0)) + pad = torch.arange(_offset_pad[-1], device=offset.device) + unpad = torch.arange(_offset[-1], device=offset.device) + cu_seqlens = [] + for i in range(len(offset)): + unpad[_offset[i] : _offset[i + 1]] += _offset_pad[i] - _offset[i] + if bincount[i] != bincount_pad[i]: + pad[ + _offset_pad[i + 1] + - self.patch_size + + (bincount[i] % self.patch_size) : _offset_pad[i + 1] + ] = pad[ + _offset_pad[i + 1] + - 2 * self.patch_size + + (bincount[i] % self.patch_size) : _offset_pad[i + 1] + - self.patch_size + ] + pad[_offset_pad[i] : _offset_pad[i + 1]] -= _offset_pad[i] - _offset[i] + cu_seqlens.append( + torch.arange( + _offset_pad[i], + _offset_pad[i + 1], + step=self.patch_size, + dtype=torch.int32, + device=offset.device, + ) + ) + point[pad_key] = pad + point[unpad_key] = unpad + point[cu_seqlens_key] = nn.functional.pad( + torch.concat(cu_seqlens), (0, 1), value=_offset_pad[-1] + ) + return point[pad_key], point[unpad_key], point[cu_seqlens_key] + + def forward(self, point): + if not self.enable_flash: + self.patch_size = min( + offset2bincount(point.offset).min().tolist(), self.patch_size_max + ) + + H = self.num_heads + K = self.patch_size + C = self.channels + + pad, unpad, cu_seqlens = self.get_padding_and_inverse(point) + + order = point.serialized_order[self.order_index][pad] + inverse = unpad[point.serialized_inverse[self.order_index]] + + # padding and reshape feat and batch for serialized point patch + qkv = self.qkv(point.feat)[order] + + if not self.enable_flash: + # encode and reshape qkv: (N', K, 3, H, C') => (3, N', H, K, C') + q, k, v = ( + qkv.reshape(-1, K, 
3, H, C // H).permute(2, 0, 3, 1, 4).unbind(dim=0) + ) + # attn + if self.upcast_attention: + q = q.float() + k = k.float() + attn = (q * self.scale) @ k.transpose(-2, -1) # (N', H, K, K) + if self.enable_rpe: + attn = attn + self.rpe(self.get_rel_pos(point, order)) + if self.upcast_softmax: + attn = attn.float() + attn = self.softmax(attn) + attn = self.attn_drop(attn).to(qkv.dtype) + feat = (attn @ v).transpose(1, 2).reshape(-1, C) + else: + feat = flash_attn.flash_attn_varlen_qkvpacked_func( + qkv.to(torch.bfloat16).reshape(-1, 3, H, C // H), + cu_seqlens, + max_seqlen=self.patch_size, + dropout_p=self.attn_drop if self.training else 0, + softmax_scale=self.scale, + ).reshape(-1, C) + feat = feat.to(qkv.dtype) + feat = feat[inverse] + + # ffn + feat = self.proj(feat) + feat = self.proj_drop(feat) + point.feat = feat + return point + + +class MLP(nn.Module): + def __init__( + self, + in_channels, + hidden_channels=None, + out_channels=None, + act_layer=nn.GELU, + drop=0.0, + ): + super().__init__() + out_channels = out_channels or in_channels + hidden_channels = hidden_channels or in_channels + self.fc1 = nn.Linear(in_channels, hidden_channels) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_channels, out_channels) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class Block(PointModule): + def __init__( + self, + channels, + num_heads, + patch_size=48, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.0, + layer_scale=None, + norm_layer=nn.LayerNorm, + act_layer=nn.GELU, + pre_norm=True, + order_index=0, + cpe_indice_key=None, + enable_rpe=False, + enable_flash=True, + upcast_attention=True, + upcast_softmax=True, + ): + super().__init__() + self.channels = channels + self.pre_norm = pre_norm + + self.cpe = PointSequential( + spconv.SubMConv3d( + channels, + channels, + kernel_size=3, + 
bias=True, + indice_key=cpe_indice_key, + ), + nn.Linear(channels, channels), + norm_layer(channels), + ) + + self.norm1 = PointSequential(norm_layer(channels)) + self.ls1 = PointSequential( + LayerScale(channels, init_values=layer_scale) + if layer_scale is not None + else nn.Identity() + ) + self.attn = SerializedAttention( + channels=channels, + patch_size=patch_size, + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=proj_drop, + order_index=order_index, + enable_rpe=enable_rpe, + enable_flash=enable_flash, + upcast_attention=upcast_attention, + upcast_softmax=upcast_softmax, + ) + self.norm2 = PointSequential(norm_layer(channels)) + self.ls2 = PointSequential( + LayerScale(channels, init_values=layer_scale) + if layer_scale is not None + else nn.Identity() + ) + self.mlp = PointSequential( + MLP( + in_channels=channels, + hidden_channels=int(channels * mlp_ratio), + out_channels=channels, + act_layer=act_layer, + drop=proj_drop, + ) + ) + self.drop_path = PointSequential( + DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + ) + + def forward(self, point: Point): + shortcut = point.feat + point = self.cpe(point) + point.feat = shortcut + point.feat + shortcut = point.feat + if self.pre_norm: + point = self.norm1(point) + point = self.drop_path(self.ls1(self.attn(point))) + point.feat = shortcut + point.feat + if not self.pre_norm: + point = self.norm1(point) + + shortcut = point.feat + if self.pre_norm: + point = self.norm2(point) + point = self.drop_path(self.ls2(self.mlp(point))) + point.feat = shortcut + point.feat + if not self.pre_norm: + point = self.norm2(point) + point.sparse_conv_feat = point.sparse_conv_feat.replace_feature(point.feat) + return point + + +class GridPooling(PointModule): + def __init__( + self, + in_channels, + out_channels, + stride=2, + norm_layer=None, + act_layer=None, + reduce="max", + shuffle_orders=True, + traceable=True, # record parent and cluster + ): + 
super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + + self.stride = stride + assert reduce in ["sum", "mean", "min", "max"] + self.reduce = reduce + self.shuffle_orders = shuffle_orders + self.traceable = traceable + + self.proj = nn.Linear(in_channels, out_channels) + if norm_layer is not None: + self.norm = PointSequential(norm_layer(out_channels)) + if act_layer is not None: + self.act = PointSequential(act_layer()) + + def forward(self, point: Point): + if "grid_coord" in point.keys(): + grid_coord = point.grid_coord + elif {"coord", "grid_size"}.issubset(point.keys()): + grid_coord = torch.div( + point.coord - point.coord.min(0)[0], + point.grid_size, + rounding_mode="trunc", + ).int() + else: + raise AssertionError( + "[gird_coord] or [coord, grid_size] should be include in the Point" + ) + grid_coord = torch.div(grid_coord, self.stride, rounding_mode="trunc") + grid_coord = grid_coord | point.batch.view(-1, 1) << 48 + grid_coord, cluster, counts = torch.unique( + grid_coord, + sorted=True, + return_inverse=True, + return_counts=True, + dim=0, + ) + grid_coord = grid_coord & ((1 << 48) - 1) + # indices of point sorted by cluster, for torch_scatter.segment_csr + _, indices = torch.sort(cluster) + # index pointer for sorted point, for torch_scatter.segment_csr + idx_ptr = torch.cat([counts.new_zeros(1), torch.cumsum(counts, dim=0)]) + # head_indices of each cluster, for reduce attr e.g. 
code, batch + head_indices = indices[idx_ptr[:-1]] + point_dict = Dict( + feat=torch_scatter.segment_csr( + self.proj(point.feat)[indices], idx_ptr, reduce=self.reduce + ), + coord=torch_scatter.segment_csr( + point.coord[indices], idx_ptr, reduce="mean" + ), + grid_coord=grid_coord, + batch=point.batch[head_indices], + ) + if "origin_coord" in point.keys(): + point_dict["origin_coord"] = torch_scatter.segment_csr( + point.origin_coord[indices], idx_ptr, reduce="mean" + ) + if "condition" in point.keys(): + point_dict["condition"] = point.condition + if "context" in point.keys(): + point_dict["context"] = point.context + if "name" in point.keys(): + point_dict["name"] = point.name + if "split" in point.keys(): + point_dict["split"] = point.split + if "color" in point.keys(): + point_dict["color"] = torch_scatter.segment_csr( + point.color[indices], idx_ptr, reduce="mean" + ) + if "grid_size" in point.keys(): + point_dict["grid_size"] = point.grid_size * self.stride + + if self.traceable: + point_dict["pooling_inverse"] = cluster + point_dict["pooling_parent"] = point + point_dict["idx_ptr"] = idx_ptr + order = point.order + point = Point(point_dict) + if self.norm is not None: + point = self.norm(point) + if self.act is not None: + point = self.act(point) + point.serialization(order=order, shuffle_orders=self.shuffle_orders) + point.sparsify() + return point + + +class GridUnpooling(PointModule): + def __init__( + self, + in_channels, + skip_channels, + out_channels, + norm_layer=None, + act_layer=None, + traceable=False, # record parent and cluster + ): + super().__init__() + self.proj = PointSequential(nn.Linear(in_channels, out_channels)) + self.proj_skip = PointSequential(nn.Linear(skip_channels, out_channels)) + + if norm_layer is not None: + self.proj.add(norm_layer(out_channels)) + self.proj_skip.add(norm_layer(out_channels)) + + if act_layer is not None: + self.proj.add(act_layer()) + self.proj_skip.add(act_layer()) + + self.traceable = traceable + + def 
forward(self, point): + assert "pooling_parent" in point.keys() + assert "pooling_inverse" in point.keys() + parent = point.pop("pooling_parent") + inverse = point.pooling_inverse + feat = point.feat + + parent = self.proj_skip(parent) + parent.feat = parent.feat + self.proj(point).feat[inverse] + parent.sparse_conv_feat = parent.sparse_conv_feat.replace_feature(parent.feat) + + if self.traceable: + point.feat = feat + parent["unpooling_parent"] = point + return parent + + +class Embedding(PointModule): + def __init__( + self, + in_channels, + embed_channels, + norm_layer=None, + act_layer=None, + mask_token=False, + ): + super().__init__() + self.in_channels = in_channels + self.embed_channels = embed_channels + + self.stem = PointSequential(linear=nn.Linear(in_channels, embed_channels)) + if norm_layer is not None: + self.stem.add(norm_layer(embed_channels), name="norm") + if act_layer is not None: + self.stem.add(act_layer(), name="act") + + if mask_token: + self.mask_token = nn.Parameter(torch.zeros(1, embed_channels)) + else: + self.mask_token = None + + def forward(self, point: Point): + point = self.stem(point) + if "mask" in point.keys(): + point.feat = torch.where( + point.mask.unsqueeze(-1), + self.mask_token.to(point.feat.dtype), + point.feat, + ) + return point + + +@MODELS.register_module("PT-v3m2") +class PointTransformerV3(PointModule): + def __init__( + self, + in_channels=6, + order=("z", "z-trans"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(48, 48, 48, 48, 48), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(48, 48, 48, 48), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.3, + layer_scale=None, + pre_norm=True, + shuffle_orders=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + 
traceable=False, + mask_token=False, + enc_mode=False, + freeze_encoder=False, + ): + super().__init__() + self.num_stages = len(enc_depths) + self.order = [order] if isinstance(order, str) else order + self.shuffle_orders = shuffle_orders + self.enc_mode = enc_mode + self.freeze_encoder = freeze_encoder + + assert self.num_stages == len(stride) + 1 + assert self.num_stages == len(enc_depths) + assert self.num_stages == len(enc_channels) + assert self.num_stages == len(enc_num_head) + assert self.num_stages == len(enc_patch_size) + assert self.enc_mode or self.num_stages == len(dec_depths) + 1 + assert self.enc_mode or self.num_stages == len(dec_channels) + 1 + assert self.enc_mode or self.num_stages == len(dec_num_head) + 1 + assert self.enc_mode or self.num_stages == len(dec_patch_size) + 1 + + # normalization layer + ln_layer = nn.LayerNorm + # activation layers + act_layer = nn.GELU + + self.embedding = Embedding( + in_channels=in_channels, + embed_channels=enc_channels[0], + norm_layer=ln_layer, + act_layer=act_layer, + mask_token=mask_token, + ) + + # encoder + enc_drop_path = [ + x.item() for x in torch.linspace(0, drop_path, sum(enc_depths)) + ] + self.enc = PointSequential() + for s in range(self.num_stages): + enc_drop_path_ = enc_drop_path[ + sum(enc_depths[:s]) : sum(enc_depths[: s + 1]) + ] + enc = PointSequential() + if s > 0: + enc.add( + GridPooling( + in_channels=enc_channels[s - 1], + out_channels=enc_channels[s], + stride=stride[s - 1], + norm_layer=ln_layer, + act_layer=act_layer, + ), + name="down", + ) + for i in range(enc_depths[s]): + enc.add( + Block( + channels=enc_channels[s], + num_heads=enc_num_head[s], + patch_size=enc_patch_size[s], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=proj_drop, + drop_path=enc_drop_path_[i], + layer_scale=layer_scale, + norm_layer=ln_layer, + act_layer=act_layer, + pre_norm=pre_norm, + order_index=i % len(self.order), + cpe_indice_key=f"stage{s}", + 
enable_rpe=enable_rpe, + enable_flash=enable_flash, + upcast_attention=upcast_attention, + upcast_softmax=upcast_softmax, + ), + name=f"block{i}", + ) + if len(enc) != 0: + self.enc.add(module=enc, name=f"enc{s}") + + # decoder + if not self.enc_mode: + dec_drop_path = [ + x.item() for x in torch.linspace(0, drop_path, sum(dec_depths)) + ] + self.dec = PointSequential() + dec_channels = list(dec_channels) + [enc_channels[-1]] + for s in reversed(range(self.num_stages - 1)): + dec_drop_path_ = dec_drop_path[ + sum(dec_depths[:s]) : sum(dec_depths[: s + 1]) + ] + dec_drop_path_.reverse() + dec = PointSequential() + dec.add( + GridUnpooling( + in_channels=dec_channels[s + 1], + skip_channels=enc_channels[s], + out_channels=dec_channels[s], + norm_layer=ln_layer, + act_layer=act_layer, + traceable=traceable, + ), + name="up", + ) + for i in range(dec_depths[s]): + dec.add( + Block( + channels=dec_channels[s], + num_heads=dec_num_head[s], + patch_size=dec_patch_size[s], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=proj_drop, + drop_path=dec_drop_path_[i], + layer_scale=layer_scale, + norm_layer=ln_layer, + act_layer=act_layer, + pre_norm=pre_norm, + order_index=i % len(self.order), + cpe_indice_key=f"stage{s}", + enable_rpe=enable_rpe, + enable_flash=enable_flash, + upcast_attention=upcast_attention, + upcast_softmax=upcast_softmax, + ), + name=f"block{i}", + ) + self.dec.add(module=dec, name=f"dec{s}") + if self.freeze_encoder: + for p in self.embedding.parameters(): + p.requires_grad = False + for p in self.enc.parameters(): + p.requires_grad = False + self.apply(self._init_weights) + + @staticmethod + def _init_weights(module): + if isinstance(module, nn.Linear): + trunc_normal_(module.weight, std=0.02) + if module.bias is not None: + nn.init.zeros_(module.bias) + elif isinstance(module, spconv.SubMConv3d): + trunc_normal_(module.weight, std=0.02) + if module.bias is not None: + nn.init.zeros_(module.bias) + + 
def forward(self, data_dict):
    """Run the embedding, encoder and (unless ``enc_mode``) decoder.

    ``data_dict`` is wrapped in a ``Point``, embedded, serialized with the
    configured ``order`` / ``shuffle_orders`` and sparsified before it is
    passed through ``self.enc``. The decoder is skipped when
    ``self.enc_mode`` is truthy. Returns the resulting ``Point``.
    """
    point = self.embedding(Point(data_dict))

    # Serialization and sparsification must happen after embedding and
    # before the first encoder stage.
    point.serialization(order=self.order, shuffle_orders=self.shuffle_orders)
    point.sparsify()

    point = self.enc(point)
    if self.enc_mode:
        return point
    return self.dec(point)
+ #shared_plan_cache (dict | None): Shared cache for ConvolutionPlans across all layers. + """ + super().__init__() + self.embedding_mode = embedding_mode + self.shared_plan_cache = shared_plan_cache if shared_plan_cache is not None else {} + + if embedding_mode == "linear": + self.embed = FJTM(torch.nn.Linear(in_channels, embed_channels)) + elif embedding_mode == "conv3x3": + # Initialize embedding using FVDB's sparse 3D convolution + self.embed_conv3x3_1 = fvdb.nn.SparseConv3d( + in_channels, embed_channels, kernel_size=3, stride=1, bias=False + ) + elif embedding_mode == "conv5x5": + # Initialize embedding using FVDB's sparse 3D convolution + self.embed_conv5x5_1 = fvdb.nn.SparseConv3d(in_channels, embed_channels, kernel_size=5, stride=1) + else: + raise ValueError(f"Unsupported embedding mode: {embedding_mode}") + + self.norm_layer = FJTM(norm_layer_module(embed_channels)) + self.act_layer = FJTM(torch.nn.GELU()) + + def _get_plan(self, grid: fvdb.GridBatch, kernel_size, stride): + """Get or create a ConvolutionPlan from shared cache.""" + # target_grid = grid.conv_grid(kernel_size=kernel_size, stride=stride) + + # We definitely want the target grid to be the same as the source grid, + # because we need the topology to remain the same. + return fvdb.ConvolutionPlan.from_grid_batch( + kernel_size=kernel_size, stride=stride, source_grid=grid, target_grid=grid + ) + # cache_key = (grid.address, kernel_size, stride) + # if cache_key not in self.shared_plan_cache: + # self.shared_plan_cache[cache_key] = fvdb.ConvolutionPlan.from_grid_batch( + # kernel_size=kernel_size, stride=stride, source_grid=grid, target_grid=grid + # ) + # return self.shared_plan_cache[cache_key] + + # We use the same output grid as the input grid to maintain topology, so only the + # features are updated. 
class PTV3_Pooling(torch.nn.Module):
    """Linear projection followed by max pooling onto a coarser grid."""

    def __init__(
        self,
        kernel_size: int = 2,
        in_channels: int = 64,
        out_channels: int = 64,
        norm_layer_module: type[torch.nn.Module] | Callable = torch.nn.LayerNorm,
    ):
        """
        Args:
            kernel_size (int): Pooling factor; used as both kernel size and stride.
            in_channels (int): Number of channels in the input features.
            out_channels (int): Number of channels in the output features.
            norm_layer_module (type[torch.nn.Module] | Callable): Factory called with
                ``out_channels`` to build the normalization applied after pooling.
        """
        super().__init__()
        self.kernel_size = kernel_size
        self.proj = FJTM(torch.nn.Linear(in_channels, out_channels))
        self.norm_layer = FJTM(norm_layer_module(out_channels))
        self.act_layer = FJTM(torch.nn.GELU())

    def __call__(self, feats: fvdb.JaggedTensor, grid: fvdb.GridBatch) -> tuple[fvdb.JaggedTensor, fvdb.GridBatch]:
        """Override __call__ to preserve type hints from forward."""
        return super().__call__(feats, grid)

    def forward(self, feats: fvdb.JaggedTensor, grid: fvdb.GridBatch) -> tuple[fvdb.JaggedTensor, fvdb.GridBatch]:
        """Project ``feats``, max-pool them on ``grid`` and return ``(pooled_feats, pooled_grid)``."""
        with NVTXRange("PTV3_Pooling"):
            projected = self.proj(feats)
            # coarse_grid=None: the pooled grid is taken from max_pool's return value.
            pooled, pooled_grid = grid.max_pool(
                self.kernel_size, projected, stride=self.kernel_size, coarse_grid=None
            )
            return self.act_layer(self.norm_layer(pooled)), pooled_grid
norm_layer_module: type[torch.nn.Module] | Callable = torch.nn.LayerNorm, + ): + """ + Args: + kernel_size (int): Kernel size for the pooling operation. + in_channels (int): Number of channels in the input features. + out_channels (int): Number of channels in the output features. + skip_channels (int): Number of channels in the skip connection. + """ + super().__init__() + self.kernel_size = kernel_size + + self.in_channels = in_channels + self.out_channels = out_channels + + self.proj = FJTM(torch.nn.Linear(in_channels, out_channels)) + self.proj_skip = FJTM(torch.nn.Linear(skip_channels, out_channels)) + self.norm = FJTM(norm_layer_module(out_channels)) + self.norm_skip = FJTM(norm_layer_module(out_channels)) + self.act_layer = FJTM(torch.nn.GELU()) + self.act_layer_skip = FJTM(torch.nn.GELU()) + + def __call__( + self, feats: fvdb.JaggedTensor, grid: fvdb.GridBatch, last_feats: fvdb.JaggedTensor, last_grid: fvdb.GridBatch + ) -> tuple[fvdb.JaggedTensor, fvdb.GridBatch]: + """Override __call__ to preserve type hints from forward.""" + return super().__call__(feats, grid, last_feats, last_grid) + + def forward( + self, feats: fvdb.JaggedTensor, grid: fvdb.GridBatch, last_feats: fvdb.JaggedTensor, last_grid: fvdb.GridBatch + ) -> tuple[fvdb.JaggedTensor, fvdb.GridBatch]: + with NVTXRange("PTV3_Unpooling"): + # The conversion is to avoid the bug when enabled AMP, + # despite both feats.jdata and linear.weights are float32, + # the output becomes float16 which causes the subsequent convolution operation to fail. + # import pdb; pdb.set_trace() + feats = self.proj(feats) #.to(torch.float32) + feats = self.norm(feats) + feats = self.act_layer(feats) + + last_feats = self.proj_skip(last_feats) + last_feats = self.norm_skip(last_feats) + last_feats = self.act_layer_skip(last_feats) + + feats, _match_last_grid = grid.refine(self.kernel_size, feats, fine_grid=last_grid) + assert last_grid.is_same(_match_last_grid), "The last grid and the matched grid are not the same." 
class PTV3_Attention(FVDBGridModule):
    """Attention over jagged features that are first re-ordered by a serialization order.

    Features are permuted with ``order_features_from_jagged_ijk``, passed through
    ``jagged_attention`` together with their QKV projection, permuted back with
    ``inverse_order_features_from_perm`` and finally projected and dropped out.
    """

    def __init__(
        self,
        hidden_size: int,
        num_heads: int,
        proj_drop: float = 0.0,
        patch_size: int = 0,
        qk_scale: float | None = None,
        sliding_window_attention: bool = False,
        order_index: int = 0,
        order_types: tuple = ("vdb",),
    ):
        """
        Args:
            hidden_size (int): Number of channels in the input features.
            num_heads (int): Number of attention heads in each block.
            proj_drop (float): Dropout rate applied after the output projection.
            patch_size (int): Patch size for patch attention.
            qk_scale (float | None): Scale factor for query-key dot product. If None, uses 1/sqrt(head_dim).
            sliding_window_attention (bool): Whether to use sliding window attention (uses patch_size as window size).
            order_index (int): Index into the active order types selecting the order used by this block.
            order_types (tuple): Tuple of order type strings (e.g., ("z", "z-trans")). Used as the
                fallback when the grid carries no per-forward shuffled order.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.num_heads = num_heads
        self.head_dim = hidden_size // num_heads
        assert self.head_dim * num_heads == hidden_size, "hidden_size must be divisible by num_heads"

        self.scale = qk_scale or (self.head_dim) ** -0.5
        self.qkv = FJTM(torch.nn.Linear(hidden_size, hidden_size * 3))  # Combined QKV projection
        self.proj = FJTM(torch.nn.Linear(hidden_size, hidden_size))
        self.drop = FJTM(torch.nn.Dropout(proj_drop))
        self.patch_size = patch_size
        self.order_index = order_index
        self.order_types = order_types

        # TODO: Add attention dropout

        # Sliding window attention parameter
        self.sliding_window_attention = sliding_window_attention

    def forward(self, feats: fvdb.JaggedTensor, grid: fvdb.GridBatch) -> fvdb.JaggedTensor:
        """Return attended features in the same (original) ordering as ``feats``."""
        with NVTXRange("PTV3_Attention"):
            # PTV3.forward attaches the per-pass shuffled order to the grid as
            # ``_shuffled_order``. When this module is used on a grid that was
            # never tagged, fall back to the order types given at construction
            # instead of failing with AttributeError.
            active_order_types = getattr(grid, "_shuffled_order", None)
            if active_order_types is None:
                active_order_types = self.order_types

            # Get the order type for this block using the order index
            order_type = active_order_types[self.order_index % len(active_order_types)]

            feats_ordered, perm = order_features_from_jagged_ijk(feats, grid.ijk, order_type)

            qkv = self.qkv(feats_ordered)
            feats_ordered_out = jagged_attention(
                feats_ordered,
                qkv,
                hidden_size=self.hidden_size,
                num_heads=self.num_heads,
                head_dim=self.head_dim,
                patch_size=self.patch_size,
                sliding_window_attention=self.sliding_window_attention,
                scale=self.scale,
            )

            # Undo the serialization permutation before the output projection.
            feats_out = inverse_order_features_from_perm(feats_ordered_out, perm)
            feats_out = self.proj(feats_out)
            feats_out = self.drop(feats_out)
            return feats_out
False, shared_plan_cache: dict | None = None): + """ + Args: + hidden_size (int): Number of channels in the input features. + no_conv_in_cpe (bool): Whether to disable convolution in CPE. + shared_plan_cache (dict | None): Shared cache for ConvolutionPlans across all layers. + """ + super().__init__() + self.hidden_size = hidden_size + self.no_conv_in_cpe = no_conv_in_cpe + self.shared_plan_cache = shared_plan_cache if shared_plan_cache is not None else {} + + self.maybe_conv: fvdb.nn.SparseConv3d | None = ( + None + if no_conv_in_cpe + else fvdb.nn.SparseConv3d(hidden_size, hidden_size, kernel_size=3, stride=1, bias=True) + ) + self.linear = FJTM(torch.nn.Linear(hidden_size, hidden_size)) + self.norm = FJTM(torch.nn.LayerNorm(hidden_size)) + + def _get_plan(self, grid, kernel_size, stride): + """Get or create a ConvolutionPlan from shared cache.""" + # target_grid = grid.conv_grid(kernel_size=kernel_size, stride=stride) + # We need target grid to be the same as the source grid to maintain topology. + return fvdb.ConvolutionPlan.from_grid_batch( + kernel_size=kernel_size, stride=stride, source_grid=grid, target_grid=grid + ) + # cache_key = (grid.address, kernel_size, stride) + # if cache_key not in self.shared_plan_cache: + # self.shared_plan_cache[cache_key] = fvdb.ConvolutionPlan.from_grid_batch( + # kernel_size=kernel_size, stride=stride, source_grid=grid, target_grid=grid + # ) + # return self.shared_plan_cache[cache_key] + + # Target grid is same as source grid to maintain topology. 
class PTV3_Block(FVDBGridModule):
    """One PTv3 block: CPE, attention and MLP, each wrapped in a residual connection."""

    def __init__(
        self,
        hidden_size: int,
        num_heads: int,
        drop_path: float,
        proj_drop: float = 0.0,
        patch_size: int = 0,
        qk_scale: float | None = None,
        no_conv_in_cpe: bool = False,
        sliding_window_attention: bool = False,
        order_index: int = 0,
        order_types: tuple = ("vdb",),
        shared_plan_cache: dict | None = None,
    ):
        """
        Args:
            hidden_size (int): Number of channels in the input features.
            num_heads (int): Number of attention heads in each block.
            drop_path (float): Drop path rate for regularization; 0 disables it.
            proj_drop (float): Dropout rate for the attention projection and MLP layers.
            patch_size (int): Patch size for patch attention.
            qk_scale (float | None): Scale factor for query-key dot product. If None, uses 1/sqrt(head_dim).
            no_conv_in_cpe (bool): Whether to disable convolution in CPE.
            sliding_window_attention (bool): Whether to use sliding window attention (uses patch_size as window size).
            order_index (int): Index into order_types to select which order to use for this block.
            order_types (tuple): Tuple of order type strings (e.g., ("z", "z-trans")).
            shared_plan_cache (dict | None): Cache dict handed to the CPE module.
        """
        super().__init__()

        self.cpe = PTV3_CPE(hidden_size, no_conv_in_cpe, shared_plan_cache)
        self.norm1 = FJTM(torch.nn.LayerNorm(hidden_size))
        self.attn = PTV3_Attention(
            hidden_size=hidden_size,
            num_heads=num_heads,
            proj_drop=proj_drop,
            patch_size=patch_size,
            qk_scale=qk_scale,
            sliding_window_attention=sliding_window_attention,
            order_index=order_index,
            order_types=order_types,
        )
        self.norm2 = FJTM(torch.nn.LayerNorm(hidden_size))
        self.order_index = order_index
        self.mlp = PTV3_MLP(hidden_size, proj_drop)
        # The same drop-path module is reused for the attention and MLP branches.
        self.drop_path = FJTM(DropPath(drop_path)) if drop_path > 0.0 else FJTM(torch.nn.Identity())

    def forward(self, feats: fvdb.JaggedTensor, grid: fvdb.GridBatch) -> fvdb.JaggedTensor:
        """Apply the three residual branches in order and return the updated features."""
        assert isinstance(feats, fvdb.JaggedTensor), "Input feats must be a JaggedTensor"
        assert isinstance(grid, fvdb.GridBatch), "Input grid must be a GridBatch"
        with NVTXRange("PTV3_Block"):
            # Residual 1: conditional positional encoding.
            feats = fvdb.add(feats, self.cpe(feats, grid))

            # Residual 2: pre-norm attention with drop path.
            attn_branch = self.drop_path(self.attn(self.norm1(feats), grid))
            feats = fvdb.add(feats, attn_branch)

            # Residual 3: pre-norm MLP with drop path.
            mlp_branch = self.drop_path(self.mlp(self.norm2(feats)))
            return fvdb.add(feats, mlp_branch)
class PTV3(FVDBGridModule):
    """Point Transformer V3 backbone operating on fVDB ``GridBatch`` / ``JaggedTensor``.

    The encoder is a list of stages separated by ``PTV3_Pooling``; the optional
    decoder mirrors it with ``PTV3_Unpooling`` and skip connections taken from
    the encoder stages.
    """

    def __init__(
        self,
        num_classes: int,
        input_dim: int = 6,  # xyz + intensity/reflectance + additional features
        # NOTE: the default must have one entry per stage, i.e. the same length as
        # enc_channels / enc_num_heads; the former 4-entry default always tripped
        # the length assertion below. (2, 2, 2, 6, 2) matches the reference PTv3
        # depths that go with the channel/head defaults used here.
        enc_depths: tuple[int, ...] = (2, 2, 2, 6, 2),
        enc_channels: tuple[int, ...] = (32, 64, 128, 256, 512),
        enc_num_heads: tuple[int, ...] = (2, 4, 8, 16, 32),
        dec_depths: tuple[int, ...] = (),  # by default, no decoder.
        dec_channels: tuple[int, ...] = (),
        dec_num_heads: tuple[int, ...] = (),
        patch_size: int = 0,
        drop_path: float = 0.3,
        proj_drop: float = 0.0,
        qk_scale: float | None = None,
        enable_batch_norm: bool = False,
        embedding_mode: str = "linear",
        no_conv_in_cpe: bool = False,
        sliding_window_attention: bool = False,
        order_type: str | tuple = ("z", "z-trans"),
        shuffle_orders: bool = True,
    ) -> None:
        """
        ptv3 for 3D point cloud segmentation.

        Args:
            num_classes (int): Number of classes for segmentation (stored only; no head is built here).
            input_dim (int): Input feature dimension (default: 6).
            enc_depths (tuple[int, ...]): Number of encoder blocks for each stage.
            enc_channels (tuple[int, ...]): Number of channels for each encoder stage.
            enc_num_heads (tuple[int, ...]): Number of attention heads for each encoder stage.
            dec_depths (tuple[int, ...]): Number of decoder blocks for each stage, listed from the
                first decoder stage applied (coarsest) onwards. Empty disables the decoder.
            dec_channels (tuple[int, ...]): Number of channels for each decoder stage.
            dec_num_heads (tuple[int, ...]): Number of attention heads for each decoder stage.
            patch_size (int): Patch size for patch attention.
            drop_path (float): Maximum drop path rate; per-block rates are spread linearly from 0.
            proj_drop (float): Dropout rate for MLP layers.
            qk_scale (float | None): Scale factor for query-key dot product. If None, uses 1/sqrt(head_dim).
            enable_batch_norm (bool): Whether to use batch normalization (instead of layer norm) for the
                embedding, down pooling, and up pooling.
            embedding_mode (str): The mode for the embedding layer, "linear" or "conv3x3", "conv5x5".
            no_conv_in_cpe (bool): Whether to disable convolution in CPE.
            sliding_window_attention (bool): Whether to use sliding window attention (uses patch_size as window size).
            order_type (str | tuple): The type(s) of point ordering. Can be a single string
                ("vdb", "z", "z-trans", "hilbert", "hilbert-trans") or a tuple of strings
                (e.g., ("z", "z-trans")). Each block within a stage cycles through the order types.
            shuffle_orders (bool): Whether to shuffle the order of order types at the beginning of each
                forward pass and after each pooling.

        Raises:
            AssertionError: If the encoder tuples differ in length, or a decoder is requested whose
                number of stages is not one less than the number of encoder stages.
        """
        super().__init__()
        # Validate before building any sub-module so a bad config fails fast.
        assert (
            len(enc_depths) == len(enc_channels) == len(enc_num_heads)
        ), "The number of encoder depths, channels, and heads must be the same."

        self.num_classes = num_classes
        self.drop_path = drop_path
        self.proj_drop = proj_drop
        self.qk_scale = qk_scale
        self.no_conv_in_cpe = no_conv_in_cpe
        self.sliding_window_attention = sliding_window_attention
        self.shuffle_orders = shuffle_orders

        # Handle order: convert to tuple for uniform processing (matching reference implementation)
        self.order_type = tuple([order_type]) if isinstance(order_type, str) else tuple(order_type)

        if not enable_batch_norm:
            self.norm_layer = torch.nn.LayerNorm
        else:
            self.norm_layer = partial(torch.nn.BatchNorm1d, eps=1e-3, momentum=0.01)

        # Cache dict handed to every sub-module that takes a shared_plan_cache.
        # It is cleared at the end of each forward pass to prevent OOM.
        self.shared_plan_cache = {}

        self.embedding = PTV3_Embedding(
            input_dim,
            enc_channels[0],
            norm_layer_module=self.norm_layer,
            embedding_mode=embedding_mode,
            shared_plan_cache=self.shared_plan_cache,
        )

        self.num_stages = len(enc_depths)
        if self.num_stages > 0:
            self.enc = torch.nn.ModuleList()
            # Drop path rate grows linearly over all encoder blocks.
            enc_drop_path = [x.item() for x in torch.linspace(0, drop_path, sum(enc_depths))]
            for i in range(self.num_stages):
                if i > 0:
                    self.enc.append(
                        PTV3_Pooling(
                            kernel_size=2,
                            in_channels=enc_channels[i - 1],
                            out_channels=enc_channels[i],
                            norm_layer_module=self.norm_layer,
                        )
                    )
                # All encoder stages share the same order types; blocks within each stage cycle through them
                self.enc.append(
                    PTV3_Encoder(
                        enc_channels[i],
                        enc_depths[i],
                        enc_num_heads[i],
                        enc_drop_path[sum(enc_depths[:i]) : sum(enc_depths[: i + 1])],
                        proj_drop,
                        patch_size,
                        qk_scale,
                        no_conv_in_cpe,
                        sliding_window_attention,
                        self.order_type,
                        self.shared_plan_cache,
                    )
                )

        # create decoder
        self.num_dec_stages = len(dec_depths)
        if self.num_dec_stages > 0:
            assert (
                self.num_dec_stages == self.num_stages - 1
            ), "The number of decoder stages must be one less than the number of encoder stages."
            self.dec = torch.nn.ModuleList()
            # Reversed so the first decoder stage gets the largest rates.
            dec_drop_path = [x.item() for x in torch.linspace(0, drop_path, sum(dec_depths))]
            dec_drop_path = dec_drop_path[::-1]

            for i in range(self.num_dec_stages):
                dec_drop_path_ = dec_drop_path[sum(dec_depths[:i]) : sum(dec_depths[: i + 1])]
                # The first decoder stage consumes the last encoder stage's output.
                last_channels = enc_channels[-1] if i == 0 else dec_channels[i - 1]
                self.dec.append(
                    PTV3_Unpooling(
                        kernel_size=2,
                        in_channels=last_channels,
                        out_channels=dec_channels[i],
                        skip_channels=enc_channels[self.num_stages - 2 - i],
                        norm_layer_module=self.norm_layer,
                    )
                )
                # All decoder stages share the same order types;
                # blocks within each stage cycle through them
                self.dec.append(
                    PTV3_Encoder(
                        dec_channels[i],
                        dec_depths[i],
                        dec_num_heads[i],
                        dec_drop_path_,
                        proj_drop,
                        patch_size,
                        qk_scale,
                        no_conv_in_cpe,
                        sliding_window_attention,
                        self.order_type,
                        self.shared_plan_cache,
                    )
                )

    def _shuffle_order(self, shuffled_order):
        """
        Randomly shuffle the order tuple to create variation across forward passes.
        Returns a new shuffled tuple of order types, or the input unchanged when
        ``shuffle_orders`` is disabled.
        """
        if not self.shuffle_orders:
            return shuffled_order
        indices = torch.randperm(len(shuffled_order))
        return tuple(shuffled_order[i] for i in indices)

    def forward(self, feats: fvdb.JaggedTensor, grid: fvdb.GridBatch) -> fvdb.JaggedTensor:
        """Run embedding, encoder and (if configured) decoder.

        Returns the features of the last stage executed. With a decoder these live
        on the input ``grid``; without one they live on the grid produced by the
        last pooling step.
        """
        original_grid = grid
        with NVTXRange("PTV3_Forward"):

            # Shuffle order at the beginning of forward pass (matching reference implementation)
            shuffled_order = self._shuffle_order(self.order_type)

            # Store shuffled order on the grid so all blocks can access it
            grid._shuffled_order = shuffled_order  # type: ignore

            feats = self.embedding(feats, grid)

            layer_id = 0
            stack = []  # Stack stores (grid, feats, shuffled_order) tuples
            for i in range(self.num_stages):
                if i > 0:
                    with NVTXRange(f"PTV3_Pooling_{layer_id}"):
                        # Push grid, feats, AND the current shuffled_order to stack
                        # The decoder will reuse this exact shuffled order for the corresponding stage
                        stack.append((grid, feats, shuffled_order))
                        pooler = self.enc[layer_id]
                        assert isinstance(pooler, PTV3_Pooling), "All encoder poolers must be of type PTV3_Pooling"
                        feats, grid = pooler(feats, grid)

                        # Shuffle order after pooling for the next (downsampled) stage
                        shuffled_order = self._shuffle_order(shuffled_order)
                        grid._shuffled_order = shuffled_order  # type: ignore
                    layer_id += 1
                with NVTXRange(f"PTV3_Encoder_{layer_id}"):
                    encoder = self.enc[layer_id]
                    assert isinstance(encoder, PTV3_Encoder), "All encoder stages must be of type PTV3_Encoder"
                    feats = encoder(feats, grid)
                layer_id += 1

            if self.num_dec_stages > 0:
                layer_id = 0
                for i in range(self.num_dec_stages):
                    with NVTXRange(f"PTV3_Unpooling_{layer_id}"):
                        # Pop grid, feats, AND the shuffled_order from the corresponding encoder stage
                        last_grid, last_feats, last_shuffled_order = stack.pop()

                        # Restore the shuffled order from the encoder stage to the grid so that
                        # decoder blocks use the SAME order as the corresponding encoder blocks
                        last_grid._shuffled_order = last_shuffled_order  # type: ignore

                        unpooler = self.dec[layer_id]
                        assert isinstance(
                            unpooler, PTV3_Unpooling
                        ), "All decoder unpoolers must be of type PTV3_Unpooling"
                        # After unpooling, grid becomes last_grid with the restored shuffled order
                        feats, grid = unpooler(feats, grid, last_feats, last_grid)
                    layer_id += 1

                    with NVTXRange(f"PTV3_Decoder_{layer_id}"):
                        decoder = self.dec[layer_id]
                        assert isinstance(decoder, PTV3_Encoder), "All decoder stages must be of type PTV3_Encoder"
                        feats = decoder(feats, grid)
                    layer_id += 1

            # Clear cache after forward pass to prevent OOM between batches
            self.shared_plan_cache.clear()
            if self.num_dec_stages > 0:
                # Only a full decoder walks back up to the input resolution; in
                # encoder-only mode the final grid is intentionally coarser, so
                # checking it here would reject every multi-stage encoder.
                assert original_grid.is_same(grid), "The original grid and the final grid are not the same."

        return feats
def checkpoint(func, inputs, params, flag):
    """
    Evaluate a function without caching intermediate activations, allowing for
    reduced memory at the expense of extra compute in the backward pass.
    :param func: the function to evaluate.
    :param inputs: the argument sequence to pass to `func`.
    :param params: a sequence of parameters `func` depends on but does not
                   explicitly take as arguments.
    :param flag: if False, disable gradient checkpointing and simply call
                 `func(*inputs)`.
    """
    if not flag:
        return func(*inputs)
    # Inputs come first, then params; the leading count tells
    # CheckpointFunction where to split them again.
    packed = tuple(inputs) + tuple(params)
    return CheckpointFunction.apply(func, len(inputs), *packed)
@torch.no_grad()
def offset2bincount(offset):
    """Return consecutive differences of ``offset`` with an implicit leading 0.

    When ``offset`` holds cumulative end indices, the result is the number
    of elements in each segment.
    """
    leading_zero = torch.zeros(1, device=offset.device, dtype=torch.long)
    return torch.diff(offset, prepend=leading_zero)
"z-trans", "hilbert", "hilbert-trans"} + if order == "z": + code = z_order_encode(grid_coord, depth=depth) + elif order == "z-trans": + code = z_order_encode(grid_coord[:, [1, 0, 2]], depth=depth) + elif order == "hilbert": + code = hilbert_encode(grid_coord, depth=depth) + elif order == "hilbert-trans": + code = hilbert_encode(grid_coord[:, [1, 0, 2]], depth=depth) + else: + raise NotImplementedError + if batch is not None: + batch = batch.long() + code = batch << depth * 3 | code + return code + + +@torch.inference_mode() +def decode(code, depth=16, order="z"): + assert order in {"z", "hilbert"} + batch = code >> depth * 3 + code = code & ((1 << depth * 3) - 1) + if order == "z": + grid_coord = z_order_decode(code, depth=depth) + elif order == "hilbert": + grid_coord = hilbert_decode(code, depth=depth) + else: + raise NotImplementedError + return grid_coord, batch + + +def z_order_encode(grid_coord: torch.Tensor, depth: int = 16): + x, y, z = grid_coord[:, 0].long(), grid_coord[:, 1].long(), grid_coord[:, 2].long() + # we block the support to batch, maintain batched code in Point class + code = z_order_encode_(x, y, z, b=None, depth=depth) + return code + + +def z_order_decode(code: torch.Tensor, depth): + x, y, z = z_order_decode_(code, depth=depth) + grid_coord = torch.stack([x, y, z], dim=-1) # (N, 3) + return grid_coord + + +def hilbert_encode(grid_coord: torch.Tensor, depth: int = 16): + return hilbert_encode_(grid_coord, num_dims=3, num_bits=depth) + + +def hilbert_decode(code: torch.Tensor, depth: int = 16): + return hilbert_decode_(code, num_dims=3, num_bits=depth) diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/models/utils/serialization/hilbert.py b/point_transformer_v3/external/pointcept_minimal/pointcept/models/utils/serialization/hilbert.py new file mode 100644 index 0000000..c96a3a9 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/models/utils/serialization/hilbert.py @@ -0,0 +1,303 @@ +""" 
def right_shift(binary, k=1, axis=-1):
    """Right shift a tensor of binary values along one axis.

    Parameters:
    -----------
     binary: A tensor of binary values.

     k: The number of positions to shift. Default 1. A one-element tensor
        is accepted and converted to ``int``.

     axis: The axis along which to shift. Default -1.

    Returns:
    --------
     A tensor of the same shape with ``k`` zeros prepended along ``axis``
     and the last ``k`` entries along that axis dropped.
    """
    k = int(k)
    if k == 0:
        return binary.clone()

    # If we're shifting the whole thing, just return zeros.
    if binary.shape[axis] <= k:
        return torch.zeros_like(binary)

    axis = axis % binary.dim()

    # Drop the last k entries along `axis` ...
    slicing = [slice(None)] * binary.dim()
    slicing[axis] = slice(None, -k)

    # ... and prepend k zeros along the same axis. F.pad lists (before, after)
    # pairs starting from the LAST dimension, so trailing dims get (0, 0).
    pad = [0, 0] * (binary.dim() - 1 - axis) + [k, 0]
    return torch.nn.functional.pad(
        binary[tuple(slicing)], pad, mode="constant", value=0
    )


def binary2gray(binary, axis=-1):
    """Convert a tensor of binary values into Gray codes.

    Uses the classic X ^ (X >> 1) trick along ``axis``.

    Returns:
    --------
     A boolean tensor of Gray codes with the same shape as ``binary``.
    """
    shifted = right_shift(binary, axis=axis)
    return torch.logical_xor(binary, shifted)


def gray2binary(gray, axis=-1):
    """Convert a tensor of Gray codes back into binary values.

    Parameters:
    -----------
     gray: A tensor of Gray-code bits.

     axis: The axis along which to perform Gray decoding. Default=-1.

    Returns:
    --------
     A boolean tensor of binary values with the same shape as ``gray``.
    """
    n_bits = gray.shape[axis]
    # Largest power of two strictly below n_bits; the prefix-xor needs
    # ceil(log2(n_bits)) shift-and-xor rounds. A single bit needs none.
    shift = 1 << ((n_bits - 1).bit_length() - 1) if n_bits > 1 else 0
    while shift > 0:
        gray = torch.logical_xor(gray, right_shift(gray, shift, axis=axis))
        shift >>= 1
    return gray.bool()


def _skilling_exchange(gray, dim, bit):
    """Apply one (dim, bit) step of Skilling's transform to ``gray`` in place.

    ``gray`` is indexed as ``[point, dim, bit]``. Only entries at positions
    after ``bit`` are modified, so the step is its own inverse.
    """
    # Identify which points have this bit active.
    mask = gray[:, dim, bit]

    # Where this bit is on, invert the 0 dimension for lower bits.
    gray[:, 0, bit + 1 :] = torch.logical_xor(gray[:, 0, bit + 1 :], mask[:, None])

    # Where the bit is off, exchange the lower bits with the 0 dimension.
    to_flip = torch.logical_and(
        torch.logical_not(mask[:, None]),
        torch.logical_xor(gray[:, 0, bit + 1 :], gray[:, dim, bit + 1 :]),
    )
    gray[:, dim, bit + 1 :] = torch.logical_xor(gray[:, dim, bit + 1 :], to_flip)
    gray[:, 0, bit + 1 :] = torch.logical_xor(gray[:, 0, bit + 1 :], to_flip)


def encode(locs, num_dims, num_bits):
    """Encode an array of locations in a hypercube into Hilbert integers.

    This is a vectorized-ish version of the Hilbert curve implementation by
    John Skilling as described in:

    Skilling, J. (2004, April). Programming the Hilbert curve. In AIP Conference
      Proceedings (Vol. 707, No. 1, pp. 381-387). American Institute of Physics.

    Params:
    -------
     locs - A tensor of locations in a hypercube of num_dims dimensions, in
            which each dimension runs from 0 to 2**num_bits-1. The last
            dimension must have size num_dims.

     num_dims - The dimensionality of the hypercube. Integer.

     num_bits - The number of bits for each dimension. Integer.

    Returns:
    --------
     An int64 tensor with the shape of ``locs`` minus its last dimension.

    Raises:
    -------
     ValueError if the last dimension is not num_dims, or if
     num_dims * num_bits exceeds 63.
    """
    # Keep around the original shape for later.
    orig_shape = locs.shape

    if orig_shape[-1] != num_dims:
        raise ValueError(
            """
      The shape of locs was surprising in that the last dimension was of size
      %d, but num_dims=%d.  These need to be equal.
      """
            % (orig_shape[-1], num_dims)
        )

    if num_dims * num_bits > 63:
        raise ValueError(
            """
      num_dims=%d and num_bits=%d for %d bits total, which can't be encoded
      into a int64.  Are you sure you need that many points on your Hilbert
      curve?
      """
            % (num_dims, num_bits, num_dims * num_bits)
        )

    bitpack_mask = 1 << torch.arange(0, 8, device=locs.device)
    bitpack_mask_rev = bitpack_mask.flip(-1)

    # Reinterpret each 64-bit location as eight bytes, most significant first,
    # preserving the association by dimension.
    locs_uint8 = (
        locs.long().contiguous().view(torch.uint8).reshape((-1, num_dims, 8)).flip(-1)
    )

    # Now turn these into bits and truncate to num_bits.
    gray = (
        locs_uint8.unsqueeze(-1)
        .bitwise_and(bitpack_mask_rev)
        .ne(0)
        .byte()
        .flatten(-2, -1)[..., -num_bits:]
    )

    # Run the decoding process the other way: forwards through bits and dims.
    for bit in range(num_bits):
        for dim in range(num_dims):
            _skilling_exchange(gray, dim, bit)

    # Now flatten out, interleaving the dimensions bit by bit.
    gray = gray.swapaxes(1, 2).reshape((-1, num_bits * num_dims))

    # Convert Gray back to binary.
    hh_bin = gray2binary(gray)

    # Pad back out to 64 bits.
    extra_dims = 64 - num_bits * num_dims
    padded = torch.nn.functional.pad(hh_bin, (extra_dims, 0), "constant", 0)

    # Pack the bits into bytes (least significant byte first).
    hh_uint8 = (
        (padded.flip(-1).reshape((-1, 8, 8)) * bitpack_mask).sum(2).type(torch.uint8)
    )

    # Reinterpret the eight bytes of each point as one int64 and restore the
    # leading shape (a single point stays 1-D instead of collapsing to 0-D).
    return hh_uint8.view(torch.int64).reshape(orig_shape[:-1])


def decode(hilberts, num_dims, num_bits):
    """Decode an array of Hilbert integers into locations in a hypercube.

    This is a vectorized-ish version of the Hilbert curve implementation by
    John Skilling as described in:

    Skilling, J. (2004, April). Programming the Hilbert curve. In AIP Conference
      Proceedings (Vol. 707, No. 1, pp. 381-387). American Institute of Physics.

    Params:
    -------
     hilberts - A tensor of Hilbert integers. Must be an integer dtype and
                cannot have fewer bits than num_dims * num_bits.

     num_dims - The dimensionality of the hypercube. Integer.

     num_bits - The number of bits for each dimension. Integer.

    Returns:
    --------
     An int64 tensor with the same shape as hilberts (at least 1-D) plus an
     additional trailing dimension of size num_dims.

    Raises:
    -------
     ValueError if num_dims * num_bits exceeds 64.
    """
    if num_dims * num_bits > 64:
        raise ValueError(
            """
      num_dims=%d and num_bits=%d for %d bits total, which can't be encoded
      into a uint64.  Are you sure you need that many points on your Hilbert
      curve?
      """
            % (num_dims, num_bits, num_dims * num_bits)
        )

    # Handle the case where we got handed a naked integer.
    hilberts = torch.atleast_1d(hilberts)

    # Keep around the shape for later.
    orig_shape = hilberts.shape
    bitpack_mask = 1 << torch.arange(0, 8, device=hilberts.device)
    bitpack_mask_rev = bitpack_mask.flip(-1)

    # Treat each of the hilberts as a sequence of eight uint8,
    # most significant byte first.
    hh_uint8 = (
        hilberts.ravel().type(torch.int64).view(torch.uint8).reshape((-1, 8)).flip(-1)
    )

    # Turn these bytes into bits and then truncate to the size we actually
    # need for using Skilling's procedure.
    hh_bits = (
        hh_uint8.unsqueeze(-1)
        .bitwise_and(bitpack_mask_rev)
        .ne(0)
        .byte()
        .flatten(-2, -1)[:, -num_dims * num_bits :]
    )

    # Take the sequence of bits and Gray-code it.
    gray = binary2gray(hh_bits)

    # De-interleave into [point, dim, bit].
    gray = gray.reshape((-1, num_bits, num_dims)).swapaxes(1, 2)

    # Iterate backwards through the bits and the dimensions.
    for bit in range(num_bits - 1, -1, -1):
        for dim in range(num_dims - 1, -1, -1):
            _skilling_exchange(gray, dim, bit)

    # Pad each coordinate back out to 64 bits.
    extra_dims = 64 - num_bits
    padded = torch.nn.functional.pad(gray, (extra_dims, 0), "constant", 0)

    # Now chop these up into blocks of 8.
    locs_chopped = padded.flip(-1).reshape((-1, num_dims, 8, 8))

    # Take those blocks and turn them into uint8s.
    locs_uint8 = (locs_chopped * bitpack_mask).sum(3).type(torch.uint8)

    # Finally, treat each group of eight bytes as one int64.
    flat_locs = locs_uint8.view(torch.int64)

    # Return them in the expected shape.
    return flat_locs.reshape((*orig_shape, num_dims))
def xyz2key(
    x: torch.Tensor,
    y: torch.Tensor,
    z: torch.Tensor,
    b: Optional[Union[torch.Tensor, int]] = None,
    depth: int = 16,
):
    r"""Encodes :attr:`x`, :attr:`y`, :attr:`z` coordinates to the shuffled keys
    based on pre-computed look up tables. The speed of this function is much
    faster than the method based on for-loop.

    Args:
      x (torch.Tensor): The x coordinate.
      y (torch.Tensor): The y coordinate.
      z (torch.Tensor): The z coordinate.
      b (torch.Tensor or int): The batch index of the coordinates, and should be
          smaller than 32768. If :attr:`b` is :obj:`torch.Tensor`, the size of
          :attr:`b` must be the same as :attr:`x`, :attr:`y`, and :attr:`z`.
      depth (int): The depth of the shuffled key, and must be smaller than 17 (< 17).

    Returns:
      torch.Tensor: int64 keys; the batch index, when given, sits above bit 48.
    """

    EX, EY, EZ = _key_lut.encode_lut(x.device)
    x, y, z = x.long(), y.long(), z.long()

    # The tables cover 8 bits per axis, so encode the low byte first ...
    mask = 255 if depth > 8 else (1 << depth) - 1
    key = EX[x & mask] | EY[y & mask] | EZ[z & mask]
    if depth > 8:
        # ... then the remaining high bits, placed above the 24 low key bits.
        mask = (1 << (depth - 8)) - 1
        key16 = EX[(x >> 8) & mask] | EY[(y >> 8) & mask] | EZ[(z >> 8) & mask]
        key = key16 << 24 | key

    if b is not None:
        # `b` may be a plain int, which has no `.long()`.
        if isinstance(b, torch.Tensor):
            b = b.long()
        key = b << 48 | key

    return key
class Point(Dict):
    """
    Point Structure of Pointcept

    A Point (point cloud) in Pointcept is a dictionary that contains various properties of
    a batched point cloud. The properties with the following names have a specific definition
    as follows:

    - "coord": original coordinate of point cloud;
    - "grid_coord": grid coordinate for specific grid size (related to GridSampling);
    Point also supports the following optional attributes:
    - "offset": if it does not exist, initialized as batch size is 1;
    - "batch": if it does not exist, initialized as batch size is 1;
    - "feat": feature of point cloud, default input of model;
    - "grid_size": Grid size of point cloud (related to GridSampling);
    (related to Serialization)
    - "serialized_depth": depth of serialization, 2 ** depth * grid_size describes the maximum of point cloud range;
    - "serialized_code": a list of serialization codes;
    - "serialized_order": a list of serialization orders determined by code;
    - "serialized_inverse": a list of inverse mappings determined by code;
    (related to Sparsify: SpConv)
    - "sparse_shape": Sparse shape for Sparse Conv Tensor;
    - "sparse_conv_feat": SparseConvTensor initialized with information provided by Point;
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # If only one of "offset" or "batch" exists, generate the other from it
        if "batch" not in self.keys() and "offset" in self.keys():
            self["batch"] = offset2batch(self.offset)
        elif "offset" not in self.keys() and "batch" in self.keys():
            self["offset"] = batch2offset(self.batch)

    def _ensure_grid_coord(self):
        """Derive "grid_coord" from "coord" and "grid_size" when it is missing."""
        if "grid_coord" in self.keys():
            return
        # if you don't want to operate GridSampling in data augmentation,
        # please add the following augmentation into your pipeline:
        # dict(type="Copy", keys_dict={"grid_size": 0.01}),
        # (adjust `grid_size` to what you want)
        assert {"grid_size", "coord"}.issubset(self.keys())
        self["grid_coord"] = torch.div(
            self.coord - self.coord.min(0)[0], self.grid_size, rounding_mode="trunc"
        ).int()

    def serialization(self, order="z", depth=None, shuffle_orders=False):
        """
        Point Cloud Serialization

        Relies on ["grid_coord" or "coord" + "grid_size", "batch", "feat"].

        order: a single order name or a sequence of order names accepted by
            `encode` (e.g. "z", "hilbert").
        depth: bits per axis; derived from the largest grid coordinate if None.
        shuffle_orders: randomly permute the order in which the codes are stored.
        """
        self["order"] = order
        # A bare string is one order name, not a sequence of characters.
        orders = [order] if isinstance(order, str) else list(order)
        assert "batch" in self.keys()
        self._ensure_grid_coord()

        if depth is None:
            # Adaptive measure the depth of serialization cube (length = 2 ^ depth)
            depth = int(self.grid_coord.max() + 1).bit_length()
        self["serialized_depth"] = depth
        # Maximum bit length for serialization code is 63 (int64)
        assert depth * 3 + len(self.offset).bit_length() <= 63
        # Here we follow OCNN and set the depth limitation to 16 (48bit) for the point position.
        # Although depth is limited to less than 16, we can encode a 655.36^3 (2^16 * 0.01) meter^3
        # cube with a grid size of 0.01 meter. We consider it is enough for the current stage.
        # We can unlock the limitation by optimizing the z-order encoding function if necessary.
        assert depth <= 16

        # The serialization codes are arranged as following structures:
        # [Order1 ([n]),
        #  Order2 ([n]),
        #   ...
        #  OrderN ([n])] (k, n)
        code = [
            encode(self.grid_coord, self.batch, depth, order=order_)
            for order_ in orders
        ]
        code = torch.stack(code)
        order = torch.argsort(code)
        inverse = torch.zeros_like(order).scatter_(
            dim=1,
            index=order,
            src=torch.arange(0, code.shape[1], device=order.device).repeat(
                code.shape[0], 1
            ),
        )

        if shuffle_orders:
            perm = torch.randperm(code.shape[0])
            code = code[perm]
            order = order[perm]
            inverse = inverse[perm]
            self["order_perm"] = [orders[i] for i in perm.tolist()]

        self["serialized_code"] = code
        self["serialized_order"] = order
        self["serialized_inverse"] = inverse

    def sparsify(self, pad=96):
        """
        Point Cloud Sparsification

        Point cloud is sparse, here we use "sparsify" to specifically refer to
        preparing "spconv.SparseConvTensor" for SpConv.

        Relies on ["grid_coord" or "coord" + "grid_size", "batch", "feat"].

        pad: padding added to the maximum grid coordinate to form the sparse shape.
        """
        assert {"feat", "batch"}.issubset(self.keys())
        self._ensure_grid_coord()
        if "sparse_shape" in self.keys():
            sparse_shape = self.sparse_shape
        else:
            sparse_shape = torch.add(
                torch.max(self.grid_coord, dim=0).values, pad
            ).tolist()
        sparse_conv_feat = spconv.SparseConvTensor(
            features=self.feat,
            indices=torch.cat(
                [self.batch.unsqueeze(-1).int(), self.grid_coord.int()], dim=1
            ).contiguous(),
            spatial_shape=sparse_shape,
            batch_size=self.batch[-1].tolist() + 1,
        )
        self["sparse_shape"] = sparse_shape
        self["sparse_conv_feat"] = sparse_conv_feat

    def octreelization(self, depth=None, full_depth=None):
        """
        Point Cloud Octreelization

        Generate octree with OCNN.
        Relies on ["grid_coord", "batch", "feat"].
        """
        assert (
            ocnn is not None
        ), "Please follow https://github.com/octree-nn/ocnn-pytorch install ocnn."
        assert {"feat", "batch"}.issubset(self.keys())
        # add 1 to make grid space support shift order
        self._ensure_grid_coord()
        if depth is None:
            if "depth" in self.keys():
                depth = self.depth
            else:
                depth = int(self.grid_coord.max() + 1).bit_length()
        if full_depth is None:
            full_depth = 1
        self["depth"] = depth
        assert depth <= 16  # maximum in ocnn

        # [0, 2**depth] -> [0, 2] -> [-1, 1]
        coord = self.grid_coord / 2 ** (self.depth - 1) - 1.0
        point = ocnn.octree.Points(
            points=coord,
            features=self.feat,
            batch_id=self.batch.unsqueeze(-1),
            batch_size=self.batch[-1] + 1,
        )
        octree = ocnn.octree.Octree(
            depth=depth,
            full_depth=full_depth,
            batch_size=self.batch[-1] + 1,
            device=coord.device,
        )
        octree.build_octree(point)
        octree.construct_all_neigh()

        query_pts = torch.cat([self.grid_coord, point.batch_id], dim=1).contiguous()
        inverse = octree.search_xyzb(query_pts, depth, True)
        assert torch.sum(inverse < 0) == 0  # all mapping should be valid
        inverse_ = torch.unique(inverse)
        order = torch.zeros_like(inverse_).scatter_(
            dim=0,
            index=inverse,
            src=torch.arange(0, inverse.shape[0], device=inverse.device),
        )
        self["octree"] = octree
        self["octree_order"] = order
        self["octree_inverse"] = inverse
def shared_array(name, var=None):
    """Create or attach a SharedArray segment called ``name``.

    Args:
        name: segment name, used as ``shm://{name}``.
        var: array to publish. When given, the segment is created from it and
            returned read-only, or the existing segment is attached if
            ``/dev/shm/{name}`` already exists. When None, the existing
            segment is attached and a private copy is returned.

    Raises:
        ImportError: if the optional ``SharedArray`` package failed to import.
    """
    if SharedArray is None:
        # The module tolerates a missing SharedArray at import time; fail here
        # with a clear message instead of an AttributeError on None.
        raise ImportError(
            "shared_array requires the optional 'SharedArray' package, "
            "which could not be imported."
        )
    if var is None:
        return SharedArray.attach(f"shm://{name}").copy()
    # check exist
    if os.path.exists(f"/dev/shm/{name}"):
        return SharedArray.attach(f"shm://{name}")
    # create shared_array
    data = SharedArray.create(f"shm://{name}", var.shape, dtype=var.dtype)
    data[...] = var[...]
    data.flags.writeable = False
    return data


def shared_dict(name, var=None):
    """Create or load a dict of shared arrays stored under the prefix ``name``.

    Args:
        name: prefix for the shared segments; must not contain ".".
        var: dict to publish. Only values that are ``np.ndarray`` are cached;
            their keys are recorded in a ShareableList named ``{name}.keys``.
            When None, the key list is read back and each array is loaded.

    Returns:
        dict mapping each cached key to its array.
    """
    name = str(name)
    assert "." not in name  # '.' is used as sep flag
    data = {}
    if var is not None:
        assert isinstance(var, dict)
        # current version only cache np.array
        keys = [key for key in var.keys() if isinstance(var[key], np.ndarray)]

        ShareableList(sequence=keys, name=name + ".keys")
        for key in keys:
            data[key] = shared_array(name=f"{name}.{key}", var=var[key])
    else:
        keys = list(ShareableList(name=name + ".keys"))
        for key in keys:
            data[key] = shared_array(name=f"{name}.{key}")
    return data
def get_world_size() -> int:
    """Return the number of processes, or 1 when not running distributed."""
    if not dist.is_available():
        return 1
    if not dist.is_initialized():
        return 1
    return dist.get_world_size()


def get_rank() -> int:
    """Return the global rank of this process, or 0 when not running distributed."""
    if not dist.is_available():
        return 0
    if not dist.is_initialized():
        return 0
    return dist.get_rank()


def get_local_rank() -> int:
    """
    Returns:
        The rank of the current process within the local (per-machine) process group.
    """
    if not dist.is_available():
        return 0
    if not dist.is_initialized():
        return 0
    assert (
        _LOCAL_PROCESS_GROUP is not None
    ), "Local process group is not created! Please use launch() to spawn processes!"
    return dist.get_rank(group=_LOCAL_PROCESS_GROUP)


def get_local_size() -> int:
    """
    Returns:
        The size of the per-machine process group,
        i.e. the number of processes per machine.
    """
    if not dist.is_available():
        return 1
    if not dist.is_initialized():
        return 1
    # Same guard as get_local_rank(): with group=None torch would silently
    # report the size of the default (global) group instead.
    assert (
        _LOCAL_PROCESS_GROUP is not None
    ), "Local process group is not created! Please use launch() to spawn processes!"
    return dist.get_world_size(group=_LOCAL_PROCESS_GROUP)


def is_main_process() -> bool:
    """Return True on the process whose global rank is 0."""
    return get_rank() == 0


def synchronize():
    """
    Helper function to synchronize (barrier) among all processes when
    using distributed training
    """
    if not dist.is_available():
        return
    if not dist.is_initialized():
        return
    world_size = dist.get_world_size()
    if world_size == 1:
        return
    if dist.get_backend() == dist.Backend.NCCL:
        # This argument is needed to avoid warnings.
        # It's valid only for NCCL backend.
        dist.barrier(device_ids=[torch.cuda.current_device()])
    else:
        dist.barrier()


@functools.lru_cache()
def _get_global_gloo_group():
    """
    Return a process group based on gloo backend, containing all the ranks
    The result is cached.
    """
    if dist.get_backend() == "nccl":
        return dist.new_group(backend="gloo")
    else:
        return dist.group.WORLD


def all_gather(data, group=None):
    """
    Run all_gather on arbitrary picklable data (not necessarily tensors).
    Args:
        data: any picklable object
        group: a torch process group. By default, will use a group which
            contains all ranks on gloo backend.
    Returns:
        list[data]: list of data gathered from each rank
    """
    if get_world_size() == 1:
        return [data]
    if group is None:
        # use CPU group by default, to reduce GPU RAM usage.
        group = _get_global_gloo_group()
    world_size = dist.get_world_size(group)
    if world_size == 1:
        return [data]

    output = [None for _ in range(world_size)]
    dist.all_gather_object(output, data, group=group)
    return output


def gather(data, dst=0, group=None):
    """
    Run gather on arbitrary picklable data (not necessarily tensors).
    Args:
        data: any picklable object
        dst (int): destination rank
        group: a torch process group. By default, will use a group which
            contains all ranks on gloo backend.
    Returns:
        list[data]: on dst, a list of data gathered from each rank. Otherwise,
            an empty list.
    """
    if get_world_size() == 1:
        return [data]
    if group is None:
        group = _get_global_gloo_group()
    world_size = dist.get_world_size(group=group)
    if world_size == 1:
        return [data]
    rank = dist.get_rank(group=group)

    if rank == dst:
        output = [None for _ in range(world_size)]
        dist.gather_object(data, output, dst=dst, group=group)
        return output
    else:
        dist.gather_object(data, None, dst=dst, group=group)
        return []


def shared_random_seed():
    """
    Returns:
        int: a random number that is the same across all workers.
        If workers need a shared RNG, they can use this shared seed to
        create one.
    All workers must call this function, otherwise it will deadlock.
    """
    ints = np.random.randint(2**31)
    all_ints = all_gather(ints)
    # Every rank adopts the value drawn by the first rank in the gathered list.
    return all_ints[0]


def reduce_dict(input_dict, average=True):
    """
    Reduce the values in the dictionary from all processes so that process with rank
    0 has the reduced results.
    Args:
        input_dict (dict): inputs to be reduced. All the values must be scalar CUDA Tensor.
        average (bool): whether to do average or sum
    Returns:
        a dict with the same keys as input_dict, after reduction.
        The input dict itself is returned when there is a single process or
        nothing to reduce.
    """
    world_size = get_world_size()
    # torch.stack() rejects an empty list, so an empty dict is returned as is.
    if world_size < 2 or not input_dict:
        return input_dict
    with torch.no_grad():
        # sort the keys so that they are consistent across processes
        names = sorted(input_dict.keys())
        values = torch.stack([input_dict[k] for k in names], dim=0)
        dist.reduce(values, dst=0)
        if dist.get_rank() == 0 and average:
            # only main process gets accumulated, so only divide by
            # world_size in this case
            values /= world_size
        reduced_dict = {k: v for k, v in zip(names, values)}
    return reduced_dict
def add_args(parser, cfg, prefix=""):
    """Register one command-line option per entry of ``cfg`` on ``parser``.

    Nested dicts are flattened with dotted names (``--outer.inner``). The
    option type follows the type of the current value: bool -> store_true
    flag, str/int/float -> typed option, other iterables -> ``nargs="+"``
    typed after their first element. Values of any other type are reported
    with ``print`` and skipped.

    Args:
        parser: an ``argparse.ArgumentParser`` (or compatible) to extend.
        cfg: mapping of option names to their current values.
        prefix: dotted prefix prepended to every option name.

    Returns:
        The same ``parser``, for chaining.
    """
    for k, v in cfg.items():
        flag = "--" + prefix + k
        # bool must be tested before int: bool is a subclass of int, so the
        # int branch would otherwise swallow every boolean option.
        if isinstance(v, bool):
            parser.add_argument(flag, action="store_true")
        elif isinstance(v, str):
            parser.add_argument(flag)
        elif isinstance(v, int):
            parser.add_argument(flag, type=int)
        elif isinstance(v, float):
            parser.add_argument(flag, type=float)
        elif isinstance(v, dict):
            add_args(parser, v, prefix + k + ".")
        elif isinstance(v, abc.Iterable):
            # Peek at the first element without indexing, so empty and
            # non-indexable iterables (e.g. sets) do not raise.
            first = next(iter(v), None)
            if first is None:
                parser.add_argument(flag, nargs="+")
            else:
                parser.add_argument(flag, type=type(first), nargs="+")
        else:
            print(f"cannot parse key {prefix + k} of type {type(v)}")
    return parser
+ + Example: + >>> cfg = Config(dict(a=1, b=dict(b1=[0, 1]))) + >>> cfg.a + 1 + >>> cfg.b + {'b1': [0, 1]} + >>> cfg.b.b1 + [0, 1] + >>> cfg = Config.fromfile('tests/data/config/a.py') + >>> cfg.filename + "/home/kchen/projects/mmcv/tests/data/config/a.py" + >>> cfg.item4 + 'test' + >>> cfg + "Config [path: /home/kchen/projects/mmcv/tests/data/config/a.py]: " + "{'item1': [1, 2], 'item2': {'a': 0}, 'item3': True, 'item4': 'test'}" + """ + + @staticmethod + def _validate_py_syntax(filename): + with open(filename, "r", encoding="utf-8") as f: + # Setting encoding explicitly to resolve coding issue on windows + content = f.read() + try: + ast.parse(content) + except SyntaxError as e: + raise SyntaxError( + "There are syntax errors in config " f"file {filename}: {e}" + ) + + @staticmethod + def _substitute_predefined_vars(filename, temp_config_name): + file_dirname = osp.dirname(filename) + file_basename = osp.basename(filename) + file_basename_no_extension = osp.splitext(file_basename)[0] + file_extname = osp.splitext(filename)[1] + support_templates = dict( + fileDirname=file_dirname, + fileBasename=file_basename, + fileBasenameNoExtension=file_basename_no_extension, + fileExtname=file_extname, + ) + with open(filename, "r", encoding="utf-8") as f: + # Setting encoding explicitly to resolve coding issue on windows + config_file = f.read() + for key, value in support_templates.items(): + regexp = r"\{\{\s*" + str(key) + r"\s*\}\}" + value = value.replace("\\", "/") + config_file = re.sub(regexp, value, config_file) + with open(temp_config_name, "w", encoding="utf-8") as tmp_config_file: + tmp_config_file.write(config_file) + + @staticmethod + def _pre_substitute_base_vars(filename, temp_config_name): + """Substitute base variable placehoders to string, so that parsing + would work.""" + with open(filename, "r", encoding="utf-8") as f: + # Setting encoding explicitly to resolve coding issue on windows + config_file = f.read() + base_var_dict = {} + regexp = r"\{\{\s*" 
+ BASE_KEY + r"\.([\w\.]+)\s*\}\}" + base_vars = set(re.findall(regexp, config_file)) + for base_var in base_vars: + randstr = f"_{base_var}_{uuid.uuid4().hex.lower()[:6]}" + base_var_dict[randstr] = base_var + regexp = r"\{\{\s*" + BASE_KEY + r"\." + base_var + r"\s*\}\}" + config_file = re.sub(regexp, f'"{randstr}"', config_file) + with open(temp_config_name, "w", encoding="utf-8") as tmp_config_file: + tmp_config_file.write(config_file) + return base_var_dict + + @staticmethod + def _substitute_base_vars(cfg, base_var_dict, base_cfg): + """Substitute variable strings to their actual values.""" + cfg = copy.deepcopy(cfg) + + if isinstance(cfg, dict): + for k, v in cfg.items(): + if isinstance(v, str) and v in base_var_dict: + new_v = base_cfg + for new_k in base_var_dict[v].split("."): + new_v = new_v[new_k] + cfg[k] = new_v + elif isinstance(v, (list, tuple, dict)): + cfg[k] = Config._substitute_base_vars(v, base_var_dict, base_cfg) + elif isinstance(cfg, tuple): + cfg = tuple( + Config._substitute_base_vars(c, base_var_dict, base_cfg) for c in cfg + ) + elif isinstance(cfg, list): + cfg = [ + Config._substitute_base_vars(c, base_var_dict, base_cfg) for c in cfg + ] + elif isinstance(cfg, str) and cfg in base_var_dict: + new_v = base_cfg + for new_k in base_var_dict[cfg].split("."): + new_v = new_v[new_k] + cfg = new_v + + return cfg + + @staticmethod + def _file2dict(filename, use_predefined_variables=True): + filename = osp.abspath(osp.expanduser(filename)) + check_file_exist(filename) + fileExtname = osp.splitext(filename)[1] + if fileExtname not in [".py", ".json", ".yaml", ".yml"]: + raise IOError("Only py/yml/yaml/json type are supported now!") + + with tempfile.TemporaryDirectory() as temp_config_dir: + temp_config_file = tempfile.NamedTemporaryFile( + dir=temp_config_dir, suffix=fileExtname + ) + if platform.system() == "Windows": + temp_config_file.close() + temp_config_name = osp.basename(temp_config_file.name) + # Substitute predefined variables + if 
use_predefined_variables: + Config._substitute_predefined_vars(filename, temp_config_file.name) + else: + shutil.copyfile(filename, temp_config_file.name) + # Substitute base variables from placeholders to strings + base_var_dict = Config._pre_substitute_base_vars( + temp_config_file.name, temp_config_file.name + ) + + if filename.endswith(".py"): + temp_module_name = osp.splitext(temp_config_name)[0] + sys.path.insert(0, temp_config_dir) + Config._validate_py_syntax(filename) + mod = import_module(temp_module_name) + sys.path.pop(0) + cfg_dict = { + name: value + for name, value in mod.__dict__.items() + if not name.startswith("__") + } + # delete imported module + del sys.modules[temp_module_name] + elif filename.endswith((".yml", ".yaml", ".json")): + raise NotImplementedError + # close temp file + temp_config_file.close() + + # check deprecation information + if DEPRECATION_KEY in cfg_dict: + deprecation_info = cfg_dict.pop(DEPRECATION_KEY) + warning_msg = ( + f"The config file {filename} will be deprecated " "in the future." + ) + if "expected" in deprecation_info: + warning_msg += f' Please use {deprecation_info["expected"]} ' "instead." 
+ if "reference" in deprecation_info: + warning_msg += ( + " More information can be found at " + f'{deprecation_info["reference"]}' + ) + warnings.warn(warning_msg) + + cfg_text = filename + "\n" + with open(filename, "r", encoding="utf-8") as f: + # Setting encoding explicitly to resolve coding issue on windows + cfg_text += f.read() + + if BASE_KEY in cfg_dict: + cfg_dir = osp.dirname(filename) + base_filename = cfg_dict.pop(BASE_KEY) + base_filename = ( + base_filename if isinstance(base_filename, list) else [base_filename] + ) + + cfg_dict_list = list() + cfg_text_list = list() + for f in base_filename: + _cfg_dict, _cfg_text = Config._file2dict(osp.join(cfg_dir, f)) + cfg_dict_list.append(_cfg_dict) + cfg_text_list.append(_cfg_text) + + base_cfg_dict = dict() + for c in cfg_dict_list: + duplicate_keys = base_cfg_dict.keys() & c.keys() + if len(duplicate_keys) > 0: + raise KeyError( + "Duplicate key is not allowed among bases. " + f"Duplicate keys: {duplicate_keys}" + ) + base_cfg_dict.update(c) + + # Substitute base variables from strings to their actual values + cfg_dict = Config._substitute_base_vars( + cfg_dict, base_var_dict, base_cfg_dict + ) + + base_cfg_dict = Config._merge_a_into_b(cfg_dict, base_cfg_dict) + cfg_dict = base_cfg_dict + + # merge cfg_text + cfg_text_list.append(cfg_text) + cfg_text = "\n".join(cfg_text_list) + + return cfg_dict, cfg_text + + @staticmethod + def _merge_a_into_b(a, b, allow_list_keys=False): + """merge dict ``a`` into dict ``b`` (non-inplace). + + Values in ``a`` will overwrite ``b``. ``b`` is copied first to avoid + in-place modifications. + + Args: + a (dict): The source dict to be merged into ``b``. + b (dict): The origin dict to be fetch keys from ``a``. + allow_list_keys (bool): If True, int string keys (e.g. '0', '1') + are allowed in source ``a`` and will replace the element of the + corresponding index in b if b is a list. Default: False. + + Returns: + dict: The modified dict of ``b`` using ``a``. 
+ + Examples: + # Normally merge a into b. + >>> Config._merge_a_into_b( + ... dict(obj=dict(a=2)), dict(obj=dict(a=1))) + {'obj': {'a': 2}} + + # Delete b first and merge a into b. + >>> Config._merge_a_into_b( + ... dict(obj=dict(_delete_=True, a=2)), dict(obj=dict(a=1))) + {'obj': {'a': 2}} + + # b is a list + >>> Config._merge_a_into_b( + ... {'0': dict(a=2)}, [dict(a=1), dict(b=2)], True) + [{'a': 2}, {'b': 2}] + """ + b = b.copy() + for k, v in a.items(): + if allow_list_keys and k.isdigit() and isinstance(b, list): + k = int(k) + if len(b) <= k: + raise KeyError(f"Index {k} exceeds the length of list {b}") + b[k] = Config._merge_a_into_b(v, b[k], allow_list_keys) + elif isinstance(v, dict) and k in b and not v.pop(DELETE_KEY, False): + allowed_types = (dict, list) if allow_list_keys else dict + if not isinstance(b[k], allowed_types): + raise TypeError( + f"{k}={v} in child config cannot inherit from base " + f"because {k} is a dict in the child config but is of " + f"type {type(b[k])} in base config. You may set " + f"`{DELETE_KEY}=True` to ignore the base config" + ) + b[k] = Config._merge_a_into_b(v, b[k], allow_list_keys) + else: + b[k] = v + return b + + @staticmethod + def fromfile(filename, use_predefined_variables=True, import_custom_modules=True): + cfg_dict, cfg_text = Config._file2dict(filename, use_predefined_variables) + if import_custom_modules and cfg_dict.get("custom_imports", None): + import_modules_from_strings(**cfg_dict["custom_imports"]) + return Config(cfg_dict, cfg_text=cfg_text, filename=filename) + + @staticmethod + def fromstring(cfg_str, file_format): + """Generate config from config str. + + Args: + cfg_str (str): Config str. + file_format (str): Config file format corresponding to the + config str. Only py/yml/yaml/json type are supported now! + + Returns: + obj:`Config`: Config obj. 
+ """ + if file_format not in [".py", ".json", ".yaml", ".yml"]: + raise IOError("Only py/yml/yaml/json type are supported now!") + if file_format != ".py" and "dict(" in cfg_str: + # check if users specify a wrong suffix for python + warnings.warn('Please check "file_format", the file format may be .py') + with tempfile.NamedTemporaryFile( + "w", encoding="utf-8", suffix=file_format, delete=False + ) as temp_file: + temp_file.write(cfg_str) + # on windows, previous implementation cause error + # see PR 1077 for details + cfg = Config.fromfile(temp_file.name) + os.remove(temp_file.name) + return cfg + + @staticmethod + def auto_argparser(description=None): + """Generate argparser from config file automatically (experimental)""" + partial_parser = ArgumentParser(description=description) + partial_parser.add_argument("config", help="config file path") + cfg_file = partial_parser.parse_known_args()[0].config + cfg = Config.fromfile(cfg_file) + parser = ArgumentParser(description=description) + parser.add_argument("config", help="config file path") + add_args(parser, cfg) + return parser, cfg + + def __init__(self, cfg_dict=None, cfg_text=None, filename=None): + if cfg_dict is None: + cfg_dict = dict() + elif not isinstance(cfg_dict, dict): + raise TypeError("cfg_dict must be a dict, but " f"got {type(cfg_dict)}") + for key in cfg_dict: + if key in RESERVED_KEYS: + raise KeyError(f"{key} is reserved for config file") + + super(Config, self).__setattr__("_cfg_dict", ConfigDict(cfg_dict)) + super(Config, self).__setattr__("_filename", filename) + if cfg_text: + text = cfg_text + elif filename: + with open(filename, "r") as f: + text = f.read() + else: + text = "" + super(Config, self).__setattr__("_text", text) + + @property + def filename(self): + return self._filename + + @property + def text(self): + return self._text + + @property + def pretty_text(self): + indent = 4 + + def _indent(s_, num_spaces): + s = s_.split("\n") + if len(s) == 1: + return s_ + first = 
s.pop(0) + s = [(num_spaces * " ") + line for line in s] + s = "\n".join(s) + s = first + "\n" + s + return s + + def _format_basic_types(k, v, use_mapping=False): + if isinstance(v, str): + v_str = f"'{v}'" + else: + v_str = str(v) + + if use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f"{k_str}: {v_str}" + else: + attr_str = f"{str(k)}={v_str}" + attr_str = _indent(attr_str, indent) + + return attr_str + + def _format_list(k, v, use_mapping=False): + # check if all items in the list are dict + if all(isinstance(_, dict) for _ in v): + v_str = "[\n" + v_str += "\n".join( + f"dict({_indent(_format_dict(v_), indent)})," for v_ in v + ).rstrip(",") + if use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f"{k_str}: {v_str}" + else: + attr_str = f"{str(k)}={v_str}" + attr_str = _indent(attr_str, indent) + "]" + else: + attr_str = _format_basic_types(k, v, use_mapping) + return attr_str + + def _contain_invalid_identifier(dict_str): + contain_invalid_identifier = False + for key_name in dict_str: + contain_invalid_identifier |= not str(key_name).isidentifier() + return contain_invalid_identifier + + def _format_dict(input_dict, outest_level=False): + r = "" + s = [] + + use_mapping = _contain_invalid_identifier(input_dict) + if use_mapping: + r += "{" + for idx, (k, v) in enumerate(input_dict.items()): + is_last = idx >= len(input_dict) - 1 + end = "" if outest_level or is_last else "," + if isinstance(v, dict): + v_str = "\n" + _format_dict(v) + if use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f"{k_str}: dict({v_str}" + else: + attr_str = f"{str(k)}=dict({v_str}" + attr_str = _indent(attr_str, indent) + ")" + end + elif isinstance(v, list): + attr_str = _format_list(k, v, use_mapping) + end + else: + attr_str = _format_basic_types(k, v, use_mapping) + end + + s.append(attr_str) + r += "\n".join(s) + if use_mapping: + r += "}" + return r + + cfg_dict = self._cfg_dict.to_dict() + 
text = _format_dict(cfg_dict, outest_level=True) + # copied from setup.cfg + yapf_style = dict( + based_on_style="pep8", + blank_line_before_nested_class_or_def=True, + split_before_expression_after_opening_paren=True, + ) + text, _ = FormatCode(text, style_config=yapf_style) + + return text + + def __repr__(self): + return f"Config (path: {self.filename}): {self._cfg_dict.__repr__()}" + + def __len__(self): + return len(self._cfg_dict) + + def __getattr__(self, name): + return getattr(self._cfg_dict, name) + + def __getitem__(self, name): + return self._cfg_dict.__getitem__(name) + + def __setattr__(self, name, value): + if isinstance(value, dict): + value = ConfigDict(value) + self._cfg_dict.__setattr__(name, value) + + def __setitem__(self, name, value): + if isinstance(value, dict): + value = ConfigDict(value) + self._cfg_dict.__setitem__(name, value) + + def __iter__(self): + return iter(self._cfg_dict) + + def __getstate__(self): + return (self._cfg_dict, self._filename, self._text) + + def __setstate__(self, state): + _cfg_dict, _filename, _text = state + super(Config, self).__setattr__("_cfg_dict", _cfg_dict) + super(Config, self).__setattr__("_filename", _filename) + super(Config, self).__setattr__("_text", _text) + + def dump(self, file=None): + cfg_dict = super(Config, self).__getattribute__("_cfg_dict").to_dict() + if self.filename.endswith(".py"): + if file is None: + return self.pretty_text + else: + with open(file, "w", encoding="utf-8") as f: + f.write(self.pretty_text) + else: + import mmcv + + if file is None: + file_format = self.filename.split(".")[-1] + return mmcv.dump(cfg_dict, file_format=file_format) + else: + mmcv.dump(cfg_dict, file) + + def merge_from_dict(self, options, allow_list_keys=True): + """Merge list into cfg_dict. + + Merge the dict parsed by MultipleKVAction into this cfg. + + Examples: + >>> options = {'models.backbone.depth': 50, + ... 
class DictAction(Action):
    """
    argparse action to split an argument into KEY=VALUE form
    on the first = and append to a dictionary. List options can
    be passed as comma separated values, i.e 'KEY=V1,V2,V3', or with explicit
    brackets, i.e. 'KEY=[V1,V2,V3]'. It also supports nested brackets to build
    list/tuple values. e.g. 'KEY=[(V1,V2),(V3,V4)]'
    """

    @staticmethod
    def _parse_int_float_bool(val):
        """Convert ``val`` to int, float or bool when it looks like one.

        Returns the original string when none of the conversions applies.
        """
        try:
            return int(val)
        except ValueError:
            pass
        try:
            return float(val)
        except ValueError:
            pass
        lowered = val.lower()
        if lowered in ("true", "false"):
            return lowered == "true"
        return val

    @staticmethod
    def _parse_iterable(val):
        """Parse iterable values in the string.

        All elements inside '()' or '[]' are treated as iterable values.

        Args:
            val (str): Value string.

        Returns:
            list | tuple: The expanded list or tuple from the string. A string
            without brackets and commas is returned as a single scalar.

        Examples:
            >>> DictAction._parse_iterable('1,2,3')
            [1, 2, 3]
            >>> DictAction._parse_iterable('[a, b, c]')
            ['a', 'b', 'c']
            >>> DictAction._parse_iterable('[(1, 2, 3), [a, b], c]')
            [(1, 2, 3), ['a', 'b'], 'c']
            >>> DictAction._parse_iterable('(1,2),(3,4)')
            [(1, 2), (3, 4)]
        """

        def find_next_comma(string):
            """Find the position of next comma in the string.

            If no ',' is found in the string, return the string length. All
            chars inside '()' and '[]' are treated as one element and thus ','
            inside these brackets are ignored.
            """
            assert (string.count("(") == string.count(")")) and (
                string.count("[") == string.count("]")
            ), f"Imbalanced brackets exist in {string}"
            # Running open-minus-close counts of the text before the current
            # char; a single pass instead of re-counting every prefix.
            paren_depth = 0
            bracket_depth = 0
            for idx, char in enumerate(string):
                if char == "," and paren_depth == 0 and bracket_depth == 0:
                    return idx
                if char == "(":
                    paren_depth += 1
                elif char == ")":
                    paren_depth -= 1
                elif char == "[":
                    bracket_depth += 1
                elif char == "]":
                    bracket_depth -= 1
            return len(string)

        def is_enclosed(string, opener, closer):
            """Tell whether the opener at index 0 is closed by the last char.

            Merely starting with ``opener`` and ending with ``closer`` is not
            enough: in '(1,2),(3,4)' the first bracket closes early, and
            stripping both ends would leave an unparsable '1,2),(3,4'.
            """
            if not (string.startswith(opener) and string.endswith(closer)):
                return False
            depth = 0
            for idx, char in enumerate(string):
                if char == opener:
                    depth += 1
                elif char == closer:
                    depth -= 1
                    if depth == 0:
                        return idx == len(string) - 1
            return False

        # Strip ' and " characters and replace whitespace.
        val = val.strip("'\"").replace(" ", "")
        is_tuple = False
        if is_enclosed(val, "(", ")"):
            is_tuple = True
            val = val[1:-1]
        elif is_enclosed(val, "[", "]"):
            val = val[1:-1]
        elif "," not in val:
            # val is a single value
            return DictAction._parse_int_float_bool(val)

        values = []
        while len(val) > 0:
            comma_idx = find_next_comma(val)
            element = DictAction._parse_iterable(val[:comma_idx])
            values.append(element)
            val = val[comma_idx + 1 :]
        if is_tuple:
            values = tuple(values)
        return values

    def __call__(self, parser, namespace, values, option_string=None):
        """Parse every ``KEY=VALUE`` item and store the dict on ``namespace``.

        Raises:
            ValueError: an item contains no '='.
        """
        options = {}
        for kv in values:
            key, sep, val = kv.partition("=")
            if not sep:
                raise ValueError(f"Expected KEY=VALUE, but got {kv!r}")
            options[key] = self._parse_iterable(val)
        setattr(namespace, self.dest, options)
def get_random_seed():
    """Return an integer seed that differs between processes and calls.

    The seed is the sum of the process id, the current seconds and
    microseconds read as one decimal number, and two bytes from
    ``os.urandom``.
    """
    pid_part = os.getpid()
    clock_part = int(datetime.now().strftime("%S%f"))
    entropy_part = int.from_bytes(os.urandom(2), "big")
    return pid_part + clock_part + entropy_part
class JSONWriter(EventWriter):
    """
    Append scalars from the current event storage to a json-lines file.

    Every written line is one json object holding all scalars of a single
    iteration plus an ``"iteration"`` field, which keeps the file easy to
    parse line by line, e.g.:
    ::
        $ cat metrics.json | jq '.loss_mask'
        0.7126231789588928
        0.689423680305481
        ...
    """

    def __init__(self, json_file, window_size=20):
        """
        Args:
            json_file (str): path to the json file. The file is opened in
                append mode, so existing content is kept.
            window_size (int): window size passed to the storage when it
                smooths scalars whose `smoothing_hint` is True.
        """
        self._file_handle = open(json_file, "a")
        self._window_size = window_size
        self._last_write = -1

    def write(self):
        """Write all scalars newer than the last written iteration."""
        storage = get_event_storage()
        latest = storage.latest_with_smoothing_hint(self._window_size)

        # Group the not-yet-written scalars by the iteration they belong to.
        pending = defaultdict(dict)
        for name, (value, step) in latest.items():
            if step > self._last_write:
                pending[step][name] = value
        if pending:
            self._last_write = max(pending)

        for step, scalars in pending.items():
            scalars["iteration"] = step
            self._file_handle.write(json.dumps(scalars, sort_keys=True) + "\n")
        self._file_handle.flush()
        try:
            os.fsync(self._file_handle.fileno())
        except AttributeError:
            pass

    def close(self):
        """Close the underlying file."""
        self._file_handle.close()
+ if len(storage._vis_data) >= 1: + for img_name, img, step_num in storage._vis_data: + self._writer.add_image(img_name, img, step_num) + # Storage stores all image data and rely on this writer to clear them. + # As a result it assumes only one writer will use its image data. + # An alternative design is to let storage store limited recent + # data (e.g. only the most recent image) that all writers can access. + # In that case a writer may not see all image data if its period is long. + storage.clear_images() + + if len(storage._histograms) >= 1: + for params in storage._histograms: + self._writer.add_histogram_raw(**params) + storage.clear_histograms() + + def close(self): + if hasattr(self, "_writer"): # doesn't exist when the code fails at import + self._writer.close() + + +class CommonMetricPrinter(EventWriter): + """ + Print **common** metrics to the terminal, including + iteration time, ETA, memory, all losses, and the learning rate. + It also applies smoothing using a window of 20 elements. + It's meant to print common metrics in common ways. + To print something in more customized ways, please implement a similar printer by yourself. + """ + + def __init__(self, max_iter: Optional[int] = None, window_size: int = 20): + """ + Args: + max_iter: the maximum number of iterations to train. + Used to compute ETA. If not given, ETA will not be printed. + window_size (int): the losses will be median-smoothed by this window size + """ + self.logger = logging.getLogger(__name__) + self._max_iter = max_iter + self._window_size = window_size + self._last_write = ( + None # (step, time) of last call to write(). 
Used to compute ETA + ) + + def _get_eta(self, storage) -> Optional[str]: + if self._max_iter is None: + return "" + iteration = storage.iter + try: + eta_seconds = storage.history("time").median(1000) * ( + self._max_iter - iteration - 1 + ) + storage.put_scalar("eta_seconds", eta_seconds, smoothing_hint=False) + return str(datetime.timedelta(seconds=int(eta_seconds))) + except KeyError: + # estimate eta on our own - more noisy + eta_string = None + if self._last_write is not None: + estimate_iter_time = (time.perf_counter() - self._last_write[1]) / ( + iteration - self._last_write[0] + ) + eta_seconds = estimate_iter_time * (self._max_iter - iteration - 1) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + self._last_write = (iteration, time.perf_counter()) + return eta_string + + def write(self): + storage = get_event_storage() + iteration = storage.iter + if iteration == self._max_iter: + # This hook only reports training progress (loss, ETA, etc) but not other data, + # therefore do not write anything after training succeeds, even if this method + # is called. 
+ return + + try: + data_time = storage.history("data_time").avg(20) + except KeyError: + # they may not exist in the first few iterations (due to warmup) + # or when SimpleTrainer is not used + data_time = None + try: + iter_time = storage.history("time").global_avg() + except KeyError: + iter_time = None + try: + lr = "{:.5g}".format(storage.history("lr").latest()) + except KeyError: + lr = "N/A" + + eta_string = self._get_eta(storage) + + if torch.cuda.is_available(): + max_mem_mb = torch.cuda.max_memory_allocated() / 1024.0 / 1024.0 + else: + max_mem_mb = None + + # NOTE: max_mem is parsed by grep in "dev/parse_results.sh" + self.logger.info( + " {eta}iter: {iter} {losses} {time}{data_time}lr: {lr} {memory}".format( + eta=f"eta: {eta_string} " if eta_string else "", + iter=iteration, + losses=" ".join( + [ + "{}: {:.4g}".format(k, v.median(self._window_size)) + for k, v in storage.histories().items() + if "loss" in k + ] + ), + time=( + "time: {:.4f} ".format(iter_time) if iter_time is not None else "" + ), + data_time=( + "data_time: {:.4f} ".format(data_time) + if data_time is not None + else "" + ), + lr=lr, + memory=( + "max_mem: {:.0f}M".format(max_mem_mb) + if max_mem_mb is not None + else "" + ), + ) + ) + + +class EventStorage: + """ + The user-facing class that provides metric storage functionalities. + In the future we may add support for storing / logging other types of data if needed. + """ + + def __init__(self, start_iter=0): + """ + Args: + start_iter (int): the iteration number to start with + """ + self._history = defaultdict(AverageMeter) + self._smoothing_hints = {} + self._latest_scalars = {} + self._iter = start_iter + self._current_prefix = "" + self._vis_data = [] + self._histograms = [] + + # def put_image(self, img_name, img_tensor): + # """ + # Add an `img_tensor` associated with `img_name`, to be shown on + # tensorboard. + # Args: + # img_name (str): The name of the image to put into tensorboard. 
+ # img_tensor (torch.Tensor or numpy.array): An `uint8` or `float` + # Tensor of shape `[channel, height, width]` where `channel` is + # 3. The image format should be RGB. The elements in img_tensor + # can either have values in [0, 1] (float32) or [0, 255] (uint8). + # The `img_tensor` will be visualized in tensorboard. + # """ + # self._vis_data.append((img_name, img_tensor, self._iter)) + + def put_scalar(self, name, value, n=1, smoothing_hint=False): + """ + Add a scalar `value` to the `HistoryBuffer` associated with `name`. + Args: + smoothing_hint (bool): a 'hint' on whether this scalar is noisy and should be + smoothed when logged. The hint will be accessible through + :meth:`EventStorage.smoothing_hints`. A writer may ignore the hint + and apply custom smoothing rule. + It defaults to True because most scalars we save need to be smoothed to + provide any useful signal. + """ + name = self._current_prefix + name + history = self._history[name] + history.update(value, n) + self._latest_scalars[name] = (value, self._iter) + + existing_hint = self._smoothing_hints.get(name) + if existing_hint is not None: + assert ( + existing_hint == smoothing_hint + ), "Scalar {} was put with a different smoothing_hint!".format(name) + else: + self._smoothing_hints[name] = smoothing_hint + + # def put_scalars(self, *, smoothing_hint=True, **kwargs): + # """ + # Put multiple scalars from keyword arguments. + # Examples: + # storage.put_scalars(loss=my_loss, accuracy=my_accuracy, smoothing_hint=True) + # """ + # for k, v in kwargs.items(): + # self.put_scalar(k, v, smoothing_hint=smoothing_hint) + # + # def put_histogram(self, hist_name, hist_tensor, bins=1000): + # """ + # Create a histogram from a tensor. + # Args: + # hist_name (str): The name of the histogram to put into tensorboard. + # hist_tensor (torch.Tensor): A Tensor of arbitrary shape to be converted + # into a histogram. + # bins (int): Number of histogram bins. 
+ # """ + # ht_min, ht_max = hist_tensor.min().item(), hist_tensor.max().item() + # + # # Create a histogram with PyTorch + # hist_counts = torch.histc(hist_tensor, bins=bins) + # hist_edges = torch.linspace(start=ht_min, end=ht_max, steps=bins + 1, dtype=torch.float32) + # + # # Parameter for the add_histogram_raw function of SummaryWriter + # hist_params = dict( + # tag=hist_name, + # min=ht_min, + # max=ht_max, + # num=len(hist_tensor), + # sum=float(hist_tensor.sum()), + # sum_squares=float(torch.sum(hist_tensor**2)), + # bucket_limits=hist_edges[1:].tolist(), + # bucket_counts=hist_counts.tolist(), + # global_step=self._iter, + # ) + # self._histograms.append(hist_params) + + def history(self, name): + """ + Returns: + AverageMeter: the history for name + """ + ret = self._history.get(name, None) + if ret is None: + raise KeyError("No history metric available for {}!".format(name)) + return ret + + def histories(self): + """ + Returns: + dict[name -> HistoryBuffer]: the HistoryBuffer for all scalars + """ + return self._history + + def latest(self): + """ + Returns: + dict[str -> (float, int)]: mapping from the name of each scalar to the most + recent value and the iteration number its added. + """ + return self._latest_scalars + + def latest_with_smoothing_hint(self, window_size=20): + """ + Similar to :meth:`latest`, but the returned values + are either the un-smoothed original latest value, + or a median of the given window_size, + depend on whether the smoothing_hint is True. + This provides a default behavior that other writers can use. + """ + result = {} + for k, (v, itr) in self._latest_scalars.items(): + result[k] = ( + self._history[k].median(window_size) if self._smoothing_hints[k] else v, + itr, + ) + return result + + def smoothing_hints(self): + """ + Returns: + dict[name -> bool]: the user-provided hint on whether the scalar + is noisy and needs smoothing. 
+ """ + return self._smoothing_hints + + def step(self): + """ + User should either: (1) Call this function to increment storage.iter when needed. Or + (2) Set `storage.iter` to the correct iteration number before each iteration. + The storage will then be able to associate the new data with an iteration number. + """ + self._iter += 1 + + @property + def iter(self): + """ + Returns: + int: The current iteration number. When used together with a trainer, + this is ensured to be the same as trainer.iter. + """ + return self._iter + + @iter.setter + def iter(self, val): + self._iter = int(val) + + @property + def iteration(self): + # for backward compatibility + return self._iter + + def __enter__(self): + _CURRENT_STORAGE_STACK.append(self) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + assert _CURRENT_STORAGE_STACK[-1] == self + _CURRENT_STORAGE_STACK.pop() + + @contextmanager + def name_scope(self, name): + """ + Yields: + A context within which all the events added to this storage + will be prefixed by the name scope. + """ + old_prefix = self._current_prefix + self._current_prefix = name.rstrip("/") + "/" + yield + self._current_prefix = old_prefix + + def clear_images(self): + """ + Delete all the stored images for visualization. This should be called + after images are written to tensorboard. + """ + self._vis_data = [] + + def clear_histograms(self): + """ + Delete all the stored histograms for visualization. + This should be called after histograms are written to tensorboard. 
+ """ + self._histograms = [] + + def reset_history(self, name): + ret = self._history.get(name, None) + if ret is None: + raise KeyError("No history metric available for {}!".format(name)) + ret.reset() + + def reset_histories(self): + for name in self._history.keys(): + self._history[name].reset() + + +class AverageMeter: + """Computes and stores the average and current value""" + + def __init__(self): + self.val = 0 + self.avg = 0 + self.total = 0 + self.count = 0 + + def reset(self): + self.val = 0 + self.avg = 0 + self.total = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.total += val * n + self.count += n + self.avg = self.total / self.count + + +class HistoryBuffer: + """ + Track a series of scalar values and provide access to smoothed values over a + window or the global average of the series. + """ + + def __init__(self, max_length: int = 1000000) -> None: + """ + Args: + max_length: maximal number of values that can be stored in the + buffer. When the capacity of the buffer is exhausted, old + values will be removed. + """ + self._max_length: int = max_length + self._data: List[Tuple[float, float]] = [] # (value, iteration) pairs + self._count: int = 0 + self._global_avg: float = 0 + + def update(self, value: float, iteration: Optional[float] = None) -> None: + """ + Add a new scalar value produced at certain iteration. If the length + of the buffer exceeds self._max_length, the oldest element will be + removed from the buffer. + """ + if iteration is None: + iteration = self._count + if len(self._data) == self._max_length: + self._data.pop(0) + self._data.append((value, iteration)) + + self._count += 1 + self._global_avg += (value - self._global_avg) / self._count + + def latest(self) -> float: + """ + Return the latest scalar value added to the buffer. + """ + return self._data[-1][0] + + def median(self, window_size: int) -> float: + """ + Return the median of the latest `window_size` values in the buffer. 
+ """ + return np.median([x[0] for x in self._data[-window_size:]]) + + def avg(self, window_size: int) -> float: + """ + Return the mean of the latest `window_size` values in the buffer. + """ + return np.mean([x[0] for x in self._data[-window_size:]]) + + def global_avg(self) -> float: + """ + Return the mean of all the elements in the buffer. Note that this + includes those getting removed due to limited buffer storage. + """ + return self._global_avg + + def values(self) -> List[Tuple[float, float]]: + """ + Returns: + list[(number, iteration)]: content of the current buffer. + """ + return self._data + + +class ExceptionWriter: + + def __init__(self): + self.logger = logging.getLogger(__name__) + + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_val, exc_tb): + if exc_type: + tb = traceback.format_exception(exc_type, exc_val, exc_tb) + formatted_tb_str = "".join(tb) + self.logger.error(formatted_tb_str) + sys.exit(1) # This prevents double logging the error to the console diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/utils/misc.py b/point_transformer_v3/external/pointcept_minimal/pointcept/utils/misc.py new file mode 100644 index 0000000..3177bae --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/utils/misc.py @@ -0,0 +1,164 @@ +""" +Misc + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
"""
Misc

Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com)
Please cite our work if the code is helpful to you.
"""

import os
import warnings
from collections import abc
import numpy as np
import torch
from importlib import import_module


class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def reset(self):
        """Set the current value, average, sum and count back to zero."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running average."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def intersection_and_union(output, target, K, ignore_index=-1):
    """Per-class intersection, union and target counts for NumPy label arrays.

    Args:
        output (np.ndarray): predicted labels, 1 to 3 dimensional.
        target (np.ndarray): ground-truth labels, same shape as ``output``.
        K (int): number of classes; labels are counted in bins ``0 .. K - 1``.
        ignore_index (int): target value whose positions are excluded.

    Returns:
        tuple[np.ndarray, np.ndarray, np.ndarray]: ``(area_intersection,
        area_union, area_target)``, each of length ``K``.
    """
    # 'K' classes, output and target sizes are N or N * L or N * H * W, each value in range 0 to K - 1.
    assert output.ndim in [1, 2, 3]
    assert output.shape == target.shape
    # Work on a flattened copy so the caller's prediction array is untouched.
    output = output.reshape(output.size).copy()
    target = target.reshape(target.size)
    # Ignored positions are forced to ignore_index in the prediction as well.
    output[np.where(target == ignore_index)[0]] = ignore_index
    intersection = output[np.where(output == target)[0]]
    bins = np.arange(K + 1)
    area_intersection, _ = np.histogram(intersection, bins=bins)
    area_output, _ = np.histogram(output, bins=bins)
    area_target, _ = np.histogram(target, bins=bins)
    area_union = area_output + area_target - area_intersection
    return area_intersection, area_union, area_target


def intersection_and_union_gpu(output, target, k, ignore_index=-1):
    """Per-class intersection, union and target counts for torch label tensors.

    Tensor counterpart of :func:`intersection_and_union`, built on
    ``torch.histc`` with ``k`` bins over ``[0, k - 1]``.

    Args:
        output (torch.Tensor): predicted labels, 1 to 3 dimensional.
        target (torch.Tensor): ground-truth labels, same shape as ``output``.
        k (int): number of classes.
        ignore_index (int): target value whose positions are excluded.

    Returns:
        tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        ``(area_intersection, area_union, area_target)``.
    """
    # 'K' classes, output and target sizes are N or N * L or N * H * W, each value in range 0 to K - 1.
    assert output.dim() in [1, 2, 3]
    assert output.shape == target.shape
    # Clone before masking: writing through a view would overwrite the
    # caller's prediction tensor (the NumPy variant copies for the same reason).
    output = output.reshape(-1).clone()
    target = target.reshape(-1)
    output[target == ignore_index] = ignore_index
    intersection = output[output == target]
    area_intersection = torch.histc(intersection, bins=k, min=0, max=k - 1)
    area_output = torch.histc(output, bins=k, min=0, max=k - 1)
    area_target = torch.histc(target, bins=k, min=0, max=k - 1)
    area_union = area_output + area_target - area_intersection
    return area_intersection, area_union, area_target


def make_dirs(dir_name):
    """Create ``dir_name`` (including parents) if it does not exist yet."""
    # exist_ok already covers the "already there" case, without the race of a
    # separate os.path.exists() check.
    os.makedirs(dir_name, exist_ok=True)


def find_free_port():
    """Ask the OS for a currently unused TCP port and return its number."""
    import socket

    # The context manager closes the socket even if bind() fails.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        # Binding to port 0 will cause the OS to find an available port for us
        sock.bind(("", 0))
        port = sock.getsockname()[1]
    # NOTE: there is still a chance the port could be taken by other processes.
    return port


def is_seq_of(seq, expected_type, seq_type=None):
    """Check whether it is a sequence of some type.

    Args:
        seq (Sequence): The sequence to be checked.
        expected_type (type): Expected type of sequence items.
        seq_type (type, optional): Expected sequence type.

    Returns:
        bool: Whether the sequence is valid.
    """
    if seq_type is None:
        exp_seq_type = abc.Sequence
    else:
        assert isinstance(seq_type, type)
        exp_seq_type = seq_type
    if not isinstance(seq, exp_seq_type):
        return False
    return all(isinstance(item, expected_type) for item in seq)


def is_str(x):
    """Whether the input is an string instance.

    Note: This method is deprecated since python 2 is no longer supported.
    """
    return isinstance(x, str)


def import_modules_from_strings(imports, allow_failed_imports=False):
    """Import modules from the given list of strings.

    Args:
        imports (list | str | None): The given module names to be imported.
        allow_failed_imports (bool): If True, the failed imports will return
            None. Otherwise, an ImportError is raise. Default: False.

    Returns:
        list[module] | module | None: The imported modules.

    Raises:
        TypeError: if ``imports`` is neither a str nor a list, or an entry of
            the list is not a str.
        ImportError: if a module cannot be imported and
            ``allow_failed_imports`` is False.

    Examples:
        >>> osp, sys = import_modules_from_strings(
        ...     ['os.path', 'sys'])
        >>> import os.path as osp_
        >>> import sys as sys_
        >>> assert osp == osp_
        >>> assert sys == sys_
    """
    if not imports:
        return
    single_import = False
    if isinstance(imports, str):
        single_import = True
        imports = [imports]
    if not isinstance(imports, list):
        raise TypeError(f"custom_imports must be a list but got type {type(imports)}")
    imported = []
    for imp in imports:
        if not isinstance(imp, str):
            raise TypeError(f"{imp} is of type {type(imp)} and cannot be imported.")
        try:
            imported_tmp = import_module(imp)
        except ImportError:
            if not allow_failed_imports:
                # Re-raise the original error so the message still names the
                # module that failed to import.
                raise
            warnings.warn(f"{imp} failed to import and is ignored.", UserWarning)
            imported_tmp = None
        imported.append(imported_tmp)
    if single_import:
        imported = imported[0]
    return imported


class DummyClass:
    """Empty placeholder class."""

    def __init__(self):
        pass


# diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/utils/optimizer.py b/point_transformer_v3/external/pointcept_minimal/pointcept/utils/optimizer.py
# new file mode 100644
# index 0000000..eef8735
# --- /dev/null
# +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/utils/optimizer.py
# @@ -0,0 +1,57 @@
"""
Optimizer

Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com)
Please cite our work if the code is helpful to you.
"""

import copy
import torch
from pointcept.utils.logger import get_root_logger
from pointcept.utils.registry import Registry

OPTIMIZERS = Registry("optimizers")


OPTIMIZERS.register_module(module=torch.optim.SGD, name="SGD")
OPTIMIZERS.register_module(module=torch.optim.Adam, name="Adam")
OPTIMIZERS.register_module(module=torch.optim.AdamW, name="AdamW")


def build_optimizer(cfg, model, param_dicts=None):
    """Build an optimizer from ``cfg`` through the ``OPTIMIZERS`` registry.

    Args:
        cfg: optimizer config; it is deep-copied, given a ``params`` entry and
            passed to ``OPTIMIZERS.build``. ``cfg.lr`` is read when
            ``param_dicts`` is given.
        model: object providing ``parameters()`` and ``named_parameters()``.
        param_dicts: optional sequence of entries, each with a ``keyword``
            attribute and optionally ``lr`` / ``momentum`` / ``weight_decay``.
            A parameter joins the group of the first entry whose keyword is a
            substring of its name; all remaining parameters go to a default
            group that uses ``cfg.lr``.

    Returns:
        Whatever ``OPTIMIZERS.build(cfg=cfg)`` returns.
    """
    cfg = copy.deepcopy(cfg)
    if param_dicts is None:
        cfg.params = model.parameters()
        return OPTIMIZERS.build(cfg=cfg)

    # Group 0 is the fallback group; group i + 1 belongs to param_dicts[i].
    groups = [dict(names=[], params=[], lr=cfg.lr)]
    for entry in param_dicts:
        group = dict(names=[], params=[])
        for option in ("lr", "momentum", "weight_decay"):
            if option in entry.keys():
                group[option] = getattr(entry, option)
        groups.append(group)
    cfg.params = groups

    for param_name, param in model.named_parameters():
        target = cfg.params[0]
        for index, entry in enumerate(param_dicts):
            if entry.keyword in param_name:
                target = cfg.params[index + 1]
                break
        target["names"].append(param_name)
        target["params"].append(param)

    logger = get_root_logger()
    for index, group in enumerate(cfg.params):
        # "names" is only bookkeeping for the log line; it is removed before
        # the groups are handed to the optimizer.
        param_names = group.pop("names")
        message = "".join(
            f" {key}: {group[key]};" for key in group.keys() if key != "params"
        )
        logger.info(f"Params Group {index+1} -{message} Params: {param_names}.")
    return OPTIMIZERS.build(cfg=cfg)


# diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/utils/path.py b/point_transformer_v3/external/pointcept_minimal/pointcept/utils/path.py
# new file mode 100644
# index 0000000..ce98fa5
# --- /dev/null
# +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/utils/path.py
# @@ -0,0 +1,103 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as osp
from pathlib import Path

from .misc import is_str


def is_filepath(x):
    """Return True if ``x`` is a string or a ``pathlib.Path``."""
    return is_str(x) or isinstance(x, Path)


def fopen(filepath, *args, **kwargs):
    """Open ``filepath`` (str or ``Path``), forwarding all other arguments.

    Raises:
        ValueError: if ``filepath`` is neither a string nor a ``Path``.
    """
    if is_str(filepath):
        return open(filepath, *args, **kwargs)
    elif isinstance(filepath, Path):
        return filepath.open(*args, **kwargs)
    raise ValueError("`filepath` should be a string or a Path")


def check_file_exist(filename, msg_tmpl='file "{}" does not exist'):
    """Raise ``FileNotFoundError`` unless ``filename`` is an existing file."""
    if not osp.isfile(filename):
        raise FileNotFoundError(msg_tmpl.format(filename))


def mkdir_or_exist(dir_name, mode=0o777):
    """Create ``dir_name`` (after ``~`` expansion); an empty name is a no-op."""
    if dir_name == "":
        return
    dir_name = osp.expanduser(dir_name)
    os.makedirs(dir_name, mode=mode, exist_ok=True)


def symlink(src, dst, overwrite=True, **kwargs):
    """Create a symlink ``dst`` -> ``src``, removing an existing ``dst`` first
    when ``overwrite`` is True."""
    if os.path.lexists(dst) and overwrite:
        os.remove(dst)
    os.symlink(src, dst, **kwargs)


def scandir(dir_path, suffix=None, recursive=False, case_sensitive=True):
    """Scan a directory to find the interested files.

    Args:
        dir_path (str | obj:`Path`): Path of the directory.
        suffix (str | tuple(str), optional): File suffix that we are
            interested in. Default: None.
        recursive (bool, optional): If set to True, recursively scan the
            directory. Default: False.
        case_sensitive (bool, optional) : If set to False, ignore the case of
            suffix. Default: True.

    Returns:
        A generator for all the interested files with relative paths.

    Raises:
        TypeError: if ``dir_path`` is not a str/``Path`` or ``suffix`` is not
            None, a str or a tuple.
    """
    if isinstance(dir_path, (str, Path)):
        dir_path = str(dir_path)
    else:
        raise TypeError('"dir_path" must be a string or Path object')

    if (suffix is not None) and not isinstance(suffix, (str, tuple)):
        raise TypeError('"suffix" must be a string or tuple of strings')

    if suffix is not None and not case_sensitive:
        suffix = (
            suffix.lower()
            if isinstance(suffix, str)
            else tuple(item.lower() for item in suffix)
        )

    root = dir_path

    def _scandir(dir_path, suffix, recursive, case_sensitive):
        # The context manager closes the directory iterator promptly, also
        # when the consumer stops iterating early.
        with os.scandir(dir_path) as entries:
            for entry in entries:
                if not entry.name.startswith(".") and entry.is_file():
                    rel_path = osp.relpath(entry.path, root)
                    _rel_path = rel_path if case_sensitive else rel_path.lower()
                    if suffix is None or _rel_path.endswith(suffix):
                        yield rel_path
                elif recursive and os.path.isdir(entry.path):
                    # scan recursively if entry.path is a directory
                    yield from _scandir(entry.path, suffix, recursive, case_sensitive)

    return _scandir(dir_path, suffix, recursive, case_sensitive)


def find_vcs_root(path, markers=(".git",)):
    """Finds the root directory (including itself) of specified markers.

    Args:
        path (str): Path of directory or file.
        markers (list[str], optional): List of file or directory names.

    Returns:
        The directory contained one of the markers or None if not found.
    """
    if osp.isfile(path):
        path = osp.dirname(path)

    # Walk upwards until the path stops changing, i.e. the filesystem root.
    prev, cur = None, osp.abspath(osp.expanduser(path))
    while cur != prev:
        if any(osp.exists(osp.join(cur, marker)) for marker in markers):
            return cur
        prev, cur = cur, osp.split(cur)[0]
    return None


# diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/utils/registry.py b/point_transformer_v3/external/pointcept_minimal/pointcept/utils/registry.py
# new file mode 100644
# index 0000000..7ac308a
# --- /dev/null
# +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/utils/registry.py
# @@ -0,0 +1,316 @@
# Copyright (c) OpenMMLab. All rights reserved.
import inspect
import warnings
from functools import partial

from .misc import is_seq_of


def build_from_cfg(cfg, registry, default_args=None):
    """Build a module from configs dict.

    Args:
        cfg (dict): Config dict. It should at least contain the key "type".
        registry (:obj:`Registry`): The registry to search the type from.
        default_args (dict, optional): Default initialization arguments.

    Returns:
        object: The constructed object.

    Raises:
        TypeError: if ``cfg``, ``registry``, ``default_args`` or the resolved
            ``type`` has an unsupported type.
        KeyError: if no ``type`` is given or it is not in the registry.
    """
    if not isinstance(cfg, dict):
        raise TypeError(f"cfg must be a dict, but got {type(cfg)}")
    if "type" not in cfg:
        if default_args is None or "type" not in default_args:
            raise KeyError(
                '`cfg` or `default_args` must contain the key "type", '
                f"but got {cfg}\n{default_args}"
            )
    if not isinstance(registry, Registry):
        raise TypeError(
            "registry must be an mmcv.Registry object, " f"but got {type(registry)}"
        )
    if not (isinstance(default_args, dict) or default_args is None):
        raise TypeError(
            "default_args must be a dict or None, " f"but got {type(default_args)}"
        )

    args = cfg.copy()

    if default_args is not None:
        for name, value in default_args.items():
            args.setdefault(name, value)

    obj_type = args.pop("type")
    if isinstance(obj_type, str):
        obj_cls = registry.get(obj_type)
        if obj_cls is None:
            raise KeyError(f"{obj_type} is not in the {registry.name} registry")
    elif inspect.isclass(obj_type):
        obj_cls = obj_type
    else:
        raise TypeError(f"type must be a str or valid type, but got {type(obj_type)}")
    try:
        return obj_cls(**args)
    except Exception as e:
        # Normal TypeError does not print class name.
        # Chain the original exception so its traceback is not lost.
        raise type(e)(f"{obj_cls.__name__}: {e}") from e


class Registry:
    """A registry to map strings to classes.

    Registered object could be built from registry.
    Example:
        >>> MODELS = Registry('models')
        >>> @MODELS.register_module()
        >>> class ResNet:
        >>>     pass
        >>> resnet = MODELS.build(dict(type='ResNet'))

    Please refer to
    https://mmcv.readthedocs.io/en/latest/understand_mmcv/registry.html for
    advanced usage.

    Args:
        name (str): Registry name.
        build_func(func, optional): Build function to construct instance from
            Registry, func:`build_from_cfg` is used if neither ``parent`` or
            ``build_func`` is specified. If ``parent`` is specified and
            ``build_func`` is not given,  ``build_func`` will be inherited
            from ``parent``. Default: None.
        parent (Registry, optional): Parent registry. The class registered in
            children registry could be built from parent. Default: None.
        scope (str, optional): The scope of registry. It is the key to search
            for children registry. If not specified, scope will be the name of
            the package where class is defined, e.g. mmdet, mmcls, mmseg.
            Default: None.
    """

    def __init__(self, name, build_func=None, parent=None, scope=None):
        self._name = name
        self._module_dict = dict()
        self._children = dict()
        self._scope = self.infer_scope() if scope is None else scope

        # self.build_func will be set with the following priority:
        # 1. build_func
        # 2. parent.build_func
        # 3. build_from_cfg
        if build_func is None:
            if parent is not None:
                self.build_func = parent.build_func
            else:
                self.build_func = build_from_cfg
        else:
            self.build_func = build_func
        if parent is not None:
            assert isinstance(parent, Registry)
            parent._add_children(self)
            self.parent = parent
        else:
            self.parent = None

    def __len__(self):
        return len(self._module_dict)

    def __contains__(self, key):
        return self.get(key) is not None

    def __repr__(self):
        format_str = (
            self.__class__.__name__ + f"(name={self._name}, "
            f"items={self._module_dict})"
        )
        return format_str

    @staticmethod
    def infer_scope():
        """Infer the scope of registry.

        The name of the package where registry is defined will be returned.

        Example:
            # in mmdet/models/backbone/resnet.py
            >>> MODELS = Registry('models')
            >>> @MODELS.register_module()
            >>> class ResNet:
            >>>     pass
            The scope of ``ResNet`` will be ``mmdet``.


        Returns:
            scope (str): The inferred scope name.
        """
        # inspect.stack() trace where this function is called, the index-2
        # indicates the frame where `infer_scope()` is called
        filename = inspect.getmodule(inspect.stack()[2][0]).__name__
        split_filename = filename.split(".")
        return split_filename[0]

    @staticmethod
    def split_scope_key(key):
        """Split scope and key.

        The first scope will be split from key.

        Examples:
            >>> Registry.split_scope_key('mmdet.ResNet')
            'mmdet', 'ResNet'
            >>> Registry.split_scope_key('ResNet')
            None, 'ResNet'

        Return:
            scope (str, None): The first scope.
            key (str): The remaining key.
        """
        split_index = key.find(".")
        if split_index != -1:
            return key[:split_index], key[split_index + 1 :]
        else:
            return None, key

    @property
    def name(self):
        return self._name

    @property
    def scope(self):
        return self._scope

    @property
    def module_dict(self):
        return self._module_dict

    @property
    def children(self):
        return self._children

    def get(self, key):
        """Get the registry record.

        Args:
            key (str): The class name in string format.

        Returns:
            class: The corresponding class, or None if it cannot be found.
        """
        scope, real_key = self.split_scope_key(key)
        if scope is None or scope == self._scope:
            # get from self
            return self._module_dict.get(real_key)

        # get from self._children
        if scope in self._children:
            return self._children[scope].get(real_key)

        # goto root
        if self.parent is None:
            # Already the root and the scope is unknown: report "not found"
            # instead of dereferencing a missing parent.
            return None
        parent = self.parent
        while parent.parent is not None:
            parent = parent.parent
        return parent.get(key)

    def build(self, *args, **kwargs):
        """Call ``self.build_func`` with this registry as ``registry``."""
        return self.build_func(*args, **kwargs, registry=self)

    def _add_children(self, registry):
        """Add children for a registry.

        The ``registry`` will be added as children based on its scope.
        The parent registry could build objects from children registry.

        Example:
            >>> models = Registry('models')
            >>> mmdet_models = Registry('models', parent=models)
            >>> @mmdet_models.register_module()
            >>> class ResNet:
            >>>     pass
            >>> resnet = models.build(dict(type='mmdet.ResNet'))
        """

        assert isinstance(registry, Registry)
        assert registry.scope is not None
        assert (
            registry.scope not in self.children
        ), f"scope {registry.scope} exists in {self.name} registry"
        self.children[registry.scope] = registry

    def _register_module(self, module_class, module_name=None, force=False):
        """Store ``module_class`` under one or several names.

        Raises:
            TypeError: if ``module_class`` is not a class.
            KeyError: if a name is already registered and ``force`` is False.
        """
        if not inspect.isclass(module_class):
            raise TypeError("module must be a class, " f"but got {type(module_class)}")

        if module_name is None:
            module_name = module_class.__name__
        if isinstance(module_name, str):
            module_name = [module_name]
        for name in module_name:
            if not force and name in self._module_dict:
                raise KeyError(f"{name} is already registered " f"in {self.name}")
            self._module_dict[name] = module_class

    def deprecated_register_module(self, cls=None, force=False):
        """Old-style registration entry point; warns on every call."""
        warnings.warn(
            "The old API of register_module(module, force=False) "
            "is deprecated and will be removed, please use the new API "
            "register_module(name=None, force=False, module=None) instead."
        )
        if cls is None:
            return partial(self.deprecated_register_module, force=force)
        self._register_module(cls, force=force)
        return cls

    def register_module(self, name=None, force=False, module=None):
        """Register a module.

        A record will be added to `self._module_dict`, whose key is the class
        name or the specified name, and value is the class itself.
        It can be used as a decorator or a normal function.

        Example:
            >>> backbones = Registry('backbone')
            >>> @backbones.register_module()
            >>> class ResNet:
            >>>     pass

            >>> backbones = Registry('backbone')
            >>> @backbones.register_module(name='mnet')
            >>> class MobileNet:
            >>>     pass

            >>> backbones = Registry('backbone')
            >>> class ResNet:
            >>>     pass
            >>> backbones.register_module(ResNet)

        Args:
            name (str | None): The module name to be registered. If not
                specified, the class name will be used.
            force (bool, optional): Whether to override an existing class with
                the same name. Default: False.
            module (type): Module class to be registered.
        """
        if not isinstance(force, bool):
            raise TypeError(f"force must be a boolean, but got {type(force)}")
        # NOTE: This is a walkaround to be compatible with the old api,
        # while it may introduce unexpected bugs.
        if isinstance(name, type):
            return self.deprecated_register_module(name, force=force)

        # raise the error ahead of time
        if not (name is None or isinstance(name, str) or is_seq_of(name, str)):
            raise TypeError(
                "name must be either of None, an instance of str or a sequence"
                f"  of str, but got {type(name)}"
            )

        # use it as a normal method: x.register_module(module=SomeClass)
        if module is not None:
            self._register_module(module_class=module, module_name=name, force=force)
            return module

        # use it as a decorator: @x.register_module()
        def _register(cls):
            self._register_module(module_class=cls, module_name=name, force=force)
            return cls

        return _register


# diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/utils/scheduler.py b/point_transformer_v3/external/pointcept_minimal/pointcept/utils/scheduler.py
# new file mode 100644
# index 0000000..e57084f
# --- /dev/null
# +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/utils/scheduler.py
# @@ -0,0 +1,205 @@
+""" + +import copy +import numpy as np +import torch.optim.lr_scheduler as lr_scheduler +from .registry import Registry + +SCHEDULERS = Registry("schedulers") + + +@SCHEDULERS.register_module() +class MultiStepLR(lr_scheduler.MultiStepLR): + def __init__( + self, + optimizer, + milestones, + total_steps, + gamma=0.1, + last_epoch=-1, + ): + super().__init__( + optimizer=optimizer, + milestones=[int(rate * total_steps) for rate in milestones], + gamma=gamma, + last_epoch=last_epoch, + ) + + +@SCHEDULERS.register_module() +class MultiStepWithWarmupLR(lr_scheduler.LambdaLR): + def __init__( + self, + optimizer, + milestones, + total_steps, + gamma=0.1, + warmup_rate=0.05, + warmup_scale=1e-6, + last_epoch=-1, + ): + milestones = [rate * total_steps for rate in milestones] + + def multi_step_with_warmup(s): + factor = 1.0 + for i in range(len(milestones)): + if s < milestones[i]: + break + factor *= gamma + + if s <= warmup_rate * total_steps: + warmup_coefficient = 1 - (1 - s / warmup_rate / total_steps) * ( + 1 - warmup_scale + ) + else: + warmup_coefficient = 1.0 + return warmup_coefficient * factor + + super().__init__( + optimizer=optimizer, + lr_lambda=multi_step_with_warmup, + last_epoch=last_epoch, + ) + + +@SCHEDULERS.register_module() +class PolyLR(lr_scheduler.LambdaLR): + def __init__( + self, + optimizer, + total_steps, + power=0.9, + last_epoch=-1, + ): + super().__init__( + optimizer=optimizer, + lr_lambda=lambda s: (1 - s / (total_steps + 1)) ** power, + last_epoch=last_epoch, + ) + + +@SCHEDULERS.register_module() +class ExpLR(lr_scheduler.LambdaLR): + def __init__( + self, + optimizer, + total_steps, + gamma=0.9, + last_epoch=-1, + ): + super().__init__( + optimizer=optimizer, + lr_lambda=lambda s: gamma ** (s / total_steps), + last_epoch=last_epoch, + ) + + +@SCHEDULERS.register_module() +class CosineAnnealingLR(lr_scheduler.CosineAnnealingLR): + def __init__( + self, + optimizer, + total_steps, + eta_min=0, + last_epoch=-1, + ): + super().__init__( 
+ optimizer=optimizer, + T_max=total_steps, + eta_min=eta_min, + last_epoch=last_epoch, + ) + + +@SCHEDULERS.register_module() +class OneCycleLR(lr_scheduler.OneCycleLR): + r""" + torch.optim.lr_scheduler.OneCycleLR, Block total_steps + """ + + def __init__( + self, + optimizer, + max_lr, + total_steps=None, + pct_start=0.3, + anneal_strategy="cos", + cycle_momentum=True, + base_momentum=0.85, + max_momentum=0.95, + div_factor=25.0, + final_div_factor=1e4, + three_phase=False, + last_epoch=-1, + ): + super().__init__( + optimizer=optimizer, + max_lr=max_lr, + total_steps=total_steps, + pct_start=pct_start, + anneal_strategy=anneal_strategy, + cycle_momentum=cycle_momentum, + base_momentum=base_momentum, + max_momentum=max_momentum, + div_factor=div_factor, + final_div_factor=final_div_factor, + three_phase=three_phase, + last_epoch=last_epoch, + ) + + +class CosineScheduler(object): + def __init__( + self, + base_value, + final_value, + total_iters, + start_value=0, + warmup_iters=0, + freeze_value=None, + freeze_iters=0, + ): + self.base_value = base_value + self.final_value = final_value + self.total_iters = total_iters + + warmup_schedule = np.linspace(start_value, base_value, warmup_iters) + + if freeze_value is None: + freeze_value = final_value + freeze_schedule = np.ones(freeze_iters) * freeze_value + + iters = np.arange(total_iters - warmup_iters - freeze_iters) + schedule = final_value + 0.5 * (base_value - final_value) * ( + 1 + np.cos(np.pi * iters / len(iters)) + ) + self.schedule = np.concatenate((warmup_schedule, schedule, freeze_schedule)) + self.iter = 0 + + def get(self, it): + if it >= self.total_iters: + return self.final_value + else: + return self.schedule[it] + + def step(self): + value = self.get(self.iter) + self.iter += 1 + return value + + def reset(self): + self.iter = 0 + + def __getitem__(self, it): + return self.get(it) + + +def build_scheduler(cfg, optimizer): + cfg = copy.deepcopy(cfg) + cfg.optimizer = optimizer + return 
SCHEDULERS.build(cfg=cfg) diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/utils/timer.py b/point_transformer_v3/external/pointcept_minimal/pointcept/utils/timer.py new file mode 100644 index 0000000..3de4a16 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/utils/timer.py @@ -0,0 +1,70 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# -*- coding: utf-8 -*- + +from time import perf_counter +from typing import Optional + + +class Timer: + """ + A timer which computes the time elapsed since the start/reset of the timer. + """ + + def __init__(self) -> None: + self.reset() + + def reset(self) -> None: + """ + Reset the timer. + """ + self._start = perf_counter() + self._paused: Optional[float] = None + self._total_paused = 0 + self._count_start = 1 + + def pause(self) -> None: + """ + Pause the timer. + """ + if self._paused is not None: + raise ValueError("Trying to pause a Timer that is already paused!") + self._paused = perf_counter() + + def is_paused(self) -> bool: + """ + Returns: + bool: whether the timer is currently paused + """ + return self._paused is not None + + def resume(self) -> None: + """ + Resume the timer. + """ + if self._paused is None: + raise ValueError("Trying to resume a Timer that is not paused!") + # pyre-fixme[58]: `-` is not supported for operand types `float` and + # `Optional[float]`. + self._total_paused += perf_counter() - self._paused + self._paused = None + self._count_start += 1 + + def seconds(self) -> float: + """ + Returns: + (float): the total number of seconds since the start/reset of the + timer, excluding the time when the timer is paused. + """ + if self._paused is not None: + end_time: float = self._paused # type: ignore + else: + end_time = perf_counter() + return end_time - self._start - self._total_paused + + def avg_seconds(self) -> float: + """ + Returns: + (float): the average number of seconds between every start/reset and + pause. 
+ """ + return self.seconds() / self._count_start diff --git a/point_transformer_v3/external/pointcept_minimal/pointcept/utils/visualization.py b/point_transformer_v3/external/pointcept_minimal/pointcept/utils/visualization.py new file mode 100644 index 0000000..92ac39b --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/pointcept/utils/visualization.py @@ -0,0 +1,128 @@ +""" +Visualization Utils + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import os + +try: + import open3d as o3d +except ImportError: + o3d = None +import numpy as np +import torch + + +def to_numpy(x): + if isinstance(x, torch.Tensor): + x = x.clone().detach().cpu().numpy() + assert isinstance(x, np.ndarray) + return x + + +def get_point_cloud(coord, color=None, verbose=True): + if not isinstance(coord, list): + coord = [coord] + if color is not None: + color = [color] + + pcd_list = [] + for i in range(len(coord)): + coord_ = to_numpy(coord[i]) + if color is not None: + color_ = to_numpy(color[i]) + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(coord_) + pcd.colors = o3d.utility.Vector3dVector( + np.zeros_like(coord_) if color is None else color_ + ) + pcd_list.append(pcd) + if verbose: + o3d.visualization.draw_geometries(pcd_list) + return pcd_list + + +def get_line_set(coord, line, color=(1.0, 0.0, 0.0), verbose=True): + coord = to_numpy(coord) + line = to_numpy(line) + colors = np.array([color for _ in range(len(line))]) + line_set = o3d.geometry.LineSet() + line_set.points = o3d.utility.Vector3dVector(coord) + line_set.lines = o3d.utility.Vector2iVector(line) + line_set.colors = o3d.utility.Vector3dVector(colors) + if verbose: + o3d.visualization.draw_geometries([line_set]) + return line_set + + +def save_point_cloud(coord, color=None, file_path="pc.ply", logger=None): + os.makedirs(os.path.dirname(file_path), exist_ok=True) + coord = to_numpy(coord) + if color is not None: + color = 
def save_bounding_boxes(
    bboxes_corners, color=(1.0, 0.0, 0.0), file_path="bbox.ply", logger=None
):
    """Write the wireframes of a set of 3D boxes to ``file_path``.

    Args:
        bboxes_corners: Tensor or array of box corners. It is flattened to
            ``(-1, 3)`` points, and box ``i`` (index along the first axis)
            owns points ``8 * i`` .. ``8 * i + 7``.
            NOTE(review): the edge table assumes corners 0-3 form one face in
            ring order, corners 4-7 the opposite face in the same order, and
            corner ``k`` is joined to corner ``k + 4`` -- confirm against the
            code that produces the corners.
        color: RGB triple applied to every edge.
        file_path: Destination passed to ``o3d.io.write_line_set``.
        logger: Optional logger; when given, the output path is logged.
    """
    bboxes_corners = to_numpy(bboxes_corners)
    # point list
    points = bboxes_corners.reshape(-1, 3)
    # The 12 edges of one box, as pairs of corner indices.
    box_lines = np.array(
        [
            # first face ring
            [0, 1],
            [1, 2],
            [2, 3],
            [3, 0],
            # opposite face ring; the last edge closes it with 7 -> 4
            # (it used to be [7, 0], which drew a diagonal and left the
            # ring open)
            [4, 5],
            [5, 6],
            [6, 7],
            [7, 4],
            # edges joining the two faces
            [0, 4],
            [1, 5],
            [2, 6],
            [3, 7],
        ]
    )
    # Shift the edge table by 8 points per box. Broadcasting also yields a
    # valid empty (0, 2) array when there are no boxes.
    box_offsets = 8 * np.arange(len(bboxes_corners)).reshape(-1, 1, 1)
    lines = (box_lines[np.newaxis] + box_offsets).reshape(-1, 2)
    # One colour row per edge.
    colors = np.tile(np.asarray(color, dtype=np.float64), (len(lines), 1))
    # generate line set
    line_set = o3d.geometry.LineSet()
    line_set.points = o3d.utility.Vector3dVector(points)
    line_set.lines = o3d.utility.Vector2iVector(lines)
    line_set.colors = o3d.utility.Vector3dVector(colors)
    o3d.io.write_line_set(file_path, line_set)

    if logger is not None:
        logger.info(f"Save Boxes to: {file_path}")
b/point_transformer_v3/external/pointcept_minimal/requirements_pointceptminimal.txt @@ -0,0 +1,29 @@ +# Core dependencies for PT-v3 FVDB implementation +timm +requests +addict +peft +wandb +tensorboard +tensorboardx +yapf + +# flash-attn is only needed when patch_size > 0 (default config uses patch_size=1024) +# While PyTorch 2.8+ has built-in flash attention, flash-attn provides optimized varlen functions +# that are faster for variable-length sequences. The build is slow but worth it for performance. +# +# If pip install freezes or is very slow, try installing separately with: +# MAX_JOBS=4 pip install flash-attn==2.7.4.post1 --no-build-isolation +# Or check for pre-built wheels at: https://github.com/Dao-AILab/flash-attention/releases +flash-attn==2.7.4.post1 + +# Pointcept framework dependencies (only needed when using point_transformer_v3m1_fvdb.py) +# Install from PyG wheels for PyTorch 2.8.0 + CUDA 12.9 +--find-links https://data.pyg.org/whl/torch-2.8.0+cu129.html +torch-cluster +# Sparse convolution - spconv-cu129 not available, try cu124 (usually compatible with 12.9) +# If this fails, install from source: https://github.com/traveller59/spconv +spconv-cu124 + +# Development +black~=24.0 diff --git a/point_transformer_v3/external/pointcept_minimal/run.sh b/point_transformer_v3/external/pointcept_minimal/run.sh new file mode 100644 index 0000000..3fffa00 --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/run.sh @@ -0,0 +1,16 @@ + +# sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-0-test-fw -n semseg-pt-v3m1-0-test-fw + +# sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-0-fvdb-test-fw -n semseg-pt-v3m1-0-fvdb-test-fw + +# sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-0-test -n semseg-pt-v3m1-0-test + +# sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-0-fvdb-test -n semseg-pt-v3m1-0-fvdb-test + +CUDA_VISIBLE_DEVICES=0 sh scripts/train.sh -g 1 -d scannet -c semseg-pt-v3m1-0-test-1g -n semseg-pt-v3m1-0-test-1g + 
+CUDA_VISIBLE_DEVICES=1 sh scripts/train.sh -g 1 -d scannet -c semseg-pt-v3m1-0-fvdb-test-1g -n semseg-pt-v3m1-0-fvdb-test-1g + +CUDA_VISIBLE_DEVICES=2 sh scripts/train.sh -g 1 -d scannet -c semseg-pt-v3m1-0-test-1g-2 -n semseg-pt-v3m1-0-test-1g-2 + +CUDA_VISIBLE_DEVICES=3 sh scripts/train.sh -g 1 -d scannet -c semseg-pt-v3m1-0-fvdb-test-1g-2 -n semseg-pt-v3m1-0-fvdb-test-1g-2 diff --git a/point_transformer_v3/external/pointcept_minimal/scripts/build_image.sh b/point_transformer_v3/external/pointcept_minimal/scripts/build_image.sh new file mode 100644 index 0000000..7233b7f --- /dev/null +++ b/point_transformer_v3/external/pointcept_minimal/scripts/build_image.sh @@ -0,0 +1,83 @@ +TORCH_VERSION=2.5.0 +CUDA_VERSION=12.4 +CUDNN_VERSION=9 + +ARGS=`getopt -o t:c: -l torch:,cuda:,cudnn: -n "$0" -- "$@"` +[ $? != 0 ] && exit 1 +eval set -- "${ARGS}" +while true ; do + case "$1" in + -t | --torch) + TORCH_VERSION=$2 + shift 2 + ;; + -c | --cuda) + CUDA_VERSION=$2 + shift 2 + ;; + --cudnn) + CUDNN_VERSION=$2 + shift 2 + ;; + --) + break + ;; + *) + echo "Invalid option: $1" + exit 1 + ;; + esac +done + +CUDA_VERSION_NO_DOT=`echo ${CUDA_VERSION} | tr -d "."` +BASE_TORCH_TAG=${TORCH_VERSION}-cuda${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel +IMG_TAG=pointcept/pointcept:v1.6.0-pytorch${BASE_TORCH_TAG} + +echo "TORCH VERSION: ${TORCH_VERSION}" +echo "CUDA VERSION: ${CUDA_VERSION}" +echo "CUDNN VERSION: ${CUDNN_VERSION}" + + +cat > ./Dockerfile <<- EOM +FROM pytorch/pytorch:${BASE_TORCH_TAG} + +# Fix nvidia-key error issue (NO_PUBKEY A4B469963BF863CC) +RUN rm /etc/apt/sources.list.d/*.list + +# Installing apt packages +RUN export DEBIAN_FRONTEND=noninteractive \ + && apt -y update --no-install-recommends \ + && apt -y install --no-install-recommends \ + git wget tmux vim zsh build-essential cmake ninja-build libopenblas-dev libsparsehash-dev \ + && apt autoremove -y \ + && apt clean -y \ + && export DEBIAN_FRONTEND=dialog + +# Install Pointcept environment +RUN conda install h5py 
pyyaml tensorboard tensorboardx wandb yapf addict einops scipy plyfile termcolor matplotlib black open3d -c conda-forge -y + +RUN pip install --upgrade pip +RUN pip install timm +RUN pip install torch-geometric +RUN pip install torch_scatter torch_sparse torch_cluster -f https://data.pyg.org/whl/torch-${TORCH_VERSION}+cu${CUDA_VERSION_NO_DOT}.html +RUN pip install spconv-cu${CUDA_VERSION_NO_DOT} +RUN pip install git+https://github.com/octree-nn/ocnn-pytorch.git +RUN pip install ftfy regex tqdm +RUN pip install git+https://github.com/openai/CLIP.git + +# Build swin3d +RUN TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0" pip install -U git+https://github.com/microsoft/Swin3D.git -v + +# Build FlashAttention2 +RUN TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0" pip install git+https://github.com/Dao-AILab/flash-attention.git + +# Build pointops +RUN git clone https://github.com/Pointcept/Pointcept.git +RUN TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0" pip install Pointcept/libs/pointops -v + +# Build pointgroup_ops +RUN TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0" pip install Pointcept/libs/pointgroup_ops -v + +EOM + +docker build . 
#!/bin/sh
# Pack every file under SOURCE_DIR into numbered .tar.gz archives, each holding
# roughly MAX_SIZE_GB of files (sizes as reported by `du -k`).
#
# Usage: create_tars.sh SOURCE_DIR DEST_DIR MAX_SIZE_GB [PARALLEL_PROCESSES]
#
# Archives are written to DEST_DIR/<basename of SOURCE_DIR>_<NN>.tar.gz.
# PARALLEL_PROCESSES defaults to the number of archives.

if [ "$#" -lt 3 ]; then
  echo "Usage: $0 SOURCE_DIR DEST_DIR MAX_SIZE_GB [PARALLEL_PROCESSES]" >&2
  exit 1
fi

# Create destination directory if it doesn't exist
mkdir -p "$2" || exit 1

# Resolve both directories to absolute paths. The script changes into
# SOURCE_DIR below, and tar later uses `-C "$SOURCE_DIR"` and writes into
# DEST_DIR; relative paths would be resolved against the wrong directory.
SOURCE_DIR=$(CDPATH= cd -- "$1" && pwd) || exit 1
DEST_DIR=$(CDPATH= cd -- "$2" && pwd) || exit 1

# Convert GB to KB as an integer. The value is passed with -v so it is treated
# as data and never as awk program text.
MAX_SIZE=$(awk -v gb="$3" 'BEGIN {printf "%d", gb * 1024 * 1024}')

# Get the base name of the source directory to use as TAR_NAME
TAR_NAME=$(basename "$SOURCE_DIR")

# Scratch directory holding one file list per archive; removed on any exit.
temp_dir=$(mktemp -d) || exit 1
trap 'rm -rf "$temp_dir"' EXIT
trap 'exit 1' HUP INT TERM

# Initialize
tar_number=1
current_size=0
file_list="$temp_dir/file_list_$tar_number"
echo Start indexing "file_list_$tar_number"

cd "$SOURCE_DIR" || exit 1

# Iterate over all files in the source directory. The loop runs in a pipeline
# subshell, so its counters do not survive it; only the file lists written to
# $temp_dir carry the result forward.
find . -type f | while IFS= read -r file; do
  file_size=$(du -k "$file" | cut -f1)

  # Start a new list once adding this file would exceed the size budget.
  if [ $(( current_size + file_size )) -gt "$MAX_SIZE" ]; then
    tar_number=$((tar_number + 1))
    file_list="$temp_dir/file_list_$tar_number"
    echo Start indexing "file_list_$tar_number"
    current_size=0
  fi

  echo "$file" >> "$file_list"
  current_size=$((current_size + file_size))
done

# Determine the width for the tar file numbers. Some `wc` implementations pad
# their output with blanks, which would otherwise inflate the width.
total_files=$(find "$temp_dir" -name 'file_list_*' | wc -l | tr -d '[:space:]')
width=${#total_files}

# Set PARALLEL_PROCESSES to the number of file lists if not provided
PARALLEL_PROCESSES=${4:-$total_files}

# Debug information
echo "Total files: $total_files"
echo "Width: $width"
echo "Parallel processes: $PARALLEL_PROCESSES"

# Run tar creation in parallel. Settings reach the workers through the
# environment and the list path as a positional argument, so no path is ever
# spliced into the shell program text.
export TAR_NAME DEST_DIR SOURCE_DIR width
find "$temp_dir" -name 'file_list_*' | xargs -P "$PARALLEL_PROCESSES" -I {} sh -c '
  file_list=$1
  tar_number=${file_list##*_}
  tar_name=$(printf "%s/%s_%0${width}d.tar.gz" "$DEST_DIR" "$TAR_NAME" "$tar_number")
  tar -zcvf "$tar_name" -C "$SOURCE_DIR" -T "$file_list"
' sh {}
# The exit status of the pipeline above (non-zero if any tar failed) becomes
# the exit status of the script; the EXIT trap removes $temp_dir.
#!/bin/sh
# Launch a training run.
#
# Options:
#   -p PYTHON   python interpreter/command (default: python)
#   -d DATASET  dataset name; selects configs/<DATASET>/ and exp/<DATASET>/
#   -c CONFIG   config name (without .py) inside configs/<DATASET>/
#   -n NAME     experiment name; outputs go to exp/<DATASET>/<NAME>
#   -w WEIGHT   checkpoint path to load ("None" to skip)
#   -r RESUME   "true" to resume from exp/<DATASET>/<NAME> if it exists
#   -g NUM_GPU  GPUs per machine (default: all GPUs visible to torch)
#   -m NUM      number of machines (default: 1)

# Run from the directory above scripts/. Quoted so that a checkout path
# containing spaces does not get word-split.
cd "$(dirname "$(dirname "$0")")" || exit
ROOT_DIR=$(pwd)
PYTHON=python

TRAIN_CODE=train.py

DATASET=scannet
CONFIG="None"
EXP_NAME=debug
WEIGHT="None"
RESUME=false
NUM_GPU=None
NUM_MACHINE=1
DIST_URL="auto"


while getopts "p:d:c:n:w:g:m:r:" opt; do
  case $opt in
    p)
      PYTHON=$OPTARG
      ;;
    d)
      DATASET=$OPTARG
      ;;
    c)
      CONFIG=$OPTARG
      ;;
    n)
      EXP_NAME=$OPTARG
      ;;
    w)
      WEIGHT=$OPTARG
      ;;
    r)
      RESUME=$OPTARG
      ;;
    g)
      NUM_GPU=$OPTARG
      ;;
    m)
      NUM_MACHINE=$OPTARG
      ;;
    \?)
      echo "Invalid option: -$OPTARG"
      ;;
  esac
done

# $PYTHON is intentionally left unquoted throughout so that it may carry extra
# arguments (e.g. "python -u").
if [ "${NUM_GPU}" = 'None' ]
then
  NUM_GPU=$($PYTHON -c 'import torch; print(torch.cuda.device_count())')
fi

echo "Experiment name: $EXP_NAME"
echo "Python interpreter dir: $PYTHON"
echo "Dataset: $DATASET"
echo "Config: $CONFIG"
echo "GPU Num: $NUM_GPU"
echo "Machine Num: $NUM_MACHINE"

if [ -n "$SLURM_NODELIST" ]; then
  MASTER_HOSTNAME=$(scontrol show hostname "$SLURM_NODELIST" | head -n 1)
  MASTER_ADDR=$(getent hosts "$MASTER_HOSTNAME" | awk '{ print $1 }')
  # Derive a port in [10000, 30000) from the first four hex digits of the md5
  # of "<dataset>/<experiment>". printf replaces `echo -n`, whose behaviour is
  # not defined for /bin/sh.
  MASTER_PORT=$((10000 + 0x$(printf '%s' "${DATASET}/${EXP_NAME}" | md5sum | cut -c 1-4) % 20000))
  DIST_URL=tcp://$MASTER_ADDR:$MASTER_PORT
fi

echo "Dist URL: $DIST_URL"

EXP_DIR=exp/${DATASET}/${EXP_NAME}
MODEL_DIR=${EXP_DIR}/model
CODE_DIR=${EXP_DIR}/code
CONFIG_DIR=configs/${DATASET}/${CONFIG}.py


echo " =========> CREATE EXP DIR <========="
echo "Experiment dir: $ROOT_DIR/$EXP_DIR"
if [ "${RESUME}" = true ] && [ -d "$EXP_DIR" ]
then
  # Resuming: reuse the config and last checkpoint stored with the experiment.
  CONFIG_DIR=${EXP_DIR}/config.py
  WEIGHT=$MODEL_DIR/model_last.pth
else
  # Fresh run: snapshot the code into the experiment directory.
  RESUME=false
  mkdir -p "$MODEL_DIR" "$CODE_DIR"
  cp -r scripts tools pointcept "$CODE_DIR"
fi

echo "Loading config in: $CONFIG_DIR"
export PYTHONPATH=./$CODE_DIR
echo "Running code in: $CODE_DIR"


echo " =========> RUN TASK <========="
ulimit -n 65536

# Collect the --options overrides in the positional parameters; resume/weight
# are only passed when a checkpoint is in play.
set -- save_path="$EXP_DIR"
if [ "${WEIGHT}" != "None" ]
then
  set -- "$@" resume="$RESUME" weight="$WEIGHT"
fi

$PYTHON "$CODE_DIR"/tools/$TRAIN_CODE \
  --config-file "$CONFIG_DIR" \
  --num-gpus "$NUM_GPU" \
  --num-machines "$NUM_MACHINE" \
  --machine-rank "${SLURM_NODEID:-0}" \
  --dist-url "${DIST_URL}" \
  --options "$@"
def main_worker(cfg):
    """Build the trainer named by ``cfg.train.type`` and run its ``train()``.

    ``cfg`` is first passed through ``default_setup``; the object it returns is
    the one handed to the trainer.
    """
    prepared_cfg = default_setup(cfg)
    build_spec = {"type": prepared_cfg.train.type, "cfg": prepared_cfg}
    TRAINERS.build(build_spec).train()


def main():
    """Parse the command line, load the config and start ``main_worker``.

    The config is wrapped in a one-element tuple for ``launch``.
    NOTE(review): presumably ``launch`` unpacks that tuple as the positional
    arguments of ``main_worker`` -- confirm in pointcept.engines.launch.
    """
    cli_args = default_argument_parser().parse_args()
    run_cfg = default_config_parser(cli_args.config_file, cli_args.options)
    launch_kwargs = {
        "num_gpus_per_machine": cli_args.num_gpus,
        "num_machines": cli_args.num_machines,
        "machine_rank": cli_args.machine_rank,
        "dist_url": cli_args.dist_url,
        "cfg": (run_cfg,),
    }
    launch(main_worker, **launch_kwargs)