From 0503d60f11db01eafd2f2d0f56660b86c27735a4 Mon Sep 17 00:00:00 2001 From: Hexu Zhao Date: Sun, 9 Nov 2025 13:56:38 -0800 Subject: [PATCH 1/7] add a minimal pointcept to train the fvdb-based ptv3. This has removed most of code which are not used in ptv3 training. Signed-off-by: Hexu Zhao --- point_transformer_v3/README.md | 70 +- .../.github/workflows/formatter.yml | 20 + .../pointcept_minimal/.gitignore | 16 + .../pointcept_minimal/LICENSE | 21 + .../pointcept_minimal/README.md | 988 +++++++++++ .../configs/_base_/dataset/scannet.py | 26 + .../configs/_base_/default_runtime.py | 46 + .../configs/scannet/semseg-pt-v3m1-0-base.py | 317 ++++ .../scannet/semseg-pt-v3m1-0-fvdb-test.py | 307 ++++ .../configs/scannet/semseg-pt-v3m1-0-test.py | 321 ++++ .../pointcept_minimal/environment.yml | 52 + .../pointcept_minimal/pointcept/__init__.py | 0 .../pointcept/datasets/__init__.py | 9 + .../pointcept/datasets/builder.py | 15 + .../pointcept/datasets/dataloader.py | 112 ++ .../pointcept/datasets/defaults.py | 499 ++++++ .../concerto/scannet/preprocess_scannet.py | 574 +++++++ .../concerto/scannet/preprocess_scannet.sh | 42 + .../preprocessing/concerto/scannet/splits.py | 62 + .../preprocessing/sampling_chunking_data.py | 149 ++ .../scannet/dino/prepare_scene_list.py | 27 + .../scannet/dino/preprocess_dino_feature.py | 362 ++++ .../scannet/extract_partition.py | 71 + .../scannet/meta_data/scannet200_constants.py | 704 ++++++++ .../scannet/meta_data/scannet200_splits.py | 625 +++++++ .../scannet/meta_data/scannet_means.npz | Bin 0 -> 676 bytes .../scannetv2-labels-old.combined.tsv | 608 +++++++ .../meta_data/scannetv2-labels.combined.tsv | 608 +++++++ .../scannet/preprocess_scannet.py | 253 +++ .../pointcept/datasets/scannet.py | 118 ++ .../pointcept/datasets/transform.py | 1457 +++++++++++++++++ .../pointcept/datasets/utils.py | 140 ++ .../pointcept/engines/__init__.py | 0 .../pointcept/engines/defaults.py | 152 ++ .../pointcept/engines/hooks/__init__.py | 5 + .../pointcept/engines/hooks/builder.py | 18 + .../pointcept/engines/hooks/default.py | 66 + .../pointcept/engines/hooks/evaluator.py | 243 +++ .../pointcept/engines/hooks/misc.py | 553 +++++++ .../pointcept/engines/launch.py | 137 ++ .../pointcept/engines/test.py | 890 ++++++++++ .../pointcept/engines/train.py | 372 +++++ .../pointcept/models/__init__.py | 10 + .../pointcept/models/builder.py | 17 + .../pointcept/models/default.py | 230 +++ .../pointcept/models/losses/__init__.py | 4 + .../pointcept/models/losses/builder.py | 31 + .../pointcept/models/losses/lovasz.py | 257 +++ .../pointcept/models/losses/misc.py | 223 +++ .../pointcept/models/modules.py | 120 ++ .../models/point_transformer_v3/__init__.py | 3 + .../point_transformer_v3m1_base.py | 716 ++++++++ .../point_transformer_v3m1_fvdb.py | 219 +++ .../point_transformer_v3m2_sonata.py | 732 +++++++++ .../pointcept/models/utils/__init__.py | 9 + .../pointcept/models/utils/checkpoint.py | 57 + .../pointcept/models/utils/misc.py | 41 + .../models/utils/serialization/__init__.py | 8 + .../models/utils/serialization/default.py | 59 + .../models/utils/serialization/hilbert.py | 303 ++++ .../models/utils/serialization/z_order.py | 126 ++ .../pointcept/models/utils/structure.py | 209 +++ .../pointcept/utils/__init__.py | 0 .../pointcept/utils/cache.py | 60 + .../pointcept_minimal/pointcept/utils/comm.py | 198 +++ .../pointcept/utils/config.py | 694 ++++++++ .../pointcept_minimal/pointcept/utils/env.py | 36 + .../pointcept/utils/events.py | 612 +++++++ .../pointcept_minimal/pointcept/utils/misc.py | 164 ++ .../pointcept/utils/optimizer.py | 57 + .../pointcept_minimal/pointcept/utils/path.py | 103 ++ .../pointcept/utils/registry.py | 316 ++++ .../pointcept/utils/scheduler.py | 205 +++ .../pointcept/utils/timer.py | 70 + .../pointcept/utils/visualization.py | 128 ++ .../pointcept_minimal/scripts/build_image.sh | 83 + .../pointcept_minimal/scripts/create_tars.sh | 67 + .../pointcept_minimal/scripts/test.sh | 92 ++ .../pointcept_minimal/scripts/train.sh | 114 ++ .../pointcept_minimal/tools/test.py | 39 + .../pointcept_minimal/tools/train.py | 38 + 81 files changed, 17504 insertions(+), 1 deletion(-) create mode 100644 point_transformer_v3/pointcept_minimal/.github/workflows/formatter.yml create mode 100644 point_transformer_v3/pointcept_minimal/.gitignore create mode 100644 point_transformer_v3/pointcept_minimal/LICENSE create mode 100644 point_transformer_v3/pointcept_minimal/README.md create mode 100644 point_transformer_v3/pointcept_minimal/configs/_base_/dataset/scannet.py create mode 100644 point_transformer_v3/pointcept_minimal/configs/_base_/default_runtime.py create mode 100644 point_transformer_v3/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-base.py create mode 100644 point_transformer_v3/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test.py create mode 100644 point_transformer_v3/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test.py create mode 100644 point_transformer_v3/pointcept_minimal/environment.yml create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/__init__.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/__init__.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/builder.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/dataloader.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/defaults.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/concerto/scannet/preprocess_scannet.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/concerto/scannet/preprocess_scannet.sh create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/concerto/scannet/splits.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/sampling_chunking_data.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/dino/prepare_scene_list.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/dino/preprocess_dino_feature.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/extract_partition.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_constants.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_splits.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet_means.npz create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2-labels-old.combined.tsv create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2-labels.combined.tsv create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/preprocess_scannet.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/scannet.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/transform.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/utils.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/engines/__init__.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/engines/defaults.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/__init__.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/builder.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/default.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/evaluator.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/misc.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/engines/launch.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/engines/test.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/engines/train.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/__init__.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/builder.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/default.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/losses/__init__.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/losses/builder.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/losses/lovasz.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/losses/misc.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/modules.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/__init__.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m1_base.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m1_fvdb.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m2_sonata.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/utils/__init__.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/utils/checkpoint.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/utils/misc.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/__init__.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/default.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/hilbert.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/z_order.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/utils/structure.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/__init__.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/cache.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/comm.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/config.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/env.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/events.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/misc.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/optimizer.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/path.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/registry.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/scheduler.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/timer.py create mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/visualization.py create mode 100644 point_transformer_v3/pointcept_minimal/scripts/build_image.sh create mode 100644 point_transformer_v3/pointcept_minimal/scripts/create_tars.sh create mode 100644 point_transformer_v3/pointcept_minimal/scripts/test.sh create mode 100644 point_transformer_v3/pointcept_minimal/scripts/train.sh create mode 100644 point_transformer_v3/pointcept_minimal/tools/test.py create mode 100644 point_transformer_v3/pointcept_minimal/tools/train.py diff --git a/point_transformer_v3/README.md b/point_transformer_v3/README.md index 7a59693..34ab415 100644 --- a/point_transformer_v3/README.md +++ b/point_transformer_v3/README.md @@ -16,10 +16,18 @@ conda activate fvdb Next, activate the environment and install additional dependancies specifically for the point transformer project. ```bash -cd fvdb/projects/point_transformer_v3 +cd fvdb-examples/point_transformer_v3 pip install -r requirements.txt ``` +In order to train on Scannet dataset with pointcept codebase, we need to additionally install: + +```bash +cd fvdb-examples/point_transformer_v3 +pip install -r requirements_pointcept.txt +``` + + ## Files Overview @@ -150,3 +158,63 @@ This will: - Report differences in output features, sums, and last elements - Expect only small numerical differences (typically < 1e-5) due to floating-point precision. +## Training on ScanNet Dataset + +This section describes how to train PT-v3 models on the ScanNet dataset using the minimal Pointcept training codebase, using either their ptv3 implementation and our fVDB implementation. + +### Environment Setup + +Follow the **Environment** section above to set up the development environment and install all required dependencies. + +### ScanNet Dataset Preparation + +The preprocessing pipeline supports semantic and instance segmentation for `ScanNet20`, `ScanNet200`, and `ScanNet Data Efficient` benchmarks. + +1. **Download the dataset**: Obtain the [ScanNet v2 dataset](http://www.scan-net.org/) (requires registration and approval). + +2. **Preprocess the raw data**: Run the preprocessing script to convert the raw ScanNet data into the required format: + +```bash +# RAW_SCANNET_DIR: the directory containing the downloaded ScanNet v2 raw dataset +# PROCESSED_SCANNET_DIR: the output directory for the processed ScanNet dataset +python pointcept_minimal/pointcept/datasets/preprocessing/scannet/preprocess_scannet.py \ + --dataset_root ${RAW_SCANNET_DIR} \ + --output_root ${PROCESSED_SCANNET_DIR} +``` + +3. **Alternative**: Download preprocessed data directly from [HuggingFace](https://huggingface.co/datasets/Pointcept/scannet-compressed). Please ensure you agree to the official ScanNet license before downloading. + +4. **Link the processed dataset** to the codebase data directory: + +```bash +# PROCESSED_SCANNET_DIR: the directory containing the processed ScanNet dataset +mkdir -p pointcept_minimal/data +ln -s ${PROCESSED_SCANNET_DIR} pointcept_minimal/data/scannet +``` + +### Training Scripts + +To train the PT-v3 models with different configurations, use the following commands from the `pointcept_minimal` directory: + +```bash +# Train PT-v3 with FVDB backend (8 GPUs) +sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-0-fvdb-test -n semseg-pt-v3m1-0-fvdb-test + +# Train PT-v3 with standard backend (8 GPUs) +sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-0-test -n semseg-pt-v3m1-0-test +``` + +You should launch the above scripts within `point_transformer_v3/pointcept_minimal` folder. + +### Configuration Files + +The training configurations are located at: +- `pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test.py` - FVDB-based implementation +- `pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test.py` - Standard implementation + +### Model Implementation + +The model implementations can be found in the following files: +- `pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m1_base.py` - Base PT-v3 implementation +- `pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m1_fvdb.py` - FVDB-accelerated PT-v3 implementation + diff --git a/point_transformer_v3/pointcept_minimal/.github/workflows/formatter.yml b/point_transformer_v3/pointcept_minimal/.github/workflows/formatter.yml new file mode 100644 index 0000000..a95391b --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/.github/workflows/formatter.yml @@ -0,0 +1,20 @@ +name: Formatter + +on: + workflow_dispatch: + push: + branches: + - main + pull_request: + types: [opened, reopened, synchronize] + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + formatter: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: psf/black@stable diff --git a/point_transformer_v3/pointcept_minimal/.gitignore b/point_transformer_v3/pointcept_minimal/.gitignore new file mode 100644 index 0000000..59c3884 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/.gitignore @@ -0,0 +1,16 @@ +image/ +__pycache__ +**/build/ +**/*.egg-info/ +**/dist/ +*.so +exp +weights +data +*log* +outputs/ +.vscode +.idea +*/.DS_Store +**/*.out +Dockerfile diff --git a/point_transformer_v3/pointcept_minimal/LICENSE b/point_transformer_v3/pointcept_minimal/LICENSE new file mode 100644 index 0000000..ee1fac1 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Pointcept + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/point_transformer_v3/pointcept_minimal/README.md b/point_transformer_v3/pointcept_minimal/README.md new file mode 100644 index 0000000..333dddf --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/README.md @@ -0,0 +1,988 @@ +

+ + + + + + pointcept + +
+ +

+ +[![Formatter](https://github.com/pointcept/pointcept/actions/workflows/formatter.yml/badge.svg)](https://github.com/pointcept/pointcept/actions/workflows/formatter.yml) + +**Pointcept** is a powerful and flexible codebase for point cloud perception research. It is also an official implementation of the following paper: +- πŸš€ **Concerto: Joint 2D-3D Self-Supervised Learning Emerges Spatial Representations** +*Yujia Zhang, Xiaoyang Wu, Yixing Lao, Chengyao Wang, Zhuotao Tian, Naiyan Wang, Hengshuang Zhao* +Conference on Neural Information Processing Systems (**NeurIPS**) 2025 +[ Pretrain ] [Concerto] - [ [Project](https://pointcept.github.io/Concerto/) ] [ [Bib](https://xywu.me/research/concerto/bib.txt) ] [ [HF Demo](https://huggingface.co/spaces/Pointcept/Concerto) ] [ [Inference](https://github.com/Pointcept/Concerto) ] [ [Weight](https://huggingface.co/Pointcept/Concerto) ] → [here](#concerto) + + +- **Sonata: Self-Supervised Learning of Reliable Point Representations** +*Xiaoyang Wu, Daniel DeTone, Duncan Frost, Tianwei Shen, Chris Xie, Nan Yang, Jakob Engel, Richard Newcombe, Hengshuang Zhao, Julian Straub* +IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2025 - Highlight +[ Pretrain ] [Sonata] - [ [Project](https://xywu.me/sonata/) ] [ [arXiv](https://arxiv.org/abs/2503.16429) ] [ [Bib](https://xywu.me/research/sonata/bib.txt) ] [ [Demo](https://github.com/facebookresearch/sonata) ] [ [Weight](https://huggingface.co/facebook/sonata) ] → [here](#sonata) + + +- **Point Transformer V3: Simpler, Faster, Stronger** +*Xiaoyang Wu, Li Jiang, Peng-Shuai Wang, Zhijian Liu, Xihui Liu, Yu Qiao, Wanli Ouyang, Tong He, Hengshuang Zhao* +IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2024 - Oral +[ Backbone ] [PTv3] - [ [arXiv](https://arxiv.org/abs/2312.10035) ] [ [Bib](https://xywu.me/research/ptv3/bib.txt) ] [ [Project](https://github.com/Pointcept/PointTransformerV3) ] → [here](https://github.com/Pointcept/PointTransformerV3) + + +- **OA-CNNs: Omni-Adaptive Sparse CNNs for 3D Semantic Segmentation** +*Bohao Peng, Xiaoyang Wu, Li Jiang, Yukang Chen, Hengshuang Zhao, Zhuotao Tian, Jiaya Jia* +IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2024 +[ Backbone ] [ OA-CNNs ] - [ [arXiv](https://arxiv.org/abs/2403.14418) ] [ [Bib](https://xywu.me/research/oacnns/bib.txt) ] → [here](#oa-cnns) + + +- **Towards Large-scale 3D Representation Learning with Multi-dataset Point Prompt Training** +*Xiaoyang Wu, Zhuotao Tian, Xin Wen, Bohao Peng, Xihui Liu, Kaicheng Yu, Hengshuang Zhao* +IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2024 +[ Pretrain ] [PPT] - [ [arXiv](https://arxiv.org/abs/2308.09718) ] [ [Bib](https://xywu.me/research/ppt/bib.txt) ] → [here](#point-prompt-training-ppt) + + +- **Masked Scene Contrast: A Scalable Framework for Unsupervised 3D Representation Learning** +*Xiaoyang Wu, Xin Wen, Xihui Liu, Hengshuang Zhao* +IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2023 +[ Pretrain ] [ MSC ] - [ [arXiv](https://arxiv.org/abs/2303.14191) ] [ [Bib](https://xywu.me/research/msc/bib.txt) ] → [here](#masked-scene-contrast-msc) + + +- **Learning Context-aware Classifier for Semantic Segmentation** (3D Part) +*Zhuotao Tian, Jiequan Cui, Li Jiang, Xiaojuan Qi, Xin Lai, Yixin Chen, Shu Liu, Jiaya Jia* +AAAI Conference on Artificial Intelligence (**AAAI**) 2023 - Oral +[ SemSeg ] [ CAC ] - [ [arXiv](https://arxiv.org/abs/2303.11633) ] [ [Bib](https://xywu.me/research/cac/bib.txt) ] [ [2D Part](https://github.com/tianzhuotao/CAC) ] → [here](#context-aware-classifier) + + +- **Point Transformer V2: Grouped Vector Attention and Partition-based Pooling** +*Xiaoyang Wu, Yixing Lao, Li Jiang, Xihui Liu, Hengshuang Zhao* +Conference on Neural Information Processing Systems (**NeurIPS**) 2022 +[ Backbone ] [ PTv2 ] - [ [arXiv](https://arxiv.org/abs/2210.05666) ] [ [Bib](https://xywu.me/research/ptv2/bib.txt) ] → [here](#point-transformers) + + +- **Point Transformer** +*Hengshuang Zhao, Li Jiang, Jiaya Jia, Philip Torr, Vladlen Koltun* +IEEE International Conference on Computer Vision (**ICCV**) 2021 - Oral +[ Backbone ] [ PTv1 ] - [ [arXiv](https://arxiv.org/abs/2012.09164) ] [ [Bib](https://hszhao.github.io/papers/iccv21_pointtransformer_bib.txt) ] → [here](#point-transformers) + +Additionally, **Pointcept** integrates the following excellent work (contain above): +Backbone: +[MinkUNet](https://github.com/NVIDIA/MinkowskiEngine) ([here](#sparseunet)), +[SpUNet](https://github.com/traveller59/spconv) ([here](#sparseunet)), +[SPVCNN](https://github.com/mit-han-lab/spvnas) ([here](#spvcnn)), +[OACNNs](https://arxiv.org/abs/2403.14418) ([here](#oa-cnns)), +[PTv1](https://arxiv.org/abs/2012.09164) ([here](#point-transformers)), +[PTv2](https://arxiv.org/abs/2210.05666) ([here](#point-transformers)), +[PTv3](https://arxiv.org/abs/2312.10035) ([here](#point-transformers)), +[StratifiedFormer](https://github.com/dvlab-research/Stratified-Transformer) ([here](#stratified-transformer)), +[OctFormer](https://github.com/octree-nn/octformer) ([here](#octformer)), +[Swin3D](https://github.com/microsoft/Swin3D) ([here](#swin3d)); +Semantic Segmentation: +[Mix3d](https://github.com/kumuji/mix3d) ([here](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet/semseg-spunet-v1m1-0-base.py#L5)), +[CAC](https://arxiv.org/abs/2303.11633) ([here](#context-aware-classifier)); +Instance Segmentation: +[PointGroup](https://github.com/dvlab-research/PointGroup) ([here](#pointgroup)); +Pre-training: +[PointContrast](https://github.com/facebookresearch/PointContrast) ([here](#pointcontrast)), +[Contrastive Scene Contexts](https://github.com/facebookresearch/ContrastiveSceneContexts) ([here](#contrastive-scene-contexts)), +[Masked Scene Contrast](https://arxiv.org/abs/2303.14191) ([here](#masked-scene-contrast-msc)), +[Point Prompt Training](https://arxiv.org/abs/2308.09718) ([here](#point-prompt-training-ppt)), +[Sonata](https://arxiv.org/abs/2503.16429) ([here](#sonata)), +[Concerto]() ([here](#concerto)); +Datasets: +[ScanNet](http://www.scan-net.org/) ([here](#scannet-v2)), +[ScanNet200](http://www.scan-net.org/) ([here](#scannet-v2)), +[ScanNet++](https://kaldir.vc.in.tum.de/scannetpp/) ([here](#scannet)), +[S3DIS](https://docs.google.com/forms/d/e/1FAIpQLScDimvNMCGhy_rmBA2gHfDu3naktRm6A8BPwAWWDv-Uhm6Shw/viewform?c=0&w=1) ([here](#s3dis)), +[ArkitScene](https://github.com/apple/ARKitScenes) ([here](#arkitscenes)), +[HM3D](https://github.com/facebookresearch/habitat-matterport3d-dataset/) ([here](#habitat---matterport-3d-hm3d)), +[Matterport3D](https://niessner.github.io/Matterport/) ([here](#matterport3d)), +[Structured3D](https://structured3d-dataset.org/) ([here](#structured3d)), +[SemanticKITTI](http://www.semantic-kitti.org/) ([here](#semantickitti)), +[nuScenes](https://www.nuscenes.org/nuscenes) ([here](#nuscenes)), +[ModelNet40](https://modelnet.cs.princeton.edu/) ([here](#modelnet)), +[Waymo](https://waymo.com/open/) ([here](#waymo)). + + +## Highlights +- *Apr 2025* πŸš€: We now support `wandb`, check the [Quick Start](#quick-start) training section for more information. (Thanks @Streakfull for his contribution!) +- *Mar 2025* πŸš€: **Sonata** is accepted by CVPR 2025 and selected as one of the **Highlight** presentations (3.0% submissions)! We release the code with Pointcept v1.6.0. We release the pre-training **[code](#sonata)** along with Pointcept v1.6.0 and provide an easy-to-use pre-trained model for inference, tuning, and visualization in our project **[repository](https://github.com/facebookresearch/sonata)** hosted by Meta. +- *May 2024*: In v1.5.2, we redesigned the default structure for each dataset for better performance. Please **re-preprocess** datasets or **download** our preprocessed datasets from **[here](https://huggingface.co/Pointcept)**. +- *Apr 2024*: **PTv3** is selected as one of the 90 **Oral** papers (3.3% accepted papers, 0.78% submissions) by CVPR'24! +- *Mar 2024*: We release code for **OA-CNNs**, accepted by CVPR'24. Issue related to **OA-CNNs** can @Pbihao. +- *Feb 2024*: **PTv3** and **PPT** are accepted by CVPR'24, another **two** papers by our Pointcept team have also been accepted by CVPR'24 πŸŽ‰πŸŽ‰πŸŽ‰. We will make them publicly available soon! +- *Dec 2023*: **PTv3** is released on arXiv, and the code is available in Pointcept. PTv3 is an efficient backbone model that achieves SOTA performances across indoor and outdoor scenarios. +- *Aug 2023*: **PPT** is released on arXiv. PPT presents a multi-dataset pre-training framework that achieves SOTA performance in both **indoor** and **outdoor** scenarios. It is compatible with various existing pre-training frameworks and backbones. A **pre-release** version of the code is accessible; for those interested, please feel free to contact me directly for access. +- *Mar 2023*: We released our codebase, **Pointcept**, a highly potent tool for point cloud representation learning and perception. We welcome new work to join the _Pointcept_ family and highly recommend reading [Quick Start](#quick-start) before starting your trail. +- *Feb 2023*: **MSC** and **CeCo** accepted by CVPR 2023. _MSC_ is a highly efficient and effective pretraining framework that facilitates cross-dataset large-scale pretraining, while _CeCo_ is a segmentation method specifically designed for long-tail datasets. Both approaches are compatible with all existing backbone models in our codebase, and we will soon make the code available for public use. +- *Jan 2023*: **CAC**, oral work of AAAI 2023, has expanded its 3D result with the incorporation of Pointcept. This addition will allow CAC to serve as a pluggable segmentor within our codebase. +- *Sep 2022*: **PTv2** accepted by NeurIPS 2022. It is a continuation of the Point Transformer. The proposed GVA theory can apply to most existing attention mechanisms, while Grid Pooling is also a practical addition to existing pooling methods. + +## Citation +If you find _Pointcept_ useful to your research, please cite our work as encouragement. (ΰ©­ΛŠκ’³β€‹Λ‹)੭✧ +``` +@misc{pointcept2023, + title={Pointcept: A Codebase for Point Cloud Perception Research}, + author={Pointcept Contributors}, + howpublished = {\url{https://github.com/Pointcept/Pointcept}}, + year={2023} +} +``` + +## Overview + +- [Installation](#installation) +- [Data Preparation](#data-preparation) +- [Quick Start](#quick-start) +- [Model Zoo](#model-zoo) +- [Acknowledgement](#acknowledgement) + +## Installation + +### Requirements +- Ubuntu: 18.04 and above. +- CUDA: 11.3 and above. +- PyTorch: 1.10.0 and above. + +### Conda Environment +- **Method 1**: Utilize conda `environment.yml` to create a new environment with one line code: + ```bash + # Create and activate conda environment named as 'pointcept-torch2.5.0-cu12.4' + # cuda: 12.4, pytorch: 2.5.0 + + # run `unset CUDA_PATH` if you have installed cuda in your local environment + conda env create -f environment.yml --verbose + conda activate pointcept-torch2.5.0-cu12.4 + ``` + +- **Method 2**: Use our pre-built Docker image and refer to the supported tags [here](https://hub.docker.com/repository/docker/pointcept/pointcept/general). Quickly verify the Docker image on your local machine with the following command: + ```bash + docker run --gpus all -it --rm pointcept/pointcept:v1.6.0-pytorch2.5.0-cuda12.4-cudnn9-devel bash + git clone https://github.com/facebookresearch/sonata + cd sonata + export PYTHONPATH=./ && python demo/0_pca.py + # Ignore the GUI error, we cannot expect a container to have its GUI, right? + ``` + +- **Method 3**: Manually create a conda environment: + ```bash + conda create -n pointcept python=3.10 -y + conda activate pointcept + + # (Optional) If no CUDA installed + conda install nvidia/label/cuda-12.4.1::cuda conda-forge::cudnn conda-forge::gcc=13.2 conda-forge::gxx=13.2 -y + + conda install ninja -y + # Choose version you want here: https://pytorch.org/get-started/previous-versions/ + conda install pytorch==2.5.0 torchvision==0.13.1 torchaudio==0.20.0 pytorch-cuda=12.4 -c pytorch -y + conda install h5py pyyaml -c anaconda -y + conda install sharedarray tensorboard tensorboardx wandb yapf addict einops scipy plyfile termcolor timm -c conda-forge -y + conda install pytorch-cluster pytorch-scatter pytorch-sparse -c pyg -y + pip install torch-geometric + + # spconv (SparseUNet) + # refer https://github.com/traveller59/spconv + pip install spconv-cu124 + + # PPT (clip) + pip install ftfy regex tqdm + pip install git+https://github.com/openai/CLIP.git + + # PTv1 & PTv2 or precise eval + cd libs/pointops + # usual + python setup.py install + # docker & multi GPU arch + TORCH_CUDA_ARCH_LIST="ARCH LIST" python setup.py install + # e.g. 7.5: RTX 3000; 8.0: a100 More available in: https://developer.nvidia.com/cuda-gpus + TORCH_CUDA_ARCH_LIST="7.5 8.0" python setup.py install + cd ../.. + + # Open3D (visualization, optional) + pip install open3d + ``` + +## Data Preparation + +### ScanNet v2 + +The preprocessing supports semantic and instance segmentation for both `ScanNet20`, `ScanNet200`, and `ScanNet Data Efficient`. +- Download the [ScanNet](http://www.scan-net.org/) v2 dataset. +- Run preprocessing code for raw ScanNet as follows: + + ```bash + # RAW_SCANNET_DIR: the directory of downloaded ScanNet v2 raw dataset. + # PROCESSED_SCANNET_DIR: the directory of the processed ScanNet dataset (output dir). + python pointcept/datasets/preprocessing/scannet/preprocess_scannet.py --dataset_root ${RAW_SCANNET_DIR} --output_root ${PROCESSED_SCANNET_DIR} + ``` +- (Optional) Download ScanNet Data Efficient files: + ```bash + # download-scannet.py is the official download script + # or follow instructions here: https://kaldir.vc.in.tum.de/scannet_benchmark/data_efficient/documentation#download + python download-scannet.py --data_efficient -o ${RAW_SCANNET_DIR} + # unzip downloads + cd ${RAW_SCANNET_DIR}/tasks + unzip limited-annotation-points.zip + unzip limited-reconstruction-scenes.zip + # copy files to processed dataset folder + mkdir ${PROCESSED_SCANNET_DIR}/tasks + cp -r ${RAW_SCANNET_DIR}/tasks/points ${PROCESSED_SCANNET_DIR}/tasks + cp -r ${RAW_SCANNET_DIR}/tasks/scenes ${PROCESSED_SCANNET_DIR}/tasks + ``` +- (Alternative) Our preprocess data can be directly downloaded [[here](https://huggingface.co/datasets/Pointcept/scannet-compressed)], please agree the official license before download it. + +- Link processed dataset to codebase: + ```bash + # PROCESSED_SCANNET_DIR: the directory of the processed ScanNet dataset. + mkdir data + ln -s ${PROCESSED_SCANNET_DIR} ${CODEBASE_DIR}/data/scannet + ``` + +### ScanNet++ +- Download the [ScanNet++](https://kaldir.vc.in.tum.de/scannetpp/) dataset. +- Run preprocessing code for raw ScanNet++ as follows: + ```bash + # RAW_SCANNETPP_DIR: the directory of downloaded ScanNet++ raw dataset. + # PROCESSED_SCANNETPP_DIR: the directory of the processed ScanNet++ dataset (output dir). + # NUM_WORKERS: the number of workers for parallel preprocessing. + python pointcept/datasets/preprocessing/scannetpp/preprocess_scannetpp.py --dataset_root ${RAW_SCANNETPP_DIR} --output_root ${PROCESSED_SCANNETPP_DIR} --num_workers ${NUM_WORKERS} + ``` +- Sampling and chunking large point cloud data in train/val split as follows (only used for training): + ```bash + # PROCESSED_SCANNETPP_DIR: the directory of the processed ScanNet++ dataset (output dir). + # NUM_WORKERS: the number of workers for parallel preprocessing. + python pointcept/datasets/preprocessing/sampling_chunking_data.py --dataset_root ${PROCESSED_SCANNETPP_DIR} --grid_size 0.01 --chunk_range 6 6 --chunk_stride 3 3 --split train --num_workers ${NUM_WORKERS} + python pointcept/datasets/preprocessing/sampling_chunking_data.py --dataset_root ${PROCESSED_SCANNETPP_DIR} --grid_size 0.01 --chunk_range 6 6 --chunk_stride 3 3 --split val --num_workers ${NUM_WORKERS} + ``` +- Link processed dataset to codebase: + ```bash + # PROCESSED_SCANNETPP_DIR: the directory of the processed ScanNet dataset. + mkdir data + ln -s ${PROCESSED_SCANNETPP_DIR} ${CODEBASE_DIR}/data/scannetpp + ``` + +### S3DIS + +- Download S3DIS data by filling this [Google form](https://docs.google.com/forms/d/e/1FAIpQLScDimvNMCGhy_rmBA2gHfDu3naktRm6A8BPwAWWDv-Uhm6Shw/viewform?c=0&w=1). Download the `Stanford3dDataset_v1.2.zip` file and unzip it. +- Fix error in `Area_5/office_19/Annotations/ceiling` Line 323474 (103.0οΏ½0000 => 103.000000). +- (Optional) Download Full 2D-3D S3DIS dataset (no XYZ) from [here](https://github.com/alexsax/2D-3D-Semantics) for parsing normal. +- Run preprocessing code for S3DIS as follows: + + ```bash + # S3DIS_DIR: the directory of downloaded Stanford3dDataset_v1.2 dataset. + # RAW_S3DIS_DIR: the directory of Stanford2d3dDataset_noXYZ dataset. (optional, for parsing normal) + # PROCESSED_S3DIS_DIR: the directory of processed S3DIS dataset (output dir). + + # S3DIS without aligned angle + python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} + # S3DIS with aligned angle + python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} --align_angle + # S3DIS with normal vector (recommended, normal is helpful) + python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} --raw_root ${RAW_S3DIS_DIR} --parse_normal + python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} --raw_root ${RAW_S3DIS_DIR} --align_angle --parse_normal + ``` + +- (Alternative) Our preprocess data can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/s3dis-compressed +)] (with normal vector and aligned angle), please agree with the official license before downloading it. + +- Link processed dataset to codebase. + ```bash + # PROCESSED_S3DIS_DIR: the directory of processed S3DIS dataset. + mkdir data + ln -s ${PROCESSED_S3DIS_DIR} ${CODEBASE_DIR}/data/s3dis + ``` + + +### ArkitScenes + +- Download ArkitScenes 3DOD split with the following commands: + ```bash + # RAW_AS_DIR: the directory of downloaded Raw ArkitScenes dataset. + git clone https://github.com/apple/ARKitScenes.git + cd ARKitScenes + python download_data.py 3dod --download_dir $RAW_AS_DIR --video_id_csv threedod/3dod_train_val_splits.csv + ``` +- Run preprocessing code for ArkitScenes as follows: + ```bash + # RAW_AS_DIR: the directory of downloaded ArkitScenes dataset. + # PROCESSED_AS_DIR: the directory of processed ArkitScenes dataset (output dir). + # NUM_WORKERS: Number for workers for preprocessing, default same as cpu count (might OOM). + cd $POINTCEPT_DIR + export PYTHONPATH=./ + python pointcept/datasets/preprocessing/arkitscenes/preprocess_arkitscenes_mesh.py --dataset_root $RAW_AS_DIR --output_root $PROCESSED_AS_DIR --num_workers $NUM_WORKERS + ``` + +- (Alternative) Our preprocess data can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/arkitscenes-compressed +)] please read and agree the official [license](https://github.com/apple/ARKitScenes?tab=License-1-ov-file#readme) before download it. (Unzip with the following command: + `find ./ -name '*.tar.gz' | xargs -n 1 -P 8 -I {} sh -c 'tar -xzvf {}'`) + +- Link processed dataset to codebase. + ```bash + # PROCESSED_AR_DIR: the directory of processed ArkitScenes dataset (output dir). + mkdir data + ln -s ${PROCESSED_AR_DIR} ${CODEBASE_DIR}/data/arkitscenes + ``` + +### Habitat - Matterport 3D (HM3D) + +- Download HM3D `hm3d-train-glb-v0.2.tar` and `hm3d-val-glb-v0.2.tar` with instuction [here](https://github.com/facebookresearch/habitat-sim/blob/main/DATASETS.md#habitat-matterport-3d-research-dataset-hm3d) and unzip them. +- Run preprocessing code for HM3D as follows: + ```bash + # RAW_HM_DIR: the directory of downloaded HM3D dataset. + # PROCESSED_HM_DIR: the directory of processed HM3D dataset (output dir). + # NUM_WORKERS: Number for workers for preprocessing, default same as cpu count (might OOM). + export PYTHONPATH=./ + python pointcept/datasets/preprocessing/hm3d/preprocess_hm3d.py --dataset_root $RAW_HM_DIR --output_root $PROCESSED_HM_DIR --density 0.02 --num_workers $NUM_WORKERS + ``` + +- (Alternative) Our preprocess data can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/hm3d-compressed +)] please read and agree the official [license](https://matterport.com/legal/matterport-end-user-license-agreement-academic-use-model-data) before download it. (Unzip with the following command: + `find ./ -name '*.tar.gz' | xargs -n 1 -P 4 -I {} sh -c 'tar -xzvf {}'`) + +- Link processed dataset to codebase. + ```bash + # PROCESSED_HM_DIR: the directory of processed HM3D dataset (output dir). + mkdir data + ln -s ${PROCESSED_HM_DIR} ${CODEBASE_DIR}/data/hm3d + + +### Matterport3D +- Follow [this page](https://niessner.github.io/Matterport/#download) to request access to the dataset. +- Download the "region_segmentation" type, which represents the division of a scene into individual rooms. + ```bash + # download-mp.py is the official download script + # MATTERPORT3D_DIR: the directory of downloaded Matterport3D dataset. + python download-mp.py -o {MATTERPORT3D_DIR} --type region_segmentations + ``` +- Unzip the region_segmentations data + ```bash + # MATTERPORT3D_DIR: the directory of downloaded Matterport3D dataset. + python pointcept/datasets/preprocessing/matterport3d/unzip_matterport3d_region_segmentation.py --dataset_root {MATTERPORT3D_DIR} + ``` +- Run preprocessing code for Matterport3D as follows: + ```bash + # MATTERPORT3D_DIR: the directory of downloaded Matterport3D dataset. + # PROCESSED_MATTERPORT3D_DIR: the directory of processed Matterport3D dataset (output dir). + # NUM_WORKERS: the number of workers for this preprocessing. + python pointcept/datasets/preprocessing/matterport3d/preprocess_matterport3d_mesh.py --dataset_root ${MATTERPORT3D_DIR} --output_root ${PROCESSED_MATTERPORT3D_DIR} --num_workers ${NUM_WORKERS} + ``` +- Link processed dataset to codebase. + ```bash + # PROCESSED_MATTERPORT3D_DIR: the directory of processed Matterport3D dataset (output dir). + mkdir data + ln -s ${PROCESSED_MATTERPORT3D_DIR} ${CODEBASE_DIR}/data/matterport3d + ``` + +Following the instruction of [OpenRooms](https://github.com/ViLab-UCSD/OpenRooms), we remapped Matterport3D's categories to ScanNet 20 semantic categories with the addition of a ceiling category. +* (Alternative) Our preprocess data can also be downloaded [here](https://huggingface.co/datasets/Pointcept/matterport3d-compressed), please agree the official license before download it. + + +### Structured3D + +- Download Structured3D panorama related and perspective (full) related zip files by filling this [Google form](https://docs.google.com/forms/d/e/1FAIpQLSc0qtvh4vHSoZaW6UvlXYy79MbcGdZfICjh4_t4bYofQIVIdw/viewform?pli=1) (no need to unzip them). +- Organize all downloaded zip file in one folder (`${STRUCT3D_DIR}`). +- Run preprocessing code for Structured3D as follows: + ```bash + # STRUCT3D_DIR: the directory of downloaded Structured3D dataset. + # PROCESSED_STRUCT3D_DIR: the directory of processed Structured3D dataset (output dir). + # NUM_WORKERS: Number for workers for preprocessing, default same as cpu count (might OOM). + export PYTHONPATH=./ + python pointcept/datasets/preprocessing/structured3d/preprocess_structured3d.py --dataset_root ${STRUCT3D_DIR} --output_root ${PROCESSED_STRUCT3D_DIR} --num_workers ${NUM_WORKERS} --grid_size 0.01 --fuse_prsp --fuse_pano + ``` +Following the instruction of [Swin3D](https://arxiv.org/abs/2304.06906), we keep 25 categories with frequencies of more than 0.001, out of the original 40 categories. + +[//]: # (- (Alternative) Our preprocess data can also be downloaded [[here]()], please agree the official license before download it.) + +- (Alternative) Our preprocess data can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/structured3d-compressed +)] (with perspective views and panorama view, 471.7G after unzipping), please agree the official license before download it. (Unzip with the following command: + `find ./ -name '*.tar.gz' | xargs -n 1 -P 15 -I {} sh -c 'tar -xzvf {}'`) + +- Link processed dataset to codebase. + ```bash + # PROCESSED_STRUCT3D_DIR: the directory of processed Structured3D dataset (output dir). + mkdir data + ln -s ${PROCESSED_STRUCT3D_DIR} ${CODEBASE_DIR}/data/structured3d + ``` + +### SemanticKITTI +- Download [SemanticKITTI](http://www.semantic-kitti.org/dataset.html#download) dataset. +- Link dataset to codebase. + ```bash + # SEMANTIC_KITTI_DIR: the directory of SemanticKITTI dataset. + # |- SEMANTIC_KITTI_DIR + # |- dataset + # |- sequences + # |- 00 + # |- 01 + # |- ... + + mkdir -p data + ln -s ${SEMANTIC_KITTI_DIR} ${CODEBASE_DIR}/data/semantic_kitti + ``` + +### nuScenes +- Download the official [NuScene](https://www.nuscenes.org/nuscenes#download) dataset (with Lidar Segmentation) and organize the downloaded files as follows: + ```bash + NUSCENES_DIR + │── samples + │── sweeps + │── lidarseg + ... + │── v1.0-trainval + │── v1.0-test + ``` +- Run information preprocessing code (modified from OpenPCDet) for nuScenes as follows: + ```bash + # NUSCENES_DIR: the directory of downloaded nuScenes dataset. + # PROCESSED_NUSCENES_DIR: the directory of processed nuScenes dataset (output dir). + # MAX_SWEEPS: Max number of sweeps. Default: 10. + pip install nuscenes-devkit pyquaternion + python pointcept/datasets/preprocessing/nuscenes/preprocess_nuscenes_info.py --dataset_root ${NUSCENES_DIR} --output_root ${PROCESSED_NUSCENES_DIR} --max_sweeps ${MAX_SWEEPS} --with_camera + ``` +- (Alternative) Our preprocess nuScenes information data can also be downloaded [[here]( +https://huggingface.co/datasets/Pointcept/nuscenes-compressed)] (only processed information, still need to download raw dataset and link to the folder), please agree the official license before download it. + +- Link raw dataset to processed NuScene dataset folder: + ```bash + # NUSCENES_DIR: the directory of downloaded nuScenes dataset. + # PROCESSED_NUSCENES_DIR: the directory of processed nuScenes dataset (output dir). + ln -s ${NUSCENES_DIR} {PROCESSED_NUSCENES_DIR}/raw + ``` + then the processed nuscenes folder is organized as follows: + ```bash + nuscene + |── raw + │── samples + │── sweeps + │── lidarseg + ... + │── v1.0-trainval + │── v1.0-test + |── info + ``` + +- Link processed dataset to codebase. + ```bash + # PROCESSED_NUSCENES_DIR: the directory of processed nuScenes dataset (output dir). + mkdir data + ln -s ${PROCESSED_NUSCENES_DIR} ${CODEBASE_DIR}/data/nuscenes + ``` + +### Waymo +- Download the official [Waymo](https://waymo.com/open/download/) dataset (v1.4.3) and organize the downloaded files as follows: + ```bash + WAYMO_RAW_DIR + │── training + │── validation + │── testing + ``` +- Install the following dependence: + ```bash + # If shows "No matching distribution found", download whl directly from Pypi and install the package. + conda create -n waymo python=3.10 -y + conda activate waymo + pip install waymo-open-dataset-tf-2-12-0 + ``` +- Run the preprocessing code as follows: + ```bash + # WAYMO_DIR: the directory of the downloaded Waymo dataset. + # PROCESSED_WAYMO_DIR: the directory of the processed Waymo dataset (output dir). + # NUM_WORKERS: num workers for preprocessing + python pointcept/datasets/preprocessing/waymo/preprocess_waymo.py --dataset_root ${WAYMO_DIR} --output_root ${PROCESSED_WAYMO_DIR} --splits training validation --num_workers ${NUM_WORKERS} + ``` + +- Link processed dataset to the codebase. + ```bash + # PROCESSED_WAYMO_DIR: the directory of the processed Waymo dataset (output dir). + mkdir data + ln -s ${PROCESSED_WAYMO_DIR} ${CODEBASE_DIR}/data/waymo + ``` + +### ModelNet +- Download [modelnet40_normal_resampled.zip](https://huggingface.co/datasets/Pointcept/modelnet40_normal_resampled-compressed) and unzip +- Link dataset to the codebase. + ```bash + mkdir -p data + ln -s ${MODELNET_DIR} ${CODEBASE_DIR}/data/modelnet40_normal_resampled + ``` + +## Quick Start + +### Training +**Train from scratch.** The training processing is based on configs in `configs` folder. +The training script will generate an experiment folder in `exp` folder and backup essential code in the experiment folder. +Training config, log, tensorboard, and checkpoints will also be saved into the experiment folder during the training process. +```bash +export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES} +# Script (Recommended) +sh scripts/train.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -c ${CONFIG_NAME} -n ${EXP_NAME} +# Direct +export PYTHONPATH=./ +python tools/train.py --config-file ${CONFIG_PATH} --num-gpus ${NUM_GPU} --options save_path=${SAVE_PATH} +``` + +For example: +```bash +# By script (Recommended) +# -p is default set as python and can be ignored +sh scripts/train.sh -p python -d scannet -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base +# Direct +export PYTHONPATH=./ +python tools/train.py --config-file configs/scannet/semseg-pt-v2m2-0-base.py --options save_path=exp/scannet/semseg-pt-v2m2-0-base +``` +**Resume training from checkpoint.** If the training process is interrupted by accident, the following script can resume training from a given checkpoint. +```bash +export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES} +# Script (Recommended) +# simply add "-r true" +sh scripts/train.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -c ${CONFIG_NAME} -n ${EXP_NAME} -r true +# Direct +export PYTHONPATH=./ +python tools/train.py --config-file ${CONFIG_PATH} --num-gpus ${NUM_GPU} --options save_path=${SAVE_PATH} resume=True weight=${CHECKPOINT_PATH} +``` +**Weights and Biases.** +Pointcept by default enables both `tensorboard` and `wandb`. There are some usage notes related to `wandb`: +1. Disable by set `enable_wandb=False`; +2. Sync with `wandb` remote server by `wandb login` in the terminal or set `wandb_key=YOUR_WANDB_KEY` in config. +3. The project name is "Pointcept" by default, custom it to your research project name by setting `wandb_project=YOUR_PROJECT_NAME` (e.g. Sonata-Dev, PointTransformerV3-Dev) + +### Testing +During training, model evaluation is performed on point clouds after grid sampling (voxelization), providing an initial assessment of model performance. ~~However, to obtain precise evaluation results, testing is **essential**~~ *(now we automatically run the testing process after training with the `PreciseEvaluation` hook)*. The testing process involves subsampling a dense point cloud into a sequence of voxelized point clouds, ensuring comprehensive coverage of all points. These sub-results are then predicted and collected to form a complete prediction of the entire point cloud. This approach yields higher evaluation results compared to simply mapping/interpolating the prediction. In addition, our testing code supports TTA (test time augmentation) testing, which further enhances the stability of evaluation performance. + +```bash +# By script (Based on experiment folder created by training script) +sh scripts/test.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -n ${EXP_NAME} -w ${CHECKPOINT_NAME} +# Direct +export PYTHONPATH=./ +python tools/test.py --config-file ${CONFIG_PATH} --num-gpus ${NUM_GPU} --options save_path=${SAVE_PATH} weight=${CHECKPOINT_PATH} +``` +For example: +```bash +# By script (Based on experiment folder created by training script) +# -p is default set as python and can be ignored +# -w is default set as model_best and can be ignored +sh scripts/test.sh -p python -d scannet -n semseg-pt-v2m2-0-base -w model_best +# Direct +export PYTHONPATH=./ +python tools/test.py --config-file configs/scannet/semseg-pt-v2m2-0-base.py --options save_path=exp/scannet/semseg-pt-v2m2-0-base weight=exp/scannet/semseg-pt-v2m2-0-base/model/model_best.pth +``` + +The TTA can be disabled by replace `data.test.test_cfg.aug_transform = [...]` with: + +```python +data = dict( + train = dict(...), + val = dict(...), + test = dict( + ..., + test_cfg = dict( + ..., + aug_transform = [ + [dict(type="RandomRotateTargetAngle", angle=[0], axis="z", center=[0, 0, 0], p=1)] + ] + ) + ) +) +``` + +### Offset +`Offset` is the separator of point clouds in batch data, and it is similar to the concept of `Batch` in PyG. +A visual illustration of batch and offset is as follows: +

+ + + + + + pointcept + +
+ +

+ +## Model Zoo +### 1. Backbones and Semantic Segmentation +#### SparseUNet + +_Pointcept_ provides `SparseUNet` implemented by `SpConv` and `MinkowskiEngine`. The SpConv version is recommended since SpConv is easy to install and faster than MinkowskiEngine. Meanwhile, SpConv is also widely applied in outdoor perception. + +- **SpConv (recommend)** + +The SpConv version `SparseUNet` in the codebase was fully rewrite from `MinkowskiEngine` version, example running script is as follows: + +```bash +# ScanNet val +sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base +# ScanNet200 +sh scripts/train.sh -g 4 -d scannet200 -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base +# S3DIS +sh scripts/train.sh -g 4 -d s3dis -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base +# S3DIS (with normal) +sh scripts/train.sh -g 4 -d s3dis -c semseg-spunet-v1m1-0-cn-base -n semseg-spunet-v1m1-0-cn-base +# SemanticKITTI +sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base +# nuScenes +sh scripts/train.sh -g 4 -d nuscenes -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base +# ModelNet40 +sh scripts/train.sh -g 2 -d modelnet40 -c cls-spunet-v1m1-0-base -n cls-spunet-v1m1-0-base + +# ScanNet Data Efficient +sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la20 -n semseg-spunet-v1m1-2-efficient-la20 +sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la50 -n semseg-spunet-v1m1-2-efficient-la50 +sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la100 -n semseg-spunet-v1m1-2-efficient-la100 +sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la200 -n semseg-spunet-v1m1-2-efficient-la200 +sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr1 -n semseg-spunet-v1m1-2-efficient-lr1 +sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr5 -n semseg-spunet-v1m1-2-efficient-lr5 +sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr10 -n semseg-spunet-v1m1-2-efficient-lr10 +sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr20 -n semseg-spunet-v1m1-2-efficient-lr20 + +# Profile model run time +sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-0-enable-profiler -n semseg-spunet-v1m1-0-enable-profiler +``` + +- **MinkowskiEngine** + +The MinkowskiEngine version `SparseUNet` in the codebase was modified from the original MinkowskiEngine repo, and example running scripts are as follows: +1. Install MinkowskiEngine, refer https://github.com/NVIDIA/MinkowskiEngine +2. Training with the following example scripts: +```bash +# Uncomment "# from .sparse_unet import *" in "pointcept/models/__init__.py" +# Uncomment "# from .mink_unet import *" in "pointcept/models/sparse_unet/__init__.py" +# ScanNet +sh scripts/train.sh -g 4 -d scannet -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base +# ScanNet200 +sh scripts/train.sh -g 4 -d scannet200 -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base +# S3DIS +sh scripts/train.sh -g 4 -d s3dis -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base +# SemanticKITTI +sh scripts/train.sh -g 2 -d semantic_kitti -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base +``` + +#### OA-CNNs +Introducing Omni-Adaptive 3D CNNs (**OA-CNNs**), a family of networks that integrates a lightweight module to greatly enhance the adaptivity of sparse CNNs at minimal computational cost. Without any self-attention modules, **OA-CNNs** favorably surpass point transformers in terms of accuracy in both indoor and outdoor scenes, with much less latency and memory cost. Issue related to **OA-CNNs** can @Pbihao. +```bash +# ScanNet +sh scripts/train.sh -g 4 -d scannet -c semseg-oacnns-v1m1-0-base -n semseg-oacnns-v1m1-0-base +``` + +#### Point Transformers +- **PTv3** + +[PTv3](https://arxiv.org/abs/2312.10035) is an efficient backbone model that achieves SOTA performances across indoor and outdoor scenarios. The full PTv3 relies on FlashAttention, while FlashAttention relies on CUDA 11.6 and above, make sure your local Pointcept environment satisfies the requirements. + +If you can not upgrade your local environment to satisfy the requirements (CUDA >= 11.6), then you can disable FlashAttention by setting the model parameter `enable_flash` to `false` and reducing the `enc_patch_size` and `dec_patch_size` to a level (e.g. 128). + +FlashAttention force disables RPE and forces the accuracy reduced to fp16. If you require these features, please disable `enable_flash` and adjust `enable_rpe`, `upcast_attention` and`upcast_softmax`. + +Detailed instructions and experiment records (containing weights) are available on the [project repository](https://github.com/Pointcept/PointTransformerV3). Example running scripts are as follows: +```bash +# Scratched ScanNet +sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base +# PPT joint training (ScanNet + Structured3D) and evaluate in ScanNet +sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-1-ppt-extreme -n semseg-pt-v3m1-1-ppt-extreme + +# Scratched ScanNet200 +sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base +# Fine-tuning from PPT joint training (ScanNet + Structured3D) with ScanNet200 +# PTV3_PPT_WEIGHT_PATH: Path to model weight trained by PPT multi-dataset joint training +# e.g. exp/scannet/semseg-pt-v3m1-1-ppt-extreme/model/model_best.pth +sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v3m1-1-ppt-ft -n semseg-pt-v3m1-1-ppt-ft -w ${PTV3_PPT_WEIGHT_PATH} + +# Scratched ScanNet++ +sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base +# Scratched ScanNet++ test +sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v3m1-1-submit -n semseg-pt-v3m1-1-submit + + +# Scratched S3DIS +sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base +# an example for disbale flash_attention and enable rpe. +sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v3m1-1-rpe -n semseg-pt-v3m1-0-rpe +# PPT joint training (ScanNet + S3DIS + Structured3D) and evaluate in ScanNet +sh scripts/train.sh -g 8 -d s3dis -c semseg-pt-v3m1-1-ppt-extreme -n semseg-pt-v3m1-1-ppt-extreme +# S3DIS 6-fold cross validation +# 1. The default configs are evaluated on Area_5, modify the "data.train.split", "data.val.split", and "data.test.split" to make the config evaluated on Area_1 ~ Area_6 respectively. +# 2. Train and evaluate the model on each split of areas and gather result files located in "exp/s3dis/EXP_NAME/result/Area_x.pth" in one single folder, noted as RECORD_FOLDER. +# 3. Run the following script to get S3DIS 6-fold cross validation performance: +export PYTHONPATH=./ +python tools/test_s3dis_6fold.py --record_root ${RECORD_FOLDER} + +# Scratched nuScenes +sh scripts/train.sh -g 4 -d nuscenes -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base +# Scratched Waymo +sh scripts/train.sh -g 4 -d waymo -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base + +# More configs and exp records for PTv3 will be available soon. +``` + +Indoor semantic segmentation +| Model | Benchmark | Additional Data | Num GPUs | Val mIoU | Config | Tensorboard | Exp Record | +| :---: | :---: |:---------------:| :---: | :---: | :---: | :---: | :---: | +| PTv3 | ScanNet | ✗ | 4 | 77.6% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/scannet-semseg-pt-v3m1-0-base) | +| PTv3 + PPT | ScanNet | ✓ | 8 | 78.5% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet/semseg-pt-v3m1-1-ppt-extreme.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/scannet-semseg-pt-v3m1-1-ppt-extreme) | +| PTv3 | ScanNet200 | ✗ | 4 | 35.3% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet200/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) |[link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/scannet200-semseg-pt-v3m1-0-base)| +| PTv3 | S3DIS (Area5) | ✗ | 4 | 73.6% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/s3dis/semseg-pt-v3m1-0-rpe.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/s3dis-semseg-pt-v3m1-0-rpe) | +| PTv3 + PPT | S3DIS (Area5) | ✓ | 8 | 75.4% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/s3dis/semseg-pt-v3m1-1-ppt-extreme.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/s3dis-semseg-pt-v3m1-1-ppt-extreme) | +_**\*Released model weights are trained for v1.5.1, weights for v1.5.2 and later is still ongoing.**_ + +- **PTv2 mode2** + +The original PTv2 was trained on 4 * RTX a6000 (48G memory). Even enabling AMP, the memory cost of the original PTv2 is slightly larger than 24G. Considering GPUs with 24G memory are much more accessible, I tuned the PTv2 on the latest Pointcept and made it runnable on 4 * RTX 3090 machines. + +`PTv2 Mode2` enables AMP and disables _Position Encoding Multiplier_ & _Grouped Linear_. During our further research, we found that precise coordinates are not necessary for point cloud understanding (Replacing precise coordinates with grid coordinates doesn't influence the performance. Also, SparseUNet is an example). As for Grouped Linear, my implementation of Grouped Linear seems to cost more memory than the Linear layer provided by PyTorch. Benefiting from the codebase and better parameter tuning, we also relieve the overfitting problem. The reproducing performance is even better than the results reported in our paper. + +Example running scripts are as follows: + +```bash +# ptv2m2: PTv2 mode2, disable PEM & Grouped Linear, GPU memory cost < 24G (recommend) +# ScanNet +sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base +sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m2-3-lovasz -n semseg-pt-v2m2-3-lovasz + +# ScanNet test +sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m2-1-submit -n semseg-pt-v2m2-1-submit +# ScanNet200 +sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base +# ScanNet++ +sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base +# ScanNet++ test +sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v2m2-1-submit -n semseg-pt-v2m2-1-submit +# S3DIS +sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base +# SemanticKITTI +sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base +# nuScenes +sh scripts/train.sh -g 4 -d nuscenes -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base +``` + +- **PTv2 mode1** + +`PTv2 mode1` is the original PTv2 we reported in our paper, example running scripts are as follows: + +```bash +# ptv2m1: PTv2 mode1, Original PTv2, GPU memory cost > 24G +# ScanNet +sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m1-0-base -n semseg-pt-v2m1-0-base +# ScanNet200 +sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v2m1-0-base -n semseg-pt-v2m1-0-base +# S3DIS +sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v2m1-0-base -n semseg-pt-v2m1-0-base +``` + +- **PTv1** + +The original PTv1 is also available in our Pointcept codebase. I haven't run PTv1 for a long time, but I have ensured that the example running script works well. + +```bash +# ScanNet +sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v1-0-base -n semseg-pt-v1-0-base +# ScanNet200 +sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v1-0-base -n semseg-pt-v1-0-base +# S3DIS +sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v1-0-base -n semseg-pt-v1-0-base +``` + + +#### Stratified Transformer +1. Additional requirements: +```bash +pip install torch-points3d +# Fix dependence, caused by installing torch-points3d +pip uninstall SharedArray +pip install SharedArray==3.2.1 + +cd libs/pointops2 +python setup.py install +cd ../.. +``` +2. Uncomment `# from .stratified_transformer import *` in `pointcept/models/__init__.py`. +3. Refer [Optional Installation](installation) to install dependence. +4. Training with the following example scripts: +```bash +# stv1m1: Stratified Transformer mode1, Modified from the original Stratified Transformer code. +# PTv2m2: Stratified Transformer mode2, My rewrite version (recommend). + +# ScanNet +sh scripts/train.sh -g 4 -d scannet -c semseg-st-v1m2-0-refined -n semseg-st-v1m2-0-refined +sh scripts/train.sh -g 4 -d scannet -c semseg-st-v1m1-0-origin -n semseg-st-v1m1-0-origin +# ScanNet200 +sh scripts/train.sh -g 4 -d scannet200 -c semseg-st-v1m2-0-refined -n semseg-st-v1m2-0-refined +# S3DIS +sh scripts/train.sh -g 4 -d s3dis -c semseg-st-v1m2-0-refined -n semseg-st-v1m2-0-refined +``` + +#### SPVCNN +`SPVCNN` is a baseline model of [SPVNAS](https://github.com/mit-han-lab/spvnas), it is also a practical baseline for outdoor datasets. +1. Install torchsparse: +```bash +# refer https://github.com/mit-han-lab/torchsparse +# install method without sudo apt install +conda install google-sparsehash -c bioconda +export C_INCLUDE_PATH=${CONDA_PREFIX}/include:$C_INCLUDE_PATH +export CPLUS_INCLUDE_PATH=${CONDA_PREFIX}/include:CPLUS_INCLUDE_PATH +pip install --upgrade git+https://github.com/mit-han-lab/torchsparse.git +``` +2. Training with the following example scripts: +```bash +# SemanticKITTI +sh scripts/train.sh -g 2 -d semantic_kitti -c semseg-spvcnn-v1m1-0-base -n semseg-spvcnn-v1m1-0-base +``` + +#### OctFormer +OctFormer from _OctFormer: Octree-based Transformers for 3D Point Clouds_. +1. Additional requirements: +```bash +cd libs +git clone https://github.com/octree-nn/dwconv.git +pip install ./dwconv +pip install ocnn +``` +2. Uncomment `# from .octformer import *` in `pointcept/models/__init__.py`. +2. Training with the following example scripts: +```bash +# ScanNet +sh scripts/train.sh -g 4 -d scannet -c semseg-octformer-v1m1-0-base -n semseg-octformer-v1m1-0-base +``` + +#### Swin3D +Swin3D from _Swin3D: A Pretrained Transformer Backbone for 3D Indoor Scene Understanding_. +1. Additional requirements: +```bash +# 1. Install MinkEngine v0.5.4, follow readme in https://github.com/NVIDIA/MinkowskiEngine; +# 2. Install Swin3D, mainly for cuda operation: +cd libs +git clone https://github.com/microsoft/Swin3D.git +cd Swin3D +pip install ./ +``` +2. Uncomment `# from .swin3d import *` in `pointcept/models/__init__.py`. +3. Pre-Training with the following example scripts (Structured3D preprocessing refer [here](#structured3d)): +```bash +# Structured3D + Swin-S +sh scripts/train.sh -g 4 -d structured3d -c semseg-swin3d-v1m1-0-small -n semseg-swin3d-v1m1-0-small +# Structured3D + Swin-L +sh scripts/train.sh -g 4 -d structured3d -c semseg-swin3d-v1m1-1-large -n semseg-swin3d-v1m1-1-large + +# Addition +# Structured3D + SpUNet +sh scripts/train.sh -g 4 -d structured3d -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base +# Structured3D + PTv2 +sh scripts/train.sh -g 4 -d structured3d -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base +``` +4. Fine-tuning with the following example scripts: +```bash +# ScanNet + Swin-S +sh scripts/train.sh -g 4 -d scannet -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-0-small -n semseg-swin3d-v1m1-0-small +# ScanNet + Swin-L +sh scripts/train.sh -g 4 -d scannet -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-1-large -n semseg-swin3d-v1m1-1-large + +# S3DIS + Swin-S (here we provide config support S3DIS normal vector) +sh scripts/train.sh -g 4 -d s3dis -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-0-small -n semseg-swin3d-v1m1-0-small +# S3DIS + Swin-L (here we provide config support S3DIS normal vector) +sh scripts/train.sh -g 4 -d s3dis -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-1-large -n semseg-swin3d-v1m1-1-large +``` + +#### Context-Aware Classifier +`Context-Aware Classifier` is a segmentor that can further boost the performance of each backbone, as a replacement for `Default Segmentor`. Training with the following example scripts: +```bash +# ScanNet +sh scripts/train.sh -g 4 -d scannet -c semseg-cac-v1m1-0-spunet-base -n semseg-cac-v1m1-0-spunet-base +sh scripts/train.sh -g 4 -d scannet -c semseg-cac-v1m1-1-spunet-lovasz -n semseg-cac-v1m1-1-spunet-lovasz +sh scripts/train.sh -g 4 -d scannet -c semseg-cac-v1m1-2-ptv2-lovasz -n semseg-cac-v1m1-2-ptv2-lovasz + +# ScanNet200 +sh scripts/train.sh -g 4 -d scannet200 -c semseg-cac-v1m1-0-spunet-base -n semseg-cac-v1m1-0-spunet-base +sh scripts/train.sh -g 4 -d scannet200 -c semseg-cac-v1m1-1-spunet-lovasz -n semseg-cac-v1m1-1-spunet-lovasz +sh scripts/train.sh -g 4 -d scannet200 -c semseg-cac-v1m1-2-ptv2-lovasz -n semseg-cac-v1m1-2-ptv2-lovasz +``` + + +### 2. Instance Segmentation +#### PointGroup +[PointGroup](https://github.com/dvlab-research/PointGroup) is a baseline framework for point cloud instance segmentation. +1. Additional requirements: +```bash +conda install -c bioconda google-sparsehash +cd libs/pointgroup_ops +python setup.py install --include_dirs=${CONDA_PREFIX}/include +cd ../.. +``` +2. Uncomment `# from .point_group import *` in `pointcept/models/__init__.py`. +3. Training with the following example scripts: +```bash +# ScanNet +sh scripts/train.sh -g 4 -d scannet -c insseg-pointgroup-v1m1-0-spunet-base -n insseg-pointgroup-v1m1-0-spunet-base +# S3DIS +sh scripts/train.sh -g 4 -d scannet -c insseg-pointgroup-v1m1-0-spunet-base -n insseg-pointgroup-v1m1-0-spunet-base +``` + +### 3. Pre-training +#### Concerto +Follow the instruction [here](https://github.com/Pointcept/Pointcept/tree/main/pointcept/models/concerto). + +#### Sonata +Follow the instruction [here](https://github.com/Pointcept/Pointcept/tree/main/pointcept/models/sonata). + +#### Masked Scene Contrast (MSC) +1. Pre-training with the following example scripts: +```bash +# ScanNet +sh scripts/train.sh -g 8 -d scannet -c pretrain-msc-v1m1-0-spunet-base -n pretrain-msc-v1m1-0-spunet-base +``` + +2. Fine-tuning with the following example scripts: +enable PointGroup ([here](#pointgroup)) before fine-tuning on instance segmentation task. +```bash +# ScanNet20 Semantic Segmentation +sh scripts/train.sh -g 8 -d scannet -w exp/scannet/pretrain-msc-v1m1-0-spunet-base/model/model_last.pth -c semseg-spunet-v1m1-4-ft -n semseg-msc-v1m1-0f-spunet-base +# ScanNet20 Instance Segmentation (enable PointGroup before running the script) +sh scripts/train.sh -g 4 -d scannet -w exp/scannet/pretrain-msc-v1m1-0-spunet-base/model/model_last.pth -c insseg-pointgroup-v1m1-0-spunet-base -n insseg-msc-v1m1-0f-pointgroup-spunet-base +``` +3. Example log and weight: [[Pretrain](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/wuxy_connect_hku_hk/EYvNV4XUJ_5Mlk-g15RelN4BW_P8lVBfC_zhjC_BlBDARg?e=UoGFWH)] [[Semseg](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/wuxy_connect_hku_hk/EQkDiv5xkOFKgCpGiGtAlLwBon7i8W6my3TIbGVxuiTttQ?e=tQFnbr)] + +#### Point Prompt Training (PPT) +PPT presents a multi-dataset pre-training framework, and it is compatible with various existing pre-training frameworks and backbones. +1. PPT supervised joint training with the following example scripts: +```bash +# ScanNet + Structured3d, validate on ScanNet (S3DIS might cause long data time, w/o S3DIS for a quick validation) >= 3090 * 8 +sh scripts/train.sh -g 8 -d scannet -c semseg-ppt-v1m1-0-sc-st-spunet -n semseg-ppt-v1m1-0-sc-st-spunet +sh scripts/train.sh -g 8 -d scannet -c semseg-ppt-v1m1-1-sc-st-spunet-submit -n semseg-ppt-v1m1-1-sc-st-spunet-submit +# ScanNet + S3DIS + Structured3d, validate on S3DIS (>= a100 * 8) +sh scripts/train.sh -g 8 -d s3dis -c semseg-ppt-v1m1-0-s3-sc-st-spunet -n semseg-ppt-v1m1-0-s3-sc-st-spunet +# SemanticKITTI + nuScenes + Waymo, validate on SemanticKITTI (bs12 >= 3090 * 4 >= 3090 * 8, v1m1-0 is still on tuning) +sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-ppt-v1m1-0-nu-sk-wa-spunet -n semseg-ppt-v1m1-0-nu-sk-wa-spunet +sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-ppt-v1m2-0-sk-nu-wa-spunet -n semseg-ppt-v1m2-0-sk-nu-wa-spunet +sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-ppt-v1m2-1-sk-nu-wa-spunet-submit -n semseg-ppt-v1m2-1-sk-nu-wa-spunet-submit +# SemanticKITTI + nuScenes + Waymo, validate on nuScenes (bs12 >= 3090 * 4; bs24 >= 3090 * 8, v1m1-0 is still on tuning)) +sh scripts/train.sh -g 4 -d nuscenes -c semseg-ppt-v1m1-0-nu-sk-wa-spunet -n semseg-ppt-v1m1-0-nu-sk-wa-spunet +sh scripts/train.sh -g 4 -d nuscenes -c semseg-ppt-v1m2-0-nu-sk-wa-spunet -n semseg-ppt-v1m2-0-nu-sk-wa-spunet +sh scripts/train.sh -g 4 -d nuscenes -c semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit -n semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit +``` + +#### PointContrast +1. Preprocess and link ScanNet-Pair dataset (pair-wise matching with ScanNet raw RGB-D frame, ~1.5T): +```bash +# RAW_SCANNET_DIR: the directory of downloaded ScanNet v2 raw dataset. +# PROCESSED_SCANNET_PAIR_DIR: the directory of processed ScanNet pair dataset (output dir). +python pointcept/datasets/preprocessing/scannet/scannet_pair/preprocess.py --dataset_root ${RAW_SCANNET_DIR} --output_root ${PROCESSED_SCANNET_PAIR_DIR} +ln -s ${PROCESSED_SCANNET_PAIR_DIR} ${CODEBASE_DIR}/data/scannet +``` +2. Pre-training with the following example scripts: +```bash +# ScanNet +sh scripts/train.sh -g 8 -d scannet -c pretrain-msc-v1m1-1-spunet-pointcontrast -n pretrain-msc-v1m1-1-spunet-pointcontrast +``` +3. Fine-tuning refer [MSC](#masked-scene-contrast-msc). + +#### Contrastive Scene Contexts +1. Preprocess and link ScanNet-Pair dataset (refer [PointContrast](#pointcontrast)): +2. Pre-training with the following example scripts: +```bash +# ScanNet +sh scripts/train.sh -g 8 -d scannet -c pretrain-msc-v1m2-0-spunet-csc -n pretrain-msc-v1m2-0-spunet-csc +``` +3. Fine-tuning refer [MSC](#masked-scene-contrast-msc). + +## Acknowledgement +_Pointcept_ is designed by [Xiaoyang](https://xywu.me/), named by [Yixing](https://github.com/yxlao) and the logo is created by [Yuechen](https://julianjuaner.github.io/). It is derived from [Hengshuang](https://hszhao.github.io/)'s [Semseg](https://github.com/hszhao/semseg) and inspirited by several repos, e.g., [MinkowskiEngine](https://github.com/NVIDIA/MinkowskiEngine), [pointnet2](https://github.com/charlesq34/pointnet2), [mmcv](https://github.com/open-mmlab/mmcv/tree/master/mmcv), and [Detectron2](https://github.com/facebookresearch/detectron2). \ No newline at end of file diff --git a/point_transformer_v3/pointcept_minimal/configs/_base_/dataset/scannet.py b/point_transformer_v3/pointcept_minimal/configs/_base_/dataset/scannet.py new file mode 100644 index 0000000..eeb9488 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/configs/_base_/dataset/scannet.py @@ -0,0 +1,26 @@ +class_names = [ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", +] + +data = dict( + names=class_names, +) diff --git a/point_transformer_v3/pointcept_minimal/configs/_base_/default_runtime.py b/point_transformer_v3/pointcept_minimal/configs/_base_/default_runtime.py new file mode 100644 index 0000000..a790099 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/configs/_base_/default_runtime.py @@ -0,0 +1,46 @@ +weight = None # path to model weight +resume = False # whether to resume training process +evaluate = True # evaluate after each epoch training process +test_only = False # test process + +seed = None # train process will init a random seed and record +save_path = "exp/default" +num_worker = 16 # total worker in all gpu +batch_size = 16 # total batch size in all gpu +gradient_accumulation_steps = 1 # total steps to accumulate gradients for +batch_size_val = None # auto adapt to bs 1 for each gpu +batch_size_test = None # auto adapt to bs 1 for each gpu +epoch = 100 # total epoch, data loop = epoch // eval_epoch +eval_epoch = 100 # sche total eval & checkpoint epoch +clip_grad = None # disable with None, enable with a float + +sync_bn = False +enable_amp = False +amp_dtype = "float16" +empty_cache = False +empty_cache_per_epoch = False +find_unused_parameters = False + +enable_wandb = True +wandb_project = "pointcept" # custom your project name e.g. Sonata, PTv3 +wandb_key = None # wandb token, default is None. If None, login with `wandb login` in your terminal + +mix_prob = 0 +param_dicts = None # example: param_dicts = [dict(keyword="block", lr_scale=0.1)] + +# hook +hooks = [ + dict(type="CheckpointLoader"), + dict(type="ModelHook"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + dict(type="CheckpointSaver", save_freq=None), + dict(type="PreciseEvaluator", test_last=False), +] + +# Trainer +train = dict(type="DefaultTrainer") + +# Tester +test = dict(type="SemSegTester", verbose=True) diff --git a/point_transformer_v3/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-base.py b/point_transformer_v3/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-base.py new file mode 100644 index 0000000..6f35823 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-base.py @@ -0,0 +1,317 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3m1", + in_channels=6, + order=("z", "z-trans", "hilbert", "hilbert-trans"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.3, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + enc_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate repro_fvdb +# sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base diff --git a/point_transformer_v3/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test.py b/point_transformer_v3/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test.py new file mode 100644 index 0000000..6eaad49 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test.py @@ -0,0 +1,307 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 8 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0.8 +empty_cache = False +enable_amp = False + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3fvdb", + in_channels=6, + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_heads=(2, 4, 8, 16, 32), + dec_depths=(2, 2, 2, 2), + dec_channels=(256, 128, 64, 64), + dec_num_heads=(16, 8, 4, 4), + patch_size=1024, + drop_path=0.3, + proj_drop=0.0, + no_conv_in_cpe=False, + order_type=("z", "z-trans", "hilbert", "hilbert-trans"), + enable_batch_norm=True, + qk_scale=None, + shuffle_orders=True, + embedding_mode="linear", + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 350 +eval_epoch = 10 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate repro_fvdb +# cd reproduce/fvdb-examples/point_transformer_v3/pointcept_minimal +# sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-0-fvdb-test -n semseg-pt-v3m1-0-fvdb-test diff --git a/point_transformer_v3/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test.py b/point_transformer_v3/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test.py new file mode 100644 index 0000000..ca47e04 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test.py @@ -0,0 +1,321 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 8 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +seed = 42 + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3m1", + in_channels=6, + order=("z", "z-trans", "hilbert", "hilbert-trans"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.3, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + enc_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 350 +eval_epoch = 10 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" + + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_inverse=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# conda activate repro_fvdb +# cd reproduce/fvdb-examples/point_transformer_v3/pointcept_minimal +# sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-0-test -n semseg-pt-v3m1-0-test diff --git a/point_transformer_v3/pointcept_minimal/environment.yml b/point_transformer_v3/pointcept_minimal/environment.yml new file mode 100644 index 0000000..cf277f2 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/environment.yml @@ -0,0 +1,52 @@ +name: pointcept-torch2.5.0-cu12.4 +channels: + - pytorch + - nvidia/label/cuda-12.4.1 + - nvidia + - bioconda + - conda-forge + - defaults +dependencies: + - python=3.10 + - pip + - cuda + - conda-forge::cudnn + - gcc=13.2 + - gxx=13.2 + - pytorch=2.5.0 + - torchvision=0.20.0 + - torchaudio=2.5.0 + - pytorch-cuda=12.4 + - ninja + - google-sparsehash + - h5py + - pyyaml + - tensorboard + - tensorboardx + - wandb + - yapf + - addict + - einops + - scipy + - plyfile + - termcolor + - timm + - ftfy + - regex + - tqdm + - matplotlib + - black + - open3d + - pip: + - --find-links https://data.pyg.org/whl/torch-2.5.0+cu124.html + - torch-cluster + - torch-scatter + - torch-sparse + - torch-geometric + - spconv-cu124 + - peft #for lora finetune + - git+https://github.com/octree-nn/ocnn-pytorch.git + - git+https://github.com/openai/CLIP.git + - git+https://github.com/Dao-AILab/flash-attention.git + - ./libs/pointops + - ./libs/pointgroup_ops \ No newline at end of file diff --git a/point_transformer_v3/pointcept_minimal/pointcept/__init__.py b/point_transformer_v3/pointcept_minimal/pointcept/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/__init__.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/__init__.py new file mode 100644 index 0000000..9341b8f --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/datasets/__init__.py @@ -0,0 +1,9 @@ +from .defaults import DefaultDataset, DefaultImagePointDataset, ConcatDataset +from .builder import build_dataset +from .utils import point_collate_fn, collate_fn + +# indoor scene +from .scannet import ScanNetDataset, ScanNet200Dataset + +# dataloader +from .dataloader import MultiDatasetDataloader diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/builder.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/builder.py new file mode 100644 index 0000000..1fa5f0e --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/datasets/builder.py @@ -0,0 +1,15 @@ +""" +Dataset Builder + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +from pointcept.utils.registry import Registry + +DATASETS = Registry("datasets") + + +def build_dataset(cfg): + """Build datasets.""" + return DATASETS.build(cfg) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/dataloader.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/dataloader.py new file mode 100644 index 0000000..a3c8e1d --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/datasets/dataloader.py @@ -0,0 +1,112 @@ +from functools import partial +import weakref +import torch +import torch.utils.data + +import pointcept.utils.comm as comm +from pointcept.datasets.utils import point_collate_fn +from pointcept.datasets import ConcatDataset +from pointcept.utils.env import set_seed + + +class MultiDatasetDummySampler: + def __init__(self): + self.dataloader = None + + def set_epoch(self, epoch): + if comm.get_world_size() > 1: + for dataloader in self.dataloader.dataloaders: + dataloader.sampler.set_epoch(epoch) + return + + +class MultiDatasetDataloader: + """ + Multiple Datasets Dataloader, batch data from a same dataset and mix up ratio determined by loop of each sub dataset. + The overall length is determined by the main dataset (first) and loop of concat dataset. + """ + + def __init__( + self, + concat_dataset: ConcatDataset, + batch_size_per_gpu: int, + num_worker_per_gpu: int, + mix_prob=0, + seed=None, + ): + self.datasets = concat_dataset.datasets + self.ratios = [dataset.loop for dataset in self.datasets] + # reset data loop, original loop serve as ratios + for dataset in self.datasets: + dataset.loop = 1 + # determine union training epoch by main dataset + self.datasets[0].loop = concat_dataset.loop + # build sub-dataloaders + num_workers = num_worker_per_gpu // len(self.datasets) + self.dataloaders = [] + for dataset_id, dataset in enumerate(self.datasets): + if comm.get_world_size() > 1: + sampler = torch.utils.data.distributed.DistributedSampler(dataset) + else: + sampler = None + + init_fn = ( + partial( + self._worker_init_fn, + dataset_id=dataset_id, + num_workers=num_workers, + num_datasets=len(self.datasets), + rank=comm.get_rank(), + seed=seed, + ) + if seed is not None + else None + ) + self.dataloaders.append( + torch.utils.data.DataLoader( + dataset, + batch_size=batch_size_per_gpu, + shuffle=(sampler is None), + num_workers=num_worker_per_gpu, + sampler=sampler, + collate_fn=partial(point_collate_fn, mix_prob=mix_prob), + pin_memory=True, + worker_init_fn=init_fn, + drop_last=True, + persistent_workers=True, + ) + ) + self.sampler = MultiDatasetDummySampler() + self.sampler.dataloader = weakref.proxy(self) + + def __iter__(self): + iterator = [iter(dataloader) for dataloader in self.dataloaders] + while True: + for i in range(len(self.ratios)): + for _ in range(self.ratios[i]): + try: + batch = next(iterator[i]) + except StopIteration: + if i == 0: + return + else: + iterator[i] = iter(self.dataloaders[i]) + batch = next(iterator[i]) + yield batch + + def __len__(self): + main_data_loader_length = len(self.dataloaders[0]) + return ( + main_data_loader_length // self.ratios[0] * sum(self.ratios) + + main_data_loader_length % self.ratios[0] + ) + + @staticmethod + def _worker_init_fn(worker_id, num_workers, dataset_id, num_datasets, rank, seed): + worker_seed = ( + num_workers * num_datasets * rank + + num_workers * dataset_id + + worker_id + + seed + ) + set_seed(worker_seed) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/defaults.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/defaults.py new file mode 100644 index 0000000..929d135 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/datasets/defaults.py @@ -0,0 +1,499 @@ +""" +Default Datasets + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com), Yujia Zhang (yujia.zhang.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import os +import glob +import json + +import numpy as np +import torch +from copy import deepcopy +from torch.utils.data import Dataset +from collections.abc import Sequence +from torchvision.transforms import InterpolationMode +from PIL import Image +from torchvision.transforms import transforms as T +import torch.nn.functional as F + +from pointcept.utils.logger import get_root_logger +from pointcept.utils.cache import shared_dict + +from .builder import DATASETS, build_dataset +from .transform import Compose, TRANSFORMS + +INTERPOLATION_MODE = { + "bilinear": InterpolationMode.BILINEAR, + "bicubic": InterpolationMode.BICUBIC, + "nearest": InterpolationMode.NEAREST, +} + + +@DATASETS.register_module() +class DefaultDataset(Dataset): + VALID_ASSETS = [ + "coord", + "color", + "normal", + "strength", + "segment", + "instance", + "pose", + ] + + def __init__( + self, + split="train", + data_root="data/dataset", + transform=None, + test_mode=False, + test_cfg=None, + cache=False, + ignore_index=-1, + loop=1, + ): + super(DefaultDataset, self).__init__() + self.data_root = data_root + self.split = split + self.transform = Compose(transform) + self.cache = cache + self.ignore_index = ignore_index + self.loop = ( + loop if not test_mode else 1 + ) # force make loop = 1 while in test mode + self.test_mode = test_mode + self.test_cfg = test_cfg if test_mode else None + + if test_mode: + self.test_voxelize = TRANSFORMS.build(self.test_cfg.voxelize) + self.test_crop = ( + TRANSFORMS.build(self.test_cfg.crop) if self.test_cfg.crop else None + ) + self.post_transform = Compose(self.test_cfg.post_transform) + self.aug_transform = [Compose(aug) for aug in self.test_cfg.aug_transform] + + self.data_list = self.get_data_list() + logger = get_root_logger() + logger.info( + "Totally {} x {} samples in {} {} set.".format( + len(self.data_list), self.loop, os.path.basename(self.data_root), split + ) + ) + + def get_data_list(self): + if isinstance(self.split, str): + split_list = [self.split] + elif isinstance(self.split, Sequence): + split_list = self.split + else: + raise NotImplementedError + + data_list = [] + for split in split_list: + if os.path.isfile(os.path.join(self.data_root, split)): + with open(os.path.join(self.data_root, split)) as f: + data_list += [ + os.path.join(self.data_root, data) for data in json.load(f) + ] + else: + data_list += glob.glob(os.path.join(self.data_root, split, "*")) + return data_list + + def get_data(self, idx): + data_path = self.data_list[idx % len(self.data_list)] + name = self.get_data_name(idx) + split = self.get_split_name(idx) + if self.cache: + cache_name = f"pointcept-{name}" + return shared_dict(cache_name) + + data_dict = {} + assets = os.listdir(data_path) + for asset in assets: + if not asset.endswith(".npy"): + continue + if asset[:-4] not in self.VALID_ASSETS: + continue + data_dict[asset[:-4]] = np.load(os.path.join(data_path, asset)) + data_dict["name"] = name + data_dict["split"] = split + + if "coord" in data_dict.keys(): + data_dict["coord"] = data_dict["coord"].astype(np.float32) + + if "color" in data_dict.keys(): + data_dict["color"] = data_dict["color"].astype(np.float32) + + if "normal" in data_dict.keys(): + data_dict["normal"] = data_dict["normal"].astype(np.float32) + + if "segment" in data_dict.keys(): + data_dict["segment"] = data_dict["segment"].reshape([-1]).astype(np.int32) + else: + data_dict["segment"] = ( + np.ones(data_dict["coord"].shape[0], dtype=np.int32) * -1 + ) + + if "instance" in data_dict.keys(): + data_dict["instance"] = data_dict["instance"].reshape([-1]).astype(np.int32) + else: + data_dict["instance"] = ( + np.ones(data_dict["coord"].shape[0], dtype=np.int32) * -1 + ) + return data_dict + + def get_data_name(self, idx): + return os.path.basename(self.data_list[idx % len(self.data_list)]) + + def get_split_name(self, idx): + return os.path.basename( + os.path.dirname(self.data_list[idx % len(self.data_list)]) + ) + + def prepare_train_data(self, idx): + # load data + data_dict = self.get_data(idx) + data_dict = self.transform(data_dict) + return data_dict + + def prepare_test_data(self, idx): + # load data + data_dict = self.get_data(idx) + data_dict = self.transform(data_dict) + result_dict = dict(segment=data_dict.pop("segment"), name=data_dict.pop("name")) + if "origin_segment" in data_dict: + assert "inverse" in data_dict + result_dict["origin_segment"] = data_dict.pop("origin_segment") + result_dict["inverse"] = data_dict.pop("inverse") + + data_dict_list = [] + for aug in self.aug_transform: + data_dict_list.append(aug(deepcopy(data_dict))) + + fragment_list = [] + for data in data_dict_list: + if self.test_voxelize is not None: + data_part_list = self.test_voxelize(data) + else: + data["index"] = np.arange(data["coord"].shape[0]) + data_part_list = [data] + for data_part in data_part_list: + if self.test_crop is not None: + data_part = self.test_crop(data_part) + else: + data_part = [data_part] + fragment_list += data_part + + for i in range(len(fragment_list)): + fragment_list[i] = self.post_transform(fragment_list[i]) + result_dict["fragment_list"] = fragment_list + return result_dict + + def __getitem__(self, idx): + if self.test_mode: + return self.prepare_test_data(idx) + else: + return self.prepare_train_data(idx) + + def __len__(self): + return len(self.data_list) * self.loop + + +@DATASETS.register_module() +class DefaultImagePointDataset(Dataset): + PC_VALID_ASSETS = [ + "coord", + "color", + "normal", + ] + + def __init__( + self, + split="train", + data_root="data/dataset", + transform=None, + test_mode=False, + test_cfg=None, + cache=False, + ignore_index=-1, + loop=1, + crop_h=630, + crop_w=1120, + patch_size=14, + interpolation="bilinear", + ): + super(DefaultImagePointDataset, self).__init__() + self.data_root = data_root + self.split = split + self.transform = Compose(transform) + self.cache = cache + self.ignore_index = ignore_index + self.loop = ( + loop if not test_mode else 1 + ) # force make loop = 1 while in test mode + self.test_mode = test_mode + self.test_cfg = test_cfg if test_mode else None + + if test_mode: + self.test_voxelize = TRANSFORMS.build(self.test_cfg.voxelize) + self.test_crop = ( + TRANSFORMS.build(self.test_cfg.crop) if self.test_cfg.crop else None + ) + self.post_transform = Compose(self.test_cfg.post_transform) + self.aug_transform = [Compose(aug) for aug in self.test_cfg.aug_transform] + + self.data_list, self.split_list = self.get_data_list() + if isinstance(self.data_list, dict): + self.data_name = list(self.data_list.keys()) + else: + self.data_name = list([data["token"] for data in self.data_list]) + logger = get_root_logger() + logger.info( + "Totally {} x {} samples in {} {} set.".format( + len(self.data_name), self.loop, os.path.basename(self.data_root), split + ) + ) + + self.crop_h = crop_h + self.crop_w = crop_w + self.patch_size = patch_size + self.patch_h = crop_h // patch_size + self.patch_w = crop_w // patch_size + self.transform_img = T.Compose( + [ + T.Resize( + (self.patch_h * self.patch_size, self.patch_w * self.patch_size), + interpolation=INTERPOLATION_MODE[interpolation], + ), + T.ToTensor(), + ] + ) + + def resize_correspondence_info( + self, correspondence, size, size0, crop_size, _alignment + ): + h, w = size + h0, w0 = size0 + left, top, right, bottom = crop_size + crop_h = bottom - top + crop_w = right - left + mask_crop = ( + (correspondence[:, 1] >= top) + & (correspondence[:, 1] < bottom) + & (correspondence[:, 0] >= left) + & (correspondence[:, 0] < right) + ) + correspondence = correspondence[mask_crop] + correspondence[:, 1] -= top + correspondence[:, 0] -= left + correspondence[:, 1] = (correspondence[:, 1] * h / crop_h // _alignment).astype( + np.int32 + ) + correspondence[:, 0] = (correspondence[:, 0] * w / crop_w // _alignment).astype( + np.int32 + ) + correspondence = correspondence[:, [1, 0, 2]] + correspondence = np.unique(correspondence, axis=0) + return correspondence + + def get_data_list(self): + split_list = {} + if isinstance(self.split, str): + data_path = os.path.join(self.data_root, "splits", f"{self.split}.json") + with open(data_path, "r", encoding="utf-8") as file: + data_list = json.load(file) + split_list[self.split] = list(data_list.keys()) + elif isinstance(self.split, Sequence): + data_list = {} + for split in self.split: + data_path = os.path.join(self.data_root, "splits", f"{split}.json") + with open(data_path, "r", encoding="utf-8") as file: + data_split_dict = json.load(file) + data_list.update(data_split_dict) + split_list[split] = list(data_split_dict.keys()) + else: + raise NotImplementedError + return data_list, split_list + + def get_data_name(self, idx): + return self.data_name[idx % len(self.data_name)] + + def get_split_name(self, idx): + for split, names in self.split_list.items(): + if self.data_name[idx % len(self.data_name)] in names: + return split + return None + + def get_data(self, idx): + data_dict = {} + name = self.get_data_name(idx) + split = self.get_split_name(idx) + data_dict["name"] = name + data_dict["split"] = split + data_path = self.data_list[name] + + pointclouds_path = data_path["pointclouds"] + assets = os.listdir(pointclouds_path) + for asset in assets: + if not asset.endswith(".npy"): + continue + if asset[:-4] not in self.PC_VALID_ASSETS: + continue + data_dict[asset[:-4]] = np.load(os.path.join(pointclouds_path, asset)) + imgs_path = data_path["images"] + imgs = [Image.open(asset) for asset in imgs_path] + img_width, img_height = imgs[0].size + div_w = img_width // self.patch_w + div_h = img_height // self.patch_h + div_min = max(min(div_w, div_h), 1) + crop_img_width = div_min * self.patch_w + crop_img_height = div_min * self.patch_h + left = int((img_width - crop_img_width) / 2) + top = int((img_height - crop_img_height) / 2) + right = int((img_width + crop_img_width) / 2) + bottom = int((img_height + crop_img_height) / 2) + imgs = [img.crop((left, top, right, bottom)) for img in imgs] + imgs = [self.transform_img(img) for img in imgs] + if len(imgs) > 0: + imgs_list = torch.stack(imgs) + data_dict["images"] = imgs_list.float() + else: + data_dict["images"] = torch.empty( + (0, 3, self.patch_h * self.patch_size, self.patch_w * self.patch_size) + ) + data_dict["img_num"] = np.array([data_dict["images"].shape[0]], dtype=np.int32) + + correspondences_path = data_path["correspondences"] + correspondence_infos = np.ones( + (data_dict["coord"].shape[0], len(correspondences_path), 2), dtype=np.int32 + ) * (-1) + for asset_id, asset in enumerate(correspondences_path): + correspondence_info = np.load(asset).astype(np.int32) + if np.array_equal(correspondence_info, -np.ones((1, 3))): + continue + correspondence_info = self.resize_correspondence_info( + correspondence_info, + (self.patch_h * self.patch_size, self.patch_w * self.patch_size), + (img_height, img_width), + (left, top, right, bottom), + self.patch_size, + ) + correspondence_infos[correspondence_info[:, -1], asset_id, :] = ( + correspondence_info[:, :-1] + ) + data_dict["correspondence"] = correspondence_infos # .reshape(-1, 2) + + if "coord" in data_dict.keys(): + data_dict["coord"] = data_dict["coord"].astype(np.float32) + + if "color" in data_dict.keys(): + data_dict["color"] = data_dict["color"].astype(np.float32) + + if "normal" in data_dict.keys(): + data_dict["normal"] = data_dict["normal"].astype(np.float32) + + if "segment" in data_dict.keys(): + data_dict["segment"] = data_dict["segment"].reshape([-1]).astype(np.int32) + else: + data_dict["segment"] = ( + np.ones(data_dict["coord"].shape[0], dtype=np.int32) * -1 + ) + + if "instance" in data_dict.keys(): + data_dict["instance"] = data_dict["instance"].reshape([-1]).astype(np.int32) + else: + data_dict["instance"] = ( + np.ones(data_dict["coord"].shape[0], dtype=np.int32) * -1 + ) + return data_dict + + def prepare_train_data(self, idx): + # load data + data_dict = self.get_data(idx) + data_dict = self.transform(data_dict) + return data_dict + + def prepare_test_data(self, idx): + # load data + data_dict = self.get_data(idx) + data_dict = self.transform(data_dict) + result_dict = dict(segment=data_dict.pop("segment"), name=data_dict.pop("name")) + if "origin_segment" in data_dict: + assert "inverse" in data_dict + result_dict["origin_segment"] = data_dict.pop("origin_segment") + result_dict["inverse"] = data_dict.pop("inverse") + + data_dict_list = [] + for aug in self.aug_transform: + data_dict_list.append(aug(deepcopy(data_dict))) + + fragment_list = [] + for data in data_dict_list: + if self.test_voxelize is not None: + data_part_list = self.test_voxelize(data) + else: + data["index"] = np.arange(data["coord"].shape[0]) + data_part_list = [data] + for data_part in data_part_list: + if self.test_crop is not None: + data_part = self.test_crop(data_part) + else: + data_part = [data_part] + fragment_list += data_part + + for i in range(len(fragment_list)): + fragment_list[i] = self.post_transform(fragment_list[i]) + result_dict["fragment_list"] = fragment_list + return result_dict + + def __getitem__(self, idx): + if self.test_mode: + return self.prepare_test_data(idx) + else: + return self.prepare_train_data(idx) + + def __len__(self): + return len(self.data_list) * self.loop + + +@DATASETS.register_module() +class ConcatDataset(Dataset): + def __init__(self, datasets, loop=1): + super(ConcatDataset, self).__init__() + self.datasets = [build_dataset(dataset) for dataset in datasets] + self.loop = loop + self.data_list = self.get_data_list() + logger = get_root_logger() + logger.info( + "Totally {} x {} samples in the concat set.".format( + len(self.data_list), self.loop + ) + ) + + def get_data_list(self): + data_list = [] + for i in range(len(self.datasets)): + data_list.extend( + zip( + np.ones(len(self.datasets[i]), dtype=int) * i, + np.arange(len(self.datasets[i])), + ) + ) + return data_list + + def get_data(self, idx): + dataset_idx, data_idx = self.data_list[idx % len(self.data_list)] + return self.datasets[dataset_idx][data_idx] + + def get_data_name(self, idx): + dataset_idx, data_idx = self.data_list[idx % len(self.data_list)] + return self.datasets[dataset_idx].get_data_name(data_idx) + + def __getitem__(self, idx): + return self.get_data(idx) + + def __len__(self): + return len(self.data_list) * self.loop diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/concerto/scannet/preprocess_scannet.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/concerto/scannet/preprocess_scannet.py new file mode 100644 index 0000000..95c6f76 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/concerto/scannet/preprocess_scannet.py @@ -0,0 +1,574 @@ +""" +Preprocessing Script for ScanNet 20/200 + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com), Yujia Zhang (yujia.zhang.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import warnings + +warnings.filterwarnings("ignore", category=DeprecationWarning) + +import os +import camtools as ct +import open3d as o3d +from scipy.spatial import cKDTree +import struct +import zlib +import imageio +import cv2 +import argparse +import glob +import json +import plyfile +import numpy as np +import pandas as pd +import multiprocessing as mp +from pathlib import Path + +# Load external constants +import sys + +sys.path.append("pointcept/datasets/preprocessing/scannet/meta_data") +from scannet200_constants import VALID_CLASS_IDS_200, VALID_CLASS_IDS_20 + +CLOUD_FILE_PFIX = "_vh_clean_2" +SEGMENTS_FILE_PFIX = ".0.010000.segs.json" +AGGREGATIONS_FILE_PFIX = ".aggregation.json" +CLASS_IDS200 = VALID_CLASS_IDS_200 +CLASS_IDS20 = VALID_CLASS_IDS_20 +IGNORE_INDEX = -1 + +COMPRESSION_TYPE_COLOR = {-1: "unknown", 0: "raw", 1: "png", 2: "jpeg"} +COMPRESSION_TYPE_DEPTH = { + -1: "unknown", + 0: "raw_ushort", + 1: "zlib_ushort", + 2: "occi_ushort", +} + + +class RGBDFrame: + def load(self, file_handle): + self.camera_to_world = np.asarray( + struct.unpack("f" * 16, file_handle.read(16 * 4)), dtype=np.float32 + ).reshape(4, 4) + self.timestamp_color = struct.unpack("Q", file_handle.read(8))[0] + self.timestamp_depth = struct.unpack("Q", file_handle.read(8))[0] + self.color_size_bytes = struct.unpack("Q", file_handle.read(8))[0] + self.depth_size_bytes = struct.unpack("Q", file_handle.read(8))[0] + self.color_data = b"".join( + struct.unpack( + "c" * self.color_size_bytes, file_handle.read(self.color_size_bytes) + ) + ) + self.depth_data = b"".join( + struct.unpack( + "c" * self.depth_size_bytes, file_handle.read(self.depth_size_bytes) + ) + ) + + def decompress_depth(self, compression_type): + if compression_type == "zlib_ushort": + return self.decompress_depth_zlib() + else: + raise + + def decompress_depth_zlib(self): + return zlib.decompress(self.depth_data) + + def decompress_color(self, compression_type): + if compression_type == "jpeg": + return self.decompress_color_jpeg() + else: + raise + + def decompress_color_jpeg(self): + return imageio.imread(self.color_data) + + +class SensorData: + def __init__(self, filename): + self.version = 4 + self.load(filename) + + def load(self, filename): + with open(filename, "rb") as f: + version = struct.unpack("I", f.read(4))[0] + assert self.version == version + strlen = struct.unpack("Q", f.read(8))[0] + self.sensor_name = b"".join(struct.unpack("c" * strlen, f.read(strlen))) + self.intrinsic_color = np.asarray( + struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 + ).reshape(4, 4) + self.extrinsic_color = np.asarray( + struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 + ).reshape(4, 4) + self.intrinsic_depth = np.asarray( + struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 + ).reshape(4, 4) + self.extrinsic_depth = np.asarray( + struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 + ).reshape(4, 4) + self.color_compression_type = COMPRESSION_TYPE_COLOR[ + struct.unpack("i", f.read(4))[0] + ] + self.depth_compression_type = COMPRESSION_TYPE_DEPTH[ + struct.unpack("i", f.read(4))[0] + ] + self.color_width = struct.unpack("I", f.read(4))[0] + self.color_height = struct.unpack("I", f.read(4))[0] + self.depth_width = struct.unpack("I", f.read(4))[0] + self.depth_height = struct.unpack("I", f.read(4))[0] + self.depth_shift = struct.unpack("f", f.read(4))[0] + num_frames = struct.unpack("Q", f.read(8))[0] + self.frames = [] + for i in range(num_frames): + frame = RGBDFrame() + frame.load(f) + self.frames.append(frame) + + def export_depth_images(self, output_path, image_size=None, frame_skip=1): + if not os.path.exists(output_path): + os.makedirs(output_path) + print( + "exporting", len(self.frames) // frame_skip, " depth frames to", output_path + ) + for f in range(0, len(self.frames), frame_skip): + if os.path.exists((os.path.join(output_path, str(f) + ".png"))): + continue + if f % 100 == 0: + print( + "exporting", + f, + "th depth frames to", + os.path.join(output_path, str(f) + ".png"), + ) + + depth_data = self.frames[f].decompress_depth(self.depth_compression_type) + depth = np.fromstring(depth_data, dtype=np.uint16).reshape( + self.depth_height, self.depth_width + ) + if image_size is not None: + depth = cv2.resize( + depth, + (image_size[1], image_size[0]), + interpolation=cv2.INTER_NEAREST, + ) + imageio.imwrite(os.path.join(output_path, str(f) + ".png"), depth) + + def export_color_images(self, output_path, image_size=None, frame_skip=1): + if not os.path.exists(output_path): + os.makedirs(output_path) + print( + "exporting", len(self.frames) // frame_skip, "color frames to", output_path + ) + for f in range(0, len(self.frames), frame_skip): + if os.path.exists((os.path.join(output_path, str(f) + ".png"))): + continue + if f % 100 == 0: + print( + "exporting", + f, + "th color frames to", + os.path.join(output_path, str(f) + ".png"), + ) + color = self.frames[f].decompress_color(self.color_compression_type) + if image_size is not None: + color = cv2.resize( + color, + (image_size[1], image_size[0]), + interpolation=cv2.INTER_NEAREST, + ) + # imageio.imwrite(os.path.join(output_path, str(f) + '.jpg'), color) + imageio.imwrite(os.path.join(output_path, str(f) + ".png"), color) + + def save_mat_to_file(self, matrix, filename): + with open(filename, "w") as f: + for line in matrix: + np.savetxt(f, line[np.newaxis], fmt="%f") + + def export_poses(self, output_path, frame_skip=1): + if not os.path.exists(output_path): + os.makedirs(output_path) + print( + "exporting", len(self.frames) // frame_skip, "camera poses to", output_path + ) + for f in range(0, len(self.frames), frame_skip): + np.save( + os.path.join(output_path, str(f) + ".npy"), + self.frames[f].camera_to_world, + ) + + def export_intrinsics(self, output_path): + if not os.path.exists(output_path): + os.makedirs(output_path) + print("exporting camera intrinsics to", output_path) + np.save(os.path.join(output_path, "intrinsic.npy"), self.intrinsic_color) + + +def reader( + filename, + output_path, + frame_skip, + export_color_images=False, + export_depth_images=False, + export_poses=False, + export_intrinsics=False, +): + if not os.path.exists(output_path): + os.makedirs(output_path) + + # load the data + print("loading %s..." % filename) + sd = SensorData(filename) + if export_depth_images: + sd.export_depth_images( + os.path.join(output_path, "depth"), frame_skip=frame_skip + ) + if export_color_images: + sd.export_color_images( + os.path.join(output_path, "color"), frame_skip=frame_skip + ) + if export_poses: + sd.export_poses(os.path.join(output_path, "pose"), frame_skip=frame_skip) + if export_intrinsics: + sd.export_intrinsics(os.path.join(output_path, "intrinsic")) + return sd.color_height, sd.color_width + + +def read_plymesh(filepath): + """Read ply file and return it as numpy array. Returns None if emtpy.""" + with open(filepath, "rb") as f: + plydata = plyfile.PlyData.read(f) + if plydata.elements: + vertices = pd.DataFrame(plydata["vertex"].data).values + faces = np.stack(plydata["face"].data["vertex_indices"], axis=0) + return vertices, faces + + +# Map the raw category id to the point cloud +def point_indices_from_group(seg_indices, group, labels_pd): + group_segments = np.array(group["segments"]) + label = group["label"] + + # Map the category name to id + label_id20 = labels_pd[labels_pd["raw_category"] == label]["nyu40id"] + label_id20 = int(label_id20.iloc[0]) if len(label_id20) > 0 else 0 + label_id200 = labels_pd[labels_pd["raw_category"] == label]["id"] + label_id200 = int(label_id200.iloc[0]) if len(label_id200) > 0 else 0 + + # Only store for the valid categories + if label_id20 in CLASS_IDS20: + label_id20 = CLASS_IDS20.index(label_id20) + else: + label_id20 = IGNORE_INDEX + + if label_id200 in CLASS_IDS200: + label_id200 = CLASS_IDS200.index(label_id200) + else: + label_id200 = IGNORE_INDEX + + # get points, where segment indices (points labelled with segment ids) are in the group segment list + point_idx = np.where(np.isin(seg_indices, group_segments))[0] + return point_idx, label_id20, label_id200 + + +def face_normal(vertex, face): + v01 = vertex[face[:, 1]] - vertex[face[:, 0]] + v02 = vertex[face[:, 2]] - vertex[face[:, 0]] + vec = np.cross(v01, v02) + length = np.sqrt(np.sum(vec**2, axis=1, keepdims=True)) + 1.0e-8 + nf = vec / length + area = length * 0.5 + return nf, area + + +def vertex_normal(vertex, face): + nf, area = face_normal(vertex, face) + nf = nf * area + + nv = np.zeros_like(vertex) + for i in range(face.shape[0]): + nv[face[i]] += nf[i] + + length = np.sqrt(np.sum(nv**2, axis=1, keepdims=True)) + 1.0e-8 + nv = nv / length + return nv + + +def correspondenceGet(mesh, K, T, img_size, coords_gt): + height, width = img_size + if np.isnan(T).any() or np.isinf(T).any(): + return None + + depth = ct.raycast.mesh_to_im_depth( + mesh=mesh, K=K, T=np.linalg.inv(T), height=height, width=width + ) + + pixel = np.transpose(np.indices((width, height)), (2, 1, 0)) + pixel = pixel.reshape((-1, 2)) + pixel = np.hstack((pixel, np.ones((pixel.shape[0], 1)))) + depth = depth.reshape((-1, 1)) + valid = ~np.isinf(depth).squeeze(-1) + coord = np.zeros_like(pixel, dtype=np.float32) + coord[valid] = depth[valid] * (np.linalg.inv(K) @ pixel[valid].T).T # coord_camera + coord[valid] = coord[valid] @ T[:3, :3].T + T[:3, 3] # column then row + + pixel = pixel[valid] + coord = coord[valid] + if coord.shape[0] == 0: + return None + pixel = pixel[:, :2] + coord_dict = {"pixel": pixel, "coord": coord} + return coord_dict + + +def correspondenceSave(mesh, scene_dir, coords_gt, output_dir, img_size): + os.makedirs(output_dir, exist_ok=True) + scene_dir = Path(scene_dir) + index_gt = [ + img_name.split(".")[0] + for img_name in os.listdir(str(scene_dir / "color")) + if img_name.endswith(".png") + ] + index_gt = sorted(index_gt, key=lambda x: int(x)) + + Ks_path = str(scene_dir / "intrinsic" / "intrinsic.npy") + Ts_path = str(scene_dir / "pose") + Ts_files = sorted( + [f for f in os.listdir(Ts_path) if f.endswith(".npy")], + key=lambda x: int(x.split(".")[0]), + ) + + print(f"total pose num:{len(Ts_files)}") + Ts = [] + for Ts_file in Ts_files: + file_path = os.path.join(Ts_path, Ts_file) + Ts_ = np.load(file_path) + Ts.append(Ts_) + Ts = np.stack(Ts) + Ks = np.load(Ks_path) + + Ks = np.tile(Ks, (Ts.shape[0], 1, 1)) + Ks = Ks[:, :3, :3] + coords_gt_ = coords_gt + pixels_ = [] + coords_ = [] + + for i, (K, T) in enumerate(zip(Ks, Ts)): + coord_dict = correspondenceGet(mesh, K, T, img_size, coords_gt) + if coord_dict is None: + correspondences = -np.ones((1, 3)) + else: + pixels_ = coord_dict["pixel"] + coords_ = coord_dict["coord"] + tree = cKDTree(coords_gt_) + dis, idx = tree.query(coords_, k=1) + idx_valid = idx[dis < 0.01] + pixels_valid = pixels_[dis < 0.01] + correspondences = np.hstack((pixels_valid, idx_valid.reshape(-1, 1))) + np.save(Path(output_dir) / "{}.npy".format(index_gt[i]), correspondences) + + +def handle_process( + scene_path, + output_path, + pointclouds_root, + labels_pd, + train_scenes, + val_scenes, + frame_gap=75, + parse_pointclouds=True, + parse_normals=True, + export_depth_images=True, +): + pc_output_path = output_path + im_output_path = os.path.join(output_path, "images") + scene_id = os.path.basename(scene_path) + mesh_path = os.path.join(scene_path, f"{scene_id}{CLOUD_FILE_PFIX}.ply") + segments_file = os.path.join( + scene_path, f"{scene_id}{CLOUD_FILE_PFIX}{SEGMENTS_FILE_PFIX}" + ) + aggregations_file = os.path.join(scene_path, f"{scene_id}{AGGREGATIONS_FILE_PFIX}") + + if scene_id in train_scenes: + pc_output_path = os.path.join(pc_output_path, "train", f"{scene_id}") + pc_input_path = os.path.join(pointclouds_root, "train", f"{scene_id}") + im_output_path = os.path.join(im_output_path, "train", f"{scene_id}") + split_name = "train" + elif scene_id in val_scenes: + pc_output_path = os.path.join(pc_output_path, "val", f"{scene_id}") + pc_input_path = os.path.join(pointclouds_root, "val", f"{scene_id}") + im_output_path = os.path.join(im_output_path, "val", f"{scene_id}") + split_name = "val" + else: + pc_output_path = os.path.join(pc_output_path, "test", f"{scene_id}") + pc_input_path = os.path.join(pointclouds_root, "test", f"{scene_id}") + im_output_path = os.path.join(im_output_path, "test", f"{scene_id}") + split_name = "test" + + print(f"Processing: {scene_id} in {split_name}") + + if parse_pointclouds: + vertices, faces = read_plymesh(mesh_path) + coords = vertices[:, :3] + colors = vertices[:, 3:6] + save_dict = dict( + coord=coords.astype(np.float32), + color=colors.astype(np.uint8), + ) + # Parse Normals + if parse_normals: + save_dict["normal"] = vertex_normal(coords, faces).astype(np.float32) + + # Load segments file + if split_name != "test": + with open(segments_file) as f: + segments = json.load(f) + seg_indices = np.array(segments["segIndices"]) + + # Load Aggregations file + with open(aggregations_file) as f: + aggregation = json.load(f) + seg_groups = np.array(aggregation["segGroups"]) + + # Generate new labels + semantic_gt20 = np.ones((vertices.shape[0]), dtype=np.int16) * IGNORE_INDEX + semantic_gt200 = np.ones((vertices.shape[0]), dtype=np.int16) * IGNORE_INDEX + instance_ids = np.ones((vertices.shape[0]), dtype=np.int16) * IGNORE_INDEX + for group in seg_groups: + point_idx, label_id20, label_id200 = point_indices_from_group( + seg_indices, group, labels_pd + ) + + semantic_gt20[point_idx] = label_id20 + semantic_gt200[point_idx] = label_id200 + instance_ids[point_idx] = group["id"] + + semantic_gt20 = semantic_gt20.astype(int) + semantic_gt200 = semantic_gt200.astype(int) + instance_ids = instance_ids.astype(int) + + save_dict["segment20"] = semantic_gt20 + save_dict["segment200"] = semantic_gt200 + save_dict["instance"] = instance_ids + + # Concatenate with original cloud + processed_vertices = np.hstack((semantic_gt200, instance_ids)) + + if np.any(np.isnan(processed_vertices)) or not np.all( + np.isfinite(processed_vertices) + ): + raise ValueError(f"Find NaN in Scene: {scene_id}") + + # Save pointcloud data + os.makedirs(pc_output_path, exist_ok=True) + for key in save_dict.keys(): + np.save(os.path.join(pc_output_path, f"{key}.npy"), save_dict[key]) + else: + coords = np.load(os.path.join(pc_input_path, "coord.npy")) + save_dict = dict( + coord=coords.astype(np.float32), + ) + + # Save img data + os.makedirs(im_output_path, exist_ok=True) + sens_dir = os.path.join(scene_path, scene_id + ".sens") + print(f"Parsing sens data{sens_dir}") + h, w = reader( + sens_dir, + im_output_path, + frame_gap, + export_color_images=True, + export_depth_images=export_depth_images, + export_poses=True, + export_intrinsics=True, + ) + mesh = o3d.io.read_triangle_mesh(mesh_path) + correspondenceSave( + mesh, + im_output_path, + save_dict["coord"], + os.path.join(im_output_path, "correspondence"), + (h, w), + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--dataset_root", + required=True, + help="Path to the ScanNet dataset containing scene folders", + ) + parser.add_argument( + "--output_root", + required=True, + help="Output path where train/val folders will be located", + ) + parser.add_argument( + "--pointclouds_root", + default="data/scannet", + type=str, + help="Input path where previous pointclouds folder located", + ) + parser.add_argument( + "--frame_gap", default=75, type=int, help="Frame gap for processing" + ) + parser.add_argument( + "--parse_pointclouds", action="store_true", help="Whether parse point clouds" + ) + parser.add_argument( + "--parse_normals", action="store_true", help="Whether parse point normals" + ) + parser.add_argument( + "--parse_depths", action="store_true", help="Whether parse depths" + ) + parser.add_argument( + "--num_workers", + default=mp.cpu_count(), + type=int, + help="Num workers for preprocessing.", + ) + parser.add_argument( + "--thread_id", + default=0, + type=int, + help="Thread id for parallel processing", + ) + config = parser.parse_args() + meta_root = Path("pointcept/datasets/preprocessing/scannet") / "meta_data" + + # Load label map + labels_pd = pd.read_csv( + meta_root / "scannetv2-labels.combined.tsv", + sep="\t", + header=0, + ) + + # Load train/val splits + with open(meta_root / "scannetv2_train.txt") as train_file: + train_scenes = train_file.read().splitlines() + with open(meta_root / "scannetv2_val.txt") as val_file: + val_scenes = val_file.read().splitlines() + + # Load scene paths + scene_paths = sorted(glob.glob(config.dataset_root + "/scans*/scene*")) + scene_paths_list = np.array_split(scene_paths, config.num_workers) + scene_paths_ = scene_paths_list[config.thread_id] + # Preprocess data. + print("Processing scenes...") + for scene_paths_i in scene_paths_: + handle_process( + scene_paths_i, + config.output_root, + config.pointclouds_root, + labels_pd, + train_scenes, + val_scenes, + config.frame_gap, + config.parse_pointclouds, + config.parse_normals, + config.parse_depths, + ) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/concerto/scannet/preprocess_scannet.sh b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/concerto/scannet/preprocess_scannet.sh new file mode 100644 index 0000000..00b2112 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/concerto/scannet/preprocess_scannet.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +dataset_root="" +output_root="" +num_workers=16 +parse_depths=false +parse_pointclouds=false + +while getopts "d:o:n:pc" opt; do + case $opt in + d) dataset_root=$OPTARG ;; + o) output_root=$OPTARG ;; + n) num_workers=$OPTARG ;; + p) parse_depths=true ;; + c) parse_pointclouds=true ;; + *) echo "Usage: $0 -d -o [-n ] [-p] [-c]"; exit 1 ;; + esac +done + +if [ -z "$dataset_root" ] || [ -z "$output_root" ]; then + echo "Usage: $0 -d -o [-n ] [-p] [-c]" + exit 1 +fi + +for i in $(seq 0 $((num_workers - 1))); do + cmd="python pointcept/datasets/preprocessing/concerto/scannet/preprocess_scannet.py --thread_id $i \ + --num_workers $num_workers \ + --dataset_root $dataset_root \ + --output_root $output_root \ + --parse_normal" + if $parse_depths; then + cmd="$cmd --parse_depths" + fi + + if $parse_pointclouds; then + cmd="$cmd --parse_pointclouds" + fi + + eval "$cmd &" +done + +wait diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/concerto/scannet/splits.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/concerto/scannet/splits.py new file mode 100644 index 0000000..5d4e5c6 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/concerto/scannet/splits.py @@ -0,0 +1,62 @@ +import os +import json +import argparse + + +def get_splits_paths(dataset_path): + # Get the names of all subfolders in the given folder + im_path = os.path.join(dataset_path, "images") + pc_path = dataset_path + splits = ["train", "val", "test"] + split_path = os.path.join(dataset_path, "splits") + os.makedirs(split_path, exist_ok=True) + for split in splits: + im_split_path = os.path.join(im_path, split) + pc_split_path = os.path.join(pc_path, split).replace( + dataset_path, "data/scannet" + ) + split_names = [f.name for f in os.scandir(im_split_path) if f.is_dir()] + split_dict = {} + for name in split_names: + im_split_name_path = os.path.join(im_split_path, name, "color") + co_split_name_path = os.path.join(im_split_path, name, "correspondence") + png_files = [ + f for f in os.listdir(im_split_name_path) if f.endswith(".png") + ] + png_files = sorted(png_files, key=lambda x: int(x.split(".")[0])) + # Get the full paths of the .png files + png_file_paths = [ + os.path.join(im_split_name_path, f).replace( + dataset_path, "data/scannet" + ) + for f in png_files + ] + co_file_paths = [ + os.path.join(co_split_name_path, f.replace(".png", ".npy")).replace( + dataset_path, "data/scannet" + ) + for f in png_files + ] + for i in range(0, len(png_file_paths), 4): + split_dict[f"{name}_{i//4}"] = {} + split_dict[f"{name}_{i//4}"]["pointclouds"] = os.path.join( + pc_split_path, name + ) + split_dict[f"{name}_{i//4}"]["images"] = png_file_paths[i : i + 4] + split_dict[f"{name}_{i//4}"]["correspondences"] = co_file_paths[ + i : i + 4 + ] + with open(os.path.join(split_path, f"{split}.json"), "w") as f: + json.dump(split_dict, f, indent=4) + + +# Example usage +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--dataset_root", + required=True, + help="Path to the ScanNet dataset containing scene folders", + ) + config = parser.parse_args() + get_splits_paths(config.dataset_root) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/sampling_chunking_data.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/sampling_chunking_data.py new file mode 100644 index 0000000..96536d4 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/sampling_chunking_data.py @@ -0,0 +1,149 @@ +""" +Chunking Data + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import os +import argparse +import numpy as np +import multiprocessing as mp +from concurrent.futures import ProcessPoolExecutor +from itertools import repeat +from pathlib import Path + + +def chunking_scene( + name, + dataset_root, + split, + grid_size=None, + chunk_range=(6, 6), + chunk_stride=(3, 3), + chunk_minimum_size=10000, +): + print(f"Chunking scene {name} in {split} split") + dataset_root = Path(dataset_root) + scene_path = dataset_root / split / name + assets = os.listdir(scene_path) + data_dict = dict() + for asset in assets: + if not asset.endswith(".npy"): + continue + data_dict[asset[:-4]] = np.load(scene_path / asset) + coord = data_dict["coord"] - data_dict["coord"].min(axis=0) + + if grid_size is not None: + grid_coord = np.floor(coord / grid_size).astype(int) + _, idx = np.unique(grid_coord, axis=0, return_index=True) + coord = coord[idx] + for key in data_dict.keys(): + data_dict[key] = data_dict[key][idx] + + bev_range = coord.max(axis=0)[:2] + x, y = np.meshgrid( + np.arange(0, bev_range[0] + chunk_stride[0] - chunk_range[0], chunk_stride[0]), + np.arange(0, bev_range[0] + chunk_stride[0] - chunk_range[0], chunk_stride[0]), + indexing="ij", + ) + chunks = np.concatenate([x.reshape([-1, 1]), y.reshape([-1, 1])], axis=-1) + chunk_idx = 0 + for chunk in chunks: + mask = ( + (coord[:, 0] >= chunk[0]) + & (coord[:, 0] < chunk[0] + chunk_range[0]) + & (coord[:, 1] >= chunk[1]) + & (coord[:, 1] < chunk[1] + chunk_range[1]) + ) + if np.sum(mask) < chunk_minimum_size: + continue + + chunk_data_name = f"{name}_{chunk_idx}" + if grid_size is not None: + chunk_split_name = ( + f"{split}_" + f"grid{grid_size * 100:.0f}mm_" + f"chunk{chunk_range[0]}x{chunk_range[1]}_" + f"stride{chunk_stride[0]}x{chunk_stride[1]}" + ) + else: + chunk_split_name = ( + f"{split}_" + f"chunk{chunk_range[0]}x{chunk_range[1]}_" + f"stride{chunk_stride[0]}x{chunk_stride[1]}" + ) + + chunk_save_path = dataset_root / chunk_split_name / chunk_data_name + chunk_save_path.mkdir(parents=True, exist_ok=True) + for key in data_dict.keys(): + np.save(chunk_save_path / f"{key}.npy", data_dict[key][mask]) + chunk_idx += 1 + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--dataset_root", + required=True, + help="Path to the Pointcept processed ScanNet++ dataset.", + ) + parser.add_argument( + "--split", + required=True, + default="train", + type=str, + help="Split need to process.", + ) + parser.add_argument( + "--grid_size", + default=None, + type=float, + help="Grid size for initial grid sampling", + ) + parser.add_argument( + "--chunk_range", + default=[6, 6], + type=int, + nargs="+", + help="Range of each chunk, e.g. --chunk_range 6 6", + ) + parser.add_argument( + "--chunk_stride", + default=[3, 3], + type=int, + nargs="+", + help="Stride of each chunk, e.g. --chunk_stride 3 3", + ) + parser.add_argument( + "--chunk_minimum_size", + default=10000, + type=int, + help="Minimum number of points in each chunk", + ) + parser.add_argument( + "--num_workers", + default=mp.cpu_count(), + type=int, + help="Num workers for preprocessing.", + ) + + config = parser.parse_args() + config.dataset_root = Path(config.dataset_root) + data_list = os.listdir(config.dataset_root / config.split) + + print("Processing scenes...") + pool = ProcessPoolExecutor(max_workers=config.num_workers) + _ = list( + pool.map( + chunking_scene, + data_list, + repeat(config.dataset_root), + repeat(config.split), + repeat(config.grid_size), + repeat(config.chunk_range), + repeat(config.chunk_stride), + repeat(config.chunk_minimum_size), + ) + ) + pool.shutdown() diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/dino/prepare_scene_list.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/dino/prepare_scene_list.py new file mode 100644 index 0000000..7b43f15 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/dino/prepare_scene_list.py @@ -0,0 +1,27 @@ +import os +import argparse +from pathlib import Path + +import numpy as np + +if __name__ == "__main__": + num_train_list = 12 + num_val_list = 3 + meta_root = Path(os.path.dirname(__file__)).parent / "meta_data" + + # Load train/val splits + train_scenes = np.loadtxt(meta_root / "scannetv2_train.txt", dtype=str) + val_scenes = np.loadtxt(meta_root / "scannetv2_val.txt", dtype=str) + + for i in range(num_train_list): + np.savetxt( + meta_root / f"scannetv2_train_{i}.txt", + train_scenes[i::num_train_list], + fmt="%s", + ) + for i in range(num_val_list): + np.savetxt( + meta_root / f"scannetv2_val_{i}.txt", + val_scenes[i::num_val_list], + fmt="%s", + ) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/dino/preprocess_dino_feature.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/dino/preprocess_dino_feature.py new file mode 100644 index 0000000..1eb9541 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/dino/preprocess_dino_feature.py @@ -0,0 +1,362 @@ +import os +import argparse +import einops +import torch +import torch.nn.functional as F +import torchvision +import tqdm +import cv2 +import camtools as ct +import open3d as o3d +import zlib +import imageio +import struct +import numpy as np +import torch_scatter +from pathlib import Path + + +class RGBDFrame: + def __init__(self, file_handle): + self.camera_to_world = np.asarray( + struct.unpack("f" * 16, file_handle.read(16 * 4)), dtype=np.float32 + ).reshape(4, 4) + self.timestamp_color = struct.unpack("Q", file_handle.read(8))[0] + self.timestamp_depth = struct.unpack("Q", file_handle.read(8))[0] + self.color_size_bytes = struct.unpack("Q", file_handle.read(8))[0] + self.depth_size_bytes = struct.unpack("Q", file_handle.read(8))[0] + self.color_data = b"".join( + struct.unpack( + "c" * self.color_size_bytes, file_handle.read(self.color_size_bytes) + ) + ) + self.depth_data = b"".join( + struct.unpack( + "c" * self.depth_size_bytes, file_handle.read(self.depth_size_bytes) + ) + ) + + def decompress_depth(self, compression_type): + if compression_type == "zlib_ushort": + return self.decompress_depth_zlib() + else: + raise + + def decompress_depth_zlib(self): + return zlib.decompress(self.depth_data) + + def decompress_color(self, compression_type): + if compression_type == "jpeg": + return self.decompress_color_jpeg() + else: + raise + + def decompress_color_jpeg(self): + return imageio.v2.imread(self.color_data) + + +class SensorData: + COMPRESSION_TYPE_COLOR = { + -1: "unknown", + 0: "raw", + 1: "png", + 2: "jpeg", + } + COMPRESSION_TYPE_DEPTH = { + -1: "unknown", + 0: "raw_ushort", + 1: "zlib_ushort", + 2: "occi_ushort", + } + + def __init__(self, filename): + self.version = 4 + f = open(filename, "rb") + version = struct.unpack("I", f.read(4))[0] + assert self.version == version + strlen = struct.unpack("Q", f.read(8))[0] + self.sensor_name = b"".join(struct.unpack("c" * strlen, f.read(strlen))) + self.intrinsic_color = np.asarray( + struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 + ).reshape(4, 4) + self.extrinsic_color = np.asarray( + struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 + ).reshape(4, 4) + self.intrinsic_depth = np.asarray( + struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 + ).reshape(4, 4) + self.extrinsic_depth = np.asarray( + struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 + ).reshape(4, 4) + self.color_compression_type = self.COMPRESSION_TYPE_COLOR[ + struct.unpack("i", f.read(4))[0] + ] + self.depth_compression_type = self.COMPRESSION_TYPE_DEPTH[ + struct.unpack("i", f.read(4))[0] + ] + self.color_width = struct.unpack("I", f.read(4))[0] + self.color_height = struct.unpack("I", f.read(4))[0] + self.depth_width = struct.unpack("I", f.read(4))[0] + self.depth_height = struct.unpack("I", f.read(4))[0] + self.depth_shift = struct.unpack("f", f.read(4))[0] + self.num_frames = struct.unpack("Q", f.read(8))[0] + self.file_handle = f + + def export( + self, + frame_skip=20, + export_color=True, + export_depth=True, + export_pose=True, + ): + for i in range(self.num_frames): + if i % frame_skip != 0: + self.file_handle.seek(16 * 4 + 8 + 8, 1) # skip pose, timestamp + color_size_bytes = struct.unpack("Q", self.file_handle.read(8))[0] + depth_size_bytes = struct.unpack("Q", self.file_handle.read(8))[0] + self.file_handle.seek(color_size_bytes + depth_size_bytes, 1) + continue + else: + frame = RGBDFrame(self.file_handle) + data_dict = {} + if export_color: + color = frame.decompress_color(self.color_compression_type) + data_dict["color"] = color + if export_depth: + depth = frame.decompress_depth(self.depth_compression_type) + depth = np.frombuffer(depth, dtype=np.uint16).reshape( + self.depth_height, self.depth_width + ) + data_dict["depth"] = depth + if export_pose: + pose = frame.camera_to_world + data_dict["pose"] = pose + yield data_dict + + def __del__(self): + self.file_handle.close() + + +def ray_distance_to_z_depth(ray_depth, K): + height, width = ray_depth.shape + + u = np.arange(width) + v = np.arange(height) + u_grid, v_grid = np.meshgrid(u, v) + + fx = K[0, 0] + fy = K[1, 1] + cx = K[0, 2] + cy = K[1, 2] + + u_norm = (u_grid - cx) / fx + v_norm = (v_grid - cy) / fy + + norm_square = u_norm**2 + v_norm**2 + + z_depth = ray_depth / np.sqrt(norm_square + 1) + return z_depth + + +def center_crop(image, crop_ratio=1.0, patch_size=None): + if len(image.shape) == 2: + height, width = image.shape + elif len(image.shape) == 3: + height, width, _ = image.shape + else: + raise ValueError("Invalid image shape") + if patch_size is not None: + crop_h = int(height * crop_ratio // patch_size * patch_size) + crop_w = int(width * crop_ratio // patch_size * patch_size) + else: + crop_h = int(height * crop_ratio) + crop_w = int(width * crop_ratio) + + # Calculate the cropping box + start_h = (height - crop_h) // 2 + start_w = (width - crop_w) // 2 + + # Perform the center crop + cropped_image = image[start_h : start_h + crop_h, start_w : start_w + crop_w] + + return cropped_image + + +def parsing_scene( + scene_path, + output_root, + split, + model, + frame_skip=20, + grid_size=0.08, + crop_ratio=0.95, + device="cuda", +): + print(f"Parsing scene: {scene_path.name}") + device = torch.device(device) + scene_path = Path(scene_path) + sensor_reader = SensorData(scene_path / f"{scene_path.name}.sens") + mesh = o3d.io.read_triangle_mesh( + str(scene_path / f"{scene_path.name}_vh_clean_2.ply") + ) + transform = torchvision.transforms.Compose( + [ + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize( + mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225) + ), + ] + ) + scene_coord = [] + scene_feat = [] + scene_count = [] + for data in tqdm.tqdm( + sensor_reader.export(frame_skip=frame_skip), + total=sensor_reader.num_frames // frame_skip, + ): + height, width = data["depth"].shape + K = sensor_reader.intrinsic_depth[:3, :3] + T = data["pose"] + if np.isnan(T).any() or np.isinf(T).any(): + continue + depth = ct.raycast.mesh_to_depth( + mesh=mesh, K=K, T=np.linalg.inv(T), height=height, width=width + ) + depth = ray_distance_to_z_depth(depth, K) + depth = center_crop(depth, crop_ratio, model.patch_size) + height_, width_ = depth.shape + pixel = np.transpose(np.indices((width_, height_)), (2, 1, 0)) + pixel = pixel.reshape((-1, 2)) + pixel = np.hstack((pixel, np.ones((pixel.shape[0], 1)))) + depth = depth.reshape((-1, 1)) + valid = ~np.isinf(depth).squeeze(-1) + coord = depth[valid] * (np.linalg.inv(K) @ pixel[valid].T).T # coord_camera + coord = coord @ T[:3, :3].T + T[:3, 3] + + color = cv2.resize( + data["color"], (width, height), interpolation=cv2.INTER_LINEAR + ) + color = center_crop(color, crop_ratio, model.patch_size) + with torch.inference_mode(): + color_t = transform(color).unsqueeze(0).to(device) + feat_t = model.forward_features(color_t)["x_norm_patchtokens"] + feat_t = einops.rearrange( + feat_t, "1 (h w) c -> 1 c h w", w=width_ // model.patch_size + ) + feat_t = F.interpolate(feat_t, (height_, width_), mode="bilinear") + feat_t = einops.rearrange(feat_t, "1 c h w -> (h w) c")[valid] + coord_t = torch.tensor(coord, dtype=torch.float32).to(device) + scene_coord.append(coord_t) + scene_feat.append(feat_t) + scene_count.append( + torch.ones(coord_t.shape[0], dtype=torch.long, device=device) + ) + scene_coord = torch.concatenate(scene_coord, dim=0) + scene_feat = torch.concatenate(scene_feat, dim=0) + scene_count = torch.concatenate(scene_count, dim=0) + + # grid sampling + grid_coord = torch.floor_divide(scene_coord, grid_size).to(torch.int32) + grid_coord, cluster = torch.unique( + grid_coord, sorted=True, return_inverse=True, dim=0 + ) + scene_coord = [ + torch_scatter.scatter(scene_coord, cluster, reduce="mean", dim=0) + ] + scene_feat = [ + torch_scatter.scatter(scene_feat, cluster, reduce="sum", dim=0) + ] + scene_count = [ + torch_scatter.scatter(scene_count, cluster, reduce="sum", dim=0) + ] + + # color = color.reshape((-1, 3))[valid] + # pcd = o3d.geometry.PointCloud() + # pcd.points = o3d.utility.Vector3dVector(coord) + # pcd.colors = o3d.utility.Vector3dVector(color / 255) + # o3d.visualization.draw_geometries([pcd]) + + scene_coord = scene_coord[0] + scene_feat = scene_feat[0] / scene_count[0].unsqueeze(-1) + + scene_coord = scene_coord.half().cpu().numpy() + scene_feat = scene_feat.half().cpu().numpy() + np.savez( + Path(output_root) / split / f"{scene_path.name}.npz", + coord=scene_coord, + feat=scene_feat, + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--dataset_root", + required=True, + help="Path to the ScanNet dataset containing scene folders", + ) + parser.add_argument( + "--output_root", + required=True, + help="Output path where train/val folders will be located", + ) + parser.add_argument( + "--scene_list", + required=True, + help="Path to scene list need to process", + ) + parser.add_argument( + "--frame_skip", + default=10, + help="Frame skip for processing", + ) + parser.add_argument( + "--grid_size", + default=0.08, + help="Grid size for sampling", + ) + parser.add_argument( + "--crop_ratio", + default=0.95, + help="Crop ratio for center crop", + ) + + args = parser.parse_args() + scene_list = np.loadtxt(args.scene_list, dtype=str) + if "train" in args.scene_list: + split = "train" + folder = "scans" + elif "val" in args.scene_list: + split = "val" + folder = "scans" + else: + split = "test" + folder = "scans_test" + + os.makedirs(Path(args.output_root) / split, exist_ok=True) + + device = torch.device("cuda") + model = torch.hub.load("facebookresearch/dinov2", "dinov2_vitg14").to(device) + model.eval() + for scene in scene_list: + parsing_scene( + scene_path=Path(args.dataset_root) / folder / scene, + output_root=args.output_root, + split=split, + frame_skip=args.frame_skip, + grid_size=args.grid_size, + crop_ratio=args.crop_ratio, + model=model, + device="cuda", + ) + + # parsing_scene( + # scene_path=Path("/mnt/e/datasets/raw/scannet/scans/scene0230_00"), + # output_root=args.output_root, + # split=split, + # frame_skip=args.frame_skip, + # grid_size=args.grid_size, + # crop_ratio=args.crop_ratio, + # model=model, + # device="cuda", + # ) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/extract_partition.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/extract_partition.py new file mode 100644 index 0000000..432a490 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/extract_partition.py @@ -0,0 +1,71 @@ +import json +import shutil +import argparse +import torch +import glob +import os.path + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--dataset_root", + required=True, + help="Path to the ScanNet dataset containing scene folders", + ) + parser.add_argument( + "--processed_root", + required=True, + help="Path to the processed ScanNet dataset, add partition to test data dict", + ) + parser.add_argument( + "--segmentor_root", + required=True, + help="Path to Felzenswalb and Huttenlocher's Graph Based Image Segmentation binary", + ) + parser.add_argument( + "--split", + default="test", + choices=["test", "val"], + help="Split to process. [test / val]", + ) + config = parser.parse_args() + if config.split == "test": + raw_split = "scans_test" + else: + raw_split = "scans" + + scene_list = glob.glob(os.path.join(config.processed_root, config.split, "*.pth")) + os.makedirs(os.path.join(config.processed_root, "tmp"), exist_ok=True) + + for scene in scene_list: + scene_name = os.path.basename(scene).split(".")[0] + raw_scene = os.path.join( + config.dataset_root, + raw_split, + scene_name, + f"{scene_name}_vh_clean_2.ply", + ) + tmp_scene = os.path.join( + config.processed_root, + "tmp", + f"{scene_name}_vh_clean_2.ply", + ) + # copy original scene to tmp folder + shutil.copy(raw_scene, tmp_scene) + # run segmentor + process = os.popen(f"{config.segmentor_root} {tmp_scene}") + print(process.read()) + process.close() + # load partition file + partition_file = tmp_scene.replace(".ply", ".0.010000.segs.json") + with open(partition_file) as f: + partition = json.load(f)["segIndices"] + data_dict = torch.load(scene) + data_dict["partition"] = partition + torch.save(data_dict, scene) + # clean tmp + os.remove(partition_file) + os.remove(tmp_scene) + print(f"Adding partition information to {scene_name}") + + os.rmdir(os.path.join(config.processed_root, "tmp")) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_constants.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_constants.py new file mode 100644 index 0000000..0404fd6 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_constants.py @@ -0,0 +1,704 @@ +# ScanNet Benchmark constants +VALID_CLASS_IDS_20 = ( + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 14, + 16, + 24, + 28, + 33, + 34, + 36, + 39, +) + +CLASS_LABELS_20 = ( + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refrigerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", +) + +SCANNET_COLOR_MAP_20 = { + 0: (0.0, 0.0, 0.0), + 1: (174.0, 199.0, 232.0), + 2: (152.0, 223.0, 138.0), + 3: (31.0, 119.0, 180.0), + 4: (255.0, 187.0, 120.0), + 5: (188.0, 189.0, 34.0), + 6: (140.0, 86.0, 75.0), + 7: (255.0, 152.0, 150.0), + 8: (214.0, 39.0, 40.0), + 9: (197.0, 176.0, 213.0), + 10: (148.0, 103.0, 189.0), + 11: (196.0, 156.0, 148.0), + 12: (23.0, 190.0, 207.0), + 14: (247.0, 182.0, 210.0), + 15: (66.0, 188.0, 102.0), + 16: (219.0, 219.0, 141.0), + 17: (140.0, 57.0, 197.0), + 18: (202.0, 185.0, 52.0), + 19: (51.0, 176.0, 203.0), + 20: (200.0, 54.0, 131.0), + 21: (92.0, 193.0, 61.0), + 22: (78.0, 71.0, 183.0), + 23: (172.0, 114.0, 82.0), + 24: (255.0, 127.0, 14.0), + 25: (91.0, 163.0, 138.0), + 26: (153.0, 98.0, 156.0), + 27: (140.0, 153.0, 101.0), + 28: (158.0, 218.0, 229.0), + 29: (100.0, 125.0, 154.0), + 30: (178.0, 127.0, 135.0), + 32: (146.0, 111.0, 194.0), + 33: (44.0, 160.0, 44.0), + 34: (112.0, 128.0, 144.0), + 35: (96.0, 207.0, 209.0), + 36: (227.0, 119.0, 194.0), + 37: (213.0, 92.0, 176.0), + 38: (94.0, 106.0, 211.0), + 39: (82.0, 84.0, 163.0), + 40: (100.0, 85.0, 144.0), +} + +# ScanNet200 Benchmark constants +VALID_CLASS_IDS_200 = ( + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 21, + 22, + 23, + 24, + 26, + 27, + 28, + 29, + 31, + 32, + 33, + 34, + 35, + 36, + 38, + 39, + 40, + 41, + 42, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 54, + 55, + 56, + 57, + 58, + 59, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 82, + 84, + 86, + 87, + 88, + 89, + 90, + 93, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 110, + 112, + 115, + 116, + 118, + 120, + 121, + 122, + 125, + 128, + 130, + 131, + 132, + 134, + 136, + 138, + 139, + 140, + 141, + 145, + 148, + 154, + 155, + 156, + 157, + 159, + 161, + 163, + 165, + 166, + 168, + 169, + 170, + 177, + 180, + 185, + 188, + 191, + 193, + 195, + 202, + 208, + 213, + 214, + 221, + 229, + 230, + 232, + 233, + 242, + 250, + 261, + 264, + 276, + 283, + 286, + 300, + 304, + 312, + 323, + 325, + 331, + 342, + 356, + 370, + 392, + 395, + 399, + 408, + 417, + 488, + 540, + 562, + 570, + 572, + 581, + 609, + 748, + 776, + 1156, + 1163, + 1164, + 1165, + 1166, + 1167, + 1168, + 1169, + 1170, + 1171, + 1172, + 1173, + 1174, + 1175, + 1176, + 1178, + 1179, + 1180, + 1181, + 1182, + 1183, + 1184, + 1185, + 1186, + 1187, + 1188, + 1189, + 1190, + 1191, +) + +CLASS_LABELS_200 = ( + "wall", + "chair", + "floor", + "table", + "door", + "couch", + "cabinet", + "shelf", + "desk", + "office chair", + "bed", + "pillow", + "sink", + "picture", + "window", + "toilet", + "bookshelf", + "monitor", + "curtain", + "book", + "armchair", + "coffee table", + "box", + "refrigerator", + "lamp", + "kitchen cabinet", + "towel", + "clothes", + "tv", + "nightstand", + "counter", + "dresser", + "stool", + "cushion", + "plant", + "ceiling", + "bathtub", + "end table", + "dining table", + "keyboard", + "bag", + "backpack", + "toilet paper", + "printer", + "tv stand", + "whiteboard", + "blanket", + "shower curtain", + "trash can", + "closet", + "stairs", + "microwave", + "stove", + "shoe", + "computer tower", + "bottle", + "bin", + "ottoman", + "bench", + "board", + "washing machine", + "mirror", + "copier", + "basket", + "sofa chair", + "file cabinet", + "fan", + "laptop", + "shower", + "paper", + "person", + "paper towel dispenser", + "oven", + "blinds", + "rack", + "plate", + "blackboard", + "piano", + "suitcase", + "rail", + "radiator", + "recycling bin", + "container", + "wardrobe", + "soap dispenser", + "telephone", + "bucket", + "clock", + "stand", + "light", + "laundry basket", + "pipe", + "clothes dryer", + "guitar", + "toilet paper holder", + "seat", + "speaker", + "column", + "bicycle", + "ladder", + "bathroom stall", + "shower wall", + "cup", + "jacket", + "storage bin", + "coffee maker", + "dishwasher", + "paper towel roll", + "machine", + "mat", + "windowsill", + "bar", + "toaster", + "bulletin board", + "ironing board", + "fireplace", + "soap dish", + "kitchen counter", + "doorframe", + "toilet paper dispenser", + "mini fridge", + "fire extinguisher", + "ball", + "hat", + "shower curtain rod", + "water cooler", + "paper cutter", + "tray", + "shower door", + "pillar", + "ledge", + "toaster oven", + "mouse", + "toilet seat cover dispenser", + "furniture", + "cart", + "storage container", + "scale", + "tissue box", + "light switch", + "crate", + "power outlet", + "decoration", + "sign", + "projector", + "closet door", + "vacuum cleaner", + "candle", + "plunger", + "stuffed animal", + "headphones", + "dish rack", + "broom", + "guitar case", + "range hood", + "dustpan", + "hair dryer", + "water bottle", + "handicap bar", + "purse", + "vent", + "shower floor", + "water pitcher", + "mailbox", + "bowl", + "paper bag", + "alarm clock", + "music stand", + "projector screen", + "divider", + "laundry detergent", + "bathroom counter", + "object", + "bathroom vanity", + "closet wall", + "laundry hamper", + "bathroom stall door", + "ceiling light", + "trash bin", + "dumbbell", + "stair rail", + "tube", + "bathroom cabinet", + "cd case", + "closet rod", + "coffee kettle", + "structure", + "shower head", + "keyboard piano", + "case of water bottles", + "coat rack", + "storage organizer", + "folded chair", + "fire alarm", + "power strip", + "calendar", + "poster", + "potted plant", + "luggage", + "mattress", +) + +SCANNET_COLOR_MAP_200 = { + 0: (0.0, 0.0, 0.0), + 1: (174.0, 199.0, 232.0), + 2: (188.0, 189.0, 34.0), + 3: (152.0, 223.0, 138.0), + 4: (255.0, 152.0, 150.0), + 5: (214.0, 39.0, 40.0), + 6: (91.0, 135.0, 229.0), + 7: (31.0, 119.0, 180.0), + 8: (229.0, 91.0, 104.0), + 9: (247.0, 182.0, 210.0), + 10: (91.0, 229.0, 110.0), + 11: (255.0, 187.0, 120.0), + 13: (141.0, 91.0, 229.0), + 14: (112.0, 128.0, 144.0), + 15: (196.0, 156.0, 148.0), + 16: (197.0, 176.0, 213.0), + 17: (44.0, 160.0, 44.0), + 18: (148.0, 103.0, 189.0), + 19: (229.0, 91.0, 223.0), + 21: (219.0, 219.0, 141.0), + 22: (192.0, 229.0, 91.0), + 23: (88.0, 218.0, 137.0), + 24: (58.0, 98.0, 137.0), + 26: (177.0, 82.0, 239.0), + 27: (255.0, 127.0, 14.0), + 28: (237.0, 204.0, 37.0), + 29: (41.0, 206.0, 32.0), + 31: (62.0, 143.0, 148.0), + 32: (34.0, 14.0, 130.0), + 33: (143.0, 45.0, 115.0), + 34: (137.0, 63.0, 14.0), + 35: (23.0, 190.0, 207.0), + 36: (16.0, 212.0, 139.0), + 38: (90.0, 119.0, 201.0), + 39: (125.0, 30.0, 141.0), + 40: (150.0, 53.0, 56.0), + 41: (186.0, 197.0, 62.0), + 42: (227.0, 119.0, 194.0), + 44: (38.0, 100.0, 128.0), + 45: (120.0, 31.0, 243.0), + 46: (154.0, 59.0, 103.0), + 47: (169.0, 137.0, 78.0), + 48: (143.0, 245.0, 111.0), + 49: (37.0, 230.0, 205.0), + 50: (14.0, 16.0, 155.0), + 51: (196.0, 51.0, 182.0), + 52: (237.0, 80.0, 38.0), + 54: (138.0, 175.0, 62.0), + 55: (158.0, 218.0, 229.0), + 56: (38.0, 96.0, 167.0), + 57: (190.0, 77.0, 246.0), + 58: (208.0, 49.0, 84.0), + 59: (208.0, 193.0, 72.0), + 62: (55.0, 220.0, 57.0), + 63: (10.0, 125.0, 140.0), + 64: (76.0, 38.0, 202.0), + 65: (191.0, 28.0, 135.0), + 66: (211.0, 120.0, 42.0), + 67: (118.0, 174.0, 76.0), + 68: (17.0, 242.0, 171.0), + 69: (20.0, 65.0, 247.0), + 70: (208.0, 61.0, 222.0), + 71: (162.0, 62.0, 60.0), + 72: (210.0, 235.0, 62.0), + 73: (45.0, 152.0, 72.0), + 74: (35.0, 107.0, 149.0), + 75: (160.0, 89.0, 237.0), + 76: (227.0, 56.0, 125.0), + 77: (169.0, 143.0, 81.0), + 78: (42.0, 143.0, 20.0), + 79: (25.0, 160.0, 151.0), + 80: (82.0, 75.0, 227.0), + 82: (253.0, 59.0, 222.0), + 84: (240.0, 130.0, 89.0), + 86: (123.0, 172.0, 47.0), + 87: (71.0, 194.0, 133.0), + 88: (24.0, 94.0, 205.0), + 89: (134.0, 16.0, 179.0), + 90: (159.0, 32.0, 52.0), + 93: (213.0, 208.0, 88.0), + 95: (64.0, 158.0, 70.0), + 96: (18.0, 163.0, 194.0), + 97: (65.0, 29.0, 153.0), + 98: (177.0, 10.0, 109.0), + 99: (152.0, 83.0, 7.0), + 100: (83.0, 175.0, 30.0), + 101: (18.0, 199.0, 153.0), + 102: (61.0, 81.0, 208.0), + 103: (213.0, 85.0, 216.0), + 104: (170.0, 53.0, 42.0), + 105: (161.0, 192.0, 38.0), + 106: (23.0, 241.0, 91.0), + 107: (12.0, 103.0, 170.0), + 110: (151.0, 41.0, 245.0), + 112: (133.0, 51.0, 80.0), + 115: (184.0, 162.0, 91.0), + 116: (50.0, 138.0, 38.0), + 118: (31.0, 237.0, 236.0), + 120: (39.0, 19.0, 208.0), + 121: (223.0, 27.0, 180.0), + 122: (254.0, 141.0, 85.0), + 125: (97.0, 144.0, 39.0), + 128: (106.0, 231.0, 176.0), + 130: (12.0, 61.0, 162.0), + 131: (124.0, 66.0, 140.0), + 132: (137.0, 66.0, 73.0), + 134: (250.0, 253.0, 26.0), + 136: (55.0, 191.0, 73.0), + 138: (60.0, 126.0, 146.0), + 139: (153.0, 108.0, 234.0), + 140: (184.0, 58.0, 125.0), + 141: (135.0, 84.0, 14.0), + 145: (139.0, 248.0, 91.0), + 148: (53.0, 200.0, 172.0), + 154: (63.0, 69.0, 134.0), + 155: (190.0, 75.0, 186.0), + 156: (127.0, 63.0, 52.0), + 157: (141.0, 182.0, 25.0), + 159: (56.0, 144.0, 89.0), + 161: (64.0, 160.0, 250.0), + 163: (182.0, 86.0, 245.0), + 165: (139.0, 18.0, 53.0), + 166: (134.0, 120.0, 54.0), + 168: (49.0, 165.0, 42.0), + 169: (51.0, 128.0, 133.0), + 170: (44.0, 21.0, 163.0), + 177: (232.0, 93.0, 193.0), + 180: (176.0, 102.0, 54.0), + 185: (116.0, 217.0, 17.0), + 188: (54.0, 209.0, 150.0), + 191: (60.0, 99.0, 204.0), + 193: (129.0, 43.0, 144.0), + 195: (252.0, 100.0, 106.0), + 202: (187.0, 196.0, 73.0), + 208: (13.0, 158.0, 40.0), + 213: (52.0, 122.0, 152.0), + 214: (128.0, 76.0, 202.0), + 221: (187.0, 50.0, 115.0), + 229: (180.0, 141.0, 71.0), + 230: (77.0, 208.0, 35.0), + 232: (72.0, 183.0, 168.0), + 233: (97.0, 99.0, 203.0), + 242: (172.0, 22.0, 158.0), + 250: (155.0, 64.0, 40.0), + 261: (118.0, 159.0, 30.0), + 264: (69.0, 252.0, 148.0), + 276: (45.0, 103.0, 173.0), + 283: (111.0, 38.0, 149.0), + 286: (184.0, 9.0, 49.0), + 300: (188.0, 174.0, 67.0), + 304: (53.0, 206.0, 53.0), + 312: (97.0, 235.0, 252.0), + 323: (66.0, 32.0, 182.0), + 325: (236.0, 114.0, 195.0), + 331: (241.0, 154.0, 83.0), + 342: (133.0, 240.0, 52.0), + 356: (16.0, 205.0, 144.0), + 370: (75.0, 101.0, 198.0), + 392: (237.0, 95.0, 251.0), + 395: (191.0, 52.0, 49.0), + 399: (227.0, 254.0, 54.0), + 408: (49.0, 206.0, 87.0), + 417: (48.0, 113.0, 150.0), + 488: (125.0, 73.0, 182.0), + 540: (229.0, 32.0, 114.0), + 562: (158.0, 119.0, 28.0), + 570: (60.0, 205.0, 27.0), + 572: (18.0, 215.0, 201.0), + 581: (79.0, 76.0, 153.0), + 609: (134.0, 13.0, 116.0), + 748: (192.0, 97.0, 63.0), + 776: (108.0, 163.0, 18.0), + 1156: (95.0, 220.0, 156.0), + 1163: (98.0, 141.0, 208.0), + 1164: (144.0, 19.0, 193.0), + 1165: (166.0, 36.0, 57.0), + 1166: (212.0, 202.0, 34.0), + 1167: (23.0, 206.0, 34.0), + 1168: (91.0, 211.0, 236.0), + 1169: (79.0, 55.0, 137.0), + 1170: (182.0, 19.0, 117.0), + 1171: (134.0, 76.0, 14.0), + 1172: (87.0, 185.0, 28.0), + 1173: (82.0, 224.0, 187.0), + 1174: (92.0, 110.0, 214.0), + 1175: (168.0, 80.0, 171.0), + 1176: (197.0, 63.0, 51.0), + 1178: (175.0, 199.0, 77.0), + 1179: (62.0, 180.0, 98.0), + 1180: (8.0, 91.0, 150.0), + 1181: (77.0, 15.0, 130.0), + 1182: (154.0, 65.0, 96.0), + 1183: (197.0, 152.0, 11.0), + 1184: (59.0, 155.0, 45.0), + 1185: (12.0, 147.0, 145.0), + 1186: (54.0, 35.0, 219.0), + 1187: (210.0, 73.0, 181.0), + 1188: (221.0, 124.0, 77.0), + 1189: (149.0, 214.0, 66.0), + 1190: (72.0, 185.0, 134.0), + 1191: (42.0, 94.0, 198.0), +} + +# For instance segmentation the non-object categories +VALID_PANOPTIC_IDS = (1, 3) + +CLASS_LABELS_PANOPTIC = ("wall", "floor") diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_splits.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_splits.py new file mode 100644 index 0000000..39ccc3c --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_splits.py @@ -0,0 +1,625 @@ +# This file contains the HEAD - COMMON - TAIL split category ids for ScanNet 200 + +HEAD_CATS_SCANNET_200 = [ + "tv stand", + "curtain", + "blinds", + "shower curtain", + "bookshelf", + "tv", + "kitchen cabinet", + "pillow", + "lamp", + "dresser", + "monitor", + "object", + "ceiling", + "board", + "stove", + "closet wall", + "couch", + "office chair", + "kitchen counter", + "shower", + "closet", + "doorframe", + "sofa chair", + "mailbox", + "nightstand", + "washing machine", + "picture", + "book", + "sink", + "recycling bin", + "table", + "backpack", + "shower wall", + "toilet", + "copier", + "counter", + "stool", + "refrigerator", + "window", + "file cabinet", + "chair", + "wall", + "plant", + "coffee table", + "stairs", + "armchair", + "cabinet", + "bathroom vanity", + "bathroom stall", + "mirror", + "blackboard", + "trash can", + "stair rail", + "box", + "towel", + "door", + "clothes", + "whiteboard", + "bed", + "floor", + "bathtub", + "desk", + "wardrobe", + "clothes dryer", + "radiator", + "shelf", +] +COMMON_CATS_SCANNET_200 = [ + "cushion", + "end table", + "dining table", + "keyboard", + "bag", + "toilet paper", + "printer", + "blanket", + "microwave", + "shoe", + "computer tower", + "bottle", + "bin", + "ottoman", + "bench", + "basket", + "fan", + "laptop", + "person", + "paper towel dispenser", + "oven", + "rack", + "piano", + "suitcase", + "rail", + "container", + "telephone", + "stand", + "light", + "laundry basket", + "pipe", + "seat", + "column", + "bicycle", + "ladder", + "jacket", + "storage bin", + "coffee maker", + "dishwasher", + "machine", + "mat", + "windowsill", + "bulletin board", + "fireplace", + "mini fridge", + "water cooler", + "shower door", + "pillar", + "ledge", + "furniture", + "cart", + "decoration", + "closet door", + "vacuum cleaner", + "dish rack", + "range hood", + "projector screen", + "divider", + "bathroom counter", + "laundry hamper", + "bathroom stall door", + "ceiling light", + "trash bin", + "bathroom cabinet", + "structure", + "storage organizer", + "potted plant", + "mattress", +] +TAIL_CATS_SCANNET_200 = [ + "paper", + "plate", + "soap dispenser", + "bucket", + "clock", + "guitar", + "toilet paper holder", + "speaker", + "cup", + "paper towel roll", + "bar", + "toaster", + "ironing board", + "soap dish", + "toilet paper dispenser", + "fire extinguisher", + "ball", + "hat", + "shower curtain rod", + "paper cutter", + "tray", + "toaster oven", + "mouse", + "toilet seat cover dispenser", + "storage container", + "scale", + "tissue box", + "light switch", + "crate", + "power outlet", + "sign", + "projector", + "candle", + "plunger", + "stuffed animal", + "headphones", + "broom", + "guitar case", + "dustpan", + "hair dryer", + "water bottle", + "handicap bar", + "purse", + "vent", + "shower floor", + "water pitcher", + "bowl", + "paper bag", + "alarm clock", + "music stand", + "laundry detergent", + "dumbbell", + "tube", + "cd case", + "closet rod", + "coffee kettle", + "shower head", + "keyboard piano", + "case of water bottles", + "coat rack", + "folded chair", + "fire alarm", + "power strip", + "calendar", + "poster", + "luggage", +] + + +# Given the different size of the official train and val sets, not all ScanNet200 categories are present in the validation set. +# Here we list of categories with labels and IDs present in both train and validation set, and the remaining categories those are present in train, but not in val +# We dont evaluate on unseen validation categories in this benchmark + +VALID_CLASS_IDS_200_VALIDATION = ( + "wall", + "chair", + "floor", + "table", + "door", + "couch", + "cabinet", + "shelf", + "desk", + "office chair", + "bed", + "pillow", + "sink", + "picture", + "window", + "toilet", + "bookshelf", + "monitor", + "curtain", + "book", + "armchair", + "coffee table", + "box", + "refrigerator", + "lamp", + "kitchen cabinet", + "towel", + "clothes", + "tv", + "nightstand", + "counter", + "dresser", + "stool", + "cushion", + "plant", + "ceiling", + "bathtub", + "end table", + "dining table", + "keyboard", + "bag", + "backpack", + "toilet paper", + "printer", + "tv stand", + "whiteboard", + "blanket", + "shower curtain", + "trash can", + "closet", + "stairs", + "microwave", + "stove", + "shoe", + "computer tower", + "bottle", + "bin", + "ottoman", + "bench", + "board", + "washing machine", + "mirror", + "copier", + "basket", + "sofa chair", + "file cabinet", + "fan", + "laptop", + "shower", + "paper", + "person", + "paper towel dispenser", + "oven", + "blinds", + "rack", + "plate", + "blackboard", + "piano", + "suitcase", + "rail", + "radiator", + "recycling bin", + "container", + "wardrobe", + "soap dispenser", + "telephone", + "bucket", + "clock", + "stand", + "light", + "laundry basket", + "pipe", + "clothes dryer", + "guitar", + "toilet paper holder", + "seat", + "speaker", + "column", + "ladder", + "bathroom stall", + "shower wall", + "cup", + "jacket", + "storage bin", + "coffee maker", + "dishwasher", + "paper towel roll", + "machine", + "mat", + "windowsill", + "bar", + "toaster", + "bulletin board", + "ironing board", + "fireplace", + "soap dish", + "kitchen counter", + "doorframe", + "toilet paper dispenser", + "mini fridge", + "fire extinguisher", + "ball", + "hat", + "shower curtain rod", + "water cooler", + "paper cutter", + "tray", + "shower door", + "pillar", + "ledge", + "toaster oven", + "mouse", + "toilet seat cover dispenser", + "furniture", + "cart", + "scale", + "tissue box", + "light switch", + "crate", + "power outlet", + "decoration", + "sign", + "projector", + "closet door", + "vacuum cleaner", + "plunger", + "stuffed animal", + "headphones", + "dish rack", + "broom", + "range hood", + "dustpan", + "hair dryer", + "water bottle", + "handicap bar", + "vent", + "shower floor", + "water pitcher", + "mailbox", + "bowl", + "paper bag", + "projector screen", + "divider", + "laundry detergent", + "bathroom counter", + "object", + "bathroom vanity", + "closet wall", + "laundry hamper", + "bathroom stall door", + "ceiling light", + "trash bin", + "dumbbell", + "stair rail", + "tube", + "bathroom cabinet", + "closet rod", + "coffee kettle", + "shower head", + "keyboard piano", + "case of water bottles", + "coat rack", + "folded chair", + "fire alarm", + "power strip", + "calendar", + "poster", + "potted plant", + "mattress", +) + +CLASS_LABELS_200_VALIDATION = ( + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 21, + 22, + 23, + 24, + 26, + 27, + 28, + 29, + 31, + 32, + 33, + 34, + 35, + 36, + 38, + 39, + 40, + 41, + 42, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 54, + 55, + 56, + 57, + 58, + 59, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 82, + 84, + 86, + 87, + 88, + 89, + 90, + 93, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 110, + 112, + 115, + 116, + 118, + 120, + 122, + 125, + 128, + 130, + 131, + 132, + 134, + 136, + 138, + 139, + 140, + 141, + 145, + 148, + 154, + 155, + 156, + 157, + 159, + 161, + 163, + 165, + 166, + 168, + 169, + 170, + 177, + 180, + 185, + 188, + 191, + 193, + 195, + 202, + 208, + 213, + 214, + 229, + 230, + 232, + 233, + 242, + 250, + 261, + 264, + 276, + 283, + 300, + 304, + 312, + 323, + 325, + 342, + 356, + 370, + 392, + 395, + 408, + 417, + 488, + 540, + 562, + 570, + 609, + 748, + 776, + 1156, + 1163, + 1164, + 1165, + 1166, + 1167, + 1168, + 1169, + 1170, + 1171, + 1172, + 1173, + 1175, + 1176, + 1179, + 1180, + 1181, + 1182, + 1184, + 1185, + 1186, + 1187, + 1188, + 1189, + 1191, +) + +VALID_CLASS_IDS_200_TRAIN_ONLY = ( + "bicycle", + "storage container", + "candle", + "guitar case", + "purse", + "alarm clock", + "music stand", + "cd case", + "structure", + "storage organizer", + "luggage", +) + +CLASS_LABELS_200_TRAIN_ONLY = ( + 121, + 221, + 286, + 331, + 399, + 572, + 581, + 1174, + 1178, + 1183, + 1190, +) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet_means.npz b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet_means.npz new file mode 100644 index 0000000000000000000000000000000000000000..e57647c9a3553ca4653a9d1e53ed4a2a58def822 GIT binary patch literal 676 zcmWIWW@Zs#fB;2?qMfn(4VV}hK$sIKm{?R4Z=jb~P&wHz)HfiKk)e#CT0JGTIJrpO zNVaPx7@2E}8$<-lIU?aSr=G`p*2`D_wHFi=4Ad6?Z2wg7 zT-4wBkL}%0{@(Oq&0~8dx7n)%|2(tb8@ImvNANfMQmeV|P8j~M=W9QlxM}k%d#067 zUa&{JxBoLix428>qy623b*tSSpV$lOrYR*cKeT7o{Lg!y@3DP|USJPD!yo(h`{xDw z&%CprZTw4G*5JAQt*;YQe#U*W=YO@prfcCV`xdJe0b(aV*c%%g_q#Iuwy*#AW9BTL z)ApCn)_;q8earr|Y)f&z;Y0i57fRxnOMSNohP6OR)Ia;sqByq**NgVcY$r}^ulQi! zH{+X)O!+sExn{*IZ|vWC+dbI9_RT)O$!tdV_b2wkTMovrc=pTwslw+=kseR&J0DEc zt3CM4-oiOsUFyUa`~8c<*;F6ivQO-)nYPC1hW(a21J<3af9!2LeYE%ddt~pgbh)GX z^&@*-e 0 else 0 + label_id200 = labels_pd[labels_pd["raw_category"] == label]["id"] + label_id200 = int(label_id200.iloc[0]) if len(label_id200) > 0 else 0 + + # Only store for the valid categories + if label_id20 in CLASS_IDS20: + label_id20 = CLASS_IDS20.index(label_id20) + else: + label_id20 = IGNORE_INDEX + + if label_id200 in CLASS_IDS200: + label_id200 = CLASS_IDS200.index(label_id200) + else: + label_id200 = IGNORE_INDEX + + # get points, where segment indices (points labelled with segment ids) are in the group segment list + point_idx = np.where(np.isin(seg_indices, group_segments))[0] + return point_idx, label_id20, label_id200 + + +def face_normal(vertex, face): + v01 = vertex[face[:, 1]] - vertex[face[:, 0]] + v02 = vertex[face[:, 2]] - vertex[face[:, 0]] + vec = np.cross(v01, v02) + length = np.sqrt(np.sum(vec**2, axis=1, keepdims=True)) + 1.0e-8 + nf = vec / length + area = length * 0.5 + return nf, area + + +def vertex_normal(vertex, face): + nf, area = face_normal(vertex, face) + nf = nf * area + + nv = np.zeros_like(vertex) + for i in range(face.shape[0]): + nv[face[i]] += nf[i] + + length = np.sqrt(np.sum(nv**2, axis=1, keepdims=True)) + 1.0e-8 + nv = nv / length + return nv + + +def handle_process( + scene_path, output_path, labels_pd, train_scenes, val_scenes, parse_normals=True +): + scene_id = os.path.basename(scene_path) + mesh_path = os.path.join(scene_path, f"{scene_id}{CLOUD_FILE_PFIX}.ply") + segments_file = os.path.join( + scene_path, f"{scene_id}{CLOUD_FILE_PFIX}{SEGMENTS_FILE_PFIX}" + ) + aggregations_file = os.path.join(scene_path, f"{scene_id}{AGGREGATIONS_FILE_PFIX}") + info_file = os.path.join(scene_path, f"{scene_id}.txt") + + if scene_id in train_scenes: + output_path = os.path.join(output_path, "train", f"{scene_id}") + split_name = "train" + elif scene_id in val_scenes: + output_path = os.path.join(output_path, "val", f"{scene_id}") + split_name = "val" + else: + output_path = os.path.join(output_path, "test", f"{scene_id}") + split_name = "test" + + print(f"Processing: {scene_id} in {split_name}") + + vertices, faces = read_plymesh(mesh_path) + coords = vertices[:, :3] + colors = vertices[:, 3:6] + save_dict = dict( + coord=coords.astype(np.float32), + color=colors.astype(np.uint8), + ) + + # # Rotating the mesh to axis aligned + # info_dict = {} + # with open(info_file) as f: + # for line in f: + # (key, val) = line.split(" = ") + # info_dict[key] = np.fromstring(val, sep=' ') + # + # if 'axisAlignment' not in info_dict: + # rot_matrix = np.identity(4) + # else: + # rot_matrix = info_dict['axisAlignment'].reshape(4, 4) + # r_coords = coords.transpose() + # r_coords = np.append(r_coords, np.ones((1, r_coords.shape[1])), axis=0) + # r_coords = np.dot(rot_matrix, r_coords) + # coords = r_coords + + # Parse Normals + if parse_normals: + save_dict["normal"] = vertex_normal(coords, faces).astype(np.float32) + + # Load segments file + if split_name != "test": + with open(segments_file) as f: + segments = json.load(f) + seg_indices = np.array(segments["segIndices"]) + + # Load Aggregations file + with open(aggregations_file) as f: + aggregation = json.load(f) + seg_groups = np.array(aggregation["segGroups"]) + + # Generate new labels + semantic_gt20 = np.ones((vertices.shape[0]), dtype=np.int16) * IGNORE_INDEX + semantic_gt200 = np.ones((vertices.shape[0]), dtype=np.int16) * IGNORE_INDEX + instance_ids = np.ones((vertices.shape[0]), dtype=np.int16) * IGNORE_INDEX + for group in seg_groups: + point_idx, label_id20, label_id200 = point_indices_from_group( + seg_indices, group, labels_pd + ) + + semantic_gt20[point_idx] = label_id20 + semantic_gt200[point_idx] = label_id200 + instance_ids[point_idx] = group["id"] + + semantic_gt20 = semantic_gt20.astype(int) + semantic_gt200 = semantic_gt200.astype(int) + instance_ids = instance_ids.astype(int) + + save_dict["segment20"] = semantic_gt20 + save_dict["segment200"] = semantic_gt200 + save_dict["instance"] = instance_ids + + # Concatenate with original cloud + processed_vertices = np.hstack((semantic_gt200, instance_ids)) + + if np.any(np.isnan(processed_vertices)) or not np.all( + np.isfinite(processed_vertices) + ): + raise ValueError(f"Find NaN in Scene: {scene_id}") + + # Save processed data + os.makedirs(output_path, exist_ok=True) + for key in save_dict.keys(): + np.save(os.path.join(output_path, f"{key}.npy"), save_dict[key]) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--dataset_root", + required=True, + help="Path to the ScanNet dataset containing scene folders", + ) + parser.add_argument( + "--output_root", + required=True, + help="Output path where train/val folders will be located", + ) + parser.add_argument( + "--parse_normals", default=True, type=bool, help="Whether parse point normals" + ) + parser.add_argument( + "--num_workers", + default=mp.cpu_count(), + type=int, + help="Num workers for preprocessing.", + ) + config = parser.parse_args() + meta_root = Path(os.path.dirname(__file__)) / "meta_data" + + # Load label map + labels_pd = pd.read_csv( + meta_root / "scannetv2-labels.combined.tsv", + sep="\t", + header=0, + ) + + # Load train/val splits + with open(meta_root / "scannetv2_train.txt") as train_file: + train_scenes = train_file.read().splitlines() + with open(meta_root / "scannetv2_val.txt") as val_file: + val_scenes = val_file.read().splitlines() + + # Create output directories + train_output_dir = os.path.join(config.output_root, "train") + os.makedirs(train_output_dir, exist_ok=True) + val_output_dir = os.path.join(config.output_root, "val") + os.makedirs(val_output_dir, exist_ok=True) + test_output_dir = os.path.join(config.output_root, "test") + os.makedirs(test_output_dir, exist_ok=True) + + # Load scene paths + scene_paths = sorted(glob.glob(config.dataset_root + "/scans*/scene*")) + + # Preprocess data. + print("Processing scenes...") + pool = ProcessPoolExecutor(max_workers=config.num_workers) + _ = list( + pool.map( + handle_process, + scene_paths, + repeat(config.output_root), + repeat(labels_pd), + repeat(train_scenes), + repeat(val_scenes), + repeat(config.parse_normals), + ) + ) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/scannet.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/scannet.py new file mode 100644 index 0000000..35d4606 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/datasets/scannet.py @@ -0,0 +1,118 @@ +""" +ScanNet20 / ScanNet200 / ScanNet Data Efficient Dataset + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import os +import glob +import numpy as np +import torch +from copy import deepcopy +from torch.utils.data import Dataset +from collections.abc import Sequence + +from pointcept.utils.logger import get_root_logger +from pointcept.utils.cache import shared_dict +from .builder import DATASETS +from .defaults import DefaultDataset +from .transform import Compose, TRANSFORMS +from .preprocessing.scannet.meta_data.scannet200_constants import ( + VALID_CLASS_IDS_20, + VALID_CLASS_IDS_200, +) + + +@DATASETS.register_module() +class ScanNetDataset(DefaultDataset): + VALID_ASSETS = [ + "coord", + "color", + "normal", + "segment20", + "instance", + ] + class2id = np.array(VALID_CLASS_IDS_20) + + def __init__( + self, + lr_file=None, + la_file=None, + **kwargs, + ): + self.lr = np.loadtxt(lr_file, dtype=str) if lr_file is not None else None + self.la = torch.load(la_file) if la_file is not None else None + super().__init__(**kwargs) + + def get_data_list(self): + if self.lr is None: + data_list = super().get_data_list() + else: + data_list = [ + os.path.join(self.data_root, "train", name) for name in self.lr + ] + return data_list + + def get_data(self, idx): + data_path = self.data_list[idx % len(self.data_list)] + name = self.get_data_name(idx) + split = self.get_split_name(idx) + if self.cache: + cache_name = f"pointcept-{name}" + return shared_dict(cache_name) + + data_dict = {} + assets = os.listdir(data_path) + for asset in assets: + if not asset.endswith(".npy"): + continue + if asset[:-4] not in self.VALID_ASSETS: + continue + data_dict[asset[:-4]] = np.load(os.path.join(data_path, asset)) + data_dict["name"] = name + data_dict["split"] = split + data_dict["coord"] = data_dict["coord"].astype(np.float32) + data_dict["color"] = data_dict["color"].astype(np.float32) + data_dict["normal"] = data_dict["normal"].astype(np.float32) + + if "segment20" in data_dict.keys(): + data_dict["segment"] = ( + data_dict.pop("segment20").reshape([-1]).astype(np.int32) + ) + elif "segment200" in data_dict.keys(): + data_dict["segment"] = ( + data_dict.pop("segment200").reshape([-1]).astype(np.int32) + ) + else: + data_dict["segment"] = ( + np.ones(data_dict["coord"].shape[0], dtype=np.int32) * -1 + ) + + if "instance" in data_dict.keys(): + data_dict["instance"] = ( + data_dict.pop("instance").reshape([-1]).astype(np.int32) + ) + else: + data_dict["instance"] = ( + np.ones(data_dict["coord"].shape[0], dtype=np.int32) * -1 + ) + if self.la: + sampled_index = self.la[self.get_data_name(idx)] + mask = np.ones_like(data_dict["segment"], dtype=bool) + mask[sampled_index] = False + data_dict["segment"][mask] = self.ignore_index + data_dict["sampled_index"] = sampled_index + return data_dict + + +@DATASETS.register_module() +class ScanNet200Dataset(ScanNetDataset): + VALID_ASSETS = [ + "coord", + "color", + "normal", + "segment200", + "instance", + ] + class2id = np.array(VALID_CLASS_IDS_200) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/transform.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/transform.py new file mode 100644 index 0000000..8073c76 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/datasets/transform.py @@ -0,0 +1,1457 @@ +""" +3D point cloud augmentation + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com), Yujia Zhang (yujia.zhang.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import random +import numbers +import scipy +import scipy.ndimage +import scipy.interpolate +import scipy.stats +import numpy as np +import torch +from torchvision import transforms +import copy +from collections.abc import Sequence, Mapping +from pointcept.utils.registry import Registry + +TRANSFORMS = Registry("transforms") + + +def index_operator(data_dict, index, duplicate=False): + # index selection operator for keys in "index_valid_keys" + # custom these keys by "Update" transform in config + if "index_valid_keys" not in data_dict: + data_dict["index_valid_keys"] = [ + "coord", + "color", + "normal", + "superpoint", + "strength", + "segment", + "instance", + ] + if not duplicate: + for key in data_dict["index_valid_keys"]: + if key in data_dict: + data_dict[key] = data_dict[key][index] + return data_dict + else: + data_dict_ = dict() + for key in data_dict.keys(): + if key in data_dict["index_valid_keys"]: + data_dict_[key] = data_dict[key][index] + elif key == "index_valid_keys": + data_dict_[key] = copy.copy(data_dict[key]) + else: + data_dict_[key] = data_dict[key] + return data_dict_ + + +@TRANSFORMS.register_module() +class Collect(object): + def __init__(self, keys, offset_keys_dict=None, **kwargs): + """ + e.g. Collect(keys=[coord], feat_keys=[coord, color]) + """ + if offset_keys_dict is None: + offset_keys_dict = dict(offset="coord") + self.keys = keys + self.offset_keys = offset_keys_dict + self.kwargs = kwargs + + def __call__(self, data_dict): + data = dict() + if isinstance(self.keys, str): + self.keys = [self.keys] + for key in self.keys: + data[key] = data_dict[key] + for key, value in self.offset_keys.items(): + data[key] = torch.tensor([data_dict[value].shape[0]]) + for name, keys in self.kwargs.items(): + name = name.replace("_keys", "") + assert isinstance(keys, Sequence) + data[name] = torch.cat([data_dict[key].float() for key in keys], dim=1) + return data + + +@TRANSFORMS.register_module() +class Copy(object): + def __init__(self, keys_dict=None): + if keys_dict is None: + keys_dict = dict(coord="origin_coord", segment="origin_segment") + self.keys_dict = keys_dict + + def __call__(self, data_dict): + for key, value in self.keys_dict.items(): + if isinstance(data_dict[key], np.ndarray): + data_dict[value] = data_dict[key].copy() + elif isinstance(data_dict[key], torch.Tensor): + data_dict[value] = data_dict[key].clone().detach() + else: + data_dict[value] = copy.deepcopy(data_dict[key]) + return data_dict + + +@TRANSFORMS.register_module() +class Update(object): + def __init__(self, keys_dict=None): + if keys_dict is None: + keys_dict = dict() + self.keys_dict = keys_dict + + def __call__(self, data_dict): + for key, value in self.keys_dict.items(): + data_dict[key] = value + return data_dict + + +@TRANSFORMS.register_module() +class ToTensor(object): + def __call__(self, data): + if isinstance(data, torch.Tensor): + return data + elif isinstance(data, str): + # note that str is also a kind of sequence, judgement should before sequence + return data + elif isinstance(data, int): + return torch.LongTensor([data]) + elif isinstance(data, float): + return torch.FloatTensor([data]) + elif isinstance(data, np.ndarray) and np.issubdtype(data.dtype, bool): + return torch.from_numpy(data) + elif isinstance(data, np.ndarray) and np.issubdtype(data.dtype, np.integer): + return torch.from_numpy(data).long() + elif isinstance(data, np.ndarray) and np.issubdtype(data.dtype, np.floating): + return torch.from_numpy(data).float() + elif isinstance(data, Mapping): + result = {sub_key: self(item) for sub_key, item in data.items()} + return result + elif isinstance(data, Sequence): + result = [self(item) for item in data] + return result + else: + raise TypeError(f"type {type(data)} cannot be converted to tensor.") + + +@TRANSFORMS.register_module() +class NormalizeColor(object): + def __call__(self, data_dict): + if "color" in data_dict.keys(): + data_dict["color"] = data_dict["color"] / 255 + return data_dict + + +@TRANSFORMS.register_module() +class NormalizeCoord(object): + def __call__(self, data_dict): + if "coord" in data_dict.keys(): + # modified from pointnet2 + centroid = np.mean(data_dict["coord"], axis=0) + data_dict["coord"] -= centroid + m = np.max(np.sqrt(np.sum(data_dict["coord"] ** 2, axis=1))) + data_dict["coord"] = data_dict["coord"] / m + return data_dict + + +@TRANSFORMS.register_module() +class PositiveShift(object): + def __call__(self, data_dict): + if "coord" in data_dict.keys(): + coord_min = np.min(data_dict["coord"], 0) + data_dict["coord"] -= coord_min + return data_dict + + +@TRANSFORMS.register_module() +class CenterShift(object): + def __init__(self, apply_z=True): + self.apply_z = apply_z + + def __call__(self, data_dict): + if "coord" in data_dict.keys(): + x_min, y_min, z_min = data_dict["coord"].min(axis=0) + x_max, y_max, _ = data_dict["coord"].max(axis=0) + if self.apply_z: + shift = [(x_min + x_max) / 2, (y_min + y_max) / 2, z_min] + else: + shift = [(x_min + x_max) / 2, (y_min + y_max) / 2, 0] + data_dict["coord"] -= shift + return data_dict + + +@TRANSFORMS.register_module() +class RandomShift(object): + def __init__(self, shift=((-0.2, 0.2), (-0.2, 0.2), (0, 0))): + self.shift = shift + + def __call__(self, data_dict): + if "coord" in data_dict.keys(): + shift_x = np.random.uniform(self.shift[0][0], self.shift[0][1]) + shift_y = np.random.uniform(self.shift[1][0], self.shift[1][1]) + shift_z = np.random.uniform(self.shift[2][0], self.shift[2][1]) + data_dict["coord"] += [shift_x, shift_y, shift_z] + return data_dict + + +@TRANSFORMS.register_module() +class PointClip(object): + def __init__(self, point_cloud_range=(-80, -80, -3, 80, 80, 1)): + self.point_cloud_range = point_cloud_range + + def __call__(self, data_dict): + if "coord" in data_dict.keys(): + data_dict["coord"] = np.clip( + data_dict["coord"], + a_min=self.point_cloud_range[:3], + a_max=self.point_cloud_range[3:], + ) + return data_dict + + +@TRANSFORMS.register_module() +class RandomDropout(object): + def __init__(self, dropout_ratio=0.2, dropout_application_ratio=0.5): + """ + upright_axis: axis index among x,y,z, i.e. 2 for z + """ + self.dropout_ratio = dropout_ratio + self.dropout_application_ratio = dropout_application_ratio + + def __call__(self, data_dict): + if random.random() < self.dropout_application_ratio: + n = len(data_dict["coord"]) + idx = np.random.choice(n, int(n * (1 - self.dropout_ratio)), replace=False) + if "sampled_index" in data_dict: + # for ScanNet data efficient, we need to make sure labeled point is sampled. + idx = np.unique(np.append(idx, data_dict["sampled_index"])) + mask = np.zeros_like(data_dict["segment"]).astype(bool) + mask[data_dict["sampled_index"]] = True + data_dict["sampled_index"] = np.where(mask[idx])[0] + data_dict = index_operator(data_dict, idx) + return data_dict + + +@TRANSFORMS.register_module() +class RandomRotate(object): + def __init__(self, angle=None, center=None, axis="z", always_apply=False, p=0.5): + self.angle = [-1, 1] if angle is None else angle + self.axis = axis + self.always_apply = always_apply + self.p = p if not self.always_apply else 1 + self.center = center + + def __call__(self, data_dict): + if random.random() > self.p: + return data_dict + angle = np.random.uniform(self.angle[0], self.angle[1]) * np.pi + rot_cos, rot_sin = np.cos(angle), np.sin(angle) + if self.axis == "x": + rot_t = np.array([[1, 0, 0], [0, rot_cos, -rot_sin], [0, rot_sin, rot_cos]]) + elif self.axis == "y": + rot_t = np.array([[rot_cos, 0, rot_sin], [0, 1, 0], [-rot_sin, 0, rot_cos]]) + elif self.axis == "z": + rot_t = np.array([[rot_cos, -rot_sin, 0], [rot_sin, rot_cos, 0], [0, 0, 1]]) + else: + raise NotImplementedError + if "coord" in data_dict.keys(): + if self.center is None: + x_min, y_min, z_min = data_dict["coord"].min(axis=0) + x_max, y_max, z_max = data_dict["coord"].max(axis=0) + center = [(x_min + x_max) / 2, (y_min + y_max) / 2, (z_min + z_max) / 2] + else: + center = self.center + data_dict["coord"] -= center + data_dict["coord"] = np.dot(data_dict["coord"], np.transpose(rot_t)) + data_dict["coord"] += center + if "normal" in data_dict.keys(): + data_dict["normal"] = np.dot(data_dict["normal"], np.transpose(rot_t)) + return data_dict + + +@TRANSFORMS.register_module() +class RandomRotateTargetAngle(object): + def __init__( + self, angle=(1 / 2, 1, 3 / 2), center=None, axis="z", always_apply=False, p=0.75 + ): + self.angle = angle + self.axis = axis + self.always_apply = always_apply + self.p = p if not self.always_apply else 1 + self.center = center + + def __call__(self, data_dict): + if random.random() > self.p: + return data_dict + angle = np.random.choice(self.angle) * np.pi + rot_cos, rot_sin = np.cos(angle), np.sin(angle) + if self.axis == "x": + rot_t = np.array([[1, 0, 0], [0, rot_cos, -rot_sin], [0, rot_sin, rot_cos]]) + elif self.axis == "y": + rot_t = np.array([[rot_cos, 0, rot_sin], [0, 1, 0], [-rot_sin, 0, rot_cos]]) + elif self.axis == "z": + rot_t = np.array([[rot_cos, -rot_sin, 0], [rot_sin, rot_cos, 0], [0, 0, 1]]) + else: + raise NotImplementedError + if "coord" in data_dict.keys(): + if self.center is None: + x_min, y_min, z_min = data_dict["coord"].min(axis=0) + x_max, y_max, z_max = data_dict["coord"].max(axis=0) + center = [(x_min + x_max) / 2, (y_min + y_max) / 2, (z_min + z_max) / 2] + else: + center = self.center + data_dict["coord"] -= center + data_dict["coord"] = np.dot(data_dict["coord"], np.transpose(rot_t)) + data_dict["coord"] += center + if "normal" in data_dict.keys(): + data_dict["normal"] = np.dot(data_dict["normal"], np.transpose(rot_t)) + return data_dict + + +@TRANSFORMS.register_module() +class RandomScale(object): + def __init__(self, scale=None, anisotropic=False): + self.scale = scale if scale is not None else [0.95, 1.05] + self.anisotropic = anisotropic + + def __call__(self, data_dict): + if "coord" in data_dict.keys(): + scale = np.random.uniform( + self.scale[0], self.scale[1], 3 if self.anisotropic else 1 + ) + data_dict["coord"] *= scale + return data_dict + + +@TRANSFORMS.register_module() +class RandomFlip(object): + def __init__(self, p=0.5): + self.p = p + + def __call__(self, data_dict): + if np.random.rand() < self.p: + if "coord" in data_dict.keys(): + data_dict["coord"][:, 0] = -data_dict["coord"][:, 0] + if "normal" in data_dict.keys(): + data_dict["normal"][:, 0] = -data_dict["normal"][:, 0] + if np.random.rand() < self.p: + if "coord" in data_dict.keys(): + data_dict["coord"][:, 1] = -data_dict["coord"][:, 1] + if "normal" in data_dict.keys(): + data_dict["normal"][:, 1] = -data_dict["normal"][:, 1] + return data_dict + + +@TRANSFORMS.register_module() +class RandomJitter(object): + def __init__(self, sigma=0.01, clip=0.05): + assert clip > 0 + self.sigma = sigma + self.clip = clip + + def __call__(self, data_dict): + if "coord" in data_dict.keys(): + jitter = np.clip( + self.sigma * np.random.randn(data_dict["coord"].shape[0], 3), + -self.clip, + self.clip, + ) + data_dict["coord"] += jitter + return data_dict + + +@TRANSFORMS.register_module() +class ClipGaussianJitter(object): + def __init__(self, scalar=0.02, store_jitter=False): + self.scalar = scalar + self.mean = np.mean(3) + self.cov = np.identity(3) + self.quantile = 1.96 + self.store_jitter = store_jitter + + def __call__(self, data_dict): + if "coord" in data_dict.keys(): + jitter = np.random.multivariate_normal( + self.mean, self.cov, data_dict["coord"].shape[0] + ) + jitter = self.scalar * np.clip(jitter / 1.96, -1, 1) + data_dict["coord"] += jitter + if self.store_jitter: + data_dict["jitter"] = jitter + return data_dict + + +@TRANSFORMS.register_module() +class ChromaticAutoContrast(object): + def __init__(self, p=0.2, blend_factor=None): + self.p = p + self.blend_factor = blend_factor + + def __call__(self, data_dict): + if "color" in data_dict.keys() and np.random.rand() < self.p: + lo = np.min(data_dict["color"], 0, keepdims=True) + hi = np.max(data_dict["color"], 0, keepdims=True) + scale = 255 / (hi - lo) + contrast_feat = (data_dict["color"][:, :3] - lo) * scale + blend_factor = ( + np.random.rand() if self.blend_factor is None else self.blend_factor + ) + data_dict["color"][:, :3] = (1 - blend_factor) * data_dict["color"][ + :, :3 + ] + blend_factor * contrast_feat + return data_dict + + +@TRANSFORMS.register_module() +class ChromaticTranslation(object): + def __init__(self, p=0.95, ratio=0.05): + self.p = p + self.ratio = ratio + + def __call__(self, data_dict): + if "color" in data_dict.keys() and np.random.rand() < self.p: + tr = (np.random.rand(1, 3) - 0.5) * 255 * 2 * self.ratio + data_dict["color"][:, :3] = np.clip(tr + data_dict["color"][:, :3], 0, 255) + return data_dict + + +@TRANSFORMS.register_module() +class ChromaticJitter(object): + def __init__(self, p=0.95, std=0.005): + self.p = p + self.std = std + + def __call__(self, data_dict): + if "color" in data_dict.keys() and np.random.rand() < self.p: + noise = np.random.randn(data_dict["color"].shape[0], 3) + noise *= self.std * 255 + data_dict["color"][:, :3] = np.clip( + noise + data_dict["color"][:, :3], 0, 255 + ) + return data_dict + + +@TRANSFORMS.register_module() +class RandomColorGrayScale(object): + def __init__(self, p): + self.p = p + + @staticmethod + def rgb_to_grayscale(color, num_output_channels=1): + if color.shape[-1] < 3: + raise TypeError( + "Input color should have at least 3 dimensions, but found {}".format( + color.shape[-1] + ) + ) + + if num_output_channels not in (1, 3): + raise ValueError("num_output_channels should be either 1 or 3") + + r, g, b = color[..., 0], color[..., 1], color[..., 2] + gray = (0.2989 * r + 0.587 * g + 0.114 * b).astype(color.dtype) + gray = np.expand_dims(gray, axis=-1) + + if num_output_channels == 3: + gray = np.broadcast_to(gray, color.shape) + + return gray + + def __call__(self, data_dict): + if np.random.rand() < self.p: + data_dict["color"] = self.rgb_to_grayscale(data_dict["color"], 3) + return data_dict + + +@TRANSFORMS.register_module() +class RandomColorJitter(object): + """ + Random Color Jitter for 3D point cloud (refer torchvision) + """ + + def __init__(self, brightness=0, contrast=0, saturation=0, hue=0, p=0.95): + self.brightness = self._check_input(brightness, "brightness") + self.contrast = self._check_input(contrast, "contrast") + self.saturation = self._check_input(saturation, "saturation") + self.hue = self._check_input( + hue, "hue", center=0, bound=(-0.5, 0.5), clip_first_on_zero=False + ) + self.p = p + + @staticmethod + def _check_input( + value, name, center=1, bound=(0, float("inf")), clip_first_on_zero=True + ): + if isinstance(value, numbers.Number): + if value < 0: + raise ValueError( + "If {} is a single number, it must be non negative.".format(name) + ) + value = [center - float(value), center + float(value)] + if clip_first_on_zero: + value[0] = max(value[0], 0.0) + elif isinstance(value, (tuple, list)) and len(value) == 2: + if not bound[0] <= value[0] <= value[1] <= bound[1]: + raise ValueError("{} values should be between {}".format(name, bound)) + else: + raise TypeError( + "{} should be a single number or a list/tuple with length 2.".format( + name + ) + ) + + # if value is 0 or (1., 1.) for brightness/contrast/saturation + # or (0., 0.) for hue, do nothing + if value[0] == value[1] == center: + value = None + return value + + @staticmethod + def blend(color1, color2, ratio): + ratio = float(ratio) + bound = 255.0 + return ( + (ratio * color1 + (1.0 - ratio) * color2) + .clip(0, bound) + .astype(color1.dtype) + ) + + @staticmethod + def rgb2hsv(rgb): + r, g, b = rgb[..., 0], rgb[..., 1], rgb[..., 2] + maxc = np.max(rgb, axis=-1) + minc = np.min(rgb, axis=-1) + eqc = maxc == minc + cr = maxc - minc + s = cr / (np.ones_like(maxc) * eqc + maxc * (1 - eqc)) + cr_divisor = np.ones_like(maxc) * eqc + cr * (1 - eqc) + rc = (maxc - r) / cr_divisor + gc = (maxc - g) / cr_divisor + bc = (maxc - b) / cr_divisor + + hr = (maxc == r) * (bc - gc) + hg = ((maxc == g) & (maxc != r)) * (2.0 + rc - bc) + hb = ((maxc != g) & (maxc != r)) * (4.0 + gc - rc) + h = hr + hg + hb + h = (h / 6.0 + 1.0) % 1.0 + return np.stack((h, s, maxc), axis=-1) + + @staticmethod + def hsv2rgb(hsv): + h, s, v = hsv[..., 0], hsv[..., 1], hsv[..., 2] + i = np.floor(h * 6.0) + f = (h * 6.0) - i + i = i.astype(np.int32) + + p = np.clip((v * (1.0 - s)), 0.0, 1.0) + q = np.clip((v * (1.0 - s * f)), 0.0, 1.0) + t = np.clip((v * (1.0 - s * (1.0 - f))), 0.0, 1.0) + i = i % 6 + mask = np.expand_dims(i, axis=-1) == np.arange(6) + + a1 = np.stack((v, q, p, p, t, v), axis=-1) + a2 = np.stack((t, v, v, q, p, p), axis=-1) + a3 = np.stack((p, p, t, v, v, q), axis=-1) + a4 = np.stack((a1, a2, a3), axis=-1) + + return np.einsum("...na, ...nab -> ...nb", mask.astype(hsv.dtype), a4) + + def adjust_brightness(self, color, brightness_factor): + if brightness_factor < 0: + raise ValueError( + "brightness_factor ({}) is not non-negative.".format(brightness_factor) + ) + + return self.blend(color, np.zeros_like(color), brightness_factor) + + def adjust_contrast(self, color, contrast_factor): + if contrast_factor < 0: + raise ValueError( + "contrast_factor ({}) is not non-negative.".format(contrast_factor) + ) + mean = np.mean(RandomColorGrayScale.rgb_to_grayscale(color)) + return self.blend(color, mean, contrast_factor) + + def adjust_saturation(self, color, saturation_factor): + if saturation_factor < 0: + raise ValueError( + "saturation_factor ({}) is not non-negative.".format(saturation_factor) + ) + gray = RandomColorGrayScale.rgb_to_grayscale(color) + return self.blend(color, gray, saturation_factor) + + def adjust_hue(self, color, hue_factor): + if not (-0.5 <= hue_factor <= 0.5): + raise ValueError( + "hue_factor ({}) is not in [-0.5, 0.5].".format(hue_factor) + ) + orig_dtype = color.dtype + hsv = self.rgb2hsv(color / 255.0) + h, s, v = hsv[..., 0], hsv[..., 1], hsv[..., 2] + h = (h + hue_factor) % 1.0 + hsv = np.stack((h, s, v), axis=-1) + color_hue_adj = (self.hsv2rgb(hsv) * 255.0).astype(orig_dtype) + return color_hue_adj + + @staticmethod + def get_params(brightness, contrast, saturation, hue): + fn_idx = torch.randperm(4) + b = ( + None + if brightness is None + else np.random.uniform(brightness[0], brightness[1]) + ) + c = None if contrast is None else np.random.uniform(contrast[0], contrast[1]) + s = ( + None + if saturation is None + else np.random.uniform(saturation[0], saturation[1]) + ) + h = None if hue is None else np.random.uniform(hue[0], hue[1]) + return fn_idx, b, c, s, h + + def __call__(self, data_dict): + ( + fn_idx, + brightness_factor, + contrast_factor, + saturation_factor, + hue_factor, + ) = self.get_params(self.brightness, self.contrast, self.saturation, self.hue) + + for fn_id in fn_idx: + if ( + fn_id == 0 + and brightness_factor is not None + and np.random.rand() < self.p + ): + data_dict["color"] = self.adjust_brightness( + data_dict["color"], brightness_factor + ) + elif ( + fn_id == 1 and contrast_factor is not None and np.random.rand() < self.p + ): + data_dict["color"] = self.adjust_contrast( + data_dict["color"], contrast_factor + ) + elif ( + fn_id == 2 + and saturation_factor is not None + and np.random.rand() < self.p + ): + data_dict["color"] = self.adjust_saturation( + data_dict["color"], saturation_factor + ) + elif fn_id == 3 and hue_factor is not None and np.random.rand() < self.p: + data_dict["color"] = self.adjust_hue(data_dict["color"], hue_factor) + return data_dict + + +@TRANSFORMS.register_module() +class HueSaturationTranslation(object): + @staticmethod + def rgb_to_hsv(rgb): + # Translated from source of colorsys.rgb_to_hsv + # r,g,b should be a numpy arrays with values between 0 and 255 + # rgb_to_hsv returns an array of floats between 0.0 and 1.0. + rgb = rgb.astype("float") + hsv = np.zeros_like(rgb) + # in case an RGBA array was passed, just copy the A channel + hsv[..., 3:] = rgb[..., 3:] + r, g, b = rgb[..., 0], rgb[..., 1], rgb[..., 2] + maxc = np.max(rgb[..., :3], axis=-1) + minc = np.min(rgb[..., :3], axis=-1) + hsv[..., 2] = maxc + mask = maxc != minc + hsv[mask, 1] = (maxc - minc)[mask] / maxc[mask] + rc = np.zeros_like(r) + gc = np.zeros_like(g) + bc = np.zeros_like(b) + rc[mask] = (maxc - r)[mask] / (maxc - minc)[mask] + gc[mask] = (maxc - g)[mask] / (maxc - minc)[mask] + bc[mask] = (maxc - b)[mask] / (maxc - minc)[mask] + hsv[..., 0] = np.select( + [r == maxc, g == maxc], [bc - gc, 2.0 + rc - bc], default=4.0 + gc - rc + ) + hsv[..., 0] = (hsv[..., 0] / 6.0) % 1.0 + return hsv + + @staticmethod + def hsv_to_rgb(hsv): + # Translated from source of colorsys.hsv_to_rgb + # h,s should be a numpy arrays with values between 0.0 and 1.0 + # v should be a numpy array with values between 0.0 and 255.0 + # hsv_to_rgb returns an array of uints between 0 and 255. + rgb = np.empty_like(hsv) + rgb[..., 3:] = hsv[..., 3:] + h, s, v = hsv[..., 0], hsv[..., 1], hsv[..., 2] + i = (h * 6.0).astype("uint8") + f = (h * 6.0) - i + p = v * (1.0 - s) + q = v * (1.0 - s * f) + t = v * (1.0 - s * (1.0 - f)) + i = i % 6 + conditions = [s == 0.0, i == 1, i == 2, i == 3, i == 4, i == 5] + rgb[..., 0] = np.select(conditions, [v, q, p, p, t, v], default=v) + rgb[..., 1] = np.select(conditions, [v, v, v, q, p, p], default=t) + rgb[..., 2] = np.select(conditions, [v, p, t, v, v, q], default=p) + return rgb.astype("uint8") + + def __init__(self, hue_max=0.5, saturation_max=0.2): + self.hue_max = hue_max + self.saturation_max = saturation_max + + def __call__(self, data_dict): + if "color" in data_dict.keys(): + # Assume color[:, :3] is rgb + hsv = HueSaturationTranslation.rgb_to_hsv(data_dict["color"][:, :3]) + hue_val = (np.random.rand() - 0.5) * 2 * self.hue_max + sat_ratio = 1 + (np.random.rand() - 0.5) * 2 * self.saturation_max + hsv[..., 0] = np.remainder(hue_val + hsv[..., 0] + 1, 1) + hsv[..., 1] = np.clip(sat_ratio * hsv[..., 1], 0, 1) + data_dict["color"][:, :3] = np.clip( + HueSaturationTranslation.hsv_to_rgb(hsv), 0, 255 + ) + return data_dict + + +@TRANSFORMS.register_module() +class RandomColorDrop(object): + def __init__(self, p=0.2, color_augment=0.0): + self.p = p + self.color_augment = color_augment + + def __call__(self, data_dict): + if "color" in data_dict.keys() and np.random.rand() < self.p: + data_dict["color"] *= self.color_augment + return data_dict + + def __repr__(self): + return "RandomColorDrop(color_augment: {}, p: {})".format( + self.color_augment, self.p + ) + + +@TRANSFORMS.register_module() +class ElasticDistortion(object): + def __init__(self, distortion_params=None): + self.distortion_params = ( + [[0.2, 0.4], [0.8, 1.6]] if distortion_params is None else distortion_params + ) + + @staticmethod + def elastic_distortion(coords, granularity, magnitude): + """ + Apply elastic distortion on sparse coordinate space. + pointcloud: numpy array of (number of points, at least 3 spatial dims) + granularity: size of the noise grid (in same scale[m/cm] as the voxel grid) + magnitude: noise multiplier + """ + blurx = np.ones((3, 1, 1, 1)).astype("float32") / 3 + blury = np.ones((1, 3, 1, 1)).astype("float32") / 3 + blurz = np.ones((1, 1, 3, 1)).astype("float32") / 3 + coords_min = coords.min(0) + + # Create Gaussian noise tensor of the size given by granularity. + noise_dim = ((coords - coords_min).max(0) // granularity).astype(int) + 3 + noise = np.random.randn(*noise_dim, 3).astype(np.float32) + + # Smoothing. + for _ in range(2): + noise = scipy.ndimage.filters.convolve( + noise, blurx, mode="constant", cval=0 + ) + noise = scipy.ndimage.filters.convolve( + noise, blury, mode="constant", cval=0 + ) + noise = scipy.ndimage.filters.convolve( + noise, blurz, mode="constant", cval=0 + ) + + # Trilinear interpolate noise filters for each spatial dimensions. + ax = [ + np.linspace(d_min, d_max, d) + for d_min, d_max, d in zip( + coords_min - granularity, + coords_min + granularity * (noise_dim - 2), + noise_dim, + ) + ] + interp = scipy.interpolate.RegularGridInterpolator( + ax, noise, bounds_error=False, fill_value=0 + ) + coords += interp(coords) * magnitude + return coords + + def __call__(self, data_dict): + if "coord" in data_dict.keys() and self.distortion_params is not None: + if random.random() < 0.95: + for granularity, magnitude in self.distortion_params: + data_dict["coord"] = self.elastic_distortion( + data_dict["coord"], granularity, magnitude + ) + return data_dict + + +@TRANSFORMS.register_module() +class GridSample(object): + def __init__( + self, + grid_size=0.05, + hash_type="fnv", + mode="train", + return_inverse=False, + return_grid_coord=False, + return_min_coord=False, + return_displacement=False, + project_displacement=False, + ): + self.grid_size = grid_size + self.hash = self.fnv_hash_vec if hash_type == "fnv" else self.ravel_hash_vec + assert mode in ["train", "test"] + self.mode = mode + self.return_inverse = return_inverse + self.return_grid_coord = return_grid_coord + self.return_min_coord = return_min_coord + self.return_displacement = return_displacement + self.project_displacement = project_displacement + + def __call__(self, data_dict): + assert "coord" in data_dict.keys() + scaled_coord = data_dict["coord"] / np.array(self.grid_size) + grid_coord = np.floor(scaled_coord).astype(int) + min_coord = grid_coord.min(0) + grid_coord -= min_coord + scaled_coord -= min_coord + min_coord = min_coord * np.array(self.grid_size) + key = self.hash(grid_coord) + idx_sort = np.argsort(key) + key_sort = key[idx_sort] + _, inverse, count = np.unique(key_sort, return_inverse=True, return_counts=True) + if self.mode == "train": # train mode + idx_select = ( + np.cumsum(np.insert(count, 0, 0)[0:-1]) + + np.random.randint(0, count.max(), count.size) % count + ) + idx_unique = idx_sort[idx_select] + if "sampled_index" in data_dict: + # for ScanNet data efficient, we need to make sure labeled point is sampled. + idx_unique = np.unique( + np.append(idx_unique, data_dict["sampled_index"]) + ) + mask = np.zeros_like(data_dict["segment"]).astype(bool) + mask[data_dict["sampled_index"]] = True + data_dict["sampled_index"] = np.where(mask[idx_unique])[0] + data_dict = index_operator(data_dict, idx_unique) + if self.return_inverse: + data_dict["inverse"] = np.zeros_like(inverse) + data_dict["inverse"][idx_sort] = inverse + if self.return_grid_coord: + data_dict["grid_coord"] = grid_coord[idx_unique] + if "grid_coord" not in data_dict["index_valid_keys"]: + data_dict["index_valid_keys"].append("grid_coord") + if self.return_min_coord: + data_dict["min_coord"] = min_coord.reshape([1, 3]) + if self.return_displacement: + displacement = ( + scaled_coord - grid_coord - 0.5 + ) # [0, 1] -> [-0.5, 0.5] displacement to center + if self.project_displacement: + displacement = np.sum( + displacement * data_dict["normal"], axis=-1, keepdims=True + ) + data_dict["displacement"] = displacement[idx_unique] + if "displacement" not in data_dict["index_valid_keys"]: + data_dict["index_valid_keys"].append("displacement") + return data_dict + + elif self.mode == "test": # test mode + data_part_list = [] + for i in range(count.max()): + idx_select = np.cumsum(np.insert(count, 0, 0)[0:-1]) + i % count + idx_part = idx_sort[idx_select] + data_part = index_operator(data_dict, idx_part, duplicate=True) + data_part["index"] = idx_part + if self.return_inverse: + data_part["inverse"] = np.zeros_like(inverse) + data_part["inverse"][idx_sort] = inverse + if self.return_grid_coord: + data_part["grid_coord"] = grid_coord[idx_part] + if "grid_coord" not in data_part["index_valid_keys"]: + data_part["index_valid_keys"].append("grid_coord") + if self.return_min_coord: + data_part["min_coord"] = min_coord.reshape([1, 3]) + if self.return_displacement: + displacement = ( + scaled_coord - grid_coord - 0.5 + ) # [0, 1] -> [-0.5, 0.5] displacement to center + if self.project_displacement: + displacement = np.sum( + displacement * data_dict["normal"], axis=-1, keepdims=True + ) + data_part["displacement"] = displacement[idx_part] + if "displacement" not in data_part["index_valid_keys"]: + data_part["index_valid_keys"].append("displacement") + data_part_list.append(data_part) + return data_part_list + else: + raise NotImplementedError + + @staticmethod + def ravel_hash_vec(arr): + """ + Ravel the coordinates after subtracting the min coordinates. + """ + assert arr.ndim == 2 + arr = arr.copy() + arr -= arr.min(0) + arr = arr.astype(np.uint64, copy=False) + arr_max = arr.max(0).astype(np.uint64) + 1 + + keys = np.zeros(arr.shape[0], dtype=np.uint64) + # Fortran style indexing + for j in range(arr.shape[1] - 1): + keys += arr[:, j] + keys *= arr_max[j + 1] + keys += arr[:, -1] + return keys + + @staticmethod + def fnv_hash_vec(arr): + """ + FNV64-1A + """ + assert arr.ndim == 2 + # Floor first for negative coordinates + arr = arr.copy() + arr = arr.astype(np.uint64, copy=False) + hashed_arr = np.uint64(14695981039346656037) * np.ones( + arr.shape[0], dtype=np.uint64 + ) + for j in range(arr.shape[1]): + hashed_arr *= np.uint64(1099511628211) + hashed_arr = np.bitwise_xor(hashed_arr, arr[:, j]) + return hashed_arr + + +@TRANSFORMS.register_module() +class SphereCrop(object): + def __init__(self, point_max=80000, sample_rate=None, mode="random"): + self.point_max = point_max + self.sample_rate = sample_rate + assert mode in ["random", "center", "all", "given"] + self.mode = mode + + def __call__(self, data_dict): + point_max = ( + int(self.sample_rate * data_dict["coord"].shape[0]) + if self.sample_rate is not None + else self.point_max + ) + + assert "coord" in data_dict.keys() + if data_dict["coord"].shape[0] > point_max: + if self.mode == "random": + center = data_dict["coord"][ + np.random.randint(data_dict["coord"].shape[0]) + ] + elif self.mode == "center": + center = data_dict["coord"][data_dict["coord"].shape[0] // 2] + elif self.mode == "given": + given_index = data_dict["correspondence"].reshape( + data_dict["correspondence"].shape[0], -1 + ) + given_index = np.all( + given_index != np.ones_like(given_index[0]) * -1, axis=1 + ) + given_coord = data_dict["coord"][given_index] + if given_coord.shape[0] == 0: + center = data_dict["coord"][ + np.random.randint(data_dict["coord"].shape[0]) + ] + else: + center = np.mean(given_coord, axis=0) + else: + raise NotImplementedError + idx_crop = np.argsort(np.sum(np.square(data_dict["coord"] - center), 1))[ + :point_max + ] + data_dict = index_operator(data_dict, idx_crop) + return data_dict + + +@TRANSFORMS.register_module() +class ShufflePoint(object): + def __call__(self, data_dict): + assert "coord" in data_dict.keys() + shuffle_index = np.arange(data_dict["coord"].shape[0]) + np.random.shuffle(shuffle_index) + data_dict = index_operator(data_dict, shuffle_index) + return data_dict + + +@TRANSFORMS.register_module() +class CropBoundary(object): + def __call__(self, data_dict): + assert "segment" in data_dict + segment = data_dict["segment"].flatten() + mask = (segment != 0) * (segment != 1) + data_dict = index_operator(data_dict, mask) + return data_dict + + +@TRANSFORMS.register_module() +class ContrastiveViewsGenerator(object): + def __init__( + self, + view_keys=("coord", "color", "normal", "origin_coord"), + view_trans_cfg=None, + ): + self.view_keys = view_keys + self.view_trans = Compose(view_trans_cfg) + + def __call__(self, data_dict): + view1_dict = dict() + view2_dict = dict() + for key in self.view_keys: + view1_dict[key] = data_dict[key].copy() + view2_dict[key] = data_dict[key].copy() + view1_dict = self.view_trans(view1_dict) + view2_dict = self.view_trans(view2_dict) + for key, value in view1_dict.items(): + data_dict["view1_" + key] = value + for key, value in view2_dict.items(): + data_dict["view2_" + key] = value + return data_dict + + +@TRANSFORMS.register_module() +class MultiViewGenerator(object): + def __init__( + self, + global_view_num=2, + global_view_scale=(0.4, 1.0), + local_view_num=4, + local_view_scale=(0.1, 0.4), + global_shared_transform=None, + global_transform=None, + local_transform=None, + max_size=65536, + enc2d_max_size=102400, + enc2d_scale=(0.8, 1), + center_height_scale=(0, 1), + shared_global_view=False, + view_keys=("coord", "origin_coord", "color", "normal", "correspondence"), + static_view_keys=("name", "img_num"), + ): + self.global_view_num = global_view_num + self.global_view_scale = global_view_scale + self.local_view_num = local_view_num + self.local_view_scale = local_view_scale + self.global_shared_transform = Compose(global_shared_transform) + self.global_transform = Compose(global_transform) + self.local_transform = Compose(local_transform) + self.max_size = max_size + self.enc2d_max_size = enc2d_max_size + self.enc2d_scale = enc2d_scale + self.center_height_scale = center_height_scale + self.shared_global_view = shared_global_view + self.view_keys = view_keys + self.static_view_keys = static_view_keys + assert "coord" in view_keys + + def get_view(self, point, center, scale, if_enc2d=False): + coord = point["coord"] + max_size = min(self.max_size, coord.shape[0]) + enc2d_max_size = min(self.enc2d_max_size, coord.shape[0]) + size = 0 + for _ in range(10): + if if_enc2d: + size = enc2d_max_size + else: + size = int(np.random.uniform(*scale) * max_size) + if size > 0: + break + if size == 0: + size = max(10, scale[-1] * max_size) + assert size > 0 + index = np.argsort(np.sum(np.square(coord - center), axis=-1))[:size] + view = dict(index=index) + for key in point.keys(): + if key in self.view_keys: + view[key] = point[key][index] + if key in self.static_view_keys: + view[key] = point[key] + if "index_valid_keys" in point.keys(): + # inherit index_valid_keys from point + view["index_valid_keys"] = point["index_valid_keys"] + return view + + @staticmethod + def match_point_image(major_view, data_dict): + major_correspondence = major_view["correspondence"].transpose(1, 0, 2) + correspondence = data_dict["correspondence"].transpose(1, 0, 2) + is_all_neg1 = np.any(major_correspondence != np.array([-1, -1]), axis=(1, 2)) + indices = np.where(is_all_neg1)[0] + img_dict = { + "images": data_dict["images"][indices], + "img_num": indices.shape[0], + "major_correspondence": major_correspondence[indices].transpose(1, 0, 2), + "correspondence": correspondence[indices].transpose(1, 0, 2), + } + return img_dict + + def __call__(self, data_dict): + coord = data_dict["coord"] + point = self.global_shared_transform(copy.deepcopy(data_dict)) + z_min = coord[:, 2].min() + z_max = coord[:, 2].max() + z_min_ = z_min + (z_max - z_min) * self.center_height_scale[0] + z_max_ = z_min + (z_max - z_min) * self.center_height_scale[1] + if "correspondence" not in data_dict.keys(): + center_mask = np.logical_and(coord[:, 2] >= z_min_, coord[:, 2] <= z_max_) + major_center = coord[np.random.choice(np.where(center_mask)[0])] + major_view = self.get_view(point, major_center, self.global_view_scale) + else: + given_index = data_dict["correspondence"].reshape( + data_dict["correspondence"].shape[0], -1 + ) + given_index = np.all( + given_index != np.ones_like(given_index[0]) * -1, axis=1 + ) + given_coord = data_dict["coord"][given_index] + if given_coord.shape[0] == 0: + center_mask = np.logical_and( + coord[:, 2] >= z_min_, coord[:, 2] <= z_max_ + ) + major_center = coord[np.random.choice(np.where(center_mask)[0])] + else: + major_center = np.mean(given_coord, axis=0) + major_view = self.get_view( + point, major_center, self.global_view_scale, if_enc2d=True + ) + img_dict = self.match_point_image(major_view, data_dict) + major_view["correspondence"] = img_dict["major_correspondence"] + data_dict["correspondence"] = img_dict["correspondence"] + point["correspondence"] = img_dict["correspondence"] + data_dict["img_num"] = img_dict["img_num"] + data_dict["images"] = img_dict["images"] + major_coord = major_view["coord"] + + # get global views: restrict the center of left global view within the major global view + if not self.shared_global_view: + global_views = [ + self.get_view( + point=point, + center=major_coord[np.random.randint(major_coord.shape[0])], + scale=self.global_view_scale, + ) + for _ in range(self.global_view_num - 1) + ] + else: + global_views = [ + {key: value.copy() for key, value in major_view.items()} + for _ in range(self.global_view_num - 1) + ] + + global_views = [major_view] + global_views + + # get local views: restrict the center of local view within the major global view + cover_mask = np.zeros_like(major_view["index"], dtype=bool) + local_views = [] + for i in range(self.local_view_num): + if sum(~cover_mask) == 0: + # reset cover mask if all points are sampled + cover_mask[:] = False + local_view = self.get_view( + point=data_dict, + center=major_coord[np.random.choice(np.where(~cover_mask)[0])], + scale=self.local_view_scale, + ) + local_views.append(local_view) + cover_mask[np.isin(major_view["index"], local_view["index"])] = True + + # augmentation and concat + view_dict = {} + for global_view in global_views: + global_view.pop("index") + global_view = self.global_transform(global_view) + for key in self.view_keys: + if f"global_{key}" in view_dict.keys(): + view_dict[f"global_{key}"].append(global_view[key]) + else: + view_dict[f"global_{key}"] = [global_view[key]] + view_dict["global_offset"] = np.cumsum( + [data.shape[0] for data in view_dict["global_coord"]] + ) + for local_view in local_views: + local_view.pop("index") + local_view = self.local_transform(local_view) + for key in self.view_keys: + if f"local_{key}" in view_dict.keys(): + view_dict[f"local_{key}"].append(local_view[key]) + else: + view_dict[f"local_{key}"] = [local_view[key]] + view_dict["local_offset"] = np.cumsum( + [data.shape[0] for data in view_dict["local_coord"]] + ) + + for key in view_dict.keys(): + if "offset" not in key: + if key in self.static_view_keys: + view_dict[key] = view_dict[key] + else: + view_dict[key] = np.concatenate(view_dict[key], axis=0) + data_dict.update(view_dict) + return data_dict + + +@TRANSFORMS.register_module() +class InstanceParser(object): + def __init__(self, segment_ignore_index=(-1, 0, 1), instance_ignore_index=-1): + self.segment_ignore_index = segment_ignore_index + self.instance_ignore_index = instance_ignore_index + + def __call__(self, data_dict): + coord = data_dict["coord"] + segment = data_dict["segment"] + instance = data_dict["instance"] + mask = ~np.in1d(segment, self.segment_ignore_index) + # mapping ignored instance to ignore index + instance[~mask] = self.instance_ignore_index + # reorder left instance + unique, inverse = np.unique(instance[mask], return_inverse=True) + instance_num = len(unique) + instance[mask] = inverse + # init instance information + centroid = np.ones((coord.shape[0], 3)) * self.instance_ignore_index + bbox = np.ones((instance_num, 8)) * self.instance_ignore_index + vacancy = [ + index for index in self.segment_ignore_index if index >= 0 + ] # vacate class index + + for instance_id in range(instance_num): + mask_ = instance == instance_id + coord_ = coord[mask_] + bbox_min = coord_.min(0) + bbox_max = coord_.max(0) + bbox_centroid = coord_.mean(0) + bbox_center = (bbox_max + bbox_min) / 2 + bbox_size = bbox_max - bbox_min + bbox_theta = np.zeros(1, dtype=coord_.dtype) + bbox_class = np.array([segment[mask_][0]], dtype=coord_.dtype) + # shift class index to fill vacate class index caused by segment ignore index + bbox_class -= np.greater(bbox_class, vacancy).sum() + + centroid[mask_] = bbox_centroid + bbox[instance_id] = np.concatenate( + [bbox_center, bbox_size, bbox_theta, bbox_class] + ) # 3 + 3 + 1 + 1 = 8 + data_dict["instance"] = instance + data_dict["instance_centroid"] = centroid + data_dict["bbox"] = bbox + return data_dict + + +class Compose(object): + def __init__(self, cfg=None): + self.cfg = cfg if cfg is not None else [] + self.transforms = [] + for t_cfg in self.cfg: + self.transforms.append(TRANSFORMS.build(t_cfg)) + + def __call__(self, data_dict): + for t in self.transforms: + data_dict = t(data_dict) + return data_dict + + +@TRANSFORMS.register_module() +class ImgToTensor(object): + def __init__(self): + self.totensor = transforms.ToTensor() + + def __call__(self, img): + return self.totensor(img) + + +@TRANSFORMS.register_module() +class ImgGaussianBlur(object): + """ + Apply Gaussian Blur to the PIL image. + """ + + def __init__( + self, *, p: float = 0.5, radius_min: float = 0.1, radius_max: float = 2.0 + ): + # NOTE: torchvision is applying 1 - probability to return the original image + self.p = p + self.transform = transforms.GaussianBlur( + kernel_size=9, sigma=(radius_min, radius_max) + ) + super().__init__() + + def __call__(self, img): + if np.random.rand() < self.p: + img = self.transform(img) + return img + + +@TRANSFORMS.register_module() +class ImgChromaticJitter(object): + def __init__(self, p=0.95, std=0.005): + self.p = p + self.std = std + + def __call__(self, img): + if np.random.rand() < self.p: + noise = torch.rand(3) + noise *= self.std + noise = noise[:, None, None].expand_as(img) + img += noise + img = torch.clip(img, 0, 1) + return img + + +@TRANSFORMS.register_module() +class ImgPixelContrast(object): + def __init__(self, threshold, p=0.2): + super().__init__() + self.p = p + self.threshold = threshold + + def __call__(self, img): + if np.random.rand() < self.p: + n, h, w = img.shape[0], img.shape[2], img.shape[3] + num_pixels = int(self.threshold * h * w * n) + indices = torch.randint(0, n * h * w, (num_pixels,)) + img = img.permute(0, 2, 3, 1).reshape(-1, 3) + img[indices, :] = 255.0 - img[indices, :] + img = img.reshape(n, h, w, 3).permute(0, 3, 1, 2) + return img + + +IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406) +IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225) + + +@TRANSFORMS.register_module() +class Imgnormalize(object): + def __init__(self, mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD): + super().__init__() + self.normalize = transforms.Normalize(mean=mean, std=std) + + def __call__(self, img): + return self.normalize(img) + + +@TRANSFORMS.register_module() +class ImgRandomHorizontalFlip(object): + def __init__(self, p=0.5): + super().__init__() + self.p = p + self.imgrandomhorizontalflip = transforms.RandomHorizontalFlip(p=p) + + def __call__(self, img): + return self.imgrandomhorizontalflip(img) + + +@TRANSFORMS.register_module() +class ImgRandomResizedCrop(object): + def __init__(self, size, scale, interpolation): + super().__init__() + self.imgrandomresizedcrop = transforms.RandomResizedCrop( + size=size, scale=scale, interpolation=interpolation + ) + + def __call__(self, img): + return self.imgrandomresizedcrop(img) + + +@TRANSFORMS.register_module() +class ImgRandomColorJitter(object): + def __init__(self, brightness=0.4, contrast=0.4, saturation=0.2, hue=0.1, p=0.8): + colorjitter = transforms.ColorJitter( + brightness=brightness, contrast=contrast, saturation=saturation, hue=hue + ) + super().__init__() + self.p = p + self.colorjitter = colorjitter + + def __call__(self, img): + return self.colorjitter(img) + + +@TRANSFORMS.register_module() +class ImgRandomGrayscale(object): + def __init__(self, p=0.1): + super().__init__() + self.p = p + self.imgrandomgrayscale = transforms.RandomGrayscale(p=p) + + def __call__(self, img): + return self.imgrandomgrayscale(img) + + +@TRANSFORMS.register_module() +class ImgRandomSolarize(object): + def __init__(self, threshold, p=0.1): + super().__init__() + self.p = p + self.imgrandomsolarize = transforms.RandomSolarize(threshold=threshold, p=p) + + def __call__(self, img): + return self.imgrandomsolarize(img) + + +@TRANSFORMS.register_module() +class ImgAugmentation(object): + def __init__( + self, + imgtransforms, + crop_h=518, + crop_w=518, + patch_h=37, + patch_w=37, + patch_size=14, + ): + self.transforms = [] + self.transforms_cfg = imgtransforms + for t_cfg in self.transforms_cfg: + self.transforms.append(TRANSFORMS.build(t_cfg)) + self.crop_h = crop_h + self.crop_w = crop_w + self.patch_h = patch_h + self.patch_w = patch_w + self.patch_size = patch_size + self.crop_start = [ + random.randint(0, patch_h * patch_size - crop_h), + random.randint(0, patch_w * patch_size - crop_w), + ] + + def __call__(self, point): + point["images"] = transforms.functional.crop( + point["images"], + top=self.crop_start[0], + left=self.crop_start[1], + height=self.crop_h, + width=self.crop_w, + ) + for id, t in enumerate(self.transforms): + point["images"] = t(point["images"]) + correspondence = point["correspondence"] + correspondence_shape = correspondence.shape + correspondence = correspondence.reshape(-1, 2) + mask = ( + (self.crop_start[0] <= correspondence[:, 0]) + & (correspondence[:, 0] < self.crop_start[0] + self.crop_h) + & (self.crop_start[1] <= correspondence[:, 1]) + & (correspondence[:, 1] < self.crop_start[1] + self.crop_w) + ) + correspondence[~mask] = np.array([-1, -1]) + correspondence[mask] -= np.array(self.crop_start) + point["correspondence"] = correspondence.reshape(correspondence_shape) + return point diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/utils.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/utils.py new file mode 100644 index 0000000..89e4247 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/datasets/utils.py @@ -0,0 +1,140 @@ +""" +Utils for Datasets + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import random +from collections.abc import Mapping, Sequence +import numpy as np +import torch +from torch.utils.data.dataloader import default_collate +import torch.nn.functional as F + + +def collate_fn(batch): + """ + collate function for point cloud which support dict and list, + 'coord' is necessary to determine 'offset' + """ + if not isinstance(batch, Sequence): + raise TypeError(f"{batch.dtype} is not supported.") + + if isinstance(batch[0], torch.Tensor): + return torch.cat(list(batch)) + elif isinstance(batch[0], str): + # str is also a kind of Sequence, judgement should before Sequence + return list(batch) + elif isinstance(batch[0], Sequence): + for data in batch: + data.append(torch.tensor([data[0].shape[0]])) + batch = [collate_fn(samples) for samples in zip(*batch)] + batch[-1] = torch.cumsum(batch[-1], dim=0).int() + return batch + elif isinstance(batch[0], Mapping): + if "img_num" in batch[0].keys(): + max_img_num = max([d["img_num"] for d in batch]) + batch = { + key: ( + ( + collate_fn([d[key] for d in batch]) + if "offset" not in key + # offset -> bincount -> concat bincount-> concat offset + else torch.cumsum( + collate_fn( + [d[key].diff(prepend=torch.tensor([0])) for d in batch] + ), + dim=0, + ) + ) + if "correspondence" not in key + else collate_fn( + [ + F.pad( + d[key].permute(0, 2, 1), + (0, max_img_num - d[key].shape[1]), + value=-1, + ).permute(0, 2, 1) + for d in batch + ] + ) + ) + for key in batch[0] + } + return batch + else: + return default_collate(batch) + + +def point_collate_fn(batch, mix_prob=0): + assert isinstance( + batch[0], Mapping + ) # currently, only support input_dict, rather than input_list + batch = collate_fn(batch) + if random.random() < mix_prob: + if "instance" in batch.keys(): + offset = batch["offset"] + start = 0 + num_instance = 0 + for i in range(len(offset)): + if i % 2 == 0: + num_instance = max(batch["instance"][start : offset[i]]) + if i % 2 != 0: + mask = batch["instance"][start : offset[i]] != -1 + batch["instance"][start : offset[i]] += num_instance * mask + start = offset[i] + offset_assets = [asset for asset in batch.keys() if "offset" in asset] + for offset_asset in offset_assets: + batch[offset_asset] = torch.cat( + [batch[offset_asset][1:-1:2], batch[offset_asset][-1].unsqueeze(0)], + dim=0, + ) + if "img_num" in batch.keys(): + n = batch["img_num"].shape[0] + num_pairs = n // 2 + len_pairs = num_pairs * 2 + pairs_tensor = batch["img_num"][:len_pairs] + + if num_pairs == 0: + pass + else: + summed_pairs = pairs_tensor.view(-1, 2).sum(dim=1) + if n % 2 != 0: + last_element = batch["img_num"][-1:] + result = torch.cat((summed_pairs, last_element)) + else: + result = summed_pairs + batch["img_num"] = result + correspondence_assets = [ + asset for asset in batch.keys() if "correspondence" in asset + ] + for correspondence_asset in correspondence_assets: + offset = batch["offset"] + start = 0 + N, v, n = batch[correspondence_asset].shape + v2 = v * 2 + batch_correspondence_mix = -torch.ones((N, v2, n)) + for i in range(len(offset)): + if i % 2 == 0: + batch_correspondence_mix[start : offset[i], 0:v] = batch[ + correspondence_asset + ][start : offset[i], 0:v] + if i % 2 != 0: + batch_correspondence_mix[start : offset[i], v:] = batch[ + correspondence_asset + ][start : offset[i], 0:v] + start = offset[i] + if len(offset) % 2 == 0: + pass + else: + start = 0 if len(offset) == 1 else offset[-2] + batch_correspondence_mix[start:N, -v:] = batch[correspondence_asset][ + start:N, -v: + ] + batch[correspondence_asset] = batch_correspondence_mix + return batch + + +def gaussian_kernel(dist2: np.array, a: float = 1, c: float = 5): + return a * np.exp(-dist2 / (2 * c**2)) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/engines/__init__.py b/point_transformer_v3/pointcept_minimal/pointcept/engines/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/point_transformer_v3/pointcept_minimal/pointcept/engines/defaults.py b/point_transformer_v3/pointcept_minimal/pointcept/engines/defaults.py new file mode 100644 index 0000000..6091e70 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/engines/defaults.py @@ -0,0 +1,152 @@ +""" +Default training/testing logic + +modified from detectron2(https://github.com/facebookresearch/detectron2) + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import os +import sys +import argparse +import multiprocessing as mp +from torch.nn.parallel import DistributedDataParallel + + +import pointcept.utils.comm as comm +from pointcept.utils.env import get_random_seed, set_seed +from pointcept.utils.config import Config, DictAction + + +def create_ddp_model(model, *, fp16_compression=False, **kwargs): + """ + Create a DistributedDataParallel model if there are >1 processes. + Args: + model: a torch.nn.Module + fp16_compression: add fp16 compression hooks to the ddp object. + See more at https://pytorch.org/docs/stable/ddp_comm_hooks.html#torch.distributed.algorithms.ddp_comm_hooks.default_hooks.fp16_compress_hook + kwargs: other arguments of :module:`torch.nn.parallel.DistributedDataParallel`. + """ + if comm.get_world_size() == 1: + return model + # kwargs['find_unused_parameters'] = True + if "device_ids" not in kwargs: + kwargs["device_ids"] = [comm.get_local_rank()] + if "output_device" not in kwargs: + kwargs["output_device"] = [comm.get_local_rank()] + ddp = DistributedDataParallel(model, **kwargs) + if fp16_compression: + from torch.distributed.algorithms.ddp_comm_hooks import default as comm_hooks + + ddp.register_comm_hook(state=None, hook=comm_hooks.fp16_compress_hook) + return ddp + + +def worker_init_fn(worker_id, num_workers, rank, seed): + """Worker init func for dataloader. + + The seed of each worker equals to num_worker * rank + worker_id + user_seed + + Args: + worker_id (int): Worker id. + num_workers (int): Number of workers. + rank (int): The rank of current process. + seed (int): The random seed to use. + """ + + worker_seed = None if seed is None else num_workers * rank + worker_id + seed + set_seed(worker_seed) + + +def default_argument_parser(epilog=None): + parser = argparse.ArgumentParser( + epilog=epilog + or f""" + Examples: + Run on single machine: + $ {sys.argv[0]} --num-gpus 8 --config-file cfg.yaml + Change some config options: + $ {sys.argv[0]} --config-file cfg.yaml MODEL.WEIGHTS /path/to/weight.pth SOLVER.BASE_LR 0.001 + Run on multiple machines: + (machine0)$ {sys.argv[0]} --machine-rank 0 --num-machines 2 --dist-url [--other-flags] + (machine1)$ {sys.argv[0]} --machine-rank 1 --num-machines 2 --dist-url [--other-flags] + """, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--config-file", default="", metavar="FILE", help="path to config file" + ) + parser.add_argument( + "--num-gpus", type=int, default=1, help="number of gpus *per machine*" + ) + parser.add_argument( + "--num-machines", type=int, default=1, help="total number of machines" + ) + parser.add_argument( + "--machine-rank", + type=int, + default=0, + help="the rank of this machine (unique per machine)", + ) + # PyTorch still may leave orphan processes in multi-gpu training. + # Therefore we use a deterministic way to obtain port, + # so that users are aware of orphan processes by seeing the port occupied. + # port = 2 ** 15 + 2 ** 14 + hash(os.getuid() if sys.platform != "win32" else 1) % 2 ** 14 + parser.add_argument( + "--dist-url", + # default="tcp://127.0.0.1:{}".format(port), + default="auto", + help="initialization URL for pytorch distributed backend. See " + "https://pytorch.org/docs/stable/distributed.html for details.", + ) + parser.add_argument( + "--options", nargs="+", action=DictAction, help="custom options" + ) + return parser + + +def default_config_parser(file_path, options): + # config name protocol: dataset_name/model_name-exp_name + if os.path.isfile(file_path): + cfg = Config.fromfile(file_path) + else: + sep = file_path.find("-") + cfg = Config.fromfile(os.path.join(file_path[:sep], file_path[sep + 1 :])) + + if options is not None: + cfg.merge_from_dict(options) + + if cfg.seed is None: + cfg.seed = get_random_seed() + + cfg.data.train.loop = cfg.epoch // cfg.eval_epoch + + os.makedirs(os.path.join(cfg.save_path, "model"), exist_ok=True) + if not cfg.resume: + cfg.dump(os.path.join(cfg.save_path, "config.py")) + return cfg + + +def default_setup(cfg): + # scalar by world size + world_size = comm.get_world_size() + cfg.num_worker = cfg.num_worker if cfg.num_worker is not None else mp.cpu_count() + cfg.num_worker_per_gpu = cfg.num_worker // world_size + assert cfg.batch_size % world_size == 0 + assert cfg.batch_size_val is None or cfg.batch_size_val % world_size == 0 + assert cfg.batch_size_test is None or cfg.batch_size_test % world_size == 0 + cfg.batch_size_per_gpu = cfg.batch_size // world_size + cfg.batch_size_val_per_gpu = ( + cfg.batch_size_val // world_size if cfg.batch_size_val is not None else 1 + ) + cfg.batch_size_test_per_gpu = ( + cfg.batch_size_test // world_size if cfg.batch_size_test is not None else 1 + ) + # update data loop + assert cfg.epoch % cfg.eval_epoch == 0 + # settle random seed + rank = comm.get_rank() + seed = None if cfg.seed is None else cfg.seed + rank * cfg.num_worker_per_gpu + set_seed(seed) + return cfg diff --git a/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/__init__.py b/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/__init__.py new file mode 100644 index 0000000..41c0320 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/__init__.py @@ -0,0 +1,5 @@ +from .default import * +from .misc import * +from .evaluator import * + +from .builder import build_hooks diff --git a/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/builder.py b/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/builder.py new file mode 100644 index 0000000..2f4cce4 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/builder.py @@ -0,0 +1,18 @@ +""" +Hook Builder + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +from pointcept.utils.registry import Registry + + +HOOKS = Registry("hooks") + + +def build_hooks(cfg): + hooks = [] + for hook_cfg in cfg: + hooks.append(HOOKS.build(hook_cfg)) + return hooks diff --git a/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/default.py b/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/default.py new file mode 100644 index 0000000..47f2aa1 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/default.py @@ -0,0 +1,66 @@ +""" +Default Hook + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import pointcept.utils.comm as comm +import weakref +from .builder import HOOKS + + +class HookBase: + """ + Base class for hooks that can be registered with :class:`TrainerBase`. + """ + + trainer = None # A weak reference to the trainer object. + + def before_train(self): + pass + + def before_epoch(self): + pass + + def before_step(self): + pass + + def after_step(self): + pass + + def after_epoch(self): + pass + + def after_train(self): + pass + + +@HOOKS.register_module() +class ModelHook(HookBase): + def before_train(self): + if comm.get_world_size() > 1 and isinstance( + self.trainer.model.module, HookBase + ): + self.model = weakref.proxy(self.trainer.model.module) + elif isinstance(self.trainer.model, HookBase): + self.model = weakref.proxy(self.trainer.model) + else: + self.model = HookBase() + self.model.trainer = self.trainer + self.model.before_train() + + def before_epoch(self): + self.model.before_epoch() + + def before_step(self): + self.model.before_step() + + def after_step(self): + self.model.after_step() + + def after_epoch(self): + self.model.after_epoch() + + def after_train(self): + self.model.after_train() diff --git a/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/evaluator.py b/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/evaluator.py new file mode 100644 index 0000000..55b19d0 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/evaluator.py @@ -0,0 +1,243 @@ +""" +Evaluate Hook + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import numpy as np +import wandb +import torch +import torch.distributed as dist +from uuid import uuid4 + +import pointcept.utils.comm as comm +from pointcept.utils.misc import intersection_and_union_gpu + +from .default import HookBase +from .builder import HOOKS + + +@HOOKS.register_module() +class ClsEvaluator(HookBase): + def after_epoch(self): + if self.trainer.cfg.evaluate: + self.eval() + + def eval(self): + self.trainer.logger.info(">>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>") + self.trainer.model.eval() + for i, input_dict in enumerate(self.trainer.val_loader): + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + with torch.no_grad(): + output_dict = self.trainer.model(input_dict) + output = output_dict["cls_logits"] + loss = output_dict["loss"] + pred = output.max(1)[1] + label = input_dict["category"] + intersection, union, target = intersection_and_union_gpu( + pred, + label, + self.trainer.cfg.data.num_classes, + self.trainer.cfg.data.ignore_index, + ) + if comm.get_world_size() > 1: + dist.all_reduce(intersection), dist.all_reduce(union), dist.all_reduce( + target + ) + intersection, union, target = ( + intersection.cpu().numpy(), + union.cpu().numpy(), + target.cpu().numpy(), + ) + # Here there is no need to sync since sync happened in dist.all_reduce + self.trainer.storage.put_scalar("val_intersection", intersection) + self.trainer.storage.put_scalar("val_union", union) + self.trainer.storage.put_scalar("val_target", target) + self.trainer.storage.put_scalar("val_loss", loss.item()) + self.trainer.logger.info( + "Test: [{iter}/{max_iter}] " + "Loss {loss:.4f} ".format( + iter=i + 1, max_iter=len(self.trainer.val_loader), loss=loss.item() + ) + ) + loss_avg = self.trainer.storage.history("val_loss").avg + intersection = self.trainer.storage.history("val_intersection").total + union = self.trainer.storage.history("val_union").total + target = self.trainer.storage.history("val_target").total + iou_class = intersection / (union + 1e-10) + acc_class = intersection / (target + 1e-10) + m_iou = np.mean(iou_class) + m_acc = np.mean(acc_class) + all_acc = sum(intersection) / (sum(target) + 1e-10) + self.trainer.logger.info( + "Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.".format( + m_iou, m_acc, all_acc + ) + ) + for i in range(self.trainer.cfg.data.num_classes): + self.trainer.logger.info( + "Class_{idx}-{name} Result: iou/accuracy {iou:.4f}/{accuracy:.4f}".format( + idx=i, + name=self.trainer.cfg.data.names[i], + iou=iou_class[i], + accuracy=acc_class[i], + ) + ) + current_epoch = self.trainer.epoch + 1 + if self.trainer.writer is not None: + self.trainer.writer.add_scalar("val/loss", loss_avg, current_epoch) + self.trainer.writer.add_scalar("val/mIoU", m_iou, current_epoch) + self.trainer.writer.add_scalar("val/mAcc", m_acc, current_epoch) + self.trainer.writer.add_scalar("val/allAcc", all_acc, current_epoch) + if self.trainer.cfg.enable_wandb: + wandb.log( + { + "Epoch": current_epoch, + "val/loss": loss_avg, + "val/mIoU": m_iou, + "val/mAcc": m_acc, + "val/allAcc": all_acc, + }, + step=wandb.run.step, + ) + self.trainer.logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") + self.trainer.comm_info["current_metric_value"] = all_acc # save for saver + self.trainer.comm_info["current_metric_name"] = "allAcc" # save for saver + + def after_train(self): + self.trainer.logger.info( + "Best {}: {:.4f}".format("allAcc", self.trainer.best_metric_value) + ) + + +@HOOKS.register_module() +class SemSegEvaluator(HookBase): + def __init__(self, write_cls_iou=False): + self.write_cls_iou = write_cls_iou + + def before_train(self): + if self.trainer.writer is not None and self.trainer.cfg.enable_wandb: + wandb.define_metric("val/*", step_metric="Epoch") + + def after_epoch(self): + if self.trainer.cfg.evaluate: + self.eval() + + def eval(self): + self.trainer.logger.info(">>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>") + self.trainer.model.eval() + for i, input_dict in enumerate(self.trainer.val_loader): + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + with torch.no_grad(): + output_dict = self.trainer.model(input_dict) + output = output_dict["seg_logits"] + loss = output_dict["loss"] + pred = output.max(1)[1] + segment = input_dict["segment"] + if "inverse" in input_dict.keys(): + assert "origin_segment" in input_dict.keys() + pred = pred[input_dict["inverse"]] + segment = input_dict["origin_segment"] + intersection, union, target = intersection_and_union_gpu( + pred, + segment, + self.trainer.cfg.data.num_classes, + self.trainer.cfg.data.ignore_index, + ) + if comm.get_world_size() > 1: + dist.all_reduce(intersection), dist.all_reduce(union), dist.all_reduce( + target + ) + intersection, union, target = ( + intersection.cpu().numpy(), + union.cpu().numpy(), + target.cpu().numpy(), + ) + # Here there is no need to sync since sync happened in dist.all_reduce + self.trainer.storage.put_scalar("val_intersection", intersection) + self.trainer.storage.put_scalar("val_union", union) + self.trainer.storage.put_scalar("val_target", target) + self.trainer.storage.put_scalar("val_loss", loss.item()) + info = "Test: [{iter}/{max_iter}] ".format( + iter=i + 1, max_iter=len(self.trainer.val_loader) + ) + if "origin_coord" in input_dict.keys(): + info = "Interp. " + info + self.trainer.logger.info( + info + + "Loss {loss:.4f} ".format( + iter=i + 1, max_iter=len(self.trainer.val_loader), loss=loss.item() + ) + ) + loss_avg = self.trainer.storage.history("val_loss").avg + intersection = self.trainer.storage.history("val_intersection").total + union = self.trainer.storage.history("val_union").total + target = self.trainer.storage.history("val_target").total + iou_class = intersection / (union + 1e-10) + acc_class = intersection / (target + 1e-10) + m_iou = np.mean(iou_class) + m_acc = np.mean(acc_class) + all_acc = sum(intersection) / (sum(target) + 1e-10) + self.trainer.logger.info( + "Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.".format( + m_iou, m_acc, all_acc + ) + ) + for i in range(self.trainer.cfg.data.num_classes): + self.trainer.logger.info( + "Class_{idx}-{name} Result: iou/accuracy {iou:.4f}/{accuracy:.4f}".format( + idx=i, + name=self.trainer.cfg.data.names[i], + iou=iou_class[i], + accuracy=acc_class[i], + ) + ) + current_epoch = self.trainer.epoch + 1 + if self.trainer.writer is not None: + self.trainer.writer.add_scalar("val/loss", loss_avg, current_epoch) + self.trainer.writer.add_scalar("val/mIoU", m_iou, current_epoch) + self.trainer.writer.add_scalar("val/mAcc", m_acc, current_epoch) + self.trainer.writer.add_scalar("val/allAcc", all_acc, current_epoch) + if self.trainer.cfg.enable_wandb: + wandb.log( + { + "Epoch": current_epoch, + "val/loss": loss_avg, + "val/mIoU": m_iou, + "val/mAcc": m_acc, + "val/allAcc": all_acc, + }, + step=wandb.run.step, + ) + if self.write_cls_iou: + for i in range(self.trainer.cfg.data.num_classes): + self.trainer.writer.add_scalar( + f"val/cls_{i}-{self.trainer.cfg.data.names[i]} IoU", + iou_class[i], + current_epoch, + ) + if self.trainer.cfg.enable_wandb: + for i in range(self.trainer.cfg.data.num_classes): + wandb.log( + { + "Epoch": current_epoch, + f"val/cls_{i}-{self.trainer.cfg.data.names[i]} IoU": iou_class[ + i + ], + }, + step=wandb.run.step, + ) + self.trainer.logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") + self.trainer.comm_info["current_metric_value"] = m_iou # save for saver + self.trainer.comm_info["current_metric_name"] = "mIoU" # save for saver + + def after_train(self): + self.trainer.logger.info( + "Best {}: {:.4f}".format("mIoU", self.trainer.best_metric_value) + ) + diff --git a/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/misc.py b/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/misc.py new file mode 100644 index 0000000..28d9682 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/misc.py @@ -0,0 +1,553 @@ +""" +Misc Hook + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import sys +import glob +import os +import shutil +import time +import gc +import wandb +import torch +import torch.utils.data +from collections import OrderedDict + +if sys.version_info >= (3, 10): + from collections.abc import Sequence +else: + from collections import Sequence +from pointcept.utils.timer import Timer +from pointcept.utils.comm import is_main_process, synchronize +from pointcept.utils.cache import shared_dict +from pointcept.utils.scheduler import CosineScheduler +import pointcept.utils.comm as comm + +from .default import HookBase +from .builder import HOOKS + + +@HOOKS.register_module() +class IterationTimer(HookBase): + def __init__(self, warmup_iter=1): + self._warmup_iter = warmup_iter + self._start_time = time.perf_counter() + self._iter_timer = Timer() + self._remain_iter = 0 + + def before_train(self): + self._start_time = time.perf_counter() + _remain_epoch = self.trainer.max_epoch - self.trainer.start_epoch + self._remain_iter = _remain_epoch * len(self.trainer.train_loader) + + def before_epoch(self): + self._iter_timer.reset() + + def before_step(self): + data_time = self._iter_timer.seconds() + self.trainer.storage.put_scalar("data_time", data_time) + + def after_step(self): + batch_time = self._iter_timer.seconds() + self._iter_timer.reset() + self.trainer.storage.put_scalar("batch_time", batch_time) + self._remain_iter -= 1 + remain_time = self._remain_iter * self.trainer.storage.history("batch_time").avg + t_m, t_s = divmod(remain_time, 60) + t_h, t_m = divmod(t_m, 60) + remain_time = "{:02d}:{:02d}:{:02d}".format(int(t_h), int(t_m), int(t_s)) + if "iter_info" in self.trainer.comm_info.keys(): + info = ( + "Data {data_time_val:.3f} ({data_time_avg:.3f}) " + "Batch {batch_time_val:.3f} ({batch_time_avg:.3f}) " + "Remain {remain_time} ".format( + data_time_val=self.trainer.storage.history("data_time").val, + data_time_avg=self.trainer.storage.history("data_time").avg, + batch_time_val=self.trainer.storage.history("batch_time").val, + batch_time_avg=self.trainer.storage.history("batch_time").avg, + remain_time=remain_time, + ) + ) + self.trainer.comm_info["iter_info"] += info + if self.trainer.comm_info["iter"] <= self._warmup_iter: + self.trainer.storage.history("data_time").reset() + self.trainer.storage.history("batch_time").reset() + + +@HOOKS.register_module() +class InformationWriter(HookBase): + def __init__(self): + self.curr_iter = 0 + self.model_output_keys = [] + + def before_train(self): + self.trainer.comm_info["iter_info"] = "" + self.curr_iter = self.trainer.start_epoch * len(self.trainer.train_loader) + if self.trainer.writer is not None and self.trainer.cfg.enable_wandb: + wandb.define_metric("params/*", step_metric="Iter") + wandb.define_metric("train_batch/*", step_metric="Iter") + wandb.define_metric("train/*", step_metric="Epoch") + + def before_step(self): + self.curr_iter += 1 + info = "Train: [{epoch}/{max_epoch}][{iter}/{max_iter}] ".format( + epoch=self.trainer.epoch + 1, + max_epoch=self.trainer.max_epoch, + iter=self.trainer.comm_info["iter"] + 1, + max_iter=len(self.trainer.train_loader), + ) + self.trainer.comm_info["iter_info"] += info + + def after_step(self): + if "model_output_dict" in self.trainer.comm_info.keys(): + model_output_dict = self.trainer.comm_info["model_output_dict"] + self.model_output_keys = model_output_dict.keys() + for key in self.model_output_keys: + self.trainer.storage.put_scalar(key, model_output_dict[key].item()) + + for key in self.model_output_keys: + self.trainer.comm_info["iter_info"] += "{key}: {value:.4f} ".format( + key=key, value=self.trainer.storage.history(key).val + ) + lr = self.trainer.optimizer.state_dict()["param_groups"][0]["lr"] + self.trainer.comm_info["iter_info"] += "Lr: {lr:.5f}".format(lr=lr) + self.trainer.logger.info(self.trainer.comm_info["iter_info"]) + self.trainer.comm_info["iter_info"] = "" # reset iter info + if self.trainer.writer is not None: + self.trainer.writer.add_scalar("params/lr", lr, self.curr_iter) + for key in self.model_output_keys: + self.trainer.writer.add_scalar( + "train_batch/" + key, + self.trainer.storage.history(key).val, + self.curr_iter, + ) + if self.trainer.cfg.enable_wandb: + + wandb.log( + {"Iter": self.curr_iter, "params/lr": lr}, step=self.curr_iter + ) + for key in self.model_output_keys: + wandb.log( + { + "Iter": self.curr_iter, + f"train_batch/{key}": self.trainer.storage.history(key).val, + }, + step=wandb.run.step, + ) + + def after_epoch(self): + epoch_info = "Train result: " + for key in self.model_output_keys: + epoch_info += "{key}: {value:.4f} ".format( + key=key, value=self.trainer.storage.history(key).avg + ) + self.trainer.logger.info(epoch_info) + if self.trainer.writer is not None: + for key in self.model_output_keys: + self.trainer.writer.add_scalar( + "train/" + key, + self.trainer.storage.history(key).avg, + self.trainer.epoch + 1, + ) + + if self.trainer.cfg.enable_wandb: + + for key in self.model_output_keys: + wandb.log( + { + "Epoch": self.trainer.epoch + 1, + f"train/{key}": self.trainer.storage.history(key).avg, + }, + step=wandb.run.step, + ) + + +@HOOKS.register_module() +class CheckpointSaver(HookBase): + def __init__(self, save_freq=None): + self.save_freq = save_freq # None or int, None indicate only save model last + + def after_epoch(self): + if is_main_process(): + is_best = False + if self.trainer.cfg.evaluate: + current_metric_value = self.trainer.comm_info["current_metric_value"] + current_metric_name = self.trainer.comm_info["current_metric_name"] + if current_metric_value > self.trainer.best_metric_value: + self.trainer.best_metric_value = current_metric_value + is_best = True + self.trainer.logger.info( + "Best validation {} updated to: {:.4f}".format( + current_metric_name, current_metric_value + ) + ) + self.trainer.logger.info( + "Currently Best {}: {:.4f}".format( + current_metric_name, self.trainer.best_metric_value + ) + ) + + filename = os.path.join( + self.trainer.cfg.save_path, "model", "model_last.pth" + ) + self.trainer.logger.info("Saving checkpoint to: " + filename) + torch.save( + { + "epoch": self.trainer.epoch + 1, + "state_dict": self.trainer.model.state_dict(), + "optimizer": self.trainer.optimizer.state_dict(), + "scheduler": self.trainer.scheduler.state_dict(), + "scaler": ( + self.trainer.scaler.state_dict() + if self.trainer.cfg.enable_amp + else None + ), + "best_metric_value": self.trainer.best_metric_value, + }, + filename + ".tmp", + ) + os.replace(filename + ".tmp", filename) + if is_best: + shutil.copyfile( + filename, + os.path.join(self.trainer.cfg.save_path, "model", "model_best.pth"), + ) + if self.save_freq and (self.trainer.epoch + 1) % self.save_freq == 0: + shutil.copyfile( + filename, + os.path.join( + self.trainer.cfg.save_path, + "model", + f"epoch_{self.trainer.epoch + 1}.pth", + ), + ) + + +@HOOKS.register_module() +class CheckpointLoader(HookBase): + def __init__(self, keywords="", replacement=None, strict=False): + self.keywords = keywords + self.replacement = replacement if replacement is not None else keywords + self.strict = strict + + def before_train(self): + self.trainer.logger.info("=> Loading checkpoint & weight ...") + if self.trainer.cfg.weight and os.path.isfile(self.trainer.cfg.weight): + self.trainer.logger.info(f"Loading weight at: {self.trainer.cfg.weight}") + checkpoint = torch.load( + self.trainer.cfg.weight, + map_location=lambda storage, loc: storage.cuda(), + weights_only=False, + ) + self.trainer.logger.info( + f"Loading layer weights with keyword: {self.keywords}, " + f"replace keyword with: {self.replacement}" + ) + weight = OrderedDict() + for key, value in checkpoint["state_dict"].items(): + if not key.startswith("module."): + key = "module." + key # xxx.xxx -> module.xxx.xxx + # Now all keys contain "module." no matter DDP or not. + if self.keywords in key: + key = key.replace(self.keywords, self.replacement, 1) + if comm.get_world_size() == 1: + key = key[7:] # module.xxx.xxx -> xxx.xxx + weight[key] = value + load_state_info = self.trainer.model.load_state_dict( + weight, strict=self.strict + ) + self.trainer.logger.info(f"Missing keys: {load_state_info[0]}") + if self.trainer.cfg.resume: + self.trainer.logger.info( + f"Resuming train at eval epoch: {checkpoint['epoch']}" + ) + self.trainer.start_epoch = checkpoint["epoch"] + self.trainer.best_metric_value = checkpoint["best_metric_value"] + self.trainer.optimizer.load_state_dict(checkpoint["optimizer"]) + self.trainer.scheduler.load_state_dict(checkpoint["scheduler"]) + if self.trainer.cfg.enable_amp: + self.trainer.scaler.load_state_dict(checkpoint["scaler"]) + else: + self.trainer.logger.info(f"No weight found at: {self.trainer.cfg.weight}") + + +@HOOKS.register_module() +class PreciseEvaluator(HookBase): + def __init__(self, test_last=False): + self.test_last = test_last + + def after_train(self): + from pointcept.engines.test import TESTERS + + self.trainer.logger.info( + ">>>>>>>>>>>>>>>> Start Precise Evaluation >>>>>>>>>>>>>>>>" + ) + torch.cuda.empty_cache() + cfg = self.trainer.cfg + test_cfg = dict(cfg=cfg, model=self.trainer.model, **cfg.test) + tester = TESTERS.build(test_cfg) + if self.test_last: + self.trainer.logger.info("=> Testing on model_last ...") + else: + self.trainer.logger.info("=> Testing on model_best ...") + best_path = os.path.join( + self.trainer.cfg.save_path, "model", "model_best.pth" + ) + checkpoint = torch.load(best_path, weights_only=False) + weight = OrderedDict() + for key, value in checkpoint["state_dict"].items(): + if not key.startswith("module."): + key = "module." + key # xxx.xxx -> module.xxx.xxx + # Now all keys contain "module." no matter DDP or not. + if comm.get_world_size() == 1: + key = key[7:] # module.xxx.xxx -> xxx.xxx + weight[key] = value + tester.model.load_state_dict(weight, strict=True) + tester.test() + + +@HOOKS.register_module() +class DataCacheOperator(HookBase): + def __init__(self, data_root, split): + self.data_root = data_root + self.split = split + self.data_list = self.get_data_list() + + def get_data_list(self): + if isinstance(self.split, str): + data_list = glob.glob(os.path.join(self.data_root, self.split)) + elif isinstance(self.split, Sequence): + data_list = [] + for split in self.split: + data_list += glob.glob(os.path.join(self.data_root, split)) + else: + raise NotImplementedError + return data_list + + def get_cache_name(self, data_path): + data_name = data_path.replace(os.path.dirname(self.data_root), "") + return "pointcept" + data_name.replace(os.path.sep, "-") + + def before_train(self): + self.trainer.logger.info( + f"=> Caching dataset: {self.data_root}, split: {self.split} ..." + ) + if is_main_process(): + dataset = self.trainer.train_loader.dataset + for i in range(len(dataset)): + data_dict = dataset[i] + name = data_dict["name"] + shared_dict(f"Pointcept-{name}", data_dict) + synchronize() + + +@HOOKS.register_module() +class RuntimeProfiler(HookBase): + def __init__( + self, + forward=True, + backward=True, + interrupt=False, + warm_up=2, + sort_by="cuda_time_total", + row_limit=30, + ): + self.forward = forward + self.backward = backward + self.interrupt = interrupt + self.warm_up = warm_up + self.sort_by = sort_by + self.row_limit = row_limit + + def before_train(self): + self.trainer.logger.info("Profiling runtime ...") + from torch.profiler import profile, record_function, ProfilerActivity + + for i, input_dict in enumerate(self.trainer.train_loader): + if i == self.warm_up + 1: + break + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + if self.forward: + with profile( + activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], + record_shapes=True, + profile_memory=True, + with_stack=True, + ) as forward_prof: + with record_function("model_inference"): + output_dict = self.trainer.model(input_dict) + else: + output_dict = self.trainer.model(input_dict) + loss = output_dict["loss"] + if self.backward: + with profile( + activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], + record_shapes=True, + profile_memory=True, + with_stack=True, + ) as backward_prof: + with record_function("model_inference"): + loss.backward() + self.trainer.logger.info(f"Profile: [{i + 1}/{self.warm_up + 1}]") + if self.forward: + self.trainer.logger.info( + "Forward profile: \n" + + str( + forward_prof.key_averages().table( + sort_by=self.sort_by, row_limit=self.row_limit + ) + ) + ) + forward_prof.export_chrome_trace( + os.path.join(self.trainer.cfg.save_path, "forward_trace.json") + ) + + if self.backward: + self.trainer.logger.info( + "Backward profile: \n" + + str( + backward_prof.key_averages().table( + sort_by=self.sort_by, row_limit=self.row_limit + ) + ) + ) + backward_prof.export_chrome_trace( + os.path.join(self.trainer.cfg.save_path, "backward_trace.json") + ) + if self.interrupt: + sys.exit(0) + + +@HOOKS.register_module() +class RuntimeProfilerV2(HookBase): + def __init__( + self, + interrupt=False, + wait=1, + warmup=1, + active=10, + repeat=1, + sort_by="cuda_time_total", + row_limit=30, + ): + self.interrupt = interrupt + self.wait = wait + self.warmup = warmup + self.active = active + self.repeat = repeat + self.sort_by = sort_by + self.row_limit = row_limit + + def before_train(self): + self.trainer.logger.info("Profiling runtime ...") + from torch.profiler import ( + profile, + record_function, + ProfilerActivity, + schedule, + tensorboard_trace_handler, + ) + + prof = profile( + activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], + schedule=schedule( + wait=self.wait, + warmup=self.warmup, + active=self.active, + repeat=self.repeat, + ), + on_trace_ready=tensorboard_trace_handler(self.trainer.cfg.save_path), + record_shapes=True, + profile_memory=True, + with_stack=True, + ) + prof.start() + for i, input_dict in enumerate(self.trainer.train_loader): + if i >= (self.wait + self.warmup + self.active) * self.repeat: + break + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + with record_function("model_forward"): + output_dict = self.trainer.model(input_dict) + loss = output_dict["loss"] + with record_function("model_backward"): + loss.backward() + prof.step() + self.trainer.logger.info( + f"Profile: [{i + 1}/{(self.wait + self.warmup + self.active) * self.repeat}]" + ) + self.trainer.logger.info( + "Profile: \n" + + str( + prof.key_averages().table( + sort_by=self.sort_by, row_limit=self.row_limit + ) + ) + ) + prof.stop() + + if self.interrupt: + sys.exit(0) + + +@HOOKS.register_module() +class WeightDecaySchedular(HookBase): + def __init__( + self, + base_value=0.04, + final_value=0.2, + ): + self.base_value = base_value + self.final_value = final_value + self.scheduler = None + + def before_train(self): + curr_step = self.trainer.start_epoch * len(self.trainer.train_loader) + self.scheduler = CosineScheduler( + base_value=self.base_value, + final_value=self.final_value, + total_iters=self.trainer.cfg.scheduler.total_steps, + ) + self.scheduler.iter = curr_step + + def before_step(self): + wd = self.scheduler.step() + for param_group in self.trainer.optimizer.param_groups: + param_group["weight_decay"] = wd + if self.trainer.writer is not None: + self.trainer.writer.add_scalar("params/wd", wd, self.scheduler.iter) + + +@HOOKS.register_module() +class GarbageHandler(HookBase): + def __init__(self, interval=150, disable_auto=True, empty_cache=False): + self.interval = interval + self.disable_auto = disable_auto + self.empty_cache = empty_cache + self.iter = 1 + + def before_train(self): + if self.disable_auto: + gc.disable() + self.trainer.logger.info("Disable automatic garbage collection") + + def before_epoch(self): + self.iter = 1 + + def after_step(self): + if self.iter % self.interval == 0: + gc.collect() + if self.empty_cache: + torch.cuda.empty_cache() + self.trainer.logger.info("Garbage collected") + self.iter += 1 + + def after_train(self): + gc.collect() + torch.cuda.empty_cache() diff --git a/point_transformer_v3/pointcept_minimal/pointcept/engines/launch.py b/point_transformer_v3/pointcept_minimal/pointcept/engines/launch.py new file mode 100644 index 0000000..99a8351 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/engines/launch.py @@ -0,0 +1,137 @@ +""" +Launcher + +modified from detectron2(https://github.com/facebookresearch/detectron2) + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import os +import logging +from datetime import timedelta +import torch +import torch.distributed as dist +import torch.multiprocessing as mp + +from pointcept.utils import comm + +__all__ = ["DEFAULT_TIMEOUT", "launch"] + +DEFAULT_TIMEOUT = timedelta(minutes=60) + + +def _find_free_port(): + import socket + + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + # Binding to port 0 will cause the OS to find an available port for us + sock.bind(("", 0)) + port = sock.getsockname()[1] + sock.close() + # NOTE: there is still a chance the port could be taken by other processes. + return port + + +def launch( + main_func, + num_gpus_per_machine, + num_machines=1, + machine_rank=0, + dist_url=None, + cfg=(), + timeout=DEFAULT_TIMEOUT, +): + """ + Launch multi-gpu or distributed training. + This function must be called on all machines involved in the training. + It will spawn child processes (defined by ``num_gpus_per_machine``) on each machine. + Args: + main_func: a function that will be called by `main_func(*args)` + num_gpus_per_machine (int): number of GPUs per machine + num_machines (int): the total number of machines + machine_rank (int): the rank of this machine + dist_url (str): url to connect to for distributed jobs, including protocol + e.g. "tcp://127.0.0.1:8686". + Can be set to "auto" to automatically select a free port on localhost + timeout (timedelta): timeout of the distributed workers + args (tuple): arguments passed to main_func + """ + world_size = num_machines * num_gpus_per_machine + if world_size > 1: + if dist_url == "auto": + assert ( + num_machines == 1 + ), "dist_url=auto not supported in multi-machine jobs." + port = _find_free_port() + dist_url = f"tcp://127.0.0.1:{port}" + if num_machines > 1 and dist_url.startswith("file://"): + logger = logging.getLogger(__name__) + logger.warning( + "file:// is not a reliable init_method in multi-machine jobs. Prefer tcp://" + ) + + mp.spawn( + _distributed_worker, + nprocs=num_gpus_per_machine, + args=( + main_func, + world_size, + num_gpus_per_machine, + machine_rank, + dist_url, + cfg, + timeout, + ), + daemon=False, + ) + else: + main_func(*cfg) + + +def _distributed_worker( + local_rank, + main_func, + world_size, + num_gpus_per_machine, + machine_rank, + dist_url, + cfg, + timeout=DEFAULT_TIMEOUT, +): + assert ( + torch.cuda.is_available() + ), "cuda is not available. Please check your installation." + global_rank = machine_rank * num_gpus_per_machine + local_rank + try: + dist.init_process_group( + backend="NCCL", + init_method=dist_url, + world_size=world_size, + rank=global_rank, + timeout=timeout, + ) + except Exception as e: + logger = logging.getLogger(__name__) + logger.error("Process group URL: {}".format(dist_url)) + raise e + + # Setup the local process group (which contains ranks within the same machine) + assert comm._LOCAL_PROCESS_GROUP is None + num_machines = world_size // num_gpus_per_machine + for i in range(num_machines): + ranks_on_i = list( + range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine) + ) + pg = dist.new_group(ranks_on_i) + if i == machine_rank: + comm._LOCAL_PROCESS_GROUP = pg + + assert num_gpus_per_machine <= torch.cuda.device_count() + torch.cuda.set_device(local_rank) + + # synchronize is needed here to prevent a possible timeout after calling init_process_group + # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172 + comm.synchronize() + + main_func(*cfg) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/engines/test.py b/point_transformer_v3/pointcept_minimal/pointcept/engines/test.py new file mode 100644 index 0000000..55f5964 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/engines/test.py @@ -0,0 +1,890 @@ +""" +Tester + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import json +from uuid import uuid4 +import os +import time +import numpy as np +from collections import OrderedDict +import torch +import torch.distributed as dist +import torch.nn.functional as F +import torch.utils.data + +from .defaults import create_ddp_model +import pointcept.utils.comm as comm +from pointcept.datasets import build_dataset, collate_fn +from pointcept.models import build_model +from pointcept.utils.logger import get_root_logger +from pointcept.utils.registry import Registry +from pointcept.utils.misc import ( + AverageMeter, + intersection_and_union, + intersection_and_union_gpu, + make_dirs, +) + + +TESTERS = Registry("testers") + + +class TesterBase: + def __init__(self, cfg, model=None, test_loader=None, verbose=False) -> None: + torch.multiprocessing.set_sharing_strategy("file_system") + self.logger = get_root_logger( + log_file=os.path.join(cfg.save_path, "test.log"), + file_mode="a" if cfg.resume else "w", + ) + self.logger.info("=> Loading config ...") + self.cfg = cfg + self.verbose = verbose + if self.verbose and model is None: + # if model is not none, trigger tester with trainer, no need to print config + self.logger.info(f"Save path: {cfg.save_path}") + self.logger.info(f"Config:\n{cfg.pretty_text}") + if model is None: + self.logger.info("=> Building model ...") + self.model = self.build_model() + else: + self.model = model + if test_loader is None: + self.logger.info("=> Building test dataset & dataloader ...") + self.test_loader = self.build_test_loader() + else: + self.test_loader = test_loader + + def build_model(self): + model = build_model(self.cfg.model) + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + self.logger.info(f"Num params: {n_parameters}") + model = create_ddp_model( + model.cuda(), + broadcast_buffers=False, + find_unused_parameters=self.cfg.find_unused_parameters, + ) + if os.path.isfile(self.cfg.weight): + self.logger.info(f"Loading weight at: {self.cfg.weight}") + checkpoint = torch.load(self.cfg.weight, weights_only=False) + weight = OrderedDict() + for key, value in checkpoint["state_dict"].items(): + if key.startswith("module."): + if comm.get_world_size() == 1: + key = key[7:] # module.xxx.xxx -> xxx.xxx + else: + if comm.get_world_size() > 1: + key = "module." + key # xxx.xxx -> module.xxx.xxx + weight[key] = value + model.load_state_dict(weight, strict=True) + self.logger.info( + "=> Loaded weight '{}' (epoch {})".format( + self.cfg.weight, checkpoint["epoch"] + ) + ) + else: + raise RuntimeError("=> No checkpoint found at '{}'".format(self.cfg.weight)) + return model + + def build_test_loader(self): + test_dataset = build_dataset(self.cfg.data.test) + if comm.get_world_size() > 1: + test_sampler = torch.utils.data.distributed.DistributedSampler(test_dataset) + else: + test_sampler = None + test_loader = torch.utils.data.DataLoader( + test_dataset, + batch_size=self.cfg.batch_size_test_per_gpu, + shuffle=False, + num_workers=self.cfg.batch_size_test_per_gpu, + pin_memory=True, + sampler=test_sampler, + collate_fn=self.__class__.collate_fn, + ) + return test_loader + + def test(self): + raise NotImplementedError + + @staticmethod + def collate_fn(batch): + raise collate_fn(batch) + + +@TESTERS.register_module() +class SemSegTester(TesterBase): + def test(self): + assert self.test_loader.batch_size == 1 + logger = get_root_logger() + logger.info(">>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>") + + batch_time = AverageMeter() + intersection_meter = AverageMeter() + union_meter = AverageMeter() + target_meter = AverageMeter() + self.model.eval() + + save_path = os.path.join(self.cfg.save_path, "result") + make_dirs(save_path) + # create submit folder only on main process + if ( + self.cfg.data.test.type == "ScanNetDataset" + or self.cfg.data.test.type == "ScanNet200Dataset" + or self.cfg.data.test.type == "ScanNetPPDataset" + ) and comm.is_main_process(): + make_dirs(os.path.join(save_path, "submit")) + elif ( + self.cfg.data.test.type == "SemanticKITTIDataset" and comm.is_main_process() + ): + make_dirs(os.path.join(save_path, "submit")) + elif self.cfg.data.test.type == "NuScenesDataset" and comm.is_main_process(): + import json + + make_dirs(os.path.join(save_path, "submit", "lidarseg", "test")) + make_dirs(os.path.join(save_path, "submit", "test")) + submission = dict( + meta=dict( + use_camera=False, + use_lidar=True, + use_radar=False, + use_map=False, + use_external=False, + ) + ) + with open( + os.path.join(save_path, "submit", "test", "submission.json"), "w" + ) as f: + json.dump(submission, f, indent=4) + comm.synchronize() + record = {} + # fragment inference + for idx, data_dict in enumerate(self.test_loader): + start = time.time() + data_dict = data_dict[0] # current assume batch size is 1 + fragment_list = data_dict.pop("fragment_list") + segment = data_dict.pop("segment") + data_name = data_dict.pop("name") + pred_save_path = os.path.join(save_path, "{}_pred.npy".format(data_name)) + if os.path.isfile(pred_save_path): + logger.info( + "{}/{}: {}, loaded pred and label.".format( + idx + 1, len(self.test_loader), data_name + ) + ) + pred = np.load(pred_save_path) + if "origin_segment" in data_dict.keys(): + segment = data_dict["origin_segment"] + else: + pred = torch.zeros((segment.size, self.cfg.data.num_classes)).cuda() + for i in range(len(fragment_list)): + fragment_batch_size = 1 + s_i, e_i = i * fragment_batch_size, min( + (i + 1) * fragment_batch_size, len(fragment_list) + ) + input_dict = collate_fn(fragment_list[s_i:e_i]) + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + idx_part = input_dict["index"] + with torch.no_grad(): + pred_part = self.model(input_dict)["seg_logits"] # (n, k) + pred_part = F.softmax(pred_part, -1) + if self.cfg.empty_cache: + torch.cuda.empty_cache() + bs = 0 + for be in input_dict["offset"]: + pred[idx_part[bs:be], :] += pred_part[bs:be] + bs = be + + logger.info( + "Test: {}/{}-{data_name}, Batch: {batch_idx}/{batch_num}".format( + idx + 1, + len(self.test_loader), + data_name=data_name, + batch_idx=i, + batch_num=len(fragment_list), + ) + ) + if self.cfg.data.test.type == "ScanNetPPDataset": + pred = pred.topk(3, dim=1)[1].data.cpu().numpy() + else: + pred = pred.max(1)[1].data.cpu().numpy() + if "origin_segment" in data_dict.keys(): + assert "inverse" in data_dict.keys() + pred = pred[data_dict["inverse"]] + segment = data_dict["origin_segment"] + np.save(pred_save_path, pred) + if ( + self.cfg.data.test.type == "ScanNetDataset" + or self.cfg.data.test.type == "ScanNet200Dataset" + ): + np.savetxt( + os.path.join(save_path, "submit", "{}.txt".format(data_name)), + self.test_loader.dataset.class2id[pred].reshape([-1, 1]), + fmt="%d", + ) + elif self.cfg.data.test.type == "ScanNetPPDataset": + np.savetxt( + os.path.join(save_path, "submit", "{}.txt".format(data_name)), + pred.astype(np.int32), + delimiter=",", + fmt="%d", + ) + pred = pred[:, 0] # for mIoU, TODO: support top3 mIoU + elif self.cfg.data.test.type == "SemanticKITTIDataset": + # 00_000000 -> 00, 000000 + sequence_name, frame_name = data_name.split("_") + os.makedirs( + os.path.join( + save_path, "submit", "sequences", sequence_name, "predictions" + ), + exist_ok=True, + ) + submit = pred.astype(np.uint32) + submit = np.vectorize( + self.test_loader.dataset.learning_map_inv.__getitem__ + )(submit).astype(np.uint32) + submit.tofile( + os.path.join( + save_path, + "submit", + "sequences", + sequence_name, + "predictions", + f"{frame_name}.label", + ) + ) + elif self.cfg.data.test.type == "NuScenesDataset": + np.array(pred + 1).astype(np.uint8).tofile( + os.path.join( + save_path, + "submit", + "lidarseg", + "test", + "{}_lidarseg.bin".format(data_name), + ) + ) + + intersection, union, target = intersection_and_union( + pred, segment, self.cfg.data.num_classes, self.cfg.data.ignore_index + ) + intersection_meter.update(intersection) + union_meter.update(union) + target_meter.update(target) + record[data_name] = dict( + intersection=intersection, union=union, target=target + ) + + mask = union != 0 + iou_class = intersection / (union + 1e-10) + iou = np.mean(iou_class[mask]) + acc = sum(intersection) / (sum(target) + 1e-10) + + m_iou = np.mean(intersection_meter.sum / (union_meter.sum + 1e-10)) + m_acc = np.mean(intersection_meter.sum / (target_meter.sum + 1e-10)) + + batch_time.update(time.time() - start) + logger.info( + "Test: {} [{}/{}]-{} " + "Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) " + "Accuracy {acc:.4f} ({m_acc:.4f}) " + "mIoU {iou:.4f} ({m_iou:.4f})".format( + data_name, + idx + 1, + len(self.test_loader), + segment.size, + batch_time=batch_time, + acc=acc, + m_acc=m_acc, + iou=iou, + m_iou=m_iou, + ) + ) + + logger.info("Syncing ...") + comm.synchronize() + record_sync = comm.gather(record, dst=0) + + if comm.is_main_process(): + record = {} + for _ in range(len(record_sync)): + r = record_sync.pop() + record.update(r) + del r + intersection = np.sum( + [meters["intersection"] for _, meters in record.items()], axis=0 + ) + union = np.sum([meters["union"] for _, meters in record.items()], axis=0) + target = np.sum([meters["target"] for _, meters in record.items()], axis=0) + + if self.cfg.data.test.type == "S3DISDataset": + torch.save( + dict(intersection=intersection, union=union, target=target), + os.path.join(save_path, f"{self.test_loader.dataset.split}.pth"), + ) + + iou_class = intersection / (union + 1e-10) + accuracy_class = intersection / (target + 1e-10) + mIoU = np.mean(iou_class) + mAcc = np.mean(accuracy_class) + allAcc = sum(intersection) / (sum(target) + 1e-10) + + logger.info( + "Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}".format( + mIoU, mAcc, allAcc + ) + ) + for i in range(self.cfg.data.num_classes): + logger.info( + "Class_{idx} - {name} Result: iou/accuracy {iou:.4f}/{accuracy:.4f}".format( + idx=i, + name=self.cfg.data.names[i], + iou=iou_class[i], + accuracy=accuracy_class[i], + ) + ) + logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") + + @staticmethod + def collate_fn(batch): + return batch + + +@TESTERS.register_module() +class DINOSemSegTester(TesterBase): + def test(self): + assert self.test_loader.batch_size == 1 + logger = get_root_logger() + logger.info(">>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>") + + batch_time = AverageMeter() + intersection_meter = AverageMeter() + union_meter = AverageMeter() + target_meter = AverageMeter() + self.model.eval() + + save_path = os.path.join(self.cfg.save_path, "result") + make_dirs(save_path) + # create submit folder only on main process + if ( + self.cfg.data.test.type == "ScanNetDataset" + or self.cfg.data.test.type == "ScanNet200Dataset" + or self.cfg.data.test.type == "ScanNetPPDataset" + ) and comm.is_main_process(): + make_dirs(os.path.join(save_path, "submit")) + elif ( + self.cfg.data.test.type == "SemanticKITTIDataset" and comm.is_main_process() + ): + make_dirs(os.path.join(save_path, "submit")) + elif self.cfg.data.test.type == "NuScenesDataset" and comm.is_main_process(): + import json + + make_dirs(os.path.join(save_path, "submit", "lidarseg", "test")) + make_dirs(os.path.join(save_path, "submit", "test")) + submission = dict( + meta=dict( + use_camera=False, + use_lidar=True, + use_radar=False, + use_map=False, + use_external=False, + ) + ) + with open( + os.path.join(save_path, "submit", "test", "submission.json"), "w" + ) as f: + json.dump(submission, f, indent=4) + comm.synchronize() + record = {} + # fragment inference + for idx, data_dict in enumerate(self.test_loader): + end = time.time() + data_dict = data_dict[0] # current assume batch size is 1 + fragment_list = data_dict.pop("fragment_list") + segment = data_dict.pop("segment") + data_name = data_dict.pop("name") + dino_coord = data_dict.pop("dino_coord").cuda(non_blocking=True) + dino_feat = data_dict.pop("dino_feat").cuda(non_blocking=True) + dino_offset = data_dict.pop("dino_offset").cuda(non_blocking=True) + pred_save_path = os.path.join(save_path, "{}_pred.npy".format(data_name)) + if os.path.isfile(pred_save_path): + logger.info( + "{}/{}: {}, loaded pred and label.".format( + idx + 1, len(self.test_loader), data_name + ) + ) + pred = np.load(pred_save_path) + if "origin_segment" in data_dict.keys(): + segment = data_dict["origin_segment"] + else: + pred = torch.zeros((segment.size, self.cfg.data.num_classes)).cuda() + for i in range(len(fragment_list)): + fragment_batch_size = 1 + s_i, e_i = i * fragment_batch_size, min( + (i + 1) * fragment_batch_size, len(fragment_list) + ) + input_dict = collate_fn(fragment_list[s_i:e_i]) + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + input_dict["dino_coord"] = dino_coord + input_dict["dino_feat"] = dino_feat + input_dict["dino_offset"] = dino_offset + idx_part = input_dict["index"] + with torch.no_grad(): + pred_part = self.model(input_dict)["seg_logits"] # (n, k) + pred_part = F.softmax(pred_part, -1) + if self.cfg.empty_cache: + torch.cuda.empty_cache() + bs = 0 + for be in input_dict["offset"]: + pred[idx_part[bs:be], :] += pred_part[bs:be] + bs = be + + logger.info( + "Test: {}/{}-{data_name}, Batch: {batch_idx}/{batch_num}".format( + idx + 1, + len(self.test_loader), + data_name=data_name, + batch_idx=i, + batch_num=len(fragment_list), + ) + ) + if self.cfg.data.test.type == "ScanNetPPDataset": + pred = pred.topk(3, dim=1)[1].data.cpu().numpy() + else: + pred = pred.max(1)[1].data.cpu().numpy() + if "origin_segment" in data_dict.keys(): + assert "inverse" in data_dict.keys() + pred = pred[data_dict["inverse"]] + segment = data_dict["origin_segment"] + np.save(pred_save_path, pred) + if ( + self.cfg.data.test.type == "ScanNetDataset" + or self.cfg.data.test.type == "ScanNet200Dataset" + ): + np.savetxt( + os.path.join(save_path, "submit", "{}.txt".format(data_name)), + self.test_loader.dataset.class2id[pred].reshape([-1, 1]), + fmt="%d", + ) + elif self.cfg.data.test.type == "ScanNetPPDataset": + np.savetxt( + os.path.join(save_path, "submit", "{}.txt".format(data_name)), + pred.astype(np.int32), + delimiter=",", + fmt="%d", + ) + pred = pred[:, 0] # for mIoU, TODO: support top3 mIoU + elif self.cfg.data.test.type == "SemanticKITTIDataset": + # 00_000000 -> 00, 000000 + sequence_name, frame_name = data_name.split("_") + os.makedirs( + os.path.join( + save_path, "submit", "sequences", sequence_name, "predictions" + ), + exist_ok=True, + ) + submit = pred.astype(np.uint32) + submit = np.vectorize( + self.test_loader.dataset.learning_map_inv.__getitem__ + )(submit).astype(np.uint32) + submit.tofile( + os.path.join( + save_path, + "submit", + "sequences", + sequence_name, + "predictions", + f"{frame_name}.label", + ) + ) + elif self.cfg.data.test.type == "NuScenesDataset": + np.array(pred + 1).astype(np.uint8).tofile( + os.path.join( + save_path, + "submit", + "lidarseg", + "test", + "{}_lidarseg.bin".format(data_name), + ) + ) + + intersection, union, target = intersection_and_union( + pred, segment, self.cfg.data.num_classes, self.cfg.data.ignore_index + ) + intersection_meter.update(intersection) + union_meter.update(union) + target_meter.update(target) + record[data_name] = dict( + intersection=intersection, union=union, target=target + ) + + mask = union != 0 + iou_class = intersection / (union + 1e-10) + iou = np.mean(iou_class[mask]) + acc = sum(intersection) / (sum(target) + 1e-10) + + m_iou = np.mean(intersection_meter.sum / (union_meter.sum + 1e-10)) + m_acc = np.mean(intersection_meter.sum / (target_meter.sum + 1e-10)) + + batch_time.update(time.time() - end) + logger.info( + "Test: {} [{}/{}]-{} " + "Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) " + "Accuracy {acc:.4f} ({m_acc:.4f}) " + "mIoU {iou:.4f} ({m_iou:.4f})".format( + data_name, + idx + 1, + len(self.test_loader), + segment.size, + batch_time=batch_time, + acc=acc, + m_acc=m_acc, + iou=iou, + m_iou=m_iou, + ) + ) + + logger.info("Syncing ...") + comm.synchronize() + record_sync = comm.gather(record, dst=0) + + if comm.is_main_process(): + record = {} + for _ in range(len(record_sync)): + r = record_sync.pop() + record.update(r) + del r + intersection = np.sum( + [meters["intersection"] for _, meters in record.items()], axis=0 + ) + union = np.sum([meters["union"] for _, meters in record.items()], axis=0) + target = np.sum([meters["target"] for _, meters in record.items()], axis=0) + + if self.cfg.data.test.type == "S3DISDataset": + torch.save( + dict(intersection=intersection, union=union, target=target), + os.path.join(save_path, f"{self.test_loader.dataset.split}.pth"), + ) + + iou_class = intersection / (union + 1e-10) + accuracy_class = intersection / (target + 1e-10) + mIoU = np.mean(iou_class) + mAcc = np.mean(accuracy_class) + allAcc = sum(intersection) / (sum(target) + 1e-10) + + logger.info( + "Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}".format( + mIoU, mAcc, allAcc + ) + ) + for i in range(self.cfg.data.num_classes): + logger.info( + "Class_{idx} - {name} Result: iou/accuracy {iou:.4f}/{accuracy:.4f}".format( + idx=i, + name=self.cfg.data.names[i], + iou=iou_class[i], + accuracy=accuracy_class[i], + ) + ) + logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") + + @staticmethod + def collate_fn(batch): + return batch + + +@TESTERS.register_module() +class ClsTester(TesterBase): + def test(self): + logger = get_root_logger() + logger.info(">>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>") + batch_time = AverageMeter() + intersection_meter = AverageMeter() + union_meter = AverageMeter() + target_meter = AverageMeter() + self.model.eval() + + for i, input_dict in enumerate(self.test_loader): + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + end = time.time() + with torch.no_grad(): + output_dict = self.model(input_dict) + output = output_dict["cls_logits"] + pred = output.max(1)[1] + label = input_dict["category"] + intersection, union, target = intersection_and_union_gpu( + pred, label, self.cfg.data.num_classes, self.cfg.data.ignore_index + ) + if comm.get_world_size() > 1: + dist.all_reduce(intersection), dist.all_reduce(union), dist.all_reduce( + target + ) + intersection, union, target = ( + intersection.cpu().numpy(), + union.cpu().numpy(), + target.cpu().numpy(), + ) + intersection_meter.update(intersection), union_meter.update( + union + ), target_meter.update(target) + + accuracy = sum(intersection_meter.val) / (sum(target_meter.val) + 1e-10) + batch_time.update(time.time() - end) + + logger.info( + "Test: [{}/{}] " + "Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) " + "Accuracy {accuracy:.4f} ".format( + i + 1, + len(self.test_loader), + batch_time=batch_time, + accuracy=accuracy, + ) + ) + + iou_class = intersection_meter.sum / (union_meter.sum + 1e-10) + accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10) + mIoU = np.mean(iou_class) + mAcc = np.mean(accuracy_class) + allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10) + logger.info( + "Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.".format( + mIoU, mAcc, allAcc + ) + ) + + for i in range(self.cfg.data.num_classes): + logger.info( + "Class_{idx} - {name} Result: iou/accuracy {iou:.4f}/{accuracy:.4f}".format( + idx=i, + name=self.cfg.data.names[i], + iou=iou_class[i], + accuracy=accuracy_class[i], + ) + ) + logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") + + @staticmethod + def collate_fn(batch): + return collate_fn(batch) + + +@TESTERS.register_module() +class ClsVotingTester(TesterBase): + def __init__( + self, + num_repeat=100, + metric="allAcc", + **kwargs, + ): + super().__init__(**kwargs) + self.num_repeat = num_repeat + self.metric = metric + self.best_idx = 0 + self.best_record = None + self.best_metric = 0 + + def test(self): + for i in range(self.num_repeat): + logger = get_root_logger() + logger.info(f">>>>>>>>>>>>>>>> Start Evaluation {i + 1} >>>>>>>>>>>>>>>>") + record = self.test_once() + if comm.is_main_process(): + if record[self.metric] > self.best_metric: + self.best_record = record + self.best_idx = i + self.best_metric = record[self.metric] + info = f"Current best record is Evaluation {i + 1}: " + for m in self.best_record.keys(): + info += f"{m}: {self.best_record[m]:.4f} " + logger.info(info) + + def test_once(self): + logger = get_root_logger() + batch_time = AverageMeter() + intersection_meter = AverageMeter() + target_meter = AverageMeter() + record = {} + self.model.eval() + + for idx, data_dict in enumerate(self.test_loader): + end = time.time() + data_dict = data_dict[0] # current assume batch size is 1 + voting_list = data_dict.pop("voting_list") + category = data_dict.pop("category") + data_name = data_dict.pop("name") + # pred = torch.zeros([1, self.cfg.data.num_classes]).cuda() + # for i in range(len(voting_list)): + # input_dict = voting_list[i] + # for key in input_dict.keys(): + # if isinstance(input_dict[key], torch.Tensor): + # input_dict[key] = input_dict[key].cuda(non_blocking=True) + # with torch.no_grad(): + # pred += F.softmax(self.model(input_dict)["cls_logits"], -1) + input_dict = collate_fn(voting_list) + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + with torch.no_grad(): + pred = F.softmax(self.model(input_dict)["cls_logits"], -1).sum( + 0, keepdim=True + ) + pred = pred.max(1)[1].cpu().numpy() + intersection, union, target = intersection_and_union( + pred, category, self.cfg.data.num_classes, self.cfg.data.ignore_index + ) + intersection_meter.update(intersection) + target_meter.update(target) + record[data_name] = dict(intersection=intersection, target=target) + acc = sum(intersection) / (sum(target) + 1e-10) + m_acc = np.mean(intersection_meter.sum / (target_meter.sum + 1e-10)) + batch_time.update(time.time() - end) + logger.info( + "Test: {} [{}/{}] " + "Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) " + "Accuracy {acc:.4f} ({m_acc:.4f}) ".format( + data_name, + idx + 1, + len(self.test_loader), + batch_time=batch_time, + acc=acc, + m_acc=m_acc, + ) + ) + + logger.info("Syncing ...") + comm.synchronize() + record_sync = comm.gather(record, dst=0) + + if comm.is_main_process(): + record = {} + for _ in range(len(record_sync)): + r = record_sync.pop() + record.update(r) + del r + intersection = np.sum( + [meters["intersection"] for _, meters in record.items()], axis=0 + ) + target = np.sum([meters["target"] for _, meters in record.items()], axis=0) + accuracy_class = intersection / (target + 1e-10) + mAcc = np.mean(accuracy_class) + allAcc = sum(intersection) / (sum(target) + 1e-10) + + logger.info("Val result: mAcc/allAcc {:.4f}/{:.4f}".format(mAcc, allAcc)) + for i in range(self.cfg.data.num_classes): + logger.info( + "Class_{idx} - {name} Result: iou/accuracy {accuracy:.4f}".format( + idx=i, + name=self.cfg.data.names[i], + accuracy=accuracy_class[i], + ) + ) + return dict(mAcc=mAcc, allAcc=allAcc) + + @staticmethod + def collate_fn(batch): + return batch + + +@TESTERS.register_module() +class PartSegTester(TesterBase): + def test(self): + test_dataset = self.test_loader.dataset + logger = get_root_logger() + logger.info(">>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>") + + batch_time = AverageMeter() + + num_categories = len(self.test_loader.dataset.categories) + iou_category, iou_count = np.zeros(num_categories), np.zeros(num_categories) + self.model.eval() + + save_path = os.path.join( + self.cfg.save_path, "result", "test_epoch{}".format(self.cfg.test_epoch) + ) + make_dirs(save_path) + + for idx in range(len(test_dataset)): + end = time.time() + data_name = test_dataset.get_data_name(idx) + + data_dict_list, label = test_dataset[idx] + pred = torch.zeros((label.size, self.cfg.data.num_classes)).cuda() + batch_num = int(np.ceil(len(data_dict_list) / self.cfg.batch_size_test)) + for i in range(batch_num): + s_i, e_i = i * self.cfg.batch_size_test, min( + (i + 1) * self.cfg.batch_size_test, len(data_dict_list) + ) + input_dict = collate_fn(data_dict_list[s_i:e_i]) + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + with torch.no_grad(): + pred_part = self.model(input_dict)["cls_logits"] + pred_part = F.softmax(pred_part, -1) + if self.cfg.empty_cache: + torch.cuda.empty_cache() + pred_part = pred_part.reshape(-1, label.size, self.cfg.data.num_classes) + pred = pred + pred_part.total(dim=0) + logger.info( + "Test: {} {}/{}, Batch: {batch_idx}/{batch_num}".format( + data_name, + idx + 1, + len(test_dataset), + batch_idx=i, + batch_num=batch_num, + ) + ) + pred = pred.max(1)[1].data.cpu().numpy() + + category_index = data_dict_list[0]["cls_token"] + category = self.test_loader.dataset.categories[category_index] + parts_idx = self.test_loader.dataset.category2part[category] + parts_iou = np.zeros(len(parts_idx)) + for j, part in enumerate(parts_idx): + if (np.sum(label == part) == 0) and (np.sum(pred == part) == 0): + parts_iou[j] = 1.0 + else: + i = (label == part) & (pred == part) + u = (label == part) | (pred == part) + parts_iou[j] = np.sum(i) / (np.sum(u) + 1e-10) + iou_category[category_index] += parts_iou.mean() + iou_count[category_index] += 1 + + batch_time.update(time.time() - end) + logger.info( + "Test: {} [{}/{}] " + "Batch {batch_time.val:.3f} " + "({batch_time.avg:.3f}) ".format( + data_name, idx + 1, len(self.test_loader), batch_time=batch_time + ) + ) + + ins_mIoU = iou_category.sum() / (iou_count.sum() + 1e-10) + cat_mIoU = (iou_category / (iou_count + 1e-10)).mean() + logger.info( + "Val result: ins.mIoU/cat.mIoU {:.4f}/{:.4f}.".format(ins_mIoU, cat_mIoU) + ) + for i in range(num_categories): + logger.info( + "Class_{idx}-{name} Result: iou_cat/num_sample {iou_cat:.4f}/{iou_count:.4f}".format( + idx=i, + name=self.test_loader.dataset.categories[i], + iou_cat=iou_category[i] / (iou_count[i] + 1e-10), + iou_count=int(iou_count[i]), + ) + ) + logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") + + @staticmethod + def collate_fn(batch): + return collate_fn(batch) + + diff --git a/point_transformer_v3/pointcept_minimal/pointcept/engines/train.py b/point_transformer_v3/pointcept_minimal/pointcept/engines/train.py new file mode 100644 index 0000000..fea77cd --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/engines/train.py @@ -0,0 +1,372 @@ +""" +Trainer + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import os +import sys +import weakref +import wandb +import torch +import torch.nn as nn +import torch.utils.data +from packaging import version +from functools import partial +from pathlib import Path + +if sys.version_info >= (3, 10): + from collections.abc import Iterator +else: + from collections import Iterator +from tensorboardX import SummaryWriter + +from .defaults import create_ddp_model, worker_init_fn +from .hooks import HookBase, build_hooks +import pointcept.utils.comm as comm +from pointcept.datasets import build_dataset, point_collate_fn, collate_fn +from pointcept.models import build_model +from pointcept.utils.logger import get_root_logger +from pointcept.utils.optimizer import build_optimizer +from pointcept.utils.scheduler import build_scheduler +from pointcept.utils.events import EventStorage, ExceptionWriter +from pointcept.utils.registry import Registry + + +TRAINERS = Registry("trainers") +AMP_DTYPE = dict( + float16=torch.float16, + bfloat16=torch.bfloat16, +) + + +class TrainerBase: + def __init__(self) -> None: + self.hooks = [] + self.model = None + self.epoch = 0 + self.start_epoch = 0 + self.max_epoch = 0 + self.max_iter = 0 + self.comm_info = dict() + self.data_iterator: Iterator = enumerate([]) + self.storage: EventStorage + self.writer: SummaryWriter + + def register_hooks(self, hooks) -> None: + hooks = build_hooks(hooks) + for h in hooks: + assert isinstance(h, HookBase) + # To avoid circular reference, hooks and trainer cannot own each other. + # This normally does not matter, but will cause memory leak if the + # involved objects contain __del__: + # See http://engineering.hearsaysocial.com/2013/06/16/circular-references-in-python/ + h.trainer = weakref.proxy(self) + self.hooks.extend(hooks) + + def train(self): + with EventStorage() as self.storage: + # => before train + self.before_train() + for self.epoch in range(self.start_epoch, self.max_epoch): + # => before epoch + self.before_epoch() + # => run_epoch + for ( + self.comm_info["iter"], + self.comm_info["input_dict"], + ) in self.data_iterator: + # => before_step + self.before_step() + # => run_step + self.run_step() + # => after_step + self.after_step() + # => after epoch + self.after_epoch() + # => after train + self.after_train() + + def before_train(self): + for h in self.hooks: + h.before_train() + + def before_epoch(self): + for h in self.hooks: + h.before_epoch() + + def before_step(self): + for h in self.hooks: + h.before_step() + + def run_step(self): + raise NotImplementedError + + def after_step(self): + for h in self.hooks: + h.after_step() + + def after_epoch(self): + for h in self.hooks: + h.after_epoch() + self.storage.reset_histories() + + def after_train(self): + # Sync GPU before running train hooks + comm.synchronize() + for h in self.hooks: + h.after_train() + if comm.is_main_process(): + self.writer.close() + + +@TRAINERS.register_module("DefaultTrainer") +class Trainer(TrainerBase): + def __init__(self, cfg): + super(Trainer, self).__init__() + self.epoch = 0 + self.start_epoch = 0 + self.max_epoch = cfg.eval_epoch + self.best_metric_value = -torch.inf + self.logger = get_root_logger( + log_file=os.path.join(cfg.save_path, "train.log"), + file_mode="a" if cfg.resume else "w", + ) + self.logger.info("=> Loading config ...") + self.cfg = cfg + self.logger.info(f"Save path: {cfg.save_path}") + self.logger.info(f"Config:\n{cfg.pretty_text}") + self.logger.info("=> Building model ...") + self.model = self.build_model() + self.logger.info("=> Building writer ...") + self.writer = self.build_writer() + self.logger.info("=> Building train dataset & dataloader ...") + self.train_loader = self.build_train_loader() + self.logger.info("=> Building val dataset & dataloader ...") + self.val_loader = self.build_val_loader() + self.logger.info("=> Building optimize, scheduler, scaler(amp) ...") + self.optimizer = self.build_optimizer() + self.scheduler = self.build_scheduler() + self.scaler = self.build_scaler() + self.logger.info("=> Building hooks ...") + self.register_hooks(self.cfg.hooks) + self._gradient_accumulation_counter = 0 + + def train(self): + with EventStorage() as self.storage, ExceptionWriter(): + # => before train + self.before_train() + self.logger.info(">>>>>>>>>>>>>>>> Start Training >>>>>>>>>>>>>>>>") + for self.epoch in range(self.start_epoch, self.max_epoch): + # => before epoch + if comm.get_world_size() > 1: + self.train_loader.sampler.set_epoch(self.epoch) + self.model.train() + self.data_iterator = enumerate(self.train_loader) + self.before_epoch() + # => run_epoch + for ( + self.comm_info["iter"], + self.comm_info["input_dict"], + ) in self.data_iterator: + # => before_step + self.before_step() + # => run_step + self.run_step() + # => after_step + self.after_step() + # => after epoch + self.after_epoch() + # => after train + self.after_train() + + def run_step(self): + if version.parse(torch.__version__) >= version.parse("2.4"): + auto_cast = partial(torch.amp.autocast, device_type="cuda") + else: + # deprecated warning + auto_cast = torch.cuda.amp.autocast + + input_dict = self.comm_info["input_dict"] + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + + # Only clear gradients on first accumulation step + if self._gradient_accumulation_counter == 0: + self.optimizer.zero_grad() + + # Forward pass + with auto_cast( + enabled=self.cfg.enable_amp, dtype=AMP_DTYPE[self.cfg.amp_dtype] + ): + output_dict = self.model(input_dict) + loss = ( + output_dict["loss"] / self.cfg.gradient_accumulation_steps + ) # scale loss + + # Backward pass + if self.cfg.enable_amp: + self.scaler.scale(loss).backward() + else: + loss.backward() + self._gradient_accumulation_counter += 1 + + # Perform optimizer step only when enough gradients have accumulated + if self._gradient_accumulation_counter >= self.cfg.gradient_accumulation_steps: + if self.cfg.enable_amp: + self.scaler.unscale_(self.optimizer) + if self.cfg.clip_grad is not None: + torch.nn.utils.clip_grad_norm_( + self.model.parameters(), self.cfg.clip_grad + ) + self.scaler.step(self.optimizer) + + # When enable amp, optimizer.step call are skipped if the loss scaling factor is too large. + # Fix torch warning scheduler step before optimizer step. + scale = self.scaler.get_scale() + self.scaler.update() + if scale <= self.scaler.get_scale(): + self.scheduler.step() + else: + if self.cfg.clip_grad is not None: + torch.nn.utils.clip_grad_norm_( + self.model.parameters(), self.cfg.clip_grad + ) + self.optimizer.step() + self.scheduler.step() + + # Reset grad accumulation counter + self._gradient_accumulation_counter = 0 + + if self.cfg.empty_cache: + torch.cuda.empty_cache() + self.comm_info["model_output_dict"] = output_dict + + def after_epoch(self): + for h in self.hooks: + h.after_epoch() + self.storage.reset_histories() + if self.cfg.empty_cache_per_epoch: + torch.cuda.empty_cache() + + def build_model(self): + model = build_model(self.cfg.model) + if self.cfg.sync_bn: + model = nn.SyncBatchNorm.convert_sync_batchnorm(model) + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + # logger.info(f"Model: \n{self.model}") + self.logger.info(f"Num params: {n_parameters}") + model = create_ddp_model( + model.cuda(), + broadcast_buffers=False, + find_unused_parameters=self.cfg.find_unused_parameters, + ) + return model + + def build_writer(self): + writer = SummaryWriter(self.cfg.save_path) if comm.is_main_process() else None + self.logger.info(f"Tensorboard writer logging dir: {self.cfg.save_path}") + if self.cfg.enable_wandb and comm.is_main_process(): + tag, name = Path(self.cfg.save_path).parts[-2:] + wandb.init( + project=self.cfg.wandb_project, + name=f"{tag}/{name}", + tags=[tag], + dir=self.cfg.save_path, + settings=wandb.Settings(api_key=self.cfg.wandb_key), + config=self.cfg, + ) + return writer + + def build_train_loader(self): + train_data = build_dataset(self.cfg.data.train) + + if comm.get_world_size() > 1: + train_sampler = torch.utils.data.distributed.DistributedSampler(train_data) + else: + train_sampler = None + + init_fn = ( + partial( + worker_init_fn, + num_workers=self.cfg.num_worker_per_gpu, + rank=comm.get_rank(), + seed=self.cfg.seed, + ) + if self.cfg.seed is not None + else None + ) + + train_loader = torch.utils.data.DataLoader( + train_data, + batch_size=self.cfg.batch_size_per_gpu, + shuffle=(train_sampler is None), + num_workers=self.cfg.num_worker_per_gpu, + sampler=train_sampler, + collate_fn=partial(point_collate_fn, mix_prob=self.cfg.mix_prob), + pin_memory=True, + worker_init_fn=init_fn, + drop_last=len(train_data) > self.cfg.batch_size, + persistent_workers=True, + ) + return train_loader + + def build_val_loader(self): + val_loader = None + if self.cfg.evaluate: + val_data = build_dataset(self.cfg.data.val) + if comm.get_world_size() > 1: + val_sampler = torch.utils.data.distributed.DistributedSampler(val_data) + else: + val_sampler = None + val_loader = torch.utils.data.DataLoader( + val_data, + batch_size=self.cfg.batch_size_val_per_gpu, + shuffle=False, + num_workers=self.cfg.num_worker_per_gpu, + pin_memory=True, + sampler=val_sampler, + collate_fn=collate_fn, + ) + return val_loader + + def build_optimizer(self): + return build_optimizer(self.cfg.optimizer, self.model, self.cfg.param_dicts) + + def build_scheduler(self): + assert hasattr(self, "optimizer") + assert hasattr(self, "train_loader") + self.cfg.scheduler.total_steps = ( + len(self.train_loader) + * self.cfg.eval_epoch + // self.cfg.gradient_accumulation_steps + ) + return build_scheduler(self.cfg.scheduler, self.optimizer) + + def build_scaler(self): + if version.parse(torch.__version__) >= version.parse("2.4"): + grad_scaler = partial(torch.amp.GradScaler, device="cuda") + else: + # deprecated warning + grad_scaler = torch.cuda.amp.GradScaler + scaler = grad_scaler() if self.cfg.enable_amp else None + return scaler + + +@TRAINERS.register_module("MultiDatasetTrainer") +class MultiDatasetTrainer(Trainer): + def build_train_loader(self): + from pointcept.datasets import MultiDatasetDataloader + + train_data = build_dataset(self.cfg.data.train) + train_loader = MultiDatasetDataloader( + train_data, + self.cfg.batch_size_per_gpu, + self.cfg.num_worker_per_gpu, + self.cfg.mix_prob, + self.cfg.seed, + ) + self.comm_info["iter_per_epoch"] = len(train_loader) + return train_loader diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/__init__.py b/point_transformer_v3/pointcept_minimal/pointcept/models/__init__.py new file mode 100644 index 0000000..1a1a1a4 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/models/__init__.py @@ -0,0 +1,10 @@ +from .builder import build_model +from .default import DefaultSegmentor, DefaultClassifier +from .modules import PointModule, PointModel + +# Backbones +from .point_transformer_v3 import * + +# Semantic Segmentation +from .context_aware_classifier import * + diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/builder.py b/point_transformer_v3/pointcept_minimal/pointcept/models/builder.py new file mode 100644 index 0000000..8c723d7 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/models/builder.py @@ -0,0 +1,17 @@ +""" +Model Builder + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import copy +from pointcept.utils.registry import Registry + +MODELS = Registry("models") +MODULES = Registry("modules") + + +def build_model(cfg): + """Build models.""" + return MODELS.build(copy.deepcopy(cfg)) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/default.py b/point_transformer_v3/pointcept_minimal/pointcept/models/default.py new file mode 100644 index 0000000..da934e5 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/models/default.py @@ -0,0 +1,230 @@ +import torch +import torch.nn as nn +import torch_scatter +import torch_cluster +from collections import OrderedDict + +from pointcept.models.losses import build_criteria +from pointcept.models.utils.structure import Point +from pointcept.models.utils import offset2batch +from .builder import MODELS, build_model + + +@MODELS.register_module() +class DefaultSegmentor(nn.Module): + def __init__(self, backbone=None, criteria=None): + super().__init__() + self.backbone = build_model(backbone) + self.criteria = build_criteria(criteria) + + def forward(self, input_dict): + if "condition" in input_dict.keys(): + # PPT (https://arxiv.org/abs/2308.09718) + # currently, only support one batch one condition + input_dict["condition"] = input_dict["condition"][0] + seg_logits = self.backbone(input_dict) + # train + if self.training: + loss = self.criteria(seg_logits, input_dict["segment"]) + return dict(loss=loss) + # eval + elif "segment" in input_dict.keys(): + loss = self.criteria(seg_logits, input_dict["segment"]) + return dict(loss=loss, seg_logits=seg_logits) + # test + else: + return dict(seg_logits=seg_logits) + + +@MODELS.register_module() +class DefaultSegmentorV2(nn.Module): + def __init__( + self, + num_classes, + backbone_out_channels, + backbone=None, + criteria=None, + freeze_backbone=False, + ): + super().__init__() + self.seg_head = ( + nn.Linear(backbone_out_channels, num_classes) + if num_classes > 0 + else nn.Identity() + ) + self.backbone = build_model(backbone) + self.criteria = build_criteria(criteria) + self.freeze_backbone = freeze_backbone + if self.freeze_backbone: + for p in self.backbone.parameters(): + p.requires_grad = False + + def forward(self, input_dict, return_point=False): + point = Point(input_dict) + point = self.backbone(point) + # Backbone added after v1.5.0 return Point instead of feat and use DefaultSegmentorV2 + # TODO: remove this part after make all backbone return Point only. + if isinstance(point, Point): + while "pooling_parent" in point.keys(): + assert "pooling_inverse" in point.keys() + parent = point.pop("pooling_parent") + inverse = point.pop("pooling_inverse") + parent.feat = torch.cat([parent.feat, point.feat[inverse]], dim=-1) + point = parent + feat = point.feat + else: + feat = point + seg_logits = self.seg_head(feat) + return_dict = dict() + if return_point: + # PCA evaluator parse feat and coord in point + return_dict["point"] = point + # train + if self.training: + loss = self.criteria(seg_logits, input_dict["segment"]) + return_dict["loss"] = loss + # eval + elif "segment" in input_dict.keys(): + loss = self.criteria(seg_logits, input_dict["segment"]) + return_dict["loss"] = loss + return_dict["seg_logits"] = seg_logits + # test + else: + return_dict["seg_logits"] = seg_logits + return return_dict + + + +@MODELS.register_module() +class DINOEnhancedSegmentor(nn.Module): + def __init__( + self, + num_classes, + backbone_out_channels, + backbone=None, + criteria=None, + freeze_backbone=False, + ): + super().__init__() + self.seg_head = ( + nn.Linear(backbone_out_channels, num_classes) + if num_classes > 0 + else nn.Identity() + ) + self.backbone = build_model(backbone) if backbone is not None else None + self.criteria = build_criteria(criteria) + self.freeze_backbone = freeze_backbone + if self.backbone is not None and self.freeze_backbone: + for p in self.backbone.parameters(): + p.requires_grad = False + + def forward(self, input_dict, return_point=False): + point = Point(input_dict) + if self.backbone is not None: + if self.freeze_backbone: + with torch.no_grad(): + point = self.backbone(point) + else: + point = self.backbone(point) + point_list = [point] + while "unpooling_parent" in point_list[-1].keys(): + point_list.append(point_list[-1].pop("unpooling_parent")) + for i in reversed(range(1, len(point_list))): + point = point_list[i] + parent = point_list[i - 1] + assert "pooling_inverse" in point.keys() + inverse = point.pooling_inverse + parent.feat = torch.cat([parent.feat, point.feat[inverse]], dim=-1) + point = point_list[0] + while "pooling_parent" in point.keys(): + assert "pooling_inverse" in point.keys() + parent = point.pop("pooling_parent") + inverse = point.pooling_inverse + parent.feat = torch.cat([parent.feat, point.feat[inverse]], dim=-1) + point = parent + feat = [point.feat] + else: + feat = [] + dino_coord = input_dict["dino_coord"] + dino_feat = input_dict["dino_feat"] + dino_offset = input_dict["dino_offset"] + idx = torch_cluster.knn( + x=dino_coord, + y=point.origin_coord, + batch_x=offset2batch(dino_offset), + batch_y=offset2batch(point.origin_offset), + k=1, + )[1] + + feat.append(dino_feat[idx]) + feat = torch.concatenate(feat, dim=-1) + seg_logits = self.seg_head(feat) + return_dict = dict() + if return_point: + # PCA evaluator parse feat and coord in point + return_dict["point"] = point + # train + if self.training: + loss = self.criteria(seg_logits, input_dict["segment"]) + return_dict["loss"] = loss + # eval + elif "segment" in input_dict.keys(): + loss = self.criteria(seg_logits, input_dict["segment"]) + return_dict["loss"] = loss + return_dict["seg_logits"] = seg_logits + # test + else: + return_dict["seg_logits"] = seg_logits + return return_dict + + +@MODELS.register_module() +class DefaultClassifier(nn.Module): + def __init__( + self, + backbone=None, + criteria=None, + num_classes=40, + backbone_embed_dim=256, + ): + super().__init__() + self.backbone = build_model(backbone) + self.criteria = build_criteria(criteria) + self.num_classes = num_classes + self.backbone_embed_dim = backbone_embed_dim + self.cls_head = nn.Sequential( + nn.Linear(backbone_embed_dim, 256), + nn.BatchNorm1d(256), + nn.ReLU(inplace=True), + nn.Dropout(p=0.5), + nn.Linear(256, 128), + nn.BatchNorm1d(128), + nn.ReLU(inplace=True), + nn.Dropout(p=0.5), + nn.Linear(128, num_classes), + ) + + def forward(self, input_dict): + point = Point(input_dict) + point = self.backbone(point) + # Backbone added after v1.5.0 return Point instead of feat + # And after v1.5.0 feature aggregation for classification operated in classifier + # TODO: remove this part after make all backbone return Point only. + if isinstance(point, Point): + point.feat = torch_scatter.segment_csr( + src=point.feat, + indptr=nn.functional.pad(point.offset, (1, 0)), + reduce="mean", + ) + feat = point.feat + else: + feat = point + cls_logits = self.cls_head(feat) + if self.training: + loss = self.criteria(cls_logits, input_dict["category"]) + return dict(loss=loss) + elif "category" in input_dict.keys(): + loss = self.criteria(cls_logits, input_dict["category"]) + return dict(loss=loss, cls_logits=cls_logits) + else: + return dict(cls_logits=cls_logits) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/losses/__init__.py b/point_transformer_v3/pointcept_minimal/pointcept/models/losses/__init__.py new file mode 100644 index 0000000..0f4f29c --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/models/losses/__init__.py @@ -0,0 +1,4 @@ +from .builder import build_criteria, LOSSES + +from .misc import CrossEntropyLoss, SmoothCELoss, DiceLoss, FocalLoss, BinaryFocalLoss +from .lovasz import LovaszLoss diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/losses/builder.py b/point_transformer_v3/pointcept_minimal/pointcept/models/losses/builder.py new file mode 100644 index 0000000..ef642d9 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/models/losses/builder.py @@ -0,0 +1,31 @@ +""" +Criteria Builder + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +from pointcept.utils.registry import Registry + +LOSSES = Registry("losses") + + +class Criteria(object): + def __init__(self, cfg=None): + self.cfg = cfg if cfg is not None else [] + self.criteria = [] + for loss_cfg in self.cfg: + self.criteria.append(LOSSES.build(cfg=loss_cfg)) + + def __call__(self, pred, target): + if len(self.criteria) == 0: + # loss computation occur in model + return pred + loss = 0 + for c in self.criteria: + loss += c(pred, target) + return loss + + +def build_criteria(cfg): + return Criteria(cfg) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/losses/lovasz.py b/point_transformer_v3/pointcept_minimal/pointcept/models/losses/lovasz.py new file mode 100644 index 0000000..690c2ba --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/models/losses/lovasz.py @@ -0,0 +1,257 @@ +""" +Lovasz Loss +refer https://arxiv.org/abs/1705.08790 + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +from typing import Optional +from itertools import filterfalse +import torch +import torch.nn.functional as F +from torch.nn.modules.loss import _Loss + +from .builder import LOSSES + +BINARY_MODE: str = "binary" +MULTICLASS_MODE: str = "multiclass" +MULTILABEL_MODE: str = "multilabel" + + +def _lovasz_grad(gt_sorted): + """Compute gradient of the Lovasz extension w.r.t sorted errors + See Alg. 1 in paper + """ + p = len(gt_sorted) + gts = gt_sorted.sum() + intersection = gts - gt_sorted.float().cumsum(0) + union = gts + (1 - gt_sorted).float().cumsum(0) + jaccard = 1.0 - intersection / union + if p > 1: # cover 1-pixel case + jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] + return jaccard + + +def _lovasz_hinge(logits, labels, per_image=True, ignore=None): + """ + Binary Lovasz hinge loss + logits: [B, H, W] Logits at each pixel (between -infinity and +infinity) + labels: [B, H, W] Tensor, binary ground truth masks (0 or 1) + per_image: compute the loss per image instead of per batch + ignore: void class id + """ + if per_image: + loss = mean( + _lovasz_hinge_flat( + *_flatten_binary_scores(log.unsqueeze(0), lab.unsqueeze(0), ignore) + ) + for log, lab in zip(logits, labels) + ) + else: + loss = _lovasz_hinge_flat(*_flatten_binary_scores(logits, labels, ignore)) + return loss + + +def _lovasz_hinge_flat(logits, labels): + """Binary Lovasz hinge loss + Args: + logits: [P] Logits at each prediction (between -infinity and +infinity) + labels: [P] Tensor, binary ground truth labels (0 or 1) + """ + if len(labels) == 0: + # only void pixels, the gradients should be 0 + return logits.sum() * 0.0 + signs = 2.0 * labels.float() - 1.0 + errors = 1.0 - logits * signs + errors_sorted, perm = torch.sort(errors, dim=0, descending=True) + perm = perm.data + gt_sorted = labels[perm] + grad = _lovasz_grad(gt_sorted) + loss = torch.dot(F.relu(errors_sorted), grad) + return loss + + +def _flatten_binary_scores(scores, labels, ignore=None): + """Flattens predictions in the batch (binary case) + Remove labels equal to 'ignore' + """ + scores = scores.view(-1) + labels = labels.view(-1) + if ignore is None: + return scores, labels + valid = labels != ignore + vscores = scores[valid] + vlabels = labels[valid] + return vscores, vlabels + + +def _lovasz_softmax( + probas, labels, classes="present", class_seen=None, per_image=False, ignore=None +): + """Multi-class Lovasz-Softmax loss + Args: + @param probas: [B, C, H, W] Class probabilities at each prediction (between 0 and 1). + Interpreted as binary (sigmoid) output with outputs of size [B, H, W]. + @param labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1) + @param classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. + @param per_image: compute the loss per image instead of per batch + @param ignore: void class labels + """ + if per_image: + loss = mean( + _lovasz_softmax_flat( + *_flatten_probas(prob.unsqueeze(0), lab.unsqueeze(0), ignore), + classes=classes + ) + for prob, lab in zip(probas, labels) + ) + else: + loss = _lovasz_softmax_flat( + *_flatten_probas(probas, labels, ignore), + classes=classes, + class_seen=class_seen + ) + return loss + + +def _lovasz_softmax_flat(probas, labels, classes="present", class_seen=None): + """Multi-class Lovasz-Softmax loss + Args: + @param probas: [P, C] Class probabilities at each prediction (between 0 and 1) + @param labels: [P] Tensor, ground truth labels (between 0 and C - 1) + @param classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. + """ + if probas.numel() == 0: + # only void pixels, the gradients should be 0 + return probas * 0.0 + C = probas.size(1) + losses = [] + class_to_sum = list(range(C)) if classes in ["all", "present"] else classes + # for c in class_to_sum: + for c in labels.unique(): + if class_seen is None: + fg = (labels == c).type_as(probas) # foreground for class c + if classes == "present" and fg.sum() == 0: + continue + if C == 1: + if len(classes) > 1: + raise ValueError("Sigmoid output possible only with 1 class") + class_pred = probas[:, 0] + else: + class_pred = probas[:, c] + errors = (fg - class_pred).abs() + errors_sorted, perm = torch.sort(errors, 0, descending=True) + perm = perm.data + fg_sorted = fg[perm] + losses.append(torch.dot(errors_sorted, _lovasz_grad(fg_sorted))) + else: + if c in class_seen: + fg = (labels == c).type_as(probas) # foreground for class c + if classes == "present" and fg.sum() == 0: + continue + if C == 1: + if len(classes) > 1: + raise ValueError("Sigmoid output possible only with 1 class") + class_pred = probas[:, 0] + else: + class_pred = probas[:, c] + errors = (fg - class_pred).abs() + errors_sorted, perm = torch.sort(errors, 0, descending=True) + perm = perm.data + fg_sorted = fg[perm] + losses.append(torch.dot(errors_sorted, _lovasz_grad(fg_sorted))) + return mean(losses) + + +def _flatten_probas(probas, labels, ignore=None): + """Flattens predictions in the batch""" + if probas.dim() == 3: + # assumes output of a sigmoid layer + B, H, W = probas.size() + probas = probas.view(B, 1, H, W) + + C = probas.size(1) + probas = torch.movedim(probas, 1, -1) # [B, C, Di, Dj, ...] -> [B, Di, Dj, ..., C] + probas = probas.contiguous().view(-1, C) # [P, C] + + labels = labels.view(-1) + if ignore is None: + return probas, labels + valid = labels != ignore + vprobas = probas[valid] + vlabels = labels[valid] + return vprobas, vlabels + + +def isnan(x): + return x != x + + +def mean(values, ignore_nan=False, empty=0): + """Nan-mean compatible with generators.""" + values = iter(values) + if ignore_nan: + values = filterfalse(isnan, values) + try: + n = 1 + acc = next(values) + except StopIteration: + if empty == "raise": + raise ValueError("Empty mean") + return empty + for n, v in enumerate(values, 2): + acc += v + if n == 1: + return acc + return acc / n + + +@LOSSES.register_module() +class LovaszLoss(_Loss): + def __init__( + self, + mode: str, + class_seen: Optional[int] = None, + per_image: bool = False, + ignore_index: Optional[int] = None, + loss_weight: float = 1.0, + ): + """Lovasz loss for segmentation task. + It supports binary, multiclass and multilabel cases + Args: + mode: Loss mode 'binary', 'multiclass' or 'multilabel' + ignore_index: Label that indicates ignored pixels (does not contribute to loss) + per_image: If True loss computed per each image and then averaged, else computed per whole batch + Shape + - **y_pred** - torch.Tensor of shape (N, C, H, W) + - **y_true** - torch.Tensor of shape (N, H, W) or (N, C, H, W) + Reference + https://github.com/BloodAxe/pytorch-toolbelt + """ + assert mode in {BINARY_MODE, MULTILABEL_MODE, MULTICLASS_MODE} + super().__init__() + + self.mode = mode + self.ignore_index = ignore_index + self.per_image = per_image + self.class_seen = class_seen + self.loss_weight = loss_weight + + def forward(self, y_pred, y_true): + if self.mode in {BINARY_MODE, MULTILABEL_MODE}: + loss = _lovasz_hinge( + y_pred, y_true, per_image=self.per_image, ignore=self.ignore_index + ) + elif self.mode == MULTICLASS_MODE: + y_pred = y_pred.softmax(dim=1) + loss = _lovasz_softmax( + y_pred, + y_true, + class_seen=self.class_seen, + per_image=self.per_image, + ignore=self.ignore_index, + ) + else: + raise ValueError("Wrong mode {}.".format(self.mode)) + return loss * self.loss_weight diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/losses/misc.py b/point_transformer_v3/pointcept_minimal/pointcept/models/losses/misc.py new file mode 100644 index 0000000..ec300a5 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/models/losses/misc.py @@ -0,0 +1,223 @@ +""" +Misc Losses + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import torch +import torch.nn as nn +import torch.nn.functional as F +from .builder import LOSSES + + +@LOSSES.register_module() +class CrossEntropyLoss(nn.Module): + def __init__( + self, + weight=None, + size_average=None, + reduce=None, + reduction="mean", + label_smoothing=0.0, + loss_weight=1.0, + ignore_index=-1, + ): + super(CrossEntropyLoss, self).__init__() + weight = torch.tensor(weight).cuda() if weight is not None else None + self.loss_weight = loss_weight + self.loss = nn.CrossEntropyLoss( + weight=weight, + size_average=size_average, + ignore_index=ignore_index, + reduce=reduce, + reduction=reduction, + label_smoothing=label_smoothing, + ) + + def forward(self, pred, target): + return self.loss(pred, target) * self.loss_weight + + +@LOSSES.register_module() +class SmoothCELoss(nn.Module): + def __init__(self, smoothing_ratio=0.1): + super(SmoothCELoss, self).__init__() + self.smoothing_ratio = smoothing_ratio + + def forward(self, pred, target): + eps = self.smoothing_ratio + n_class = pred.size(1) + one_hot = torch.zeros_like(pred).scatter(1, target.view(-1, 1), 1) + one_hot = one_hot * (1 - eps) + (1 - one_hot) * eps / (n_class - 1) + log_prb = F.log_softmax(pred, dim=1) + loss = -(one_hot * log_prb).total(dim=1) + loss = loss[torch.isfinite(loss)].mean() + return loss + + +@LOSSES.register_module() +class BinaryFocalLoss(nn.Module): + def __init__(self, gamma=2.0, alpha=0.5, logits=True, reduce=True, loss_weight=1.0): + """Binary Focal Loss + ` + """ + super(BinaryFocalLoss, self).__init__() + assert 0 < alpha < 1 + self.gamma = gamma + self.alpha = alpha + self.logits = logits + self.reduce = reduce + self.loss_weight = loss_weight + + def forward(self, pred, target, **kwargs): + """Forward function. + Args: + pred (torch.Tensor): The prediction with shape (N) + target (torch.Tensor): The ground truth. If containing class + indices, shape (N) where each value is 0≀targets[i]≀1, If containing class probabilities, + same shape as the input. + Returns: + torch.Tensor: The calculated loss + """ + if self.logits: + bce = F.binary_cross_entropy_with_logits(pred, target, reduction="none") + else: + bce = F.binary_cross_entropy(pred, target, reduction="none") + pt = torch.exp(-bce) + alpha = self.alpha * target + (1 - self.alpha) * (1 - target) + focal_loss = alpha * (1 - pt) ** self.gamma * bce + + if self.reduce: + focal_loss = torch.mean(focal_loss) + return focal_loss * self.loss_weight + + +@LOSSES.register_module() +class FocalLoss(nn.Module): + def __init__( + self, gamma=2.0, alpha=0.5, reduction="mean", loss_weight=1.0, ignore_index=-1 + ): + """Focal Loss + ` + """ + super(FocalLoss, self).__init__() + assert reduction in ( + "mean", + "sum", + ), "AssertionError: reduction should be 'mean' or 'sum'" + assert isinstance( + alpha, (float, list) + ), "AssertionError: alpha should be of type float" + assert isinstance(gamma, float), "AssertionError: gamma should be of type float" + assert isinstance( + loss_weight, float + ), "AssertionError: loss_weight should be of type float" + assert isinstance(ignore_index, int), "ignore_index must be of type int" + self.gamma = gamma + self.alpha = alpha + self.reduction = reduction + self.loss_weight = loss_weight + self.ignore_index = ignore_index + + def forward(self, pred, target, **kwargs): + """Forward function. + Args: + pred (torch.Tensor): The prediction with shape (N, C) where C = number of classes. + target (torch.Tensor): The ground truth. If containing class + indices, shape (N) where each value is 0≀targets[i]≀Cβˆ’1, If containing class probabilities, + same shape as the input. + Returns: + torch.Tensor: The calculated loss + """ + # [B, C, d_1, d_2, ..., d_k] -> [C, B, d_1, d_2, ..., d_k] + pred = pred.transpose(0, 1) + # [C, B, d_1, d_2, ..., d_k] -> [C, N] + pred = pred.reshape(pred.size(0), -1) + # [C, N] -> [N, C] + pred = pred.transpose(0, 1).contiguous() + # (B, d_1, d_2, ..., d_k) --> (B * d_1 * d_2 * ... * d_k,) + target = target.view(-1).contiguous() + assert pred.size(0) == target.size( + 0 + ), "The shape of pred doesn't match the shape of target" + valid_mask = target != self.ignore_index + target = target[valid_mask] + pred = pred[valid_mask] + + if len(target) == 0: + return 0.0 + + num_classes = pred.size(1) + target = F.one_hot(target, num_classes=num_classes) + + alpha = self.alpha + if isinstance(alpha, list): + alpha = pred.new_tensor(alpha) + pred_sigmoid = pred.sigmoid() + target = target.type_as(pred) + one_minus_pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target) + focal_weight = (alpha * target + (1 - alpha) * (1 - target)) * one_minus_pt.pow( + self.gamma + ) + + loss = ( + F.binary_cross_entropy_with_logits(pred, target, reduction="none") + * focal_weight + ) + if self.reduction == "mean": + loss = loss.mean() + elif self.reduction == "sum": + loss = loss.total() + return self.loss_weight * loss + + +@LOSSES.register_module() +class DiceLoss(nn.Module): + def __init__(self, smooth=1, exponent=2, loss_weight=1.0, ignore_index=-1): + """DiceLoss. + This loss is proposed in `V-Net: Fully Convolutional Neural Networks for + Volumetric Medical Image Segmentation `_. + """ + super(DiceLoss, self).__init__() + self.smooth = smooth + self.exponent = exponent + self.loss_weight = loss_weight + self.ignore_index = ignore_index + + def forward(self, pred, target, **kwargs): + # [B, C, d_1, d_2, ..., d_k] -> [C, B, d_1, d_2, ..., d_k] + pred = pred.transpose(0, 1) + # [C, B, d_1, d_2, ..., d_k] -> [C, N] + pred = pred.reshape(pred.size(0), -1) + # [C, N] -> [N, C] + pred = pred.transpose(0, 1).contiguous() + # (B, d_1, d_2, ..., d_k) --> (B * d_1 * d_2 * ... * d_k,) + target = target.view(-1).contiguous() + assert pred.size(0) == target.size( + 0 + ), "The shape of pred doesn't match the shape of target" + valid_mask = target != self.ignore_index + target = target[valid_mask] + pred = pred[valid_mask] + + pred = F.softmax(pred, dim=1) + num_classes = pred.shape[1] + target = F.one_hot( + torch.clamp(target.long(), 0, num_classes - 1), num_classes=num_classes + ) + + total_loss = 0 + for i in range(num_classes): + if i != self.ignore_index: + num = torch.sum(torch.mul(pred[:, i], target[:, i])) * 2 + self.smooth + den = ( + torch.sum( + pred[:, i].pow(self.exponent) + target[:, i].pow(self.exponent) + ) + + self.smooth + ) + dice_loss = 1 - num / den + total_loss += dice_loss + loss = total_loss / num_classes + return self.loss_weight * loss diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/modules.py b/point_transformer_v3/pointcept_minimal/pointcept/models/modules.py new file mode 100644 index 0000000..0ec8fbd --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/models/modules.py @@ -0,0 +1,120 @@ +import sys +import torch.nn as nn +import spconv.pytorch as spconv + +try: + import ocnn +except ImportError: + ocnn = None + +from collections import OrderedDict +from pointcept.models.utils.structure import Point +from pointcept.engines.hooks import HookBase + + +def is_ocnn_module(module): + if ocnn is not None: + ocnn_modules = ( + ocnn.nn.OctreeConv, + ocnn.nn.OctreeDeconv, + ocnn.nn.OctreeGroupConv, + ocnn.nn.OctreeDWConv, + ) + return isinstance(module, ocnn_modules) + else: + return False + + +class PointModule(nn.Module): + r"""PointModule + placeholder, all module subclass from this will take Point in PointSequential. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + +class PointSequential(PointModule): + r"""A sequential container. + Modules will be added to it in the order they are passed in the constructor. + Alternatively, an ordered dict of modules can also be passed in. + """ + + def __init__(self, *args, **kwargs): + super().__init__() + if len(args) == 1 and isinstance(args[0], OrderedDict): + for key, module in args[0].items(): + self.add_module(key, module) + else: + for idx, module in enumerate(args): + self.add_module(str(idx), module) + for name, module in kwargs.items(): + if sys.version_info < (3, 6): + raise ValueError("kwargs only supported in py36+") + if name in self._modules: + raise ValueError("name exists.") + self.add_module(name, module) + + def __getitem__(self, idx): + if not (-len(self) <= idx < len(self)): + raise IndexError("index {} is out of range".format(idx)) + if idx < 0: + idx += len(self) + it = iter(self._modules.values()) + for i in range(idx): + next(it) + return next(it) + + def __len__(self): + return len(self._modules) + + def add(self, module, name=None): + if name is None: + name = str(len(self._modules)) + if name in self._modules: + raise KeyError("name exists") + self.add_module(name, module) + + def forward(self, input): + for k, module in self._modules.items(): + # Point module + if isinstance(module, PointModule): + input = module(input) + # Spconv module + elif spconv.modules.is_spconv_module(module): + if isinstance(input, Point): + input.sparse_conv_feat = module(input.sparse_conv_feat) + input.feat = input.sparse_conv_feat.features + else: + input = module(input) + elif is_ocnn_module(module): + if isinstance(input, Point): + input.octree.features[-1] = module( + input.feat[input.octree_order], input.octree, input.octree.depth + ) + input.feat = input.octree.features[-1][input.octree_inverse] + else: + input = module(input) + # PyTorch module + else: + if isinstance(input, Point): + input.feat = module(input.feat) + if "sparse_conv_feat" in input.keys(): + input.sparse_conv_feat = input.sparse_conv_feat.replace_feature( + input.feat + ) + elif isinstance(input, spconv.SparseConvTensor): + if input.indices.shape[0] != 0: + input = input.replace_feature(module(input.features)) + else: + input = module(input) + return input + + +class PointModel(PointModule, HookBase): + r"""PointModel + placeholder, PointModel can be customized as a Pointcept hook. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/__init__.py b/point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/__init__.py new file mode 100644 index 0000000..2fd471d --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/__init__.py @@ -0,0 +1,3 @@ +from .point_transformer_v3m1_base import * +from .point_transformer_v3m2_sonata import * +from .point_transformer_v3m1_fvdb import * diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m1_base.py b/point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m1_base.py new file mode 100644 index 0000000..b4b29e6 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m1_base.py @@ -0,0 +1,716 @@ +""" +Point Transformer - V3 Mode1 + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +from functools import partial +from addict import Dict +import math +import torch +import torch.nn as nn +import spconv.pytorch as spconv +import torch_scatter +from timm.layers import DropPath + +try: + import flash_attn +except ImportError: + flash_attn = None + +# from pointcept.models.point_prompt_training import PDNorm +from pointcept.models.builder import MODELS +from pointcept.models.utils.misc import offset2bincount +from pointcept.models.utils.structure import Point +from pointcept.models.modules import PointModule, PointSequential + + +class RPE(torch.nn.Module): + def __init__(self, patch_size, num_heads): + super().__init__() + self.patch_size = patch_size + self.num_heads = num_heads + self.pos_bnd = int((4 * patch_size) ** (1 / 3) * 2) + self.rpe_num = 2 * self.pos_bnd + 1 + self.rpe_table = torch.nn.Parameter(torch.zeros(3 * self.rpe_num, num_heads)) + torch.nn.init.trunc_normal_(self.rpe_table, std=0.02) + + def forward(self, coord): + idx = ( + coord.clamp(-self.pos_bnd, self.pos_bnd) # clamp into bnd + + self.pos_bnd # relative position to positive index + + torch.arange(3, device=coord.device) * self.rpe_num # x, y, z stride + ) + out = self.rpe_table.index_select(0, idx.reshape(-1)) + out = out.view(idx.shape + (-1,)).sum(3) + out = out.permute(0, 3, 1, 2) # (N, K, K, H) -> (N, H, K, K) + return out + + +class SerializedAttention(PointModule): + def __init__( + self, + channels, + num_heads, + patch_size, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + order_index=0, + enable_rpe=False, + enable_flash=True, + upcast_attention=True, + upcast_softmax=True, + ): + super().__init__() + assert channels % num_heads == 0 + self.channels = channels + self.num_heads = num_heads + self.scale = qk_scale or (channels // num_heads) ** -0.5 + self.order_index = order_index + self.upcast_attention = upcast_attention + self.upcast_softmax = upcast_softmax + self.enable_rpe = enable_rpe + self.enable_flash = enable_flash + if enable_flash: + assert ( + enable_rpe is False + ), "Set enable_rpe to False when enable Flash Attention" + assert ( + upcast_attention is False + ), "Set upcast_attention to False when enable Flash Attention" + assert ( + upcast_softmax is False + ), "Set upcast_softmax to False when enable Flash Attention" + assert flash_attn is not None, "Make sure flash_attn is installed." + self.patch_size = patch_size + self.attn_drop = attn_drop + else: + # when disable flash attention, we still don't want to use mask + # consequently, patch size will auto set to the + # min number of patch_size_max and number of points + self.patch_size_max = patch_size + self.patch_size = 0 + self.attn_drop = torch.nn.Dropout(attn_drop) + + self.qkv = torch.nn.Linear(channels, channels * 3, bias=qkv_bias) + self.proj = torch.nn.Linear(channels, channels) + self.proj_drop = torch.nn.Dropout(proj_drop) + self.softmax = torch.nn.Softmax(dim=-1) + self.rpe = RPE(patch_size, num_heads) if self.enable_rpe else None + + @torch.no_grad() + def get_rel_pos(self, point, order): + K = self.patch_size + rel_pos_key = f"rel_pos_{self.order_index}" + if rel_pos_key not in point.keys(): + grid_coord = point.grid_coord[order] + grid_coord = grid_coord.reshape(-1, K, 3) + point[rel_pos_key] = grid_coord.unsqueeze(2) - grid_coord.unsqueeze(1) + return point[rel_pos_key] + + @torch.no_grad() + def get_padding_and_inverse(self, point): + pad_key = "pad" + unpad_key = "unpad" + cu_seqlens_key = "cu_seqlens_key" + if ( + pad_key not in point.keys() + or unpad_key not in point.keys() + or cu_seqlens_key not in point.keys() + ): + offset = point.offset + bincount = offset2bincount(offset) + bincount_pad = ( + torch.div( + bincount + self.patch_size - 1, + self.patch_size, + rounding_mode="trunc", + ) + * self.patch_size + ) + # only pad point when num of points larger than patch_size + mask_pad = bincount > self.patch_size + bincount_pad = ~mask_pad * bincount + mask_pad * bincount_pad + _offset = nn.functional.pad(offset, (1, 0)) + _offset_pad = nn.functional.pad(torch.cumsum(bincount_pad, dim=0), (1, 0)) + pad = torch.arange(_offset_pad[-1], device=offset.device) + unpad = torch.arange(_offset[-1], device=offset.device) + cu_seqlens = [] + for i in range(len(offset)): + unpad[_offset[i] : _offset[i + 1]] += _offset_pad[i] - _offset[i] + if bincount[i] != bincount_pad[i]: + pad[ + _offset_pad[i + 1] + - self.patch_size + + (bincount[i] % self.patch_size) : _offset_pad[i + 1] + ] = pad[ + _offset_pad[i + 1] + - 2 * self.patch_size + + (bincount[i] % self.patch_size) : _offset_pad[i + 1] + - self.patch_size + ] + pad[_offset_pad[i] : _offset_pad[i + 1]] -= _offset_pad[i] - _offset[i] + cu_seqlens.append( + torch.arange( + _offset_pad[i], + _offset_pad[i + 1], + step=self.patch_size, + dtype=torch.int32, + device=offset.device, + ) + ) + point[pad_key] = pad + point[unpad_key] = unpad + point[cu_seqlens_key] = nn.functional.pad( + torch.concat(cu_seqlens), (0, 1), value=_offset_pad[-1] + ) + return point[pad_key], point[unpad_key], point[cu_seqlens_key] + + def forward(self, point): + if not self.enable_flash: + self.patch_size = min( + offset2bincount(point.offset).min().tolist(), self.patch_size_max + ) + + H = self.num_heads + K = self.patch_size + C = self.channels + + pad, unpad, cu_seqlens = self.get_padding_and_inverse(point) + + order = point.serialized_order[self.order_index][pad] + inverse = unpad[point.serialized_inverse[self.order_index]] + + # padding and reshape feat and batch for serialized point patch + qkv = self.qkv(point.feat)[order] + + if not self.enable_flash: + # encode and reshape qkv: (N', K, 3, H, C') => (3, N', H, K, C') + q, k, v = ( + qkv.reshape(-1, K, 3, H, C // H).permute(2, 0, 3, 1, 4).unbind(dim=0) + ) + # attn + if self.upcast_attention: + q = q.float() + k = k.float() + attn = (q * self.scale) @ k.transpose(-2, -1) # (N', H, K, K) + if self.enable_rpe: + attn = attn + self.rpe(self.get_rel_pos(point, order)) + if self.upcast_softmax: + attn = attn.float() + attn = self.softmax(attn) + attn = self.attn_drop(attn).to(qkv.dtype) + feat = (attn @ v).transpose(1, 2).reshape(-1, C) + else: + feat = flash_attn.flash_attn_varlen_qkvpacked_func( + qkv.to(torch.bfloat16).reshape(-1, 3, H, C // H), + cu_seqlens, + max_seqlen=self.patch_size, + dropout_p=self.attn_drop if self.training else 0, + softmax_scale=self.scale, + ).reshape(-1, C) + feat = feat.to(qkv.dtype) + feat = feat[inverse] + + # ffn + feat = self.proj(feat) + feat = self.proj_drop(feat) + point.feat = feat + return point + + +class MLP(nn.Module): + def __init__( + self, + in_channels, + hidden_channels=None, + out_channels=None, + act_layer=nn.GELU, + drop=0.0, + ): + super().__init__() + out_channels = out_channels or in_channels + hidden_channels = hidden_channels or in_channels + self.fc1 = nn.Linear(in_channels, hidden_channels) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_channels, out_channels) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class Block(PointModule): + def __init__( + self, + channels, + num_heads, + patch_size=48, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.0, + norm_layer=nn.LayerNorm, + act_layer=nn.GELU, + pre_norm=True, + order_index=0, + cpe_indice_key=None, + enable_rpe=False, + enable_flash=True, + upcast_attention=True, + upcast_softmax=True, + ): + super().__init__() + self.channels = channels + self.pre_norm = pre_norm + + self.cpe = PointSequential( + spconv.SubMConv3d( + channels, + channels, + kernel_size=3, + bias=True, + indice_key=cpe_indice_key, + ), + nn.Linear(channels, channels), + norm_layer(channels), + ) + + self.norm1 = PointSequential(norm_layer(channels)) + self.attn = SerializedAttention( + channels=channels, + patch_size=patch_size, + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=proj_drop, + order_index=order_index, + enable_rpe=enable_rpe, + enable_flash=enable_flash, + upcast_attention=upcast_attention, + upcast_softmax=upcast_softmax, + ) + self.norm2 = PointSequential(norm_layer(channels)) + self.mlp = PointSequential( + MLP( + in_channels=channels, + hidden_channels=int(channels * mlp_ratio), + out_channels=channels, + act_layer=act_layer, + drop=proj_drop, + ) + ) + self.drop_path = PointSequential( + DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + ) + + def forward(self, point: Point): + shortcut = point.feat + point = self.cpe(point) + point.feat = shortcut + point.feat + shortcut = point.feat + if self.pre_norm: + point = self.norm1(point) + point = self.drop_path(self.attn(point)) + point.feat = shortcut + point.feat + if not self.pre_norm: + point = self.norm1(point) + + shortcut = point.feat + if self.pre_norm: + point = self.norm2(point) + point = self.drop_path(self.mlp(point)) + point.feat = shortcut + point.feat + if not self.pre_norm: + point = self.norm2(point) + point.sparse_conv_feat = point.sparse_conv_feat.replace_feature(point.feat) + return point + + +class SerializedPooling(PointModule): + def __init__( + self, + in_channels, + out_channels, + stride=2, + norm_layer=None, + act_layer=None, + reduce="max", + shuffle_orders=True, + traceable=True, # record parent and cluster + ): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + + assert stride == 2 ** (math.ceil(stride) - 1).bit_length() # 2, 4, 8 + # TODO: add support to grid pool (any stride) + self.stride = stride + assert reduce in ["sum", "mean", "min", "max"] + self.reduce = reduce + self.shuffle_orders = shuffle_orders + self.traceable = traceable + + self.proj = nn.Linear(in_channels, out_channels) + if norm_layer is not None: + self.norm = PointSequential(norm_layer(out_channels)) + if act_layer is not None: + self.act = PointSequential(act_layer()) + + def forward(self, point: Point): + pooling_depth = (math.ceil(self.stride) - 1).bit_length() + if pooling_depth > point.serialized_depth: + pooling_depth = 0 + assert { + "serialized_code", + "serialized_order", + "serialized_inverse", + "serialized_depth", + }.issubset( + point.keys() + ), "Run point.serialization() point cloud before SerializedPooling" + + code = point.serialized_code >> pooling_depth * 3 + code_, cluster, counts = torch.unique( + code[0], + sorted=True, + return_inverse=True, + return_counts=True, + ) + # indices of point sorted by cluster, for torch_scatter.segment_csr + _, indices = torch.sort(cluster) + # index pointer for sorted point, for torch_scatter.segment_csr + idx_ptr = torch.cat([counts.new_zeros(1), torch.cumsum(counts, dim=0)]) + # head_indices of each cluster, for reduce attr e.g. code, batch + head_indices = indices[idx_ptr[:-1]] + # generate down code, order, inverse + code = code[:, head_indices] + order = torch.argsort(code) + inverse = torch.zeros_like(order).scatter_( + dim=1, + index=order, + src=torch.arange(0, code.shape[1], device=order.device).repeat( + code.shape[0], 1 + ), + ) + + if self.shuffle_orders: + perm = torch.randperm(code.shape[0]) + code = code[perm] + order = order[perm] + inverse = inverse[perm] + + # collect information + point_dict = Dict( + feat=torch_scatter.segment_csr( + self.proj(point.feat)[indices], idx_ptr, reduce=self.reduce + ), + coord=torch_scatter.segment_csr( + point.coord[indices], idx_ptr, reduce="mean" + ), + grid_coord=point.grid_coord[head_indices] >> pooling_depth, + serialized_code=code, + serialized_order=order, + serialized_inverse=inverse, + serialized_depth=point.serialized_depth - pooling_depth, + batch=point.batch[head_indices], + ) + + if "condition" in point.keys(): + point_dict["condition"] = point.condition + if "context" in point.keys(): + point_dict["context"] = point.context + + if self.traceable: + point_dict["pooling_inverse"] = cluster + point_dict["pooling_parent"] = point + point = Point(point_dict) + if self.norm is not None: + point = self.norm(point) + if self.act is not None: + point = self.act(point) + point.sparsify() + return point + + +class SerializedUnpooling(PointModule): + def __init__( + self, + in_channels, + skip_channels, + out_channels, + norm_layer=None, + act_layer=None, + traceable=False, # record parent and cluster + ): + super().__init__() + self.proj = PointSequential(nn.Linear(in_channels, out_channels)) + self.proj_skip = PointSequential(nn.Linear(skip_channels, out_channels)) + + if norm_layer is not None: + self.proj.add(norm_layer(out_channels)) + self.proj_skip.add(norm_layer(out_channels)) + + if act_layer is not None: + self.proj.add(act_layer()) + self.proj_skip.add(act_layer()) + + self.traceable = traceable + + def forward(self, point): + assert "pooling_parent" in point.keys() + assert "pooling_inverse" in point.keys() + parent = point.pop("pooling_parent") + inverse = point.pop("pooling_inverse") + point = self.proj(point) + parent = self.proj_skip(parent) + parent.feat = parent.feat + point.feat[inverse] + + if self.traceable: + parent["unpooling_parent"] = point + return parent + + +class Embedding(PointModule): + def __init__( + self, + in_channels, + embed_channels, + norm_layer=None, + act_layer=None, + ): + super().__init__() + self.in_channels = in_channels + self.embed_channels = embed_channels + + # TODO: check remove spconv + self.stem = PointSequential( + conv=spconv.SubMConv3d( + in_channels, + embed_channels, + kernel_size=5, + padding=1, + bias=False, + indice_key="stem", + ) + ) + if norm_layer is not None: + self.stem.add(norm_layer(embed_channels), name="norm") + if act_layer is not None: + self.stem.add(act_layer(), name="act") + + def forward(self, point: Point): + point = self.stem(point) + return point + + +@MODELS.register_module("PT-v3m1") +class PointTransformerV3(PointModule): + def __init__( + self, + in_channels=6, + order=("z", "z-trans"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(48, 48, 48, 48, 48), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(48, 48, 48, 48), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.3, + pre_norm=True, + shuffle_orders=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + enc_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ): + super().__init__() + self.num_stages = len(enc_depths) + self.order = [order] if isinstance(order, str) else order + self.enc_mode = enc_mode + self.shuffle_orders = shuffle_orders + + assert self.num_stages == len(stride) + 1 + assert self.num_stages == len(enc_depths) + assert self.num_stages == len(enc_channels) + assert self.num_stages == len(enc_num_head) + assert self.num_stages == len(enc_patch_size) + assert self.enc_mode or self.num_stages == len(dec_depths) + 1 + assert self.enc_mode or self.num_stages == len(dec_channels) + 1 + assert self.enc_mode or self.num_stages == len(dec_num_head) + 1 + assert self.enc_mode or self.num_stages == len(dec_patch_size) + 1 + + # norm layers + if pdnorm_bn: + assert False, "PDNorm is not supported in this minimal pointcept codebase for fvdb." + bn_layer = partial( + PDNorm, + norm_layer=partial( + nn.BatchNorm1d, eps=1e-3, momentum=0.01, affine=pdnorm_affine + ), + conditions=pdnorm_conditions, + decouple=pdnorm_decouple, + adaptive=pdnorm_adaptive, + ) + else: + bn_layer = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01) + if pdnorm_ln: + assert False, "PDNorm is not supported in this minimal pointcept codebase for fvdb." + ln_layer = partial( + PDNorm, + norm_layer=partial(nn.LayerNorm, elementwise_affine=pdnorm_affine), + conditions=pdnorm_conditions, + decouple=pdnorm_decouple, + adaptive=pdnorm_adaptive, + ) + else: + ln_layer = nn.LayerNorm + # activation layers + act_layer = nn.GELU + + self.embedding = Embedding( + in_channels=in_channels, + embed_channels=enc_channels[0], + norm_layer=bn_layer, + act_layer=act_layer, + ) + + # encoder + enc_drop_path = [ + x.item() for x in torch.linspace(0, drop_path, sum(enc_depths)) + ] + self.enc = PointSequential() + for s in range(self.num_stages): + enc_drop_path_ = enc_drop_path[ + sum(enc_depths[:s]) : sum(enc_depths[: s + 1]) + ] + enc = PointSequential() + if s > 0: + enc.add( + SerializedPooling( + in_channels=enc_channels[s - 1], + out_channels=enc_channels[s], + stride=stride[s - 1], + norm_layer=bn_layer, + act_layer=act_layer, + ), + name="down", + ) + for i in range(enc_depths[s]): + enc.add( + Block( + channels=enc_channels[s], + num_heads=enc_num_head[s], + patch_size=enc_patch_size[s], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=proj_drop, + drop_path=enc_drop_path_[i], + norm_layer=ln_layer, + act_layer=act_layer, + pre_norm=pre_norm, + order_index=i % len(self.order), + cpe_indice_key=f"stage{s}", + enable_rpe=enable_rpe, + enable_flash=enable_flash, + upcast_attention=upcast_attention, + upcast_softmax=upcast_softmax, + ), + name=f"block{i}", + ) + if len(enc) != 0: + self.enc.add(module=enc, name=f"enc{s}") + + # decoder + if not self.enc_mode: + dec_drop_path = [ + x.item() for x in torch.linspace(0, drop_path, sum(dec_depths)) + ] + self.dec = PointSequential() + dec_channels = list(dec_channels) + [enc_channels[-1]] + for s in reversed(range(self.num_stages - 1)): + dec_drop_path_ = dec_drop_path[ + sum(dec_depths[:s]) : sum(dec_depths[: s + 1]) + ] + dec_drop_path_.reverse() + dec = PointSequential() + dec.add( + SerializedUnpooling( + in_channels=dec_channels[s + 1], + skip_channels=enc_channels[s], + out_channels=dec_channels[s], + norm_layer=bn_layer, + act_layer=act_layer, + ), + name="up", + ) + for i in range(dec_depths[s]): + dec.add( + Block( + channels=dec_channels[s], + num_heads=dec_num_head[s], + patch_size=dec_patch_size[s], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=proj_drop, + drop_path=dec_drop_path_[i], + norm_layer=ln_layer, + act_layer=act_layer, + pre_norm=pre_norm, + order_index=i % len(self.order), + cpe_indice_key=f"stage{s}", + enable_rpe=enable_rpe, + enable_flash=enable_flash, + upcast_attention=upcast_attention, + upcast_softmax=upcast_softmax, + ), + name=f"block{i}", + ) + self.dec.add(module=dec, name=f"dec{s}") + + def forward(self, data_dict): + point = Point(data_dict) + point.serialization(order=self.order, shuffle_orders=self.shuffle_orders) + point.sparsify() + + point = self.embedding(point) + point = self.enc(point) + if not self.enc_mode: + point = self.dec(point) + # else: + # point.feat = torch_scatter.segment_csr( + # src=point.feat, + # indptr=nn.functional.pad(point.offset, (1, 0)), + # reduce="mean", + # ) + return point diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m1_fvdb.py b/point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m1_fvdb.py new file mode 100644 index 0000000..919f3d1 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m1_fvdb.py @@ -0,0 +1,219 @@ +""" +Point Transformer - V3 Mode1 + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import torch + +from pointcept.models.builder import MODELS +from pointcept.models.modules import PointModule + +import fvdb + +# Import PTV3 model from parent directory +import sys +import os + +try: +# Add the parent directory (reproduce/fvdb-examples/point_transformer_v3/) to sys.path + model_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', '..', '..', '..', '..', '..')) + if model_dir not in sys.path: + sys.path.insert(0, model_dir) + # print(f"Added {model_dir} to sys.path") + from model import PTV3 +except ImportError: + raise ImportError("Could not find model.py in parent directories") + +from typing import Dict, Tuple, Union, List + +def tensor_hash_simple(tensor): + """Simple Python hash - fastest but less robust""" + return hash(tuple(tensor.detach().cpu().flatten().tolist())) + + +def create_grid_from_points(grid_coord, feat, offset, voxel_size, device="cuda"): + """Create FVDB tensor from ScanNet-like point data with proper batching. + + Args: + grid_coord: Batched grid coordinates [N, 3] + feat: Batched features [N, C] + offset: Tensor indicating batch boundaries [B] + voxel_size: Voxel size for grid creation + device: Device for tensor operations + + Returns: + grid: fvdb.GridBatch + jfeats: fvdb.JaggedTensor with features + original_coord_to_voxel_idx: Mapping from original coords to voxel indices + """ + + offset = list(offset.cpu().numpy()) + # Convert offset to individual sample boundaries + if len(offset) == 1: + # Single sample case + coords_list = [grid_coord.to(device=device, dtype=torch.int32)] + feats_list = [feat.to(device=device, dtype=torch.float32)] + else: + # Multiple samples case - split using offset + coords_list = [] + feats_list = [] + prev_offset = 0 + for curr_offset in offset: + coords_list.append(grid_coord[prev_offset:curr_offset].to(device=device, dtype=torch.int32)) + feats_list.append(feat[prev_offset:curr_offset].to(device=device, dtype=torch.float32)) + prev_offset = curr_offset + + coords_jagged = fvdb.JaggedTensor(coords_list) + + grid = fvdb.GridBatch.from_ijk( + coords_jagged, + voxel_sizes=[[voxel_size, voxel_size, voxel_size]] * len(coords_list), + origins=[0.0] * 3, + ) + + feats_jagged = fvdb.JaggedTensor(feats_list) + feats_vdb_order = grid.inject_from_ijk(coords_jagged, feats_jagged) # + original_coord_to_voxel_idx = grid.ijk_to_index(coords_jagged, cumulative=True) + + return grid, feats_vdb_order, original_coord_to_voxel_idx + + +@MODELS.register_module("PT-v3fvdb") +class PointTransformerV3(PointModule): + def __init__( + self, + in_channels=6, + enc_depths=(2, 2, 2, 2), + enc_channels=(32, 64, 128, 256), + enc_num_heads=(1, 1, 1, 1), + dec_depths=(2, 2, 2), + dec_channels=(128, 64, 32), + dec_num_heads=(1, 1, 1), + patch_size=1024, + drop_path=0.3, + proj_drop=0.0, + qk_scale=None, + enable_batch_norm=False, + embedding_mode="linear", + no_conv_in_cpe=False, + cross_patch_attention: bool = False, + cross_patch_pooling: str = "mean", + sliding_window_attention: bool = False, + pipelined_batch: bool = False, + order_type: Union[str, tuple] = ("z", "z-trans"), + shuffle_orders: bool = True, + ): + super().__init__() + + self.pipelined_batch = pipelined_batch + self.order_type = order_type + + self.fvdb_ptv3_model = PTV3( + num_classes=-1, + input_dim=in_channels, + enc_depths=enc_depths, + enc_channels=enc_channels, + enc_num_heads=enc_num_heads, + dec_depths=dec_depths, + dec_channels=dec_channels, + dec_num_heads=dec_num_heads, + patch_size=patch_size, + drop_path=drop_path, + proj_drop=proj_drop, + qk_scale=qk_scale, + enable_batch_norm=enable_batch_norm, + embedding_mode=embedding_mode, + no_conv_in_cpe=no_conv_in_cpe, + # cross_patch_attention=cross_patch_attention, + # cross_patch_pooling=cross_patch_pooling, + sliding_window_attention=sliding_window_attention, + order_type=order_type, + shuffle_orders=shuffle_orders, + ) + + def forward(self, data_dict): + + grid_coord = data_dict['grid_coord'] + feat = data_dict['feat'] + offset = data_dict['offset'] + # import pdb; pdb.set_trace() + # print(f"grid_coord.shape: {grid_coord.shape}, feat.shape: {feat.shape}, offset.shape: {offset.shape}") + # exit() + + if self.pipelined_batch and len(offset) > 1: + # Pipelined batch mode: process each point cloud individually + # This mode splits the batch into individual point clouds, processes each + # one separately through the FVDB model, and concatenates the results. + # This can be useful for: + # 1. Memory efficiency when individual processing uses less memory + # 2. Debugging to isolate issues with specific point clouds + # 3. Different processing requirements per sample + outputs = [] + prev_offset = 0 + # catted_input_grid_ijk = [] + # catted_input_feat = [] + # catted_original_coord_to_voxel_idx = [] + for curr_offset in offset: + # Extract data for current point cloud + curr_grid_coord = grid_coord[prev_offset:curr_offset] + curr_feat = feat[prev_offset:curr_offset] + curr_num_points = curr_offset - prev_offset + curr_offset_tensor = torch.tensor([curr_num_points], + dtype=offset.dtype, device=offset.device) + + # Process single point cloud + grid, jfeats, original_coord_to_voxel_idx = create_grid_from_points( + curr_grid_coord, curr_feat, curr_offset_tensor, voxel_size=0.02 + ) + assert grid.ijk.jdata.shape == curr_grid_coord.shape, f"curr_grid_coord.shape: {curr_grid_coord.shape}, grid.ijk.jdata.shape: {grid.ijk.jdata.shape}" # + + # catted_input_grid_ijk.append(grid.ijk.jdata) + # catted_input_feat.append(jfeats.jdata) + # catted_original_coord_to_voxel_idx.append(original_coord_to_voxel_idx.jdata) + # grid shape and feats values match here. + grid, jfeats = self.fvdb_ptv3_model(grid, jfeats) + # feats values does not match here. + + # Get output for this point cloud. + curr_output = jfeats.jdata[original_coord_to_voxel_idx.jdata] + outputs.append(curr_output) + + prev_offset = curr_offset + + # Concatenate all outputs + output = torch.cat(outputs, dim=0) + # import pdb; pdb.set_trace() + + # catted_input_grid_ijk = torch.cat(catted_input_grid_ijk, dim=0) + # catted_input_feat = torch.cat(catted_input_feat, dim=0) + # catted_original_coord_to_voxel_idx = torch.cat(catted_original_coord_to_voxel_idx, dim=0) + + + else: + # Standard batch mode (original implementation) + grid, jfeats, original_coord_to_voxel_idx = create_grid_from_points( + grid_coord, feat, offset, voxel_size=0.02 + ) + # import pdb; pdb.set_trace() + # TODO: check the downsampling behavior is the same or not? + assert grid_coord.shape == grid.ijk.jdata.shape, f"grid_coord.shape: {grid_coord.shape}, grid.ijk.jdata.shape: {grid.ijk.jdata.shape}" # this is not always true, because mix-prob may duplicate points with the same coordinate. + assert grid_coord.shape[0] == original_coord_to_voxel_idx.jdata.shape[0], f"grid_coord.shape: {grid_coord.shape}, original_coord_to_voxel_idx.jdata.shape: {original_coord_to_voxel_idx.jdata.shape}" + + # import pdb; pdb.set_trace() + if torch.is_autocast_enabled(): + with torch.autocast(device_type="cuda", enabled=False): + grid, jfeats = self.fvdb_ptv3_model(grid, jfeats) + else: + grid, jfeats = self.fvdb_ptv3_model(grid, jfeats) + + output = jfeats.jdata[original_coord_to_voxel_idx.jdata] + # import pdb; pdb.set_trace() + + + return output # return logits in torch.tensor format + + + + diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m2_sonata.py b/point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m2_sonata.py new file mode 100644 index 0000000..cc8fee1 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m2_sonata.py @@ -0,0 +1,732 @@ +""" +Point Transformer - V3 Mode2 + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +from addict import Dict +import torch +import torch.nn as nn +from torch.nn.init import trunc_normal_ +import spconv.pytorch as spconv +import torch_scatter +from timm.layers import DropPath + +try: + import flash_attn +except ImportError: + flash_attn = None + +from pointcept.models.builder import MODELS +from pointcept.models.utils.misc import offset2bincount +from pointcept.models.utils.structure import Point +from pointcept.models.modules import PointModule, PointSequential + + +class LayerScale(nn.Module): + def __init__( + self, + dim: int, + init_values: float = 1e-5, + inplace: bool = False, + ) -> None: + super().__init__() + self.inplace = inplace + self.gamma = nn.Parameter(init_values * torch.ones(dim)) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return x.mul_(self.gamma) if self.inplace else x * self.gamma + + +class RPE(torch.nn.Module): + def __init__(self, patch_size, num_heads): + super().__init__() + self.patch_size = patch_size + self.num_heads = num_heads + self.pos_bnd = int((4 * patch_size) ** (1 / 3) * 2) + self.rpe_num = 2 * self.pos_bnd + 1 + self.rpe_table = torch.nn.Parameter(torch.zeros(3 * self.rpe_num, num_heads)) + torch.nn.init.trunc_normal_(self.rpe_table, std=0.02) + + def forward(self, coord): + idx = ( + coord.clamp(-self.pos_bnd, self.pos_bnd) # clamp into bnd + + self.pos_bnd # relative position to positive index + + torch.arange(3, device=coord.device) * self.rpe_num # x, y, z stride + ) + out = self.rpe_table.index_select(0, idx.reshape(-1)) + out = out.view(idx.shape + (-1,)).sum(3) + out = out.permute(0, 3, 1, 2) # (N, K, K, H) -> (N, H, K, K) + return out + + +class SerializedAttention(PointModule): + def __init__( + self, + channels, + num_heads, + patch_size, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + order_index=0, + enable_rpe=False, + enable_flash=True, + upcast_attention=True, + upcast_softmax=True, + ): + super().__init__() + assert channels % num_heads == 0 + self.channels = channels + self.num_heads = num_heads + self.scale = qk_scale or (channels // num_heads) ** -0.5 + self.order_index = order_index + self.upcast_attention = upcast_attention + self.upcast_softmax = upcast_softmax + self.enable_rpe = enable_rpe + self.enable_flash = enable_flash + if enable_flash: + assert ( + enable_rpe is False + ), "Set enable_rpe to False when enable Flash Attention" + assert ( + upcast_attention is False + ), "Set upcast_attention to False when enable Flash Attention" + assert ( + upcast_softmax is False + ), "Set upcast_softmax to False when enable Flash Attention" + assert flash_attn is not None, "Make sure flash_attn is installed." + self.patch_size = patch_size + self.attn_drop = attn_drop + else: + # when disable flash attention, we still don't want to use mask + # consequently, patch size will auto set to the + # min number of patch_size_max and number of points + self.patch_size_max = patch_size + self.patch_size = 0 + self.attn_drop = torch.nn.Dropout(attn_drop) + + self.qkv = torch.nn.Linear(channels, channels * 3, bias=qkv_bias) + self.proj = torch.nn.Linear(channels, channels) + self.proj_drop = torch.nn.Dropout(proj_drop) + self.softmax = torch.nn.Softmax(dim=-1) + self.rpe = RPE(patch_size, num_heads) if self.enable_rpe else None + + @torch.no_grad() + def get_rel_pos(self, point, order): + K = self.patch_size + rel_pos_key = f"rel_pos_{self.order_index}" + if rel_pos_key not in point.keys(): + grid_coord = point.grid_coord[order] + grid_coord = grid_coord.reshape(-1, K, 3) + point[rel_pos_key] = grid_coord.unsqueeze(2) - grid_coord.unsqueeze(1) + return point[rel_pos_key] + + @torch.no_grad() + def get_padding_and_inverse(self, point): + pad_key = "pad" + unpad_key = "unpad" + cu_seqlens_key = "cu_seqlens_key" + if ( + pad_key not in point.keys() + or unpad_key not in point.keys() + or cu_seqlens_key not in point.keys() + ): + offset = point.offset + bincount = offset2bincount(offset) + bincount_pad = ( + torch.div( + bincount + self.patch_size - 1, + self.patch_size, + rounding_mode="trunc", + ) + * self.patch_size + ) + # only pad point when num of points larger than patch_size + mask_pad = bincount > self.patch_size + bincount_pad = ~mask_pad * bincount + mask_pad * bincount_pad + _offset = nn.functional.pad(offset, (1, 0)) + _offset_pad = nn.functional.pad(torch.cumsum(bincount_pad, dim=0), (1, 0)) + pad = torch.arange(_offset_pad[-1], device=offset.device) + unpad = torch.arange(_offset[-1], device=offset.device) + cu_seqlens = [] + for i in range(len(offset)): + unpad[_offset[i] : _offset[i + 1]] += _offset_pad[i] - _offset[i] + if bincount[i] != bincount_pad[i]: + pad[ + _offset_pad[i + 1] + - self.patch_size + + (bincount[i] % self.patch_size) : _offset_pad[i + 1] + ] = pad[ + _offset_pad[i + 1] + - 2 * self.patch_size + + (bincount[i] % self.patch_size) : _offset_pad[i + 1] + - self.patch_size + ] + pad[_offset_pad[i] : _offset_pad[i + 1]] -= _offset_pad[i] - _offset[i] + cu_seqlens.append( + torch.arange( + _offset_pad[i], + _offset_pad[i + 1], + step=self.patch_size, + dtype=torch.int32, + device=offset.device, + ) + ) + point[pad_key] = pad + point[unpad_key] = unpad + point[cu_seqlens_key] = nn.functional.pad( + torch.concat(cu_seqlens), (0, 1), value=_offset_pad[-1] + ) + return point[pad_key], point[unpad_key], point[cu_seqlens_key] + + def forward(self, point): + if not self.enable_flash: + self.patch_size = min( + offset2bincount(point.offset).min().tolist(), self.patch_size_max + ) + + H = self.num_heads + K = self.patch_size + C = self.channels + + pad, unpad, cu_seqlens = self.get_padding_and_inverse(point) + + order = point.serialized_order[self.order_index][pad] + inverse = unpad[point.serialized_inverse[self.order_index]] + + # padding and reshape feat and batch for serialized point patch + qkv = self.qkv(point.feat)[order] + + if not self.enable_flash: + # encode and reshape qkv: (N', K, 3, H, C') => (3, N', H, K, C') + q, k, v = ( + qkv.reshape(-1, K, 3, H, C // H).permute(2, 0, 3, 1, 4).unbind(dim=0) + ) + # attn + if self.upcast_attention: + q = q.float() + k = k.float() + attn = (q * self.scale) @ k.transpose(-2, -1) # (N', H, K, K) + if self.enable_rpe: + attn = attn + self.rpe(self.get_rel_pos(point, order)) + if self.upcast_softmax: + attn = attn.float() + attn = self.softmax(attn) + attn = self.attn_drop(attn).to(qkv.dtype) + feat = (attn @ v).transpose(1, 2).reshape(-1, C) + else: + feat = flash_attn.flash_attn_varlen_qkvpacked_func( + qkv.to(torch.bfloat16).reshape(-1, 3, H, C // H), + cu_seqlens, + max_seqlen=self.patch_size, + dropout_p=self.attn_drop if self.training else 0, + softmax_scale=self.scale, + ).reshape(-1, C) + feat = feat.to(qkv.dtype) + feat = feat[inverse] + + # ffn + feat = self.proj(feat) + feat = self.proj_drop(feat) + point.feat = feat + return point + + +class MLP(nn.Module): + def __init__( + self, + in_channels, + hidden_channels=None, + out_channels=None, + act_layer=nn.GELU, + drop=0.0, + ): + super().__init__() + out_channels = out_channels or in_channels + hidden_channels = hidden_channels or in_channels + self.fc1 = nn.Linear(in_channels, hidden_channels) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_channels, out_channels) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class Block(PointModule): + def __init__( + self, + channels, + num_heads, + patch_size=48, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.0, + layer_scale=None, + norm_layer=nn.LayerNorm, + act_layer=nn.GELU, + pre_norm=True, + order_index=0, + cpe_indice_key=None, + enable_rpe=False, + enable_flash=True, + upcast_attention=True, + upcast_softmax=True, + ): + super().__init__() + self.channels = channels + self.pre_norm = pre_norm + + self.cpe = PointSequential( + spconv.SubMConv3d( + channels, + channels, + kernel_size=3, + bias=True, + indice_key=cpe_indice_key, + ), + nn.Linear(channels, channels), + norm_layer(channels), + ) + + self.norm1 = PointSequential(norm_layer(channels)) + self.ls1 = PointSequential( + LayerScale(channels, init_values=layer_scale) + if layer_scale is not None + else nn.Identity() + ) + self.attn = SerializedAttention( + channels=channels, + patch_size=patch_size, + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=proj_drop, + order_index=order_index, + enable_rpe=enable_rpe, + enable_flash=enable_flash, + upcast_attention=upcast_attention, + upcast_softmax=upcast_softmax, + ) + self.norm2 = PointSequential(norm_layer(channels)) + self.ls2 = PointSequential( + LayerScale(channels, init_values=layer_scale) + if layer_scale is not None + else nn.Identity() + ) + self.mlp = PointSequential( + MLP( + in_channels=channels, + hidden_channels=int(channels * mlp_ratio), + out_channels=channels, + act_layer=act_layer, + drop=proj_drop, + ) + ) + self.drop_path = PointSequential( + DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + ) + + def forward(self, point: Point): + shortcut = point.feat + point = self.cpe(point) + point.feat = shortcut + point.feat + shortcut = point.feat + if self.pre_norm: + point = self.norm1(point) + point = self.drop_path(self.ls1(self.attn(point))) + point.feat = shortcut + point.feat + if not self.pre_norm: + point = self.norm1(point) + + shortcut = point.feat + if self.pre_norm: + point = self.norm2(point) + point = self.drop_path(self.ls2(self.mlp(point))) + point.feat = shortcut + point.feat + if not self.pre_norm: + point = self.norm2(point) + point.sparse_conv_feat = point.sparse_conv_feat.replace_feature(point.feat) + return point + + +class GridPooling(PointModule): + def __init__( + self, + in_channels, + out_channels, + stride=2, + norm_layer=None, + act_layer=None, + reduce="max", + shuffle_orders=True, + traceable=True, # record parent and cluster + ): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + + self.stride = stride + assert reduce in ["sum", "mean", "min", "max"] + self.reduce = reduce + self.shuffle_orders = shuffle_orders + self.traceable = traceable + + self.proj = nn.Linear(in_channels, out_channels) + if norm_layer is not None: + self.norm = PointSequential(norm_layer(out_channels)) + if act_layer is not None: + self.act = PointSequential(act_layer()) + + def forward(self, point: Point): + if "grid_coord" in point.keys(): + grid_coord = point.grid_coord + elif {"coord", "grid_size"}.issubset(point.keys()): + grid_coord = torch.div( + point.coord - point.coord.min(0)[0], + point.grid_size, + rounding_mode="trunc", + ).int() + else: + raise AssertionError( + "[gird_coord] or [coord, grid_size] should be include in the Point" + ) + grid_coord = torch.div(grid_coord, self.stride, rounding_mode="trunc") + grid_coord = grid_coord | point.batch.view(-1, 1) << 48 + grid_coord, cluster, counts = torch.unique( + grid_coord, + sorted=True, + return_inverse=True, + return_counts=True, + dim=0, + ) + grid_coord = grid_coord & ((1 << 48) - 1) + # indices of point sorted by cluster, for torch_scatter.segment_csr + _, indices = torch.sort(cluster) + # index pointer for sorted point, for torch_scatter.segment_csr + idx_ptr = torch.cat([counts.new_zeros(1), torch.cumsum(counts, dim=0)]) + # head_indices of each cluster, for reduce attr e.g. code, batch + head_indices = indices[idx_ptr[:-1]] + point_dict = Dict( + feat=torch_scatter.segment_csr( + self.proj(point.feat)[indices], idx_ptr, reduce=self.reduce + ), + coord=torch_scatter.segment_csr( + point.coord[indices], idx_ptr, reduce="mean" + ), + grid_coord=grid_coord, + batch=point.batch[head_indices], + ) + if "origin_coord" in point.keys(): + point_dict["origin_coord"] = torch_scatter.segment_csr( + point.origin_coord[indices], idx_ptr, reduce="mean" + ) + if "condition" in point.keys(): + point_dict["condition"] = point.condition + if "context" in point.keys(): + point_dict["context"] = point.context + if "name" in point.keys(): + point_dict["name"] = point.name + if "split" in point.keys(): + point_dict["split"] = point.split + if "color" in point.keys(): + point_dict["color"] = torch_scatter.segment_csr( + point.color[indices], idx_ptr, reduce="mean" + ) + if "grid_size" in point.keys(): + point_dict["grid_size"] = point.grid_size * self.stride + + if self.traceable: + point_dict["pooling_inverse"] = cluster + point_dict["pooling_parent"] = point + point_dict["idx_ptr"] = idx_ptr + order = point.order + point = Point(point_dict) + if self.norm is not None: + point = self.norm(point) + if self.act is not None: + point = self.act(point) + point.serialization(order=order, shuffle_orders=self.shuffle_orders) + point.sparsify() + return point + + +class GridUnpooling(PointModule): + def __init__( + self, + in_channels, + skip_channels, + out_channels, + norm_layer=None, + act_layer=None, + traceable=False, # record parent and cluster + ): + super().__init__() + self.proj = PointSequential(nn.Linear(in_channels, out_channels)) + self.proj_skip = PointSequential(nn.Linear(skip_channels, out_channels)) + + if norm_layer is not None: + self.proj.add(norm_layer(out_channels)) + self.proj_skip.add(norm_layer(out_channels)) + + if act_layer is not None: + self.proj.add(act_layer()) + self.proj_skip.add(act_layer()) + + self.traceable = traceable + + def forward(self, point): + assert "pooling_parent" in point.keys() + assert "pooling_inverse" in point.keys() + parent = point.pop("pooling_parent") + inverse = point.pooling_inverse + feat = point.feat + + parent = self.proj_skip(parent) + parent.feat = parent.feat + self.proj(point).feat[inverse] + parent.sparse_conv_feat = parent.sparse_conv_feat.replace_feature(parent.feat) + + if self.traceable: + point.feat = feat + parent["unpooling_parent"] = point + return parent + + +class Embedding(PointModule): + def __init__( + self, + in_channels, + embed_channels, + norm_layer=None, + act_layer=None, + mask_token=False, + ): + super().__init__() + self.in_channels = in_channels + self.embed_channels = embed_channels + + self.stem = PointSequential(linear=nn.Linear(in_channels, embed_channels)) + if norm_layer is not None: + self.stem.add(norm_layer(embed_channels), name="norm") + if act_layer is not None: + self.stem.add(act_layer(), name="act") + + if mask_token: + self.mask_token = nn.Parameter(torch.zeros(1, embed_channels)) + else: + self.mask_token = None + + def forward(self, point: Point): + point = self.stem(point) + if "mask" in point.keys(): + point.feat = torch.where( + point.mask.unsqueeze(-1), + self.mask_token.to(point.feat.dtype), + point.feat, + ) + return point + + +@MODELS.register_module("PT-v3m2") +class PointTransformerV3(PointModule): + def __init__( + self, + in_channels=6, + order=("z", "z-trans"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(48, 48, 48, 48, 48), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(48, 48, 48, 48), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.3, + layer_scale=None, + pre_norm=True, + shuffle_orders=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + traceable=False, + mask_token=False, + enc_mode=False, + freeze_encoder=False, + ): + super().__init__() + self.num_stages = len(enc_depths) + self.order = [order] if isinstance(order, str) else order + self.shuffle_orders = shuffle_orders + self.enc_mode = enc_mode + self.freeze_encoder = freeze_encoder + + assert self.num_stages == len(stride) + 1 + assert self.num_stages == len(enc_depths) + assert self.num_stages == len(enc_channels) + assert self.num_stages == len(enc_num_head) + assert self.num_stages == len(enc_patch_size) + assert self.enc_mode or self.num_stages == len(dec_depths) + 1 + assert self.enc_mode or self.num_stages == len(dec_channels) + 1 + assert self.enc_mode or self.num_stages == len(dec_num_head) + 1 + assert self.enc_mode or self.num_stages == len(dec_patch_size) + 1 + + # normalization layer + ln_layer = nn.LayerNorm + # activation layers + act_layer = nn.GELU + + self.embedding = Embedding( + in_channels=in_channels, + embed_channels=enc_channels[0], + norm_layer=ln_layer, + act_layer=act_layer, + mask_token=mask_token, + ) + + # encoder + enc_drop_path = [ + x.item() for x in torch.linspace(0, drop_path, sum(enc_depths)) + ] + self.enc = PointSequential() + for s in range(self.num_stages): + enc_drop_path_ = enc_drop_path[ + sum(enc_depths[:s]) : sum(enc_depths[: s + 1]) + ] + enc = PointSequential() + if s > 0: + enc.add( + GridPooling( + in_channels=enc_channels[s - 1], + out_channels=enc_channels[s], + stride=stride[s - 1], + norm_layer=ln_layer, + act_layer=act_layer, + ), + name="down", + ) + for i in range(enc_depths[s]): + enc.add( + Block( + channels=enc_channels[s], + num_heads=enc_num_head[s], + patch_size=enc_patch_size[s], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=proj_drop, + drop_path=enc_drop_path_[i], + layer_scale=layer_scale, + norm_layer=ln_layer, + act_layer=act_layer, + pre_norm=pre_norm, + order_index=i % len(self.order), + cpe_indice_key=f"stage{s}", + enable_rpe=enable_rpe, + enable_flash=enable_flash, + upcast_attention=upcast_attention, + upcast_softmax=upcast_softmax, + ), + name=f"block{i}", + ) + if len(enc) != 0: + self.enc.add(module=enc, name=f"enc{s}") + + # decoder + if not self.enc_mode: + dec_drop_path = [ + x.item() for x in torch.linspace(0, drop_path, sum(dec_depths)) + ] + self.dec = PointSequential() + dec_channels = list(dec_channels) + [enc_channels[-1]] + for s in reversed(range(self.num_stages - 1)): + dec_drop_path_ = dec_drop_path[ + sum(dec_depths[:s]) : sum(dec_depths[: s + 1]) + ] + dec_drop_path_.reverse() + dec = PointSequential() + dec.add( + GridUnpooling( + in_channels=dec_channels[s + 1], + skip_channels=enc_channels[s], + out_channels=dec_channels[s], + norm_layer=ln_layer, + act_layer=act_layer, + traceable=traceable, + ), + name="up", + ) + for i in range(dec_depths[s]): + dec.add( + Block( + channels=dec_channels[s], + num_heads=dec_num_head[s], + patch_size=dec_patch_size[s], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=proj_drop, + drop_path=dec_drop_path_[i], + layer_scale=layer_scale, + norm_layer=ln_layer, + act_layer=act_layer, + pre_norm=pre_norm, + order_index=i % len(self.order), + cpe_indice_key=f"stage{s}", + enable_rpe=enable_rpe, + enable_flash=enable_flash, + upcast_attention=upcast_attention, + upcast_softmax=upcast_softmax, + ), + name=f"block{i}", + ) + self.dec.add(module=dec, name=f"dec{s}") + if self.freeze_encoder: + for p in self.embedding.parameters(): + p.requires_grad = False + for p in self.enc.parameters(): + p.requires_grad = False + self.apply(self._init_weights) + + @staticmethod + def _init_weights(module): + if isinstance(module, nn.Linear): + trunc_normal_(module.weight, std=0.02) + if module.bias is not None: + nn.init.zeros_(module.bias) + elif isinstance(module, spconv.SubMConv3d): + trunc_normal_(module.weight, std=0.02) + if module.bias is not None: + nn.init.zeros_(module.bias) + + def forward(self, data_dict): + point = Point(data_dict) + point = self.embedding(point) + + point.serialization(order=self.order, shuffle_orders=self.shuffle_orders) + point.sparsify() + + point = self.enc(point) + if not self.enc_mode: + point = self.dec(point) + return point diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/utils/__init__.py b/point_transformer_v3/pointcept_minimal/pointcept/models/utils/__init__.py new file mode 100644 index 0000000..da0c47b --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/models/utils/__init__.py @@ -0,0 +1,9 @@ +from .misc import ( + offset2batch, + offset2bincount, + bincount2offset, + batch2offset, + off_diagonal, +) +from .checkpoint import checkpoint +from .serialization import encode, decode diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/utils/checkpoint.py b/point_transformer_v3/pointcept_minimal/pointcept/models/utils/checkpoint.py new file mode 100644 index 0000000..5882035 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/models/utils/checkpoint.py @@ -0,0 +1,57 @@ +""" +Checkpoint Utils for Models + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import torch + + +class CheckpointFunction(torch.autograd.Function): + @staticmethod + def forward(ctx, run_function, length, *args): + ctx.run_function = run_function + ctx.input_tensors = list(args[:length]) + ctx.input_params = list(args[length:]) + + with torch.no_grad(): + output_tensors = ctx.run_function(*ctx.input_tensors) + return output_tensors + + @staticmethod + def backward(ctx, *output_grads): + ctx.input_tensors = [x.detach().requires_grad_(True) for x in ctx.input_tensors] + with torch.enable_grad(): + # Fixes a bug where the first op in run_function modifies the + # Tensor storage in place, which is not allowed for detach()'d + # Tensors. + shallow_copies = [x.view_as(x) for x in ctx.input_tensors] + output_tensors = ctx.run_function(*shallow_copies) + input_grads = torch.autograd.grad( + output_tensors, + ctx.input_tensors + ctx.input_params, + output_grads, + allow_unused=True, + ) + del ctx.input_tensors + del ctx.input_params + del output_tensors + return (None, None) + input_grads + + +def checkpoint(func, inputs, params, flag): + """ + Evaluate a function without caching intermediate activations, allowing for + reduced memory at the expense of extra compute in the backward pass. + :param func: the function to evaluate. + :param inputs: the argument sequence to pass to `func`. + :param params: a sequence of parameters `func` depends on but does not + explicitly take as arguments. + :param flag: if False, disable gradient checkpointing. + """ + if flag: + args = tuple(inputs) + tuple(params) + return CheckpointFunction.apply(func, len(inputs), *args) + else: + return func(*inputs) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/utils/misc.py b/point_transformer_v3/pointcept_minimal/pointcept/models/utils/misc.py new file mode 100644 index 0000000..4eef9eb --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/models/utils/misc.py @@ -0,0 +1,41 @@ +""" +General Utils for Models + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import torch +from itertools import chain + + +@torch.no_grad() +def offset2bincount(offset): + return torch.diff( + offset, prepend=torch.tensor([0], device=offset.device, dtype=torch.long) + ) + + +@torch.no_grad() +def bincount2offset(bincount): + return torch.cumsum(bincount, dim=0) + + +@torch.no_grad() +def offset2batch(offset): + bincount = offset2bincount(offset) + return torch.arange( + len(bincount), device=offset.device, dtype=torch.long + ).repeat_interleave(bincount) + + +@torch.no_grad() +def batch2offset(batch): + return torch.cumsum(batch.bincount(), dim=0).long() + + +def off_diagonal(x): + # return a flattened view of the off-diagonal elements of a square matrix + n, m = x.shape + assert n == m + return x.flatten()[:-1].view(n - 1, n + 1)[:, 1:].flatten() diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/__init__.py b/point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/__init__.py new file mode 100644 index 0000000..058c5e1 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/__init__.py @@ -0,0 +1,8 @@ +from .default import ( + encode, + decode, + z_order_encode, + z_order_decode, + hilbert_encode, + hilbert_decode, +) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/default.py b/point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/default.py new file mode 100644 index 0000000..15898b5 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/default.py @@ -0,0 +1,59 @@ +import torch +from .z_order import xyz2key as z_order_encode_ +from .z_order import key2xyz as z_order_decode_ +from .hilbert import encode as hilbert_encode_ +from .hilbert import decode as hilbert_decode_ + + +@torch.inference_mode() +def encode(grid_coord, batch=None, depth=16, order="z"): + assert order in {"z", "z-trans", "hilbert", "hilbert-trans"} + if order == "z": + code = z_order_encode(grid_coord, depth=depth) + elif order == "z-trans": + code = z_order_encode(grid_coord[:, [1, 0, 2]], depth=depth) + elif order == "hilbert": + code = hilbert_encode(grid_coord, depth=depth) + elif order == "hilbert-trans": + code = hilbert_encode(grid_coord[:, [1, 0, 2]], depth=depth) + else: + raise NotImplementedError + if batch is not None: + batch = batch.long() + code = batch << depth * 3 | code + return code + + +@torch.inference_mode() +def decode(code, depth=16, order="z"): + assert order in {"z", "hilbert"} + batch = code >> depth * 3 + code = code & ((1 << depth * 3) - 1) + if order == "z": + grid_coord = z_order_decode(code, depth=depth) + elif order == "hilbert": + grid_coord = hilbert_decode(code, depth=depth) + else: + raise NotImplementedError + return grid_coord, batch + + +def z_order_encode(grid_coord: torch.Tensor, depth: int = 16): + x, y, z = grid_coord[:, 0].long(), grid_coord[:, 1].long(), grid_coord[:, 2].long() + # we block the support to batch, maintain batched code in Point class + code = z_order_encode_(x, y, z, b=None, depth=depth) + return code + + +def z_order_decode(code: torch.Tensor, depth): + x, y, z = z_order_decode_(code, depth=depth) + grid_coord = torch.stack([x, y, z], dim=-1) # (N, 3) + return grid_coord + + +def hilbert_encode(grid_coord: torch.Tensor, depth: int = 16): + return hilbert_encode_(grid_coord, num_dims=3, num_bits=depth) + + +def hilbert_decode(code: torch.Tensor, depth: int = 16): + return hilbert_decode_(code, num_dims=3, num_bits=depth) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/hilbert.py b/point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/hilbert.py new file mode 100644 index 0000000..c96a3a9 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/hilbert.py @@ -0,0 +1,303 @@ +""" +Hilbert Order +Modified from https://github.com/PrincetonLIPS/numpy-hilbert-curve + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com), Kaixin Xu +Please cite our work if the code is helpful to you. +""" + +import torch + + +def right_shift(binary, k=1, axis=-1): + """Right shift an array of binary values. + + Parameters: + ----------- + binary: An ndarray of binary values. + + k: The number of bits to shift. Default 1. + + axis: The axis along which to shift. Default -1. + + Returns: + -------- + Returns an ndarray with zero prepended and the ends truncated, along + whatever axis was specified.""" + + # If we're shifting the whole thing, just return zeros. + if binary.shape[axis] <= k: + return torch.zeros_like(binary) + + # Determine the padding pattern. + # padding = [(0,0)] * len(binary.shape) + # padding[axis] = (k,0) + + # Determine the slicing pattern to eliminate just the last one. + slicing = [slice(None)] * len(binary.shape) + slicing[axis] = slice(None, -k) + shifted = torch.nn.functional.pad( + binary[tuple(slicing)], (k, 0), mode="constant", value=0 + ) + + return shifted + + +def binary2gray(binary, axis=-1): + """Convert an array of binary values into Gray codes. + + This uses the classic X ^ (X >> 1) trick to compute the Gray code. + + Parameters: + ----------- + binary: An ndarray of binary values. + + axis: The axis along which to compute the gray code. Default=-1. + + Returns: + -------- + Returns an ndarray of Gray codes. + """ + shifted = right_shift(binary, axis=axis) + + # Do the X ^ (X >> 1) trick. + gray = torch.logical_xor(binary, shifted) + + return gray + + +def gray2binary(gray, axis=-1): + """Convert an array of Gray codes back into binary values. + + Parameters: + ----------- + gray: An ndarray of gray codes. + + axis: The axis along which to perform Gray decoding. Default=-1. + + Returns: + -------- + Returns an ndarray of binary values. + """ + + # Loop the log2(bits) number of times necessary, with shift and xor. + shift = 2 ** (torch.Tensor([gray.shape[axis]]).log2().ceil().int() - 1) + while shift > 0: + gray = torch.logical_xor(gray, right_shift(gray, shift)) + shift = torch.div(shift, 2, rounding_mode="floor") + return gray + + +def encode(locs, num_dims, num_bits): + """Decode an array of locations in a hypercube into a Hilbert integer. + + This is a vectorized-ish version of the Hilbert curve implementation by John + Skilling as described in: + + Skilling, J. (2004, April). Programming the Hilbert curve. In AIP Conference + Proceedings (Vol. 707, No. 1, pp. 381-387). American Institute of Physics. + + Params: + ------- + locs - An ndarray of locations in a hypercube of num_dims dimensions, in + which each dimension runs from 0 to 2**num_bits-1. The shape can + be arbitrary, as long as the last dimension of the same has size + num_dims. + + num_dims - The dimensionality of the hypercube. Integer. + + num_bits - The number of bits for each dimension. Integer. + + Returns: + -------- + The output is an ndarray of uint64 integers with the same shape as the + input, excluding the last dimension, which needs to be num_dims. + """ + + # Keep around the original shape for later. + orig_shape = locs.shape + bitpack_mask = 1 << torch.arange(0, 8).to(locs.device) + bitpack_mask_rev = bitpack_mask.flip(-1) + + if orig_shape[-1] != num_dims: + raise ValueError( + """ + The shape of locs was surprising in that the last dimension was of size + %d, but num_dims=%d. These need to be equal. + """ + % (orig_shape[-1], num_dims) + ) + + if num_dims * num_bits > 63: + raise ValueError( + """ + num_dims=%d and num_bits=%d for %d bits total, which can't be encoded + into a int64. Are you sure you need that many points on your Hilbert + curve? + """ + % (num_dims, num_bits, num_dims * num_bits) + ) + + # Treat the location integers as 64-bit unsigned and then split them up into + # a sequence of uint8s. Preserve the association by dimension. + locs_uint8 = locs.long().view(torch.uint8).reshape((-1, num_dims, 8)).flip(-1) + + # Now turn these into bits and truncate to num_bits. + gray = ( + locs_uint8.unsqueeze(-1) + .bitwise_and(bitpack_mask_rev) + .ne(0) + .byte() + .flatten(-2, -1)[..., -num_bits:] + ) + + # Run the decoding process the other way. + # Iterate forwards through the bits. + for bit in range(0, num_bits): + # Iterate forwards through the dimensions. + for dim in range(0, num_dims): + # Identify which ones have this bit active. + mask = gray[:, dim, bit] + + # Where this bit is on, invert the 0 dimension for lower bits. + gray[:, 0, bit + 1 :] = torch.logical_xor( + gray[:, 0, bit + 1 :], mask[:, None] + ) + + # Where the bit is off, exchange the lower bits with the 0 dimension. + to_flip = torch.logical_and( + torch.logical_not(mask[:, None]).repeat(1, gray.shape[2] - bit - 1), + torch.logical_xor(gray[:, 0, bit + 1 :], gray[:, dim, bit + 1 :]), + ) + gray[:, dim, bit + 1 :] = torch.logical_xor( + gray[:, dim, bit + 1 :], to_flip + ) + gray[:, 0, bit + 1 :] = torch.logical_xor(gray[:, 0, bit + 1 :], to_flip) + + # Now flatten out. + gray = gray.swapaxes(1, 2).reshape((-1, num_bits * num_dims)) + + # Convert Gray back to binary. + hh_bin = gray2binary(gray) + + # Pad back out to 64 bits. + extra_dims = 64 - num_bits * num_dims + padded = torch.nn.functional.pad(hh_bin, (extra_dims, 0), "constant", 0) + + # Convert binary values into uint8s. + hh_uint8 = ( + (padded.flip(-1).reshape((-1, 8, 8)) * bitpack_mask) + .sum(2) + .squeeze() + .type(torch.uint8) + ) + + # Convert uint8s into uint64s. + hh_uint64 = hh_uint8.view(torch.int64).squeeze() + + return hh_uint64 + + +def decode(hilberts, num_dims, num_bits): + """Decode an array of Hilbert integers into locations in a hypercube. + + This is a vectorized-ish version of the Hilbert curve implementation by John + Skilling as described in: + + Skilling, J. (2004, April). Programming the Hilbert curve. In AIP Conference + Proceedings (Vol. 707, No. 1, pp. 381-387). American Institute of Physics. + + Params: + ------- + hilberts - An ndarray of Hilbert integers. Must be an integer dtype and + cannot have fewer bits than num_dims * num_bits. + + num_dims - The dimensionality of the hypercube. Integer. + + num_bits - The number of bits for each dimension. Integer. + + Returns: + -------- + The output is an ndarray of unsigned integers with the same shape as hilberts + but with an additional dimension of size num_dims. + """ + + if num_dims * num_bits > 64: + raise ValueError( + """ + num_dims=%d and num_bits=%d for %d bits total, which can't be encoded + into a uint64. Are you sure you need that many points on your Hilbert + curve? + """ + % (num_dims, num_bits) + ) + + # Handle the case where we got handed a naked integer. + hilberts = torch.atleast_1d(hilberts) + + # Keep around the shape for later. + orig_shape = hilberts.shape + bitpack_mask = 2 ** torch.arange(0, 8).to(hilberts.device) + bitpack_mask_rev = bitpack_mask.flip(-1) + + # Treat each of the hilberts as a s equence of eight uint8. + # This treats all of the inputs as uint64 and makes things uniform. + hh_uint8 = ( + hilberts.ravel().type(torch.int64).view(torch.uint8).reshape((-1, 8)).flip(-1) + ) + + # Turn these lists of uints into lists of bits and then truncate to the size + # we actually need for using Skilling's procedure. + hh_bits = ( + hh_uint8.unsqueeze(-1) + .bitwise_and(bitpack_mask_rev) + .ne(0) + .byte() + .flatten(-2, -1)[:, -num_dims * num_bits :] + ) + + # Take the sequence of bits and Gray-code it. + gray = binary2gray(hh_bits) + + # There has got to be a better way to do this. + # I could index them differently, but the eventual packbits likes it this way. + gray = gray.reshape((-1, num_bits, num_dims)).swapaxes(1, 2) + + # Iterate backwards through the bits. + for bit in range(num_bits - 1, -1, -1): + # Iterate backwards through the dimensions. + for dim in range(num_dims - 1, -1, -1): + # Identify which ones have this bit active. + mask = gray[:, dim, bit] + + # Where this bit is on, invert the 0 dimension for lower bits. + gray[:, 0, bit + 1 :] = torch.logical_xor( + gray[:, 0, bit + 1 :], mask[:, None] + ) + + # Where the bit is off, exchange the lower bits with the 0 dimension. + to_flip = torch.logical_and( + torch.logical_not(mask[:, None]), + torch.logical_xor(gray[:, 0, bit + 1 :], gray[:, dim, bit + 1 :]), + ) + gray[:, dim, bit + 1 :] = torch.logical_xor( + gray[:, dim, bit + 1 :], to_flip + ) + gray[:, 0, bit + 1 :] = torch.logical_xor(gray[:, 0, bit + 1 :], to_flip) + + # Pad back out to 64 bits. + extra_dims = 64 - num_bits + padded = torch.nn.functional.pad(gray, (extra_dims, 0), "constant", 0) + + # Now chop these up into blocks of 8. + locs_chopped = padded.flip(-1).reshape((-1, num_dims, 8, 8)) + + # Take those blocks and turn them unto uint8s. + # from IPython import embed; embed() + locs_uint8 = (locs_chopped * bitpack_mask).sum(3).squeeze().type(torch.uint8) + + # Finally, treat these as uint64s. + flat_locs = locs_uint8.view(torch.int64) + + # Return them in the expected shape. + return flat_locs.reshape((*orig_shape, num_dims)) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/z_order.py b/point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/z_order.py new file mode 100644 index 0000000..6fd01a5 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/z_order.py @@ -0,0 +1,126 @@ +# -------------------------------------------------------- +# Octree-based Sparse Convolutional Neural Networks +# Copyright (c) 2022 Peng-Shuai Wang +# Licensed under The MIT License [see LICENSE for details] +# Written by Peng-Shuai Wang +# -------------------------------------------------------- + +import torch +from typing import Optional, Union + + +class KeyLUT: + def __init__(self): + r256 = torch.arange(256, dtype=torch.int64) + r512 = torch.arange(512, dtype=torch.int64) + zero = torch.zeros(256, dtype=torch.int64) + device = torch.device("cpu") + + self._encode = { + device: ( + self.xyz2key(r256, zero, zero, 8), + self.xyz2key(zero, r256, zero, 8), + self.xyz2key(zero, zero, r256, 8), + ) + } + self._decode = {device: self.key2xyz(r512, 9)} + + def encode_lut(self, device=torch.device("cpu")): + if device not in self._encode: + cpu = torch.device("cpu") + self._encode[device] = tuple(e.to(device) for e in self._encode[cpu]) + return self._encode[device] + + def decode_lut(self, device=torch.device("cpu")): + if device not in self._decode: + cpu = torch.device("cpu") + self._decode[device] = tuple(e.to(device) for e in self._decode[cpu]) + return self._decode[device] + + def xyz2key(self, x, y, z, depth): + key = torch.zeros_like(x) + for i in range(depth): + mask = 1 << i + key = ( + key + | ((x & mask) << (2 * i + 2)) + | ((y & mask) << (2 * i + 1)) + | ((z & mask) << (2 * i + 0)) + ) + return key + + def key2xyz(self, key, depth): + x = torch.zeros_like(key) + y = torch.zeros_like(key) + z = torch.zeros_like(key) + for i in range(depth): + x = x | ((key & (1 << (3 * i + 2))) >> (2 * i + 2)) + y = y | ((key & (1 << (3 * i + 1))) >> (2 * i + 1)) + z = z | ((key & (1 << (3 * i + 0))) >> (2 * i + 0)) + return x, y, z + + +_key_lut = KeyLUT() + + +def xyz2key( + x: torch.Tensor, + y: torch.Tensor, + z: torch.Tensor, + b: Optional[Union[torch.Tensor, int]] = None, + depth: int = 16, +): + r"""Encodes :attr:`x`, :attr:`y`, :attr:`z` coordinates to the shuffled keys + based on pre-computed look up tables. The speed of this function is much + faster than the method based on for-loop. + + Args: + x (torch.Tensor): The x coordinate. + y (torch.Tensor): The y coordinate. + z (torch.Tensor): The z coordinate. + b (torch.Tensor or int): The batch index of the coordinates, and should be + smaller than 32768. If :attr:`b` is :obj:`torch.Tensor`, the size of + :attr:`b` must be the same as :attr:`x`, :attr:`y`, and :attr:`z`. + depth (int): The depth of the shuffled key, and must be smaller than 17 (< 17). + """ + + EX, EY, EZ = _key_lut.encode_lut(x.device) + x, y, z = x.long(), y.long(), z.long() + + mask = 255 if depth > 8 else (1 << depth) - 1 + key = EX[x & mask] | EY[y & mask] | EZ[z & mask] + if depth > 8: + mask = (1 << (depth - 8)) - 1 + key16 = EX[(x >> 8) & mask] | EY[(y >> 8) & mask] | EZ[(z >> 8) & mask] + key = key16 << 24 | key + + if b is not None: + b = b.long() + key = b << 48 | key + + return key + + +def key2xyz(key: torch.Tensor, depth: int = 16): + r"""Decodes the shuffled key to :attr:`x`, :attr:`y`, :attr:`z` coordinates + and the batch index based on pre-computed look up tables. + + Args: + key (torch.Tensor): The shuffled key. + depth (int): The depth of the shuffled key, and must be smaller than 17 (< 17). + """ + + DX, DY, DZ = _key_lut.decode_lut(key.device) + x, y, z = torch.zeros_like(key), torch.zeros_like(key), torch.zeros_like(key) + + b = key >> 48 + key = key & ((1 << 48) - 1) + + n = (depth + 2) // 3 + for i in range(n): + k = key >> (i * 9) & 511 + x = x | (DX[k] << (i * 3)) + y = y | (DY[k] << (i * 3)) + z = z | (DZ[k] << (i * 3)) + + return x, y, z, b diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/utils/structure.py b/point_transformer_v3/pointcept_minimal/pointcept/models/utils/structure.py new file mode 100644 index 0000000..1e8e80a --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/models/utils/structure.py @@ -0,0 +1,209 @@ +import torch +import spconv.pytorch as spconv + +try: + import ocnn +except ImportError: + ocnn = None +from addict import Dict +from typing import List + +from pointcept.models.utils.serialization import encode +from pointcept.models.utils import ( + offset2batch, + batch2offset, + offset2bincount, + bincount2offset, +) + + +class Point(Dict): + """ + Point Structure of Pointcept + + A Point (point cloud) in Pointcept is a dictionary that contains various properties of + a batched point cloud. The property with the following names have a specific definition + as follows: + + - "coord": original coordinate of point cloud; + - "grid_coord": grid coordinate for specific grid size (related to GridSampling); + Point also support the following optional attributes: + - "offset": if not exist, initialized as batch size is 1; + - "batch": if not exist, initialized as batch size is 1; + - "feat": feature of point cloud, default input of model; + - "grid_size": Grid size of point cloud (related to GridSampling); + (related to Serialization) + - "serialized_depth": depth of serialization, 2 ** depth * grid_size describe the maximum of point cloud range; + - "serialized_code": a list of serialization codes; + - "serialized_order": a list of serialization order determined by code; + - "serialized_inverse": a list of inverse mapping determined by code; + (related to Sparsify: SpConv) + - "sparse_shape": Sparse shape for Sparse Conv Tensor; + - "sparse_conv_feat": SparseConvTensor init with information provide by Point; + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # If one of "offset" or "batch" do not exist, generate by the existing one + if "batch" not in self.keys() and "offset" in self.keys(): + self["batch"] = offset2batch(self.offset) + elif "offset" not in self.keys() and "batch" in self.keys(): + self["offset"] = batch2offset(self.batch) + + def serialization(self, order="z", depth=None, shuffle_orders=False): + """ + Point Cloud Serialization + + relay on ["grid_coord" or "coord" + "grid_size", "batch", "feat"] + """ + self["order"] = order + assert "batch" in self.keys() + if "grid_coord" not in self.keys(): + # if you don't want to operate GridSampling in data augmentation, + # please add the following augmentation into your pipline: + # dict(type="Copy", keys_dict={"grid_size": 0.01}), + # (adjust `grid_size` to what your want) + assert {"grid_size", "coord"}.issubset(self.keys()) + + self["grid_coord"] = torch.div( + self.coord - self.coord.min(0)[0], self.grid_size, rounding_mode="trunc" + ).int() + + if depth is None: + # Adaptive measure the depth of serialization cube (length = 2 ^ depth) + depth = int(self.grid_coord.max() + 1).bit_length() + self["serialized_depth"] = depth + # Maximum bit length for serialization code is 63 (int64) + assert depth * 3 + len(self.offset).bit_length() <= 63 + # Here we follow OCNN and set the depth limitation to 16 (48bit) for the point position. + # Although depth is limited to less than 16, we can encode a 655.36^3 (2^16 * 0.01) meter^3 + # cube with a grid size of 0.01 meter. We consider it is enough for the current stage. + # We can unlock the limitation by optimizing the z-order encoding function if necessary. + assert depth <= 16 + + # The serialization codes are arranged as following structures: + # [Order1 ([n]), + # Order2 ([n]), + # ... + # OrderN ([n])] (k, n) + code = [ + encode(self.grid_coord, self.batch, depth, order=order_) for order_ in order + ] + code = torch.stack(code) + order = torch.argsort(code) + inverse = torch.zeros_like(order).scatter_( + dim=1, + index=order, + src=torch.arange(0, code.shape[1], device=order.device).repeat( + code.shape[0], 1 + ), + ) + + if shuffle_orders: + perm = torch.randperm(code.shape[0]) + code = code[perm] + order = order[perm] + inverse = inverse[perm] + + self["serialized_code"] = code + self["serialized_order"] = order + self["serialized_inverse"] = inverse + + def sparsify(self, pad=96): + """ + Point Cloud Serialization + + Point cloud is sparse, here we use "sparsify" to specifically refer to + preparing "spconv.SparseConvTensor" for SpConv. + + relay on ["grid_coord" or "coord" + "grid_size", "batch", "feat"] + + pad: padding sparse for sparse shape. + """ + assert {"feat", "batch"}.issubset(self.keys()) + if "grid_coord" not in self.keys(): + # if you don't want to operate GridSampling in data augmentation, + # please add the following augmentation into your pipline: + # dict(type="Copy", keys_dict={"grid_size": 0.01}), + # (adjust `grid_size` to what your want) + assert {"grid_size", "coord"}.issubset(self.keys()) + self["grid_coord"] = torch.div( + self.coord - self.coord.min(0)[0], self.grid_size, rounding_mode="trunc" + ).int() + if "sparse_shape" in self.keys(): + sparse_shape = self.sparse_shape + else: + sparse_shape = torch.add( + torch.max(self.grid_coord, dim=0).values, pad + ).tolist() + sparse_conv_feat = spconv.SparseConvTensor( + features=self.feat, + indices=torch.cat( + [self.batch.unsqueeze(-1).int(), self.grid_coord.int()], dim=1 + ).contiguous(), + spatial_shape=sparse_shape, + batch_size=self.batch[-1].tolist() + 1, + ) + self["sparse_shape"] = sparse_shape + self["sparse_conv_feat"] = sparse_conv_feat + + def octreelization(self, depth=None, full_depth=None): + """ + Point Cloud Octreelization + + Generate octree with OCNN + relay on ["grid_coord", "batch", "feat"] + """ + assert ( + ocnn is not None + ), "Please follow https://github.com/octree-nn/ocnn-pytorch install ocnn." + assert {"feat", "batch"}.issubset(self.keys()) + # add 1 to make grid space support shift order + if "grid_coord" not in self.keys(): + # if you don't want to operate GridSampling in data augmentation, + # please add the following augmentation into your pipline: + # dict(type="Copy", keys_dict={"grid_size": 0.01}), + # (adjust `grid_size` to what your want) + assert {"grid_size", "coord"}.issubset(self.keys()) + self["grid_coord"] = torch.div( + self.coord - self.coord.min(0)[0], self.grid_size, rounding_mode="trunc" + ).int() + if depth is None: + if "depth" in self.keys(): + depth = self.depth + else: + depth = int(self.grid_coord.max() + 1).bit_length() + if full_depth is None: + full_depth = 1 + self["depth"] = depth + assert depth <= 16 # maximum in ocnn + + # [0, 2**depth] -> [0, 2] -> [-1, 1] + coord = self.grid_coord / 2 ** (self.depth - 1) - 1.0 + point = ocnn.octree.Points( + points=coord, + features=self.feat, + batch_id=self.batch.unsqueeze(-1), + batch_size=self.batch[-1] + 1, + ) + octree = ocnn.octree.Octree( + depth=depth, + full_depth=full_depth, + batch_size=self.batch[-1] + 1, + device=coord.device, + ) + octree.build_octree(point) + octree.construct_all_neigh() + + query_pts = torch.cat([self.grid_coord, point.batch_id], dim=1).contiguous() + inverse = octree.search_xyzb(query_pts, depth, True) + assert torch.sum(inverse < 0) == 0 # all mapping should be valid + inverse_ = torch.unique(inverse) + order = torch.zeros_like(inverse_).scatter_( + dim=0, + index=inverse, + src=torch.arange(0, inverse.shape[0], device=inverse.device), + ) + self["octree"] = octree + self["octree_order"] = order + self["octree_inverse"] = inverse diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/__init__.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/cache.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/cache.py new file mode 100644 index 0000000..c7aec25 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/utils/cache.py @@ -0,0 +1,60 @@ +""" +Data Cache Utils + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import os + +try: + import SharedArray +except ImportError: + SharedArray = None + +try: + from multiprocessing.shared_memory import ShareableList +except ImportError: + import warnings + + warnings.warn("Please update python version >= 3.8 to enable shared_memory") +import numpy as np + + +def shared_array(name, var=None): + if var is not None: + # check exist + if os.path.exists(f"/dev/shm/{name}"): + return SharedArray.attach(f"shm://{name}") + # create shared_array + data = SharedArray.create(f"shm://{name}", var.shape, dtype=var.dtype) + data[...] = var[...] + data.flags.writeable = False + else: + data = SharedArray.attach(f"shm://{name}").copy() + return data + + +def shared_dict(name, var=None): + name = str(name) + assert "." not in name # '.' is used as sep flag + data = {} + if var is not None: + assert isinstance(var, dict) + keys = var.keys() + # current version only cache np.array + keys_valid = [] + for key in keys: + if isinstance(var[key], np.ndarray): + keys_valid.append(key) + keys = keys_valid + + ShareableList(sequence=keys, name=name + ".keys") + for key in keys: + if isinstance(var[key], np.ndarray): + data[key] = shared_array(name=f"{name}.{key}", var=var[key]) + else: + keys = list(ShareableList(name=name + ".keys")) + for key in keys: + data[key] = shared_array(name=f"{name}.{key}") + return data diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/comm.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/comm.py new file mode 100644 index 0000000..69e29e7 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/utils/comm.py @@ -0,0 +1,198 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +""" +This file contains primitives for multi-gpu communication. +This is useful when doing distributed training. +Modified from detectron2(https://github.com/facebookresearch/detectron2) + +Copyright (c) Xiaoyang Wu (xiaoyang.wu@connect.hku.hk). All Rights Reserved. +Please cite our work if you use any part of the code. +""" + +import functools +import numpy as np +import torch +import torch.distributed as dist + +_LOCAL_PROCESS_GROUP = None +""" +A torch process group which only includes processes that on the same machine as the current process. +This variable is set when processes are spawned by `launch()` in "engine/launch.py". +""" + + +def get_world_size() -> int: + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank() -> int: + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + return dist.get_rank() + + +def get_local_rank() -> int: + """ + Returns: + The rank of the current process within the local (per-machine) process group. + """ + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + assert ( + _LOCAL_PROCESS_GROUP is not None + ), "Local process group is not created! Please use launch() to spawn processes!" + return dist.get_rank(group=_LOCAL_PROCESS_GROUP) + + +def get_local_size() -> int: + """ + Returns: + The size of the per-machine process group, + i.e. the number of processes per machine. + """ + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size(group=_LOCAL_PROCESS_GROUP) + + +def is_main_process() -> bool: + return get_rank() == 0 + + +def synchronize(): + """ + Helper function to synchronize (barrier) among all processes when + using distributed training + """ + if not dist.is_available(): + return + if not dist.is_initialized(): + return + world_size = dist.get_world_size() + if world_size == 1: + return + if dist.get_backend() == dist.Backend.NCCL: + # This argument is needed to avoid warnings. + # It's valid only for NCCL backend. + dist.barrier(device_ids=[torch.cuda.current_device()]) + else: + dist.barrier() + + +@functools.lru_cache() +def _get_global_gloo_group(): + """ + Return a process group based on gloo backend, containing all the ranks + The result is cached. + """ + if dist.get_backend() == "nccl": + return dist.new_group(backend="gloo") + else: + return dist.group.WORLD + + +def all_gather(data, group=None): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors). + Args: + data: any picklable object + group: a torch process group. By default, will use a group which + contains all ranks on gloo backend. + Returns: + list[data]: list of data gathered from each rank + """ + if get_world_size() == 1: + return [data] + if group is None: + group = ( + _get_global_gloo_group() + ) # use CPU group by default, to reduce GPU RAM usage. + world_size = dist.get_world_size(group) + if world_size == 1: + return [data] + + output = [None for _ in range(world_size)] + dist.all_gather_object(output, data, group=group) + return output + + +def gather(data, dst=0, group=None): + """ + Run gather on arbitrary picklable data (not necessarily tensors). + Args: + data: any picklable object + dst (int): destination rank + group: a torch process group. By default, will use a group which + contains all ranks on gloo backend. + Returns: + list[data]: on dst, a list of data gathered from each rank. Otherwise, + an empty list. + """ + if get_world_size() == 1: + return [data] + if group is None: + group = _get_global_gloo_group() + world_size = dist.get_world_size(group=group) + if world_size == 1: + return [data] + rank = dist.get_rank(group=group) + + if rank == dst: + output = [None for _ in range(world_size)] + dist.gather_object(data, output, dst=dst, group=group) + return output + else: + dist.gather_object(data, None, dst=dst, group=group) + return [] + + +def shared_random_seed(): + """ + Returns: + int: a random number that is the same across all workers. + If workers need a shared RNG, they can use this shared seed to + create one. + All workers must call this function, otherwise it will deadlock. + """ + ints = np.random.randint(2**31) + all_ints = all_gather(ints) + return all_ints[0] + + +def reduce_dict(input_dict, average=True): + """ + Reduce the values in the dictionary from all processes so that process with rank + 0 has the reduced results. + Args: + input_dict (dict): inputs to be reduced. All the values must be scalar CUDA Tensor. + average (bool): whether to do average or sum + Returns: + a dict with the same keys as input_dict, after reduction. + """ + world_size = get_world_size() + if world_size < 2: + return input_dict + with torch.no_grad(): + names = [] + values = [] + # sort the keys so that they are consistent across processes + for k in sorted(input_dict.keys()): + names.append(k) + values.append(input_dict[k]) + values = torch.stack(values, dim=0) + dist.reduce(values, dst=0) + if dist.get_rank() == 0 and average: + # only main process gets accumulated, so only divide by + # world_size in this case + values /= world_size + reduced_dict = {k: v for k, v in zip(names, values)} + return reduced_dict diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/config.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/config.py new file mode 100644 index 0000000..762ebf4 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/utils/config.py @@ -0,0 +1,694 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import ast +import copy +import os +import os.path as osp +import platform +import shutil +import sys +import tempfile +import uuid +import warnings +from argparse import Action, ArgumentParser +from collections import abc +from importlib import import_module + +from addict import Dict +from yapf.yapflib.yapf_api import FormatCode + +from .misc import import_modules_from_strings +from .path import check_file_exist + +if platform.system() == "Windows": + import regex as re +else: + import re + +BASE_KEY = "_base_" +DELETE_KEY = "_delete_" +DEPRECATION_KEY = "_deprecation_" +RESERVED_KEYS = ["filename", "text", "pretty_text"] + + +class ConfigDict(Dict): + def __missing__(self, name): + raise KeyError(name) + + def __getattr__(self, name): + try: + value = super(ConfigDict, self).__getattr__(name) + except KeyError: + ex = AttributeError( + f"'{self.__class__.__name__}' object has no " f"attribute '{name}'" + ) + except Exception as e: + ex = e + else: + return value + raise ex + + +def add_args(parser, cfg, prefix=""): + for k, v in cfg.items(): + if isinstance(v, str): + parser.add_argument("--" + prefix + k) + elif isinstance(v, int): + parser.add_argument("--" + prefix + k, type=int) + elif isinstance(v, float): + parser.add_argument("--" + prefix + k, type=float) + elif isinstance(v, bool): + parser.add_argument("--" + prefix + k, action="store_true") + elif isinstance(v, dict): + add_args(parser, v, prefix + k + ".") + elif isinstance(v, abc.Iterable): + parser.add_argument("--" + prefix + k, type=type(v[0]), nargs="+") + else: + print(f"cannot parse key {prefix + k} of type {type(v)}") + return parser + + +class Config: + """A facility for config and config files. + + It supports common file formats as configs: python/json/yaml. The interface + is the same as a dict object and also allows access config values as + attributes. + + Example: + >>> cfg = Config(dict(a=1, b=dict(b1=[0, 1]))) + >>> cfg.a + 1 + >>> cfg.b + {'b1': [0, 1]} + >>> cfg.b.b1 + [0, 1] + >>> cfg = Config.fromfile('tests/data/config/a.py') + >>> cfg.filename + "/home/kchen/projects/mmcv/tests/data/config/a.py" + >>> cfg.item4 + 'test' + >>> cfg + "Config [path: /home/kchen/projects/mmcv/tests/data/config/a.py]: " + "{'item1': [1, 2], 'item2': {'a': 0}, 'item3': True, 'item4': 'test'}" + """ + + @staticmethod + def _validate_py_syntax(filename): + with open(filename, "r", encoding="utf-8") as f: + # Setting encoding explicitly to resolve coding issue on windows + content = f.read() + try: + ast.parse(content) + except SyntaxError as e: + raise SyntaxError( + "There are syntax errors in config " f"file {filename}: {e}" + ) + + @staticmethod + def _substitute_predefined_vars(filename, temp_config_name): + file_dirname = osp.dirname(filename) + file_basename = osp.basename(filename) + file_basename_no_extension = osp.splitext(file_basename)[0] + file_extname = osp.splitext(filename)[1] + support_templates = dict( + fileDirname=file_dirname, + fileBasename=file_basename, + fileBasenameNoExtension=file_basename_no_extension, + fileExtname=file_extname, + ) + with open(filename, "r", encoding="utf-8") as f: + # Setting encoding explicitly to resolve coding issue on windows + config_file = f.read() + for key, value in support_templates.items(): + regexp = r"\{\{\s*" + str(key) + r"\s*\}\}" + value = value.replace("\\", "/") + config_file = re.sub(regexp, value, config_file) + with open(temp_config_name, "w", encoding="utf-8") as tmp_config_file: + tmp_config_file.write(config_file) + + @staticmethod + def _pre_substitute_base_vars(filename, temp_config_name): + """Substitute base variable placehoders to string, so that parsing + would work.""" + with open(filename, "r", encoding="utf-8") as f: + # Setting encoding explicitly to resolve coding issue on windows + config_file = f.read() + base_var_dict = {} + regexp = r"\{\{\s*" + BASE_KEY + r"\.([\w\.]+)\s*\}\}" + base_vars = set(re.findall(regexp, config_file)) + for base_var in base_vars: + randstr = f"_{base_var}_{uuid.uuid4().hex.lower()[:6]}" + base_var_dict[randstr] = base_var + regexp = r"\{\{\s*" + BASE_KEY + r"\." + base_var + r"\s*\}\}" + config_file = re.sub(regexp, f'"{randstr}"', config_file) + with open(temp_config_name, "w", encoding="utf-8") as tmp_config_file: + tmp_config_file.write(config_file) + return base_var_dict + + @staticmethod + def _substitute_base_vars(cfg, base_var_dict, base_cfg): + """Substitute variable strings to their actual values.""" + cfg = copy.deepcopy(cfg) + + if isinstance(cfg, dict): + for k, v in cfg.items(): + if isinstance(v, str) and v in base_var_dict: + new_v = base_cfg + for new_k in base_var_dict[v].split("."): + new_v = new_v[new_k] + cfg[k] = new_v + elif isinstance(v, (list, tuple, dict)): + cfg[k] = Config._substitute_base_vars(v, base_var_dict, base_cfg) + elif isinstance(cfg, tuple): + cfg = tuple( + Config._substitute_base_vars(c, base_var_dict, base_cfg) for c in cfg + ) + elif isinstance(cfg, list): + cfg = [ + Config._substitute_base_vars(c, base_var_dict, base_cfg) for c in cfg + ] + elif isinstance(cfg, str) and cfg in base_var_dict: + new_v = base_cfg + for new_k in base_var_dict[cfg].split("."): + new_v = new_v[new_k] + cfg = new_v + + return cfg + + @staticmethod + def _file2dict(filename, use_predefined_variables=True): + filename = osp.abspath(osp.expanduser(filename)) + check_file_exist(filename) + fileExtname = osp.splitext(filename)[1] + if fileExtname not in [".py", ".json", ".yaml", ".yml"]: + raise IOError("Only py/yml/yaml/json type are supported now!") + + with tempfile.TemporaryDirectory() as temp_config_dir: + temp_config_file = tempfile.NamedTemporaryFile( + dir=temp_config_dir, suffix=fileExtname + ) + if platform.system() == "Windows": + temp_config_file.close() + temp_config_name = osp.basename(temp_config_file.name) + # Substitute predefined variables + if use_predefined_variables: + Config._substitute_predefined_vars(filename, temp_config_file.name) + else: + shutil.copyfile(filename, temp_config_file.name) + # Substitute base variables from placeholders to strings + base_var_dict = Config._pre_substitute_base_vars( + temp_config_file.name, temp_config_file.name + ) + + if filename.endswith(".py"): + temp_module_name = osp.splitext(temp_config_name)[0] + sys.path.insert(0, temp_config_dir) + Config._validate_py_syntax(filename) + mod = import_module(temp_module_name) + sys.path.pop(0) + cfg_dict = { + name: value + for name, value in mod.__dict__.items() + if not name.startswith("__") + } + # delete imported module + del sys.modules[temp_module_name] + elif filename.endswith((".yml", ".yaml", ".json")): + raise NotImplementedError + # close temp file + temp_config_file.close() + + # check deprecation information + if DEPRECATION_KEY in cfg_dict: + deprecation_info = cfg_dict.pop(DEPRECATION_KEY) + warning_msg = ( + f"The config file {filename} will be deprecated " "in the future." + ) + if "expected" in deprecation_info: + warning_msg += f' Please use {deprecation_info["expected"]} ' "instead." + if "reference" in deprecation_info: + warning_msg += ( + " More information can be found at " + f'{deprecation_info["reference"]}' + ) + warnings.warn(warning_msg) + + cfg_text = filename + "\n" + with open(filename, "r", encoding="utf-8") as f: + # Setting encoding explicitly to resolve coding issue on windows + cfg_text += f.read() + + if BASE_KEY in cfg_dict: + cfg_dir = osp.dirname(filename) + base_filename = cfg_dict.pop(BASE_KEY) + base_filename = ( + base_filename if isinstance(base_filename, list) else [base_filename] + ) + + cfg_dict_list = list() + cfg_text_list = list() + for f in base_filename: + _cfg_dict, _cfg_text = Config._file2dict(osp.join(cfg_dir, f)) + cfg_dict_list.append(_cfg_dict) + cfg_text_list.append(_cfg_text) + + base_cfg_dict = dict() + for c in cfg_dict_list: + duplicate_keys = base_cfg_dict.keys() & c.keys() + if len(duplicate_keys) > 0: + raise KeyError( + "Duplicate key is not allowed among bases. " + f"Duplicate keys: {duplicate_keys}" + ) + base_cfg_dict.update(c) + + # Substitute base variables from strings to their actual values + cfg_dict = Config._substitute_base_vars( + cfg_dict, base_var_dict, base_cfg_dict + ) + + base_cfg_dict = Config._merge_a_into_b(cfg_dict, base_cfg_dict) + cfg_dict = base_cfg_dict + + # merge cfg_text + cfg_text_list.append(cfg_text) + cfg_text = "\n".join(cfg_text_list) + + return cfg_dict, cfg_text + + @staticmethod + def _merge_a_into_b(a, b, allow_list_keys=False): + """merge dict ``a`` into dict ``b`` (non-inplace). + + Values in ``a`` will overwrite ``b``. ``b`` is copied first to avoid + in-place modifications. + + Args: + a (dict): The source dict to be merged into ``b``. + b (dict): The origin dict to be fetch keys from ``a``. + allow_list_keys (bool): If True, int string keys (e.g. '0', '1') + are allowed in source ``a`` and will replace the element of the + corresponding index in b if b is a list. Default: False. + + Returns: + dict: The modified dict of ``b`` using ``a``. + + Examples: + # Normally merge a into b. + >>> Config._merge_a_into_b( + ... dict(obj=dict(a=2)), dict(obj=dict(a=1))) + {'obj': {'a': 2}} + + # Delete b first and merge a into b. + >>> Config._merge_a_into_b( + ... dict(obj=dict(_delete_=True, a=2)), dict(obj=dict(a=1))) + {'obj': {'a': 2}} + + # b is a list + >>> Config._merge_a_into_b( + ... {'0': dict(a=2)}, [dict(a=1), dict(b=2)], True) + [{'a': 2}, {'b': 2}] + """ + b = b.copy() + for k, v in a.items(): + if allow_list_keys and k.isdigit() and isinstance(b, list): + k = int(k) + if len(b) <= k: + raise KeyError(f"Index {k} exceeds the length of list {b}") + b[k] = Config._merge_a_into_b(v, b[k], allow_list_keys) + elif isinstance(v, dict) and k in b and not v.pop(DELETE_KEY, False): + allowed_types = (dict, list) if allow_list_keys else dict + if not isinstance(b[k], allowed_types): + raise TypeError( + f"{k}={v} in child config cannot inherit from base " + f"because {k} is a dict in the child config but is of " + f"type {type(b[k])} in base config. You may set " + f"`{DELETE_KEY}=True` to ignore the base config" + ) + b[k] = Config._merge_a_into_b(v, b[k], allow_list_keys) + else: + b[k] = v + return b + + @staticmethod + def fromfile(filename, use_predefined_variables=True, import_custom_modules=True): + cfg_dict, cfg_text = Config._file2dict(filename, use_predefined_variables) + if import_custom_modules and cfg_dict.get("custom_imports", None): + import_modules_from_strings(**cfg_dict["custom_imports"]) + return Config(cfg_dict, cfg_text=cfg_text, filename=filename) + + @staticmethod + def fromstring(cfg_str, file_format): + """Generate config from config str. + + Args: + cfg_str (str): Config str. + file_format (str): Config file format corresponding to the + config str. Only py/yml/yaml/json type are supported now! + + Returns: + obj:`Config`: Config obj. + """ + if file_format not in [".py", ".json", ".yaml", ".yml"]: + raise IOError("Only py/yml/yaml/json type are supported now!") + if file_format != ".py" and "dict(" in cfg_str: + # check if users specify a wrong suffix for python + warnings.warn('Please check "file_format", the file format may be .py') + with tempfile.NamedTemporaryFile( + "w", encoding="utf-8", suffix=file_format, delete=False + ) as temp_file: + temp_file.write(cfg_str) + # on windows, previous implementation cause error + # see PR 1077 for details + cfg = Config.fromfile(temp_file.name) + os.remove(temp_file.name) + return cfg + + @staticmethod + def auto_argparser(description=None): + """Generate argparser from config file automatically (experimental)""" + partial_parser = ArgumentParser(description=description) + partial_parser.add_argument("config", help="config file path") + cfg_file = partial_parser.parse_known_args()[0].config + cfg = Config.fromfile(cfg_file) + parser = ArgumentParser(description=description) + parser.add_argument("config", help="config file path") + add_args(parser, cfg) + return parser, cfg + + def __init__(self, cfg_dict=None, cfg_text=None, filename=None): + if cfg_dict is None: + cfg_dict = dict() + elif not isinstance(cfg_dict, dict): + raise TypeError("cfg_dict must be a dict, but " f"got {type(cfg_dict)}") + for key in cfg_dict: + if key in RESERVED_KEYS: + raise KeyError(f"{key} is reserved for config file") + + super(Config, self).__setattr__("_cfg_dict", ConfigDict(cfg_dict)) + super(Config, self).__setattr__("_filename", filename) + if cfg_text: + text = cfg_text + elif filename: + with open(filename, "r") as f: + text = f.read() + else: + text = "" + super(Config, self).__setattr__("_text", text) + + @property + def filename(self): + return self._filename + + @property + def text(self): + return self._text + + @property + def pretty_text(self): + indent = 4 + + def _indent(s_, num_spaces): + s = s_.split("\n") + if len(s) == 1: + return s_ + first = s.pop(0) + s = [(num_spaces * " ") + line for line in s] + s = "\n".join(s) + s = first + "\n" + s + return s + + def _format_basic_types(k, v, use_mapping=False): + if isinstance(v, str): + v_str = f"'{v}'" + else: + v_str = str(v) + + if use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f"{k_str}: {v_str}" + else: + attr_str = f"{str(k)}={v_str}" + attr_str = _indent(attr_str, indent) + + return attr_str + + def _format_list(k, v, use_mapping=False): + # check if all items in the list are dict + if all(isinstance(_, dict) for _ in v): + v_str = "[\n" + v_str += "\n".join( + f"dict({_indent(_format_dict(v_), indent)})," for v_ in v + ).rstrip(",") + if use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f"{k_str}: {v_str}" + else: + attr_str = f"{str(k)}={v_str}" + attr_str = _indent(attr_str, indent) + "]" + else: + attr_str = _format_basic_types(k, v, use_mapping) + return attr_str + + def _contain_invalid_identifier(dict_str): + contain_invalid_identifier = False + for key_name in dict_str: + contain_invalid_identifier |= not str(key_name).isidentifier() + return contain_invalid_identifier + + def _format_dict(input_dict, outest_level=False): + r = "" + s = [] + + use_mapping = _contain_invalid_identifier(input_dict) + if use_mapping: + r += "{" + for idx, (k, v) in enumerate(input_dict.items()): + is_last = idx >= len(input_dict) - 1 + end = "" if outest_level or is_last else "," + if isinstance(v, dict): + v_str = "\n" + _format_dict(v) + if use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f"{k_str}: dict({v_str}" + else: + attr_str = f"{str(k)}=dict({v_str}" + attr_str = _indent(attr_str, indent) + ")" + end + elif isinstance(v, list): + attr_str = _format_list(k, v, use_mapping) + end + else: + attr_str = _format_basic_types(k, v, use_mapping) + end + + s.append(attr_str) + r += "\n".join(s) + if use_mapping: + r += "}" + return r + + cfg_dict = self._cfg_dict.to_dict() + text = _format_dict(cfg_dict, outest_level=True) + # copied from setup.cfg + yapf_style = dict( + based_on_style="pep8", + blank_line_before_nested_class_or_def=True, + split_before_expression_after_opening_paren=True, + ) + text, _ = FormatCode(text, style_config=yapf_style) + + return text + + def __repr__(self): + return f"Config (path: {self.filename}): {self._cfg_dict.__repr__()}" + + def __len__(self): + return len(self._cfg_dict) + + def __getattr__(self, name): + return getattr(self._cfg_dict, name) + + def __getitem__(self, name): + return self._cfg_dict.__getitem__(name) + + def __setattr__(self, name, value): + if isinstance(value, dict): + value = ConfigDict(value) + self._cfg_dict.__setattr__(name, value) + + def __setitem__(self, name, value): + if isinstance(value, dict): + value = ConfigDict(value) + self._cfg_dict.__setitem__(name, value) + + def __iter__(self): + return iter(self._cfg_dict) + + def __getstate__(self): + return (self._cfg_dict, self._filename, self._text) + + def __setstate__(self, state): + _cfg_dict, _filename, _text = state + super(Config, self).__setattr__("_cfg_dict", _cfg_dict) + super(Config, self).__setattr__("_filename", _filename) + super(Config, self).__setattr__("_text", _text) + + def dump(self, file=None): + cfg_dict = super(Config, self).__getattribute__("_cfg_dict").to_dict() + if self.filename.endswith(".py"): + if file is None: + return self.pretty_text + else: + with open(file, "w", encoding="utf-8") as f: + f.write(self.pretty_text) + else: + import mmcv + + if file is None: + file_format = self.filename.split(".")[-1] + return mmcv.dump(cfg_dict, file_format=file_format) + else: + mmcv.dump(cfg_dict, file) + + def merge_from_dict(self, options, allow_list_keys=True): + """Merge list into cfg_dict. + + Merge the dict parsed by MultipleKVAction into this cfg. + + Examples: + >>> options = {'models.backbone.depth': 50, + ... 'models.backbone.with_cp':True} + >>> cfg = Config(dict(models=dict(backbone=dict(type='ResNet')))) + >>> cfg.merge_from_dict(options) + >>> cfg_dict = super(Config, self).__getattribute__('_cfg_dict') + >>> assert cfg_dict == dict( + ... models=dict(backbone=dict(depth=50, with_cp=True))) + + # Merge list element + >>> cfg = Config(dict(pipeline=[ + ... dict(type='LoadImage'), dict(type='LoadAnnotations')])) + >>> options = dict(pipeline={'0': dict(type='SelfLoadImage')}) + >>> cfg.merge_from_dict(options, allow_list_keys=True) + >>> cfg_dict = super(Config, self).__getattribute__('_cfg_dict') + >>> assert cfg_dict == dict(pipeline=[ + ... dict(type='SelfLoadImage'), dict(type='LoadAnnotations')]) + + Args: + options (dict): dict of configs to merge from. + allow_list_keys (bool): If True, int string keys (e.g. '0', '1') + are allowed in ``options`` and will replace the element of the + corresponding index in the config if the config is a list. + Default: True. + """ + option_cfg_dict = {} + for full_key, v in options.items(): + d = option_cfg_dict + key_list = full_key.split(".") + for subkey in key_list[:-1]: + d.setdefault(subkey, ConfigDict()) + d = d[subkey] + subkey = key_list[-1] + d[subkey] = v + + cfg_dict = super(Config, self).__getattribute__("_cfg_dict") + super(Config, self).__setattr__( + "_cfg_dict", + Config._merge_a_into_b( + option_cfg_dict, cfg_dict, allow_list_keys=allow_list_keys + ), + ) + + +class DictAction(Action): + """ + argparse action to split an argument into KEY=VALUE form + on the first = and append to a dictionary. List options can + be passed as comma separated values, i.e 'KEY=V1,V2,V3', or with explicit + brackets, i.e. 'KEY=[V1,V2,V3]'. It also support nested brackets to build + list/tuple values. e.g. 'KEY=[(V1,V2),(V3,V4)]' + """ + + @staticmethod + def _parse_int_float_bool(val): + try: + return int(val) + except ValueError: + pass + try: + return float(val) + except ValueError: + pass + if val.lower() in ["true", "false"]: + return True if val.lower() == "true" else False + return val + + @staticmethod + def _parse_iterable(val): + """Parse iterable values in the string. + + All elements inside '()' or '[]' are treated as iterable values. + + Args: + val (str): Value string. + + Returns: + list | tuple: The expanded list or tuple from the string. + + Examples: + >>> DictAction._parse_iterable('1,2,3') + [1, 2, 3] + >>> DictAction._parse_iterable('[a, b, c]') + ['a', 'b', 'c'] + >>> DictAction._parse_iterable('[(1, 2, 3), [a, b], c]') + [(1, 2, 3), ['a', 'b'], 'c'] + """ + + def find_next_comma(string): + """Find the position of next comma in the string. + + If no ',' is found in the string, return the string length. All + chars inside '()' and '[]' are treated as one element and thus ',' + inside these brackets are ignored. + """ + assert (string.count("(") == string.count(")")) and ( + string.count("[") == string.count("]") + ), f"Imbalanced brackets exist in {string}" + end = len(string) + for idx, char in enumerate(string): + pre = string[:idx] + # The string before this ',' is balanced + if ( + (char == ",") + and (pre.count("(") == pre.count(")")) + and (pre.count("[") == pre.count("]")) + ): + end = idx + break + return end + + # Strip ' and " characters and replace whitespace. + val = val.strip("'\"").replace(" ", "") + is_tuple = False + if val.startswith("(") and val.endswith(")"): + is_tuple = True + val = val[1:-1] + elif val.startswith("[") and val.endswith("]"): + val = val[1:-1] + elif "," not in val: + # val is a single value + return DictAction._parse_int_float_bool(val) + + values = [] + while len(val) > 0: + comma_idx = find_next_comma(val) + element = DictAction._parse_iterable(val[:comma_idx]) + values.append(element) + val = val[comma_idx + 1 :] + if is_tuple: + values = tuple(values) + return values + + def __call__(self, parser, namespace, values, option_string=None): + options = {} + for kv in values: + key, val = kv.split("=", maxsplit=1) + options[key] = self._parse_iterable(val) + setattr(namespace, self.dest, options) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/env.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/env.py new file mode 100644 index 0000000..653f007 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/utils/env.py @@ -0,0 +1,36 @@ +""" +Environment Utils + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import os +import random +import numpy as np +import torch +import torch.backends.cudnn as cudnn + +from datetime import datetime + + +def get_random_seed(): + seed = ( + os.getpid() + + int(datetime.now().strftime("%S%f")) + + int.from_bytes(os.urandom(2), "big") + ) + return seed + + +def set_seed(seed=None): + if seed is None: + seed = get_random_seed() + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + cudnn.benchmark = False + cudnn.deterministic = True + os.environ["PYTHONHASHSEED"] = str(seed) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/events.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/events.py new file mode 100644 index 0000000..718ee91 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/utils/events.py @@ -0,0 +1,612 @@ +""" +Events Utils + +Modified from Detectron2 + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import datetime +import json +import logging +import os +import time +import torch +import numpy as np +import traceback +import sys + +from typing import List, Optional, Tuple +from collections import defaultdict +from contextlib import contextmanager + +__all__ = [ + "get_event_storage", + "JSONWriter", + "TensorboardXWriter", + "CommonMetricPrinter", + "EventStorage", + "ExceptionWriter", +] + +_CURRENT_STORAGE_STACK = [] + + +def get_event_storage(): + """ + Returns: + The :class:`EventStorage` object that's currently being used. + Throws an error if no :class:`EventStorage` is currently enabled. + """ + assert len( + _CURRENT_STORAGE_STACK + ), "get_event_storage() has to be called inside a 'with EventStorage(...)' context!" + return _CURRENT_STORAGE_STACK[-1] + + +class EventWriter: + """ + Base class for writers that obtain events from :class:`EventStorage` and process them. + """ + + def write(self): + raise NotImplementedError + + def close(self): + pass + + +class JSONWriter(EventWriter): + """ + Write scalars to a json file. + It saves scalars as one json per line (instead of a big json) for easy parsing. + Examples parsing such a json file: + :: + $ cat metrics.json | jq -s '.[0:2]' + [ + { + "data_time": 0.008433341979980469, + "iteration": 19, + "loss": 1.9228371381759644, + "loss_box_reg": 0.050025828182697296, + "loss_classifier": 0.5316952466964722, + "loss_mask": 0.7236229181289673, + "loss_rpn_box": 0.0856662318110466, + "loss_rpn_cls": 0.48198649287223816, + "lr": 0.007173333333333333, + "time": 0.25401854515075684 + }, + { + "data_time": 0.007216215133666992, + "iteration": 39, + "loss": 1.282649278640747, + "loss_box_reg": 0.06222952902317047, + "loss_classifier": 0.30682939291000366, + "loss_mask": 0.6970193982124329, + "loss_rpn_box": 0.038663312792778015, + "loss_rpn_cls": 0.1471673548221588, + "lr": 0.007706666666666667, + "time": 0.2490077018737793 + } + ] + $ cat metrics.json | jq '.loss_mask' + 0.7126231789588928 + 0.689423680305481 + 0.6776131987571716 + ... + """ + + def __init__(self, json_file, window_size=20): + """ + Args: + json_file (str): path to the json file. New data will be appended if the file exists. + window_size (int): the window size of median smoothing for the scalars whose + `smoothing_hint` are True. + """ + self._file_handle = open(json_file, "a") + self._window_size = window_size + self._last_write = -1 + + def write(self): + storage = get_event_storage() + to_save = defaultdict(dict) + + for k, (v, iter) in storage.latest_with_smoothing_hint( + self._window_size + ).items(): + # keep scalars that have not been written + if iter <= self._last_write: + continue + to_save[iter][k] = v + if len(to_save): + all_iters = sorted(to_save.keys()) + self._last_write = max(all_iters) + + for itr, scalars_per_iter in to_save.items(): + scalars_per_iter["iteration"] = itr + self._file_handle.write(json.dumps(scalars_per_iter, sort_keys=True) + "\n") + self._file_handle.flush() + try: + os.fsync(self._file_handle.fileno()) + except AttributeError: + pass + + def close(self): + self._file_handle.close() + + +class TensorboardXWriter(EventWriter): + """ + Write all scalars to a tensorboard file. + """ + + def __init__(self, log_dir: str, window_size: int = 20, **kwargs): + """ + Args: + log_dir (str): the directory to save the output events + window_size (int): the scalars will be median-smoothed by this window size + kwargs: other arguments passed to `torch.utils.tensorboard.SummaryWriter(...)` + """ + self._window_size = window_size + from torch.utils.tensorboard import SummaryWriter + + self._writer = SummaryWriter(log_dir, **kwargs) + self._last_write = -1 + + def write(self): + storage = get_event_storage() + new_last_write = self._last_write + for k, (v, iter) in storage.latest_with_smoothing_hint( + self._window_size + ).items(): + if iter > self._last_write: + self._writer.add_scalar(k, v, iter) + new_last_write = max(new_last_write, iter) + self._last_write = new_last_write + + # storage.put_{image,histogram} is only meant to be used by + # tensorboard writer. So we access its internal fields directly from here. + if len(storage._vis_data) >= 1: + for img_name, img, step_num in storage._vis_data: + self._writer.add_image(img_name, img, step_num) + # Storage stores all image data and rely on this writer to clear them. + # As a result it assumes only one writer will use its image data. + # An alternative design is to let storage store limited recent + # data (e.g. only the most recent image) that all writers can access. + # In that case a writer may not see all image data if its period is long. + storage.clear_images() + + if len(storage._histograms) >= 1: + for params in storage._histograms: + self._writer.add_histogram_raw(**params) + storage.clear_histograms() + + def close(self): + if hasattr(self, "_writer"): # doesn't exist when the code fails at import + self._writer.close() + + +class CommonMetricPrinter(EventWriter): + """ + Print **common** metrics to the terminal, including + iteration time, ETA, memory, all losses, and the learning rate. + It also applies smoothing using a window of 20 elements. + It's meant to print common metrics in common ways. + To print something in more customized ways, please implement a similar printer by yourself. + """ + + def __init__(self, max_iter: Optional[int] = None, window_size: int = 20): + """ + Args: + max_iter: the maximum number of iterations to train. + Used to compute ETA. If not given, ETA will not be printed. + window_size (int): the losses will be median-smoothed by this window size + """ + self.logger = logging.getLogger(__name__) + self._max_iter = max_iter + self._window_size = window_size + self._last_write = ( + None # (step, time) of last call to write(). Used to compute ETA + ) + + def _get_eta(self, storage) -> Optional[str]: + if self._max_iter is None: + return "" + iteration = storage.iter + try: + eta_seconds = storage.history("time").median(1000) * ( + self._max_iter - iteration - 1 + ) + storage.put_scalar("eta_seconds", eta_seconds, smoothing_hint=False) + return str(datetime.timedelta(seconds=int(eta_seconds))) + except KeyError: + # estimate eta on our own - more noisy + eta_string = None + if self._last_write is not None: + estimate_iter_time = (time.perf_counter() - self._last_write[1]) / ( + iteration - self._last_write[0] + ) + eta_seconds = estimate_iter_time * (self._max_iter - iteration - 1) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + self._last_write = (iteration, time.perf_counter()) + return eta_string + + def write(self): + storage = get_event_storage() + iteration = storage.iter + if iteration == self._max_iter: + # This hook only reports training progress (loss, ETA, etc) but not other data, + # therefore do not write anything after training succeeds, even if this method + # is called. + return + + try: + data_time = storage.history("data_time").avg(20) + except KeyError: + # they may not exist in the first few iterations (due to warmup) + # or when SimpleTrainer is not used + data_time = None + try: + iter_time = storage.history("time").global_avg() + except KeyError: + iter_time = None + try: + lr = "{:.5g}".format(storage.history("lr").latest()) + except KeyError: + lr = "N/A" + + eta_string = self._get_eta(storage) + + if torch.cuda.is_available(): + max_mem_mb = torch.cuda.max_memory_allocated() / 1024.0 / 1024.0 + else: + max_mem_mb = None + + # NOTE: max_mem is parsed by grep in "dev/parse_results.sh" + self.logger.info( + " {eta}iter: {iter} {losses} {time}{data_time}lr: {lr} {memory}".format( + eta=f"eta: {eta_string} " if eta_string else "", + iter=iteration, + losses=" ".join( + [ + "{}: {:.4g}".format(k, v.median(self._window_size)) + for k, v in storage.histories().items() + if "loss" in k + ] + ), + time=( + "time: {:.4f} ".format(iter_time) if iter_time is not None else "" + ), + data_time=( + "data_time: {:.4f} ".format(data_time) + if data_time is not None + else "" + ), + lr=lr, + memory=( + "max_mem: {:.0f}M".format(max_mem_mb) + if max_mem_mb is not None + else "" + ), + ) + ) + + +class EventStorage: + """ + The user-facing class that provides metric storage functionalities. + In the future we may add support for storing / logging other types of data if needed. + """ + + def __init__(self, start_iter=0): + """ + Args: + start_iter (int): the iteration number to start with + """ + self._history = defaultdict(AverageMeter) + self._smoothing_hints = {} + self._latest_scalars = {} + self._iter = start_iter + self._current_prefix = "" + self._vis_data = [] + self._histograms = [] + + # def put_image(self, img_name, img_tensor): + # """ + # Add an `img_tensor` associated with `img_name`, to be shown on + # tensorboard. + # Args: + # img_name (str): The name of the image to put into tensorboard. + # img_tensor (torch.Tensor or numpy.array): An `uint8` or `float` + # Tensor of shape `[channel, height, width]` where `channel` is + # 3. The image format should be RGB. The elements in img_tensor + # can either have values in [0, 1] (float32) or [0, 255] (uint8). + # The `img_tensor` will be visualized in tensorboard. + # """ + # self._vis_data.append((img_name, img_tensor, self._iter)) + + def put_scalar(self, name, value, n=1, smoothing_hint=False): + """ + Add a scalar `value` to the `HistoryBuffer` associated with `name`. + Args: + smoothing_hint (bool): a 'hint' on whether this scalar is noisy and should be + smoothed when logged. The hint will be accessible through + :meth:`EventStorage.smoothing_hints`. A writer may ignore the hint + and apply custom smoothing rule. + It defaults to True because most scalars we save need to be smoothed to + provide any useful signal. + """ + name = self._current_prefix + name + history = self._history[name] + history.update(value, n) + self._latest_scalars[name] = (value, self._iter) + + existing_hint = self._smoothing_hints.get(name) + if existing_hint is not None: + assert ( + existing_hint == smoothing_hint + ), "Scalar {} was put with a different smoothing_hint!".format(name) + else: + self._smoothing_hints[name] = smoothing_hint + + # def put_scalars(self, *, smoothing_hint=True, **kwargs): + # """ + # Put multiple scalars from keyword arguments. + # Examples: + # storage.put_scalars(loss=my_loss, accuracy=my_accuracy, smoothing_hint=True) + # """ + # for k, v in kwargs.items(): + # self.put_scalar(k, v, smoothing_hint=smoothing_hint) + # + # def put_histogram(self, hist_name, hist_tensor, bins=1000): + # """ + # Create a histogram from a tensor. + # Args: + # hist_name (str): The name of the histogram to put into tensorboard. + # hist_tensor (torch.Tensor): A Tensor of arbitrary shape to be converted + # into a histogram. + # bins (int): Number of histogram bins. + # """ + # ht_min, ht_max = hist_tensor.min().item(), hist_tensor.max().item() + # + # # Create a histogram with PyTorch + # hist_counts = torch.histc(hist_tensor, bins=bins) + # hist_edges = torch.linspace(start=ht_min, end=ht_max, steps=bins + 1, dtype=torch.float32) + # + # # Parameter for the add_histogram_raw function of SummaryWriter + # hist_params = dict( + # tag=hist_name, + # min=ht_min, + # max=ht_max, + # num=len(hist_tensor), + # sum=float(hist_tensor.sum()), + # sum_squares=float(torch.sum(hist_tensor**2)), + # bucket_limits=hist_edges[1:].tolist(), + # bucket_counts=hist_counts.tolist(), + # global_step=self._iter, + # ) + # self._histograms.append(hist_params) + + def history(self, name): + """ + Returns: + AverageMeter: the history for name + """ + ret = self._history.get(name, None) + if ret is None: + raise KeyError("No history metric available for {}!".format(name)) + return ret + + def histories(self): + """ + Returns: + dict[name -> HistoryBuffer]: the HistoryBuffer for all scalars + """ + return self._history + + def latest(self): + """ + Returns: + dict[str -> (float, int)]: mapping from the name of each scalar to the most + recent value and the iteration number its added. + """ + return self._latest_scalars + + def latest_with_smoothing_hint(self, window_size=20): + """ + Similar to :meth:`latest`, but the returned values + are either the un-smoothed original latest value, + or a median of the given window_size, + depend on whether the smoothing_hint is True. + This provides a default behavior that other writers can use. + """ + result = {} + for k, (v, itr) in self._latest_scalars.items(): + result[k] = ( + self._history[k].median(window_size) if self._smoothing_hints[k] else v, + itr, + ) + return result + + def smoothing_hints(self): + """ + Returns: + dict[name -> bool]: the user-provided hint on whether the scalar + is noisy and needs smoothing. + """ + return self._smoothing_hints + + def step(self): + """ + User should either: (1) Call this function to increment storage.iter when needed. Or + (2) Set `storage.iter` to the correct iteration number before each iteration. + The storage will then be able to associate the new data with an iteration number. + """ + self._iter += 1 + + @property + def iter(self): + """ + Returns: + int: The current iteration number. When used together with a trainer, + this is ensured to be the same as trainer.iter. + """ + return self._iter + + @iter.setter + def iter(self, val): + self._iter = int(val) + + @property + def iteration(self): + # for backward compatibility + return self._iter + + def __enter__(self): + _CURRENT_STORAGE_STACK.append(self) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + assert _CURRENT_STORAGE_STACK[-1] == self + _CURRENT_STORAGE_STACK.pop() + + @contextmanager + def name_scope(self, name): + """ + Yields: + A context within which all the events added to this storage + will be prefixed by the name scope. + """ + old_prefix = self._current_prefix + self._current_prefix = name.rstrip("/") + "/" + yield + self._current_prefix = old_prefix + + def clear_images(self): + """ + Delete all the stored images for visualization. This should be called + after images are written to tensorboard. + """ + self._vis_data = [] + + def clear_histograms(self): + """ + Delete all the stored histograms for visualization. + This should be called after histograms are written to tensorboard. + """ + self._histograms = [] + + def reset_history(self, name): + ret = self._history.get(name, None) + if ret is None: + raise KeyError("No history metric available for {}!".format(name)) + ret.reset() + + def reset_histories(self): + for name in self._history.keys(): + self._history[name].reset() + + +class AverageMeter: + """Computes and stores the average and current value""" + + def __init__(self): + self.val = 0 + self.avg = 0 + self.total = 0 + self.count = 0 + + def reset(self): + self.val = 0 + self.avg = 0 + self.total = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.total += val * n + self.count += n + self.avg = self.total / self.count + + +class HistoryBuffer: + """ + Track a series of scalar values and provide access to smoothed values over a + window or the global average of the series. + """ + + def __init__(self, max_length: int = 1000000) -> None: + """ + Args: + max_length: maximal number of values that can be stored in the + buffer. When the capacity of the buffer is exhausted, old + values will be removed. + """ + self._max_length: int = max_length + self._data: List[Tuple[float, float]] = [] # (value, iteration) pairs + self._count: int = 0 + self._global_avg: float = 0 + + def update(self, value: float, iteration: Optional[float] = None) -> None: + """ + Add a new scalar value produced at certain iteration. If the length + of the buffer exceeds self._max_length, the oldest element will be + removed from the buffer. + """ + if iteration is None: + iteration = self._count + if len(self._data) == self._max_length: + self._data.pop(0) + self._data.append((value, iteration)) + + self._count += 1 + self._global_avg += (value - self._global_avg) / self._count + + def latest(self) -> float: + """ + Return the latest scalar value added to the buffer. + """ + return self._data[-1][0] + + def median(self, window_size: int) -> float: + """ + Return the median of the latest `window_size` values in the buffer. + """ + return np.median([x[0] for x in self._data[-window_size:]]) + + def avg(self, window_size: int) -> float: + """ + Return the mean of the latest `window_size` values in the buffer. + """ + return np.mean([x[0] for x in self._data[-window_size:]]) + + def global_avg(self) -> float: + """ + Return the mean of all the elements in the buffer. Note that this + includes those getting removed due to limited buffer storage. + """ + return self._global_avg + + def values(self) -> List[Tuple[float, float]]: + """ + Returns: + list[(number, iteration)]: content of the current buffer. + """ + return self._data + + +class ExceptionWriter: + + def __init__(self): + self.logger = logging.getLogger(__name__) + + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_val, exc_tb): + if exc_type: + tb = traceback.format_exception(exc_type, exc_val, exc_tb) + formatted_tb_str = "".join(tb) + self.logger.error(formatted_tb_str) + sys.exit(1) # This prevents double logging the error to the console diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/misc.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/misc.py new file mode 100644 index 0000000..3177bae --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/utils/misc.py @@ -0,0 +1,164 @@ +""" +Misc + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import os +import warnings +from collections import abc +import numpy as np +import torch +from importlib import import_module + + +class AverageMeter(object): + """Computes and stores the average and current value""" + + def __init__(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + + +def intersection_and_union(output, target, K, ignore_index=-1): + # 'K' classes, output and target sizes are N or N * L or N * H * W, each value in range 0 to K - 1. + assert output.ndim in [1, 2, 3] + assert output.shape == target.shape + output = output.reshape(output.size).copy() + target = target.reshape(target.size) + output[np.where(target == ignore_index)[0]] = ignore_index + intersection = output[np.where(output == target)[0]] + area_intersection, _ = np.histogram(intersection, bins=np.arange(K + 1)) + area_output, _ = np.histogram(output, bins=np.arange(K + 1)) + area_target, _ = np.histogram(target, bins=np.arange(K + 1)) + area_union = area_output + area_target - area_intersection + return area_intersection, area_union, area_target + + +def intersection_and_union_gpu(output, target, k, ignore_index=-1): + # 'K' classes, output and target sizes are N or N * L or N * H * W, each value in range 0 to K - 1. + assert output.dim() in [1, 2, 3] + assert output.shape == target.shape + output = output.view(-1) + target = target.view(-1) + output[target == ignore_index] = ignore_index + intersection = output[output == target] + area_intersection = torch.histc(intersection, bins=k, min=0, max=k - 1) + area_output = torch.histc(output, bins=k, min=0, max=k - 1) + area_target = torch.histc(target, bins=k, min=0, max=k - 1) + area_union = area_output + area_target - area_intersection + return area_intersection, area_union, area_target + + +def make_dirs(dir_name): + if not os.path.exists(dir_name): + os.makedirs(dir_name, exist_ok=True) + + +def find_free_port(): + import socket + + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + # Binding to port 0 will cause the OS to find an available port for us + sock.bind(("", 0)) + port = sock.getsockname()[1] + sock.close() + # NOTE: there is still a chance the port could be taken by other processes. + return port + + +def is_seq_of(seq, expected_type, seq_type=None): + """Check whether it is a sequence of some type. + + Args: + seq (Sequence): The sequence to be checked. + expected_type (type): Expected type of sequence items. + seq_type (type, optional): Expected sequence type. + + Returns: + bool: Whether the sequence is valid. + """ + if seq_type is None: + exp_seq_type = abc.Sequence + else: + assert isinstance(seq_type, type) + exp_seq_type = seq_type + if not isinstance(seq, exp_seq_type): + return False + for item in seq: + if not isinstance(item, expected_type): + return False + return True + + +def is_str(x): + """Whether the input is an string instance. + + Note: This method is deprecated since python 2 is no longer supported. + """ + return isinstance(x, str) + + +def import_modules_from_strings(imports, allow_failed_imports=False): + """Import modules from the given list of strings. + + Args: + imports (list | str | None): The given module names to be imported. + allow_failed_imports (bool): If True, the failed imports will return + None. Otherwise, an ImportError is raise. Default: False. + + Returns: + list[module] | module | None: The imported modules. + + Examples: + >>> osp, sys = import_modules_from_strings( + ... ['os.path', 'sys']) + >>> import os.path as osp_ + >>> import sys as sys_ + >>> assert osp == osp_ + >>> assert sys == sys_ + """ + if not imports: + return + single_import = False + if isinstance(imports, str): + single_import = True + imports = [imports] + if not isinstance(imports, list): + raise TypeError(f"custom_imports must be a list but got type {type(imports)}") + imported = [] + for imp in imports: + if not isinstance(imp, str): + raise TypeError(f"{imp} is of type {type(imp)} and cannot be imported.") + try: + imported_tmp = import_module(imp) + except ImportError: + if allow_failed_imports: + warnings.warn(f"{imp} failed to import and is ignored.", UserWarning) + imported_tmp = None + else: + raise ImportError + imported.append(imported_tmp) + if single_import: + imported = imported[0] + return imported + + +class DummyClass: + def __init__(self): + pass diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/optimizer.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/optimizer.py new file mode 100644 index 0000000..eef8735 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/utils/optimizer.py @@ -0,0 +1,57 @@ +""" +Optimizer + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import copy +import torch +from pointcept.utils.logger import get_root_logger +from pointcept.utils.registry import Registry + +OPTIMIZERS = Registry("optimizers") + + +OPTIMIZERS.register_module(module=torch.optim.SGD, name="SGD") +OPTIMIZERS.register_module(module=torch.optim.Adam, name="Adam") +OPTIMIZERS.register_module(module=torch.optim.AdamW, name="AdamW") + + +def build_optimizer(cfg, model, param_dicts=None): + cfg = copy.deepcopy(cfg) + if param_dicts is None: + cfg.params = model.parameters() + else: + cfg.params = [dict(names=[], params=[], lr=cfg.lr)] + for i in range(len(param_dicts)): + param_group = dict(names=[], params=[]) + if "lr" in param_dicts[i].keys(): + param_group["lr"] = param_dicts[i].lr + if "momentum" in param_dicts[i].keys(): + param_group["momentum"] = param_dicts[i].momentum + if "weight_decay" in param_dicts[i].keys(): + param_group["weight_decay"] = param_dicts[i].weight_decay + cfg.params.append(param_group) + + for n, p in model.named_parameters(): + flag = False + for i in range(len(param_dicts)): + if param_dicts[i].keyword in n: + cfg.params[i + 1]["names"].append(n) + cfg.params[i + 1]["params"].append(p) + flag = True + break + if not flag: + cfg.params[0]["names"].append(n) + cfg.params[0]["params"].append(p) + + logger = get_root_logger() + for i in range(len(cfg.params)): + param_names = cfg.params[i].pop("names") + message = "" + for key in cfg.params[i].keys(): + if key != "params": + message += f" {key}: {cfg.params[i][key]};" + logger.info(f"Params Group {i+1} -{message} Params: {param_names}.") + return OPTIMIZERS.build(cfg=cfg) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/path.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/path.py new file mode 100644 index 0000000..ce98fa5 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/utils/path.py @@ -0,0 +1,103 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os +import os.path as osp +from pathlib import Path + +from .misc import is_str + + +def is_filepath(x): + return is_str(x) or isinstance(x, Path) + + +def fopen(filepath, *args, **kwargs): + if is_str(filepath): + return open(filepath, *args, **kwargs) + elif isinstance(filepath, Path): + return filepath.open(*args, **kwargs) + raise ValueError("`filepath` should be a string or a Path") + + +def check_file_exist(filename, msg_tmpl='file "{}" does not exist'): + if not osp.isfile(filename): + raise FileNotFoundError(msg_tmpl.format(filename)) + + +def mkdir_or_exist(dir_name, mode=0o777): + if dir_name == "": + return + dir_name = osp.expanduser(dir_name) + os.makedirs(dir_name, mode=mode, exist_ok=True) + + +def symlink(src, dst, overwrite=True, **kwargs): + if os.path.lexists(dst) and overwrite: + os.remove(dst) + os.symlink(src, dst, **kwargs) + + +def scandir(dir_path, suffix=None, recursive=False, case_sensitive=True): + """Scan a directory to find the interested files. + + Args: + dir_path (str | obj:`Path`): Path of the directory. + suffix (str | tuple(str), optional): File suffix that we are + interested in. Default: None. + recursive (bool, optional): If set to True, recursively scan the + directory. Default: False. + case_sensitive (bool, optional) : If set to False, ignore the case of + suffix. Default: True. + + Returns: + A generator for all the interested files with relative paths. + """ + if isinstance(dir_path, (str, Path)): + dir_path = str(dir_path) + else: + raise TypeError('"dir_path" must be a string or Path object') + + if (suffix is not None) and not isinstance(suffix, (str, tuple)): + raise TypeError('"suffix" must be a string or tuple of strings') + + if suffix is not None and not case_sensitive: + suffix = ( + suffix.lower() + if isinstance(suffix, str) + else tuple(item.lower() for item in suffix) + ) + + root = dir_path + + def _scandir(dir_path, suffix, recursive, case_sensitive): + for entry in os.scandir(dir_path): + if not entry.name.startswith(".") and entry.is_file(): + rel_path = osp.relpath(entry.path, root) + _rel_path = rel_path if case_sensitive else rel_path.lower() + if suffix is None or _rel_path.endswith(suffix): + yield rel_path + elif recursive and os.path.isdir(entry.path): + # scan recursively if entry.path is a directory + yield from _scandir(entry.path, suffix, recursive, case_sensitive) + + return _scandir(dir_path, suffix, recursive, case_sensitive) + + +def find_vcs_root(path, markers=(".git",)): + """Finds the root directory (including itself) of specified markers. + + Args: + path (str): Path of directory or file. + markers (list[str], optional): List of file or directory names. + + Returns: + The directory contained one of the markers or None if not found. + """ + if osp.isfile(path): + path = osp.dirname(path) + + prev, cur = None, osp.abspath(osp.expanduser(path)) + while cur != prev: + if any(osp.exists(osp.join(cur, marker)) for marker in markers): + return cur + prev, cur = cur, osp.split(cur)[0] + return None diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/registry.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/registry.py new file mode 100644 index 0000000..7ac308a --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/utils/registry.py @@ -0,0 +1,316 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import inspect +import warnings +from functools import partial + +from .misc import is_seq_of + + +def build_from_cfg(cfg, registry, default_args=None): + """Build a module from configs dict. + + Args: + cfg (dict): Config dict. It should at least contain the key "type". + registry (:obj:`Registry`): The registry to search the type from. + default_args (dict, optional): Default initialization arguments. + + Returns: + object: The constructed object. + """ + if not isinstance(cfg, dict): + raise TypeError(f"cfg must be a dict, but got {type(cfg)}") + if "type" not in cfg: + if default_args is None or "type" not in default_args: + raise KeyError( + '`cfg` or `default_args` must contain the key "type", ' + f"but got {cfg}\n{default_args}" + ) + if not isinstance(registry, Registry): + raise TypeError( + "registry must be an mmcv.Registry object, " f"but got {type(registry)}" + ) + if not (isinstance(default_args, dict) or default_args is None): + raise TypeError( + "default_args must be a dict or None, " f"but got {type(default_args)}" + ) + + args = cfg.copy() + + if default_args is not None: + for name, value in default_args.items(): + args.setdefault(name, value) + + obj_type = args.pop("type") + if isinstance(obj_type, str): + obj_cls = registry.get(obj_type) + if obj_cls is None: + raise KeyError(f"{obj_type} is not in the {registry.name} registry") + elif inspect.isclass(obj_type): + obj_cls = obj_type + else: + raise TypeError(f"type must be a str or valid type, but got {type(obj_type)}") + try: + return obj_cls(**args) + except Exception as e: + # Normal TypeError does not print class name. + raise type(e)(f"{obj_cls.__name__}: {e}") + + +class Registry: + """A registry to map strings to classes. + + Registered object could be built from registry. + Example: + >>> MODELS = Registry('models') + >>> @MODELS.register_module() + >>> class ResNet: + >>> pass + >>> resnet = MODELS.build(dict(type='ResNet')) + + Please refer to + https://mmcv.readthedocs.io/en/latest/understand_mmcv/registry.html for + advanced usage. + + Args: + name (str): Registry name. + build_func(func, optional): Build function to construct instance from + Registry, func:`build_from_cfg` is used if neither ``parent`` or + ``build_func`` is specified. If ``parent`` is specified and + ``build_func`` is not given, ``build_func`` will be inherited + from ``parent``. Default: None. + parent (Registry, optional): Parent registry. The class registered in + children registry could be built from parent. Default: None. + scope (str, optional): The scope of registry. It is the key to search + for children registry. If not specified, scope will be the name of + the package where class is defined, e.g. mmdet, mmcls, mmseg. + Default: None. + """ + + def __init__(self, name, build_func=None, parent=None, scope=None): + self._name = name + self._module_dict = dict() + self._children = dict() + self._scope = self.infer_scope() if scope is None else scope + + # self.build_func will be set with the following priority: + # 1. build_func + # 2. parent.build_func + # 3. build_from_cfg + if build_func is None: + if parent is not None: + self.build_func = parent.build_func + else: + self.build_func = build_from_cfg + else: + self.build_func = build_func + if parent is not None: + assert isinstance(parent, Registry) + parent._add_children(self) + self.parent = parent + else: + self.parent = None + + def __len__(self): + return len(self._module_dict) + + def __contains__(self, key): + return self.get(key) is not None + + def __repr__(self): + format_str = ( + self.__class__.__name__ + f"(name={self._name}, " + f"items={self._module_dict})" + ) + return format_str + + @staticmethod + def infer_scope(): + """Infer the scope of registry. + + The name of the package where registry is defined will be returned. + + Example: + # in mmdet/models/backbone/resnet.py + >>> MODELS = Registry('models') + >>> @MODELS.register_module() + >>> class ResNet: + >>> pass + The scope of ``ResNet`` will be ``mmdet``. + + + Returns: + scope (str): The inferred scope name. + """ + # inspect.stack() trace where this function is called, the index-2 + # indicates the frame where `infer_scope()` is called + filename = inspect.getmodule(inspect.stack()[2][0]).__name__ + split_filename = filename.split(".") + return split_filename[0] + + @staticmethod + def split_scope_key(key): + """Split scope and key. + + The first scope will be split from key. + + Examples: + >>> Registry.split_scope_key('mmdet.ResNet') + 'mmdet', 'ResNet' + >>> Registry.split_scope_key('ResNet') + None, 'ResNet' + + Return: + scope (str, None): The first scope. + key (str): The remaining key. + """ + split_index = key.find(".") + if split_index != -1: + return key[:split_index], key[split_index + 1 :] + else: + return None, key + + @property + def name(self): + return self._name + + @property + def scope(self): + return self._scope + + @property + def module_dict(self): + return self._module_dict + + @property + def children(self): + return self._children + + def get(self, key): + """Get the registry record. + + Args: + key (str): The class name in string format. + + Returns: + class: The corresponding class. + """ + scope, real_key = self.split_scope_key(key) + if scope is None or scope == self._scope: + # get from self + if real_key in self._module_dict: + return self._module_dict[real_key] + else: + # get from self._children + if scope in self._children: + return self._children[scope].get(real_key) + else: + # goto root + parent = self.parent + while parent.parent is not None: + parent = parent.parent + return parent.get(key) + + def build(self, *args, **kwargs): + return self.build_func(*args, **kwargs, registry=self) + + def _add_children(self, registry): + """Add children for a registry. + + The ``registry`` will be added as children based on its scope. + The parent registry could build objects from children registry. + + Example: + >>> models = Registry('models') + >>> mmdet_models = Registry('models', parent=models) + >>> @mmdet_models.register_module() + >>> class ResNet: + >>> pass + >>> resnet = models.build(dict(type='mmdet.ResNet')) + """ + + assert isinstance(registry, Registry) + assert registry.scope is not None + assert ( + registry.scope not in self.children + ), f"scope {registry.scope} exists in {self.name} registry" + self.children[registry.scope] = registry + + def _register_module(self, module_class, module_name=None, force=False): + if not inspect.isclass(module_class): + raise TypeError("module must be a class, " f"but got {type(module_class)}") + + if module_name is None: + module_name = module_class.__name__ + if isinstance(module_name, str): + module_name = [module_name] + for name in module_name: + if not force and name in self._module_dict: + raise KeyError(f"{name} is already registered " f"in {self.name}") + self._module_dict[name] = module_class + + def deprecated_register_module(self, cls=None, force=False): + warnings.warn( + "The old API of register_module(module, force=False) " + "is deprecated and will be removed, please use the new API " + "register_module(name=None, force=False, module=None) instead." + ) + if cls is None: + return partial(self.deprecated_register_module, force=force) + self._register_module(cls, force=force) + return cls + + def register_module(self, name=None, force=False, module=None): + """Register a module. + + A record will be added to `self._module_dict`, whose key is the class + name or the specified name, and value is the class itself. + It can be used as a decorator or a normal function. + + Example: + >>> backbones = Registry('backbone') + >>> @backbones.register_module() + >>> class ResNet: + >>> pass + + >>> backbones = Registry('backbone') + >>> @backbones.register_module(name='mnet') + >>> class MobileNet: + >>> pass + + >>> backbones = Registry('backbone') + >>> class ResNet: + >>> pass + >>> backbones.register_module(ResNet) + + Args: + name (str | None): The module name to be registered. If not + specified, the class name will be used. + force (bool, optional): Whether to override an existing class with + the same name. Default: False. + module (type): Module class to be registered. + """ + if not isinstance(force, bool): + raise TypeError(f"force must be a boolean, but got {type(force)}") + # NOTE: This is a walkaround to be compatible with the old api, + # while it may introduce unexpected bugs. + if isinstance(name, type): + return self.deprecated_register_module(name, force=force) + + # raise the error ahead of time + if not (name is None or isinstance(name, str) or is_seq_of(name, str)): + raise TypeError( + "name must be either of None, an instance of str or a sequence" + f" of str, but got {type(name)}" + ) + + # use it as a normal method: x.register_module(module=SomeClass) + if module is not None: + self._register_module(module_class=module, module_name=name, force=force) + return module + + # use it as a decorator: @x.register_module() + def _register(cls): + self._register_module(module_class=cls, module_name=name, force=force) + return cls + + return _register diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/scheduler.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/scheduler.py new file mode 100644 index 0000000..e57084f --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/utils/scheduler.py @@ -0,0 +1,205 @@ +""" +Scheduler + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import copy +import numpy as np +import torch.optim.lr_scheduler as lr_scheduler +from .registry import Registry + +SCHEDULERS = Registry("schedulers") + + +@SCHEDULERS.register_module() +class MultiStepLR(lr_scheduler.MultiStepLR): + def __init__( + self, + optimizer, + milestones, + total_steps, + gamma=0.1, + last_epoch=-1, + ): + super().__init__( + optimizer=optimizer, + milestones=[int(rate * total_steps) for rate in milestones], + gamma=gamma, + last_epoch=last_epoch, + ) + + +@SCHEDULERS.register_module() +class MultiStepWithWarmupLR(lr_scheduler.LambdaLR): + def __init__( + self, + optimizer, + milestones, + total_steps, + gamma=0.1, + warmup_rate=0.05, + warmup_scale=1e-6, + last_epoch=-1, + ): + milestones = [rate * total_steps for rate in milestones] + + def multi_step_with_warmup(s): + factor = 1.0 + for i in range(len(milestones)): + if s < milestones[i]: + break + factor *= gamma + + if s <= warmup_rate * total_steps: + warmup_coefficient = 1 - (1 - s / warmup_rate / total_steps) * ( + 1 - warmup_scale + ) + else: + warmup_coefficient = 1.0 + return warmup_coefficient * factor + + super().__init__( + optimizer=optimizer, + lr_lambda=multi_step_with_warmup, + last_epoch=last_epoch, + ) + + +@SCHEDULERS.register_module() +class PolyLR(lr_scheduler.LambdaLR): + def __init__( + self, + optimizer, + total_steps, + power=0.9, + last_epoch=-1, + ): + super().__init__( + optimizer=optimizer, + lr_lambda=lambda s: (1 - s / (total_steps + 1)) ** power, + last_epoch=last_epoch, + ) + + +@SCHEDULERS.register_module() +class ExpLR(lr_scheduler.LambdaLR): + def __init__( + self, + optimizer, + total_steps, + gamma=0.9, + last_epoch=-1, + ): + super().__init__( + optimizer=optimizer, + lr_lambda=lambda s: gamma ** (s / total_steps), + last_epoch=last_epoch, + ) + + +@SCHEDULERS.register_module() +class CosineAnnealingLR(lr_scheduler.CosineAnnealingLR): + def __init__( + self, + optimizer, + total_steps, + eta_min=0, + last_epoch=-1, + ): + super().__init__( + optimizer=optimizer, + T_max=total_steps, + eta_min=eta_min, + last_epoch=last_epoch, + ) + + +@SCHEDULERS.register_module() +class OneCycleLR(lr_scheduler.OneCycleLR): + r""" + torch.optim.lr_scheduler.OneCycleLR, Block total_steps + """ + + def __init__( + self, + optimizer, + max_lr, + total_steps=None, + pct_start=0.3, + anneal_strategy="cos", + cycle_momentum=True, + base_momentum=0.85, + max_momentum=0.95, + div_factor=25.0, + final_div_factor=1e4, + three_phase=False, + last_epoch=-1, + ): + super().__init__( + optimizer=optimizer, + max_lr=max_lr, + total_steps=total_steps, + pct_start=pct_start, + anneal_strategy=anneal_strategy, + cycle_momentum=cycle_momentum, + base_momentum=base_momentum, + max_momentum=max_momentum, + div_factor=div_factor, + final_div_factor=final_div_factor, + three_phase=three_phase, + last_epoch=last_epoch, + ) + + +class CosineScheduler(object): + def __init__( + self, + base_value, + final_value, + total_iters, + start_value=0, + warmup_iters=0, + freeze_value=None, + freeze_iters=0, + ): + self.base_value = base_value + self.final_value = final_value + self.total_iters = total_iters + + warmup_schedule = np.linspace(start_value, base_value, warmup_iters) + + if freeze_value is None: + freeze_value = final_value + freeze_schedule = np.ones(freeze_iters) * freeze_value + + iters = np.arange(total_iters - warmup_iters - freeze_iters) + schedule = final_value + 0.5 * (base_value - final_value) * ( + 1 + np.cos(np.pi * iters / len(iters)) + ) + self.schedule = np.concatenate((warmup_schedule, schedule, freeze_schedule)) + self.iter = 0 + + def get(self, it): + if it >= self.total_iters: + return self.final_value + else: + return self.schedule[it] + + def step(self): + value = self.get(self.iter) + self.iter += 1 + return value + + def reset(self): + self.iter = 0 + + def __getitem__(self, it): + return self.get(it) + + +def build_scheduler(cfg, optimizer): + cfg = copy.deepcopy(cfg) + cfg.optimizer = optimizer + return SCHEDULERS.build(cfg=cfg) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/timer.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/timer.py new file mode 100644 index 0000000..3de4a16 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/utils/timer.py @@ -0,0 +1,70 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# -*- coding: utf-8 -*- + +from time import perf_counter +from typing import Optional + + +class Timer: + """ + A timer which computes the time elapsed since the start/reset of the timer. + """ + + def __init__(self) -> None: + self.reset() + + def reset(self) -> None: + """ + Reset the timer. + """ + self._start = perf_counter() + self._paused: Optional[float] = None + self._total_paused = 0 + self._count_start = 1 + + def pause(self) -> None: + """ + Pause the timer. + """ + if self._paused is not None: + raise ValueError("Trying to pause a Timer that is already paused!") + self._paused = perf_counter() + + def is_paused(self) -> bool: + """ + Returns: + bool: whether the timer is currently paused + """ + return self._paused is not None + + def resume(self) -> None: + """ + Resume the timer. + """ + if self._paused is None: + raise ValueError("Trying to resume a Timer that is not paused!") + # pyre-fixme[58]: `-` is not supported for operand types `float` and + # `Optional[float]`. + self._total_paused += perf_counter() - self._paused + self._paused = None + self._count_start += 1 + + def seconds(self) -> float: + """ + Returns: + (float): the total number of seconds since the start/reset of the + timer, excluding the time when the timer is paused. + """ + if self._paused is not None: + end_time: float = self._paused # type: ignore + else: + end_time = perf_counter() + return end_time - self._start - self._total_paused + + def avg_seconds(self) -> float: + """ + Returns: + (float): the average number of seconds between every start/reset and + pause. + """ + return self.seconds() / self._count_start diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/visualization.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/visualization.py new file mode 100644 index 0000000..92ac39b --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/pointcept/utils/visualization.py @@ -0,0 +1,128 @@ +""" +Visualization Utils + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import os + +try: + import open3d as o3d +except ImportError: + o3d = None +import numpy as np +import torch + + +def to_numpy(x): + if isinstance(x, torch.Tensor): + x = x.clone().detach().cpu().numpy() + assert isinstance(x, np.ndarray) + return x + + +def get_point_cloud(coord, color=None, verbose=True): + if not isinstance(coord, list): + coord = [coord] + if color is not None: + color = [color] + + pcd_list = [] + for i in range(len(coord)): + coord_ = to_numpy(coord[i]) + if color is not None: + color_ = to_numpy(color[i]) + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(coord_) + pcd.colors = o3d.utility.Vector3dVector( + np.zeros_like(coord_) if color is None else color_ + ) + pcd_list.append(pcd) + if verbose: + o3d.visualization.draw_geometries(pcd_list) + return pcd_list + + +def get_line_set(coord, line, color=(1.0, 0.0, 0.0), verbose=True): + coord = to_numpy(coord) + line = to_numpy(line) + colors = np.array([color for _ in range(len(line))]) + line_set = o3d.geometry.LineSet() + line_set.points = o3d.utility.Vector3dVector(coord) + line_set.lines = o3d.utility.Vector2iVector(line) + line_set.colors = o3d.utility.Vector3dVector(colors) + if verbose: + o3d.visualization.draw_geometries([line_set]) + return line_set + + +def save_point_cloud(coord, color=None, file_path="pc.ply", logger=None): + os.makedirs(os.path.dirname(file_path), exist_ok=True) + coord = to_numpy(coord) + if color is not None: + color = to_numpy(color) + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(coord) + pcd.colors = o3d.utility.Vector3dVector( + np.ones_like(coord) if color is None else color + ) + o3d.io.write_point_cloud(file_path, pcd) + if logger is not None: + logger.info(f"Save Point Cloud to: {file_path}") + + +def save_bounding_boxes( + bboxes_corners, color=(1.0, 0.0, 0.0), file_path="bbox.ply", logger=None +): + bboxes_corners = to_numpy(bboxes_corners) + # point list + points = bboxes_corners.reshape(-1, 3) + # line list + box_lines = np.array( + [ + [0, 1], + [1, 2], + [2, 3], + [3, 0], + [4, 5], + [5, 6], + [6, 7], + [7, 0], + [0, 4], + [1, 5], + [2, 6], + [3, 7], + ] + ) + lines = [] + for i, _ in enumerate(bboxes_corners): + lines.append(box_lines + i * 8) + lines = np.concatenate(lines) + # color list + color = np.array([color for _ in range(len(lines))]) + # generate line set + line_set = o3d.geometry.LineSet() + line_set.points = o3d.utility.Vector3dVector(points) + line_set.lines = o3d.utility.Vector2iVector(lines) + line_set.colors = o3d.utility.Vector3dVector(color) + o3d.io.write_line_set(file_path, line_set) + + if logger is not None: + logger.info(f"Save Boxes to: {file_path}") + + +def save_lines( + points, lines, color=(1.0, 0.0, 0.0), file_path="lines.ply", logger=None +): + points = to_numpy(points) + lines = to_numpy(lines) + colors = np.array([color for _ in range(len(lines))]) + line_set = o3d.geometry.LineSet() + line_set.points = o3d.utility.Vector3dVector(points) + line_set.lines = o3d.utility.Vector2iVector(lines) + line_set.colors = o3d.utility.Vector3dVector(colors) + o3d.io.write_line_set(file_path, line_set) + + if logger is not None: + logger.info(f"Save Lines to: {file_path}") diff --git a/point_transformer_v3/pointcept_minimal/scripts/build_image.sh b/point_transformer_v3/pointcept_minimal/scripts/build_image.sh new file mode 100644 index 0000000..7233b7f --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/scripts/build_image.sh @@ -0,0 +1,83 @@ +TORCH_VERSION=2.5.0 +CUDA_VERSION=12.4 +CUDNN_VERSION=9 + +ARGS=`getopt -o t:c: -l torch:,cuda:,cudnn: -n "$0" -- "$@"` +[ $? != 0 ] && exit 1 +eval set -- "${ARGS}" +while true ; do + case "$1" in + -t | --torch) + TORCH_VERSION=$2 + shift 2 + ;; + -c | --cuda) + CUDA_VERSION=$2 + shift 2 + ;; + --cudnn) + CUDNN_VERSION=$2 + shift 2 + ;; + --) + break + ;; + *) + echo "Invalid option: $1" + exit 1 + ;; + esac +done + +CUDA_VERSION_NO_DOT=`echo ${CUDA_VERSION} | tr -d "."` +BASE_TORCH_TAG=${TORCH_VERSION}-cuda${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel +IMG_TAG=pointcept/pointcept:v1.6.0-pytorch${BASE_TORCH_TAG} + +echo "TORCH VERSION: ${TORCH_VERSION}" +echo "CUDA VERSION: ${CUDA_VERSION}" +echo "CUDNN VERSION: ${CUDNN_VERSION}" + + +cat > ./Dockerfile <<- EOM +FROM pytorch/pytorch:${BASE_TORCH_TAG} + +# Fix nvidia-key error issue (NO_PUBKEY A4B469963BF863CC) +RUN rm /etc/apt/sources.list.d/*.list + +# Installing apt packages +RUN export DEBIAN_FRONTEND=noninteractive \ + && apt -y update --no-install-recommends \ + && apt -y install --no-install-recommends \ + git wget tmux vim zsh build-essential cmake ninja-build libopenblas-dev libsparsehash-dev \ + && apt autoremove -y \ + && apt clean -y \ + && export DEBIAN_FRONTEND=dialog + +# Install Pointcept environment +RUN conda install h5py pyyaml tensorboard tensorboardx wandb yapf addict einops scipy plyfile termcolor matplotlib black open3d -c conda-forge -y + +RUN pip install --upgrade pip +RUN pip install timm +RUN pip install torch-geometric +RUN pip install torch_scatter torch_sparse torch_cluster -f https://data.pyg.org/whl/torch-${TORCH_VERSION}+cu${CUDA_VERSION_NO_DOT}.html +RUN pip install spconv-cu${CUDA_VERSION_NO_DOT} +RUN pip install git+https://github.com/octree-nn/ocnn-pytorch.git +RUN pip install ftfy regex tqdm +RUN pip install git+https://github.com/openai/CLIP.git + +# Build swin3d +RUN TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0" pip install -U git+https://github.com/microsoft/Swin3D.git -v + +# Build FlashAttention2 +RUN TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0" pip install git+https://github.com/Dao-AILab/flash-attention.git + +# Build pointops +RUN git clone https://github.com/Pointcept/Pointcept.git +RUN TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0" pip install Pointcept/libs/pointops -v + +# Build pointgroup_ops +RUN TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0" pip install Pointcept/libs/pointgroup_ops -v + +EOM + +docker build . -f ./Dockerfile -t $IMG_TAG \ No newline at end of file diff --git a/point_transformer_v3/pointcept_minimal/scripts/create_tars.sh b/point_transformer_v3/pointcept_minimal/scripts/create_tars.sh new file mode 100644 index 0000000..8bd990b --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/scripts/create_tars.sh @@ -0,0 +1,67 @@ +#!/bin/sh + +# Variables +SOURCE_DIR=$1 +DEST_DIR=$2 +MAX_SIZE=$(awk "BEGIN {printf \"%d\", $3 * 1024 * 1024}") # Convert GB to KB as an integer + +# Get the base name of the source directory to use as TAR_NAME +TAR_NAME=$(basename "$SOURCE_DIR") + +# Create destination directory if it doesn't exist +mkdir -p "$DEST_DIR" + +# Function to create a new tar file +create_tar() { + tar_number=$1 + file_list=$2 + tar_name=$(printf "%s/${TAR_NAME}_%0${width}d.tar.gz" "$DEST_DIR" "$tar_number") + tar -zcvf "$tar_name" -C "$SOURCE_DIR" -T "$file_list" +} + +# Initialize +tar_number=1 +current_size=0 +temp_dir=$(mktemp -d) +file_list="$temp_dir/file_list_$tar_number" +echo Start indexing "file_list_$tar_number" + +cd "$SOURCE_DIR" || exit 1 + +# Iterate over all files in the source directory +find . -type f | while IFS= read -r file; do + file_size=$(du -k "$file" | cut -f1) + + if [ $(( current_size + file_size )) -gt $MAX_SIZE ]; then + tar_number=$((tar_number + 1)) + file_list="$temp_dir/file_list_$tar_number" + echo Start indexing "file_list_$tar_number" + current_size=0 + fi + + echo "$file" >> "$file_list" + current_size=$((current_size + file_size)) +done + +# Determine the width for the tar file numbers +total_files=$(find "$temp_dir" -name 'file_list_*' | wc -l) +width=${#total_files} + +# Set PARALLEL_PROCESSES to the number of file lists if not provided +PARALLEL_PROCESSES=${4:-$total_files} + +# Debug information +echo "Total files: $total_files" +echo "Width: $width" +echo "Parallel processes: $PARALLEL_PROCESSES" + +# Run tar creation in parallel +find "$temp_dir" -name 'file_list_*' | xargs -P "$PARALLEL_PROCESSES" -I {} sh -c ' + file_list={} + tar_number=$(basename "$file_list" | cut -d_ -f3) + tar_name=$(printf "%s/'"$TAR_NAME"'_%0'"$width"'d.tar.gz" "'"$DEST_DIR"'" "$tar_number") + tar -zcvf "$tar_name" -C "'"$SOURCE_DIR"'" -T "$file_list" +' + +# Clean up +rm -rf "$temp_dir" \ No newline at end of file diff --git a/point_transformer_v3/pointcept_minimal/scripts/test.sh b/point_transformer_v3/pointcept_minimal/scripts/test.sh new file mode 100644 index 0000000..42ccf04 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/scripts/test.sh @@ -0,0 +1,92 @@ +#!/bin/sh + +cd $(dirname $(dirname "$0")) || exit +PYTHON=python + +TEST_CODE=test.py + +DATASET=scannet +CONFIG="None" +EXP_NAME=debug +WEIGHT=model_best +NUM_GPU=None +NUM_MACHINE=1 +DIST_URL="auto" + +while getopts "p:d:c:n:w:g:m:" opt; do + case $opt in + p) + PYTHON=$OPTARG + ;; + d) + DATASET=$OPTARG + ;; + c) + CONFIG=$OPTARG + ;; + n) + EXP_NAME=$OPTARG + ;; + w) + WEIGHT=$OPTARG + ;; + g) + NUM_GPU=$OPTARG + ;; + m) + NUM_MACHINE=$OPTARG + ;; + \?) + echo "Invalid option: -$OPTARG" + ;; + esac +done + +if [ "${NUM_GPU}" = 'None' ] +then + NUM_GPU=`$PYTHON -c 'import torch; print(torch.cuda.device_count())'` +fi + +echo "Experiment name: $EXP_NAME" +echo "Python interpreter dir: $PYTHON" +echo "Dataset: $DATASET" +echo "GPU Num: $NUM_GPU" +echo "Machine Num: $NUM_MACHINE" + +if [ -n "$SLURM_NODELIST" ]; then + MASTER_HOSTNAME=$(scontrol show hostname "$SLURM_NODELIST" | head -n 1) + MASTER_ADDR=$(getent hosts "$MASTER_HOSTNAME" | awk '{ print $1 }') + MASTER_PORT=$((10000 + 0x$(echo -n "${DATASET}/${EXP_NAME}" | md5sum | cut -c 1-4 | awk '{print $1}') % 20000)) + DIST_URL=tcp://$MASTER_ADDR:$MASTER_PORT +fi + +echo "Dist URL: $DIST_URL" + +EXP_DIR=exp/${DATASET}/${EXP_NAME} +MODEL_DIR=${EXP_DIR}/model +CODE_DIR=${EXP_DIR}/code +CONFIG_DIR=${EXP_DIR}/config.py + +if [ "${CONFIG}" = "None" ] +then + CONFIG_DIR=${EXP_DIR}/config.py +else + CONFIG_DIR=configs/${DATASET}/${CONFIG}.py +fi + +echo "Loading config in:" $CONFIG_DIR +#export PYTHONPATH=./$CODE_DIR +export PYTHONPATH=./ +echo "Running code in: $CODE_DIR" + + +echo " =========> RUN TASK <=========" +ulimit -n 65536 +#$PYTHON -u "$CODE_DIR"/tools/$TEST_CODE \ +$PYTHON -u tools/$TEST_CODE \ + --config-file "$CONFIG_DIR" \ + --num-gpus "$NUM_GPU" \ + --num-machines "$NUM_MACHINE" \ + --machine-rank ${SLURM_NODEID:-0} \ + --dist-url ${DIST_URL} \ + --options save_path="$EXP_DIR" weight="${MODEL_DIR}"/"${WEIGHT}".pth diff --git a/point_transformer_v3/pointcept_minimal/scripts/train.sh b/point_transformer_v3/pointcept_minimal/scripts/train.sh new file mode 100644 index 0000000..15abf05 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/scripts/train.sh @@ -0,0 +1,114 @@ +#!/bin/sh + +cd $(dirname $(dirname "$0")) || exit +ROOT_DIR=$(pwd) +PYTHON=python + +TRAIN_CODE=train.py + +DATASET=scannet +CONFIG="None" +EXP_NAME=debug +WEIGHT="None" +RESUME=false +NUM_GPU=None +NUM_MACHINE=1 +DIST_URL="auto" + + +while getopts "p:d:c:n:w:g:m:r:" opt; do + case $opt in + p) + PYTHON=$OPTARG + ;; + d) + DATASET=$OPTARG + ;; + c) + CONFIG=$OPTARG + ;; + n) + EXP_NAME=$OPTARG + ;; + w) + WEIGHT=$OPTARG + ;; + r) + RESUME=$OPTARG + ;; + g) + NUM_GPU=$OPTARG + ;; + m) + NUM_MACHINE=$OPTARG + ;; + \?) + echo "Invalid option: -$OPTARG" + ;; + esac +done + +if [ "${NUM_GPU}" = 'None' ] +then + NUM_GPU=`$PYTHON -c 'import torch; print(torch.cuda.device_count())'` +fi + +echo "Experiment name: $EXP_NAME" +echo "Python interpreter dir: $PYTHON" +echo "Dataset: $DATASET" +echo "Config: $CONFIG" +echo "GPU Num: $NUM_GPU" +echo "Machine Num: $NUM_MACHINE" + +if [ -n "$SLURM_NODELIST" ]; then + MASTER_HOSTNAME=$(scontrol show hostname "$SLURM_NODELIST" | head -n 1) + MASTER_ADDR=$(getent hosts "$MASTER_HOSTNAME" | awk '{ print $1 }') + MASTER_PORT=$((10000 + 0x$(echo -n "${DATASET}/${EXP_NAME}" | md5sum | cut -c 1-4 | awk '{print $1}') % 20000)) + DIST_URL=tcp://$MASTER_ADDR:$MASTER_PORT +fi + +echo "Dist URL: $DIST_URL" + +EXP_DIR=exp/${DATASET}/${EXP_NAME} +MODEL_DIR=${EXP_DIR}/model +CODE_DIR=${EXP_DIR}/code +CONFIG_DIR=configs/${DATASET}/${CONFIG}.py + + +echo " =========> CREATE EXP DIR <=========" +echo "Experiment dir: $ROOT_DIR/$EXP_DIR" +if [ "${RESUME}" = true ] && [ -d "$EXP_DIR" ] +then + CONFIG_DIR=${EXP_DIR}/config.py + WEIGHT=$MODEL_DIR/model_last.pth +else + RESUME=false + mkdir -p "$MODEL_DIR" "$CODE_DIR" + cp -r scripts tools pointcept "$CODE_DIR" +fi + +echo "Loading config in:" $CONFIG_DIR +export PYTHONPATH=./$CODE_DIR +echo "Running code in: $CODE_DIR" + + +echo " =========> RUN TASK <=========" +ulimit -n 65536 +if [ "${WEIGHT}" = "None" ] +then + $PYTHON "$CODE_DIR"/tools/$TRAIN_CODE \ + --config-file "$CONFIG_DIR" \ + --num-gpus "$NUM_GPU" \ + --num-machines "$NUM_MACHINE" \ + --machine-rank ${SLURM_NODEID:-0} \ + --dist-url ${DIST_URL} \ + --options save_path="$EXP_DIR" +else + $PYTHON "$CODE_DIR"/tools/$TRAIN_CODE \ + --config-file "$CONFIG_DIR" \ + --num-gpus "$NUM_GPU" \ + --num-machines "$NUM_MACHINE" \ + --machine-rank ${SLURM_NODEID:-0} \ + --dist-url ${DIST_URL} \ + --options save_path="$EXP_DIR" resume="$RESUME" weight="$WEIGHT" +fi diff --git a/point_transformer_v3/pointcept_minimal/tools/test.py b/point_transformer_v3/pointcept_minimal/tools/test.py new file mode 100644 index 0000000..8b477f9 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/tools/test.py @@ -0,0 +1,39 @@ +""" +Main Testing Script + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +from pointcept.engines.defaults import ( + default_argument_parser, + default_config_parser, + default_setup, +) +from pointcept.engines.test import TESTERS +from pointcept.engines.launch import launch + + +def main_worker(cfg): + cfg = default_setup(cfg) + test_cfg = dict(cfg=cfg, **cfg.test) + tester = TESTERS.build(test_cfg) + tester.test() + + +def main(): + args = default_argument_parser().parse_args() + cfg = default_config_parser(args.config_file, args.options) + + launch( + main_worker, + num_gpus_per_machine=args.num_gpus, + num_machines=args.num_machines, + machine_rank=args.machine_rank, + dist_url=args.dist_url, + cfg=(cfg,), + ) + + +if __name__ == "__main__": + main() diff --git a/point_transformer_v3/pointcept_minimal/tools/train.py b/point_transformer_v3/pointcept_minimal/tools/train.py new file mode 100644 index 0000000..e3ed749 --- /dev/null +++ b/point_transformer_v3/pointcept_minimal/tools/train.py @@ -0,0 +1,38 @@ +""" +Main Training Script + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +from pointcept.engines.defaults import ( + default_argument_parser, + default_config_parser, + default_setup, +) +from pointcept.engines.train import TRAINERS +from pointcept.engines.launch import launch + + +def main_worker(cfg): + cfg = default_setup(cfg) + trainer = TRAINERS.build(dict(type=cfg.train.type, cfg=cfg)) + trainer.train() + + +def main(): + args = default_argument_parser().parse_args() + cfg = default_config_parser(args.config_file, args.options) + + launch( + main_worker, + num_gpus_per_machine=args.num_gpus, + num_machines=args.num_machines, + machine_rank=args.machine_rank, + dist_url=args.dist_url, + cfg=(cfg,), + ) + + +if __name__ == "__main__": + main() From bcbd14d61685b49b9345e5fe710c98b4b6be58cc Mon Sep 17 00:00:00 2001 From: Christopher Horvath Date: Wed, 19 Nov 2025 09:32:45 -0800 Subject: [PATCH 2/7] Refactored to not modify pointcept directly Signed-off-by: Christopher Horvath --- .gitmodules | 3 + point_transformer_v3/README.md | 212 +-- point_transformer_v3/external/pointcept | 1 + .../fvdb_extensions/__init__.py | 4 + .../__pycache__/__init__.cpython-312.pyc | Bin 0 -> 207 bytes .../configs/fvdb_runtime.py} | 1 + .../configs}/semseg-pt-v3m1-0-fvdb-test.py | 3 +- .../configs}/semseg-pt-v3m1-0-test.py | 3 +- .../fvdb_extensions/models/__init__.py | 4 + .../__pycache__/__init__.cpython-312.pyc | Bin 0 -> 251 bytes ...oint_transformer_v3m1_fvdb.cpython-312.pyc | Bin 0 -> 7665 bytes .../__pycache__/ptv3_fvdb.cpython-312.pyc | Bin 0 -> 40740 bytes .../models}/point_transformer_v3m1_fvdb.py | 19 +- .../models/ptv3_fvdb.py} | 17 +- .../.github/workflows/formatter.yml | 20 - .../pointcept_minimal/.gitignore | 16 - .../pointcept_minimal/LICENSE | 21 - .../pointcept_minimal/README.md | 988 ----------- .../configs/_base_/dataset/scannet.py | 26 - .../configs/scannet/semseg-pt-v3m1-0-base.py | 317 ---- .../pointcept_minimal/environment.yml | 52 - .../pointcept_minimal/pointcept/__init__.py | 0 .../pointcept/datasets/__init__.py | 9 - .../pointcept/datasets/builder.py | 15 - .../pointcept/datasets/dataloader.py | 112 -- .../pointcept/datasets/defaults.py | 499 ------ .../concerto/scannet/preprocess_scannet.py | 574 ------- .../concerto/scannet/preprocess_scannet.sh | 42 - .../preprocessing/concerto/scannet/splits.py | 62 - .../preprocessing/sampling_chunking_data.py | 149 -- .../scannet/dino/prepare_scene_list.py | 27 - .../scannet/dino/preprocess_dino_feature.py | 362 ---- .../scannet/extract_partition.py | 71 - .../scannet/meta_data/scannet200_constants.py | 704 -------- .../scannet/meta_data/scannet200_splits.py | 625 ------- .../scannet/meta_data/scannet_means.npz | Bin 676 -> 0 bytes .../scannetv2-labels-old.combined.tsv | 608 ------- .../meta_data/scannetv2-labels.combined.tsv | 608 ------- .../scannet/preprocess_scannet.py | 253 --- .../pointcept/datasets/scannet.py | 118 -- .../pointcept/datasets/transform.py | 1457 ----------------- .../pointcept/datasets/utils.py | 140 -- .../pointcept/engines/__init__.py | 0 .../pointcept/engines/defaults.py | 152 -- .../pointcept/engines/hooks/__init__.py | 5 - .../pointcept/engines/hooks/builder.py | 18 - .../pointcept/engines/hooks/default.py | 66 - .../pointcept/engines/hooks/evaluator.py | 243 --- .../pointcept/engines/hooks/misc.py | 553 ------- .../pointcept/engines/launch.py | 137 -- .../pointcept/engines/test.py | 890 ---------- .../pointcept/engines/train.py | 372 ----- .../pointcept/models/__init__.py | 10 - .../pointcept/models/builder.py | 17 - .../pointcept/models/default.py | 230 --- .../pointcept/models/losses/__init__.py | 4 - .../pointcept/models/losses/builder.py | 31 - .../pointcept/models/losses/lovasz.py | 257 --- .../pointcept/models/losses/misc.py | 223 --- .../pointcept/models/modules.py | 120 -- .../models/point_transformer_v3/__init__.py | 3 - .../point_transformer_v3m1_base.py | 716 -------- .../point_transformer_v3m2_sonata.py | 732 --------- .../pointcept/models/utils/__init__.py | 9 - .../pointcept/models/utils/checkpoint.py | 57 - .../pointcept/models/utils/misc.py | 41 - .../models/utils/serialization/__init__.py | 8 - .../models/utils/serialization/default.py | 59 - .../models/utils/serialization/hilbert.py | 303 ---- .../models/utils/serialization/z_order.py | 126 -- .../pointcept/models/utils/structure.py | 209 --- .../pointcept/utils/__init__.py | 0 .../pointcept/utils/cache.py | 60 - .../pointcept_minimal/pointcept/utils/comm.py | 198 --- .../pointcept/utils/config.py | 694 -------- .../pointcept_minimal/pointcept/utils/env.py | 36 - .../pointcept/utils/events.py | 612 ------- .../pointcept_minimal/pointcept/utils/misc.py | 164 -- .../pointcept/utils/optimizer.py | 57 - .../pointcept_minimal/pointcept/utils/path.py | 103 -- .../pointcept/utils/registry.py | 316 ---- .../pointcept/utils/scheduler.py | 205 --- .../pointcept/utils/timer.py | 70 - .../pointcept/utils/visualization.py | 128 -- .../pointcept_minimal/scripts/build_image.sh | 83 - .../pointcept_minimal/scripts/create_tars.sh | 67 - .../pointcept_minimal/scripts/test.sh | 92 -- .../pointcept_minimal/scripts/train.sh | 114 -- .../pointcept_minimal/tools/test.py | 39 - .../pointcept_minimal/tools/train.py | 38 - point_transformer_v3/requirements.txt | 20 +- point_transformer_v3/scripts/README.md | 32 + .../download_example_data.cpython-312.pyc | Bin 0 -> 2745 bytes .../prepare_scannet_dataset.cpython-312.pyc | Bin 0 -> 14414 bytes .../data}/download_example_data.py | 4 +- .../data}/prepare_scannet_dataset.py | 8 +- .../compute_difference.cpython-312.pyc | Bin 0 -> 12248 bytes .../minimal_inference.cpython-312.pyc | Bin 0 -> 15236 bytes .../{ => scripts/test}/compute_difference.py | 8 +- .../{ => scripts/test}/minimal_inference.py | 20 +- point_transformer_v3/setup_env.py | 55 + point_transformer_v3/setup_env.sh | 13 + 102 files changed, 191 insertions(+), 16778 deletions(-) create mode 100644 .gitmodules create mode 160000 point_transformer_v3/external/pointcept create mode 100644 point_transformer_v3/fvdb_extensions/__init__.py create mode 100644 point_transformer_v3/fvdb_extensions/__pycache__/__init__.cpython-312.pyc rename point_transformer_v3/{pointcept_minimal/configs/_base_/default_runtime.py => fvdb_extensions/configs/fvdb_runtime.py} (99%) rename point_transformer_v3/{pointcept_minimal/configs/scannet => fvdb_extensions/configs}/semseg-pt-v3m1-0-fvdb-test.py (99%) rename point_transformer_v3/{pointcept_minimal/configs/scannet => fvdb_extensions/configs}/semseg-pt-v3m1-0-test.py (99%) create mode 100644 point_transformer_v3/fvdb_extensions/models/__init__.py create mode 100644 point_transformer_v3/fvdb_extensions/models/__pycache__/__init__.cpython-312.pyc create mode 100644 point_transformer_v3/fvdb_extensions/models/__pycache__/point_transformer_v3m1_fvdb.cpython-312.pyc create mode 100644 point_transformer_v3/fvdb_extensions/models/__pycache__/ptv3_fvdb.cpython-312.pyc rename point_transformer_v3/{pointcept_minimal/pointcept/models/point_transformer_v3 => fvdb_extensions/models}/point_transformer_v3m1_fvdb.py (93%) rename point_transformer_v3/{model.py => fvdb_extensions/models/ptv3_fvdb.py} (98%) delete mode 100644 point_transformer_v3/pointcept_minimal/.github/workflows/formatter.yml delete mode 100644 point_transformer_v3/pointcept_minimal/.gitignore delete mode 100644 point_transformer_v3/pointcept_minimal/LICENSE delete mode 100644 point_transformer_v3/pointcept_minimal/README.md delete mode 100644 point_transformer_v3/pointcept_minimal/configs/_base_/dataset/scannet.py delete mode 100644 point_transformer_v3/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-base.py delete mode 100644 point_transformer_v3/pointcept_minimal/environment.yml delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/__init__.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/__init__.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/builder.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/dataloader.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/defaults.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/concerto/scannet/preprocess_scannet.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/concerto/scannet/preprocess_scannet.sh delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/concerto/scannet/splits.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/sampling_chunking_data.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/dino/prepare_scene_list.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/dino/preprocess_dino_feature.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/extract_partition.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_constants.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_splits.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet_means.npz delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2-labels-old.combined.tsv delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2-labels.combined.tsv delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/preprocess_scannet.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/scannet.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/transform.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/datasets/utils.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/engines/__init__.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/engines/defaults.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/__init__.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/builder.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/default.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/evaluator.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/misc.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/engines/launch.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/engines/test.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/engines/train.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/__init__.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/builder.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/default.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/losses/__init__.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/losses/builder.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/losses/lovasz.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/losses/misc.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/modules.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/__init__.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m1_base.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m2_sonata.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/utils/__init__.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/utils/checkpoint.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/utils/misc.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/__init__.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/default.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/hilbert.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/z_order.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/models/utils/structure.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/__init__.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/cache.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/comm.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/config.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/env.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/events.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/misc.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/optimizer.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/path.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/registry.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/scheduler.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/timer.py delete mode 100644 point_transformer_v3/pointcept_minimal/pointcept/utils/visualization.py delete mode 100644 point_transformer_v3/pointcept_minimal/scripts/build_image.sh delete mode 100644 point_transformer_v3/pointcept_minimal/scripts/create_tars.sh delete mode 100644 point_transformer_v3/pointcept_minimal/scripts/test.sh delete mode 100644 point_transformer_v3/pointcept_minimal/scripts/train.sh delete mode 100644 point_transformer_v3/pointcept_minimal/tools/test.py delete mode 100644 point_transformer_v3/pointcept_minimal/tools/train.py create mode 100644 point_transformer_v3/scripts/README.md create mode 100644 point_transformer_v3/scripts/data/__pycache__/download_example_data.cpython-312.pyc create mode 100644 point_transformer_v3/scripts/data/__pycache__/prepare_scannet_dataset.cpython-312.pyc rename point_transformer_v3/{ => scripts/data}/download_example_data.py (90%) rename point_transformer_v3/{ => scripts/data}/prepare_scannet_dataset.py (94%) create mode 100644 point_transformer_v3/scripts/test/__pycache__/compute_difference.cpython-312.pyc create mode 100644 point_transformer_v3/scripts/test/__pycache__/minimal_inference.cpython-312.pyc rename point_transformer_v3/{ => scripts/test}/compute_difference.py (96%) rename point_transformer_v3/{ => scripts/test}/minimal_inference.py (94%) create mode 100644 point_transformer_v3/setup_env.py create mode 100755 point_transformer_v3/setup_env.sh diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..06f6840 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "point_transformer_v3/pointcept_minimal"] + path = point_transformer_v3/external/pointcept + url = https://github.com/Pointcept/Pointcept.git diff --git a/point_transformer_v3/README.md b/point_transformer_v3/README.md index 34ab415..088d735 100644 --- a/point_transformer_v3/README.md +++ b/point_transformer_v3/README.md @@ -2,219 +2,37 @@ This repository contains a minimal implementation of Point Transformer V3 using the FVDB library for scalable 3D point cloud processing. -## Environment - -Use the FVDB default development environment and install FVDB package: +## Setup ```bash -cd fvdb/ -conda env create -f env/dev_environment.yml +# Activate fvdb conda environment conda activate fvdb -./build.sh -``` - -Next, activate the environment and install additional dependancies specifically for the point transformer project. -```bash +# Install dependencies cd fvdb-examples/point_transformer_v3 pip install -r requirements.txt ``` -In order to train on Scannet dataset with pointcept codebase, we need to additionally install: - -```bash -cd fvdb-examples/point_transformer_v3 -pip install -r requirements_pointcept.txt -``` - - - -## Files Overview - -### 2. `prepare_scannet_dataset.py` -**Purpose**: Prepares ScanNet dataset samples for testing and development - -**Prerequisites**: -- Download the full ScanNet dataset from https://github.com/ScanNet/ScanNet (requires application approval) -- Store the dataset to a local directory (e.g., `/path/to/scannet`) - -**Usage**: -```bash -python prepare_scannet_dataset.py --data_root /path/to/scannet --output_file scannet_samples.json --num_samples 16 -``` - -**What it does**: -- Loads ScanNet dataset from specified root directory where it is downloaded -- Performs grid sampling to reduce point density -- Exports a subset of samples to a single JSON file: the `scannet_samples.json` containing point coordinates, colors, and labels - -### 1. `download_example_data.py` -**Purpose**: Download the preprocessed ScanNet dataset samples for testing. - -**Usage**: -```bash -python download_example_data.py -``` - -**What it does**: -- Downloads a pre-processed ScanNet sample set together with the corresponding PT-v3 reference outputs. This replicates the result of running `python prepare_scannet_dataset.py` locally, but saves you from downloading the entire ScanNet dataset and performing the preprocessing yourself. -- The script provides a single set of samples; to generate additional datasets, run `python prepare_scannet_dataset.py` instead. - - -### 2. `model.py` -**Purpose**: Implements the PT-v3 architecture using FVDB. - -**Key Components**: -- `PTV3`: Main model class with configurable encoder depths and channels -- `PTV3_Encoder`: A PT-v3 encoder consisting of multiple PT-v3 block. The grid resolution remained the same throughout the encoder -- `PTV3_Block`: Transformer block with attention and MLP -- `PTV3_CPE`: Convolutional Positional Encoding -- `PTV3_Attention`: Multi-head self-attention -- `PTV3_Pooling`: Downsampling operations - -**Usage**: Imported by `minimal_inference.py` for model instantiation. - -### 3. `minimal_inference.py` -**Purpose**: Demonstrates PT-v3 inference on ScanNet point clouds. - -**Usage**: -```bash -python minimal_inference.py -``` - -**What it does**: -- Loads point cloud data from `scannet_samples.json` -- Converts ScanNet data to fVDB format -- Runs PT-v3 model inference -- Saves runtime statistics to `runtime_stats.json` - -**Prerequisites**: Requires `scannet_samples.json` from `prepare_scannet_dataset.py` - -### 4. `compute_difference.py` -**Purpose**: Compares inference results between fVDB implementation and original PT-v3 implementation. - -**Usage**: -```bash -python compute_difference.py --stats_path_1 stats1.json --stats_path_2 stats2.json -``` - -**What it does**: -- Loads two `runtime_stats.json` files -- Computes average absolute and relative deviations -- Reports differences in output features, sums, and last elements -- Useful for validating model changes or comparing implementations - -## Test PT-v3 - -To test the Point Transformer V3 implementation, follow these steps: - -### Step 1: Download the Dataset - -First, download the preprocessed ScanNet dataset samples and reference outputs: - -```bash -python download_example_data.py -``` - -This will download the following files to the `data/` directory: -- `scannet_samples_small.json` - Small point-clouds, each has a few thousands points. -- `scannet_samples_large.json` - Larger point-clouds, each has 50k~100k points. -- `scannet_samples_small_output_gt.json` - Reference outputs for small dataset. -- `scannet_samples_large_output_gt.json` - Reference outputs for large dataset. - -### Step 2: Inference point transformer PT-v3 - -Run the PT-v3 model inference on the downloaded samples: - -```bash -# Test with small dataset -python minimal_inference.py --data-path data/scannet_samples_small.json --voxel-size 0.1 --patch-size 1024 --batch-size 1 - -# Test with large dataset -python minimal_inference.py --data-path data/scannet_samples_large.json --voxel-size 0.02 --patch-size 1024 --batch-size 1 -``` - -This will: -- Load the point cloud data from the JSON file -- Convert the data to fVDB format -- Run PT-v3 model inference -- Save runtime statistics and results to the specified output file - -### Step 3: Compute the Difference - -Compare your inference results with the reference outputs to validate the implementation: - -```bash -# Compare small dataset results -python compute_difference.py --stats_path_1 data/scannet_samples_small_output.json --stats_path_2 data/scannet_samples_small_output_gt.json - -# Compare large dataset results -python compute_difference.py --stats_path_1 data/scannet_samples_large_output.json --stats_path_2 data/scannet_samples_large_output_gt.json -``` - -This will: -- Load both result files (your inference results and reference outputs) -- Compute average absolute and relative deviations -- Report differences in output features, sums, and last elements -- Expect only small numerical differences (typically < 1e-5) due to floating-point precision. - -## Training on ScanNet Dataset - -This section describes how to train PT-v3 models on the ScanNet dataset using the minimal Pointcept training codebase, using either their ptv3 implementation and our fVDB implementation. - -### Environment Setup -Follow the **Environment** section above to set up the development environment and install all required dependencies. -### ScanNet Dataset Preparation - -The preprocessing pipeline supports semantic and instance segmentation for `ScanNet20`, `ScanNet200`, and `ScanNet Data Efficient` benchmarks. - -1. **Download the dataset**: Obtain the [ScanNet v2 dataset](http://www.scan-net.org/) (requires registration and approval). - -2. **Preprocess the raw data**: Run the preprocessing script to convert the raw ScanNet data into the required format: +## Quick Test ```bash -# RAW_SCANNET_DIR: the directory containing the downloaded ScanNet v2 raw dataset -# PROCESSED_SCANNET_DIR: the output directory for the processed ScanNet dataset -python pointcept_minimal/pointcept/datasets/preprocessing/scannet/preprocess_scannet.py \ - --dataset_root ${RAW_SCANNET_DIR} \ - --output_root ${PROCESSED_SCANNET_DIR} -``` - -3. **Alternative**: Download preprocessed data directly from [HuggingFace](https://huggingface.co/datasets/Pointcept/scannet-compressed). Please ensure you agree to the official ScanNet license before downloading. +# Download test data +python scripts/data/download_example_data.py -4. **Link the processed dataset** to the codebase data directory: +# Run inference +python scripts/test/minimal_inference.py --data-path data/scannet_samples_small.json --voxel-size 0.1 --patch-size 1024 --batch-size 1 -```bash -# PROCESSED_SCANNET_DIR: the directory containing the processed ScanNet dataset -mkdir -p pointcept_minimal/data -ln -s ${PROCESSED_SCANNET_DIR} pointcept_minimal/data/scannet +# Compare results +python scripts/test/compute_difference.py --stats_path_1 data/scannet_samples_small_output.json --stats_path_2 data/scannet_samples_small_output_gt.json ``` -### Training Scripts - -To train the PT-v3 models with different configurations, use the following commands from the `pointcept_minimal` directory: - -```bash -# Train PT-v3 with FVDB backend (8 GPUs) -sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-0-fvdb-test -n semseg-pt-v3m1-0-fvdb-test - -# Train PT-v3 with standard backend (8 GPUs) -sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-0-test -n semseg-pt-v3m1-0-test -``` - -You should launch the above scripts within `point_transformer_v3/pointcept_minimal` folder. - -### Configuration Files - -The training configurations are located at: -- `pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test.py` - FVDB-based implementation -- `pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test.py` - Standard implementation -### Model Implementation +## Project Structure -The model implementations can be found in the following files: -- `pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m1_base.py` - Base PT-v3 implementation -- `pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m1_fvdb.py` - FVDB-accelerated PT-v3 implementation +- `fvdb_extensions/models/ptv3_fvdb.py` - Core FVDB implementation +- `fvdb_extensions/models/point_transformer_v3m1_fvdb.py` - Pointcept framework adapter +- `scripts/data/` - Data download and preprocessing scripts +- `scripts/test/` - Inference and comparison scripts diff --git a/point_transformer_v3/external/pointcept b/point_transformer_v3/external/pointcept new file mode 160000 index 0000000..ef6817b --- /dev/null +++ b/point_transformer_v3/external/pointcept @@ -0,0 +1 @@ +Subproject commit ef6817ba5d0391281dd012aadc60474502def093 diff --git a/point_transformer_v3/fvdb_extensions/__init__.py b/point_transformer_v3/fvdb_extensions/__init__.py new file mode 100644 index 0000000..41e9962 --- /dev/null +++ b/point_transformer_v3/fvdb_extensions/__init__.py @@ -0,0 +1,4 @@ +# Lazy imports - only import when explicitly requested +# This allows models to be imported individually without pulling in all dependencies +__all__ = ['models'] + diff --git a/point_transformer_v3/fvdb_extensions/__pycache__/__init__.cpython-312.pyc b/point_transformer_v3/fvdb_extensions/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7e4aaf94b7f94eeea02e78ad18992cb8149ff29b GIT binary patch literal 207 zcmX@j%ge<81b01UGZz8r#~=<2FhUuhK}x1Gq%fp2Mln<}YBJqo%gs+o%_;WNWW2>5 zAD@_$6Cb~l;WJ42uR#5b{M=Oi!W;yyCR{qTJM?_%dU#lK9k$lGME7%>2A!{rLFIyv&mLc)fzkTO2k(>q>J{?TXle eHh^4J%m*YsFf%eTeqdwZ5$VusWG`X`3IPB#kvbOu literal 0 HcmV?d00001 diff --git a/point_transformer_v3/pointcept_minimal/configs/_base_/default_runtime.py b/point_transformer_v3/fvdb_extensions/configs/fvdb_runtime.py similarity index 99% rename from point_transformer_v3/pointcept_minimal/configs/_base_/default_runtime.py rename to point_transformer_v3/fvdb_extensions/configs/fvdb_runtime.py index a790099..4ebf03b 100644 --- a/point_transformer_v3/pointcept_minimal/configs/_base_/default_runtime.py +++ b/point_transformer_v3/fvdb_extensions/configs/fvdb_runtime.py @@ -44,3 +44,4 @@ # Tester test = dict(type="SemSegTester", verbose=True) + diff --git a/point_transformer_v3/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test.py b/point_transformer_v3/fvdb_extensions/configs/semseg-pt-v3m1-0-fvdb-test.py similarity index 99% rename from point_transformer_v3/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test.py rename to point_transformer_v3/fvdb_extensions/configs/semseg-pt-v3m1-0-fvdb-test.py index 6eaad49..2690364 100644 --- a/point_transformer_v3/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-fvdb-test.py +++ b/point_transformer_v3/fvdb_extensions/configs/semseg-pt-v3m1-0-fvdb-test.py @@ -1,4 +1,4 @@ -_base_ = ["../_base_/default_runtime.py"] +_base_ = ["fvdb_runtime.py"] # misc custom setting batch_size = 8 # bs: total bs in all gpus @@ -139,7 +139,6 @@ hash_type="fnv", mode="train", return_grid_coord=True, - return_inverse=True, ), dict(type="CenterShift", apply_z=False), dict(type="NormalizeColor"), diff --git a/point_transformer_v3/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test.py b/point_transformer_v3/fvdb_extensions/configs/semseg-pt-v3m1-0-test.py similarity index 99% rename from point_transformer_v3/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test.py rename to point_transformer_v3/fvdb_extensions/configs/semseg-pt-v3m1-0-test.py index ca47e04..20aa775 100644 --- a/point_transformer_v3/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-test.py +++ b/point_transformer_v3/fvdb_extensions/configs/semseg-pt-v3m1-0-test.py @@ -1,4 +1,4 @@ -_base_ = ["../_base_/default_runtime.py"] +_base_ = ["fvdb_runtime.py"] # misc custom setting batch_size = 8 # bs: total bs in all gpus @@ -153,7 +153,6 @@ hash_type="fnv", mode="train", return_grid_coord=True, - return_inverse=True, ), dict(type="CenterShift", apply_z=False), dict(type="NormalizeColor"), diff --git a/point_transformer_v3/fvdb_extensions/models/__init__.py b/point_transformer_v3/fvdb_extensions/models/__init__.py new file mode 100644 index 0000000..bd5f246 --- /dev/null +++ b/point_transformer_v3/fvdb_extensions/models/__init__.py @@ -0,0 +1,4 @@ +# Lazy imports - only import when explicitly requested +# This allows ptv3_fvdb to be imported without pulling in pointcept dependencies +__all__ = ['point_transformer_v3m1_fvdb', 'ptv3_fvdb'] + diff --git a/point_transformer_v3/fvdb_extensions/models/__pycache__/__init__.cpython-312.pyc b/point_transformer_v3/fvdb_extensions/models/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..215f7ac7a8c097c4c8289bfbb4e4cb59b9eaab13 GIT binary patch literal 251 zcmX@j%ge<81b01UGYF;!BDW z^NQ2*i*i$o;>(P44dc_wQj%_Q7L=43gPDGsjJMe1;}dgo;^S8`dGEnfZCe`nmZjsX4{^@$s2? znI-Y@dIgoYIBbA|r8%i~MI1nj7=gH05J-GrW@Kdiz{bF%xkCE_msulw5gSkx09OY~ AiU0rr literal 0 HcmV?d00001 diff --git a/point_transformer_v3/fvdb_extensions/models/__pycache__/point_transformer_v3m1_fvdb.cpython-312.pyc b/point_transformer_v3/fvdb_extensions/models/__pycache__/point_transformer_v3m1_fvdb.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b49e1bfa895014b3729e73fef7176526fb6a6fc1 GIT binary patch literal 7665 zcmcIJTWlLwc6WxuHz`V@NXe2cj;V)DNtTlcPGUQfU$VTlEnBv@M2Re{70{~o5lXL z=gjbBMaddSJC@Hq_ug~wJ+C?UoZ-JXoi+l`@1NWmEwP0B3_F#_R7Rfv6(H+`Co_bn zcsfqaP|zCUbiy!Wpm2~CSu+7*K_8EJ^G2=)$ zXPhu@xkY9gcqebYMa{TqA{b(9<>*?=+wM_%k0vJxZ@)!&hhV&C(8unT31^ya(1C`f z@7X3((WJ~yi@~H6PKgOYWcRWchuMh~FT85A9Zkz~De)ltPBfUh5llwdx6|yN#d2rp zdU_}%9f>4@(fCj(l?d1-<3dmp*icj!*i>3%ucyR$HX3H-IT(Z;*{H+naio(DcGdX>6IKZgPSTrQ7#_9AzTu_bQ zO-55ml{piYb za}~Q(+9{B0X>cl<08!Y<8=&bVI~SDZzyjf*Bny(v&ZcEHE=Uq9re@QU95AU2Qm95* zli$P(axgTf8bS+c)f|ooWm!n7COH)cg`i5IE2TsU#bJ@igE>j#R&0R{@xL?-V3~YjcYWWu8eUJVB|b_O zyn&oIkoOK0yu&%~aNfJW;C(CSeJk%B%{Ly-+mB?KBhO(O3DxT#^{rWKlYY@+3LUH3 z3|Hw-)fV-8Y?-U*IYY};YbtbQl&9wR5F(xcKiI~e3L|UpIc^zqlsB$x?$h^xPkbU& z#;8!gb&m#}x*oo;hbftkkc<)jn|MRUtk7fR-54Pmi(-s5)vwZVg(zml(*7-Zujqmz zifP+ZsFytD8>Oo|#gZ|97-xv=u1FL^FWT}b#>$F}Rc@|K#=I4**2Z?!@Cv3_6$Ac@ z7$Q`i-K%>pfyeMngyM~e-8WStW5)JdxH46_vMaV&N9DX)3n$wvUXgJC&Cc3f1yiiN z`M#w}v$1jc@WAzu04U4G+I~58h&kFM1cywN1wKtaK zgL06)9+l_V1u?Y%k$W~MhvuTm$dHYNM*~O2h;&fHa3~^1c`lSniTpwK7!nJ-HU_~N z#-d3$Sz<4r9b|{!t*!|JKQ(IXrNuJU@psFo;NGclSQ3D8S_?UBG|5LpK^eHw*5UPPQloQ1on%o*wfqE(e2Ubq$w0r8mDYJ_Dyp$;@! zsYVN}AQ5WC_!eXF7 zwbfh?b_ETqCf#9FTTKYD6?wF4&M0xnwQAF>q2u-iQMksHm)S#UQLJ>F;FBhAg!;q`jV;CXFVn_9{x^0hq;LBt_w_ zT_&3>X=q;UTkl`%|7ft#+>>kW$v5}qox7LEzcAZYUjMzDtHICAtxsBat!LIUcgKIy z`ws(uKd_p~w~lAMCs*u6Z~J;=Epq4L-|heL$d5)=BYE%9tmoKmTggOPcfh=-b;VZn zwyxO!vt!rF>7t{x;ONRZx;B>X4m_C8`i>QRCvv_Mk1jsG{9fU5ICnXm^_|H3B99$& zMTfWG*qL+e+~DsVx_dL*b*Ruan(G?j~$_6d++VjB{S*t zZ`z1wN1>%R*V21uKHu`%@&w%Ze>{47v|#t=?Ea#~Q}nWT`tQcFo&(vIgFl)2v?D(< zk!zVKHnpr8H>fo$Y~$#DP8f%2d2G``TG|Tko}9bq&P?9@%JLZyxZv&2dHe5 zPcEM;`uht0;hcZ?!Nemz>mSbhr?QOiiQTp8%lb#M_K_?zB6>jOOV-Khz1N1(RYTCN zKF%`sTm<9~APW=-AfYF!J@->WZs=I!8Aq5FsGNQIg z@hC*)2zMP`qtx{}a@Rt1K>!+o=d_@&+BB!;FjlAy(p)+mjtjb@OCeNJ<*6pd{|F<0 zN!piwaZ zG${-~vtk5jQA_}>iW#6yu>iCyR)BC-0(2^NfDMWRpi6NAY*ZQmHYrU2n-w=ex6<+v z^+y!&fl*R>ZjgZItZIk2#)aZRNfM-|{{xR|jV2e;GRH>~s!d3SI9^zg=OopEcxWz| zOhV?XI4_9f>q^b+upIeZx4RcynAMmP-B*eq29rrBDD~{u$lL)?p;CB$5LU02m>?K2Z;^OwwsSx9?d$%>ebbnGeF5p&?I6R`%Ibs^S>vrULK5c46{L;6Nby@>VUe!CIdgR=p^R+xXYbZYS{ zRDHuh+G-i(2M=E-3Vb-gP?Mnn(}GS0-Z9>wjWJNVz}(hnl@fdF%3S6iR)6sD%={Q3 zFzdxagEwa2-Gm}z6LyDkLoq4lb}TmxWena^#k6FDS({va+hXW!TOP$0p%f-&*)j}W z7=`s)pSqOWE?0syQ7o7~YsoRAuebuSCgaFBGY}JE+Y}hF#;rR;sa7o)YIBNZyHPCu zr2VG|#G`HJe?TBceXDbSK>mWM#M19U%!<{=;WB%e{8x&BE8qtxuj1TJGNoa=QOyrr zP$FXHwB=D;70W{;H)l*A{t61OnkCz|kV97iuee_;VH%ajDN<)48hnHK8Vk|e>G<2} zuWBI&d;-|UuK(v2+V&LQh&tY0``dA)(`&pBfOma8@6MOs^F>dpvtj#JZCLR*@%1{o z&acw(4ZQ1qW5xIO6Y!Cj_D~)^ZSthDmLf^W9RAoi3!0jCuCuxHV5 ziCm`wO=rbH5Ki2SU@vDA9pe71-AGmcFI+pb`(^E`jPm#t1e^EQN8{$!*_w}L6Ki--zIe^&t+h_)B%^r zGI`?Z+E8+yL8z5nZ5!U4i(O%gP3;?R=9+p|OeGs}G#2c>yxsTL$M2ecc>2MvAD$~U zVu5wC*xs>n`niR$eXI5-{=vJKA9iH@Cstj>PWDd6-OJg|1FP1efA0f(&VOjt^?7gq zCs#hc^5Agxwd2J%Mt-dPNO?5=uM__?v1y?CE>qb?|E7hEOjE^{?mL&VuN-?MZyM;4 z=3L`}Rc1Au?K%KuU0dh+;@aZ+2WuY`S_g8i19yM7ux~84Z|t+yu@WIoVLJOAF8f}p z@LoFiUOLNNgTd?6IU1TtdYb+cn|J7EdZYEO>bu-33~^Ks+A zldiy;y)+EE+I*9EJJv6)UD~*Q_vmBK(8_q}AZhfjI5&5b1Cvy-v3+CgPTzxRkp4hJ z&ar=GY_%`jz8?(H(p7MG=iJ?QPk3T5vK9t*i=ri{r;M6%qXAhsxUbs@Y zFq^wDn>`=OhHk=mhB}JUWvCM<9X49;wB9?d_s;9R^C+@HUBG5^3I)eq-G_?qo#msC z=G=YR{u71%)4BfBh5qxoemHU}*FXK4dm5RzV934`%g(0@^EY$zH?uKC<6sUqjM8HU zI66w7G5{mkXuV0jH)#NpIr=;{Z&Mcxcs6Y2p58Q(Mo+=DC+FJp*wz2Jr+xkE+SLc9 zPmTZX883G4E_A<<>we=Q^Dvz6KCyOYb!?;WO9Szqq_UoI@R)7w^~BS$er4?nl*k4D z@f`el#=r^|%?e+-%Zq;_;V11!uX|4!4gYL3o@y`$TApHR7hs-ZHYzT>0AQ4bfu~0h zEF*vr477Tz)dFr61Pogj8$=0~h$4db0RTmnTEn6QGQP;L_T&$Xc;B8KS4~*n2F3Al zl}QM}p4xU&chN z@n3F*u5(Gmj9u)*Wmc^O#bQ)Me}Z42Zr}${5$hTeO9b5tSQcnMBI(zeOB5pTwNDVs zR@7t>r;rGfW-qMd76>q^97B$igfRcftsAsg1VxBh;%K6`9^uNgQpAq{nO@!+F zIT^^4fnP8*b(DH;8>B`{S1CfbEqe=8YmRE&u;;0sP5L-Zc{e(q6KtOw2;KWk8`94H EU!;{N#{d8T literal 0 HcmV?d00001 diff --git a/point_transformer_v3/fvdb_extensions/models/__pycache__/ptv3_fvdb.cpython-312.pyc b/point_transformer_v3/fvdb_extensions/models/__pycache__/ptv3_fvdb.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0239e65efa620750e9250b2e56d858bb030777b0 GIT binary patch literal 40740 zcmd_Td30M>dMEg>kstuB07wwrz(ph>QKYEFlC{&OcA1hb*_J8WFhqcxNRWB}wXndl zyQ-2vRjM&rm5QjYN{jBU9#co|n3b7Mlyv%(>zt&^nRKTCLsdXt)E#Bo=k)lGnWin- zsieE7=l9*W0(_KY*{Sp-kHmfV-FLTlw{N-M_uW4!F1B&FetNua^mFN@^4 z=P+^HRW8K!b0I^>IA$0(_8Z4d{ibnqzj@r!Z!vI1^u{z+&|kpr)_yCy7fE;9xP8EA z;Km*O#RiIL9xE9y?Js41%UIcXd4D3o^<$*=vD99qu6c~qjh9T#T)41>;|nl)O#KBf zakGXi$1ZReO)qg54AQ$QyepJOmK7xx-84$$rOL>Kt8R)9T)&&8?vYdbYp0iO$4|bn z$+!Q7-lu$r#wW+ZbZ~+n5A#0cGm?{qZz?(x8Dg1t+788h7bf_#QD10;4-dq~F8MBu#D>L^5AY+Q zr-HG8VP7y3@*NHi4TVD|!;$C&?~7fU3`aX{w*Bx=Qjr7U$(V1D4~~Z^J0C*ALp-Z- zhc6lqi;)5`wO)bqo5s5XgXcqMIwvpLKBBhpn+0?4$Usc6oSecp3zp|2_+r6)bR-(< z^BV+fFF!GPJQy2>zi=|h$3}uEWISo~vA#>o;IwF@_A^;QCD=BV6+( z6Ur&-of;p%)c3;4Qv*_;D7ny_zGr61FL5^veK$D2Q7}itV}pWeJUSGm_&(p?ywo*3 zF&^$37@pwIqe@*-exQrKW_|c#keWN%#o8tBlUbBN_+kvrh;NH_p(Dd%h#Nbag>4C1C&)^*h;fvw;4SWf1QHt*4 z%isz{QpOXL)6Ny8O-!~U4qriz6HcGsB3J`~2s%0t5Nv^fI1=G^1On%#f@9(vzMK-c z$)QqNQdSyI0D(Cz@IeC1>vH>f&@2c2o-*#xZf7ST<*7rir_jD+(kow z@d?gv6G~{x2KJ4g35P%_Y7CL znDX*+L7e+GhLF4n8MwGP&IOdo5Vzoo(y}XV4jIm3An>g*s~o1==+xPHSKJUc$_pRz zl)I9Pd=I%AS*~kiG!WUPn=1{xyo+)Tp({g{cM5Jws04CX20~;IxfinBPki~gKSqv5 z-TK<)&x(}9s%3x38aIER`%U9bX-VZ*^l1k07L9qYS)yRUBwW0&_$4EmHeBM}CRMk13_ zfCu4VY>Fp9K^e#w8^fO?uZgMH>Y1&WBfj?71V1p`8Hsd0!+<2!6B8*oHZslNk&n$N z34-d>j3zIeUB32cj7RE|fUN}Y=sT79C+5=OYZ7N`lMfTS3E=qrri+`J5KP`J7q>L! z7O-Nv`q~NLp^y`7BKz1JW~;>0$Y5e@ieUb6%#d zqG|gPjg|d=lVCp^=Fw+?=*VNBWj{B z8qZ%QSnxW`)51Rz8Hoh~JdG>9o=UZ_<`#^Rh){5ptz3eIwUS^aa40xV08&T8^f{YC zLaEgGVnYSG^8+>twikk9Q{jC)Kfwz&wRZ*cfqh4x7m9)dF>y)s7cxL5*frx)S|63M z#n)0b)w&i10s~{gXfzOr(lR6t$Tvi|L||^ZDtEc=l)rZqUPk{LoL{`b-S=@tC6^0R zR?ls#H(TG4FyFS;WE(fqz5cGfdj8Z0_BD5^>#uvRd9F8HYgnvVilnReC3?SoI9psZ zf9Zqb_79ue-roAw)=cx(RP)wsSbx60q^et*ldiEG(*k2AKEBzk}5@GTc`CABQU zeJj_}{?H`19lrYoc@gUuN^bSM7kodQm-qb&&RLgi*m0{bx#LK(?r6H=nWW>Hzx1wK z>`QxhB+OSzmeg!$Vg3^orCW+Yz(YZ!d_eR!^Nal@$mMstrF;{dudW`ZxH z3Vcx`oHy_rVoI)~TKFYJ8ZA;|%Rb5#Fu?GR;ii;UKliwKCz7o8j+WdL+zsRO#slFP zP2m9^OIFwy^yN)Iptcj^K5_brleE)s=9`h6Cp<>5M<=HEfpB05(@U_&g8Wc8#vX21 zgu-AbghepQ<_o``qL=1nFO*XLEJ5H5Vca(?;^;P0hMIDU*a8ta8@?1JpvOkAgvAA@ zNJ0drJ^7U7!-*{)k=FEoP z)P~;7hLfocC$qI}O9k)sCST~sDC&ORfFatAA)0YCr5sHQPo^B}F;vT3nbI|>(lrYs zsnQKe>jrVS(D0Bisxdv^i(nW48Iiylr#mb{1{vG|HQ`33PL{KxyM#Q{$BlY}X2yh6 zdIQHK=R0GH+*l=N6GI#};jIFo)so}jFfg=3`Zf%0Zd3utns~;zSw?(j0RHuB85vro z4kGe$>JjSzbZwP7STuEbwC@3GII5rqIp0&D$!nGGL0ZE7^RCx5t&SLa2%R>DVU$RR zyeqE8Flxq*4zET=aWHa#CJq{bpIiHQFY;rMlJ}9Xk(?$tH;n28<69BncL?Ul`PfB~ zh#~MHSVluZ%<2wG^E5eK48ID&nYk;s9u5x`vf00~C%J@nQ>*icj`7!~hk znyP|@+-~u7(o*;sGLBYa9=*Zcu{z!?yBRmlCVl^0|3X39-FA6@wzxb~>`N8<77l$-y#B7U>gw*f z-3u*CJ-33hyVK6SiQcTslX11DT|6?k|&rM`E|-B%mt8Wt+kQVdhwnV{QtT$!8RGANN z%i0qb`u>v3CAY1fY^ihh`NUU}RxewE{3SdM5x$BXVhad`fk0?t09cHlg4R+{(NQ9^ z2u~CWM6^*b0axN_it~&DX1#ul;ucF-#6Wm#ED*S1;I|@_5!8D{RDumOayse72{^y_ z2KTYiXt6Grah;pqJ$2($X8oSj`aRkD)@)Pj$Hko%|FV^He(3YpD?_Wh!L=Kjo5FynC|<%B7S+{TjojQH6k0(r|i2 z*PuCM3KxJ@E&vT&02)|7r4G$JXjN!sE2EW*7?o_pEG!ZnY#<+>m;l~9^bw&0AMJ%B z&cH432@c@3`v}ix1O%}J zC&hO5O#pjm#7{0cqCuVITpo|4({|azb6S<4>+{?AHgu~vcwF6N>^uLLy(IapO(f4W*{hC%n@I7~2 z)mIPB9lUyM?%2YSrP{P>H*n>mKLg%;H0^!z0eCZwS^1(0u`&EjEZv_I5z@{DDL;MG zLPc|cmn$Pnt^#mk4LC&%z0uFxPH8cZ@PF{xas=jCsZqH|rujt05Q#N}VZ& z&RmIWV_0;*N{wOGAPL03%)8`y2P6UYWJrRkkAD%d{6-0>ADAH0m7(PrnFsJnLY8&emq@2k+Rk#Y_kWlRgIadwp3MH!jY}2x%&Ft z>kIL8)n@VOt8-sn7)w`e!jro;<8Dv6+ZSuo?k5s9Enu#kw8g^AhTw+ zKT9oT0-K3{4nA=~pl|0-lJh(qEySf>V_kQdUPurZ=tgZ_wx%vyTc2IOQH8o6+siHf zyKc|21$RWMtX?i8pOvffE*Fu{rh&zuQtZ!K?UrT{;!;rq_$ik>aqj&Ff>uJ@&joWW zc`&yiRQNgIu8qN6D|pma#+_E-uFRc&J|aN`{uC<9zeEmOW(o4LWm71Qo*kLYgTD&g zLo5aUE@17-gTI)y4}!n6a%gzUAkYGo8@pmQ*{8SWtU``xVY7nP05)6HwM`3EA5sQk z?0J{GMj#ito4JSNQks_q9vQA6VIjy#*_c>SQ-z3pNEr{Vse+Ob77^TqZu$p@ND#Eo z8OIMtN+>$*r$(>~3ZAQqDE0YE^N|u|wrCctb?9iG00U@8kP*Cso(M&RvjixAp@=aw znYTpYWU^N>shB$(`C-ac#bh4DwN&P|g3P>w#L+k55HeG*gUsBOkeY05J0UmKAL=7F zt~FTNU7HBe$zuv$DUkz+(OGxPL5xoGm6AWJUc0z6UA>L`cj}w34__N55cBJ^0OpD=_7z#sSg3uuml%B$}blz})PddQI_Cx(Kkt2xATW9Xqu<_DHc$cnN& zX!2E25sINq?p0zKbQd{pRM{HSq%-JpP?Yb8o4^(&1QQoS#f&qB`mF~PtsrihM!Xp- z7^)`Se64Z|G9<}JTk%DMVD-z0k%|tDA|r;A^JmFN>`2(NrJi|z#@m(hb}iY{-kr(P zok{CX5fusH8&;q~Llj8PP5h#-!2$C@iwe~>u4W?z3%WmR(|HsK^|=NS=%Lh)b4(7g<7vFoZF}=0P*VSWLiJEFm-Y)3M8r z{q#a`3JL@(Lvo)vdR(v%kAy3f2ae_C>0=>sa)*v5H7{D)R)M|*a?(978)%xps`+todDd$0*(zZLvZDe+v*SFG}zU30s>yGI)!MlFA9 z&(aY&stB+%u9lRmW#Of?s|y=-MQlRm%nq6vYp;|;ofo6t&I{XH(SxKa zOW_fAUsi#dxq0Y8&D!N#7L`Vm7H*X(qe+5TA0dzEKZ2u@_O{#BW))lj zLbcDe&u>jvG>h;m_bC^at@17VLrG1~$?K7`k=66$)0sNFpW)j__Nvef1MUWbn|3c*w#4&-s zHiM^a0#Dlvp0yR~EFj&SQQ4t_fAxQy)g0{mg3PGk95?{q|?IB3$_B%qQaEn8b zx9u-s(5e*BszQ?f>WM+HkH`nMW&quv8V?MIgP|x-%ZFf_q`h4cj#FsN45&+`_q-6ujs*`FOMR}X4y0r z1b`tLmF&VKRX~y)h8RWyesLxf7A-ssp=)`Xe9aKITr@->e661{WSmBRD1mCtu9!j( zk1CX_{0>{gatEtHw5bPi@I3IVw8-P-rvaG?C*mPFRcWdudDVqSOXoAmI%Oe?7vi1r zrEx2s6qEqSI2TYBp-X1JCHTAOkY5J0Rfg(Hpsfr@9~;Y%1oMRg%dQpe1ib)5bVH8>N__T<}a;>?1V2g z5u>VxrUqi2zC(i$5Q~I6AQv5u`ntQK=lEDVeQ6*xGVaeUWwoj5Yd-`2FEErBhr`%- zfq?V`W#^OPL#Udjwpw>zJ1b0G4}8I>9ErX+uUO4GAh*pSS`rJoNTgBkuE6QeojuQ(F#PL?E<)j7Bi}d4`4%~s;eeU1B6gH9Tb_9qGa*OP zq}htFC@l*#OxdMH{zRcMW$9N?z+X_RKY~LjV22J0n5}Li21(Y_yl^Sy*#Lhr_O9QY zxjZv}DOK9Oc;ti9tx0P*`D8c^w{+`Wciji>Z3``LuYYU(Qg5n}*ef;}ThzfLbGSJ* zY1b~qFOoK}a!|%AMOkkPUY2HMhLCgz?dR_?5YuMON~BU@+FW5sB^V6&*8FRf(9XO| zMKEVfF$E6j?G(jL8p$vnID{Q134xu#cJ+)UrXYhr`xLmM7B4e0dbwAQfaOuJ3Ua72 zZVKatAyF8}aK#G-i3NX!#4hoIVWO$KvSzDg#jPRJJBk1hW5X29*y1*k-2#}P%wV3h#AY3XUrr==T!5Neb`IWA=1>gl)nw=h$T3JW>=sF$+lqwczjA!A;F>%8X3S+ zM{5OSK7HpWE|O3$rekz6I1pYxI5I}N^S*&8{yb(stu9dYkca-s;8~=d*JvWZBCL-#qR)yAPhjz7RZ!P&rZ|Je4=eBB)EI*Zs5mK+K$^LA-LJgX z={tF8as-Pi8e`%N)e;?>*KcAE2Bt4vMAs>uCTFhcRa8TI5+nP@0Tm}gq%kDNeopM> zTp4rihi>=vVS-Ev_7bQ?# zh{_g(fTqCdW=70vprcNtMgWlL&ynXF+;S^d*_d&+rrfO=cW27oxyYy88xyws1zbZ* zroJ;(-?=#bhwh(xf8xDee>7>W0n4(&nJBp9shba7pSU*hb5CdDV79t0Q@t)#y>9Vf zx_V3EaJIC1{@HK7o-Okrx^L}b>(Udq4yQKlPc|G#I%<)(x8b_?nm5_9`BvlY>YbMl zCwdYW5Vgl}(C~h1YVYx+qhX<7v2tl`%D*$|*mc)YF*}-a_%82XuH&k^mYbOLYeTBE zA?e$c@$E|acHKIY_VuJopGsPv5<#Z~sPL5Wla%1Zes^m78=4ce%1<}tce`dx zNOAh8*Gak(y>4X<*iKlFYccljS8jae=1jVDSJEnSmvfZ#2xDN(tS_zcUU|I6V;Juw zAWg{kDD7wyn}$|Iei7f0(}-0%jYu)4TOZz#6z?Q8CMjgBY|Phhyq@mbn=b81T6=yO zL-EApwdO$)DR}T$ER?{?$_8}hG@ul1^94eHYyi7iOual zg$D~GG-sn>{238iAYd*1nhy*BHt z$~f1hoa+{Q)6VXsqdQw%HUH!X#VzbbSIXH1v2f>>q+?6Yi;k4DWAS9#xiRV3NH6w$ zP`s9<*pPB=SaPMEPb3{rd{|MNY}k6swiHj+^` z3xgh+DJduE5SSW*(ku_3X%N2Es2KLnEf{iO4)Bv0Mhb%uucQl%N=5rZZy!Z($3LX* zOccZm^w^(j3_UV2M7#Jdxd!SzWTHAN;mRSfs7vEzUT&1wV0l-l5HYP_YS?8Z?3s#H`nyQnTF@N} z?A@So#u;mr3mDxd`_y~9VswZ4B>Uu62vN`Plta~f-1&}O7EB)9qlU}AI01pZ@{@X3 z`qLFJ9(`I3mwj=U-0lYM${jPOSu2X=H)uryUw+MO!>G@{z$$|Mn_n@8h!WVF|CYm> zK_i>J60naYHQMLKf@t3Kf;xG-A`5tpXSJ~3SDYE}rfwrq{oFH-|?j%)J~ z{hG3?HRY1LeBKqW{9^f!KA)E%9@+XL^<2C*ETt4CkbDP0PhaVwbPw=_Mc0~uadw<5fnnEfmB2Ck@w z^)Xxt?{x5)cS_~fhCu+>*1Sj%*PsaE}{2c(fLNVJjc<3 z+-Tq-F$|-@2gaz?$U&W18B>O2O!B9e$-b8L@xOwIf(e!__&N9(vv>NXLCB{Lli)_g zhYbckd~OQXw7@XO#Abe;`N8rIkMUvhn@3{dalw3kBz!@rRx=A=M;Y5KXHnU~ zsmOp}9uAHT^8X8^D1=c5Rcywx9@M(%ZALP*}`8BZ7Cep~z)Wj6H_(8UDAZ~fTf$}us zW_2oCfC*UhuTgy}APzACuD*s(!2b@tFcTXE> zq7G+NFq}c}KrI%C&?_e~>Gnl)rhQwgecK;wO73|!z2mu5`*SJx ziG=Mgm?!QJ?DgPyT!jedf@@(KtdNXzTgtgDFTN7A)N6VKeKthqWiH@2{Kv1f56UAZf905yS(!0_ea z*$e5ChJ=Z!`nRX;?TaVh4crLaI+tF*_d}R=ar&1zuK0ivZpQ)RJ?2MXw%T_c7F`z3 zrKvX!31 zfje&R{MPHcuI>8%p2cTVHJv|qcg-5^cxvVkEc9GEeErz9W9i!UX;0^@Y07wZraU{d zka{>gclf(U7oWZr`|$l?b=RPR)L4`%ydW!ybck+pYx6GMs2UFV)!kgT5 zFx7P^U4MAa{vi}yMy`!47A`$|yMF7e{jPV-_0DUZi&gK|-Ka}@w|?`;-Qw~$1D6By z=hDSKv5%6DndG{eG~7q%nhH0Q^4vIo4h{Njx}q)dG)7`o?cZgGRaMWQydJm~NL6*r znlU!!HH6twr{8fdzIe0tmLcVT^1bGtuKmf{+b3VjoD8H+25#5AHfy`n*!p(ETMbK9?>X5H z;)zq46R)ODyn4GakZBxDH4Y}vjwKrh(~aY^w)yV)jk6^m*0*Qs52fl4WnERI`2F3( zSy#O@5VQ4-FyHX@+PBu;w-+>2{FQ@qdTEH&FPJmF4JqFSaflUj<+aN$u3_!<$hFAg zi|@X2%OE*qZLdQTmYDw`vzd?~dLWy;Xi={Jq|v9{I_U+l|K)N9SEBM}4+sL#AbWs%87F zjp>%%nU;O2mVMc>irFI>*P4`T&BB?K%a7`mSvHw})yWVlFW8o`HJ;|+5WPb)F zS9-6G%#AGA(@YH|_lZnV}LV+;TXNu)e9zMjbUtm1yA%h;gB*6)O#DU z-kPkpKI`wy*3@R})_m-2v-B948Vq?btz1>hh0JT^T-D1(%xmMIHEd^I^tETXn0ZUM z=Cf^&LP}l}p}qxVIfaS{Y|hU6LjZ0Qm|?R6^2vpv0=Tfo0iBv6>=Iig zotmeQ?-S*dOCl54vyYsIR6t+=aFz)C90Olg1Q30D?>hjHd0I7~pC6=ElP6mIg_V7y z%^IVN*bT_P^myS+P#89)-!aMjzewHVtJXdwWmSHSXd6k@#d%0-V(;Z$>c-NLvL^uX zU}c|yi3h_vwkqhgQVK~cfcb@tHs+2LJ#p92NHj=Noiv~%!-XJdd>Cx1qzs{Fz?pMR zmVQY#5F62aB7H>s&_0;`X`_&KatK4`fdh#)3stF6!6q8}Aq&IIG8^BCNP_hc)Xice zu}jS82>&dFnJ_`H3s*v!HA`pTKbL%|Kj~_}?K=ITXm~JFy*5?7HeKEE zVVV1?bOHslT>e>M%Z)(+u~+DCL7KB3|+<(Yi8y4BnSqv zVO~a@&$T_NYtSyb4onSsu}9YO1#Jh|x`iw$EycKIOt2iFtCtb7&^m&pSFwyRV~)V| z9Bpvw8F`wqteO*J-)L2%SB+V~%2bq(u`O!Q-3(P@Xf3nQgvz^e7%!q_mcl;%6T}i( zAB+h?lbj(G%#;Q6kPc8lIXP^iQ+M(N_L&5phAx(@1@+LY!{iWi7}_ zx`i+{&Al5h9|87J;{yifY0h~3DUUx<3=#2p^L6Vr>q7Z8`@%D6&&E~4D!rM?)>LKd z0*J*e3HygRM$t|uOMOYJPsGfO`^nl;CF-kMMq`{1{=ZNd4K;D?WDU7Gip9Exoc~DQ zPpd6c;&NMTq{M~WEf>2b`(OQ(9zW@17R<^mjUu)~NeA##E*XyA3n560?a10(@;y|N zr_}|kjL_yPfM!=AG`p<$72(+i>t*(E;b0}x@NzKgD5TG&T1P1rn7RGHQ>4!#F7CgI zvPHI906ekbeEk46!Wi@En}{eX7BJ*Vi|Ra_nZSF}tX^5n=zo>Wf&kL~0TNJn5luAJCB00_%ER|Y; zUC3W+MMXh)PzOay9x)X31|kRs5)t6bVqstuGOAU4@gchYbIN*ZrcQ^|yuT6H6giLyQ3-2JHQK$4q*PSi6M` z?sSWrHfGvT5m?AZQMu~^BI8hxu}PlRM*cH${$Du$Vhx^8pn!!wDi9E@1kgH0jUZoI zy1>b>Oa^Wg%D`m=`1~y-BFbQ!hBEljSu=lRu|E0a;k5Gzl(ej1zFs{wcPisRBrU&L~$~vmD4mbO+%sO0IM+*z7x>sK025Nt$=)Ma~37xdE+SeUcRCU@(wtlSH z4Nu;B`nOAe;92m4kRS`-fB5`QPyOUnX5R~`eJ`ZeZ>84z@G+>}saO7zexM$r7<@CijxoJ<5m zy4WD2S;dwi4f!Gxy5bTf5in2F3Pk4OR076v+qwp9QbwU{UUXyU=5lGmoET9}NV?yy zd8=me(z{SHNHy+Gm+eVdXb0xV+>u0)YNFZow)ZXX;=Xr}+&GeM+y*8DtWF|Z|26+Y z@8X)YcY~Cz%vZ*#MsN0A?wfy-HWSj0b>LKE1uMedSGilV`dq#E676@T8h574c72{g ziQ3Bfmd7tNm%P&2oVIHD-Xi0h|9{X8eJgEVhN<7aP7WiyuwSR8zE&r^XwdNFIH}(j z;33{NOf<>7?S>YGy-il8tJhmz5;@%DkvQDUD{{D**G9~4JM%h-*hH@Umf1Y~T{=piu-@ z3B7O8ixo5;Em8n;+&!9;c{E;eK8?p%;^-{#dWb1`H(9&crCCQs<0U+Reyw#v#YhQiO=9 zIce#VfeuonxEEkKH+T-sVUv-bzeR>L>$8=#6SHg(CB`#peTKzu&SgN8o}*~UVg*P1 z@hGn`aE~S9R4U;WWzs+YQ0-*=J)ycpn;Kt|ISiTw8F-PxIge2u9O@S{a@j&Q$tSm3~+-^)}2;{h;<1bv0yN zb@-nT&K+gGdXgmpLFDSly4*i7%^yia_z&IDW`j;@+zW^?8_}& zZ5!FT@ci{B&3M7^rhmPzXQwsCmYTx!2XBgYL%31g^DpRepgkC)o{lNX$L)<*%E-)v zcs2<;oDtG;c66I;a;^s%GGtD$h&^RS0JW+Hg%hoS%WWzlao?pTU5m)j;=(r3>0dvc z*!O4Nh6VFNIPLAYd?e93w+4s!R5pSyopVY?VJ^nPkQF-Jbftw!D+QAC8kO~HD2ot} zl6=IYZVkgaHdxx&wmR;>Im(>Nh2#UuQMFt|J_lhO#pEj?grk&vWrS~(ldpo%4JY|r zgl)LVCt?`n6A=vfJ_{L4vWYONSq^?d6N9dM|0kT)@e7%)ti6Y-$$U39*BQP6*CHxn z5R+N+RG9N*&3P)yd9vj^*^!3>Z0chCO7JTMtGX;)h=Y6!LKXPoNZxQE4xePoA0Zb) z+#$nYE5mLof!)|bRlsiSArD;jC`zwjCjLSnjHCk6u>;Sb@L$ACEs{wi*dK@t1Q`5` zb_x!97?2M36N>3c0g$jzAC_YZ4)UuJ=}CzwVoZ!UVF*+LEiIIXBeWqb9%vO{Mv?0f zE^g0UCg1N+v=T`gR1`^sOrvN3WI#**5sQ|gS&70GXcpwgA1H@F$O@%pSKN*#noHsn z*zl?$GH5wzj`)Bsl!j#3!rY698Ony&O-~EW~?xM~K$iPC15p7j;IUY1sIX zFYZ>;$iADB2c;+*5-~;7adfjBPrXA<-4@#_KdbeWTMY=gQ$olq00EqF=jfAiZ1pae z3q;#x=4M>bd{cBI^uJObFMupa4Ec9R%(Ly09f-eC%~|%1?vj1#J*H^Cg$h6#xJBJV zMj+sc-F18M9jh!qBiDkN_j}Y*d*m8WnXD9(bhk)%m$pSiMRB)R;m+m15p^2X_g)Tj zx(Hg5&{0H7c;vFwJ3@6v;7z+2L;a#F{{s3%>IhUcFEBP5<=l|hQ?g&ZDfegqniptO9wR(tr_na!iC97WYDPOW*0|ALxt-Lz?wmOw zzsH>MP(IRp(eKj&(w~jH$$h|la>%Qn{Pk=;J!*~A`6Sn-K&q8hx>tQ^bK+0T+@+#R zMG}ecyZ}O<5&Q7@i-X?J4Y(!s(?8HgHzok?s3hKL^#9l^2iBZDJ&BFzR= zBCik?E7L2ZPuU(uJmOu6MjSP42B={SX{=IFy|uXAgEN&8JC$p(N_Gm)_u%~UIVm7d zM9XA4*{Vq9gY!hZNBwXuuKM6iAL+w$klhF8_jx{Cqi!X!qKH6Ld;h;2d#fu* zh}AaRMf-Y0WCWFizG=)cE+9@R50PoW4j6vD04+E1RJ;ym)^7^J65?|%mryzBTRIC| zvhPay(Ye@cp5$cycqfAY{a}>&+MkbLK|2j8+72I++oxsmB-)BNXzfuy*^r%_z?vzV zlSE&1`koF($R2DED`R8`g2SR=Ss0XsawRhZ#`eTq3_rj3c!8nvS3)HiTxV2n&<4)YUJL&I!1G1WQtVVWb@snlFTR&S(A zF*Moa*w%%i$ryWgCOk9}VP+!np7D?+_73>VbOt=kQ3eNLno@kXV*C#Mw;LaS@XVRx zdxpN^`mFtQV7jFd{j0=+*Z3HT_C+I!fP$N!*h9&weJreoCTj2=yr)b&j zBT6OmXVT9NAZ6)aGNnB_cPdlAAyvO2(fjRVAJ(t=VfgLgw}vyV+f%LE-!s1- zyIp@W(R<|>*pW@Je|+0o|6z3-lb`X-mtQ`X#leWMD4DJC!8Z!b-HHH zeP;^}QyxfBbxM?}_xzlS$9>Sx+Okxrv7PurWAR6uj8ta%~!_UmuGeycpyw*7;$?aL-(@fpM2 z(%?M{d9r2OKm4LCnXUETXjyz}u_aa8m3ZbuYS&Cnr>w4;ZR~s}y0rb)nOo zZ@c;bc%rIrk9hz~fpRIE8}A^9c;%OMw>S8mF-&Ih(lcxp16?RyNKZ>_-X)X0@_ojj znnq#VZNoRLaYHXV!-|s1@lH^Ebn=68n{oO(3MKj+0OZ6t3>mM4w+uNU6VU@a6v)Fq z=-7`i^UJXL49$E-PLW191GHfcfS2qQ12MqSV{*nJ70J-W3Ub<(T5J=3_jbbhp5X{* zVB1HpfUP=XxU6eEJ#HW-!fzO^8es0m4N{!@7p4madBcc+8mvDUJ2$ZPoI#N&mEw~Q zClZQw4uvDo=I3{dc0@a6(+eWAy>lld-h*S&-JMFjok%Yp#qxjAu3XZy>qjpx8RlKr z>43lTpH;8>(Tji0&aC-*b*%yFodjP*!wgoKA_$SPJ_DAFx6fcN%*d9YPW%tyL;(l5 z*ThXMp(M95(@ZKduZCNwI(v}_b%EZdqYMiPbn8hi9$M-@JhA9{x8X)Zdd-vR%I#1~ zcY4xPVK`~$F0#`L9efzZPddZ{XLI)PXoyl{-3yt&3^_%adj}ai?D%PR95~%$igoq` zLr~L!li>Q%l zUovhQR7C9!98>*K_82rVMb5v@w}8J!OG)0fS{rHV4W4=aC8t7tvDd)N7#r_0aoLa5;0o6n(1fm#8f% zx8che@iT=p)|n#k%nQ{~%GyVdcdqm>i4WbxOrcAX!_}Ug_2u+iVgA=Vyk7FI+vC^i z%D-g&rtG_k-$?E#Vbpn-+>!=P(=!kLddv@IJ;>T8NuTYPtd3BJ3Jg;Y92a>I0Y-dD zVf#czUqC2j^5V)tmO#x}K(ELT3*dm-1IS5Ke%nY3@{j4Uk$g<j8K1K#>fbw{eYBVFB@D8aL@laR|= zvQq$1uyAs5OS-cAvK`{7;k!;(!U}5#3Co@G%GnqlRsQqxHW)W4Et~EB=F}bFYorgm ze#yExo%C(LUA6=Hl~-PM%sJ*m3;Py#C+jFMTfYG|@4fV|PN&MY&lb&ZPqu9b zwX=S&VYcXYng8yJFT6GKzBgTeQv6Dtuc4s4wp*Er(o9q#*i})RW6rv~h7eO6hf|!f z)~BrX8EZ?*+H%|4y1FoJ`9}p9Rp3aRq7jzs_$?p#DD)*bAH6{i&?;^OAd3W}S=_Hs zC?Sm>5eVb=(r_n9N{vGkacC2}(tDU746IeqHt_!aKwtiMiu^C(oP?-UB|Xpz%X`RS z(z~=QYD9P$+kwEJ$a`ofADfn|3-gvyrO3I4szhIahh%DOoVRA#Xlb)SO2)s}ko9lB z+uELOUz_!>Bl{IK_4E%b7ENv0`qr!{@7ul&)=56zbjoO{U~)I)!DuCO7R+nqKnFF? zN9Wt;jx86lAQx=N%s;*0oIf}>vFv6+l^o7fH~~vMr{-Q?u3|x6ZNeHBRI5!`$Aap$ z2^(0DkK}q9nYW3AeVUoKg}iH+x0S?w+L*T;BE!pm=3UEG*DkMP-VTxiTF<<@kvP1@8kIiUcz)?OKf1cGZGP;vagejgVDi%!7-+N1bbvk zfMD1!*l}vicqdynqfFTd2COIoR{bdBK{4wCLaCVFz*H#M895)jD45C1R(GMK?}d}6 z0zEJE96H+b)X{xBb{IMSIdTYciHxQp@?9Y3SIGHc;% zYNO)u{;1(FRlt4KGv~S5FxPOOqv!km#wQS}sQ@1D^M*4f6t?BfZI`zZBj;0&o`11? z%*9!qpKu04WwzY?^~2OCmH&0Qo(0+rRghAAqwlZG7EAfZ962SN!~LxtU+-OZ 0: # Perform sliding window attention per-grid using flash attention + assert flash_attn is not None, "flash_attn is required for sliding_window_attention. Install with: pip install flash-attn" num_voxels = feats_j.shape[0] H = self.num_heads D = self.head_dim @@ -405,6 +419,7 @@ def forward(self, grid, feats): elif self.patch_size > 0: # Perform attention within each patch_size window per-grid using varlen API + assert flash_attn is not None, "flash_attn is required when patch_size > 0. Install with: pip install flash-attn" num_voxels = feats_j.shape[0] H = self.num_heads D = self.head_dim diff --git a/point_transformer_v3/pointcept_minimal/.github/workflows/formatter.yml b/point_transformer_v3/pointcept_minimal/.github/workflows/formatter.yml deleted file mode 100644 index a95391b..0000000 --- a/point_transformer_v3/pointcept_minimal/.github/workflows/formatter.yml +++ /dev/null @@ -1,20 +0,0 @@ -name: Formatter - -on: - workflow_dispatch: - push: - branches: - - main - pull_request: - types: [opened, reopened, synchronize] - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -jobs: - formatter: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: psf/black@stable diff --git a/point_transformer_v3/pointcept_minimal/.gitignore b/point_transformer_v3/pointcept_minimal/.gitignore deleted file mode 100644 index 59c3884..0000000 --- a/point_transformer_v3/pointcept_minimal/.gitignore +++ /dev/null @@ -1,16 +0,0 @@ -image/ -__pycache__ -**/build/ -**/*.egg-info/ -**/dist/ -*.so -exp -weights -data -*log* -outputs/ -.vscode -.idea -*/.DS_Store -**/*.out -Dockerfile diff --git a/point_transformer_v3/pointcept_minimal/LICENSE b/point_transformer_v3/pointcept_minimal/LICENSE deleted file mode 100644 index ee1fac1..0000000 --- a/point_transformer_v3/pointcept_minimal/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2023 Pointcept - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/point_transformer_v3/pointcept_minimal/README.md b/point_transformer_v3/pointcept_minimal/README.md deleted file mode 100644 index 333dddf..0000000 --- a/point_transformer_v3/pointcept_minimal/README.md +++ /dev/null @@ -1,988 +0,0 @@ -

- - - - - - pointcept - -
- -

- -[![Formatter](https://github.com/pointcept/pointcept/actions/workflows/formatter.yml/badge.svg)](https://github.com/pointcept/pointcept/actions/workflows/formatter.yml) - -**Pointcept** is a powerful and flexible codebase for point cloud perception research. It is also an official implementation of the following paper: -- πŸš€ **Concerto: Joint 2D-3D Self-Supervised Learning Emerges Spatial Representations** -*Yujia Zhang, Xiaoyang Wu, Yixing Lao, Chengyao Wang, Zhuotao Tian, Naiyan Wang, Hengshuang Zhao* -Conference on Neural Information Processing Systems (**NeurIPS**) 2025 -[ Pretrain ] [Concerto] - [ [Project](https://pointcept.github.io/Concerto/) ] [ [Bib](https://xywu.me/research/concerto/bib.txt) ] [ [HF Demo](https://huggingface.co/spaces/Pointcept/Concerto) ] [ [Inference](https://github.com/Pointcept/Concerto) ] [ [Weight](https://huggingface.co/Pointcept/Concerto) ] → [here](#concerto) - - -- **Sonata: Self-Supervised Learning of Reliable Point Representations** -*Xiaoyang Wu, Daniel DeTone, Duncan Frost, Tianwei Shen, Chris Xie, Nan Yang, Jakob Engel, Richard Newcombe, Hengshuang Zhao, Julian Straub* -IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2025 - Highlight -[ Pretrain ] [Sonata] - [ [Project](https://xywu.me/sonata/) ] [ [arXiv](https://arxiv.org/abs/2503.16429) ] [ [Bib](https://xywu.me/research/sonata/bib.txt) ] [ [Demo](https://github.com/facebookresearch/sonata) ] [ [Weight](https://huggingface.co/facebook/sonata) ] → [here](#sonata) - - -- **Point Transformer V3: Simpler, Faster, Stronger** -*Xiaoyang Wu, Li Jiang, Peng-Shuai Wang, Zhijian Liu, Xihui Liu, Yu Qiao, Wanli Ouyang, Tong He, Hengshuang Zhao* -IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2024 - Oral -[ Backbone ] [PTv3] - [ [arXiv](https://arxiv.org/abs/2312.10035) ] [ [Bib](https://xywu.me/research/ptv3/bib.txt) ] [ [Project](https://github.com/Pointcept/PointTransformerV3) ] → [here](https://github.com/Pointcept/PointTransformerV3) - - -- **OA-CNNs: Omni-Adaptive Sparse CNNs for 3D Semantic Segmentation** -*Bohao Peng, Xiaoyang Wu, Li Jiang, Yukang Chen, Hengshuang Zhao, Zhuotao Tian, Jiaya Jia* -IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2024 -[ Backbone ] [ OA-CNNs ] - [ [arXiv](https://arxiv.org/abs/2403.14418) ] [ [Bib](https://xywu.me/research/oacnns/bib.txt) ] → [here](#oa-cnns) - - -- **Towards Large-scale 3D Representation Learning with Multi-dataset Point Prompt Training** -*Xiaoyang Wu, Zhuotao Tian, Xin Wen, Bohao Peng, Xihui Liu, Kaicheng Yu, Hengshuang Zhao* -IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2024 -[ Pretrain ] [PPT] - [ [arXiv](https://arxiv.org/abs/2308.09718) ] [ [Bib](https://xywu.me/research/ppt/bib.txt) ] → [here](#point-prompt-training-ppt) - - -- **Masked Scene Contrast: A Scalable Framework for Unsupervised 3D Representation Learning** -*Xiaoyang Wu, Xin Wen, Xihui Liu, Hengshuang Zhao* -IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2023 -[ Pretrain ] [ MSC ] - [ [arXiv](https://arxiv.org/abs/2303.14191) ] [ [Bib](https://xywu.me/research/msc/bib.txt) ] → [here](#masked-scene-contrast-msc) - - -- **Learning Context-aware Classifier for Semantic Segmentation** (3D Part) -*Zhuotao Tian, Jiequan Cui, Li Jiang, Xiaojuan Qi, Xin Lai, Yixin Chen, Shu Liu, Jiaya Jia* -AAAI Conference on Artificial Intelligence (**AAAI**) 2023 - Oral -[ SemSeg ] [ CAC ] - [ [arXiv](https://arxiv.org/abs/2303.11633) ] [ [Bib](https://xywu.me/research/cac/bib.txt) ] [ [2D Part](https://github.com/tianzhuotao/CAC) ] → [here](#context-aware-classifier) - - -- **Point Transformer V2: Grouped Vector Attention and Partition-based Pooling** -*Xiaoyang Wu, Yixing Lao, Li Jiang, Xihui Liu, Hengshuang Zhao* -Conference on Neural Information Processing Systems (**NeurIPS**) 2022 -[ Backbone ] [ PTv2 ] - [ [arXiv](https://arxiv.org/abs/2210.05666) ] [ [Bib](https://xywu.me/research/ptv2/bib.txt) ] → [here](#point-transformers) - - -- **Point Transformer** -*Hengshuang Zhao, Li Jiang, Jiaya Jia, Philip Torr, Vladlen Koltun* -IEEE International Conference on Computer Vision (**ICCV**) 2021 - Oral -[ Backbone ] [ PTv1 ] - [ [arXiv](https://arxiv.org/abs/2012.09164) ] [ [Bib](https://hszhao.github.io/papers/iccv21_pointtransformer_bib.txt) ] → [here](#point-transformers) - -Additionally, **Pointcept** integrates the following excellent work (contain above): -Backbone: -[MinkUNet](https://github.com/NVIDIA/MinkowskiEngine) ([here](#sparseunet)), -[SpUNet](https://github.com/traveller59/spconv) ([here](#sparseunet)), -[SPVCNN](https://github.com/mit-han-lab/spvnas) ([here](#spvcnn)), -[OACNNs](https://arxiv.org/abs/2403.14418) ([here](#oa-cnns)), -[PTv1](https://arxiv.org/abs/2012.09164) ([here](#point-transformers)), -[PTv2](https://arxiv.org/abs/2210.05666) ([here](#point-transformers)), -[PTv3](https://arxiv.org/abs/2312.10035) ([here](#point-transformers)), -[StratifiedFormer](https://github.com/dvlab-research/Stratified-Transformer) ([here](#stratified-transformer)), -[OctFormer](https://github.com/octree-nn/octformer) ([here](#octformer)), -[Swin3D](https://github.com/microsoft/Swin3D) ([here](#swin3d)); -Semantic Segmentation: -[Mix3d](https://github.com/kumuji/mix3d) ([here](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet/semseg-spunet-v1m1-0-base.py#L5)), -[CAC](https://arxiv.org/abs/2303.11633) ([here](#context-aware-classifier)); -Instance Segmentation: -[PointGroup](https://github.com/dvlab-research/PointGroup) ([here](#pointgroup)); -Pre-training: -[PointContrast](https://github.com/facebookresearch/PointContrast) ([here](#pointcontrast)), -[Contrastive Scene Contexts](https://github.com/facebookresearch/ContrastiveSceneContexts) ([here](#contrastive-scene-contexts)), -[Masked Scene Contrast](https://arxiv.org/abs/2303.14191) ([here](#masked-scene-contrast-msc)), -[Point Prompt Training](https://arxiv.org/abs/2308.09718) ([here](#point-prompt-training-ppt)), -[Sonata](https://arxiv.org/abs/2503.16429) ([here](#sonata)), -[Concerto]() ([here](#concerto)); -Datasets: -[ScanNet](http://www.scan-net.org/) ([here](#scannet-v2)), -[ScanNet200](http://www.scan-net.org/) ([here](#scannet-v2)), -[ScanNet++](https://kaldir.vc.in.tum.de/scannetpp/) ([here](#scannet)), -[S3DIS](https://docs.google.com/forms/d/e/1FAIpQLScDimvNMCGhy_rmBA2gHfDu3naktRm6A8BPwAWWDv-Uhm6Shw/viewform?c=0&w=1) ([here](#s3dis)), -[ArkitScene](https://github.com/apple/ARKitScenes) ([here](#arkitscenes)), -[HM3D](https://github.com/facebookresearch/habitat-matterport3d-dataset/) ([here](#habitat---matterport-3d-hm3d)), -[Matterport3D](https://niessner.github.io/Matterport/) ([here](#matterport3d)), -[Structured3D](https://structured3d-dataset.org/) ([here](#structured3d)), -[SemanticKITTI](http://www.semantic-kitti.org/) ([here](#semantickitti)), -[nuScenes](https://www.nuscenes.org/nuscenes) ([here](#nuscenes)), -[ModelNet40](https://modelnet.cs.princeton.edu/) ([here](#modelnet)), -[Waymo](https://waymo.com/open/) ([here](#waymo)). - - -## Highlights -- *Apr 2025* πŸš€: We now support `wandb`, check the [Quick Start](#quick-start) training section for more information. (Thanks @Streakfull for his contribution!) -- *Mar 2025* πŸš€: **Sonata** is accepted by CVPR 2025 and selected as one of the **Highlight** presentations (3.0% submissions)! We release the code with Pointcept v1.6.0. We release the pre-training **[code](#sonata)** along with Pointcept v1.6.0 and provide an easy-to-use pre-trained model for inference, tuning, and visualization in our project **[repository](https://github.com/facebookresearch/sonata)** hosted by Meta. -- *May 2024*: In v1.5.2, we redesigned the default structure for each dataset for better performance. Please **re-preprocess** datasets or **download** our preprocessed datasets from **[here](https://huggingface.co/Pointcept)**. -- *Apr 2024*: **PTv3** is selected as one of the 90 **Oral** papers (3.3% accepted papers, 0.78% submissions) by CVPR'24! -- *Mar 2024*: We release code for **OA-CNNs**, accepted by CVPR'24. Issue related to **OA-CNNs** can @Pbihao. -- *Feb 2024*: **PTv3** and **PPT** are accepted by CVPR'24, another **two** papers by our Pointcept team have also been accepted by CVPR'24 πŸŽ‰πŸŽ‰πŸŽ‰. We will make them publicly available soon! -- *Dec 2023*: **PTv3** is released on arXiv, and the code is available in Pointcept. PTv3 is an efficient backbone model that achieves SOTA performances across indoor and outdoor scenarios. -- *Aug 2023*: **PPT** is released on arXiv. PPT presents a multi-dataset pre-training framework that achieves SOTA performance in both **indoor** and **outdoor** scenarios. It is compatible with various existing pre-training frameworks and backbones. A **pre-release** version of the code is accessible; for those interested, please feel free to contact me directly for access. -- *Mar 2023*: We released our codebase, **Pointcept**, a highly potent tool for point cloud representation learning and perception. We welcome new work to join the _Pointcept_ family and highly recommend reading [Quick Start](#quick-start) before starting your trail. -- *Feb 2023*: **MSC** and **CeCo** accepted by CVPR 2023. _MSC_ is a highly efficient and effective pretraining framework that facilitates cross-dataset large-scale pretraining, while _CeCo_ is a segmentation method specifically designed for long-tail datasets. Both approaches are compatible with all existing backbone models in our codebase, and we will soon make the code available for public use. -- *Jan 2023*: **CAC**, oral work of AAAI 2023, has expanded its 3D result with the incorporation of Pointcept. This addition will allow CAC to serve as a pluggable segmentor within our codebase. -- *Sep 2022*: **PTv2** accepted by NeurIPS 2022. It is a continuation of the Point Transformer. The proposed GVA theory can apply to most existing attention mechanisms, while Grid Pooling is also a practical addition to existing pooling methods. - -## Citation -If you find _Pointcept_ useful to your research, please cite our work as encouragement. (ΰ©­ΛŠκ’³β€‹Λ‹)੭✧ -``` -@misc{pointcept2023, - title={Pointcept: A Codebase for Point Cloud Perception Research}, - author={Pointcept Contributors}, - howpublished = {\url{https://github.com/Pointcept/Pointcept}}, - year={2023} -} -``` - -## Overview - -- [Installation](#installation) -- [Data Preparation](#data-preparation) -- [Quick Start](#quick-start) -- [Model Zoo](#model-zoo) -- [Acknowledgement](#acknowledgement) - -## Installation - -### Requirements -- Ubuntu: 18.04 and above. -- CUDA: 11.3 and above. -- PyTorch: 1.10.0 and above. - -### Conda Environment -- **Method 1**: Utilize conda `environment.yml` to create a new environment with one line code: - ```bash - # Create and activate conda environment named as 'pointcept-torch2.5.0-cu12.4' - # cuda: 12.4, pytorch: 2.5.0 - - # run `unset CUDA_PATH` if you have installed cuda in your local environment - conda env create -f environment.yml --verbose - conda activate pointcept-torch2.5.0-cu12.4 - ``` - -- **Method 2**: Use our pre-built Docker image and refer to the supported tags [here](https://hub.docker.com/repository/docker/pointcept/pointcept/general). Quickly verify the Docker image on your local machine with the following command: - ```bash - docker run --gpus all -it --rm pointcept/pointcept:v1.6.0-pytorch2.5.0-cuda12.4-cudnn9-devel bash - git clone https://github.com/facebookresearch/sonata - cd sonata - export PYTHONPATH=./ && python demo/0_pca.py - # Ignore the GUI error, we cannot expect a container to have its GUI, right? - ``` - -- **Method 3**: Manually create a conda environment: - ```bash - conda create -n pointcept python=3.10 -y - conda activate pointcept - - # (Optional) If no CUDA installed - conda install nvidia/label/cuda-12.4.1::cuda conda-forge::cudnn conda-forge::gcc=13.2 conda-forge::gxx=13.2 -y - - conda install ninja -y - # Choose version you want here: https://pytorch.org/get-started/previous-versions/ - conda install pytorch==2.5.0 torchvision==0.13.1 torchaudio==0.20.0 pytorch-cuda=12.4 -c pytorch -y - conda install h5py pyyaml -c anaconda -y - conda install sharedarray tensorboard tensorboardx wandb yapf addict einops scipy plyfile termcolor timm -c conda-forge -y - conda install pytorch-cluster pytorch-scatter pytorch-sparse -c pyg -y - pip install torch-geometric - - # spconv (SparseUNet) - # refer https://github.com/traveller59/spconv - pip install spconv-cu124 - - # PPT (clip) - pip install ftfy regex tqdm - pip install git+https://github.com/openai/CLIP.git - - # PTv1 & PTv2 or precise eval - cd libs/pointops - # usual - python setup.py install - # docker & multi GPU arch - TORCH_CUDA_ARCH_LIST="ARCH LIST" python setup.py install - # e.g. 7.5: RTX 3000; 8.0: a100 More available in: https://developer.nvidia.com/cuda-gpus - TORCH_CUDA_ARCH_LIST="7.5 8.0" python setup.py install - cd ../.. - - # Open3D (visualization, optional) - pip install open3d - ``` - -## Data Preparation - -### ScanNet v2 - -The preprocessing supports semantic and instance segmentation for both `ScanNet20`, `ScanNet200`, and `ScanNet Data Efficient`. -- Download the [ScanNet](http://www.scan-net.org/) v2 dataset. -- Run preprocessing code for raw ScanNet as follows: - - ```bash - # RAW_SCANNET_DIR: the directory of downloaded ScanNet v2 raw dataset. - # PROCESSED_SCANNET_DIR: the directory of the processed ScanNet dataset (output dir). - python pointcept/datasets/preprocessing/scannet/preprocess_scannet.py --dataset_root ${RAW_SCANNET_DIR} --output_root ${PROCESSED_SCANNET_DIR} - ``` -- (Optional) Download ScanNet Data Efficient files: - ```bash - # download-scannet.py is the official download script - # or follow instructions here: https://kaldir.vc.in.tum.de/scannet_benchmark/data_efficient/documentation#download - python download-scannet.py --data_efficient -o ${RAW_SCANNET_DIR} - # unzip downloads - cd ${RAW_SCANNET_DIR}/tasks - unzip limited-annotation-points.zip - unzip limited-reconstruction-scenes.zip - # copy files to processed dataset folder - mkdir ${PROCESSED_SCANNET_DIR}/tasks - cp -r ${RAW_SCANNET_DIR}/tasks/points ${PROCESSED_SCANNET_DIR}/tasks - cp -r ${RAW_SCANNET_DIR}/tasks/scenes ${PROCESSED_SCANNET_DIR}/tasks - ``` -- (Alternative) Our preprocess data can be directly downloaded [[here](https://huggingface.co/datasets/Pointcept/scannet-compressed)], please agree the official license before download it. - -- Link processed dataset to codebase: - ```bash - # PROCESSED_SCANNET_DIR: the directory of the processed ScanNet dataset. - mkdir data - ln -s ${PROCESSED_SCANNET_DIR} ${CODEBASE_DIR}/data/scannet - ``` - -### ScanNet++ -- Download the [ScanNet++](https://kaldir.vc.in.tum.de/scannetpp/) dataset. -- Run preprocessing code for raw ScanNet++ as follows: - ```bash - # RAW_SCANNETPP_DIR: the directory of downloaded ScanNet++ raw dataset. - # PROCESSED_SCANNETPP_DIR: the directory of the processed ScanNet++ dataset (output dir). - # NUM_WORKERS: the number of workers for parallel preprocessing. - python pointcept/datasets/preprocessing/scannetpp/preprocess_scannetpp.py --dataset_root ${RAW_SCANNETPP_DIR} --output_root ${PROCESSED_SCANNETPP_DIR} --num_workers ${NUM_WORKERS} - ``` -- Sampling and chunking large point cloud data in train/val split as follows (only used for training): - ```bash - # PROCESSED_SCANNETPP_DIR: the directory of the processed ScanNet++ dataset (output dir). - # NUM_WORKERS: the number of workers for parallel preprocessing. - python pointcept/datasets/preprocessing/sampling_chunking_data.py --dataset_root ${PROCESSED_SCANNETPP_DIR} --grid_size 0.01 --chunk_range 6 6 --chunk_stride 3 3 --split train --num_workers ${NUM_WORKERS} - python pointcept/datasets/preprocessing/sampling_chunking_data.py --dataset_root ${PROCESSED_SCANNETPP_DIR} --grid_size 0.01 --chunk_range 6 6 --chunk_stride 3 3 --split val --num_workers ${NUM_WORKERS} - ``` -- Link processed dataset to codebase: - ```bash - # PROCESSED_SCANNETPP_DIR: the directory of the processed ScanNet dataset. - mkdir data - ln -s ${PROCESSED_SCANNETPP_DIR} ${CODEBASE_DIR}/data/scannetpp - ``` - -### S3DIS - -- Download S3DIS data by filling this [Google form](https://docs.google.com/forms/d/e/1FAIpQLScDimvNMCGhy_rmBA2gHfDu3naktRm6A8BPwAWWDv-Uhm6Shw/viewform?c=0&w=1). Download the `Stanford3dDataset_v1.2.zip` file and unzip it. -- Fix error in `Area_5/office_19/Annotations/ceiling` Line 323474 (103.0οΏ½0000 => 103.000000). -- (Optional) Download Full 2D-3D S3DIS dataset (no XYZ) from [here](https://github.com/alexsax/2D-3D-Semantics) for parsing normal. -- Run preprocessing code for S3DIS as follows: - - ```bash - # S3DIS_DIR: the directory of downloaded Stanford3dDataset_v1.2 dataset. - # RAW_S3DIS_DIR: the directory of Stanford2d3dDataset_noXYZ dataset. (optional, for parsing normal) - # PROCESSED_S3DIS_DIR: the directory of processed S3DIS dataset (output dir). - - # S3DIS without aligned angle - python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} - # S3DIS with aligned angle - python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} --align_angle - # S3DIS with normal vector (recommended, normal is helpful) - python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} --raw_root ${RAW_S3DIS_DIR} --parse_normal - python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} --raw_root ${RAW_S3DIS_DIR} --align_angle --parse_normal - ``` - -- (Alternative) Our preprocess data can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/s3dis-compressed -)] (with normal vector and aligned angle), please agree with the official license before downloading it. - -- Link processed dataset to codebase. - ```bash - # PROCESSED_S3DIS_DIR: the directory of processed S3DIS dataset. - mkdir data - ln -s ${PROCESSED_S3DIS_DIR} ${CODEBASE_DIR}/data/s3dis - ``` - - -### ArkitScenes - -- Download ArkitScenes 3DOD split with the following commands: - ```bash - # RAW_AS_DIR: the directory of downloaded Raw ArkitScenes dataset. - git clone https://github.com/apple/ARKitScenes.git - cd ARKitScenes - python download_data.py 3dod --download_dir $RAW_AS_DIR --video_id_csv threedod/3dod_train_val_splits.csv - ``` -- Run preprocessing code for ArkitScenes as follows: - ```bash - # RAW_AS_DIR: the directory of downloaded ArkitScenes dataset. - # PROCESSED_AS_DIR: the directory of processed ArkitScenes dataset (output dir). - # NUM_WORKERS: Number for workers for preprocessing, default same as cpu count (might OOM). - cd $POINTCEPT_DIR - export PYTHONPATH=./ - python pointcept/datasets/preprocessing/arkitscenes/preprocess_arkitscenes_mesh.py --dataset_root $RAW_AS_DIR --output_root $PROCESSED_AS_DIR --num_workers $NUM_WORKERS - ``` - -- (Alternative) Our preprocess data can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/arkitscenes-compressed -)] please read and agree the official [license](https://github.com/apple/ARKitScenes?tab=License-1-ov-file#readme) before download it. (Unzip with the following command: - `find ./ -name '*.tar.gz' | xargs -n 1 -P 8 -I {} sh -c 'tar -xzvf {}'`) - -- Link processed dataset to codebase. - ```bash - # PROCESSED_AR_DIR: the directory of processed ArkitScenes dataset (output dir). - mkdir data - ln -s ${PROCESSED_AR_DIR} ${CODEBASE_DIR}/data/arkitscenes - ``` - -### Habitat - Matterport 3D (HM3D) - -- Download HM3D `hm3d-train-glb-v0.2.tar` and `hm3d-val-glb-v0.2.tar` with instuction [here](https://github.com/facebookresearch/habitat-sim/blob/main/DATASETS.md#habitat-matterport-3d-research-dataset-hm3d) and unzip them. -- Run preprocessing code for HM3D as follows: - ```bash - # RAW_HM_DIR: the directory of downloaded HM3D dataset. - # PROCESSED_HM_DIR: the directory of processed HM3D dataset (output dir). - # NUM_WORKERS: Number for workers for preprocessing, default same as cpu count (might OOM). - export PYTHONPATH=./ - python pointcept/datasets/preprocessing/hm3d/preprocess_hm3d.py --dataset_root $RAW_HM_DIR --output_root $PROCESSED_HM_DIR --density 0.02 --num_workers $NUM_WORKERS - ``` - -- (Alternative) Our preprocess data can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/hm3d-compressed -)] please read and agree the official [license](https://matterport.com/legal/matterport-end-user-license-agreement-academic-use-model-data) before download it. (Unzip with the following command: - `find ./ -name '*.tar.gz' | xargs -n 1 -P 4 -I {} sh -c 'tar -xzvf {}'`) - -- Link processed dataset to codebase. - ```bash - # PROCESSED_HM_DIR: the directory of processed HM3D dataset (output dir). - mkdir data - ln -s ${PROCESSED_HM_DIR} ${CODEBASE_DIR}/data/hm3d - - -### Matterport3D -- Follow [this page](https://niessner.github.io/Matterport/#download) to request access to the dataset. -- Download the "region_segmentation" type, which represents the division of a scene into individual rooms. - ```bash - # download-mp.py is the official download script - # MATTERPORT3D_DIR: the directory of downloaded Matterport3D dataset. - python download-mp.py -o {MATTERPORT3D_DIR} --type region_segmentations - ``` -- Unzip the region_segmentations data - ```bash - # MATTERPORT3D_DIR: the directory of downloaded Matterport3D dataset. - python pointcept/datasets/preprocessing/matterport3d/unzip_matterport3d_region_segmentation.py --dataset_root {MATTERPORT3D_DIR} - ``` -- Run preprocessing code for Matterport3D as follows: - ```bash - # MATTERPORT3D_DIR: the directory of downloaded Matterport3D dataset. - # PROCESSED_MATTERPORT3D_DIR: the directory of processed Matterport3D dataset (output dir). - # NUM_WORKERS: the number of workers for this preprocessing. - python pointcept/datasets/preprocessing/matterport3d/preprocess_matterport3d_mesh.py --dataset_root ${MATTERPORT3D_DIR} --output_root ${PROCESSED_MATTERPORT3D_DIR} --num_workers ${NUM_WORKERS} - ``` -- Link processed dataset to codebase. - ```bash - # PROCESSED_MATTERPORT3D_DIR: the directory of processed Matterport3D dataset (output dir). - mkdir data - ln -s ${PROCESSED_MATTERPORT3D_DIR} ${CODEBASE_DIR}/data/matterport3d - ``` - -Following the instruction of [OpenRooms](https://github.com/ViLab-UCSD/OpenRooms), we remapped Matterport3D's categories to ScanNet 20 semantic categories with the addition of a ceiling category. -* (Alternative) Our preprocess data can also be downloaded [here](https://huggingface.co/datasets/Pointcept/matterport3d-compressed), please agree the official license before download it. - - -### Structured3D - -- Download Structured3D panorama related and perspective (full) related zip files by filling this [Google form](https://docs.google.com/forms/d/e/1FAIpQLSc0qtvh4vHSoZaW6UvlXYy79MbcGdZfICjh4_t4bYofQIVIdw/viewform?pli=1) (no need to unzip them). -- Organize all downloaded zip file in one folder (`${STRUCT3D_DIR}`). -- Run preprocessing code for Structured3D as follows: - ```bash - # STRUCT3D_DIR: the directory of downloaded Structured3D dataset. - # PROCESSED_STRUCT3D_DIR: the directory of processed Structured3D dataset (output dir). - # NUM_WORKERS: Number for workers for preprocessing, default same as cpu count (might OOM). - export PYTHONPATH=./ - python pointcept/datasets/preprocessing/structured3d/preprocess_structured3d.py --dataset_root ${STRUCT3D_DIR} --output_root ${PROCESSED_STRUCT3D_DIR} --num_workers ${NUM_WORKERS} --grid_size 0.01 --fuse_prsp --fuse_pano - ``` -Following the instruction of [Swin3D](https://arxiv.org/abs/2304.06906), we keep 25 categories with frequencies of more than 0.001, out of the original 40 categories. - -[//]: # (- (Alternative) Our preprocess data can also be downloaded [[here]()], please agree the official license before download it.) - -- (Alternative) Our preprocess data can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/structured3d-compressed -)] (with perspective views and panorama view, 471.7G after unzipping), please agree the official license before download it. (Unzip with the following command: - `find ./ -name '*.tar.gz' | xargs -n 1 -P 15 -I {} sh -c 'tar -xzvf {}'`) - -- Link processed dataset to codebase. - ```bash - # PROCESSED_STRUCT3D_DIR: the directory of processed Structured3D dataset (output dir). - mkdir data - ln -s ${PROCESSED_STRUCT3D_DIR} ${CODEBASE_DIR}/data/structured3d - ``` - -### SemanticKITTI -- Download [SemanticKITTI](http://www.semantic-kitti.org/dataset.html#download) dataset. -- Link dataset to codebase. - ```bash - # SEMANTIC_KITTI_DIR: the directory of SemanticKITTI dataset. - # |- SEMANTIC_KITTI_DIR - # |- dataset - # |- sequences - # |- 00 - # |- 01 - # |- ... - - mkdir -p data - ln -s ${SEMANTIC_KITTI_DIR} ${CODEBASE_DIR}/data/semantic_kitti - ``` - -### nuScenes -- Download the official [NuScene](https://www.nuscenes.org/nuscenes#download) dataset (with Lidar Segmentation) and organize the downloaded files as follows: - ```bash - NUSCENES_DIR - │── samples - │── sweeps - │── lidarseg - ... - │── v1.0-trainval - │── v1.0-test - ``` -- Run information preprocessing code (modified from OpenPCDet) for nuScenes as follows: - ```bash - # NUSCENES_DIR: the directory of downloaded nuScenes dataset. - # PROCESSED_NUSCENES_DIR: the directory of processed nuScenes dataset (output dir). - # MAX_SWEEPS: Max number of sweeps. Default: 10. - pip install nuscenes-devkit pyquaternion - python pointcept/datasets/preprocessing/nuscenes/preprocess_nuscenes_info.py --dataset_root ${NUSCENES_DIR} --output_root ${PROCESSED_NUSCENES_DIR} --max_sweeps ${MAX_SWEEPS} --with_camera - ``` -- (Alternative) Our preprocess nuScenes information data can also be downloaded [[here]( -https://huggingface.co/datasets/Pointcept/nuscenes-compressed)] (only processed information, still need to download raw dataset and link to the folder), please agree the official license before download it. - -- Link raw dataset to processed NuScene dataset folder: - ```bash - # NUSCENES_DIR: the directory of downloaded nuScenes dataset. - # PROCESSED_NUSCENES_DIR: the directory of processed nuScenes dataset (output dir). - ln -s ${NUSCENES_DIR} {PROCESSED_NUSCENES_DIR}/raw - ``` - then the processed nuscenes folder is organized as follows: - ```bash - nuscene - |── raw - │── samples - │── sweeps - │── lidarseg - ... - │── v1.0-trainval - │── v1.0-test - |── info - ``` - -- Link processed dataset to codebase. - ```bash - # PROCESSED_NUSCENES_DIR: the directory of processed nuScenes dataset (output dir). - mkdir data - ln -s ${PROCESSED_NUSCENES_DIR} ${CODEBASE_DIR}/data/nuscenes - ``` - -### Waymo -- Download the official [Waymo](https://waymo.com/open/download/) dataset (v1.4.3) and organize the downloaded files as follows: - ```bash - WAYMO_RAW_DIR - │── training - │── validation - │── testing - ``` -- Install the following dependence: - ```bash - # If shows "No matching distribution found", download whl directly from Pypi and install the package. - conda create -n waymo python=3.10 -y - conda activate waymo - pip install waymo-open-dataset-tf-2-12-0 - ``` -- Run the preprocessing code as follows: - ```bash - # WAYMO_DIR: the directory of the downloaded Waymo dataset. - # PROCESSED_WAYMO_DIR: the directory of the processed Waymo dataset (output dir). - # NUM_WORKERS: num workers for preprocessing - python pointcept/datasets/preprocessing/waymo/preprocess_waymo.py --dataset_root ${WAYMO_DIR} --output_root ${PROCESSED_WAYMO_DIR} --splits training validation --num_workers ${NUM_WORKERS} - ``` - -- Link processed dataset to the codebase. - ```bash - # PROCESSED_WAYMO_DIR: the directory of the processed Waymo dataset (output dir). - mkdir data - ln -s ${PROCESSED_WAYMO_DIR} ${CODEBASE_DIR}/data/waymo - ``` - -### ModelNet -- Download [modelnet40_normal_resampled.zip](https://huggingface.co/datasets/Pointcept/modelnet40_normal_resampled-compressed) and unzip -- Link dataset to the codebase. - ```bash - mkdir -p data - ln -s ${MODELNET_DIR} ${CODEBASE_DIR}/data/modelnet40_normal_resampled - ``` - -## Quick Start - -### Training -**Train from scratch.** The training processing is based on configs in `configs` folder. -The training script will generate an experiment folder in `exp` folder and backup essential code in the experiment folder. -Training config, log, tensorboard, and checkpoints will also be saved into the experiment folder during the training process. -```bash -export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES} -# Script (Recommended) -sh scripts/train.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -c ${CONFIG_NAME} -n ${EXP_NAME} -# Direct -export PYTHONPATH=./ -python tools/train.py --config-file ${CONFIG_PATH} --num-gpus ${NUM_GPU} --options save_path=${SAVE_PATH} -``` - -For example: -```bash -# By script (Recommended) -# -p is default set as python and can be ignored -sh scripts/train.sh -p python -d scannet -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base -# Direct -export PYTHONPATH=./ -python tools/train.py --config-file configs/scannet/semseg-pt-v2m2-0-base.py --options save_path=exp/scannet/semseg-pt-v2m2-0-base -``` -**Resume training from checkpoint.** If the training process is interrupted by accident, the following script can resume training from a given checkpoint. -```bash -export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES} -# Script (Recommended) -# simply add "-r true" -sh scripts/train.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -c ${CONFIG_NAME} -n ${EXP_NAME} -r true -# Direct -export PYTHONPATH=./ -python tools/train.py --config-file ${CONFIG_PATH} --num-gpus ${NUM_GPU} --options save_path=${SAVE_PATH} resume=True weight=${CHECKPOINT_PATH} -``` -**Weights and Biases.** -Pointcept by default enables both `tensorboard` and `wandb`. There are some usage notes related to `wandb`: -1. Disable by set `enable_wandb=False`; -2. Sync with `wandb` remote server by `wandb login` in the terminal or set `wandb_key=YOUR_WANDB_KEY` in config. -3. The project name is "Pointcept" by default, custom it to your research project name by setting `wandb_project=YOUR_PROJECT_NAME` (e.g. Sonata-Dev, PointTransformerV3-Dev) - -### Testing -During training, model evaluation is performed on point clouds after grid sampling (voxelization), providing an initial assessment of model performance. ~~However, to obtain precise evaluation results, testing is **essential**~~ *(now we automatically run the testing process after training with the `PreciseEvaluation` hook)*. The testing process involves subsampling a dense point cloud into a sequence of voxelized point clouds, ensuring comprehensive coverage of all points. These sub-results are then predicted and collected to form a complete prediction of the entire point cloud. This approach yields higher evaluation results compared to simply mapping/interpolating the prediction. In addition, our testing code supports TTA (test time augmentation) testing, which further enhances the stability of evaluation performance. - -```bash -# By script (Based on experiment folder created by training script) -sh scripts/test.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -n ${EXP_NAME} -w ${CHECKPOINT_NAME} -# Direct -export PYTHONPATH=./ -python tools/test.py --config-file ${CONFIG_PATH} --num-gpus ${NUM_GPU} --options save_path=${SAVE_PATH} weight=${CHECKPOINT_PATH} -``` -For example: -```bash -# By script (Based on experiment folder created by training script) -# -p is default set as python and can be ignored -# -w is default set as model_best and can be ignored -sh scripts/test.sh -p python -d scannet -n semseg-pt-v2m2-0-base -w model_best -# Direct -export PYTHONPATH=./ -python tools/test.py --config-file configs/scannet/semseg-pt-v2m2-0-base.py --options save_path=exp/scannet/semseg-pt-v2m2-0-base weight=exp/scannet/semseg-pt-v2m2-0-base/model/model_best.pth -``` - -The TTA can be disabled by replace `data.test.test_cfg.aug_transform = [...]` with: - -```python -data = dict( - train = dict(...), - val = dict(...), - test = dict( - ..., - test_cfg = dict( - ..., - aug_transform = [ - [dict(type="RandomRotateTargetAngle", angle=[0], axis="z", center=[0, 0, 0], p=1)] - ] - ) - ) -) -``` - -### Offset -`Offset` is the separator of point clouds in batch data, and it is similar to the concept of `Batch` in PyG. -A visual illustration of batch and offset is as follows: -

- - - - - - pointcept - -
- -

- -## Model Zoo -### 1. Backbones and Semantic Segmentation -#### SparseUNet - -_Pointcept_ provides `SparseUNet` implemented by `SpConv` and `MinkowskiEngine`. The SpConv version is recommended since SpConv is easy to install and faster than MinkowskiEngine. Meanwhile, SpConv is also widely applied in outdoor perception. - -- **SpConv (recommend)** - -The SpConv version `SparseUNet` in the codebase was fully rewrite from `MinkowskiEngine` version, example running script is as follows: - -```bash -# ScanNet val -sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base -# ScanNet200 -sh scripts/train.sh -g 4 -d scannet200 -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base -# S3DIS -sh scripts/train.sh -g 4 -d s3dis -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base -# S3DIS (with normal) -sh scripts/train.sh -g 4 -d s3dis -c semseg-spunet-v1m1-0-cn-base -n semseg-spunet-v1m1-0-cn-base -# SemanticKITTI -sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base -# nuScenes -sh scripts/train.sh -g 4 -d nuscenes -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base -# ModelNet40 -sh scripts/train.sh -g 2 -d modelnet40 -c cls-spunet-v1m1-0-base -n cls-spunet-v1m1-0-base - -# ScanNet Data Efficient -sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la20 -n semseg-spunet-v1m1-2-efficient-la20 -sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la50 -n semseg-spunet-v1m1-2-efficient-la50 -sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la100 -n semseg-spunet-v1m1-2-efficient-la100 -sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la200 -n semseg-spunet-v1m1-2-efficient-la200 -sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr1 -n semseg-spunet-v1m1-2-efficient-lr1 -sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr5 -n semseg-spunet-v1m1-2-efficient-lr5 -sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr10 -n semseg-spunet-v1m1-2-efficient-lr10 -sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr20 -n semseg-spunet-v1m1-2-efficient-lr20 - -# Profile model run time -sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-0-enable-profiler -n semseg-spunet-v1m1-0-enable-profiler -``` - -- **MinkowskiEngine** - -The MinkowskiEngine version `SparseUNet` in the codebase was modified from the original MinkowskiEngine repo, and example running scripts are as follows: -1. Install MinkowskiEngine, refer https://github.com/NVIDIA/MinkowskiEngine -2. Training with the following example scripts: -```bash -# Uncomment "# from .sparse_unet import *" in "pointcept/models/__init__.py" -# Uncomment "# from .mink_unet import *" in "pointcept/models/sparse_unet/__init__.py" -# ScanNet -sh scripts/train.sh -g 4 -d scannet -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base -# ScanNet200 -sh scripts/train.sh -g 4 -d scannet200 -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base -# S3DIS -sh scripts/train.sh -g 4 -d s3dis -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base -# SemanticKITTI -sh scripts/train.sh -g 2 -d semantic_kitti -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base -``` - -#### OA-CNNs -Introducing Omni-Adaptive 3D CNNs (**OA-CNNs**), a family of networks that integrates a lightweight module to greatly enhance the adaptivity of sparse CNNs at minimal computational cost. Without any self-attention modules, **OA-CNNs** favorably surpass point transformers in terms of accuracy in both indoor and outdoor scenes, with much less latency and memory cost. Issue related to **OA-CNNs** can @Pbihao. -```bash -# ScanNet -sh scripts/train.sh -g 4 -d scannet -c semseg-oacnns-v1m1-0-base -n semseg-oacnns-v1m1-0-base -``` - -#### Point Transformers -- **PTv3** - -[PTv3](https://arxiv.org/abs/2312.10035) is an efficient backbone model that achieves SOTA performances across indoor and outdoor scenarios. The full PTv3 relies on FlashAttention, while FlashAttention relies on CUDA 11.6 and above, make sure your local Pointcept environment satisfies the requirements. - -If you can not upgrade your local environment to satisfy the requirements (CUDA >= 11.6), then you can disable FlashAttention by setting the model parameter `enable_flash` to `false` and reducing the `enc_patch_size` and `dec_patch_size` to a level (e.g. 128). - -FlashAttention force disables RPE and forces the accuracy reduced to fp16. If you require these features, please disable `enable_flash` and adjust `enable_rpe`, `upcast_attention` and`upcast_softmax`. - -Detailed instructions and experiment records (containing weights) are available on the [project repository](https://github.com/Pointcept/PointTransformerV3). Example running scripts are as follows: -```bash -# Scratched ScanNet -sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base -# PPT joint training (ScanNet + Structured3D) and evaluate in ScanNet -sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-1-ppt-extreme -n semseg-pt-v3m1-1-ppt-extreme - -# Scratched ScanNet200 -sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base -# Fine-tuning from PPT joint training (ScanNet + Structured3D) with ScanNet200 -# PTV3_PPT_WEIGHT_PATH: Path to model weight trained by PPT multi-dataset joint training -# e.g. exp/scannet/semseg-pt-v3m1-1-ppt-extreme/model/model_best.pth -sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v3m1-1-ppt-ft -n semseg-pt-v3m1-1-ppt-ft -w ${PTV3_PPT_WEIGHT_PATH} - -# Scratched ScanNet++ -sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base -# Scratched ScanNet++ test -sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v3m1-1-submit -n semseg-pt-v3m1-1-submit - - -# Scratched S3DIS -sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base -# an example for disbale flash_attention and enable rpe. -sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v3m1-1-rpe -n semseg-pt-v3m1-0-rpe -# PPT joint training (ScanNet + S3DIS + Structured3D) and evaluate in ScanNet -sh scripts/train.sh -g 8 -d s3dis -c semseg-pt-v3m1-1-ppt-extreme -n semseg-pt-v3m1-1-ppt-extreme -# S3DIS 6-fold cross validation -# 1. The default configs are evaluated on Area_5, modify the "data.train.split", "data.val.split", and "data.test.split" to make the config evaluated on Area_1 ~ Area_6 respectively. -# 2. Train and evaluate the model on each split of areas and gather result files located in "exp/s3dis/EXP_NAME/result/Area_x.pth" in one single folder, noted as RECORD_FOLDER. -# 3. Run the following script to get S3DIS 6-fold cross validation performance: -export PYTHONPATH=./ -python tools/test_s3dis_6fold.py --record_root ${RECORD_FOLDER} - -# Scratched nuScenes -sh scripts/train.sh -g 4 -d nuscenes -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base -# Scratched Waymo -sh scripts/train.sh -g 4 -d waymo -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base - -# More configs and exp records for PTv3 will be available soon. -``` - -Indoor semantic segmentation -| Model | Benchmark | Additional Data | Num GPUs | Val mIoU | Config | Tensorboard | Exp Record | -| :---: | :---: |:---------------:| :---: | :---: | :---: | :---: | :---: | -| PTv3 | ScanNet | ✗ | 4 | 77.6% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/scannet-semseg-pt-v3m1-0-base) | -| PTv3 + PPT | ScanNet | ✓ | 8 | 78.5% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet/semseg-pt-v3m1-1-ppt-extreme.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/scannet-semseg-pt-v3m1-1-ppt-extreme) | -| PTv3 | ScanNet200 | ✗ | 4 | 35.3% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet200/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) |[link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/scannet200-semseg-pt-v3m1-0-base)| -| PTv3 | S3DIS (Area5) | ✗ | 4 | 73.6% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/s3dis/semseg-pt-v3m1-0-rpe.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/s3dis-semseg-pt-v3m1-0-rpe) | -| PTv3 + PPT | S3DIS (Area5) | ✓ | 8 | 75.4% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/s3dis/semseg-pt-v3m1-1-ppt-extreme.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/s3dis-semseg-pt-v3m1-1-ppt-extreme) | -_**\*Released model weights are trained for v1.5.1, weights for v1.5.2 and later is still ongoing.**_ - -- **PTv2 mode2** - -The original PTv2 was trained on 4 * RTX a6000 (48G memory). Even enabling AMP, the memory cost of the original PTv2 is slightly larger than 24G. Considering GPUs with 24G memory are much more accessible, I tuned the PTv2 on the latest Pointcept and made it runnable on 4 * RTX 3090 machines. - -`PTv2 Mode2` enables AMP and disables _Position Encoding Multiplier_ & _Grouped Linear_. During our further research, we found that precise coordinates are not necessary for point cloud understanding (Replacing precise coordinates with grid coordinates doesn't influence the performance. Also, SparseUNet is an example). As for Grouped Linear, my implementation of Grouped Linear seems to cost more memory than the Linear layer provided by PyTorch. Benefiting from the codebase and better parameter tuning, we also relieve the overfitting problem. The reproducing performance is even better than the results reported in our paper. - -Example running scripts are as follows: - -```bash -# ptv2m2: PTv2 mode2, disable PEM & Grouped Linear, GPU memory cost < 24G (recommend) -# ScanNet -sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base -sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m2-3-lovasz -n semseg-pt-v2m2-3-lovasz - -# ScanNet test -sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m2-1-submit -n semseg-pt-v2m2-1-submit -# ScanNet200 -sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base -# ScanNet++ -sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base -# ScanNet++ test -sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v2m2-1-submit -n semseg-pt-v2m2-1-submit -# S3DIS -sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base -# SemanticKITTI -sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base -# nuScenes -sh scripts/train.sh -g 4 -d nuscenes -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base -``` - -- **PTv2 mode1** - -`PTv2 mode1` is the original PTv2 we reported in our paper, example running scripts are as follows: - -```bash -# ptv2m1: PTv2 mode1, Original PTv2, GPU memory cost > 24G -# ScanNet -sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m1-0-base -n semseg-pt-v2m1-0-base -# ScanNet200 -sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v2m1-0-base -n semseg-pt-v2m1-0-base -# S3DIS -sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v2m1-0-base -n semseg-pt-v2m1-0-base -``` - -- **PTv1** - -The original PTv1 is also available in our Pointcept codebase. I haven't run PTv1 for a long time, but I have ensured that the example running script works well. - -```bash -# ScanNet -sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v1-0-base -n semseg-pt-v1-0-base -# ScanNet200 -sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v1-0-base -n semseg-pt-v1-0-base -# S3DIS -sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v1-0-base -n semseg-pt-v1-0-base -``` - - -#### Stratified Transformer -1. Additional requirements: -```bash -pip install torch-points3d -# Fix dependence, caused by installing torch-points3d -pip uninstall SharedArray -pip install SharedArray==3.2.1 - -cd libs/pointops2 -python setup.py install -cd ../.. -``` -2. Uncomment `# from .stratified_transformer import *` in `pointcept/models/__init__.py`. -3. Refer [Optional Installation](installation) to install dependence. -4. Training with the following example scripts: -```bash -# stv1m1: Stratified Transformer mode1, Modified from the original Stratified Transformer code. -# PTv2m2: Stratified Transformer mode2, My rewrite version (recommend). - -# ScanNet -sh scripts/train.sh -g 4 -d scannet -c semseg-st-v1m2-0-refined -n semseg-st-v1m2-0-refined -sh scripts/train.sh -g 4 -d scannet -c semseg-st-v1m1-0-origin -n semseg-st-v1m1-0-origin -# ScanNet200 -sh scripts/train.sh -g 4 -d scannet200 -c semseg-st-v1m2-0-refined -n semseg-st-v1m2-0-refined -# S3DIS -sh scripts/train.sh -g 4 -d s3dis -c semseg-st-v1m2-0-refined -n semseg-st-v1m2-0-refined -``` - -#### SPVCNN -`SPVCNN` is a baseline model of [SPVNAS](https://github.com/mit-han-lab/spvnas), it is also a practical baseline for outdoor datasets. -1. Install torchsparse: -```bash -# refer https://github.com/mit-han-lab/torchsparse -# install method without sudo apt install -conda install google-sparsehash -c bioconda -export C_INCLUDE_PATH=${CONDA_PREFIX}/include:$C_INCLUDE_PATH -export CPLUS_INCLUDE_PATH=${CONDA_PREFIX}/include:CPLUS_INCLUDE_PATH -pip install --upgrade git+https://github.com/mit-han-lab/torchsparse.git -``` -2. Training with the following example scripts: -```bash -# SemanticKITTI -sh scripts/train.sh -g 2 -d semantic_kitti -c semseg-spvcnn-v1m1-0-base -n semseg-spvcnn-v1m1-0-base -``` - -#### OctFormer -OctFormer from _OctFormer: Octree-based Transformers for 3D Point Clouds_. -1. Additional requirements: -```bash -cd libs -git clone https://github.com/octree-nn/dwconv.git -pip install ./dwconv -pip install ocnn -``` -2. Uncomment `# from .octformer import *` in `pointcept/models/__init__.py`. -2. Training with the following example scripts: -```bash -# ScanNet -sh scripts/train.sh -g 4 -d scannet -c semseg-octformer-v1m1-0-base -n semseg-octformer-v1m1-0-base -``` - -#### Swin3D -Swin3D from _Swin3D: A Pretrained Transformer Backbone for 3D Indoor Scene Understanding_. -1. Additional requirements: -```bash -# 1. Install MinkEngine v0.5.4, follow readme in https://github.com/NVIDIA/MinkowskiEngine; -# 2. Install Swin3D, mainly for cuda operation: -cd libs -git clone https://github.com/microsoft/Swin3D.git -cd Swin3D -pip install ./ -``` -2. Uncomment `# from .swin3d import *` in `pointcept/models/__init__.py`. -3. Pre-Training with the following example scripts (Structured3D preprocessing refer [here](#structured3d)): -```bash -# Structured3D + Swin-S -sh scripts/train.sh -g 4 -d structured3d -c semseg-swin3d-v1m1-0-small -n semseg-swin3d-v1m1-0-small -# Structured3D + Swin-L -sh scripts/train.sh -g 4 -d structured3d -c semseg-swin3d-v1m1-1-large -n semseg-swin3d-v1m1-1-large - -# Addition -# Structured3D + SpUNet -sh scripts/train.sh -g 4 -d structured3d -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base -# Structured3D + PTv2 -sh scripts/train.sh -g 4 -d structured3d -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base -``` -4. Fine-tuning with the following example scripts: -```bash -# ScanNet + Swin-S -sh scripts/train.sh -g 4 -d scannet -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-0-small -n semseg-swin3d-v1m1-0-small -# ScanNet + Swin-L -sh scripts/train.sh -g 4 -d scannet -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-1-large -n semseg-swin3d-v1m1-1-large - -# S3DIS + Swin-S (here we provide config support S3DIS normal vector) -sh scripts/train.sh -g 4 -d s3dis -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-0-small -n semseg-swin3d-v1m1-0-small -# S3DIS + Swin-L (here we provide config support S3DIS normal vector) -sh scripts/train.sh -g 4 -d s3dis -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-1-large -n semseg-swin3d-v1m1-1-large -``` - -#### Context-Aware Classifier -`Context-Aware Classifier` is a segmentor that can further boost the performance of each backbone, as a replacement for `Default Segmentor`. Training with the following example scripts: -```bash -# ScanNet -sh scripts/train.sh -g 4 -d scannet -c semseg-cac-v1m1-0-spunet-base -n semseg-cac-v1m1-0-spunet-base -sh scripts/train.sh -g 4 -d scannet -c semseg-cac-v1m1-1-spunet-lovasz -n semseg-cac-v1m1-1-spunet-lovasz -sh scripts/train.sh -g 4 -d scannet -c semseg-cac-v1m1-2-ptv2-lovasz -n semseg-cac-v1m1-2-ptv2-lovasz - -# ScanNet200 -sh scripts/train.sh -g 4 -d scannet200 -c semseg-cac-v1m1-0-spunet-base -n semseg-cac-v1m1-0-spunet-base -sh scripts/train.sh -g 4 -d scannet200 -c semseg-cac-v1m1-1-spunet-lovasz -n semseg-cac-v1m1-1-spunet-lovasz -sh scripts/train.sh -g 4 -d scannet200 -c semseg-cac-v1m1-2-ptv2-lovasz -n semseg-cac-v1m1-2-ptv2-lovasz -``` - - -### 2. Instance Segmentation -#### PointGroup -[PointGroup](https://github.com/dvlab-research/PointGroup) is a baseline framework for point cloud instance segmentation. -1. Additional requirements: -```bash -conda install -c bioconda google-sparsehash -cd libs/pointgroup_ops -python setup.py install --include_dirs=${CONDA_PREFIX}/include -cd ../.. -``` -2. Uncomment `# from .point_group import *` in `pointcept/models/__init__.py`. -3. Training with the following example scripts: -```bash -# ScanNet -sh scripts/train.sh -g 4 -d scannet -c insseg-pointgroup-v1m1-0-spunet-base -n insseg-pointgroup-v1m1-0-spunet-base -# S3DIS -sh scripts/train.sh -g 4 -d scannet -c insseg-pointgroup-v1m1-0-spunet-base -n insseg-pointgroup-v1m1-0-spunet-base -``` - -### 3. Pre-training -#### Concerto -Follow the instruction [here](https://github.com/Pointcept/Pointcept/tree/main/pointcept/models/concerto). - -#### Sonata -Follow the instruction [here](https://github.com/Pointcept/Pointcept/tree/main/pointcept/models/sonata). - -#### Masked Scene Contrast (MSC) -1. Pre-training with the following example scripts: -```bash -# ScanNet -sh scripts/train.sh -g 8 -d scannet -c pretrain-msc-v1m1-0-spunet-base -n pretrain-msc-v1m1-0-spunet-base -``` - -2. Fine-tuning with the following example scripts: -enable PointGroup ([here](#pointgroup)) before fine-tuning on instance segmentation task. -```bash -# ScanNet20 Semantic Segmentation -sh scripts/train.sh -g 8 -d scannet -w exp/scannet/pretrain-msc-v1m1-0-spunet-base/model/model_last.pth -c semseg-spunet-v1m1-4-ft -n semseg-msc-v1m1-0f-spunet-base -# ScanNet20 Instance Segmentation (enable PointGroup before running the script) -sh scripts/train.sh -g 4 -d scannet -w exp/scannet/pretrain-msc-v1m1-0-spunet-base/model/model_last.pth -c insseg-pointgroup-v1m1-0-spunet-base -n insseg-msc-v1m1-0f-pointgroup-spunet-base -``` -3. Example log and weight: [[Pretrain](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/wuxy_connect_hku_hk/EYvNV4XUJ_5Mlk-g15RelN4BW_P8lVBfC_zhjC_BlBDARg?e=UoGFWH)] [[Semseg](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/wuxy_connect_hku_hk/EQkDiv5xkOFKgCpGiGtAlLwBon7i8W6my3TIbGVxuiTttQ?e=tQFnbr)] - -#### Point Prompt Training (PPT) -PPT presents a multi-dataset pre-training framework, and it is compatible with various existing pre-training frameworks and backbones. -1. PPT supervised joint training with the following example scripts: -```bash -# ScanNet + Structured3d, validate on ScanNet (S3DIS might cause long data time, w/o S3DIS for a quick validation) >= 3090 * 8 -sh scripts/train.sh -g 8 -d scannet -c semseg-ppt-v1m1-0-sc-st-spunet -n semseg-ppt-v1m1-0-sc-st-spunet -sh scripts/train.sh -g 8 -d scannet -c semseg-ppt-v1m1-1-sc-st-spunet-submit -n semseg-ppt-v1m1-1-sc-st-spunet-submit -# ScanNet + S3DIS + Structured3d, validate on S3DIS (>= a100 * 8) -sh scripts/train.sh -g 8 -d s3dis -c semseg-ppt-v1m1-0-s3-sc-st-spunet -n semseg-ppt-v1m1-0-s3-sc-st-spunet -# SemanticKITTI + nuScenes + Waymo, validate on SemanticKITTI (bs12 >= 3090 * 4 >= 3090 * 8, v1m1-0 is still on tuning) -sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-ppt-v1m1-0-nu-sk-wa-spunet -n semseg-ppt-v1m1-0-nu-sk-wa-spunet -sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-ppt-v1m2-0-sk-nu-wa-spunet -n semseg-ppt-v1m2-0-sk-nu-wa-spunet -sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-ppt-v1m2-1-sk-nu-wa-spunet-submit -n semseg-ppt-v1m2-1-sk-nu-wa-spunet-submit -# SemanticKITTI + nuScenes + Waymo, validate on nuScenes (bs12 >= 3090 * 4; bs24 >= 3090 * 8, v1m1-0 is still on tuning)) -sh scripts/train.sh -g 4 -d nuscenes -c semseg-ppt-v1m1-0-nu-sk-wa-spunet -n semseg-ppt-v1m1-0-nu-sk-wa-spunet -sh scripts/train.sh -g 4 -d nuscenes -c semseg-ppt-v1m2-0-nu-sk-wa-spunet -n semseg-ppt-v1m2-0-nu-sk-wa-spunet -sh scripts/train.sh -g 4 -d nuscenes -c semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit -n semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit -``` - -#### PointContrast -1. Preprocess and link ScanNet-Pair dataset (pair-wise matching with ScanNet raw RGB-D frame, ~1.5T): -```bash -# RAW_SCANNET_DIR: the directory of downloaded ScanNet v2 raw dataset. -# PROCESSED_SCANNET_PAIR_DIR: the directory of processed ScanNet pair dataset (output dir). -python pointcept/datasets/preprocessing/scannet/scannet_pair/preprocess.py --dataset_root ${RAW_SCANNET_DIR} --output_root ${PROCESSED_SCANNET_PAIR_DIR} -ln -s ${PROCESSED_SCANNET_PAIR_DIR} ${CODEBASE_DIR}/data/scannet -``` -2. Pre-training with the following example scripts: -```bash -# ScanNet -sh scripts/train.sh -g 8 -d scannet -c pretrain-msc-v1m1-1-spunet-pointcontrast -n pretrain-msc-v1m1-1-spunet-pointcontrast -``` -3. Fine-tuning refer [MSC](#masked-scene-contrast-msc). - -#### Contrastive Scene Contexts -1. Preprocess and link ScanNet-Pair dataset (refer [PointContrast](#pointcontrast)): -2. Pre-training with the following example scripts: -```bash -# ScanNet -sh scripts/train.sh -g 8 -d scannet -c pretrain-msc-v1m2-0-spunet-csc -n pretrain-msc-v1m2-0-spunet-csc -``` -3. Fine-tuning refer [MSC](#masked-scene-contrast-msc). - -## Acknowledgement -_Pointcept_ is designed by [Xiaoyang](https://xywu.me/), named by [Yixing](https://github.com/yxlao) and the logo is created by [Yuechen](https://julianjuaner.github.io/). It is derived from [Hengshuang](https://hszhao.github.io/)'s [Semseg](https://github.com/hszhao/semseg) and inspirited by several repos, e.g., [MinkowskiEngine](https://github.com/NVIDIA/MinkowskiEngine), [pointnet2](https://github.com/charlesq34/pointnet2), [mmcv](https://github.com/open-mmlab/mmcv/tree/master/mmcv), and [Detectron2](https://github.com/facebookresearch/detectron2). \ No newline at end of file diff --git a/point_transformer_v3/pointcept_minimal/configs/_base_/dataset/scannet.py b/point_transformer_v3/pointcept_minimal/configs/_base_/dataset/scannet.py deleted file mode 100644 index eeb9488..0000000 --- a/point_transformer_v3/pointcept_minimal/configs/_base_/dataset/scannet.py +++ /dev/null @@ -1,26 +0,0 @@ -class_names = [ - "wall", - "floor", - "cabinet", - "bed", - "chair", - "sofa", - "table", - "door", - "window", - "bookshelf", - "picture", - "counter", - "desk", - "curtain", - "refridgerator", - "shower curtain", - "toilet", - "sink", - "bathtub", - "otherfurniture", -] - -data = dict( - names=class_names, -) diff --git a/point_transformer_v3/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-base.py b/point_transformer_v3/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-base.py deleted file mode 100644 index 6f35823..0000000 --- a/point_transformer_v3/pointcept_minimal/configs/scannet/semseg-pt-v3m1-0-base.py +++ /dev/null @@ -1,317 +0,0 @@ -_base_ = ["../_base_/default_runtime.py"] - -# misc custom setting -batch_size = 12 # bs: total bs in all gpus -num_worker = 24 -mix_prob = 0.8 -empty_cache = False -enable_amp = True - -# model settings -model = dict( - type="DefaultSegmentorV2", - num_classes=20, - backbone_out_channels=64, - backbone=dict( - type="PT-v3m1", - in_channels=6, - order=("z", "z-trans", "hilbert", "hilbert-trans"), - stride=(2, 2, 2, 2), - enc_depths=(2, 2, 2, 6, 2), - enc_channels=(32, 64, 128, 256, 512), - enc_num_head=(2, 4, 8, 16, 32), - enc_patch_size=(1024, 1024, 1024, 1024, 1024), - dec_depths=(2, 2, 2, 2), - dec_channels=(64, 64, 128, 256), - dec_num_head=(4, 4, 8, 16), - dec_patch_size=(1024, 1024, 1024, 1024), - mlp_ratio=4, - qkv_bias=True, - qk_scale=None, - attn_drop=0.0, - proj_drop=0.0, - drop_path=0.3, - shuffle_orders=True, - pre_norm=True, - enable_rpe=False, - enable_flash=True, - upcast_attention=False, - upcast_softmax=False, - enc_mode=False, - pdnorm_bn=False, - pdnorm_ln=False, - pdnorm_decouple=True, - pdnorm_adaptive=False, - pdnorm_affine=True, - pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), - ), - criteria=[ - dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), - dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), - ], -) - -# scheduler settings -epoch = 800 -optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) -scheduler = dict( - type="OneCycleLR", - max_lr=[0.006, 0.0006], - pct_start=0.05, - anneal_strategy="cos", - div_factor=10.0, - final_div_factor=1000.0, -) -param_dicts = [dict(keyword="block", lr=0.0006)] - -# dataset settings -dataset_type = "ScanNetDataset" -data_root = "/lustre/fsw/portfolios/nvr/users/hexuz/pointcept/data/scannet" - - -data = dict( - num_classes=20, - ignore_index=-1, - names=[ - "wall", - "floor", - "cabinet", - "bed", - "chair", - "sofa", - "table", - "door", - "window", - "bookshelf", - "picture", - "counter", - "desk", - "curtain", - "refridgerator", - "shower curtain", - "toilet", - "sink", - "bathtub", - "otherfurniture", - ], - train=dict( - type=dataset_type, - split="train", - data_root=data_root, - transform=[ - dict(type="CenterShift", apply_z=True), - dict( - type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 - ), - # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), - dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), - dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), - dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), - dict(type="RandomScale", scale=[0.9, 1.1]), - # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), - dict(type="RandomFlip", p=0.5), - dict(type="RandomJitter", sigma=0.005, clip=0.02), - dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), - dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), - dict(type="ChromaticTranslation", p=0.95, ratio=0.05), - dict(type="ChromaticJitter", p=0.95, std=0.05), - # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), - # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), - dict( - type="GridSample", - grid_size=0.02, - hash_type="fnv", - mode="train", - return_grid_coord=True, - ), - dict(type="SphereCrop", point_max=102400, mode="random"), - dict(type="CenterShift", apply_z=False), - dict(type="NormalizeColor"), - # dict(type="ShufflePoint"), - dict(type="ToTensor"), - dict( - type="Collect", - keys=("coord", "grid_coord", "segment"), - feat_keys=("color", "normal"), - ), - ], - test_mode=False, - ), - val=dict( - type=dataset_type, - split="val", - data_root=data_root, - transform=[ - dict(type="CenterShift", apply_z=True), - dict(type="Copy", keys_dict={"segment": "origin_segment"}), - dict( - type="GridSample", - grid_size=0.02, - hash_type="fnv", - mode="train", - return_grid_coord=True, - return_inverse=True, - ), - dict(type="CenterShift", apply_z=False), - dict(type="NormalizeColor"), - dict(type="ToTensor"), - dict( - type="Collect", - keys=("coord", "grid_coord", "segment", "origin_segment", "inverse"), - feat_keys=("color", "normal"), - ), - ], - test_mode=False, - ), - test=dict( - type=dataset_type, - split="val", - data_root=data_root, - transform=[ - dict(type="CenterShift", apply_z=True), - dict(type="NormalizeColor"), - ], - test_mode=True, - test_cfg=dict( - voxelize=dict( - type="GridSample", - grid_size=0.02, - hash_type="fnv", - mode="test", - return_grid_coord=True, - ), - crop=None, - post_transform=[ - dict(type="CenterShift", apply_z=False), - dict(type="ToTensor"), - dict( - type="Collect", - keys=("coord", "grid_coord", "index"), - feat_keys=("color", "normal"), - ), - ], - aug_transform=[ - [ - dict( - type="RandomRotateTargetAngle", - angle=[0], - axis="z", - center=[0, 0, 0], - p=1, - ) - ], - [ - dict( - type="RandomRotateTargetAngle", - angle=[1 / 2], - axis="z", - center=[0, 0, 0], - p=1, - ) - ], - [ - dict( - type="RandomRotateTargetAngle", - angle=[1], - axis="z", - center=[0, 0, 0], - p=1, - ) - ], - [ - dict( - type="RandomRotateTargetAngle", - angle=[3 / 2], - axis="z", - center=[0, 0, 0], - p=1, - ) - ], - [ - dict( - type="RandomRotateTargetAngle", - angle=[0], - axis="z", - center=[0, 0, 0], - p=1, - ), - dict(type="RandomScale", scale=[0.95, 0.95]), - ], - [ - dict( - type="RandomRotateTargetAngle", - angle=[1 / 2], - axis="z", - center=[0, 0, 0], - p=1, - ), - dict(type="RandomScale", scale=[0.95, 0.95]), - ], - [ - dict( - type="RandomRotateTargetAngle", - angle=[1], - axis="z", - center=[0, 0, 0], - p=1, - ), - dict(type="RandomScale", scale=[0.95, 0.95]), - ], - [ - dict( - type="RandomRotateTargetAngle", - angle=[3 / 2], - axis="z", - center=[0, 0, 0], - p=1, - ), - dict(type="RandomScale", scale=[0.95, 0.95]), - ], - [ - dict( - type="RandomRotateTargetAngle", - angle=[0], - axis="z", - center=[0, 0, 0], - p=1, - ), - dict(type="RandomScale", scale=[1.05, 1.05]), - ], - [ - dict( - type="RandomRotateTargetAngle", - angle=[1 / 2], - axis="z", - center=[0, 0, 0], - p=1, - ), - dict(type="RandomScale", scale=[1.05, 1.05]), - ], - [ - dict( - type="RandomRotateTargetAngle", - angle=[1], - axis="z", - center=[0, 0, 0], - p=1, - ), - dict(type="RandomScale", scale=[1.05, 1.05]), - ], - [ - dict( - type="RandomRotateTargetAngle", - angle=[3 / 2], - axis="z", - center=[0, 0, 0], - p=1, - ), - dict(type="RandomScale", scale=[1.05, 1.05]), - ], - [dict(type="RandomFlip", p=1)], - ], - ), - ), -) - -# conda activate repro_fvdb -# sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base diff --git a/point_transformer_v3/pointcept_minimal/environment.yml b/point_transformer_v3/pointcept_minimal/environment.yml deleted file mode 100644 index cf277f2..0000000 --- a/point_transformer_v3/pointcept_minimal/environment.yml +++ /dev/null @@ -1,52 +0,0 @@ -name: pointcept-torch2.5.0-cu12.4 -channels: - - pytorch - - nvidia/label/cuda-12.4.1 - - nvidia - - bioconda - - conda-forge - - defaults -dependencies: - - python=3.10 - - pip - - cuda - - conda-forge::cudnn - - gcc=13.2 - - gxx=13.2 - - pytorch=2.5.0 - - torchvision=0.20.0 - - torchaudio=2.5.0 - - pytorch-cuda=12.4 - - ninja - - google-sparsehash - - h5py - - pyyaml - - tensorboard - - tensorboardx - - wandb - - yapf - - addict - - einops - - scipy - - plyfile - - termcolor - - timm - - ftfy - - regex - - tqdm - - matplotlib - - black - - open3d - - pip: - - --find-links https://data.pyg.org/whl/torch-2.5.0+cu124.html - - torch-cluster - - torch-scatter - - torch-sparse - - torch-geometric - - spconv-cu124 - - peft #for lora finetune - - git+https://github.com/octree-nn/ocnn-pytorch.git - - git+https://github.com/openai/CLIP.git - - git+https://github.com/Dao-AILab/flash-attention.git - - ./libs/pointops - - ./libs/pointgroup_ops \ No newline at end of file diff --git a/point_transformer_v3/pointcept_minimal/pointcept/__init__.py b/point_transformer_v3/pointcept_minimal/pointcept/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/__init__.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/__init__.py deleted file mode 100644 index 9341b8f..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/datasets/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from .defaults import DefaultDataset, DefaultImagePointDataset, ConcatDataset -from .builder import build_dataset -from .utils import point_collate_fn, collate_fn - -# indoor scene -from .scannet import ScanNetDataset, ScanNet200Dataset - -# dataloader -from .dataloader import MultiDatasetDataloader diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/builder.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/builder.py deleted file mode 100644 index 1fa5f0e..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/datasets/builder.py +++ /dev/null @@ -1,15 +0,0 @@ -""" -Dataset Builder - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -from pointcept.utils.registry import Registry - -DATASETS = Registry("datasets") - - -def build_dataset(cfg): - """Build datasets.""" - return DATASETS.build(cfg) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/dataloader.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/dataloader.py deleted file mode 100644 index a3c8e1d..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/datasets/dataloader.py +++ /dev/null @@ -1,112 +0,0 @@ -from functools import partial -import weakref -import torch -import torch.utils.data - -import pointcept.utils.comm as comm -from pointcept.datasets.utils import point_collate_fn -from pointcept.datasets import ConcatDataset -from pointcept.utils.env import set_seed - - -class MultiDatasetDummySampler: - def __init__(self): - self.dataloader = None - - def set_epoch(self, epoch): - if comm.get_world_size() > 1: - for dataloader in self.dataloader.dataloaders: - dataloader.sampler.set_epoch(epoch) - return - - -class MultiDatasetDataloader: - """ - Multiple Datasets Dataloader, batch data from a same dataset and mix up ratio determined by loop of each sub dataset. - The overall length is determined by the main dataset (first) and loop of concat dataset. - """ - - def __init__( - self, - concat_dataset: ConcatDataset, - batch_size_per_gpu: int, - num_worker_per_gpu: int, - mix_prob=0, - seed=None, - ): - self.datasets = concat_dataset.datasets - self.ratios = [dataset.loop for dataset in self.datasets] - # reset data loop, original loop serve as ratios - for dataset in self.datasets: - dataset.loop = 1 - # determine union training epoch by main dataset - self.datasets[0].loop = concat_dataset.loop - # build sub-dataloaders - num_workers = num_worker_per_gpu // len(self.datasets) - self.dataloaders = [] - for dataset_id, dataset in enumerate(self.datasets): - if comm.get_world_size() > 1: - sampler = torch.utils.data.distributed.DistributedSampler(dataset) - else: - sampler = None - - init_fn = ( - partial( - self._worker_init_fn, - dataset_id=dataset_id, - num_workers=num_workers, - num_datasets=len(self.datasets), - rank=comm.get_rank(), - seed=seed, - ) - if seed is not None - else None - ) - self.dataloaders.append( - torch.utils.data.DataLoader( - dataset, - batch_size=batch_size_per_gpu, - shuffle=(sampler is None), - num_workers=num_worker_per_gpu, - sampler=sampler, - collate_fn=partial(point_collate_fn, mix_prob=mix_prob), - pin_memory=True, - worker_init_fn=init_fn, - drop_last=True, - persistent_workers=True, - ) - ) - self.sampler = MultiDatasetDummySampler() - self.sampler.dataloader = weakref.proxy(self) - - def __iter__(self): - iterator = [iter(dataloader) for dataloader in self.dataloaders] - while True: - for i in range(len(self.ratios)): - for _ in range(self.ratios[i]): - try: - batch = next(iterator[i]) - except StopIteration: - if i == 0: - return - else: - iterator[i] = iter(self.dataloaders[i]) - batch = next(iterator[i]) - yield batch - - def __len__(self): - main_data_loader_length = len(self.dataloaders[0]) - return ( - main_data_loader_length // self.ratios[0] * sum(self.ratios) - + main_data_loader_length % self.ratios[0] - ) - - @staticmethod - def _worker_init_fn(worker_id, num_workers, dataset_id, num_datasets, rank, seed): - worker_seed = ( - num_workers * num_datasets * rank - + num_workers * dataset_id - + worker_id - + seed - ) - set_seed(worker_seed) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/defaults.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/defaults.py deleted file mode 100644 index 929d135..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/datasets/defaults.py +++ /dev/null @@ -1,499 +0,0 @@ -""" -Default Datasets - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com), Yujia Zhang (yujia.zhang.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -import os -import glob -import json - -import numpy as np -import torch -from copy import deepcopy -from torch.utils.data import Dataset -from collections.abc import Sequence -from torchvision.transforms import InterpolationMode -from PIL import Image -from torchvision.transforms import transforms as T -import torch.nn.functional as F - -from pointcept.utils.logger import get_root_logger -from pointcept.utils.cache import shared_dict - -from .builder import DATASETS, build_dataset -from .transform import Compose, TRANSFORMS - -INTERPOLATION_MODE = { - "bilinear": InterpolationMode.BILINEAR, - "bicubic": InterpolationMode.BICUBIC, - "nearest": InterpolationMode.NEAREST, -} - - -@DATASETS.register_module() -class DefaultDataset(Dataset): - VALID_ASSETS = [ - "coord", - "color", - "normal", - "strength", - "segment", - "instance", - "pose", - ] - - def __init__( - self, - split="train", - data_root="data/dataset", - transform=None, - test_mode=False, - test_cfg=None, - cache=False, - ignore_index=-1, - loop=1, - ): - super(DefaultDataset, self).__init__() - self.data_root = data_root - self.split = split - self.transform = Compose(transform) - self.cache = cache - self.ignore_index = ignore_index - self.loop = ( - loop if not test_mode else 1 - ) # force make loop = 1 while in test mode - self.test_mode = test_mode - self.test_cfg = test_cfg if test_mode else None - - if test_mode: - self.test_voxelize = TRANSFORMS.build(self.test_cfg.voxelize) - self.test_crop = ( - TRANSFORMS.build(self.test_cfg.crop) if self.test_cfg.crop else None - ) - self.post_transform = Compose(self.test_cfg.post_transform) - self.aug_transform = [Compose(aug) for aug in self.test_cfg.aug_transform] - - self.data_list = self.get_data_list() - logger = get_root_logger() - logger.info( - "Totally {} x {} samples in {} {} set.".format( - len(self.data_list), self.loop, os.path.basename(self.data_root), split - ) - ) - - def get_data_list(self): - if isinstance(self.split, str): - split_list = [self.split] - elif isinstance(self.split, Sequence): - split_list = self.split - else: - raise NotImplementedError - - data_list = [] - for split in split_list: - if os.path.isfile(os.path.join(self.data_root, split)): - with open(os.path.join(self.data_root, split)) as f: - data_list += [ - os.path.join(self.data_root, data) for data in json.load(f) - ] - else: - data_list += glob.glob(os.path.join(self.data_root, split, "*")) - return data_list - - def get_data(self, idx): - data_path = self.data_list[idx % len(self.data_list)] - name = self.get_data_name(idx) - split = self.get_split_name(idx) - if self.cache: - cache_name = f"pointcept-{name}" - return shared_dict(cache_name) - - data_dict = {} - assets = os.listdir(data_path) - for asset in assets: - if not asset.endswith(".npy"): - continue - if asset[:-4] not in self.VALID_ASSETS: - continue - data_dict[asset[:-4]] = np.load(os.path.join(data_path, asset)) - data_dict["name"] = name - data_dict["split"] = split - - if "coord" in data_dict.keys(): - data_dict["coord"] = data_dict["coord"].astype(np.float32) - - if "color" in data_dict.keys(): - data_dict["color"] = data_dict["color"].astype(np.float32) - - if "normal" in data_dict.keys(): - data_dict["normal"] = data_dict["normal"].astype(np.float32) - - if "segment" in data_dict.keys(): - data_dict["segment"] = data_dict["segment"].reshape([-1]).astype(np.int32) - else: - data_dict["segment"] = ( - np.ones(data_dict["coord"].shape[0], dtype=np.int32) * -1 - ) - - if "instance" in data_dict.keys(): - data_dict["instance"] = data_dict["instance"].reshape([-1]).astype(np.int32) - else: - data_dict["instance"] = ( - np.ones(data_dict["coord"].shape[0], dtype=np.int32) * -1 - ) - return data_dict - - def get_data_name(self, idx): - return os.path.basename(self.data_list[idx % len(self.data_list)]) - - def get_split_name(self, idx): - return os.path.basename( - os.path.dirname(self.data_list[idx % len(self.data_list)]) - ) - - def prepare_train_data(self, idx): - # load data - data_dict = self.get_data(idx) - data_dict = self.transform(data_dict) - return data_dict - - def prepare_test_data(self, idx): - # load data - data_dict = self.get_data(idx) - data_dict = self.transform(data_dict) - result_dict = dict(segment=data_dict.pop("segment"), name=data_dict.pop("name")) - if "origin_segment" in data_dict: - assert "inverse" in data_dict - result_dict["origin_segment"] = data_dict.pop("origin_segment") - result_dict["inverse"] = data_dict.pop("inverse") - - data_dict_list = [] - for aug in self.aug_transform: - data_dict_list.append(aug(deepcopy(data_dict))) - - fragment_list = [] - for data in data_dict_list: - if self.test_voxelize is not None: - data_part_list = self.test_voxelize(data) - else: - data["index"] = np.arange(data["coord"].shape[0]) - data_part_list = [data] - for data_part in data_part_list: - if self.test_crop is not None: - data_part = self.test_crop(data_part) - else: - data_part = [data_part] - fragment_list += data_part - - for i in range(len(fragment_list)): - fragment_list[i] = self.post_transform(fragment_list[i]) - result_dict["fragment_list"] = fragment_list - return result_dict - - def __getitem__(self, idx): - if self.test_mode: - return self.prepare_test_data(idx) - else: - return self.prepare_train_data(idx) - - def __len__(self): - return len(self.data_list) * self.loop - - -@DATASETS.register_module() -class DefaultImagePointDataset(Dataset): - PC_VALID_ASSETS = [ - "coord", - "color", - "normal", - ] - - def __init__( - self, - split="train", - data_root="data/dataset", - transform=None, - test_mode=False, - test_cfg=None, - cache=False, - ignore_index=-1, - loop=1, - crop_h=630, - crop_w=1120, - patch_size=14, - interpolation="bilinear", - ): - super(DefaultImagePointDataset, self).__init__() - self.data_root = data_root - self.split = split - self.transform = Compose(transform) - self.cache = cache - self.ignore_index = ignore_index - self.loop = ( - loop if not test_mode else 1 - ) # force make loop = 1 while in test mode - self.test_mode = test_mode - self.test_cfg = test_cfg if test_mode else None - - if test_mode: - self.test_voxelize = TRANSFORMS.build(self.test_cfg.voxelize) - self.test_crop = ( - TRANSFORMS.build(self.test_cfg.crop) if self.test_cfg.crop else None - ) - self.post_transform = Compose(self.test_cfg.post_transform) - self.aug_transform = [Compose(aug) for aug in self.test_cfg.aug_transform] - - self.data_list, self.split_list = self.get_data_list() - if isinstance(self.data_list, dict): - self.data_name = list(self.data_list.keys()) - else: - self.data_name = list([data["token"] for data in self.data_list]) - logger = get_root_logger() - logger.info( - "Totally {} x {} samples in {} {} set.".format( - len(self.data_name), self.loop, os.path.basename(self.data_root), split - ) - ) - - self.crop_h = crop_h - self.crop_w = crop_w - self.patch_size = patch_size - self.patch_h = crop_h // patch_size - self.patch_w = crop_w // patch_size - self.transform_img = T.Compose( - [ - T.Resize( - (self.patch_h * self.patch_size, self.patch_w * self.patch_size), - interpolation=INTERPOLATION_MODE[interpolation], - ), - T.ToTensor(), - ] - ) - - def resize_correspondence_info( - self, correspondence, size, size0, crop_size, _alignment - ): - h, w = size - h0, w0 = size0 - left, top, right, bottom = crop_size - crop_h = bottom - top - crop_w = right - left - mask_crop = ( - (correspondence[:, 1] >= top) - & (correspondence[:, 1] < bottom) - & (correspondence[:, 0] >= left) - & (correspondence[:, 0] < right) - ) - correspondence = correspondence[mask_crop] - correspondence[:, 1] -= top - correspondence[:, 0] -= left - correspondence[:, 1] = (correspondence[:, 1] * h / crop_h // _alignment).astype( - np.int32 - ) - correspondence[:, 0] = (correspondence[:, 0] * w / crop_w // _alignment).astype( - np.int32 - ) - correspondence = correspondence[:, [1, 0, 2]] - correspondence = np.unique(correspondence, axis=0) - return correspondence - - def get_data_list(self): - split_list = {} - if isinstance(self.split, str): - data_path = os.path.join(self.data_root, "splits", f"{self.split}.json") - with open(data_path, "r", encoding="utf-8") as file: - data_list = json.load(file) - split_list[self.split] = list(data_list.keys()) - elif isinstance(self.split, Sequence): - data_list = {} - for split in self.split: - data_path = os.path.join(self.data_root, "splits", f"{split}.json") - with open(data_path, "r", encoding="utf-8") as file: - data_split_dict = json.load(file) - data_list.update(data_split_dict) - split_list[split] = list(data_split_dict.keys()) - else: - raise NotImplementedError - return data_list, split_list - - def get_data_name(self, idx): - return self.data_name[idx % len(self.data_name)] - - def get_split_name(self, idx): - for split, names in self.split_list.items(): - if self.data_name[idx % len(self.data_name)] in names: - return split - return None - - def get_data(self, idx): - data_dict = {} - name = self.get_data_name(idx) - split = self.get_split_name(idx) - data_dict["name"] = name - data_dict["split"] = split - data_path = self.data_list[name] - - pointclouds_path = data_path["pointclouds"] - assets = os.listdir(pointclouds_path) - for asset in assets: - if not asset.endswith(".npy"): - continue - if asset[:-4] not in self.PC_VALID_ASSETS: - continue - data_dict[asset[:-4]] = np.load(os.path.join(pointclouds_path, asset)) - imgs_path = data_path["images"] - imgs = [Image.open(asset) for asset in imgs_path] - img_width, img_height = imgs[0].size - div_w = img_width // self.patch_w - div_h = img_height // self.patch_h - div_min = max(min(div_w, div_h), 1) - crop_img_width = div_min * self.patch_w - crop_img_height = div_min * self.patch_h - left = int((img_width - crop_img_width) / 2) - top = int((img_height - crop_img_height) / 2) - right = int((img_width + crop_img_width) / 2) - bottom = int((img_height + crop_img_height) / 2) - imgs = [img.crop((left, top, right, bottom)) for img in imgs] - imgs = [self.transform_img(img) for img in imgs] - if len(imgs) > 0: - imgs_list = torch.stack(imgs) - data_dict["images"] = imgs_list.float() - else: - data_dict["images"] = torch.empty( - (0, 3, self.patch_h * self.patch_size, self.patch_w * self.patch_size) - ) - data_dict["img_num"] = np.array([data_dict["images"].shape[0]], dtype=np.int32) - - correspondences_path = data_path["correspondences"] - correspondence_infos = np.ones( - (data_dict["coord"].shape[0], len(correspondences_path), 2), dtype=np.int32 - ) * (-1) - for asset_id, asset in enumerate(correspondences_path): - correspondence_info = np.load(asset).astype(np.int32) - if np.array_equal(correspondence_info, -np.ones((1, 3))): - continue - correspondence_info = self.resize_correspondence_info( - correspondence_info, - (self.patch_h * self.patch_size, self.patch_w * self.patch_size), - (img_height, img_width), - (left, top, right, bottom), - self.patch_size, - ) - correspondence_infos[correspondence_info[:, -1], asset_id, :] = ( - correspondence_info[:, :-1] - ) - data_dict["correspondence"] = correspondence_infos # .reshape(-1, 2) - - if "coord" in data_dict.keys(): - data_dict["coord"] = data_dict["coord"].astype(np.float32) - - if "color" in data_dict.keys(): - data_dict["color"] = data_dict["color"].astype(np.float32) - - if "normal" in data_dict.keys(): - data_dict["normal"] = data_dict["normal"].astype(np.float32) - - if "segment" in data_dict.keys(): - data_dict["segment"] = data_dict["segment"].reshape([-1]).astype(np.int32) - else: - data_dict["segment"] = ( - np.ones(data_dict["coord"].shape[0], dtype=np.int32) * -1 - ) - - if "instance" in data_dict.keys(): - data_dict["instance"] = data_dict["instance"].reshape([-1]).astype(np.int32) - else: - data_dict["instance"] = ( - np.ones(data_dict["coord"].shape[0], dtype=np.int32) * -1 - ) - return data_dict - - def prepare_train_data(self, idx): - # load data - data_dict = self.get_data(idx) - data_dict = self.transform(data_dict) - return data_dict - - def prepare_test_data(self, idx): - # load data - data_dict = self.get_data(idx) - data_dict = self.transform(data_dict) - result_dict = dict(segment=data_dict.pop("segment"), name=data_dict.pop("name")) - if "origin_segment" in data_dict: - assert "inverse" in data_dict - result_dict["origin_segment"] = data_dict.pop("origin_segment") - result_dict["inverse"] = data_dict.pop("inverse") - - data_dict_list = [] - for aug in self.aug_transform: - data_dict_list.append(aug(deepcopy(data_dict))) - - fragment_list = [] - for data in data_dict_list: - if self.test_voxelize is not None: - data_part_list = self.test_voxelize(data) - else: - data["index"] = np.arange(data["coord"].shape[0]) - data_part_list = [data] - for data_part in data_part_list: - if self.test_crop is not None: - data_part = self.test_crop(data_part) - else: - data_part = [data_part] - fragment_list += data_part - - for i in range(len(fragment_list)): - fragment_list[i] = self.post_transform(fragment_list[i]) - result_dict["fragment_list"] = fragment_list - return result_dict - - def __getitem__(self, idx): - if self.test_mode: - return self.prepare_test_data(idx) - else: - return self.prepare_train_data(idx) - - def __len__(self): - return len(self.data_list) * self.loop - - -@DATASETS.register_module() -class ConcatDataset(Dataset): - def __init__(self, datasets, loop=1): - super(ConcatDataset, self).__init__() - self.datasets = [build_dataset(dataset) for dataset in datasets] - self.loop = loop - self.data_list = self.get_data_list() - logger = get_root_logger() - logger.info( - "Totally {} x {} samples in the concat set.".format( - len(self.data_list), self.loop - ) - ) - - def get_data_list(self): - data_list = [] - for i in range(len(self.datasets)): - data_list.extend( - zip( - np.ones(len(self.datasets[i]), dtype=int) * i, - np.arange(len(self.datasets[i])), - ) - ) - return data_list - - def get_data(self, idx): - dataset_idx, data_idx = self.data_list[idx % len(self.data_list)] - return self.datasets[dataset_idx][data_idx] - - def get_data_name(self, idx): - dataset_idx, data_idx = self.data_list[idx % len(self.data_list)] - return self.datasets[dataset_idx].get_data_name(data_idx) - - def __getitem__(self, idx): - return self.get_data(idx) - - def __len__(self): - return len(self.data_list) * self.loop diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/concerto/scannet/preprocess_scannet.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/concerto/scannet/preprocess_scannet.py deleted file mode 100644 index 95c6f76..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/concerto/scannet/preprocess_scannet.py +++ /dev/null @@ -1,574 +0,0 @@ -""" -Preprocessing Script for ScanNet 20/200 - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com), Yujia Zhang (yujia.zhang.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -import warnings - -warnings.filterwarnings("ignore", category=DeprecationWarning) - -import os -import camtools as ct -import open3d as o3d -from scipy.spatial import cKDTree -import struct -import zlib -import imageio -import cv2 -import argparse -import glob -import json -import plyfile -import numpy as np -import pandas as pd -import multiprocessing as mp -from pathlib import Path - -# Load external constants -import sys - -sys.path.append("pointcept/datasets/preprocessing/scannet/meta_data") -from scannet200_constants import VALID_CLASS_IDS_200, VALID_CLASS_IDS_20 - -CLOUD_FILE_PFIX = "_vh_clean_2" -SEGMENTS_FILE_PFIX = ".0.010000.segs.json" -AGGREGATIONS_FILE_PFIX = ".aggregation.json" -CLASS_IDS200 = VALID_CLASS_IDS_200 -CLASS_IDS20 = VALID_CLASS_IDS_20 -IGNORE_INDEX = -1 - -COMPRESSION_TYPE_COLOR = {-1: "unknown", 0: "raw", 1: "png", 2: "jpeg"} -COMPRESSION_TYPE_DEPTH = { - -1: "unknown", - 0: "raw_ushort", - 1: "zlib_ushort", - 2: "occi_ushort", -} - - -class RGBDFrame: - def load(self, file_handle): - self.camera_to_world = np.asarray( - struct.unpack("f" * 16, file_handle.read(16 * 4)), dtype=np.float32 - ).reshape(4, 4) - self.timestamp_color = struct.unpack("Q", file_handle.read(8))[0] - self.timestamp_depth = struct.unpack("Q", file_handle.read(8))[0] - self.color_size_bytes = struct.unpack("Q", file_handle.read(8))[0] - self.depth_size_bytes = struct.unpack("Q", file_handle.read(8))[0] - self.color_data = b"".join( - struct.unpack( - "c" * self.color_size_bytes, file_handle.read(self.color_size_bytes) - ) - ) - self.depth_data = b"".join( - struct.unpack( - "c" * self.depth_size_bytes, file_handle.read(self.depth_size_bytes) - ) - ) - - def decompress_depth(self, compression_type): - if compression_type == "zlib_ushort": - return self.decompress_depth_zlib() - else: - raise - - def decompress_depth_zlib(self): - return zlib.decompress(self.depth_data) - - def decompress_color(self, compression_type): - if compression_type == "jpeg": - return self.decompress_color_jpeg() - else: - raise - - def decompress_color_jpeg(self): - return imageio.imread(self.color_data) - - -class SensorData: - def __init__(self, filename): - self.version = 4 - self.load(filename) - - def load(self, filename): - with open(filename, "rb") as f: - version = struct.unpack("I", f.read(4))[0] - assert self.version == version - strlen = struct.unpack("Q", f.read(8))[0] - self.sensor_name = b"".join(struct.unpack("c" * strlen, f.read(strlen))) - self.intrinsic_color = np.asarray( - struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 - ).reshape(4, 4) - self.extrinsic_color = np.asarray( - struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 - ).reshape(4, 4) - self.intrinsic_depth = np.asarray( - struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 - ).reshape(4, 4) - self.extrinsic_depth = np.asarray( - struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 - ).reshape(4, 4) - self.color_compression_type = COMPRESSION_TYPE_COLOR[ - struct.unpack("i", f.read(4))[0] - ] - self.depth_compression_type = COMPRESSION_TYPE_DEPTH[ - struct.unpack("i", f.read(4))[0] - ] - self.color_width = struct.unpack("I", f.read(4))[0] - self.color_height = struct.unpack("I", f.read(4))[0] - self.depth_width = struct.unpack("I", f.read(4))[0] - self.depth_height = struct.unpack("I", f.read(4))[0] - self.depth_shift = struct.unpack("f", f.read(4))[0] - num_frames = struct.unpack("Q", f.read(8))[0] - self.frames = [] - for i in range(num_frames): - frame = RGBDFrame() - frame.load(f) - self.frames.append(frame) - - def export_depth_images(self, output_path, image_size=None, frame_skip=1): - if not os.path.exists(output_path): - os.makedirs(output_path) - print( - "exporting", len(self.frames) // frame_skip, " depth frames to", output_path - ) - for f in range(0, len(self.frames), frame_skip): - if os.path.exists((os.path.join(output_path, str(f) + ".png"))): - continue - if f % 100 == 0: - print( - "exporting", - f, - "th depth frames to", - os.path.join(output_path, str(f) + ".png"), - ) - - depth_data = self.frames[f].decompress_depth(self.depth_compression_type) - depth = np.fromstring(depth_data, dtype=np.uint16).reshape( - self.depth_height, self.depth_width - ) - if image_size is not None: - depth = cv2.resize( - depth, - (image_size[1], image_size[0]), - interpolation=cv2.INTER_NEAREST, - ) - imageio.imwrite(os.path.join(output_path, str(f) + ".png"), depth) - - def export_color_images(self, output_path, image_size=None, frame_skip=1): - if not os.path.exists(output_path): - os.makedirs(output_path) - print( - "exporting", len(self.frames) // frame_skip, "color frames to", output_path - ) - for f in range(0, len(self.frames), frame_skip): - if os.path.exists((os.path.join(output_path, str(f) + ".png"))): - continue - if f % 100 == 0: - print( - "exporting", - f, - "th color frames to", - os.path.join(output_path, str(f) + ".png"), - ) - color = self.frames[f].decompress_color(self.color_compression_type) - if image_size is not None: - color = cv2.resize( - color, - (image_size[1], image_size[0]), - interpolation=cv2.INTER_NEAREST, - ) - # imageio.imwrite(os.path.join(output_path, str(f) + '.jpg'), color) - imageio.imwrite(os.path.join(output_path, str(f) + ".png"), color) - - def save_mat_to_file(self, matrix, filename): - with open(filename, "w") as f: - for line in matrix: - np.savetxt(f, line[np.newaxis], fmt="%f") - - def export_poses(self, output_path, frame_skip=1): - if not os.path.exists(output_path): - os.makedirs(output_path) - print( - "exporting", len(self.frames) // frame_skip, "camera poses to", output_path - ) - for f in range(0, len(self.frames), frame_skip): - np.save( - os.path.join(output_path, str(f) + ".npy"), - self.frames[f].camera_to_world, - ) - - def export_intrinsics(self, output_path): - if not os.path.exists(output_path): - os.makedirs(output_path) - print("exporting camera intrinsics to", output_path) - np.save(os.path.join(output_path, "intrinsic.npy"), self.intrinsic_color) - - -def reader( - filename, - output_path, - frame_skip, - export_color_images=False, - export_depth_images=False, - export_poses=False, - export_intrinsics=False, -): - if not os.path.exists(output_path): - os.makedirs(output_path) - - # load the data - print("loading %s..." % filename) - sd = SensorData(filename) - if export_depth_images: - sd.export_depth_images( - os.path.join(output_path, "depth"), frame_skip=frame_skip - ) - if export_color_images: - sd.export_color_images( - os.path.join(output_path, "color"), frame_skip=frame_skip - ) - if export_poses: - sd.export_poses(os.path.join(output_path, "pose"), frame_skip=frame_skip) - if export_intrinsics: - sd.export_intrinsics(os.path.join(output_path, "intrinsic")) - return sd.color_height, sd.color_width - - -def read_plymesh(filepath): - """Read ply file and return it as numpy array. Returns None if emtpy.""" - with open(filepath, "rb") as f: - plydata = plyfile.PlyData.read(f) - if plydata.elements: - vertices = pd.DataFrame(plydata["vertex"].data).values - faces = np.stack(plydata["face"].data["vertex_indices"], axis=0) - return vertices, faces - - -# Map the raw category id to the point cloud -def point_indices_from_group(seg_indices, group, labels_pd): - group_segments = np.array(group["segments"]) - label = group["label"] - - # Map the category name to id - label_id20 = labels_pd[labels_pd["raw_category"] == label]["nyu40id"] - label_id20 = int(label_id20.iloc[0]) if len(label_id20) > 0 else 0 - label_id200 = labels_pd[labels_pd["raw_category"] == label]["id"] - label_id200 = int(label_id200.iloc[0]) if len(label_id200) > 0 else 0 - - # Only store for the valid categories - if label_id20 in CLASS_IDS20: - label_id20 = CLASS_IDS20.index(label_id20) - else: - label_id20 = IGNORE_INDEX - - if label_id200 in CLASS_IDS200: - label_id200 = CLASS_IDS200.index(label_id200) - else: - label_id200 = IGNORE_INDEX - - # get points, where segment indices (points labelled with segment ids) are in the group segment list - point_idx = np.where(np.isin(seg_indices, group_segments))[0] - return point_idx, label_id20, label_id200 - - -def face_normal(vertex, face): - v01 = vertex[face[:, 1]] - vertex[face[:, 0]] - v02 = vertex[face[:, 2]] - vertex[face[:, 0]] - vec = np.cross(v01, v02) - length = np.sqrt(np.sum(vec**2, axis=1, keepdims=True)) + 1.0e-8 - nf = vec / length - area = length * 0.5 - return nf, area - - -def vertex_normal(vertex, face): - nf, area = face_normal(vertex, face) - nf = nf * area - - nv = np.zeros_like(vertex) - for i in range(face.shape[0]): - nv[face[i]] += nf[i] - - length = np.sqrt(np.sum(nv**2, axis=1, keepdims=True)) + 1.0e-8 - nv = nv / length - return nv - - -def correspondenceGet(mesh, K, T, img_size, coords_gt): - height, width = img_size - if np.isnan(T).any() or np.isinf(T).any(): - return None - - depth = ct.raycast.mesh_to_im_depth( - mesh=mesh, K=K, T=np.linalg.inv(T), height=height, width=width - ) - - pixel = np.transpose(np.indices((width, height)), (2, 1, 0)) - pixel = pixel.reshape((-1, 2)) - pixel = np.hstack((pixel, np.ones((pixel.shape[0], 1)))) - depth = depth.reshape((-1, 1)) - valid = ~np.isinf(depth).squeeze(-1) - coord = np.zeros_like(pixel, dtype=np.float32) - coord[valid] = depth[valid] * (np.linalg.inv(K) @ pixel[valid].T).T # coord_camera - coord[valid] = coord[valid] @ T[:3, :3].T + T[:3, 3] # column then row - - pixel = pixel[valid] - coord = coord[valid] - if coord.shape[0] == 0: - return None - pixel = pixel[:, :2] - coord_dict = {"pixel": pixel, "coord": coord} - return coord_dict - - -def correspondenceSave(mesh, scene_dir, coords_gt, output_dir, img_size): - os.makedirs(output_dir, exist_ok=True) - scene_dir = Path(scene_dir) - index_gt = [ - img_name.split(".")[0] - for img_name in os.listdir(str(scene_dir / "color")) - if img_name.endswith(".png") - ] - index_gt = sorted(index_gt, key=lambda x: int(x)) - - Ks_path = str(scene_dir / "intrinsic" / "intrinsic.npy") - Ts_path = str(scene_dir / "pose") - Ts_files = sorted( - [f for f in os.listdir(Ts_path) if f.endswith(".npy")], - key=lambda x: int(x.split(".")[0]), - ) - - print(f"total pose num:{len(Ts_files)}") - Ts = [] - for Ts_file in Ts_files: - file_path = os.path.join(Ts_path, Ts_file) - Ts_ = np.load(file_path) - Ts.append(Ts_) - Ts = np.stack(Ts) - Ks = np.load(Ks_path) - - Ks = np.tile(Ks, (Ts.shape[0], 1, 1)) - Ks = Ks[:, :3, :3] - coords_gt_ = coords_gt - pixels_ = [] - coords_ = [] - - for i, (K, T) in enumerate(zip(Ks, Ts)): - coord_dict = correspondenceGet(mesh, K, T, img_size, coords_gt) - if coord_dict is None: - correspondences = -np.ones((1, 3)) - else: - pixels_ = coord_dict["pixel"] - coords_ = coord_dict["coord"] - tree = cKDTree(coords_gt_) - dis, idx = tree.query(coords_, k=1) - idx_valid = idx[dis < 0.01] - pixels_valid = pixels_[dis < 0.01] - correspondences = np.hstack((pixels_valid, idx_valid.reshape(-1, 1))) - np.save(Path(output_dir) / "{}.npy".format(index_gt[i]), correspondences) - - -def handle_process( - scene_path, - output_path, - pointclouds_root, - labels_pd, - train_scenes, - val_scenes, - frame_gap=75, - parse_pointclouds=True, - parse_normals=True, - export_depth_images=True, -): - pc_output_path = output_path - im_output_path = os.path.join(output_path, "images") - scene_id = os.path.basename(scene_path) - mesh_path = os.path.join(scene_path, f"{scene_id}{CLOUD_FILE_PFIX}.ply") - segments_file = os.path.join( - scene_path, f"{scene_id}{CLOUD_FILE_PFIX}{SEGMENTS_FILE_PFIX}" - ) - aggregations_file = os.path.join(scene_path, f"{scene_id}{AGGREGATIONS_FILE_PFIX}") - - if scene_id in train_scenes: - pc_output_path = os.path.join(pc_output_path, "train", f"{scene_id}") - pc_input_path = os.path.join(pointclouds_root, "train", f"{scene_id}") - im_output_path = os.path.join(im_output_path, "train", f"{scene_id}") - split_name = "train" - elif scene_id in val_scenes: - pc_output_path = os.path.join(pc_output_path, "val", f"{scene_id}") - pc_input_path = os.path.join(pointclouds_root, "val", f"{scene_id}") - im_output_path = os.path.join(im_output_path, "val", f"{scene_id}") - split_name = "val" - else: - pc_output_path = os.path.join(pc_output_path, "test", f"{scene_id}") - pc_input_path = os.path.join(pointclouds_root, "test", f"{scene_id}") - im_output_path = os.path.join(im_output_path, "test", f"{scene_id}") - split_name = "test" - - print(f"Processing: {scene_id} in {split_name}") - - if parse_pointclouds: - vertices, faces = read_plymesh(mesh_path) - coords = vertices[:, :3] - colors = vertices[:, 3:6] - save_dict = dict( - coord=coords.astype(np.float32), - color=colors.astype(np.uint8), - ) - # Parse Normals - if parse_normals: - save_dict["normal"] = vertex_normal(coords, faces).astype(np.float32) - - # Load segments file - if split_name != "test": - with open(segments_file) as f: - segments = json.load(f) - seg_indices = np.array(segments["segIndices"]) - - # Load Aggregations file - with open(aggregations_file) as f: - aggregation = json.load(f) - seg_groups = np.array(aggregation["segGroups"]) - - # Generate new labels - semantic_gt20 = np.ones((vertices.shape[0]), dtype=np.int16) * IGNORE_INDEX - semantic_gt200 = np.ones((vertices.shape[0]), dtype=np.int16) * IGNORE_INDEX - instance_ids = np.ones((vertices.shape[0]), dtype=np.int16) * IGNORE_INDEX - for group in seg_groups: - point_idx, label_id20, label_id200 = point_indices_from_group( - seg_indices, group, labels_pd - ) - - semantic_gt20[point_idx] = label_id20 - semantic_gt200[point_idx] = label_id200 - instance_ids[point_idx] = group["id"] - - semantic_gt20 = semantic_gt20.astype(int) - semantic_gt200 = semantic_gt200.astype(int) - instance_ids = instance_ids.astype(int) - - save_dict["segment20"] = semantic_gt20 - save_dict["segment200"] = semantic_gt200 - save_dict["instance"] = instance_ids - - # Concatenate with original cloud - processed_vertices = np.hstack((semantic_gt200, instance_ids)) - - if np.any(np.isnan(processed_vertices)) or not np.all( - np.isfinite(processed_vertices) - ): - raise ValueError(f"Find NaN in Scene: {scene_id}") - - # Save pointcloud data - os.makedirs(pc_output_path, exist_ok=True) - for key in save_dict.keys(): - np.save(os.path.join(pc_output_path, f"{key}.npy"), save_dict[key]) - else: - coords = np.load(os.path.join(pc_input_path, "coord.npy")) - save_dict = dict( - coord=coords.astype(np.float32), - ) - - # Save img data - os.makedirs(im_output_path, exist_ok=True) - sens_dir = os.path.join(scene_path, scene_id + ".sens") - print(f"Parsing sens data{sens_dir}") - h, w = reader( - sens_dir, - im_output_path, - frame_gap, - export_color_images=True, - export_depth_images=export_depth_images, - export_poses=True, - export_intrinsics=True, - ) - mesh = o3d.io.read_triangle_mesh(mesh_path) - correspondenceSave( - mesh, - im_output_path, - save_dict["coord"], - os.path.join(im_output_path, "correspondence"), - (h, w), - ) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--dataset_root", - required=True, - help="Path to the ScanNet dataset containing scene folders", - ) - parser.add_argument( - "--output_root", - required=True, - help="Output path where train/val folders will be located", - ) - parser.add_argument( - "--pointclouds_root", - default="data/scannet", - type=str, - help="Input path where previous pointclouds folder located", - ) - parser.add_argument( - "--frame_gap", default=75, type=int, help="Frame gap for processing" - ) - parser.add_argument( - "--parse_pointclouds", action="store_true", help="Whether parse point clouds" - ) - parser.add_argument( - "--parse_normals", action="store_true", help="Whether parse point normals" - ) - parser.add_argument( - "--parse_depths", action="store_true", help="Whether parse depths" - ) - parser.add_argument( - "--num_workers", - default=mp.cpu_count(), - type=int, - help="Num workers for preprocessing.", - ) - parser.add_argument( - "--thread_id", - default=0, - type=int, - help="Thread id for parallel processing", - ) - config = parser.parse_args() - meta_root = Path("pointcept/datasets/preprocessing/scannet") / "meta_data" - - # Load label map - labels_pd = pd.read_csv( - meta_root / "scannetv2-labels.combined.tsv", - sep="\t", - header=0, - ) - - # Load train/val splits - with open(meta_root / "scannetv2_train.txt") as train_file: - train_scenes = train_file.read().splitlines() - with open(meta_root / "scannetv2_val.txt") as val_file: - val_scenes = val_file.read().splitlines() - - # Load scene paths - scene_paths = sorted(glob.glob(config.dataset_root + "/scans*/scene*")) - scene_paths_list = np.array_split(scene_paths, config.num_workers) - scene_paths_ = scene_paths_list[config.thread_id] - # Preprocess data. - print("Processing scenes...") - for scene_paths_i in scene_paths_: - handle_process( - scene_paths_i, - config.output_root, - config.pointclouds_root, - labels_pd, - train_scenes, - val_scenes, - config.frame_gap, - config.parse_pointclouds, - config.parse_normals, - config.parse_depths, - ) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/concerto/scannet/preprocess_scannet.sh b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/concerto/scannet/preprocess_scannet.sh deleted file mode 100644 index 00b2112..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/concerto/scannet/preprocess_scannet.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash - -dataset_root="" -output_root="" -num_workers=16 -parse_depths=false -parse_pointclouds=false - -while getopts "d:o:n:pc" opt; do - case $opt in - d) dataset_root=$OPTARG ;; - o) output_root=$OPTARG ;; - n) num_workers=$OPTARG ;; - p) parse_depths=true ;; - c) parse_pointclouds=true ;; - *) echo "Usage: $0 -d -o [-n ] [-p] [-c]"; exit 1 ;; - esac -done - -if [ -z "$dataset_root" ] || [ -z "$output_root" ]; then - echo "Usage: $0 -d -o [-n ] [-p] [-c]" - exit 1 -fi - -for i in $(seq 0 $((num_workers - 1))); do - cmd="python pointcept/datasets/preprocessing/concerto/scannet/preprocess_scannet.py --thread_id $i \ - --num_workers $num_workers \ - --dataset_root $dataset_root \ - --output_root $output_root \ - --parse_normal" - if $parse_depths; then - cmd="$cmd --parse_depths" - fi - - if $parse_pointclouds; then - cmd="$cmd --parse_pointclouds" - fi - - eval "$cmd &" -done - -wait diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/concerto/scannet/splits.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/concerto/scannet/splits.py deleted file mode 100644 index 5d4e5c6..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/concerto/scannet/splits.py +++ /dev/null @@ -1,62 +0,0 @@ -import os -import json -import argparse - - -def get_splits_paths(dataset_path): - # Get the names of all subfolders in the given folder - im_path = os.path.join(dataset_path, "images") - pc_path = dataset_path - splits = ["train", "val", "test"] - split_path = os.path.join(dataset_path, "splits") - os.makedirs(split_path, exist_ok=True) - for split in splits: - im_split_path = os.path.join(im_path, split) - pc_split_path = os.path.join(pc_path, split).replace( - dataset_path, "data/scannet" - ) - split_names = [f.name for f in os.scandir(im_split_path) if f.is_dir()] - split_dict = {} - for name in split_names: - im_split_name_path = os.path.join(im_split_path, name, "color") - co_split_name_path = os.path.join(im_split_path, name, "correspondence") - png_files = [ - f for f in os.listdir(im_split_name_path) if f.endswith(".png") - ] - png_files = sorted(png_files, key=lambda x: int(x.split(".")[0])) - # Get the full paths of the .png files - png_file_paths = [ - os.path.join(im_split_name_path, f).replace( - dataset_path, "data/scannet" - ) - for f in png_files - ] - co_file_paths = [ - os.path.join(co_split_name_path, f.replace(".png", ".npy")).replace( - dataset_path, "data/scannet" - ) - for f in png_files - ] - for i in range(0, len(png_file_paths), 4): - split_dict[f"{name}_{i//4}"] = {} - split_dict[f"{name}_{i//4}"]["pointclouds"] = os.path.join( - pc_split_path, name - ) - split_dict[f"{name}_{i//4}"]["images"] = png_file_paths[i : i + 4] - split_dict[f"{name}_{i//4}"]["correspondences"] = co_file_paths[ - i : i + 4 - ] - with open(os.path.join(split_path, f"{split}.json"), "w") as f: - json.dump(split_dict, f, indent=4) - - -# Example usage -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--dataset_root", - required=True, - help="Path to the ScanNet dataset containing scene folders", - ) - config = parser.parse_args() - get_splits_paths(config.dataset_root) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/sampling_chunking_data.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/sampling_chunking_data.py deleted file mode 100644 index 96536d4..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/sampling_chunking_data.py +++ /dev/null @@ -1,149 +0,0 @@ -""" -Chunking Data - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -import os -import argparse -import numpy as np -import multiprocessing as mp -from concurrent.futures import ProcessPoolExecutor -from itertools import repeat -from pathlib import Path - - -def chunking_scene( - name, - dataset_root, - split, - grid_size=None, - chunk_range=(6, 6), - chunk_stride=(3, 3), - chunk_minimum_size=10000, -): - print(f"Chunking scene {name} in {split} split") - dataset_root = Path(dataset_root) - scene_path = dataset_root / split / name - assets = os.listdir(scene_path) - data_dict = dict() - for asset in assets: - if not asset.endswith(".npy"): - continue - data_dict[asset[:-4]] = np.load(scene_path / asset) - coord = data_dict["coord"] - data_dict["coord"].min(axis=0) - - if grid_size is not None: - grid_coord = np.floor(coord / grid_size).astype(int) - _, idx = np.unique(grid_coord, axis=0, return_index=True) - coord = coord[idx] - for key in data_dict.keys(): - data_dict[key] = data_dict[key][idx] - - bev_range = coord.max(axis=0)[:2] - x, y = np.meshgrid( - np.arange(0, bev_range[0] + chunk_stride[0] - chunk_range[0], chunk_stride[0]), - np.arange(0, bev_range[0] + chunk_stride[0] - chunk_range[0], chunk_stride[0]), - indexing="ij", - ) - chunks = np.concatenate([x.reshape([-1, 1]), y.reshape([-1, 1])], axis=-1) - chunk_idx = 0 - for chunk in chunks: - mask = ( - (coord[:, 0] >= chunk[0]) - & (coord[:, 0] < chunk[0] + chunk_range[0]) - & (coord[:, 1] >= chunk[1]) - & (coord[:, 1] < chunk[1] + chunk_range[1]) - ) - if np.sum(mask) < chunk_minimum_size: - continue - - chunk_data_name = f"{name}_{chunk_idx}" - if grid_size is not None: - chunk_split_name = ( - f"{split}_" - f"grid{grid_size * 100:.0f}mm_" - f"chunk{chunk_range[0]}x{chunk_range[1]}_" - f"stride{chunk_stride[0]}x{chunk_stride[1]}" - ) - else: - chunk_split_name = ( - f"{split}_" - f"chunk{chunk_range[0]}x{chunk_range[1]}_" - f"stride{chunk_stride[0]}x{chunk_stride[1]}" - ) - - chunk_save_path = dataset_root / chunk_split_name / chunk_data_name - chunk_save_path.mkdir(parents=True, exist_ok=True) - for key in data_dict.keys(): - np.save(chunk_save_path / f"{key}.npy", data_dict[key][mask]) - chunk_idx += 1 - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--dataset_root", - required=True, - help="Path to the Pointcept processed ScanNet++ dataset.", - ) - parser.add_argument( - "--split", - required=True, - default="train", - type=str, - help="Split need to process.", - ) - parser.add_argument( - "--grid_size", - default=None, - type=float, - help="Grid size for initial grid sampling", - ) - parser.add_argument( - "--chunk_range", - default=[6, 6], - type=int, - nargs="+", - help="Range of each chunk, e.g. --chunk_range 6 6", - ) - parser.add_argument( - "--chunk_stride", - default=[3, 3], - type=int, - nargs="+", - help="Stride of each chunk, e.g. --chunk_stride 3 3", - ) - parser.add_argument( - "--chunk_minimum_size", - default=10000, - type=int, - help="Minimum number of points in each chunk", - ) - parser.add_argument( - "--num_workers", - default=mp.cpu_count(), - type=int, - help="Num workers for preprocessing.", - ) - - config = parser.parse_args() - config.dataset_root = Path(config.dataset_root) - data_list = os.listdir(config.dataset_root / config.split) - - print("Processing scenes...") - pool = ProcessPoolExecutor(max_workers=config.num_workers) - _ = list( - pool.map( - chunking_scene, - data_list, - repeat(config.dataset_root), - repeat(config.split), - repeat(config.grid_size), - repeat(config.chunk_range), - repeat(config.chunk_stride), - repeat(config.chunk_minimum_size), - ) - ) - pool.shutdown() diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/dino/prepare_scene_list.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/dino/prepare_scene_list.py deleted file mode 100644 index 7b43f15..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/dino/prepare_scene_list.py +++ /dev/null @@ -1,27 +0,0 @@ -import os -import argparse -from pathlib import Path - -import numpy as np - -if __name__ == "__main__": - num_train_list = 12 - num_val_list = 3 - meta_root = Path(os.path.dirname(__file__)).parent / "meta_data" - - # Load train/val splits - train_scenes = np.loadtxt(meta_root / "scannetv2_train.txt", dtype=str) - val_scenes = np.loadtxt(meta_root / "scannetv2_val.txt", dtype=str) - - for i in range(num_train_list): - np.savetxt( - meta_root / f"scannetv2_train_{i}.txt", - train_scenes[i::num_train_list], - fmt="%s", - ) - for i in range(num_val_list): - np.savetxt( - meta_root / f"scannetv2_val_{i}.txt", - val_scenes[i::num_val_list], - fmt="%s", - ) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/dino/preprocess_dino_feature.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/dino/preprocess_dino_feature.py deleted file mode 100644 index 1eb9541..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/dino/preprocess_dino_feature.py +++ /dev/null @@ -1,362 +0,0 @@ -import os -import argparse -import einops -import torch -import torch.nn.functional as F -import torchvision -import tqdm -import cv2 -import camtools as ct -import open3d as o3d -import zlib -import imageio -import struct -import numpy as np -import torch_scatter -from pathlib import Path - - -class RGBDFrame: - def __init__(self, file_handle): - self.camera_to_world = np.asarray( - struct.unpack("f" * 16, file_handle.read(16 * 4)), dtype=np.float32 - ).reshape(4, 4) - self.timestamp_color = struct.unpack("Q", file_handle.read(8))[0] - self.timestamp_depth = struct.unpack("Q", file_handle.read(8))[0] - self.color_size_bytes = struct.unpack("Q", file_handle.read(8))[0] - self.depth_size_bytes = struct.unpack("Q", file_handle.read(8))[0] - self.color_data = b"".join( - struct.unpack( - "c" * self.color_size_bytes, file_handle.read(self.color_size_bytes) - ) - ) - self.depth_data = b"".join( - struct.unpack( - "c" * self.depth_size_bytes, file_handle.read(self.depth_size_bytes) - ) - ) - - def decompress_depth(self, compression_type): - if compression_type == "zlib_ushort": - return self.decompress_depth_zlib() - else: - raise - - def decompress_depth_zlib(self): - return zlib.decompress(self.depth_data) - - def decompress_color(self, compression_type): - if compression_type == "jpeg": - return self.decompress_color_jpeg() - else: - raise - - def decompress_color_jpeg(self): - return imageio.v2.imread(self.color_data) - - -class SensorData: - COMPRESSION_TYPE_COLOR = { - -1: "unknown", - 0: "raw", - 1: "png", - 2: "jpeg", - } - COMPRESSION_TYPE_DEPTH = { - -1: "unknown", - 0: "raw_ushort", - 1: "zlib_ushort", - 2: "occi_ushort", - } - - def __init__(self, filename): - self.version = 4 - f = open(filename, "rb") - version = struct.unpack("I", f.read(4))[0] - assert self.version == version - strlen = struct.unpack("Q", f.read(8))[0] - self.sensor_name = b"".join(struct.unpack("c" * strlen, f.read(strlen))) - self.intrinsic_color = np.asarray( - struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 - ).reshape(4, 4) - self.extrinsic_color = np.asarray( - struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 - ).reshape(4, 4) - self.intrinsic_depth = np.asarray( - struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 - ).reshape(4, 4) - self.extrinsic_depth = np.asarray( - struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 - ).reshape(4, 4) - self.color_compression_type = self.COMPRESSION_TYPE_COLOR[ - struct.unpack("i", f.read(4))[0] - ] - self.depth_compression_type = self.COMPRESSION_TYPE_DEPTH[ - struct.unpack("i", f.read(4))[0] - ] - self.color_width = struct.unpack("I", f.read(4))[0] - self.color_height = struct.unpack("I", f.read(4))[0] - self.depth_width = struct.unpack("I", f.read(4))[0] - self.depth_height = struct.unpack("I", f.read(4))[0] - self.depth_shift = struct.unpack("f", f.read(4))[0] - self.num_frames = struct.unpack("Q", f.read(8))[0] - self.file_handle = f - - def export( - self, - frame_skip=20, - export_color=True, - export_depth=True, - export_pose=True, - ): - for i in range(self.num_frames): - if i % frame_skip != 0: - self.file_handle.seek(16 * 4 + 8 + 8, 1) # skip pose, timestamp - color_size_bytes = struct.unpack("Q", self.file_handle.read(8))[0] - depth_size_bytes = struct.unpack("Q", self.file_handle.read(8))[0] - self.file_handle.seek(color_size_bytes + depth_size_bytes, 1) - continue - else: - frame = RGBDFrame(self.file_handle) - data_dict = {} - if export_color: - color = frame.decompress_color(self.color_compression_type) - data_dict["color"] = color - if export_depth: - depth = frame.decompress_depth(self.depth_compression_type) - depth = np.frombuffer(depth, dtype=np.uint16).reshape( - self.depth_height, self.depth_width - ) - data_dict["depth"] = depth - if export_pose: - pose = frame.camera_to_world - data_dict["pose"] = pose - yield data_dict - - def __del__(self): - self.file_handle.close() - - -def ray_distance_to_z_depth(ray_depth, K): - height, width = ray_depth.shape - - u = np.arange(width) - v = np.arange(height) - u_grid, v_grid = np.meshgrid(u, v) - - fx = K[0, 0] - fy = K[1, 1] - cx = K[0, 2] - cy = K[1, 2] - - u_norm = (u_grid - cx) / fx - v_norm = (v_grid - cy) / fy - - norm_square = u_norm**2 + v_norm**2 - - z_depth = ray_depth / np.sqrt(norm_square + 1) - return z_depth - - -def center_crop(image, crop_ratio=1.0, patch_size=None): - if len(image.shape) == 2: - height, width = image.shape - elif len(image.shape) == 3: - height, width, _ = image.shape - else: - raise ValueError("Invalid image shape") - if patch_size is not None: - crop_h = int(height * crop_ratio // patch_size * patch_size) - crop_w = int(width * crop_ratio // patch_size * patch_size) - else: - crop_h = int(height * crop_ratio) - crop_w = int(width * crop_ratio) - - # Calculate the cropping box - start_h = (height - crop_h) // 2 - start_w = (width - crop_w) // 2 - - # Perform the center crop - cropped_image = image[start_h : start_h + crop_h, start_w : start_w + crop_w] - - return cropped_image - - -def parsing_scene( - scene_path, - output_root, - split, - model, - frame_skip=20, - grid_size=0.08, - crop_ratio=0.95, - device="cuda", -): - print(f"Parsing scene: {scene_path.name}") - device = torch.device(device) - scene_path = Path(scene_path) - sensor_reader = SensorData(scene_path / f"{scene_path.name}.sens") - mesh = o3d.io.read_triangle_mesh( - str(scene_path / f"{scene_path.name}_vh_clean_2.ply") - ) - transform = torchvision.transforms.Compose( - [ - torchvision.transforms.ToTensor(), - torchvision.transforms.Normalize( - mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225) - ), - ] - ) - scene_coord = [] - scene_feat = [] - scene_count = [] - for data in tqdm.tqdm( - sensor_reader.export(frame_skip=frame_skip), - total=sensor_reader.num_frames // frame_skip, - ): - height, width = data["depth"].shape - K = sensor_reader.intrinsic_depth[:3, :3] - T = data["pose"] - if np.isnan(T).any() or np.isinf(T).any(): - continue - depth = ct.raycast.mesh_to_depth( - mesh=mesh, K=K, T=np.linalg.inv(T), height=height, width=width - ) - depth = ray_distance_to_z_depth(depth, K) - depth = center_crop(depth, crop_ratio, model.patch_size) - height_, width_ = depth.shape - pixel = np.transpose(np.indices((width_, height_)), (2, 1, 0)) - pixel = pixel.reshape((-1, 2)) - pixel = np.hstack((pixel, np.ones((pixel.shape[0], 1)))) - depth = depth.reshape((-1, 1)) - valid = ~np.isinf(depth).squeeze(-1) - coord = depth[valid] * (np.linalg.inv(K) @ pixel[valid].T).T # coord_camera - coord = coord @ T[:3, :3].T + T[:3, 3] - - color = cv2.resize( - data["color"], (width, height), interpolation=cv2.INTER_LINEAR - ) - color = center_crop(color, crop_ratio, model.patch_size) - with torch.inference_mode(): - color_t = transform(color).unsqueeze(0).to(device) - feat_t = model.forward_features(color_t)["x_norm_patchtokens"] - feat_t = einops.rearrange( - feat_t, "1 (h w) c -> 1 c h w", w=width_ // model.patch_size - ) - feat_t = F.interpolate(feat_t, (height_, width_), mode="bilinear") - feat_t = einops.rearrange(feat_t, "1 c h w -> (h w) c")[valid] - coord_t = torch.tensor(coord, dtype=torch.float32).to(device) - scene_coord.append(coord_t) - scene_feat.append(feat_t) - scene_count.append( - torch.ones(coord_t.shape[0], dtype=torch.long, device=device) - ) - scene_coord = torch.concatenate(scene_coord, dim=0) - scene_feat = torch.concatenate(scene_feat, dim=0) - scene_count = torch.concatenate(scene_count, dim=0) - - # grid sampling - grid_coord = torch.floor_divide(scene_coord, grid_size).to(torch.int32) - grid_coord, cluster = torch.unique( - grid_coord, sorted=True, return_inverse=True, dim=0 - ) - scene_coord = [ - torch_scatter.scatter(scene_coord, cluster, reduce="mean", dim=0) - ] - scene_feat = [ - torch_scatter.scatter(scene_feat, cluster, reduce="sum", dim=0) - ] - scene_count = [ - torch_scatter.scatter(scene_count, cluster, reduce="sum", dim=0) - ] - - # color = color.reshape((-1, 3))[valid] - # pcd = o3d.geometry.PointCloud() - # pcd.points = o3d.utility.Vector3dVector(coord) - # pcd.colors = o3d.utility.Vector3dVector(color / 255) - # o3d.visualization.draw_geometries([pcd]) - - scene_coord = scene_coord[0] - scene_feat = scene_feat[0] / scene_count[0].unsqueeze(-1) - - scene_coord = scene_coord.half().cpu().numpy() - scene_feat = scene_feat.half().cpu().numpy() - np.savez( - Path(output_root) / split / f"{scene_path.name}.npz", - coord=scene_coord, - feat=scene_feat, - ) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--dataset_root", - required=True, - help="Path to the ScanNet dataset containing scene folders", - ) - parser.add_argument( - "--output_root", - required=True, - help="Output path where train/val folders will be located", - ) - parser.add_argument( - "--scene_list", - required=True, - help="Path to scene list need to process", - ) - parser.add_argument( - "--frame_skip", - default=10, - help="Frame skip for processing", - ) - parser.add_argument( - "--grid_size", - default=0.08, - help="Grid size for sampling", - ) - parser.add_argument( - "--crop_ratio", - default=0.95, - help="Crop ratio for center crop", - ) - - args = parser.parse_args() - scene_list = np.loadtxt(args.scene_list, dtype=str) - if "train" in args.scene_list: - split = "train" - folder = "scans" - elif "val" in args.scene_list: - split = "val" - folder = "scans" - else: - split = "test" - folder = "scans_test" - - os.makedirs(Path(args.output_root) / split, exist_ok=True) - - device = torch.device("cuda") - model = torch.hub.load("facebookresearch/dinov2", "dinov2_vitg14").to(device) - model.eval() - for scene in scene_list: - parsing_scene( - scene_path=Path(args.dataset_root) / folder / scene, - output_root=args.output_root, - split=split, - frame_skip=args.frame_skip, - grid_size=args.grid_size, - crop_ratio=args.crop_ratio, - model=model, - device="cuda", - ) - - # parsing_scene( - # scene_path=Path("/mnt/e/datasets/raw/scannet/scans/scene0230_00"), - # output_root=args.output_root, - # split=split, - # frame_skip=args.frame_skip, - # grid_size=args.grid_size, - # crop_ratio=args.crop_ratio, - # model=model, - # device="cuda", - # ) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/extract_partition.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/extract_partition.py deleted file mode 100644 index 432a490..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/extract_partition.py +++ /dev/null @@ -1,71 +0,0 @@ -import json -import shutil -import argparse -import torch -import glob -import os.path - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--dataset_root", - required=True, - help="Path to the ScanNet dataset containing scene folders", - ) - parser.add_argument( - "--processed_root", - required=True, - help="Path to the processed ScanNet dataset, add partition to test data dict", - ) - parser.add_argument( - "--segmentor_root", - required=True, - help="Path to Felzenswalb and Huttenlocher's Graph Based Image Segmentation binary", - ) - parser.add_argument( - "--split", - default="test", - choices=["test", "val"], - help="Split to process. [test / val]", - ) - config = parser.parse_args() - if config.split == "test": - raw_split = "scans_test" - else: - raw_split = "scans" - - scene_list = glob.glob(os.path.join(config.processed_root, config.split, "*.pth")) - os.makedirs(os.path.join(config.processed_root, "tmp"), exist_ok=True) - - for scene in scene_list: - scene_name = os.path.basename(scene).split(".")[0] - raw_scene = os.path.join( - config.dataset_root, - raw_split, - scene_name, - f"{scene_name}_vh_clean_2.ply", - ) - tmp_scene = os.path.join( - config.processed_root, - "tmp", - f"{scene_name}_vh_clean_2.ply", - ) - # copy original scene to tmp folder - shutil.copy(raw_scene, tmp_scene) - # run segmentor - process = os.popen(f"{config.segmentor_root} {tmp_scene}") - print(process.read()) - process.close() - # load partition file - partition_file = tmp_scene.replace(".ply", ".0.010000.segs.json") - with open(partition_file) as f: - partition = json.load(f)["segIndices"] - data_dict = torch.load(scene) - data_dict["partition"] = partition - torch.save(data_dict, scene) - # clean tmp - os.remove(partition_file) - os.remove(tmp_scene) - print(f"Adding partition information to {scene_name}") - - os.rmdir(os.path.join(config.processed_root, "tmp")) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_constants.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_constants.py deleted file mode 100644 index 0404fd6..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_constants.py +++ /dev/null @@ -1,704 +0,0 @@ -# ScanNet Benchmark constants -VALID_CLASS_IDS_20 = ( - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 14, - 16, - 24, - 28, - 33, - 34, - 36, - 39, -) - -CLASS_LABELS_20 = ( - "wall", - "floor", - "cabinet", - "bed", - "chair", - "sofa", - "table", - "door", - "window", - "bookshelf", - "picture", - "counter", - "desk", - "curtain", - "refrigerator", - "shower curtain", - "toilet", - "sink", - "bathtub", - "otherfurniture", -) - -SCANNET_COLOR_MAP_20 = { - 0: (0.0, 0.0, 0.0), - 1: (174.0, 199.0, 232.0), - 2: (152.0, 223.0, 138.0), - 3: (31.0, 119.0, 180.0), - 4: (255.0, 187.0, 120.0), - 5: (188.0, 189.0, 34.0), - 6: (140.0, 86.0, 75.0), - 7: (255.0, 152.0, 150.0), - 8: (214.0, 39.0, 40.0), - 9: (197.0, 176.0, 213.0), - 10: (148.0, 103.0, 189.0), - 11: (196.0, 156.0, 148.0), - 12: (23.0, 190.0, 207.0), - 14: (247.0, 182.0, 210.0), - 15: (66.0, 188.0, 102.0), - 16: (219.0, 219.0, 141.0), - 17: (140.0, 57.0, 197.0), - 18: (202.0, 185.0, 52.0), - 19: (51.0, 176.0, 203.0), - 20: (200.0, 54.0, 131.0), - 21: (92.0, 193.0, 61.0), - 22: (78.0, 71.0, 183.0), - 23: (172.0, 114.0, 82.0), - 24: (255.0, 127.0, 14.0), - 25: (91.0, 163.0, 138.0), - 26: (153.0, 98.0, 156.0), - 27: (140.0, 153.0, 101.0), - 28: (158.0, 218.0, 229.0), - 29: (100.0, 125.0, 154.0), - 30: (178.0, 127.0, 135.0), - 32: (146.0, 111.0, 194.0), - 33: (44.0, 160.0, 44.0), - 34: (112.0, 128.0, 144.0), - 35: (96.0, 207.0, 209.0), - 36: (227.0, 119.0, 194.0), - 37: (213.0, 92.0, 176.0), - 38: (94.0, 106.0, 211.0), - 39: (82.0, 84.0, 163.0), - 40: (100.0, 85.0, 144.0), -} - -# ScanNet200 Benchmark constants -VALID_CLASS_IDS_200 = ( - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 21, - 22, - 23, - 24, - 26, - 27, - 28, - 29, - 31, - 32, - 33, - 34, - 35, - 36, - 38, - 39, - 40, - 41, - 42, - 44, - 45, - 46, - 47, - 48, - 49, - 50, - 51, - 52, - 54, - 55, - 56, - 57, - 58, - 59, - 62, - 63, - 64, - 65, - 66, - 67, - 68, - 69, - 70, - 71, - 72, - 73, - 74, - 75, - 76, - 77, - 78, - 79, - 80, - 82, - 84, - 86, - 87, - 88, - 89, - 90, - 93, - 95, - 96, - 97, - 98, - 99, - 100, - 101, - 102, - 103, - 104, - 105, - 106, - 107, - 110, - 112, - 115, - 116, - 118, - 120, - 121, - 122, - 125, - 128, - 130, - 131, - 132, - 134, - 136, - 138, - 139, - 140, - 141, - 145, - 148, - 154, - 155, - 156, - 157, - 159, - 161, - 163, - 165, - 166, - 168, - 169, - 170, - 177, - 180, - 185, - 188, - 191, - 193, - 195, - 202, - 208, - 213, - 214, - 221, - 229, - 230, - 232, - 233, - 242, - 250, - 261, - 264, - 276, - 283, - 286, - 300, - 304, - 312, - 323, - 325, - 331, - 342, - 356, - 370, - 392, - 395, - 399, - 408, - 417, - 488, - 540, - 562, - 570, - 572, - 581, - 609, - 748, - 776, - 1156, - 1163, - 1164, - 1165, - 1166, - 1167, - 1168, - 1169, - 1170, - 1171, - 1172, - 1173, - 1174, - 1175, - 1176, - 1178, - 1179, - 1180, - 1181, - 1182, - 1183, - 1184, - 1185, - 1186, - 1187, - 1188, - 1189, - 1190, - 1191, -) - -CLASS_LABELS_200 = ( - "wall", - "chair", - "floor", - "table", - "door", - "couch", - "cabinet", - "shelf", - "desk", - "office chair", - "bed", - "pillow", - "sink", - "picture", - "window", - "toilet", - "bookshelf", - "monitor", - "curtain", - "book", - "armchair", - "coffee table", - "box", - "refrigerator", - "lamp", - "kitchen cabinet", - "towel", - "clothes", - "tv", - "nightstand", - "counter", - "dresser", - "stool", - "cushion", - "plant", - "ceiling", - "bathtub", - "end table", - "dining table", - "keyboard", - "bag", - "backpack", - "toilet paper", - "printer", - "tv stand", - "whiteboard", - "blanket", - "shower curtain", - "trash can", - "closet", - "stairs", - "microwave", - "stove", - "shoe", - "computer tower", - "bottle", - "bin", - "ottoman", - "bench", - "board", - "washing machine", - "mirror", - "copier", - "basket", - "sofa chair", - "file cabinet", - "fan", - "laptop", - "shower", - "paper", - "person", - "paper towel dispenser", - "oven", - "blinds", - "rack", - "plate", - "blackboard", - "piano", - "suitcase", - "rail", - "radiator", - "recycling bin", - "container", - "wardrobe", - "soap dispenser", - "telephone", - "bucket", - "clock", - "stand", - "light", - "laundry basket", - "pipe", - "clothes dryer", - "guitar", - "toilet paper holder", - "seat", - "speaker", - "column", - "bicycle", - "ladder", - "bathroom stall", - "shower wall", - "cup", - "jacket", - "storage bin", - "coffee maker", - "dishwasher", - "paper towel roll", - "machine", - "mat", - "windowsill", - "bar", - "toaster", - "bulletin board", - "ironing board", - "fireplace", - "soap dish", - "kitchen counter", - "doorframe", - "toilet paper dispenser", - "mini fridge", - "fire extinguisher", - "ball", - "hat", - "shower curtain rod", - "water cooler", - "paper cutter", - "tray", - "shower door", - "pillar", - "ledge", - "toaster oven", - "mouse", - "toilet seat cover dispenser", - "furniture", - "cart", - "storage container", - "scale", - "tissue box", - "light switch", - "crate", - "power outlet", - "decoration", - "sign", - "projector", - "closet door", - "vacuum cleaner", - "candle", - "plunger", - "stuffed animal", - "headphones", - "dish rack", - "broom", - "guitar case", - "range hood", - "dustpan", - "hair dryer", - "water bottle", - "handicap bar", - "purse", - "vent", - "shower floor", - "water pitcher", - "mailbox", - "bowl", - "paper bag", - "alarm clock", - "music stand", - "projector screen", - "divider", - "laundry detergent", - "bathroom counter", - "object", - "bathroom vanity", - "closet wall", - "laundry hamper", - "bathroom stall door", - "ceiling light", - "trash bin", - "dumbbell", - "stair rail", - "tube", - "bathroom cabinet", - "cd case", - "closet rod", - "coffee kettle", - "structure", - "shower head", - "keyboard piano", - "case of water bottles", - "coat rack", - "storage organizer", - "folded chair", - "fire alarm", - "power strip", - "calendar", - "poster", - "potted plant", - "luggage", - "mattress", -) - -SCANNET_COLOR_MAP_200 = { - 0: (0.0, 0.0, 0.0), - 1: (174.0, 199.0, 232.0), - 2: (188.0, 189.0, 34.0), - 3: (152.0, 223.0, 138.0), - 4: (255.0, 152.0, 150.0), - 5: (214.0, 39.0, 40.0), - 6: (91.0, 135.0, 229.0), - 7: (31.0, 119.0, 180.0), - 8: (229.0, 91.0, 104.0), - 9: (247.0, 182.0, 210.0), - 10: (91.0, 229.0, 110.0), - 11: (255.0, 187.0, 120.0), - 13: (141.0, 91.0, 229.0), - 14: (112.0, 128.0, 144.0), - 15: (196.0, 156.0, 148.0), - 16: (197.0, 176.0, 213.0), - 17: (44.0, 160.0, 44.0), - 18: (148.0, 103.0, 189.0), - 19: (229.0, 91.0, 223.0), - 21: (219.0, 219.0, 141.0), - 22: (192.0, 229.0, 91.0), - 23: (88.0, 218.0, 137.0), - 24: (58.0, 98.0, 137.0), - 26: (177.0, 82.0, 239.0), - 27: (255.0, 127.0, 14.0), - 28: (237.0, 204.0, 37.0), - 29: (41.0, 206.0, 32.0), - 31: (62.0, 143.0, 148.0), - 32: (34.0, 14.0, 130.0), - 33: (143.0, 45.0, 115.0), - 34: (137.0, 63.0, 14.0), - 35: (23.0, 190.0, 207.0), - 36: (16.0, 212.0, 139.0), - 38: (90.0, 119.0, 201.0), - 39: (125.0, 30.0, 141.0), - 40: (150.0, 53.0, 56.0), - 41: (186.0, 197.0, 62.0), - 42: (227.0, 119.0, 194.0), - 44: (38.0, 100.0, 128.0), - 45: (120.0, 31.0, 243.0), - 46: (154.0, 59.0, 103.0), - 47: (169.0, 137.0, 78.0), - 48: (143.0, 245.0, 111.0), - 49: (37.0, 230.0, 205.0), - 50: (14.0, 16.0, 155.0), - 51: (196.0, 51.0, 182.0), - 52: (237.0, 80.0, 38.0), - 54: (138.0, 175.0, 62.0), - 55: (158.0, 218.0, 229.0), - 56: (38.0, 96.0, 167.0), - 57: (190.0, 77.0, 246.0), - 58: (208.0, 49.0, 84.0), - 59: (208.0, 193.0, 72.0), - 62: (55.0, 220.0, 57.0), - 63: (10.0, 125.0, 140.0), - 64: (76.0, 38.0, 202.0), - 65: (191.0, 28.0, 135.0), - 66: (211.0, 120.0, 42.0), - 67: (118.0, 174.0, 76.0), - 68: (17.0, 242.0, 171.0), - 69: (20.0, 65.0, 247.0), - 70: (208.0, 61.0, 222.0), - 71: (162.0, 62.0, 60.0), - 72: (210.0, 235.0, 62.0), - 73: (45.0, 152.0, 72.0), - 74: (35.0, 107.0, 149.0), - 75: (160.0, 89.0, 237.0), - 76: (227.0, 56.0, 125.0), - 77: (169.0, 143.0, 81.0), - 78: (42.0, 143.0, 20.0), - 79: (25.0, 160.0, 151.0), - 80: (82.0, 75.0, 227.0), - 82: (253.0, 59.0, 222.0), - 84: (240.0, 130.0, 89.0), - 86: (123.0, 172.0, 47.0), - 87: (71.0, 194.0, 133.0), - 88: (24.0, 94.0, 205.0), - 89: (134.0, 16.0, 179.0), - 90: (159.0, 32.0, 52.0), - 93: (213.0, 208.0, 88.0), - 95: (64.0, 158.0, 70.0), - 96: (18.0, 163.0, 194.0), - 97: (65.0, 29.0, 153.0), - 98: (177.0, 10.0, 109.0), - 99: (152.0, 83.0, 7.0), - 100: (83.0, 175.0, 30.0), - 101: (18.0, 199.0, 153.0), - 102: (61.0, 81.0, 208.0), - 103: (213.0, 85.0, 216.0), - 104: (170.0, 53.0, 42.0), - 105: (161.0, 192.0, 38.0), - 106: (23.0, 241.0, 91.0), - 107: (12.0, 103.0, 170.0), - 110: (151.0, 41.0, 245.0), - 112: (133.0, 51.0, 80.0), - 115: (184.0, 162.0, 91.0), - 116: (50.0, 138.0, 38.0), - 118: (31.0, 237.0, 236.0), - 120: (39.0, 19.0, 208.0), - 121: (223.0, 27.0, 180.0), - 122: (254.0, 141.0, 85.0), - 125: (97.0, 144.0, 39.0), - 128: (106.0, 231.0, 176.0), - 130: (12.0, 61.0, 162.0), - 131: (124.0, 66.0, 140.0), - 132: (137.0, 66.0, 73.0), - 134: (250.0, 253.0, 26.0), - 136: (55.0, 191.0, 73.0), - 138: (60.0, 126.0, 146.0), - 139: (153.0, 108.0, 234.0), - 140: (184.0, 58.0, 125.0), - 141: (135.0, 84.0, 14.0), - 145: (139.0, 248.0, 91.0), - 148: (53.0, 200.0, 172.0), - 154: (63.0, 69.0, 134.0), - 155: (190.0, 75.0, 186.0), - 156: (127.0, 63.0, 52.0), - 157: (141.0, 182.0, 25.0), - 159: (56.0, 144.0, 89.0), - 161: (64.0, 160.0, 250.0), - 163: (182.0, 86.0, 245.0), - 165: (139.0, 18.0, 53.0), - 166: (134.0, 120.0, 54.0), - 168: (49.0, 165.0, 42.0), - 169: (51.0, 128.0, 133.0), - 170: (44.0, 21.0, 163.0), - 177: (232.0, 93.0, 193.0), - 180: (176.0, 102.0, 54.0), - 185: (116.0, 217.0, 17.0), - 188: (54.0, 209.0, 150.0), - 191: (60.0, 99.0, 204.0), - 193: (129.0, 43.0, 144.0), - 195: (252.0, 100.0, 106.0), - 202: (187.0, 196.0, 73.0), - 208: (13.0, 158.0, 40.0), - 213: (52.0, 122.0, 152.0), - 214: (128.0, 76.0, 202.0), - 221: (187.0, 50.0, 115.0), - 229: (180.0, 141.0, 71.0), - 230: (77.0, 208.0, 35.0), - 232: (72.0, 183.0, 168.0), - 233: (97.0, 99.0, 203.0), - 242: (172.0, 22.0, 158.0), - 250: (155.0, 64.0, 40.0), - 261: (118.0, 159.0, 30.0), - 264: (69.0, 252.0, 148.0), - 276: (45.0, 103.0, 173.0), - 283: (111.0, 38.0, 149.0), - 286: (184.0, 9.0, 49.0), - 300: (188.0, 174.0, 67.0), - 304: (53.0, 206.0, 53.0), - 312: (97.0, 235.0, 252.0), - 323: (66.0, 32.0, 182.0), - 325: (236.0, 114.0, 195.0), - 331: (241.0, 154.0, 83.0), - 342: (133.0, 240.0, 52.0), - 356: (16.0, 205.0, 144.0), - 370: (75.0, 101.0, 198.0), - 392: (237.0, 95.0, 251.0), - 395: (191.0, 52.0, 49.0), - 399: (227.0, 254.0, 54.0), - 408: (49.0, 206.0, 87.0), - 417: (48.0, 113.0, 150.0), - 488: (125.0, 73.0, 182.0), - 540: (229.0, 32.0, 114.0), - 562: (158.0, 119.0, 28.0), - 570: (60.0, 205.0, 27.0), - 572: (18.0, 215.0, 201.0), - 581: (79.0, 76.0, 153.0), - 609: (134.0, 13.0, 116.0), - 748: (192.0, 97.0, 63.0), - 776: (108.0, 163.0, 18.0), - 1156: (95.0, 220.0, 156.0), - 1163: (98.0, 141.0, 208.0), - 1164: (144.0, 19.0, 193.0), - 1165: (166.0, 36.0, 57.0), - 1166: (212.0, 202.0, 34.0), - 1167: (23.0, 206.0, 34.0), - 1168: (91.0, 211.0, 236.0), - 1169: (79.0, 55.0, 137.0), - 1170: (182.0, 19.0, 117.0), - 1171: (134.0, 76.0, 14.0), - 1172: (87.0, 185.0, 28.0), - 1173: (82.0, 224.0, 187.0), - 1174: (92.0, 110.0, 214.0), - 1175: (168.0, 80.0, 171.0), - 1176: (197.0, 63.0, 51.0), - 1178: (175.0, 199.0, 77.0), - 1179: (62.0, 180.0, 98.0), - 1180: (8.0, 91.0, 150.0), - 1181: (77.0, 15.0, 130.0), - 1182: (154.0, 65.0, 96.0), - 1183: (197.0, 152.0, 11.0), - 1184: (59.0, 155.0, 45.0), - 1185: (12.0, 147.0, 145.0), - 1186: (54.0, 35.0, 219.0), - 1187: (210.0, 73.0, 181.0), - 1188: (221.0, 124.0, 77.0), - 1189: (149.0, 214.0, 66.0), - 1190: (72.0, 185.0, 134.0), - 1191: (42.0, 94.0, 198.0), -} - -# For instance segmentation the non-object categories -VALID_PANOPTIC_IDS = (1, 3) - -CLASS_LABELS_PANOPTIC = ("wall", "floor") diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_splits.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_splits.py deleted file mode 100644 index 39ccc3c..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_splits.py +++ /dev/null @@ -1,625 +0,0 @@ -# This file contains the HEAD - COMMON - TAIL split category ids for ScanNet 200 - -HEAD_CATS_SCANNET_200 = [ - "tv stand", - "curtain", - "blinds", - "shower curtain", - "bookshelf", - "tv", - "kitchen cabinet", - "pillow", - "lamp", - "dresser", - "monitor", - "object", - "ceiling", - "board", - "stove", - "closet wall", - "couch", - "office chair", - "kitchen counter", - "shower", - "closet", - "doorframe", - "sofa chair", - "mailbox", - "nightstand", - "washing machine", - "picture", - "book", - "sink", - "recycling bin", - "table", - "backpack", - "shower wall", - "toilet", - "copier", - "counter", - "stool", - "refrigerator", - "window", - "file cabinet", - "chair", - "wall", - "plant", - "coffee table", - "stairs", - "armchair", - "cabinet", - "bathroom vanity", - "bathroom stall", - "mirror", - "blackboard", - "trash can", - "stair rail", - "box", - "towel", - "door", - "clothes", - "whiteboard", - "bed", - "floor", - "bathtub", - "desk", - "wardrobe", - "clothes dryer", - "radiator", - "shelf", -] -COMMON_CATS_SCANNET_200 = [ - "cushion", - "end table", - "dining table", - "keyboard", - "bag", - "toilet paper", - "printer", - "blanket", - "microwave", - "shoe", - "computer tower", - "bottle", - "bin", - "ottoman", - "bench", - "basket", - "fan", - "laptop", - "person", - "paper towel dispenser", - "oven", - "rack", - "piano", - "suitcase", - "rail", - "container", - "telephone", - "stand", - "light", - "laundry basket", - "pipe", - "seat", - "column", - "bicycle", - "ladder", - "jacket", - "storage bin", - "coffee maker", - "dishwasher", - "machine", - "mat", - "windowsill", - "bulletin board", - "fireplace", - "mini fridge", - "water cooler", - "shower door", - "pillar", - "ledge", - "furniture", - "cart", - "decoration", - "closet door", - "vacuum cleaner", - "dish rack", - "range hood", - "projector screen", - "divider", - "bathroom counter", - "laundry hamper", - "bathroom stall door", - "ceiling light", - "trash bin", - "bathroom cabinet", - "structure", - "storage organizer", - "potted plant", - "mattress", -] -TAIL_CATS_SCANNET_200 = [ - "paper", - "plate", - "soap dispenser", - "bucket", - "clock", - "guitar", - "toilet paper holder", - "speaker", - "cup", - "paper towel roll", - "bar", - "toaster", - "ironing board", - "soap dish", - "toilet paper dispenser", - "fire extinguisher", - "ball", - "hat", - "shower curtain rod", - "paper cutter", - "tray", - "toaster oven", - "mouse", - "toilet seat cover dispenser", - "storage container", - "scale", - "tissue box", - "light switch", - "crate", - "power outlet", - "sign", - "projector", - "candle", - "plunger", - "stuffed animal", - "headphones", - "broom", - "guitar case", - "dustpan", - "hair dryer", - "water bottle", - "handicap bar", - "purse", - "vent", - "shower floor", - "water pitcher", - "bowl", - "paper bag", - "alarm clock", - "music stand", - "laundry detergent", - "dumbbell", - "tube", - "cd case", - "closet rod", - "coffee kettle", - "shower head", - "keyboard piano", - "case of water bottles", - "coat rack", - "folded chair", - "fire alarm", - "power strip", - "calendar", - "poster", - "luggage", -] - - -# Given the different size of the official train and val sets, not all ScanNet200 categories are present in the validation set. -# Here we list of categories with labels and IDs present in both train and validation set, and the remaining categories those are present in train, but not in val -# We dont evaluate on unseen validation categories in this benchmark - -VALID_CLASS_IDS_200_VALIDATION = ( - "wall", - "chair", - "floor", - "table", - "door", - "couch", - "cabinet", - "shelf", - "desk", - "office chair", - "bed", - "pillow", - "sink", - "picture", - "window", - "toilet", - "bookshelf", - "monitor", - "curtain", - "book", - "armchair", - "coffee table", - "box", - "refrigerator", - "lamp", - "kitchen cabinet", - "towel", - "clothes", - "tv", - "nightstand", - "counter", - "dresser", - "stool", - "cushion", - "plant", - "ceiling", - "bathtub", - "end table", - "dining table", - "keyboard", - "bag", - "backpack", - "toilet paper", - "printer", - "tv stand", - "whiteboard", - "blanket", - "shower curtain", - "trash can", - "closet", - "stairs", - "microwave", - "stove", - "shoe", - "computer tower", - "bottle", - "bin", - "ottoman", - "bench", - "board", - "washing machine", - "mirror", - "copier", - "basket", - "sofa chair", - "file cabinet", - "fan", - "laptop", - "shower", - "paper", - "person", - "paper towel dispenser", - "oven", - "blinds", - "rack", - "plate", - "blackboard", - "piano", - "suitcase", - "rail", - "radiator", - "recycling bin", - "container", - "wardrobe", - "soap dispenser", - "telephone", - "bucket", - "clock", - "stand", - "light", - "laundry basket", - "pipe", - "clothes dryer", - "guitar", - "toilet paper holder", - "seat", - "speaker", - "column", - "ladder", - "bathroom stall", - "shower wall", - "cup", - "jacket", - "storage bin", - "coffee maker", - "dishwasher", - "paper towel roll", - "machine", - "mat", - "windowsill", - "bar", - "toaster", - "bulletin board", - "ironing board", - "fireplace", - "soap dish", - "kitchen counter", - "doorframe", - "toilet paper dispenser", - "mini fridge", - "fire extinguisher", - "ball", - "hat", - "shower curtain rod", - "water cooler", - "paper cutter", - "tray", - "shower door", - "pillar", - "ledge", - "toaster oven", - "mouse", - "toilet seat cover dispenser", - "furniture", - "cart", - "scale", - "tissue box", - "light switch", - "crate", - "power outlet", - "decoration", - "sign", - "projector", - "closet door", - "vacuum cleaner", - "plunger", - "stuffed animal", - "headphones", - "dish rack", - "broom", - "range hood", - "dustpan", - "hair dryer", - "water bottle", - "handicap bar", - "vent", - "shower floor", - "water pitcher", - "mailbox", - "bowl", - "paper bag", - "projector screen", - "divider", - "laundry detergent", - "bathroom counter", - "object", - "bathroom vanity", - "closet wall", - "laundry hamper", - "bathroom stall door", - "ceiling light", - "trash bin", - "dumbbell", - "stair rail", - "tube", - "bathroom cabinet", - "closet rod", - "coffee kettle", - "shower head", - "keyboard piano", - "case of water bottles", - "coat rack", - "folded chair", - "fire alarm", - "power strip", - "calendar", - "poster", - "potted plant", - "mattress", -) - -CLASS_LABELS_200_VALIDATION = ( - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 21, - 22, - 23, - 24, - 26, - 27, - 28, - 29, - 31, - 32, - 33, - 34, - 35, - 36, - 38, - 39, - 40, - 41, - 42, - 44, - 45, - 46, - 47, - 48, - 49, - 50, - 51, - 52, - 54, - 55, - 56, - 57, - 58, - 59, - 62, - 63, - 64, - 65, - 66, - 67, - 68, - 69, - 70, - 71, - 72, - 73, - 74, - 75, - 76, - 77, - 78, - 79, - 80, - 82, - 84, - 86, - 87, - 88, - 89, - 90, - 93, - 95, - 96, - 97, - 98, - 99, - 100, - 101, - 102, - 103, - 104, - 105, - 106, - 107, - 110, - 112, - 115, - 116, - 118, - 120, - 122, - 125, - 128, - 130, - 131, - 132, - 134, - 136, - 138, - 139, - 140, - 141, - 145, - 148, - 154, - 155, - 156, - 157, - 159, - 161, - 163, - 165, - 166, - 168, - 169, - 170, - 177, - 180, - 185, - 188, - 191, - 193, - 195, - 202, - 208, - 213, - 214, - 229, - 230, - 232, - 233, - 242, - 250, - 261, - 264, - 276, - 283, - 300, - 304, - 312, - 323, - 325, - 342, - 356, - 370, - 392, - 395, - 408, - 417, - 488, - 540, - 562, - 570, - 609, - 748, - 776, - 1156, - 1163, - 1164, - 1165, - 1166, - 1167, - 1168, - 1169, - 1170, - 1171, - 1172, - 1173, - 1175, - 1176, - 1179, - 1180, - 1181, - 1182, - 1184, - 1185, - 1186, - 1187, - 1188, - 1189, - 1191, -) - -VALID_CLASS_IDS_200_TRAIN_ONLY = ( - "bicycle", - "storage container", - "candle", - "guitar case", - "purse", - "alarm clock", - "music stand", - "cd case", - "structure", - "storage organizer", - "luggage", -) - -CLASS_LABELS_200_TRAIN_ONLY = ( - 121, - 221, - 286, - 331, - 399, - 572, - 581, - 1174, - 1178, - 1183, - 1190, -) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet_means.npz b/point_transformer_v3/pointcept_minimal/pointcept/datasets/preprocessing/scannet/meta_data/scannet_means.npz deleted file mode 100644 index e57647c9a3553ca4653a9d1e53ed4a2a58def822..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 676 zcmWIWW@Zs#fB;2?qMfn(4VV}hK$sIKm{?R4Z=jb~P&wHz)HfiKk)e#CT0JGTIJrpO zNVaPx7@2E}8$<-lIU?aSr=G`p*2`D_wHFi=4Ad6?Z2wg7 zT-4wBkL}%0{@(Oq&0~8dx7n)%|2(tb8@ImvNANfMQmeV|P8j~M=W9QlxM}k%d#067 zUa&{JxBoLix428>qy623b*tSSpV$lOrYR*cKeT7o{Lg!y@3DP|USJPD!yo(h`{xDw z&%CprZTw4G*5JAQt*;YQe#U*W=YO@prfcCV`xdJe0b(aV*c%%g_q#Iuwy*#AW9BTL z)ApCn)_;q8earr|Y)f&z;Y0i57fRxnOMSNohP6OR)Ia;sqByq**NgVcY$r}^ulQi! zH{+X)O!+sExn{*IZ|vWC+dbI9_RT)O$!tdV_b2wkTMovrc=pTwslw+=kseR&J0DEc zt3CM4-oiOsUFyUa`~8c<*;F6ivQO-)nYPC1hW(a21J<3af9!2LeYE%ddt~pgbh)GX z^&@*-e 0 else 0 - label_id200 = labels_pd[labels_pd["raw_category"] == label]["id"] - label_id200 = int(label_id200.iloc[0]) if len(label_id200) > 0 else 0 - - # Only store for the valid categories - if label_id20 in CLASS_IDS20: - label_id20 = CLASS_IDS20.index(label_id20) - else: - label_id20 = IGNORE_INDEX - - if label_id200 in CLASS_IDS200: - label_id200 = CLASS_IDS200.index(label_id200) - else: - label_id200 = IGNORE_INDEX - - # get points, where segment indices (points labelled with segment ids) are in the group segment list - point_idx = np.where(np.isin(seg_indices, group_segments))[0] - return point_idx, label_id20, label_id200 - - -def face_normal(vertex, face): - v01 = vertex[face[:, 1]] - vertex[face[:, 0]] - v02 = vertex[face[:, 2]] - vertex[face[:, 0]] - vec = np.cross(v01, v02) - length = np.sqrt(np.sum(vec**2, axis=1, keepdims=True)) + 1.0e-8 - nf = vec / length - area = length * 0.5 - return nf, area - - -def vertex_normal(vertex, face): - nf, area = face_normal(vertex, face) - nf = nf * area - - nv = np.zeros_like(vertex) - for i in range(face.shape[0]): - nv[face[i]] += nf[i] - - length = np.sqrt(np.sum(nv**2, axis=1, keepdims=True)) + 1.0e-8 - nv = nv / length - return nv - - -def handle_process( - scene_path, output_path, labels_pd, train_scenes, val_scenes, parse_normals=True -): - scene_id = os.path.basename(scene_path) - mesh_path = os.path.join(scene_path, f"{scene_id}{CLOUD_FILE_PFIX}.ply") - segments_file = os.path.join( - scene_path, f"{scene_id}{CLOUD_FILE_PFIX}{SEGMENTS_FILE_PFIX}" - ) - aggregations_file = os.path.join(scene_path, f"{scene_id}{AGGREGATIONS_FILE_PFIX}") - info_file = os.path.join(scene_path, f"{scene_id}.txt") - - if scene_id in train_scenes: - output_path = os.path.join(output_path, "train", f"{scene_id}") - split_name = "train" - elif scene_id in val_scenes: - output_path = os.path.join(output_path, "val", f"{scene_id}") - split_name = "val" - else: - output_path = os.path.join(output_path, "test", f"{scene_id}") - split_name = "test" - - print(f"Processing: {scene_id} in {split_name}") - - vertices, faces = read_plymesh(mesh_path) - coords = vertices[:, :3] - colors = vertices[:, 3:6] - save_dict = dict( - coord=coords.astype(np.float32), - color=colors.astype(np.uint8), - ) - - # # Rotating the mesh to axis aligned - # info_dict = {} - # with open(info_file) as f: - # for line in f: - # (key, val) = line.split(" = ") - # info_dict[key] = np.fromstring(val, sep=' ') - # - # if 'axisAlignment' not in info_dict: - # rot_matrix = np.identity(4) - # else: - # rot_matrix = info_dict['axisAlignment'].reshape(4, 4) - # r_coords = coords.transpose() - # r_coords = np.append(r_coords, np.ones((1, r_coords.shape[1])), axis=0) - # r_coords = np.dot(rot_matrix, r_coords) - # coords = r_coords - - # Parse Normals - if parse_normals: - save_dict["normal"] = vertex_normal(coords, faces).astype(np.float32) - - # Load segments file - if split_name != "test": - with open(segments_file) as f: - segments = json.load(f) - seg_indices = np.array(segments["segIndices"]) - - # Load Aggregations file - with open(aggregations_file) as f: - aggregation = json.load(f) - seg_groups = np.array(aggregation["segGroups"]) - - # Generate new labels - semantic_gt20 = np.ones((vertices.shape[0]), dtype=np.int16) * IGNORE_INDEX - semantic_gt200 = np.ones((vertices.shape[0]), dtype=np.int16) * IGNORE_INDEX - instance_ids = np.ones((vertices.shape[0]), dtype=np.int16) * IGNORE_INDEX - for group in seg_groups: - point_idx, label_id20, label_id200 = point_indices_from_group( - seg_indices, group, labels_pd - ) - - semantic_gt20[point_idx] = label_id20 - semantic_gt200[point_idx] = label_id200 - instance_ids[point_idx] = group["id"] - - semantic_gt20 = semantic_gt20.astype(int) - semantic_gt200 = semantic_gt200.astype(int) - instance_ids = instance_ids.astype(int) - - save_dict["segment20"] = semantic_gt20 - save_dict["segment200"] = semantic_gt200 - save_dict["instance"] = instance_ids - - # Concatenate with original cloud - processed_vertices = np.hstack((semantic_gt200, instance_ids)) - - if np.any(np.isnan(processed_vertices)) or not np.all( - np.isfinite(processed_vertices) - ): - raise ValueError(f"Find NaN in Scene: {scene_id}") - - # Save processed data - os.makedirs(output_path, exist_ok=True) - for key in save_dict.keys(): - np.save(os.path.join(output_path, f"{key}.npy"), save_dict[key]) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--dataset_root", - required=True, - help="Path to the ScanNet dataset containing scene folders", - ) - parser.add_argument( - "--output_root", - required=True, - help="Output path where train/val folders will be located", - ) - parser.add_argument( - "--parse_normals", default=True, type=bool, help="Whether parse point normals" - ) - parser.add_argument( - "--num_workers", - default=mp.cpu_count(), - type=int, - help="Num workers for preprocessing.", - ) - config = parser.parse_args() - meta_root = Path(os.path.dirname(__file__)) / "meta_data" - - # Load label map - labels_pd = pd.read_csv( - meta_root / "scannetv2-labels.combined.tsv", - sep="\t", - header=0, - ) - - # Load train/val splits - with open(meta_root / "scannetv2_train.txt") as train_file: - train_scenes = train_file.read().splitlines() - with open(meta_root / "scannetv2_val.txt") as val_file: - val_scenes = val_file.read().splitlines() - - # Create output directories - train_output_dir = os.path.join(config.output_root, "train") - os.makedirs(train_output_dir, exist_ok=True) - val_output_dir = os.path.join(config.output_root, "val") - os.makedirs(val_output_dir, exist_ok=True) - test_output_dir = os.path.join(config.output_root, "test") - os.makedirs(test_output_dir, exist_ok=True) - - # Load scene paths - scene_paths = sorted(glob.glob(config.dataset_root + "/scans*/scene*")) - - # Preprocess data. - print("Processing scenes...") - pool = ProcessPoolExecutor(max_workers=config.num_workers) - _ = list( - pool.map( - handle_process, - scene_paths, - repeat(config.output_root), - repeat(labels_pd), - repeat(train_scenes), - repeat(val_scenes), - repeat(config.parse_normals), - ) - ) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/scannet.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/scannet.py deleted file mode 100644 index 35d4606..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/datasets/scannet.py +++ /dev/null @@ -1,118 +0,0 @@ -""" -ScanNet20 / ScanNet200 / ScanNet Data Efficient Dataset - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -import os -import glob -import numpy as np -import torch -from copy import deepcopy -from torch.utils.data import Dataset -from collections.abc import Sequence - -from pointcept.utils.logger import get_root_logger -from pointcept.utils.cache import shared_dict -from .builder import DATASETS -from .defaults import DefaultDataset -from .transform import Compose, TRANSFORMS -from .preprocessing.scannet.meta_data.scannet200_constants import ( - VALID_CLASS_IDS_20, - VALID_CLASS_IDS_200, -) - - -@DATASETS.register_module() -class ScanNetDataset(DefaultDataset): - VALID_ASSETS = [ - "coord", - "color", - "normal", - "segment20", - "instance", - ] - class2id = np.array(VALID_CLASS_IDS_20) - - def __init__( - self, - lr_file=None, - la_file=None, - **kwargs, - ): - self.lr = np.loadtxt(lr_file, dtype=str) if lr_file is not None else None - self.la = torch.load(la_file) if la_file is not None else None - super().__init__(**kwargs) - - def get_data_list(self): - if self.lr is None: - data_list = super().get_data_list() - else: - data_list = [ - os.path.join(self.data_root, "train", name) for name in self.lr - ] - return data_list - - def get_data(self, idx): - data_path = self.data_list[idx % len(self.data_list)] - name = self.get_data_name(idx) - split = self.get_split_name(idx) - if self.cache: - cache_name = f"pointcept-{name}" - return shared_dict(cache_name) - - data_dict = {} - assets = os.listdir(data_path) - for asset in assets: - if not asset.endswith(".npy"): - continue - if asset[:-4] not in self.VALID_ASSETS: - continue - data_dict[asset[:-4]] = np.load(os.path.join(data_path, asset)) - data_dict["name"] = name - data_dict["split"] = split - data_dict["coord"] = data_dict["coord"].astype(np.float32) - data_dict["color"] = data_dict["color"].astype(np.float32) - data_dict["normal"] = data_dict["normal"].astype(np.float32) - - if "segment20" in data_dict.keys(): - data_dict["segment"] = ( - data_dict.pop("segment20").reshape([-1]).astype(np.int32) - ) - elif "segment200" in data_dict.keys(): - data_dict["segment"] = ( - data_dict.pop("segment200").reshape([-1]).astype(np.int32) - ) - else: - data_dict["segment"] = ( - np.ones(data_dict["coord"].shape[0], dtype=np.int32) * -1 - ) - - if "instance" in data_dict.keys(): - data_dict["instance"] = ( - data_dict.pop("instance").reshape([-1]).astype(np.int32) - ) - else: - data_dict["instance"] = ( - np.ones(data_dict["coord"].shape[0], dtype=np.int32) * -1 - ) - if self.la: - sampled_index = self.la[self.get_data_name(idx)] - mask = np.ones_like(data_dict["segment"], dtype=bool) - mask[sampled_index] = False - data_dict["segment"][mask] = self.ignore_index - data_dict["sampled_index"] = sampled_index - return data_dict - - -@DATASETS.register_module() -class ScanNet200Dataset(ScanNetDataset): - VALID_ASSETS = [ - "coord", - "color", - "normal", - "segment200", - "instance", - ] - class2id = np.array(VALID_CLASS_IDS_200) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/transform.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/transform.py deleted file mode 100644 index 8073c76..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/datasets/transform.py +++ /dev/null @@ -1,1457 +0,0 @@ -""" -3D point cloud augmentation - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com), Yujia Zhang (yujia.zhang.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -import random -import numbers -import scipy -import scipy.ndimage -import scipy.interpolate -import scipy.stats -import numpy as np -import torch -from torchvision import transforms -import copy -from collections.abc import Sequence, Mapping -from pointcept.utils.registry import Registry - -TRANSFORMS = Registry("transforms") - - -def index_operator(data_dict, index, duplicate=False): - # index selection operator for keys in "index_valid_keys" - # custom these keys by "Update" transform in config - if "index_valid_keys" not in data_dict: - data_dict["index_valid_keys"] = [ - "coord", - "color", - "normal", - "superpoint", - "strength", - "segment", - "instance", - ] - if not duplicate: - for key in data_dict["index_valid_keys"]: - if key in data_dict: - data_dict[key] = data_dict[key][index] - return data_dict - else: - data_dict_ = dict() - for key in data_dict.keys(): - if key in data_dict["index_valid_keys"]: - data_dict_[key] = data_dict[key][index] - elif key == "index_valid_keys": - data_dict_[key] = copy.copy(data_dict[key]) - else: - data_dict_[key] = data_dict[key] - return data_dict_ - - -@TRANSFORMS.register_module() -class Collect(object): - def __init__(self, keys, offset_keys_dict=None, **kwargs): - """ - e.g. Collect(keys=[coord], feat_keys=[coord, color]) - """ - if offset_keys_dict is None: - offset_keys_dict = dict(offset="coord") - self.keys = keys - self.offset_keys = offset_keys_dict - self.kwargs = kwargs - - def __call__(self, data_dict): - data = dict() - if isinstance(self.keys, str): - self.keys = [self.keys] - for key in self.keys: - data[key] = data_dict[key] - for key, value in self.offset_keys.items(): - data[key] = torch.tensor([data_dict[value].shape[0]]) - for name, keys in self.kwargs.items(): - name = name.replace("_keys", "") - assert isinstance(keys, Sequence) - data[name] = torch.cat([data_dict[key].float() for key in keys], dim=1) - return data - - -@TRANSFORMS.register_module() -class Copy(object): - def __init__(self, keys_dict=None): - if keys_dict is None: - keys_dict = dict(coord="origin_coord", segment="origin_segment") - self.keys_dict = keys_dict - - def __call__(self, data_dict): - for key, value in self.keys_dict.items(): - if isinstance(data_dict[key], np.ndarray): - data_dict[value] = data_dict[key].copy() - elif isinstance(data_dict[key], torch.Tensor): - data_dict[value] = data_dict[key].clone().detach() - else: - data_dict[value] = copy.deepcopy(data_dict[key]) - return data_dict - - -@TRANSFORMS.register_module() -class Update(object): - def __init__(self, keys_dict=None): - if keys_dict is None: - keys_dict = dict() - self.keys_dict = keys_dict - - def __call__(self, data_dict): - for key, value in self.keys_dict.items(): - data_dict[key] = value - return data_dict - - -@TRANSFORMS.register_module() -class ToTensor(object): - def __call__(self, data): - if isinstance(data, torch.Tensor): - return data - elif isinstance(data, str): - # note that str is also a kind of sequence, judgement should before sequence - return data - elif isinstance(data, int): - return torch.LongTensor([data]) - elif isinstance(data, float): - return torch.FloatTensor([data]) - elif isinstance(data, np.ndarray) and np.issubdtype(data.dtype, bool): - return torch.from_numpy(data) - elif isinstance(data, np.ndarray) and np.issubdtype(data.dtype, np.integer): - return torch.from_numpy(data).long() - elif isinstance(data, np.ndarray) and np.issubdtype(data.dtype, np.floating): - return torch.from_numpy(data).float() - elif isinstance(data, Mapping): - result = {sub_key: self(item) for sub_key, item in data.items()} - return result - elif isinstance(data, Sequence): - result = [self(item) for item in data] - return result - else: - raise TypeError(f"type {type(data)} cannot be converted to tensor.") - - -@TRANSFORMS.register_module() -class NormalizeColor(object): - def __call__(self, data_dict): - if "color" in data_dict.keys(): - data_dict["color"] = data_dict["color"] / 255 - return data_dict - - -@TRANSFORMS.register_module() -class NormalizeCoord(object): - def __call__(self, data_dict): - if "coord" in data_dict.keys(): - # modified from pointnet2 - centroid = np.mean(data_dict["coord"], axis=0) - data_dict["coord"] -= centroid - m = np.max(np.sqrt(np.sum(data_dict["coord"] ** 2, axis=1))) - data_dict["coord"] = data_dict["coord"] / m - return data_dict - - -@TRANSFORMS.register_module() -class PositiveShift(object): - def __call__(self, data_dict): - if "coord" in data_dict.keys(): - coord_min = np.min(data_dict["coord"], 0) - data_dict["coord"] -= coord_min - return data_dict - - -@TRANSFORMS.register_module() -class CenterShift(object): - def __init__(self, apply_z=True): - self.apply_z = apply_z - - def __call__(self, data_dict): - if "coord" in data_dict.keys(): - x_min, y_min, z_min = data_dict["coord"].min(axis=0) - x_max, y_max, _ = data_dict["coord"].max(axis=0) - if self.apply_z: - shift = [(x_min + x_max) / 2, (y_min + y_max) / 2, z_min] - else: - shift = [(x_min + x_max) / 2, (y_min + y_max) / 2, 0] - data_dict["coord"] -= shift - return data_dict - - -@TRANSFORMS.register_module() -class RandomShift(object): - def __init__(self, shift=((-0.2, 0.2), (-0.2, 0.2), (0, 0))): - self.shift = shift - - def __call__(self, data_dict): - if "coord" in data_dict.keys(): - shift_x = np.random.uniform(self.shift[0][0], self.shift[0][1]) - shift_y = np.random.uniform(self.shift[1][0], self.shift[1][1]) - shift_z = np.random.uniform(self.shift[2][0], self.shift[2][1]) - data_dict["coord"] += [shift_x, shift_y, shift_z] - return data_dict - - -@TRANSFORMS.register_module() -class PointClip(object): - def __init__(self, point_cloud_range=(-80, -80, -3, 80, 80, 1)): - self.point_cloud_range = point_cloud_range - - def __call__(self, data_dict): - if "coord" in data_dict.keys(): - data_dict["coord"] = np.clip( - data_dict["coord"], - a_min=self.point_cloud_range[:3], - a_max=self.point_cloud_range[3:], - ) - return data_dict - - -@TRANSFORMS.register_module() -class RandomDropout(object): - def __init__(self, dropout_ratio=0.2, dropout_application_ratio=0.5): - """ - upright_axis: axis index among x,y,z, i.e. 2 for z - """ - self.dropout_ratio = dropout_ratio - self.dropout_application_ratio = dropout_application_ratio - - def __call__(self, data_dict): - if random.random() < self.dropout_application_ratio: - n = len(data_dict["coord"]) - idx = np.random.choice(n, int(n * (1 - self.dropout_ratio)), replace=False) - if "sampled_index" in data_dict: - # for ScanNet data efficient, we need to make sure labeled point is sampled. - idx = np.unique(np.append(idx, data_dict["sampled_index"])) - mask = np.zeros_like(data_dict["segment"]).astype(bool) - mask[data_dict["sampled_index"]] = True - data_dict["sampled_index"] = np.where(mask[idx])[0] - data_dict = index_operator(data_dict, idx) - return data_dict - - -@TRANSFORMS.register_module() -class RandomRotate(object): - def __init__(self, angle=None, center=None, axis="z", always_apply=False, p=0.5): - self.angle = [-1, 1] if angle is None else angle - self.axis = axis - self.always_apply = always_apply - self.p = p if not self.always_apply else 1 - self.center = center - - def __call__(self, data_dict): - if random.random() > self.p: - return data_dict - angle = np.random.uniform(self.angle[0], self.angle[1]) * np.pi - rot_cos, rot_sin = np.cos(angle), np.sin(angle) - if self.axis == "x": - rot_t = np.array([[1, 0, 0], [0, rot_cos, -rot_sin], [0, rot_sin, rot_cos]]) - elif self.axis == "y": - rot_t = np.array([[rot_cos, 0, rot_sin], [0, 1, 0], [-rot_sin, 0, rot_cos]]) - elif self.axis == "z": - rot_t = np.array([[rot_cos, -rot_sin, 0], [rot_sin, rot_cos, 0], [0, 0, 1]]) - else: - raise NotImplementedError - if "coord" in data_dict.keys(): - if self.center is None: - x_min, y_min, z_min = data_dict["coord"].min(axis=0) - x_max, y_max, z_max = data_dict["coord"].max(axis=0) - center = [(x_min + x_max) / 2, (y_min + y_max) / 2, (z_min + z_max) / 2] - else: - center = self.center - data_dict["coord"] -= center - data_dict["coord"] = np.dot(data_dict["coord"], np.transpose(rot_t)) - data_dict["coord"] += center - if "normal" in data_dict.keys(): - data_dict["normal"] = np.dot(data_dict["normal"], np.transpose(rot_t)) - return data_dict - - -@TRANSFORMS.register_module() -class RandomRotateTargetAngle(object): - def __init__( - self, angle=(1 / 2, 1, 3 / 2), center=None, axis="z", always_apply=False, p=0.75 - ): - self.angle = angle - self.axis = axis - self.always_apply = always_apply - self.p = p if not self.always_apply else 1 - self.center = center - - def __call__(self, data_dict): - if random.random() > self.p: - return data_dict - angle = np.random.choice(self.angle) * np.pi - rot_cos, rot_sin = np.cos(angle), np.sin(angle) - if self.axis == "x": - rot_t = np.array([[1, 0, 0], [0, rot_cos, -rot_sin], [0, rot_sin, rot_cos]]) - elif self.axis == "y": - rot_t = np.array([[rot_cos, 0, rot_sin], [0, 1, 0], [-rot_sin, 0, rot_cos]]) - elif self.axis == "z": - rot_t = np.array([[rot_cos, -rot_sin, 0], [rot_sin, rot_cos, 0], [0, 0, 1]]) - else: - raise NotImplementedError - if "coord" in data_dict.keys(): - if self.center is None: - x_min, y_min, z_min = data_dict["coord"].min(axis=0) - x_max, y_max, z_max = data_dict["coord"].max(axis=0) - center = [(x_min + x_max) / 2, (y_min + y_max) / 2, (z_min + z_max) / 2] - else: - center = self.center - data_dict["coord"] -= center - data_dict["coord"] = np.dot(data_dict["coord"], np.transpose(rot_t)) - data_dict["coord"] += center - if "normal" in data_dict.keys(): - data_dict["normal"] = np.dot(data_dict["normal"], np.transpose(rot_t)) - return data_dict - - -@TRANSFORMS.register_module() -class RandomScale(object): - def __init__(self, scale=None, anisotropic=False): - self.scale = scale if scale is not None else [0.95, 1.05] - self.anisotropic = anisotropic - - def __call__(self, data_dict): - if "coord" in data_dict.keys(): - scale = np.random.uniform( - self.scale[0], self.scale[1], 3 if self.anisotropic else 1 - ) - data_dict["coord"] *= scale - return data_dict - - -@TRANSFORMS.register_module() -class RandomFlip(object): - def __init__(self, p=0.5): - self.p = p - - def __call__(self, data_dict): - if np.random.rand() < self.p: - if "coord" in data_dict.keys(): - data_dict["coord"][:, 0] = -data_dict["coord"][:, 0] - if "normal" in data_dict.keys(): - data_dict["normal"][:, 0] = -data_dict["normal"][:, 0] - if np.random.rand() < self.p: - if "coord" in data_dict.keys(): - data_dict["coord"][:, 1] = -data_dict["coord"][:, 1] - if "normal" in data_dict.keys(): - data_dict["normal"][:, 1] = -data_dict["normal"][:, 1] - return data_dict - - -@TRANSFORMS.register_module() -class RandomJitter(object): - def __init__(self, sigma=0.01, clip=0.05): - assert clip > 0 - self.sigma = sigma - self.clip = clip - - def __call__(self, data_dict): - if "coord" in data_dict.keys(): - jitter = np.clip( - self.sigma * np.random.randn(data_dict["coord"].shape[0], 3), - -self.clip, - self.clip, - ) - data_dict["coord"] += jitter - return data_dict - - -@TRANSFORMS.register_module() -class ClipGaussianJitter(object): - def __init__(self, scalar=0.02, store_jitter=False): - self.scalar = scalar - self.mean = np.mean(3) - self.cov = np.identity(3) - self.quantile = 1.96 - self.store_jitter = store_jitter - - def __call__(self, data_dict): - if "coord" in data_dict.keys(): - jitter = np.random.multivariate_normal( - self.mean, self.cov, data_dict["coord"].shape[0] - ) - jitter = self.scalar * np.clip(jitter / 1.96, -1, 1) - data_dict["coord"] += jitter - if self.store_jitter: - data_dict["jitter"] = jitter - return data_dict - - -@TRANSFORMS.register_module() -class ChromaticAutoContrast(object): - def __init__(self, p=0.2, blend_factor=None): - self.p = p - self.blend_factor = blend_factor - - def __call__(self, data_dict): - if "color" in data_dict.keys() and np.random.rand() < self.p: - lo = np.min(data_dict["color"], 0, keepdims=True) - hi = np.max(data_dict["color"], 0, keepdims=True) - scale = 255 / (hi - lo) - contrast_feat = (data_dict["color"][:, :3] - lo) * scale - blend_factor = ( - np.random.rand() if self.blend_factor is None else self.blend_factor - ) - data_dict["color"][:, :3] = (1 - blend_factor) * data_dict["color"][ - :, :3 - ] + blend_factor * contrast_feat - return data_dict - - -@TRANSFORMS.register_module() -class ChromaticTranslation(object): - def __init__(self, p=0.95, ratio=0.05): - self.p = p - self.ratio = ratio - - def __call__(self, data_dict): - if "color" in data_dict.keys() and np.random.rand() < self.p: - tr = (np.random.rand(1, 3) - 0.5) * 255 * 2 * self.ratio - data_dict["color"][:, :3] = np.clip(tr + data_dict["color"][:, :3], 0, 255) - return data_dict - - -@TRANSFORMS.register_module() -class ChromaticJitter(object): - def __init__(self, p=0.95, std=0.005): - self.p = p - self.std = std - - def __call__(self, data_dict): - if "color" in data_dict.keys() and np.random.rand() < self.p: - noise = np.random.randn(data_dict["color"].shape[0], 3) - noise *= self.std * 255 - data_dict["color"][:, :3] = np.clip( - noise + data_dict["color"][:, :3], 0, 255 - ) - return data_dict - - -@TRANSFORMS.register_module() -class RandomColorGrayScale(object): - def __init__(self, p): - self.p = p - - @staticmethod - def rgb_to_grayscale(color, num_output_channels=1): - if color.shape[-1] < 3: - raise TypeError( - "Input color should have at least 3 dimensions, but found {}".format( - color.shape[-1] - ) - ) - - if num_output_channels not in (1, 3): - raise ValueError("num_output_channels should be either 1 or 3") - - r, g, b = color[..., 0], color[..., 1], color[..., 2] - gray = (0.2989 * r + 0.587 * g + 0.114 * b).astype(color.dtype) - gray = np.expand_dims(gray, axis=-1) - - if num_output_channels == 3: - gray = np.broadcast_to(gray, color.shape) - - return gray - - def __call__(self, data_dict): - if np.random.rand() < self.p: - data_dict["color"] = self.rgb_to_grayscale(data_dict["color"], 3) - return data_dict - - -@TRANSFORMS.register_module() -class RandomColorJitter(object): - """ - Random Color Jitter for 3D point cloud (refer torchvision) - """ - - def __init__(self, brightness=0, contrast=0, saturation=0, hue=0, p=0.95): - self.brightness = self._check_input(brightness, "brightness") - self.contrast = self._check_input(contrast, "contrast") - self.saturation = self._check_input(saturation, "saturation") - self.hue = self._check_input( - hue, "hue", center=0, bound=(-0.5, 0.5), clip_first_on_zero=False - ) - self.p = p - - @staticmethod - def _check_input( - value, name, center=1, bound=(0, float("inf")), clip_first_on_zero=True - ): - if isinstance(value, numbers.Number): - if value < 0: - raise ValueError( - "If {} is a single number, it must be non negative.".format(name) - ) - value = [center - float(value), center + float(value)] - if clip_first_on_zero: - value[0] = max(value[0], 0.0) - elif isinstance(value, (tuple, list)) and len(value) == 2: - if not bound[0] <= value[0] <= value[1] <= bound[1]: - raise ValueError("{} values should be between {}".format(name, bound)) - else: - raise TypeError( - "{} should be a single number or a list/tuple with length 2.".format( - name - ) - ) - - # if value is 0 or (1., 1.) for brightness/contrast/saturation - # or (0., 0.) for hue, do nothing - if value[0] == value[1] == center: - value = None - return value - - @staticmethod - def blend(color1, color2, ratio): - ratio = float(ratio) - bound = 255.0 - return ( - (ratio * color1 + (1.0 - ratio) * color2) - .clip(0, bound) - .astype(color1.dtype) - ) - - @staticmethod - def rgb2hsv(rgb): - r, g, b = rgb[..., 0], rgb[..., 1], rgb[..., 2] - maxc = np.max(rgb, axis=-1) - minc = np.min(rgb, axis=-1) - eqc = maxc == minc - cr = maxc - minc - s = cr / (np.ones_like(maxc) * eqc + maxc * (1 - eqc)) - cr_divisor = np.ones_like(maxc) * eqc + cr * (1 - eqc) - rc = (maxc - r) / cr_divisor - gc = (maxc - g) / cr_divisor - bc = (maxc - b) / cr_divisor - - hr = (maxc == r) * (bc - gc) - hg = ((maxc == g) & (maxc != r)) * (2.0 + rc - bc) - hb = ((maxc != g) & (maxc != r)) * (4.0 + gc - rc) - h = hr + hg + hb - h = (h / 6.0 + 1.0) % 1.0 - return np.stack((h, s, maxc), axis=-1) - - @staticmethod - def hsv2rgb(hsv): - h, s, v = hsv[..., 0], hsv[..., 1], hsv[..., 2] - i = np.floor(h * 6.0) - f = (h * 6.0) - i - i = i.astype(np.int32) - - p = np.clip((v * (1.0 - s)), 0.0, 1.0) - q = np.clip((v * (1.0 - s * f)), 0.0, 1.0) - t = np.clip((v * (1.0 - s * (1.0 - f))), 0.0, 1.0) - i = i % 6 - mask = np.expand_dims(i, axis=-1) == np.arange(6) - - a1 = np.stack((v, q, p, p, t, v), axis=-1) - a2 = np.stack((t, v, v, q, p, p), axis=-1) - a3 = np.stack((p, p, t, v, v, q), axis=-1) - a4 = np.stack((a1, a2, a3), axis=-1) - - return np.einsum("...na, ...nab -> ...nb", mask.astype(hsv.dtype), a4) - - def adjust_brightness(self, color, brightness_factor): - if brightness_factor < 0: - raise ValueError( - "brightness_factor ({}) is not non-negative.".format(brightness_factor) - ) - - return self.blend(color, np.zeros_like(color), brightness_factor) - - def adjust_contrast(self, color, contrast_factor): - if contrast_factor < 0: - raise ValueError( - "contrast_factor ({}) is not non-negative.".format(contrast_factor) - ) - mean = np.mean(RandomColorGrayScale.rgb_to_grayscale(color)) - return self.blend(color, mean, contrast_factor) - - def adjust_saturation(self, color, saturation_factor): - if saturation_factor < 0: - raise ValueError( - "saturation_factor ({}) is not non-negative.".format(saturation_factor) - ) - gray = RandomColorGrayScale.rgb_to_grayscale(color) - return self.blend(color, gray, saturation_factor) - - def adjust_hue(self, color, hue_factor): - if not (-0.5 <= hue_factor <= 0.5): - raise ValueError( - "hue_factor ({}) is not in [-0.5, 0.5].".format(hue_factor) - ) - orig_dtype = color.dtype - hsv = self.rgb2hsv(color / 255.0) - h, s, v = hsv[..., 0], hsv[..., 1], hsv[..., 2] - h = (h + hue_factor) % 1.0 - hsv = np.stack((h, s, v), axis=-1) - color_hue_adj = (self.hsv2rgb(hsv) * 255.0).astype(orig_dtype) - return color_hue_adj - - @staticmethod - def get_params(brightness, contrast, saturation, hue): - fn_idx = torch.randperm(4) - b = ( - None - if brightness is None - else np.random.uniform(brightness[0], brightness[1]) - ) - c = None if contrast is None else np.random.uniform(contrast[0], contrast[1]) - s = ( - None - if saturation is None - else np.random.uniform(saturation[0], saturation[1]) - ) - h = None if hue is None else np.random.uniform(hue[0], hue[1]) - return fn_idx, b, c, s, h - - def __call__(self, data_dict): - ( - fn_idx, - brightness_factor, - contrast_factor, - saturation_factor, - hue_factor, - ) = self.get_params(self.brightness, self.contrast, self.saturation, self.hue) - - for fn_id in fn_idx: - if ( - fn_id == 0 - and brightness_factor is not None - and np.random.rand() < self.p - ): - data_dict["color"] = self.adjust_brightness( - data_dict["color"], brightness_factor - ) - elif ( - fn_id == 1 and contrast_factor is not None and np.random.rand() < self.p - ): - data_dict["color"] = self.adjust_contrast( - data_dict["color"], contrast_factor - ) - elif ( - fn_id == 2 - and saturation_factor is not None - and np.random.rand() < self.p - ): - data_dict["color"] = self.adjust_saturation( - data_dict["color"], saturation_factor - ) - elif fn_id == 3 and hue_factor is not None and np.random.rand() < self.p: - data_dict["color"] = self.adjust_hue(data_dict["color"], hue_factor) - return data_dict - - -@TRANSFORMS.register_module() -class HueSaturationTranslation(object): - @staticmethod - def rgb_to_hsv(rgb): - # Translated from source of colorsys.rgb_to_hsv - # r,g,b should be a numpy arrays with values between 0 and 255 - # rgb_to_hsv returns an array of floats between 0.0 and 1.0. - rgb = rgb.astype("float") - hsv = np.zeros_like(rgb) - # in case an RGBA array was passed, just copy the A channel - hsv[..., 3:] = rgb[..., 3:] - r, g, b = rgb[..., 0], rgb[..., 1], rgb[..., 2] - maxc = np.max(rgb[..., :3], axis=-1) - minc = np.min(rgb[..., :3], axis=-1) - hsv[..., 2] = maxc - mask = maxc != minc - hsv[mask, 1] = (maxc - minc)[mask] / maxc[mask] - rc = np.zeros_like(r) - gc = np.zeros_like(g) - bc = np.zeros_like(b) - rc[mask] = (maxc - r)[mask] / (maxc - minc)[mask] - gc[mask] = (maxc - g)[mask] / (maxc - minc)[mask] - bc[mask] = (maxc - b)[mask] / (maxc - minc)[mask] - hsv[..., 0] = np.select( - [r == maxc, g == maxc], [bc - gc, 2.0 + rc - bc], default=4.0 + gc - rc - ) - hsv[..., 0] = (hsv[..., 0] / 6.0) % 1.0 - return hsv - - @staticmethod - def hsv_to_rgb(hsv): - # Translated from source of colorsys.hsv_to_rgb - # h,s should be a numpy arrays with values between 0.0 and 1.0 - # v should be a numpy array with values between 0.0 and 255.0 - # hsv_to_rgb returns an array of uints between 0 and 255. - rgb = np.empty_like(hsv) - rgb[..., 3:] = hsv[..., 3:] - h, s, v = hsv[..., 0], hsv[..., 1], hsv[..., 2] - i = (h * 6.0).astype("uint8") - f = (h * 6.0) - i - p = v * (1.0 - s) - q = v * (1.0 - s * f) - t = v * (1.0 - s * (1.0 - f)) - i = i % 6 - conditions = [s == 0.0, i == 1, i == 2, i == 3, i == 4, i == 5] - rgb[..., 0] = np.select(conditions, [v, q, p, p, t, v], default=v) - rgb[..., 1] = np.select(conditions, [v, v, v, q, p, p], default=t) - rgb[..., 2] = np.select(conditions, [v, p, t, v, v, q], default=p) - return rgb.astype("uint8") - - def __init__(self, hue_max=0.5, saturation_max=0.2): - self.hue_max = hue_max - self.saturation_max = saturation_max - - def __call__(self, data_dict): - if "color" in data_dict.keys(): - # Assume color[:, :3] is rgb - hsv = HueSaturationTranslation.rgb_to_hsv(data_dict["color"][:, :3]) - hue_val = (np.random.rand() - 0.5) * 2 * self.hue_max - sat_ratio = 1 + (np.random.rand() - 0.5) * 2 * self.saturation_max - hsv[..., 0] = np.remainder(hue_val + hsv[..., 0] + 1, 1) - hsv[..., 1] = np.clip(sat_ratio * hsv[..., 1], 0, 1) - data_dict["color"][:, :3] = np.clip( - HueSaturationTranslation.hsv_to_rgb(hsv), 0, 255 - ) - return data_dict - - -@TRANSFORMS.register_module() -class RandomColorDrop(object): - def __init__(self, p=0.2, color_augment=0.0): - self.p = p - self.color_augment = color_augment - - def __call__(self, data_dict): - if "color" in data_dict.keys() and np.random.rand() < self.p: - data_dict["color"] *= self.color_augment - return data_dict - - def __repr__(self): - return "RandomColorDrop(color_augment: {}, p: {})".format( - self.color_augment, self.p - ) - - -@TRANSFORMS.register_module() -class ElasticDistortion(object): - def __init__(self, distortion_params=None): - self.distortion_params = ( - [[0.2, 0.4], [0.8, 1.6]] if distortion_params is None else distortion_params - ) - - @staticmethod - def elastic_distortion(coords, granularity, magnitude): - """ - Apply elastic distortion on sparse coordinate space. - pointcloud: numpy array of (number of points, at least 3 spatial dims) - granularity: size of the noise grid (in same scale[m/cm] as the voxel grid) - magnitude: noise multiplier - """ - blurx = np.ones((3, 1, 1, 1)).astype("float32") / 3 - blury = np.ones((1, 3, 1, 1)).astype("float32") / 3 - blurz = np.ones((1, 1, 3, 1)).astype("float32") / 3 - coords_min = coords.min(0) - - # Create Gaussian noise tensor of the size given by granularity. - noise_dim = ((coords - coords_min).max(0) // granularity).astype(int) + 3 - noise = np.random.randn(*noise_dim, 3).astype(np.float32) - - # Smoothing. - for _ in range(2): - noise = scipy.ndimage.filters.convolve( - noise, blurx, mode="constant", cval=0 - ) - noise = scipy.ndimage.filters.convolve( - noise, blury, mode="constant", cval=0 - ) - noise = scipy.ndimage.filters.convolve( - noise, blurz, mode="constant", cval=0 - ) - - # Trilinear interpolate noise filters for each spatial dimensions. - ax = [ - np.linspace(d_min, d_max, d) - for d_min, d_max, d in zip( - coords_min - granularity, - coords_min + granularity * (noise_dim - 2), - noise_dim, - ) - ] - interp = scipy.interpolate.RegularGridInterpolator( - ax, noise, bounds_error=False, fill_value=0 - ) - coords += interp(coords) * magnitude - return coords - - def __call__(self, data_dict): - if "coord" in data_dict.keys() and self.distortion_params is not None: - if random.random() < 0.95: - for granularity, magnitude in self.distortion_params: - data_dict["coord"] = self.elastic_distortion( - data_dict["coord"], granularity, magnitude - ) - return data_dict - - -@TRANSFORMS.register_module() -class GridSample(object): - def __init__( - self, - grid_size=0.05, - hash_type="fnv", - mode="train", - return_inverse=False, - return_grid_coord=False, - return_min_coord=False, - return_displacement=False, - project_displacement=False, - ): - self.grid_size = grid_size - self.hash = self.fnv_hash_vec if hash_type == "fnv" else self.ravel_hash_vec - assert mode in ["train", "test"] - self.mode = mode - self.return_inverse = return_inverse - self.return_grid_coord = return_grid_coord - self.return_min_coord = return_min_coord - self.return_displacement = return_displacement - self.project_displacement = project_displacement - - def __call__(self, data_dict): - assert "coord" in data_dict.keys() - scaled_coord = data_dict["coord"] / np.array(self.grid_size) - grid_coord = np.floor(scaled_coord).astype(int) - min_coord = grid_coord.min(0) - grid_coord -= min_coord - scaled_coord -= min_coord - min_coord = min_coord * np.array(self.grid_size) - key = self.hash(grid_coord) - idx_sort = np.argsort(key) - key_sort = key[idx_sort] - _, inverse, count = np.unique(key_sort, return_inverse=True, return_counts=True) - if self.mode == "train": # train mode - idx_select = ( - np.cumsum(np.insert(count, 0, 0)[0:-1]) - + np.random.randint(0, count.max(), count.size) % count - ) - idx_unique = idx_sort[idx_select] - if "sampled_index" in data_dict: - # for ScanNet data efficient, we need to make sure labeled point is sampled. - idx_unique = np.unique( - np.append(idx_unique, data_dict["sampled_index"]) - ) - mask = np.zeros_like(data_dict["segment"]).astype(bool) - mask[data_dict["sampled_index"]] = True - data_dict["sampled_index"] = np.where(mask[idx_unique])[0] - data_dict = index_operator(data_dict, idx_unique) - if self.return_inverse: - data_dict["inverse"] = np.zeros_like(inverse) - data_dict["inverse"][idx_sort] = inverse - if self.return_grid_coord: - data_dict["grid_coord"] = grid_coord[idx_unique] - if "grid_coord" not in data_dict["index_valid_keys"]: - data_dict["index_valid_keys"].append("grid_coord") - if self.return_min_coord: - data_dict["min_coord"] = min_coord.reshape([1, 3]) - if self.return_displacement: - displacement = ( - scaled_coord - grid_coord - 0.5 - ) # [0, 1] -> [-0.5, 0.5] displacement to center - if self.project_displacement: - displacement = np.sum( - displacement * data_dict["normal"], axis=-1, keepdims=True - ) - data_dict["displacement"] = displacement[idx_unique] - if "displacement" not in data_dict["index_valid_keys"]: - data_dict["index_valid_keys"].append("displacement") - return data_dict - - elif self.mode == "test": # test mode - data_part_list = [] - for i in range(count.max()): - idx_select = np.cumsum(np.insert(count, 0, 0)[0:-1]) + i % count - idx_part = idx_sort[idx_select] - data_part = index_operator(data_dict, idx_part, duplicate=True) - data_part["index"] = idx_part - if self.return_inverse: - data_part["inverse"] = np.zeros_like(inverse) - data_part["inverse"][idx_sort] = inverse - if self.return_grid_coord: - data_part["grid_coord"] = grid_coord[idx_part] - if "grid_coord" not in data_part["index_valid_keys"]: - data_part["index_valid_keys"].append("grid_coord") - if self.return_min_coord: - data_part["min_coord"] = min_coord.reshape([1, 3]) - if self.return_displacement: - displacement = ( - scaled_coord - grid_coord - 0.5 - ) # [0, 1] -> [-0.5, 0.5] displacement to center - if self.project_displacement: - displacement = np.sum( - displacement * data_dict["normal"], axis=-1, keepdims=True - ) - data_part["displacement"] = displacement[idx_part] - if "displacement" not in data_part["index_valid_keys"]: - data_part["index_valid_keys"].append("displacement") - data_part_list.append(data_part) - return data_part_list - else: - raise NotImplementedError - - @staticmethod - def ravel_hash_vec(arr): - """ - Ravel the coordinates after subtracting the min coordinates. - """ - assert arr.ndim == 2 - arr = arr.copy() - arr -= arr.min(0) - arr = arr.astype(np.uint64, copy=False) - arr_max = arr.max(0).astype(np.uint64) + 1 - - keys = np.zeros(arr.shape[0], dtype=np.uint64) - # Fortran style indexing - for j in range(arr.shape[1] - 1): - keys += arr[:, j] - keys *= arr_max[j + 1] - keys += arr[:, -1] - return keys - - @staticmethod - def fnv_hash_vec(arr): - """ - FNV64-1A - """ - assert arr.ndim == 2 - # Floor first for negative coordinates - arr = arr.copy() - arr = arr.astype(np.uint64, copy=False) - hashed_arr = np.uint64(14695981039346656037) * np.ones( - arr.shape[0], dtype=np.uint64 - ) - for j in range(arr.shape[1]): - hashed_arr *= np.uint64(1099511628211) - hashed_arr = np.bitwise_xor(hashed_arr, arr[:, j]) - return hashed_arr - - -@TRANSFORMS.register_module() -class SphereCrop(object): - def __init__(self, point_max=80000, sample_rate=None, mode="random"): - self.point_max = point_max - self.sample_rate = sample_rate - assert mode in ["random", "center", "all", "given"] - self.mode = mode - - def __call__(self, data_dict): - point_max = ( - int(self.sample_rate * data_dict["coord"].shape[0]) - if self.sample_rate is not None - else self.point_max - ) - - assert "coord" in data_dict.keys() - if data_dict["coord"].shape[0] > point_max: - if self.mode == "random": - center = data_dict["coord"][ - np.random.randint(data_dict["coord"].shape[0]) - ] - elif self.mode == "center": - center = data_dict["coord"][data_dict["coord"].shape[0] // 2] - elif self.mode == "given": - given_index = data_dict["correspondence"].reshape( - data_dict["correspondence"].shape[0], -1 - ) - given_index = np.all( - given_index != np.ones_like(given_index[0]) * -1, axis=1 - ) - given_coord = data_dict["coord"][given_index] - if given_coord.shape[0] == 0: - center = data_dict["coord"][ - np.random.randint(data_dict["coord"].shape[0]) - ] - else: - center = np.mean(given_coord, axis=0) - else: - raise NotImplementedError - idx_crop = np.argsort(np.sum(np.square(data_dict["coord"] - center), 1))[ - :point_max - ] - data_dict = index_operator(data_dict, idx_crop) - return data_dict - - -@TRANSFORMS.register_module() -class ShufflePoint(object): - def __call__(self, data_dict): - assert "coord" in data_dict.keys() - shuffle_index = np.arange(data_dict["coord"].shape[0]) - np.random.shuffle(shuffle_index) - data_dict = index_operator(data_dict, shuffle_index) - return data_dict - - -@TRANSFORMS.register_module() -class CropBoundary(object): - def __call__(self, data_dict): - assert "segment" in data_dict - segment = data_dict["segment"].flatten() - mask = (segment != 0) * (segment != 1) - data_dict = index_operator(data_dict, mask) - return data_dict - - -@TRANSFORMS.register_module() -class ContrastiveViewsGenerator(object): - def __init__( - self, - view_keys=("coord", "color", "normal", "origin_coord"), - view_trans_cfg=None, - ): - self.view_keys = view_keys - self.view_trans = Compose(view_trans_cfg) - - def __call__(self, data_dict): - view1_dict = dict() - view2_dict = dict() - for key in self.view_keys: - view1_dict[key] = data_dict[key].copy() - view2_dict[key] = data_dict[key].copy() - view1_dict = self.view_trans(view1_dict) - view2_dict = self.view_trans(view2_dict) - for key, value in view1_dict.items(): - data_dict["view1_" + key] = value - for key, value in view2_dict.items(): - data_dict["view2_" + key] = value - return data_dict - - -@TRANSFORMS.register_module() -class MultiViewGenerator(object): - def __init__( - self, - global_view_num=2, - global_view_scale=(0.4, 1.0), - local_view_num=4, - local_view_scale=(0.1, 0.4), - global_shared_transform=None, - global_transform=None, - local_transform=None, - max_size=65536, - enc2d_max_size=102400, - enc2d_scale=(0.8, 1), - center_height_scale=(0, 1), - shared_global_view=False, - view_keys=("coord", "origin_coord", "color", "normal", "correspondence"), - static_view_keys=("name", "img_num"), - ): - self.global_view_num = global_view_num - self.global_view_scale = global_view_scale - self.local_view_num = local_view_num - self.local_view_scale = local_view_scale - self.global_shared_transform = Compose(global_shared_transform) - self.global_transform = Compose(global_transform) - self.local_transform = Compose(local_transform) - self.max_size = max_size - self.enc2d_max_size = enc2d_max_size - self.enc2d_scale = enc2d_scale - self.center_height_scale = center_height_scale - self.shared_global_view = shared_global_view - self.view_keys = view_keys - self.static_view_keys = static_view_keys - assert "coord" in view_keys - - def get_view(self, point, center, scale, if_enc2d=False): - coord = point["coord"] - max_size = min(self.max_size, coord.shape[0]) - enc2d_max_size = min(self.enc2d_max_size, coord.shape[0]) - size = 0 - for _ in range(10): - if if_enc2d: - size = enc2d_max_size - else: - size = int(np.random.uniform(*scale) * max_size) - if size > 0: - break - if size == 0: - size = max(10, scale[-1] * max_size) - assert size > 0 - index = np.argsort(np.sum(np.square(coord - center), axis=-1))[:size] - view = dict(index=index) - for key in point.keys(): - if key in self.view_keys: - view[key] = point[key][index] - if key in self.static_view_keys: - view[key] = point[key] - if "index_valid_keys" in point.keys(): - # inherit index_valid_keys from point - view["index_valid_keys"] = point["index_valid_keys"] - return view - - @staticmethod - def match_point_image(major_view, data_dict): - major_correspondence = major_view["correspondence"].transpose(1, 0, 2) - correspondence = data_dict["correspondence"].transpose(1, 0, 2) - is_all_neg1 = np.any(major_correspondence != np.array([-1, -1]), axis=(1, 2)) - indices = np.where(is_all_neg1)[0] - img_dict = { - "images": data_dict["images"][indices], - "img_num": indices.shape[0], - "major_correspondence": major_correspondence[indices].transpose(1, 0, 2), - "correspondence": correspondence[indices].transpose(1, 0, 2), - } - return img_dict - - def __call__(self, data_dict): - coord = data_dict["coord"] - point = self.global_shared_transform(copy.deepcopy(data_dict)) - z_min = coord[:, 2].min() - z_max = coord[:, 2].max() - z_min_ = z_min + (z_max - z_min) * self.center_height_scale[0] - z_max_ = z_min + (z_max - z_min) * self.center_height_scale[1] - if "correspondence" not in data_dict.keys(): - center_mask = np.logical_and(coord[:, 2] >= z_min_, coord[:, 2] <= z_max_) - major_center = coord[np.random.choice(np.where(center_mask)[0])] - major_view = self.get_view(point, major_center, self.global_view_scale) - else: - given_index = data_dict["correspondence"].reshape( - data_dict["correspondence"].shape[0], -1 - ) - given_index = np.all( - given_index != np.ones_like(given_index[0]) * -1, axis=1 - ) - given_coord = data_dict["coord"][given_index] - if given_coord.shape[0] == 0: - center_mask = np.logical_and( - coord[:, 2] >= z_min_, coord[:, 2] <= z_max_ - ) - major_center = coord[np.random.choice(np.where(center_mask)[0])] - else: - major_center = np.mean(given_coord, axis=0) - major_view = self.get_view( - point, major_center, self.global_view_scale, if_enc2d=True - ) - img_dict = self.match_point_image(major_view, data_dict) - major_view["correspondence"] = img_dict["major_correspondence"] - data_dict["correspondence"] = img_dict["correspondence"] - point["correspondence"] = img_dict["correspondence"] - data_dict["img_num"] = img_dict["img_num"] - data_dict["images"] = img_dict["images"] - major_coord = major_view["coord"] - - # get global views: restrict the center of left global view within the major global view - if not self.shared_global_view: - global_views = [ - self.get_view( - point=point, - center=major_coord[np.random.randint(major_coord.shape[0])], - scale=self.global_view_scale, - ) - for _ in range(self.global_view_num - 1) - ] - else: - global_views = [ - {key: value.copy() for key, value in major_view.items()} - for _ in range(self.global_view_num - 1) - ] - - global_views = [major_view] + global_views - - # get local views: restrict the center of local view within the major global view - cover_mask = np.zeros_like(major_view["index"], dtype=bool) - local_views = [] - for i in range(self.local_view_num): - if sum(~cover_mask) == 0: - # reset cover mask if all points are sampled - cover_mask[:] = False - local_view = self.get_view( - point=data_dict, - center=major_coord[np.random.choice(np.where(~cover_mask)[0])], - scale=self.local_view_scale, - ) - local_views.append(local_view) - cover_mask[np.isin(major_view["index"], local_view["index"])] = True - - # augmentation and concat - view_dict = {} - for global_view in global_views: - global_view.pop("index") - global_view = self.global_transform(global_view) - for key in self.view_keys: - if f"global_{key}" in view_dict.keys(): - view_dict[f"global_{key}"].append(global_view[key]) - else: - view_dict[f"global_{key}"] = [global_view[key]] - view_dict["global_offset"] = np.cumsum( - [data.shape[0] for data in view_dict["global_coord"]] - ) - for local_view in local_views: - local_view.pop("index") - local_view = self.local_transform(local_view) - for key in self.view_keys: - if f"local_{key}" in view_dict.keys(): - view_dict[f"local_{key}"].append(local_view[key]) - else: - view_dict[f"local_{key}"] = [local_view[key]] - view_dict["local_offset"] = np.cumsum( - [data.shape[0] for data in view_dict["local_coord"]] - ) - - for key in view_dict.keys(): - if "offset" not in key: - if key in self.static_view_keys: - view_dict[key] = view_dict[key] - else: - view_dict[key] = np.concatenate(view_dict[key], axis=0) - data_dict.update(view_dict) - return data_dict - - -@TRANSFORMS.register_module() -class InstanceParser(object): - def __init__(self, segment_ignore_index=(-1, 0, 1), instance_ignore_index=-1): - self.segment_ignore_index = segment_ignore_index - self.instance_ignore_index = instance_ignore_index - - def __call__(self, data_dict): - coord = data_dict["coord"] - segment = data_dict["segment"] - instance = data_dict["instance"] - mask = ~np.in1d(segment, self.segment_ignore_index) - # mapping ignored instance to ignore index - instance[~mask] = self.instance_ignore_index - # reorder left instance - unique, inverse = np.unique(instance[mask], return_inverse=True) - instance_num = len(unique) - instance[mask] = inverse - # init instance information - centroid = np.ones((coord.shape[0], 3)) * self.instance_ignore_index - bbox = np.ones((instance_num, 8)) * self.instance_ignore_index - vacancy = [ - index for index in self.segment_ignore_index if index >= 0 - ] # vacate class index - - for instance_id in range(instance_num): - mask_ = instance == instance_id - coord_ = coord[mask_] - bbox_min = coord_.min(0) - bbox_max = coord_.max(0) - bbox_centroid = coord_.mean(0) - bbox_center = (bbox_max + bbox_min) / 2 - bbox_size = bbox_max - bbox_min - bbox_theta = np.zeros(1, dtype=coord_.dtype) - bbox_class = np.array([segment[mask_][0]], dtype=coord_.dtype) - # shift class index to fill vacate class index caused by segment ignore index - bbox_class -= np.greater(bbox_class, vacancy).sum() - - centroid[mask_] = bbox_centroid - bbox[instance_id] = np.concatenate( - [bbox_center, bbox_size, bbox_theta, bbox_class] - ) # 3 + 3 + 1 + 1 = 8 - data_dict["instance"] = instance - data_dict["instance_centroid"] = centroid - data_dict["bbox"] = bbox - return data_dict - - -class Compose(object): - def __init__(self, cfg=None): - self.cfg = cfg if cfg is not None else [] - self.transforms = [] - for t_cfg in self.cfg: - self.transforms.append(TRANSFORMS.build(t_cfg)) - - def __call__(self, data_dict): - for t in self.transforms: - data_dict = t(data_dict) - return data_dict - - -@TRANSFORMS.register_module() -class ImgToTensor(object): - def __init__(self): - self.totensor = transforms.ToTensor() - - def __call__(self, img): - return self.totensor(img) - - -@TRANSFORMS.register_module() -class ImgGaussianBlur(object): - """ - Apply Gaussian Blur to the PIL image. - """ - - def __init__( - self, *, p: float = 0.5, radius_min: float = 0.1, radius_max: float = 2.0 - ): - # NOTE: torchvision is applying 1 - probability to return the original image - self.p = p - self.transform = transforms.GaussianBlur( - kernel_size=9, sigma=(radius_min, radius_max) - ) - super().__init__() - - def __call__(self, img): - if np.random.rand() < self.p: - img = self.transform(img) - return img - - -@TRANSFORMS.register_module() -class ImgChromaticJitter(object): - def __init__(self, p=0.95, std=0.005): - self.p = p - self.std = std - - def __call__(self, img): - if np.random.rand() < self.p: - noise = torch.rand(3) - noise *= self.std - noise = noise[:, None, None].expand_as(img) - img += noise - img = torch.clip(img, 0, 1) - return img - - -@TRANSFORMS.register_module() -class ImgPixelContrast(object): - def __init__(self, threshold, p=0.2): - super().__init__() - self.p = p - self.threshold = threshold - - def __call__(self, img): - if np.random.rand() < self.p: - n, h, w = img.shape[0], img.shape[2], img.shape[3] - num_pixels = int(self.threshold * h * w * n) - indices = torch.randint(0, n * h * w, (num_pixels,)) - img = img.permute(0, 2, 3, 1).reshape(-1, 3) - img[indices, :] = 255.0 - img[indices, :] - img = img.reshape(n, h, w, 3).permute(0, 3, 1, 2) - return img - - -IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406) -IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225) - - -@TRANSFORMS.register_module() -class Imgnormalize(object): - def __init__(self, mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD): - super().__init__() - self.normalize = transforms.Normalize(mean=mean, std=std) - - def __call__(self, img): - return self.normalize(img) - - -@TRANSFORMS.register_module() -class ImgRandomHorizontalFlip(object): - def __init__(self, p=0.5): - super().__init__() - self.p = p - self.imgrandomhorizontalflip = transforms.RandomHorizontalFlip(p=p) - - def __call__(self, img): - return self.imgrandomhorizontalflip(img) - - -@TRANSFORMS.register_module() -class ImgRandomResizedCrop(object): - def __init__(self, size, scale, interpolation): - super().__init__() - self.imgrandomresizedcrop = transforms.RandomResizedCrop( - size=size, scale=scale, interpolation=interpolation - ) - - def __call__(self, img): - return self.imgrandomresizedcrop(img) - - -@TRANSFORMS.register_module() -class ImgRandomColorJitter(object): - def __init__(self, brightness=0.4, contrast=0.4, saturation=0.2, hue=0.1, p=0.8): - colorjitter = transforms.ColorJitter( - brightness=brightness, contrast=contrast, saturation=saturation, hue=hue - ) - super().__init__() - self.p = p - self.colorjitter = colorjitter - - def __call__(self, img): - return self.colorjitter(img) - - -@TRANSFORMS.register_module() -class ImgRandomGrayscale(object): - def __init__(self, p=0.1): - super().__init__() - self.p = p - self.imgrandomgrayscale = transforms.RandomGrayscale(p=p) - - def __call__(self, img): - return self.imgrandomgrayscale(img) - - -@TRANSFORMS.register_module() -class ImgRandomSolarize(object): - def __init__(self, threshold, p=0.1): - super().__init__() - self.p = p - self.imgrandomsolarize = transforms.RandomSolarize(threshold=threshold, p=p) - - def __call__(self, img): - return self.imgrandomsolarize(img) - - -@TRANSFORMS.register_module() -class ImgAugmentation(object): - def __init__( - self, - imgtransforms, - crop_h=518, - crop_w=518, - patch_h=37, - patch_w=37, - patch_size=14, - ): - self.transforms = [] - self.transforms_cfg = imgtransforms - for t_cfg in self.transforms_cfg: - self.transforms.append(TRANSFORMS.build(t_cfg)) - self.crop_h = crop_h - self.crop_w = crop_w - self.patch_h = patch_h - self.patch_w = patch_w - self.patch_size = patch_size - self.crop_start = [ - random.randint(0, patch_h * patch_size - crop_h), - random.randint(0, patch_w * patch_size - crop_w), - ] - - def __call__(self, point): - point["images"] = transforms.functional.crop( - point["images"], - top=self.crop_start[0], - left=self.crop_start[1], - height=self.crop_h, - width=self.crop_w, - ) - for id, t in enumerate(self.transforms): - point["images"] = t(point["images"]) - correspondence = point["correspondence"] - correspondence_shape = correspondence.shape - correspondence = correspondence.reshape(-1, 2) - mask = ( - (self.crop_start[0] <= correspondence[:, 0]) - & (correspondence[:, 0] < self.crop_start[0] + self.crop_h) - & (self.crop_start[1] <= correspondence[:, 1]) - & (correspondence[:, 1] < self.crop_start[1] + self.crop_w) - ) - correspondence[~mask] = np.array([-1, -1]) - correspondence[mask] -= np.array(self.crop_start) - point["correspondence"] = correspondence.reshape(correspondence_shape) - return point diff --git a/point_transformer_v3/pointcept_minimal/pointcept/datasets/utils.py b/point_transformer_v3/pointcept_minimal/pointcept/datasets/utils.py deleted file mode 100644 index 89e4247..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/datasets/utils.py +++ /dev/null @@ -1,140 +0,0 @@ -""" -Utils for Datasets - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -import random -from collections.abc import Mapping, Sequence -import numpy as np -import torch -from torch.utils.data.dataloader import default_collate -import torch.nn.functional as F - - -def collate_fn(batch): - """ - collate function for point cloud which support dict and list, - 'coord' is necessary to determine 'offset' - """ - if not isinstance(batch, Sequence): - raise TypeError(f"{batch.dtype} is not supported.") - - if isinstance(batch[0], torch.Tensor): - return torch.cat(list(batch)) - elif isinstance(batch[0], str): - # str is also a kind of Sequence, judgement should before Sequence - return list(batch) - elif isinstance(batch[0], Sequence): - for data in batch: - data.append(torch.tensor([data[0].shape[0]])) - batch = [collate_fn(samples) for samples in zip(*batch)] - batch[-1] = torch.cumsum(batch[-1], dim=0).int() - return batch - elif isinstance(batch[0], Mapping): - if "img_num" in batch[0].keys(): - max_img_num = max([d["img_num"] for d in batch]) - batch = { - key: ( - ( - collate_fn([d[key] for d in batch]) - if "offset" not in key - # offset -> bincount -> concat bincount-> concat offset - else torch.cumsum( - collate_fn( - [d[key].diff(prepend=torch.tensor([0])) for d in batch] - ), - dim=0, - ) - ) - if "correspondence" not in key - else collate_fn( - [ - F.pad( - d[key].permute(0, 2, 1), - (0, max_img_num - d[key].shape[1]), - value=-1, - ).permute(0, 2, 1) - for d in batch - ] - ) - ) - for key in batch[0] - } - return batch - else: - return default_collate(batch) - - -def point_collate_fn(batch, mix_prob=0): - assert isinstance( - batch[0], Mapping - ) # currently, only support input_dict, rather than input_list - batch = collate_fn(batch) - if random.random() < mix_prob: - if "instance" in batch.keys(): - offset = batch["offset"] - start = 0 - num_instance = 0 - for i in range(len(offset)): - if i % 2 == 0: - num_instance = max(batch["instance"][start : offset[i]]) - if i % 2 != 0: - mask = batch["instance"][start : offset[i]] != -1 - batch["instance"][start : offset[i]] += num_instance * mask - start = offset[i] - offset_assets = [asset for asset in batch.keys() if "offset" in asset] - for offset_asset in offset_assets: - batch[offset_asset] = torch.cat( - [batch[offset_asset][1:-1:2], batch[offset_asset][-1].unsqueeze(0)], - dim=0, - ) - if "img_num" in batch.keys(): - n = batch["img_num"].shape[0] - num_pairs = n // 2 - len_pairs = num_pairs * 2 - pairs_tensor = batch["img_num"][:len_pairs] - - if num_pairs == 0: - pass - else: - summed_pairs = pairs_tensor.view(-1, 2).sum(dim=1) - if n % 2 != 0: - last_element = batch["img_num"][-1:] - result = torch.cat((summed_pairs, last_element)) - else: - result = summed_pairs - batch["img_num"] = result - correspondence_assets = [ - asset for asset in batch.keys() if "correspondence" in asset - ] - for correspondence_asset in correspondence_assets: - offset = batch["offset"] - start = 0 - N, v, n = batch[correspondence_asset].shape - v2 = v * 2 - batch_correspondence_mix = -torch.ones((N, v2, n)) - for i in range(len(offset)): - if i % 2 == 0: - batch_correspondence_mix[start : offset[i], 0:v] = batch[ - correspondence_asset - ][start : offset[i], 0:v] - if i % 2 != 0: - batch_correspondence_mix[start : offset[i], v:] = batch[ - correspondence_asset - ][start : offset[i], 0:v] - start = offset[i] - if len(offset) % 2 == 0: - pass - else: - start = 0 if len(offset) == 1 else offset[-2] - batch_correspondence_mix[start:N, -v:] = batch[correspondence_asset][ - start:N, -v: - ] - batch[correspondence_asset] = batch_correspondence_mix - return batch - - -def gaussian_kernel(dist2: np.array, a: float = 1, c: float = 5): - return a * np.exp(-dist2 / (2 * c**2)) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/engines/__init__.py b/point_transformer_v3/pointcept_minimal/pointcept/engines/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/point_transformer_v3/pointcept_minimal/pointcept/engines/defaults.py b/point_transformer_v3/pointcept_minimal/pointcept/engines/defaults.py deleted file mode 100644 index 6091e70..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/engines/defaults.py +++ /dev/null @@ -1,152 +0,0 @@ -""" -Default training/testing logic - -modified from detectron2(https://github.com/facebookresearch/detectron2) - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -import os -import sys -import argparse -import multiprocessing as mp -from torch.nn.parallel import DistributedDataParallel - - -import pointcept.utils.comm as comm -from pointcept.utils.env import get_random_seed, set_seed -from pointcept.utils.config import Config, DictAction - - -def create_ddp_model(model, *, fp16_compression=False, **kwargs): - """ - Create a DistributedDataParallel model if there are >1 processes. - Args: - model: a torch.nn.Module - fp16_compression: add fp16 compression hooks to the ddp object. - See more at https://pytorch.org/docs/stable/ddp_comm_hooks.html#torch.distributed.algorithms.ddp_comm_hooks.default_hooks.fp16_compress_hook - kwargs: other arguments of :module:`torch.nn.parallel.DistributedDataParallel`. - """ - if comm.get_world_size() == 1: - return model - # kwargs['find_unused_parameters'] = True - if "device_ids" not in kwargs: - kwargs["device_ids"] = [comm.get_local_rank()] - if "output_device" not in kwargs: - kwargs["output_device"] = [comm.get_local_rank()] - ddp = DistributedDataParallel(model, **kwargs) - if fp16_compression: - from torch.distributed.algorithms.ddp_comm_hooks import default as comm_hooks - - ddp.register_comm_hook(state=None, hook=comm_hooks.fp16_compress_hook) - return ddp - - -def worker_init_fn(worker_id, num_workers, rank, seed): - """Worker init func for dataloader. - - The seed of each worker equals to num_worker * rank + worker_id + user_seed - - Args: - worker_id (int): Worker id. - num_workers (int): Number of workers. - rank (int): The rank of current process. - seed (int): The random seed to use. - """ - - worker_seed = None if seed is None else num_workers * rank + worker_id + seed - set_seed(worker_seed) - - -def default_argument_parser(epilog=None): - parser = argparse.ArgumentParser( - epilog=epilog - or f""" - Examples: - Run on single machine: - $ {sys.argv[0]} --num-gpus 8 --config-file cfg.yaml - Change some config options: - $ {sys.argv[0]} --config-file cfg.yaml MODEL.WEIGHTS /path/to/weight.pth SOLVER.BASE_LR 0.001 - Run on multiple machines: - (machine0)$ {sys.argv[0]} --machine-rank 0 --num-machines 2 --dist-url [--other-flags] - (machine1)$ {sys.argv[0]} --machine-rank 1 --num-machines 2 --dist-url [--other-flags] - """, - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - parser.add_argument( - "--config-file", default="", metavar="FILE", help="path to config file" - ) - parser.add_argument( - "--num-gpus", type=int, default=1, help="number of gpus *per machine*" - ) - parser.add_argument( - "--num-machines", type=int, default=1, help="total number of machines" - ) - parser.add_argument( - "--machine-rank", - type=int, - default=0, - help="the rank of this machine (unique per machine)", - ) - # PyTorch still may leave orphan processes in multi-gpu training. - # Therefore we use a deterministic way to obtain port, - # so that users are aware of orphan processes by seeing the port occupied. - # port = 2 ** 15 + 2 ** 14 + hash(os.getuid() if sys.platform != "win32" else 1) % 2 ** 14 - parser.add_argument( - "--dist-url", - # default="tcp://127.0.0.1:{}".format(port), - default="auto", - help="initialization URL for pytorch distributed backend. See " - "https://pytorch.org/docs/stable/distributed.html for details.", - ) - parser.add_argument( - "--options", nargs="+", action=DictAction, help="custom options" - ) - return parser - - -def default_config_parser(file_path, options): - # config name protocol: dataset_name/model_name-exp_name - if os.path.isfile(file_path): - cfg = Config.fromfile(file_path) - else: - sep = file_path.find("-") - cfg = Config.fromfile(os.path.join(file_path[:sep], file_path[sep + 1 :])) - - if options is not None: - cfg.merge_from_dict(options) - - if cfg.seed is None: - cfg.seed = get_random_seed() - - cfg.data.train.loop = cfg.epoch // cfg.eval_epoch - - os.makedirs(os.path.join(cfg.save_path, "model"), exist_ok=True) - if not cfg.resume: - cfg.dump(os.path.join(cfg.save_path, "config.py")) - return cfg - - -def default_setup(cfg): - # scalar by world size - world_size = comm.get_world_size() - cfg.num_worker = cfg.num_worker if cfg.num_worker is not None else mp.cpu_count() - cfg.num_worker_per_gpu = cfg.num_worker // world_size - assert cfg.batch_size % world_size == 0 - assert cfg.batch_size_val is None or cfg.batch_size_val % world_size == 0 - assert cfg.batch_size_test is None or cfg.batch_size_test % world_size == 0 - cfg.batch_size_per_gpu = cfg.batch_size // world_size - cfg.batch_size_val_per_gpu = ( - cfg.batch_size_val // world_size if cfg.batch_size_val is not None else 1 - ) - cfg.batch_size_test_per_gpu = ( - cfg.batch_size_test // world_size if cfg.batch_size_test is not None else 1 - ) - # update data loop - assert cfg.epoch % cfg.eval_epoch == 0 - # settle random seed - rank = comm.get_rank() - seed = None if cfg.seed is None else cfg.seed + rank * cfg.num_worker_per_gpu - set_seed(seed) - return cfg diff --git a/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/__init__.py b/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/__init__.py deleted file mode 100644 index 41c0320..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .default import * -from .misc import * -from .evaluator import * - -from .builder import build_hooks diff --git a/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/builder.py b/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/builder.py deleted file mode 100644 index 2f4cce4..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/builder.py +++ /dev/null @@ -1,18 +0,0 @@ -""" -Hook Builder - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -from pointcept.utils.registry import Registry - - -HOOKS = Registry("hooks") - - -def build_hooks(cfg): - hooks = [] - for hook_cfg in cfg: - hooks.append(HOOKS.build(hook_cfg)) - return hooks diff --git a/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/default.py b/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/default.py deleted file mode 100644 index 47f2aa1..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/default.py +++ /dev/null @@ -1,66 +0,0 @@ -""" -Default Hook - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -import pointcept.utils.comm as comm -import weakref -from .builder import HOOKS - - -class HookBase: - """ - Base class for hooks that can be registered with :class:`TrainerBase`. - """ - - trainer = None # A weak reference to the trainer object. - - def before_train(self): - pass - - def before_epoch(self): - pass - - def before_step(self): - pass - - def after_step(self): - pass - - def after_epoch(self): - pass - - def after_train(self): - pass - - -@HOOKS.register_module() -class ModelHook(HookBase): - def before_train(self): - if comm.get_world_size() > 1 and isinstance( - self.trainer.model.module, HookBase - ): - self.model = weakref.proxy(self.trainer.model.module) - elif isinstance(self.trainer.model, HookBase): - self.model = weakref.proxy(self.trainer.model) - else: - self.model = HookBase() - self.model.trainer = self.trainer - self.model.before_train() - - def before_epoch(self): - self.model.before_epoch() - - def before_step(self): - self.model.before_step() - - def after_step(self): - self.model.after_step() - - def after_epoch(self): - self.model.after_epoch() - - def after_train(self): - self.model.after_train() diff --git a/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/evaluator.py b/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/evaluator.py deleted file mode 100644 index 55b19d0..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/evaluator.py +++ /dev/null @@ -1,243 +0,0 @@ -""" -Evaluate Hook - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -import numpy as np -import wandb -import torch -import torch.distributed as dist -from uuid import uuid4 - -import pointcept.utils.comm as comm -from pointcept.utils.misc import intersection_and_union_gpu - -from .default import HookBase -from .builder import HOOKS - - -@HOOKS.register_module() -class ClsEvaluator(HookBase): - def after_epoch(self): - if self.trainer.cfg.evaluate: - self.eval() - - def eval(self): - self.trainer.logger.info(">>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>") - self.trainer.model.eval() - for i, input_dict in enumerate(self.trainer.val_loader): - for key in input_dict.keys(): - if isinstance(input_dict[key], torch.Tensor): - input_dict[key] = input_dict[key].cuda(non_blocking=True) - with torch.no_grad(): - output_dict = self.trainer.model(input_dict) - output = output_dict["cls_logits"] - loss = output_dict["loss"] - pred = output.max(1)[1] - label = input_dict["category"] - intersection, union, target = intersection_and_union_gpu( - pred, - label, - self.trainer.cfg.data.num_classes, - self.trainer.cfg.data.ignore_index, - ) - if comm.get_world_size() > 1: - dist.all_reduce(intersection), dist.all_reduce(union), dist.all_reduce( - target - ) - intersection, union, target = ( - intersection.cpu().numpy(), - union.cpu().numpy(), - target.cpu().numpy(), - ) - # Here there is no need to sync since sync happened in dist.all_reduce - self.trainer.storage.put_scalar("val_intersection", intersection) - self.trainer.storage.put_scalar("val_union", union) - self.trainer.storage.put_scalar("val_target", target) - self.trainer.storage.put_scalar("val_loss", loss.item()) - self.trainer.logger.info( - "Test: [{iter}/{max_iter}] " - "Loss {loss:.4f} ".format( - iter=i + 1, max_iter=len(self.trainer.val_loader), loss=loss.item() - ) - ) - loss_avg = self.trainer.storage.history("val_loss").avg - intersection = self.trainer.storage.history("val_intersection").total - union = self.trainer.storage.history("val_union").total - target = self.trainer.storage.history("val_target").total - iou_class = intersection / (union + 1e-10) - acc_class = intersection / (target + 1e-10) - m_iou = np.mean(iou_class) - m_acc = np.mean(acc_class) - all_acc = sum(intersection) / (sum(target) + 1e-10) - self.trainer.logger.info( - "Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.".format( - m_iou, m_acc, all_acc - ) - ) - for i in range(self.trainer.cfg.data.num_classes): - self.trainer.logger.info( - "Class_{idx}-{name} Result: iou/accuracy {iou:.4f}/{accuracy:.4f}".format( - idx=i, - name=self.trainer.cfg.data.names[i], - iou=iou_class[i], - accuracy=acc_class[i], - ) - ) - current_epoch = self.trainer.epoch + 1 - if self.trainer.writer is not None: - self.trainer.writer.add_scalar("val/loss", loss_avg, current_epoch) - self.trainer.writer.add_scalar("val/mIoU", m_iou, current_epoch) - self.trainer.writer.add_scalar("val/mAcc", m_acc, current_epoch) - self.trainer.writer.add_scalar("val/allAcc", all_acc, current_epoch) - if self.trainer.cfg.enable_wandb: - wandb.log( - { - "Epoch": current_epoch, - "val/loss": loss_avg, - "val/mIoU": m_iou, - "val/mAcc": m_acc, - "val/allAcc": all_acc, - }, - step=wandb.run.step, - ) - self.trainer.logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") - self.trainer.comm_info["current_metric_value"] = all_acc # save for saver - self.trainer.comm_info["current_metric_name"] = "allAcc" # save for saver - - def after_train(self): - self.trainer.logger.info( - "Best {}: {:.4f}".format("allAcc", self.trainer.best_metric_value) - ) - - -@HOOKS.register_module() -class SemSegEvaluator(HookBase): - def __init__(self, write_cls_iou=False): - self.write_cls_iou = write_cls_iou - - def before_train(self): - if self.trainer.writer is not None and self.trainer.cfg.enable_wandb: - wandb.define_metric("val/*", step_metric="Epoch") - - def after_epoch(self): - if self.trainer.cfg.evaluate: - self.eval() - - def eval(self): - self.trainer.logger.info(">>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>") - self.trainer.model.eval() - for i, input_dict in enumerate(self.trainer.val_loader): - for key in input_dict.keys(): - if isinstance(input_dict[key], torch.Tensor): - input_dict[key] = input_dict[key].cuda(non_blocking=True) - with torch.no_grad(): - output_dict = self.trainer.model(input_dict) - output = output_dict["seg_logits"] - loss = output_dict["loss"] - pred = output.max(1)[1] - segment = input_dict["segment"] - if "inverse" in input_dict.keys(): - assert "origin_segment" in input_dict.keys() - pred = pred[input_dict["inverse"]] - segment = input_dict["origin_segment"] - intersection, union, target = intersection_and_union_gpu( - pred, - segment, - self.trainer.cfg.data.num_classes, - self.trainer.cfg.data.ignore_index, - ) - if comm.get_world_size() > 1: - dist.all_reduce(intersection), dist.all_reduce(union), dist.all_reduce( - target - ) - intersection, union, target = ( - intersection.cpu().numpy(), - union.cpu().numpy(), - target.cpu().numpy(), - ) - # Here there is no need to sync since sync happened in dist.all_reduce - self.trainer.storage.put_scalar("val_intersection", intersection) - self.trainer.storage.put_scalar("val_union", union) - self.trainer.storage.put_scalar("val_target", target) - self.trainer.storage.put_scalar("val_loss", loss.item()) - info = "Test: [{iter}/{max_iter}] ".format( - iter=i + 1, max_iter=len(self.trainer.val_loader) - ) - if "origin_coord" in input_dict.keys(): - info = "Interp. " + info - self.trainer.logger.info( - info - + "Loss {loss:.4f} ".format( - iter=i + 1, max_iter=len(self.trainer.val_loader), loss=loss.item() - ) - ) - loss_avg = self.trainer.storage.history("val_loss").avg - intersection = self.trainer.storage.history("val_intersection").total - union = self.trainer.storage.history("val_union").total - target = self.trainer.storage.history("val_target").total - iou_class = intersection / (union + 1e-10) - acc_class = intersection / (target + 1e-10) - m_iou = np.mean(iou_class) - m_acc = np.mean(acc_class) - all_acc = sum(intersection) / (sum(target) + 1e-10) - self.trainer.logger.info( - "Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.".format( - m_iou, m_acc, all_acc - ) - ) - for i in range(self.trainer.cfg.data.num_classes): - self.trainer.logger.info( - "Class_{idx}-{name} Result: iou/accuracy {iou:.4f}/{accuracy:.4f}".format( - idx=i, - name=self.trainer.cfg.data.names[i], - iou=iou_class[i], - accuracy=acc_class[i], - ) - ) - current_epoch = self.trainer.epoch + 1 - if self.trainer.writer is not None: - self.trainer.writer.add_scalar("val/loss", loss_avg, current_epoch) - self.trainer.writer.add_scalar("val/mIoU", m_iou, current_epoch) - self.trainer.writer.add_scalar("val/mAcc", m_acc, current_epoch) - self.trainer.writer.add_scalar("val/allAcc", all_acc, current_epoch) - if self.trainer.cfg.enable_wandb: - wandb.log( - { - "Epoch": current_epoch, - "val/loss": loss_avg, - "val/mIoU": m_iou, - "val/mAcc": m_acc, - "val/allAcc": all_acc, - }, - step=wandb.run.step, - ) - if self.write_cls_iou: - for i in range(self.trainer.cfg.data.num_classes): - self.trainer.writer.add_scalar( - f"val/cls_{i}-{self.trainer.cfg.data.names[i]} IoU", - iou_class[i], - current_epoch, - ) - if self.trainer.cfg.enable_wandb: - for i in range(self.trainer.cfg.data.num_classes): - wandb.log( - { - "Epoch": current_epoch, - f"val/cls_{i}-{self.trainer.cfg.data.names[i]} IoU": iou_class[ - i - ], - }, - step=wandb.run.step, - ) - self.trainer.logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") - self.trainer.comm_info["current_metric_value"] = m_iou # save for saver - self.trainer.comm_info["current_metric_name"] = "mIoU" # save for saver - - def after_train(self): - self.trainer.logger.info( - "Best {}: {:.4f}".format("mIoU", self.trainer.best_metric_value) - ) - diff --git a/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/misc.py b/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/misc.py deleted file mode 100644 index 28d9682..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/engines/hooks/misc.py +++ /dev/null @@ -1,553 +0,0 @@ -""" -Misc Hook - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -import sys -import glob -import os -import shutil -import time -import gc -import wandb -import torch -import torch.utils.data -from collections import OrderedDict - -if sys.version_info >= (3, 10): - from collections.abc import Sequence -else: - from collections import Sequence -from pointcept.utils.timer import Timer -from pointcept.utils.comm import is_main_process, synchronize -from pointcept.utils.cache import shared_dict -from pointcept.utils.scheduler import CosineScheduler -import pointcept.utils.comm as comm - -from .default import HookBase -from .builder import HOOKS - - -@HOOKS.register_module() -class IterationTimer(HookBase): - def __init__(self, warmup_iter=1): - self._warmup_iter = warmup_iter - self._start_time = time.perf_counter() - self._iter_timer = Timer() - self._remain_iter = 0 - - def before_train(self): - self._start_time = time.perf_counter() - _remain_epoch = self.trainer.max_epoch - self.trainer.start_epoch - self._remain_iter = _remain_epoch * len(self.trainer.train_loader) - - def before_epoch(self): - self._iter_timer.reset() - - def before_step(self): - data_time = self._iter_timer.seconds() - self.trainer.storage.put_scalar("data_time", data_time) - - def after_step(self): - batch_time = self._iter_timer.seconds() - self._iter_timer.reset() - self.trainer.storage.put_scalar("batch_time", batch_time) - self._remain_iter -= 1 - remain_time = self._remain_iter * self.trainer.storage.history("batch_time").avg - t_m, t_s = divmod(remain_time, 60) - t_h, t_m = divmod(t_m, 60) - remain_time = "{:02d}:{:02d}:{:02d}".format(int(t_h), int(t_m), int(t_s)) - if "iter_info" in self.trainer.comm_info.keys(): - info = ( - "Data {data_time_val:.3f} ({data_time_avg:.3f}) " - "Batch {batch_time_val:.3f} ({batch_time_avg:.3f}) " - "Remain {remain_time} ".format( - data_time_val=self.trainer.storage.history("data_time").val, - data_time_avg=self.trainer.storage.history("data_time").avg, - batch_time_val=self.trainer.storage.history("batch_time").val, - batch_time_avg=self.trainer.storage.history("batch_time").avg, - remain_time=remain_time, - ) - ) - self.trainer.comm_info["iter_info"] += info - if self.trainer.comm_info["iter"] <= self._warmup_iter: - self.trainer.storage.history("data_time").reset() - self.trainer.storage.history("batch_time").reset() - - -@HOOKS.register_module() -class InformationWriter(HookBase): - def __init__(self): - self.curr_iter = 0 - self.model_output_keys = [] - - def before_train(self): - self.trainer.comm_info["iter_info"] = "" - self.curr_iter = self.trainer.start_epoch * len(self.trainer.train_loader) - if self.trainer.writer is not None and self.trainer.cfg.enable_wandb: - wandb.define_metric("params/*", step_metric="Iter") - wandb.define_metric("train_batch/*", step_metric="Iter") - wandb.define_metric("train/*", step_metric="Epoch") - - def before_step(self): - self.curr_iter += 1 - info = "Train: [{epoch}/{max_epoch}][{iter}/{max_iter}] ".format( - epoch=self.trainer.epoch + 1, - max_epoch=self.trainer.max_epoch, - iter=self.trainer.comm_info["iter"] + 1, - max_iter=len(self.trainer.train_loader), - ) - self.trainer.comm_info["iter_info"] += info - - def after_step(self): - if "model_output_dict" in self.trainer.comm_info.keys(): - model_output_dict = self.trainer.comm_info["model_output_dict"] - self.model_output_keys = model_output_dict.keys() - for key in self.model_output_keys: - self.trainer.storage.put_scalar(key, model_output_dict[key].item()) - - for key in self.model_output_keys: - self.trainer.comm_info["iter_info"] += "{key}: {value:.4f} ".format( - key=key, value=self.trainer.storage.history(key).val - ) - lr = self.trainer.optimizer.state_dict()["param_groups"][0]["lr"] - self.trainer.comm_info["iter_info"] += "Lr: {lr:.5f}".format(lr=lr) - self.trainer.logger.info(self.trainer.comm_info["iter_info"]) - self.trainer.comm_info["iter_info"] = "" # reset iter info - if self.trainer.writer is not None: - self.trainer.writer.add_scalar("params/lr", lr, self.curr_iter) - for key in self.model_output_keys: - self.trainer.writer.add_scalar( - "train_batch/" + key, - self.trainer.storage.history(key).val, - self.curr_iter, - ) - if self.trainer.cfg.enable_wandb: - - wandb.log( - {"Iter": self.curr_iter, "params/lr": lr}, step=self.curr_iter - ) - for key in self.model_output_keys: - wandb.log( - { - "Iter": self.curr_iter, - f"train_batch/{key}": self.trainer.storage.history(key).val, - }, - step=wandb.run.step, - ) - - def after_epoch(self): - epoch_info = "Train result: " - for key in self.model_output_keys: - epoch_info += "{key}: {value:.4f} ".format( - key=key, value=self.trainer.storage.history(key).avg - ) - self.trainer.logger.info(epoch_info) - if self.trainer.writer is not None: - for key in self.model_output_keys: - self.trainer.writer.add_scalar( - "train/" + key, - self.trainer.storage.history(key).avg, - self.trainer.epoch + 1, - ) - - if self.trainer.cfg.enable_wandb: - - for key in self.model_output_keys: - wandb.log( - { - "Epoch": self.trainer.epoch + 1, - f"train/{key}": self.trainer.storage.history(key).avg, - }, - step=wandb.run.step, - ) - - -@HOOKS.register_module() -class CheckpointSaver(HookBase): - def __init__(self, save_freq=None): - self.save_freq = save_freq # None or int, None indicate only save model last - - def after_epoch(self): - if is_main_process(): - is_best = False - if self.trainer.cfg.evaluate: - current_metric_value = self.trainer.comm_info["current_metric_value"] - current_metric_name = self.trainer.comm_info["current_metric_name"] - if current_metric_value > self.trainer.best_metric_value: - self.trainer.best_metric_value = current_metric_value - is_best = True - self.trainer.logger.info( - "Best validation {} updated to: {:.4f}".format( - current_metric_name, current_metric_value - ) - ) - self.trainer.logger.info( - "Currently Best {}: {:.4f}".format( - current_metric_name, self.trainer.best_metric_value - ) - ) - - filename = os.path.join( - self.trainer.cfg.save_path, "model", "model_last.pth" - ) - self.trainer.logger.info("Saving checkpoint to: " + filename) - torch.save( - { - "epoch": self.trainer.epoch + 1, - "state_dict": self.trainer.model.state_dict(), - "optimizer": self.trainer.optimizer.state_dict(), - "scheduler": self.trainer.scheduler.state_dict(), - "scaler": ( - self.trainer.scaler.state_dict() - if self.trainer.cfg.enable_amp - else None - ), - "best_metric_value": self.trainer.best_metric_value, - }, - filename + ".tmp", - ) - os.replace(filename + ".tmp", filename) - if is_best: - shutil.copyfile( - filename, - os.path.join(self.trainer.cfg.save_path, "model", "model_best.pth"), - ) - if self.save_freq and (self.trainer.epoch + 1) % self.save_freq == 0: - shutil.copyfile( - filename, - os.path.join( - self.trainer.cfg.save_path, - "model", - f"epoch_{self.trainer.epoch + 1}.pth", - ), - ) - - -@HOOKS.register_module() -class CheckpointLoader(HookBase): - def __init__(self, keywords="", replacement=None, strict=False): - self.keywords = keywords - self.replacement = replacement if replacement is not None else keywords - self.strict = strict - - def before_train(self): - self.trainer.logger.info("=> Loading checkpoint & weight ...") - if self.trainer.cfg.weight and os.path.isfile(self.trainer.cfg.weight): - self.trainer.logger.info(f"Loading weight at: {self.trainer.cfg.weight}") - checkpoint = torch.load( - self.trainer.cfg.weight, - map_location=lambda storage, loc: storage.cuda(), - weights_only=False, - ) - self.trainer.logger.info( - f"Loading layer weights with keyword: {self.keywords}, " - f"replace keyword with: {self.replacement}" - ) - weight = OrderedDict() - for key, value in checkpoint["state_dict"].items(): - if not key.startswith("module."): - key = "module." + key # xxx.xxx -> module.xxx.xxx - # Now all keys contain "module." no matter DDP or not. - if self.keywords in key: - key = key.replace(self.keywords, self.replacement, 1) - if comm.get_world_size() == 1: - key = key[7:] # module.xxx.xxx -> xxx.xxx - weight[key] = value - load_state_info = self.trainer.model.load_state_dict( - weight, strict=self.strict - ) - self.trainer.logger.info(f"Missing keys: {load_state_info[0]}") - if self.trainer.cfg.resume: - self.trainer.logger.info( - f"Resuming train at eval epoch: {checkpoint['epoch']}" - ) - self.trainer.start_epoch = checkpoint["epoch"] - self.trainer.best_metric_value = checkpoint["best_metric_value"] - self.trainer.optimizer.load_state_dict(checkpoint["optimizer"]) - self.trainer.scheduler.load_state_dict(checkpoint["scheduler"]) - if self.trainer.cfg.enable_amp: - self.trainer.scaler.load_state_dict(checkpoint["scaler"]) - else: - self.trainer.logger.info(f"No weight found at: {self.trainer.cfg.weight}") - - -@HOOKS.register_module() -class PreciseEvaluator(HookBase): - def __init__(self, test_last=False): - self.test_last = test_last - - def after_train(self): - from pointcept.engines.test import TESTERS - - self.trainer.logger.info( - ">>>>>>>>>>>>>>>> Start Precise Evaluation >>>>>>>>>>>>>>>>" - ) - torch.cuda.empty_cache() - cfg = self.trainer.cfg - test_cfg = dict(cfg=cfg, model=self.trainer.model, **cfg.test) - tester = TESTERS.build(test_cfg) - if self.test_last: - self.trainer.logger.info("=> Testing on model_last ...") - else: - self.trainer.logger.info("=> Testing on model_best ...") - best_path = os.path.join( - self.trainer.cfg.save_path, "model", "model_best.pth" - ) - checkpoint = torch.load(best_path, weights_only=False) - weight = OrderedDict() - for key, value in checkpoint["state_dict"].items(): - if not key.startswith("module."): - key = "module." + key # xxx.xxx -> module.xxx.xxx - # Now all keys contain "module." no matter DDP or not. - if comm.get_world_size() == 1: - key = key[7:] # module.xxx.xxx -> xxx.xxx - weight[key] = value - tester.model.load_state_dict(weight, strict=True) - tester.test() - - -@HOOKS.register_module() -class DataCacheOperator(HookBase): - def __init__(self, data_root, split): - self.data_root = data_root - self.split = split - self.data_list = self.get_data_list() - - def get_data_list(self): - if isinstance(self.split, str): - data_list = glob.glob(os.path.join(self.data_root, self.split)) - elif isinstance(self.split, Sequence): - data_list = [] - for split in self.split: - data_list += glob.glob(os.path.join(self.data_root, split)) - else: - raise NotImplementedError - return data_list - - def get_cache_name(self, data_path): - data_name = data_path.replace(os.path.dirname(self.data_root), "") - return "pointcept" + data_name.replace(os.path.sep, "-") - - def before_train(self): - self.trainer.logger.info( - f"=> Caching dataset: {self.data_root}, split: {self.split} ..." - ) - if is_main_process(): - dataset = self.trainer.train_loader.dataset - for i in range(len(dataset)): - data_dict = dataset[i] - name = data_dict["name"] - shared_dict(f"Pointcept-{name}", data_dict) - synchronize() - - -@HOOKS.register_module() -class RuntimeProfiler(HookBase): - def __init__( - self, - forward=True, - backward=True, - interrupt=False, - warm_up=2, - sort_by="cuda_time_total", - row_limit=30, - ): - self.forward = forward - self.backward = backward - self.interrupt = interrupt - self.warm_up = warm_up - self.sort_by = sort_by - self.row_limit = row_limit - - def before_train(self): - self.trainer.logger.info("Profiling runtime ...") - from torch.profiler import profile, record_function, ProfilerActivity - - for i, input_dict in enumerate(self.trainer.train_loader): - if i == self.warm_up + 1: - break - for key in input_dict.keys(): - if isinstance(input_dict[key], torch.Tensor): - input_dict[key] = input_dict[key].cuda(non_blocking=True) - if self.forward: - with profile( - activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], - record_shapes=True, - profile_memory=True, - with_stack=True, - ) as forward_prof: - with record_function("model_inference"): - output_dict = self.trainer.model(input_dict) - else: - output_dict = self.trainer.model(input_dict) - loss = output_dict["loss"] - if self.backward: - with profile( - activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], - record_shapes=True, - profile_memory=True, - with_stack=True, - ) as backward_prof: - with record_function("model_inference"): - loss.backward() - self.trainer.logger.info(f"Profile: [{i + 1}/{self.warm_up + 1}]") - if self.forward: - self.trainer.logger.info( - "Forward profile: \n" - + str( - forward_prof.key_averages().table( - sort_by=self.sort_by, row_limit=self.row_limit - ) - ) - ) - forward_prof.export_chrome_trace( - os.path.join(self.trainer.cfg.save_path, "forward_trace.json") - ) - - if self.backward: - self.trainer.logger.info( - "Backward profile: \n" - + str( - backward_prof.key_averages().table( - sort_by=self.sort_by, row_limit=self.row_limit - ) - ) - ) - backward_prof.export_chrome_trace( - os.path.join(self.trainer.cfg.save_path, "backward_trace.json") - ) - if self.interrupt: - sys.exit(0) - - -@HOOKS.register_module() -class RuntimeProfilerV2(HookBase): - def __init__( - self, - interrupt=False, - wait=1, - warmup=1, - active=10, - repeat=1, - sort_by="cuda_time_total", - row_limit=30, - ): - self.interrupt = interrupt - self.wait = wait - self.warmup = warmup - self.active = active - self.repeat = repeat - self.sort_by = sort_by - self.row_limit = row_limit - - def before_train(self): - self.trainer.logger.info("Profiling runtime ...") - from torch.profiler import ( - profile, - record_function, - ProfilerActivity, - schedule, - tensorboard_trace_handler, - ) - - prof = profile( - activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], - schedule=schedule( - wait=self.wait, - warmup=self.warmup, - active=self.active, - repeat=self.repeat, - ), - on_trace_ready=tensorboard_trace_handler(self.trainer.cfg.save_path), - record_shapes=True, - profile_memory=True, - with_stack=True, - ) - prof.start() - for i, input_dict in enumerate(self.trainer.train_loader): - if i >= (self.wait + self.warmup + self.active) * self.repeat: - break - for key in input_dict.keys(): - if isinstance(input_dict[key], torch.Tensor): - input_dict[key] = input_dict[key].cuda(non_blocking=True) - with record_function("model_forward"): - output_dict = self.trainer.model(input_dict) - loss = output_dict["loss"] - with record_function("model_backward"): - loss.backward() - prof.step() - self.trainer.logger.info( - f"Profile: [{i + 1}/{(self.wait + self.warmup + self.active) * self.repeat}]" - ) - self.trainer.logger.info( - "Profile: \n" - + str( - prof.key_averages().table( - sort_by=self.sort_by, row_limit=self.row_limit - ) - ) - ) - prof.stop() - - if self.interrupt: - sys.exit(0) - - -@HOOKS.register_module() -class WeightDecaySchedular(HookBase): - def __init__( - self, - base_value=0.04, - final_value=0.2, - ): - self.base_value = base_value - self.final_value = final_value - self.scheduler = None - - def before_train(self): - curr_step = self.trainer.start_epoch * len(self.trainer.train_loader) - self.scheduler = CosineScheduler( - base_value=self.base_value, - final_value=self.final_value, - total_iters=self.trainer.cfg.scheduler.total_steps, - ) - self.scheduler.iter = curr_step - - def before_step(self): - wd = self.scheduler.step() - for param_group in self.trainer.optimizer.param_groups: - param_group["weight_decay"] = wd - if self.trainer.writer is not None: - self.trainer.writer.add_scalar("params/wd", wd, self.scheduler.iter) - - -@HOOKS.register_module() -class GarbageHandler(HookBase): - def __init__(self, interval=150, disable_auto=True, empty_cache=False): - self.interval = interval - self.disable_auto = disable_auto - self.empty_cache = empty_cache - self.iter = 1 - - def before_train(self): - if self.disable_auto: - gc.disable() - self.trainer.logger.info("Disable automatic garbage collection") - - def before_epoch(self): - self.iter = 1 - - def after_step(self): - if self.iter % self.interval == 0: - gc.collect() - if self.empty_cache: - torch.cuda.empty_cache() - self.trainer.logger.info("Garbage collected") - self.iter += 1 - - def after_train(self): - gc.collect() - torch.cuda.empty_cache() diff --git a/point_transformer_v3/pointcept_minimal/pointcept/engines/launch.py b/point_transformer_v3/pointcept_minimal/pointcept/engines/launch.py deleted file mode 100644 index 99a8351..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/engines/launch.py +++ /dev/null @@ -1,137 +0,0 @@ -""" -Launcher - -modified from detectron2(https://github.com/facebookresearch/detectron2) - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -import os -import logging -from datetime import timedelta -import torch -import torch.distributed as dist -import torch.multiprocessing as mp - -from pointcept.utils import comm - -__all__ = ["DEFAULT_TIMEOUT", "launch"] - -DEFAULT_TIMEOUT = timedelta(minutes=60) - - -def _find_free_port(): - import socket - - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - # Binding to port 0 will cause the OS to find an available port for us - sock.bind(("", 0)) - port = sock.getsockname()[1] - sock.close() - # NOTE: there is still a chance the port could be taken by other processes. - return port - - -def launch( - main_func, - num_gpus_per_machine, - num_machines=1, - machine_rank=0, - dist_url=None, - cfg=(), - timeout=DEFAULT_TIMEOUT, -): - """ - Launch multi-gpu or distributed training. - This function must be called on all machines involved in the training. - It will spawn child processes (defined by ``num_gpus_per_machine``) on each machine. - Args: - main_func: a function that will be called by `main_func(*args)` - num_gpus_per_machine (int): number of GPUs per machine - num_machines (int): the total number of machines - machine_rank (int): the rank of this machine - dist_url (str): url to connect to for distributed jobs, including protocol - e.g. "tcp://127.0.0.1:8686". - Can be set to "auto" to automatically select a free port on localhost - timeout (timedelta): timeout of the distributed workers - args (tuple): arguments passed to main_func - """ - world_size = num_machines * num_gpus_per_machine - if world_size > 1: - if dist_url == "auto": - assert ( - num_machines == 1 - ), "dist_url=auto not supported in multi-machine jobs." - port = _find_free_port() - dist_url = f"tcp://127.0.0.1:{port}" - if num_machines > 1 and dist_url.startswith("file://"): - logger = logging.getLogger(__name__) - logger.warning( - "file:// is not a reliable init_method in multi-machine jobs. Prefer tcp://" - ) - - mp.spawn( - _distributed_worker, - nprocs=num_gpus_per_machine, - args=( - main_func, - world_size, - num_gpus_per_machine, - machine_rank, - dist_url, - cfg, - timeout, - ), - daemon=False, - ) - else: - main_func(*cfg) - - -def _distributed_worker( - local_rank, - main_func, - world_size, - num_gpus_per_machine, - machine_rank, - dist_url, - cfg, - timeout=DEFAULT_TIMEOUT, -): - assert ( - torch.cuda.is_available() - ), "cuda is not available. Please check your installation." - global_rank = machine_rank * num_gpus_per_machine + local_rank - try: - dist.init_process_group( - backend="NCCL", - init_method=dist_url, - world_size=world_size, - rank=global_rank, - timeout=timeout, - ) - except Exception as e: - logger = logging.getLogger(__name__) - logger.error("Process group URL: {}".format(dist_url)) - raise e - - # Setup the local process group (which contains ranks within the same machine) - assert comm._LOCAL_PROCESS_GROUP is None - num_machines = world_size // num_gpus_per_machine - for i in range(num_machines): - ranks_on_i = list( - range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine) - ) - pg = dist.new_group(ranks_on_i) - if i == machine_rank: - comm._LOCAL_PROCESS_GROUP = pg - - assert num_gpus_per_machine <= torch.cuda.device_count() - torch.cuda.set_device(local_rank) - - # synchronize is needed here to prevent a possible timeout after calling init_process_group - # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172 - comm.synchronize() - - main_func(*cfg) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/engines/test.py b/point_transformer_v3/pointcept_minimal/pointcept/engines/test.py deleted file mode 100644 index 55f5964..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/engines/test.py +++ /dev/null @@ -1,890 +0,0 @@ -""" -Tester - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -import json -from uuid import uuid4 -import os -import time -import numpy as np -from collections import OrderedDict -import torch -import torch.distributed as dist -import torch.nn.functional as F -import torch.utils.data - -from .defaults import create_ddp_model -import pointcept.utils.comm as comm -from pointcept.datasets import build_dataset, collate_fn -from pointcept.models import build_model -from pointcept.utils.logger import get_root_logger -from pointcept.utils.registry import Registry -from pointcept.utils.misc import ( - AverageMeter, - intersection_and_union, - intersection_and_union_gpu, - make_dirs, -) - - -TESTERS = Registry("testers") - - -class TesterBase: - def __init__(self, cfg, model=None, test_loader=None, verbose=False) -> None: - torch.multiprocessing.set_sharing_strategy("file_system") - self.logger = get_root_logger( - log_file=os.path.join(cfg.save_path, "test.log"), - file_mode="a" if cfg.resume else "w", - ) - self.logger.info("=> Loading config ...") - self.cfg = cfg - self.verbose = verbose - if self.verbose and model is None: - # if model is not none, trigger tester with trainer, no need to print config - self.logger.info(f"Save path: {cfg.save_path}") - self.logger.info(f"Config:\n{cfg.pretty_text}") - if model is None: - self.logger.info("=> Building model ...") - self.model = self.build_model() - else: - self.model = model - if test_loader is None: - self.logger.info("=> Building test dataset & dataloader ...") - self.test_loader = self.build_test_loader() - else: - self.test_loader = test_loader - - def build_model(self): - model = build_model(self.cfg.model) - n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) - self.logger.info(f"Num params: {n_parameters}") - model = create_ddp_model( - model.cuda(), - broadcast_buffers=False, - find_unused_parameters=self.cfg.find_unused_parameters, - ) - if os.path.isfile(self.cfg.weight): - self.logger.info(f"Loading weight at: {self.cfg.weight}") - checkpoint = torch.load(self.cfg.weight, weights_only=False) - weight = OrderedDict() - for key, value in checkpoint["state_dict"].items(): - if key.startswith("module."): - if comm.get_world_size() == 1: - key = key[7:] # module.xxx.xxx -> xxx.xxx - else: - if comm.get_world_size() > 1: - key = "module." + key # xxx.xxx -> module.xxx.xxx - weight[key] = value - model.load_state_dict(weight, strict=True) - self.logger.info( - "=> Loaded weight '{}' (epoch {})".format( - self.cfg.weight, checkpoint["epoch"] - ) - ) - else: - raise RuntimeError("=> No checkpoint found at '{}'".format(self.cfg.weight)) - return model - - def build_test_loader(self): - test_dataset = build_dataset(self.cfg.data.test) - if comm.get_world_size() > 1: - test_sampler = torch.utils.data.distributed.DistributedSampler(test_dataset) - else: - test_sampler = None - test_loader = torch.utils.data.DataLoader( - test_dataset, - batch_size=self.cfg.batch_size_test_per_gpu, - shuffle=False, - num_workers=self.cfg.batch_size_test_per_gpu, - pin_memory=True, - sampler=test_sampler, - collate_fn=self.__class__.collate_fn, - ) - return test_loader - - def test(self): - raise NotImplementedError - - @staticmethod - def collate_fn(batch): - raise collate_fn(batch) - - -@TESTERS.register_module() -class SemSegTester(TesterBase): - def test(self): - assert self.test_loader.batch_size == 1 - logger = get_root_logger() - logger.info(">>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>") - - batch_time = AverageMeter() - intersection_meter = AverageMeter() - union_meter = AverageMeter() - target_meter = AverageMeter() - self.model.eval() - - save_path = os.path.join(self.cfg.save_path, "result") - make_dirs(save_path) - # create submit folder only on main process - if ( - self.cfg.data.test.type == "ScanNetDataset" - or self.cfg.data.test.type == "ScanNet200Dataset" - or self.cfg.data.test.type == "ScanNetPPDataset" - ) and comm.is_main_process(): - make_dirs(os.path.join(save_path, "submit")) - elif ( - self.cfg.data.test.type == "SemanticKITTIDataset" and comm.is_main_process() - ): - make_dirs(os.path.join(save_path, "submit")) - elif self.cfg.data.test.type == "NuScenesDataset" and comm.is_main_process(): - import json - - make_dirs(os.path.join(save_path, "submit", "lidarseg", "test")) - make_dirs(os.path.join(save_path, "submit", "test")) - submission = dict( - meta=dict( - use_camera=False, - use_lidar=True, - use_radar=False, - use_map=False, - use_external=False, - ) - ) - with open( - os.path.join(save_path, "submit", "test", "submission.json"), "w" - ) as f: - json.dump(submission, f, indent=4) - comm.synchronize() - record = {} - # fragment inference - for idx, data_dict in enumerate(self.test_loader): - start = time.time() - data_dict = data_dict[0] # current assume batch size is 1 - fragment_list = data_dict.pop("fragment_list") - segment = data_dict.pop("segment") - data_name = data_dict.pop("name") - pred_save_path = os.path.join(save_path, "{}_pred.npy".format(data_name)) - if os.path.isfile(pred_save_path): - logger.info( - "{}/{}: {}, loaded pred and label.".format( - idx + 1, len(self.test_loader), data_name - ) - ) - pred = np.load(pred_save_path) - if "origin_segment" in data_dict.keys(): - segment = data_dict["origin_segment"] - else: - pred = torch.zeros((segment.size, self.cfg.data.num_classes)).cuda() - for i in range(len(fragment_list)): - fragment_batch_size = 1 - s_i, e_i = i * fragment_batch_size, min( - (i + 1) * fragment_batch_size, len(fragment_list) - ) - input_dict = collate_fn(fragment_list[s_i:e_i]) - for key in input_dict.keys(): - if isinstance(input_dict[key], torch.Tensor): - input_dict[key] = input_dict[key].cuda(non_blocking=True) - idx_part = input_dict["index"] - with torch.no_grad(): - pred_part = self.model(input_dict)["seg_logits"] # (n, k) - pred_part = F.softmax(pred_part, -1) - if self.cfg.empty_cache: - torch.cuda.empty_cache() - bs = 0 - for be in input_dict["offset"]: - pred[idx_part[bs:be], :] += pred_part[bs:be] - bs = be - - logger.info( - "Test: {}/{}-{data_name}, Batch: {batch_idx}/{batch_num}".format( - idx + 1, - len(self.test_loader), - data_name=data_name, - batch_idx=i, - batch_num=len(fragment_list), - ) - ) - if self.cfg.data.test.type == "ScanNetPPDataset": - pred = pred.topk(3, dim=1)[1].data.cpu().numpy() - else: - pred = pred.max(1)[1].data.cpu().numpy() - if "origin_segment" in data_dict.keys(): - assert "inverse" in data_dict.keys() - pred = pred[data_dict["inverse"]] - segment = data_dict["origin_segment"] - np.save(pred_save_path, pred) - if ( - self.cfg.data.test.type == "ScanNetDataset" - or self.cfg.data.test.type == "ScanNet200Dataset" - ): - np.savetxt( - os.path.join(save_path, "submit", "{}.txt".format(data_name)), - self.test_loader.dataset.class2id[pred].reshape([-1, 1]), - fmt="%d", - ) - elif self.cfg.data.test.type == "ScanNetPPDataset": - np.savetxt( - os.path.join(save_path, "submit", "{}.txt".format(data_name)), - pred.astype(np.int32), - delimiter=",", - fmt="%d", - ) - pred = pred[:, 0] # for mIoU, TODO: support top3 mIoU - elif self.cfg.data.test.type == "SemanticKITTIDataset": - # 00_000000 -> 00, 000000 - sequence_name, frame_name = data_name.split("_") - os.makedirs( - os.path.join( - save_path, "submit", "sequences", sequence_name, "predictions" - ), - exist_ok=True, - ) - submit = pred.astype(np.uint32) - submit = np.vectorize( - self.test_loader.dataset.learning_map_inv.__getitem__ - )(submit).astype(np.uint32) - submit.tofile( - os.path.join( - save_path, - "submit", - "sequences", - sequence_name, - "predictions", - f"{frame_name}.label", - ) - ) - elif self.cfg.data.test.type == "NuScenesDataset": - np.array(pred + 1).astype(np.uint8).tofile( - os.path.join( - save_path, - "submit", - "lidarseg", - "test", - "{}_lidarseg.bin".format(data_name), - ) - ) - - intersection, union, target = intersection_and_union( - pred, segment, self.cfg.data.num_classes, self.cfg.data.ignore_index - ) - intersection_meter.update(intersection) - union_meter.update(union) - target_meter.update(target) - record[data_name] = dict( - intersection=intersection, union=union, target=target - ) - - mask = union != 0 - iou_class = intersection / (union + 1e-10) - iou = np.mean(iou_class[mask]) - acc = sum(intersection) / (sum(target) + 1e-10) - - m_iou = np.mean(intersection_meter.sum / (union_meter.sum + 1e-10)) - m_acc = np.mean(intersection_meter.sum / (target_meter.sum + 1e-10)) - - batch_time.update(time.time() - start) - logger.info( - "Test: {} [{}/{}]-{} " - "Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) " - "Accuracy {acc:.4f} ({m_acc:.4f}) " - "mIoU {iou:.4f} ({m_iou:.4f})".format( - data_name, - idx + 1, - len(self.test_loader), - segment.size, - batch_time=batch_time, - acc=acc, - m_acc=m_acc, - iou=iou, - m_iou=m_iou, - ) - ) - - logger.info("Syncing ...") - comm.synchronize() - record_sync = comm.gather(record, dst=0) - - if comm.is_main_process(): - record = {} - for _ in range(len(record_sync)): - r = record_sync.pop() - record.update(r) - del r - intersection = np.sum( - [meters["intersection"] for _, meters in record.items()], axis=0 - ) - union = np.sum([meters["union"] for _, meters in record.items()], axis=0) - target = np.sum([meters["target"] for _, meters in record.items()], axis=0) - - if self.cfg.data.test.type == "S3DISDataset": - torch.save( - dict(intersection=intersection, union=union, target=target), - os.path.join(save_path, f"{self.test_loader.dataset.split}.pth"), - ) - - iou_class = intersection / (union + 1e-10) - accuracy_class = intersection / (target + 1e-10) - mIoU = np.mean(iou_class) - mAcc = np.mean(accuracy_class) - allAcc = sum(intersection) / (sum(target) + 1e-10) - - logger.info( - "Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}".format( - mIoU, mAcc, allAcc - ) - ) - for i in range(self.cfg.data.num_classes): - logger.info( - "Class_{idx} - {name} Result: iou/accuracy {iou:.4f}/{accuracy:.4f}".format( - idx=i, - name=self.cfg.data.names[i], - iou=iou_class[i], - accuracy=accuracy_class[i], - ) - ) - logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") - - @staticmethod - def collate_fn(batch): - return batch - - -@TESTERS.register_module() -class DINOSemSegTester(TesterBase): - def test(self): - assert self.test_loader.batch_size == 1 - logger = get_root_logger() - logger.info(">>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>") - - batch_time = AverageMeter() - intersection_meter = AverageMeter() - union_meter = AverageMeter() - target_meter = AverageMeter() - self.model.eval() - - save_path = os.path.join(self.cfg.save_path, "result") - make_dirs(save_path) - # create submit folder only on main process - if ( - self.cfg.data.test.type == "ScanNetDataset" - or self.cfg.data.test.type == "ScanNet200Dataset" - or self.cfg.data.test.type == "ScanNetPPDataset" - ) and comm.is_main_process(): - make_dirs(os.path.join(save_path, "submit")) - elif ( - self.cfg.data.test.type == "SemanticKITTIDataset" and comm.is_main_process() - ): - make_dirs(os.path.join(save_path, "submit")) - elif self.cfg.data.test.type == "NuScenesDataset" and comm.is_main_process(): - import json - - make_dirs(os.path.join(save_path, "submit", "lidarseg", "test")) - make_dirs(os.path.join(save_path, "submit", "test")) - submission = dict( - meta=dict( - use_camera=False, - use_lidar=True, - use_radar=False, - use_map=False, - use_external=False, - ) - ) - with open( - os.path.join(save_path, "submit", "test", "submission.json"), "w" - ) as f: - json.dump(submission, f, indent=4) - comm.synchronize() - record = {} - # fragment inference - for idx, data_dict in enumerate(self.test_loader): - end = time.time() - data_dict = data_dict[0] # current assume batch size is 1 - fragment_list = data_dict.pop("fragment_list") - segment = data_dict.pop("segment") - data_name = data_dict.pop("name") - dino_coord = data_dict.pop("dino_coord").cuda(non_blocking=True) - dino_feat = data_dict.pop("dino_feat").cuda(non_blocking=True) - dino_offset = data_dict.pop("dino_offset").cuda(non_blocking=True) - pred_save_path = os.path.join(save_path, "{}_pred.npy".format(data_name)) - if os.path.isfile(pred_save_path): - logger.info( - "{}/{}: {}, loaded pred and label.".format( - idx + 1, len(self.test_loader), data_name - ) - ) - pred = np.load(pred_save_path) - if "origin_segment" in data_dict.keys(): - segment = data_dict["origin_segment"] - else: - pred = torch.zeros((segment.size, self.cfg.data.num_classes)).cuda() - for i in range(len(fragment_list)): - fragment_batch_size = 1 - s_i, e_i = i * fragment_batch_size, min( - (i + 1) * fragment_batch_size, len(fragment_list) - ) - input_dict = collate_fn(fragment_list[s_i:e_i]) - for key in input_dict.keys(): - if isinstance(input_dict[key], torch.Tensor): - input_dict[key] = input_dict[key].cuda(non_blocking=True) - input_dict["dino_coord"] = dino_coord - input_dict["dino_feat"] = dino_feat - input_dict["dino_offset"] = dino_offset - idx_part = input_dict["index"] - with torch.no_grad(): - pred_part = self.model(input_dict)["seg_logits"] # (n, k) - pred_part = F.softmax(pred_part, -1) - if self.cfg.empty_cache: - torch.cuda.empty_cache() - bs = 0 - for be in input_dict["offset"]: - pred[idx_part[bs:be], :] += pred_part[bs:be] - bs = be - - logger.info( - "Test: {}/{}-{data_name}, Batch: {batch_idx}/{batch_num}".format( - idx + 1, - len(self.test_loader), - data_name=data_name, - batch_idx=i, - batch_num=len(fragment_list), - ) - ) - if self.cfg.data.test.type == "ScanNetPPDataset": - pred = pred.topk(3, dim=1)[1].data.cpu().numpy() - else: - pred = pred.max(1)[1].data.cpu().numpy() - if "origin_segment" in data_dict.keys(): - assert "inverse" in data_dict.keys() - pred = pred[data_dict["inverse"]] - segment = data_dict["origin_segment"] - np.save(pred_save_path, pred) - if ( - self.cfg.data.test.type == "ScanNetDataset" - or self.cfg.data.test.type == "ScanNet200Dataset" - ): - np.savetxt( - os.path.join(save_path, "submit", "{}.txt".format(data_name)), - self.test_loader.dataset.class2id[pred].reshape([-1, 1]), - fmt="%d", - ) - elif self.cfg.data.test.type == "ScanNetPPDataset": - np.savetxt( - os.path.join(save_path, "submit", "{}.txt".format(data_name)), - pred.astype(np.int32), - delimiter=",", - fmt="%d", - ) - pred = pred[:, 0] # for mIoU, TODO: support top3 mIoU - elif self.cfg.data.test.type == "SemanticKITTIDataset": - # 00_000000 -> 00, 000000 - sequence_name, frame_name = data_name.split("_") - os.makedirs( - os.path.join( - save_path, "submit", "sequences", sequence_name, "predictions" - ), - exist_ok=True, - ) - submit = pred.astype(np.uint32) - submit = np.vectorize( - self.test_loader.dataset.learning_map_inv.__getitem__ - )(submit).astype(np.uint32) - submit.tofile( - os.path.join( - save_path, - "submit", - "sequences", - sequence_name, - "predictions", - f"{frame_name}.label", - ) - ) - elif self.cfg.data.test.type == "NuScenesDataset": - np.array(pred + 1).astype(np.uint8).tofile( - os.path.join( - save_path, - "submit", - "lidarseg", - "test", - "{}_lidarseg.bin".format(data_name), - ) - ) - - intersection, union, target = intersection_and_union( - pred, segment, self.cfg.data.num_classes, self.cfg.data.ignore_index - ) - intersection_meter.update(intersection) - union_meter.update(union) - target_meter.update(target) - record[data_name] = dict( - intersection=intersection, union=union, target=target - ) - - mask = union != 0 - iou_class = intersection / (union + 1e-10) - iou = np.mean(iou_class[mask]) - acc = sum(intersection) / (sum(target) + 1e-10) - - m_iou = np.mean(intersection_meter.sum / (union_meter.sum + 1e-10)) - m_acc = np.mean(intersection_meter.sum / (target_meter.sum + 1e-10)) - - batch_time.update(time.time() - end) - logger.info( - "Test: {} [{}/{}]-{} " - "Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) " - "Accuracy {acc:.4f} ({m_acc:.4f}) " - "mIoU {iou:.4f} ({m_iou:.4f})".format( - data_name, - idx + 1, - len(self.test_loader), - segment.size, - batch_time=batch_time, - acc=acc, - m_acc=m_acc, - iou=iou, - m_iou=m_iou, - ) - ) - - logger.info("Syncing ...") - comm.synchronize() - record_sync = comm.gather(record, dst=0) - - if comm.is_main_process(): - record = {} - for _ in range(len(record_sync)): - r = record_sync.pop() - record.update(r) - del r - intersection = np.sum( - [meters["intersection"] for _, meters in record.items()], axis=0 - ) - union = np.sum([meters["union"] for _, meters in record.items()], axis=0) - target = np.sum([meters["target"] for _, meters in record.items()], axis=0) - - if self.cfg.data.test.type == "S3DISDataset": - torch.save( - dict(intersection=intersection, union=union, target=target), - os.path.join(save_path, f"{self.test_loader.dataset.split}.pth"), - ) - - iou_class = intersection / (union + 1e-10) - accuracy_class = intersection / (target + 1e-10) - mIoU = np.mean(iou_class) - mAcc = np.mean(accuracy_class) - allAcc = sum(intersection) / (sum(target) + 1e-10) - - logger.info( - "Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}".format( - mIoU, mAcc, allAcc - ) - ) - for i in range(self.cfg.data.num_classes): - logger.info( - "Class_{idx} - {name} Result: iou/accuracy {iou:.4f}/{accuracy:.4f}".format( - idx=i, - name=self.cfg.data.names[i], - iou=iou_class[i], - accuracy=accuracy_class[i], - ) - ) - logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") - - @staticmethod - def collate_fn(batch): - return batch - - -@TESTERS.register_module() -class ClsTester(TesterBase): - def test(self): - logger = get_root_logger() - logger.info(">>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>") - batch_time = AverageMeter() - intersection_meter = AverageMeter() - union_meter = AverageMeter() - target_meter = AverageMeter() - self.model.eval() - - for i, input_dict in enumerate(self.test_loader): - for key in input_dict.keys(): - if isinstance(input_dict[key], torch.Tensor): - input_dict[key] = input_dict[key].cuda(non_blocking=True) - end = time.time() - with torch.no_grad(): - output_dict = self.model(input_dict) - output = output_dict["cls_logits"] - pred = output.max(1)[1] - label = input_dict["category"] - intersection, union, target = intersection_and_union_gpu( - pred, label, self.cfg.data.num_classes, self.cfg.data.ignore_index - ) - if comm.get_world_size() > 1: - dist.all_reduce(intersection), dist.all_reduce(union), dist.all_reduce( - target - ) - intersection, union, target = ( - intersection.cpu().numpy(), - union.cpu().numpy(), - target.cpu().numpy(), - ) - intersection_meter.update(intersection), union_meter.update( - union - ), target_meter.update(target) - - accuracy = sum(intersection_meter.val) / (sum(target_meter.val) + 1e-10) - batch_time.update(time.time() - end) - - logger.info( - "Test: [{}/{}] " - "Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) " - "Accuracy {accuracy:.4f} ".format( - i + 1, - len(self.test_loader), - batch_time=batch_time, - accuracy=accuracy, - ) - ) - - iou_class = intersection_meter.sum / (union_meter.sum + 1e-10) - accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10) - mIoU = np.mean(iou_class) - mAcc = np.mean(accuracy_class) - allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10) - logger.info( - "Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.".format( - mIoU, mAcc, allAcc - ) - ) - - for i in range(self.cfg.data.num_classes): - logger.info( - "Class_{idx} - {name} Result: iou/accuracy {iou:.4f}/{accuracy:.4f}".format( - idx=i, - name=self.cfg.data.names[i], - iou=iou_class[i], - accuracy=accuracy_class[i], - ) - ) - logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") - - @staticmethod - def collate_fn(batch): - return collate_fn(batch) - - -@TESTERS.register_module() -class ClsVotingTester(TesterBase): - def __init__( - self, - num_repeat=100, - metric="allAcc", - **kwargs, - ): - super().__init__(**kwargs) - self.num_repeat = num_repeat - self.metric = metric - self.best_idx = 0 - self.best_record = None - self.best_metric = 0 - - def test(self): - for i in range(self.num_repeat): - logger = get_root_logger() - logger.info(f">>>>>>>>>>>>>>>> Start Evaluation {i + 1} >>>>>>>>>>>>>>>>") - record = self.test_once() - if comm.is_main_process(): - if record[self.metric] > self.best_metric: - self.best_record = record - self.best_idx = i - self.best_metric = record[self.metric] - info = f"Current best record is Evaluation {i + 1}: " - for m in self.best_record.keys(): - info += f"{m}: {self.best_record[m]:.4f} " - logger.info(info) - - def test_once(self): - logger = get_root_logger() - batch_time = AverageMeter() - intersection_meter = AverageMeter() - target_meter = AverageMeter() - record = {} - self.model.eval() - - for idx, data_dict in enumerate(self.test_loader): - end = time.time() - data_dict = data_dict[0] # current assume batch size is 1 - voting_list = data_dict.pop("voting_list") - category = data_dict.pop("category") - data_name = data_dict.pop("name") - # pred = torch.zeros([1, self.cfg.data.num_classes]).cuda() - # for i in range(len(voting_list)): - # input_dict = voting_list[i] - # for key in input_dict.keys(): - # if isinstance(input_dict[key], torch.Tensor): - # input_dict[key] = input_dict[key].cuda(non_blocking=True) - # with torch.no_grad(): - # pred += F.softmax(self.model(input_dict)["cls_logits"], -1) - input_dict = collate_fn(voting_list) - for key in input_dict.keys(): - if isinstance(input_dict[key], torch.Tensor): - input_dict[key] = input_dict[key].cuda(non_blocking=True) - with torch.no_grad(): - pred = F.softmax(self.model(input_dict)["cls_logits"], -1).sum( - 0, keepdim=True - ) - pred = pred.max(1)[1].cpu().numpy() - intersection, union, target = intersection_and_union( - pred, category, self.cfg.data.num_classes, self.cfg.data.ignore_index - ) - intersection_meter.update(intersection) - target_meter.update(target) - record[data_name] = dict(intersection=intersection, target=target) - acc = sum(intersection) / (sum(target) + 1e-10) - m_acc = np.mean(intersection_meter.sum / (target_meter.sum + 1e-10)) - batch_time.update(time.time() - end) - logger.info( - "Test: {} [{}/{}] " - "Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) " - "Accuracy {acc:.4f} ({m_acc:.4f}) ".format( - data_name, - idx + 1, - len(self.test_loader), - batch_time=batch_time, - acc=acc, - m_acc=m_acc, - ) - ) - - logger.info("Syncing ...") - comm.synchronize() - record_sync = comm.gather(record, dst=0) - - if comm.is_main_process(): - record = {} - for _ in range(len(record_sync)): - r = record_sync.pop() - record.update(r) - del r - intersection = np.sum( - [meters["intersection"] for _, meters in record.items()], axis=0 - ) - target = np.sum([meters["target"] for _, meters in record.items()], axis=0) - accuracy_class = intersection / (target + 1e-10) - mAcc = np.mean(accuracy_class) - allAcc = sum(intersection) / (sum(target) + 1e-10) - - logger.info("Val result: mAcc/allAcc {:.4f}/{:.4f}".format(mAcc, allAcc)) - for i in range(self.cfg.data.num_classes): - logger.info( - "Class_{idx} - {name} Result: iou/accuracy {accuracy:.4f}".format( - idx=i, - name=self.cfg.data.names[i], - accuracy=accuracy_class[i], - ) - ) - return dict(mAcc=mAcc, allAcc=allAcc) - - @staticmethod - def collate_fn(batch): - return batch - - -@TESTERS.register_module() -class PartSegTester(TesterBase): - def test(self): - test_dataset = self.test_loader.dataset - logger = get_root_logger() - logger.info(">>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>") - - batch_time = AverageMeter() - - num_categories = len(self.test_loader.dataset.categories) - iou_category, iou_count = np.zeros(num_categories), np.zeros(num_categories) - self.model.eval() - - save_path = os.path.join( - self.cfg.save_path, "result", "test_epoch{}".format(self.cfg.test_epoch) - ) - make_dirs(save_path) - - for idx in range(len(test_dataset)): - end = time.time() - data_name = test_dataset.get_data_name(idx) - - data_dict_list, label = test_dataset[idx] - pred = torch.zeros((label.size, self.cfg.data.num_classes)).cuda() - batch_num = int(np.ceil(len(data_dict_list) / self.cfg.batch_size_test)) - for i in range(batch_num): - s_i, e_i = i * self.cfg.batch_size_test, min( - (i + 1) * self.cfg.batch_size_test, len(data_dict_list) - ) - input_dict = collate_fn(data_dict_list[s_i:e_i]) - for key in input_dict.keys(): - if isinstance(input_dict[key], torch.Tensor): - input_dict[key] = input_dict[key].cuda(non_blocking=True) - with torch.no_grad(): - pred_part = self.model(input_dict)["cls_logits"] - pred_part = F.softmax(pred_part, -1) - if self.cfg.empty_cache: - torch.cuda.empty_cache() - pred_part = pred_part.reshape(-1, label.size, self.cfg.data.num_classes) - pred = pred + pred_part.total(dim=0) - logger.info( - "Test: {} {}/{}, Batch: {batch_idx}/{batch_num}".format( - data_name, - idx + 1, - len(test_dataset), - batch_idx=i, - batch_num=batch_num, - ) - ) - pred = pred.max(1)[1].data.cpu().numpy() - - category_index = data_dict_list[0]["cls_token"] - category = self.test_loader.dataset.categories[category_index] - parts_idx = self.test_loader.dataset.category2part[category] - parts_iou = np.zeros(len(parts_idx)) - for j, part in enumerate(parts_idx): - if (np.sum(label == part) == 0) and (np.sum(pred == part) == 0): - parts_iou[j] = 1.0 - else: - i = (label == part) & (pred == part) - u = (label == part) | (pred == part) - parts_iou[j] = np.sum(i) / (np.sum(u) + 1e-10) - iou_category[category_index] += parts_iou.mean() - iou_count[category_index] += 1 - - batch_time.update(time.time() - end) - logger.info( - "Test: {} [{}/{}] " - "Batch {batch_time.val:.3f} " - "({batch_time.avg:.3f}) ".format( - data_name, idx + 1, len(self.test_loader), batch_time=batch_time - ) - ) - - ins_mIoU = iou_category.sum() / (iou_count.sum() + 1e-10) - cat_mIoU = (iou_category / (iou_count + 1e-10)).mean() - logger.info( - "Val result: ins.mIoU/cat.mIoU {:.4f}/{:.4f}.".format(ins_mIoU, cat_mIoU) - ) - for i in range(num_categories): - logger.info( - "Class_{idx}-{name} Result: iou_cat/num_sample {iou_cat:.4f}/{iou_count:.4f}".format( - idx=i, - name=self.test_loader.dataset.categories[i], - iou_cat=iou_category[i] / (iou_count[i] + 1e-10), - iou_count=int(iou_count[i]), - ) - ) - logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") - - @staticmethod - def collate_fn(batch): - return collate_fn(batch) - - diff --git a/point_transformer_v3/pointcept_minimal/pointcept/engines/train.py b/point_transformer_v3/pointcept_minimal/pointcept/engines/train.py deleted file mode 100644 index fea77cd..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/engines/train.py +++ /dev/null @@ -1,372 +0,0 @@ -""" -Trainer - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -import os -import sys -import weakref -import wandb -import torch -import torch.nn as nn -import torch.utils.data -from packaging import version -from functools import partial -from pathlib import Path - -if sys.version_info >= (3, 10): - from collections.abc import Iterator -else: - from collections import Iterator -from tensorboardX import SummaryWriter - -from .defaults import create_ddp_model, worker_init_fn -from .hooks import HookBase, build_hooks -import pointcept.utils.comm as comm -from pointcept.datasets import build_dataset, point_collate_fn, collate_fn -from pointcept.models import build_model -from pointcept.utils.logger import get_root_logger -from pointcept.utils.optimizer import build_optimizer -from pointcept.utils.scheduler import build_scheduler -from pointcept.utils.events import EventStorage, ExceptionWriter -from pointcept.utils.registry import Registry - - -TRAINERS = Registry("trainers") -AMP_DTYPE = dict( - float16=torch.float16, - bfloat16=torch.bfloat16, -) - - -class TrainerBase: - def __init__(self) -> None: - self.hooks = [] - self.model = None - self.epoch = 0 - self.start_epoch = 0 - self.max_epoch = 0 - self.max_iter = 0 - self.comm_info = dict() - self.data_iterator: Iterator = enumerate([]) - self.storage: EventStorage - self.writer: SummaryWriter - - def register_hooks(self, hooks) -> None: - hooks = build_hooks(hooks) - for h in hooks: - assert isinstance(h, HookBase) - # To avoid circular reference, hooks and trainer cannot own each other. - # This normally does not matter, but will cause memory leak if the - # involved objects contain __del__: - # See http://engineering.hearsaysocial.com/2013/06/16/circular-references-in-python/ - h.trainer = weakref.proxy(self) - self.hooks.extend(hooks) - - def train(self): - with EventStorage() as self.storage: - # => before train - self.before_train() - for self.epoch in range(self.start_epoch, self.max_epoch): - # => before epoch - self.before_epoch() - # => run_epoch - for ( - self.comm_info["iter"], - self.comm_info["input_dict"], - ) in self.data_iterator: - # => before_step - self.before_step() - # => run_step - self.run_step() - # => after_step - self.after_step() - # => after epoch - self.after_epoch() - # => after train - self.after_train() - - def before_train(self): - for h in self.hooks: - h.before_train() - - def before_epoch(self): - for h in self.hooks: - h.before_epoch() - - def before_step(self): - for h in self.hooks: - h.before_step() - - def run_step(self): - raise NotImplementedError - - def after_step(self): - for h in self.hooks: - h.after_step() - - def after_epoch(self): - for h in self.hooks: - h.after_epoch() - self.storage.reset_histories() - - def after_train(self): - # Sync GPU before running train hooks - comm.synchronize() - for h in self.hooks: - h.after_train() - if comm.is_main_process(): - self.writer.close() - - -@TRAINERS.register_module("DefaultTrainer") -class Trainer(TrainerBase): - def __init__(self, cfg): - super(Trainer, self).__init__() - self.epoch = 0 - self.start_epoch = 0 - self.max_epoch = cfg.eval_epoch - self.best_metric_value = -torch.inf - self.logger = get_root_logger( - log_file=os.path.join(cfg.save_path, "train.log"), - file_mode="a" if cfg.resume else "w", - ) - self.logger.info("=> Loading config ...") - self.cfg = cfg - self.logger.info(f"Save path: {cfg.save_path}") - self.logger.info(f"Config:\n{cfg.pretty_text}") - self.logger.info("=> Building model ...") - self.model = self.build_model() - self.logger.info("=> Building writer ...") - self.writer = self.build_writer() - self.logger.info("=> Building train dataset & dataloader ...") - self.train_loader = self.build_train_loader() - self.logger.info("=> Building val dataset & dataloader ...") - self.val_loader = self.build_val_loader() - self.logger.info("=> Building optimize, scheduler, scaler(amp) ...") - self.optimizer = self.build_optimizer() - self.scheduler = self.build_scheduler() - self.scaler = self.build_scaler() - self.logger.info("=> Building hooks ...") - self.register_hooks(self.cfg.hooks) - self._gradient_accumulation_counter = 0 - - def train(self): - with EventStorage() as self.storage, ExceptionWriter(): - # => before train - self.before_train() - self.logger.info(">>>>>>>>>>>>>>>> Start Training >>>>>>>>>>>>>>>>") - for self.epoch in range(self.start_epoch, self.max_epoch): - # => before epoch - if comm.get_world_size() > 1: - self.train_loader.sampler.set_epoch(self.epoch) - self.model.train() - self.data_iterator = enumerate(self.train_loader) - self.before_epoch() - # => run_epoch - for ( - self.comm_info["iter"], - self.comm_info["input_dict"], - ) in self.data_iterator: - # => before_step - self.before_step() - # => run_step - self.run_step() - # => after_step - self.after_step() - # => after epoch - self.after_epoch() - # => after train - self.after_train() - - def run_step(self): - if version.parse(torch.__version__) >= version.parse("2.4"): - auto_cast = partial(torch.amp.autocast, device_type="cuda") - else: - # deprecated warning - auto_cast = torch.cuda.amp.autocast - - input_dict = self.comm_info["input_dict"] - for key in input_dict.keys(): - if isinstance(input_dict[key], torch.Tensor): - input_dict[key] = input_dict[key].cuda(non_blocking=True) - - # Only clear gradients on first accumulation step - if self._gradient_accumulation_counter == 0: - self.optimizer.zero_grad() - - # Forward pass - with auto_cast( - enabled=self.cfg.enable_amp, dtype=AMP_DTYPE[self.cfg.amp_dtype] - ): - output_dict = self.model(input_dict) - loss = ( - output_dict["loss"] / self.cfg.gradient_accumulation_steps - ) # scale loss - - # Backward pass - if self.cfg.enable_amp: - self.scaler.scale(loss).backward() - else: - loss.backward() - self._gradient_accumulation_counter += 1 - - # Perform optimizer step only when enough gradients have accumulated - if self._gradient_accumulation_counter >= self.cfg.gradient_accumulation_steps: - if self.cfg.enable_amp: - self.scaler.unscale_(self.optimizer) - if self.cfg.clip_grad is not None: - torch.nn.utils.clip_grad_norm_( - self.model.parameters(), self.cfg.clip_grad - ) - self.scaler.step(self.optimizer) - - # When enable amp, optimizer.step call are skipped if the loss scaling factor is too large. - # Fix torch warning scheduler step before optimizer step. - scale = self.scaler.get_scale() - self.scaler.update() - if scale <= self.scaler.get_scale(): - self.scheduler.step() - else: - if self.cfg.clip_grad is not None: - torch.nn.utils.clip_grad_norm_( - self.model.parameters(), self.cfg.clip_grad - ) - self.optimizer.step() - self.scheduler.step() - - # Reset grad accumulation counter - self._gradient_accumulation_counter = 0 - - if self.cfg.empty_cache: - torch.cuda.empty_cache() - self.comm_info["model_output_dict"] = output_dict - - def after_epoch(self): - for h in self.hooks: - h.after_epoch() - self.storage.reset_histories() - if self.cfg.empty_cache_per_epoch: - torch.cuda.empty_cache() - - def build_model(self): - model = build_model(self.cfg.model) - if self.cfg.sync_bn: - model = nn.SyncBatchNorm.convert_sync_batchnorm(model) - n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) - # logger.info(f"Model: \n{self.model}") - self.logger.info(f"Num params: {n_parameters}") - model = create_ddp_model( - model.cuda(), - broadcast_buffers=False, - find_unused_parameters=self.cfg.find_unused_parameters, - ) - return model - - def build_writer(self): - writer = SummaryWriter(self.cfg.save_path) if comm.is_main_process() else None - self.logger.info(f"Tensorboard writer logging dir: {self.cfg.save_path}") - if self.cfg.enable_wandb and comm.is_main_process(): - tag, name = Path(self.cfg.save_path).parts[-2:] - wandb.init( - project=self.cfg.wandb_project, - name=f"{tag}/{name}", - tags=[tag], - dir=self.cfg.save_path, - settings=wandb.Settings(api_key=self.cfg.wandb_key), - config=self.cfg, - ) - return writer - - def build_train_loader(self): - train_data = build_dataset(self.cfg.data.train) - - if comm.get_world_size() > 1: - train_sampler = torch.utils.data.distributed.DistributedSampler(train_data) - else: - train_sampler = None - - init_fn = ( - partial( - worker_init_fn, - num_workers=self.cfg.num_worker_per_gpu, - rank=comm.get_rank(), - seed=self.cfg.seed, - ) - if self.cfg.seed is not None - else None - ) - - train_loader = torch.utils.data.DataLoader( - train_data, - batch_size=self.cfg.batch_size_per_gpu, - shuffle=(train_sampler is None), - num_workers=self.cfg.num_worker_per_gpu, - sampler=train_sampler, - collate_fn=partial(point_collate_fn, mix_prob=self.cfg.mix_prob), - pin_memory=True, - worker_init_fn=init_fn, - drop_last=len(train_data) > self.cfg.batch_size, - persistent_workers=True, - ) - return train_loader - - def build_val_loader(self): - val_loader = None - if self.cfg.evaluate: - val_data = build_dataset(self.cfg.data.val) - if comm.get_world_size() > 1: - val_sampler = torch.utils.data.distributed.DistributedSampler(val_data) - else: - val_sampler = None - val_loader = torch.utils.data.DataLoader( - val_data, - batch_size=self.cfg.batch_size_val_per_gpu, - shuffle=False, - num_workers=self.cfg.num_worker_per_gpu, - pin_memory=True, - sampler=val_sampler, - collate_fn=collate_fn, - ) - return val_loader - - def build_optimizer(self): - return build_optimizer(self.cfg.optimizer, self.model, self.cfg.param_dicts) - - def build_scheduler(self): - assert hasattr(self, "optimizer") - assert hasattr(self, "train_loader") - self.cfg.scheduler.total_steps = ( - len(self.train_loader) - * self.cfg.eval_epoch - // self.cfg.gradient_accumulation_steps - ) - return build_scheduler(self.cfg.scheduler, self.optimizer) - - def build_scaler(self): - if version.parse(torch.__version__) >= version.parse("2.4"): - grad_scaler = partial(torch.amp.GradScaler, device="cuda") - else: - # deprecated warning - grad_scaler = torch.cuda.amp.GradScaler - scaler = grad_scaler() if self.cfg.enable_amp else None - return scaler - - -@TRAINERS.register_module("MultiDatasetTrainer") -class MultiDatasetTrainer(Trainer): - def build_train_loader(self): - from pointcept.datasets import MultiDatasetDataloader - - train_data = build_dataset(self.cfg.data.train) - train_loader = MultiDatasetDataloader( - train_data, - self.cfg.batch_size_per_gpu, - self.cfg.num_worker_per_gpu, - self.cfg.mix_prob, - self.cfg.seed, - ) - self.comm_info["iter_per_epoch"] = len(train_loader) - return train_loader diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/__init__.py b/point_transformer_v3/pointcept_minimal/pointcept/models/__init__.py deleted file mode 100644 index 1a1a1a4..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/models/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from .builder import build_model -from .default import DefaultSegmentor, DefaultClassifier -from .modules import PointModule, PointModel - -# Backbones -from .point_transformer_v3 import * - -# Semantic Segmentation -from .context_aware_classifier import * - diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/builder.py b/point_transformer_v3/pointcept_minimal/pointcept/models/builder.py deleted file mode 100644 index 8c723d7..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/models/builder.py +++ /dev/null @@ -1,17 +0,0 @@ -""" -Model Builder - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -import copy -from pointcept.utils.registry import Registry - -MODELS = Registry("models") -MODULES = Registry("modules") - - -def build_model(cfg): - """Build models.""" - return MODELS.build(copy.deepcopy(cfg)) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/default.py b/point_transformer_v3/pointcept_minimal/pointcept/models/default.py deleted file mode 100644 index da934e5..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/models/default.py +++ /dev/null @@ -1,230 +0,0 @@ -import torch -import torch.nn as nn -import torch_scatter -import torch_cluster -from collections import OrderedDict - -from pointcept.models.losses import build_criteria -from pointcept.models.utils.structure import Point -from pointcept.models.utils import offset2batch -from .builder import MODELS, build_model - - -@MODELS.register_module() -class DefaultSegmentor(nn.Module): - def __init__(self, backbone=None, criteria=None): - super().__init__() - self.backbone = build_model(backbone) - self.criteria = build_criteria(criteria) - - def forward(self, input_dict): - if "condition" in input_dict.keys(): - # PPT (https://arxiv.org/abs/2308.09718) - # currently, only support one batch one condition - input_dict["condition"] = input_dict["condition"][0] - seg_logits = self.backbone(input_dict) - # train - if self.training: - loss = self.criteria(seg_logits, input_dict["segment"]) - return dict(loss=loss) - # eval - elif "segment" in input_dict.keys(): - loss = self.criteria(seg_logits, input_dict["segment"]) - return dict(loss=loss, seg_logits=seg_logits) - # test - else: - return dict(seg_logits=seg_logits) - - -@MODELS.register_module() -class DefaultSegmentorV2(nn.Module): - def __init__( - self, - num_classes, - backbone_out_channels, - backbone=None, - criteria=None, - freeze_backbone=False, - ): - super().__init__() - self.seg_head = ( - nn.Linear(backbone_out_channels, num_classes) - if num_classes > 0 - else nn.Identity() - ) - self.backbone = build_model(backbone) - self.criteria = build_criteria(criteria) - self.freeze_backbone = freeze_backbone - if self.freeze_backbone: - for p in self.backbone.parameters(): - p.requires_grad = False - - def forward(self, input_dict, return_point=False): - point = Point(input_dict) - point = self.backbone(point) - # Backbone added after v1.5.0 return Point instead of feat and use DefaultSegmentorV2 - # TODO: remove this part after make all backbone return Point only. - if isinstance(point, Point): - while "pooling_parent" in point.keys(): - assert "pooling_inverse" in point.keys() - parent = point.pop("pooling_parent") - inverse = point.pop("pooling_inverse") - parent.feat = torch.cat([parent.feat, point.feat[inverse]], dim=-1) - point = parent - feat = point.feat - else: - feat = point - seg_logits = self.seg_head(feat) - return_dict = dict() - if return_point: - # PCA evaluator parse feat and coord in point - return_dict["point"] = point - # train - if self.training: - loss = self.criteria(seg_logits, input_dict["segment"]) - return_dict["loss"] = loss - # eval - elif "segment" in input_dict.keys(): - loss = self.criteria(seg_logits, input_dict["segment"]) - return_dict["loss"] = loss - return_dict["seg_logits"] = seg_logits - # test - else: - return_dict["seg_logits"] = seg_logits - return return_dict - - - -@MODELS.register_module() -class DINOEnhancedSegmentor(nn.Module): - def __init__( - self, - num_classes, - backbone_out_channels, - backbone=None, - criteria=None, - freeze_backbone=False, - ): - super().__init__() - self.seg_head = ( - nn.Linear(backbone_out_channels, num_classes) - if num_classes > 0 - else nn.Identity() - ) - self.backbone = build_model(backbone) if backbone is not None else None - self.criteria = build_criteria(criteria) - self.freeze_backbone = freeze_backbone - if self.backbone is not None and self.freeze_backbone: - for p in self.backbone.parameters(): - p.requires_grad = False - - def forward(self, input_dict, return_point=False): - point = Point(input_dict) - if self.backbone is not None: - if self.freeze_backbone: - with torch.no_grad(): - point = self.backbone(point) - else: - point = self.backbone(point) - point_list = [point] - while "unpooling_parent" in point_list[-1].keys(): - point_list.append(point_list[-1].pop("unpooling_parent")) - for i in reversed(range(1, len(point_list))): - point = point_list[i] - parent = point_list[i - 1] - assert "pooling_inverse" in point.keys() - inverse = point.pooling_inverse - parent.feat = torch.cat([parent.feat, point.feat[inverse]], dim=-1) - point = point_list[0] - while "pooling_parent" in point.keys(): - assert "pooling_inverse" in point.keys() - parent = point.pop("pooling_parent") - inverse = point.pooling_inverse - parent.feat = torch.cat([parent.feat, point.feat[inverse]], dim=-1) - point = parent - feat = [point.feat] - else: - feat = [] - dino_coord = input_dict["dino_coord"] - dino_feat = input_dict["dino_feat"] - dino_offset = input_dict["dino_offset"] - idx = torch_cluster.knn( - x=dino_coord, - y=point.origin_coord, - batch_x=offset2batch(dino_offset), - batch_y=offset2batch(point.origin_offset), - k=1, - )[1] - - feat.append(dino_feat[idx]) - feat = torch.concatenate(feat, dim=-1) - seg_logits = self.seg_head(feat) - return_dict = dict() - if return_point: - # PCA evaluator parse feat and coord in point - return_dict["point"] = point - # train - if self.training: - loss = self.criteria(seg_logits, input_dict["segment"]) - return_dict["loss"] = loss - # eval - elif "segment" in input_dict.keys(): - loss = self.criteria(seg_logits, input_dict["segment"]) - return_dict["loss"] = loss - return_dict["seg_logits"] = seg_logits - # test - else: - return_dict["seg_logits"] = seg_logits - return return_dict - - -@MODELS.register_module() -class DefaultClassifier(nn.Module): - def __init__( - self, - backbone=None, - criteria=None, - num_classes=40, - backbone_embed_dim=256, - ): - super().__init__() - self.backbone = build_model(backbone) - self.criteria = build_criteria(criteria) - self.num_classes = num_classes - self.backbone_embed_dim = backbone_embed_dim - self.cls_head = nn.Sequential( - nn.Linear(backbone_embed_dim, 256), - nn.BatchNorm1d(256), - nn.ReLU(inplace=True), - nn.Dropout(p=0.5), - nn.Linear(256, 128), - nn.BatchNorm1d(128), - nn.ReLU(inplace=True), - nn.Dropout(p=0.5), - nn.Linear(128, num_classes), - ) - - def forward(self, input_dict): - point = Point(input_dict) - point = self.backbone(point) - # Backbone added after v1.5.0 return Point instead of feat - # And after v1.5.0 feature aggregation for classification operated in classifier - # TODO: remove this part after make all backbone return Point only. - if isinstance(point, Point): - point.feat = torch_scatter.segment_csr( - src=point.feat, - indptr=nn.functional.pad(point.offset, (1, 0)), - reduce="mean", - ) - feat = point.feat - else: - feat = point - cls_logits = self.cls_head(feat) - if self.training: - loss = self.criteria(cls_logits, input_dict["category"]) - return dict(loss=loss) - elif "category" in input_dict.keys(): - loss = self.criteria(cls_logits, input_dict["category"]) - return dict(loss=loss, cls_logits=cls_logits) - else: - return dict(cls_logits=cls_logits) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/losses/__init__.py b/point_transformer_v3/pointcept_minimal/pointcept/models/losses/__init__.py deleted file mode 100644 index 0f4f29c..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/models/losses/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .builder import build_criteria, LOSSES - -from .misc import CrossEntropyLoss, SmoothCELoss, DiceLoss, FocalLoss, BinaryFocalLoss -from .lovasz import LovaszLoss diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/losses/builder.py b/point_transformer_v3/pointcept_minimal/pointcept/models/losses/builder.py deleted file mode 100644 index ef642d9..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/models/losses/builder.py +++ /dev/null @@ -1,31 +0,0 @@ -""" -Criteria Builder - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -from pointcept.utils.registry import Registry - -LOSSES = Registry("losses") - - -class Criteria(object): - def __init__(self, cfg=None): - self.cfg = cfg if cfg is not None else [] - self.criteria = [] - for loss_cfg in self.cfg: - self.criteria.append(LOSSES.build(cfg=loss_cfg)) - - def __call__(self, pred, target): - if len(self.criteria) == 0: - # loss computation occur in model - return pred - loss = 0 - for c in self.criteria: - loss += c(pred, target) - return loss - - -def build_criteria(cfg): - return Criteria(cfg) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/losses/lovasz.py b/point_transformer_v3/pointcept_minimal/pointcept/models/losses/lovasz.py deleted file mode 100644 index 690c2ba..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/models/losses/lovasz.py +++ /dev/null @@ -1,257 +0,0 @@ -""" -Lovasz Loss -refer https://arxiv.org/abs/1705.08790 - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -from typing import Optional -from itertools import filterfalse -import torch -import torch.nn.functional as F -from torch.nn.modules.loss import _Loss - -from .builder import LOSSES - -BINARY_MODE: str = "binary" -MULTICLASS_MODE: str = "multiclass" -MULTILABEL_MODE: str = "multilabel" - - -def _lovasz_grad(gt_sorted): - """Compute gradient of the Lovasz extension w.r.t sorted errors - See Alg. 1 in paper - """ - p = len(gt_sorted) - gts = gt_sorted.sum() - intersection = gts - gt_sorted.float().cumsum(0) - union = gts + (1 - gt_sorted).float().cumsum(0) - jaccard = 1.0 - intersection / union - if p > 1: # cover 1-pixel case - jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] - return jaccard - - -def _lovasz_hinge(logits, labels, per_image=True, ignore=None): - """ - Binary Lovasz hinge loss - logits: [B, H, W] Logits at each pixel (between -infinity and +infinity) - labels: [B, H, W] Tensor, binary ground truth masks (0 or 1) - per_image: compute the loss per image instead of per batch - ignore: void class id - """ - if per_image: - loss = mean( - _lovasz_hinge_flat( - *_flatten_binary_scores(log.unsqueeze(0), lab.unsqueeze(0), ignore) - ) - for log, lab in zip(logits, labels) - ) - else: - loss = _lovasz_hinge_flat(*_flatten_binary_scores(logits, labels, ignore)) - return loss - - -def _lovasz_hinge_flat(logits, labels): - """Binary Lovasz hinge loss - Args: - logits: [P] Logits at each prediction (between -infinity and +infinity) - labels: [P] Tensor, binary ground truth labels (0 or 1) - """ - if len(labels) == 0: - # only void pixels, the gradients should be 0 - return logits.sum() * 0.0 - signs = 2.0 * labels.float() - 1.0 - errors = 1.0 - logits * signs - errors_sorted, perm = torch.sort(errors, dim=0, descending=True) - perm = perm.data - gt_sorted = labels[perm] - grad = _lovasz_grad(gt_sorted) - loss = torch.dot(F.relu(errors_sorted), grad) - return loss - - -def _flatten_binary_scores(scores, labels, ignore=None): - """Flattens predictions in the batch (binary case) - Remove labels equal to 'ignore' - """ - scores = scores.view(-1) - labels = labels.view(-1) - if ignore is None: - return scores, labels - valid = labels != ignore - vscores = scores[valid] - vlabels = labels[valid] - return vscores, vlabels - - -def _lovasz_softmax( - probas, labels, classes="present", class_seen=None, per_image=False, ignore=None -): - """Multi-class Lovasz-Softmax loss - Args: - @param probas: [B, C, H, W] Class probabilities at each prediction (between 0 and 1). - Interpreted as binary (sigmoid) output with outputs of size [B, H, W]. - @param labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1) - @param classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. - @param per_image: compute the loss per image instead of per batch - @param ignore: void class labels - """ - if per_image: - loss = mean( - _lovasz_softmax_flat( - *_flatten_probas(prob.unsqueeze(0), lab.unsqueeze(0), ignore), - classes=classes - ) - for prob, lab in zip(probas, labels) - ) - else: - loss = _lovasz_softmax_flat( - *_flatten_probas(probas, labels, ignore), - classes=classes, - class_seen=class_seen - ) - return loss - - -def _lovasz_softmax_flat(probas, labels, classes="present", class_seen=None): - """Multi-class Lovasz-Softmax loss - Args: - @param probas: [P, C] Class probabilities at each prediction (between 0 and 1) - @param labels: [P] Tensor, ground truth labels (between 0 and C - 1) - @param classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. - """ - if probas.numel() == 0: - # only void pixels, the gradients should be 0 - return probas * 0.0 - C = probas.size(1) - losses = [] - class_to_sum = list(range(C)) if classes in ["all", "present"] else classes - # for c in class_to_sum: - for c in labels.unique(): - if class_seen is None: - fg = (labels == c).type_as(probas) # foreground for class c - if classes == "present" and fg.sum() == 0: - continue - if C == 1: - if len(classes) > 1: - raise ValueError("Sigmoid output possible only with 1 class") - class_pred = probas[:, 0] - else: - class_pred = probas[:, c] - errors = (fg - class_pred).abs() - errors_sorted, perm = torch.sort(errors, 0, descending=True) - perm = perm.data - fg_sorted = fg[perm] - losses.append(torch.dot(errors_sorted, _lovasz_grad(fg_sorted))) - else: - if c in class_seen: - fg = (labels == c).type_as(probas) # foreground for class c - if classes == "present" and fg.sum() == 0: - continue - if C == 1: - if len(classes) > 1: - raise ValueError("Sigmoid output possible only with 1 class") - class_pred = probas[:, 0] - else: - class_pred = probas[:, c] - errors = (fg - class_pred).abs() - errors_sorted, perm = torch.sort(errors, 0, descending=True) - perm = perm.data - fg_sorted = fg[perm] - losses.append(torch.dot(errors_sorted, _lovasz_grad(fg_sorted))) - return mean(losses) - - -def _flatten_probas(probas, labels, ignore=None): - """Flattens predictions in the batch""" - if probas.dim() == 3: - # assumes output of a sigmoid layer - B, H, W = probas.size() - probas = probas.view(B, 1, H, W) - - C = probas.size(1) - probas = torch.movedim(probas, 1, -1) # [B, C, Di, Dj, ...] -> [B, Di, Dj, ..., C] - probas = probas.contiguous().view(-1, C) # [P, C] - - labels = labels.view(-1) - if ignore is None: - return probas, labels - valid = labels != ignore - vprobas = probas[valid] - vlabels = labels[valid] - return vprobas, vlabels - - -def isnan(x): - return x != x - - -def mean(values, ignore_nan=False, empty=0): - """Nan-mean compatible with generators.""" - values = iter(values) - if ignore_nan: - values = filterfalse(isnan, values) - try: - n = 1 - acc = next(values) - except StopIteration: - if empty == "raise": - raise ValueError("Empty mean") - return empty - for n, v in enumerate(values, 2): - acc += v - if n == 1: - return acc - return acc / n - - -@LOSSES.register_module() -class LovaszLoss(_Loss): - def __init__( - self, - mode: str, - class_seen: Optional[int] = None, - per_image: bool = False, - ignore_index: Optional[int] = None, - loss_weight: float = 1.0, - ): - """Lovasz loss for segmentation task. - It supports binary, multiclass and multilabel cases - Args: - mode: Loss mode 'binary', 'multiclass' or 'multilabel' - ignore_index: Label that indicates ignored pixels (does not contribute to loss) - per_image: If True loss computed per each image and then averaged, else computed per whole batch - Shape - - **y_pred** - torch.Tensor of shape (N, C, H, W) - - **y_true** - torch.Tensor of shape (N, H, W) or (N, C, H, W) - Reference - https://github.com/BloodAxe/pytorch-toolbelt - """ - assert mode in {BINARY_MODE, MULTILABEL_MODE, MULTICLASS_MODE} - super().__init__() - - self.mode = mode - self.ignore_index = ignore_index - self.per_image = per_image - self.class_seen = class_seen - self.loss_weight = loss_weight - - def forward(self, y_pred, y_true): - if self.mode in {BINARY_MODE, MULTILABEL_MODE}: - loss = _lovasz_hinge( - y_pred, y_true, per_image=self.per_image, ignore=self.ignore_index - ) - elif self.mode == MULTICLASS_MODE: - y_pred = y_pred.softmax(dim=1) - loss = _lovasz_softmax( - y_pred, - y_true, - class_seen=self.class_seen, - per_image=self.per_image, - ignore=self.ignore_index, - ) - else: - raise ValueError("Wrong mode {}.".format(self.mode)) - return loss * self.loss_weight diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/losses/misc.py b/point_transformer_v3/pointcept_minimal/pointcept/models/losses/misc.py deleted file mode 100644 index ec300a5..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/models/losses/misc.py +++ /dev/null @@ -1,223 +0,0 @@ -""" -Misc Losses - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -import torch -import torch.nn as nn -import torch.nn.functional as F -from .builder import LOSSES - - -@LOSSES.register_module() -class CrossEntropyLoss(nn.Module): - def __init__( - self, - weight=None, - size_average=None, - reduce=None, - reduction="mean", - label_smoothing=0.0, - loss_weight=1.0, - ignore_index=-1, - ): - super(CrossEntropyLoss, self).__init__() - weight = torch.tensor(weight).cuda() if weight is not None else None - self.loss_weight = loss_weight - self.loss = nn.CrossEntropyLoss( - weight=weight, - size_average=size_average, - ignore_index=ignore_index, - reduce=reduce, - reduction=reduction, - label_smoothing=label_smoothing, - ) - - def forward(self, pred, target): - return self.loss(pred, target) * self.loss_weight - - -@LOSSES.register_module() -class SmoothCELoss(nn.Module): - def __init__(self, smoothing_ratio=0.1): - super(SmoothCELoss, self).__init__() - self.smoothing_ratio = smoothing_ratio - - def forward(self, pred, target): - eps = self.smoothing_ratio - n_class = pred.size(1) - one_hot = torch.zeros_like(pred).scatter(1, target.view(-1, 1), 1) - one_hot = one_hot * (1 - eps) + (1 - one_hot) * eps / (n_class - 1) - log_prb = F.log_softmax(pred, dim=1) - loss = -(one_hot * log_prb).total(dim=1) - loss = loss[torch.isfinite(loss)].mean() - return loss - - -@LOSSES.register_module() -class BinaryFocalLoss(nn.Module): - def __init__(self, gamma=2.0, alpha=0.5, logits=True, reduce=True, loss_weight=1.0): - """Binary Focal Loss - ` - """ - super(BinaryFocalLoss, self).__init__() - assert 0 < alpha < 1 - self.gamma = gamma - self.alpha = alpha - self.logits = logits - self.reduce = reduce - self.loss_weight = loss_weight - - def forward(self, pred, target, **kwargs): - """Forward function. - Args: - pred (torch.Tensor): The prediction with shape (N) - target (torch.Tensor): The ground truth. If containing class - indices, shape (N) where each value is 0≀targets[i]≀1, If containing class probabilities, - same shape as the input. - Returns: - torch.Tensor: The calculated loss - """ - if self.logits: - bce = F.binary_cross_entropy_with_logits(pred, target, reduction="none") - else: - bce = F.binary_cross_entropy(pred, target, reduction="none") - pt = torch.exp(-bce) - alpha = self.alpha * target + (1 - self.alpha) * (1 - target) - focal_loss = alpha * (1 - pt) ** self.gamma * bce - - if self.reduce: - focal_loss = torch.mean(focal_loss) - return focal_loss * self.loss_weight - - -@LOSSES.register_module() -class FocalLoss(nn.Module): - def __init__( - self, gamma=2.0, alpha=0.5, reduction="mean", loss_weight=1.0, ignore_index=-1 - ): - """Focal Loss - ` - """ - super(FocalLoss, self).__init__() - assert reduction in ( - "mean", - "sum", - ), "AssertionError: reduction should be 'mean' or 'sum'" - assert isinstance( - alpha, (float, list) - ), "AssertionError: alpha should be of type float" - assert isinstance(gamma, float), "AssertionError: gamma should be of type float" - assert isinstance( - loss_weight, float - ), "AssertionError: loss_weight should be of type float" - assert isinstance(ignore_index, int), "ignore_index must be of type int" - self.gamma = gamma - self.alpha = alpha - self.reduction = reduction - self.loss_weight = loss_weight - self.ignore_index = ignore_index - - def forward(self, pred, target, **kwargs): - """Forward function. - Args: - pred (torch.Tensor): The prediction with shape (N, C) where C = number of classes. - target (torch.Tensor): The ground truth. If containing class - indices, shape (N) where each value is 0≀targets[i]≀Cβˆ’1, If containing class probabilities, - same shape as the input. - Returns: - torch.Tensor: The calculated loss - """ - # [B, C, d_1, d_2, ..., d_k] -> [C, B, d_1, d_2, ..., d_k] - pred = pred.transpose(0, 1) - # [C, B, d_1, d_2, ..., d_k] -> [C, N] - pred = pred.reshape(pred.size(0), -1) - # [C, N] -> [N, C] - pred = pred.transpose(0, 1).contiguous() - # (B, d_1, d_2, ..., d_k) --> (B * d_1 * d_2 * ... * d_k,) - target = target.view(-1).contiguous() - assert pred.size(0) == target.size( - 0 - ), "The shape of pred doesn't match the shape of target" - valid_mask = target != self.ignore_index - target = target[valid_mask] - pred = pred[valid_mask] - - if len(target) == 0: - return 0.0 - - num_classes = pred.size(1) - target = F.one_hot(target, num_classes=num_classes) - - alpha = self.alpha - if isinstance(alpha, list): - alpha = pred.new_tensor(alpha) - pred_sigmoid = pred.sigmoid() - target = target.type_as(pred) - one_minus_pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target) - focal_weight = (alpha * target + (1 - alpha) * (1 - target)) * one_minus_pt.pow( - self.gamma - ) - - loss = ( - F.binary_cross_entropy_with_logits(pred, target, reduction="none") - * focal_weight - ) - if self.reduction == "mean": - loss = loss.mean() - elif self.reduction == "sum": - loss = loss.total() - return self.loss_weight * loss - - -@LOSSES.register_module() -class DiceLoss(nn.Module): - def __init__(self, smooth=1, exponent=2, loss_weight=1.0, ignore_index=-1): - """DiceLoss. - This loss is proposed in `V-Net: Fully Convolutional Neural Networks for - Volumetric Medical Image Segmentation `_. - """ - super(DiceLoss, self).__init__() - self.smooth = smooth - self.exponent = exponent - self.loss_weight = loss_weight - self.ignore_index = ignore_index - - def forward(self, pred, target, **kwargs): - # [B, C, d_1, d_2, ..., d_k] -> [C, B, d_1, d_2, ..., d_k] - pred = pred.transpose(0, 1) - # [C, B, d_1, d_2, ..., d_k] -> [C, N] - pred = pred.reshape(pred.size(0), -1) - # [C, N] -> [N, C] - pred = pred.transpose(0, 1).contiguous() - # (B, d_1, d_2, ..., d_k) --> (B * d_1 * d_2 * ... * d_k,) - target = target.view(-1).contiguous() - assert pred.size(0) == target.size( - 0 - ), "The shape of pred doesn't match the shape of target" - valid_mask = target != self.ignore_index - target = target[valid_mask] - pred = pred[valid_mask] - - pred = F.softmax(pred, dim=1) - num_classes = pred.shape[1] - target = F.one_hot( - torch.clamp(target.long(), 0, num_classes - 1), num_classes=num_classes - ) - - total_loss = 0 - for i in range(num_classes): - if i != self.ignore_index: - num = torch.sum(torch.mul(pred[:, i], target[:, i])) * 2 + self.smooth - den = ( - torch.sum( - pred[:, i].pow(self.exponent) + target[:, i].pow(self.exponent) - ) - + self.smooth - ) - dice_loss = 1 - num / den - total_loss += dice_loss - loss = total_loss / num_classes - return self.loss_weight * loss diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/modules.py b/point_transformer_v3/pointcept_minimal/pointcept/models/modules.py deleted file mode 100644 index 0ec8fbd..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/models/modules.py +++ /dev/null @@ -1,120 +0,0 @@ -import sys -import torch.nn as nn -import spconv.pytorch as spconv - -try: - import ocnn -except ImportError: - ocnn = None - -from collections import OrderedDict -from pointcept.models.utils.structure import Point -from pointcept.engines.hooks import HookBase - - -def is_ocnn_module(module): - if ocnn is not None: - ocnn_modules = ( - ocnn.nn.OctreeConv, - ocnn.nn.OctreeDeconv, - ocnn.nn.OctreeGroupConv, - ocnn.nn.OctreeDWConv, - ) - return isinstance(module, ocnn_modules) - else: - return False - - -class PointModule(nn.Module): - r"""PointModule - placeholder, all module subclass from this will take Point in PointSequential. - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - -class PointSequential(PointModule): - r"""A sequential container. - Modules will be added to it in the order they are passed in the constructor. - Alternatively, an ordered dict of modules can also be passed in. - """ - - def __init__(self, *args, **kwargs): - super().__init__() - if len(args) == 1 and isinstance(args[0], OrderedDict): - for key, module in args[0].items(): - self.add_module(key, module) - else: - for idx, module in enumerate(args): - self.add_module(str(idx), module) - for name, module in kwargs.items(): - if sys.version_info < (3, 6): - raise ValueError("kwargs only supported in py36+") - if name in self._modules: - raise ValueError("name exists.") - self.add_module(name, module) - - def __getitem__(self, idx): - if not (-len(self) <= idx < len(self)): - raise IndexError("index {} is out of range".format(idx)) - if idx < 0: - idx += len(self) - it = iter(self._modules.values()) - for i in range(idx): - next(it) - return next(it) - - def __len__(self): - return len(self._modules) - - def add(self, module, name=None): - if name is None: - name = str(len(self._modules)) - if name in self._modules: - raise KeyError("name exists") - self.add_module(name, module) - - def forward(self, input): - for k, module in self._modules.items(): - # Point module - if isinstance(module, PointModule): - input = module(input) - # Spconv module - elif spconv.modules.is_spconv_module(module): - if isinstance(input, Point): - input.sparse_conv_feat = module(input.sparse_conv_feat) - input.feat = input.sparse_conv_feat.features - else: - input = module(input) - elif is_ocnn_module(module): - if isinstance(input, Point): - input.octree.features[-1] = module( - input.feat[input.octree_order], input.octree, input.octree.depth - ) - input.feat = input.octree.features[-1][input.octree_inverse] - else: - input = module(input) - # PyTorch module - else: - if isinstance(input, Point): - input.feat = module(input.feat) - if "sparse_conv_feat" in input.keys(): - input.sparse_conv_feat = input.sparse_conv_feat.replace_feature( - input.feat - ) - elif isinstance(input, spconv.SparseConvTensor): - if input.indices.shape[0] != 0: - input = input.replace_feature(module(input.features)) - else: - input = module(input) - return input - - -class PointModel(PointModule, HookBase): - r"""PointModel - placeholder, PointModel can be customized as a Pointcept hook. - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/__init__.py b/point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/__init__.py deleted file mode 100644 index 2fd471d..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .point_transformer_v3m1_base import * -from .point_transformer_v3m2_sonata import * -from .point_transformer_v3m1_fvdb import * diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m1_base.py b/point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m1_base.py deleted file mode 100644 index b4b29e6..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m1_base.py +++ /dev/null @@ -1,716 +0,0 @@ -""" -Point Transformer - V3 Mode1 - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -from functools import partial -from addict import Dict -import math -import torch -import torch.nn as nn -import spconv.pytorch as spconv -import torch_scatter -from timm.layers import DropPath - -try: - import flash_attn -except ImportError: - flash_attn = None - -# from pointcept.models.point_prompt_training import PDNorm -from pointcept.models.builder import MODELS -from pointcept.models.utils.misc import offset2bincount -from pointcept.models.utils.structure import Point -from pointcept.models.modules import PointModule, PointSequential - - -class RPE(torch.nn.Module): - def __init__(self, patch_size, num_heads): - super().__init__() - self.patch_size = patch_size - self.num_heads = num_heads - self.pos_bnd = int((4 * patch_size) ** (1 / 3) * 2) - self.rpe_num = 2 * self.pos_bnd + 1 - self.rpe_table = torch.nn.Parameter(torch.zeros(3 * self.rpe_num, num_heads)) - torch.nn.init.trunc_normal_(self.rpe_table, std=0.02) - - def forward(self, coord): - idx = ( - coord.clamp(-self.pos_bnd, self.pos_bnd) # clamp into bnd - + self.pos_bnd # relative position to positive index - + torch.arange(3, device=coord.device) * self.rpe_num # x, y, z stride - ) - out = self.rpe_table.index_select(0, idx.reshape(-1)) - out = out.view(idx.shape + (-1,)).sum(3) - out = out.permute(0, 3, 1, 2) # (N, K, K, H) -> (N, H, K, K) - return out - - -class SerializedAttention(PointModule): - def __init__( - self, - channels, - num_heads, - patch_size, - qkv_bias=True, - qk_scale=None, - attn_drop=0.0, - proj_drop=0.0, - order_index=0, - enable_rpe=False, - enable_flash=True, - upcast_attention=True, - upcast_softmax=True, - ): - super().__init__() - assert channels % num_heads == 0 - self.channels = channels - self.num_heads = num_heads - self.scale = qk_scale or (channels // num_heads) ** -0.5 - self.order_index = order_index - self.upcast_attention = upcast_attention - self.upcast_softmax = upcast_softmax - self.enable_rpe = enable_rpe - self.enable_flash = enable_flash - if enable_flash: - assert ( - enable_rpe is False - ), "Set enable_rpe to False when enable Flash Attention" - assert ( - upcast_attention is False - ), "Set upcast_attention to False when enable Flash Attention" - assert ( - upcast_softmax is False - ), "Set upcast_softmax to False when enable Flash Attention" - assert flash_attn is not None, "Make sure flash_attn is installed." - self.patch_size = patch_size - self.attn_drop = attn_drop - else: - # when disable flash attention, we still don't want to use mask - # consequently, patch size will auto set to the - # min number of patch_size_max and number of points - self.patch_size_max = patch_size - self.patch_size = 0 - self.attn_drop = torch.nn.Dropout(attn_drop) - - self.qkv = torch.nn.Linear(channels, channels * 3, bias=qkv_bias) - self.proj = torch.nn.Linear(channels, channels) - self.proj_drop = torch.nn.Dropout(proj_drop) - self.softmax = torch.nn.Softmax(dim=-1) - self.rpe = RPE(patch_size, num_heads) if self.enable_rpe else None - - @torch.no_grad() - def get_rel_pos(self, point, order): - K = self.patch_size - rel_pos_key = f"rel_pos_{self.order_index}" - if rel_pos_key not in point.keys(): - grid_coord = point.grid_coord[order] - grid_coord = grid_coord.reshape(-1, K, 3) - point[rel_pos_key] = grid_coord.unsqueeze(2) - grid_coord.unsqueeze(1) - return point[rel_pos_key] - - @torch.no_grad() - def get_padding_and_inverse(self, point): - pad_key = "pad" - unpad_key = "unpad" - cu_seqlens_key = "cu_seqlens_key" - if ( - pad_key not in point.keys() - or unpad_key not in point.keys() - or cu_seqlens_key not in point.keys() - ): - offset = point.offset - bincount = offset2bincount(offset) - bincount_pad = ( - torch.div( - bincount + self.patch_size - 1, - self.patch_size, - rounding_mode="trunc", - ) - * self.patch_size - ) - # only pad point when num of points larger than patch_size - mask_pad = bincount > self.patch_size - bincount_pad = ~mask_pad * bincount + mask_pad * bincount_pad - _offset = nn.functional.pad(offset, (1, 0)) - _offset_pad = nn.functional.pad(torch.cumsum(bincount_pad, dim=0), (1, 0)) - pad = torch.arange(_offset_pad[-1], device=offset.device) - unpad = torch.arange(_offset[-1], device=offset.device) - cu_seqlens = [] - for i in range(len(offset)): - unpad[_offset[i] : _offset[i + 1]] += _offset_pad[i] - _offset[i] - if bincount[i] != bincount_pad[i]: - pad[ - _offset_pad[i + 1] - - self.patch_size - + (bincount[i] % self.patch_size) : _offset_pad[i + 1] - ] = pad[ - _offset_pad[i + 1] - - 2 * self.patch_size - + (bincount[i] % self.patch_size) : _offset_pad[i + 1] - - self.patch_size - ] - pad[_offset_pad[i] : _offset_pad[i + 1]] -= _offset_pad[i] - _offset[i] - cu_seqlens.append( - torch.arange( - _offset_pad[i], - _offset_pad[i + 1], - step=self.patch_size, - dtype=torch.int32, - device=offset.device, - ) - ) - point[pad_key] = pad - point[unpad_key] = unpad - point[cu_seqlens_key] = nn.functional.pad( - torch.concat(cu_seqlens), (0, 1), value=_offset_pad[-1] - ) - return point[pad_key], point[unpad_key], point[cu_seqlens_key] - - def forward(self, point): - if not self.enable_flash: - self.patch_size = min( - offset2bincount(point.offset).min().tolist(), self.patch_size_max - ) - - H = self.num_heads - K = self.patch_size - C = self.channels - - pad, unpad, cu_seqlens = self.get_padding_and_inverse(point) - - order = point.serialized_order[self.order_index][pad] - inverse = unpad[point.serialized_inverse[self.order_index]] - - # padding and reshape feat and batch for serialized point patch - qkv = self.qkv(point.feat)[order] - - if not self.enable_flash: - # encode and reshape qkv: (N', K, 3, H, C') => (3, N', H, K, C') - q, k, v = ( - qkv.reshape(-1, K, 3, H, C // H).permute(2, 0, 3, 1, 4).unbind(dim=0) - ) - # attn - if self.upcast_attention: - q = q.float() - k = k.float() - attn = (q * self.scale) @ k.transpose(-2, -1) # (N', H, K, K) - if self.enable_rpe: - attn = attn + self.rpe(self.get_rel_pos(point, order)) - if self.upcast_softmax: - attn = attn.float() - attn = self.softmax(attn) - attn = self.attn_drop(attn).to(qkv.dtype) - feat = (attn @ v).transpose(1, 2).reshape(-1, C) - else: - feat = flash_attn.flash_attn_varlen_qkvpacked_func( - qkv.to(torch.bfloat16).reshape(-1, 3, H, C // H), - cu_seqlens, - max_seqlen=self.patch_size, - dropout_p=self.attn_drop if self.training else 0, - softmax_scale=self.scale, - ).reshape(-1, C) - feat = feat.to(qkv.dtype) - feat = feat[inverse] - - # ffn - feat = self.proj(feat) - feat = self.proj_drop(feat) - point.feat = feat - return point - - -class MLP(nn.Module): - def __init__( - self, - in_channels, - hidden_channels=None, - out_channels=None, - act_layer=nn.GELU, - drop=0.0, - ): - super().__init__() - out_channels = out_channels or in_channels - hidden_channels = hidden_channels or in_channels - self.fc1 = nn.Linear(in_channels, hidden_channels) - self.act = act_layer() - self.fc2 = nn.Linear(hidden_channels, out_channels) - self.drop = nn.Dropout(drop) - - def forward(self, x): - x = self.fc1(x) - x = self.act(x) - x = self.drop(x) - x = self.fc2(x) - x = self.drop(x) - return x - - -class Block(PointModule): - def __init__( - self, - channels, - num_heads, - patch_size=48, - mlp_ratio=4.0, - qkv_bias=True, - qk_scale=None, - attn_drop=0.0, - proj_drop=0.0, - drop_path=0.0, - norm_layer=nn.LayerNorm, - act_layer=nn.GELU, - pre_norm=True, - order_index=0, - cpe_indice_key=None, - enable_rpe=False, - enable_flash=True, - upcast_attention=True, - upcast_softmax=True, - ): - super().__init__() - self.channels = channels - self.pre_norm = pre_norm - - self.cpe = PointSequential( - spconv.SubMConv3d( - channels, - channels, - kernel_size=3, - bias=True, - indice_key=cpe_indice_key, - ), - nn.Linear(channels, channels), - norm_layer(channels), - ) - - self.norm1 = PointSequential(norm_layer(channels)) - self.attn = SerializedAttention( - channels=channels, - patch_size=patch_size, - num_heads=num_heads, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - attn_drop=attn_drop, - proj_drop=proj_drop, - order_index=order_index, - enable_rpe=enable_rpe, - enable_flash=enable_flash, - upcast_attention=upcast_attention, - upcast_softmax=upcast_softmax, - ) - self.norm2 = PointSequential(norm_layer(channels)) - self.mlp = PointSequential( - MLP( - in_channels=channels, - hidden_channels=int(channels * mlp_ratio), - out_channels=channels, - act_layer=act_layer, - drop=proj_drop, - ) - ) - self.drop_path = PointSequential( - DropPath(drop_path) if drop_path > 0.0 else nn.Identity() - ) - - def forward(self, point: Point): - shortcut = point.feat - point = self.cpe(point) - point.feat = shortcut + point.feat - shortcut = point.feat - if self.pre_norm: - point = self.norm1(point) - point = self.drop_path(self.attn(point)) - point.feat = shortcut + point.feat - if not self.pre_norm: - point = self.norm1(point) - - shortcut = point.feat - if self.pre_norm: - point = self.norm2(point) - point = self.drop_path(self.mlp(point)) - point.feat = shortcut + point.feat - if not self.pre_norm: - point = self.norm2(point) - point.sparse_conv_feat = point.sparse_conv_feat.replace_feature(point.feat) - return point - - -class SerializedPooling(PointModule): - def __init__( - self, - in_channels, - out_channels, - stride=2, - norm_layer=None, - act_layer=None, - reduce="max", - shuffle_orders=True, - traceable=True, # record parent and cluster - ): - super().__init__() - self.in_channels = in_channels - self.out_channels = out_channels - - assert stride == 2 ** (math.ceil(stride) - 1).bit_length() # 2, 4, 8 - # TODO: add support to grid pool (any stride) - self.stride = stride - assert reduce in ["sum", "mean", "min", "max"] - self.reduce = reduce - self.shuffle_orders = shuffle_orders - self.traceable = traceable - - self.proj = nn.Linear(in_channels, out_channels) - if norm_layer is not None: - self.norm = PointSequential(norm_layer(out_channels)) - if act_layer is not None: - self.act = PointSequential(act_layer()) - - def forward(self, point: Point): - pooling_depth = (math.ceil(self.stride) - 1).bit_length() - if pooling_depth > point.serialized_depth: - pooling_depth = 0 - assert { - "serialized_code", - "serialized_order", - "serialized_inverse", - "serialized_depth", - }.issubset( - point.keys() - ), "Run point.serialization() point cloud before SerializedPooling" - - code = point.serialized_code >> pooling_depth * 3 - code_, cluster, counts = torch.unique( - code[0], - sorted=True, - return_inverse=True, - return_counts=True, - ) - # indices of point sorted by cluster, for torch_scatter.segment_csr - _, indices = torch.sort(cluster) - # index pointer for sorted point, for torch_scatter.segment_csr - idx_ptr = torch.cat([counts.new_zeros(1), torch.cumsum(counts, dim=0)]) - # head_indices of each cluster, for reduce attr e.g. code, batch - head_indices = indices[idx_ptr[:-1]] - # generate down code, order, inverse - code = code[:, head_indices] - order = torch.argsort(code) - inverse = torch.zeros_like(order).scatter_( - dim=1, - index=order, - src=torch.arange(0, code.shape[1], device=order.device).repeat( - code.shape[0], 1 - ), - ) - - if self.shuffle_orders: - perm = torch.randperm(code.shape[0]) - code = code[perm] - order = order[perm] - inverse = inverse[perm] - - # collect information - point_dict = Dict( - feat=torch_scatter.segment_csr( - self.proj(point.feat)[indices], idx_ptr, reduce=self.reduce - ), - coord=torch_scatter.segment_csr( - point.coord[indices], idx_ptr, reduce="mean" - ), - grid_coord=point.grid_coord[head_indices] >> pooling_depth, - serialized_code=code, - serialized_order=order, - serialized_inverse=inverse, - serialized_depth=point.serialized_depth - pooling_depth, - batch=point.batch[head_indices], - ) - - if "condition" in point.keys(): - point_dict["condition"] = point.condition - if "context" in point.keys(): - point_dict["context"] = point.context - - if self.traceable: - point_dict["pooling_inverse"] = cluster - point_dict["pooling_parent"] = point - point = Point(point_dict) - if self.norm is not None: - point = self.norm(point) - if self.act is not None: - point = self.act(point) - point.sparsify() - return point - - -class SerializedUnpooling(PointModule): - def __init__( - self, - in_channels, - skip_channels, - out_channels, - norm_layer=None, - act_layer=None, - traceable=False, # record parent and cluster - ): - super().__init__() - self.proj = PointSequential(nn.Linear(in_channels, out_channels)) - self.proj_skip = PointSequential(nn.Linear(skip_channels, out_channels)) - - if norm_layer is not None: - self.proj.add(norm_layer(out_channels)) - self.proj_skip.add(norm_layer(out_channels)) - - if act_layer is not None: - self.proj.add(act_layer()) - self.proj_skip.add(act_layer()) - - self.traceable = traceable - - def forward(self, point): - assert "pooling_parent" in point.keys() - assert "pooling_inverse" in point.keys() - parent = point.pop("pooling_parent") - inverse = point.pop("pooling_inverse") - point = self.proj(point) - parent = self.proj_skip(parent) - parent.feat = parent.feat + point.feat[inverse] - - if self.traceable: - parent["unpooling_parent"] = point - return parent - - -class Embedding(PointModule): - def __init__( - self, - in_channels, - embed_channels, - norm_layer=None, - act_layer=None, - ): - super().__init__() - self.in_channels = in_channels - self.embed_channels = embed_channels - - # TODO: check remove spconv - self.stem = PointSequential( - conv=spconv.SubMConv3d( - in_channels, - embed_channels, - kernel_size=5, - padding=1, - bias=False, - indice_key="stem", - ) - ) - if norm_layer is not None: - self.stem.add(norm_layer(embed_channels), name="norm") - if act_layer is not None: - self.stem.add(act_layer(), name="act") - - def forward(self, point: Point): - point = self.stem(point) - return point - - -@MODELS.register_module("PT-v3m1") -class PointTransformerV3(PointModule): - def __init__( - self, - in_channels=6, - order=("z", "z-trans"), - stride=(2, 2, 2, 2), - enc_depths=(2, 2, 2, 6, 2), - enc_channels=(32, 64, 128, 256, 512), - enc_num_head=(2, 4, 8, 16, 32), - enc_patch_size=(48, 48, 48, 48, 48), - dec_depths=(2, 2, 2, 2), - dec_channels=(64, 64, 128, 256), - dec_num_head=(4, 4, 8, 16), - dec_patch_size=(48, 48, 48, 48), - mlp_ratio=4, - qkv_bias=True, - qk_scale=None, - attn_drop=0.0, - proj_drop=0.0, - drop_path=0.3, - pre_norm=True, - shuffle_orders=True, - enable_rpe=False, - enable_flash=True, - upcast_attention=False, - upcast_softmax=False, - enc_mode=False, - pdnorm_bn=False, - pdnorm_ln=False, - pdnorm_decouple=True, - pdnorm_adaptive=False, - pdnorm_affine=True, - pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), - ): - super().__init__() - self.num_stages = len(enc_depths) - self.order = [order] if isinstance(order, str) else order - self.enc_mode = enc_mode - self.shuffle_orders = shuffle_orders - - assert self.num_stages == len(stride) + 1 - assert self.num_stages == len(enc_depths) - assert self.num_stages == len(enc_channels) - assert self.num_stages == len(enc_num_head) - assert self.num_stages == len(enc_patch_size) - assert self.enc_mode or self.num_stages == len(dec_depths) + 1 - assert self.enc_mode or self.num_stages == len(dec_channels) + 1 - assert self.enc_mode or self.num_stages == len(dec_num_head) + 1 - assert self.enc_mode or self.num_stages == len(dec_patch_size) + 1 - - # norm layers - if pdnorm_bn: - assert False, "PDNorm is not supported in this minimal pointcept codebase for fvdb." - bn_layer = partial( - PDNorm, - norm_layer=partial( - nn.BatchNorm1d, eps=1e-3, momentum=0.01, affine=pdnorm_affine - ), - conditions=pdnorm_conditions, - decouple=pdnorm_decouple, - adaptive=pdnorm_adaptive, - ) - else: - bn_layer = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01) - if pdnorm_ln: - assert False, "PDNorm is not supported in this minimal pointcept codebase for fvdb." - ln_layer = partial( - PDNorm, - norm_layer=partial(nn.LayerNorm, elementwise_affine=pdnorm_affine), - conditions=pdnorm_conditions, - decouple=pdnorm_decouple, - adaptive=pdnorm_adaptive, - ) - else: - ln_layer = nn.LayerNorm - # activation layers - act_layer = nn.GELU - - self.embedding = Embedding( - in_channels=in_channels, - embed_channels=enc_channels[0], - norm_layer=bn_layer, - act_layer=act_layer, - ) - - # encoder - enc_drop_path = [ - x.item() for x in torch.linspace(0, drop_path, sum(enc_depths)) - ] - self.enc = PointSequential() - for s in range(self.num_stages): - enc_drop_path_ = enc_drop_path[ - sum(enc_depths[:s]) : sum(enc_depths[: s + 1]) - ] - enc = PointSequential() - if s > 0: - enc.add( - SerializedPooling( - in_channels=enc_channels[s - 1], - out_channels=enc_channels[s], - stride=stride[s - 1], - norm_layer=bn_layer, - act_layer=act_layer, - ), - name="down", - ) - for i in range(enc_depths[s]): - enc.add( - Block( - channels=enc_channels[s], - num_heads=enc_num_head[s], - patch_size=enc_patch_size[s], - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - attn_drop=attn_drop, - proj_drop=proj_drop, - drop_path=enc_drop_path_[i], - norm_layer=ln_layer, - act_layer=act_layer, - pre_norm=pre_norm, - order_index=i % len(self.order), - cpe_indice_key=f"stage{s}", - enable_rpe=enable_rpe, - enable_flash=enable_flash, - upcast_attention=upcast_attention, - upcast_softmax=upcast_softmax, - ), - name=f"block{i}", - ) - if len(enc) != 0: - self.enc.add(module=enc, name=f"enc{s}") - - # decoder - if not self.enc_mode: - dec_drop_path = [ - x.item() for x in torch.linspace(0, drop_path, sum(dec_depths)) - ] - self.dec = PointSequential() - dec_channels = list(dec_channels) + [enc_channels[-1]] - for s in reversed(range(self.num_stages - 1)): - dec_drop_path_ = dec_drop_path[ - sum(dec_depths[:s]) : sum(dec_depths[: s + 1]) - ] - dec_drop_path_.reverse() - dec = PointSequential() - dec.add( - SerializedUnpooling( - in_channels=dec_channels[s + 1], - skip_channels=enc_channels[s], - out_channels=dec_channels[s], - norm_layer=bn_layer, - act_layer=act_layer, - ), - name="up", - ) - for i in range(dec_depths[s]): - dec.add( - Block( - channels=dec_channels[s], - num_heads=dec_num_head[s], - patch_size=dec_patch_size[s], - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - attn_drop=attn_drop, - proj_drop=proj_drop, - drop_path=dec_drop_path_[i], - norm_layer=ln_layer, - act_layer=act_layer, - pre_norm=pre_norm, - order_index=i % len(self.order), - cpe_indice_key=f"stage{s}", - enable_rpe=enable_rpe, - enable_flash=enable_flash, - upcast_attention=upcast_attention, - upcast_softmax=upcast_softmax, - ), - name=f"block{i}", - ) - self.dec.add(module=dec, name=f"dec{s}") - - def forward(self, data_dict): - point = Point(data_dict) - point.serialization(order=self.order, shuffle_orders=self.shuffle_orders) - point.sparsify() - - point = self.embedding(point) - point = self.enc(point) - if not self.enc_mode: - point = self.dec(point) - # else: - # point.feat = torch_scatter.segment_csr( - # src=point.feat, - # indptr=nn.functional.pad(point.offset, (1, 0)), - # reduce="mean", - # ) - return point diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m2_sonata.py b/point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m2_sonata.py deleted file mode 100644 index cc8fee1..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/models/point_transformer_v3/point_transformer_v3m2_sonata.py +++ /dev/null @@ -1,732 +0,0 @@ -""" -Point Transformer - V3 Mode2 - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -from addict import Dict -import torch -import torch.nn as nn -from torch.nn.init import trunc_normal_ -import spconv.pytorch as spconv -import torch_scatter -from timm.layers import DropPath - -try: - import flash_attn -except ImportError: - flash_attn = None - -from pointcept.models.builder import MODELS -from pointcept.models.utils.misc import offset2bincount -from pointcept.models.utils.structure import Point -from pointcept.models.modules import PointModule, PointSequential - - -class LayerScale(nn.Module): - def __init__( - self, - dim: int, - init_values: float = 1e-5, - inplace: bool = False, - ) -> None: - super().__init__() - self.inplace = inplace - self.gamma = nn.Parameter(init_values * torch.ones(dim)) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - return x.mul_(self.gamma) if self.inplace else x * self.gamma - - -class RPE(torch.nn.Module): - def __init__(self, patch_size, num_heads): - super().__init__() - self.patch_size = patch_size - self.num_heads = num_heads - self.pos_bnd = int((4 * patch_size) ** (1 / 3) * 2) - self.rpe_num = 2 * self.pos_bnd + 1 - self.rpe_table = torch.nn.Parameter(torch.zeros(3 * self.rpe_num, num_heads)) - torch.nn.init.trunc_normal_(self.rpe_table, std=0.02) - - def forward(self, coord): - idx = ( - coord.clamp(-self.pos_bnd, self.pos_bnd) # clamp into bnd - + self.pos_bnd # relative position to positive index - + torch.arange(3, device=coord.device) * self.rpe_num # x, y, z stride - ) - out = self.rpe_table.index_select(0, idx.reshape(-1)) - out = out.view(idx.shape + (-1,)).sum(3) - out = out.permute(0, 3, 1, 2) # (N, K, K, H) -> (N, H, K, K) - return out - - -class SerializedAttention(PointModule): - def __init__( - self, - channels, - num_heads, - patch_size, - qkv_bias=True, - qk_scale=None, - attn_drop=0.0, - proj_drop=0.0, - order_index=0, - enable_rpe=False, - enable_flash=True, - upcast_attention=True, - upcast_softmax=True, - ): - super().__init__() - assert channels % num_heads == 0 - self.channels = channels - self.num_heads = num_heads - self.scale = qk_scale or (channels // num_heads) ** -0.5 - self.order_index = order_index - self.upcast_attention = upcast_attention - self.upcast_softmax = upcast_softmax - self.enable_rpe = enable_rpe - self.enable_flash = enable_flash - if enable_flash: - assert ( - enable_rpe is False - ), "Set enable_rpe to False when enable Flash Attention" - assert ( - upcast_attention is False - ), "Set upcast_attention to False when enable Flash Attention" - assert ( - upcast_softmax is False - ), "Set upcast_softmax to False when enable Flash Attention" - assert flash_attn is not None, "Make sure flash_attn is installed." - self.patch_size = patch_size - self.attn_drop = attn_drop - else: - # when disable flash attention, we still don't want to use mask - # consequently, patch size will auto set to the - # min number of patch_size_max and number of points - self.patch_size_max = patch_size - self.patch_size = 0 - self.attn_drop = torch.nn.Dropout(attn_drop) - - self.qkv = torch.nn.Linear(channels, channels * 3, bias=qkv_bias) - self.proj = torch.nn.Linear(channels, channels) - self.proj_drop = torch.nn.Dropout(proj_drop) - self.softmax = torch.nn.Softmax(dim=-1) - self.rpe = RPE(patch_size, num_heads) if self.enable_rpe else None - - @torch.no_grad() - def get_rel_pos(self, point, order): - K = self.patch_size - rel_pos_key = f"rel_pos_{self.order_index}" - if rel_pos_key not in point.keys(): - grid_coord = point.grid_coord[order] - grid_coord = grid_coord.reshape(-1, K, 3) - point[rel_pos_key] = grid_coord.unsqueeze(2) - grid_coord.unsqueeze(1) - return point[rel_pos_key] - - @torch.no_grad() - def get_padding_and_inverse(self, point): - pad_key = "pad" - unpad_key = "unpad" - cu_seqlens_key = "cu_seqlens_key" - if ( - pad_key not in point.keys() - or unpad_key not in point.keys() - or cu_seqlens_key not in point.keys() - ): - offset = point.offset - bincount = offset2bincount(offset) - bincount_pad = ( - torch.div( - bincount + self.patch_size - 1, - self.patch_size, - rounding_mode="trunc", - ) - * self.patch_size - ) - # only pad point when num of points larger than patch_size - mask_pad = bincount > self.patch_size - bincount_pad = ~mask_pad * bincount + mask_pad * bincount_pad - _offset = nn.functional.pad(offset, (1, 0)) - _offset_pad = nn.functional.pad(torch.cumsum(bincount_pad, dim=0), (1, 0)) - pad = torch.arange(_offset_pad[-1], device=offset.device) - unpad = torch.arange(_offset[-1], device=offset.device) - cu_seqlens = [] - for i in range(len(offset)): - unpad[_offset[i] : _offset[i + 1]] += _offset_pad[i] - _offset[i] - if bincount[i] != bincount_pad[i]: - pad[ - _offset_pad[i + 1] - - self.patch_size - + (bincount[i] % self.patch_size) : _offset_pad[i + 1] - ] = pad[ - _offset_pad[i + 1] - - 2 * self.patch_size - + (bincount[i] % self.patch_size) : _offset_pad[i + 1] - - self.patch_size - ] - pad[_offset_pad[i] : _offset_pad[i + 1]] -= _offset_pad[i] - _offset[i] - cu_seqlens.append( - torch.arange( - _offset_pad[i], - _offset_pad[i + 1], - step=self.patch_size, - dtype=torch.int32, - device=offset.device, - ) - ) - point[pad_key] = pad - point[unpad_key] = unpad - point[cu_seqlens_key] = nn.functional.pad( - torch.concat(cu_seqlens), (0, 1), value=_offset_pad[-1] - ) - return point[pad_key], point[unpad_key], point[cu_seqlens_key] - - def forward(self, point): - if not self.enable_flash: - self.patch_size = min( - offset2bincount(point.offset).min().tolist(), self.patch_size_max - ) - - H = self.num_heads - K = self.patch_size - C = self.channels - - pad, unpad, cu_seqlens = self.get_padding_and_inverse(point) - - order = point.serialized_order[self.order_index][pad] - inverse = unpad[point.serialized_inverse[self.order_index]] - - # padding and reshape feat and batch for serialized point patch - qkv = self.qkv(point.feat)[order] - - if not self.enable_flash: - # encode and reshape qkv: (N', K, 3, H, C') => (3, N', H, K, C') - q, k, v = ( - qkv.reshape(-1, K, 3, H, C // H).permute(2, 0, 3, 1, 4).unbind(dim=0) - ) - # attn - if self.upcast_attention: - q = q.float() - k = k.float() - attn = (q * self.scale) @ k.transpose(-2, -1) # (N', H, K, K) - if self.enable_rpe: - attn = attn + self.rpe(self.get_rel_pos(point, order)) - if self.upcast_softmax: - attn = attn.float() - attn = self.softmax(attn) - attn = self.attn_drop(attn).to(qkv.dtype) - feat = (attn @ v).transpose(1, 2).reshape(-1, C) - else: - feat = flash_attn.flash_attn_varlen_qkvpacked_func( - qkv.to(torch.bfloat16).reshape(-1, 3, H, C // H), - cu_seqlens, - max_seqlen=self.patch_size, - dropout_p=self.attn_drop if self.training else 0, - softmax_scale=self.scale, - ).reshape(-1, C) - feat = feat.to(qkv.dtype) - feat = feat[inverse] - - # ffn - feat = self.proj(feat) - feat = self.proj_drop(feat) - point.feat = feat - return point - - -class MLP(nn.Module): - def __init__( - self, - in_channels, - hidden_channels=None, - out_channels=None, - act_layer=nn.GELU, - drop=0.0, - ): - super().__init__() - out_channels = out_channels or in_channels - hidden_channels = hidden_channels or in_channels - self.fc1 = nn.Linear(in_channels, hidden_channels) - self.act = act_layer() - self.fc2 = nn.Linear(hidden_channels, out_channels) - self.drop = nn.Dropout(drop) - - def forward(self, x): - x = self.fc1(x) - x = self.act(x) - x = self.drop(x) - x = self.fc2(x) - x = self.drop(x) - return x - - -class Block(PointModule): - def __init__( - self, - channels, - num_heads, - patch_size=48, - mlp_ratio=4.0, - qkv_bias=True, - qk_scale=None, - attn_drop=0.0, - proj_drop=0.0, - drop_path=0.0, - layer_scale=None, - norm_layer=nn.LayerNorm, - act_layer=nn.GELU, - pre_norm=True, - order_index=0, - cpe_indice_key=None, - enable_rpe=False, - enable_flash=True, - upcast_attention=True, - upcast_softmax=True, - ): - super().__init__() - self.channels = channels - self.pre_norm = pre_norm - - self.cpe = PointSequential( - spconv.SubMConv3d( - channels, - channels, - kernel_size=3, - bias=True, - indice_key=cpe_indice_key, - ), - nn.Linear(channels, channels), - norm_layer(channels), - ) - - self.norm1 = PointSequential(norm_layer(channels)) - self.ls1 = PointSequential( - LayerScale(channels, init_values=layer_scale) - if layer_scale is not None - else nn.Identity() - ) - self.attn = SerializedAttention( - channels=channels, - patch_size=patch_size, - num_heads=num_heads, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - attn_drop=attn_drop, - proj_drop=proj_drop, - order_index=order_index, - enable_rpe=enable_rpe, - enable_flash=enable_flash, - upcast_attention=upcast_attention, - upcast_softmax=upcast_softmax, - ) - self.norm2 = PointSequential(norm_layer(channels)) - self.ls2 = PointSequential( - LayerScale(channels, init_values=layer_scale) - if layer_scale is not None - else nn.Identity() - ) - self.mlp = PointSequential( - MLP( - in_channels=channels, - hidden_channels=int(channels * mlp_ratio), - out_channels=channels, - act_layer=act_layer, - drop=proj_drop, - ) - ) - self.drop_path = PointSequential( - DropPath(drop_path) if drop_path > 0.0 else nn.Identity() - ) - - def forward(self, point: Point): - shortcut = point.feat - point = self.cpe(point) - point.feat = shortcut + point.feat - shortcut = point.feat - if self.pre_norm: - point = self.norm1(point) - point = self.drop_path(self.ls1(self.attn(point))) - point.feat = shortcut + point.feat - if not self.pre_norm: - point = self.norm1(point) - - shortcut = point.feat - if self.pre_norm: - point = self.norm2(point) - point = self.drop_path(self.ls2(self.mlp(point))) - point.feat = shortcut + point.feat - if not self.pre_norm: - point = self.norm2(point) - point.sparse_conv_feat = point.sparse_conv_feat.replace_feature(point.feat) - return point - - -class GridPooling(PointModule): - def __init__( - self, - in_channels, - out_channels, - stride=2, - norm_layer=None, - act_layer=None, - reduce="max", - shuffle_orders=True, - traceable=True, # record parent and cluster - ): - super().__init__() - self.in_channels = in_channels - self.out_channels = out_channels - - self.stride = stride - assert reduce in ["sum", "mean", "min", "max"] - self.reduce = reduce - self.shuffle_orders = shuffle_orders - self.traceable = traceable - - self.proj = nn.Linear(in_channels, out_channels) - if norm_layer is not None: - self.norm = PointSequential(norm_layer(out_channels)) - if act_layer is not None: - self.act = PointSequential(act_layer()) - - def forward(self, point: Point): - if "grid_coord" in point.keys(): - grid_coord = point.grid_coord - elif {"coord", "grid_size"}.issubset(point.keys()): - grid_coord = torch.div( - point.coord - point.coord.min(0)[0], - point.grid_size, - rounding_mode="trunc", - ).int() - else: - raise AssertionError( - "[gird_coord] or [coord, grid_size] should be include in the Point" - ) - grid_coord = torch.div(grid_coord, self.stride, rounding_mode="trunc") - grid_coord = grid_coord | point.batch.view(-1, 1) << 48 - grid_coord, cluster, counts = torch.unique( - grid_coord, - sorted=True, - return_inverse=True, - return_counts=True, - dim=0, - ) - grid_coord = grid_coord & ((1 << 48) - 1) - # indices of point sorted by cluster, for torch_scatter.segment_csr - _, indices = torch.sort(cluster) - # index pointer for sorted point, for torch_scatter.segment_csr - idx_ptr = torch.cat([counts.new_zeros(1), torch.cumsum(counts, dim=0)]) - # head_indices of each cluster, for reduce attr e.g. code, batch - head_indices = indices[idx_ptr[:-1]] - point_dict = Dict( - feat=torch_scatter.segment_csr( - self.proj(point.feat)[indices], idx_ptr, reduce=self.reduce - ), - coord=torch_scatter.segment_csr( - point.coord[indices], idx_ptr, reduce="mean" - ), - grid_coord=grid_coord, - batch=point.batch[head_indices], - ) - if "origin_coord" in point.keys(): - point_dict["origin_coord"] = torch_scatter.segment_csr( - point.origin_coord[indices], idx_ptr, reduce="mean" - ) - if "condition" in point.keys(): - point_dict["condition"] = point.condition - if "context" in point.keys(): - point_dict["context"] = point.context - if "name" in point.keys(): - point_dict["name"] = point.name - if "split" in point.keys(): - point_dict["split"] = point.split - if "color" in point.keys(): - point_dict["color"] = torch_scatter.segment_csr( - point.color[indices], idx_ptr, reduce="mean" - ) - if "grid_size" in point.keys(): - point_dict["grid_size"] = point.grid_size * self.stride - - if self.traceable: - point_dict["pooling_inverse"] = cluster - point_dict["pooling_parent"] = point - point_dict["idx_ptr"] = idx_ptr - order = point.order - point = Point(point_dict) - if self.norm is not None: - point = self.norm(point) - if self.act is not None: - point = self.act(point) - point.serialization(order=order, shuffle_orders=self.shuffle_orders) - point.sparsify() - return point - - -class GridUnpooling(PointModule): - def __init__( - self, - in_channels, - skip_channels, - out_channels, - norm_layer=None, - act_layer=None, - traceable=False, # record parent and cluster - ): - super().__init__() - self.proj = PointSequential(nn.Linear(in_channels, out_channels)) - self.proj_skip = PointSequential(nn.Linear(skip_channels, out_channels)) - - if norm_layer is not None: - self.proj.add(norm_layer(out_channels)) - self.proj_skip.add(norm_layer(out_channels)) - - if act_layer is not None: - self.proj.add(act_layer()) - self.proj_skip.add(act_layer()) - - self.traceable = traceable - - def forward(self, point): - assert "pooling_parent" in point.keys() - assert "pooling_inverse" in point.keys() - parent = point.pop("pooling_parent") - inverse = point.pooling_inverse - feat = point.feat - - parent = self.proj_skip(parent) - parent.feat = parent.feat + self.proj(point).feat[inverse] - parent.sparse_conv_feat = parent.sparse_conv_feat.replace_feature(parent.feat) - - if self.traceable: - point.feat = feat - parent["unpooling_parent"] = point - return parent - - -class Embedding(PointModule): - def __init__( - self, - in_channels, - embed_channels, - norm_layer=None, - act_layer=None, - mask_token=False, - ): - super().__init__() - self.in_channels = in_channels - self.embed_channels = embed_channels - - self.stem = PointSequential(linear=nn.Linear(in_channels, embed_channels)) - if norm_layer is not None: - self.stem.add(norm_layer(embed_channels), name="norm") - if act_layer is not None: - self.stem.add(act_layer(), name="act") - - if mask_token: - self.mask_token = nn.Parameter(torch.zeros(1, embed_channels)) - else: - self.mask_token = None - - def forward(self, point: Point): - point = self.stem(point) - if "mask" in point.keys(): - point.feat = torch.where( - point.mask.unsqueeze(-1), - self.mask_token.to(point.feat.dtype), - point.feat, - ) - return point - - -@MODELS.register_module("PT-v3m2") -class PointTransformerV3(PointModule): - def __init__( - self, - in_channels=6, - order=("z", "z-trans"), - stride=(2, 2, 2, 2), - enc_depths=(2, 2, 2, 6, 2), - enc_channels=(32, 64, 128, 256, 512), - enc_num_head=(2, 4, 8, 16, 32), - enc_patch_size=(48, 48, 48, 48, 48), - dec_depths=(2, 2, 2, 2), - dec_channels=(64, 64, 128, 256), - dec_num_head=(4, 4, 8, 16), - dec_patch_size=(48, 48, 48, 48), - mlp_ratio=4, - qkv_bias=True, - qk_scale=None, - attn_drop=0.0, - proj_drop=0.0, - drop_path=0.3, - layer_scale=None, - pre_norm=True, - shuffle_orders=True, - enable_rpe=False, - enable_flash=True, - upcast_attention=False, - upcast_softmax=False, - traceable=False, - mask_token=False, - enc_mode=False, - freeze_encoder=False, - ): - super().__init__() - self.num_stages = len(enc_depths) - self.order = [order] if isinstance(order, str) else order - self.shuffle_orders = shuffle_orders - self.enc_mode = enc_mode - self.freeze_encoder = freeze_encoder - - assert self.num_stages == len(stride) + 1 - assert self.num_stages == len(enc_depths) - assert self.num_stages == len(enc_channels) - assert self.num_stages == len(enc_num_head) - assert self.num_stages == len(enc_patch_size) - assert self.enc_mode or self.num_stages == len(dec_depths) + 1 - assert self.enc_mode or self.num_stages == len(dec_channels) + 1 - assert self.enc_mode or self.num_stages == len(dec_num_head) + 1 - assert self.enc_mode or self.num_stages == len(dec_patch_size) + 1 - - # normalization layer - ln_layer = nn.LayerNorm - # activation layers - act_layer = nn.GELU - - self.embedding = Embedding( - in_channels=in_channels, - embed_channels=enc_channels[0], - norm_layer=ln_layer, - act_layer=act_layer, - mask_token=mask_token, - ) - - # encoder - enc_drop_path = [ - x.item() for x in torch.linspace(0, drop_path, sum(enc_depths)) - ] - self.enc = PointSequential() - for s in range(self.num_stages): - enc_drop_path_ = enc_drop_path[ - sum(enc_depths[:s]) : sum(enc_depths[: s + 1]) - ] - enc = PointSequential() - if s > 0: - enc.add( - GridPooling( - in_channels=enc_channels[s - 1], - out_channels=enc_channels[s], - stride=stride[s - 1], - norm_layer=ln_layer, - act_layer=act_layer, - ), - name="down", - ) - for i in range(enc_depths[s]): - enc.add( - Block( - channels=enc_channels[s], - num_heads=enc_num_head[s], - patch_size=enc_patch_size[s], - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - attn_drop=attn_drop, - proj_drop=proj_drop, - drop_path=enc_drop_path_[i], - layer_scale=layer_scale, - norm_layer=ln_layer, - act_layer=act_layer, - pre_norm=pre_norm, - order_index=i % len(self.order), - cpe_indice_key=f"stage{s}", - enable_rpe=enable_rpe, - enable_flash=enable_flash, - upcast_attention=upcast_attention, - upcast_softmax=upcast_softmax, - ), - name=f"block{i}", - ) - if len(enc) != 0: - self.enc.add(module=enc, name=f"enc{s}") - - # decoder - if not self.enc_mode: - dec_drop_path = [ - x.item() for x in torch.linspace(0, drop_path, sum(dec_depths)) - ] - self.dec = PointSequential() - dec_channels = list(dec_channels) + [enc_channels[-1]] - for s in reversed(range(self.num_stages - 1)): - dec_drop_path_ = dec_drop_path[ - sum(dec_depths[:s]) : sum(dec_depths[: s + 1]) - ] - dec_drop_path_.reverse() - dec = PointSequential() - dec.add( - GridUnpooling( - in_channels=dec_channels[s + 1], - skip_channels=enc_channels[s], - out_channels=dec_channels[s], - norm_layer=ln_layer, - act_layer=act_layer, - traceable=traceable, - ), - name="up", - ) - for i in range(dec_depths[s]): - dec.add( - Block( - channels=dec_channels[s], - num_heads=dec_num_head[s], - patch_size=dec_patch_size[s], - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - attn_drop=attn_drop, - proj_drop=proj_drop, - drop_path=dec_drop_path_[i], - layer_scale=layer_scale, - norm_layer=ln_layer, - act_layer=act_layer, - pre_norm=pre_norm, - order_index=i % len(self.order), - cpe_indice_key=f"stage{s}", - enable_rpe=enable_rpe, - enable_flash=enable_flash, - upcast_attention=upcast_attention, - upcast_softmax=upcast_softmax, - ), - name=f"block{i}", - ) - self.dec.add(module=dec, name=f"dec{s}") - if self.freeze_encoder: - for p in self.embedding.parameters(): - p.requires_grad = False - for p in self.enc.parameters(): - p.requires_grad = False - self.apply(self._init_weights) - - @staticmethod - def _init_weights(module): - if isinstance(module, nn.Linear): - trunc_normal_(module.weight, std=0.02) - if module.bias is not None: - nn.init.zeros_(module.bias) - elif isinstance(module, spconv.SubMConv3d): - trunc_normal_(module.weight, std=0.02) - if module.bias is not None: - nn.init.zeros_(module.bias) - - def forward(self, data_dict): - point = Point(data_dict) - point = self.embedding(point) - - point.serialization(order=self.order, shuffle_orders=self.shuffle_orders) - point.sparsify() - - point = self.enc(point) - if not self.enc_mode: - point = self.dec(point) - return point diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/utils/__init__.py b/point_transformer_v3/pointcept_minimal/pointcept/models/utils/__init__.py deleted file mode 100644 index da0c47b..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/models/utils/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from .misc import ( - offset2batch, - offset2bincount, - bincount2offset, - batch2offset, - off_diagonal, -) -from .checkpoint import checkpoint -from .serialization import encode, decode diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/utils/checkpoint.py b/point_transformer_v3/pointcept_minimal/pointcept/models/utils/checkpoint.py deleted file mode 100644 index 5882035..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/models/utils/checkpoint.py +++ /dev/null @@ -1,57 +0,0 @@ -""" -Checkpoint Utils for Models - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -import torch - - -class CheckpointFunction(torch.autograd.Function): - @staticmethod - def forward(ctx, run_function, length, *args): - ctx.run_function = run_function - ctx.input_tensors = list(args[:length]) - ctx.input_params = list(args[length:]) - - with torch.no_grad(): - output_tensors = ctx.run_function(*ctx.input_tensors) - return output_tensors - - @staticmethod - def backward(ctx, *output_grads): - ctx.input_tensors = [x.detach().requires_grad_(True) for x in ctx.input_tensors] - with torch.enable_grad(): - # Fixes a bug where the first op in run_function modifies the - # Tensor storage in place, which is not allowed for detach()'d - # Tensors. - shallow_copies = [x.view_as(x) for x in ctx.input_tensors] - output_tensors = ctx.run_function(*shallow_copies) - input_grads = torch.autograd.grad( - output_tensors, - ctx.input_tensors + ctx.input_params, - output_grads, - allow_unused=True, - ) - del ctx.input_tensors - del ctx.input_params - del output_tensors - return (None, None) + input_grads - - -def checkpoint(func, inputs, params, flag): - """ - Evaluate a function without caching intermediate activations, allowing for - reduced memory at the expense of extra compute in the backward pass. - :param func: the function to evaluate. - :param inputs: the argument sequence to pass to `func`. - :param params: a sequence of parameters `func` depends on but does not - explicitly take as arguments. - :param flag: if False, disable gradient checkpointing. - """ - if flag: - args = tuple(inputs) + tuple(params) - return CheckpointFunction.apply(func, len(inputs), *args) - else: - return func(*inputs) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/utils/misc.py b/point_transformer_v3/pointcept_minimal/pointcept/models/utils/misc.py deleted file mode 100644 index 4eef9eb..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/models/utils/misc.py +++ /dev/null @@ -1,41 +0,0 @@ -""" -General Utils for Models - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -import torch -from itertools import chain - - -@torch.no_grad() -def offset2bincount(offset): - return torch.diff( - offset, prepend=torch.tensor([0], device=offset.device, dtype=torch.long) - ) - - -@torch.no_grad() -def bincount2offset(bincount): - return torch.cumsum(bincount, dim=0) - - -@torch.no_grad() -def offset2batch(offset): - bincount = offset2bincount(offset) - return torch.arange( - len(bincount), device=offset.device, dtype=torch.long - ).repeat_interleave(bincount) - - -@torch.no_grad() -def batch2offset(batch): - return torch.cumsum(batch.bincount(), dim=0).long() - - -def off_diagonal(x): - # return a flattened view of the off-diagonal elements of a square matrix - n, m = x.shape - assert n == m - return x.flatten()[:-1].view(n - 1, n + 1)[:, 1:].flatten() diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/__init__.py b/point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/__init__.py deleted file mode 100644 index 058c5e1..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from .default import ( - encode, - decode, - z_order_encode, - z_order_decode, - hilbert_encode, - hilbert_decode, -) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/default.py b/point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/default.py deleted file mode 100644 index 15898b5..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/default.py +++ /dev/null @@ -1,59 +0,0 @@ -import torch -from .z_order import xyz2key as z_order_encode_ -from .z_order import key2xyz as z_order_decode_ -from .hilbert import encode as hilbert_encode_ -from .hilbert import decode as hilbert_decode_ - - -@torch.inference_mode() -def encode(grid_coord, batch=None, depth=16, order="z"): - assert order in {"z", "z-trans", "hilbert", "hilbert-trans"} - if order == "z": - code = z_order_encode(grid_coord, depth=depth) - elif order == "z-trans": - code = z_order_encode(grid_coord[:, [1, 0, 2]], depth=depth) - elif order == "hilbert": - code = hilbert_encode(grid_coord, depth=depth) - elif order == "hilbert-trans": - code = hilbert_encode(grid_coord[:, [1, 0, 2]], depth=depth) - else: - raise NotImplementedError - if batch is not None: - batch = batch.long() - code = batch << depth * 3 | code - return code - - -@torch.inference_mode() -def decode(code, depth=16, order="z"): - assert order in {"z", "hilbert"} - batch = code >> depth * 3 - code = code & ((1 << depth * 3) - 1) - if order == "z": - grid_coord = z_order_decode(code, depth=depth) - elif order == "hilbert": - grid_coord = hilbert_decode(code, depth=depth) - else: - raise NotImplementedError - return grid_coord, batch - - -def z_order_encode(grid_coord: torch.Tensor, depth: int = 16): - x, y, z = grid_coord[:, 0].long(), grid_coord[:, 1].long(), grid_coord[:, 2].long() - # we block the support to batch, maintain batched code in Point class - code = z_order_encode_(x, y, z, b=None, depth=depth) - return code - - -def z_order_decode(code: torch.Tensor, depth): - x, y, z = z_order_decode_(code, depth=depth) - grid_coord = torch.stack([x, y, z], dim=-1) # (N, 3) - return grid_coord - - -def hilbert_encode(grid_coord: torch.Tensor, depth: int = 16): - return hilbert_encode_(grid_coord, num_dims=3, num_bits=depth) - - -def hilbert_decode(code: torch.Tensor, depth: int = 16): - return hilbert_decode_(code, num_dims=3, num_bits=depth) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/hilbert.py b/point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/hilbert.py deleted file mode 100644 index c96a3a9..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/hilbert.py +++ /dev/null @@ -1,303 +0,0 @@ -""" -Hilbert Order -Modified from https://github.com/PrincetonLIPS/numpy-hilbert-curve - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com), Kaixin Xu -Please cite our work if the code is helpful to you. -""" - -import torch - - -def right_shift(binary, k=1, axis=-1): - """Right shift an array of binary values. - - Parameters: - ----------- - binary: An ndarray of binary values. - - k: The number of bits to shift. Default 1. - - axis: The axis along which to shift. Default -1. - - Returns: - -------- - Returns an ndarray with zero prepended and the ends truncated, along - whatever axis was specified.""" - - # If we're shifting the whole thing, just return zeros. - if binary.shape[axis] <= k: - return torch.zeros_like(binary) - - # Determine the padding pattern. - # padding = [(0,0)] * len(binary.shape) - # padding[axis] = (k,0) - - # Determine the slicing pattern to eliminate just the last one. - slicing = [slice(None)] * len(binary.shape) - slicing[axis] = slice(None, -k) - shifted = torch.nn.functional.pad( - binary[tuple(slicing)], (k, 0), mode="constant", value=0 - ) - - return shifted - - -def binary2gray(binary, axis=-1): - """Convert an array of binary values into Gray codes. - - This uses the classic X ^ (X >> 1) trick to compute the Gray code. - - Parameters: - ----------- - binary: An ndarray of binary values. - - axis: The axis along which to compute the gray code. Default=-1. - - Returns: - -------- - Returns an ndarray of Gray codes. - """ - shifted = right_shift(binary, axis=axis) - - # Do the X ^ (X >> 1) trick. - gray = torch.logical_xor(binary, shifted) - - return gray - - -def gray2binary(gray, axis=-1): - """Convert an array of Gray codes back into binary values. - - Parameters: - ----------- - gray: An ndarray of gray codes. - - axis: The axis along which to perform Gray decoding. Default=-1. - - Returns: - -------- - Returns an ndarray of binary values. - """ - - # Loop the log2(bits) number of times necessary, with shift and xor. - shift = 2 ** (torch.Tensor([gray.shape[axis]]).log2().ceil().int() - 1) - while shift > 0: - gray = torch.logical_xor(gray, right_shift(gray, shift)) - shift = torch.div(shift, 2, rounding_mode="floor") - return gray - - -def encode(locs, num_dims, num_bits): - """Decode an array of locations in a hypercube into a Hilbert integer. - - This is a vectorized-ish version of the Hilbert curve implementation by John - Skilling as described in: - - Skilling, J. (2004, April). Programming the Hilbert curve. In AIP Conference - Proceedings (Vol. 707, No. 1, pp. 381-387). American Institute of Physics. - - Params: - ------- - locs - An ndarray of locations in a hypercube of num_dims dimensions, in - which each dimension runs from 0 to 2**num_bits-1. The shape can - be arbitrary, as long as the last dimension of the same has size - num_dims. - - num_dims - The dimensionality of the hypercube. Integer. - - num_bits - The number of bits for each dimension. Integer. - - Returns: - -------- - The output is an ndarray of uint64 integers with the same shape as the - input, excluding the last dimension, which needs to be num_dims. - """ - - # Keep around the original shape for later. - orig_shape = locs.shape - bitpack_mask = 1 << torch.arange(0, 8).to(locs.device) - bitpack_mask_rev = bitpack_mask.flip(-1) - - if orig_shape[-1] != num_dims: - raise ValueError( - """ - The shape of locs was surprising in that the last dimension was of size - %d, but num_dims=%d. These need to be equal. - """ - % (orig_shape[-1], num_dims) - ) - - if num_dims * num_bits > 63: - raise ValueError( - """ - num_dims=%d and num_bits=%d for %d bits total, which can't be encoded - into a int64. Are you sure you need that many points on your Hilbert - curve? - """ - % (num_dims, num_bits, num_dims * num_bits) - ) - - # Treat the location integers as 64-bit unsigned and then split them up into - # a sequence of uint8s. Preserve the association by dimension. - locs_uint8 = locs.long().view(torch.uint8).reshape((-1, num_dims, 8)).flip(-1) - - # Now turn these into bits and truncate to num_bits. - gray = ( - locs_uint8.unsqueeze(-1) - .bitwise_and(bitpack_mask_rev) - .ne(0) - .byte() - .flatten(-2, -1)[..., -num_bits:] - ) - - # Run the decoding process the other way. - # Iterate forwards through the bits. - for bit in range(0, num_bits): - # Iterate forwards through the dimensions. - for dim in range(0, num_dims): - # Identify which ones have this bit active. - mask = gray[:, dim, bit] - - # Where this bit is on, invert the 0 dimension for lower bits. - gray[:, 0, bit + 1 :] = torch.logical_xor( - gray[:, 0, bit + 1 :], mask[:, None] - ) - - # Where the bit is off, exchange the lower bits with the 0 dimension. - to_flip = torch.logical_and( - torch.logical_not(mask[:, None]).repeat(1, gray.shape[2] - bit - 1), - torch.logical_xor(gray[:, 0, bit + 1 :], gray[:, dim, bit + 1 :]), - ) - gray[:, dim, bit + 1 :] = torch.logical_xor( - gray[:, dim, bit + 1 :], to_flip - ) - gray[:, 0, bit + 1 :] = torch.logical_xor(gray[:, 0, bit + 1 :], to_flip) - - # Now flatten out. - gray = gray.swapaxes(1, 2).reshape((-1, num_bits * num_dims)) - - # Convert Gray back to binary. - hh_bin = gray2binary(gray) - - # Pad back out to 64 bits. - extra_dims = 64 - num_bits * num_dims - padded = torch.nn.functional.pad(hh_bin, (extra_dims, 0), "constant", 0) - - # Convert binary values into uint8s. - hh_uint8 = ( - (padded.flip(-1).reshape((-1, 8, 8)) * bitpack_mask) - .sum(2) - .squeeze() - .type(torch.uint8) - ) - - # Convert uint8s into uint64s. - hh_uint64 = hh_uint8.view(torch.int64).squeeze() - - return hh_uint64 - - -def decode(hilberts, num_dims, num_bits): - """Decode an array of Hilbert integers into locations in a hypercube. - - This is a vectorized-ish version of the Hilbert curve implementation by John - Skilling as described in: - - Skilling, J. (2004, April). Programming the Hilbert curve. In AIP Conference - Proceedings (Vol. 707, No. 1, pp. 381-387). American Institute of Physics. - - Params: - ------- - hilberts - An ndarray of Hilbert integers. Must be an integer dtype and - cannot have fewer bits than num_dims * num_bits. - - num_dims - The dimensionality of the hypercube. Integer. - - num_bits - The number of bits for each dimension. Integer. - - Returns: - -------- - The output is an ndarray of unsigned integers with the same shape as hilberts - but with an additional dimension of size num_dims. - """ - - if num_dims * num_bits > 64: - raise ValueError( - """ - num_dims=%d and num_bits=%d for %d bits total, which can't be encoded - into a uint64. Are you sure you need that many points on your Hilbert - curve? - """ - % (num_dims, num_bits) - ) - - # Handle the case where we got handed a naked integer. - hilberts = torch.atleast_1d(hilberts) - - # Keep around the shape for later. - orig_shape = hilberts.shape - bitpack_mask = 2 ** torch.arange(0, 8).to(hilberts.device) - bitpack_mask_rev = bitpack_mask.flip(-1) - - # Treat each of the hilberts as a s equence of eight uint8. - # This treats all of the inputs as uint64 and makes things uniform. - hh_uint8 = ( - hilberts.ravel().type(torch.int64).view(torch.uint8).reshape((-1, 8)).flip(-1) - ) - - # Turn these lists of uints into lists of bits and then truncate to the size - # we actually need for using Skilling's procedure. - hh_bits = ( - hh_uint8.unsqueeze(-1) - .bitwise_and(bitpack_mask_rev) - .ne(0) - .byte() - .flatten(-2, -1)[:, -num_dims * num_bits :] - ) - - # Take the sequence of bits and Gray-code it. - gray = binary2gray(hh_bits) - - # There has got to be a better way to do this. - # I could index them differently, but the eventual packbits likes it this way. - gray = gray.reshape((-1, num_bits, num_dims)).swapaxes(1, 2) - - # Iterate backwards through the bits. - for bit in range(num_bits - 1, -1, -1): - # Iterate backwards through the dimensions. - for dim in range(num_dims - 1, -1, -1): - # Identify which ones have this bit active. - mask = gray[:, dim, bit] - - # Where this bit is on, invert the 0 dimension for lower bits. - gray[:, 0, bit + 1 :] = torch.logical_xor( - gray[:, 0, bit + 1 :], mask[:, None] - ) - - # Where the bit is off, exchange the lower bits with the 0 dimension. - to_flip = torch.logical_and( - torch.logical_not(mask[:, None]), - torch.logical_xor(gray[:, 0, bit + 1 :], gray[:, dim, bit + 1 :]), - ) - gray[:, dim, bit + 1 :] = torch.logical_xor( - gray[:, dim, bit + 1 :], to_flip - ) - gray[:, 0, bit + 1 :] = torch.logical_xor(gray[:, 0, bit + 1 :], to_flip) - - # Pad back out to 64 bits. - extra_dims = 64 - num_bits - padded = torch.nn.functional.pad(gray, (extra_dims, 0), "constant", 0) - - # Now chop these up into blocks of 8. - locs_chopped = padded.flip(-1).reshape((-1, num_dims, 8, 8)) - - # Take those blocks and turn them unto uint8s. - # from IPython import embed; embed() - locs_uint8 = (locs_chopped * bitpack_mask).sum(3).squeeze().type(torch.uint8) - - # Finally, treat these as uint64s. - flat_locs = locs_uint8.view(torch.int64) - - # Return them in the expected shape. - return flat_locs.reshape((*orig_shape, num_dims)) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/z_order.py b/point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/z_order.py deleted file mode 100644 index 6fd01a5..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/models/utils/serialization/z_order.py +++ /dev/null @@ -1,126 +0,0 @@ -# -------------------------------------------------------- -# Octree-based Sparse Convolutional Neural Networks -# Copyright (c) 2022 Peng-Shuai Wang -# Licensed under The MIT License [see LICENSE for details] -# Written by Peng-Shuai Wang -# -------------------------------------------------------- - -import torch -from typing import Optional, Union - - -class KeyLUT: - def __init__(self): - r256 = torch.arange(256, dtype=torch.int64) - r512 = torch.arange(512, dtype=torch.int64) - zero = torch.zeros(256, dtype=torch.int64) - device = torch.device("cpu") - - self._encode = { - device: ( - self.xyz2key(r256, zero, zero, 8), - self.xyz2key(zero, r256, zero, 8), - self.xyz2key(zero, zero, r256, 8), - ) - } - self._decode = {device: self.key2xyz(r512, 9)} - - def encode_lut(self, device=torch.device("cpu")): - if device not in self._encode: - cpu = torch.device("cpu") - self._encode[device] = tuple(e.to(device) for e in self._encode[cpu]) - return self._encode[device] - - def decode_lut(self, device=torch.device("cpu")): - if device not in self._decode: - cpu = torch.device("cpu") - self._decode[device] = tuple(e.to(device) for e in self._decode[cpu]) - return self._decode[device] - - def xyz2key(self, x, y, z, depth): - key = torch.zeros_like(x) - for i in range(depth): - mask = 1 << i - key = ( - key - | ((x & mask) << (2 * i + 2)) - | ((y & mask) << (2 * i + 1)) - | ((z & mask) << (2 * i + 0)) - ) - return key - - def key2xyz(self, key, depth): - x = torch.zeros_like(key) - y = torch.zeros_like(key) - z = torch.zeros_like(key) - for i in range(depth): - x = x | ((key & (1 << (3 * i + 2))) >> (2 * i + 2)) - y = y | ((key & (1 << (3 * i + 1))) >> (2 * i + 1)) - z = z | ((key & (1 << (3 * i + 0))) >> (2 * i + 0)) - return x, y, z - - -_key_lut = KeyLUT() - - -def xyz2key( - x: torch.Tensor, - y: torch.Tensor, - z: torch.Tensor, - b: Optional[Union[torch.Tensor, int]] = None, - depth: int = 16, -): - r"""Encodes :attr:`x`, :attr:`y`, :attr:`z` coordinates to the shuffled keys - based on pre-computed look up tables. The speed of this function is much - faster than the method based on for-loop. - - Args: - x (torch.Tensor): The x coordinate. - y (torch.Tensor): The y coordinate. - z (torch.Tensor): The z coordinate. - b (torch.Tensor or int): The batch index of the coordinates, and should be - smaller than 32768. If :attr:`b` is :obj:`torch.Tensor`, the size of - :attr:`b` must be the same as :attr:`x`, :attr:`y`, and :attr:`z`. - depth (int): The depth of the shuffled key, and must be smaller than 17 (< 17). - """ - - EX, EY, EZ = _key_lut.encode_lut(x.device) - x, y, z = x.long(), y.long(), z.long() - - mask = 255 if depth > 8 else (1 << depth) - 1 - key = EX[x & mask] | EY[y & mask] | EZ[z & mask] - if depth > 8: - mask = (1 << (depth - 8)) - 1 - key16 = EX[(x >> 8) & mask] | EY[(y >> 8) & mask] | EZ[(z >> 8) & mask] - key = key16 << 24 | key - - if b is not None: - b = b.long() - key = b << 48 | key - - return key - - -def key2xyz(key: torch.Tensor, depth: int = 16): - r"""Decodes the shuffled key to :attr:`x`, :attr:`y`, :attr:`z` coordinates - and the batch index based on pre-computed look up tables. - - Args: - key (torch.Tensor): The shuffled key. - depth (int): The depth of the shuffled key, and must be smaller than 17 (< 17). - """ - - DX, DY, DZ = _key_lut.decode_lut(key.device) - x, y, z = torch.zeros_like(key), torch.zeros_like(key), torch.zeros_like(key) - - b = key >> 48 - key = key & ((1 << 48) - 1) - - n = (depth + 2) // 3 - for i in range(n): - k = key >> (i * 9) & 511 - x = x | (DX[k] << (i * 3)) - y = y | (DY[k] << (i * 3)) - z = z | (DZ[k] << (i * 3)) - - return x, y, z, b diff --git a/point_transformer_v3/pointcept_minimal/pointcept/models/utils/structure.py b/point_transformer_v3/pointcept_minimal/pointcept/models/utils/structure.py deleted file mode 100644 index 1e8e80a..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/models/utils/structure.py +++ /dev/null @@ -1,209 +0,0 @@ -import torch -import spconv.pytorch as spconv - -try: - import ocnn -except ImportError: - ocnn = None -from addict import Dict -from typing import List - -from pointcept.models.utils.serialization import encode -from pointcept.models.utils import ( - offset2batch, - batch2offset, - offset2bincount, - bincount2offset, -) - - -class Point(Dict): - """ - Point Structure of Pointcept - - A Point (point cloud) in Pointcept is a dictionary that contains various properties of - a batched point cloud. The property with the following names have a specific definition - as follows: - - - "coord": original coordinate of point cloud; - - "grid_coord": grid coordinate for specific grid size (related to GridSampling); - Point also support the following optional attributes: - - "offset": if not exist, initialized as batch size is 1; - - "batch": if not exist, initialized as batch size is 1; - - "feat": feature of point cloud, default input of model; - - "grid_size": Grid size of point cloud (related to GridSampling); - (related to Serialization) - - "serialized_depth": depth of serialization, 2 ** depth * grid_size describe the maximum of point cloud range; - - "serialized_code": a list of serialization codes; - - "serialized_order": a list of serialization order determined by code; - - "serialized_inverse": a list of inverse mapping determined by code; - (related to Sparsify: SpConv) - - "sparse_shape": Sparse shape for Sparse Conv Tensor; - - "sparse_conv_feat": SparseConvTensor init with information provide by Point; - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - # If one of "offset" or "batch" do not exist, generate by the existing one - if "batch" not in self.keys() and "offset" in self.keys(): - self["batch"] = offset2batch(self.offset) - elif "offset" not in self.keys() and "batch" in self.keys(): - self["offset"] = batch2offset(self.batch) - - def serialization(self, order="z", depth=None, shuffle_orders=False): - """ - Point Cloud Serialization - - relay on ["grid_coord" or "coord" + "grid_size", "batch", "feat"] - """ - self["order"] = order - assert "batch" in self.keys() - if "grid_coord" not in self.keys(): - # if you don't want to operate GridSampling in data augmentation, - # please add the following augmentation into your pipline: - # dict(type="Copy", keys_dict={"grid_size": 0.01}), - # (adjust `grid_size` to what your want) - assert {"grid_size", "coord"}.issubset(self.keys()) - - self["grid_coord"] = torch.div( - self.coord - self.coord.min(0)[0], self.grid_size, rounding_mode="trunc" - ).int() - - if depth is None: - # Adaptive measure the depth of serialization cube (length = 2 ^ depth) - depth = int(self.grid_coord.max() + 1).bit_length() - self["serialized_depth"] = depth - # Maximum bit length for serialization code is 63 (int64) - assert depth * 3 + len(self.offset).bit_length() <= 63 - # Here we follow OCNN and set the depth limitation to 16 (48bit) for the point position. - # Although depth is limited to less than 16, we can encode a 655.36^3 (2^16 * 0.01) meter^3 - # cube with a grid size of 0.01 meter. We consider it is enough for the current stage. - # We can unlock the limitation by optimizing the z-order encoding function if necessary. - assert depth <= 16 - - # The serialization codes are arranged as following structures: - # [Order1 ([n]), - # Order2 ([n]), - # ... - # OrderN ([n])] (k, n) - code = [ - encode(self.grid_coord, self.batch, depth, order=order_) for order_ in order - ] - code = torch.stack(code) - order = torch.argsort(code) - inverse = torch.zeros_like(order).scatter_( - dim=1, - index=order, - src=torch.arange(0, code.shape[1], device=order.device).repeat( - code.shape[0], 1 - ), - ) - - if shuffle_orders: - perm = torch.randperm(code.shape[0]) - code = code[perm] - order = order[perm] - inverse = inverse[perm] - - self["serialized_code"] = code - self["serialized_order"] = order - self["serialized_inverse"] = inverse - - def sparsify(self, pad=96): - """ - Point Cloud Serialization - - Point cloud is sparse, here we use "sparsify" to specifically refer to - preparing "spconv.SparseConvTensor" for SpConv. - - relay on ["grid_coord" or "coord" + "grid_size", "batch", "feat"] - - pad: padding sparse for sparse shape. - """ - assert {"feat", "batch"}.issubset(self.keys()) - if "grid_coord" not in self.keys(): - # if you don't want to operate GridSampling in data augmentation, - # please add the following augmentation into your pipline: - # dict(type="Copy", keys_dict={"grid_size": 0.01}), - # (adjust `grid_size` to what your want) - assert {"grid_size", "coord"}.issubset(self.keys()) - self["grid_coord"] = torch.div( - self.coord - self.coord.min(0)[0], self.grid_size, rounding_mode="trunc" - ).int() - if "sparse_shape" in self.keys(): - sparse_shape = self.sparse_shape - else: - sparse_shape = torch.add( - torch.max(self.grid_coord, dim=0).values, pad - ).tolist() - sparse_conv_feat = spconv.SparseConvTensor( - features=self.feat, - indices=torch.cat( - [self.batch.unsqueeze(-1).int(), self.grid_coord.int()], dim=1 - ).contiguous(), - spatial_shape=sparse_shape, - batch_size=self.batch[-1].tolist() + 1, - ) - self["sparse_shape"] = sparse_shape - self["sparse_conv_feat"] = sparse_conv_feat - - def octreelization(self, depth=None, full_depth=None): - """ - Point Cloud Octreelization - - Generate octree with OCNN - relay on ["grid_coord", "batch", "feat"] - """ - assert ( - ocnn is not None - ), "Please follow https://github.com/octree-nn/ocnn-pytorch install ocnn." - assert {"feat", "batch"}.issubset(self.keys()) - # add 1 to make grid space support shift order - if "grid_coord" not in self.keys(): - # if you don't want to operate GridSampling in data augmentation, - # please add the following augmentation into your pipline: - # dict(type="Copy", keys_dict={"grid_size": 0.01}), - # (adjust `grid_size` to what your want) - assert {"grid_size", "coord"}.issubset(self.keys()) - self["grid_coord"] = torch.div( - self.coord - self.coord.min(0)[0], self.grid_size, rounding_mode="trunc" - ).int() - if depth is None: - if "depth" in self.keys(): - depth = self.depth - else: - depth = int(self.grid_coord.max() + 1).bit_length() - if full_depth is None: - full_depth = 1 - self["depth"] = depth - assert depth <= 16 # maximum in ocnn - - # [0, 2**depth] -> [0, 2] -> [-1, 1] - coord = self.grid_coord / 2 ** (self.depth - 1) - 1.0 - point = ocnn.octree.Points( - points=coord, - features=self.feat, - batch_id=self.batch.unsqueeze(-1), - batch_size=self.batch[-1] + 1, - ) - octree = ocnn.octree.Octree( - depth=depth, - full_depth=full_depth, - batch_size=self.batch[-1] + 1, - device=coord.device, - ) - octree.build_octree(point) - octree.construct_all_neigh() - - query_pts = torch.cat([self.grid_coord, point.batch_id], dim=1).contiguous() - inverse = octree.search_xyzb(query_pts, depth, True) - assert torch.sum(inverse < 0) == 0 # all mapping should be valid - inverse_ = torch.unique(inverse) - order = torch.zeros_like(inverse_).scatter_( - dim=0, - index=inverse, - src=torch.arange(0, inverse.shape[0], device=inverse.device), - ) - self["octree"] = octree - self["octree_order"] = order - self["octree_inverse"] = inverse diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/__init__.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/cache.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/cache.py deleted file mode 100644 index c7aec25..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/utils/cache.py +++ /dev/null @@ -1,60 +0,0 @@ -""" -Data Cache Utils - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -import os - -try: - import SharedArray -except ImportError: - SharedArray = None - -try: - from multiprocessing.shared_memory import ShareableList -except ImportError: - import warnings - - warnings.warn("Please update python version >= 3.8 to enable shared_memory") -import numpy as np - - -def shared_array(name, var=None): - if var is not None: - # check exist - if os.path.exists(f"/dev/shm/{name}"): - return SharedArray.attach(f"shm://{name}") - # create shared_array - data = SharedArray.create(f"shm://{name}", var.shape, dtype=var.dtype) - data[...] = var[...] - data.flags.writeable = False - else: - data = SharedArray.attach(f"shm://{name}").copy() - return data - - -def shared_dict(name, var=None): - name = str(name) - assert "." not in name # '.' is used as sep flag - data = {} - if var is not None: - assert isinstance(var, dict) - keys = var.keys() - # current version only cache np.array - keys_valid = [] - for key in keys: - if isinstance(var[key], np.ndarray): - keys_valid.append(key) - keys = keys_valid - - ShareableList(sequence=keys, name=name + ".keys") - for key in keys: - if isinstance(var[key], np.ndarray): - data[key] = shared_array(name=f"{name}.{key}", var=var[key]) - else: - keys = list(ShareableList(name=name + ".keys")) - for key in keys: - data[key] = shared_array(name=f"{name}.{key}") - return data diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/comm.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/comm.py deleted file mode 100644 index 69e29e7..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/utils/comm.py +++ /dev/null @@ -1,198 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -""" -This file contains primitives for multi-gpu communication. -This is useful when doing distributed training. -Modified from detectron2(https://github.com/facebookresearch/detectron2) - -Copyright (c) Xiaoyang Wu (xiaoyang.wu@connect.hku.hk). All Rights Reserved. -Please cite our work if you use any part of the code. -""" - -import functools -import numpy as np -import torch -import torch.distributed as dist - -_LOCAL_PROCESS_GROUP = None -""" -A torch process group which only includes processes that on the same machine as the current process. -This variable is set when processes are spawned by `launch()` in "engine/launch.py". -""" - - -def get_world_size() -> int: - if not dist.is_available(): - return 1 - if not dist.is_initialized(): - return 1 - return dist.get_world_size() - - -def get_rank() -> int: - if not dist.is_available(): - return 0 - if not dist.is_initialized(): - return 0 - return dist.get_rank() - - -def get_local_rank() -> int: - """ - Returns: - The rank of the current process within the local (per-machine) process group. - """ - if not dist.is_available(): - return 0 - if not dist.is_initialized(): - return 0 - assert ( - _LOCAL_PROCESS_GROUP is not None - ), "Local process group is not created! Please use launch() to spawn processes!" - return dist.get_rank(group=_LOCAL_PROCESS_GROUP) - - -def get_local_size() -> int: - """ - Returns: - The size of the per-machine process group, - i.e. the number of processes per machine. - """ - if not dist.is_available(): - return 1 - if not dist.is_initialized(): - return 1 - return dist.get_world_size(group=_LOCAL_PROCESS_GROUP) - - -def is_main_process() -> bool: - return get_rank() == 0 - - -def synchronize(): - """ - Helper function to synchronize (barrier) among all processes when - using distributed training - """ - if not dist.is_available(): - return - if not dist.is_initialized(): - return - world_size = dist.get_world_size() - if world_size == 1: - return - if dist.get_backend() == dist.Backend.NCCL: - # This argument is needed to avoid warnings. - # It's valid only for NCCL backend. - dist.barrier(device_ids=[torch.cuda.current_device()]) - else: - dist.barrier() - - -@functools.lru_cache() -def _get_global_gloo_group(): - """ - Return a process group based on gloo backend, containing all the ranks - The result is cached. - """ - if dist.get_backend() == "nccl": - return dist.new_group(backend="gloo") - else: - return dist.group.WORLD - - -def all_gather(data, group=None): - """ - Run all_gather on arbitrary picklable data (not necessarily tensors). - Args: - data: any picklable object - group: a torch process group. By default, will use a group which - contains all ranks on gloo backend. - Returns: - list[data]: list of data gathered from each rank - """ - if get_world_size() == 1: - return [data] - if group is None: - group = ( - _get_global_gloo_group() - ) # use CPU group by default, to reduce GPU RAM usage. - world_size = dist.get_world_size(group) - if world_size == 1: - return [data] - - output = [None for _ in range(world_size)] - dist.all_gather_object(output, data, group=group) - return output - - -def gather(data, dst=0, group=None): - """ - Run gather on arbitrary picklable data (not necessarily tensors). - Args: - data: any picklable object - dst (int): destination rank - group: a torch process group. By default, will use a group which - contains all ranks on gloo backend. - Returns: - list[data]: on dst, a list of data gathered from each rank. Otherwise, - an empty list. - """ - if get_world_size() == 1: - return [data] - if group is None: - group = _get_global_gloo_group() - world_size = dist.get_world_size(group=group) - if world_size == 1: - return [data] - rank = dist.get_rank(group=group) - - if rank == dst: - output = [None for _ in range(world_size)] - dist.gather_object(data, output, dst=dst, group=group) - return output - else: - dist.gather_object(data, None, dst=dst, group=group) - return [] - - -def shared_random_seed(): - """ - Returns: - int: a random number that is the same across all workers. - If workers need a shared RNG, they can use this shared seed to - create one. - All workers must call this function, otherwise it will deadlock. - """ - ints = np.random.randint(2**31) - all_ints = all_gather(ints) - return all_ints[0] - - -def reduce_dict(input_dict, average=True): - """ - Reduce the values in the dictionary from all processes so that process with rank - 0 has the reduced results. - Args: - input_dict (dict): inputs to be reduced. All the values must be scalar CUDA Tensor. - average (bool): whether to do average or sum - Returns: - a dict with the same keys as input_dict, after reduction. - """ - world_size = get_world_size() - if world_size < 2: - return input_dict - with torch.no_grad(): - names = [] - values = [] - # sort the keys so that they are consistent across processes - for k in sorted(input_dict.keys()): - names.append(k) - values.append(input_dict[k]) - values = torch.stack(values, dim=0) - dist.reduce(values, dst=0) - if dist.get_rank() == 0 and average: - # only main process gets accumulated, so only divide by - # world_size in this case - values /= world_size - reduced_dict = {k: v for k, v in zip(names, values)} - return reduced_dict diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/config.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/config.py deleted file mode 100644 index 762ebf4..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/utils/config.py +++ /dev/null @@ -1,694 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import ast -import copy -import os -import os.path as osp -import platform -import shutil -import sys -import tempfile -import uuid -import warnings -from argparse import Action, ArgumentParser -from collections import abc -from importlib import import_module - -from addict import Dict -from yapf.yapflib.yapf_api import FormatCode - -from .misc import import_modules_from_strings -from .path import check_file_exist - -if platform.system() == "Windows": - import regex as re -else: - import re - -BASE_KEY = "_base_" -DELETE_KEY = "_delete_" -DEPRECATION_KEY = "_deprecation_" -RESERVED_KEYS = ["filename", "text", "pretty_text"] - - -class ConfigDict(Dict): - def __missing__(self, name): - raise KeyError(name) - - def __getattr__(self, name): - try: - value = super(ConfigDict, self).__getattr__(name) - except KeyError: - ex = AttributeError( - f"'{self.__class__.__name__}' object has no " f"attribute '{name}'" - ) - except Exception as e: - ex = e - else: - return value - raise ex - - -def add_args(parser, cfg, prefix=""): - for k, v in cfg.items(): - if isinstance(v, str): - parser.add_argument("--" + prefix + k) - elif isinstance(v, int): - parser.add_argument("--" + prefix + k, type=int) - elif isinstance(v, float): - parser.add_argument("--" + prefix + k, type=float) - elif isinstance(v, bool): - parser.add_argument("--" + prefix + k, action="store_true") - elif isinstance(v, dict): - add_args(parser, v, prefix + k + ".") - elif isinstance(v, abc.Iterable): - parser.add_argument("--" + prefix + k, type=type(v[0]), nargs="+") - else: - print(f"cannot parse key {prefix + k} of type {type(v)}") - return parser - - -class Config: - """A facility for config and config files. - - It supports common file formats as configs: python/json/yaml. The interface - is the same as a dict object and also allows access config values as - attributes. - - Example: - >>> cfg = Config(dict(a=1, b=dict(b1=[0, 1]))) - >>> cfg.a - 1 - >>> cfg.b - {'b1': [0, 1]} - >>> cfg.b.b1 - [0, 1] - >>> cfg = Config.fromfile('tests/data/config/a.py') - >>> cfg.filename - "/home/kchen/projects/mmcv/tests/data/config/a.py" - >>> cfg.item4 - 'test' - >>> cfg - "Config [path: /home/kchen/projects/mmcv/tests/data/config/a.py]: " - "{'item1': [1, 2], 'item2': {'a': 0}, 'item3': True, 'item4': 'test'}" - """ - - @staticmethod - def _validate_py_syntax(filename): - with open(filename, "r", encoding="utf-8") as f: - # Setting encoding explicitly to resolve coding issue on windows - content = f.read() - try: - ast.parse(content) - except SyntaxError as e: - raise SyntaxError( - "There are syntax errors in config " f"file {filename}: {e}" - ) - - @staticmethod - def _substitute_predefined_vars(filename, temp_config_name): - file_dirname = osp.dirname(filename) - file_basename = osp.basename(filename) - file_basename_no_extension = osp.splitext(file_basename)[0] - file_extname = osp.splitext(filename)[1] - support_templates = dict( - fileDirname=file_dirname, - fileBasename=file_basename, - fileBasenameNoExtension=file_basename_no_extension, - fileExtname=file_extname, - ) - with open(filename, "r", encoding="utf-8") as f: - # Setting encoding explicitly to resolve coding issue on windows - config_file = f.read() - for key, value in support_templates.items(): - regexp = r"\{\{\s*" + str(key) + r"\s*\}\}" - value = value.replace("\\", "/") - config_file = re.sub(regexp, value, config_file) - with open(temp_config_name, "w", encoding="utf-8") as tmp_config_file: - tmp_config_file.write(config_file) - - @staticmethod - def _pre_substitute_base_vars(filename, temp_config_name): - """Substitute base variable placehoders to string, so that parsing - would work.""" - with open(filename, "r", encoding="utf-8") as f: - # Setting encoding explicitly to resolve coding issue on windows - config_file = f.read() - base_var_dict = {} - regexp = r"\{\{\s*" + BASE_KEY + r"\.([\w\.]+)\s*\}\}" - base_vars = set(re.findall(regexp, config_file)) - for base_var in base_vars: - randstr = f"_{base_var}_{uuid.uuid4().hex.lower()[:6]}" - base_var_dict[randstr] = base_var - regexp = r"\{\{\s*" + BASE_KEY + r"\." + base_var + r"\s*\}\}" - config_file = re.sub(regexp, f'"{randstr}"', config_file) - with open(temp_config_name, "w", encoding="utf-8") as tmp_config_file: - tmp_config_file.write(config_file) - return base_var_dict - - @staticmethod - def _substitute_base_vars(cfg, base_var_dict, base_cfg): - """Substitute variable strings to their actual values.""" - cfg = copy.deepcopy(cfg) - - if isinstance(cfg, dict): - for k, v in cfg.items(): - if isinstance(v, str) and v in base_var_dict: - new_v = base_cfg - for new_k in base_var_dict[v].split("."): - new_v = new_v[new_k] - cfg[k] = new_v - elif isinstance(v, (list, tuple, dict)): - cfg[k] = Config._substitute_base_vars(v, base_var_dict, base_cfg) - elif isinstance(cfg, tuple): - cfg = tuple( - Config._substitute_base_vars(c, base_var_dict, base_cfg) for c in cfg - ) - elif isinstance(cfg, list): - cfg = [ - Config._substitute_base_vars(c, base_var_dict, base_cfg) for c in cfg - ] - elif isinstance(cfg, str) and cfg in base_var_dict: - new_v = base_cfg - for new_k in base_var_dict[cfg].split("."): - new_v = new_v[new_k] - cfg = new_v - - return cfg - - @staticmethod - def _file2dict(filename, use_predefined_variables=True): - filename = osp.abspath(osp.expanduser(filename)) - check_file_exist(filename) - fileExtname = osp.splitext(filename)[1] - if fileExtname not in [".py", ".json", ".yaml", ".yml"]: - raise IOError("Only py/yml/yaml/json type are supported now!") - - with tempfile.TemporaryDirectory() as temp_config_dir: - temp_config_file = tempfile.NamedTemporaryFile( - dir=temp_config_dir, suffix=fileExtname - ) - if platform.system() == "Windows": - temp_config_file.close() - temp_config_name = osp.basename(temp_config_file.name) - # Substitute predefined variables - if use_predefined_variables: - Config._substitute_predefined_vars(filename, temp_config_file.name) - else: - shutil.copyfile(filename, temp_config_file.name) - # Substitute base variables from placeholders to strings - base_var_dict = Config._pre_substitute_base_vars( - temp_config_file.name, temp_config_file.name - ) - - if filename.endswith(".py"): - temp_module_name = osp.splitext(temp_config_name)[0] - sys.path.insert(0, temp_config_dir) - Config._validate_py_syntax(filename) - mod = import_module(temp_module_name) - sys.path.pop(0) - cfg_dict = { - name: value - for name, value in mod.__dict__.items() - if not name.startswith("__") - } - # delete imported module - del sys.modules[temp_module_name] - elif filename.endswith((".yml", ".yaml", ".json")): - raise NotImplementedError - # close temp file - temp_config_file.close() - - # check deprecation information - if DEPRECATION_KEY in cfg_dict: - deprecation_info = cfg_dict.pop(DEPRECATION_KEY) - warning_msg = ( - f"The config file {filename} will be deprecated " "in the future." - ) - if "expected" in deprecation_info: - warning_msg += f' Please use {deprecation_info["expected"]} ' "instead." - if "reference" in deprecation_info: - warning_msg += ( - " More information can be found at " - f'{deprecation_info["reference"]}' - ) - warnings.warn(warning_msg) - - cfg_text = filename + "\n" - with open(filename, "r", encoding="utf-8") as f: - # Setting encoding explicitly to resolve coding issue on windows - cfg_text += f.read() - - if BASE_KEY in cfg_dict: - cfg_dir = osp.dirname(filename) - base_filename = cfg_dict.pop(BASE_KEY) - base_filename = ( - base_filename if isinstance(base_filename, list) else [base_filename] - ) - - cfg_dict_list = list() - cfg_text_list = list() - for f in base_filename: - _cfg_dict, _cfg_text = Config._file2dict(osp.join(cfg_dir, f)) - cfg_dict_list.append(_cfg_dict) - cfg_text_list.append(_cfg_text) - - base_cfg_dict = dict() - for c in cfg_dict_list: - duplicate_keys = base_cfg_dict.keys() & c.keys() - if len(duplicate_keys) > 0: - raise KeyError( - "Duplicate key is not allowed among bases. " - f"Duplicate keys: {duplicate_keys}" - ) - base_cfg_dict.update(c) - - # Substitute base variables from strings to their actual values - cfg_dict = Config._substitute_base_vars( - cfg_dict, base_var_dict, base_cfg_dict - ) - - base_cfg_dict = Config._merge_a_into_b(cfg_dict, base_cfg_dict) - cfg_dict = base_cfg_dict - - # merge cfg_text - cfg_text_list.append(cfg_text) - cfg_text = "\n".join(cfg_text_list) - - return cfg_dict, cfg_text - - @staticmethod - def _merge_a_into_b(a, b, allow_list_keys=False): - """merge dict ``a`` into dict ``b`` (non-inplace). - - Values in ``a`` will overwrite ``b``. ``b`` is copied first to avoid - in-place modifications. - - Args: - a (dict): The source dict to be merged into ``b``. - b (dict): The origin dict to be fetch keys from ``a``. - allow_list_keys (bool): If True, int string keys (e.g. '0', '1') - are allowed in source ``a`` and will replace the element of the - corresponding index in b if b is a list. Default: False. - - Returns: - dict: The modified dict of ``b`` using ``a``. - - Examples: - # Normally merge a into b. - >>> Config._merge_a_into_b( - ... dict(obj=dict(a=2)), dict(obj=dict(a=1))) - {'obj': {'a': 2}} - - # Delete b first and merge a into b. - >>> Config._merge_a_into_b( - ... dict(obj=dict(_delete_=True, a=2)), dict(obj=dict(a=1))) - {'obj': {'a': 2}} - - # b is a list - >>> Config._merge_a_into_b( - ... {'0': dict(a=2)}, [dict(a=1), dict(b=2)], True) - [{'a': 2}, {'b': 2}] - """ - b = b.copy() - for k, v in a.items(): - if allow_list_keys and k.isdigit() and isinstance(b, list): - k = int(k) - if len(b) <= k: - raise KeyError(f"Index {k} exceeds the length of list {b}") - b[k] = Config._merge_a_into_b(v, b[k], allow_list_keys) - elif isinstance(v, dict) and k in b and not v.pop(DELETE_KEY, False): - allowed_types = (dict, list) if allow_list_keys else dict - if not isinstance(b[k], allowed_types): - raise TypeError( - f"{k}={v} in child config cannot inherit from base " - f"because {k} is a dict in the child config but is of " - f"type {type(b[k])} in base config. You may set " - f"`{DELETE_KEY}=True` to ignore the base config" - ) - b[k] = Config._merge_a_into_b(v, b[k], allow_list_keys) - else: - b[k] = v - return b - - @staticmethod - def fromfile(filename, use_predefined_variables=True, import_custom_modules=True): - cfg_dict, cfg_text = Config._file2dict(filename, use_predefined_variables) - if import_custom_modules and cfg_dict.get("custom_imports", None): - import_modules_from_strings(**cfg_dict["custom_imports"]) - return Config(cfg_dict, cfg_text=cfg_text, filename=filename) - - @staticmethod - def fromstring(cfg_str, file_format): - """Generate config from config str. - - Args: - cfg_str (str): Config str. - file_format (str): Config file format corresponding to the - config str. Only py/yml/yaml/json type are supported now! - - Returns: - obj:`Config`: Config obj. - """ - if file_format not in [".py", ".json", ".yaml", ".yml"]: - raise IOError("Only py/yml/yaml/json type are supported now!") - if file_format != ".py" and "dict(" in cfg_str: - # check if users specify a wrong suffix for python - warnings.warn('Please check "file_format", the file format may be .py') - with tempfile.NamedTemporaryFile( - "w", encoding="utf-8", suffix=file_format, delete=False - ) as temp_file: - temp_file.write(cfg_str) - # on windows, previous implementation cause error - # see PR 1077 for details - cfg = Config.fromfile(temp_file.name) - os.remove(temp_file.name) - return cfg - - @staticmethod - def auto_argparser(description=None): - """Generate argparser from config file automatically (experimental)""" - partial_parser = ArgumentParser(description=description) - partial_parser.add_argument("config", help="config file path") - cfg_file = partial_parser.parse_known_args()[0].config - cfg = Config.fromfile(cfg_file) - parser = ArgumentParser(description=description) - parser.add_argument("config", help="config file path") - add_args(parser, cfg) - return parser, cfg - - def __init__(self, cfg_dict=None, cfg_text=None, filename=None): - if cfg_dict is None: - cfg_dict = dict() - elif not isinstance(cfg_dict, dict): - raise TypeError("cfg_dict must be a dict, but " f"got {type(cfg_dict)}") - for key in cfg_dict: - if key in RESERVED_KEYS: - raise KeyError(f"{key} is reserved for config file") - - super(Config, self).__setattr__("_cfg_dict", ConfigDict(cfg_dict)) - super(Config, self).__setattr__("_filename", filename) - if cfg_text: - text = cfg_text - elif filename: - with open(filename, "r") as f: - text = f.read() - else: - text = "" - super(Config, self).__setattr__("_text", text) - - @property - def filename(self): - return self._filename - - @property - def text(self): - return self._text - - @property - def pretty_text(self): - indent = 4 - - def _indent(s_, num_spaces): - s = s_.split("\n") - if len(s) == 1: - return s_ - first = s.pop(0) - s = [(num_spaces * " ") + line for line in s] - s = "\n".join(s) - s = first + "\n" + s - return s - - def _format_basic_types(k, v, use_mapping=False): - if isinstance(v, str): - v_str = f"'{v}'" - else: - v_str = str(v) - - if use_mapping: - k_str = f"'{k}'" if isinstance(k, str) else str(k) - attr_str = f"{k_str}: {v_str}" - else: - attr_str = f"{str(k)}={v_str}" - attr_str = _indent(attr_str, indent) - - return attr_str - - def _format_list(k, v, use_mapping=False): - # check if all items in the list are dict - if all(isinstance(_, dict) for _ in v): - v_str = "[\n" - v_str += "\n".join( - f"dict({_indent(_format_dict(v_), indent)})," for v_ in v - ).rstrip(",") - if use_mapping: - k_str = f"'{k}'" if isinstance(k, str) else str(k) - attr_str = f"{k_str}: {v_str}" - else: - attr_str = f"{str(k)}={v_str}" - attr_str = _indent(attr_str, indent) + "]" - else: - attr_str = _format_basic_types(k, v, use_mapping) - return attr_str - - def _contain_invalid_identifier(dict_str): - contain_invalid_identifier = False - for key_name in dict_str: - contain_invalid_identifier |= not str(key_name).isidentifier() - return contain_invalid_identifier - - def _format_dict(input_dict, outest_level=False): - r = "" - s = [] - - use_mapping = _contain_invalid_identifier(input_dict) - if use_mapping: - r += "{" - for idx, (k, v) in enumerate(input_dict.items()): - is_last = idx >= len(input_dict) - 1 - end = "" if outest_level or is_last else "," - if isinstance(v, dict): - v_str = "\n" + _format_dict(v) - if use_mapping: - k_str = f"'{k}'" if isinstance(k, str) else str(k) - attr_str = f"{k_str}: dict({v_str}" - else: - attr_str = f"{str(k)}=dict({v_str}" - attr_str = _indent(attr_str, indent) + ")" + end - elif isinstance(v, list): - attr_str = _format_list(k, v, use_mapping) + end - else: - attr_str = _format_basic_types(k, v, use_mapping) + end - - s.append(attr_str) - r += "\n".join(s) - if use_mapping: - r += "}" - return r - - cfg_dict = self._cfg_dict.to_dict() - text = _format_dict(cfg_dict, outest_level=True) - # copied from setup.cfg - yapf_style = dict( - based_on_style="pep8", - blank_line_before_nested_class_or_def=True, - split_before_expression_after_opening_paren=True, - ) - text, _ = FormatCode(text, style_config=yapf_style) - - return text - - def __repr__(self): - return f"Config (path: {self.filename}): {self._cfg_dict.__repr__()}" - - def __len__(self): - return len(self._cfg_dict) - - def __getattr__(self, name): - return getattr(self._cfg_dict, name) - - def __getitem__(self, name): - return self._cfg_dict.__getitem__(name) - - def __setattr__(self, name, value): - if isinstance(value, dict): - value = ConfigDict(value) - self._cfg_dict.__setattr__(name, value) - - def __setitem__(self, name, value): - if isinstance(value, dict): - value = ConfigDict(value) - self._cfg_dict.__setitem__(name, value) - - def __iter__(self): - return iter(self._cfg_dict) - - def __getstate__(self): - return (self._cfg_dict, self._filename, self._text) - - def __setstate__(self, state): - _cfg_dict, _filename, _text = state - super(Config, self).__setattr__("_cfg_dict", _cfg_dict) - super(Config, self).__setattr__("_filename", _filename) - super(Config, self).__setattr__("_text", _text) - - def dump(self, file=None): - cfg_dict = super(Config, self).__getattribute__("_cfg_dict").to_dict() - if self.filename.endswith(".py"): - if file is None: - return self.pretty_text - else: - with open(file, "w", encoding="utf-8") as f: - f.write(self.pretty_text) - else: - import mmcv - - if file is None: - file_format = self.filename.split(".")[-1] - return mmcv.dump(cfg_dict, file_format=file_format) - else: - mmcv.dump(cfg_dict, file) - - def merge_from_dict(self, options, allow_list_keys=True): - """Merge list into cfg_dict. - - Merge the dict parsed by MultipleKVAction into this cfg. - - Examples: - >>> options = {'models.backbone.depth': 50, - ... 'models.backbone.with_cp':True} - >>> cfg = Config(dict(models=dict(backbone=dict(type='ResNet')))) - >>> cfg.merge_from_dict(options) - >>> cfg_dict = super(Config, self).__getattribute__('_cfg_dict') - >>> assert cfg_dict == dict( - ... models=dict(backbone=dict(depth=50, with_cp=True))) - - # Merge list element - >>> cfg = Config(dict(pipeline=[ - ... dict(type='LoadImage'), dict(type='LoadAnnotations')])) - >>> options = dict(pipeline={'0': dict(type='SelfLoadImage')}) - >>> cfg.merge_from_dict(options, allow_list_keys=True) - >>> cfg_dict = super(Config, self).__getattribute__('_cfg_dict') - >>> assert cfg_dict == dict(pipeline=[ - ... dict(type='SelfLoadImage'), dict(type='LoadAnnotations')]) - - Args: - options (dict): dict of configs to merge from. - allow_list_keys (bool): If True, int string keys (e.g. '0', '1') - are allowed in ``options`` and will replace the element of the - corresponding index in the config if the config is a list. - Default: True. - """ - option_cfg_dict = {} - for full_key, v in options.items(): - d = option_cfg_dict - key_list = full_key.split(".") - for subkey in key_list[:-1]: - d.setdefault(subkey, ConfigDict()) - d = d[subkey] - subkey = key_list[-1] - d[subkey] = v - - cfg_dict = super(Config, self).__getattribute__("_cfg_dict") - super(Config, self).__setattr__( - "_cfg_dict", - Config._merge_a_into_b( - option_cfg_dict, cfg_dict, allow_list_keys=allow_list_keys - ), - ) - - -class DictAction(Action): - """ - argparse action to split an argument into KEY=VALUE form - on the first = and append to a dictionary. List options can - be passed as comma separated values, i.e 'KEY=V1,V2,V3', or with explicit - brackets, i.e. 'KEY=[V1,V2,V3]'. It also support nested brackets to build - list/tuple values. e.g. 'KEY=[(V1,V2),(V3,V4)]' - """ - - @staticmethod - def _parse_int_float_bool(val): - try: - return int(val) - except ValueError: - pass - try: - return float(val) - except ValueError: - pass - if val.lower() in ["true", "false"]: - return True if val.lower() == "true" else False - return val - - @staticmethod - def _parse_iterable(val): - """Parse iterable values in the string. - - All elements inside '()' or '[]' are treated as iterable values. - - Args: - val (str): Value string. - - Returns: - list | tuple: The expanded list or tuple from the string. - - Examples: - >>> DictAction._parse_iterable('1,2,3') - [1, 2, 3] - >>> DictAction._parse_iterable('[a, b, c]') - ['a', 'b', 'c'] - >>> DictAction._parse_iterable('[(1, 2, 3), [a, b], c]') - [(1, 2, 3), ['a', 'b'], 'c'] - """ - - def find_next_comma(string): - """Find the position of next comma in the string. - - If no ',' is found in the string, return the string length. All - chars inside '()' and '[]' are treated as one element and thus ',' - inside these brackets are ignored. - """ - assert (string.count("(") == string.count(")")) and ( - string.count("[") == string.count("]") - ), f"Imbalanced brackets exist in {string}" - end = len(string) - for idx, char in enumerate(string): - pre = string[:idx] - # The string before this ',' is balanced - if ( - (char == ",") - and (pre.count("(") == pre.count(")")) - and (pre.count("[") == pre.count("]")) - ): - end = idx - break - return end - - # Strip ' and " characters and replace whitespace. - val = val.strip("'\"").replace(" ", "") - is_tuple = False - if val.startswith("(") and val.endswith(")"): - is_tuple = True - val = val[1:-1] - elif val.startswith("[") and val.endswith("]"): - val = val[1:-1] - elif "," not in val: - # val is a single value - return DictAction._parse_int_float_bool(val) - - values = [] - while len(val) > 0: - comma_idx = find_next_comma(val) - element = DictAction._parse_iterable(val[:comma_idx]) - values.append(element) - val = val[comma_idx + 1 :] - if is_tuple: - values = tuple(values) - return values - - def __call__(self, parser, namespace, values, option_string=None): - options = {} - for kv in values: - key, val = kv.split("=", maxsplit=1) - options[key] = self._parse_iterable(val) - setattr(namespace, self.dest, options) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/env.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/env.py deleted file mode 100644 index 653f007..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/utils/env.py +++ /dev/null @@ -1,36 +0,0 @@ -""" -Environment Utils - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -import os -import random -import numpy as np -import torch -import torch.backends.cudnn as cudnn - -from datetime import datetime - - -def get_random_seed(): - seed = ( - os.getpid() - + int(datetime.now().strftime("%S%f")) - + int.from_bytes(os.urandom(2), "big") - ) - return seed - - -def set_seed(seed=None): - if seed is None: - seed = get_random_seed() - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - torch.cuda.manual_seed(seed) - torch.cuda.manual_seed_all(seed) - cudnn.benchmark = False - cudnn.deterministic = True - os.environ["PYTHONHASHSEED"] = str(seed) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/events.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/events.py deleted file mode 100644 index 718ee91..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/utils/events.py +++ /dev/null @@ -1,612 +0,0 @@ -""" -Events Utils - -Modified from Detectron2 - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -import datetime -import json -import logging -import os -import time -import torch -import numpy as np -import traceback -import sys - -from typing import List, Optional, Tuple -from collections import defaultdict -from contextlib import contextmanager - -__all__ = [ - "get_event_storage", - "JSONWriter", - "TensorboardXWriter", - "CommonMetricPrinter", - "EventStorage", - "ExceptionWriter", -] - -_CURRENT_STORAGE_STACK = [] - - -def get_event_storage(): - """ - Returns: - The :class:`EventStorage` object that's currently being used. - Throws an error if no :class:`EventStorage` is currently enabled. - """ - assert len( - _CURRENT_STORAGE_STACK - ), "get_event_storage() has to be called inside a 'with EventStorage(...)' context!" - return _CURRENT_STORAGE_STACK[-1] - - -class EventWriter: - """ - Base class for writers that obtain events from :class:`EventStorage` and process them. - """ - - def write(self): - raise NotImplementedError - - def close(self): - pass - - -class JSONWriter(EventWriter): - """ - Write scalars to a json file. - It saves scalars as one json per line (instead of a big json) for easy parsing. - Examples parsing such a json file: - :: - $ cat metrics.json | jq -s '.[0:2]' - [ - { - "data_time": 0.008433341979980469, - "iteration": 19, - "loss": 1.9228371381759644, - "loss_box_reg": 0.050025828182697296, - "loss_classifier": 0.5316952466964722, - "loss_mask": 0.7236229181289673, - "loss_rpn_box": 0.0856662318110466, - "loss_rpn_cls": 0.48198649287223816, - "lr": 0.007173333333333333, - "time": 0.25401854515075684 - }, - { - "data_time": 0.007216215133666992, - "iteration": 39, - "loss": 1.282649278640747, - "loss_box_reg": 0.06222952902317047, - "loss_classifier": 0.30682939291000366, - "loss_mask": 0.6970193982124329, - "loss_rpn_box": 0.038663312792778015, - "loss_rpn_cls": 0.1471673548221588, - "lr": 0.007706666666666667, - "time": 0.2490077018737793 - } - ] - $ cat metrics.json | jq '.loss_mask' - 0.7126231789588928 - 0.689423680305481 - 0.6776131987571716 - ... - """ - - def __init__(self, json_file, window_size=20): - """ - Args: - json_file (str): path to the json file. New data will be appended if the file exists. - window_size (int): the window size of median smoothing for the scalars whose - `smoothing_hint` are True. - """ - self._file_handle = open(json_file, "a") - self._window_size = window_size - self._last_write = -1 - - def write(self): - storage = get_event_storage() - to_save = defaultdict(dict) - - for k, (v, iter) in storage.latest_with_smoothing_hint( - self._window_size - ).items(): - # keep scalars that have not been written - if iter <= self._last_write: - continue - to_save[iter][k] = v - if len(to_save): - all_iters = sorted(to_save.keys()) - self._last_write = max(all_iters) - - for itr, scalars_per_iter in to_save.items(): - scalars_per_iter["iteration"] = itr - self._file_handle.write(json.dumps(scalars_per_iter, sort_keys=True) + "\n") - self._file_handle.flush() - try: - os.fsync(self._file_handle.fileno()) - except AttributeError: - pass - - def close(self): - self._file_handle.close() - - -class TensorboardXWriter(EventWriter): - """ - Write all scalars to a tensorboard file. - """ - - def __init__(self, log_dir: str, window_size: int = 20, **kwargs): - """ - Args: - log_dir (str): the directory to save the output events - window_size (int): the scalars will be median-smoothed by this window size - kwargs: other arguments passed to `torch.utils.tensorboard.SummaryWriter(...)` - """ - self._window_size = window_size - from torch.utils.tensorboard import SummaryWriter - - self._writer = SummaryWriter(log_dir, **kwargs) - self._last_write = -1 - - def write(self): - storage = get_event_storage() - new_last_write = self._last_write - for k, (v, iter) in storage.latest_with_smoothing_hint( - self._window_size - ).items(): - if iter > self._last_write: - self._writer.add_scalar(k, v, iter) - new_last_write = max(new_last_write, iter) - self._last_write = new_last_write - - # storage.put_{image,histogram} is only meant to be used by - # tensorboard writer. So we access its internal fields directly from here. - if len(storage._vis_data) >= 1: - for img_name, img, step_num in storage._vis_data: - self._writer.add_image(img_name, img, step_num) - # Storage stores all image data and rely on this writer to clear them. - # As a result it assumes only one writer will use its image data. - # An alternative design is to let storage store limited recent - # data (e.g. only the most recent image) that all writers can access. - # In that case a writer may not see all image data if its period is long. - storage.clear_images() - - if len(storage._histograms) >= 1: - for params in storage._histograms: - self._writer.add_histogram_raw(**params) - storage.clear_histograms() - - def close(self): - if hasattr(self, "_writer"): # doesn't exist when the code fails at import - self._writer.close() - - -class CommonMetricPrinter(EventWriter): - """ - Print **common** metrics to the terminal, including - iteration time, ETA, memory, all losses, and the learning rate. - It also applies smoothing using a window of 20 elements. - It's meant to print common metrics in common ways. - To print something in more customized ways, please implement a similar printer by yourself. - """ - - def __init__(self, max_iter: Optional[int] = None, window_size: int = 20): - """ - Args: - max_iter: the maximum number of iterations to train. - Used to compute ETA. If not given, ETA will not be printed. - window_size (int): the losses will be median-smoothed by this window size - """ - self.logger = logging.getLogger(__name__) - self._max_iter = max_iter - self._window_size = window_size - self._last_write = ( - None # (step, time) of last call to write(). Used to compute ETA - ) - - def _get_eta(self, storage) -> Optional[str]: - if self._max_iter is None: - return "" - iteration = storage.iter - try: - eta_seconds = storage.history("time").median(1000) * ( - self._max_iter - iteration - 1 - ) - storage.put_scalar("eta_seconds", eta_seconds, smoothing_hint=False) - return str(datetime.timedelta(seconds=int(eta_seconds))) - except KeyError: - # estimate eta on our own - more noisy - eta_string = None - if self._last_write is not None: - estimate_iter_time = (time.perf_counter() - self._last_write[1]) / ( - iteration - self._last_write[0] - ) - eta_seconds = estimate_iter_time * (self._max_iter - iteration - 1) - eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) - self._last_write = (iteration, time.perf_counter()) - return eta_string - - def write(self): - storage = get_event_storage() - iteration = storage.iter - if iteration == self._max_iter: - # This hook only reports training progress (loss, ETA, etc) but not other data, - # therefore do not write anything after training succeeds, even if this method - # is called. - return - - try: - data_time = storage.history("data_time").avg(20) - except KeyError: - # they may not exist in the first few iterations (due to warmup) - # or when SimpleTrainer is not used - data_time = None - try: - iter_time = storage.history("time").global_avg() - except KeyError: - iter_time = None - try: - lr = "{:.5g}".format(storage.history("lr").latest()) - except KeyError: - lr = "N/A" - - eta_string = self._get_eta(storage) - - if torch.cuda.is_available(): - max_mem_mb = torch.cuda.max_memory_allocated() / 1024.0 / 1024.0 - else: - max_mem_mb = None - - # NOTE: max_mem is parsed by grep in "dev/parse_results.sh" - self.logger.info( - " {eta}iter: {iter} {losses} {time}{data_time}lr: {lr} {memory}".format( - eta=f"eta: {eta_string} " if eta_string else "", - iter=iteration, - losses=" ".join( - [ - "{}: {:.4g}".format(k, v.median(self._window_size)) - for k, v in storage.histories().items() - if "loss" in k - ] - ), - time=( - "time: {:.4f} ".format(iter_time) if iter_time is not None else "" - ), - data_time=( - "data_time: {:.4f} ".format(data_time) - if data_time is not None - else "" - ), - lr=lr, - memory=( - "max_mem: {:.0f}M".format(max_mem_mb) - if max_mem_mb is not None - else "" - ), - ) - ) - - -class EventStorage: - """ - The user-facing class that provides metric storage functionalities. - In the future we may add support for storing / logging other types of data if needed. - """ - - def __init__(self, start_iter=0): - """ - Args: - start_iter (int): the iteration number to start with - """ - self._history = defaultdict(AverageMeter) - self._smoothing_hints = {} - self._latest_scalars = {} - self._iter = start_iter - self._current_prefix = "" - self._vis_data = [] - self._histograms = [] - - # def put_image(self, img_name, img_tensor): - # """ - # Add an `img_tensor` associated with `img_name`, to be shown on - # tensorboard. - # Args: - # img_name (str): The name of the image to put into tensorboard. - # img_tensor (torch.Tensor or numpy.array): An `uint8` or `float` - # Tensor of shape `[channel, height, width]` where `channel` is - # 3. The image format should be RGB. The elements in img_tensor - # can either have values in [0, 1] (float32) or [0, 255] (uint8). - # The `img_tensor` will be visualized in tensorboard. - # """ - # self._vis_data.append((img_name, img_tensor, self._iter)) - - def put_scalar(self, name, value, n=1, smoothing_hint=False): - """ - Add a scalar `value` to the `HistoryBuffer` associated with `name`. - Args: - smoothing_hint (bool): a 'hint' on whether this scalar is noisy and should be - smoothed when logged. The hint will be accessible through - :meth:`EventStorage.smoothing_hints`. A writer may ignore the hint - and apply custom smoothing rule. - It defaults to True because most scalars we save need to be smoothed to - provide any useful signal. - """ - name = self._current_prefix + name - history = self._history[name] - history.update(value, n) - self._latest_scalars[name] = (value, self._iter) - - existing_hint = self._smoothing_hints.get(name) - if existing_hint is not None: - assert ( - existing_hint == smoothing_hint - ), "Scalar {} was put with a different smoothing_hint!".format(name) - else: - self._smoothing_hints[name] = smoothing_hint - - # def put_scalars(self, *, smoothing_hint=True, **kwargs): - # """ - # Put multiple scalars from keyword arguments. - # Examples: - # storage.put_scalars(loss=my_loss, accuracy=my_accuracy, smoothing_hint=True) - # """ - # for k, v in kwargs.items(): - # self.put_scalar(k, v, smoothing_hint=smoothing_hint) - # - # def put_histogram(self, hist_name, hist_tensor, bins=1000): - # """ - # Create a histogram from a tensor. - # Args: - # hist_name (str): The name of the histogram to put into tensorboard. - # hist_tensor (torch.Tensor): A Tensor of arbitrary shape to be converted - # into a histogram. - # bins (int): Number of histogram bins. - # """ - # ht_min, ht_max = hist_tensor.min().item(), hist_tensor.max().item() - # - # # Create a histogram with PyTorch - # hist_counts = torch.histc(hist_tensor, bins=bins) - # hist_edges = torch.linspace(start=ht_min, end=ht_max, steps=bins + 1, dtype=torch.float32) - # - # # Parameter for the add_histogram_raw function of SummaryWriter - # hist_params = dict( - # tag=hist_name, - # min=ht_min, - # max=ht_max, - # num=len(hist_tensor), - # sum=float(hist_tensor.sum()), - # sum_squares=float(torch.sum(hist_tensor**2)), - # bucket_limits=hist_edges[1:].tolist(), - # bucket_counts=hist_counts.tolist(), - # global_step=self._iter, - # ) - # self._histograms.append(hist_params) - - def history(self, name): - """ - Returns: - AverageMeter: the history for name - """ - ret = self._history.get(name, None) - if ret is None: - raise KeyError("No history metric available for {}!".format(name)) - return ret - - def histories(self): - """ - Returns: - dict[name -> HistoryBuffer]: the HistoryBuffer for all scalars - """ - return self._history - - def latest(self): - """ - Returns: - dict[str -> (float, int)]: mapping from the name of each scalar to the most - recent value and the iteration number its added. - """ - return self._latest_scalars - - def latest_with_smoothing_hint(self, window_size=20): - """ - Similar to :meth:`latest`, but the returned values - are either the un-smoothed original latest value, - or a median of the given window_size, - depend on whether the smoothing_hint is True. - This provides a default behavior that other writers can use. - """ - result = {} - for k, (v, itr) in self._latest_scalars.items(): - result[k] = ( - self._history[k].median(window_size) if self._smoothing_hints[k] else v, - itr, - ) - return result - - def smoothing_hints(self): - """ - Returns: - dict[name -> bool]: the user-provided hint on whether the scalar - is noisy and needs smoothing. - """ - return self._smoothing_hints - - def step(self): - """ - User should either: (1) Call this function to increment storage.iter when needed. Or - (2) Set `storage.iter` to the correct iteration number before each iteration. - The storage will then be able to associate the new data with an iteration number. - """ - self._iter += 1 - - @property - def iter(self): - """ - Returns: - int: The current iteration number. When used together with a trainer, - this is ensured to be the same as trainer.iter. - """ - return self._iter - - @iter.setter - def iter(self, val): - self._iter = int(val) - - @property - def iteration(self): - # for backward compatibility - return self._iter - - def __enter__(self): - _CURRENT_STORAGE_STACK.append(self) - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - assert _CURRENT_STORAGE_STACK[-1] == self - _CURRENT_STORAGE_STACK.pop() - - @contextmanager - def name_scope(self, name): - """ - Yields: - A context within which all the events added to this storage - will be prefixed by the name scope. - """ - old_prefix = self._current_prefix - self._current_prefix = name.rstrip("/") + "/" - yield - self._current_prefix = old_prefix - - def clear_images(self): - """ - Delete all the stored images for visualization. This should be called - after images are written to tensorboard. - """ - self._vis_data = [] - - def clear_histograms(self): - """ - Delete all the stored histograms for visualization. - This should be called after histograms are written to tensorboard. - """ - self._histograms = [] - - def reset_history(self, name): - ret = self._history.get(name, None) - if ret is None: - raise KeyError("No history metric available for {}!".format(name)) - ret.reset() - - def reset_histories(self): - for name in self._history.keys(): - self._history[name].reset() - - -class AverageMeter: - """Computes and stores the average and current value""" - - def __init__(self): - self.val = 0 - self.avg = 0 - self.total = 0 - self.count = 0 - - def reset(self): - self.val = 0 - self.avg = 0 - self.total = 0 - self.count = 0 - - def update(self, val, n=1): - self.val = val - self.total += val * n - self.count += n - self.avg = self.total / self.count - - -class HistoryBuffer: - """ - Track a series of scalar values and provide access to smoothed values over a - window or the global average of the series. - """ - - def __init__(self, max_length: int = 1000000) -> None: - """ - Args: - max_length: maximal number of values that can be stored in the - buffer. When the capacity of the buffer is exhausted, old - values will be removed. - """ - self._max_length: int = max_length - self._data: List[Tuple[float, float]] = [] # (value, iteration) pairs - self._count: int = 0 - self._global_avg: float = 0 - - def update(self, value: float, iteration: Optional[float] = None) -> None: - """ - Add a new scalar value produced at certain iteration. If the length - of the buffer exceeds self._max_length, the oldest element will be - removed from the buffer. - """ - if iteration is None: - iteration = self._count - if len(self._data) == self._max_length: - self._data.pop(0) - self._data.append((value, iteration)) - - self._count += 1 - self._global_avg += (value - self._global_avg) / self._count - - def latest(self) -> float: - """ - Return the latest scalar value added to the buffer. - """ - return self._data[-1][0] - - def median(self, window_size: int) -> float: - """ - Return the median of the latest `window_size` values in the buffer. - """ - return np.median([x[0] for x in self._data[-window_size:]]) - - def avg(self, window_size: int) -> float: - """ - Return the mean of the latest `window_size` values in the buffer. - """ - return np.mean([x[0] for x in self._data[-window_size:]]) - - def global_avg(self) -> float: - """ - Return the mean of all the elements in the buffer. Note that this - includes those getting removed due to limited buffer storage. - """ - return self._global_avg - - def values(self) -> List[Tuple[float, float]]: - """ - Returns: - list[(number, iteration)]: content of the current buffer. - """ - return self._data - - -class ExceptionWriter: - - def __init__(self): - self.logger = logging.getLogger(__name__) - - def __enter__(self): - pass - - def __exit__(self, exc_type, exc_val, exc_tb): - if exc_type: - tb = traceback.format_exception(exc_type, exc_val, exc_tb) - formatted_tb_str = "".join(tb) - self.logger.error(formatted_tb_str) - sys.exit(1) # This prevents double logging the error to the console diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/misc.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/misc.py deleted file mode 100644 index 3177bae..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/utils/misc.py +++ /dev/null @@ -1,164 +0,0 @@ -""" -Misc - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -import os -import warnings -from collections import abc -import numpy as np -import torch -from importlib import import_module - - -class AverageMeter(object): - """Computes and stores the average and current value""" - - def __init__(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - self.val = val - self.sum += val * n - self.count += n - self.avg = self.sum / self.count - - -def intersection_and_union(output, target, K, ignore_index=-1): - # 'K' classes, output and target sizes are N or N * L or N * H * W, each value in range 0 to K - 1. - assert output.ndim in [1, 2, 3] - assert output.shape == target.shape - output = output.reshape(output.size).copy() - target = target.reshape(target.size) - output[np.where(target == ignore_index)[0]] = ignore_index - intersection = output[np.where(output == target)[0]] - area_intersection, _ = np.histogram(intersection, bins=np.arange(K + 1)) - area_output, _ = np.histogram(output, bins=np.arange(K + 1)) - area_target, _ = np.histogram(target, bins=np.arange(K + 1)) - area_union = area_output + area_target - area_intersection - return area_intersection, area_union, area_target - - -def intersection_and_union_gpu(output, target, k, ignore_index=-1): - # 'K' classes, output and target sizes are N or N * L or N * H * W, each value in range 0 to K - 1. - assert output.dim() in [1, 2, 3] - assert output.shape == target.shape - output = output.view(-1) - target = target.view(-1) - output[target == ignore_index] = ignore_index - intersection = output[output == target] - area_intersection = torch.histc(intersection, bins=k, min=0, max=k - 1) - area_output = torch.histc(output, bins=k, min=0, max=k - 1) - area_target = torch.histc(target, bins=k, min=0, max=k - 1) - area_union = area_output + area_target - area_intersection - return area_intersection, area_union, area_target - - -def make_dirs(dir_name): - if not os.path.exists(dir_name): - os.makedirs(dir_name, exist_ok=True) - - -def find_free_port(): - import socket - - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - # Binding to port 0 will cause the OS to find an available port for us - sock.bind(("", 0)) - port = sock.getsockname()[1] - sock.close() - # NOTE: there is still a chance the port could be taken by other processes. - return port - - -def is_seq_of(seq, expected_type, seq_type=None): - """Check whether it is a sequence of some type. - - Args: - seq (Sequence): The sequence to be checked. - expected_type (type): Expected type of sequence items. - seq_type (type, optional): Expected sequence type. - - Returns: - bool: Whether the sequence is valid. - """ - if seq_type is None: - exp_seq_type = abc.Sequence - else: - assert isinstance(seq_type, type) - exp_seq_type = seq_type - if not isinstance(seq, exp_seq_type): - return False - for item in seq: - if not isinstance(item, expected_type): - return False - return True - - -def is_str(x): - """Whether the input is an string instance. - - Note: This method is deprecated since python 2 is no longer supported. - """ - return isinstance(x, str) - - -def import_modules_from_strings(imports, allow_failed_imports=False): - """Import modules from the given list of strings. - - Args: - imports (list | str | None): The given module names to be imported. - allow_failed_imports (bool): If True, the failed imports will return - None. Otherwise, an ImportError is raise. Default: False. - - Returns: - list[module] | module | None: The imported modules. - - Examples: - >>> osp, sys = import_modules_from_strings( - ... ['os.path', 'sys']) - >>> import os.path as osp_ - >>> import sys as sys_ - >>> assert osp == osp_ - >>> assert sys == sys_ - """ - if not imports: - return - single_import = False - if isinstance(imports, str): - single_import = True - imports = [imports] - if not isinstance(imports, list): - raise TypeError(f"custom_imports must be a list but got type {type(imports)}") - imported = [] - for imp in imports: - if not isinstance(imp, str): - raise TypeError(f"{imp} is of type {type(imp)} and cannot be imported.") - try: - imported_tmp = import_module(imp) - except ImportError: - if allow_failed_imports: - warnings.warn(f"{imp} failed to import and is ignored.", UserWarning) - imported_tmp = None - else: - raise ImportError - imported.append(imported_tmp) - if single_import: - imported = imported[0] - return imported - - -class DummyClass: - def __init__(self): - pass diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/optimizer.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/optimizer.py deleted file mode 100644 index eef8735..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/utils/optimizer.py +++ /dev/null @@ -1,57 +0,0 @@ -""" -Optimizer - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -import copy -import torch -from pointcept.utils.logger import get_root_logger -from pointcept.utils.registry import Registry - -OPTIMIZERS = Registry("optimizers") - - -OPTIMIZERS.register_module(module=torch.optim.SGD, name="SGD") -OPTIMIZERS.register_module(module=torch.optim.Adam, name="Adam") -OPTIMIZERS.register_module(module=torch.optim.AdamW, name="AdamW") - - -def build_optimizer(cfg, model, param_dicts=None): - cfg = copy.deepcopy(cfg) - if param_dicts is None: - cfg.params = model.parameters() - else: - cfg.params = [dict(names=[], params=[], lr=cfg.lr)] - for i in range(len(param_dicts)): - param_group = dict(names=[], params=[]) - if "lr" in param_dicts[i].keys(): - param_group["lr"] = param_dicts[i].lr - if "momentum" in param_dicts[i].keys(): - param_group["momentum"] = param_dicts[i].momentum - if "weight_decay" in param_dicts[i].keys(): - param_group["weight_decay"] = param_dicts[i].weight_decay - cfg.params.append(param_group) - - for n, p in model.named_parameters(): - flag = False - for i in range(len(param_dicts)): - if param_dicts[i].keyword in n: - cfg.params[i + 1]["names"].append(n) - cfg.params[i + 1]["params"].append(p) - flag = True - break - if not flag: - cfg.params[0]["names"].append(n) - cfg.params[0]["params"].append(p) - - logger = get_root_logger() - for i in range(len(cfg.params)): - param_names = cfg.params[i].pop("names") - message = "" - for key in cfg.params[i].keys(): - if key != "params": - message += f" {key}: {cfg.params[i][key]};" - logger.info(f"Params Group {i+1} -{message} Params: {param_names}.") - return OPTIMIZERS.build(cfg=cfg) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/path.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/path.py deleted file mode 100644 index ce98fa5..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/utils/path.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import os -import os.path as osp -from pathlib import Path - -from .misc import is_str - - -def is_filepath(x): - return is_str(x) or isinstance(x, Path) - - -def fopen(filepath, *args, **kwargs): - if is_str(filepath): - return open(filepath, *args, **kwargs) - elif isinstance(filepath, Path): - return filepath.open(*args, **kwargs) - raise ValueError("`filepath` should be a string or a Path") - - -def check_file_exist(filename, msg_tmpl='file "{}" does not exist'): - if not osp.isfile(filename): - raise FileNotFoundError(msg_tmpl.format(filename)) - - -def mkdir_or_exist(dir_name, mode=0o777): - if dir_name == "": - return - dir_name = osp.expanduser(dir_name) - os.makedirs(dir_name, mode=mode, exist_ok=True) - - -def symlink(src, dst, overwrite=True, **kwargs): - if os.path.lexists(dst) and overwrite: - os.remove(dst) - os.symlink(src, dst, **kwargs) - - -def scandir(dir_path, suffix=None, recursive=False, case_sensitive=True): - """Scan a directory to find the interested files. - - Args: - dir_path (str | obj:`Path`): Path of the directory. - suffix (str | tuple(str), optional): File suffix that we are - interested in. Default: None. - recursive (bool, optional): If set to True, recursively scan the - directory. Default: False. - case_sensitive (bool, optional) : If set to False, ignore the case of - suffix. Default: True. - - Returns: - A generator for all the interested files with relative paths. - """ - if isinstance(dir_path, (str, Path)): - dir_path = str(dir_path) - else: - raise TypeError('"dir_path" must be a string or Path object') - - if (suffix is not None) and not isinstance(suffix, (str, tuple)): - raise TypeError('"suffix" must be a string or tuple of strings') - - if suffix is not None and not case_sensitive: - suffix = ( - suffix.lower() - if isinstance(suffix, str) - else tuple(item.lower() for item in suffix) - ) - - root = dir_path - - def _scandir(dir_path, suffix, recursive, case_sensitive): - for entry in os.scandir(dir_path): - if not entry.name.startswith(".") and entry.is_file(): - rel_path = osp.relpath(entry.path, root) - _rel_path = rel_path if case_sensitive else rel_path.lower() - if suffix is None or _rel_path.endswith(suffix): - yield rel_path - elif recursive and os.path.isdir(entry.path): - # scan recursively if entry.path is a directory - yield from _scandir(entry.path, suffix, recursive, case_sensitive) - - return _scandir(dir_path, suffix, recursive, case_sensitive) - - -def find_vcs_root(path, markers=(".git",)): - """Finds the root directory (including itself) of specified markers. - - Args: - path (str): Path of directory or file. - markers (list[str], optional): List of file or directory names. - - Returns: - The directory contained one of the markers or None if not found. - """ - if osp.isfile(path): - path = osp.dirname(path) - - prev, cur = None, osp.abspath(osp.expanduser(path)) - while cur != prev: - if any(osp.exists(osp.join(cur, marker)) for marker in markers): - return cur - prev, cur = cur, osp.split(cur)[0] - return None diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/registry.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/registry.py deleted file mode 100644 index 7ac308a..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/utils/registry.py +++ /dev/null @@ -1,316 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import inspect -import warnings -from functools import partial - -from .misc import is_seq_of - - -def build_from_cfg(cfg, registry, default_args=None): - """Build a module from configs dict. - - Args: - cfg (dict): Config dict. It should at least contain the key "type". - registry (:obj:`Registry`): The registry to search the type from. - default_args (dict, optional): Default initialization arguments. - - Returns: - object: The constructed object. - """ - if not isinstance(cfg, dict): - raise TypeError(f"cfg must be a dict, but got {type(cfg)}") - if "type" not in cfg: - if default_args is None or "type" not in default_args: - raise KeyError( - '`cfg` or `default_args` must contain the key "type", ' - f"but got {cfg}\n{default_args}" - ) - if not isinstance(registry, Registry): - raise TypeError( - "registry must be an mmcv.Registry object, " f"but got {type(registry)}" - ) - if not (isinstance(default_args, dict) or default_args is None): - raise TypeError( - "default_args must be a dict or None, " f"but got {type(default_args)}" - ) - - args = cfg.copy() - - if default_args is not None: - for name, value in default_args.items(): - args.setdefault(name, value) - - obj_type = args.pop("type") - if isinstance(obj_type, str): - obj_cls = registry.get(obj_type) - if obj_cls is None: - raise KeyError(f"{obj_type} is not in the {registry.name} registry") - elif inspect.isclass(obj_type): - obj_cls = obj_type - else: - raise TypeError(f"type must be a str or valid type, but got {type(obj_type)}") - try: - return obj_cls(**args) - except Exception as e: - # Normal TypeError does not print class name. - raise type(e)(f"{obj_cls.__name__}: {e}") - - -class Registry: - """A registry to map strings to classes. - - Registered object could be built from registry. - Example: - >>> MODELS = Registry('models') - >>> @MODELS.register_module() - >>> class ResNet: - >>> pass - >>> resnet = MODELS.build(dict(type='ResNet')) - - Please refer to - https://mmcv.readthedocs.io/en/latest/understand_mmcv/registry.html for - advanced usage. - - Args: - name (str): Registry name. - build_func(func, optional): Build function to construct instance from - Registry, func:`build_from_cfg` is used if neither ``parent`` or - ``build_func`` is specified. If ``parent`` is specified and - ``build_func`` is not given, ``build_func`` will be inherited - from ``parent``. Default: None. - parent (Registry, optional): Parent registry. The class registered in - children registry could be built from parent. Default: None. - scope (str, optional): The scope of registry. It is the key to search - for children registry. If not specified, scope will be the name of - the package where class is defined, e.g. mmdet, mmcls, mmseg. - Default: None. - """ - - def __init__(self, name, build_func=None, parent=None, scope=None): - self._name = name - self._module_dict = dict() - self._children = dict() - self._scope = self.infer_scope() if scope is None else scope - - # self.build_func will be set with the following priority: - # 1. build_func - # 2. parent.build_func - # 3. build_from_cfg - if build_func is None: - if parent is not None: - self.build_func = parent.build_func - else: - self.build_func = build_from_cfg - else: - self.build_func = build_func - if parent is not None: - assert isinstance(parent, Registry) - parent._add_children(self) - self.parent = parent - else: - self.parent = None - - def __len__(self): - return len(self._module_dict) - - def __contains__(self, key): - return self.get(key) is not None - - def __repr__(self): - format_str = ( - self.__class__.__name__ + f"(name={self._name}, " - f"items={self._module_dict})" - ) - return format_str - - @staticmethod - def infer_scope(): - """Infer the scope of registry. - - The name of the package where registry is defined will be returned. - - Example: - # in mmdet/models/backbone/resnet.py - >>> MODELS = Registry('models') - >>> @MODELS.register_module() - >>> class ResNet: - >>> pass - The scope of ``ResNet`` will be ``mmdet``. - - - Returns: - scope (str): The inferred scope name. - """ - # inspect.stack() trace where this function is called, the index-2 - # indicates the frame where `infer_scope()` is called - filename = inspect.getmodule(inspect.stack()[2][0]).__name__ - split_filename = filename.split(".") - return split_filename[0] - - @staticmethod - def split_scope_key(key): - """Split scope and key. - - The first scope will be split from key. - - Examples: - >>> Registry.split_scope_key('mmdet.ResNet') - 'mmdet', 'ResNet' - >>> Registry.split_scope_key('ResNet') - None, 'ResNet' - - Return: - scope (str, None): The first scope. - key (str): The remaining key. - """ - split_index = key.find(".") - if split_index != -1: - return key[:split_index], key[split_index + 1 :] - else: - return None, key - - @property - def name(self): - return self._name - - @property - def scope(self): - return self._scope - - @property - def module_dict(self): - return self._module_dict - - @property - def children(self): - return self._children - - def get(self, key): - """Get the registry record. - - Args: - key (str): The class name in string format. - - Returns: - class: The corresponding class. - """ - scope, real_key = self.split_scope_key(key) - if scope is None or scope == self._scope: - # get from self - if real_key in self._module_dict: - return self._module_dict[real_key] - else: - # get from self._children - if scope in self._children: - return self._children[scope].get(real_key) - else: - # goto root - parent = self.parent - while parent.parent is not None: - parent = parent.parent - return parent.get(key) - - def build(self, *args, **kwargs): - return self.build_func(*args, **kwargs, registry=self) - - def _add_children(self, registry): - """Add children for a registry. - - The ``registry`` will be added as children based on its scope. - The parent registry could build objects from children registry. - - Example: - >>> models = Registry('models') - >>> mmdet_models = Registry('models', parent=models) - >>> @mmdet_models.register_module() - >>> class ResNet: - >>> pass - >>> resnet = models.build(dict(type='mmdet.ResNet')) - """ - - assert isinstance(registry, Registry) - assert registry.scope is not None - assert ( - registry.scope not in self.children - ), f"scope {registry.scope} exists in {self.name} registry" - self.children[registry.scope] = registry - - def _register_module(self, module_class, module_name=None, force=False): - if not inspect.isclass(module_class): - raise TypeError("module must be a class, " f"but got {type(module_class)}") - - if module_name is None: - module_name = module_class.__name__ - if isinstance(module_name, str): - module_name = [module_name] - for name in module_name: - if not force and name in self._module_dict: - raise KeyError(f"{name} is already registered " f"in {self.name}") - self._module_dict[name] = module_class - - def deprecated_register_module(self, cls=None, force=False): - warnings.warn( - "The old API of register_module(module, force=False) " - "is deprecated and will be removed, please use the new API " - "register_module(name=None, force=False, module=None) instead." - ) - if cls is None: - return partial(self.deprecated_register_module, force=force) - self._register_module(cls, force=force) - return cls - - def register_module(self, name=None, force=False, module=None): - """Register a module. - - A record will be added to `self._module_dict`, whose key is the class - name or the specified name, and value is the class itself. - It can be used as a decorator or a normal function. - - Example: - >>> backbones = Registry('backbone') - >>> @backbones.register_module() - >>> class ResNet: - >>> pass - - >>> backbones = Registry('backbone') - >>> @backbones.register_module(name='mnet') - >>> class MobileNet: - >>> pass - - >>> backbones = Registry('backbone') - >>> class ResNet: - >>> pass - >>> backbones.register_module(ResNet) - - Args: - name (str | None): The module name to be registered. If not - specified, the class name will be used. - force (bool, optional): Whether to override an existing class with - the same name. Default: False. - module (type): Module class to be registered. - """ - if not isinstance(force, bool): - raise TypeError(f"force must be a boolean, but got {type(force)}") - # NOTE: This is a walkaround to be compatible with the old api, - # while it may introduce unexpected bugs. - if isinstance(name, type): - return self.deprecated_register_module(name, force=force) - - # raise the error ahead of time - if not (name is None or isinstance(name, str) or is_seq_of(name, str)): - raise TypeError( - "name must be either of None, an instance of str or a sequence" - f" of str, but got {type(name)}" - ) - - # use it as a normal method: x.register_module(module=SomeClass) - if module is not None: - self._register_module(module_class=module, module_name=name, force=force) - return module - - # use it as a decorator: @x.register_module() - def _register(cls): - self._register_module(module_class=cls, module_name=name, force=force) - return cls - - return _register diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/scheduler.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/scheduler.py deleted file mode 100644 index e57084f..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/utils/scheduler.py +++ /dev/null @@ -1,205 +0,0 @@ -""" -Scheduler - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -import copy -import numpy as np -import torch.optim.lr_scheduler as lr_scheduler -from .registry import Registry - -SCHEDULERS = Registry("schedulers") - - -@SCHEDULERS.register_module() -class MultiStepLR(lr_scheduler.MultiStepLR): - def __init__( - self, - optimizer, - milestones, - total_steps, - gamma=0.1, - last_epoch=-1, - ): - super().__init__( - optimizer=optimizer, - milestones=[int(rate * total_steps) for rate in milestones], - gamma=gamma, - last_epoch=last_epoch, - ) - - -@SCHEDULERS.register_module() -class MultiStepWithWarmupLR(lr_scheduler.LambdaLR): - def __init__( - self, - optimizer, - milestones, - total_steps, - gamma=0.1, - warmup_rate=0.05, - warmup_scale=1e-6, - last_epoch=-1, - ): - milestones = [rate * total_steps for rate in milestones] - - def multi_step_with_warmup(s): - factor = 1.0 - for i in range(len(milestones)): - if s < milestones[i]: - break - factor *= gamma - - if s <= warmup_rate * total_steps: - warmup_coefficient = 1 - (1 - s / warmup_rate / total_steps) * ( - 1 - warmup_scale - ) - else: - warmup_coefficient = 1.0 - return warmup_coefficient * factor - - super().__init__( - optimizer=optimizer, - lr_lambda=multi_step_with_warmup, - last_epoch=last_epoch, - ) - - -@SCHEDULERS.register_module() -class PolyLR(lr_scheduler.LambdaLR): - def __init__( - self, - optimizer, - total_steps, - power=0.9, - last_epoch=-1, - ): - super().__init__( - optimizer=optimizer, - lr_lambda=lambda s: (1 - s / (total_steps + 1)) ** power, - last_epoch=last_epoch, - ) - - -@SCHEDULERS.register_module() -class ExpLR(lr_scheduler.LambdaLR): - def __init__( - self, - optimizer, - total_steps, - gamma=0.9, - last_epoch=-1, - ): - super().__init__( - optimizer=optimizer, - lr_lambda=lambda s: gamma ** (s / total_steps), - last_epoch=last_epoch, - ) - - -@SCHEDULERS.register_module() -class CosineAnnealingLR(lr_scheduler.CosineAnnealingLR): - def __init__( - self, - optimizer, - total_steps, - eta_min=0, - last_epoch=-1, - ): - super().__init__( - optimizer=optimizer, - T_max=total_steps, - eta_min=eta_min, - last_epoch=last_epoch, - ) - - -@SCHEDULERS.register_module() -class OneCycleLR(lr_scheduler.OneCycleLR): - r""" - torch.optim.lr_scheduler.OneCycleLR, Block total_steps - """ - - def __init__( - self, - optimizer, - max_lr, - total_steps=None, - pct_start=0.3, - anneal_strategy="cos", - cycle_momentum=True, - base_momentum=0.85, - max_momentum=0.95, - div_factor=25.0, - final_div_factor=1e4, - three_phase=False, - last_epoch=-1, - ): - super().__init__( - optimizer=optimizer, - max_lr=max_lr, - total_steps=total_steps, - pct_start=pct_start, - anneal_strategy=anneal_strategy, - cycle_momentum=cycle_momentum, - base_momentum=base_momentum, - max_momentum=max_momentum, - div_factor=div_factor, - final_div_factor=final_div_factor, - three_phase=three_phase, - last_epoch=last_epoch, - ) - - -class CosineScheduler(object): - def __init__( - self, - base_value, - final_value, - total_iters, - start_value=0, - warmup_iters=0, - freeze_value=None, - freeze_iters=0, - ): - self.base_value = base_value - self.final_value = final_value - self.total_iters = total_iters - - warmup_schedule = np.linspace(start_value, base_value, warmup_iters) - - if freeze_value is None: - freeze_value = final_value - freeze_schedule = np.ones(freeze_iters) * freeze_value - - iters = np.arange(total_iters - warmup_iters - freeze_iters) - schedule = final_value + 0.5 * (base_value - final_value) * ( - 1 + np.cos(np.pi * iters / len(iters)) - ) - self.schedule = np.concatenate((warmup_schedule, schedule, freeze_schedule)) - self.iter = 0 - - def get(self, it): - if it >= self.total_iters: - return self.final_value - else: - return self.schedule[it] - - def step(self): - value = self.get(self.iter) - self.iter += 1 - return value - - def reset(self): - self.iter = 0 - - def __getitem__(self, it): - return self.get(it) - - -def build_scheduler(cfg, optimizer): - cfg = copy.deepcopy(cfg) - cfg.optimizer = optimizer - return SCHEDULERS.build(cfg=cfg) diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/timer.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/timer.py deleted file mode 100644 index 3de4a16..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/utils/timer.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -*- coding: utf-8 -*- - -from time import perf_counter -from typing import Optional - - -class Timer: - """ - A timer which computes the time elapsed since the start/reset of the timer. - """ - - def __init__(self) -> None: - self.reset() - - def reset(self) -> None: - """ - Reset the timer. - """ - self._start = perf_counter() - self._paused: Optional[float] = None - self._total_paused = 0 - self._count_start = 1 - - def pause(self) -> None: - """ - Pause the timer. - """ - if self._paused is not None: - raise ValueError("Trying to pause a Timer that is already paused!") - self._paused = perf_counter() - - def is_paused(self) -> bool: - """ - Returns: - bool: whether the timer is currently paused - """ - return self._paused is not None - - def resume(self) -> None: - """ - Resume the timer. - """ - if self._paused is None: - raise ValueError("Trying to resume a Timer that is not paused!") - # pyre-fixme[58]: `-` is not supported for operand types `float` and - # `Optional[float]`. - self._total_paused += perf_counter() - self._paused - self._paused = None - self._count_start += 1 - - def seconds(self) -> float: - """ - Returns: - (float): the total number of seconds since the start/reset of the - timer, excluding the time when the timer is paused. - """ - if self._paused is not None: - end_time: float = self._paused # type: ignore - else: - end_time = perf_counter() - return end_time - self._start - self._total_paused - - def avg_seconds(self) -> float: - """ - Returns: - (float): the average number of seconds between every start/reset and - pause. - """ - return self.seconds() / self._count_start diff --git a/point_transformer_v3/pointcept_minimal/pointcept/utils/visualization.py b/point_transformer_v3/pointcept_minimal/pointcept/utils/visualization.py deleted file mode 100644 index 92ac39b..0000000 --- a/point_transformer_v3/pointcept_minimal/pointcept/utils/visualization.py +++ /dev/null @@ -1,128 +0,0 @@ -""" -Visualization Utils - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -import os - -try: - import open3d as o3d -except ImportError: - o3d = None -import numpy as np -import torch - - -def to_numpy(x): - if isinstance(x, torch.Tensor): - x = x.clone().detach().cpu().numpy() - assert isinstance(x, np.ndarray) - return x - - -def get_point_cloud(coord, color=None, verbose=True): - if not isinstance(coord, list): - coord = [coord] - if color is not None: - color = [color] - - pcd_list = [] - for i in range(len(coord)): - coord_ = to_numpy(coord[i]) - if color is not None: - color_ = to_numpy(color[i]) - pcd = o3d.geometry.PointCloud() - pcd.points = o3d.utility.Vector3dVector(coord_) - pcd.colors = o3d.utility.Vector3dVector( - np.zeros_like(coord_) if color is None else color_ - ) - pcd_list.append(pcd) - if verbose: - o3d.visualization.draw_geometries(pcd_list) - return pcd_list - - -def get_line_set(coord, line, color=(1.0, 0.0, 0.0), verbose=True): - coord = to_numpy(coord) - line = to_numpy(line) - colors = np.array([color for _ in range(len(line))]) - line_set = o3d.geometry.LineSet() - line_set.points = o3d.utility.Vector3dVector(coord) - line_set.lines = o3d.utility.Vector2iVector(line) - line_set.colors = o3d.utility.Vector3dVector(colors) - if verbose: - o3d.visualization.draw_geometries([line_set]) - return line_set - - -def save_point_cloud(coord, color=None, file_path="pc.ply", logger=None): - os.makedirs(os.path.dirname(file_path), exist_ok=True) - coord = to_numpy(coord) - if color is not None: - color = to_numpy(color) - pcd = o3d.geometry.PointCloud() - pcd.points = o3d.utility.Vector3dVector(coord) - pcd.colors = o3d.utility.Vector3dVector( - np.ones_like(coord) if color is None else color - ) - o3d.io.write_point_cloud(file_path, pcd) - if logger is not None: - logger.info(f"Save Point Cloud to: {file_path}") - - -def save_bounding_boxes( - bboxes_corners, color=(1.0, 0.0, 0.0), file_path="bbox.ply", logger=None -): - bboxes_corners = to_numpy(bboxes_corners) - # point list - points = bboxes_corners.reshape(-1, 3) - # line list - box_lines = np.array( - [ - [0, 1], - [1, 2], - [2, 3], - [3, 0], - [4, 5], - [5, 6], - [6, 7], - [7, 0], - [0, 4], - [1, 5], - [2, 6], - [3, 7], - ] - ) - lines = [] - for i, _ in enumerate(bboxes_corners): - lines.append(box_lines + i * 8) - lines = np.concatenate(lines) - # color list - color = np.array([color for _ in range(len(lines))]) - # generate line set - line_set = o3d.geometry.LineSet() - line_set.points = o3d.utility.Vector3dVector(points) - line_set.lines = o3d.utility.Vector2iVector(lines) - line_set.colors = o3d.utility.Vector3dVector(color) - o3d.io.write_line_set(file_path, line_set) - - if logger is not None: - logger.info(f"Save Boxes to: {file_path}") - - -def save_lines( - points, lines, color=(1.0, 0.0, 0.0), file_path="lines.ply", logger=None -): - points = to_numpy(points) - lines = to_numpy(lines) - colors = np.array([color for _ in range(len(lines))]) - line_set = o3d.geometry.LineSet() - line_set.points = o3d.utility.Vector3dVector(points) - line_set.lines = o3d.utility.Vector2iVector(lines) - line_set.colors = o3d.utility.Vector3dVector(colors) - o3d.io.write_line_set(file_path, line_set) - - if logger is not None: - logger.info(f"Save Lines to: {file_path}") diff --git a/point_transformer_v3/pointcept_minimal/scripts/build_image.sh b/point_transformer_v3/pointcept_minimal/scripts/build_image.sh deleted file mode 100644 index 7233b7f..0000000 --- a/point_transformer_v3/pointcept_minimal/scripts/build_image.sh +++ /dev/null @@ -1,83 +0,0 @@ -TORCH_VERSION=2.5.0 -CUDA_VERSION=12.4 -CUDNN_VERSION=9 - -ARGS=`getopt -o t:c: -l torch:,cuda:,cudnn: -n "$0" -- "$@"` -[ $? != 0 ] && exit 1 -eval set -- "${ARGS}" -while true ; do - case "$1" in - -t | --torch) - TORCH_VERSION=$2 - shift 2 - ;; - -c | --cuda) - CUDA_VERSION=$2 - shift 2 - ;; - --cudnn) - CUDNN_VERSION=$2 - shift 2 - ;; - --) - break - ;; - *) - echo "Invalid option: $1" - exit 1 - ;; - esac -done - -CUDA_VERSION_NO_DOT=`echo ${CUDA_VERSION} | tr -d "."` -BASE_TORCH_TAG=${TORCH_VERSION}-cuda${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel -IMG_TAG=pointcept/pointcept:v1.6.0-pytorch${BASE_TORCH_TAG} - -echo "TORCH VERSION: ${TORCH_VERSION}" -echo "CUDA VERSION: ${CUDA_VERSION}" -echo "CUDNN VERSION: ${CUDNN_VERSION}" - - -cat > ./Dockerfile <<- EOM -FROM pytorch/pytorch:${BASE_TORCH_TAG} - -# Fix nvidia-key error issue (NO_PUBKEY A4B469963BF863CC) -RUN rm /etc/apt/sources.list.d/*.list - -# Installing apt packages -RUN export DEBIAN_FRONTEND=noninteractive \ - && apt -y update --no-install-recommends \ - && apt -y install --no-install-recommends \ - git wget tmux vim zsh build-essential cmake ninja-build libopenblas-dev libsparsehash-dev \ - && apt autoremove -y \ - && apt clean -y \ - && export DEBIAN_FRONTEND=dialog - -# Install Pointcept environment -RUN conda install h5py pyyaml tensorboard tensorboardx wandb yapf addict einops scipy plyfile termcolor matplotlib black open3d -c conda-forge -y - -RUN pip install --upgrade pip -RUN pip install timm -RUN pip install torch-geometric -RUN pip install torch_scatter torch_sparse torch_cluster -f https://data.pyg.org/whl/torch-${TORCH_VERSION}+cu${CUDA_VERSION_NO_DOT}.html -RUN pip install spconv-cu${CUDA_VERSION_NO_DOT} -RUN pip install git+https://github.com/octree-nn/ocnn-pytorch.git -RUN pip install ftfy regex tqdm -RUN pip install git+https://github.com/openai/CLIP.git - -# Build swin3d -RUN TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0" pip install -U git+https://github.com/microsoft/Swin3D.git -v - -# Build FlashAttention2 -RUN TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0" pip install git+https://github.com/Dao-AILab/flash-attention.git - -# Build pointops -RUN git clone https://github.com/Pointcept/Pointcept.git -RUN TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0" pip install Pointcept/libs/pointops -v - -# Build pointgroup_ops -RUN TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0" pip install Pointcept/libs/pointgroup_ops -v - -EOM - -docker build . -f ./Dockerfile -t $IMG_TAG \ No newline at end of file diff --git a/point_transformer_v3/pointcept_minimal/scripts/create_tars.sh b/point_transformer_v3/pointcept_minimal/scripts/create_tars.sh deleted file mode 100644 index 8bd990b..0000000 --- a/point_transformer_v3/pointcept_minimal/scripts/create_tars.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/bin/sh - -# Variables -SOURCE_DIR=$1 -DEST_DIR=$2 -MAX_SIZE=$(awk "BEGIN {printf \"%d\", $3 * 1024 * 1024}") # Convert GB to KB as an integer - -# Get the base name of the source directory to use as TAR_NAME -TAR_NAME=$(basename "$SOURCE_DIR") - -# Create destination directory if it doesn't exist -mkdir -p "$DEST_DIR" - -# Function to create a new tar file -create_tar() { - tar_number=$1 - file_list=$2 - tar_name=$(printf "%s/${TAR_NAME}_%0${width}d.tar.gz" "$DEST_DIR" "$tar_number") - tar -zcvf "$tar_name" -C "$SOURCE_DIR" -T "$file_list" -} - -# Initialize -tar_number=1 -current_size=0 -temp_dir=$(mktemp -d) -file_list="$temp_dir/file_list_$tar_number" -echo Start indexing "file_list_$tar_number" - -cd "$SOURCE_DIR" || exit 1 - -# Iterate over all files in the source directory -find . -type f | while IFS= read -r file; do - file_size=$(du -k "$file" | cut -f1) - - if [ $(( current_size + file_size )) -gt $MAX_SIZE ]; then - tar_number=$((tar_number + 1)) - file_list="$temp_dir/file_list_$tar_number" - echo Start indexing "file_list_$tar_number" - current_size=0 - fi - - echo "$file" >> "$file_list" - current_size=$((current_size + file_size)) -done - -# Determine the width for the tar file numbers -total_files=$(find "$temp_dir" -name 'file_list_*' | wc -l) -width=${#total_files} - -# Set PARALLEL_PROCESSES to the number of file lists if not provided -PARALLEL_PROCESSES=${4:-$total_files} - -# Debug information -echo "Total files: $total_files" -echo "Width: $width" -echo "Parallel processes: $PARALLEL_PROCESSES" - -# Run tar creation in parallel -find "$temp_dir" -name 'file_list_*' | xargs -P "$PARALLEL_PROCESSES" -I {} sh -c ' - file_list={} - tar_number=$(basename "$file_list" | cut -d_ -f3) - tar_name=$(printf "%s/'"$TAR_NAME"'_%0'"$width"'d.tar.gz" "'"$DEST_DIR"'" "$tar_number") - tar -zcvf "$tar_name" -C "'"$SOURCE_DIR"'" -T "$file_list" -' - -# Clean up -rm -rf "$temp_dir" \ No newline at end of file diff --git a/point_transformer_v3/pointcept_minimal/scripts/test.sh b/point_transformer_v3/pointcept_minimal/scripts/test.sh deleted file mode 100644 index 42ccf04..0000000 --- a/point_transformer_v3/pointcept_minimal/scripts/test.sh +++ /dev/null @@ -1,92 +0,0 @@ -#!/bin/sh - -cd $(dirname $(dirname "$0")) || exit -PYTHON=python - -TEST_CODE=test.py - -DATASET=scannet -CONFIG="None" -EXP_NAME=debug -WEIGHT=model_best -NUM_GPU=None -NUM_MACHINE=1 -DIST_URL="auto" - -while getopts "p:d:c:n:w:g:m:" opt; do - case $opt in - p) - PYTHON=$OPTARG - ;; - d) - DATASET=$OPTARG - ;; - c) - CONFIG=$OPTARG - ;; - n) - EXP_NAME=$OPTARG - ;; - w) - WEIGHT=$OPTARG - ;; - g) - NUM_GPU=$OPTARG - ;; - m) - NUM_MACHINE=$OPTARG - ;; - \?) - echo "Invalid option: -$OPTARG" - ;; - esac -done - -if [ "${NUM_GPU}" = 'None' ] -then - NUM_GPU=`$PYTHON -c 'import torch; print(torch.cuda.device_count())'` -fi - -echo "Experiment name: $EXP_NAME" -echo "Python interpreter dir: $PYTHON" -echo "Dataset: $DATASET" -echo "GPU Num: $NUM_GPU" -echo "Machine Num: $NUM_MACHINE" - -if [ -n "$SLURM_NODELIST" ]; then - MASTER_HOSTNAME=$(scontrol show hostname "$SLURM_NODELIST" | head -n 1) - MASTER_ADDR=$(getent hosts "$MASTER_HOSTNAME" | awk '{ print $1 }') - MASTER_PORT=$((10000 + 0x$(echo -n "${DATASET}/${EXP_NAME}" | md5sum | cut -c 1-4 | awk '{print $1}') % 20000)) - DIST_URL=tcp://$MASTER_ADDR:$MASTER_PORT -fi - -echo "Dist URL: $DIST_URL" - -EXP_DIR=exp/${DATASET}/${EXP_NAME} -MODEL_DIR=${EXP_DIR}/model -CODE_DIR=${EXP_DIR}/code -CONFIG_DIR=${EXP_DIR}/config.py - -if [ "${CONFIG}" = "None" ] -then - CONFIG_DIR=${EXP_DIR}/config.py -else - CONFIG_DIR=configs/${DATASET}/${CONFIG}.py -fi - -echo "Loading config in:" $CONFIG_DIR -#export PYTHONPATH=./$CODE_DIR -export PYTHONPATH=./ -echo "Running code in: $CODE_DIR" - - -echo " =========> RUN TASK <=========" -ulimit -n 65536 -#$PYTHON -u "$CODE_DIR"/tools/$TEST_CODE \ -$PYTHON -u tools/$TEST_CODE \ - --config-file "$CONFIG_DIR" \ - --num-gpus "$NUM_GPU" \ - --num-machines "$NUM_MACHINE" \ - --machine-rank ${SLURM_NODEID:-0} \ - --dist-url ${DIST_URL} \ - --options save_path="$EXP_DIR" weight="${MODEL_DIR}"/"${WEIGHT}".pth diff --git a/point_transformer_v3/pointcept_minimal/scripts/train.sh b/point_transformer_v3/pointcept_minimal/scripts/train.sh deleted file mode 100644 index 15abf05..0000000 --- a/point_transformer_v3/pointcept_minimal/scripts/train.sh +++ /dev/null @@ -1,114 +0,0 @@ -#!/bin/sh - -cd $(dirname $(dirname "$0")) || exit -ROOT_DIR=$(pwd) -PYTHON=python - -TRAIN_CODE=train.py - -DATASET=scannet -CONFIG="None" -EXP_NAME=debug -WEIGHT="None" -RESUME=false -NUM_GPU=None -NUM_MACHINE=1 -DIST_URL="auto" - - -while getopts "p:d:c:n:w:g:m:r:" opt; do - case $opt in - p) - PYTHON=$OPTARG - ;; - d) - DATASET=$OPTARG - ;; - c) - CONFIG=$OPTARG - ;; - n) - EXP_NAME=$OPTARG - ;; - w) - WEIGHT=$OPTARG - ;; - r) - RESUME=$OPTARG - ;; - g) - NUM_GPU=$OPTARG - ;; - m) - NUM_MACHINE=$OPTARG - ;; - \?) - echo "Invalid option: -$OPTARG" - ;; - esac -done - -if [ "${NUM_GPU}" = 'None' ] -then - NUM_GPU=`$PYTHON -c 'import torch; print(torch.cuda.device_count())'` -fi - -echo "Experiment name: $EXP_NAME" -echo "Python interpreter dir: $PYTHON" -echo "Dataset: $DATASET" -echo "Config: $CONFIG" -echo "GPU Num: $NUM_GPU" -echo "Machine Num: $NUM_MACHINE" - -if [ -n "$SLURM_NODELIST" ]; then - MASTER_HOSTNAME=$(scontrol show hostname "$SLURM_NODELIST" | head -n 1) - MASTER_ADDR=$(getent hosts "$MASTER_HOSTNAME" | awk '{ print $1 }') - MASTER_PORT=$((10000 + 0x$(echo -n "${DATASET}/${EXP_NAME}" | md5sum | cut -c 1-4 | awk '{print $1}') % 20000)) - DIST_URL=tcp://$MASTER_ADDR:$MASTER_PORT -fi - -echo "Dist URL: $DIST_URL" - -EXP_DIR=exp/${DATASET}/${EXP_NAME} -MODEL_DIR=${EXP_DIR}/model -CODE_DIR=${EXP_DIR}/code -CONFIG_DIR=configs/${DATASET}/${CONFIG}.py - - -echo " =========> CREATE EXP DIR <=========" -echo "Experiment dir: $ROOT_DIR/$EXP_DIR" -if [ "${RESUME}" = true ] && [ -d "$EXP_DIR" ] -then - CONFIG_DIR=${EXP_DIR}/config.py - WEIGHT=$MODEL_DIR/model_last.pth -else - RESUME=false - mkdir -p "$MODEL_DIR" "$CODE_DIR" - cp -r scripts tools pointcept "$CODE_DIR" -fi - -echo "Loading config in:" $CONFIG_DIR -export PYTHONPATH=./$CODE_DIR -echo "Running code in: $CODE_DIR" - - -echo " =========> RUN TASK <=========" -ulimit -n 65536 -if [ "${WEIGHT}" = "None" ] -then - $PYTHON "$CODE_DIR"/tools/$TRAIN_CODE \ - --config-file "$CONFIG_DIR" \ - --num-gpus "$NUM_GPU" \ - --num-machines "$NUM_MACHINE" \ - --machine-rank ${SLURM_NODEID:-0} \ - --dist-url ${DIST_URL} \ - --options save_path="$EXP_DIR" -else - $PYTHON "$CODE_DIR"/tools/$TRAIN_CODE \ - --config-file "$CONFIG_DIR" \ - --num-gpus "$NUM_GPU" \ - --num-machines "$NUM_MACHINE" \ - --machine-rank ${SLURM_NODEID:-0} \ - --dist-url ${DIST_URL} \ - --options save_path="$EXP_DIR" resume="$RESUME" weight="$WEIGHT" -fi diff --git a/point_transformer_v3/pointcept_minimal/tools/test.py b/point_transformer_v3/pointcept_minimal/tools/test.py deleted file mode 100644 index 8b477f9..0000000 --- a/point_transformer_v3/pointcept_minimal/tools/test.py +++ /dev/null @@ -1,39 +0,0 @@ -""" -Main Testing Script - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -from pointcept.engines.defaults import ( - default_argument_parser, - default_config_parser, - default_setup, -) -from pointcept.engines.test import TESTERS -from pointcept.engines.launch import launch - - -def main_worker(cfg): - cfg = default_setup(cfg) - test_cfg = dict(cfg=cfg, **cfg.test) - tester = TESTERS.build(test_cfg) - tester.test() - - -def main(): - args = default_argument_parser().parse_args() - cfg = default_config_parser(args.config_file, args.options) - - launch( - main_worker, - num_gpus_per_machine=args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - cfg=(cfg,), - ) - - -if __name__ == "__main__": - main() diff --git a/point_transformer_v3/pointcept_minimal/tools/train.py b/point_transformer_v3/pointcept_minimal/tools/train.py deleted file mode 100644 index e3ed749..0000000 --- a/point_transformer_v3/pointcept_minimal/tools/train.py +++ /dev/null @@ -1,38 +0,0 @@ -""" -Main Training Script - -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. -""" - -from pointcept.engines.defaults import ( - default_argument_parser, - default_config_parser, - default_setup, -) -from pointcept.engines.train import TRAINERS -from pointcept.engines.launch import launch - - -def main_worker(cfg): - cfg = default_setup(cfg) - trainer = TRAINERS.build(dict(type=cfg.train.type, cfg=cfg)) - trainer.train() - - -def main(): - args = default_argument_parser().parse_args() - cfg = default_config_parser(args.config_file, args.options) - - launch( - main_worker, - num_gpus_per_machine=args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - cfg=(cfg,), - ) - - -if __name__ == "__main__": - main() diff --git a/point_transformer_v3/requirements.txt b/point_transformer_v3/requirements.txt index 2c18427..debc317 100644 --- a/point_transformer_v3/requirements.txt +++ b/point_transformer_v3/requirements.txt @@ -1,2 +1,20 @@ -flash-attn==2.7.4.post1 +# Core dependencies for PT-v3 FVDB implementation timm +requests + +# flash-attn is only needed when patch_size > 0 (default config uses patch_size=1024) +# While PyTorch 2.8+ has built-in flash attention, flash-attn provides optimized varlen functions +# that are faster for variable-length sequences. The build is slow but worth it for performance. +# +# If pip install freezes or is very slow, try installing separately with: +# MAX_JOBS=4 pip install flash-attn==2.7.4.post1 --no-build-isolation +# Or check for pre-built wheels at: https://github.com/Dao-AILab/flash-attention/releases +flash-attn==2.7.4.post1 + +# Pointcept framework dependencies (only needed when using point_transformer_v3m1_fvdb.py) +# Install from PyG wheels for PyTorch 2.8.0 + CUDA 12.9 +--find-links https://data.pyg.org/whl/torch-2.8.0+cu129.html +torch-cluster +# Sparse convolution - spconv-cu129 not available, try cu124 (usually compatible with 12.9) +# If this fails, install from source: https://github.com/traveller59/spconv +spconv-cu124 diff --git a/point_transformer_v3/scripts/README.md b/point_transformer_v3/scripts/README.md new file mode 100644 index 0000000..0f89317 --- /dev/null +++ b/point_transformer_v3/scripts/README.md @@ -0,0 +1,32 @@ +# Scripts Directory + +This directory contains utility scripts organized by purpose. + +## `data/` - Data Management Scripts + +Scripts for downloading and preprocessing datasets: + +- **`download_example_data.py`**: Downloads preprocessed test data from remote repository +- **`prepare_scannet_dataset.py`**: Prepares ScanNet dataset samples from raw data + +## `test/` - Testing and Validation Scripts + +Scripts for running inference and validating results: + +- **`minimal_inference.py`**: Runs PT-v3 model inference on point cloud data +- **`compute_difference.py`**: Compares inference outputs between different implementations + +## Usage + +All scripts should be run from the `point_transformer_v3/` directory: + +```bash +# Data scripts +python scripts/data/download_example_data.py +python scripts/data/prepare_scannet_dataset.py --data-root /path/to/scannet --output data/samples.json + +# Test scripts +python scripts/test/minimal_inference.py --data-path data/scannet_samples.json +python scripts/test/compute_difference.py --stats_path_1 data/output1.json --stats_path_2 data/output2.json +``` + diff --git a/point_transformer_v3/scripts/data/__pycache__/download_example_data.cpython-312.pyc b/point_transformer_v3/scripts/data/__pycache__/download_example_data.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..10026d61b5f51f279bc4cf20f9820dcbbdd11864 GIT binary patch literal 2745 zcma)8OKclO7@qa++PkqGr?Hc!DI{)_hFB;zkn##8Jen5TLW&3o#S*Nx-m$yRde`iX z^ROIb1QMx)L@N#r0z}0rkRHm3UZE{Q;$qWO4$~kZ!2xcDAR)npnYA}2B|OIR%s=yg z|8wS_ov(tyHbC3QPu{M-*=HnR}8RI(iH~CnN=~ z!4s3B#zU_rKrY+vDr=2tK@(pWk`k0P@60d&_K^(Gd>?YjHV>c@-7Ta`(bXQ8ZjG9B zQC{^l<{WTjds|$Wg7PZYpp)&eZ34XL2Ot@uXbFa3drSV!pa2%Q1uoY16N692I3hfw z;?qP<>jqRzH3x~;ure8FrC{Id00Fx z860#mJ8f=h3%Temd9%A*Y-@CVrPjUKaBJPAT>hdDce!gNQS-W6buU_nM7AVQv^FK|aS(IOq7idIm zoa1n(i_S(}sx|(r_Tc}}zTegLubSHbThkn9G|yQ5)thq+`ooh&x(}$jtYjo2#75?$?l-h}gcn8H*SWTIoySf|a- zK9Ll~xxEzPr9#vV;EB%M9|H&MI;-(TwqZXJ_9l7uvHyFC2iRX!YVEx zB4m+e62Zzt+G~Yep6~`Gd4z4!@oYo*$b1UsS#pW!xIse9!3mhMH0U@^ICv{FK*aivYZ7WLyQEbb?L}IB|n3I01fb#|H#KL? z#?Dov{eA$YY&}o6l0gz~#uYg4oH9o+o?jqcJ96wHs*IH_O3{nF4C)ftHN#bAa3x!) zw9l5G_|*#{J?}|xkJrM()$njFoT!EqweWq_@O`!Lcr`qJJ^X0-srAs_<#aVPSblO{ z2)r@*>g4i7d2&tI_g!c2@^rOxsC=R>bcvC5sr@bg8Gohwfy=uu%azFEYtrG3@ZR%> z&Ku`|`w>4_6}7Yf^F})Oq%y>!JRN z)c>1I1uDUTKYk5>K&0j$tojFk0iK3?fir>1?!+Z-1y#C6*QA3Rq3|CY{!q=|U-kEI zN8P#d*ve?7E3qcs)j&nI^J6RfR>GC8ku@pK_;rz*_#GXi$^NYGv91X}_g0@?8~tl)mZ@AMN0;_C+S{=da%1P3ddF$dMub+E9Se_e75R`7it;rP*_d-kdj literal 0 HcmV?d00001 diff --git a/point_transformer_v3/scripts/data/__pycache__/prepare_scannet_dataset.cpython-312.pyc b/point_transformer_v3/scripts/data/__pycache__/prepare_scannet_dataset.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c24169258853160a6eab862ff0602feeb8be6aa GIT binary patch literal 14414 zcmch8ZEzb$cHj&!_yz%ffh71jBq)iHNb1Y7Y+9m3i=-@(q9sxO5G@&jFhdd$2+%V_ zQ9_1V?w0SCOtd8`U!vA>L}guNDpF#mlFD(bQnkEGF6XW6-eL^y5<9D{(^XuR_#f9& z;?3Sw?Y-B70U(I7T$hiQ(B0Gh`t|GX*RS7u-Tb?vB0B})k6)<^-`h%2e~%w}D9dCX zcGDDfo#Ln=ilaG0lpdleAv#p-!%<+Y*g+aSHl(a6x+O zk~A)MY~MZ-mPRMewTI$k+mFY?F)754OSxP-RNL)?p3i)`x;>hO&kNUnKMCbL53W%&QKTWA=@D3x2mpgemFQ0l`=j0EvhLLj|&_o z(YTo?L)hbE?9~+TIRg1`v;H;_Ea7>hfu@J8^IC~)`k6I)ae-AH1^!xO6O3GGB z20bfN-*Qk?j;CaUOa=5|QZfx+S)8&FzD&86ks>{xOLa^DY0*nBQFrJ87^g{`fK?D| zP_9}7fp9D=1p=xKnI<5_;}QUjN5hip43EHwc>v(}i)zsbF9irp6jmk!j#h&Mu0~O!!ufJz#Bk|s}KuC$wqx7cQa!b zep@Qh;r{)4*+We+vO&^MBr1X{Ul3$Qr>UE|Y6`(N4;jw3!hD(j&FC`eqrYaj(#`<> zDN_RGZpy&H>Bv!fElpkd2~dj+PC^8kz99UY(FA9&elF_c*JzrfuXq_sHVgutG@*5p z@=-e}k)DL_(-aN=0A|}o<7sM=_8SINdaK{5+QXtQr^seijUdTXLtF%@1p-(#hsEJ= z6l7QgBsST}Dia^)V^AQ*A&$m_oXU(uwN9((FfkT zTh%wJXO2E7ub8pj_cYA(+;^8;ot&M#dU^Kp&2zU#Zj9WD-iR(f_fhAa&gDI+`mR;? z%lAFrn@zX2+}Luf^+xN0^x@^(mzO3|)%#aHohyy$^19U%udSUJQcesd-#W8;;%ut% zZRNz-kIO%3`f$tbEg!btZe5fFHcIQzg|m zPu)6wtXEwZLhbd+Vt1OJ9|9#zoAW#|L-0WSDSj8ZJ~T|gu zqb=K`?ccpe8}=wOpex!35=?t+{L*Lt_t(GvwaUbTW4wSoF5vkrI59%j{0wS;Ew~}) zcN6ib8l!wnXu^6QM5^^1sQTDiwV(*)!h&k$W1Kh%_JZnsy{rF7cc5!hVMB9JlrD`!vtf|Q((YZV1$6GwAe5LuA>X~rZ_(tlG0a`esA?$T1r)EDZw|F= zzytc0sG77Cr3n$&8G)@@b>aMkDyCO-LKATow&ygJF0Z;iaBW~=_r0>kQy;y3=k1h# zpW@r6lD^&LzA%L zWYZ4MyjO8FrJLIpBTDm&Yt3CsbJxnQmF86Q;kD)grFq~7!jCR~|6;27O=uEPoQ)6M zRdA#Tb+9Cl9TF(~@%OM9B63J%wOc621Ub7c2PpxCrk@rLVqzeyB)kN95sx07HynWg zFhN-2#{z*wb-|&YS2u(WQIP}=ahF}4nw^>-Ot~AA)<%ued|I-Uf^zp2v~XP<=oA}- zozRYKHJ~0f6cMp&+J|Zh1Yr9Cu@z4cOUln8yhiAKcj?@7$b>0(Q_|W*2nGApBniQA zEa4=^3(;_RN!S6+{C2dYa3_N$We)_#;@m_O^Ugrvorz#ns}c49D6wpaCPb!Jcp1yO zAySJn22+TSMgxI6w1DE14B()qKH;HD%r-nP#CC{&`7ZUj!CG#f?q9E>+!g6kHtnhW zC!5{W{&`iA$@_VIHAFAarlx;3(ws_!(2*TL4m_|2G>6Ep9Xm93 zK;8l`j)}wL%5M|U%p-iS8pq?~!i)I17o+Df>VODLju;Oo5;@NNOiuU+`w@if31lBQ zr>3=SaS*-|m5;MyyM8{QJ}mwSqF;hVQSQBVSSfE_7)+J>XKW7~74v?@;RB)k;r`qE zmugbJ{cFA+#n-bUtZ*q`-DDc_llY}U1RI9puR^QV=f%{q?S zl&^Em*Q@w?KQ^pLDPRAZZ&2|KuAX{*?bI3N)R~m;td7H~Spbhr6r5yxZ|_}vap?_Z z+y1p}hm>uHR=SjJy+1hjqtWk=uKM0c4xLf5o##n;FvEIbXu@HPUcrb+n1h%*g3(ck z@+C+=7Ge~{3beF|1Tmuo`J&iv@~%@5X>tO=RO7`<2~Dd(g+{esh+pKR0WqB5RkOfL z6GH4UUl*DRWM6~J_o*jMZoh)%4+>QRcA#ubKnt+vu{8-1T;zg~SG$2x!etE**)#$& z<6hRI@hrSDvynl=F3^zdRG}|6^v%IXPB< z*N``ktR&1$yNv#R%RQ^^hd6_)WS+9Xx)Bv0Jd;{#$fPDM9Y!_T7Q6`HcGM`3Yo6pM z9m!mLGN!qyGtT5tG(G_?X*gBbbC=ljJU@;uXOIb*dUPNYE^N0G%-!J&VGbP5ySrI& zJQ(6xP=Mp?L@bP+YYh^dY0zzauEj;JF3=`h|SV@la@D9Q^kLwbl*W z#c}9n55t zng)xTnQh@T-**SQqkX4eo7t0Of9KJBY65N(@SdSXTQM#GqiD^z^SJ;aHussKBv*j5 z0zfcRVveSsU}%&L3PSJ_I~o+(W7_q$n@cK zS;c(iwfzf2O6m6LzDH)N%yYf{T03~KT5h*2KA&pZwbYkvJh<}CYUz>bzWa9P)%Mx; z`7O7$-`Kv`xOgH}y?v=H>D{y3wQBF2K6Kydx;ijBFyC|Q$c-Zl!T0*tYIi8LJC;h8 zx>B{zFPoAzohv6+orkA;?z@Vw#%5#ScpJSjx*)w5TO3K&J->Wn)pcO{Fp%M`xn;Xy zTiCYLwbH#>bwpu2(_Pb-A2?hymla3-f@vY9uzQv!moF=O2a?C%N}h=*$ImAn=abBN z?Ic2njOvI_NaGVyfOse!v5B#OHaDW?bF{-OXQoNDj|DGg^I-ReM#+g4%J-rBd0aW`{9Oj&a}oJ{GZa3TUTP1$7I zMlP|t8G1yQg#HB*KrgkuN4=jt1;0V(++ctu>tWa7!5L%Hd(+b?yKIf%d=~`Hob!3h zb{+aXxY3o35L@5RVuJB!on1Ld*>E1VqtFW2(N9wlIGbz$EcT6BLb>Bv*uhDeL2w-CS`3pkcmB^5!wyAC#BM zCBPq9T>$Rt%ce^6Xmdtd3c7>O9KC1ZK7>$PHdQW{$mJJ6wY=hx0S8URXmEd)PFFl- zeE}Z&_q=b^vB5pcw2hLTk!?@*D3|I`Qy!^Jua`Ys1+osLm6^1Pq?UW$yBQ7a45U*P zQs%ITrQQ1XSdn2!pWvWgtf}7sD zJW7l7I+WKj8uokrQ%2diQ7afh-($EaC6Y0>+U_| zrAhX2N#@uscvP~Xl8ahAc~a29o`i=5EI7D91&<4tHh8RJC%O+rdFOlw`zkr~Gp|?R zZjwbeb`~^G3+P}6CdSTzeh%ts9);Q~nQQ}cUD=U+V?#c1AQK{=g~=%r9qh4;ey{0Y zd5>3e&{Zu=!Cf!zu1HW9y#&g1p-@+O_;&1gC+_4Ngfn!cv1uarP(z;Hvuo7V$n-% z!p;US1jAtWMETFQ0ya?UQIXM}rJya4a2yxnsBEM0KoozK-kvZ3=OoO7*dLqlaa zk$>TSB+s*nsy(=tU&F+t+K~Y>x;at*gV-U60Sd;C23W$TZ62iA%;L2Uo3Lb-E>YSG z%OJi#0cb&38HBi{B4 zcw%PGvU{kJNx`5vv1n>MdP7hRP1cef|@~96;ZAG zNA#m5E4q{&2N!qFy)$3?`(K0XzleAMf5TluCrY#d1NY+7Gl}oOg~=dpWfN`~trU!A zrT_|z?K_8IIfEBQGG*H}V1EPLiN!}oV69XKT$6=EhvKo}@QBJB8R&ggwSj-HAFAOU z6Y+rIM5$&TtUI{PD(QuJ9f(VWn|cHR-fl7EiGm-Ym~af35>G7EG8q(N(1T{@w9X#jz8(QyYgj6PyZ5 zkMI1TxoQG3_)G3Ia3xRl4bVT{N#su9eMVid**`As$BeDH$) z=mI67>L42>fO%1MX)X?)%Y5UeL;h8ZwS|U?%u1-v%+ixrc`6+Sq__h}Lp@I}CNmw7 zH(=xrNZ5x25npV9#f!|ZnzbHOnio*DQ=MeJv(^}_b0#n1j@M=WQ&PM6b z>+lo51FH`Y!JeZ`&NZe|VJdHu_aDjKZzo5EA2Z_pqVlz(dZnoT=EYP|%XBZ;q@@*e z;oogo*rSxROdo}-wlzns;;3D9)ZceCB%5AXu3mN~*`AcEH)-wt6o3=gzjp0w3u3Br zTdK5mx-ZSxzuEWozPZge%jVxodHu<%t&7{1_@y_Ndy~b7R+;WkE%uq0QkKg3ZJ$^+ zrLC^3u36W7^+Mmm{^X9A|7Q2fZ>A0nCSN|4JoR?6GO%j>jr$ee`E99+%`*-l&Sd%K zk*^>5=D^nn=60$Y#f zvk<&zTpUQXyp-IsZ~5ir#BwxQ-nVM)hl|q6niKg?@reAtU7zs&hmAp z(dKzj)v#98qExjk)}*TT%pAGDy(?Ypo!_DqH!U1l98sE|U$(9Il^6Pxr$dj7h8-@& z*$Q2H<}C{~(1WMp1K&dEJ1wi_FDynNf}7w6Kup(fU4BsEO;#Vcd1%4--YbiCrFPfS zkWzafS#faAn69cz*7tm1T{J?drW$sqz4fTlEohy21}u5iutt`}29b!$$a;`A-_ zt~$4+omKN)inDH}A7`y^wr}2#v$lBQFG9<{KObEhTAoZ+9sY^)mGuBkRquWjr762> z=KTEbRa?Wt&QGn*tF~ENvb1TTYt_2>3llh7AK9t$N`*-1R7F>+tZTaer!O9vtNp;3 zY<@1q?)ju-&&TgfAAO*4aN1sT^H7rAk*eL9tl71+6&PlwecrujU)r7AvNyT+$hwiX z_0f-vMw3}vU#8-hk2b;8#pjg4=0T$SXZw>ia7Vp(X1R2^Yk7CldtlXh@aN9*|Fdoa zkY9aa2da{`hF^)Tuv!0hUl()CZTOfzY<-drUvvW5WD? z(}39$0{Cn|=QK+Bhjxgc<*)(gf7T5$g)fD`Xe$hy5gq*Z%sI!+Q{XHBXZ;FrHp#5_ z!Pz;-J*RG#f*QXu1>hWj7t8KHqPgOGCC`K|&BI^Dm45~Np1c+nT;*42QI*%i`;}T$ z=Nww_sJ^zK57dPU!rjdG6fJ>Cs0Fw#LCvZl+y$)zLu7-?%MIOG@Y?ELU7k6I`X32^ ze}jjb8}|yO8@$JYQg9a(1}NRY-2|ohjISU-DO*qkpHSNLWa+*7yBRkjxVEy+Bd!76 zWH|>P{OJMZ2ko^!qjNpw`9Ju*9dJWJ{%ioWk%XhI4HbhneCvHGQD4{!i*uG5D3MkH z6mPKH;O*xqA01Cv+uAf6E#cYFJWCX{wSlJ8meDmn!_`aFe2IAm<^rQVXUg0{2!Du8 zENyK>^A~#H%Wou!YP zdieNX19tCIZMy_Q2QTW-3;&r$=%?9s2u;kgHoQm)Q=kcB5wXEY(a3L>|LNiXo%p>2 ziQ3ok_Vg)+8BDYV{@?@di$r(?hKg(;ejctBhz;{>?J!ZAVFF$82BXSfrdor-2w)X? z)dlxYpuWbWfIRYPg9KJc(^Bbf*m|y%}J(dl{o+~o^9#U z3cS?8w|0Hk`XE^=Z-yrgX;pty;R;Tbi_yq2vL=VKE?9lWt7%R;@ET`z=pHU9--}N)f_H(N0e^E^e)%5pN>CdU= zN9JCd_RYQVh{E*INkbX!oqPKc1<5a3>*y#w|MIP)H;%4Tm|qk=O590k`IVRd$I<_q zDNmjWYK6(@I}ZuudM`~?Ri{g;(iPR@e`UI=<`*TCG+mi?m#%|(Pgg!DDPK2X2K4h% z(7Z|3N;%!@Hj=ed0N@~5CuWOC)`eL&$re*373&1&dMRbLJuJi0&n06my<@$TVw^LH UPmJE5GS>gpyKbbIYBFvA50RGU6aWAK literal 0 HcmV?d00001 diff --git a/point_transformer_v3/download_example_data.py b/point_transformer_v3/scripts/data/download_example_data.py similarity index 90% rename from point_transformer_v3/download_example_data.py rename to point_transformer_v3/scripts/data/download_example_data.py index 47db6e6..3b54ab5 100644 --- a/point_transformer_v3/download_example_data.py +++ b/point_transformer_v3/scripts/data/download_example_data.py @@ -15,7 +15,9 @@ def download_example_data(file_name: str, logger: logging.Logger): """ raw_url = f"https://raw.githubusercontent.com/voxel-foundation/fvdb-test-data/scannet/unit_tests/ptv3/{file_name}" - data_dir = Path("data") + # Script is in scripts/data/, so go up one level to get project root + project_root = Path(__file__).parent.parent.parent.resolve() + data_dir = project_root / "data" data_dir.mkdir(exist_ok=True) output_file = data_dir / file_name diff --git a/point_transformer_v3/prepare_scannet_dataset.py b/point_transformer_v3/scripts/data/prepare_scannet_dataset.py similarity index 94% rename from point_transformer_v3/prepare_scannet_dataset.py rename to point_transformer_v3/scripts/data/prepare_scannet_dataset.py index 09d0ebd..73feb92 100644 --- a/point_transformer_v3/prepare_scannet_dataset.py +++ b/point_transformer_v3/scripts/data/prepare_scannet_dataset.py @@ -304,8 +304,6 @@ def main(): if __name__ == "__main__": main() -# Create scannet_samples_small.json -# python prepare_scannet_dataset.py --data-root /home/hexuz/openvdb/fvdb/projects/sparse_attention/Pointcept/data/scannet --output data/scannet_samples_small.json --num-samples 8 --split train --min-points 2048 --max-points 4096 --voxel-size 0.1 --patch-size 1024 - -# Create scannet_samples_large.json -# python prepare_scannet_dataset.py --data-root /home/hexuz/openvdb/fvdb/projects/sparse_attention/Pointcept/data/scannet --output data/scannet_samples_large.json --num-samples 4 --split train --min-points 50000 --max-points 100000 --voxel-size 0.02 --patch-size 1024 +# Run from point_transformer_v3/ directory: +# python scripts/data/prepare_scannet_dataset.py --data-root /path/to/scannet --output data/scannet_samples_small.json --num-samples 8 --split train --min-points 2048 --max-points 4096 --voxel-size 0.1 --patch-size 1024 +# python scripts/data/prepare_scannet_dataset.py --data-root /path/to/scannet --output data/scannet_samples_large.json --num-samples 4 --split train --min-points 50000 --max-points 100000 --voxel-size 0.02 --patch-size 1024 diff --git a/point_transformer_v3/scripts/test/__pycache__/compute_difference.cpython-312.pyc b/point_transformer_v3/scripts/test/__pycache__/compute_difference.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..47b0445dcac80b39efb42d4deba17549b0861ae0 GIT binary patch literal 12248 zcmcIqdvF`adA|b=?M5(MIoBxn+#?~X6z z5UZ`*QW-S@<2Hg~d5k1#hl-U6J@qu4iT|K$cZO-*$w8n*Na_4~eW|IK1CV(|RvSdn+F3d4SdUgXD|OgtLKG3*9L zVZ9iIQ>tOS7nfpHuS$y5y=o~YdI>4k^lBhh4{Jwsy}A*7uYSbPYrqqAjlCv{pv{zq zRu65F8%o8LmbTIcT2Gs4UB4C%*8gS^e#Lv5kIJxN(= zwO>yefqn*MN^hG=luUB=j6Q5e(*iUzDeIe9Zx-#KY_C;g7^7EVbk=)G5#ZRAYD(p_ z2d*1?JdAggBRN0m@sEs-aWv^3ry2JEO;Yr@*UfqTKC+MICTQA6aua@X#Ow2pxQAU{ zUq8*zJ`e3;IXB1F4zW;obXJcD0Fd*S9`I3RZki{Ud_p?0aNH5q6l6+jxjBp(A7LmcgU7% zS58q{uR=2j6gMnG45Z3vcnwb={-E6qnMx|Abf{fQ|BhOg6i6mZ{yyZ8Q>mCTq)JYy z%CS0(#V1rab^*iTckS+^`T{nAmt$OxT&DtKYCujKw{Mi%H~s(GzW^(MQ)bRkp4?(8 zhPIo6+D+OvOKn?tEsQ9U>eY+){{lv-g4%Ctr(kBGcmkim68(2t-9Lt@chc{sNVkt7 zN8Jnyi}LvkuhwWJ;b~<>Q94Z8{uM7&A;r4lSsZqA)zt+QV$fVxBux>?dcOd+;L z+#FdwG{$luq+!@9oI8L;v@@_|H6pNsq(?@%DcFBJoKt4k?PcjSZhNL!jvhHO>E)V9 zXhv$n%aT4n2Q7@jC_ulHe6k>uardw{!D50t5ofvr`Dn;cFVgrzgqlgw{-g|2g?;pd z+=*mibGmo}<>!6$!tfc^yGBMYN%L)=qo;6fI>qb&M9+WYGR8;{8AmhKE5r9)mVa5vN)7 z3C&x0zAR&ETc>0>=+&IwVs+LuhFtLDt>C$63lwlvHgn`;*G4lJ1u z#&3?<$W~m%*N%s*Y?09Im@et(rE!1^iX(O4NR^9XOk)|U!er~>UT79 zDLAt<2`bnN^CF+jK{(OT*}Me{!WorJ;RDzJMrm%THj63kEh0#yFa(DrB0W4%k!j2P zwPP1Q$FNu7qzY=JP#e_o#76i+#&%HS0hHGBdR{vYC+szp(n%>euXsH|xgUYwa*2cBQpqlxrlvmGLGnSEiU&PN9*Or_`0@AAk)hX!fZnL(r^nYuuDu zv)ne|n1xMdsfpLa+%xl<^_byWoWQO3Ga;<_gJdslaoF6BEj~>ivao>au zw=JndD7piyC=VV#!_e%g-zS}sw2x!FG&)NqtwP!`q|*%42P4}3*Ta~^h61Pr4kH9A z&_M*6iCd!4&-h1@$bf;*DOxG$Am!CRuRaS$6`b`O5qK6Xns74`dKsZvr1;1^cnT6B zZAM~xO2!fhEj*=eu?ei@_6(A~u@Rc_dfdYx8hV)8AxQ>FXTl3$uFnsFyN~q`OKKZ% z&d|f4v5kZ7rs!J04XO}uqT`tb+?4i}Lk;xN3_z)VQXY}&06~tkq=-Aa?x;k)OkdQ? z#GQm_gnF(~zt_jHqTN5nf!^rqr$G&Lv121*{<@rDH_N%`VR{4_K=^`XnXNFL7zDom z6aLb}R6!J|=<@q&Qhf%LHHLsE!e0{B0TBSWF;$U(_`E)fo+O<%Q9TUkMVA}{Q4RX3 zXrLt`ZjKh!0q>|tyhcy4OeQoTs)1)w(+{WtfDH6a)VfCj{!mcSJt=CufC5=j<%2pS zwA<&*U`nADQ-%TwwdjDp3$q(TidrPTDLC?^xE`87111vChdL>&7cJ17OQ|JV zK%Qx@R`>Wo>XilhFoZ5{YLsS0gOV1tl2BL{DP(9|*F8)L>SCsXlw<{-Wjf&lTEwgx z#C`>PT!`sS(@lTzjlcDTx4!k-JzXgR71pa(A^-3v4GZHRzqC-Z$Ss}wn(*~Yv2!k= z=dy6wE#&qs={+l%*@;TG8zSRBd?}W>b6OYA$PMemxk5(iv^G7xro}3@&ecbAn&Qsw z^O@1YmU#W{g|cYnk$6$b?AfS&*XIUJruNG5$5xDV-p!hQdG6(yv*kg_{;0lS8jqR^ zrjLe>u4b3qaLu?LWN)2561O?R+HbuPX`DU!gM;zhqA)!Z3=vA=VOCL?y4`cPGIDY$ ztL}rL1;?lAPtPy#OHJn zSc~Nqgm=F+6{-795Ewf5UjAJ7kBZ{ejUTE%Y+E3H*7*s$xclR&*xv4Fb@xrnYW4Q8 zWkX{39Uaj9?1CFzGhISu=VIkzu5jiR;nj=6E0=_F*L}z3&nvNSl+*KF|p zzf<^(S2XRkJO#-~R;~vrTm`4LMsB4YyRN;i^QZ?fkLn^I5d}$Uc^yI~GpcK50x(k$ z&{YbL4M{djB&pPsLO_)2O^nyKWAf=NpPF(UG`uh+HBcg+LCN^Sln9`tc?KmDZ=^I5 za?0jS$&?*%pJFfKjexw+)(nq=fwX{u47{1r-BR%|?h@uuZDT0S zn*)HbDGR(=-_gm^!7*P3;3UOflxPu!lr2?;vQrrYD!@e+K(*h4d1d9TXx2%n){15w zq%44&t)r^q5LPpSLtapJ8KYP&^%ov{bzJ&t+QL0wQgfjKkX=bw$$68nM# zgaB}PJpf50j0N^zq-4toXFWz+x0y7P9g>kJ38oah28Q4DXbsUz;k0xb0N0*NBZ=77 z6Zz9&HRbe9V6+6c32c_cSz=zl7eJ_M#7~WZ!E1m4>jXkTbxj@+x$+<_wgLW+5EMEA z*3TOlk6i>!FbI1okE0dyUHk?P+JuR~rm!EYC-6IX*B#8M5>>UkM4V#L8YRiZiEn5b zpplh`IS|+(AO1;6rZ}!`8TJE`WDnFTrN09-SfqUY7xq6_Frn(u`;&8}v-oW5k2UX4 zegQfO*aB=B_#etSa|Q@8-4NVSF=rtX@$q#2nRAf$)j0nQgdk9noakt}cmYACd@s!kz~BF+ zy0)o5U_#c-1j^t4rHZWnH3bCVQ+@xJs!C^|rb8xy>~^wcam34ll@t^TfYHfGD=9gm zh4XXB5-XW!0d)4EQZCjTpq&;8Ff(X}W@=DShk|_&hy?pO1MnHJJ_BYMj6wS=Q;PyL zrkas|u^aXW!NdwFjq+~tWsM)zUI%Ya257cu<<{M+XTBBs^jAfN94_Q__Ulyp>qb-bJmilGhm$YuiGn?ju z3q3+p=VAf$T=4V@=LUoUuP`(rOnxIe^bLW3V-2i0c&iGcR#h8{GTK$YRjVx4H9cm@ zy54oQODI5=O%P*#^=zS#+awT<&p=<$#7*|lse7i<2t6CTx3wjnMMkotSyj{RsnkPT zez@Xx)@?dszHh7fVB7q;Puf1sUTC>re`bX&kF?M3{{D#=xhrH@$;t_FZ|w`e{+<1| zduAO!yfByXgNyNsx)0huRL{5n%m4$yeY`KW`&6{z)Xb^XimK45)xAeRyNPah3-;1T zucju;Ff-(6k1(RJTfp1;wdNPMv!($F#x6r zJg0^N7gIIBQD#rQ$tf8E$uYQH6^H4Dylemnc@?FXp-xcEpoUoC79Y1?p|& z(xVJ%wKupHz$>oIv~qT*h!?11X7@jW#D@PL^z#aVG8FFoW)`Av}p!IjH-8%qxq zp5#1A7=VM?bu}vF+Nt=78aglo6GK(0?{bPVaZWi6YElxKAoFCpJ9U$~mbAJGpntM1 zw>?E4jLn2Br=S6pMHtc{I(ra)#WVHnye(@(|n z=z&V{dQufMAtUcba^g+UFZ(*Dz4jfXYF>;+x;y$>2N-BsPymd~KWtKa%U6=8@i~3})5_EK73674%G34- z$Wx-RkvusBg`{VwUQVTAm~{yJJn^Jv-F34EoI_xUfIS=LotY{?`Xrbj3(RPr>yCU|KY_hFFA41M9~SyGuY|_)|wg#DZ9|sBUfEur+keJueOjAx)k zkEAz{t;ri>?TW#rXac9s?97Z&|Dar+3r99Cn6+-XeH}Jz{2sBYd9pz=v2#7i``b#sq z>X4-Zd_bb9&&_&0ZGKM&RZU6M_;yg+KcyfUzssiY0s5QW3RO#2uF&t5sI* zm7}0{XiUpQZj{Is@~VZMmjxnsiEzgWV_K$L%IsS;Wh|Qtqo%@`srX9!!-N|4?&+JS z?;ie1$DNL!oW653~R5lAO=VE#1L2Fzg zl8vrdv#$?d9gf?YADXk4%_UKDi9puH%=IhiwKQrjjhV~WH0n&_Z*@A2_OTtaLLZ8w zmf~BJF-!H8qoDK^?U-#}-myQrWB>ektng@*$iH%Q`qWC{_KmVv3FBX%__GtC?YBIU zt#7{;z9?+lFW3((5eK2qn|y&ba;j&wbJ}@rEc@V-S?L|$IYUZ!p6Cgg^R+u z-dN!)f<9l8C?X`->z9ZInW}z?$d4CoUoNVP7S%l{YLNXH;9i1TCZVy=K)5?}Vo6WN z%XTc6)kn+fAC&Emnv21oF%h@rg^%B}ZH?P=ZkT3F;l4=zY(vC0%gj~Hm(LZ=_bucu zW_?t!*drY37P@{XiH%E-K%`vh`=)4>)amRA|K$Bj{&42O2=|h#A zCW3v}5>Y3A*d`R!%zA{vdcoeXMC^Vld#+4qJhI@4Hg+zS3%k1(`viOUed6pFzp-Fs zAO3|{;%3pEu1|B!UG@5ZH=4Wl=tbD4&{B1|oY|tz<)Zu^*s{i<6oa%z<_Zds*@Zzj zF-0x-Jz-yDkZ#2gC_uV0n19AbMyC=sC^g)J6Sc`*P{X0yhSC<8us)^sJG3`2e}#f> z6dXr^_1Sf2$_A A+5i9m literal 0 HcmV?d00001 diff --git a/point_transformer_v3/scripts/test/__pycache__/minimal_inference.cpython-312.pyc b/point_transformer_v3/scripts/test/__pycache__/minimal_inference.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c8c7bb7f983395eab4e2fc70455e3801bbf094f6 GIT binary patch literal 15236 zcmc(G3v3%#l3;hUN%1f0NBu48XPc5mf8?bP*S&3$fJnt!&YNl{bWHo~#emKUv1nz*8}A)p)was4z73`_tgqxP_uPJ$|30 zN95rcC?Rp_KqbC$QLfjX|3v%WYUw`^ZL$5{%G;bz0wP>N0bgX{Pv^7D{jewsD}Ep^i|T=`D&gYrPw&1)N$9 zXQ1jVb#>cC>XPm(b&+;E;(uj69*hPfzOXA89pVK(>gQdeUkHv$uA!LVI&r#Vyweqn zx=#6h(SBZXjmCme$>k5n#yA(}lYAcY>EWQLp>TX878NDIC-I_dICjw`#k$N}J+5Oh zANK^RYe zJ>ipv6(fI1;)SR$te6OYetuLUm8Z{io<8g^RE4ZQQje>k^ajOKpxqQ|Hpp4WG4N$C z>)fnj>K%(jF87}~{fa-UCOIlPNIhl=`eo`vy8lDU%_ywMhldnhL=1=s?{fXNQ5B=O z8Dt@Cj;K=h=A`Tyy{wo8Uo^mbN5{nBczLctPrh^mHl>Hp1eLKm9z%liYdydWhGpys zIKKr@O3Fh-=p<_oJeLNOwg;RjnKsH8Q)h*|ushkp@yd5ijzp-m-^4e(I`vD%Lv&;s&F zI%I|K4Jtu}w3>pLqse6GkaCy_hGRG#$8vhWz@b?YIK%CtTUi4a(hOYzOoG-XaZ;Di zom#71BWZWbYVC3+&dgcXY~6Z`7+x@he|YP*HG6Gev)zAcZ+wqDKEaJ9GVZsDYW@Lf7_1vJuKA745s z@L*V7zCu&zF`HcwdW3-3MN*jaVGhg2B79U5U9k%x-Vatg7L|NphoS)&F{4;86by`k zxemsno*X*Ij|csHm#de=V8dObVIQmuXoXe8Lg?sN#On|HM3IL!`^O>|c)=ALa*2Gz z7nOp3SSv_fmeG^EG$up~c^w0njaC5Sf+s>Le}tCxGD{6+6b#5d~JuaJOHtr3PQ zlz=J=B9(QJ4Z?Pj(y#~<9w>cC79%1sRzR=n7Q9k6qWib~ONFbrBO#ckPyTJE;4IY@#V2nu8YZO2r8C`{O@7n@bg zF@fU+uXK5ohX&1LCgG~e3#j4*M`5HG)Uh~$6@xktNu!FMfQvYMIK$Niy~Ix}f_GFJ z?jk zO!fLqO>+Uk3JVZ)<_T6J!NXaCFpTxkp4A9FC1 zDfZ+kZhv6fewY2b;-3`Dec=bsgbS#z&s1;8)UwaMBev+OxFdOo6gt5s~9{j4h7^tqxQ*A)1E<%;;rIu}@aDwCJ=<#WoSGbMp zrLO7xOn~w;=RrY}ItfiA2_ZDBD?$QJcsOGdjFhWcZCiDlxs~05U1zR#uvC(T?bGky zmhOqRa6+sIPqd|nUYInXPLCS6N?E@7$A@HL< zfp=ZN^^oV_3<+zgs2=Qmu5eJ4a4V2KJ9dPFBXGFPp7U1Qw-7SgNw6!2&jtNJ^kPsN zc8&0tMORZm2y$M34D6}c|oq>MNj&r7@$pHLeax%y>pvVL}UO77Wq`Hg^VhuT(1-(9?8AHrlJe; zQKdK-#f_skSE>);nO|W;&?!ZSaC$=7ZBcB?22U~Pc~!ebi;6`ZGB4@{W9TZMp0G+1ST#eDuaA zWvArxp@}}ZbmY^sL+R2$>gGEw8#}_Td*Y-~BU9dEzEKM0>MaJoxZJP_p6}#o~uD?BXuQuI%JXL-?Q&K*y zpP{dtmWn8A^J9v&8YX(bGE+dUtR+>}GCP}G`?rQX-`*eoZBo{cHN`rZ3iX}XDr3j&dFzGw(^@V z>#4FZ{f$x@5Yejs(Ux8p`!`#9TUkHkasf>ySz?T(xWs(>g2K^YzOzeC5>MB7r?Db% zIw@!N@gxr>6?Z!j=(RA4@@Abga;B%iFiW|Gg0pber@*ikVA#3hr@(L&^rM6;U4da! z`&9VVOmLExatl!i_r5EFgtsFkgh1vi16Njny~=fKf)1^+P=s*pT@mo~tSvy=s1?v& z<+@etL4djK)D}XTD!{?*`idaoj-CXqjU*#AYGolj3atoQD?k}fMkIuAtGyyfEm}cn zqn66Yq4s=QOKFq^hO3OyTvd)EU4jYWp<+egsuQ|*s2}8X8aUdlfrN1Ty&_1RTEXh0 zz}2jjcar65llnDUuhaO0c6a0{Cv*wDCjF$LKt^!YzcOe&-hs2r5Zb-693poU!>uE? zFp~gvIA5!!R>R~PQ0d|R8vcF{EIVQi>64}c2|TB@laCXogzk2uHl9gy!W`<$*J>%Q zfor8fH5AMmgEn$BXx7!uECo*?ew3dt1ZuA zfJ#~B+1i5c2Xu+ZJ0_7b)$Q?C6zg8bpj|LBx3sYhYtK*~QcK<5rtzArNK}M^T6sR!dIzHmJC&p}uSp4~ zCRbvS{~T&73pBGLQHk~`G?J&Kr8JrvezIygo~lVtja|LEG!j*bYS7yn7-_;uvNmA} zMKm7raY5U)p;*3LOL3iv>O>XxUXzu^cLAmbk=;`!YC@wLem>?}$;wdJ+nQC^17g{= zR$%>a2rxkL+eW8h1Xd(rL;esB@M6AYEu~5RX+{Ow-LCbVQ|Z?g(&u(O?FbP{|6lm$ zcE0=FHfL9&GH1JrdZqGX`_iUo(yXA(EKQL?KPgbQLvujz* z?mS$9omtId?#)B4K4ZR3W0G}=y3m-WNBLOOsAcy0TdgFlNA=&e!#fX_S=-;iDn6cX zPfJbESIZLhZ_-zt3Ht<-C`#JldWbkBi1bz5MXiGKZM%Wwx`I_}ohB)6-w1F56ptp> zar<$<5W1{!kdHMPB7vPB!EH#GQ!Em5yo61WC!xaRWf z$VDhv7}Dp4q$|;oaN%kS*O*qg#$#l{Ib9_4a?(+9tDHQMSYdh|k~l~}wJiSJg2-i?B%)508 zORiUoIDW_nFQf`P%!fzg){YKxVcCIix`8*!|NO{5#{YC*yzUI%BfGGWxI^K+q)HSX z(Gdx_@(&ZFAjMKty!yEBQZOBn*n9tQEGF`vxVv?uPxMQ{2=5kM9j?}mVSbz^ zK0Dcbgcn6$0BYlU;&T;eJ>ggYNGg9q0lb){3BWAl!j3h zMy-Ur#Rr#g9IUEnkp&p63N;rHd|Z&YqY%G8Xb*JaUl6>q$L}8lHw5xXk^}2sgx|ML1#3@CdDKly!xn9NimZlA&)?ySJpr{b^&W8kb&ZW ziJ1jdOX60qDoipN6#63AAww|A0mJd~Q-A|+7C}EaI{L;zt#G1cj~*~u>P0wC&Bu5f z*$3to-(1hb>tvF=A>s?+f1q#BG41~}2#a)Mk;3nzvts-#s)h=%6a*1P+?s(BAR}?Hjw|l>01a|~@<`a3vuK5(f zd=z;>vHCd93$?`02q&!|;LQg-4S@=Rmka=5HM~}VZGqRSfMS7p8T20{jz{W_D-6CTW zub9DLOW<-98v~gPfyTo-oiIW0iF2mHM#rT~s#E1_P^{XHG6XP&VnahBFNp$Pi3xZQ zMV|L|ZddeRQTQ-)PvRqr!8Zzh;uIZ-3CN*ilMY5yzzs3JtU<30QjEO8>Egjr#T4g- zY|mM4EHdiepcu%=3Bn#kG2qaFtkF;_R&DLYZmRAu-GnZgfO;Y`;fGkFlOm%^Db6-^ z=6IzTOt^7g?QLa&1}>Y{%O^bXF;yonY7TL_f_5cEPcmqSgA(AwYoVG%3n#!40z^ZM z9Xyk&LQ~4o>Sj#?yauWdgNlhvDsPax1OrGYgU=bK6otHE7<}Lqi;4m6VSZ3(l>;yw_m#3D<9$EWpJJy%Hcuzsk<%mq1Q;cZ+Us6d|-%_2Xf^Z zqx0QUa$BE#a(r1k*3tz_UCL6Iwyc9UO=#Duw!3JrxE7uYXNtSNu$C=Y8&cMWS@r|_ z`}PIb&XjBCos0JjY1fgo^|?i?ZcJGl)7IuCy{^>!Peny~!&jx09lE?OWn1^|rL=7$ zJbx1WFW`L~WiBlfau=cdm6@X!a(zyJI~%brxrp81yUo9plGNZXDQ*}UENX5X}D z);VjG9qxIy4RFe*165Wr(f1i^MkLwMG0%Dy zt@Sh9?BTh-wDp;ZLx5~9zE(d~|L+k7eUrV@tzTG67OeFtK)5h( zZCte4uN|E_daZw|e`edfb=_id>GX~pd#>+E7dOmqeNfzzsb2S9&CQyHYImyIJ=d73 z-ZbaCQ*x*0&e&hae;i-fbtJXx$bCAs>v_4VZ(6_Tti3UEePp)vj$z)pYg(6al-;mg zx6GEzvD21$M@OdK_1@sk!MT#Tp1IfWG~RhJUB7Fg{y?h!K)SwnTL16OpE)XSn6H}` z98D=l(;Ph~$&RLZ$M%f1^jhCk-%Q)=Y3OCs+)FI& ziuSpICj5Dyh2dU+XAy^;dP29<;9_#a2vTm}jdOVPuZJdGzi6H~Xiz z&9ly&C_VG+=1g_1%sLn01^o04?{#mwq;=wX5P3th%+_Y`TenzQH}m2Tb+gCPuB~^v z7Ap5nS~8BhnZda|cWYAZ2W3a^q&{P`U$abEWM}(aACzu7c+Z#GbVP1{?!Mu_it?#f zei6SPmIsF9^1!@t7|5H733&Ood1L3I(R!_Ts(8jSZ*0k?<*MED#;(PNqZw<-4EJ8_ zX6(+;S31UJP8sVaP1Cz!#7(wq_9^?kvFcN2?aa~H%W3DsOvat^6 z*EGltoeS05Cyy?cSI!j8no?D53ssv_Rh!dQTfwC)|!2Q!*t%;{w3rpY6jng*ay(>B=$OxYS1tgR_)>laPnr|?1S{n(xGy@U5# zKk7@bKap;Fak1SqC*FDK!#C3Hd*n0c7tUNrow*`!_2r#%PeHy)H<^Jh+-kANhIYzE?gMl1GGvk@3{XczWd0!bmbTl1z_WS)!;Ork8PT3WucF+rD-GEew`u}+rdoEUTb`LQE!M7^x%|%F zxs&PI&6)1Q_r!bBUGGBosZ{r=boWbgUDu~|jY~L_m+B~Mn{3&UDPAuZdopd${k#OM zd&;o^baO+S+}0x>^v$oo0JEaK^waehZgMle*^=p=8TwBS$?Gq`5>T@75e0Qi#;gdY z!?Rrd}?uLjZLO>gS>ISyrJh_seJaLd?_wpj;GGPk?u{v z7hG@d)AyV2UHoW3KKn+hHz6NPg0R0w_n^#QX2viYXD+GHWgMiJnK%y80p<-Y2OuYz zQwVaHIfBu1Odm#vm}3ZXkU5Sy#Dg5-L9T~MU{sQ1prDM z(>ZT~GFN|I#cC`Crw!!ow0l5%|~PpnCAPJan=jSj`Q}R_)qaj-Q956@0DH2j;kV*0 z*cg4h?hx%S(fz8b*uT#B53Lq|i{l^mSp4qtU$lEkcpimAmn5LG8rk!Y zUKZZRM)1(15En!*{t_-K!o`{BQ9Z^z#5o+VL{vBPV+dQUdOzZYj)xpLuonVu7!^DI z=FQu4rswFfo&(1YDVC#=(U>3|5`>tb{-$d$!oG$d++~t8*oxnNDaL2j6Z>A_e*y@e zl*RuApCuhl(~pdLntg1iXvZ%p(=RE@FDW}on14l8Ju*@y71OW$iG9J*nsT%*I5wpm zn-(0MDM#mmV^7MlC++B-uw^LS+r~GIZx_E=JWXHkoKB|8TIZ?tPZZC_Q(*E0YB literal 0 HcmV?d00001 diff --git a/point_transformer_v3/compute_difference.py b/point_transformer_v3/scripts/test/compute_difference.py similarity index 96% rename from point_transformer_v3/compute_difference.py rename to point_transformer_v3/scripts/test/compute_difference.py index df75b1e..db650cf 100644 --- a/point_transformer_v3/compute_difference.py +++ b/point_transformer_v3/scripts/test/compute_difference.py @@ -251,8 +251,6 @@ def main(): if __name__ == "__main__": main() -# scannet_samples_large.json -# python compute_difference.py --stats_path_1 data/scannet_samples_large_output.json --stats_path_2 data/scannet_samples_large_output_gt.json - -# scannet_samples_small.json -# python compute_difference.py --stats_path_1 data/scannet_samples_small_output.json --stats_path_2 data/scannet_samples_small_output_gt.json +# Run from point_transformer_v3/ directory: +# python scripts/test/compute_difference.py --stats_path_1 data/scannet_samples_large_output.json --stats_path_2 data/scannet_samples_large_output_gt.json +# python scripts/test/compute_difference.py --stats_path_1 data/scannet_samples_small_output.json --stats_path_2 data/scannet_samples_small_output_gt.json diff --git a/point_transformer_v3/minimal_inference.py b/point_transformer_v3/scripts/test/minimal_inference.py similarity index 94% rename from point_transformer_v3/minimal_inference.py rename to point_transformer_v3/scripts/test/minimal_inference.py index 216da66..4869eb0 100644 --- a/point_transformer_v3/minimal_inference.py +++ b/point_transformer_v3/scripts/test/minimal_inference.py @@ -13,10 +13,18 @@ import json import logging import os +import sys +from pathlib import Path + +# Setup paths for imports +# Script is in scripts/test/, so go up two levels to get project root +_project_root = Path(__file__).parent.parent.parent.resolve() +sys.path.insert(0, str(_project_root)) +sys.path.insert(0, str(_project_root / "external" / "pointcept")) import numpy as np import torch -from model import PTV3 +from fvdb_extensions.models.ptv3_fvdb import PTV3 import fvdb @@ -165,7 +173,7 @@ def main(): parser = argparse.ArgumentParser(description="Minimal inference script for PT-v3 on ScanNet point cloud data") parser.add_argument( - "--data-path", type=str, default="scannet_samples.json", help="Path to the scannet samples json file" + "--data-path", type=str, default="data/scannet_samples.json", help="Path to the scannet samples json file" ) parser.add_argument("--voxel-size", type=float, default=0.02, help="Voxel size for grid sampling") parser.add_argument("--patch-size", type=int, default=1024, help="Maximum points per sample") @@ -342,8 +350,6 @@ def main(): main() ## Example commands: -# scannet_samples_small.json -# python minimal_inference.py --data-path data/scannet_samples_small.json --voxel-size 0.1 --patch-size 1024 --batch-size 1 - -# scannet_samples_large.json -# python minimal_inference.py --data-path data/scannet_samples_large.json --voxel-size 0.02 --patch-size 1024 --batch-size 1 +# Run from point_transformer_v3/ directory: +# python scripts/test/minimal_inference.py --data-path data/scannet_samples_small.json --voxel-size 0.1 --patch-size 1024 --batch-size 1 +# python scripts/test/minimal_inference.py --data-path data/scannet_samples_large.json --voxel-size 0.02 --patch-size 1024 --batch-size 1 diff --git a/point_transformer_v3/setup_env.py b/point_transformer_v3/setup_env.py new file mode 100644 index 0000000..952c9d5 --- /dev/null +++ b/point_transformer_v3/setup_env.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 +""" +Setup script for point_transformer_v3 project. + +This script sets up the Python path to allow imports from: +- fvdb_extensions (local extensions) +- external.pointcept.pointcept (pointcept submodule) + +Usage: + python setup_env.py + # or source it: + source setup_env.py # This will export PYTHONPATH + +Or import it in your scripts: + import setup_env # This will add paths to sys.path +""" + +import os +import sys +from pathlib import Path + +# Get the directory containing this script (point_transformer_v3) +PROJECT_ROOT = Path(__file__).parent.resolve() + +def setup_paths(): + """Add necessary paths to sys.path for imports.""" + paths_to_add = [ + str(PROJECT_ROOT), # For importing fvdb_extensions + str(PROJECT_ROOT / "external" / "pointcept"), # For importing pointcept + ] + + for path in paths_to_add: + if path not in sys.path: + sys.path.insert(0, path) + + return paths_to_add + +def get_pythonpath(): + """Get PYTHONPATH string for shell export.""" + paths = [ + str(PROJECT_ROOT), + str(PROJECT_ROOT / "external" / "pointcept"), + ] + return os.pathsep.join(paths) + +if __name__ == "__main__": + # When run as script, print export command + pythonpath = get_pythonpath() + print(f"export PYTHONPATH={pythonpath}:$PYTHONPATH") + print("\n# Or run this script in Python to set up paths:") + print("import setup_env") +else: + # When imported, automatically set up paths + setup_paths() + diff --git a/point_transformer_v3/setup_env.sh b/point_transformer_v3/setup_env.sh new file mode 100755 index 0000000..db1247d --- /dev/null +++ b/point_transformer_v3/setup_env.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +# Setup script for point_transformer_v3 +# This sets up PYTHONPATH so imports work correctly + +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +export PYTHONPATH="${SCRIPT_DIR}:${SCRIPT_DIR}/external/pointcept:${PYTHONPATH}" + +echo "PYTHONPATH set to:" +echo "$PYTHONPATH" +echo "" +echo "You can now run scripts from this directory." +echo "Example: python minimal_inference.py --help" + From 9476eb002702af4c18e6c2be5f8709b784c64d0c Mon Sep 17 00:00:00 2001 From: Christopher Horvath Date: Wed, 19 Nov 2025 10:41:36 -0800 Subject: [PATCH 3/7] Various changes to help with code style checks Signed-off-by: Christopher Horvath --- .clang-format | 141 ++++++++++++++++++ .clangd | 72 +++++++++ .gitignore | 29 ++++ .vscode/c_cpp_properties.json | 42 ++++++ .vscode/extensions.json | 26 ++++ .vscode/settings.json | 71 +++++++++ CODE_OF_CONDUCT.md | 1 + CONTRIBUTING.md | 89 +++++++++++ .../fvdb_extensions/__init__.py | 10 +- .../__pycache__/__init__.cpython-312.pyc | Bin 207 -> 269 bytes .../fvdb_extensions/configs/fvdb_runtime.py | 10 +- .../configs/semseg-pt-v3m1-0-fvdb-test.py | 8 +- .../configs/semseg-pt-v3m1-0-test.py | 8 +- .../fvdb_extensions/models/__init__.py | 7 +- .../__pycache__/__init__.cpython-312.pyc | Bin 251 -> 251 bytes .../__pycache__/ptv3_fvdb.cpython-312.pyc | Bin 40740 -> 41080 bytes .../models/point_transformer_v3m1_fvdb.py | 113 +++++++------- .../fvdb_extensions/models/ptv3_fvdb.py | 116 +++++++------- .../scripts/apply_formatting.py | 68 +++++++++ .../scripts/data/download_example_data.py | 2 + .../scripts/data/prepare_scannet_dataset.py | 7 +- .../scripts/test/compute_difference.py | 14 +- .../scripts/test/minimal_inference.py | 11 +- 23 files changed, 717 insertions(+), 128 deletions(-) create mode 100644 .clang-format create mode 100644 .clangd create mode 100644 .gitignore create mode 100644 .vscode/c_cpp_properties.json create mode 100644 .vscode/extensions.json create mode 100644 .vscode/settings.json create mode 100644 CODE_OF_CONDUCT.md create mode 100644 CONTRIBUTING.md create mode 100755 point_transformer_v3/scripts/apply_formatting.py diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..43e9b37 --- /dev/null +++ b/.clang-format @@ -0,0 +1,141 @@ +--- +Language: Cpp +Standard: c++20 + +# Indentation +TabWidth: 4 +IndentWidth: 4 +UseTab: Never +IndentPPDirectives: None +IndentWrappedFunctionNames: false +NamespaceIndentation: None + +# Empty lines +KeepEmptyLinesAtTheStartOfBlocks: false +MaxEmptyLinesToKeep: 1 + +# Line length +ColumnLimit: 100 + +# Line endings +DeriveLineEnding: false +LineEnding: LF + +# Breaking and Penalties +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Attach +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakInheritanceList: BeforeColon +BreakStringLiterals: false + +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 + +# Spacing and padding +SpaceAfterCStyleCast: false +SpaceAfterTemplateKeyword: true +SpaceAfterLogicalNot: false +SpaceAfterControlStatementKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: false +SpaceInEmptyParentheses: false +SpacesInAngles: false +SpacesInContainerLiterals: false +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesBeforeTrailingComments: 1 +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeSquareBrackets: false +SpacesInSquareBrackets: false +SpaceBeforeCaseColon: false + +# Brace placement +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false + SplitEmptyFunction: false + SplitEmptyRecord: false + SplitEmptyNamespace: false +Cpp11BracedListStyle: true + +# Function definitions +# BreakAfterReturnType: AllDefinitions +AlwaysBreakAfterDefinitionReturnType: All + +AttributeMacros: + - __host__ + - __device__ + - __hostdev__ + - __global__ + - __forceinline__ + - __shared__ + - __launch_bounds__ + +# Alignment +AlignConsecutiveAssignments: true +AlignConsecutiveBitFields: true +AlignConsecutiveDeclarations: false +AlignConsecutiveMacros: true +AlignEscapedNewlines: Left +AlignOperands: true +AlignTrailingComments: + Kind: Always + OverEmptyLines: 2 + +# Single line allowances +BinPackParameters: false +BinPackArguments: false +AllowAllArgumentsOnNextLine: true +AllowAllConstructorInitializersOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: true +AllowShortCaseLabelsOnASingleLine: true +AllowShortEnumsOnASingleLine: true +AllowShortFunctionsOnASingleLine: Inline +AllowShortIfStatementsOnASingleLine: false +AllowShortLambdasOnASingleLine: true +AllowShortLoopsOnASingleLine: false + +# Sorting +IncludeBlocks: Regroup +IncludeIsMainRegex: "$" +IncludeCategories: + - Regex: '^F6aJ delta 86 zcmeBWI?u>=nwOW00SNAT%4RNNn8+s~4dPB`NMT54jAE!{)MUEFmYbiFnp5nj$vCkp hPk|jM%?QNBd_dv@Gb1D82Q~&Ckq)g!_99lG5CA*&5u^YB diff --git a/point_transformer_v3/fvdb_extensions/configs/fvdb_runtime.py b/point_transformer_v3/fvdb_extensions/configs/fvdb_runtime.py index 4ebf03b..049e382 100644 --- a/point_transformer_v3/fvdb_extensions/configs/fvdb_runtime.py +++ b/point_transformer_v3/fvdb_extensions/configs/fvdb_runtime.py @@ -1,3 +1,12 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + +""" +Runtime configuration for FVDB-based Point Transformer V3 models. +""" + +from __future__ import annotations + weight = None # path to model weight resume = False # whether to resume training process evaluate = True # evaluate after each epoch training process @@ -44,4 +53,3 @@ # Tester test = dict(type="SemSegTester", verbose=True) - diff --git a/point_transformer_v3/fvdb_extensions/configs/semseg-pt-v3m1-0-fvdb-test.py b/point_transformer_v3/fvdb_extensions/configs/semseg-pt-v3m1-0-fvdb-test.py index 2690364..70b79e3 100644 --- a/point_transformer_v3/fvdb_extensions/configs/semseg-pt-v3m1-0-fvdb-test.py +++ b/point_transformer_v3/fvdb_extensions/configs/semseg-pt-v3m1-0-fvdb-test.py @@ -1,3 +1,7 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + _base_ = ["fvdb_runtime.py"] # misc custom setting @@ -89,9 +93,7 @@ data_root=data_root, transform=[ dict(type="CenterShift", apply_z=True), - dict( - type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 - ), + dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), diff --git a/point_transformer_v3/fvdb_extensions/configs/semseg-pt-v3m1-0-test.py b/point_transformer_v3/fvdb_extensions/configs/semseg-pt-v3m1-0-test.py index 20aa775..e7ddc43 100644 --- a/point_transformer_v3/fvdb_extensions/configs/semseg-pt-v3m1-0-test.py +++ b/point_transformer_v3/fvdb_extensions/configs/semseg-pt-v3m1-0-test.py @@ -1,3 +1,7 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + _base_ = ["fvdb_runtime.py"] # misc custom setting @@ -103,9 +107,7 @@ data_root=data_root, transform=[ dict(type="CenterShift", apply_z=True), - dict( - type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 - ), + dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), diff --git a/point_transformer_v3/fvdb_extensions/models/__init__.py b/point_transformer_v3/fvdb_extensions/models/__init__.py index bd5f246..984658f 100644 --- a/point_transformer_v3/fvdb_extensions/models/__init__.py +++ b/point_transformer_v3/fvdb_extensions/models/__init__.py @@ -1,4 +1,7 @@ +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + # Lazy imports - only import when explicitly requested # This allows ptv3_fvdb to be imported without pulling in pointcept dependencies -__all__ = ['point_transformer_v3m1_fvdb', 'ptv3_fvdb'] - +__all__ = ["point_transformer_v3m1_fvdb", "ptv3_fvdb"] diff --git a/point_transformer_v3/fvdb_extensions/models/__pycache__/__init__.cpython-312.pyc b/point_transformer_v3/fvdb_extensions/models/__pycache__/__init__.cpython-312.pyc index 215f7ac7a8c097c4c8289bfbb4e4cb59b9eaab13..d7ab26dfe1c76de692f4cf9e9edd9e9430ee550e 100644 GIT binary patch delta 39 scmey(_?waMG%qg~0}vSS$z_T&PUO2Qz{kL&xkCE_msulw5gSkj0J~oZg#Z8m delta 39 scmey(_?waMG%qg~0}$Nxl+8TAFp=-B02>33<_hf#TxN~zMQlJB0N{-Z3;+NC diff --git a/point_transformer_v3/fvdb_extensions/models/__pycache__/ptv3_fvdb.cpython-312.pyc b/point_transformer_v3/fvdb_extensions/models/__pycache__/ptv3_fvdb.cpython-312.pyc index 0239e65efa620750e9250b2e56d858bb030777b0..f8071c67e6522c1e631360fefda5ce17005dc504 100644 GIT binary patch delta 8501 zcmaJ`3ve4nnx3A~=q<~#El0L2zv7oj>^w0EfyBWdU`uc;dwVQY!E$@-1?RT5;u@eraeH^b9gD#sP<2;u|KB6| zA;M0TzUl7&@1A~q|KESNe)5|5&Ff;`I}V3cfTudIJi6oLaGqz#ED6FXK@kFiLKHDZ z`ow_PCk3QFL%=`;3zkYTIUvK^5Ht0e17@O^SppU=vj(jEZOgpveU7k5guc9hlWWSc z{61H}HPKc8M+L>ET6;>_yrx37`B35ThZd)Yi|inBOcyyV zc~Iz_UTATaa{YV|xpGBbE-C;~VXml*i;6(x&K3E%s2D^uazv~`G+I0$D9I5xD|nq! z5P5S%8!Hb;GX>RHBTyrZT?)vZ!U^)w#{I&e)G6#I8QBbwnKIL2hGs0!inwNi5GWP} zwfI>*BLs@l4U(BONt)CeKcJacgkrH!S4`Cm;ZQQw?kAe5KSWc}Pz;jhl{C@c5lVHZ z!-jg|u|XTu4&W!$0k0$hsH9Hzl-yu+p|F6B$g_6jW3mVVm1zNj6Ty;&8b~BjcvSdO zBo^1W5kOZ&oiO|+9&;u&{xi1O*e01&0~_E zej!2>(@{sjuE1mj9Et@n?|xyB1f1++<2kWTWLJDM+4Cl+=dgH4j+3ZxNC*P0_2-a5 z5!t_*wpyg#2I%+=seHC6>|=d_;+djiP-MmUoatFPqbmftd0;LcftqG^*nF!gqs87b zubW8Euv8HXTWq;gCVp$^B74E|&%Oiw>dsVxhP#{M@um$4Wgw>RsytMwFAO_kJ!ey* z;S`j#C*mrbeMgJbPD@#jZD*-JUz3vq{VLVW!C*8VO$CFr3_3x5>_giSaj-r1Dw`Jz zGNljMy+*ze_3Y>NDp4kE#O}2wNas%Wnf)VDkREa56Js@2oXP$(&u{c$ zayD~28(g^Vnqe=EDm0_e?kpir*6n@Y&h)K?E6EVj&!11-@S?>CGf=}63@V8*Oc*S$K>b)`Ssyx}()I-H(+nwC70sxx z1~s5mN~Wl0z@?z|ScUhj9VL!nFdPddlVLR$3kEL`+6<-KHd=u8L!irN0|7^p>;m}a zsPKg-%I2|rVcAOZo9q%+~A*}{nsO{zhpsozT5uqn>3U6@*pa2x8G`$B`k{zM|C zlX8Mx-y;-V!wS5&=(f|&ws_y>th(UHVbvs=Xa}}0MdK;D2|sr`0`f?o7UT`xim(k} zf-8{6VQ_c=1=wGp%9v3o^No6aqh;lzOIILUj5=M|qh+;AuS1frY%bYk<)&+k^5t9gmF-S{ zR(nz$nw-xA_@U{us7aCVJU)M#6bh#>J6|*5-VVkKJ`ECngFZN3`ye*KtqvJG#pWP) z(8U*i!}^Y?Ks1J@?U`b@tbo^FaqV89El}D8AtuYwuU{WgRFLTLnU80 z%ulltjA`&0{}G$p?;TPK6j7BF34R6@ws&|N;Z1k|Vp3(r3?&xT7|G{`V+F%8DK-c^ z%nC%K5cxU)tqKI80h{6kv?~yGLU7^&%mgQ3T!k6qYE7locq*ERw^Lje&2EVg^aZ=s zkdmYa)Aq(iqWGG~UbOhwSM_Vyb93G`WrKvq$oBNbNF!n2&1)X2gu3m17tNu_F>KO| ztp$~69|RKNP)z+EaqjHh$8C__1M2$H-H)jU5gtM~4gfxunBOVjpbwpDr#ZxFIa82b`5m54*s!TZ+&Wl0gB5;(;^S`yCaKXXf{ zoa$LfcW&NO8 z-SWTW1yuB9?@Rw_wM!%$xw>U- zbEe_Vkm_KhsZciX&px%b5gHmcuCtSEy!Hd@s(D$N*wcws z*$3;2+}E7|*0I4Sne~yg+3S7m+}c8R$9k(ZXULW*LpHNB>q{pG)98-RJY-hjbbPO< zz@efChLii;D@Mc;DD2sVQ_MUYX23Mf-QXv?S*Wy##W%dm4!4)fZm1o`5jfq?61|PP z-G@kz%18CL?ZqU*>)%r#vv=Apw);RwllCcH^d!5|9(Oay^RSy%VyYA22D z1sq9t2#zGU18K&+aCU@BCr}F=1x1&jStAfGcLzi5sZ^YDckwV5n|l(GNK#EDH4)B3 z*dI-X0IGQ*eT=8U`>8P+!L%WoQXxv;7ghIDJcUtYK6|C3y6zDao2aVy;sEB1q5gg~ zuFzAMk;1SqF!o(XPaO{e_-=&$Mc;^gla3;@5wS-$c`9GQssVs7keP~a! zw>LGD*Vwn4j%TBRhi|VWQTD>^zw=%-3NG(Re%+WPI(o^6&Yp1@6QlWcAF?@HE7?n% zU%jL3{h}G~7nNTwT6C#s(WuAwW9wtqGxisy*B8Eh^BXsh?2KHMM0fGHESD5~Z50Yj zhwDd+iccFZdm1iz8pb8Duwcw4xQgL4;qsiy8_qjD@W=exu_~c>?$v7c>n(exb)pM8 z5yDP%@lK@YY<-k8S7&>$>{8LPQIGe>=EuxutfQXl(V6wn1gj`-Ve12nWl@oyGqB@LC9J0gRK{?;}-`Tk<_l97oT{n32=X4c-KQDLt zzltIs0$Qqm0JiGq-Sts8>dwZ}MgYycM!_&7b%3AVG6x&ztzXt{1#OH@Hix@>V-k%L z_xS7%57|uQ2*aO<96tf4mVBCdcGXO~gjl+35cb6p$K^`82peC5unb_L*-S{doISsL zhD`vb(G{ozHy69_CV_EGS0YSI0H+~$fDkdZp(r1LcY8UezCpm)gP#m_XOKCQwqPlb zO4qUTduI9cXcP($=4ThQDUzW3LsYSd?DIX(=w=|cwL@Gq{n|k%{1`U41%VTtfB(9C z=3nBYLHHPFs|gbP^1v-ypIk8}!0VSq{52BTJrYtc1=O$RVCk1l{3gI9SLy7=7N)V= z0|oeu_p=j&6+?TE-1S=`J||_v z-E^7iBH4I1-5B`~l4Ap{L+FI2$Gv`EFhnXiYrgV2yB z^~L(B3Hyg$g?)wqLMa*B)>Kpxh|Fz|^E={IF5r zs3`*6wF}8-Ab~#K?wN4%qZKuy{-#lH*{H7;Zq(RAi5A`G|2}ckJbqtR(kOS5YrDzB zT^Tkv%|eQy0H5(5IR2I2lsCmdBsl&3uz?uCK(fYB#2)VQF0=C~TNMu{Ac)Gie_2uc zf&ZL2ljo%z8{j7=U;5wrtBE)?N&h?O>Z$ZEC+zvXN{$J%47CF>>RaulqfYL*46k7n>qcx|lOO4{}*{^Y>JWyQj{} z1j&2TRr`#j#0E7XGPOg$v-cA%Pz0ARE&GcqC*5$-v_>+7e4M5BFWAYi3ry4b$f-z8 zI2+rbxv95r7A!>QL0UZmQlwR0E~|7(iGw%ml)7cGVXjW8xpMtAg-F@kS8Qn-qw1kW ze8&F-KKIF0`ptn8oM}Gr#RcyI_!X+)bore^5#-zoiPUiiTg)Bo39WV&6(1TNA=Pc_Pe7_E?Lbg?|mohh8bQvEWhspu|fO*R}nmx zOaI@o^JEskm*XGjAx9_nwh922wHnh45q2VY*~JI8uRDn{Iae0t3Mi_XLhAxtGxW!k zL!YvTcv=5(_jc}AFo>VR{|?N~Lofnpw&gfYc>6z3(F3CrEVih9U8!lOWBn0R5$4S#0iub!$to@7Gy7 zT8e_))xws&*fJT^9BMoqoZLhm?97AnNi)0n;LH+k7|67P4m6AjtNLl$`cS)Eo3SdK z4Zoy`O>?OOKZdrRlmK|p#_k?&CT;Ac;W;B$DNjIB7i25xr#`s8^7sbV*tx-l>rd@aC+Uk7zsjVV5bGcm>Iu?AB` zD6NN-CiN%!X#{7YuKBoYHfTh6!S@T^;Cn0PX^z$xC_n(YHQOA zPv?nxpyv7n2>?{&w44rp`IAa)ECBGyJqQByA&Ma<@RREv!csE=4=G!*k9NpT0i2V) z3;6SgW!xFT!KhS#|AyMyVDer@?SBB+;aCaP=u$U z2g$9FKzC*pO1)#ETyGs?YtMLHRkfql_4vP&zGGI;XZy~)tUI<@XPafu>Du9StoLk# z1AQrP&0j?~vM0}a8{UEv&91Xq{XM~XMW`4NDn1Y@ z$BgW=$JOGvL@G{fJ+)(a$Eo0O@T!3Mt6i-7i5)~`Z#>~9mF)5pv#K99tePRbWLs4$ XyfwqLs#Uo}QdN+chzecAGT&|8r%@ z4|;l!{cC3af9B5o|2O~t&)h%0t$Y6ay8NH!VmpneNf+P2pW3DphyHWa`Z7NDDhuY(8PbunO{q4>S<6IYT-M8WTIkyP7#nZU? zW)HM`r?s03DtNCF;FeD3Zmc}2uND+zjX)*Piasdq63&oQ8;=P6`dz{ilHnBt&zRAe z2}Uws(xFKOAy}*vl;TTTMF_gnHF^_SXiyESyCW%8+;*rhrl{ifcy}VM8rr&(srCR- zO{;05Z&Ntc1!ZGjn5MeJv2@TdlXz?}5X}P;AT%F-k_1QvwXE_Yzj zjRb|M6Nw!OB8^&@L-JPeMyyBRKl>OnrLpg@R;ihtm5SJ#4bECwR7|p@n4{98LX^nH z6L~?43@Z&R=AD zQg-%$uZ(q=yaByzkj0mzOJYV+2=NVpwRjZz8F`PbxgN$$5B8yHeYRX@t|TVbYCb5M z1D5b*_O|&awgioIHN@i$8xry%_Fc|kFOWOl>RZJf0Q>bbRg}US2 zsZfae;KkI>%(g+2&mOZ?)|X*})(R!n912BZ;bbxtN}{?d_;bIe;5zo9ty1@;!2V<_ z)m1KF<$0eHC;QpnYwTC~%cW`@sD^n80+Jt#b6873odYeY8aipWOfv%a7kG)CJyGyD zai-_nPwNItP`VUJ1(Hf6RX{H4GF`Bt7D&LR8sdjj{gguh=d6l7ayS*H^(e9m$y_A! zkSsv52#9L!33qlXawyh)K;g&<7&*dd6LQ%~=Uie>C!DLvX2eY(pB7=)Vk91vFor^M zA_B?_VACvYGPi|~DzrU8dsPVy07%y-Q`$K{P=? z(iEN@mvB$L|L3J|Ju6q&ykg)d>3GKuYbhHM3RBI#=rb#7)hzeL}3l78Kum?+P zk*q^GQ*XFG)R#!aGz?|IFwOq7l^rNuA#o3~k^N)oyMqXVi`FS1$l0E5#_{#t@f6*H zujAI&^a4>OszkfviuOJPLcpBc>a-O_%;4Tq96zds7o$oz zm8ARF_iN^j!{A2E$F2BwDkGU{(oSqdg0U~T4+!kvi6-@xPqHfW)y^%+Sv0RiY`C?| zF5anGv@_jTdsbJIQ*u8HI00^aQ|Y6T`E((x4U|`NeR7$jD7X|z(f|auPu5R&1sH6e zF9Tt+>j38h=TJ=EOF#%d?-|f=UXI z-vi}Oz%+H+DA2Bo(O4p!`dd2%XG)K-7whY^_>J{RP{--xGuF?pA1xh}9Wqh$vJU?B zia|EOuL!@ABFaW6n-nQ(=bOL`o4|5GYjhH*MRoFw~SvDe)AfF%;FVn&XFhLtTm(mXq{o`tdo-bh_7ctSK2~OXmF| zZJYll8N?XY9&pepM*SvsH+3mtIV5-YLU4|RW6CsBA3SiFZzVbi;+lOP!O}@2-$H_} z1e{+E2{XKclZem6k55TeffOA( z5?||LeT^U3VHk7chT8c?C7p+@c$9~x2WUt(3lJ0e% z(Zw&pjLJ!WdT6afN7i$h%dJbafzPgALfTlk*(nbs?}r2PuHhkerSVxf&@ z(G8=7Ce%nzI8$?z{d|N?anV&4)xrL@x6_AVL8|_+yq^xS7dOV;XQ9rMPc04Qf$~8% zd(#}k*se|T;J+aMEg*16c@uvYO!2I0K`7aEC>o7HzL}u1Lix5;^)V$*U&GFSkK`#J zs-*Pyh2t`~fvzwl=>gu>2}dL7bQ+~_n^WA-swE1Msw)&;ol3Zfh8ciyx zq^e6L_r;c&Ndgq_@6Rr`0?XrzkvZtmBla9^Ji zm+9lk(nr8ypJl(?+_RF0-TR?&LWJ<;b~lfzJVL02;Yh0cu%azIlsej{B#Y>QXo z>-h)v#Fi!GU)j|yC&vSAcxwe|We;wBuk62^zAMhas37FE=z!L@=*D;%8b?@k+pjiO zUGuEC>RB=3@}6rQYGrLNu6jN7gMl{&uI$`@L$C7|{=U#VG;_pNblz~yQ+L%TG) z3r9u4Q8a219G-LeL;2?qePEw4S}GLHyHUoT+`iALz4VIh-Yc{3eIF>iD_ykXane$E z&A;xdf8B_y_}to|wXAi-LEJ9iNNJ;5Pi67v77yDq>hQV$kmTQR# zc9|4^jnIJ4*d0I~fomK^2ohNc>SSXk5*r{AivMIm><$Y+~pA;QMwj7vZ|-B|SU6`vb`k z5W^zj5%Rr~xm!UQ|=4c0$AW3V%227_kjo{c6fss2vX^CLw*w%zBOrB{Qdn-KK#}S2K8?a|5 z6iv`0VJd%3m|_2mdw5=`Yi@@uW%6N~dn5Y4IF=qs9TNWj+k%(BiLVC9Z&7n~%oHh` zcT;eSmd_jb;nyKnYKTRd2>fTC;!J5whK#X%P|lx{F~Z#pWQr1GjYh~CP4I7qx}3k0k=>VxEdx!JZ{a`;+P=};Q_op zv@FoF#Un6~s_%{UQQW)q7LvaJ3FJ*cF^?zMnwc}^j8vO6Yg*{kZgZBhhMuBwZVoir zfHSxLEU;ZH((@XC8DY5Sz%2cyukcmY325UhScl`UMIs<}2@?K#xYh+!AaP^I%uZ`U zNe;gMVK2wMH1Vkrm4zm7v( z4n^VwH=cYJ3g|;^o~#ocDX$p`G>nv%jre9jkYGRTYh=>FrE_k2=ZL$=t-U0BO@pIM zLMpBppRv7wRNtVSHvy?|BM1ncj#LSdYK*!#O068Fc&^EzG+(uGE3Jt~5}1kbC@;!N zAHb6vJ$GWrVpK~yB!0~Br+u`Fkb~*QL=`@E_ocMO@m*_bAGb@ApJBi^^Ya1sqrC4_7~%j^rlrjaha%l{sSgTC90Xl%_Q@V{LPttS zI6ZJgA{+2(84@-tOffL^Y^#detTIRFjJHJ)47et0uOZ{$82fqu!rk1z87D)IqYbap zr{5Tuuo1~VM9w2HI3g#1DsVK^>^^!28$McBf73fpoOw$o`tiFx9AFY?Q2Y3deF~s? zc2xoRExTfbZx51O2>BIB>*{ z^V4=D2gsxBo@2#!)V3Ba9>db*^oe6{8zC!R^Uw{ljoon~0`!>^+c+&{`42D3l=eQn zEK~a4!^^YW8La-B1zCxD_WHcLz!BcZK28-d+x(sBmmaxD9Q@P4ZV1cu6+8vHC%x~n z7s%`~d_SKKkK`EAc$fpzR%3ZF677DGE%?^X!6B%FGnvBcLMa?7P%{C=@a<^Q(ou*f z4Sdlhz*Q~X$?kYE6^=&~x)u4{e_X~=4*r|5x&Vo38Bo=FcQ_U4!u#U+vX+39V6$22 z-LJ@Ucf3>EAr`z^fU!FvalXkKPkG$$;TY$U{5z7JNKCBvlsCT74}{-6uusk|AuE`N&0UJ`=GszC6plcU@%3^AP_9N) zDBSmAZ~8nAfm!oD*qQ7;R!0`I7uY(u-MY?RFpw%Xe137T1tobpvj7MF0ah!q#1&VC zC9b_{ETN~RwNO&^eThCg6Wb6-su+nWVLE=V(u88ItZsOQm9OO>)+y^4uJiCH$UOx2 zuj8JfF@1hGUpI@p2FGU-$O9rdIwzBW_*n%Gwu@bP(mlv|$Dsb12?-y&5$D9eZsiE- zpmQOA-e1Wb51b?Lt4VV+=<1)LKwky2ZQADxE#pG>HX-J5;9|PET!U{Nvbpm9dib;B ztdI<&6WhVqQ}vD#|47wL{7j)mzPe@X%2Thh$DUqdDLG$#{?PeZS6wsMJ5Sf;p;mb3 zKo#A@{`7RI2Ngu$0is&h_4dJ~(Hcq<6t16EJX1|(v3<|XaG0Zq;*k`bfsz!J1D}*0 zc&2m^t*z#_@7UH6YTD7XuC3|rwl&(vzbz<)sMYof?huM85KoUN&!N7;DtE_xIb6Tm z+<>8)R`l|ZId{=buuk|BlBIos&O-5q)qEM(M_3sO_MC1e5b~ zfsmpRhx?&cd_~cpNBwN}v$Lzt7}j`&%hokBg&%p1YijgAswPNhdYbk6AL|Iv{|BZr Bcnkml diff --git a/point_transformer_v3/fvdb_extensions/models/point_transformer_v3m1_fvdb.py b/point_transformer_v3/fvdb_extensions/models/point_transformer_v3m1_fvdb.py index 111f61a..62d22c0 100644 --- a/point_transformer_v3/fvdb_extensions/models/point_transformer_v3m1_fvdb.py +++ b/point_transformer_v3/fvdb_extensions/models/point_transformer_v3m1_fvdb.py @@ -1,46 +1,52 @@ -""" -Point Transformer - V3 Mode1 +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 -Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) -Please cite our work if the code is helpful to you. +""" +Point Transformer - V3 Mode1 FVDB Implementation """ -import torch -from external.pointcept.pointcept.models.builder import MODELS -from external.pointcept.pointcept.models.modules import PointModule +from __future__ import annotations import fvdb +import torch +from external.pointcept.pointcept.models.builder import MODELS +from external.pointcept.pointcept.models.modules import PointModule # Import PTV3 FVDB implementation - use relative import since we're in the same package from .ptv3_fvdb import PTV3 -from typing import Dict, Tuple, Union, List -def tensor_hash_simple(tensor): +def tensor_hash_simple(tensor: torch.Tensor) -> int: """Simple Python hash - fastest but less robust""" return hash(tuple(tensor.detach().cpu().flatten().tolist())) -def create_grid_from_points(grid_coord, feat, offset, voxel_size, device="cuda"): +def create_grid_from_points( + grid_coord: torch.Tensor, + feat: torch.Tensor, + offset: torch.Tensor, + voxel_size: float, + device: str = "cuda", +) -> tuple[fvdb.GridBatch, fvdb.JaggedTensor, fvdb.JaggedTensor]: """Create FVDB tensor from ScanNet-like point data with proper batching. - + Args: grid_coord: Batched grid coordinates [N, 3] - feat: Batched features [N, C] + feat: Batched features [N, C] offset: Tensor indicating batch boundaries [B] voxel_size: Voxel size for grid creation device: Device for tensor operations - + Returns: grid: fvdb.GridBatch jfeats: fvdb.JaggedTensor with features original_coord_to_voxel_idx: Mapping from original coords to voxel indices """ - offset = list(offset.cpu().numpy()) + offset_list = list(offset.cpu().numpy()) # Convert offset to individual sample boundaries - if len(offset) == 1: + if len(offset_list) == 1: # Single sample case coords_list = [grid_coord.to(device=device, dtype=torch.int32)] feats_list = [feat.to(device=device, dtype=torch.float32)] @@ -49,11 +55,11 @@ def create_grid_from_points(grid_coord, feat, offset, voxel_size, device="cuda") coords_list = [] feats_list = [] prev_offset = 0 - for curr_offset in offset: + for curr_offset in offset_list: coords_list.append(grid_coord[prev_offset:curr_offset].to(device=device, dtype=torch.int32)) feats_list.append(feat[prev_offset:curr_offset].to(device=device, dtype=torch.float32)) prev_offset = curr_offset - + coords_jagged = fvdb.JaggedTensor(coords_list) grid = fvdb.GridBatch.from_ijk( @@ -63,7 +69,7 @@ def create_grid_from_points(grid_coord, feat, offset, voxel_size, device="cuda") ) feats_jagged = fvdb.JaggedTensor(feats_list) - feats_vdb_order = grid.inject_from_ijk(coords_jagged, feats_jagged) # + feats_vdb_order = grid.inject_from_ijk(coords_jagged, feats_jagged) # original_coord_to_voxel_idx = grid.ijk_to_index(coords_jagged, cumulative=True) return grid, feats_vdb_order, original_coord_to_voxel_idx @@ -73,25 +79,25 @@ def create_grid_from_points(grid_coord, feat, offset, voxel_size, device="cuda") class PointTransformerV3(PointModule): def __init__( self, - in_channels=6, - enc_depths=(2, 2, 2, 2), - enc_channels=(32, 64, 128, 256), - enc_num_heads=(1, 1, 1, 1), - dec_depths=(2, 2, 2), - dec_channels=(128, 64, 32), - dec_num_heads=(1, 1, 1), - patch_size=1024, - drop_path=0.3, - proj_drop=0.0, - qk_scale=None, - enable_batch_norm=False, - embedding_mode="linear", - no_conv_in_cpe=False, + in_channels: int = 6, + enc_depths: tuple[int, ...] = (2, 2, 2, 2), + enc_channels: tuple[int, ...] = (32, 64, 128, 256), + enc_num_heads: tuple[int, ...] = (1, 1, 1, 1), + dec_depths: tuple[int, ...] = (2, 2, 2), + dec_channels: tuple[int, ...] = (128, 64, 32), + dec_num_heads: tuple[int, ...] = (1, 1, 1), + patch_size: int = 1024, + drop_path: float = 0.3, + proj_drop: float = 0.0, + qk_scale: float = 1.0, + enable_batch_norm: bool = False, + embedding_mode: str = "linear", + no_conv_in_cpe: bool = False, cross_patch_attention: bool = False, cross_patch_pooling: str = "mean", sliding_window_attention: bool = False, pipelined_batch: bool = False, - order_type: Union[str, tuple] = ("z", "z-trans"), + order_type: str | tuple[str, ...] = ("z", "z-trans"), shuffle_orders: bool = True, ): super().__init__() @@ -124,9 +130,9 @@ def __init__( def forward(self, data_dict): - grid_coord = data_dict['grid_coord'] - feat = data_dict['feat'] - offset = data_dict['offset'] + grid_coord = data_dict["grid_coord"] + feat = data_dict["feat"] + offset = data_dict["offset"] # import pdb; pdb.set_trace() # print(f"grid_coord.shape: {grid_coord.shape}, feat.shape: {feat.shape}, offset.shape: {offset.shape}") # exit() @@ -149,28 +155,29 @@ def forward(self, data_dict): curr_grid_coord = grid_coord[prev_offset:curr_offset] curr_feat = feat[prev_offset:curr_offset] curr_num_points = curr_offset - prev_offset - curr_offset_tensor = torch.tensor([curr_num_points], - dtype=offset.dtype, device=offset.device) - + curr_offset_tensor = torch.tensor([curr_num_points], dtype=offset.dtype, device=offset.device) + # Process single point cloud grid, jfeats, original_coord_to_voxel_idx = create_grid_from_points( curr_grid_coord, curr_feat, curr_offset_tensor, voxel_size=0.02 ) - assert grid.ijk.jdata.shape == curr_grid_coord.shape, f"curr_grid_coord.shape: {curr_grid_coord.shape}, grid.ijk.jdata.shape: {grid.ijk.jdata.shape}" # + assert ( + grid.ijk.jdata.shape == curr_grid_coord.shape + ), f"curr_grid_coord.shape: {curr_grid_coord.shape}, grid.ijk.jdata.shape: {grid.ijk.jdata.shape}" # # catted_input_grid_ijk.append(grid.ijk.jdata) # catted_input_feat.append(jfeats.jdata) # catted_original_coord_to_voxel_idx.append(original_coord_to_voxel_idx.jdata) - # grid shape and feats values match here. + # grid shape and feats values match here. grid, jfeats = self.fvdb_ptv3_model(grid, jfeats) - # feats values does not match here. + # feats values does not match here. - # Get output for this point cloud. + # Get output for this point cloud. curr_output = jfeats.jdata[original_coord_to_voxel_idx.jdata] outputs.append(curr_output) - + prev_offset = curr_offset - + # Concatenate all outputs output = torch.cat(outputs, dim=0) # import pdb; pdb.set_trace() @@ -178,7 +185,6 @@ def forward(self, data_dict): # catted_input_grid_ijk = torch.cat(catted_input_grid_ijk, dim=0) # catted_input_feat = torch.cat(catted_input_feat, dim=0) # catted_original_coord_to_voxel_idx = torch.cat(catted_original_coord_to_voxel_idx, dim=0) - else: # Standard batch mode (original implementation) @@ -187,8 +193,12 @@ def forward(self, data_dict): ) # import pdb; pdb.set_trace() # TODO: check the downsampling behavior is the same or not? - assert grid_coord.shape == grid.ijk.jdata.shape, f"grid_coord.shape: {grid_coord.shape}, grid.ijk.jdata.shape: {grid.ijk.jdata.shape}" # this is not always true, because mix-prob may duplicate points with the same coordinate. - assert grid_coord.shape[0] == original_coord_to_voxel_idx.jdata.shape[0], f"grid_coord.shape: {grid_coord.shape}, original_coord_to_voxel_idx.jdata.shape: {original_coord_to_voxel_idx.jdata.shape}" + assert ( + grid_coord.shape == grid.ijk.jdata.shape + ), f"grid_coord.shape: {grid_coord.shape}, grid.ijk.jdata.shape: {grid.ijk.jdata.shape}" # this is not always true, because mix-prob may duplicate points with the same coordinate. + assert ( + grid_coord.shape[0] == original_coord_to_voxel_idx.jdata.shape[0] + ), f"grid_coord.shape: {grid_coord.shape}, original_coord_to_voxel_idx.jdata.shape: {original_coord_to_voxel_idx.jdata.shape}" # import pdb; pdb.set_trace() if torch.is_autocast_enabled(): @@ -200,9 +210,4 @@ def forward(self, data_dict): output = jfeats.jdata[original_coord_to_voxel_idx.jdata] # import pdb; pdb.set_trace() - - return output # return logits in torch.tensor format - - - - + return output # return logits in torch.tensor format diff --git a/point_transformer_v3/fvdb_extensions/models/ptv3_fvdb.py b/point_transformer_v3/fvdb_extensions/models/ptv3_fvdb.py index 77d6c71..16ed724 100644 --- a/point_transformer_v3/fvdb_extensions/models/ptv3_fvdb.py +++ b/point_transformer_v3/fvdb_extensions/models/ptv3_fvdb.py @@ -10,22 +10,22 @@ For pointcept framework integration, see point_transformer_v3m1_fvdb.py """ -from typing import Dict, Tuple, Union, List +from typing import Any, Callable, cast -# Add NVTX import for profiling try: import flash_attn except ImportError: flash_attn = None +from functools import partial + +import fvdb import torch import torch.nn import torch.nn.functional as F from timm.layers import DropPath -from functools import partial - -import fvdb +# Add NVTX import for profiling try: import torch.cuda.nvtx as nvtx @@ -52,21 +52,21 @@ def __init__( self, in_channels, embed_channels, - norm_layer_module: torch.nn.Module = torch.nn.LayerNorm, + norm_layer_module: type[torch.nn.Module] | Callable = torch.nn.LayerNorm, embedding_mode: str = "linear", - shared_plan_cache: Dict = None, + shared_plan_cache: dict | None = None, ): """ Args: in_channels (int): Number of channels in the input features. embed_channels (int): Number of channels in the output features. - norm_layer_module (torch.nn.Module): Normalization layer module. + norm_layer_module (type[torch.nn.Module] | Callable): Normalization layer module. embedding_mode (str): The type of embedding layer, "linear" or "conv3x3", "conv5x5". - shared_plan_cache (Dict): Shared cache for ConvolutionPlans across all layers. + shared_plan_cache (dict | None): Shared cache for ConvolutionPlans across all layers. """ super().__init__() self.embedding_mode = embedding_mode - self.shared_plan_cache = shared_plan_cache + self.shared_plan_cache = shared_plan_cache if shared_plan_cache is not None else {} if embedding_mode == "linear": self.embed = torch.nn.Linear(in_channels, embed_channels) @@ -140,7 +140,7 @@ def __init__( kernel_size: int = 2, in_channels: int = 64, out_channels: int = 64, - norm_layer_module: torch.nn.Module = torch.nn.LayerNorm, + norm_layer_module: type[torch.nn.Module] | Callable = torch.nn.LayerNorm, ): """ Args: @@ -175,7 +175,7 @@ def __init__( in_channels: int = 64, out_channels: int = 64, skip_channels: int = 64, - norm_layer_module: torch.nn.Module = torch.nn.LayerNorm, + norm_layer_module: type[torch.nn.Module] | Callable = torch.nn.LayerNorm, ): """ Args: @@ -251,7 +251,7 @@ def __init__( num_heads: int, proj_drop: float = 0.0, patch_size: int = 0, - qk_scale: float = None, + qk_scale: float | None = None, sliding_window_attention: bool = False, order_index: int = 0, order_types: tuple = ("vdb",), @@ -262,7 +262,7 @@ def __init__( num_heads (int): Number of attention heads in each block. proj_drop (float): Dropout rate for MLP layers. patch_size (int): Patch size for patch attention. - qk_scale (float): Scale factor for query-key dot product. If None, uses 1/sqrt(head_dim). + qk_scale (float | None): Scale factor for query-key dot product. If None, uses 1/sqrt(head_dim). sliding_window_attention (bool): Whether to use sliding window attention (uses patch_size as window size). order_index (int): Index into order_types to select which order to use for this block. order_types (tuple): Tuple of order type strings (e.g., ("z", "z-trans")). @@ -390,7 +390,9 @@ def forward(self, grid, feats): if self.sliding_window_attention and self.patch_size > 0: # Perform sliding window attention per-grid using flash attention - assert flash_attn is not None, "flash_attn is required for sliding_window_attention. Install with: pip install flash-attn" + assert ( + flash_attn is not None + ), "flash_attn is required for sliding_window_attention. Install with: pip install flash-attn" num_voxels = feats_j.shape[0] H = self.num_heads D = self.head_dim @@ -404,8 +406,11 @@ def forward(self, grid, feats): continue qkv_b = qkv[start:end].view(1, Li, 3, H, D) window_size = (self.patch_size // 2, self.patch_size // 2) - out_b = flash_attn.flash_attn_qkvpacked_func( - qkv_b.half(), dropout_p=0.0, softmax_scale=self.scale, window_size=window_size + out_b = cast( + Any, + flash_attn.flash_attn_qkvpacked_func( + qkv_b.half(), dropout_p=0.0, softmax_scale=self.scale, window_size=window_size + ), ).reshape( Li, self.hidden_size ) # dtype: float16 @@ -419,7 +424,9 @@ def forward(self, grid, feats): elif self.patch_size > 0: # Perform attention within each patch_size window per-grid using varlen API - assert flash_attn is not None, "flash_attn is required when patch_size > 0. Install with: pip install flash-attn" + assert ( + flash_attn is not None + ), "flash_attn is required when patch_size > 0. Install with: pip install flash-attn" num_voxels = feats_j.shape[0] H = self.num_heads D = self.head_dim @@ -446,12 +453,15 @@ def forward(self, grid, feats): cu_seqlens = torch.zeros(len(lengths) + 1, device=qkv.device, dtype=torch.int32) cu_seqlens[1:] = torch.as_tensor(lengths, device=qkv.device, dtype=torch.int32).cumsum(dim=0) - feats_out_j = flash_attn.flash_attn_varlen_qkvpacked_func( - qkv.half(), - cu_seqlens, - max_seqlen=self.patch_size, - dropout_p=0.0, # TODO: implement attention dropout in the future. By default, it is 0. - softmax_scale=self.scale, + feats_out_j = cast( + Any, + flash_attn.flash_attn_varlen_qkvpacked_func( + qkv.half(), + cu_seqlens, + max_seqlen=self.patch_size, + dropout_p=0.0, # TODO: implement attention dropout in the future. By default, it is 0. + softmax_scale=self.scale, + ), ).reshape( num_voxels, self.hidden_size ) # dtype: float16 @@ -476,17 +486,17 @@ def forward(self, grid, feats): class PTV3_CPE(torch.nn.Module): - def __init__(self, hidden_size: int, no_conv_in_cpe: bool = False, shared_plan_cache: Dict = None): + def __init__(self, hidden_size: int, no_conv_in_cpe: bool = False, shared_plan_cache: dict | None = None): """ Args: hidden_size (int): Number of channels in the input features. no_conv_in_cpe (bool): Whether to disable convolution in CPE. - shared_plan_cache (Dict): Shared cache for ConvolutionPlans across all layers. + shared_plan_cache (dict | None): Shared cache for ConvolutionPlans across all layers. """ super().__init__() self.hidden_size = hidden_size self.no_conv_in_cpe = no_conv_in_cpe - self.shared_plan_cache = shared_plan_cache + self.shared_plan_cache = shared_plan_cache if shared_plan_cache is not None else {} self.cpe = torch.nn.ModuleList( [ ( @@ -536,12 +546,12 @@ def __init__( drop_path: float, proj_drop: float = 0.0, patch_size: int = 0, - qk_scale: float = None, + qk_scale: float | None = None, no_conv_in_cpe: bool = False, sliding_window_attention: bool = False, order_index: int = 0, order_types: tuple = ("vdb",), - shared_plan_cache: Dict = None, + shared_plan_cache: dict | None = None, ): """ Args: @@ -550,12 +560,12 @@ def __init__( drop_path (float): Drop path rate for regularization. proj_drop (float): Dropout rate for MLP layers. patch_size (int): Patch size for patch attention. - qk_scale (float): Scale factor for query-key dot product. If None, uses 1/sqrt(head_dim). + qk_scale (float | None): Scale factor for query-key dot product. If None, uses 1/sqrt(head_dim). no_conv_in_cpe (bool): Whether to disable convolution in CPE. sliding_window_attention (bool): Whether to use sliding window attention (uses patch_size as window size). order_index (int): Index into order_types to select which order to use for this block. order_types (tuple): Tuple of order type strings (e.g., ("z", "z-trans")). - shared_plan_cache (Dict): Shared cache for ConvolutionPlans across all layers. + shared_plan_cache (dict | None): Shared cache for ConvolutionPlans across all layers. """ super().__init__() @@ -614,11 +624,11 @@ def __init__( drop_path, # drop_path is a list of drop path rates for each block. proj_drop: float = 0.0, patch_size: int = 0, - qk_scale: float = None, + qk_scale: float | None = None, no_conv_in_cpe: bool = False, sliding_window_attention: bool = False, order_types: tuple = ("vdb",), - shared_plan_cache: Dict = None, + shared_plan_cache: dict | None = None, ): """ Args: @@ -628,11 +638,11 @@ def __init__( drop_path (list): Drop path rates for each block. proj_drop (float): Dropout rate for MLP layers. patch_size (int): Patch size for patch attention. - qk_scale (float): Scale factor for query-key dot product. If None, uses 1/sqrt(head_dim). + qk_scale (float | None): Scale factor for query-key dot product. If None, uses 1/sqrt(head_dim). no_conv_in_cpe (bool): Whether to disable convolution in CPE. sliding_window_attention (bool): Whether to use sliding window attention (uses patch_size as window size). order_types (tuple): Tuple of order type strings (e.g., ("z", "z-trans")). - shared_plan_cache (Dict): Shared cache for ConvolutionPlans across all layers. + shared_plan_cache (dict | None): Shared cache for ConvolutionPlans across all layers. """ super().__init__() self.depth = depth @@ -668,27 +678,27 @@ def __init__( self, num_classes: int, input_dim: int = 6, # xyz + intensity/reflectance + additional features - enc_depths: Tuple[int, ...] = ( + enc_depths: tuple[int, ...] = ( 2, 2, 2, 2, ), # default hyper-parameters to align with sonata ptv3's default hyper-parameters. - enc_channels: Tuple[int, ...] = (32, 64, 128, 256, 512), - enc_num_heads: Tuple[int, ...] = (2, 4, 8, 16, 32), - # enc_patch_size: Tuple[int, ...] = (4096), - dec_depths: Tuple[int, ...] = (), # by default, no decoder. - dec_channels: Tuple[int, ...] = (), - dec_num_heads: Tuple[int, ...] = (), + enc_channels: tuple[int, ...] = (32, 64, 128, 256, 512), + enc_num_heads: tuple[int, ...] = (2, 4, 8, 16, 32), + # enc_patch_size: tuple[int, ...] = (4096), + dec_depths: tuple[int, ...] = (), # by default, no decoder. + dec_channels: tuple[int, ...] = (), + dec_num_heads: tuple[int, ...] = (), patch_size: int = 0, drop_path: float = 0.3, proj_drop: float = 0.0, - qk_scale: float = None, + qk_scale: float | None = None, enable_batch_norm: bool = False, embedding_mode: str = "linear", no_conv_in_cpe: bool = False, sliding_window_attention: bool = False, - order_type: Union[str, tuple] = ("z", "z-trans"), + order_type: str | tuple = ("z", "z-trans"), shuffle_orders: bool = True, ) -> None: """ @@ -697,22 +707,22 @@ def __init__( Args: num_classes (int): Number of classes for segmentation. input_dim (int): Input feature dimension (default: 4 for xyz + intensity). - hidden_dims (Tuple[int, ...]): Hidden layer dimensions (not used in simplified version). - enc_depths (Tuple[int, ...]): Number of encoder blocks for each stage. - enc_channels (Tuple[int, ...]): Number of channels for each stage. - enc_num_heads (Tuple[int, ...]): Number of attention heads for each stage. - dec_depths (Tuple[int, ...]): Number of decoder blocks for each stage. - dec_channels (Tuple[int, ...]): Number of channels for each stage. - dec_num_heads (Tuple[int, ...]): Number of attention heads for each stage. + hidden_dims (tuple[int, ...]): Hidden layer dimensions (not used in simplified version). + enc_depths (tuple[int, ...]): Number of encoder blocks for each stage. + enc_channels (tuple[int, ...]): Number of channels for each stage. + enc_num_heads (tuple[int, ...]): Number of attention heads for each stage. + dec_depths (tuple[int, ...]): Number of decoder blocks for each stage. + dec_channels (tuple[int, ...]): Number of channels for each stage. + dec_num_heads (tuple[int, ...]): Number of attention heads for each stage. patch_size (int): Patch size for patch attention. drop_path (float): Drop path rate for regularization. proj_drop (float): Dropout rate for MLP layers. - qk_scale (float): Scale factor for query-key dot product. If None, uses 1/sqrt(head_dim). + qk_scale (float | None): Scale factor for query-key dot product. If None, uses 1/sqrt(head_dim). enable_batch_norm (bool): Whether to use batch normalization for the embedding, down pooling, and up pooling. embedding_mode (bool): the mode for the embedding layer, "linear" or "conv3x3", "conv5x5". no_conv_in_cpe (bool): Whether to disable convolution in CPE. sliding_window_attention (bool): Whether to use sliding window attention (uses patch_size as window size). - order (Union[str, tuple]): The type(s) of point ordering. Can be a single string ("vdb", "z", "z-trans", "hilbert", "hilbert-trans") + order (str | tuple): The type(s) of point ordering. Can be a single string ("vdb", "z", "z-trans", "hilbert", "hilbert-trans") or a tuple of strings (e.g., ("z", "z-trans")). Each block within a stage cycles through the order types. shuffle_orders (bool): Whether to shuffle the order of order types at the beginning of each forward pass and after each pooling. """ diff --git a/point_transformer_v3/scripts/apply_formatting.py b/point_transformer_v3/scripts/apply_formatting.py new file mode 100755 index 0000000..32bb461 --- /dev/null +++ b/point_transformer_v3/scripts/apply_formatting.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 + +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + + +""" +Apply code formatting to point_transformer_v3 project. + +This script applies black formatting to: +- scripts directory +- fvdb_extensions directory +- setup_env.py + +It ignores the external directory. +""" + +from __future__ import annotations + +import subprocess +import sys +from pathlib import Path + +# Get the directory containing this script +SCRIPT_DIR = Path(__file__).parent.resolve() +PROJECT_ROOT = SCRIPT_DIR.parent.resolve() + + +def main(): + """Apply formatting using black.""" + # Directories and files to format + targets = [ + str(PROJECT_ROOT / "scripts"), + str(PROJECT_ROOT / "fvdb_extensions"), + str(PROJECT_ROOT / "setup_env.py"), + ] + + # Black options matching codestyle.yml + black_options = [ + "--target-version=py311", + "--line-length=120", + "--verbose", + ] + + # Run black via python -m for better portability + cmd = [sys.executable, "-m", "black"] + black_options + targets + + print(f"Running: {' '.join(cmd)}") + print(f"Formatting targets:") + for target in targets: + print(f" - {target}") + print() + + try: + result = subprocess.run(cmd, check=True) + print("\nβœ“ Formatting applied successfully!") + return 0 + except subprocess.CalledProcessError as e: + print(f"\nβœ— Formatting failed with exit code {e.returncode}") + return e.returncode + except FileNotFoundError: + print("\nβœ— Error: black not found. Please install it:") + print(" pip install black~=24.0") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/point_transformer_v3/scripts/data/download_example_data.py b/point_transformer_v3/scripts/data/download_example_data.py index 3b54ab5..4ea12c5 100644 --- a/point_transformer_v3/scripts/data/download_example_data.py +++ b/point_transformer_v3/scripts/data/download_example_data.py @@ -1,6 +1,8 @@ # Copyright Contributors to the OpenVDB Project # SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + import json import logging import os diff --git a/point_transformer_v3/scripts/data/prepare_scannet_dataset.py b/point_transformer_v3/scripts/data/prepare_scannet_dataset.py index 73feb92..0c4a8b7 100644 --- a/point_transformer_v3/scripts/data/prepare_scannet_dataset.py +++ b/point_transformer_v3/scripts/data/prepare_scannet_dataset.py @@ -9,13 +9,15 @@ ensures consistent point counts per sample. """ +from __future__ import annotations + import argparse import glob import json import logging import os from pathlib import Path -from typing import Any, Dict, List +from typing import Any import numpy as np from torch.utils.data import Dataset @@ -192,7 +194,8 @@ def export_scannet_samples( # Randomly sample scenes np.random.seed(42) # create a permutation of the scene paths - selected_paths = np.random.permutation(scene_paths) + selected_paths = np.array(scene_paths) + selected_paths = selected_paths[np.random.permutation(len(selected_paths))] # Initialize dataset dataset = ScanNetDataset(data_root=data_root, split=split) diff --git a/point_transformer_v3/scripts/test/compute_difference.py b/point_transformer_v3/scripts/test/compute_difference.py index db650cf..d48d3e7 100644 --- a/point_transformer_v3/scripts/test/compute_difference.py +++ b/point_transformer_v3/scripts/test/compute_difference.py @@ -6,17 +6,19 @@ Usage: python compute_difference.py file1.json file2.json """ +from __future__ import annotations + import argparse import json import logging import os import sys -from typing import Any, Dict, List +from typing import Any import numpy as np -def load_stats_file(filepath: str, logger: logging.Logger) -> tuple[List[Dict[str, Any]], Dict[str, Any]]: +def load_stats_file(filepath: str, logger: logging.Logger) -> tuple[list[dict[str, Any]], dict[str, Any]]: """Load and parse a minimal_inference_stats.json file. Args: @@ -58,8 +60,8 @@ def load_stats_file(filepath: str, logger: logging.Logger) -> tuple[List[Dict[st def compute_deviations( - stats1: List[Dict[str, Any]], stats2: List[Dict[str, Any]], logger: logging.Logger -) -> Dict[str, Dict[str, float]]: + stats1: list[dict[str, Any]], stats2: list[dict[str, Any]], logger: logging.Logger +) -> dict[str, dict[str, float]]: """Compute deviations between corresponding entries in two stats files. Args: @@ -120,8 +122,8 @@ def compute_deviations( def compute_global_deviations( - global_stats1: Dict[str, Any], global_stats2: Dict[str, Any], logger: logging.Logger -) -> Dict[str, Dict[str, float]]: + global_stats1: dict[str, Any], global_stats2: dict[str, Any], logger: logging.Logger +) -> dict[str, dict[str, float]]: """Compute deviations between global statistics from two files. Args: diff --git a/point_transformer_v3/scripts/test/minimal_inference.py b/point_transformer_v3/scripts/test/minimal_inference.py index 4869eb0..fd2b34a 100644 --- a/point_transformer_v3/scripts/test/minimal_inference.py +++ b/point_transformer_v3/scripts/test/minimal_inference.py @@ -8,6 +8,8 @@ 2. Load and run the PT-v3 model """ +from __future__ import annotations + import argparse import gc import json @@ -15,6 +17,7 @@ import os import sys from pathlib import Path +from typing import Any # Setup paths for imports # Script is in scripts/test/, so go up two levels to get project root @@ -45,7 +48,9 @@ def range_pop(self): nvtx = DummyNVTX() -def create_ptv3_model(args, device, num_classes): +def create_ptv3_model( + args: argparse.Namespace, device: torch.device | str, num_classes: int +) -> torch.nn.Module: """Create a PT-v3 model. Args: @@ -137,7 +142,9 @@ def create_ptv3_model(args, device, num_classes): return model -def prepare_batched_inputs_from_scannet_points(batch_samples, voxel_size=0.1, device="cuda"): +def prepare_batched_inputs_from_scannet_points( + batch_samples: list[dict[str, Any]], voxel_size: float = 0.1, device: torch.device | str = "cuda" +) -> tuple[fvdb.GridBatch, fvdb.JaggedTensor]: """Prepare batched inputs from a list of ScanNet-like samples. Args: From 2b55bc8b20954038da74e197a5b7f98c6b147cd5 Mon Sep 17 00:00:00 2001 From: Christopher Horvath Date: Wed, 19 Nov 2025 11:05:55 -0800 Subject: [PATCH 4/7] More help with conformance Signed-off-by: Christopher Horvath --- .github/workflows/codestyle.yml | 28 ++++---- point_transformer_v3/scripts/check_spdx.py | 70 +++++++++++++++++++ .../scripts/data/prepare_scannet_dataset.py | 2 + .../scripts/fix_formatting.sh | 23 ++++++ .../scripts/test/minimal_inference.py | 4 +- point_transformer_v3/setup_env.py | 11 ++- point_transformer_v3/setup_env.sh | 4 +- 7 files changed, 120 insertions(+), 22 deletions(-) create mode 100755 point_transformer_v3/scripts/check_spdx.py create mode 100755 point_transformer_v3/scripts/fix_formatting.sh diff --git a/.github/workflows/codestyle.yml b/.github/workflows/codestyle.yml index 7ca5a86..99fc8be 100644 --- a/.github/workflows/codestyle.yml +++ b/.github/workflows/codestyle.yml @@ -20,8 +20,8 @@ jobs: - uses: actions/checkout@v4 - uses: psf/black@stable with: - options: "--check --verbose --target-version=py311 --line-length=120" - src: "./" + options: "--check --verbose --target-version=py311 --line-length=120 --extend-exclude 'point_transformer_v3/external/pointcept'" + src: "point_transformer_v3" version: "~= 24.0" # NOTE: Enable this when/if we have C++ code @@ -36,12 +36,12 @@ jobs: # clangFormatVersion: 18 # style: file - include-guards: - name: Check include guards - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: swahtz/include-guards-check-action@master + # include-guards: + # name: Check include guards + # runs-on: ubuntu-latest + # steps: + # - uses: actions/checkout@v4 + # - uses: swahtz/include-guards-check-action@master check-spdx-identifiers: name: Check SPDX identifiers @@ -49,11 +49,9 @@ jobs: steps: - name: checkout uses: actions/checkout@v4 - - uses: enarx/spdx@master - with: - licenses: |- - Apache-2.0 - MIT + - name: Check SPDX headers + run: | + python3 point_transformer_v3/scripts/check_spdx.py # Search the git repository for any trailing spaces excluding auto-generated wlt files # NOTE: Migrated from openvdb whitespace.yml @@ -64,7 +62,7 @@ jobs: - name: test run: | set +e - git grep -n -I -E '^.+[ ]+$' -- ':!*.wlt' + git grep -n -I -E '^.+[ ]+$' -- point_transformer_v3 ':!point_transformer_v3/external/pointcept' ':!*.wlt' test $? -eq 1 # Search for any tabs excluding meeting notes, image files and a few others @@ -76,5 +74,5 @@ jobs: - name: test run: | set +e - git grep -n " " -- ':!*/codestyle.yml' ':!*.svg' ':!*.cmd' ':!*.png' ':!*.wlt' ':!*.jpg' ':!*.gif' ':!*.mp4' ':!*.pt' ':!*.pth' ':!*.nvdb' ':!*.npz' ':!*.gitmodules' + git grep -n -I " " -- point_transformer_v3 ':!point_transformer_v3/external/pointcept' ':!*/codestyle.yml' ':!*.svg' ':!*.cmd' ':!*.png' ':!*.wlt' ':!*.jpg' ':!*.gif' ':!*.mp4' ':!*.pt' ':!*.pth' ':!*.nvdb' ':!*.npz' ':!*.gitmodules' test $? -eq 1 diff --git a/point_transformer_v3/scripts/check_spdx.py b/point_transformer_v3/scripts/check_spdx.py new file mode 100755 index 0000000..0f1acd6 --- /dev/null +++ b/point_transformer_v3/scripts/check_spdx.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + +""" +Check for SPDX identifiers in source files. +Excludes external directory and hidden files. +""" + +import os +import sys +from pathlib import Path + +# Extensions to check +EXTENSIONS = {'.py', '.cpp', '.h', '.cu', '.cuh', '.sh'} + +# Directories to exclude +EXCLUDES = {'external', '__pycache__', '.git', '.github', '.vscode', '.idea'} + +def check_file(filepath): + """Check if file contains SPDX-License-Identifier.""" + try: + with open(filepath, 'r', encoding='utf-8') as f: + # Read first 20 lines + for _ in range(20): + line = f.readline() + if not line: + break + if "SPDX-License-Identifier" in line: + return True + except Exception as e: + print(f"Error reading {filepath}: {e}") + return False + return False + +def main(): + script_dir = Path(__file__).parent.resolve() + project_root = script_dir.parent + + print(f"Checking for SPDX identifiers in {project_root}...") + print(f"Excluding: {', '.join(EXCLUDES)}") + + failed_files = [] + checked_count = 0 + + for root, dirs, files in os.walk(project_root): + # Modify dirs in-place to skip excluded directories + dirs[:] = [d for d in dirs if d not in EXCLUDES] + + for file in files: + file_path = Path(root) / file + if file_path.suffix in EXTENSIONS: + checked_count += 1 + if not check_file(file_path): + failed_files.append(str(file_path.relative_to(project_root))) + + print(f"Checked {checked_count} files.") + + if failed_files: + print("\nMissing SPDX-License-Identifier in:") + for f in failed_files: + print(f" - {f}") + return 1 + + print("\nAll files have SPDX identifiers.") + return 0 + +if __name__ == "__main__": + sys.exit(main()) + diff --git a/point_transformer_v3/scripts/data/prepare_scannet_dataset.py b/point_transformer_v3/scripts/data/prepare_scannet_dataset.py index 0c4a8b7..7561f93 100644 --- a/point_transformer_v3/scripts/data/prepare_scannet_dataset.py +++ b/point_transformer_v3/scripts/data/prepare_scannet_dataset.py @@ -1,4 +1,6 @@ +# Copyright Contributors to the OpenVDB Project # SPDX-License-Identifier: Apache-2.0 + """ Modified from https://github.com/Pointcept/Pointcept.git diff --git a/point_transformer_v3/scripts/fix_formatting.sh b/point_transformer_v3/scripts/fix_formatting.sh new file mode 100755 index 0000000..d8313d5 --- /dev/null +++ b/point_transformer_v3/scripts/fix_formatting.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + +# Exit on error +set -e + +# Determine the directory of this script +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# Determine the root of point_transformer_v3 (parent of scripts) +PTV3_ROOT="$(dirname "$DIR")" + +# Change to the project root +cd "$PTV3_ROOT" + +echo "Running black formatting on $(pwd)..." + +# Run black, excluding the submodule +# The pattern "external/pointcept" will match the directory relative to the root +black --target-version=py311 --line-length=120 --extend-exclude "external/pointcept" . + +echo "Formatting complete." diff --git a/point_transformer_v3/scripts/test/minimal_inference.py b/point_transformer_v3/scripts/test/minimal_inference.py index fd2b34a..42e4a0c 100644 --- a/point_transformer_v3/scripts/test/minimal_inference.py +++ b/point_transformer_v3/scripts/test/minimal_inference.py @@ -48,9 +48,7 @@ def range_pop(self): nvtx = DummyNVTX() -def create_ptv3_model( - args: argparse.Namespace, device: torch.device | str, num_classes: int -) -> torch.nn.Module: +def create_ptv3_model(args: argparse.Namespace, device: torch.device | str, num_classes: int) -> torch.nn.Module: """Create a PT-v3 model. Args: diff --git a/point_transformer_v3/setup_env.py b/point_transformer_v3/setup_env.py index 952c9d5..f61300e 100644 --- a/point_transformer_v3/setup_env.py +++ b/point_transformer_v3/setup_env.py @@ -1,4 +1,7 @@ #!/usr/bin/env python3 +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 + """ Setup script for point_transformer_v3 project. @@ -22,19 +25,21 @@ # Get the directory containing this script (point_transformer_v3) PROJECT_ROOT = Path(__file__).parent.resolve() + def setup_paths(): """Add necessary paths to sys.path for imports.""" paths_to_add = [ str(PROJECT_ROOT), # For importing fvdb_extensions str(PROJECT_ROOT / "external" / "pointcept"), # For importing pointcept ] - + for path in paths_to_add: if path not in sys.path: sys.path.insert(0, path) - + return paths_to_add + def get_pythonpath(): """Get PYTHONPATH string for shell export.""" paths = [ @@ -43,6 +48,7 @@ def get_pythonpath(): ] return os.pathsep.join(paths) + if __name__ == "__main__": # When run as script, print export command pythonpath = get_pythonpath() @@ -52,4 +58,3 @@ def get_pythonpath(): else: # When imported, automatically set up paths setup_paths() - diff --git a/point_transformer_v3/setup_env.sh b/point_transformer_v3/setup_env.sh index db1247d..98dc0e1 100755 --- a/point_transformer_v3/setup_env.sh +++ b/point_transformer_v3/setup_env.sh @@ -1,4 +1,7 @@ #!/usr/bin/env bash +# Copyright Contributors to the OpenVDB Project +# SPDX-License-Identifier: Apache-2.0 +# # Setup script for point_transformer_v3 # This sets up PYTHONPATH so imports work correctly @@ -10,4 +13,3 @@ echo "$PYTHONPATH" echo "" echo "You can now run scripts from this directory." echo "Example: python minimal_inference.py --help" - From 7e51d28af76820642daf8750086c45588bde8bd0 Mon Sep 17 00:00:00 2001 From: Christopher Horvath Date: Wed, 19 Nov 2025 11:10:03 -0800 Subject: [PATCH 5/7] More conformance Signed-off-by: Christopher Horvath --- point_transformer_v3/requirements.txt | 3 +++ point_transformer_v3/scripts/check_spdx.py | 10 ++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/point_transformer_v3/requirements.txt b/point_transformer_v3/requirements.txt index debc317..c4b1285 100644 --- a/point_transformer_v3/requirements.txt +++ b/point_transformer_v3/requirements.txt @@ -18,3 +18,6 @@ torch-cluster # Sparse convolution - spconv-cu129 not available, try cu124 (usually compatible with 12.9) # If this fails, install from source: https://github.com/traveller59/spconv spconv-cu124 + +# Development +black~=24.0 diff --git a/point_transformer_v3/scripts/check_spdx.py b/point_transformer_v3/scripts/check_spdx.py index 0f1acd6..adea244 100755 --- a/point_transformer_v3/scripts/check_spdx.py +++ b/point_transformer_v3/scripts/check_spdx.py @@ -12,15 +12,16 @@ from pathlib import Path # Extensions to check -EXTENSIONS = {'.py', '.cpp', '.h', '.cu', '.cuh', '.sh'} +EXTENSIONS = {".py", ".cpp", ".h", ".cu", ".cuh", ".sh"} # Directories to exclude -EXCLUDES = {'external', '__pycache__', '.git', '.github', '.vscode', '.idea'} +EXCLUDES = {"external", "__pycache__", ".git", ".github", ".vscode", ".idea"} + def check_file(filepath): """Check if file contains SPDX-License-Identifier.""" try: - with open(filepath, 'r', encoding='utf-8') as f: + with open(filepath, "r", encoding="utf-8") as f: # Read first 20 lines for _ in range(20): line = f.readline() @@ -33,6 +34,7 @@ def check_file(filepath): return False return False + def main(): script_dir = Path(__file__).parent.resolve() project_root = script_dir.parent @@ -65,6 +67,6 @@ def main(): print("\nAll files have SPDX identifiers.") return 0 + if __name__ == "__main__": sys.exit(main()) - From a3b901befab88cc3252282af0d8d3282482d2a26 Mon Sep 17 00:00:00 2001 From: Christopher Horvath Date: Wed, 19 Nov 2025 11:23:15 -0800 Subject: [PATCH 6/7] More conformity to checks Signed-off-by: Christopher Horvath --- .clang-format-ignore | 3 ++ .github/workflows/codestyle.yml | 42 ++++++++++--------- .../fvdb_extensions/models/ptv3_fvdb.py | 4 +- .../scripts/apply_formatting.py | 6 +-- 4 files changed, 30 insertions(+), 25 deletions(-) create mode 100644 .clang-format-ignore diff --git a/.clang-format-ignore b/.clang-format-ignore new file mode 100644 index 0000000..8a9b8b7 --- /dev/null +++ b/.clang-format-ignore @@ -0,0 +1,3 @@ +# Ignore all submodules in external directories +**/external/** + diff --git a/.github/workflows/codestyle.yml b/.github/workflows/codestyle.yml index 99fc8be..8497c4f 100644 --- a/.github/workflows/codestyle.yml +++ b/.github/workflows/codestyle.yml @@ -20,28 +20,30 @@ jobs: - uses: actions/checkout@v4 - uses: psf/black@stable with: - options: "--check --verbose --target-version=py311 --line-length=120 --extend-exclude 'point_transformer_v3/external/pointcept'" + options: "--check --verbose --target-version=py311 --line-length=120 --extend-exclude '**/external/**'" src: "point_transformer_v3" version: "~= 24.0" - # NOTE: Enable this when/if we have C++ code - # test-cpp-clang-format-lint: - # name: Check C++ code style with clang-format - # runs-on: ubuntu-latest - # steps: - # - uses: actions/checkout@v4 - # - uses: DoozyX/clang-format-lint-action@v0.18.2 - # with: - # extensions: 'h,cpp,cc,cu,cuh' - # clangFormatVersion: 18 - # style: file + test-cpp-clang-format-lint: + name: Check C++ code style with clang-format + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: DoozyX/clang-format-lint-action@v0.18.2 + with: + extensions: 'h,cpp,cc,cu,cuh' + clangFormatVersion: 18 + style: file + exclude: '**/external/**' - # include-guards: - # name: Check include guards - # runs-on: ubuntu-latest - # steps: - # - uses: actions/checkout@v4 - # - uses: swahtz/include-guards-check-action@master + include-guards: + name: Check include guards + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: swahtz/include-guards-check-action@master + with: + exclude: '**/external/**' check-spdx-identifiers: name: Check SPDX identifiers @@ -62,7 +64,7 @@ jobs: - name: test run: | set +e - git grep -n -I -E '^.+[ ]+$' -- point_transformer_v3 ':!point_transformer_v3/external/pointcept' ':!*.wlt' + git grep -n -I -E '^.+[ ]+$' -- point_transformer_v3 ':!**/external/**' ':!*.wlt' test $? -eq 1 # Search for any tabs excluding meeting notes, image files and a few others @@ -74,5 +76,5 @@ jobs: - name: test run: | set +e - git grep -n -I " " -- point_transformer_v3 ':!point_transformer_v3/external/pointcept' ':!*/codestyle.yml' ':!*.svg' ':!*.cmd' ':!*.png' ':!*.wlt' ':!*.jpg' ':!*.gif' ':!*.mp4' ':!*.pt' ':!*.pth' ':!*.nvdb' ':!*.npz' ':!*.gitmodules' + git grep -n -I " " -- point_transformer_v3 ':!**/external/**' ':!*/codestyle.yml' ':!*.svg' ':!*.cmd' ':!*.png' ':!*.wlt' ':!*.jpg' ':!*.gif' ':!*.mp4' ':!*.pt' ':!*.pth' ':!*.nvdb' ':!*.npz' ':!*.gitmodules' test $? -eq 1 diff --git a/point_transformer_v3/fvdb_extensions/models/ptv3_fvdb.py b/point_transformer_v3/fvdb_extensions/models/ptv3_fvdb.py index 16ed724..f19a29c 100644 --- a/point_transformer_v3/fvdb_extensions/models/ptv3_fvdb.py +++ b/point_transformer_v3/fvdb_extensions/models/ptv3_fvdb.py @@ -78,7 +78,7 @@ def __init__( elif embedding_mode == "conv5x5": ## Implementation Option 1: Cascaded 3x3 convolutions # This approach uses two 3x3 convs to achieve a 5x5 receptive field with fewer parameters - # Parameters: (27 Γ— in_channels Γ— embed_channels) + (27 Γ— embed_channelsΒ²) + # Parameters: (27 x in_channels x embed_channels) + (27 x embed_channels^2) self.embed_conv3x3_1 = fvdb.nn.SparseConv3d( in_channels, embed_channels, kernel_size=3, stride=1, bias=False ) @@ -88,7 +88,7 @@ def __init__( ## Implementation Option 2: Direct 5x5 convolution # TODO: Implementation pending - requires additional sparse convolution support from fVDB-core. - # Expected parameters: 125 Γ— in_channels Γ— embed_channels + # Expected parameters: 125 x in_channels x embed_channels # self.embed_conv5x5_1 = fvdb.nn.SparseConv3d(in_channels, embed_channels, kernel_size=5, stride=1) else: raise ValueError(f"Unsupported embedding mode: {embedding_mode}") diff --git a/point_transformer_v3/scripts/apply_formatting.py b/point_transformer_v3/scripts/apply_formatting.py index 32bb461..e8130a5 100755 --- a/point_transformer_v3/scripts/apply_formatting.py +++ b/point_transformer_v3/scripts/apply_formatting.py @@ -53,13 +53,13 @@ def main(): try: result = subprocess.run(cmd, check=True) - print("\nβœ“ Formatting applied successfully!") + print("\n[OK] Formatting applied successfully!") return 0 except subprocess.CalledProcessError as e: - print(f"\nβœ— Formatting failed with exit code {e.returncode}") + print(f"\n[FAIL] Formatting failed with exit code {e.returncode}") return e.returncode except FileNotFoundError: - print("\nβœ— Error: black not found. Please install it:") + print("\n[FAIL] Error: black not found. Please install it:") print(" pip install black~=24.0") return 1 From 0a3dc5e5a293847d1eaa65f6cd0c81c3e1fe61b7 Mon Sep 17 00:00:00 2001 From: Christopher Horvath Date: Wed, 19 Nov 2025 11:30:07 -0800 Subject: [PATCH 7/7] Fixed workflows to resolve errors in CI Signed-off-by: Christopher Horvath --- .github/workflows/codestyle.yml | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/.github/workflows/codestyle.yml b/.github/workflows/codestyle.yml index 8497c4f..924a433 100644 --- a/.github/workflows/codestyle.yml +++ b/.github/workflows/codestyle.yml @@ -20,7 +20,7 @@ jobs: - uses: actions/checkout@v4 - uses: psf/black@stable with: - options: "--check --verbose --target-version=py311 --line-length=120 --extend-exclude '**/external/**'" + options: "--check --verbose --target-version=py311 --line-length=120 --extend-exclude '.*/external/.*'" src: "point_transformer_v3" version: "~= 24.0" @@ -29,21 +29,27 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: DoozyX/clang-format-lint-action@v0.18.2 - with: - extensions: 'h,cpp,cc,cu,cuh' - clangFormatVersion: 18 - style: file - exclude: '**/external/**' + # TODO: Re-enable when C++ files are present + # - uses: DoozyX/clang-format-lint-action@v0.18.2 + # with: + # extensions: 'h,cpp,cc,cu,cuh' + # clangFormatVersion: 18 + # style: file + # exclude: '**/external/**' + - name: Placeholder - always succeed + run: echo "clang-format check placeholder - always succeeds" include-guards: name: Check include guards runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: swahtz/include-guards-check-action@master - with: - exclude: '**/external/**' + # TODO: Re-enable when C++ header files are present + # - uses: swahtz/include-guards-check-action@master + # with: + # exclude: '**/external/**' + - name: Placeholder - always succeed + run: echo "include-guards check placeholder - always succeeds" check-spdx-identifiers: name: Check SPDX identifiers