Skip to content

Commit 236be7a

Browse files
committed
fixed stupid mistake w negative sampling
1 parent d43616c commit 236be7a

21 files changed

Lines changed: 1884 additions & 272 deletions

conda-recipe/build.sh

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#!/bin/bash
# conda-build script for FoldTree2.
#
# Installs the Python package with pip, then stages the optional bundled
# command-line tools and the configuration directory under
# $PREFIX/share/foldtree2, exposing each tool through a symlink in $PREFIX/bin.
#
# Required environment: PYTHON (interpreter used for the pip install) and
# PREFIX (installation prefix).
set -euo pipefail

: "${PYTHON:?PYTHON must name the interpreter to install with}"
: "${PREFIX:?PREFIX must name the installation prefix}"

readonly BIN_DIR="${PREFIX}/bin"
readonly SHARE_DIR="${PREFIX}/share/foldtree2"

#######################################
# Copy the contents of a bundled tool directory into SHARE_DIR and symlink
# the named executables into BIN_DIR.
# Globals:   BIN_DIR, SHARE_DIR (read)
# Arguments: $1   - source directory, relative to the working directory
#            $2.. - file names to mark executable and link, when present
# Returns:   0 if the directory is absent or was staged; non-zero when a
#            copy, chmod, or link step fails
#######################################
install_bundle() {
  local src_dir=$1
  shift
  [[ -d "${src_dir}" ]] || return 0

  # nullglob keeps an empty directory from handing cp a literal '*'.
  local entries
  shopt -s nullglob
  entries=("${src_dir}"/*)
  shopt -u nullglob
  if ((${#entries[@]} > 0)); then
    cp -r -- "${entries[@]}" "${SHARE_DIR}/"
  fi

  local tool
  for tool in "$@"; do
    # Only link tools that were actually copied; a missing one is not an error.
    if [[ -f "${SHARE_DIR}/${tool}" ]]; then
      chmod +x -- "${SHARE_DIR}/${tool}"
      ln -sf -- "${SHARE_DIR}/${tool}" "${BIN_DIR}/${tool}"
    fi
  done
}

# Install the package using pip (no dependencies - conda handles those)
"${PYTHON}" -m pip install . --no-deps -vv

# Note: PyTorch Geometric should be installed separately by users
# via: pip install torch-geometric
# We don't install it here to reduce package size and avoid ZIP64 issues

# Destination directories for the external tools
mkdir -p -- "${BIN_DIR}" "${SHARE_DIR}"

# Copy bundled tools (raxml-ng, mad, mafft utilities)
install_bundle raxml-ng raxml-ng
install_bundle madroot mad
install_bundle mafft_tools hex2maffttext maffttext2hex

# Copy configuration files
if [[ -d "foldtree2/config" ]]; then
  cp -r -- foldtree2/config "${SHARE_DIR}/"
fi

# Skip copying pretrained models to reduce package size
# Users should download models separately from GitHub releases
# echo "Note: Pretrained models not included in conda package to reduce size"
# echo "Download models from: https://github.com/DessimozLab/foldtree2/releases"

conda-recipe/meta.yaml

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
{% set name = "foldtree2" %}
2+
{% set version = "0.1.0" %}
3+
4+
package:
5+
name: {{ name|lower }}
6+
version: {{ version }}
7+
8+
source:
9+
path: ..
10+
11+
build:
12+
number: 0
13+
noarch: python
14+
entry_points:
15+
- foldtree2 = foldtree2.ft2treebuilder:main
16+
- ft2treebuilder = foldtree2.ft2treebuilder:main
17+
- pdbs-to-graphs = foldtree2.encode_pdbs:main
18+
- makesubmat = foldtree2.makesubmat:main
19+
20+
requirements:
21+
host:
22+
- python >=3.8
23+
- pip
24+
- setuptools
25+
- wheel
26+
run:
27+
- python >=3.8
28+
# Core scientific computing
29+
- numpy >=1.21.0
30+
- pandas >=1.3.0
31+
- scipy >=1.7.0
32+
- matplotlib-base >=3.4.0
33+
34+
# PyTorch (PyTorch Geometric is not a conda dependency here; users install it afterwards with: pip install torch-geometric)
35+
- pytorch >=2.0.0
36+
- torchvision
37+
38+
# Bioinformatics tools
39+
- biopython >=1.79
40+
- prody >=2.0.0
41+
42+
# Deep learning utilities
43+
- pytorch-lightning >=2.0.0
44+
- tensorboard >=2.10.0
45+
- einops >=0.6.0
46+
47+
# Phylogenetics
48+
- ete3 >=3.1.2
49+
# raxml-ng, mad, and the mafft helper utilities (hex2maffttext, maffttext2hex) are copied into the package by build.sh when present
50+
51+
# Utilities
52+
- colour >=0.1.5
53+
- datasketch >=1.5.0
54+
- pebble >=5.0.0
55+
- tqdm >=4.62.0
56+
- h5py >=3.6.0
57+
- pyyaml >=5.4.0
58+
- pip # for installing torch-geometric and pydssp via pip
59+
60+
test:
61+
imports:
62+
- foldtree2
63+
- foldtree2.src.encoder
64+
- foldtree2.src.mono_decoders
65+
- foldtree2.src.pdbgraph
66+
commands:
67+
- foldtree2 --about
68+
- pdbs-to-graphs --help
69+
- makesubmat --help
70+
71+
about:
72+
home: https://github.com/DessimozLab/foldtree2
73+
license: MIT
74+
license_file: LICENSE.txt
75+
summary: 'Maximum Likelihood Phylogenetic Tree Inference from Protein Structures'
76+
description: |
77+
FoldTree2 performs phylogenetic tree inference from protein 3D structures
78+
using neural network-based structural encoding. The core workflow:
79+
PDB → Graph → Encoder → Discrete Alphabet → Substitution Matrix → Tree Inference.
80+
dev_url: https://github.com/DessimozLab/foldtree2
81+
doc_url: https://github.com/DessimozLab/foldtree2/blob/main/README.md
82+
83+
extra:
84+
recipe-maintainers:
85+
- dmoi

conda-recipe/post-install.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/bin/bash
# Post-install script: installs, via pip, the extra packages this recipe does
# not list as conda requirements.
#
# Uses the interpreter named by PYTHON when that variable is set and falls
# back to `python` from PATH otherwise, so the script also works when run by
# hand. Stops at the first failing command instead of reporting success.
set -euo pipefail

readonly PIP_PACKAGES=(torch-geometric pydssp transformers)

echo "Installing PyTorch Geometric, pydssp and transformers via pip..."
"${PYTHON:-python}" -m pip install --no-deps "${PIP_PACKAGES[@]}"
echo "Post-install complete!"

conda-recipe/post-link.sh

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#!/bin/bash
# conda post-link script: tells the user which setup steps remain after the
# package has been linked into an environment.
#
# conda asks post-link scripts not to write to stdout; messages appended to
# $PREFIX/.messages.txt are shown by conda at the end of the install. When
# PREFIX is not set (script run by hand) the notes go to stdout instead.

#######################################
# Print the post-install notes.
# Outputs: the notes, on stdout
#######################################
print_post_link_notes() {
  cat << 'EOF'

╔═════════════════════════════════════════════════════════════╗
║ FoldTree2 Installation Complete! ║
╚═════════════════════════════════════════════════════════════╝

⚠️ IMPORTANT: Additional setup required

1️⃣ Install PyTorch Geometric (required for graph neural networks):

pip install torch-geometric

2️⃣ Download pretrained models (if needed):

Visit: https://github.com/DessimozLab/foldtree2/releases

Or train your own models using:
python foldtree2/learn_lightning.py --config config.yaml

3️⃣ Test your installation:

foldtree2 --about
pdbs-to-graphs --help

📚 Documentation: https://github.com/DessimozLab/foldtree2
🐛 Issues: https://github.com/DessimozLab/foldtree2/issues

EOF
}

#######################################
# Deliver the notes through conda's message file when PREFIX is set,
# otherwise print them.
# Globals: PREFIX (read)
# Outputs: appends to $PREFIX/.messages.txt, or writes to stdout
#######################################
emit_post_link_notes() {
  if [[ -n "${PREFIX:-}" ]]; then
    print_post_link_notes >> "${PREFIX}/.messages.txt"
  else
    print_post_link_notes
  fi
}

emit_post_link_notes

conda-recipe/run_test.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
#!/usr/bin/env python
2+
"""
3+
Test script for FoldTree2 conda package
4+
"""
5+
import sys
6+
import subprocess
7+
8+
def test_imports(modules=None):
    """Test that all core modules can be imported.

    Args:
        modules: Optional iterable of dotted module names to import. When
            omitted, the foldtree2 package and its encoder, mono_decoders
            and pdbgraph submodules are checked.

    Returns:
        True when every module imports, False when at least one raises
        ImportError. Every module is attempted, so each failure is reported.
    """
    import importlib

    if modules is None:
        modules = (
            'foldtree2',
            'foldtree2.src.encoder',
            'foldtree2.src.mono_decoders',
            'foldtree2.src.pdbgraph',
        )

    print("Testing module imports...")
    failed = []
    for name in modules:
        try:
            importlib.import_module(name)
        except ImportError as e:
            print(f" ✗ {name}: {e}")
            failed.append(name)
        else:
            print(f" ✓ {name}")

    if failed:
        print(f"✗ Import failed: {', '.join(failed)}\n")
        return False

    print("✓ All core modules imported successfully\n")
    return True
29+
30+
def test_entry_points(commands=None, timeout=10):
    """Test that entry points are available.

    Each command is run with its output captured; a command passes when it
    exits with return code 0 within the timeout.

    Args:
        commands: Optional list of argv lists to run. When omitted, the
            `foldtree2 --about`, `pdbs-to-graphs --help` and
            `makesubmat --help` commands are checked.
        timeout: Seconds to wait for each command before treating it as
            failed.

    Returns:
        True when every command passes, False otherwise. Every command is
        run, so each failure is reported.
    """
    if commands is None:
        commands = [
            ['foldtree2', '--about'],
            ['pdbs-to-graphs', '--help'],
            ['makesubmat', '--help'],
        ]

    print("Testing command-line entry points...")

    all_ok = True
    for cmd in commands:
        label = ' '.join(cmd)
        try:
            result = subprocess.run(
                cmd,
                capture_output=True,
                timeout=timeout,
                text=True,
            )
        except subprocess.TimeoutExpired:
            print(f" ✗ Command '{label}' timed out")
            all_ok = False
        except FileNotFoundError:
            print(f" ✗ Command '{label}' not found")
            all_ok = False
        except Exception as e:
            print(f" ✗ Command '{label}' error: {e}")
            all_ok = False
        else:
            if result.returncode == 0:
                print(f" ✓ Command '{label}' works")
            else:
                print(f" ✗ Command '{label}' failed with return code {result.returncode}")
                if result.stderr:
                    # Keep the report short: only the start of stderr is shown.
                    print(f" Error: {result.stderr[:200]}")
                all_ok = False

    if all_ok:
        print("✓ All entry points work correctly\n")
    else:
        print("✗ Some entry points failed\n")
    return all_ok
67+
68+
def test_dependencies(deps=None):
    """Test that key dependencies are available.

    Args:
        deps: Optional iterable of importable module names. When omitted,
            torch, torch_geometric, Bio, pytorch_lightning, numpy and pandas
            are checked.

    Returns:
        True when every dependency imports, False when at least one raises
        ImportError. Every dependency is attempted, so all missing ones are
        listed in a single run.
    """
    if deps is None:
        deps = [
            'torch',
            'torch_geometric',
            'Bio',
            'pytorch_lightning',
            'numpy',
            'pandas',
        ]

    print("Testing key dependencies...")

    missing = []
    for dep in deps:
        try:
            __import__(dep)
        except ImportError:
            print(f" ✗ {dep} not found")
            missing.append(dep)
        else:
            print(f" ✓ {dep}")

    if missing:
        print(f"✗ Missing dependencies: {', '.join(missing)}\n")
        return False

    print("✓ All key dependencies available\n")
    return True
91+
92+
if __name__ == '__main__':
    # Script entry point: run the three test stages and exit 0 on success,
    # 1 on failure.
    rule = "=" * 60

    print(rule)
    print("FoldTree2 Conda Package Test Suite")
    print(rule + "\n")

    # `and` short-circuits, so a failing stage skips the stages after it.
    success = test_imports() and test_dependencies() and test_entry_points()

    if success:
        verdict, exit_code = "✓ ALL TESTS PASSED", 0
    else:
        verdict, exit_code = "✗ SOME TESTS FAILED", 1

    print(rule)
    print(verdict)
    print(rule)
    sys.exit(exit_code)

config_all_three_decoders.yaml

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# FoldTree2 Configuration: All Three Decoders (Sequence + Geometry + FoldX)
2+
# This config trains a model with:
3+
# - Sequence Transformer decoder (amino acid prediction + SS)
4+
# - Geometry CNN decoder (contacts, angles, secondary structure)
5+
# - FoldX decoder (energy prediction)
6+
7+
# Model Architecture
8+
model_name: "ft2_all_decoders"
9+
output_dir: "./models/all_three_decoders_run/"
10+
run_name: "all_decoders_seq_geo_foldx"
11+
12+
# Dataset
13+
dataset: "structs_train_final.h5"
14+
data_dir: "../../datasets/foldtree2/"
15+
aapropcsv: "config/aaindex1.csv"
16+
17+
# Training Parameters
18+
epochs: 100
19+
batch_size: 8
20+
learning_rate: 0.0001
21+
seed: 42
22+
23+
# Model Dimensions
24+
hidden_size: 200
25+
num_embeddings: 40
26+
embedding_dim: 128
27+
28+
# Encoder Configuration
29+
EMA: true
30+
use_commitment_scheduling: true
31+
commitment_cost: 0.9
32+
commitment_start: 0.1
33+
commitment_schedule: "cosine"
34+
commitment_warmup_steps: 5000
35+
36+
# Decoder Outputs
37+
output_fft: false
38+
output_rt: false
39+
output_foldx: true # Enable FoldX decoder
40+
hetero_gae: false
41+
42+
# Optimization
43+
clip_grad: true
44+
gradient_accumulation_steps: 2
45+
mixed_precision: true
46+
tensor_core_precision: "high"
47+
48+
# Learning Rate Schedule
49+
lr_schedule: "cosine"
50+
lr_warmup_ratio: 0.1
51+
lr_min: 0.000001
52+
53+
# Loss Weights - Balanced for all three decoders
54+
edge_weight: 0.25 # Geometry: contact reconstruction
55+
logit_weight: 0.25 # Geometry: edge logits
56+
x_weight: 1.0 # Sequence: amino acid prediction
57+
vq_weight: 0.1 # VQ-VAE commitment
58+
angles_weight: 0.05 # Geometry: backbone angles
59+
ss_weight: 0.25 # Sequence/Geometry: secondary structure
60+
fft2_weight: 0.01 # Optional: FFT features
61+
62+
# pLDDT Masking (for low-confidence regions)
63+
mask_plddt: true
64+
plddt_threshold: 0.3
65+
66+
# Logging
67+
tensorboard_dir: "./runs/"
68+
69+
# GPU Settings (for single GPU training)
70+
# For multi-GPU, use learn_lightning.py instead
71+
device: null # auto-select

0 commit comments

Comments
 (0)