
Commit 971c733

fixing radians, dataset building, fape
1 parent e85bee5

10 files changed

Lines changed: 1254 additions & 3266 deletions

foldtree2/notebooks/experiments/dd.ipynb

Lines changed: 0 additions & 2732 deletions
This file was deleted.

foldtree2/notebooks/experiments/test_monodecoders.ipynb

Lines changed: 698 additions & 319 deletions
Large diffs are not rendered by default.

foldtree2/notebooks/monomer_graph_trainingdata.ipynb

Lines changed: 73 additions & 72 deletions
Large diffs are not rendered by default.

foldtree2/src/encoder.py

Lines changed: 16 additions & 3 deletions
```diff
@@ -284,9 +284,22 @@ def forward(self, data, edge_attr_dict=None, **kwargs):
         for i, convs in enumerate(self.body['convs']):
             # Apply graph convolutions and average over all edge types
             if edge_attr_dict is not None:
-                x_list = [conv(x, edge_index=edge_index_dict[tuple(edge_type.split('_'))],
-                               edge_attr = edge_attr_dict[tuple(edge_type.split('_') )] )
-                          for edge_type, conv in convs.items()]
+                x_list = []
+                for edge_type, conv in convs.items():
+                    edge_key = tuple(edge_type.split('_'))
+                    edge_attr = edge_attr_dict[edge_key]
+
+                    # Normalize edge attributes to [num_edges, edge_dim] for TransformerConv.
+                    if edge_attr is not None and edge_attr.dim() == 1:
+                        edge_attr = edge_attr.unsqueeze(-1)
+                    if edge_attr is not None and edge_attr.size(-1) != self.edge_dim:
+                        if edge_attr.size(-1) > self.edge_dim:
+                            edge_attr = edge_attr[:, :self.edge_dim]
+                        else:
+                            pad_cols = self.edge_dim - edge_attr.size(-1)
+                            edge_attr = F.pad(edge_attr, (0, pad_cols))
+
+                    x_list.append(conv(x, edge_index=edge_index_dict[edge_key], edge_attr=edge_attr))
             else:
                 x_list = [conv(x, edge_index=edge_index_dict[tuple(edge_type.split('_'))])
                           for edge_type, conv in convs.items()]
```
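For reference, the added normalization logic reads naturally as a standalone helper. A minimal sketch, assuming `edge_attr` is a float tensor and `edge_dim` matches the width the TransformerConv layers were built with (the helper name is hypothetical, not part of the commit):

```python
import torch
import torch.nn.functional as F

def normalize_edge_attr(edge_attr, edge_dim):
    # Hypothetical helper mirroring the logic added in encoder.py.
    if edge_attr is None:
        return None
    if edge_attr.dim() == 1:
        # Promote [num_edges] to [num_edges, 1] so there is a feature axis.
        edge_attr = edge_attr.unsqueeze(-1)
    if edge_attr.size(-1) > edge_dim:
        # Truncate extra feature columns.
        edge_attr = edge_attr[:, :edge_dim]
    elif edge_attr.size(-1) < edge_dim:
        # Zero-pad missing feature columns on the right.
        edge_attr = F.pad(edge_attr, (0, edge_dim - edge_attr.size(-1)))
    return edge_attr

# e.g. a 1-D distance per edge, widened to edge_dim=4:
attr = normalize_edge_attr(torch.rand(10), edge_dim=4)  # -> shape (10, 4)
```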

foldtree2/src/losses/fape.py

Lines changed: 72 additions & 87 deletions
```diff
@@ -334,40 +334,23 @@ def quaternion_to_rotation_matrix(quat):
 
 def compute_chain_positions(quaternions, translations, reference_coords=None):
     """
-    Apply rotation (quaternion) and translation to a set of 3D reference coordinates using PyTorch.
+    Build chain coordinates from quaternion + translation predictions.
 
     Parameters:
     - quaternions: (N, 4) tensor of quaternions (w, x, y, z) - scalar first
-    - translations: (N, 3) tensor of translations (tx, ty, tz)
+    - translations: (N, 3) tensor of per-step translations
     - reference_coords: (M, 3) tensor of reference points (default is [[0, 0, 0]])
 
     Returns:
-    - transformed_coords: (N, 3) tensor of transformed coordinates
+    - transformed_coords: (N, 3) tensor of reconstructed coordinates
     """
-    device = quaternions.device
-    quaternions = quaternions / quaternions.norm(dim=-1, keepdim=True)  # Normalize quaternions
-
-    if reference_coords is None:
-        reference_coords = torch.zeros(1, 3, device=device)
-
-    N = quaternions.shape[0]
-
-    w, x, y, z = quaternions.unbind(-1)
-
-    # Rotation matrix components
-    R = torch.stack([
-        1 - 2*(y**2 + z**2), 2*(x*y - z*w), 2*(x*z + y*w),
-        2*(x*y + z*w), 1 - 2*(x**2 + z**2), 2*(y*z - x*w),
-        2*(x*z - y*w), 2*(y*z + x*w), 1 - 2*(x**2 + y**2)
-    ], dim=-1).reshape(N, 3, 3)
-
-    # Apply rotation to reference coordinates (take first point if multiple)
-    rotated = torch.matmul(reference_coords[0:1], R.transpose(1, 2)).squeeze(0)  # (N, 3)
-
-    # Apply translation
-    transformed_coords = rotated + translations
-
-    return transformed_coords
+    if reference_coords is not None:
+        # Legacy argument kept for API compatibility. Chain reconstruction does not
+        # use an external reference point.
+        pass
+    R = quaternion_to_rotation_matrix(quaternions)
+    # For generic RT chain predictions, translations are interpreted in local frame.
+    return reconstruct_positions(R, translations, translation_frame='local', include_origin=False)
 
 
 def compute_chain_positions_rotmat(rotations, translations):
```
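A quick sanity check of the rewired `compute_chain_positions`: with identity quaternions no rotation is composed, so the 'local' chain reduces to a cumulative sum of translations. A sketch, assuming the package imports as `foldtree2.src.losses.fape`:

```python
import torch
from foldtree2.src.losses.fape import compute_chain_positions

# Identity quaternion (w=1, x=y=z=0) at every step: each local frame equals
# the global frame, so the chain is just the running sum of translations.
N = 5
quats = torch.zeros(N, 4)
quats[:, 0] = 1.0                 # scalar-first convention
trans = torch.randn(N, 3)

coords = compute_chain_positions(quats, trans)  # (N, 3)
assert torch.allclose(coords, torch.cumsum(trans, dim=0), atol=1e-6)
```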
```diff
@@ -381,37 +364,14 @@ def compute_chain_positions_rotmat(rotations, translations):
     Returns:
         Tensor: Shape (*, N, 3) global coordinates for each position
     """
-    # Handle batched or unbatched input
-    orig_shape = rotations.shape[:-2]
     if rotations.ndim == 3:
-        rotations = rotations.unsqueeze(0)
-        translations = translations.unsqueeze(0)
+        return reconstruct_positions(rotations, translations, translation_frame='local', include_origin=False)
 
-    batch_size = rotations.shape[0]
-    N = rotations.shape[1]
-    positions = []
-
-    for b in range(batch_size):
-        # Initialize starting position and rotation
-        global_R = torch.eye(3, dtype=rotations.dtype, device=rotations.device)
-        curr_pos = torch.zeros(3, dtype=translations.dtype, device=translations.device)
-        chain_positions = []
-
-        for i in range(N):
-            chain_positions.append(curr_pos.clone())
-            # Update global rotation and position
-            global_R = torch.matmul(global_R, rotations[b, i])
-            curr_pos = curr_pos + torch.matmul(global_R, translations[b, i])
-
-        positions.append(torch.stack(chain_positions))
-
-    positions = torch.stack(positions)
-
-    # Return to original shape if unbatched input
-    if len(orig_shape) == 1:
-        positions = positions.squeeze(0)
-
-    return positions
+    # Batched input: process each structure and stack
+    coords = []
+    for b in range(rotations.shape[0]):
+        coords.append(reconstruct_positions(rotations[b], translations[b], translation_frame='local', include_origin=False))
+    return torch.stack(coords, dim=0)
 
 
```
```diff
@@ -466,13 +426,14 @@ def transform_rt_to_coordinates(rotations, translations):
     """
     Convert R, t matrices into global 3D coordinates.
     """
-    batch_size, num_residues, _ = rotations.shape
-    coords = torch.zeros((batch_size, num_residues, 3), device=rotations.device)
-    for b in range(batch_size):
-        transform = torch.eye(4, device=rotations.device)
-        for i in range(num_residues):
-            pass  # Implementation needed
-    return coords
+    if rotations.ndim == 3:
+        return reconstruct_positions(rotations, translations, translation_frame='local', include_origin=False)
+    if rotations.ndim != 4:
+        raise ValueError(f"Expected rotations ndim 3 or 4, got {rotations.ndim}")
+    coords = []
+    for b in range(rotations.shape[0]):
+        coords.append(reconstruct_positions(rotations[b], translations[b], translation_frame='local', include_origin=False))
+    return torch.stack(coords, dim=0)
 
 
 # ============================================================================
```
```diff
@@ -937,36 +898,60 @@ def rotation_matrix_to_quaternion(rot_matrices):
     return quat
 
 
-def reconstruct_positions(R, T , batch_idx=None):
+def reconstruct_positions(R, T, batch_idx=None, translation_frame='global', include_origin=True):
     """
-    Reconstruct 3D CA positions from CA-to-CA displacement vectors.
+    Reconstruct CA positions from rotations and translations.
 
-    T[i] = CA[i+1] - CA[i] in the global frame, so positions are simply the
-    cumulative sum of translations starting from the origin. The rotation
-    matrices R are kept as a parameter for API compatibility but are not used
-    here — they represent the *local frame orientation*, not a transform that
-    should be applied to global-frame displacements.
+    Supports two translation conventions:
+    - `global`: T[i] is already in global frame (e.g., CA[i+1] - CA[i]).
+    - `local`: T[i] is in the current local frame and must be rotated into
+      global coordinates as the chain is composed.
 
     Args:
-        R (torch.Tensor): Local rotation matrices, shape (N, 3, 3) [unused]
-        T (torch.Tensor): CA-to-CA displacement vectors, shape (N, 3)
+        R (torch.Tensor): Rotation matrices, shape (N, 3, 3).
+        T (torch.Tensor): Translation vectors, shape (N, 3).
+        batch_idx (torch.Tensor, optional): Per-residue batch indices (N,).
+        translation_frame (str): 'global' or 'local'.
+        include_origin (bool): If True, prepend origin row for each chain.
 
     Returns:
-        torch.Tensor: Reconstructed CA positions of shape (N+1, 3), starting
-        from the origin.
+        torch.Tensor:
+        - Unbatched: (N+1, 3) if include_origin else (N, 3)
+        - Batched: concatenation of per-chain outputs in batch order.
     """
-    if batch_idx is not None:
-        # Handle batched input
-        unique_batches = batch_idx.unique()
-        positions = []
-        for b in unique_batches:
-            mask = batch_idx == b
-            T_b = T[mask]
-            origin = torch.zeros(1, 3, dtype=T.dtype, device=T.device)
-            positions.append(torch.cat([origin, torch.cumsum(T_b, dim=0)], dim=0))
-        return torch.cat(positions, dim=0)
-    else:
-        origin = torch.zeros(1, 3, dtype=T.dtype, device=T.device)
-        return torch.cat([origin, torch.cumsum(T, dim=0)], dim=0)
+    if translation_frame not in ('global', 'local'):
+        raise ValueError(f"Unknown translation_frame: {translation_frame}")
+
+    def _reconstruct_single(R_s, T_s):
+        if translation_frame == 'global':
+            pos_no_origin = torch.cumsum(T_s, dim=0)
+        else:
+            # Compose rigid transforms along chain: x_{i+1} = x_i + R_global @ t_i
+            N = T_s.shape[0]
+            curr_pos = torch.zeros(3, dtype=T_s.dtype, device=T_s.device)
+            curr_R = torch.eye(3, dtype=T_s.dtype, device=T_s.device)
+            positions = []
+            for i in range(N):
+                step_global = curr_R @ T_s[i]
+                curr_pos = curr_pos + step_global
+                positions.append(curr_pos.clone())
+                curr_R = curr_R @ R_s[i]
+            pos_no_origin = torch.stack(positions, dim=0) if positions else T_s.new_zeros((0, 3))
+
+        if include_origin:
+            origin = torch.zeros(1, 3, dtype=T_s.dtype, device=T_s.device)
+            return torch.cat([origin, pos_no_origin], dim=0)
+        return pos_no_origin
+
+    if batch_idx is None:
+        return _reconstruct_single(R, T)
+
+    outs = []
+    for b in torch.unique(batch_idx):
+        mask = batch_idx == b
+        outs.append(_reconstruct_single(R[mask], T[mask]))
+    if len(outs) == 0:
+        return torch.zeros((0, 3), dtype=T.dtype, device=T.device)
+    return torch.cat(outs, dim=0)
 
 
```
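To see how the two translation conventions relate, here is a small usage sketch (assuming the same import path as above): with identity rotations the 'local' composition degenerates to the 'global' cumulative sum, so the two modes should agree.

```python
import torch
from foldtree2.src.losses.fape import reconstruct_positions

R = torch.eye(3).repeat(4, 1, 1)   # (N, 3, 3) identity rotations
T = torch.randn(4, 3)              # (N, 3) translations

pos_global = reconstruct_positions(R, T, translation_frame='global')
pos_local = reconstruct_positions(R, T, translation_frame='local')

# Both return (N+1, 3): an origin row followed by the chain positions.
assert torch.allclose(pos_global, pos_local, atol=1e-6)
```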

foldtree2/src/losses/losses.py

Lines changed: 4 additions & 9 deletions
```diff
@@ -1198,7 +1198,7 @@ def ss_reconstruction_loss(ss, recon_ss, mask_plddt=False, plddt_threshold=0.3 ,
     return ss_loss
 
 
-def angles_reconstruction_loss(true, pred, beta=0.5 , plddt_mask = None , plddt_thresh = 0.3 , normalize = False , convert_to_radians = True):
+def angles_reconstruction_loss(true, pred, beta=0.5 , plddt_mask = None , plddt_thresh = 0.3 , normalize = False):
     """Compute backbone dihedral angle reconstruction loss with circular distance.
 
     This loss trains the decoder to predict protein backbone torsion angles (phi, psi, omega)
@@ -1237,19 +1237,14 @@ def angles_reconstruction_loss(true, pred, beta=0.5 , plddt_mask = None , plddt_
         ... )
 
     Note:
-        Angles are computed from PDB coordinates during preprocessing using
-        BioPython's calc_dihedral function. They represent protein backbone geometry.
-        Circular distance is essential because 179° and -179° are actually very close!
+        Angles are expected in radians throughout the training path.
+        Circular distance is essential because angles near +π and -π are actually very close.
 
     Reference:
         Smooth L1 (Huber) loss: Girshick, R. (2015). Fast R-CNN. ICCV.
     """
     # Compute circular angular difference in [-π, π]
-    # atan2 correctly handles the wrap-around at ±180°
-
-    if convert_to_radians:
-        true = true * (torch.pi / 180.0)
-        pred = pred * (torch.pi / 180.0)
+    # atan2 correctly handles the wrap-around at ±pi
 
     delta = torch.atan2(torch.sin(pred - true), torch.cos(pred - true))
```
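The circular difference is the crux of this loss. A tiny numeric sketch of why plain subtraction fails at the ±π seam:

```python
import torch

true = torch.tensor([3.10])    # just below +pi
pred = torch.tensor([-3.10])   # just above -pi; geometrically ~0.08 rad away

naive = (pred - true).abs()    # 6.20 rad: wildly overestimates the error
delta = torch.atan2(torch.sin(pred - true), torch.cos(pred - true)).abs()
print(naive.item(), delta.item())   # ~6.20 vs ~0.083
```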

foldtree2/src/mono_decoders.py

Lines changed: 6 additions & 7 deletions
```diff
@@ -274,8 +274,7 @@ def forward(self, data , contact_pred_index, **kwargs):
 
         if self.angles_mlp is not None:
             angles = self.angles_mlp( z )
-            #tanh is -1 to 1, multiply by 180 to get angles in degrees
-            angles = angles * 180  # Scale from [-1, 1] to [-180, 180]
+            angles = angles * torch.pi  # Scale from [-1, 1] to [-pi, pi]
 
         if contact_pred_index is None:
             return { 'edge_probs': None , 'zgodnode' :None , 'fft2pred':fft2_pred , 'rt_pred': None , 'angles': angles , 'edge_logits': edge_logits , 'ss_pred': ss_pred , 'z': z }
@@ -581,7 +580,7 @@ def forward(self, data, contact_pred_index, **kwargs):
         angles = None
         if 'angles_mlp' in self.head:
             angles = self.head['angles_mlp'](z)
-            angles = angles * 180  # Scale from [-1, 1] to [-180, 180]
+            angles = angles * torch.pi  # Scale from [-1, 1] to [-pi, pi]
 
         # Contact prediction
         edge_logits = None
@@ -1350,7 +1349,7 @@ def forward(self, data, contact_pred_index=None, **kwargs):
                 if self.output_angles and 'angles_cnn' in self.head:
                     angles_out = self.head['angles_cnn'](xi_cnn)  # (1, 3, seq_len)
                     angles_out = angles_out.permute(2, 0, 1).squeeze(1)  # (seq_len, 3)
-                    angles_out = angles_out * 180  # Scale from [-1, 1] to [-180, 180]
+                    angles_out = angles_out * torch.pi  # Scale from [-1, 1] to [-pi, pi]
                     angles_list.append(angles_out)
                 else:
                     # DNN decoder path
@@ -1371,7 +1370,7 @@ def forward(self, data, contact_pred_index=None, **kwargs):
             ss_pred = torch.cat(ss_list, dim=0)
             if angles_list:
                 angles = torch.cat(angles_list, dim=0)
-                angles = angles * 180  # Scale from [-1, 1] to [-180, 180]
+                angles = angles * torch.pi  # Scale from [-1, 1] to [-pi, pi]
         else:
             # Single graph case
             if use_cnn:
@@ -1393,7 +1392,7 @@ def forward(self, data, contact_pred_index=None, **kwargs):
                 if self.output_angles and 'angles_cnn' in self.head:
                     angles = self.head['angles_cnn'](x_cnn)  # (1, 3, seq_len)
                     angles = angles.permute(2, 0, 1).squeeze(1)  # (seq_len, 3)
-                    angles = angles * 180  # Scale from [-1, 1] to [-180, 180]
+                    angles = angles * torch.pi  # Scale from [-1, 1] to [-pi, pi]
             else:
                 # DNN decoder path
                 x = x.squeeze(1)  # (seq_len, d_model)
@@ -1406,7 +1405,7 @@ def forward(self, data, contact_pred_index=None, **kwargs):
 
         if self.output_angles and 'angles_head' in self.head:
             angles = self.head['angles_head'](x)
-            angles = angles * 180  # Scale from [-1, 1] to [-180, 180]
+            angles = angles * torch.pi  # Scale from [-1, 1] to [-pi, pi]
 
         # Normalize quaternion part (first 4 dims) of rt_pred for proper geometry
         if rt_pred is not None:
```
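The common pattern across these decoder heads: a tanh-bounded output scaled by π, so predictions land directly in [-π, π] radians and match the radians-native loss above. A minimal sketch (layer sizes are illustrative, not the model's actual dimensions):

```python
import torch
import torch.nn as nn

# Illustrative angle head: tanh bounds outputs to [-1, 1]; multiplying by pi
# yields phi/psi/omega predictions in radians, ready for
# angles_reconstruction_loss without any degree conversion.
angles_mlp = nn.Sequential(nn.Linear(64, 3), nn.Tanh())

z = torch.randn(10, 64)             # per-residue latent embeddings
angles = angles_mlp(z) * torch.pi   # (10, 3), values in [-pi, pi]
```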
