Skip to content

Commit 358ca3b

Browse files
author
Luis Figueiredo
committed
remove protein_id qualifier if already present in another CDS
1 parent fce49ec commit 358ca3b

2 files changed

Lines changed: 3 additions & 1 deletion

File tree

deepbgc/util.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ def get_protein_features(record):
4848

4949

5050
def get_proteins_by_id(protein_features):
51-
return {get_protein_id(feature): feature for feature in protein_features}
51+
return {protein_id: feature for feature in protein_features for protein_id in get_protein_ids(feature)}
5252

5353

5454
def get_features_of_type(record, feature_type):
@@ -502,6 +502,7 @@ def fix_duplicate_cds(record):
502502

503503
if protein_id != new_protein_id:
504504
logging.warning('Setting new unique_protein_id %s for CDS %s', new_protein_id, protein_id)
505+
del feature.qualifiers['protein_id']
505506
feature.qualifiers['unique_protein_id'] = [new_protein_id]
506507

507508
protein_ids.add(new_protein_id)

test/unit/test_pfam.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -188,6 +188,7 @@ def test_same_protein_id(mock_find_exe, mock_popen, mock_searchio, mock_read_csv
188188
)
189189

190190
expected_record = deepcopy(record)
191+
del expected_record.features[-1].qualifiers['protein_id']
191192
expected_record.features[-1].qualifiers['unique_protein_id'] = 'AAK73500.1_1'
192193
expected_record.features += [
193194
SeqFeature(

0 commit comments

Comments
 (0)