File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -162,7 +162,7 @@ rule annotate_bakta:
162162 """--- Running BAKTA annotation for sample {wildcards.sample} ---"""
163163 params :
164164 prefix = lambda wc : wc .sample ,
165- locustag = lambda wc : samples .loc [wc .sample ]["id_prefix" ],
165+ locustag = lambda wc : format_bakta_locustag ( samples .loc [wc .sample ]["id_prefix" ]) ,
166166 species = lambda wc : samples .loc [wc .sample ]["species" ],
167167 strain = lambda wc : samples .loc [wc .sample ]["strain" ],
168168 outdir = lambda wc , output : os .path .dirname (output [0 ]),
Original file line number Diff line number Diff line change 11# import basic packages
22import pandas as pd
3+ import re
4+ from snakemake import logging
35from snakemake .utils import validate
46
57
@@ -62,3 +64,27 @@ def get_final_input(wildcards):
6264 tool = config ["tool" ],
6365 )
6466 return inputs
67+
68+
69+ # -----------------------------------------------------
70+ # helper functions
71+ # -----------------------------------------------------
def format_bakta_locustag(raw):
    """Format a locus tag prefix for BAKTA annotation.

    The value is converted to a string, uppercased, and stripped of every
    character outside ``A-Z0-9``. The cleaned tag must be 3 to 12 characters
    long and start with a letter. A warning is logged whenever the cleaned
    tag differs from the string form of the input.

    Args:
        raw: Locus tag prefix to format (any object; ``str()`` is applied).

    Returns:
        The cleaned, uppercase locus tag.

    Raises:
        ValueError: If ``raw`` is missing (``None`` or a float NaN), if the
            cleaned tag is not 3-12 characters long, or if it does not
            start with a letter.
    """
    # A missing value would otherwise be stringified to "None"/"nan" and
    # silently accepted as the valid-looking tags "NONE"/"NAN".
    # `raw != raw` is true only for NaN.
    if raw is None or (isinstance(raw, float) and raw != raw):
        raise ValueError(f"locustag is missing (got {raw!r})")

    tag = str(raw)
    # uppercase for BAKTA, then keep only A-Z0-9
    cleaned = re.sub(r"[^A-Z0-9]", "", tag.upper())

    if not 3 <= len(cleaned) <= 12:
        raise ValueError(
            f"locustag '{raw}' -> '{cleaned}' must contain between 3-12 "
            "alphanumeric uppercase characters\n"
        )
    if not re.match(r"[A-Z]", cleaned):
        raise ValueError(f"locustag '{raw}' -> '{cleaned}' must start with a letter")

    # warn if cleaned tag is different from original
    if cleaned != tag:
        # NOTE(review): `logger` is not defined in this block; presumably it
        # is the global logger Snakemake provides to workflow files - confirm.
        logger.warning(
            f"\nlocustag '{raw}' converted to '{cleaned}' to meet BAKTA "
            "requirements (between 3 and 12 alphanumeric uppercase "
            "characters, start with a letter)\n"
        )
    return cleaned
You can’t perform that action at this time.
0 commit comments