@@ -77,6 +77,10 @@ def get_args():
7777 default = "./" , help = 'subfolder for the plots' )
7878 argument_parser .add_argument ('--out_format' , metavar = 'STR' , type = str ,
7979 default = 'pdf' , required = False , help = 'Output format of the plot, [default = %(default)s]' )
80+ argument_parser .add_argument ('--ref_base' , metavar = 'STR' , type = str ,
81+ default = '' , required = False , help = 'Reference base for the variant entry, [default = %(default)s]' )
82+ argument_parser .add_argument ('--alt_base' , metavar = 'STR' , type = str ,
83+ default = '' , required = False , help = 'Alternate base for the variant entry, [default = %(default)s]' )
8084
8185
8286 parsed_arguments = argument_parser .parse_args ()
@@ -286,7 +290,7 @@ def parse_cigar( cigar, pos, r_left, r_right):
286290
287291 # Iterate over the cigar structure
288292 for n ,t in cigar_struct :
289-
293+
290294 # If the cigar is a match or a soft clipped, then add the position to the list
291295 if t in ['M' ,'S' ]:
292296
@@ -315,7 +319,7 @@ def parse_cigar( cigar, pos, r_left, r_right):
315319
316320 rel_pos += n
317321
318- elif t == "N" :
322+ elif t == "N" :
319323 for m in range (n ):
320324 if r_left <= abs_pos + m <= r_right :
321325 cigar_struct2 .append ((t , abs_pos + m , rel_pos ))
@@ -390,14 +394,52 @@ def plot_histogram( cigars, ax, ax_id):
390394
391395
392396
393- def plot_cigars ( cigars , sequences , reverses , ax , reference_function ):
394-
397+ def plot_cigars (cigars , sequences , reverses , ax , reference_function , region_center , region_chrom ):
395398 patches = []
396399
397400 right_limits = [0 ] * len (cigars )
398401 basepair_colors = { 'A' :"#009600" , 'C' :"#3030fe" , 'G' :"#d17105" , 'T' :"#ff0000" , 'N' :"#00ffff" }
399402
400- for cigar ,sequence ,reverse in zip (cigars ,sequences ,reverses ):
403+ # Sort cigars based on non-reference variants at the region center
404+ # The cigar list has the following structure: [ (cigar_type, absolute_position, relative_position), ... ]
405+ # The cigar_type can be 'M' (match), 'I' (insertion), 'D' (deletion), 'S' (soft clip), 'N' (skipped region)
406+ # The absolute_position is the position in the reference genome
407+ # The relative_position is the position in the read sequence
408+ # The cigar list is sorted based on the non-reference variants at the region center
409+ # The region center is the position of the variant in the reference genome
410+ # The reference_function is the reference genome sequence
411+ # Now separated reads based on the non-reference variants at the region center to group similar reads together
412+ # Create two lists: one for the reads that have the non-reference variant at the region center and one for the reads that do not have the non-reference variant at the region center
413+
414+ cigar_with_variants = []
415+ cigar_without_variants = []
416+ seq_order_with_variants = []
417+ seq_order_without_variants = []
418+
419+ for i , (read_cigar , sequence ) in enumerate (zip (cigars , sequences )):
420+ # Check if the region center is in the read
421+ for position_cigar in read_cigar :
422+ c_type , abs_pos , rel_pos = position_cigar
423+ if abs_pos == region_center :
424+ if (c_type == 'M' and sequence [rel_pos ] != reference_function [abs_pos ]) and sequence [rel_pos ] == parsed_arguments .alt_base :
425+ cigar_with_variants .append (read_cigar )
426+ seq_order_with_variants .append (i )
427+ break
428+ elif abs_pos == (region_center + 1 ) and (len (parsed_arguments .alt_base ) > 1 or len (parsed_arguments .ref_base ) > 1 ):
429+ if c_type == 'I' or c_type == 'D' :
430+ cigar_with_variants .append (read_cigar )
431+ seq_order_with_variants .append (i )
432+ break
433+ else :
434+ cigar_without_variants .append (read_cigar )
435+ seq_order_without_variants .append (i )
436+
437+
438+ # Sort the reads with the non-reference variant at the region center based on the non-reference variant
439+ cigars_sorted = cigar_with_variants + cigar_without_variants
440+ sequences_sorted = [sequences [i ] for i in seq_order_with_variants ] + [sequences [i ] for i in seq_order_without_variants ]
441+
442+ for cigar , sequence , reverse in zip (cigars_sorted , sequences_sorted , reverses ):
401443
402444 for j in range (len (cigars )):
403445 if right_limits [j ] <= cigar [0 ][1 ]:
@@ -531,7 +573,7 @@ def plot_region(region_chrom, region_center, region_left, region_right, plot_tit
531573 plot_cigars ([parse_cigar (read [5 ], int (read [3 ]), region_left , region_right ) for read in samtools_read_plot if read [5 ] != "*" ],
532574 [read [9 ] for read in samtools_read_plot if read [5 ] != "*" ],
533575 [bool (int (read [1 ]) & 0x10 ) for read in samtools_read_plot if read [5 ] != "*" ],
534- ax [idx * 2 + 1 ], reference_buffer )
576+ ax [idx * 2 + 1 ], reference_buffer , region_center , region_chrom )
535577
536578 plot_title_vaf = plot_title
537579 if parsed_arguments .vaf :
@@ -623,9 +665,9 @@ def main():
623665 region_left = region_center - parsed_arguments .window
624666 region_right = region_center + parsed_arguments .window
625667
626- plot_title = "%s:%s" % ( region_chrom , region_center )
668+ plot_title = "%s:%s-%s>%s " % ( region_chrom , region_center , parsed_arguments . ref_base , parsed_arguments . alt_base )
627669
628- print (region_chrom , region_left , region_center , region_right )
670+ # print(region_chrom, region_left, region_center, region_right)
629671
630672 plot_region ( region_chrom , region_center , region_left , region_right , plot_title )
631673
@@ -670,7 +712,7 @@ def main():
670712 line .split ('\t ' )[ vcf_columns ["Tumor_VAF" ] ],
671713 line .split ('\t ' )[ vcf_columns ["RNA_VARIANT_AF" ] ],
672714 ]
673-
715+
674716 plot_region ( region_chrom , region_center , region_left , region_right , plot_title , VAFs )
675717 plot_output_file = os .path .join (parsed_arguments .plot_dir , ("%s_%s_%i.%s" % (parsed_arguments .prefix , region_chrom , region_center , parsed_arguments .out_format )))
676718 plot .savefig ( plot_output_file )
0 commit comments