Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Config/configure_PRONTO.ini
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ data_path = /data/sample_data/analysis_results/
encoding_sys = utf-8
;Specify the number of columns to filter on (NB: this also determines how many output tables the script generates):
filter_col_nu = 5
;Specify the maximum number of table rows per slide (the header row counts as one of them) for tables placed from the 8th slide of the report onward. Longer tables are split across several slides.
table_max_rows_per_slide = 15
;Modify this for your local environment if you use MTF files to import the clinical data into the meta file. Specify the version (year) of the MTF files.
material_file_version = 2025

Expand Down
82 changes: 79 additions & 3 deletions Script/PRONTO.py
Original file line number Diff line number Diff line change
Expand Up @@ -784,6 +784,80 @@ def insert_table_to_ppt(table_data_file,slide_n,table_name,left_h,top_h,width_h,
return data_nrows


def insert_table_to_ppt_end(table_data_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,if_print_rowNo,table_column_width,table_max_rows_per_slide):
    """Append a tab-separated table to the end of a presentation, split over slides.

    The data rows of ``table_data_file`` are cut into pages of at most
    ``table_max_rows_per_slide`` rows. Every page is rendered on a new slide
    appended to ``output_ppt_file``: a table with a header row, plus a bold,
    centred 8 pt title text box. The presentation is saved in place.

    Args:
        table_data_file: Path of the tab-separated input file; its first line
            is the column header line.
        slide_n: Not used by this function; kept so the signature stays
            parallel to ``insert_table_to_ppt``.
        table_name: Title written into the text box of every slide.
        left_h, top_h, width_h: Position and width of the title text box, in
            inches (its height is fixed at 0.25 inch).
        left_t, top_t, width_t, height_t: Position and size of the table, in
            inches.
        font_size: Font size of every table cell, in points.
        table_header: Column titles to display; also defines the number of
            table columns.
        output_ppt_file: Path of the presentation that is opened, extended
            and saved again.
        if_print_rowNo: When true, the title is extended with the total
            number of data rows and the page counter,
            e.g. ``" (N=29, Page 1/2)"``.
        table_column_width: Per-column widths in inches; an empty value keeps
            the default column widths.
        table_max_rows_per_slide: Maximum number of data rows per slide (the
            header row is added on top of this).

    Returns:
        The number of slides appended. ``0`` when the file is empty or has no
        data rows, in which case the presentation is left untouched.

    Raises:
        ValueError: If ``table_max_rows_per_slide`` is smaller than 1, which
            would otherwise make the pagination loop never terminate.
    """
    # TODO(review): this function largely duplicates insert_table_to_ppt.
    # Review feedback on the change asked to (1) merge the two functions or
    # share the reusable parts, (2) group the many positional parameters into
    # dicts or a small class, and (3) add unit tests for the pagination.
    if table_max_rows_per_slide < 1:
        raise ValueError("table_max_rows_per_slide must be at least 1, got " + str(table_max_rows_per_slide))

    data_rows = _read_table_data_rows(table_data_file, table_header)
    total_rows = len(data_rows)
    if total_rows == 0:
        return 0

    cols = len(table_header)
    # Ceiling division on the number of DATA rows; the header line of the
    # file must not be counted, it is repeated on every slide instead.
    total_slides_needed = (total_rows + table_max_rows_per_slide - 1) // table_max_rows_per_slide

    ppt = Presentation(output_ppt_file)
    page_starts = range(0, total_rows, table_max_rows_per_slide)
    for table_page_num, start_idx in enumerate(page_starts, start=1):
        slide_data = data_rows[start_idx:start_idx + table_max_rows_per_slide]

        # NOTE(review): layout index 6 is presumably the blank layout of the
        # template -- confirm against the template file.
        slide = ppt.slides.add_slide(ppt.slide_layouts[6])
        table = slide.shapes.add_table(
            len(slide_data) + 1,  # +1 for the header row
            cols,
            Inches(left_t),
            Inches(top_t),
            Inches(width_t),
            Inches(height_t),
        ).table

        for c in range(cols):
            if table_column_width:
                table.columns[c].width = Inches(table_column_width[c])
            header_cell = table.cell(0, c)
            header_cell.text = table_header[c]
            header_cell.text_frame.paragraphs[0].font.size = Pt(font_size)

        for row_idx, row_data in enumerate(slide_data, start=1):
            for col_idx in range(cols):
                cell = table.cell(row_idx, col_idx)
                cell.text = str(row_data[col_idx])
                cell.text_frame.paragraphs[0].font.size = Pt(font_size)

        textbox = slide.shapes.add_textbox(Inches(left_h), Inches(top_h), Inches(width_h), Inches(0.25))
        title = textbox.text_frame.paragraphs[0]
        if if_print_rowNo:
            title.text = table_name + " (N=" + str(total_rows) + ", Page " + str(table_page_num) + "/" + str(total_slides_needed) + ")"
        else:
            title.text = table_name
        title.font.size = Pt(8)
        title.font.bold = True
        title.alignment = PP_ALIGN.CENTER

    ppt.save(output_ppt_file)
    return total_slides_needed


def _read_table_data_rows(table_data_file, table_header):
    """Read the tab-separated file and return its data rows aligned to ``table_header``."""
    with open(table_data_file) as table_file:
        lines = table_file.readlines()
    if not lines:
        return []

    first_line = lines[0]
    # Strip the cells so the last column name does not keep its newline and
    # is still recognised as present.
    file_header = [cell.strip() for cell in first_line.split('\t')]
    missing_cols = [n for n, name in enumerate(table_header) if name not in file_header]

    cols = len(table_header)
    data_rows = []
    for line in lines:
        # Lines identical to the header line are skipped (this also drops the
        # header itself); completely empty lines carry no data.
        if line == first_line or not line.strip('\r\n'):
            continue
        cells = [cell.strip() for cell in line.split('\t')]
        # Put a blank cell where a displayed column has no counterpart in the
        # file. Assumes the file columns follow the order of table_header --
        # TODO confirm against the files produced upstream.
        for num in missing_cols:
            cells.insert(num, " ")
        # Pad short rows so every displayed column can be filled.
        if len(cells) < cols:
            cells.extend([""] * (cols - len(cells)))
        data_rows.append(cells)
    return data_rows


def update_ppt_variant_summary_table(data_nrows,DNA_sampleID,RNA_sampleID,TMB_DRUP_nr,TMB_DRUP_str,DNA_variant_summary_file,RNA_variant_summary_file,output_file_preMTB_AppendixTable,output_table_file_filterResults_AllReporVariants_CodingRegion,output_ppt_file):
DNA_summary_file = open(DNA_variant_summary_file)
global str_TMB_DRUP
Expand Down Expand Up @@ -1534,7 +1608,8 @@ def main(argv):
slide8_table_font_size = 7
if_print_rowNo = True
table8_column_width = [0.54, 0.96, 0.96, 0.51, 0.73, 1.12, 2.26, 0.79, 0.81, 0.53]
slide8_table_nrows = insert_table_to_ppt(slide8_table_data_file,slide8_table_ppSlide,slide8_table_name,slide8_header_left,slide8_header_top,slide8_header_width,slide8_table_left,slide8_table_top,slide8_table_width,slide8_table_height,slide8_table_font_size,slide8_table_header,output_ppt_file,if_print_rowNo,table8_column_width)
table_max_rows_per_slide = int(cfg.get("INPUT", "table_max_rows_per_slide")) - 1
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we subtract 1 here? Is it because of the header?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the header will take 1 row for each slide.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we then just supply the correct number directly, instead of reading it and changing it immediately? Would that make more sense?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good. I will update then.

slide8_table_slides = insert_table_to_ppt_end(slide8_table_data_file,slide8_table_ppSlide,slide8_table_name,slide8_header_left,slide8_header_top,slide8_header_width,slide8_table_left,slide8_table_top,slide8_table_width,slide8_table_height,slide8_table_font_size,slide8_table_header,output_ppt_file,if_print_rowNo,table8_column_width,table_max_rows_per_slide)

# Insert the CNV_overview_plots pictures A2, B3 and C1 into the report.
A2_to_extract=[2]
Expand All @@ -1545,12 +1620,13 @@ def main(argv):
B3_C1_to_extract = [4, 5]
pdf_page_image_to_ppt(CNV_overview_plots_pdf,output_ppt_file,B3_C1_to_extract,width_scale=1,height_scale=0.5)

# Change slides order.
# Change slides order.
ppt = Presentation(output_ppt_file)
slide_count = len(ppt.slides)
Comment thread
marrip marked this conversation as resolved.
Outdated
slides = ppt.slides._sldIdLst
slides_list = list(slides)
slides.remove(slides_list[7])
slides.insert(12,slides_list[7])
slides.insert(slide_count + 1,slides_list[7])
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we add 1 here?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This slide is the last slide in the template file, and it needs to remain the last slide of the final report. Since it has been removed from the slide list, it is re-inserted at the end as one more slide. I could update the function to make this easier to understand. :)

ppt.save(output_ppt_file)
print("Generate report for " + DNA_sampleID)
ppt_nr += 1
Expand Down