-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgadi_usage_report_v1.2.pl
More file actions
executable file
·207 lines (185 loc) · 7.51 KB
/
gadi_usage_report_v1.2.pl
File metadata and controls
executable file
·207 lines (185 loc) · 7.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
#!/usr/bin/env perl
#
### NOTE
### NCI PBS .o logs slightly changed format in Q1 2026
### This script is a temporary workaround to report on old logs.
#
#------------------------------------------------------------------
# gadi_usage_report_v1.2_pre2026
# Platform: NCI Gadi HPC
#
# Description:
# This script gathers the job requests and usage metrics from Gadi
# PBS log and reports them as TSV. Efficiency/utilisation values are
# reported for CPU using the formula cpu_e = cputime/walltime/cpus_used.
# GPU usage can be optionally reported.
#
# Date last modified: 13/04/26
# Version 1.2 updates:
# - reorder headings to bring VIP details to fore
# - remove clock time, to reduce log complexity
# - updated to match new NCI .o log format omitting 'NCPUs Used' from Q1 2026,
# - will function correctly for 2025- or 2026+ files
# - added optional reporting of gpu metrics with `-g`
# - added option (-p) to report on all logs whose filename contains a pattern word
# - reformatted as options (at most one of -a/-l/-p, plus optional -g) rather than 1 positional arg
#
# Options:
# -a <dir> Report on all .o log files in the specified directory
# -l <logfile> Report on one exact logfile
# -p <pattern> Report on .o log files matching a filename pattern
# -g Include GPU metrics
#
# At most one of -a, -l or -p may be supplied (each takes an argument). If none
# is given, all .o log files in the current working directory are reported.
# -g can optionally be added to any of these.
#
# Usage examples:
#
# perl gadi_usage_report_v1.2.pl -a /path/to/logdir # all logs in dir
# perl gadi_usage_report_v1.2.pl -l myjob.o -g # a specific log, report GPU usage
# perl gadi_usage_report_v1.2.pl -p name # all .o logs in the current directory with 'name' in the filename
#
# Output:
# Tab-delimited summary of the resources requested and used for each job
# will be printed to STDOUT. Use output redirection when executing the
# script to save the data to a text file, eg:
# perl <path/to/script>/gadi_usage_report_v1.2.pl <options> > resources_summary.txt
#
# If you use this script towards a publication, please acknowledge the
# Sydney Informatics Hub (or co-authorship, where appropriate).
#
# Suggested acknowledgement:
# The authors acknowledge the scientific and technical assistance
# <or e.g. bioinformatics assistance of <PERSON>> of Sydney Informatics
# Hub and resources and services from the National Computational
# Infrastructure (NCI), which is supported by the Australian Government
# with access facilitated by the University of Sydney.
#------------------------------------------------------------------
use warnings;
use strict;
use POSIX;
use File::Basename;
use Getopt::Std;
#------------------------------------------------------------------
# Command-line handling and log selection
#
# Parses -a/-l/-p/-g and fills @logs with the log files to report on:
#   -l <logfile>   that one file
#   -p <pattern>   *<pattern>*.o in the current working directory
#   -a <dir>       *.o in <dir>
#   (none)         *.o in the current working directory
#
# File names are expanded inside Perl instead of shelling out to `ls`,
# so paths containing spaces or shell metacharacters are handled
# literally and are never passed to a shell.
#------------------------------------------------------------------
my %opt;
getopts('a:l:p:g', \%opt)
    or die "\nERROR: Invalid option(s). Usage: [-a <dir> | -l <logfile> | -p <pattern>] [-g]\n\n";

# getopts stops at the first non-option word, so anything left in @ARGV
# (e.g. a log name given without -l) would otherwise be silently dropped.
if (@ARGV) {
    warn "WARNING: Ignoring unexpected positional argument(s): @ARGV "
       . "(use -l <logfile> or -p <pattern>)\n";
}

my $dir = POSIX::getcwd();
$dir = '.' unless defined $dir;

my @logs;
my @no_report;

# At most one of -a, -l, -p may be supplied; none means "all logs in cwd".
my $n_modes = grep { defined $opt{$_} } qw(a l p);
if ($n_modes > 1) {
    die "\nERROR: Please supply only one of -a <dir>, -l <logfile>, or -p <pattern>\n\n";
}

my $glob_pattern;
if (defined $opt{l}) {
    # A single, exact log file: no globbing involved.
    if (!-f $opt{l}) {
        die "\nERROR: Log file not found: $opt{l}\n\n";
    }
    @logs = ($opt{l});
}
elsif (defined $opt{p}) {
    $glob_pattern = "$dir/*$opt{p}*.o";
}
elsif (defined $opt{a}) {
    print "\n######\nReporting on all usage log files in $opt{a}.\n######\n\n";
    $glob_pattern = "$opt{a}/*.o";
}
else {
    print "\n######\nNo log selection flag specified. Will report on all usage log files in $dir.\n######\n\n";
    $glob_pattern = "$dir/*.o";
}

if (defined $glob_pattern) {
    # bsd_glob (unlike the core glob operator) does not split the pattern
    # on whitespace, so directories with spaces in their names work.
    require File::Glob;
    my @found = grep { -f $_ } File::Glob::bsd_glob($glob_pattern);
    @logs = sort { $a cmp $b } @found;
    if (!@logs) {
        warn "WARNING: No log files found matching: $glob_pattern\n";
    }
}

# 1 when GPU metrics were requested with -g, otherwise 0.
my $g = $opt{g} ? 1 : 0;
#------------------------------------------------------------------
# Report generation
#
# For every file in @logs, the trailing PBS "Resource Usage" block is
# read once and parsed in Perl, and one TSV row is printed to STDOUT.
# Logs without a "Walltime" line in their last 12 lines are pushed onto
# @no_report instead. Values that cannot be found are printed as 'NA';
# CPU efficiency and CPU time fall back to 0 when they cannot be computed.
#------------------------------------------------------------------

# Return the last $count lines of $path (like `tail -n $count`), or an
# empty list when the file cannot be opened. Only the final 64 KiB of a
# large file is read, so huge job logs are not slurped into memory.
sub tail_lines {
    my ($path, $count) = @_;
    open(my $fh, '<', $path) or return;
    my $chunk_bytes = 65536;
    my $size        = -s $fh;
    my $mid_file    = 0;
    if (defined $size && $size > $chunk_bytes) {
        # Whence 2 positions relative to end-of-file (SEEK_END).
        $mid_file = seek($fh, -$chunk_bytes, 2) ? 1 : 0;
    }
    my @lines = <$fh>;
    close $fh;
    # After seeking into the middle of the file the first line read is
    # most likely a fragment, so discard it.
    shift @lines if $mid_file && @lines;
    splice(@lines, 0, @lines - $count) if @lines > $count;
    return @lines;
}

# Return the whitespace-separated tokens of the lines in @$lines_ref that
# match $regex (the in-Perl equivalent of `grep PATTERN | split`). With a
# true $first_only only the first matching line is used (`grep -m 1`).
sub matching_fields {
    my ($lines_ref, $regex, $first_only) = @_;
    my @hits = grep { $_ =~ $regex } @{$lines_ref};
    @hits = ($hits[0]) if $first_only && @hits;
    my @fields;
    foreach my $hit (@hits) {
        push @fields, split(' ', $hit);
    }
    return @fields;
}

# Convert an 'HH:MM:SS' string to minutes with two decimals; returns
# undef when the value is missing or not in that form.
sub hms_to_minutes {
    my ($hms) = @_;
    if (defined $hms && $hms =~ /^(\d+):(\d+):(\d+(?:\.\d+)?)$/) {
        return sprintf("%.2f", ($1 * 60) + $2 + ($3 / 60));
    }
    return;
}

if (@logs) {
    if ($g) {
        print "#JobName\tExit_status\tService_units\tCPU_efficiency\tCPUs\tGPU_util\tNGPUS\tMem_req\tMem_used\tGPU_mem_used\tCPUtime_mins\tWalltime_req\tWalltime_mins\tJobFS_req\tJobFS_used\tDate\n";
    }
    else {
        print "#JobName\tExit_status\tService_units\tCPU_efficiency\tCPUs\tMem_req\tMem_used\tCPUtime_mins\tWalltime_req\tWalltime_mins\tJobFS_req\tJobFS_used\tDate\n";
    }

    foreach my $file (@logs) {
        chomp $file;
        my $name = basename($file);

        # The date line is searched in the last 14 lines, everything else
        # in the last 12, matching the windows the report has always used.
        my @date_window  = tail_lines($file, 14);
        my @usage_window = @date_window > 12 ? @date_window[-12 .. -1] : @date_window;

        # walltime (also decides whether this log has a usage block at all)
        my @walltime = matching_fields(\@usage_window, qr/Walltime/);
        if (!$walltime[2]) {
            push(@no_report, $file);
            next;
        }
        my $walltime_req  = $walltime[2];
        my $walltime_mins = hms_to_minutes($walltime[5]);

        # memory
        my @mem = matching_fields(\@usage_window, qr/Memory/i);
        my ($mem_req, $mem_used) = @mem[2, 5];

        # cpus, cpu time and cpu efficiency
        my @ncpus = matching_fields(\@usage_window, qr/NCPUs/i);
        my $cpus  = $ncpus[2];
        my $cputime;
        foreach my $line (@usage_window) {
            if ($line =~ /CPU Time Used:\s*(\S+)/) {
                $cputime = $1;
                last;
            }
        }
        my $cputime_mins = hms_to_minutes($cputime);
        $cputime_mins = 0 unless defined $cputime_mins;
        my $cpu_e = 0;
        # A non-numeric value such as 'unknown' is reported as-is and no
        # efficiency is calculated for it.
        if (defined $cpus && $cpus =~ /^\d+(?:\.\d+)?$/) {
            $cpus = ceil($cpus);
            # Guard the division: jobs that die immediately report a
            # walltime of 00:00:00.
            if ($cpus > 0 && defined $walltime_mins && $walltime_mins > 0) {
                $cpu_e = sprintf("%.2f", ($cputime_mins / $walltime_mins / $cpus));
            }
        }

        # service units and exit status
        my @su  = matching_fields(\@usage_window, qr/Service Units/i);
        my $SUs = $su[2];
        my $exit_status;
        foreach my $line (@usage_window) {
            if ($line =~ /Exit Status:\s*(\S+)/i) {
                $exit_status = $1;
                last;
            }
        }

        # date; taken only from a successful match, never from a stale $1
        my $date;
        foreach my $line (@date_window) {
            next unless $line =~ /Resource Usage on/i;
            if ($line =~ /on\s+(\d{4}-\d{2}-\d{2})\s+\d{2}:\d{2}:\d{2}:/) {
                $date = $1;
                last;
            }
        }

        # jobfs
        my @jobFS = matching_fields(\@usage_window, qr/JobFS/i);
        my ($jobFS_req, $jobFS_used) = @jobFS[2, 5];

        # ngpus, gpu util, gpu memory
        my ($gpu_u, $ngpus, $gpu_mem) = ('NA', 'NA', 'NA');
        if ($g) {
            my @gpus           = matching_fields(\@usage_window, qr/NGPU/i, 1);
            my @gpu_mem_fields = matching_fields(\@usage_window, qr/GPU Memory/i, 1);
            if (defined $gpus[2] && defined $gpus[5] && $gpus[2] ne '0') {
                my $gpu_util = $gpus[5];
                $gpu_util =~ s/\%$//;
                $ngpus   = $gpus[2];
                $gpu_mem = $gpu_mem_fields[3] if defined $gpu_mem_fields[3];
                # Utilisation is a percentage summed over GPUs; scale it
                # to a 0-1 fraction per GPU.
                if ($ngpus =~ /^\d+$/ && $ngpus > 0 && $gpu_util =~ /^\d+(?:\.\d+)?$/) {
                    $gpu_u = sprintf("%.2f", ($gpu_util / ($ngpus * 100)));
                }
            }
        }

        # print
        my @row;
        if ($g) {
            @row = ($name, $exit_status, $SUs, $cpu_e, $cpus, $gpu_u, $ngpus,
                    $mem_req, $mem_used, $gpu_mem, $cputime_mins, $walltime_req,
                    $walltime_mins, $jobFS_req, $jobFS_used, $date);
        }
        else {
            @row = ($name, $exit_status, $SUs, $cpu_e, $cpus, $mem_req, $mem_used,
                    $cputime_mins, $walltime_req, $walltime_mins, $jobFS_req,
                    $jobFS_used, $date);
        }
        my @cells = map { defined $_ ? $_ : 'NA' } @row;
        print join("\t", @cells), "\n";
    }
}
# List every log for which no usage metrics could be extracted. Files
# whose name ends in a literal '.e' are skipped without a warning. The
# dot is escaped so that only a real '.e' suffix is skipped, not any
# name that merely ends in the letter 'e'.
if (@no_report) {
    print "\n######\n";
    foreach my $file (@no_report) {
        next if $file =~ m/\.e$/;
        print "WARNING: Usage metrics were not reported for: $file\n";
    }
}