Skip to content

Commit a1e5b5c

Browse files
Added code for montage validation
1 parent a957995 commit a1e5b5c

10 files changed

Lines changed: 455 additions & 21 deletions

File tree

wfcommons/wfperf/bench_plot.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
from argparse import ArgumentParser
2+
from datetime import timedelta
3+
import plotly.graph_objects as go
4+
import pathlib
5+
import pandas as pd
6+
from typing import Dict, Tuple
7+
import argparse
8+
import re
9+
import plotly.express as px
10+
import plotly.graph_objects as go
11+
12+
13+
this_dir = pathlib.Path(__file__).resolve().parent
14+
15+
def convert_time(text: str) -> float:
    """Convert a colon-separated clock string ("H:M:S", "M:S", or plain "S") to seconds.

    Fields are weighted by successive powers of 60, so "1:30" -> 90.0.
    """
    total = 0.0
    for field in str(text).split(":"):
        total = total * 60 + float(field)
    return total
17+
18+
def get_parser() -> argparse.ArgumentParser:
    """Build the CLI parser: a positional csv directory and an optional machine label."""
    cli = argparse.ArgumentParser()
    cli.add_argument("path", help="Path to the csv")
    cli.add_argument("-m", "--machine", help="Machine used")
    return cli
23+
24+
# Palette keyed by run label: "<cpu>_<mem>" encodes the cpu/memory thread
# split of a synthetic sysbench run, while "real" marks timings of the
# actual (non-benchmark) task execution.
colors = {
    "1_9": "#e69f00",
    "2_8": "#56b4e9",
    "3_7": "#009e73",
    "4_6": "#f0e442",
    "5_5": "#0072b2",
    "6_4": "#d55e00",
    "7_3": "#cc79a7",
    "8_2": "#000000",
    "9_1": "#aa3377",
    "real": "#332288"
}

# Candidate plotly marker symbols; currently unused (see the commented-out
# symbol/symbol_sequence arguments in main()).
symbols = ["circle", "square", "diamond", "cross", "x", "triangle-up", "triangle-down", "star", "hexagon", "pentagon"]
38+
def main():
    """Strip-plot benchmark vs. real task times from a directory of CSVs.

    Reads every ``*.csv`` under ``args.path`` (each must have ``time`` as a
    clock string and a ``type`` column), derives the task name from the file
    name, and writes ``<machine>_time_plot.png`` under ``new_test/<machine>/``.
    """
    parser = get_parser()
    args = parser.parse_args()
    path = pathlib.Path(args.path)
    machine = args.machine
    files = []

    for file in path.glob("*.csv"):
        # NOTE(review): "(^.+)?" can only match at the very start of the
        # string, so that group is effectively always empty and only the
        # leading "(.+)" determines `task` — confirm against real file names.
        task, _ = re.match(r"^(.+)_(^.+)?.*?_.+?.*?.*?$", str(file)).groups()
        task = pathlib.Path(task).stem
        df = pd.read_csv(str(file), index_col=0)
        df["task"] = task
        files.append(df)

    df_all = pd.concat(files)
    # Everything that isn't a "real" run was produced by sysbench.
    df_all["tool"] = df_all["type"]
    df_all.loc[df_all["tool"] != "real", "tool"] = "sysbench"

    df_all["time"] = df_all["time"].apply(convert_time)
    df_all["label"] = df_all["tool"]  # + "_" + df_all["server"]

    fig = px.strip(
        df_all,
        x="task", y="time",
        color="type",
        width=1500,
        height=750,
        color_discrete_map=colors,
        title=machine,
        category_orders={
            "task": sorted(df_all["task"].unique())
        }
        # symbol="tool",
        # symbol_sequence=symbols
    ).update_traces(
        marker={
            "size": 15,
            "line": {
                "width": 2,
                "color": "DarkSlateGrey"
            },
            # "symbol": symbols
        },
        jitter=1,
    )

    fig.update_layout(
        legend=dict(
            font_size=20
        ),
        font_size=30,
        yaxis_title="Time (s)",
        xaxis_title="Task"
    )

    fig.update_xaxes(
        tickangle=45
    )
    savedir = this_dir.joinpath(f"new_test/{machine}")
    # Bug fix: write_image fails when the target directory does not exist yet.
    savedir.mkdir(parents=True, exist_ok=True)
    fig.write_image(savedir.joinpath(f"{machine}_time_plot.png"))

if __name__ == '__main__':
    main()

wfcommons/wfperf/mean.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import pandas as pd
2+
import pathlib
3+
import argparse
4+
import re
5+
6+
this_dir = pathlib.Path(__file__).resolve().parent
7+
8+
def find_mean(df: pd.Series) -> float:
    """Return the arithmetic mean of *df*.

    Delegates to pandas, so NaN values are skipped. The original annotation
    claimed a DataFrame return, but for a Series input this is a scalar.
    """
    return df.mean()
10+
11+
def convert_time(text: str) -> float:
    """Translate a "H:M:S"/"M:S"/"S" style string into a number of seconds."""
    seconds = 0.0
    weight = 1
    # Walk the fields right-to-left: seconds, then minutes, then hours.
    for chunk in reversed(str(text).split(":")):
        seconds += float(chunk) * weight
        weight *= 60
    return seconds
13+
14+
def percent_error(real_mean: float, bm_mean: float) -> float:
    """Signed percent error of the benchmark mean relative to the real mean."""
    delta = bm_mean - real_mean
    return (delta / real_mean) * 100
16+
17+
def get_parser() -> argparse.ArgumentParser:
    """CLI: positional csv path plus --ratio (cpu fraction) and --task options."""
    cmdline = argparse.ArgumentParser()
    cmdline.add_argument("path", help="Path to the csv")
    cmdline.add_argument("-r", "--ratio", help="Correct CPU percentage for task")
    cmdline.add_argument("-t", "--task", help="Task")

    return cmdline
24+
25+
def main():
    """Summarise benchmark error against the "real" runs for one task.

    For every csv under ``path``, compute the mean "real" time and the mean
    benchmark time for the requested cpu/mem thread split, then write one
    "<machine> <cpu> <mem> <real_mean> <bm_mean> <error%>" line per file to
    ``<path>/error/<task>_error.txt``.
    """
    parser = get_parser()
    args = parser.parse_args()
    path = pathlib.Path(args.path)
    # --ratio is the fraction of 10 worker threads that are cpu-bound,
    # e.g. 0.7 -> 7 cpu threads / 3 memory threads, labelled "7_3".
    ratio = float(args.ratio)
    cpu_thread = int(ratio*10)
    mem_thread = 10 - cpu_thread
    label = f'{cpu_thread}_{mem_thread}'

    lines = []
    for file in path.glob("*.csv"):
        df = pd.read_csv(file, index_col=0)
        df["time"] = df["time"].apply(convert_time)
        # Split rows into real executions vs. benchmark runs, indexed by type.
        real = df[df["type"] == "real"]
        real = real.set_index("type")
        bm = df[~(df["type"] == "real")]
        bm = bm.set_index("type")


        real_mean = round(float(find_mean(real["time"])), 3)
        # NOTE(review): bm.loc[label] selects whole rows; if the csv has more
        # numeric columns than "time", find_mean() averages across them and
        # float() would fail on a multi-element result — presumably "time" is
        # the only numeric column here. TODO confirm against the csv schema.
        bm_thread = bm.loc[label]
        bm_mean = round(float(find_mean(bm_thread)), 3)

        error = round(percent_error(real_mean, bm_mean), 3)

        savedir = file.parent.joinpath(f"error")
        savedir.mkdir(exist_ok=True, parents=True)

        # NOTE(review): "(^.+)?" cannot match mid-string without re.MULTILINE,
        # so group 1 is always None; only group 2 (machine) is actually used.
        _, machine = re.match(r"^.+_(^.+)?.*?_(.+).*?.*?$", str(file.stem), re.DOTALL).groups()
        lines.append(f'{machine} {cpu_thread} {mem_thread} {real_mean} {bm_mean} {error} \n')

    # Uses `savedir` from the last loop iteration; all files share the same
    # parent directory, so this is the common <path>/error folder.
    with savedir.joinpath(f"{args.task}_error.txt").open("w+") as fp:
        fp.writelines(lines)


if __name__ == "__main__":
    main()
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
from wfcommons.wfperf.montage_validation.montage_perf import WorkflowBenchmark
2+
from wfcommons.wfchef.recipes import MontageRecipe
3+
import pathlib
4+
5+
this_dir = pathlib.Path(__file__).resolve().parent
6+
7+
def total_tasks():
    """Total task count for a tripled Montage workflow.

    Every task type's single-workflow count is multiplied by three; mViewer
    gets one extra task on top — presumably a final combining step (TODO
    confirm intent with the workflow authors).
    """
    per_workflow = {'mProject': 64,
                    'mDiffFit': 2016,
                    'mConcatFit': 1,
                    'mBgModel': 1,
                    'mBackground': 64,
                    'mImgtbl': 1,
                    'mAdd': 1,
                    'mViewer': 1}

    total = 0
    for name, count in per_workflow.items():
        total += count * 3 + 1 if name == 'mViewer' else count * 3

    return total
27+
28+
def main():
    """Generate a Montage benchmark workflow sized by total_tasks()."""
    # sysbench parameters per task type: (cpu-max-prime, cpu-thread count
    # out of 10, run time in seconds).
    tasks = {'mProject': (12800000, 7, 120),
             'mDiffFit': (24900000, 7, 1),
             'mConcatFit': (24900000, 7, 5),
             'mBgModel': (1910000, 7, 120),
             'mBackground': (24900000, 7, 1),
             'mImgtbl': (24900000, 7, 2),
             'mAdd': (1050000, 6, 120),
             'mViewer': (7400000, 6, 120)}

    benchmark = WorkflowBenchmark(MontageRecipe, total_tasks())
    benchmark.create(this_dir.joinpath("Montage"), tasks, verbose=True)


if __name__ == "__main__":
    main()
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
from fractions import Fraction
2+
from wfcommons.wfgen.abstract_recipe import WorkflowRecipe
3+
from wfcommons import WorkflowGenerator
4+
from typing import Dict, Union, List, Type, Tuple
5+
from numpy.random import choice
6+
from wfcommons.wfperf.data_gen import generate_sys_data, cleanup_sys_files
7+
import pathlib
8+
import json
9+
import subprocess
10+
11+
class WorkflowBenchmark():
    """Builds a synthetic benchmark workflow whose jobs invoke sysbench.

    A workflow graph is generated from the given recipe; every job's command
    is rewritten to run ``sys_test.py`` with sysbench cpu/memory arguments
    taken from a per-task-type parameter table.
    """

    def __init__(self, Recipe: Type[WorkflowRecipe], num_tasks: int) -> None:
        # Recipe: WorkflowRecipe subclass used to synthesize the task graph.
        # num_tasks: total number of tasks the generated workflow should contain.
        self.Recipe = Recipe
        self.num_tasks = num_tasks

    def create(self,
               save_dir: pathlib.Path,
               tasks: Dict[str, Tuple[int, int, int]],
               mem_total_size: str = "1000T",
               block_size: str = "4096",
               verbose: bool = False) -> None:
        """Write the benchmark workflow JSON into *save_dir*.

        Args:
            save_dir: output directory (created if absent).
            tasks: maps a job-type name to ``(cpu_max_prime, percent_cpu, time)``
                — the original annotation said a 2-tuple, but three values are
                unpacked below.
            mem_total_size: sysbench ``--memory-total-size`` value.
            block_size: sysbench ``--memory-block-size`` value.
            verbose: print progress messages.

        Raises:
            FileNotFoundError: if the sysbench binary is not on PATH.
        """
        if verbose:
            print("Checking if the sysbench is installed.")
        self._check_sysbench()
        if verbose:
            print("Creating directory.")
        save_dir = pathlib.Path(save_dir).resolve()
        save_dir.mkdir(exist_ok=True, parents=True)

        if verbose:
            print("Generating workflow")
        generator = WorkflowGenerator(self.Recipe.from_num_tasks(self.num_tasks))
        workflow = generator.build_workflow()
        workflow.write_json(f'{save_dir.joinpath(workflow.name)}.json')

        with open(f'{save_dir.joinpath(workflow.name)}.json') as json_file:
            wf = json.load(json_file)

        # Pre-build the sysbench argument list for every task type.
        params = {
            job: [
                f"--memory-block-size={block_size}",
                f"--memory-total-size={mem_total_size}",
                f"--cpu-max-prime={cpu_max_prime}",
                f"--percent_cpu={percent_cpu}",
                "--forced-shutdown=0",
                f"--time={time}"
            ]
            for job, (cpu_max_prime, percent_cpu, time) in tasks.items()
        }

        # Strip each generated job of its file list and replace its command
        # with a sys_test.py invocation; the job-type key is the job name
        # minus the trailing "_<index>" suffix.
        for job in wf["workflow"]["jobs"]:
            job["files"] = []
            job.setdefault("command", {})
            job["command"]["program"] = "sys_test.py"
            job_name = job["name"].rsplit("_", 1)[0]
            job["command"]["arguments"] = params[job_name]

        with open(f'{save_dir.joinpath(workflow.name)}.json', 'w') as fp:
            json.dump(wf, fp, indent=4)

    def _check_sysbench(self):
        """Raise FileNotFoundError unless the sysbench binary is on PATH."""
        proc = subprocess.Popen(["which", "sysbench"], stdout=subprocess.PIPE)
        out, _ = proc.communicate()
        if not out:
            # Bug fix: corrected the upstream URL (the author is "akopytov").
            raise FileNotFoundError("Sysbench not found. Please install sysbench: https://github.com/akopytov/sysbench")

wfcommons/wfperf/real_run.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
#!/bin/bash
# Baseline timing: run the real 1000genome "individuals" task nine times,
# saving traces under new_test/real_individuals_<i>.
# NOTE(review): paths are hard-coded to tgcoleman's home directory.
for i in {1..9}; do echo "======= $i"; time /home/tgcoleman/1000genome-sequential/bin/individuals.py ALL.chr1.250000.vcf 1 1 1001 3000 -s /home/tgcoleman/wfcommons/wfcommons/wfperf/new_test/real_individuals_$i; done

wfcommons/wfperf/run.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
#!/bin/bash
# Benchmark sweep: run ten_samples.py "frequency" with cpu fractions 0.1..0.9,
# saving results under new_test/frequency.
# NOTE(review): the output path is hard-coded to tgcoleman's checkout.
for i in {1..9}; do echo "======= $i"; python ten_samples.py frequency -m 14990000 -p 0.$i -s /home/tgcoleman/wfcommons/wfcommons/wfperf/new_test/frequency; done

wfcommons/wfperf/sys_test.py

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import subprocess
44
import os
55
import time
6+
from typing import List
67

78
def get_parser() -> argparse.ArgumentParser:
89
parser = argparse.ArgumentParser()
@@ -11,6 +12,7 @@ def get_parser() -> argparse.ArgumentParser:
1112
parser.add_argument("--percent-cpu", type=float, help="percent of threads which will be cpu heavy")
1213
parser.add_argument("--save", type=pathlib.Path, help="directory to save to.")
1314

15+
1416
return parser
1517

1618
def main():
@@ -27,49 +29,45 @@ def main():
2729

2830
with save_dir.joinpath(f"{name}_cpu.txt").open("w+") as fp_cpu, save_dir.joinpath(f"{name}_memory.txt").open("w+") as fp_mem, save_dir.joinpath(f"{name}_ps.txt").open("w+") as fp_ps:
2931
num_cores = 1 #os.cpu_count()
30-
cpu_threads = 1 #int(args.percent_cpu*10)
31-
mem_threads = 1 #int(10 - cpu_threads)
32-
32+
cpu_threads = int(args.percent_cpu*10)
33+
mem_threads = int(10 - cpu_threads)
34+
print(f"cpu_threads={cpu_threads}, mem_threads={mem_threads}")
3335

3436
print("Starting CPU benchmark...")
3537
sysbench_cpu_args = [arg for arg in other if arg.startswith("--cpu")] + [f"--threads={cpu_threads}"]
3638

39+
proc_cpus: List[subprocess.Popen] = []
40+
proc_mems: List[subprocess.Popen] = []
3741
for i in range(num_cores):
38-
39-
proc_cpu = subprocess.Popen(
42+
proc_cpus.append(subprocess.Popen(
4043
[
4144
"sysbench", "cpu",
4245
*sysbench_cpu_args, "run"
4346
],
4447
stdout=fp_cpu, stderr=fp_cpu,
45-
)
46-
47-
pid_1 = proc_cpu.pid
48-
print(pid_1)
49-
os.sched_setaffinity(pid_1, {i})
50-
48+
))
49+
os.sched_setaffinity(proc_cpus[-1].pid, {13})
5150

5251
print("Starting Memory benchmark...")
5352
sysbench_mem_args = [arg for arg in other if arg.startswith("--memory")] + [f"--time={args.time}", f"--threads={mem_threads}"]
54-
proc_mem = subprocess.Popen(
53+
proc_mems.append(subprocess.Popen(
5554
[
5655
"sysbench", "memory","run",
5756
*sysbench_mem_args
5857
],
5958
stdout=fp_mem, stderr=fp_mem
60-
)
59+
))
6160

62-
pid_2 = proc_mem.pid
63-
print(pid_2)
64-
os.sched_setaffinity(pid_2, {i})
61+
os.sched_setaffinity(proc_mems[-1].pid, {13})
6562

6663
proc = subprocess.Popen(["ps", "-o","pid,psr,comm,lstart"], stdout=fp_ps)
6764
proc.wait()
68-
proc_cpu.wait()
69-
print(time.time)
70-
subprocess.Popen(["killall", "sysbench"])
65+
for proc_cpu in proc_cpus:
66+
proc_cpu.wait()
67+
for proc_mem in proc_mems:
68+
proc_mem.kill()
7169

7270

7371

7472
if __name__ == "__main__":
75-
main()
73+
main()

0 commit comments

Comments
 (0)