Skip to content

Commit bbb02d4

Browse files
authored
Merge pull request #5 from izzet/main
Add DFAnalyzer code
2 parents e57e952 + 2de2ed3 commit bbb02d4

62 files changed

Lines changed: 9548 additions & 0 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

.devcontainer.json

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
{
2+
"name": "dfanalyzer-devcontainer",
3+
"build": {
4+
"dockerfile": "Dockerfile"
5+
},
6+
"customizations": {
7+
"vscode": {
8+
"extensions": [
9+
"GitHub.copilot",
10+
"GitHub.copilot-chat",
11+
"Tyriar.sort-lines",
12+
"WakaTime.vscode-wakatime",
13+
"charliermarsh.ruff",
14+
"foxundermoon.shell-format",
15+
"mesonbuild.mesonbuild",
16+
"ms-azuretools.vscode-containers",
17+
"ms-python.debugpy",
18+
"ms-python.python",
19+
"ms-toolsai.jupyter",
20+
"ms-toolsai.jupyter-renderers",
21+
"ms-toolsai.vscode-jupyter-cell-tags",
22+
"ms-vscode.cpptools-extension-pack",
23+
"ms-vscode-remote.remote-containers",
24+
"redhat.vscode-yaml",
25+
"tamasfe.even-better-toml",
26+
"vscode-icons-team.vscode-icons"
27+
]
28+
}
29+
},
30+
"postCreateCommand": "ulimit -c unlimited && git config --global core.autocrlf input",
31+
"remoteEnv": {
32+
"OMPI_ALLOW_RUN_AS_ROOT": "1",
33+
"OMPI_ALLOW_RUN_AS_ROOT_CONFIRM": "1",
34+
"PYTHONPATH": "${containerWorkspaceFolder}"
35+
}
36+
}

.gitattributes

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
.git/* export-ignore
2+
.github/* export-ignore
3+
.vscode/* export-ignore
4+
5+
notebooks/* export-ignore
6+
tests/* export-ignore
7+
8+
.wakatime-project export-ignore

.github/workflows/ci.yml

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
name: Build and Test
2+
3+
on:
4+
pull_request:
5+
branches: [main]
6+
push:
7+
branches: [main, dev]
8+
tags:
9+
- "v*.*.*" # Match version tags like v1.0.0, v0.1.2, etc.
10+
workflow_dispatch:
11+
inputs:
12+
run_full_tests:
13+
description: "Run full test suite"
14+
type: boolean
15+
default: false
16+
17+
jobs:
18+
build-and-test:
19+
strategy:
20+
fail-fast: false
21+
matrix:
22+
os: ["ubuntu-22.04"]
23+
python: ["3.8", "3.9", "3.10"]
24+
name: ${{ matrix.os }}-${{ matrix.python }}
25+
runs-on: ${{ matrix.os }}
26+
timeout-minutes: 30
27+
env:
28+
OMPI_ALLOW_RUN_AS_ROOT: 1
29+
OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
30+
steps:
31+
- name: Checkout repository
32+
uses: actions/checkout@v4
33+
with:
34+
submodules: recursive
35+
fetch-depth: 0
36+
37+
- name: Set up Python
38+
uses: actions/setup-python@v5
39+
with:
40+
python-version: ${{ matrix.python }}
41+
42+
- name: Install system dependencies
43+
run: |
44+
sudo apt update
45+
sudo apt install -y \
46+
ca-certificates \
47+
lsb-release \
48+
wget
49+
wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
50+
sudo apt install -y ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
51+
sudo apt update
52+
sudo apt install -y \
53+
build-essential \
54+
cmake \
55+
git \
56+
libarrow-dev \
57+
libhdf5-dev \
58+
libncurses-dev \
59+
libopenmpi-dev \
60+
libparquet-dev \
61+
libreadline-dev \
62+
meson \
63+
ninja-build \
64+
nlohmann-json3-dev \
65+
openmpi-bin \
66+
openmpi-common
67+
68+
- name: Install Python dependencies
69+
run: |
70+
python -m pip install --upgrade pip
71+
pip install --upgrade meson-python setuptools wheel
72+
pip install -r tests/requirements.txt
73+
74+
- name: Install DFAnalyzer
75+
run: |
76+
pip install .[darshan] \
77+
-Csetup-args="--prefix=$HOME/.local" \
78+
-Csetup-args="-Denable_tests=true" \
79+
-Csetup-args="-Denable_tools=true"
80+
81+
- name: Determine test type
82+
id: test-type
83+
run: |
84+
# Check if this is a tagged release (v*.*.*)
85+
if [[ "${{ github.ref }}" == refs/tags/v* ]]; then
86+
echo "Run full test suite for release tag"
87+
echo "run_full=true" >> $GITHUB_OUTPUT
88+
# Check if this is a manual run requesting full tests
89+
elif [[ "${{ github.event_name }}" == "workflow_dispatch" && "${{ github.event.inputs.run_full_tests }}" == "true" ]]; then
90+
echo "Run full test suite due to manual request"
91+
echo "run_full=true" >> $GITHUB_OUTPUT
92+
else
93+
echo "Run smoke tests for regular development"
94+
echo "run_full=false" >> $GITHUB_OUTPUT
95+
fi
96+
97+
- name: Run Python tests with coverage
98+
run: |
99+
if [[ "${{ steps.test-type.outputs.run_full }}" == "true" ]]; then
100+
echo "Running FULL test suite"
101+
pytest -m full --verbose --cov=dfanalyzer --cov-report=xml
102+
else
103+
echo "Running SMOKE test suite"
104+
pytest -m smoke --verbose --cov=dfanalyzer --cov-report=xml
105+
fi
106+
107+
- name: Run C++ tests
108+
run: |
109+
meson build --prefix=$HOME/.local --reconfigure -Denable_tests=true -Denable_tools=true
110+
meson compile -C build --verbose
111+
meson test -C build --verbose
112+
meson test -C build --verbose --setup=mpi
113+
114+
- name: Run DFAnalyzer
115+
run: |
116+
# dfanalyzer analyzer=darshan percentile=0.99 trace_path=tests/data/extracted/darshan-raw view_types=[file_name,proc_name] metrics=[time]
117+
dfanalyzer analyzer=darshan percentile=0.99 trace_path=tests/data/extracted/darshan-posix-dxt
118+
dfanalyzer analyzer=dftracer analyzer/preset=dlio percentile=0.99 trace_path=tests/data/extracted/dftracer-dlio
119+
dfanalyzer analyzer=recorder percentile=0.99 trace_path=tests/data/extracted/recorder-posix-parquet
120+
121+
- name: Upload test coverage
122+
uses: codecov/codecov-action@v3
123+
with:
124+
file: ./coverage.xml
125+
fail_ci_if_error: false
126+
127+
- name: Upload test artifacts
128+
if: always()
129+
uses: actions/upload-artifact@v4
130+
with:
131+
name: test-results-${{ matrix.os }}-py${{ matrix.python }}
132+
path: |
133+
build/meson-logs/
134+
.coverage
135+
coverage.xml
136+
retention-days: 7

.wakatime-project

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
dfanalyzer

CITATION.cff

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
cff-version: 1.2.0
2+
message: "If you use this software, please cite the software and the paper."
3+
title: "WisIO: Automated I/O Bottleneck Detection with Multi-Perspective Views for HPC Workflows"
4+
version: 0.1.0
5+
abstract: "Analyze, visualize, and understand I/O performance issues in HPC workflows."
6+
license: MIT
7+
url: https://github.com/grc-iit/wisio
8+
repository-code: https://github.com/grc-iit/wisio
9+
contact:
10+
- name: Izzet Yildirim
11+
email: izzetcyildirim@gmail.com
12+
authors:
13+
- family-names: Yildirim
14+
given-names: Izzet
15+
orcid: https://orcid.org/0000-0003-3513-0764
16+
- family-names: Devarajan
17+
given-names: Hariharan
18+
orcid: https://orcid.org/0000-0001-5625-3494
19+
- family-names: Kougkas
20+
given-names: Anthony
21+
orcid: https://orcid.org/0000-0003-3943-663X
22+
- family-names: Sun
23+
given-names: Xian-He
24+
orcid: https://orcid.org/0000-0002-1093-0792
25+
- family-names: Mohror
26+
given-names: Kathryn
27+
orcid: https://orcid.org/0000-0002-1366-1655
28+
preferred-citation:
29+
type: conference-paper
30+
title: "WisIO: Automated I/O Bottleneck Detection with Multi-Perspective Views for HPC Workflows"
31+
year: 2025
32+
authors:
33+
- family-names: Yildirim
34+
given-names: Izzet
35+
orcid: https://orcid.org/0000-0003-3513-0764
36+
- family-names: Devarajan
37+
given-names: Hariharan
38+
orcid: https://orcid.org/0000-0001-5625-3494
39+
- family-names: Kougkas
40+
given-names: Anthony
41+
orcid: https://orcid.org/0000-0003-3943-663X
42+
- family-names: Sun
43+
given-names: Xian-He
44+
orcid: https://orcid.org/0000-0002-1093-0792
45+
- family-names: Mohror
46+
given-names: Kathryn
47+
orcid: https://orcid.org/0000-0002-1366-1655
48+
conference:
49+
name: "ICS'25: 2025 International Conference on Supercomputing"
50+
city: "Salt Lake City"
51+
region: UT
52+
country: USA
53+
date-start: 2025-06-08
54+
date-end: 2025-06-11
55+
doi: 10.1145/3721145.3725742
56+
url: https://doi.org/10.1145/3721145.3725742

Dockerfile

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
FROM ubuntu:22.04
2+
3+
RUN apt-get update && \
4+
apt-get install -y \
5+
ca-certificates \
6+
lsb-release \
7+
wget && \
8+
wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \
9+
apt-get install -y ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \
10+
apt-get update && \
11+
apt-get install -y \
12+
build-essential \
13+
cmake \
14+
git \
15+
libarrow-dev \
16+
libhdf5-dev \
17+
libncurses-dev \
18+
libopenmpi-dev \
19+
libparquet-dev \
20+
libreadline-dev \
21+
meson \
22+
ninja-build \
23+
nlohmann-json3-dev \
24+
openmpi-bin \
25+
openmpi-common \
26+
python3.10 \
27+
python3-pip && \
28+
rm -rf /var/lib/apt/lists/*
29+
30+
WORKDIR /dfanalyzer
31+
32+
COPY . .
33+
34+
RUN pip install --upgrade pip && \
35+
pip install build meson-python setuptools streamlit wheel && \
36+
pip install .[darshan] -Csetup-args="-Denable_tools=true"
37+
38+
ENTRYPOINT ["dfanalyzer"]

dfanalyzer/__init__.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
import dask
2+
import warnings
3+
from dataclasses import dataclass
4+
from distributed import Client
5+
from hydra import compose, initialize
6+
from hydra.core.hydra_config import DictConfig, HydraConfig
7+
from hydra.utils import instantiate
8+
from omegaconf import OmegaConf
9+
from typing import List, Union, Optional
10+
11+
from .analyzer import Analyzer
12+
from .cluster import ClusterType, ExternalCluster
13+
from .config import init_hydra_config_store
14+
from .dftracer import DFTracerAnalyzer
15+
from .output import ConsoleOutput, CSVOutput, SQLiteOutput
16+
from .recorder import RecorderAnalyzer
17+
from .types import ViewType
18+
19+
# The Darshan backend is optional: it needs extra native dependencies that
# may not be installed. When its module is absent, alias the name to the
# base Analyzer so type aliases below (and isinstance checks) still work.
try:
    from .darshan import DarshanAnalyzer
except ModuleNotFoundError:
    DarshanAnalyzer = Analyzer

# Unions over the concrete analyzer/output implementations this package ships.
AnalyzerType = Union[DarshanAnalyzer, DFTracerAnalyzer, RecorderAnalyzer]
OutputType = Union[ConsoleOutput, CSVOutput, SQLiteOutput]

# Silence Dask's query-planning migration warning; it is noise for end users.
dask.config.set({"dataframe.query-planning-warning": False})

# Silence pandas FutureWarnings about the deprecated grouper behavior.
warnings.filterwarnings(
    "ignore",
    message=".*grouper",
    category=FutureWarning,
)
36+
37+
38+
@dataclass
class DFAnalyzerInstance:
    """Bundle of live objects making up one DFAnalyzer session.

    Built by ``init_with_hydra``; groups the analyzer, the Dask
    client/cluster pair, the composed Hydra config, and the output sink.
    """

    # Trace analyzer (Darshan, DFTracer, or Recorder backend).
    analyzer: Analyzer
    # Dask distributed client connected to ``cluster``.
    client: Client
    # Dask cluster the client runs against (local, job-queue, or external).
    cluster: ClusterType
    # Fully composed Hydra configuration for this run.
    hydra_config: DictConfig
    # Result sink (console, CSV, or SQLite).
    output: OutputType

    def analyze_trace(
        self,
        percentile: Optional[float] = None,
        view_types: Optional[List[ViewType]] = None,
    ):
        """Run the analysis described by ``hydra_config``.

        Args:
            percentile: Overrides ``hydra_config.percentile`` when given.
            view_types: Overrides ``hydra_config.view_types`` when given.

        Returns:
            Whatever ``Analyzer.analyze_trace`` returns for the configured
            trace path.
        """
        # Use `is None` rather than truthiness: with the previous falsy
        # check, explicit-but-falsy overrides (percentile=0.0, view_types=[])
        # were silently replaced by the config values.
        return self.analyzer.analyze_trace(
            exclude_characteristics=self.hydra_config.exclude_characteristics,
            logical_view_types=self.hydra_config.logical_view_types,
            metric_boundaries=OmegaConf.to_object(self.hydra_config.metric_boundaries),
            percentile=self.hydra_config.percentile if percentile is None else percentile,
            time_view_type=self.hydra_config.time_view_type,
            trace_path=self.hydra_config.trace_path,
            unoverlapped_posix_only=self.hydra_config.unoverlapped_posix_only,
            view_types=self.hydra_config.view_types if view_types is None else view_types,
        )
61+
62+
63+
def init_with_hydra(hydra_overrides: List[str]):
    """Compose a Hydra config from ``hydra_overrides`` and wire up a session.

    Instantiates the cluster, connects a Dask client to it, builds the
    analyzer and output handler, and returns them as a
    ``DFAnalyzerInstance``.
    """
    with initialize(version_base=None, config_path=None):
        init_hydra_config_store()
        cfg = compose(
            config_name="config",
            overrides=hydra_overrides,
            return_hydra_config=True,
        )
        # Register the composed config globally so Hydra-aware code
        # (e.g. interpolations) can resolve it.
        HydraConfig.instance().set_config(cfg)
        cluster = instantiate(cfg.cluster)
        # An external cluster is joined through its scheduler address;
        # any other cluster object is handed to the client directly.
        if isinstance(cluster, ExternalCluster):
            client = Client(cluster.scheduler_address)
        else:
            client = Client(cluster)
        analyzer = instantiate(cfg.analyzer, debug=cfg.debug, verbose=cfg.verbose)
        output = instantiate(cfg.output)
        return DFAnalyzerInstance(
            analyzer=analyzer,
            client=client,
            cluster=cluster,
            hydra_config=cfg,
            output=output,
        )

0 commit comments

Comments
 (0)