Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
321 changes: 244 additions & 77 deletions benchmarks/report.py

Large diffs are not rendered by default.

117 changes: 71 additions & 46 deletions benchmarks/search_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

from hed import HedString, QueryHandler # noqa: E402
from hed.models.basic_search import find_matching # noqa: E402
from hed.models.string_search import StringQueryHandler, search_series # noqa: E402
from hed.models.string_search import StringQueryHandler, string_search # noqa: E402

from data_generator import DataGenerator # noqa: E402

Expand Down Expand Up @@ -143,7 +143,7 @@ def _bench_basic(self, raw, query, cfg_label, q_label):
med, _ = time_it(lambda: find_matching(series, query), self.n_runs)
matches = int(find_matching(series, query).sum())
return {
"engine": "basic_search",
"engine": "Basic search",
"query_label": q_label,
"config_label": cfg_label,
"query": query,
Expand All @@ -166,7 +166,7 @@ def do_search():
search_med, _ = time_it(do_search, self.n_runs)
result = do_search()
return {
"engine": "QueryHandler",
"engine": "Object search",
"query_label": q_label,
"config_label": cfg_label,
"query": query,
Expand All @@ -181,8 +181,9 @@ def _bench_string_qh(self, raw, query, cfg_label, q_label, schema_lookup, suffix
sqh = StringQueryHandler(query)
search_med, _ = time_it(lambda: sqh.search(raw, schema_lookup=schema_lookup), self.n_runs)
result = sqh.search(raw, schema_lookup=schema_lookup)
label = "String search" if suffix == "no_lookup" else "String search (lookup)"
return {
"engine": f"StringQueryHandler_{suffix}",
"engine": label,
"query_label": q_label,
"config_label": cfg_label,
"query": query,
Expand Down Expand Up @@ -235,15 +236,15 @@ def run_all(self, series_configs):
rec = self._bench_basic_series(series, bs_query, label, q_label, n_rows)
records.append(rec)

# --- search_series (StringQueryHandler) no lookup ---
# --- String search (StringQueryHandler) no lookup ---
rec = self._bench_search_series(series, qh_query, label, q_label, n_rows, None, "no_lookup")
records.append(rec)

# --- search_series (StringQueryHandler) with lookup ---
# --- String search (StringQueryHandler) with lookup ---
rec = self._bench_search_series(series, qh_query, label, q_label, n_rows, self.lookup, "with_lookup")
records.append(rec)

# --- QueryHandler loop ---
# --- Object search (QueryHandler loop) ---
rec = self._bench_qh_loop(series, qh_query, label, q_label, n_rows)
records.append(rec)

Expand All @@ -253,7 +254,7 @@ def _bench_basic_series(self, series, query, cfg_label, q_label, n_rows):
med, _ = time_it(lambda: find_matching(series, query), self.n_runs)
matches = int(find_matching(series, query).sum())
return {
"engine": "basic_search",
"engine": "Basic search",
"query_label": q_label,
"config_label": cfg_label,
"n_rows": n_rows,
Expand All @@ -263,10 +264,12 @@ def _bench_basic_series(self, series, query, cfg_label, q_label, n_rows):
}

def _bench_search_series(self, series, query, cfg_label, q_label, n_rows, lookup, suffix):
med, _ = time_it(lambda: search_series(series, query, schema_lookup=lookup), self.n_runs)
matches = int(search_series(series, query, schema_lookup=lookup).sum())
strings = series.tolist()
med, _ = time_it(lambda: string_search(strings, query, schema_lookup=lookup), self.n_runs)
matches = sum(string_search(strings, query, schema_lookup=lookup))
label = "String search" if suffix == "no_lookup" else "String search (lookup)"
return {
"engine": f"search_series_{suffix}",
"engine": label,
"query_label": q_label,
"config_label": cfg_label,
"n_rows": n_rows,
Expand Down Expand Up @@ -294,7 +297,7 @@ def do_all():
if qh.search(hs):
count += 1
return {
"engine": "QueryHandler_loop",
"engine": "Object search",
"query_label": q_label,
"config_label": cfg_label,
"n_rows": n_rows,
Expand Down Expand Up @@ -402,16 +405,35 @@ def sweep_query_complexity(self):
return records

def sweep_schema_lookup(self):
"""Compare StringQueryHandler with vs without schema_lookup."""
raw = self.gen.make_string(n_tags=15, n_groups=3, depth=1)
query = "Event"
sqh = StringQueryHandler(query)
"""Compare StringQueryHandler with vs without schema_lookup across query types.

Uses a fixed short-form string containing known descendants of Event and Action so
the behavioural difference (which strings match) is deterministic.
"""
# Fixed short-form string with known Event and Action descendants.
# Sensory-event, Agent-action, Data-feature are Event descendants;
# Communicate, Clap-hands are Action descendants.
raw = (
"Sensory-event, Agent-action, Data-feature, Communicate, Clap-hands, "
"Communicate-gesturally, Blue, High, (Red, Move), (Experiment-control, Frown)"
)
queries = [
("Ancestor: Event", "Event"),
("Ancestor: Action", "Action"),
("Exact: Sensory-event", "Sensory-event"),
("Compound: Event && Action", "Event && Action"),
]
records = []
for with_lookup in [False, True]:
lk = self.lookup if with_lookup else None
label = "with_lookup" if with_lookup else "no_lookup"
med, _ = time_it(lambda lk=lk: sqh.search(raw, schema_lookup=lk), self.n_runs)
records.append({"factor": "schema_lookup", "level": label, "engine": "StringQueryHandler", "time": med})
for q_label, query in queries:
sqh = StringQueryHandler(query)
for with_lookup in [False, True]:
lk = self.lookup if with_lookup else None
mode = "With lookup" if with_lookup else "No lookup"
med, _ = time_it(lambda lk=lk, _sqh=sqh: _sqh.search(raw, schema_lookup=lk), self.n_runs)
matches = len(sqh.search(raw, schema_lookup=lk))
records.append(
{"factor": "schema_lookup", "level": q_label, "engine": mode, "time": med, "matches": matches}
)
return records

def sweep_string_form(self):
Expand Down Expand Up @@ -440,18 +462,18 @@ def qh_search():
qh.search(hs)

search_med, _ = time_it(qh_search, self.n_runs)
records.append({"factor": "compile_vs_search", "level": "compile", "engine": "QueryHandler", "time": comp})
records.append({"factor": "compile_vs_search", "level": "search", "engine": "QueryHandler", "time": search_med})
records.append({"factor": "compile_vs_search", "level": "compile", "engine": "Object search", "time": comp})
records.append(
{"factor": "compile_vs_search", "level": "search", "engine": "Object search", "time": search_med}
)

# StringQueryHandler
comp2, _ = time_it(lambda: StringQueryHandler(query), self.n_runs)
sqh = StringQueryHandler(query)
search_med2, _ = time_it(lambda: sqh.search(raw, schema_lookup=self.lookup), self.n_runs)
records.append({"factor": "compile_vs_search", "level": "compile", "engine": "String search", "time": comp2})
records.append(
{"factor": "compile_vs_search", "level": "compile", "engine": "StringQueryHandler", "time": comp2}
)
records.append(
{"factor": "compile_vs_search", "level": "search", "engine": "StringQueryHandler", "time": search_med2}
{"factor": "compile_vs_search", "level": "search", "engine": "String search", "time": search_med2}
)

return records
Expand Down Expand Up @@ -534,7 +556,7 @@ def _bench_all_engines(self, raw, qh_query, bs_query=None):
# basic_search
if bs_query is not None:
med, _ = time_it(lambda: find_matching(series1, bs_query), self.n_runs)
yield "basic_search", med
yield "Basic search", med

# QueryHandler
qh = QueryHandler(qh_query)
Expand All @@ -544,31 +566,32 @@ def qh_search():
qh.search(hs)

med, _ = time_it(qh_search, self.n_runs)
yield "QueryHandler", med
yield "Object search", med

# StringQueryHandler no lookup
sqh = StringQueryHandler(qh_query)
med, _ = time_it(lambda: sqh.search(raw, schema_lookup=None), self.n_runs)
yield "SQH_no_lookup", med
yield "String search", med

# StringQueryHandler with lookup
med, _ = time_it(lambda: sqh.search(raw, schema_lookup=self.lookup), self.n_runs)
yield "SQH_with_lookup", med
yield "String search (lookup)", med

def _bench_series_engines(self, series, qh_query, bs_query, n_rows):
"""Yield (engine_name, median_time) for series-level engines."""
# basic_search
if bs_query is not None:
med, _ = time_it(lambda: find_matching(series, bs_query), max(3, self.n_runs // 2))
yield "basic_search", med
med, _ = time_it(lambda: find_matching(series, bs_query), self.n_runs)
yield "Basic search", med

# search_series no lookup
med, _ = time_it(lambda: search_series(series, qh_query, schema_lookup=None), max(3, self.n_runs // 2))
yield "search_series_no_lookup", med
# String search no lookup
strings = series.tolist()
med, _ = time_it(lambda: string_search(strings, qh_query, schema_lookup=None), self.n_runs)
yield "String search", med

# search_series with lookup
med, _ = time_it(lambda: search_series(series, qh_query, schema_lookup=self.lookup), max(3, self.n_runs // 2))
yield "search_series_with_lookup", med
# String search with lookup
med, _ = time_it(lambda: string_search(strings, qh_query, schema_lookup=self.lookup), self.n_runs)
yield "String search (lookup)", med

# QueryHandler loop
qh = QueryHandler(qh_query)
Expand All @@ -580,8 +603,8 @@ def qh_loop():
hs = HedString(s, schema)
qh.search(hs)

med, _ = time_it(qh_loop, max(3, self.n_runs // 2))
yield "QueryHandler_loop", med
med, _ = time_it(qh_loop, self.n_runs)
yield "Object search", med


# ======================================================================
Expand All @@ -595,7 +618,7 @@ def run_full_benchmark(quick=False):
gen = DataGenerator()

n_single = 10 if quick else 20
n_series = 3 if quick else 5
n_series = 3 if quick else 10
n_sweep = 5 if quick else 10

# ------------------------------------------------------------------
Expand Down Expand Up @@ -675,20 +698,22 @@ def run_full_benchmark(quick=False):
med, _ = time_it(lambda bs_query=bs_query: find_matching(real_series, bs_query), n_series)
real_results.append(
{
"engine": "basic_search",
"engine": "Basic search",
"query_label": q_label,
"total_time": med,
"per_row": med / real_n,
"n_rows": real_n,
}
)

real_strings = real_series.tolist()
med, _ = time_it(
lambda qh_query=qh_query: search_series(real_series, qh_query, schema_lookup=gen.lookup), n_series
lambda qh_query=qh_query, _rs=real_strings: string_search(_rs, qh_query, schema_lookup=gen.lookup),
n_series,
)
real_results.append(
{
"engine": "search_series",
"engine": "String search",
"query_label": q_label,
"total_time": med,
"per_row": med / real_n,
Expand All @@ -708,7 +733,7 @@ def qh_loop(qh=qh, schema=schema):
med, _ = time_it(qh_loop, n_series)
real_results.append(
{
"engine": "QueryHandler_loop",
"engine": "Object search",
"query_label": q_label,
"total_time": med,
"per_row": med / real_n,
Expand Down
Binary file modified docs/_static/images/benchmark_compile_vs_search.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/images/benchmark_query_heatmap.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/images/benchmark_real_data.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/_static/images/benchmark_schema_lookup.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/images/benchmark_series_scaling.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/images/benchmark_sweep_compile_vs_search.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/images/benchmark_sweep_deep_nest_bare_term.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/images/benchmark_sweep_deep_nest_exact_group.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/images/benchmark_sweep_deep_nest_group_match.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/images/benchmark_sweep_deep_nest_negation.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/images/benchmark_sweep_deep_nest_two_and.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/images/benchmark_sweep_group_count.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/images/benchmark_sweep_nesting_depth.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/images/benchmark_sweep_per_operation.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/images/benchmark_sweep_query_complexity.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/images/benchmark_sweep_repeated_tags.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/images/benchmark_sweep_schema_lookup.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/images/benchmark_sweep_series_size.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/images/benchmark_sweep_string_form.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/images/benchmark_sweep_tag_count.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 2 additions & 2 deletions docs/api/models.rst
Original file line number Diff line number Diff line change
Expand Up @@ -205,10 +205,10 @@ parse_hed_string

.. autofunction:: hed.models.string_search.parse_hed_string

search_series
string_search
~~~~~~~~~~~~~

.. autofunction:: hed.models.string_search.search_series
.. autofunction:: hed.models.string_search.string_search

Schema lookup utilities
~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
Loading
Loading