|
70 | 70 | "https://huggingface.co/kadubon" |
71 | 71 | ], |
72 | 72 | "publication": [ |
| 73 | + { |
| 74 | + "@type": "ScholarlyArticle", |
| 75 | + "name": "AI Benchmark Half-Life in Recursive Corpora: A Theory of Validity Decay under Semantic Leakage and Regeneration", |
| 76 | + "genre": "Preprint", |
| 77 | + "url": "https://doi.org/10.5281/zenodo.18954286", |
| 78 | + "datePublished": "2026-03-11", |
| 79 | + "author": [ |
| 80 | + { |
| 81 | + "@type": "Person", |
| 82 | + "name": "K Takahashi" |
| 83 | + } |
| 84 | + ], |
| 85 | + "isPartOf": { |
| 86 | + "@type": "Periodical", |
| 87 | + "name": "Zenodo" |
| 88 | + }, |
| 89 | + "citation": "K Takahashi (2026-03-11). AI Benchmark Half-Life in Recursive Corpora: A Theory of Validity Decay under Semantic Leakage and Regeneration. Zenodo. https://doi.org/10.5281/zenodo.18954286", |
| 90 | + "abstract": "This preprint develops a theory of AI benchmark half-life in recursive corpora under semantic leakage and regeneration, yielding validity-decay bounds and monitoring rules for evaluation systems whose items and solution traces re-enter public data. It models benchmark validity through discriminative power and construct validity, and derives jump-aware lifetime bounds, partial-identification results, portfolio design criteria, and safe sequential control under ambiguity and partial observability.", |
| 91 | + "keywords": "AI benchmark half-life, recursive corpora, semantic leakage, validity decay, benchmark contamination, construct validity, discriminative power, dynamic benchmarks, partial identification, sequential monitoring, lineage observability, model metrology" |
| 92 | + }, |
73 | 93 | { |
74 | 94 | "@type": "ScholarlyArticle", |
75 | 95 | "name": "When Should Inference Be Split? A Fixed-Budget Theory of Predictable Multi-Agent Advantage under Local Context Ceilings", |
@@ -3737,6 +3757,27 @@ <h1> |
3737 | 3757 | Machine-readable endpoints: <a href="https://kadubon.github.io/github.io/feed.xml">RSS feed</a>, <a href="https://kadubon.github.io/github.io/CITATION.cff">CITATION.cff</a>, <a href="https://kadubon.github.io/github.io/robots.txt">robots.txt</a>, and <a href="https://kadubon.github.io/github.io/sitemap.xml">sitemap.xml</a>. |
3738 | 3758 | </p> |
3739 | 3759 | <ol class="publication-list"> |
| 3760 | + <li> |
| 3761 | + <div class="publication-item"> |
| 3762 | + <h3> |
| 3763 | + AI Benchmark Half-Life in Recursive Corpora: A Theory of Validity Decay under Semantic Leakage and Regeneration |
| 3764 | + </h3> |
| 3765 | + <p> |
| 3766 | + <span class="publication-meta"> |
| 3767 | + Preprint | Published: 2026-03-11 |
| 3768 | + </span> |
| 3770 | + <a href="https://doi.org/10.5281/zenodo.18954286" target="_blank" rel="noopener noreferrer"> |
| 3770 | + DOI: 10.5281/zenodo.18954286 |
| 3771 | + </a> |
| 3772 | + </p> |
| 3773 | + <p class="publication-abstract"> |
| 3774 | + Abstract: This preprint develops a theory of AI benchmark half-life in recursive corpora under semantic leakage and regeneration, yielding validity-decay bounds and monitoring rules for evaluation systems whose items and solution traces re-enter public data. It models benchmark validity through discriminative power and construct validity, and derives jump-aware lifetime bounds, partial-identification results, portfolio design criteria, and safe sequential control under ambiguity and partial observability. |
| 3775 | + </p> |
| 3776 | + <p class="publication-keywords"> |
| 3777 | + Keywords: AI benchmark half-life, recursive corpora, semantic leakage, validity decay, benchmark contamination, construct validity, discriminative power, dynamic benchmarks, partial identification, sequential monitoring, lineage observability, model metrology |
| 3778 | + </p> |
| 3779 | + </div> |
| 3780 | + </li> |
3740 | 3781 | <li> |
3741 | 3782 | <div class="publication-item"> |
3742 | 3783 | <h3> |
|
0 commit comments