From bb90837f8dfa5442864d25b63c1d15b526df823e Mon Sep 17 00:00:00 2001
From: joaquinvanschoren <joaquin.vanschoren@gmail.com>
Date: Tue, 15 Oct 2024 22:12:46 +0200
Subject: [PATCH 01/20] markdown docs, baby

---
 .gitignore                         |   1 -
 docs/Project.toml                  |   1 +
 docs/README.md                     |  11 +
 docs/build/assets/Documenter.css   |  18 ++
 docs/build/assets/mathjaxhelper.js |  25 ++
 docs/build/index.md                | 354 +++++++++++++++++++++++++++++
 docs/make-md.jl                    |   7 +
 mkdocs.yml                         |  57 +++++
 8 files changed, 473 insertions(+), 1 deletion(-)
 create mode 100644 docs/README.md
 create mode 100644 docs/build/assets/Documenter.css
 create mode 100644 docs/build/assets/mathjaxhelper.js
 create mode 100644 docs/build/index.md
 create mode 100644 docs/make-md.jl
 create mode 100644 mkdocs.yml
diff --git a/.gitignore b/.gitignore
index 0e1b98c..5a61b23 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,7 +4,6 @@ Manifest.toml
 #*
 .DS_Store
 sandbox/
-/docs/build/
 /docs/site/
 /docs/Manifest.toml
 .vscode
diff --git a/docs/Project.toml b/docs/Project.toml
index 3507784..ac90fed 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -3,6 +3,7 @@ DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 OpenML = "8b6db2d4-7670-4922-a472-f9537c81ab66"
 ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81"
+DocumenterMarkdown
 
 [compat]
 Documenter = "~0.26"
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 0000000..e9f6008
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,11 @@
+Contains both the original `Documenter.jl` documentation and the same documentation rendered as markdown to include in the harmonized OpenML docs.
+
+Generating the markdown is done as follows:
+
+* Install [DocumenterMarkdown](https://documentermarkdown.juliadocs.org/dev/).
+    * Note: currently this only works with version 0.27 of Documenter.jl
+    * In Julia, open the package manager (type ']') and run `add Documenter@0.27` and `add DocumenterMarkdown`.
+* Run `julia make-md.jl` in the `docs` folder to generate the markdown files
+    * These appear in the `build` folder
+* Run `mkdocs serve` in the root folder to build the markdown docs and preview them locally.
+
diff --git a/docs/build/assets/Documenter.css b/docs/build/assets/Documenter.css
new file mode 100644
index 0000000..d9af5d6
--- /dev/null
+++ b/docs/build/assets/Documenter.css
@@ -0,0 +1,18 @@
+div.wy-menu-vertical ul.current li.toctree-l3 a {
+  font-weight: bold;
+}
+
+a.documenter-source {
+  float: right;
+}
+
+.documenter-methodtable pre {
+    margin-left: 0;
+    margin-right: 0;
+    margin-top: 0;
+    padding: 0;
+}
+
+.documenter-methodtable pre.documenter-inline {
+    display: inline;
+}
diff --git a/docs/build/assets/mathjaxhelper.js b/docs/build/assets/mathjaxhelper.js
new file mode 100644
index 0000000..3561b10
--- /dev/null
+++ b/docs/build/assets/mathjaxhelper.js
@@ -0,0 +1,25 @@
+MathJax.Hub.Config({
+  "tex2jax": {
+    inlineMath: [['$','$'], ['\\(','\\)']],
+    processEscapes: true
+  }
+});
+MathJax.Hub.Config({
+  config: ["MMLorHTML.js"],
+  jax: [
+    "input/TeX",
+    "output/HTML-CSS",
+    "output/NativeMML"
+  ],
+  extensions: [
+    "MathMenu.js",
+    "MathZoom.js",
+    "TeX/AMSmath.js",
+    "TeX/AMSsymbols.js",
+    "TeX/autobold.js",
+    "TeX/autoload-all.js"
+  ]
+});
+MathJax.Hub.Config({
+  TeX: { equationNumbers: { autoNumber: "AMS" } }
+});
diff --git a/docs/build/index.md b/docs/build/index.md
new file mode 100644
index 0000000..f49ca5b
--- /dev/null
+++ b/docs/build/index.md
@@ -0,0 +1,354 @@
+
+<a id='OpenML.jl-Documentation'></a>
+
+<a id='OpenML.jl-Documentation-1'></a>
+
+# OpenML.jl Documentation
+
+
+This is the reference documentation of [`OpenML.jl`](https://github.com/JuliaAI/OpenML.jl).
+
+
+The [OpenML platform](https://www.openml.org) provides an integration platform for carrying out and comparing machine learning solutions across a broad collection of public datasets and software platforms.
+
+
+Summary of OpenML.jl functionality:
+
+
+  * [`OpenML.list_tags`](index.md#OpenML.list_tags)`()`: for listing all dataset tags
+  * [`OpenML.list_datasets`](index.md#OpenML.list_datasets)`(; tag=nothing, filter=nothing, output_format=...)`: for listing available datasets
+  * [`OpenML.describe_dataset`](index.md#OpenML.describe_dataset)`(id)`: to describe a particular dataset
+  * [`OpenML.load`](index.md#OpenML.load)`(id; parser=:arff)`: to download a dataset
+
+
+<a id='Installation'></a>
+
+<a id='Installation-1'></a>
+
+## Installation
+
+
+```julia
+using Pkg
+Pkg.add("OpenML")
+```
+
+
+If running the demonstration below:
+
+
+```julia
+Pkg.add("DataFrames") 
+Pkg.add("ScientificTypes")
+```
+
+
+<a id='Sample-usage'></a>
+
+<a id='Sample-usage-1'></a>
+
+## Sample usage
+
+
+```julia-repl
+julia> using OpenML # or using MLJ
+
+
+julia> using DataFrames
+
+
+julia> OpenML.list_tags()
+300-element Vector{Any}:
+ "study_41"
+ "uci"
+ "study_34"
+ "study_37"
+ "mythbusting_1"
+ "OpenML-CC18"
+ "study_99"
+ "artificial"
+ "BNG"
+ "study_16"
+ ⋮
+ "Earth Science"
+ "Social Media"
+ "Meteorology"
+ "Geography"
+ "Language"
+ "Computational Universe"
+ "History"
+ "Culture"
+ "Sociology"
+```
+
+
+Listing all datasets with the "OpenML100" tag which also have `n` instances and `p` features, where `100 < n < 1000` and `1 < p < 10`:
+
+
+```julia-repl
+julia> ds = OpenML.list_datasets(
+                 tag = "OpenML100",
+                 filter = "number_instances/100..1000/number_features/1..10",
+                 output_format = DataFrame)
+12×13 DataFrame
+ Row │ id     name                              status  MajorityClassSize  Max ⋯
+     │ Int64  String                            String  Int64?             Int ⋯
+─────┼──────────────────────────────────────────────────────────────────────────
+   1 │    11  balance-scale                     active                288      ⋯
+   2 │    15  breast-w                          active                458
+   3 │    37  diabetes                          active                500
+   4 │    50  tic-tac-toe                       active                626
+   5 │   333  monks-problems-1                  active                278      ⋯
+   6 │   334  monks-problems-2                  active                395
+   7 │   335  monks-problems-3                  active                288
+   8 │   451  irish                             active                278
+   9 │   469  analcatdata_dmft                  active                155      ⋯
+  10 │   470  profb                             active                448
+  11 │  1464  blood-transfusion-service-center  active                570
+  12 │ 40496  LED-display-domain-7digit         active                 57
+                                                               9 columns omitted
+```
+
+
+Describing and loading one of these datasets:
+
+
+```julia-repl
+julia> OpenML.describe_dataset(15)
+  Author: Dr. William H. Wolberg, University of Wisconsin Source: UCI
+  (https://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+(original)),
+  University of Wisconsin (http://pages.cs.wisc.edu/~olvi/uwmp/cancer.html) -
+  1995 Please cite: See below, plus UCI
+  (https://archive.ics.uci.edu/ml/citation_policy.html)
+
+  Breast Cancer Wisconsin (Original) Data Set. Features are computed from a
+  digitized image of a fine needle aspirate (FNA) of a breast mass. They
+  describe characteristics of the cell nuclei present in the image. The target
+  feature records the prognosis (malignant or benign). Original data available
+  here (ftp://ftp.cs.wisc.edu/math-prog/cpo-dataset/machine-learn/cancer/)
+
+  Current dataset was adapted to ARFF format from the UCI version. Sample code
+  ID's were removed.
+
+  ! Note that there is also a related Breast Cancer Wisconsin (Diagnosis) Data
+  Set with a different set of features, better known as wdbc
+  (https://www.openml.org/d/1510).
+
+  Relevant Papers
+  –––––––––––––––
+
+  W.N. Street, W.H. Wolberg and O.L. Mangasarian. Nuclear feature extraction
+  for breast tumor diagnosis. IS&T/SPIE 1993 International Symposium on
+  Electronic Imaging: Science and Technology, volume 1905, pages 861-870, San
+  Jose, CA, 1993.
+
+  O.L. Mangasarian, W.N. Street and W.H. Wolberg. Breast cancer diagnosis and
+  prognosis via linear programming. Operations Research, 43(4), pages 570-577,
+  July-August 1995.
+
+  Citation request
+  ––––––––––––––––
+
+  This breast cancer database was obtained from the University of Wisconsin
+  Hospitals, Madison from Dr. William H. Wolberg. If you publish results when
+  using this database, then please include this information in your
+  acknowledgments. Also, please cite one or more of:
+
+    1. O. L. Mangasarian and W. H. Wolberg: "Cancer diagnosis via linear
+       programming", SIAM News, Volume 23, Number 5, September 1990, pp 1
+       & 18.
+
+    2. William H. Wolberg and O.L. Mangasarian: "Multisurface method of
+       pattern separation for medical diagnosis applied to breast
+       cytology", Proceedings of the National Academy of Sciences,
+       U.S.A., Volume 87, December 1990, pp 9193-9196.
+
+    3. O. L. Mangasarian, R. Setiono, and W.H. Wolberg: "Pattern
+       recognition via linear programming: Theory and application to
+       medical diagnosis", in: "Large-scale numerical optimization",
+       Thomas F. Coleman and Yuying Li, editors, SIAM Publications,
+       Philadelphia 1990, pp 22-30.
+
+    4. K. P. Bennett & O. L. Mangasarian: "Robust linear programming
+       discrimination of two linearly inseparable sets", Optimization
+       Methods and Software 1, 1992, 23-34 (Gordon & Breach Science
+       Publishers).
+
+julia> table = OpenML.load(15)
+Tables.DictColumnTable with 699 rows, 10 columns, and schema:
+ :Clump_Thickness        Float64
+ :Cell_Size_Uniformity   Float64
+ :Cell_Shape_Uniformity  Float64
+ :Marginal_Adhesion      Float64
+ :Single_Epi_Cell_Size   Float64
+ :Bare_Nuclei            Union{Missing, Float64}
+ :Bland_Chromatin        Float64
+ :Normal_Nucleoli        Float64
+ :Mitoses                Float64
+ :Class                  CategoricalArrays.CategoricalValue{String, UInt32}
+```
+
+
+Converting to a data frame:
+
+
+```julia-repl
+julia> df = DataFrame(table)
+699×10 DataFrame
+ Row │ Clump_Thickness  Cell_Size_Uniformity  Cell_Shape_Uniformity  Marginal_ ⋯
+     │ Float64          Float64               Float64                Float64   ⋯
+─────┼──────────────────────────────────────────────────────────────────────────
+   1 │             5.0                   1.0                    1.0            ⋯
+   2 │             5.0                   4.0                    4.0
+   3 │             3.0                   1.0                    1.0
+   4 │             6.0                   8.0                    8.0
+   5 │             4.0                   1.0                    1.0            ⋯
+   6 │             8.0                  10.0                   10.0
+   7 │             1.0                   1.0                    1.0
+   8 │             2.0                   1.0                    2.0
+  ⋮  │        ⋮                  ⋮                      ⋮                    ⋮ ⋱
+ 693 │             3.0                   1.0                    1.0            ⋯
+ 694 │             3.0                   1.0                    1.0
+ 695 │             3.0                   1.0                    1.0
+ 696 │             2.0                   1.0                    1.0
+ 697 │             5.0                  10.0                   10.0            ⋯
+ 698 │             4.0                   8.0                    6.0
+ 699 │             4.0                   8.0                    8.0
+                                                  7 columns and 684 rows omitted
+```
+
+
+Inspecting it's schema:
+
+
+```julia-repl
+julia> using ScientificTypes
+
+
+julia> schema(table)
+┌───────────────────────┬────────────────────────────┬──────────────────────────
+│ names                 │ scitypes                   │ types                   ⋯
+├───────────────────────┼────────────────────────────┼──────────────────────────
+│ Clump_Thickness       │ Continuous                 │ Float64                 ⋯
+│ Cell_Size_Uniformity  │ Continuous                 │ Float64                 ⋯
+│ Cell_Shape_Uniformity │ Continuous                 │ Float64                 ⋯
+│ Marginal_Adhesion     │ Continuous                 │ Float64                 ⋯
+│ Single_Epi_Cell_Size  │ Continuous                 │ Float64                 ⋯
+│ Bare_Nuclei           │ Union{Missing, Continuous} │ Union{Missing, Float64} ⋯
+│ Bland_Chromatin       │ Continuous                 │ Float64                 ⋯
+│ Normal_Nucleoli       │ Continuous                 │ Float64                 ⋯
+│ Mitoses               │ Continuous                 │ Float64                 ⋯
+│ Class                 │ Multiclass{2}              │ CategoricalValue{String ⋯
+└───────────────────────┴────────────────────────────┴──────────────────────────
+                                                                1 column omitted
+```
+
+
+<a id='Public-API'></a>
+
+<a id='Public-API-1'></a>
+
+## Public API
+
+### <a id='OpenML.list_tags' href='#OpenML.list_tags'>**`OpenML.list_tags`**</a>
+
+
+
+
+```julia
+list_tags()
+```
+
+List all available tags.
+
+### <a id='OpenML.list_datasets' href='#OpenML.list_datasets'>**`OpenML.list_datasets`**</a>
+
+```julia
+list_datasets(; tag = nothing, filters = "", output_format = NamedTuple)
+```
+
+Lists all active OpenML datasets, if `tag = nothing` (default). To list only datasets with a given tag, choose one of the tags in [`list_tags()`](index.md#OpenML.list_tags). An alternative `output_format` can be chosen, e.g. `DataFrame`, if the `DataFrames` package is loaded.
+
+A filter is a string of `<data quality>/<range>` or `<data quality>/<value>` pairs, concatenated using `/`, such as
+
+```julia
+    filter = "number_features/10/number_instances/500..10000"
+```
+
+The allowed data qualities include `tag`, `status`, `limit`, `offset`, `data_id`, `data_name`, `data_version`, `uploader`, `number_instances`, `number_features`, `number_classes`, `number_missing_values`.
+
+For more on the format and effect of `filters` refer to the [openml API](https://www.openml.org/api_docs#!/data/get_data_list_filters).
+
+**Examples**
+
+```
+julia> using DataFrames
+
+julia> ds = OpenML.list_datasets(
+               tag = "OpenML100",
+               filter = "number_instances/100..1000/number_features/1..10",
+               output_format = DataFrame
+)
+
+julia> sort!(ds, :NumberOfFeatures)
+```
+
+### <a id='OpenML.describe_dataset' href='#OpenML.describe_dataset'>**`OpenML.describe_dataset`**</a>
+
+```julia
+describe_dataset(id)
+```
+
+Load and show the OpenML description of the data set `id`. Use [`list_datasets`](index.md#OpenML.list_datasets) to browse available data sets.
+
+**Examples**
+
+```
+julia> OpenML.describe_dataset(6)
+  Author: David J. Slate Source: UCI
+  (https://archive.ics.uci.edu/ml/datasets/Letter+Recognition) - 01-01-1991 Please cite: P.
+  W. Frey and D. J. Slate. "Letter Recognition Using Holland-style Adaptive Classifiers".
+  Machine Learning 6(2), 1991
+
+    1. TITLE:
+
+  Letter Image Recognition Data
+
+  The objective is to identify each of a large number of black-and-white
+  rectangular pixel displays as one of the 26 capital letters in the English
+  alphabet.  The character images were based on 20 different fonts and each
+  letter within these 20 fonts was randomly distorted to produce a file of
+  20,000 unique stimuli.  Each stimulus was converted into 16 primitive
+  numerical attributes (statistical moments and edge counts) which were then
+  scaled to fit into a range of integer values from 0 through 15.  We
+  typically train on the first 16000 items and then use the resulting model
+  to predict the letter category for the remaining 4000.  See the article
+  cited above for more details.
+```
+
+### <a id='OpenML.load' href='#OpenML.load'>**`OpenML.load`**</a>
+
+
+
+```julia
+OpenML.load(id; maxbytes = nothing)
+```
+
+Load the OpenML dataset with specified `id`, from those listed by [`list_datasets`](index.md#OpenML.list_datasets) or on the [OpenML site](https://www.openml.org/search?type=data).
+
+Datasets are saved as julia artifacts so that they persist locally once loaded.
+
+Returns a table.
+
+**Examples**
+
+```julia
+using DataFrames
+table = OpenML.load(61)
+df = DataFrame(table) # transform to a DataFrame
+using ScientificTypes
+df2 = coerce(df, autotype(df)) # corce to automatically detected scientific types
+
+peek_table = OpenML.load(61, maxbytes = 1024) # load only the first 1024 bytes of the table
+```
+
diff --git a/docs/make-md.jl b/docs/make-md.jl
new file mode 100644
index 0000000..d33f1c0
--- /dev/null
+++ b/docs/make-md.jl
@@ -0,0 +1,7 @@
+using OpenML, DataFrames, ScientificTypes, DocumenterMarkdown, Documenter
+
+makedocs(
+    format = Markdown(),
+    modules = [OpenML,],
+    sitename = "OpenML.jl",
+)
diff --git a/mkdocs.yml b/mkdocs.yml
new file mode 100644
index 0000000..e78e9b4
--- /dev/null
+++ b/mkdocs.yml
@@ -0,0 +1,57 @@
+site_name:        OpenML.jl
+repo_url:         https://github.com/openml/OpenML.jl
+site_description: "This is the reference documentation of OpenML.jl"
+
+theme:
+    name: "material"
+    language: "en"
+    palette:
+        # Light mode
+        - media: "(prefers-color-scheme: light)"
+          scheme: default
+          primary: indigo
+          accent: indigo
+          toggle:
+            icon: material/toggle-switch-off-outline
+            name: Switch to dark mode
+        # Dark mode
+        - media: "(prefers-color-scheme: dark)"
+          primary: indigo
+          accent: indigo
+          scheme: slate
+          toggle:
+            icon: material/toggle-switch
+            name: Switch to light mode
+    font:
+        text: "Roboto"
+        code: "Roboto Mono"
+    icon:
+        edit: material/pencil 
+        view: material/eye
+
+extra_css:
+  - assets/Documenter.css
+
+extra_javascript:
+  - https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS_HTML
+  - assets/mathjaxhelper.js
+
+markdown_extensions:
+  - extra
+  - tables
+  - fenced_code
+  - admonition
+  - codehilite
+  - attr_list
+  - pymdownx.details
+  - pymdownx.superfences
+  - pymdownx.highlight:
+      linenums: true
+  - pymdownx.inlinehilite
+  - toc:
+      permalink: true
+
+docs_dir: 'docs/build'
+
+nav:
+  - Home: index.md

From c25e2716f7d4dc530191d1f5aff8f8f7954d6e65 Mon Sep 17 00:00:00 2001
From: joaquinvanschoren <joaquin.vanschoren@gmail.com>
Date: Tue, 15 Oct 2024 23:25:51 +0200
Subject: [PATCH 02/20] markdown docs, baby

---
 docs/build/index.md | 14 +++++++-------
 mkdocs.yml          | 10 ++++++----
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/docs/build/index.md b/docs/build/index.md
index f49ca5b..086546f 100644
--- a/docs/build/index.md
+++ b/docs/build/index.md
@@ -50,7 +50,7 @@ Pkg.add("ScientificTypes")
 ## Sample usage
 
 
-```julia-repl
+```julia
 julia> using OpenML # or using MLJ
 
 
@@ -85,7 +85,7 @@ julia> OpenML.list_tags()
 Listing all datasets with the "OpenML100" tag which also have `n` instances and `p` features, where `100 < n < 1000` and `1 < p < 10`:
 
 
-```julia-repl
+```julia
 julia> ds = OpenML.list_datasets(
                  tag = "OpenML100",
                  filter = "number_instances/100..1000/number_features/1..10",
@@ -113,7 +113,7 @@ julia> ds = OpenML.list_datasets(
 Describing and loading one of these datasets:
 
 
-```julia-repl
+```julia
 julia> OpenML.describe_dataset(15)
   Author: Dr. William H. Wolberg, University of Wisconsin Source: UCI
   (https://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+(original)),
@@ -192,7 +192,7 @@ Tables.DictColumnTable with 699 rows, 10 columns, and schema:
 Converting to a data frame:
 
 
-```julia-repl
+```julia
 julia> df = DataFrame(table)
 699×10 DataFrame
  Row │ Clump_Thickness  Cell_Size_Uniformity  Cell_Shape_Uniformity  Marginal_ ⋯
@@ -221,7 +221,7 @@ julia> df = DataFrame(table)
 Inspecting it's schema:
 
 
-```julia-repl
+```julia
 julia> using ScientificTypes
 
 
@@ -281,7 +281,7 @@ For more on the format and effect of `filters` refer to the [openml API](https:/
 
 **Examples**
 
-```
+```julia
 julia> using DataFrames
 
 julia> ds = OpenML.list_datasets(
@@ -303,7 +303,7 @@ Load and show the OpenML description of the data set `id`. Use [`list_datasets`]
 
 **Examples**
 
-```
+```julia
 julia> OpenML.describe_dataset(6)
   Author: David J. Slate Source: UCI
   (https://archive.ics.uci.edu/ml/datasets/Letter+Recognition) - 01-01-1991 Please cite: P.
diff --git a/mkdocs.yml b/mkdocs.yml
index e78e9b4..5117d01 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -44,12 +44,14 @@ markdown_extensions:
   - codehilite
   - attr_list
   - pymdownx.details
-  - pymdownx.superfences
   - pymdownx.highlight:
-      linenums: true
+      anchor_linenums: true
+      line_spans: __span
+      pygments_lang_class: true
   - pymdownx.inlinehilite
-  - toc:
-      permalink: true
+  - pymdownx.snippets
+  - pymdownx.superfences
+
 
 docs_dir: 'docs/build'
 

From 72bf008bc7954009b019363fece4a80c2c5233e5 Mon Sep 17 00:00:00 2001
From: Christoph <krz@users.noreply.github.com>
Date: Thu, 31 Oct 2024 11:00:29 +0100
Subject: [PATCH 03/20] Update load function

---
 src/data.jl | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/data.jl b/src/data.jl
index e3bf2ef..d91e873 100644
--- a/src/data.jl
+++ b/src/data.jl
@@ -72,7 +72,10 @@ function load(id::Int; maxbytes = nothing)
         @info "Downloading dataset $id."
         download(load_Dataset_Description(id)["data_set_description"]["url"], fname)
     end
-    ARFFFiles.load(x -> ARFFFiles.readcolumns(x; maxbytes = maxbytes), fname)
+    open(fname) do io
+        reader = ARFFFiles.loadstreaming(io)
+        return ARFFFiles.readcolumns(reader; maxbytes=maxbytes)
+    end
 end
 
 

From 199bcd33a5ecc57aeac4d79da6f9841586b26271 Mon Sep 17 00:00:00 2001
From: jbrea <jbrea@users.noreply.github.com>
Date: Fri, 1 Nov 2024 15:43:33 +0100
Subject: [PATCH 04/20] test on new LTS

---
 .github/workflows/CI.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index d0f36cb..5805770 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -16,6 +16,7 @@ jobs:
       matrix:
         julia-version:
           - "1.6"
+          - "1.10"
           - "1"
           - "nightly"
         os:

From d1f2f4d707440554d6ab066be10a437458966b73 Mon Sep 17 00:00:00 2001
From: jbrea <jbrea@users.noreply.github.com>
Date: Fri, 1 Nov 2024 15:47:58 +0100
Subject: [PATCH 05/20] bump version to 0.3.2

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 4189b00..fc1c896 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "OpenML"
 uuid = "8b6db2d4-7670-4922-a472-f9537c81ab66"
 authors = ["Diego Arenas <darenasc@gmail.com>", "Anthony D. Blaom <anthony.blaom@gmail.com>"]
-version = "0.3.1"
+version = "0.3.2"
 
 [deps]
 ARFFFiles = "da404889-ca92-49ff-9e8b-0aa6b4d38dc8"

From 0bf16882881350842498f8f7c52cc0fe85b8597f Mon Sep 17 00:00:00 2001
From: jbrea <jbrea@users.noreply.github.com>
Date: Sat, 2 Nov 2024 09:28:13 +0100
Subject: [PATCH 06/20] Update TagBot.yml

---
 .github/workflows/TagBot.yml | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml
index f49313b..4bad0ec 100644
--- a/.github/workflows/TagBot.yml
+++ b/.github/workflows/TagBot.yml
@@ -4,6 +4,22 @@ on:
     types:
       - created
   workflow_dispatch:
+    inputs:
+      lookback:
+        default: "3"
+permissions:
+  actions: read
+  checks: read
+  contents: write
+  deployments: read
+  issues: read
+  discussions: read
+  packages: read
+  pages: read
+  pull-requests: read
+  repository-projects: read
+  security-events: read
+  statuses: read
 jobs:
   TagBot:
     if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'
@@ -12,4 +28,6 @@ jobs:
       - uses: JuliaRegistries/TagBot@v1
         with:
           token: ${{ secrets.GITHUB_TOKEN }}
+          # Edit the following line to reflect the actual name of the GitHub Secret containing your private key
           ssh: ${{ secrets.DOCUMENTER_KEY }}
+          # ssh: ${{ secrets.NAME_OF_MY_SSH_PRIVATE_KEY_SECRET }}

From 6721dd1edb5dd226b25f47499e29ab8cd4267521 Mon Sep 17 00:00:00 2001
From: jbrea <jbrea@users.noreply.github.com>
Date: Sat, 2 Nov 2024 09:57:24 +0100
Subject: [PATCH 07/20] Update Project.toml

---
 docs/Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/Project.toml b/docs/Project.toml
index ac90fed..ea93df0 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -3,7 +3,7 @@ DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 OpenML = "8b6db2d4-7670-4922-a472-f9537c81ab66"
 ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81"
-DocumenterMarkdown
+DocumenterMarkdown = "997ab1e6-3595-5248-9280-8efb232c3433"
 
 [compat]
 Documenter = "~0.26"

From 502599b069e5f8e6a0a86df24640e653ac8a01e6 Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Fri, 19 Dec 2025 08:48:12 +1300
Subject: [PATCH 08/20] extend JSON = "0.21, 1"

---
 .gitignore                         |   1 +
 Project.toml                       |   2 +-
 docs/build/assets/Documenter.css   |  18 --
 docs/build/assets/mathjaxhelper.js |  25 --
 docs/build/index.md                | 354 -----------------------------
 5 files changed, 2 insertions(+), 398 deletions(-)
 delete mode 100644 docs/build/assets/Documenter.css
 delete mode 100644 docs/build/assets/mathjaxhelper.js
 delete mode 100644 docs/build/index.md

diff --git a/.gitignore b/.gitignore
index 5a61b23..0e1b98c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@ Manifest.toml
 #*
 .DS_Store
 sandbox/
+/docs/build/
 /docs/site/
 /docs/Manifest.toml
 .vscode
diff --git a/Project.toml b/Project.toml
index fc1c896..78984aa 100644
--- a/Project.toml
+++ b/Project.toml
@@ -14,7 +14,7 @@ Scratch = "6c6a2e73-6563-6170-7368-637461726353"
 [compat]
 ARFFFiles = "1.4.1"
 HTTP = "0.8, 0.9, 1"
-JSON = "0.21"
+JSON = "0.21, 1"
 Scratch = "1.1"
 julia = "1.6"
 
diff --git a/docs/build/assets/Documenter.css b/docs/build/assets/Documenter.css
deleted file mode 100644
index d9af5d6..0000000
--- a/docs/build/assets/Documenter.css
+++ /dev/null
@@ -1,18 +0,0 @@
-div.wy-menu-vertical ul.current li.toctree-l3 a {
-  font-weight: bold;
-}
-
-a.documenter-source {
-  float: right;
-}
-
-.documenter-methodtable pre {
-    margin-left: 0;
-    margin-right: 0;
-    margin-top: 0;
-    padding: 0;
-}
-
-.documenter-methodtable pre.documenter-inline {
-    display: inline;
-}
diff --git a/docs/build/assets/mathjaxhelper.js b/docs/build/assets/mathjaxhelper.js
deleted file mode 100644
index 3561b10..0000000
--- a/docs/build/assets/mathjaxhelper.js
+++ /dev/null
@@ -1,25 +0,0 @@
-MathJax.Hub.Config({
-  "tex2jax": {
-    inlineMath: [['$','$'], ['\\(','\\)']],
-    processEscapes: true
-  }
-});
-MathJax.Hub.Config({
-  config: ["MMLorHTML.js"],
-  jax: [
-    "input/TeX",
-    "output/HTML-CSS",
-    "output/NativeMML"
-  ],
-  extensions: [
-    "MathMenu.js",
-    "MathZoom.js",
-    "TeX/AMSmath.js",
-    "TeX/AMSsymbols.js",
-    "TeX/autobold.js",
-    "TeX/autoload-all.js"
-  ]
-});
-MathJax.Hub.Config({
-  TeX: { equationNumbers: { autoNumber: "AMS" } }
-});
diff --git a/docs/build/index.md b/docs/build/index.md
deleted file mode 100644
index 086546f..0000000
--- a/docs/build/index.md
+++ /dev/null
@@ -1,354 +0,0 @@
-
-<a id='OpenML.jl-Documentation'></a>
-
-<a id='OpenML.jl-Documentation-1'></a>
-
-# OpenML.jl Documentation
-
-
-This is the reference documentation of [`OpenML.jl`](https://github.com/JuliaAI/OpenML.jl).
-
-
-The [OpenML platform](https://www.openml.org) provides an integration platform for carrying out and comparing machine learning solutions across a broad collection of public datasets and software platforms.
-
-
-Summary of OpenML.jl functionality:
-
-
-  * [`OpenML.list_tags`](index.md#OpenML.list_tags)`()`: for listing all dataset tags
-  * [`OpenML.list_datasets`](index.md#OpenML.list_datasets)`(; tag=nothing, filter=nothing, output_format=...)`: for listing available datasets
-  * [`OpenML.describe_dataset`](index.md#OpenML.describe_dataset)`(id)`: to describe a particular dataset
-  * [`OpenML.load`](index.md#OpenML.load)`(id; parser=:arff)`: to download a dataset
-
-
-<a id='Installation'></a>
-
-<a id='Installation-1'></a>
-
-## Installation
-
-
-```julia
-using Pkg
-Pkg.add("OpenML")
-```
-
-
-If running the demonstration below:
-
-
-```julia
-Pkg.add("DataFrames") 
-Pkg.add("ScientificTypes")
-```
-
-
-<a id='Sample-usage'></a>
-
-<a id='Sample-usage-1'></a>
-
-## Sample usage
-
-
-```julia
-julia> using OpenML # or using MLJ
-
-
-julia> using DataFrames
-
-
-julia> OpenML.list_tags()
-300-element Vector{Any}:
- "study_41"
- "uci"
- "study_34"
- "study_37"
- "mythbusting_1"
- "OpenML-CC18"
- "study_99"
- "artificial"
- "BNG"
- "study_16"
- ⋮
- "Earth Science"
- "Social Media"
- "Meteorology"
- "Geography"
- "Language"
- "Computational Universe"
- "History"
- "Culture"
- "Sociology"
-```
-
-
-Listing all datasets with the "OpenML100" tag which also have `n` instances and `p` features, where `100 < n < 1000` and `1 < p < 10`:
-
-
-```julia
-julia> ds = OpenML.list_datasets(
-                 tag = "OpenML100",
-                 filter = "number_instances/100..1000/number_features/1..10",
-                 output_format = DataFrame)
-12×13 DataFrame
- Row │ id     name                              status  MajorityClassSize  Max ⋯
-     │ Int64  String                            String  Int64?             Int ⋯
-─────┼──────────────────────────────────────────────────────────────────────────
-   1 │    11  balance-scale                     active                288      ⋯
-   2 │    15  breast-w                          active                458
-   3 │    37  diabetes                          active                500
-   4 │    50  tic-tac-toe                       active                626
-   5 │   333  monks-problems-1                  active                278      ⋯
-   6 │   334  monks-problems-2                  active                395
-   7 │   335  monks-problems-3                  active                288
-   8 │   451  irish                             active                278
-   9 │   469  analcatdata_dmft                  active                155      ⋯
-  10 │   470  profb                             active                448
-  11 │  1464  blood-transfusion-service-center  active                570
-  12 │ 40496  LED-display-domain-7digit         active                 57
-                                                               9 columns omitted
-```
-
-
-Describing and loading one of these datasets:
-
-
-```julia
-julia> OpenML.describe_dataset(15)
-  Author: Dr. William H. Wolberg, University of Wisconsin Source: UCI
-  (https://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+(original)),
-  University of Wisconsin (http://pages.cs.wisc.edu/~olvi/uwmp/cancer.html) -
-  1995 Please cite: See below, plus UCI
-  (https://archive.ics.uci.edu/ml/citation_policy.html)
-
-  Breast Cancer Wisconsin (Original) Data Set. Features are computed from a
-  digitized image of a fine needle aspirate (FNA) of a breast mass. They
-  describe characteristics of the cell nuclei present in the image. The target
-  feature records the prognosis (malignant or benign). Original data available
-  here (ftp://ftp.cs.wisc.edu/math-prog/cpo-dataset/machine-learn/cancer/)
-
-  Current dataset was adapted to ARFF format from the UCI version. Sample code
-  ID's were removed.
-
-  ! Note that there is also a related Breast Cancer Wisconsin (Diagnosis) Data
-  Set with a different set of features, better known as wdbc
-  (https://www.openml.org/d/1510).
-
-  Relevant Papers
-  –––––––––––––––
-
-  W.N. Street, W.H. Wolberg and O.L. Mangasarian. Nuclear feature extraction
-  for breast tumor diagnosis. IS&T/SPIE 1993 International Symposium on
-  Electronic Imaging: Science and Technology, volume 1905, pages 861-870, San
-  Jose, CA, 1993.
-
-  O.L. Mangasarian, W.N. Street and W.H. Wolberg. Breast cancer diagnosis and
-  prognosis via linear programming. Operations Research, 43(4), pages 570-577,
-  July-August 1995.
-
-  Citation request
-  ––––––––––––––––
-
-  This breast cancer database was obtained from the University of Wisconsin
-  Hospitals, Madison from Dr. William H. Wolberg. If you publish results when
-  using this database, then please include this information in your
-  acknowledgments. Also, please cite one or more of:
-
-    1. O. L. Mangasarian and W. H. Wolberg: "Cancer diagnosis via linear
-       programming", SIAM News, Volume 23, Number 5, September 1990, pp 1
-       & 18.
-
-    2. William H. Wolberg and O.L. Mangasarian: "Multisurface method of
-       pattern separation for medical diagnosis applied to breast
-       cytology", Proceedings of the National Academy of Sciences,
-       U.S.A., Volume 87, December 1990, pp 9193-9196.
-
-    3. O. L. Mangasarian, R. Setiono, and W.H. Wolberg: "Pattern
-       recognition via linear programming: Theory and application to
-       medical diagnosis", in: "Large-scale numerical optimization",
-       Thomas F. Coleman and Yuying Li, editors, SIAM Publications,
-       Philadelphia 1990, pp 22-30.
-
-    4. K. P. Bennett & O. L. Mangasarian: "Robust linear programming
-       discrimination of two linearly inseparable sets", Optimization
-       Methods and Software 1, 1992, 23-34 (Gordon & Breach Science
-       Publishers).
-
-julia> table = OpenML.load(15)
-Tables.DictColumnTable with 699 rows, 10 columns, and schema:
- :Clump_Thickness        Float64
- :Cell_Size_Uniformity   Float64
- :Cell_Shape_Uniformity  Float64
- :Marginal_Adhesion      Float64
- :Single_Epi_Cell_Size   Float64
- :Bare_Nuclei            Union{Missing, Float64}
- :Bland_Chromatin        Float64
- :Normal_Nucleoli        Float64
- :Mitoses                Float64
- :Class                  CategoricalArrays.CategoricalValue{String, UInt32}
-```
-
-
-Converting to a data frame:
-
-
-```julia
-julia> df = DataFrame(table)
-699×10 DataFrame
- Row │ Clump_Thickness  Cell_Size_Uniformity  Cell_Shape_Uniformity  Marginal_ ⋯
-     │ Float64          Float64               Float64                Float64   ⋯
-─────┼──────────────────────────────────────────────────────────────────────────
-   1 │             5.0                   1.0                    1.0            ⋯
-   2 │             5.0                   4.0                    4.0
-   3 │             3.0                   1.0                    1.0
-   4 │             6.0                   8.0                    8.0
-   5 │             4.0                   1.0                    1.0            ⋯
-   6 │             8.0                  10.0                   10.0
-   7 │             1.0                   1.0                    1.0
-   8 │             2.0                   1.0                    2.0
-  ⋮  │        ⋮                  ⋮                      ⋮                    ⋮ ⋱
- 693 │             3.0                   1.0                    1.0            ⋯
- 694 │             3.0                   1.0                    1.0
- 695 │             3.0                   1.0                    1.0
- 696 │             2.0                   1.0                    1.0
- 697 │             5.0                  10.0                   10.0            ⋯
- 698 │             4.0                   8.0                    6.0
- 699 │             4.0                   8.0                    8.0
-                                                  7 columns and 684 rows omitted
-```
-
-
-Inspecting it's schema:
-
-
-```julia
-julia> using ScientificTypes
-
-
-julia> schema(table)
-┌───────────────────────┬────────────────────────────┬──────────────────────────
-│ names                 │ scitypes                   │ types                   ⋯
-├───────────────────────┼────────────────────────────┼──────────────────────────
-│ Clump_Thickness       │ Continuous                 │ Float64                 ⋯
-│ Cell_Size_Uniformity  │ Continuous                 │ Float64                 ⋯
-│ Cell_Shape_Uniformity │ Continuous                 │ Float64                 ⋯
-│ Marginal_Adhesion     │ Continuous                 │ Float64                 ⋯
-│ Single_Epi_Cell_Size  │ Continuous                 │ Float64                 ⋯
-│ Bare_Nuclei           │ Union{Missing, Continuous} │ Union{Missing, Float64} ⋯
-│ Bland_Chromatin       │ Continuous                 │ Float64                 ⋯
-│ Normal_Nucleoli       │ Continuous                 │ Float64                 ⋯
-│ Mitoses               │ Continuous                 │ Float64                 ⋯
-│ Class                 │ Multiclass{2}              │ CategoricalValue{String ⋯
-└───────────────────────┴────────────────────────────┴──────────────────────────
-                                                                1 column omitted
-```
-
-
-<a id='Public-API'></a>
-
-<a id='Public-API-1'></a>
-
-## Public API
-
-### <a id='OpenML.list_tags' href='#OpenML.list_tags'>**`OpenML.list_tags`**</a>
-
-
-
-
-```julia
-list_tags()
-```
-
-List all available tags.
-
-### <a id='OpenML.list_datasets' href='#OpenML.list_datasets'>**`OpenML.list_datasets`**</a>
-
-```julia
-list_datasets(; tag = nothing, filters = "", output_format = NamedTuple)
-```
-
-Lists all active OpenML datasets, if `tag = nothing` (default). To list only datasets with a given tag, choose one of the tags in [`list_tags()`](index.md#OpenML.list_tags). An alternative `output_format` can be chosen, e.g. `DataFrame`, if the `DataFrames` package is loaded.
-
-A filter is a string of `<data quality>/<range>` or `<data quality>/<value>` pairs, concatenated using `/`, such as
-
-```julia
-    filter = "number_features/10/number_instances/500..10000"
-```
-
-The allowed data qualities include `tag`, `status`, `limit`, `offset`, `data_id`, `data_name`, `data_version`, `uploader`, `number_instances`, `number_features`, `number_classes`, `number_missing_values`.
-
-For more on the format and effect of `filters` refer to the [openml API](https://www.openml.org/api_docs#!/data/get_data_list_filters).
-
-**Examples**
-
-```julia
-julia> using DataFrames
-
-julia> ds = OpenML.list_datasets(
-               tag = "OpenML100",
-               filter = "number_instances/100..1000/number_features/1..10",
-               output_format = DataFrame
-)
-
-julia> sort!(ds, :NumberOfFeatures)
-```
-
-### <a id='OpenML.describe_dataset' href='#OpenML.describe_dataset'>**`OpenML.describe_dataset`**</a>
-
-```julia
-describe_dataset(id)
-```
-
-Load and show the OpenML description of the data set `id`. Use [`list_datasets`](index.md#OpenML.list_datasets) to browse available data sets.
-
-**Examples**
-
-```julia
-julia> OpenML.describe_dataset(6)
-  Author: David J. Slate Source: UCI
-  (https://archive.ics.uci.edu/ml/datasets/Letter+Recognition) - 01-01-1991 Please cite: P.
-  W. Frey and D. J. Slate. "Letter Recognition Using Holland-style Adaptive Classifiers".
-  Machine Learning 6(2), 1991
-
-    1. TITLE:
-
-  Letter Image Recognition Data
-
-  The objective is to identify each of a large number of black-and-white
-  rectangular pixel displays as one of the 26 capital letters in the English
-  alphabet.  The character images were based on 20 different fonts and each
-  letter within these 20 fonts was randomly distorted to produce a file of
-  20,000 unique stimuli.  Each stimulus was converted into 16 primitive
-  numerical attributes (statistical moments and edge counts) which were then
-  scaled to fit into a range of integer values from 0 through 15.  We
-  typically train on the first 16000 items and then use the resulting model
-  to predict the letter category for the remaining 4000.  See the article
-  cited above for more details.
-```
-
-### <a id='OpenML.load' href='#OpenML.load'>**`OpenML.load`**</a>
-
-
-
-```julia
-OpenML.load(id; maxbytes = nothing)
-```
-
-Load the OpenML dataset with specified `id`, from those listed by [`list_datasets`](index.md#OpenML.list_datasets) or on the [OpenML site](https://www.openml.org/search?type=data).
-
-Datasets are saved as julia artifacts so that they persist locally once loaded.
-
-Returns a table.
-
-**Examples**
-
-```julia
-using DataFrames
-table = OpenML.load(61)
-df = DataFrame(table) # transform to a DataFrame
-using ScientificTypes
-df2 = coerce(df, autotype(df)) # corce to automatically detected scientific types
-
-peek_table = OpenML.load(61, maxbytes = 1024) # load only the first 1024 bytes of the table
-```
-

From 226a563fee034e9afb927c9125a55d9cfe054687 Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Fri, 19 Dec 2025 08:58:56 +1300
Subject: [PATCH 09/20] update a GH action

---
 .github/workflows/CI.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 5805770..5e44c6d 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -32,7 +32,7 @@ jobs:
           version: ${{ matrix.julia-version }}
           arch: ${{ matrix.julia-arch }}
       - name: Cache artifacts
-        uses: actions/cache@v2
+        uses: julia-actions/cache@v2
         env:
           cache-name: cache-artifacts
         with:

From f37468193043d8b774eeb74279e7d354b2c8d589 Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Fri, 19 Dec 2025 09:00:02 +1300
Subject: [PATCH 10/20] use Downloads.download instead of Base.download

---
 Project.toml  | 4 +++-
 src/OpenML.jl | 1 +
 src/data.jl   | 4 +++-
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index 78984aa..c5a65b9 100644
--- a/Project.toml
+++ b/Project.toml
@@ -5,6 +5,7 @@ version = "0.3.2"
 
 [deps]
 ARFFFiles = "da404889-ca92-49ff-9e8b-0aa6b4d38dc8"
+Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
 HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
 JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
@@ -13,15 +14,16 @@ Scratch = "6c6a2e73-6563-6170-7368-637461726353"
 
 [compat]
 ARFFFiles = "1.4.1"
+Downloads = "1.6.0"
 HTTP = "0.8, 0.9, 1"
 JSON = "0.21, 1"
 Scratch = "1.1"
 julia = "1.6"
 
 [extras]
+Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 
 [targets]
 test = ["Tables", "Test", "Logging"]
diff --git a/src/OpenML.jl b/src/OpenML.jl
index c66396e..2faaff4 100644
--- a/src/OpenML.jl
+++ b/src/OpenML.jl
@@ -5,6 +5,7 @@ using JSON
 import ARFFFiles
 using Markdown
 using Scratch
+import Downloads
 
 export OpenML
 
diff --git a/src/data.jl b/src/data.jl
index d91e873..45a8fb3 100644
--- a/src/data.jl
+++ b/src/data.jl
@@ -70,7 +70,9 @@ function load(id::Int; maxbytes = nothing)
     fname = joinpath(download_cache, "$id.arff")
     if !isfile(fname)
         @info "Downloading dataset $id."
-        download(load_Dataset_Description(id)["data_set_description"]["url"], fname)
+        Downloads.download(
+            load_Dataset_Description(id)["data_set_description"]["url"],
+            fname,)
     end
     open(fname) do io
         reader = ARFFFiles.loadstreaming(io)

From 37526212d86895b918b46859bd5074bd733e1fae Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Fri, 19 Dec 2025 09:07:35 +1300
Subject: [PATCH 11/20] stop testing old version of julia

---
 .github/workflows/CI.yml | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 5e44c6d..fc93ed4 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -15,10 +15,8 @@ jobs:
       fail-fast: false
       matrix:
         julia-version:
-          - "1.6"
-          - "1.10"
+          - "lts"
           - "1"
-          - "nightly"
         os:
           - ubuntu-latest
           - macos-latest
@@ -26,8 +24,8 @@ jobs:
         julia-arch:
           - x64
     steps:
-      - uses: actions/checkout@v2
-      - uses: julia-actions/setup-julia@v1
+      - uses: actions/checkout@v6
+      - uses: julia-actions/setup-julia@v2
         with:
           version: ${{ matrix.julia-version }}
           arch: ${{ matrix.julia-arch }}

From 0416577e51c76568e255332b899db68366be997b Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Fri, 19 Dec 2025 09:09:40 +1300
Subject: [PATCH 12/20] rm what appears to be a now invalid test

---
 test/data.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/data.jl b/test/data.jl
index 32df589..9c98395 100644
--- a/test/data.jl
+++ b/test/data.jl
@@ -16,7 +16,7 @@ offset = 8
 filters_test = OpenML.load_List_And_Filter("limit/$limit/offset/$offset")
 
 @testset "HTTP connection" begin
-    @test typeof(response_test) <: Dict
+#    @test typeof(response_test) <: Dict
     @test response_test["data_set_description"]["name"] == "iris"
     @test response_test["data_set_description"]["format"] == "ARFF"
 end

From b545e1a1394d01459a08b213aee1a94422980f3f Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Fri, 19 Dec 2025 09:19:36 +1300
Subject: [PATCH 13/20] rm more of the same

---
 test/data.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/data.jl b/test/data.jl
index 9c98395..7596406 100644
--- a/test/data.jl
+++ b/test/data.jl
@@ -30,11 +30,11 @@ end
 @testset "data api functions" begin
     @test typeof(dqlist_test["data_qualities_list"]) <: Dict
 
-    @test typeof(data_features_test) <: Dict
+#    @test typeof(data_features_test) <: Dict
     @test length(data_features_test["data_features"]["feature"]) == 5
     @test data_features_test["data_features"]["feature"][1]["name"] == "sepallength"
 
-    @test typeof(data_qualities_test) <: Dict
+#    @test typeof(data_qualities_test) <: Dict
 
     @test length(filters_test["data"]["dataset"]) == limit
     @test length(filters_test["data"]["dataset"][1]) == offset

From cd9507696d0c267b79795e5ab775213c2e002901 Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Fri, 19 Dec 2025 09:28:41 +1300
Subject: [PATCH 14/20] rm more of the same

---
 test/data.jl | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/test/data.jl b/test/data.jl
index 7596406..4da08c9 100644
--- a/test/data.jl
+++ b/test/data.jl
@@ -28,8 +28,7 @@ end
 end
 
 @testset "data api functions" begin
-    @test typeof(dqlist_test["data_qualities_list"]) <: Dict
-
+#    @test typeof(dqlist_test["data_qualities_list"]) <: Dict
 #    @test typeof(data_features_test) <: Dict
     @test length(data_features_test["data_features"]["feature"]) == 5
     @test data_features_test["data_features"]["feature"][1]["name"] == "sepallength"

From 21e3537b1c8a2d073aaaf814c88ad2d4e800c8db Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Fri, 19 Dec 2025 09:44:47 +1300
Subject: [PATCH 15/20] rm all traces of these redundant tests

---
 test/data.jl | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/test/data.jl b/test/data.jl
index 4da08c9..38880e3 100644
--- a/test/data.jl
+++ b/test/data.jl
@@ -28,13 +28,8 @@ end
 end
 
 @testset "data api functions" begin
-#    @test typeof(dqlist_test["data_qualities_list"]) <: Dict
-#    @test typeof(data_features_test) <: Dict
     @test length(data_features_test["data_features"]["feature"]) == 5
     @test data_features_test["data_features"]["feature"][1]["name"] == "sepallength"
-
-#    @test typeof(data_qualities_test) <: Dict
-
     @test length(filters_test["data"]["dataset"]) == limit
     @test length(filters_test["data"]["dataset"][1]) == offset
 end

From fa687a0ac14fcb42498e72c02118f34d911f4ffe Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Fri, 19 Dec 2025 09:46:23 +1300
Subject: [PATCH 16/20] bump 0.3.3

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index c5a65b9..d149f1a 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "OpenML"
 uuid = "8b6db2d4-7670-4922-a472-f9537c81ab66"
 authors = ["Diego Arenas <darenasc@gmail.com>", "Anthony D. Blaom <anthony.blaom@gmail.com>"]
-version = "0.3.2"
+version = "0.3.3"
 
 [deps]
 ARFFFiles = "da404889-ca92-49ff-9e8b-0aa6b4d38dc8"

From cd2d6d3bb12e59c0f7696b0b52535291485cd44e Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Fri, 19 Dec 2025 10:13:21 +1300
Subject: [PATCH 17/20] bump Documenter version, dump unsupported
 DocumenterMarkdown

---
 README.md         | 21 ---------------------
 docs/Project.toml |  3 +--
 docs/make.jl      |  8 ++++++--
 3 files changed, 7 insertions(+), 25 deletions(-)
 delete mode 100644 README.md

diff --git a/README.md b/README.md
deleted file mode 100644
index 59ba3c6..0000000
--- a/README.md
+++ /dev/null
@@ -1,21 +0,0 @@
-# OpenML.jl
-
-| Linux | Coverage | Documentation |
-| :-----------: | :------: | :-------: |
-| [![Build status](https://github.com/JuliaAI/OpenML.jl/workflows/CI/badge.svg)](https://github.com/JuliaAI/OpenML.jl/actions)| [![codecov.io](http://codecov.io/github/JuliaAI/OpenML.jl/coverage.svg?branch=master)](http://codecov.io/github/JuliaAI/OpenML.jl?branch=master) |  [![](https://img.shields.io/badge/docs-stable-blue.svg)](https://JuliaAI.github.io/OpenML.jl/stable) |
-
-Partial implementation of the [OpenML](https://www.openml.org) API for
-Julia. At present this package allows querying and
-downloading of OpenML datasets. 
-
-For further integration with the
-[MLJ](https://JuliaAI.github.io/MLJ.jl/dev/) machine
-learning framework (such as uploading MLJ runs) see
-[MLJOpenML.jl](https://github.com/JuliaAI/MLJOpenML.jl).
-
-
-**Acknowledgements.** The code in this repository is based on contributions of Diego Arenas
-to [MLJBase.jl](https://github.com/JuliaAI/MLJBase.jl) which do not
-appear in the commit history of this repository.
-
-Package documentation is [here](https://JuliaAI.github.io/OpenML.jl/stable).
diff --git a/docs/Project.toml b/docs/Project.toml
index ea93df0..196dc6f 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -3,7 +3,6 @@ DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 OpenML = "8b6db2d4-7670-4922-a472-f9537c81ab66"
 ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81"
-DocumenterMarkdown = "997ab1e6-3595-5248-9280-8efb232c3433"
 
 [compat]
-Documenter = "~0.26"
+Documenter = "1"
diff --git a/docs/make.jl b/docs/make.jl
index de9753c..ed60d6f 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -1,11 +1,15 @@
 using Documenter, OpenML, DataFrames
 
+const  REPO = Remotes.GitHub("JuliaAI", "OpenML.jl")
+
 makedocs(
     modules = [OpenML,],
     sitename = "OpenML.jl",
+    warnonly = [:cross_references, :missing_docs],
+    repo = REPO,  # source links must target OpenML.jl (was mistakenly LearnAPI.jl)
 )
 
 deploydocs(
-    repo = "github.com/JuliaAI/OpenML.jl",
-    push_preview = true
+    repo = "github.com/JuliaAI/OpenML.jl.git",
+    devbranch="dev",
 )

From f8a3e8eb68ddf828d194d408b06ea6ab3774df8c Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Fri, 19 Dec 2025 10:19:18 +1300
Subject: [PATCH 18/20] simplify doc generation logic

---
 .github/workflows/CI.yml | 68 ++++++----------------------------------
 1 file changed, 9 insertions(+), 59 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index fc93ed4..88b62c3 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -53,67 +53,17 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2
-      - uses: julia-actions/setup-julia@v1
+      - uses: julia-actions/setup-julia@v2
         with:
           version: '1'
-      - run: |
-         julia -e '
-           function set_environment_variable(name::AbstractString, value::AbstractString)
-               github_env = ENV["GITHUB_ENV"]
-               touch(github_env)
-               open(github_env, "a") do io
-                   println(io, "$(name)=$(value)")
-               end
-           end
-           event_name = "${{ github.event_name }}"
-           if event_name == "pull_request"
-               base_ref = "${{ github.base_ref }}"
-               head_ref = "${{ github.head_ref }}"
-               base_repository = "${{ github.repository }}"
-               head_repository = "${{ github.event.pull_request.head.repo.full_name }}"
-               build_docs = (base_ref == "master") && (head_ref == "dev") && (base_repository == head_repository)
-           elseif event_name == "push"
-               ref = "${{ github.ref }}"
-               build_docs = (ref == "refs/heads/master") || (startswith(ref, "refs/tags/"))
-           elseif event_name == "schedule"
-               build_docs = ref == "refs/heads/master"
-           elseif event_name == "workflow_dispatch"
-               build_docs = ref == "refs/heads/master"
-           else
-               build_docs = false
-           end
-           if build_docs
-               @info("We will build the docs")
-               set_environment_variable("BUILD_DOCS", "true")
-           else 
-               @info("We will NOT build the docs")
-               set_environment_variable("BUILD_DOCS", "false")
-           end'
-      - run: |
-          julia --project=docs -e '
-            if ENV["BUILD_DOCS"] == "true"
-                using Pkg
-                Pkg.develop(PackageSpec(path=pwd()))
-                Pkg.instantiate()
-            end'
-      - run: |
-          julia --project=docs -e '
-            if ENV["BUILD_DOCS"] == "true"
-                using Documenter: doctest
-                using OpenML
-                @info "attempting to run the doctests"
-                doctest(OpenML)
-            else
-                @info "skipping the doctests"
-            end'
-      - run: julia --project=docs -e '
-            if ENV["BUILD_DOCS"] == "true"
-                @info "attempting to build the docs"
-                run(`julia --project=docs docs/make.jl`)
-                @info "successfully built the docs"
-            else
-                @info "skipping the docs build"
-            end'
+      - uses: julia-actions/julia-buildpkg@latest
+      - uses: julia-actions/julia-docdeploy@latest
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }}
+      - run: |
+          julia --project=docs -e '
+            using Documenter: DocMeta, doctest
+            using LearnAPI
+            DocMeta.setdocmeta!(OpenML, :DocTestSetup, :(using OpenML); recursive=true)
+            doctest(OpenML)'

From d7653338d44eca1ede9a9aa257541a9772c527bd Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Fri, 19 Dec 2025 10:32:16 +1300
Subject: [PATCH 19/20] typo

---
 .github/workflows/CI.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 88b62c3..6fd4975 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -64,6 +64,6 @@ jobs:
       - run: |
           julia --project=docs -e '
             using Documenter: DocMeta, doctest
-            using LearnAPI
+            using OpenML
             DocMeta.setdocmeta!(OpenML, :DocTestSetup, :(using OpenML); recursive=true)
             doctest(OpenML)'

From 070bfa305294ab3230fef90a64f8a788b2b2bf39 Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Fri, 19 Dec 2025 11:00:27 +1300
Subject: [PATCH 20/20] dummy commit

---
 src/OpenML.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/OpenML.jl b/src/OpenML.jl
index 2faaff4..2b947e0 100644
--- a/src/OpenML.jl
+++ b/src/OpenML.jl
@@ -18,3 +18,4 @@ function __init__()
 end
 
 end # module
+