diff --git a/.Rbuildignore b/.Rbuildignore index f2b1c07..02fb2e5 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -13,3 +13,5 @@ ^CRAN-SUBMISSION$ ^\.python-version$ ^revdep$ +^[.]?air[.]toml$ +^\.vscode$ diff --git a/.github/workflows/R-CMD-check-hard.yaml b/.github/workflows/R-CMD-check-hard.yaml index feda3de..c68a03b 100644 --- a/.github/workflows/R-CMD-check-hard.yaml +++ b/.github/workflows/R-CMD-check-hard.yaml @@ -8,12 +8,13 @@ # dependency. on: push: - branches: [main] + branches: main pull_request: - branches: [main] name: R-CMD-check-hard +permissions: read-all + jobs: R-CMD-check: runs-on: ${{ matrix.config.os }} diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 3625607..04c06df 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -8,10 +8,11 @@ on: push: branches: main pull_request: - branches: main name: R-CMD-check +permissions: read-all + jobs: R-CMD-check: runs-on: ${{ matrix.config.os }} @@ -26,10 +27,10 @@ jobs: - {os: windows-latest, r: 'release'} - - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - - {os: ubuntu-latest, r: 'release'} - - {os: ubuntu-latest, r: 'oldrel-1'} - - {os: ubuntu-latest, r: 'oldrel-2'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + - {os: ubuntu-latest, r: 'oldrel-2'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} @@ -70,3 +71,4 @@ jobs: - uses: r-lib/actions/check-r-package@v2 with: upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' diff --git a/.github/workflows/format-suggest.yaml b/.github/workflows/format-suggest.yaml new file mode 100644 index 0000000..8c4f117 --- /dev/null +++ b/.github/workflows/format-suggest.yaml @@ -0,0 +1,46 @@ +# Workflow derived from https://github.com/posit-dev/setup-air/tree/main/examples + +on: + # Using `pull_request_target` over `pull_request` for elevated `GITHUB_TOKEN` + # privileges, otherwise we can't set `pull-requests: write` when the pull + # request comes from a fork, which is our main use case (external contributors). + # + # `pull_request_target` runs in the context of the target branch (`main`, usually), + # rather than in the context of the pull request like `pull_request` does. Due + # to this, we must explicitly checkout `ref: ${{ github.event.pull_request.head.sha }}`. + # This is typically frowned upon by GitHub, as it exposes you to potentially running + # untrusted code in a context where you have elevated privileges, but they explicitly + # call out the use case of reformatting and committing back / commenting on the PR + # as a situation that should be safe (because we aren't actually running the untrusted + # code, we are just treating it as passive data). + # https://securitylab.github.com/resources/github-actions-preventing-pwn-requests/ + pull_request_target: + +name: format-suggest.yaml + +jobs: + format-suggest: + name: format-suggest + runs-on: ubuntu-latest + + permissions: + # Required to push suggestion comments to the PR + pull-requests: write + + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + + - name: Install + uses: posit-dev/setup-air@v1 + + - name: Format + run: air format . + + - name: Suggest + uses: reviewdog/action-suggester@v1 + with: + level: error + fail_level: error + tool_name: air diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index f1f34a7..5190e07 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -2,15 +2,16 @@ # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [main, master] + branches: main pull_request: - branches: [main, master] release: types: [published] workflow_dispatch: name: pkgdown +permissions: read-all + jobs: pkgdown: runs-on: ubuntu-latest @@ -57,7 +58,7 @@ jobs: - name: Deploy to GitHub pages 🚀 if: github.event_name != 'pull_request' - uses: JamesIves/github-pages-deploy-action@v4.4.1 + uses: JamesIves/github-pages-deploy-action@v4.5.0 with: clean: false branch: gh-pages diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 8770d4b..7434c81 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -4,7 +4,6 @@ on: push: branches: main pull_request: - branches: main name: test-coverage @@ -51,9 +50,19 @@ jobs: clean = FALSE, install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") ) + print(cov) covr::to_cobertura(cov) shell: Rscript {0} + - uses: codecov/codecov-action@v5 + with: + # Fail if error if not on PR, or if on PR and token is given + fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }} + files: ./cobertura.xml + plugins: noop + disable_search: true + token: ${{ secrets.CODECOV_TOKEN }} + - name: Show testthat output if: always() run: | @@ -67,4 +76,3 @@ jobs: with: name: coverage-test-failures path: ${{ runner.temp }}/package - \ No newline at end of file diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 0000000..344f76e --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,5 @@ +{ + "recommendations": [ + "Posit.air-vscode" + ] +} diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..a9f69fe --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,10 @@ +{ + "[r]": { + "editor.formatOnSave": true, + "editor.defaultFormatter": "Posit.air-vscode" + }, + "[quarto]": { + "editor.formatOnSave": true, + "editor.defaultFormatter": "quarto.quarto" + } +} diff --git a/DESCRIPTION b/DESCRIPTION index e723f7c..d5099f2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -7,7 +7,8 @@ Authors@R: c( person("Simon", "Couch", , "simonpatrickcouch@gmail.com", role = "aut"), person("Qiushi", "Yan", , "qiushi.yann@gmail.com", role = "aut"), person("Max", "Kuhn", , "max@posit.co", role = "aut"), - person("Posit Software, PBC", role = c("cph", "fnd")) + person("Posit Software, PBC", role = c("cph", "fnd"), + comment = c(ROR = "03wc8by49")) ) Description: Typically, models in 'R' exist in memory and can be saved via regular 'R' serialization. However, some models store information in @@ -19,9 +20,10 @@ License: MIT + file LICENSE URL: https://github.com/rstudio/bundle, https://rstudio.github.io/bundle/ BugReports: https://github.com/rstudio/bundle/issues Depends: - R (>= 3.6) + R (>= 4.1) Imports: glue, + lifecycle, purrr, rlang, utils, @@ -56,10 +58,11 @@ Suggests: xgboost (>= 1.6.0.1) VignetteBuilder: knitr +Remotes: + tidymodels/butcher Config/Needs/website: tidyverse/tidytemplate Config/testthat/edition: 3 +Config/usethis/last-upkeep: 2025-12-08 Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.2 -Remotes: - tidymodels/butcher +RoxygenNote: 7.3.3 diff --git a/LICENSE b/LICENSE index b81bb0f..b4cb7e2 100644 --- a/LICENSE +++ b/LICENSE @@ -1,2 +1,2 @@ -YEAR: 2023 +YEAR: 2025 COPYRIGHT HOLDER: bundle authors diff --git a/LICENSE.md b/LICENSE.md index e46a148..9a0c557 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ # MIT License -Copyright (c) 2023 bundle authors +Copyright (c) 2025 bundle authors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/NAMESPACE b/NAMESPACE index 11d19cf..c5dab6d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,4 +23,4 @@ export(bundle_constr) export(situate_constr) export(swap_element) export(unbundle) -importFrom(purrr,"%>%") +importFrom(lifecycle,deprecated) diff --git a/R/bundle-package.R b/R/bundle-package.R new file mode 100644 index 0000000..425b3c1 --- /dev/null +++ b/R/bundle-package.R @@ -0,0 +1,7 @@ +#' @keywords internal +"_PACKAGE" + +## usethis namespace: start +#' @importFrom lifecycle deprecated +## usethis namespace: end +NULL diff --git a/R/bundle_bart.R b/R/bundle_bart.R index ad28861..242444e 100644 --- a/R/bundle_bart.R +++ b/R/bundle_bart.R @@ -36,8 +36,10 @@ bundle.bart <- function(x, ...) { # `parsnip::bart()` and `dbarts::bart()` unfortunately both inherit from `bart` if (inherits(x, "model_spec")) { rlang::abort(c( - paste0("`x` should be the output of `dbarts::bart()`, not a model ", - "specification from `parsnip::bart()`."), + paste0( + "`x` should be the output of `dbarts::bart()`, not a model ", + "specification from `parsnip::bart()`." + ), "To bundle `parsnip::bart()` output, train it with `parsnip::fit()` first." )) } diff --git a/R/bundle_embed.R b/R/bundle_embed.R index 5404294..fa7b56c 100644 --- a/R/bundle_embed.R +++ b/R/bundle_embed.R @@ -18,9 +18,9 @@ #' #' set.seed(1) #' -#' rec <- recipe(Species ~ ., data = iris) %>% -#' step_normalize(all_predictors()) %>% -#' step_umap(all_predictors(), outcome = vars(Species), num_comp = 2) %>% +#' rec <- recipe(Species ~ ., data = iris) |> +#' step_normalize(all_predictors()) |> +#' step_umap(all_predictors(), outcome = vars(Species), num_comp = 2) |> #' prep() #' #' rec_bundle <- bundle(rec) diff --git a/R/bundle_keras.R b/R/bundle_keras.R index 62579a8..6e23410 100644 --- a/R/bundle_keras.R +++ b/R/bundle_keras.R @@ -38,20 +38,20 @@ #' #' mod <- keras_model_sequential() #' -#' mod %>% -#' layer_dense(units = 128, activation = 'relu', input_shape = c(784)) %>% -#' layer_dropout(rate = 0.4) %>% -#' layer_dense(units = 64, activation = 'relu') %>% -#' layer_dropout(rate = 0.3) %>% +#' mod |> +#' layer_dense(units = 128, activation = 'relu', input_shape = c(784)) |> +#' layer_dropout(rate = 0.4) |> +#' layer_dense(units = 64, activation = 'relu') |> +#' layer_dropout(rate = 0.3) |> #' layer_dense(units = 10, activation = 'softmax') #' -#' mod %>% compile( +#' mod |> compile( #' loss = 'categorical_crossentropy', #' optimizer = optimizer_rmsprop(), #' metrics = c('accuracy') #' ) #' -#' mod %>% fit( +#' mod |> fit( #' x_train, y_train, #' epochs = 5, batch_size = 128, #' validation_split = 0.2, @@ -90,7 +90,10 @@ bundle.keras.engine.training.Model <- function(x, ...) { bundle_constr( object = serialized, situate = situate_constr(function(object) { - new_file <- withr::local_tempfile(pattern = "unbundle", fileext = ".tar.gz") + new_file <- withr::local_tempfile( + pattern = "unbundle", + fileext = ".tar.gz" + ) unbundle_dir <- withr::local_tempdir("unbundle") writeBin(object, new_file, endian = "little") utils::untar(new_file, exdir = unbundle_dir) diff --git a/R/bundle_parsnip.R b/R/bundle_parsnip.R index 5f34e43..238d63a 100644 --- a/R/bundle_parsnip.R +++ b/R/bundle_parsnip.R @@ -21,9 +21,9 @@ #' set.seed(1) #' #' mod <- -#' boost_tree(trees = 5, mtry = 3) %>% -#' set_mode("regression") %>% -#' set_engine("xgboost") %>% +#' boost_tree(trees = 5, mtry = 3) |> +#' set_mode("regression") |> +#' set_engine("xgboost") |> #' fit(mpg ~ ., data = mtcars) #' #' mod_bundle <- bundle(mod) diff --git a/R/bundle_stacks.R b/R/bundle_stacks.R index 5f7d7b7..5741650 100644 --- a/R/bundle_stacks.R +++ b/R/bundle_stacks.R @@ -20,10 +20,10 @@ #' set.seed(1) #' #' mod <- -#' stacks() %>% -#' add_candidates(reg_res_lr) %>% -#' add_candidates(reg_res_svm) %>% -#' blend_predictions(times = 10) %>% +#' stacks() |> +#' add_candidates(reg_res_lr) |> +#' add_candidates(reg_res_svm) |> +#' blend_predictions(times = 10) |> #' fit_members() #' #' mod_bundle <- bundle(mod) diff --git a/R/bundle_torch.R b/R/bundle_torch.R index 7ee8e31..74aa03f 100644 --- a/R/bundle_torch.R +++ b/R/bundle_torch.R @@ -71,21 +71,21 @@ #' ) #' }, #' forward = function(x) { -#' x %>% -#' self$encoder() %>% +#' x |> +#' self$encoder() |> #' self$decoder() #' }, #' predict = function(x) { -#' self$encoder(x) %>% +#' self$encoder(x) |> #' torch_flatten(start_dim = 2) #' } #' ) #' -#' mod <- net %>% +#' mod <- net |> #' setup( #' loss = nn_mse_loss(), #' optimizer = optim_adam -#' ) %>% +#' ) |> #' fit(train_dl, epochs = 1, valid_data = test_dl) #' #' mod_bundle <- bundle(mod) @@ -116,9 +116,12 @@ bundle.luz_module_fitted <- function(x, ...) { object = res, situate = situate_constr(function(object) { con <- rawConnection(object) - on.exit({ - close(con) - }, add = TRUE) + on.exit( + { + close(con) + }, + add = TRUE + ) res <- luz::luz_load(con) }), desc_class = class(x)[1] diff --git a/R/bundle_workflows.R b/R/bundle_workflows.R index 43ba923..0370eb1 100644 --- a/R/bundle_workflows.R +++ b/R/bundle_workflows.R @@ -24,18 +24,18 @@ #' set.seed(1) #' #' spec <- -#' boost_tree(trees = 5, mtry = 3) %>% -#' set_mode("regression") %>% +#' boost_tree(trees = 5, mtry = 3) |> +#' set_mode("regression") |> #' set_engine("xgboost") #' #' rec <- -#' recipe(mpg ~ ., data = mtcars) %>% +#' recipe(mpg ~ ., data = mtcars) |> #' step_log(hp) #' #' mod <- -#' workflow() %>% -#' add_model(spec) %>% -#' add_recipe(rec) %>% +#' workflow() |> +#' add_model(spec) |> +#' add_recipe(rec) |> #' fit(data = mtcars) #' #' mod_bundle <- bundle(mod) diff --git a/R/package.R b/R/package.R deleted file mode 100644 index d333e7c..0000000 --- a/R/package.R +++ /dev/null @@ -1,6 +0,0 @@ -#' bundle: Serialize Model Objects With A Consistent Interface -#' -#' @docType package -#' @name bundle_description -#' @aliases bundle-package -"_PACKAGE" diff --git a/R/utils.R b/R/utils.R index 5a5cbc3..a1ead7b 100644 --- a/R/utils.R +++ b/R/utils.R @@ -25,7 +25,7 @@ bundle_constr <- function(object, situate, desc_class) { #' @rdname internal_functions #' @keywords internal #' @export -situate_constr <- function (fn) { +situate_constr <- function(fn) { env <- rlang::child_env(rlang::caller_env()) fn <- rlang::eval_bare(rlang::enexpr(fn), env) rlang::env_poke_parent(env, rlang::base_env()) @@ -73,11 +73,7 @@ swap_element <- function(x, ...) { # global variables ------------------------------------------------------------- utils::globalVariables(c( - "extract_fit_engine", "getS3method", "map" + "extract_fit_engine", + "getS3method", + "map" )) - -# imports ---------------------------------------------------------------------- -#' @keywords internal -#' @importFrom purrr %>% -#' @export -NULL diff --git a/README.Rmd b/README.Rmd index e3ffd96..73e5463 100644 --- a/README.Rmd +++ b/README.Rmd @@ -26,6 +26,7 @@ knitr::opts_chunk$set( [![CRAN status](https://www.r-pkg.org/badges/version/bundle)](https://CRAN.R-project.org/package=bundle) [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) [![Codecov test coverage](https://codecov.io/gh/rstudio/bundle/branch/main/graph/badge.svg)](https://app.codecov.io/gh/rstudio/bundle?branch=main) +[![Codecov test coverage](https://codecov.io/gh/rstudio/bundle/graph/badge.svg)](https://app.codecov.io/gh/rstudio/bundle) Typically, models in R exist in memory and can be saved as `.rds` files. However, some models store information in locations that cannot be saved using `save()` or `saveRDS()` directly. The goal of bundle is to provide a common interface to capture this information, situate it within a portable object, and restore it for use in new settings. @@ -77,9 +78,9 @@ Fit the boosted tree model: ```{r} # fit an boosted tree with xgboost via parsnip mod <- - boost_tree(trees = 5, mtry = 3) %>% - set_mode("regression") %>% - set_engine("xgboost") %>% + boost_tree(trees = 5, mtry = 3) |> + set_mode("regression") |> + set_engine("xgboost") |> fit(mpg ~ ., data = mtcars[1:25,]) mod diff --git a/README.md b/README.md index c11ad4d..28fddfb 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,8 @@ status](https://www.r-pkg.org/badges/version/bundle)](https://CRAN.R-project.org experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) [![Codecov test coverage](https://codecov.io/gh/rstudio/bundle/branch/main/graph/badge.svg)](https://app.codecov.io/gh/rstudio/bundle?branch=main) +[![Codecov test +coverage](https://codecov.io/gh/rstudio/bundle/graph/badge.svg)](https://app.codecov.io/gh/rstudio/bundle) Typically, models in R exist in memory and can be saved as `.rds` files. @@ -87,38 +89,32 @@ Fit the boosted tree model: ``` r # fit an boosted tree with xgboost via parsnip mod <- - boost_tree(trees = 5, mtry = 3) %>% - set_mode("regression") %>% - set_engine("xgboost") %>% + boost_tree(trees = 5, mtry = 3) |> + set_mode("regression") |> + set_engine("xgboost") |> fit(mpg ~ ., data = mtcars[1:25,]) mod #> parsnip model object #> #> ##### xgb.Booster -#> raw: 8.1 Kb #> call: #> xgboost::xgb.train(params = list(eta = 0.3, max_depth = 6, gamma = 0, #> colsample_bytree = 1, colsample_bynode = 0.3, min_child_weight = 1, -#> subsample = 1), data = x$data, nrounds = 5, watchlist = x$watchlist, -#> verbose = 0, nthread = 1, objective = "reg:squarederror") -#> params (as set within xgb.train): -#> eta = "0.3", max_depth = "6", gamma = "0", colsample_bytree = "1", colsample_bynode = "0.3", min_child_weight = "1", subsample = "1", nthread = "1", objective = "reg:squarederror", validate_parameters = "TRUE" -#> xgb.attributes: -#> niter -#> callbacks: -#> cb.evaluation.log() +#> subsample = 1, nthread = 1, objective = "reg:squarederror"), +#> data = x$data, nrounds = 5, evals = x$watchlist, verbose = 0) #> # of features: 10 -#> niter: 5 -#> nfeatures : 10 +#> # of rounds: 5 +#> callbacks: +#> evaluation_log #> evaluation_log: #> iter training_rmse -#> -#> 1 14.631798 -#> 2 10.896629 -#> 3 8.188981 -#> 4 6.173644 -#> 5 4.707690 +#> +#> 1 4.618358 +#> 2 3.627921 +#> 3 2.891176 +#> 4 2.300624 +#> 5 1.852596 ``` Note that simply saving and loading the model results in changes to the @@ -130,14 +126,8 @@ saveRDS(mod, temp_file) mod2 <- readRDS(temp_file) compare(mod, mod2, ignore_formula_env = TRUE) -#> `old$fit$handle` is -#> `new$fit$handle` is -#> -#> `old$fit$handle` is attr(,"class") -#> `new$fit$handle` is attr(,"class") -#> -#> `old$fit$handle` is [1] "xgb.Booster.handle" -#> `new$fit$handle` is [1] "xgb.Booster.handle" +#> `old$fit$ptr` is +#> `new$fit$ptr` is ``` Saving and reloading `mod2` didn’t preserve XGBoost’s reference to its @@ -177,13 +167,13 @@ r( #> # A tibble: 7 × 1 #> .pred #> -#> 1 22.3 -#> 2 22.3 -#> 3 20.4 -#> 4 14.5 -#> 5 14.5 -#> 6 12.1 -#> 7 17.0 +#> 1 28.7 +#> 2 25.0 +#> 3 23.7 +#> 4 18.0 +#> 5 20.3 +#> 6 14.9 +#> 7 22.2 ``` For a more in-depth demonstration of the package, see the [main diff --git a/air.toml b/air.toml new file mode 100644 index 0000000..e69de29 diff --git a/man-roxygen/butcher_details.R b/man-roxygen/butcher_details.R index 022b341..898374c 100644 --- a/man-roxygen/butcher_details.R +++ b/man-roxygen/butcher_details.R @@ -7,8 +7,8 @@ #' #' ``` #' res <- -#' x %>% -#' butcher() %>% +#' x |> +#' butcher() |> #' bundle() #' ``` #' diff --git a/man/bundle_description.Rd b/man/bundle-package.Rd similarity index 79% rename from man/bundle_description.Rd rename to man/bundle-package.Rd index 37a06e8..d8d3d29 100644 --- a/man/bundle_description.Rd +++ b/man/bundle-package.Rd @@ -1,10 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/package.R +% Please edit documentation in R/bundle-package.R \docType{package} -\name{bundle_description} -\alias{bundle_description} +\name{bundle-package} \alias{bundle-package} -\title{bundle: Serialize Model Objects With A Consistent Interface} +\title{bundle: Serialize Model Objects with a Consistent Interface} \description{ Typically, models in 'R' exist in memory and can be saved via regular 'R' serialization. However, some models store information in locations that cannot be saved using 'R' serialization alone. The goal of 'bundle' is to provide a common interface to capture this information, situate it within a portable object, and restore it for use in new settings. } @@ -29,7 +28,8 @@ Authors: Other contributors: \itemize{ - \item Posit Software, PBC [copyright holder, funder] + \item Posit Software, PBC (\href{https://ror.org/03wc8by49}{ROR}) [copyright holder, funder] } } +\keyword{internal} diff --git a/man/bundle_bart.Rd b/man/bundle_bart.Rd index 70a8cb4..545db0c 100644 --- a/man/bundle_bart.Rd +++ b/man/bundle_bart.Rd @@ -67,8 +67,8 @@ This bundle method is compatible with pre-butchering. That is, for a fitted model \code{x}, you can safely call: \if{html}{\out{
}}\preformatted{res <- - x \%>\% - butcher() \%>\% + x |> + butcher() |> bundle() }\if{html}{\out{
}} @@ -76,7 +76,7 @@ and predict with the output of \code{unbundle(res)} in a new R session. } \examples{ -\dontshow{if (rlang::is_installed(c("dbarts"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("dbarts"))) withAutoprint(\{ # examplesIf} # fit model and bundle ------------------------------------------------ library(dbarts) diff --git a/man/bundle_caret.Rd b/man/bundle_caret.Rd index 7b4f6f1..c4a4a50 100644 --- a/man/bundle_caret.Rd +++ b/man/bundle_caret.Rd @@ -73,8 +73,8 @@ This bundle method is compatible with pre-butchering. That is, for a fitted model \code{x}, you can safely call: \if{html}{\out{
}}\preformatted{res <- - x \%>\% - butcher() \%>\% + x |> + butcher() |> bundle() }\if{html}{\out{
}} @@ -82,7 +82,7 @@ and predict with the output of \code{unbundle(res)} in a new R session. } \examples{ -\dontshow{if (rlang::is_installed("caret") && identical(Sys.getenv("NOT_CRAN"), "true")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed("caret") && identical(Sys.getenv("NOT_CRAN"), "true")) withAutoprint(\{ # examplesIf} # fit model and bundle ------------------------------------------------ library(caret) diff --git a/man/bundle_embed.Rd b/man/bundle_embed.Rd index f198aa9..f4fab8d 100644 --- a/man/bundle_embed.Rd +++ b/man/bundle_embed.Rd @@ -68,8 +68,8 @@ This bundle method is compatible with pre-butchering. That is, for a fitted model \code{x}, you can safely call: \if{html}{\out{
}}\preformatted{res <- - x \%>\% - butcher() \%>\% + x |> + butcher() |> bundle() }\if{html}{\out{
}} @@ -77,16 +77,16 @@ and predict with the output of \code{unbundle(res)} in a new R session. } \examples{ -\dontshow{if (rlang::is_installed(c("recipes", "embed", "irlba (>= 2.3.5.2)"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("recipes", "embed", "irlba (>= 2.3.5.2)"))) withAutoprint(\{ # examplesIf} # fit model and bundle ------------------------------------------------ library(recipes) library(embed) set.seed(1) -rec <- recipe(Species ~ ., data = iris) \%>\% - step_normalize(all_predictors()) \%>\% - step_umap(all_predictors(), outcome = vars(Species), num_comp = 2) \%>\% +rec <- recipe(Species ~ ., data = iris) |> + step_normalize(all_predictors()) |> + step_umap(all_predictors(), outcome = vars(Species), num_comp = 2) |> prep() rec_bundle <- bundle(rec) diff --git a/man/bundle_h2o.Rd b/man/bundle_h2o.Rd index 850ae56..b62f97d 100644 --- a/man/bundle_h2o.Rd +++ b/man/bundle_h2o.Rd @@ -76,7 +76,7 @@ restored for prediction in a new R session. See the 'Value' section for more information on bundles and their usage. } \examples{ -\dontshow{if (rlang::is_installed(c("h2o")) && identical(Sys.getenv("NOT_CRAN"), "true")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("h2o")) && identical(Sys.getenv("NOT_CRAN"), "true")) withAutoprint(\{ # examplesIf} # fit model and bundle ------------------------------------------------ library(h2o) diff --git a/man/bundle_keras.Rd b/man/bundle_keras.Rd index 0f86927..a0fc958 100644 --- a/man/bundle_keras.Rd +++ b/man/bundle_keras.Rd @@ -64,7 +64,7 @@ such as use of a \code{\link[keras:new-classes]{keras::new_layer_class()}} or cu In such situations, consider using \code{\link[keras:with_custom_object_scope]{keras::with_custom_object_scope()}}. } \examples{ -\dontshow{if (rlang::is_installed(c("keras")) && identical(Sys.getenv("NOT_CRAN"), "true")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("keras")) && identical(Sys.getenv("NOT_CRAN"), "true")) withAutoprint(\{ # examplesIf} # fit model and bundle ------------------------------------------------ library(keras) @@ -87,20 +87,20 @@ y_test <- to_categorical(y_test, 10) mod <- keras_model_sequential() -mod \%>\% - layer_dense(units = 128, activation = 'relu', input_shape = c(784)) \%>\% - layer_dropout(rate = 0.4) \%>\% - layer_dense(units = 64, activation = 'relu') \%>\% - layer_dropout(rate = 0.3) \%>\% +mod |> + layer_dense(units = 128, activation = 'relu', input_shape = c(784)) |> + layer_dropout(rate = 0.4) |> + layer_dense(units = 64, activation = 'relu') |> + layer_dropout(rate = 0.3) |> layer_dense(units = 10, activation = 'softmax') -mod \%>\% compile( +mod |> compile( loss = 'categorical_crossentropy', optimizer = optimizer_rmsprop(), metrics = c('accuracy') ) -mod \%>\% fit( +mod |> fit( x_train, y_train, epochs = 5, batch_size = 128, validation_split = 0.2, diff --git a/man/bundle_parsnip.Rd b/man/bundle_parsnip.Rd index 25d3655..aa2f528 100644 --- a/man/bundle_parsnip.Rd +++ b/man/bundle_parsnip.Rd @@ -73,8 +73,8 @@ This bundle method is compatible with pre-butchering. That is, for a fitted model \code{x}, you can safely call: \if{html}{\out{
}}\preformatted{res <- - x \%>\% - butcher() \%>\% + x |> + butcher() |> bundle() }\if{html}{\out{
}} @@ -82,7 +82,7 @@ and predict with the output of \code{unbundle(res)} in a new R session. } \examples{ -\dontshow{if (rlang::is_installed("parsnip") && rlang::is_installed("xgboost")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed("parsnip") && rlang::is_installed("xgboost")) withAutoprint(\{ # examplesIf} # fit model and bundle ------------------------------------------------ library(parsnip) library(xgboost) @@ -90,9 +90,9 @@ library(xgboost) set.seed(1) mod <- - boost_tree(trees = 5, mtry = 3) \%>\% - set_mode("regression") \%>\% - set_engine("xgboost") \%>\% + boost_tree(trees = 5, mtry = 3) |> + set_mode("regression") |> + set_engine("xgboost") |> fit(mpg ~ ., data = mtcars) mod_bundle <- bundle(mod) diff --git a/man/bundle_stacks.Rd b/man/bundle_stacks.Rd index eb81e61..1c3981c 100644 --- a/man/bundle_stacks.Rd +++ b/man/bundle_stacks.Rd @@ -64,17 +64,17 @@ This bundler wraps \code{\link[=bundle.model_fit]{bundle.model_fit()}} and \code Both the fitted members and the meta-learner (in \code{x$coefs}) are bundled. } \examples{ -\dontshow{if (rlang::is_installed(c("stacks")) && identical(Sys.getenv("NOT_CRAN"), "true")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("stacks")) && identical(Sys.getenv("NOT_CRAN"), "true")) withAutoprint(\{ # examplesIf} # fit model and bundle ------------------------------------------------ library(stacks) set.seed(1) mod <- - stacks() \%>\% - add_candidates(reg_res_lr) \%>\% - add_candidates(reg_res_svm) \%>\% - blend_predictions(times = 10) \%>\% + stacks() |> + add_candidates(reg_res_lr) |> + add_candidates(reg_res_svm) |> + blend_predictions(times = 10) |> fit_members() mod_bundle <- bundle(mod) diff --git a/man/bundle_torch.Rd b/man/bundle_torch.Rd index e6c05a2..f71d20a 100644 --- a/man/bundle_torch.Rd +++ b/man/bundle_torch.Rd @@ -64,7 +64,7 @@ via the luz package, "a higher level API for torch providing abstractions to allow for much less verbose training loops." } \examples{ -\dontshow{if (rlang::is_installed(c("torch")) && identical(Sys.getenv("NOT_CRAN"), "true")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("torch")) && identical(Sys.getenv("NOT_CRAN"), "true")) withAutoprint(\{ # examplesIf} if (torch::torch_is_installed()) { # fit model and bundle ------------------------------------------------ library(torch) @@ -117,21 +117,21 @@ net <- nn_module( ) }, forward = function(x) { - x \%>\% - self$encoder() \%>\% + x |> + self$encoder() |> self$decoder() }, predict = function(x) { - self$encoder(x) \%>\% + self$encoder(x) |> torch_flatten(start_dim = 2) } ) -mod <- net \%>\% +mod <- net |> setup( loss = nn_mse_loss(), optimizer = optim_adam - ) \%>\% + ) |> fit(train_dl, epochs = 1, valid_data = test_dl) mod_bundle <- bundle(mod) diff --git a/man/bundle_workflows.Rd b/man/bundle_workflows.Rd index f34cd7d..a69497c 100644 --- a/man/bundle_workflows.Rd +++ b/man/bundle_workflows.Rd @@ -71,8 +71,8 @@ This bundle method is compatible with pre-butchering. That is, for a fitted model \code{x}, you can safely call: \if{html}{\out{
}}\preformatted{res <- - x \%>\% - butcher() \%>\% + x |> + butcher() |> bundle() }\if{html}{\out{
}} @@ -80,7 +80,7 @@ and predict with the output of \code{unbundle(res)} in a new R session. } \examples{ -\dontshow{if (rlang::is_installed(c("workflows", "parsnip", "recipes", "xgboost"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("workflows", "parsnip", "recipes", "xgboost"))) withAutoprint(\{ # examplesIf} # fit model and bundle ------------------------------------------------ library(workflows) library(recipes) @@ -90,18 +90,18 @@ library(xgboost) set.seed(1) spec <- - boost_tree(trees = 5, mtry = 3) \%>\% - set_mode("regression") \%>\% + boost_tree(trees = 5, mtry = 3) |> + set_mode("regression") |> set_engine("xgboost") rec <- - recipe(mpg ~ ., data = mtcars) \%>\% + recipe(mpg ~ ., data = mtcars) |> step_log(hp) mod <- - workflow() \%>\% - add_model(spec) \%>\% - add_recipe(rec) \%>\% + workflow() |> + add_model(spec) |> + add_recipe(rec) |> fit(data = mtcars) mod_bundle <- bundle(mod) diff --git a/man/bundle_xgboost.Rd b/man/bundle_xgboost.Rd index 1850ed7..b8ed244 100644 --- a/man/bundle_xgboost.Rd +++ b/man/bundle_xgboost.Rd @@ -7,7 +7,7 @@ \method{bundle}{xgb.Booster}(x, ...) } \arguments{ -\item{x}{An \code{xgb.Booster} object returned from \code{\link[xgboost:xgb.train]{xgboost::xgboost()}} or +\item{x}{An \code{xgb.Booster} object returned from \code{\link[xgboost:xgboost]{xgboost::xgboost()}} or \code{\link[xgboost:xgb.train]{xgboost::xgb.train()}}.} \item{...}{Not used in this bundler and included for compatibility with @@ -67,8 +67,8 @@ This bundle method is compatible with pre-butchering. That is, for a fitted model \code{x}, you can safely call: \if{html}{\out{
}}\preformatted{res <- - x \%>\% - butcher() \%>\% + x |> + butcher() |> bundle() }\if{html}{\out{
}} @@ -76,7 +76,7 @@ and predict with the output of \code{unbundle(res)} in a new R session. } \examples{ -\dontshow{if (rlang::is_installed("xgboost")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed("xgboost")) withAutoprint(\{ # examplesIf} # fit model and bundle ------------------------------------------------ library(xgboost) diff --git a/man/figures/lifecycle-deprecated.svg b/man/figures/lifecycle-deprecated.svg new file mode 100644 index 0000000..b61c57c --- /dev/null +++ b/man/figures/lifecycle-deprecated.svg @@ -0,0 +1,21 @@ + + lifecycle: deprecated + + + + + + + + + + + + + + + lifecycle + + deprecated + + diff --git a/man/figures/lifecycle-experimental.svg b/man/figures/lifecycle-experimental.svg new file mode 100644 index 0000000..5d88fc2 --- /dev/null +++ b/man/figures/lifecycle-experimental.svg @@ -0,0 +1,21 @@ + + lifecycle: experimental + + + + + + + + + + + + + + + lifecycle + + experimental + + diff --git a/man/figures/lifecycle-stable.svg b/man/figures/lifecycle-stable.svg new file mode 100644 index 0000000..9bf21e7 --- /dev/null +++ b/man/figures/lifecycle-stable.svg @@ -0,0 +1,29 @@ + + lifecycle: stable + + + + + + + + + + + + + + + + lifecycle + + + + stable + + + diff --git a/man/figures/lifecycle-superseded.svg b/man/figures/lifecycle-superseded.svg new file mode 100644 index 0000000..db8d757 --- /dev/null +++ b/man/figures/lifecycle-superseded.svg @@ -0,0 +1,21 @@ + + lifecycle: superseded + + + + + + + + + + + + + + + lifecycle + + superseded + + diff --git a/tests/testthat.R b/tests/testthat.R index f280fec..00d0ee2 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -3,8 +3,8 @@ # # Where should you do additional test configuration? # Learn more about the roles of various files in: -# * https://r-pkgs.org/tests.html -# * https://testthat.r-lib.org/reference/test_package.html#special-files +# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview +# * https://testthat.r-lib.org/articles/special-files.html library(testthat) library(bundle) diff --git a/tests/testthat/test_bundle_embed.R b/tests/testthat/test_bundle_embed.R index fb0fb78..b18e3d7 100644 --- a/tests/testthat/test_bundle_embed.R +++ b/tests/testthat/test_bundle_embed.R @@ -11,8 +11,8 @@ test_that("bundling + unbundling step_umap", { prep_rec <- function() { set.seed(1) - rec <- recipe(mpg ~ ., data = mtcars) %>% - step_umap(all_predictors(), outcome = vars(mpg), num_comp = 2) %>% + rec <- recipe(mpg ~ ., data = mtcars) |> + step_umap(all_predictors(), outcome = vars(mpg), num_comp = 2) |> prep() rec @@ -93,5 +93,8 @@ test_that("bundling + unbundling step_umap", { # compare baked data expect_equal(as.data.frame(rec_data), as.data.frame(rec_unbundled_data)) - expect_equal(as.data.frame(rec_data), as.data.frame(rec_butchered_unbundled_data)) + expect_equal( + as.data.frame(rec_data), + as.data.frame(rec_butchered_unbundled_data) + ) }) diff --git a/tests/testthat/test_bundle_h2o.R b/tests/testthat/test_bundle_h2o.R index 0b92991..6d3df5a 100644 --- a/tests/testthat/test_bundle_h2o.R +++ b/tests/testthat/test_bundle_h2o.R @@ -100,7 +100,6 @@ test_that("bundling + unbundling h2o fits (regression)", { h2o.shutdown(prompt = FALSE) res - }, args = list( mod_butchered_bundle = mod_butchered_bundle, @@ -130,7 +129,6 @@ test_that("bundling + unbundling h2o fits (regression)", { # compare predictions expect_equal(mod_preds, mod_unbundled_preds) expect_equal(mod_preds, mod_butchered_unbundled_preds) - }) test_that("bundling + unbundling h2o fits (binary)", { @@ -154,7 +152,12 @@ test_that("bundling + unbundling h2o fits (binary)", { bin_data <- as.h2o( - modeldata::sim_noise(100, 5, outcome = "classification", num_classes = 2) + modeldata::sim_noise( + 100, + 5, + outcome = "classification", + num_classes = 2 + ) ) bin_fit <- @@ -276,8 +279,6 @@ test_that("bundling + unbundling h2o fits (binary)", { # compare predictions expect_equal(mod_preds, mod_unbundled_preds) expect_equal(mod_preds, mod_butchered_unbundled_preds) - - }) test_that("bundling + unbundling h2o fits (multinomial)", { @@ -301,7 +302,12 @@ test_that("bundling + unbundling h2o fits (multinomial)", { multi_data <- as.h2o( - modeldata::sim_noise(100, 5, outcome = "classification", num_classes = 3) + modeldata::sim_noise( + 100, + 5, + outcome = "classification", + num_classes = 3 + ) ) multi_fit <- @@ -581,7 +587,12 @@ test_that("bundling + unbundling h2o fits (automl classification)", { bin_data <- as.h2o( - modeldata::sim_noise(100, 5, outcome = "classification", num_classes = 2) + modeldata::sim_noise( + 100, + 5, + outcome = "classification", + num_classes = 2 + ) ) bin_fit <- diff --git a/tests/testthat/test_bundle_keras.R b/tests/testthat/test_bundle_keras.R index 667ec7b..5d2ddf6 100644 --- a/tests/testthat/test_bundle_keras.R +++ b/tests/testthat/test_bundle_keras.R @@ -7,31 +7,37 @@ test_that("bundling + unbundling keras fits", { library(keras) test_data <- - mtcars[26:32, 2:ncol(mtcars)] %>% - as.matrix() %>% + mtcars[26:32, 2:ncol(mtcars)] |> + as.matrix() |> scale() # define a function to fit a model ------------------------------------------- fit_model <- function() { - cars <- mtcars[1:25, ] %>% - as.matrix() %>% + cars <- mtcars[1:25, ] |> + as.matrix() |> scale() x_train <- cars[, 2:ncol(cars)] y_train <- cars[, 1] keras_fit <- - keras_model_sequential() %>% - layer_dense(units = 1, input_shape = ncol(x_train), activation = 'linear') %>% + keras_model_sequential() |> + layer_dense( + units = 1, + input_shape = ncol(x_train), + activation = 'linear' + ) |> compile( loss = 'mean_squared_error', optimizer = optimizer_adam(learning_rate = .01) ) - keras_fit %>% + keras_fit |> fit( - x = x_train, y = y_train, - epochs = 100, batch_size = 1, + x = x_train, + y = y_train, + epochs = 100, + batch_size = 1, verbose = 0 ) diff --git a/tests/testthat/test_bundle_parsnip.R b/tests/testthat/test_bundle_parsnip.R index db4d6cd..4565ea0 100644 --- a/tests/testthat/test_bundle_parsnip.R +++ b/tests/testthat/test_bundle_parsnip.R @@ -11,9 +11,9 @@ test_that("bundling + unbundling parsnip model_fits (xgboost)", { set.seed(1) mod <- - boost_tree(trees = 5, mtry = 3) %>% - set_mode("regression") %>% - set_engine("xgboost") %>% + boost_tree(trees = 5, mtry = 3) |> + set_mode("regression") |> + set_engine("xgboost") |> fit(mpg ~ ., data = mtcars) } diff --git a/tests/testthat/test_bundle_stacks.R b/tests/testthat/test_bundle_stacks.R index 8730ca2..2df88e2 100644 --- a/tests/testthat/test_bundle_stacks.R +++ b/tests/testthat/test_bundle_stacks.R @@ -13,10 +13,10 @@ test_that("bundling + unbundling tidymodels stacks", { set.seed(1) mod <- - stacks() %>% - add_candidates(reg_res_lr) %>% - add_candidates(reg_res_svm) %>% - blend_predictions(times = 10) %>% + stacks() |> + add_candidates(reg_res_lr) |> + add_candidates(reg_res_svm) |> + blend_predictions(times = 10) |> fit_members() } diff --git a/tests/testthat/test_bundle_torch.R b/tests/testthat/test_bundle_torch.R index b0e132c..d6b3569 100644 --- a/tests/testthat/test_bundle_torch.R +++ b/tests/testthat/test_bundle_torch.R @@ -47,9 +47,9 @@ test_that("bundling + unbundling torch fits", { "Net", initialize = function() { self$encoder <- nn_sequential( - nn_conv2d(1, 6, kernel_size=5), + nn_conv2d(1, 6, kernel_size = 5), nn_relu(), - nn_conv2d(6, 16, kernel_size=5), + nn_conv2d(6, 16, kernel_size = 5), nn_relu() ) self$decoder <- nn_sequential( @@ -60,21 +60,21 @@ test_that("bundling + unbundling torch fits", { ) }, forward = function(x) { - x %>% - self$encoder() %>% + x |> + self$encoder() |> self$decoder() }, predict = function(x) { - self$encoder(x) %>% + self$encoder(x) |> torch_flatten(start_dim = 2) } ) - mod <- net %>% + mod <- net |> setup( loss = nn_mse_loss(), optimizer = optim_adam - ) %>% + ) |> fit(train_dl, epochs = 1, valid_data = test_dl) mod_bundle <- bundle(mod) @@ -91,7 +91,7 @@ test_that("bundling + unbundling torch fits", { mod_preds <- as_array(predict(mod, test_dl)) mod_unbundled_preds <- as_array(predict(mod_unbundled, test_dl)) - expect_equal(mod_preds[1:100,1:100], mod_unbundled_preds[1:100,1:100]) + expect_equal(mod_preds[1:100, 1:100], mod_unbundled_preds[1:100, 1:100]) # only want bundled model and original preds to persist. # test again in new R session: @@ -101,7 +101,6 @@ test_that("bundling + unbundling torch fits", { library(torch) library(luz) - mod_unbundled <- unbundle(mod_bundle) as_array(predict(mod_unbundled, test_dl)) } @@ -114,7 +113,7 @@ test_that("bundling + unbundling torch fits", { ) ) - expect_equal(mod_preds[1:100,1:100], mod_unbundled_preds_new[1:100,1:100]) + expect_equal(mod_preds[1:100, 1:100], mod_unbundled_preds_new[1:100, 1:100]) # interaction with butcher expect_silent({ @@ -129,5 +128,8 @@ test_that("bundling + unbundling torch fits", { ) ) - expect_equal(mod_preds[1:100,1:100], mod_unbundled_preds_butchered[1:100,1:100]) + expect_equal( + mod_preds[1:100, 1:100], + mod_unbundled_preds_butchered[1:100, 1:100] + ) }) diff --git a/tests/testthat/test_bundle_workflows.R b/tests/testthat/test_bundle_workflows.R index 1027444..8eeccf5 100644 --- a/tests/testthat/test_bundle_workflows.R +++ b/tests/testthat/test_bundle_workflows.R @@ -16,18 +16,18 @@ test_that("bundling + unbundling tidymodels workflows (xgboost + step_log)", { set.seed(1) spec <- - boost_tree(trees = 5, mtry = 3) %>% - set_mode("regression") %>% + boost_tree(trees = 5, mtry = 3) |> + set_mode("regression") |> set_engine("xgboost") rec <- - recipe(mpg ~ ., data = mtcars) %>% + recipe(mpg ~ ., data = mtcars) |> step_log(hp) mod <- - workflow() %>% - add_model(spec) %>% - add_recipe(rec) %>% + workflow() |> + add_model(spec) |> + add_recipe(rec) |> fit(data = mtcars) mod @@ -143,18 +143,18 @@ test_that("bundling + unbundling tidymodels workflows (lm + step_umap)", { set.seed(1) spec <- - linear_reg() %>% - set_mode("regression") %>% + linear_reg() |> + set_mode("regression") |> set_engine("lm") rec <- - recipe(mpg ~ ., data = mtcars) %>% + recipe(mpg ~ ., data = mtcars) |> step_umap(all_predictors(), outcome = vars(mpg), num_comp = 2) mod <- - workflow() %>% - add_model(spec) %>% - add_recipe(rec) %>% + workflow() |> + add_model(spec) |> + add_recipe(rec) |> fit(data = mtcars) mod diff --git a/tests/testthat/test_bundle_xgboost.R b/tests/testthat/test_bundle_xgboost.R index c6b9256..f153597 100644 --- a/tests/testthat/test_bundle_xgboost.R +++ b/tests/testthat/test_bundle_xgboost.R @@ -13,21 +13,21 @@ test_that("bundling + unbundling xgboost fits", { data(agaricus.train) if (utils::packageVersion("xgboost") > "2.0.0.0") { xgb <- xgboost( - x = agaricus.train$data, - y = agaricus.train$label, - max_depth = 2, - learning_rate = 1, - nthread = 2, - nrounds = 2, + x = agaricus.train$data, + y = agaricus.train$label, + max_depth = 2, + learning_rate = 1, + nthread = 2, + nrounds = 2, objective = "reg:squarederror" ) } else { xgb <- xgboost( data = agaricus.train$data, label = agaricus.train$label, - max_depth = 2, - eta = 1, - nthread = 2, + max_depth = 2, + eta = 1, + nthread = 2, nrounds = 2, objective = "binary:logistic" ) @@ -101,7 +101,9 @@ test_that("bundling + unbundling xgboost fits", { mod_preds <- predict(mod_fit, agaricus.test$data) # check classes - expect_true(any(class(mod_bundle) %in% c("bundled_xgb.Booster", "bundled_xgboost"))) + expect_true(any( + class(mod_bundle) %in% c("bundled_xgb.Booster", "bundled_xgboost") + )) expect_s3_class(unbundle(mod_bundle), "xgb.Booster") # ensure that the situater function didn't bring along the whole model diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R index 8c81181..e119971 100644 --- a/tests/testthat/test_utils.R +++ b/tests/testthat/test_utils.R @@ -20,7 +20,7 @@ test_that("situate constructor works", { a_1_env <- environment(a_1()) a_2_env <- environment(a_2()) - expect_true( "a_" %in% names(a_1_env)) + expect_true("a_" %in% names(a_1_env)) expect_false("a_" %in% names(a_2_env)) }) @@ -34,9 +34,9 @@ test_that("swap_element works", { set.seed(1) mod <- - boost_tree(trees = 5, mtry = 3) %>% - set_mode("regression") %>% - set_engine("xgboost") %>% + boost_tree(trees = 5, mtry = 3) |> + set_mode("regression") |> + set_engine("xgboost") |> fit(mpg ~ ., data = mtcars) res <- swap_element(mod, "fit") @@ -48,6 +48,3 @@ test_that("swap_element works", { expect_silent(silly <- swap_element(mod, "silly", "nonexistent", "element")) expect_equal(mod, silly) }) - - - diff --git a/vignettes/bundle.Rmd b/vignettes/bundle.Rmd index bb7fd15..b16251d 100644 --- a/vignettes/bundle.Rmd +++ b/vignettes/bundle.Rmd @@ -12,7 +12,8 @@ resource_files: - figures/diagram_04.png --- -```{r, include = FALSE} +```{r} +#| include: false should_eval <- rlang::is_installed("keras") && rlang::is_installed("callr") && @@ -32,13 +33,15 @@ Typically, models in R exist in memory and can be saved as `.rds` files. However This vignette walks through how to prepare a statistical model for saving to demonstrate the benefits of using bundle. -```{r setup} +```{r} +#| label: setup library(bundle) ``` In addition to the package itself, we'll load the keras and xgboost packages to fit some example models, and the callr package to generate fresh R sessions to test our models inside of. -```{r setup-exts} +```{r} +#| label: setup-exts library(keras) library(xgboost) @@ -49,9 +52,10 @@ library(callr) As an example, let's fit a model with the keras package, building a neural network that models miles per gallon using the rest of the variables in the built-in `mtcars` dataset. -```{r mtcars-fit} -cars <- mtcars %>% - as.matrix() %>% +```{r} +#| label: mtcars-fit +cars <- mtcars |> + as.matrix() |> scale() x_train <- cars[1:25, 2:ncol(cars)] @@ -61,14 +65,14 @@ x_test <- cars[26:32, 2:ncol(cars)] y_test <- cars[26:32, 1] keras_fit <- - keras_model_sequential() %>% - layer_dense(units = 1, input_shape = ncol(x_train), activation = 'linear') %>% + keras_model_sequential() |> + layer_dense(units = 1, input_shape = ncol(x_train), activation = 'linear') |> compile( loss = 'mean_squared_error', optimizer = optimizer_adam(learning_rate = .01) ) -keras_fit %>% +keras_fit |> fit( x = x_train, y = y_train, epochs = 100, batch_size = 1, @@ -78,13 +82,20 @@ keras_fit %>% Easy peasy! Now, given that this model is trained, we assume that it's ready to go to predict on new data. Our mental map might look something like this: -```{r diagram-01, echo = FALSE, fig.alt = "A diagram showing a rectangle, labeled model object, and another rectangle, labeled predictions. The two are connected by an arrow from model object to predictions, with the label predict.", out.width = '100%'} +```{r} +#| label: diagram-01 +#| echo: false +#| fig-alt: A diagram showing a rectangle, labeled model object, and another rectangle, +#| labeled predictions. The two are connected by an arrow from model object to predictions, +#| with the label predict. +#| out-width: 100% knitr::include_graphics("figures/diagram_01.png") ``` We pass a model object to the `predict()` function, along with some new data to predict on, and get predictions back. Let's try that out: -```{r predict-example} +```{r} +#| label: predict-example predict(keras_fit, x_test) ``` @@ -101,7 +112,8 @@ We'll just make use of two of the arguments to the function `r()`: As an example: -```{r callr-example} +```{r} +#| label: callr-example r( function(x) { x * 2 @@ -121,7 +133,8 @@ So, our approach might be: First, saving our model object to a file: -```{r keras-save} +```{r} +#| label: keras-save temp_file <- tempfile() saveRDS(keras_fit, file = temp_file) @@ -129,7 +142,10 @@ saveRDS(keras_fit, file = temp_file) Now, starting up a fresh R session and predicting on new data: -```{r keras-fresh-rds, linewidth = 60, error = TRUE} +```{r} +#| label: keras-fresh-rds +#| linewidth: 60 +#| error: true r( function(temp_file, new_data) { library(keras) @@ -151,7 +167,15 @@ After a bit of poking around in keras' documentation, you might come across the Given this new understanding, we can update our mental map a bit. Some objects require extra information when they're loaded into new environments in order to do their thing. In this case, this keras model object needs access to additional references in order to predict on new data. -```{r diagram-02, echo = FALSE, fig.alt = "A diagram showing the same pair of rectangles as before, connected by the arrow labeled predict. This time, though, we introduce two boxes labeled reference. These two boxes are connected to the arrow labeled predict with dotted arrows, to show that, most of the time, we don't need to think about including them in our workflow.", out.width = '100%'} +```{r} +#| label: diagram-02 +#| echo: false +#| fig-alt: A diagram showing the same pair of rectangles as before, connected by the +#| arrow labeled predict. This time, though, we introduce two boxes labeled reference. +#| These two boxes are connected to the arrow labeled predict with dotted arrows, to +#| show that, most of the time, we don't need to think about including them in our +#| workflow. +#| out-width: 100% knitr::include_graphics("figures/diagram_02.png") ``` @@ -167,14 +191,16 @@ keras' vignette is really informative in telling us what we ought to do from her Saving our model object with their methods: -```{r save-model-tf} +```{r} +#| label: save-model-tf temp_dir <- tempdir() save_model_tf(keras_fit, filepath = temp_dir) ``` Now, starting up a fresh R session and predicting on new data: -```{r fresh-keras-fit} +```{r} +#| label: fresh-keras-fit r( function(temp_dir, new_data) { library(keras) @@ -200,7 +226,15 @@ We've done the work of figuring that out, and it turns out the interface is a li What if we could just use the same function for any R object, and it would _just work_? -```{r diagram-03, echo = FALSE, fig.alt = "A diagram showing the same set of rectangles, representing a prediction problem, as before. This version of the diagram adds two boxes, labeled R Session number one, and R session number two. In R session number two, we have a new rectangle labeled standalone model object. In focus is the arrow from the model object, in R Session number one, to the standalone model object in R session number two.", out.width = '100%'} +```{r} +#| label: diagram-03 +#| echo: false +#| fig-alt: A diagram showing the same set of rectangles, representing a prediction problem, +#| as before. This version of the diagram adds two boxes, labeled R Session number +#| one, and R session number two. In R session number two, we have a new rectangle +#| labeled standalone model object. In focus is the arrow from the model object, in +#| R Session number one, to the standalone model object in R session number two. +#| out-width: 100% knitr::include_graphics("figures/diagram_03.png") ``` @@ -208,7 +242,14 @@ knitr::include_graphics("figures/diagram_03.png") bundle provides a consistent interface to prepare R model objects to be saved and re-loaded. The package provides two functions, `bundle()` and `unbundle()`, that take care of all of the minutae of preparing to save and load R objects effectively. -```{r diagram-04, echo = FALSE, fig.alt = "A replica of the previous diagram, where the arrow previously connecting the model object in R session one and the standalone model object in R session two is connected by a verb called bundle. The bundle function outputs an object called a bundle.", out.width = '100%'} +```{r} +#| label: diagram-04 +#| echo: false +#| fig-alt: A replica of the previous diagram, where the arrow previously connecting +#| the model object in R session one and the standalone model object in R session two +#| is connected by a verb called bundle. The bundle function outputs an object called +#| a bundle. +#| out-width: 100% knitr::include_graphics("figures/diagram_04.png") ``` @@ -222,13 +263,15 @@ When `unbundle()` is called on a bundle object, the `situate()` element of the b To be a bit more concrete, lets return to the keras example. Bundling the model fit: -```{r keras-bundle} +```{r} +#| label: keras-bundle keras_bundle <- bundle(keras_fit) ``` Now, starting up a fresh R session and predicting on new data: -```{r keras-fresh-bundle} +```{r} +#| label: keras-fresh-bundle r( function(model_bundle, new_data) { library(bundle) @@ -250,7 +293,8 @@ The best part is, if you wanted to do the same thing for an xgboost object, you First, fitting a quick xgboost model: -```{r xgboost-fit} +```{r} +#| label: xgboost-fit xgb_fit <- xgboost( data = x_train, @@ -261,13 +305,15 @@ xgb_fit <- Now, bundling it: -```{r xgboost-bundle} +```{r} +#| label: xgboost-bundle xgb_bundle <- bundle(xgb_fit) ``` Now, starting up a fresh R session and predicting on new data: -```{r xgboost-fresh-bundle} +```{r} +#| label: xgboost-fresh-bundle r( function(model_bundle, new_data) { library(bundle)