diff --git a/R/dataProcess.R b/R/dataProcess.R index e9581c8b..4691398b 100755 --- a/R/dataProcess.R +++ b/R/dataProcess.R @@ -146,7 +146,9 @@ dataProcess = function( peptides_dict = makePeptidesDictionary(as.data.table(unclass(raw)), normalization) input = MSstatsPrepareForDataProcess(raw, logTrans, fix_missing) + rm(raw) input = MSstatsNormalize(input, normalization, peptides_dict, nameStandards) + rm(peptides_dict) input = MSstatsMergeFractions(input) input = MSstatsHandleMissing(input, summaryMethod, MBimpute, censoredInt, maxQuantileforCensored) @@ -173,6 +175,7 @@ dataProcess = function( "== Summarization is done.") getOption("MSstatsMsg")("INFO", " == Summarization is done.") + gc(verbose = FALSE) output = MSstatsSummarizationOutput(input, summarized, processed, summaryMethod, MBimpute, censoredInt) output @@ -412,7 +415,7 @@ MSstatsSummarizeSingleLinear = function(single_protein, pred = predict(survival_fit, newdata = .SD, se.fit = TRUE) list(pred$fit, pred$se.fit^2 + sigma2) }] - + rm(survival_fit) if (is_labeled_reference) { single_protein[, predicted := ifelse(censored & is_labeled_ref == FALSE, predicted, NA)] single_protein[, newABUNDANCE := ifelse(censored & is_labeled_ref == FALSE, predicted, newABUNDANCE)] @@ -489,7 +492,7 @@ MSstatsSummarizeSingleLinear = function(single_protein, # Todo: enable SRM linear summarization result[, LABEL := if (is_labeled_reference) "L" else unique(single_protein$LABEL)] } - + rm(fit) return(list(result, survival)) } } @@ -567,7 +570,7 @@ MSstatsSummarizeSingleTMP = function(single_protein, impute, censored_symbol, } else { single_protein[, predicted := NA_real_] } - + rm(survival_fit) if (is_labeled_reference) { single_protein[, predicted := ifelse(censored & is_labeled_ref == FALSE, predicted, NA)] single_protein[, newABUNDANCE := ifelse(censored & is_labeled_ref == FALSE, predicted, newABUNDANCE)] diff --git a/R/utils_imputation.R b/R/utils_imputation.R index 406920ee..99bffcf1 100644 --- a/R/utils_imputation.R +++ b/R/utils_imputation.R @@ -46,8 +46,10 @@ data = input, dist = "gaussian", control = list(maxiter=aft_iterations)) } - } + } } + fit$y = NULL + fit$linear.predictors = NULL fit } diff --git a/R/utils_summarization.R b/R/utils_summarization.R index 3b762ae7..47bbef21 100644 --- a/R/utils_summarization.R +++ b/R/utils_summarization.R @@ -180,10 +180,11 @@ } } if (!equal_variances) { - linear_model = .updateUnequalVariances(input = input, + linear_model = .updateUnequalVariances(input = input, fit = linear_model, num_iter = 1) } + linear_model$model = NULL linear_model } @@ -197,27 +198,25 @@ #' @return merMod #' @keywords internal .updateUnequalVariances = function(input, fit, num_iter) { - weight = NULL - + weight = abs.resids = loess.fitted = NULL + if (!data.table::is.data.table(input)) { + input = data.table::as.data.table(input) + } for (i in seq_len(num_iter)) { if (i == 1) { - abs.resids = data.frame(abs.resids = abs(fit$residuals)) - fitted = data.frame(fitted = fit$fitted.values) - input = data.frame(input, - "abs.resids" = abs.resids, - "fitted" = fitted) + input[, abs.resids := abs(fit$residuals)] + input[, fitted := fit$fitted.values] } fit.loess = loess(abs.resids ~ fitted, data = input) - loess.fitted = data.frame(loess.fitted = fitted(fit.loess)) - input = data.frame(input, "loess.fitted" = loess.fitted) - ## loess fitted valuaes are predicted sd - input$weight = 1 / (input$loess.fitted ^ 2) - input = input[, !(colnames(input) %in% "abs.resids")] + input[, loess.fitted := fitted(fit.loess)] + ## loess fitted values are predicted sd + input[, weight := 1 / (loess.fitted ^ 2)] + input[, abs.resids := NULL] ## re-fit using weight wls.fit = lm(formula(fit), data = input, weights = weight) - abs.resids = data.frame(abs.resids = abs(wls.fit$residuals)) - input = data.frame(input, "abs.resids" = abs.resids) - input = input[, -which(colnames(input) %in% c("loess.fitted", "weight"))] + input[, abs.resids := abs(wls.fit$residuals)] + input[, loess.fitted := NULL] + input[, weight := NULL] } wls.fit } diff --git a/inst/tinytest/test_memory_optimization_fits.R b/inst/tinytest/test_memory_optimization_fits.R new file mode 100644 index 00000000..21da53fe --- /dev/null +++ b/inst/tinytest/test_memory_optimization_fits.R @@ -0,0 +1,82 @@ +# Tests that .fitSurvival() and .fitLinearModel() return model objects with +# heavy fields stripped, and that downstream predict/summary/vcov still work. + + +# --- .fitSurvival: $y and $linear.predictors are stripped --------------------- +# +# Neither field is needed by predict.survreg(). + +surv_input = data.table::data.table( + newABUNDANCE = c(10.1, 11.2, 9.5, 10.8, 12.0, 9.0, 11.5, 10.3, + 10.5, 11.0, 9.8, 10.2, 12.2, 9.3, 11.8, 10.6), + cen = c(1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1), + RUN = factor(rep(1:4, each = 4)), + FEATURE = factor(rep(c("feat1", "feat2", "feat3", "feat4"), 4)) +) +# cen = 0 marks left-censored rows; set them to the upper-bound threshold. +surv_input[cen == 0, newABUNDANCE := 7.0] + +surv_fit = MSstats:::.fitSurvival(surv_input, aft_iterations = 90) + +expect_true(is.null(surv_fit$y), + info = "survreg $y should be stripped") +expect_true(is.null(surv_fit$linear.predictors), + info = "survreg $linear.predictors should be stripped") + +expect_false(is.null(surv_fit$coefficients), + info = "survreg $coefficients must survive stripping") +expect_false(is.null(surv_fit$scale), + info = "survreg $scale must survive stripping") + +predictions = predict(surv_fit, newdata = surv_input) +expect_equal(length(predictions), nrow(surv_input), + info = "predict() must work on the stripped survreg object") +expect_true(all(is.finite(predictions)), + info = "predict() should return finite values") + +unstripped_fit = survival::survreg( + survival::Surv(newABUNDANCE, cen, type = "left") ~ FEATURE + RUN, + data = surv_input, dist = "gaussian") +expect_true(object.size(surv_fit) < object.size(unstripped_fit), + info = paste("Stripped survreg should be smaller.", + "Stripped:", object.size(surv_fit), + "Unstripped:", object.size(unstripped_fit))) + + +# --- .fitLinearModel: $model is stripped -------------------------------------- +# +# $model is not needed by summary() or vcov(). + +lm_input = data.table::data.table( + newABUNDANCE = c(10.1, 11.2, 9.5, 10.8, 12.0, 9.0, 11.5, 10.3, + 10.5, 11.0, 9.8, 10.2, 12.2, 9.3, 11.8, 10.6), + RUN = factor(rep(1:4, each = 4)), + FEATURE = factor(rep(c("feat1", "feat2", "feat3", "feat4"), 4)), + weights = NA +) + +lm_fit = MSstats:::.fitLinearModel(lm_input, is_single_feature = FALSE, + is_labeled = FALSE, equal_variances = TRUE) + +expect_true(is.null(lm_fit$model), + info = "lm $model should be stripped") + +expect_false(is.null(lm_fit$coefficients), + info = "lm $coefficients must survive stripping") +expect_false(is.null(lm_fit$qr), + info = "lm $qr must survive stripping (needed by summary and vcov)") +expect_false(is.null(lm_fit$residuals), + info = "lm $residuals must survive stripping (needed by summary)") + +lm_summary = summary(lm_fit) +expect_false(is.null(lm_summary$coefficients), + info = "summary() must work on the stripped lm object") +lm_vcov = vcov(lm_fit) +expect_true(is.matrix(lm_vcov), + info = "vcov() must work on the stripped lm object") + +unstripped_lm = lm(newABUNDANCE ~ FEATURE + RUN, data = lm_input) +expect_true(object.size(lm_fit) < object.size(unstripped_lm), + info = paste("Stripped lm should be smaller.", + "Stripped:", object.size(lm_fit), + "Unstripped:", object.size(unstripped_lm)))