Skip to content

Commit c5cf6c3

Browse files
authored
Add files via upload
1 parent 33ce988 commit c5cf6c3

1 file changed

Lines changed: 142 additions & 0 deletions

File tree

RT_Corrector_XCMS.R

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
suppressPackageStartupMessages({
2+
library(xcms)
3+
library(MSnbase)
4+
library(reticulate)
5+
})
6+
7+
# Python read model
8+
.ensure_py_pkgs <- function(pkgs) {
9+
reticulate::py_config()
10+
for (p in pkgs) {
11+
if (!reticulate::py_module_available(p)) {
12+
reticulate::py_require(p, action = "add")
13+
}
14+
}
15+
}
16+
17+
18+
.load_py_models <- function(pkl_path) {
19+
pickle <- reticulate::import("pickle", convert = FALSE)
20+
builtins <- reticulate::import("builtins", convert = FALSE)
21+
f <- builtins$open(pkl_path, "rb")
22+
on.exit(f$close(), add = TRUE)
23+
pickle$load(f)
24+
}
25+
26+
.select_model_for_file <- function(py_models, file_path,
27+
input_suffix = ".mzML",
28+
model_suffix = ".txt") {
29+
fn <- basename(file_path)
30+
base <- tools::file_path_sans_ext(fn)
31+
32+
keys <- unique(c(
33+
paste0(base, model_suffix),
34+
paste0(base, "_feature_list", model_suffix),
35+
fn,
36+
base,
37+
paste0(base, ".txt"),
38+
paste0(base, ".csv")
39+
))
40+
41+
for (k in keys) {
42+
m <- tryCatch(py_models$get(k, NULL), error = function(e) NULL)
43+
if (!is.null(m)) return(m)
44+
}
45+
NULL
46+
}
47+
48+
# Prediction
49+
.predict_batch_min <- function(py_model, x_min) {
50+
inputs <- lapply(as.numeric(x_min), function(z) list(z))
51+
res <- py_model(inputs)
52+
r <- reticulate::py_to_r(res)
53+
54+
if (is.null(r)) return(rep(NA_real_, length(x_min)))
55+
if (is.atomic(r)) return(as.numeric(r))
56+
if (is.matrix(r) || is.data.frame(r)) return(as.numeric(r[, 1]))
57+
if (is.list(r)) {
58+
return(vapply(r, function(el) as.numeric(el)[1], numeric(1)))
59+
}
60+
as.numeric(r)
61+
}
62+
63+
# adjustedRtime builder
64+
.build_adjustedRtime_from_models <- function(xdata, models_by_sample, verbose = TRUE) {
65+
66+
raw_rt <- rtime(xdata, adjusted = FALSE)
67+
fd <- MSnbase::fData(xdata)
68+
69+
map_col <- "fileIdx"
70+
file_map <- as.integer(fd[[map_col]])
71+
n_files <- max(file_map, na.rm = TRUE)
72+
73+
unit <- if (max(raw_rt, na.rm = TRUE) > 200) "sec" else "min"
74+
raw_min <- if (unit == "sec") raw_rt / 60 else raw_rt
75+
corr_min <- raw_min
76+
77+
for (i in seq_len(n_files)) {
78+
idx <- which(file_map == i)
79+
if (!length(idx)) next
80+
m <- models_by_sample[[i]]
81+
if (is.null(m)) next
82+
83+
pred <- .predict_batch_min(m, raw_min[idx])
84+
bad <- is.na(pred)
85+
pred[bad] <- raw_min[idx][bad]
86+
corr_min[idx] <- pred
87+
88+
if (verbose) {
89+
delta <- pred - raw_min[idx]
90+
j <- which.max(abs(delta))
91+
max_delta <- delta[j]
92+
93+
message(
94+
"[file ", i, "] max ΔRT (min): ",
95+
sprintf("%+.4f", max_delta)
96+
)
97+
}
98+
}
99+
100+
corr_store <- if (unit == "sec") corr_min * 60 else corr_min
101+
scan_names <- rownames(fd)
102+
103+
out <- vector("list", n_files)
104+
for (i in seq_len(n_files)) {
105+
idx <- which(file_map == i)
106+
v <- corr_store[idx]
107+
names(v) <- scan_names[idx]
108+
out[[i]] <- v
109+
}
110+
111+
out
112+
}
113+
114+
# MAIN FUNCTION
115+
apply_RT_Corrector_XCMS <- function(xdata,
116+
model_pkl,
117+
input_suffix = ".mzML",
118+
model_suffix = ".txt",
119+
verbose = TRUE) {
120+
121+
stopifnot(is(xdata, "XCMSnExp"))
122+
123+
.ensure_py_pkgs(c("numpy", "scipy", "scikit-learn", "joblib"))
124+
125+
py_models <- .load_py_models(model_pkl)
126+
127+
files <- fileNames(xdata_peaks)
128+
models_by_sample <- vector("list", length(files))
129+
for (i in seq_along(files)) {
130+
models_by_sample[[i]] <-
131+
.select_model_for_file(py_models, files[[i]],
132+
input_suffix, model_suffix)
133+
}
134+
135+
adj_list <- .build_adjustedRtime_from_models(xdata_peaks, models_by_sample, verbose)
136+
137+
xdata_corr <- xdata
138+
139+
adjustedRtime(xdata_corr) <- adj_list
140+
141+
xdata_corr
142+
}

0 commit comments

Comments
 (0)