-
Notifications
You must be signed in to change notification settings - Fork 0
/
taxa_mediation_run.R
76 lines (61 loc) · 2.75 KB
/
taxa_mediation_run.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
source("./preprocess.R")
source("./debiasedLasso.R")
source("./taxa_mediation_model.R")
source("./parameter_estimation.R")
# run the preprocess
processed <- process_phyloseq_data(counts, meta, taxa,
taxrank = "Genus",
lib.size.cut.off = 1000,
prevalence.cut.off = 0.2,
mean.prop.cut.off = 0.0001,
relative.abundance = TRUE,
clr.transform = TRUE)
# effect could be "indirect_effects", "direct_effect" or "total_effect"
run <- function(a,b, effect = C("indirect_effects","direct_effect","total_effect")) {
binary <- subset_phyloseq_samples(processed = processed, treatment = "host_clinical_state", binary = c(a,b))
if (any(is.na(binary))) {
stop("Data contains missing values. Please handle them before proceeding.")
}
binary$isolate <- as.character(binary$isolate)
# Filter isolates with at least two samples
df_filtered <- binary %>%
group_by(isolate) %>%
filter(n() >= 2) %>%
ungroup()
df_filtered$isolate <- droplevels(as.factor(df_filtered$isolate))
group_sizes <- table(df_filtered$isolate)
if (any(group_sizes < 2)) {
stop("There are still groups with fewer than two observations.")
}
ID <- df_filtered$isolate
Y <- df_filtered$host_fev1
X <- df_filtered$host_clinical_state
cov <- df_filtered$Host_age
otu <- df_filtered %>%
dplyr::select(-c(isolate, host_fev1, Host_age, host_clinical_state)) %>%
as.matrix()
#run mediation analysis with high-dimensional, compositional data
res <- mediation_with_multilevel(
X=X, Y=Y, OTU = otu, COV = cov, grp = ID, method = "bootstrap",
n_boot = 2000, seed = 1234,
n_cores = parallel::detectCores() - 1)
for (i in effect) {
if (!is.null(res[[i]])) {
save_file <- res[[i]]
file_path <- paste0(
"./Outputs/",
a, "_", b, "_", i, "_table.tsv"
)
write.table(save_file, file_path, row.names = TRUE, col.names = NA)
} else {
warning(paste("Effect", i, "not found in results. Skipping."))
}
}
return(res)
}
run(a="Baseline", b="Exacerbation", effect = c("indirect_effects","direct_effect","total_effect"))
run(a="Baseline", b="Treatment", effect = c("indirect_effects","direct_effect","total_effect"))
run(a="Baseline", b="Recovery", effect = c("indirect_effects","direct_effect","total_effect"))
run(a="Exacerbation", b="Treatment", effect = c("indirect_effects","direct_effect","total_effect"))
run(a="Exacerbation",b="Recovery", effect = c("indirect_effects","direct_effect","total_effect"))
run(a="Treatment", b="Recovery", effect = c("indirect_effects","direct_effect","total_effect"))