Skip to content

Commit

Permalink
Merge pull request #121 from bdwilliamson/master
Browse files Browse the repository at this point in the history
Add capability for VIM analysis to be based on discrete SL
  • Loading branch information
youyifong authored Feb 5, 2022
2 parents 54e0d50 + 2a14a2a commit e6dc33b
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 85 deletions.
1 change: 1 addition & 0 deletions .gitignore
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -650,3 +650,4 @@ cor_coxph/output/janssen_pooled_realADCP/D29IncludeNotMolecConfirmedstart1/timep
cor_coxph/output/janssen_pooled_realADCP/D29IncludeNotMolecConfirmedstart1/ylims.cor.ENSEMBLE.Rdata
cor_coxph/output/janssen_pooled_realPsV/D29IncludeNotMolecConfirmed/coxph_slopes.Rdata
cor_coxph/output/janssen_pooled_realPsV/D29IncludeNotMolecConfirmedstart1/coxph_slopes.Rdata
cor_surrogates/output/*.csv
55 changes: 28 additions & 27 deletions config.yml
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@
default: &default
is_ows_trial: no
case_cohort: no
use_ensemble_sl: no

hvtn705base: &hvtn705base
two_marker_timepoints: no
timepoints: [210]
subset_variable: None
subset_value: All
llox_label: loq
times: [B, Day210]
times: [B, Day210]
time_labels: [Day 1, Day 210]
study_name: HVTN705
covariates_riskscore: ~.+ RSA + Age + BMI + Riskscore
Expand All @@ -30,8 +31,8 @@ hvtn705: &hvtn705
assay_labels: [IgG Vx-VT-C,ADCP Vx-C97ZA,IgG3 V1V2 breadth,IgG3 Env breadth,Multi-epitope functions,IgG Vx-VT-M,IgG3 gp140 Vx-C97ZA,IgG3 gp140 Vx-Mosaic,IgG3 gp41,IgG3 gp120 breadth,IgG3 gp140 breadth,IgG3 Multi-epitope]
lloxs: [40,NA,NA,NA,NA,50,NA,NA,NA,NA,NA,NA]
uloqs: [1884160,NA,NA,NA,NA,1869824,22000,22000,22000,NA,NA,NA]
primary_assays: [ELCZ,ADCPgp140C97ZAfib,IgG340mdw_V1V2,IgG340mdw_gp120_gp140_vm,mdw_xassay]
multivariate_assays: [ELCZ,ADCPgp140C97ZAfib,IgG340mdw_V1V2,IgG340mdw_gp120_gp140_vm]
primary_assays: [ELCZ,ADCPgp140C97ZAfib,IgG340mdw_V1V2,IgG340mdw_gp120_gp140_vm,mdw_xassay]
multivariate_assays: [ELCZ,ADCPgp140C97ZAfib,IgG340mdw_V1V2,IgG340mdw_gp120_gp140_vm]
data_cleaned: /trials/vaccine/p705/analysis/lab/cc/copcor/HVTN705_firstcasecontrolprocesseddata.csv

hvtn705V1V2: &hvtn705V1V2
Expand Down Expand Up @@ -75,7 +76,7 @@ moderna_real: &moderna_real
data_cleaned: /trials/covpn/p3001/analysis/correlates/Part_A_Blinded_Phase_Data/adata/P3001ModernaCOVEimmunemarkerdata_correlates_processed_v1.1_lvmn_added_Jan14_2022.csv
#data_cleaned: ../../data/P3001ModernaCOVEimmunemarkerdata_correlates_processed_v1.1_lvmn_added_Jan14_2022.csv
study_name: COVE
multivariate_assays: [bindRBD + pseudoneutid50 + liveneutmn50, pseudoneutid50 + liveneutmn50]
multivariate_assays: [bindRBD + pseudoneutid50 + liveneutmn50, pseudoneutid50 + liveneutmn50]
num_boot_replicates: 1000
num_perm_replicates: 10000

Expand Down Expand Up @@ -105,7 +106,7 @@ janssen_trial_realbAb: &janssen_trial_realbAb # binding Ab markers
assays: [bindSpike, bindRBD]
assay_labels: [Binding Antibody to Spike, Binding Antibody to RBD]
assay_labels_short: [Anti Spike IgG (BAU/ml), Anti RBD IgG (BAU/ml)]
primary_assays: []
primary_assays: []
study_name: ENSEMBLE
num_boot_replicates: 1000
num_perm_replicates: 10 # not doing Westfall and Young multiple-testing adjustment
Expand All @@ -131,23 +132,23 @@ janssen_trial_realPsV: &janssen_trial_realPsV
num_perm_replicates: 10 # not doing Westfall and Young multiple-testing adjustment


janssen_pooled_realbAb:
janssen_pooled_realbAb:
<<: *janssen_trial_realbAb
data_cleaned: /trials/covpn/p3003/analysis/correlates/Part_A_Blinded_Phase_Data/adata/janssen_pooled_realbAb_data_processed_with_riskscore.csv
subset_variable: None
subset_value: All
covariates_riskscore: ~.+ risk_score + as.factor(Region)
covariates_norisksco: ~.+ Age + as.factor(Region)

janssen_pooled_realPsV:
janssen_pooled_realPsV:
<<: *janssen_trial_realPsV
data_cleaned: /trials/covpn/p3003/analysis/correlates/Part_A_Blinded_Phase_Data/adata/janssen_pooled_realPsV_data_processed_with_riskscore.csv
subset_variable: None
subset_value: All
covariates_riskscore: ~.+ risk_score + as.factor(Region)
covariates_norisksco: ~.+ Age + as.factor(Region)
janssen_pooled_realADCP:

janssen_pooled_realADCP:
<<: *janssen_trial_realADCP
data_cleaned: /trials/covpn/p3003/analysis/correlates/Part_A_Blinded_Phase_Data/adata/janssen_pooled_realADCP_data_processed_with_riskscore.csv
subset_variable: None
Expand All @@ -156,23 +157,23 @@ janssen_pooled_realADCP:
covariates_norisksco: ~.+ Age + as.factor(Region)


janssen_na_realbAb:
janssen_na_realbAb:
<<: *janssen_trial_realbAb
data_cleaned: /trials/covpn/p3003/analysis/correlates/Part_A_Blinded_Phase_Data/adata/janssen_na_realbAb_data_processed_with_riskscore.csv
subset_variable: Region
subset_value: 0
covariates_riskscore: ~.+ risk_score
covariates_norisksco: ~.+ Age

janssen_na_realPsV:
janssen_na_realPsV:
<<: *janssen_trial_realPsV
data_cleaned: /trials/covpn/p3003/analysis/correlates/Part_A_Blinded_Phase_Data/adata/janssen_na_realPsV_data_processed_with_riskscore.csv
subset_variable: Region
subset_value: 0
covariates_riskscore: ~.+ risk_score
covariates_norisksco: ~.+ Age

janssen_na_realADCP:
janssen_na_realADCP:
<<: *janssen_trial_realADCP
data_cleaned: /trials/covpn/p3003/analysis/correlates/Part_A_Blinded_Phase_Data/adata/janssen_na_realADCP_data_processed_with_riskscore.csv
subset_variable: Region
Expand All @@ -181,23 +182,23 @@ janssen_na_realADCP:
covariates_norisksco: ~.+ Age


janssen_la_realbAb:
janssen_la_realbAb:
<<: *janssen_trial_realbAb
data_cleaned: /trials/covpn/p3003/analysis/correlates/Part_A_Blinded_Phase_Data/adata/janssen_la_realbAb_data_processed_with_riskscore.csv
subset_variable: Region
subset_value: 1
covariates_riskscore: ~.+ risk_score
covariates_norisksco: ~.+ Age

janssen_la_realPsV:
janssen_la_realPsV:
<<: *janssen_trial_realPsV
data_cleaned: /trials/covpn/p3003/analysis/correlates/Part_A_Blinded_Phase_Data/adata/janssen_la_realPsV_data_processed_with_riskscore.csv
subset_variable: Region
subset_value: 1
covariates_riskscore: ~.+ risk_score
covariates_norisksco: ~.+ Age

janssen_la_realADCP:
janssen_la_realADCP:
<<: *janssen_trial_realADCP
data_cleaned: /trials/covpn/p3003/analysis/correlates/Part_A_Blinded_Phase_Data/adata/janssen_la_realADCP_data_processed_with_riskscore.csv
subset_variable: Region
Expand All @@ -206,23 +207,23 @@ janssen_la_realADCP:
covariates_norisksco: ~.+ Age


janssen_sa_realbAb:
janssen_sa_realbAb:
<<: *janssen_trial_realbAb
data_cleaned: /trials/covpn/p3003/analysis/correlates/Part_A_Blinded_Phase_Data/adata/janssen_sa_realbAb_data_processed_with_riskscore.csv
subset_variable: Region
subset_value: 2
covariates_riskscore: ~.+ risk_score
covariates_norisksco: ~.+ Age

janssen_sa_realPsV:
janssen_sa_realPsV:
<<: *janssen_trial_realPsV
data_cleaned: /trials/covpn/p3003/analysis/correlates/Part_A_Blinded_Phase_Data/adata/janssen_sa_realPsV_data_processed_with_riskscore.csv
subset_variable: Region
subset_value: 2
covariates_riskscore: ~.+ risk_score
covariates_norisksco: ~.+ Age

janssen_sa_realADCP:
janssen_sa_realADCP:
<<: *janssen_trial_realADCP
data_cleaned: /trials/covpn/p3003/analysis/correlates/Part_A_Blinded_Phase_Data/adata/janssen_sa_realADCP_data_processed_with_riskscore.csv
subset_variable: Region
Expand Down Expand Up @@ -284,9 +285,9 @@ D57:
ph2: ph2.D57
wt: wt.D57
WtStratum: Wstratum
EventIndPrimary: EventIndPrimaryD57
EventIndPrimary: EventIndPrimaryD57
EventTimePrimary: EventTimePrimaryD57
Earlyendpoint: EarlyendpointD57
Earlyendpoint: EarlyendpointD57
tpeak: 57
tpeaklag: 7
tfinal.tpeak: 0
Expand All @@ -299,7 +300,7 @@ D29:
ph2: ph2.D29
wt: wt.D29
WtStratum: Wstratum
EventIndPrimary: EventIndPrimaryD29
EventIndPrimary: EventIndPrimaryD29
EventTimePrimary: EventTimePrimaryD29
Earlyendpoint: EarlyendpointD29
tpeak: 29
Expand All @@ -314,7 +315,7 @@ D29start1:
ph2: ph2.D29start1
wt: wt.D29start1
WtStratum: Wstratum
EventIndPrimary: EventIndPrimaryD29
EventIndPrimary: EventIndPrimaryD29
EventTimePrimary: EventTimePrimaryD29
Earlyendpoint: EarlyendpointD29start1
tpeak: 29
Expand All @@ -330,7 +331,7 @@ D29IncludeNotMolecConfirmed:
ph2: ph2.D29
wt: wt.D29
WtStratum: Wstratum
EventIndPrimary: EventIndPrimaryIncludeNotMolecConfirmedD29
EventIndPrimary: EventIndPrimaryIncludeNotMolecConfirmedD29
EventTimePrimary: EventTimePrimaryIncludeNotMolecConfirmedD29
Earlyendpoint: EarlyendpointD29
tpeak: 29
Expand All @@ -346,7 +347,7 @@ D29IncludeNotMolecConfirmedstart1:
ph2: ph2.D29start1
wt: wt.D29start1
WtStratum: Wstratum
EventIndPrimary: EventIndPrimaryIncludeNotMolecConfirmedD29
EventIndPrimary: EventIndPrimaryIncludeNotMolecConfirmedD29
EventTimePrimary: EventTimePrimaryIncludeNotMolecConfirmedD29
Earlyendpoint: EarlyendpointD29start1
tpeak: 29
Expand All @@ -361,17 +362,17 @@ D210:
wt: wt.D210
WtStratum: Sampstratum.D210
tpsStratum: tps.stratum
EventIndPrimary: Delta.D210
EventIndPrimary: Delta.D210
EventTimePrimary: Ttilde.D210
tpeak: 210
tpeaklag: 1
tfinal.tpeak: 550
txt.endpoint: HIV
txt.coxph.note2: No. at-risk = estimated number in the population for analysis, i.e. per-protocol vaccine recipients not infected through 1 days post Month 7 visit; no. cases = number of this cohort with an observed endpoint.


####################
# two time points
# two time points

D29D57:
tinterm: 29
Expand Down
23 changes: 18 additions & 5 deletions cor_surrogates/code/get_vimp.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ source(here::here("..", "_common.R"))
# common setup for CV super learners and variable importance
source(here::here("code", "cor_surrogates_setup.R"))

# drop "SL.xgboost.2.yes" and "SL.xgboost.4.yes" from SL_library as class-balancing learners in the variable
# importance computation doesn’t make sense – the regression we’re doing there (to account for the two-phase sampling)
# is based on a continuous outcome, not a binary outcome, so there shouldn’t be any imbalance.
# drop "SL.xgboost.2.yes" and "SL.xgboost.4.yes" from SL_library as class-balancing learners in the variable
# importance computation doesn’t make sense – the regression we’re doing there (to account for the two-phase sampling)
# is based on a continuous outcome, not a binary outcome, so there shouldn’t be any imbalance.
for (i in 1:length(SL_library)) {
if(SL_library[[i]][1] %in% c("SL.xgboost.2.yes", "SL.xgboost.4.yes")){
if(!exists("vec"))
Expand Down Expand Up @@ -44,6 +44,12 @@ X <- dat.ph1 %>%

# read in the fits for the baseline risk factors
baseline_fits <- readRDS(here("output", paste0("CVSLfits_vacc_", endpoint, "_", varset_names[1], ".rds")))
baseline_aucs <- readRDS(here("output", paste0("CVSLaucs_vacc_", endpoint, "_", varset_names[1], ".rds")))
# When the ensemble super learner is not requested, replace every baseline
# CV.SL fit with the result of make_discrete_sl_auc(), pairing fit k with the
# k-th entry of baseline_aucs.
# NOTE(review): make_discrete_sl_auc() is defined outside this file; presumably
# it selects the discrete SL based on the supplied AUCs -- confirm in the setup
# code.
if (!use_ensemble_sl) {
  # seq_along() is safe for an empty list, whereas 1:length(x) yields c(1, 0)
  baseline_fits <- lapply(seq_along(baseline_fits), function(k) {
    make_discrete_sl_auc(cvsl_fit = baseline_fits[[k]], all_aucs = baseline_aucs[[k]])
  })
}

# get the common CV folds
list_of_indices <- as.list(seq_len(length(baseline_fits)))
Expand Down Expand Up @@ -79,13 +85,20 @@ for (i in seq_len(nrow(varset_matrix))) {
}
# get the correct CV.SL lists
full_fits <- readRDS(here("output", paste0("CVSLfits_vacc_", endpoint, "_", varset_names[i], ".rds")))
full_aucs <- readRDS(here("output", paste0("CVSLaucs_vacc_", endpoint, "_", varset_names[i], ".rds")))
# When the ensemble super learner is not requested, replace every CV.SL fit for
# the current variable set with the result of make_discrete_sl_auc(), pairing
# fit k with the k-th entry of full_aucs.
# NOTE(review): make_discrete_sl_auc() is defined outside this file; presumably
# it selects the discrete SL based on the supplied AUCs -- confirm in the setup
# code.
if (!use_ensemble_sl) {
  # Iterate over full_fits itself rather than baseline_fits, so the index range
  # always matches the list being indexed; the index is named `k` so it does
  # not shadow the enclosing loop variable `i`.
  full_fits <- lapply(seq_along(full_fits), function(k) {
    make_discrete_sl_auc(cvsl_fit = full_fits[[k]], all_aucs = full_aucs[[k]])
  })
}
if (i == 1) {
vim_lst <- lapply(list_of_indices, function(l) {
get_cv_vim(seed = seeds[l], Y = full_y, X = X, full_fit = full_fits[[l]], reduced_fit = naive_fits[[l]],
index = this_s, type = "auc", scale = "identity", cross_fitting_folds = cf_folds[[l]],
sample_splitting_folds = sample_splitting_folds[[l]], V = vim_V,
C = C, Z = c("Y", paste0("X", which(briskfactors %in% names(X)))), sl_lib = sl_lib,
ipc_est_type = "ipw", ipc_weights = all_ipw_weights_treatment, baseline = TRUE)
ipc_est_type = "ipw", ipc_weights = all_ipw_weights_treatment, baseline = TRUE,
use_ensemble = use_ensemble_sl)
})
} else {
# get variable importance for each fold
Expand All @@ -94,7 +107,7 @@ for (i in seq_len(nrow(varset_matrix))) {
index = this_s, type = "auc", scale = "identity", cross_fitting_folds = cf_folds[[l]],
sample_splitting_folds = sample_splitting_folds[[l]], V = vim_V,
C = C, Z = c("Y", paste0("X", which(briskfactors %in% names(X)))), sl_lib = sl_lib,
ipc_est_type = "ipw", ipc_weights = all_ipw_weights_treatment)
ipc_est_type = "ipw", ipc_weights = all_ipw_weights_treatment, use_ensemble = use_ensemble_sl)
})
}
# pool variable importance and predictiveness over the list
Expand Down
Loading

0 comments on commit e6dc33b

Please sign in to comment.