forked from zuberek/probAge
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
14 changed files
with
836 additions
and
517 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,4 +26,5 @@ dependencies: | |
- jax | ||
- numpyro | ||
- blackjax | ||
- nutpie | ||
- nutpie | ||
- multiprocess |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
# %% ######################## | ||
# IMPORTING | ||
%load_ext autoreload | ||
%autoreload 2 | ||
|
||
import sys | ||
sys.path.append("..") # fix to import modules from root | ||
from src.general_imports import * | ||
|
||
# %% ######################## | ||
# LOAD DATA | ||
# Read the three raw wave1 tables. Each is indexed by its 'Basename' column, | ||
# so later column assignments between the tables align rows on that index. | ||
# NOTE(review): `pd` and `paths` are not imported explicitly in this script; | ||
# presumably they come from the star import of src.general_imports -- confirm. | ||
|
||
||
# Prefix of the processed output file name written in the SAVE RESULTS cell. | ||
# NOTE(review): the raw paths below hard-code 'wave1' instead of reusing it. | ||
DATASET_NAME = 'wave1' | ||
|
||
||
# Participant phenotype table (also holds prevalent disease, per the file name). | ||
pheno = pd.read_csv(f'{paths.DATA_RAW_DIR}/wave1/phenotypes_and_prevalent_disease.csv', index_col='Basename') | ||
# Survival table; its 'Event' and 'tte' columns are copied into pheno later. | ||
survival = pd.read_csv(f'{paths.DATA_RAW_DIR}/wave1/survival.csv', index_col='Basename') | ||
# Results of other clocks; four of its acceleration columns are copied later. | ||
clock_results = pd.read_csv(f'{paths.DATA_RAW_DIR}/wave1/DNAmAge_output.csv', index_col='Basename') | ||
|
||
# %% ######################## | ||
# ADD PARTICIPANT META DATA | ||
# Derive log-transformed covariates on pheno, then copy in the other clocks' | ||
# acceleration columns and the survival columns (all aligned on the index). | ||
|
||
||
# Create weighted_smoke phenotype | ||
# Normalize pack_years data: log(1 + pack_years), so 0 pack-years maps to 0. | ||
pheno['norm_pack_years'] = np.log(1+pheno.pack_years) | ||
|
||
||
# Combine ever_smoke with pack_years: divide by exp(ever_smoke), so a larger | ||
# ever_smoke value shrinks the result. | ||
# NOTE(review): the coding of ever_smoke is not visible here -- confirm. | ||
pheno['weighted_smoke'] = pheno['norm_pack_years']/np.exp(pheno['ever_smoke']) | ||
|
||
||
# Plain log of bmi (no offset): a bmi of 0 gives -inf, a missing one gives NaN. | ||
pheno['log_bmi'] = np.log(pheno.bmi) | ||
# NOTE(review): same expression as 'norm_pack_years' above, so the two columns | ||
# are duplicates; keep both only if downstream code reads both names. | ||
pheno['log_pack_1'] = np.log(pheno.pack_years+1) | ||
# log(1 + units), same offset trick as for pack_years. | ||
pheno['log_units_1'] = np.log(pheno.units+1) | ||
|
||
||
# Add the age accelerations assigned to each participant by other clocks | ||
#################### | ||
# Rename the clock output columns to short, manageable names. | ||
# NOTE(review): 'EEAA' is relabelled 'Horvath'. In the DNAm age calculator | ||
# output EEAA is, as far as I know, the extrinsic acceleration measure derived | ||
# from the Hannum estimate -- confirm this label is intended. | ||
clock_results = clock_results.rename(columns={ | ||
'AgeAccelerationResidualHannum': 'Hannum', | ||
'EEAA': 'Horvath', | ||
'AgeAccelGrim': 'GrimAge', | ||
'AgeAccelPheno': 'PhenoAge', | ||
}) | ||
clock_columns = ['Hannum','Horvath','GrimAge','PhenoAge'] | ||
# Assignment aligns on the index: pheno rows with no matching row in | ||
# clock_results receive NaN in these columns. | ||
pheno[clock_columns] = clock_results[clock_columns] | ||
|
||
||
### | ||
# Copy the survival columns, again aligned on the index. | ||
# NOTE(review): 'tte' is presumably time-to-event -- confirm units with the data. | ||
pheno[['Event', 'tte']] = survival[['Event', 'tte']] | ||
|
||
# %% ######################## | ||
# SAVE RESULTS | ||
# Write the augmented participant table to | ||
# <DATA_PROCESSED_DIR>/<DATASET_NAME>_participants.csv; the index is written | ||
# as the first column (pandas default). | ||
|
||
||
pheno.to_csv(f'{paths.DATA_PROCESSED_DIR}/{DATASET_NAME}_participants.csv') | ||
|
||
# %% |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
# %% ######################## | ||
# IMPORTING | ||
%load_ext autoreload | ||
%autoreload 2 | ||
|
||
import sys | ||
sys.path.append("..") # fix to import modules from root | ||
from src.general_imports import * | ||
|
||
# %% ######################## | ||
# LOAD DATA | ||
# Read the three raw wave3 tables. Each is indexed by its 'Basename' column, | ||
# so later column assignments between the tables align rows on that index. | ||
# NOTE(review): `pd` and `paths` are not imported explicitly in this script; | ||
# presumably they come from the star import of src.general_imports -- confirm. | ||
|
||
||
# Prefix of the processed output file name written in the SAVE RESULTS cell. | ||
# NOTE(review): the raw paths below hard-code 'wave3' instead of reusing it. | ||
DATASET_NAME = 'wave3' | ||
|
||
||
# Participant phenotype table (also holds prevalent disease, per the file name). | ||
pheno = pd.read_csv(f'{paths.DATA_RAW_DIR}/wave3/phenotypes_and_prevalent_disease.csv', index_col='Basename') | ||
# Survival table; its 'Event' and 'tte' columns are copied into pheno later. | ||
survival = pd.read_csv(f'{paths.DATA_RAW_DIR}/wave3/survival.csv', index_col='Basename') | ||
# Results of other clocks; four of its acceleration columns are copied later. | ||
clock_results = pd.read_csv(f'{paths.DATA_RAW_DIR}/wave3/DNAmAge_output.csv', index_col='Basename') | ||
|
||
# %% ######################## | ||
# ADD PARTICIPANT META DATA | ||
# Derive log-transformed covariates on pheno, then copy in the other clocks' | ||
# acceleration columns and the survival columns (all aligned on the index). | ||
|
||
||
# Create weighted_smoke phenotype | ||
# Normalize pack_years data: log(1 + pack_years), so 0 pack-years maps to 0. | ||
pheno['norm_pack_years'] = np.log(1+pheno.pack_years) | ||
|
||
||
# Combine ever_smoke with pack_years: divide by exp(ever_smoke), so a larger | ||
# ever_smoke value shrinks the result. | ||
# NOTE(review): the coding of ever_smoke is not visible here -- confirm. | ||
pheno['weighted_smoke'] = pheno['norm_pack_years']/np.exp(pheno['ever_smoke']) | ||
|
||
||
# Plain log of bmi (no offset): a bmi of 0 gives -inf, a missing one gives NaN. | ||
pheno['log_bmi'] = np.log(pheno.bmi) | ||
# NOTE(review): same expression as 'norm_pack_years' above, so the two columns | ||
# are duplicates; keep both only if downstream code reads both names. | ||
pheno['log_pack_1'] = np.log(pheno.pack_years+1) | ||
# log(1 + units), same offset trick as for pack_years. | ||
pheno['log_units_1'] = np.log(pheno.units+1) | ||
|
||
||
# Add the age accelerations assigned to each participant by other clocks | ||
#################### | ||
# Rename the clock output columns to short, manageable names. | ||
# NOTE(review): 'EEAA' is relabelled 'Horvath'. In the DNAm age calculator | ||
# output EEAA is, as far as I know, the extrinsic acceleration measure derived | ||
# from the Hannum estimate -- confirm this label is intended. | ||
clock_results = clock_results.rename(columns={ | ||
'AgeAccelerationResidualHannum': 'Hannum', | ||
'EEAA': 'Horvath', | ||
'AgeAccelGrim': 'GrimAge', | ||
'AgeAccelPheno': 'PhenoAge', | ||
}) | ||
clock_columns = ['Hannum','Horvath','GrimAge','PhenoAge'] | ||
# Assignment aligns on the index: pheno rows with no matching row in | ||
# clock_results receive NaN in these columns. | ||
pheno[clock_columns] = clock_results[clock_columns] | ||
|
||
||
### | ||
# Copy the survival columns, again aligned on the index. | ||
# NOTE(review): 'tte' is presumably time-to-event -- confirm units with the data. | ||
pheno[['Event', 'tte']] = survival[['Event', 'tte']] | ||
|
||
# %% ######################## | ||
# SAVE RESULTS | ||
# Write the augmented participant table to | ||
# <DATA_PROCESSED_DIR>/<DATASET_NAME>_participants.csv; the index is written | ||
# as the first column (pandas default). | ||
|
||
||
pheno.to_csv(f'{paths.DATA_PROCESSED_DIR}/{DATASET_NAME}_participants.csv') | ||
|
||
# %% |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# %% ######################## | ||
# IMPORTING | ||
%load_ext autoreload | ||
%autoreload 2 | ||
|
||
import sys | ||
sys.path.append("..") # fix to import modules from root | ||
from src.general_imports import * | ||
|
||
# %% ######################## | ||
# LOAD DATA | ||
# Read the wave4 phenotype table (indexed by participant 'id') and the sample | ||
# sheet (indexed by 'Sample_Sentrix_ID'), then merge them into one table that | ||
# is indexed like the other waves ('Basename'). | ||
# NOTE(review): `pd` and `paths` are not imported explicitly in this script; | ||
# presumably they come from the star import of src.general_imports -- confirm. | ||
|
||
||
# Prefix of the processed output file name written at the end of this script. | ||
DATASET_NAME = 'wave4' | ||
|
||
||
pheno = pd.read_csv(f'{paths.DATA_RAW_DIR}/wave4/2023-08-02_w4_phenotypes.csv', index_col='id') | ||
# Survival data is not loaded for wave4; the read is left disabled below. | ||
# survival = pd.read_csv(f'{paths.DATA_RAW_DIR}/wave4/2023-08-02_w4_deaths.csv', index_col='Basename') | ||
|
||
||
sample_meta = pd.read_csv(f'{paths.DATA_RAW_DIR}/wave4/sample_meta.csv', index_col='Sample_Sentrix_ID') | ||
|
||
||
# fix index naming to fit with other waves | ||
# Keep only the sample name, age and sex columns of the sample sheet. | ||
sample_meta = sample_meta[['Sample_Name', 'age', 'sex']] | ||
sample_meta.index.name = 'Basename' | ||
# 'Sample_Name' becomes 'id' so it can be matched against pheno's 'id' index. | ||
sample_meta = sample_meta.rename({'Sample_Name': 'id'}, axis='columns') | ||
# Left join (pandas default): every sample row is kept, and phenotype columns | ||
# are looked up through the sample's 'id' value; the result keeps the | ||
# 'Basename' index. | ||
# NOTE(review): if the phenotype file also has 'age' or 'sex' columns, join() | ||
# raises on the overlap because no suffixes are given -- confirm it does not. | ||
pheno = sample_meta.join(pheno, on='id') | ||
|
||
# %% ######################## | ||
# ADD PARTICIPANT META DATA | ||
# Derive log-transformed covariates on pheno. No clock-acceleration or | ||
# survival columns are added in this script. | ||
|
||
||
# Create weighted_smoke phenotype | ||
# Normalize pack_years data: log(1 + pack_years), so 0 pack-years maps to 0. | ||
pheno['norm_pack_years'] = np.log(1+pheno.pack_years) | ||
|
||
||
# Combine ever_smoke with pack_years: divide by exp(ever_smoke), so a larger | ||
# ever_smoke value shrinks the result. | ||
# NOTE(review): the coding of ever_smoke is not visible here -- confirm. | ||
pheno['weighted_smoke'] = pheno['norm_pack_years']/np.exp(pheno['ever_smoke']) | ||
|
||
||
# Plain log of bmi (no offset): a bmi of 0 gives -inf, a missing one gives NaN. | ||
pheno['log_bmi'] = np.log(pheno.bmi) | ||
# NOTE(review): same expression as 'norm_pack_years' above, so the two columns | ||
# are duplicates; keep both only if downstream code reads both names. | ||
pheno['log_pack_1'] = np.log(pheno.pack_years+1) | ||
# log(1 + units), same offset trick as for pack_years. | ||
pheno['log_units_1'] = np.log(pheno.units+1) | ||
|
||
||
### | ||
# Disabled: depends on the survival table, whose read is commented out above. | ||
# pheno[['dob_ym', 'dod_ym']] = survival[['dob_ym', 'dod_ym']] | ||
|
||
# %% | ||
# Write the augmented participant table to | ||
# <DATA_PROCESSED_DIR>/<DATASET_NAME>_participants.csv; the index is written | ||
# as the first column (pandas default). | ||
pheno.to_csv(f'{paths.DATA_PROCESSED_DIR}/{DATASET_NAME}_participants.csv') |
Oops, something went wrong.