Skip to content

Commit

Permalink
Merge pull request #152 from reworkhow/runtests
Browse files Browse the repository at this point in the history
add fast_blocks methods
  • Loading branch information
reworkhow authored Mar 12, 2024
2 parents 97d5fb4 + a59acb1 commit 704bea1
Show file tree
Hide file tree
Showing 12 changed files with 255 additions and 12 deletions.
2 changes: 1 addition & 1 deletion Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -629,4 +629,4 @@ version = "1.48.0+0"
[[deps.p7zip_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
version = "17.4.0+0"
version = "17.4.0+0"
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ Distributions = "0.21, 0.22, 0.23, 0.24, 0.25"
ForwardDiff = "0.10"
ProgressMeter = "1.0, 1.1, 1.2"
StatsBase = "0.30, 0.31, 0.32, 0.33"
julia = "1"
julia = "1"
17 changes: 16 additions & 1 deletion src/1.JWAS/src/JWAS.jl
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ function runMCMC(mme::MME,df;
printout_frequency = chain_length+1,
big_memory = false,
double_precision = false,
fast_blocks = false,
#MCMC samples (defaut to marker effects and hyperparametes (variance componets))
output_folder = "results",
output_samples_for_all_parameters = false,
Expand Down Expand Up @@ -283,7 +284,7 @@ function runMCMC(mme::MME,df;
printout_model_info,printout_frequency, single_step_analysis,
fitting_J_vector,missing_phenotypes,
update_priors_frequency,outputEBV,output_heritability,prediction_equation,
seed,double_precision,output_folder,RRM)
seed,double_precision,output_folder,RRM,fast_blocks)
############################################################################
# Check 1)Arguments; 2)Input Pedigree,Genotype,Phenotypes,
# 3)output individual IDs; 4)Priors 5)Prediction equation
Expand All @@ -307,6 +308,20 @@ function runMCMC(mme::MME,df;
set_marker_hyperparameters_variances_and_pi(mme)
end
############################################################################
#fast blocks #now only work for one geno
############################################################################
if fast_blocks != false
if fast_blocks == true
block_size = Int(floor(sqrt(mme.M[1].nObs)))
elseif typeof(fast_blocks) <: Number
block_size = Int(floor(fast_blocks))
end
mme.MCMCinfo.fast_blocks = collect(range(1, step=block_size, stop=mme.M[1].nMarkers))
mme.MCMCinfo.chain_length = Int(floor(chain_length/(mme.MCMCinfo.fast_blocks[2]-mme.MCMCinfo.fast_blocks[1]))) #number of outer loop
println("BLOCK SIZE: $block_size")
flush(stdout)
end
############################################################################
# Adhoc functions
############################################################################
#save MCMC samples for all parameters (?seperate function user call)
Expand Down
20 changes: 17 additions & 3 deletions src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ function MCMC_BayesianAlphabet(mme,df)
is_nnbayes_partial = mme.nonlinear_function != false && mme.is_fully_connected==false
is_activation_fcn = mme.is_activation_fcn
nonlinear_function = mme.nonlinear_function
fast_blocks = mme.MCMCinfo.fast_blocks
############################################################################
# Categorical Traits (starting values for maker effects defaulting to 0s)
############################################################################
Expand Down Expand Up @@ -45,8 +46,13 @@ function MCMC_BayesianAlphabet(mme,df)
if mme.M != 0
for Mi in mme.M
#Mi.α (starting values were set in get_genotypes)
mGibbs = GibbsMats(Mi.genotypes,invweights)
mGibbs = GibbsMats(Mi.genotypes,invweights,fast_blocks=mme.MCMCinfo.fast_blocks)
Mi.mArray,Mi.mRinvArray,Mi.mpRinvm = mGibbs.xArray,mGibbs.xRinvArray,mGibbs.xpRinvx
if fast_blocks != false
Mi.MArray = mGibbs.XArray
Mi.MRinvArray = mGibbs.XRinvArray
Mi.MpRinvM = mGibbs.XpRinvX
end

if Mi.method=="BayesB" #α=β.*δ
Mi.G.val = fill(Mi.G.val,Mi.nMarkers) #a scalar in BayesC but a vector in BayeB
Expand Down Expand Up @@ -204,12 +210,20 @@ function MCMC_BayesianAlphabet(mme,df)
if Mi.G.constraint==true
megaBayesABC!(Mi,wArray,mme.R.val,locus_effect_variances)
else
MTBayesABC!(Mi,wArray,mme.R.val,locus_effect_variances,mme.nModels)
if fast_blocks == false
MTBayesABC!(Mi,wArray,mme.R.val,locus_effect_variances,mme.nModels)
else
MTBayesABC_block!(Mi,wArray,mme.R.val,locus_effect_variances)
end
end
elseif is_nnbayes_partial
BayesABC!(Mi,wArray[i],mme.R.val[i,i],locus_effect_variances) #this can be parallelized (conflict with others)
else
BayesABC!(Mi,ycorr,mme.R.val,locus_effect_variances)
if fast_blocks == false
BayesABC!(Mi,ycorr,mme.R.val,locus_effect_variances)
else
BayesABC_block!(Mi,ycorr,mme.R.val,locus_effect_variances)
end
end
elseif Mi.method =="RR-BLUP"
if is_multi_trait && !is_nnbayes_partial
Expand Down
2 changes: 1 addition & 1 deletion src/1.JWAS/src/Nonlinear/nnbayes_check.jl
Original file line number Diff line number Diff line change
Expand Up @@ -215,4 +215,4 @@ function nnmm_print_info_input_to_middle_layer(mme)
else
printstyled(" - Bayesian Alphabet: multi-trait Bayesian models are used to sample marker effect. \n",bold=false,color=:green)
end
end
end
2 changes: 1 addition & 1 deletion src/1.JWAS/src/input_data_validation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -488,4 +488,4 @@ function G_constraint!(mme)
end


#TO-DO: multi-trait constraint on non-genetic random effects: modify df and scale
#TO-DO: multi-trait constraint on non-genetic random effects: modify df and scale
61 changes: 61 additions & 0 deletions src/1.JWAS/src/markers/BayesianAlphabet/BayesABC.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,64 @@ function BayesABC!(xArray,xRinvArray,xpRinvx,
end
end
end


function BayesABC_block!(genotypes,ycorr,vare,locus_effect_variances)
BayesABC_block!(genotypes.MArray,genotypes.MRinvArray,genotypes.mpRinvm,
genotypes.genotypes,genotypes.MpRinvM,
ycorr,genotypes.α[1],genotypes.β[1],genotypes.δ[1],vare,
locus_effect_variances,genotypes.π)
end

function BayesABC_block!(XArray,XRinvArray,xpRinvx,
X, XpRinvX,
yCorr,
α,β,δ,
vare,varEffects,π)

logPi = log(π)
logPiComp = log(1-π)
logDelta0 = logPi
invVarRes = 1/vare
invVarEffects = 1 ./ varEffects
logVarEffects = log.(varEffects)
nMarkers = length(α)
XpRinvX = XpRinvX
nblocks = length(XpRinvX)
start_pos = 0
for i in 1:nblocks
XpRinvycorr = XRinvArray[i]*yCorr
αold = copy(α)
block_size = size(XpRinvX[i],1)
nreps = block_size #user-defined nreps=block_size, outer_niter =niter/block_size
for reps = 1:nreps
for j=1:block_size #additional code to save all sampled αs is needed
locus_j = start_pos+j
rhs = (XpRinvycorr[j] + xpRinvx[locus_j]*α[locus_j])*invVarRes
lhs = xpRinvx[locus_j]*invVarRes + invVarEffects[locus_j]
invLhs = 1/lhs
gHat = rhs*invLhs
logDelta1 = -0.5*(log(lhs) + logVarEffects[locus_j] - gHat*rhs) + logPiComp
probDelta1 = 1/(1+ exp(logDelta0 - logDelta1))
oldAlpha = α[locus_j]

if rand() < probDelta1
δ[locus_j] = 1
β[locus_j] = gHat + randn()*sqrt(invLhs)
α[locus_j] = β[locus_j]
BLAS.axpy!(oldAlpha-α[locus_j],view(XpRinvX[i],:,j),XpRinvycorr)
else
if oldAlpha != 0
BLAS.axpy!(oldAlpha,view(XpRinvX[i],:,j),XpRinvycorr)
end
δ[locus_j] = 0
β[locus_j] = randn()*sqrt(varEffects[locus_j])
α[locus_j] = 0
end
end
end

yCorr[:] = yCorr + XArray[i]*(αold-α)[(start_pos+1):(start_pos+block_size)] #?subset at first is better
start_pos += block_size
end
end
94 changes: 94 additions & 0 deletions src/1.JWAS/src/markers/BayesianAlphabet/MTBayesABC.jl
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,97 @@ function MTBayesABC_samplerII!(xArray,

end
end

#block
function MTBayesABC_block!(genotypes,ycorr_array,vare,locus_effect_variances)
MTBayesABC_block!(genotypes.MArray,genotypes.MRinvArray,genotypes.mpRinvm,
genotypes.genotypes,genotypes.MpRinvM,
ycorr_array,genotypes.β,genotypes.δ,genotypes.α,vare,
locus_effect_variances,genotypes.π)
end

function MTBayesABC_block!(XArray,XRinvArray,xpRinvx,
X, XpRinvX,
wArray,
betaArray,deltaArray,alphaArray,
vare,varEffects,
BigPi)
nMarkers = length(xpRinvx)
ntraits = length(alphaArray)

Rinv = inv(vare) #Do Not Use inv.(): elementwise inversion
Ginv = inv.(varEffects) # vector of length p, each element is a t-by-t covariance matrix

#vector of length t, since we only focus one SNP at a time
β = zeros(typeof(betaArray[1][1]),ntraits)
newα = zeros(typeof(alphaArray[1][1]),ntraits)
oldα = zeros(typeof(alphaArray[1][1]),ntraits)
δ = zeros(typeof(deltaArray[1][1]),ntraits)
w = zeros(typeof(wArray[1][1]),ntraits) #for rhs

XpRinvX = XpRinvX
nblocks = length(XpRinvX)
start_pos = 0
for i in 1:nblocks
XpRinvycorr = [XRinvArray[i]*wArray[trait] for trait = 1:ntraits]
oldalphaArray = deepcopy(alphaArray)
block_size = size(XpRinvX[i],1)
nreps = block_size #user-defined nreps=block_size, outer_niter =niter/block_size
for reps = 1:nreps
for j = 1:block_size #additional code to save all sampled αs is needed
locus_j = start_pos + j
for trait = 1:ntraits
β[trait] = betaArray[trait][locus_j]
oldα[trait] = newα[trait] = alphaArray[trait][locus_j]
δ[trait] = deltaArray[trait][locus_j]
w[trait] = XpRinvycorr[trait][j]+xpRinvx[locus_j]*oldα[trait]
end
for k=1:ntraits
Ginv11 = Ginv[locus_j][k,k]
nok = deleteat!(collect(1:ntraits),k)
Ginv12 = Ginv[locus_j][k,nok]
C11 = Ginv11+Rinv[k,k]*xpRinvx[locus_j]
C12 = Ginv12+xpRinvx[locus_j]*Matrix(Diagonal(δ[nok]))*Rinv[k,nok]

invLhs0 = 1/Ginv11
rhs0 = - Ginv12'β[nok]
gHat0 = (rhs0*invLhs0)[1,1]
invLhs1 = 1/C11
rhs1 = w'*Rinv[:,k]-C12'β[nok]
gHat1 = (rhs1*invLhs1)[1,1]

d0 = copy(δ)
d1 = copy(δ)
d0[k] = 0.0
d1[k] = 1.0

logDelta0 = -0.5*(log(Ginv11)- gHat0^2*Ginv11) + log(BigPi[d0]) #logPi
logDelta1 = -0.5*(log(C11)-gHat1^2*C11) + log(BigPi[d1]) #logPiComp

probDelta1 = 1.0/(1.0+exp(logDelta0-logDelta1))
if(rand()<probDelta1)
δ[k] = 1
β[k] = newα[k] = gHat1 + randn()*sqrt(invLhs1)
BLAS.axpy!(oldα[k]-newα[k],view(XpRinvX[i],:,j),XpRinvycorr[k])
else
β[k] = gHat0 + randn()*sqrt(invLhs0)
δ[k] = 0
newα[k] = 0
if oldα[k] != 0
BLAS.axpy!(oldα[k],view(XpRinvX[i],:,j),XpRinvycorr[k])
end
end
end
for trait = 1:ntraits
betaArray[trait][locus_j] = β[trait]
deltaArray[trait][locus_j] = δ[trait]
alphaArray[trait][locus_j] = newα[trait]
end
end
end
for trait = 1:ntraits
wArray[trait][:] = wArray[trait] + XArray[i]*(oldalphaArray[trait]-alphaArray[trait])[(start_pos+1):(start_pos+block_size)]
end
start_pos += block_size
end
end
2 changes: 1 addition & 1 deletion src/1.JWAS/src/markers/readgenotypes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ function get_genotypes(file::Union{AbstractString,Array{Float64,2},Array{Float32
#clean memory
data = nothing
GC.gc()
#Note: why we choose to use `genotypes = Matrix(data[!,2:end])`
#Note: why we choose to use `genotypes = Matrix(data[!,2:end])` due to limitation of DataFrames.jl
#- cannot use `genotypes=@view data[!,2:end]` because the view will be copied as a new DataFrame in QC: `genotypes = genotypes[:,select]`
#- cannot use `select!(genotypes, Not(1))` to simply delete the 1st column of obsID from the DataFrame, because genotypes must be a Matrix used for matrix multiplication (e.g. EBV += Mi.output_genotypes*Mi.α[traiti]).
elseif typeof(file) == DataFrames.DataFrame #Datafarme
Expand Down
28 changes: 26 additions & 2 deletions src/1.JWAS/src/markers/tools4genotypes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,28 @@ function getXpRinvX(X)
return XpRinvX
end

function get_column_blocks_ref(X,fast_blocks=false)
xArray = Array{typeof(X)}(undef,length(fast_blocks))
@showprogress "building prerequisite matrices ..." for i in 1:length(fast_blocks)
pos_start = fast_blocks[i]
pos_end = (i != length(fast_blocks)) ? (fast_blocks[i+1]-1) : size(X,2)
xArray[i] = view(X,:,pos_start:pos_end)
end
return xArray
end

mutable struct GibbsMats
X::Union{Array{Float64,2},Array{Float32,2}}
nrows::Int64
ncols::Int64
xArray #do not declare type because it is a vector of views
xRinvArray #do not declare type because it is a vector of views
xpRinvx::Union{Array{Float64,1},Array{Float32,1}}
function GibbsMats(X::Union{Array{Float64,2},Array{Float32,2}},Rinv)
XArray
XRinvArray
XpRinvX
function GibbsMats(X::Union{Array{Float64,2},Array{Float32,2}},Rinv;fast_blocks=false)
#single-locus, adjusting y, approach
nrows,ncols = size(X)
xArray = get_column_ref(X)
xpRinvx = getXpRinvX(X,Rinv)
Expand All @@ -51,7 +65,17 @@ mutable struct GibbsMats
else
xRinvArray = [x.*Rinv for x in xArray]
end
new(X,nrows,ncols,xArray,xRinvArray,xpRinvx)
#block, adjusting rhs and/or y, approach
if fast_blocks != false
XArray = get_column_blocks_ref(X,fast_blocks)
XRinvArray = @showprogress "building prerequisite matrices ..." [X'Diagonal(Rinv) for X in XArray]
XpRinvX = @showprogress "building prerequisite matrices ..." [XRinvArray[i]*XArray[i] for i in 1:length(XArray)]
else
XArray = XRinvArray = XpRinvX = false
end
new(X,nrows,ncols,
xArray,xRinvArray,xpRinvx,
XArray,XRinvArray,XpRinvX)
end
end

Expand Down
6 changes: 5 additions & 1 deletion src/1.JWAS/src/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ mutable struct Genotypes
mRinvArray #a collection of matrices used in Bayesian Alphabet
mpRinvm #a collection of matrices used in Bayesian Alphabet
mΦΦArray # a collection of matrices used in RRM
MArray #a collection of matrices used in Bayesian Alphabet (rhs approach)
MRinvArray #a collection of matrices used in Bayesian Alphabet (rhs approach)
MpRinvM #a collection of matrices used in Bayesian Alphabet (rhs approach)
D #eigen values used in GBLUP
gammaArray #array used in Bayesian LASSO

Expand All @@ -150,7 +153,7 @@ mutable struct Genotypes
a1,a2,a3,a4,a5,a6,a7,a8,a4,false,
Variance(false,false,false,true,false,false),Variance(false,false,false,true,false,false), #false,false,
false,true, #true,false,
false,false,false,false,false,false,
false,false,false,false,false,false,false,false,false,
false,false,false,false,
false,false,false,false,false,false,false,false,false,
false,a9)
Expand Down Expand Up @@ -184,6 +187,7 @@ mutable struct MCMCinfo
double_precision
output_folder
RRM
fast_blocks
end
################################################################################
#the class MME is shown below with members for models, mixed model equations...
Expand Down
31 changes: 31 additions & 0 deletions test/ssBR-block.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Step 1: Load packages
using Revise
using JWAS,DataFrames,CSV,Statistics,JWAS.Datasets

# Step 2: Read data
phenofile = dataset("phenotypes.csv")
pedfile = dataset("pedigree.csv")
genofile = dataset("genotypes.csv")
phenotypes = CSV.read(phenofile,DataFrame,delim = ',',header=true,missingstrings=["NA"])
pedigree = get_pedigree(pedfile,separator=",",header=true);
genotypes = get_genotypes(genofile,separator=',',method="BayesC");
first(phenotypes,5)

# Step 3: Build Model Equations

model_equation ="y1 = intercept + x1 + x2 + x2*x3 + ID + dam + genotypes";
model = build_model(model_equation);

# Step 4: Set Factors or Covariates
set_covariate(model,"x1");

# Step 5: Set Random or Fixed Effects
set_random(model,"x2");
set_random(model,"ID dam",pedigree);

# Step 6: Run Analysis
out=runMCMC(model,phenotypes,single_step_analysis=true,pedigree=pedigree, chain_length = 100, fast_blocks = 10);

# Step 7: Check Accuruacy
results = innerjoin(out["EBV_y1"], phenotypes, on = :ID)
accuruacy = cor(results[!,:EBV],results[!,:bv1])

0 comments on commit 704bea1

Please sign in to comment.