diff --git a/src/1.JWAS/src/JWAS.jl b/src/1.JWAS/src/JWAS.jl index b469ceb1..f909575a 100644 --- a/src/1.JWAS/src/JWAS.jl +++ b/src/1.JWAS/src/JWAS.jl @@ -108,7 +108,7 @@ export dataset methods = "conventional (no markers)", Pi = 0.0, estimatePi = false, - estimateScale = false) + estimate_scale = false) **Run MCMC for Bayesian Linear Mixed Models with or without estimation of variance components.** @@ -160,9 +160,9 @@ function runMCMC(mme::MME,df; pedigree = false, #parameters for single-step analysis fitting_J_vector = true, #parameters for single-step analysis causal_structure = false, - mega_trait = mme.nonlinear_function == false ? false : true, #NNBayes -> default mega_trait=true + # mega_trait = mme.nonlinear_function == false ? false : true, #NNBayes -> default mega_trait=true missing_phenotypes = mme.nonlinear_function == false ? true : false, #NN-MM -> missing hidden nodes will be sampled - constraint = false, + # constraint = false, RRM = false, # RRM -> false or a matrix (Phi: orthogonalized time covariates) #Genomic Prediction outputEBV = true, @@ -181,7 +181,7 @@ function runMCMC(mme::MME,df; methods = "conventional (no markers)", Pi = 0.0, estimatePi = false, - estimateScale = false, + estimate_scale = false, categorical_trait = false, #this has been moved to build_model() censored_trait = false) #this has been moved to build_model() @@ -215,6 +215,11 @@ function runMCMC(mme::MME,df; end end end + mme.R.constraint=true + for Mi in mme.M + Mi.G.constraint=true + end + nnmm_print_info_input_to_middle_layer(mme) end ############################################################################ #for deprecated JWAS fucntions @@ -225,7 +230,7 @@ function runMCMC(mme::MME,df; Mi.name = "geno" Mi.π = Pi Mi.estimatePi = estimatePi - Mi.estimateScale = estimateScale + Mi.estimate_scale = estimate_scale Mi.method = methods end end @@ -272,7 +277,7 @@ function runMCMC(mme::MME,df; mme.MCMCinfo = MCMCinfo(heterogeneous_residuals, chain_length,burnin,output_samples_frequency, printout_model_info,printout_frequency, single_step_analysis, - fitting_J_vector,missing_phenotypes,constraint,mega_trait,estimate_variance, + fitting_J_vector,missing_phenotypes,estimate_variance, update_priors_frequency,outputEBV,output_heritability,prediction_equation, seed,double_precision,output_folder,RRM) ############################################################################ @@ -322,26 +327,25 @@ function runMCMC(mme::MME,df; if mme.M != 0 for Mi in mme.M Mi.genotypes = map(Float64,Mi.genotypes) - Mi.G = map(Float64,Mi.G) + Mi.G.val = map(Float64,Mi.G.val) Mi.α = map(Float64,Mi.α) end end for random_term in mme.rndTrmVec random_term.Vinv = map(Float64,random_term.Vinv) - random_term.GiOld = map(Float64,random_term.GiOld) - random_term.GiNew = map(Float64,random_term.GiNew) - random_term.Gi = map(Float64,random_term.Gi) + random_term.GiOld.val = map(Float64,random_term.GiOld.val) + random_term.GiNew.val = map(Float64,random_term.GiNew.val) + random_term.Gi.val = map(Float64,random_term.Gi.val) end mme.Gi = map(Float64,mme.Gi) end - # NNBayes mega trait: from multi-trait to multiple single-trait - if mme.MCMCinfo.mega_trait == true - printstyled(" - Bayesian Alphabet: multiple independent single-trait Bayesian models are used to sample marker effect. \n",bold=false,color=:green) - printstyled(" - Multi-threaded parallelism: $nThread threads are used to run single-trait models in parallel. \n",bold=false,color=:green) - nnbayes_mega_trait(mme) - elseif mme.nonlinear_function != false #only print for NNBayes - printstyled(" - Bayesian Alphabet: multi-trait Bayesian models are used to sample marker effect. \n",bold=false,color=:green) + #constraint on covariance matrix + if mme.R.constraint==true + R_constraint!(mme) #modify mme.R.df and mme.R.scale; scale is a diagonal matrix + end + if mme.M[1].G.constraint==true + G_constraint!(mme) #modify Mi.G.df and Mi.G.scale; scale is a diagonal matrix end # NNBayes: modify parameters for partial connected NN @@ -489,7 +493,14 @@ function getMCMCinfo(mme) @printf("%-30s %20s\n","starting_value",mme.sol != false ? "true" : "false") @printf("%-30s %20d\n","printout_frequency",MCMCinfo.printout_frequency) @printf("%-30s %20d\n","output_samples_frequency",MCMCinfo.output_samples_frequency) - @printf("%-30s %20s\n","constraint",MCMCinfo.constraint ? "true" : "false") + # @printf("%-30s %20s\n","constraint",MCMCinfo.constraint ? "true" : "false") + @printf("%-30s %19s\n","constraint on residual variance",mme.R.constraint ? "true" : "false") + if length(mme.M)>0 + for Mi in mme.M + geno_name = Mi.name + @printf("%-30s %5s\n","constraint on marker effect variance for $geno_name",Mi.G.constraint ? "true" : "false") + end + end @printf("%-30s %20s\n","missing_phenotypes",MCMCinfo.missing_phenotypes ? "true" : "false") @printf("%-30s %20d\n","update_priors_frequency",MCMCinfo.update_priors_frequency) @printf("%-30s %20s\n","seed",MCMCinfo.seed) @@ -498,23 +509,26 @@ function getMCMCinfo(mme) for i in mme.rndTrmVec thisterm= join(i.term_array, ",") if mme.nModels == 1 - @printf("%-30s %20s\n","random effect variances ("*thisterm*"):",Float64.(round.(inv(i.GiNew),digits=3))) + @printf("%-30s %20s\n","random effect variances ("*thisterm*"):",Float64.(round.(inv(i.GiNew.val),digits=3))) else @printf("%-30s\n","random effect variances ("*thisterm*"):") - Base.print_matrix(stdout,round.(inv(i.GiNew),digits=3)) + Base.print_matrix(stdout,round.(inv(i.GiNew.val),digits=3)) println() end end + if mme.pedTrmVec!=0 + polygenic_pos = findfirst(i -> i.randomType=="A", mme.rndTrmVec) + end if mme.pedTrmVec!=0 @printf("%-30s\n","genetic variances (polygenic):") - Base.print_matrix(stdout,round.(inv(mme.Gi),digits=3)) + Base.print_matrix(stdout,round.(inv(mme.rndTrmVec[polygenic_pos].Gi.val),digits=3)) println() end if mme.nModels == 1 - @printf("%-30s %20.3f\n","residual variances:", mme.R) + @printf("%-30s %20.3f\n","residual variances:", mme.R.val) else @printf("%-30s\n","residual variances:") - Base.print_matrix(stdout,round.(mme.R,digits=3)) + Base.print_matrix(stdout,round.(mme.R.val,digits=3)) println() end @@ -527,21 +541,21 @@ function getMCMCinfo(mme) @printf("%-30s %20s\n","Genomic Category", Mi.name) @printf("%-30s %20s\n","Method",Mi.method) for Mi in mme.M - if Mi.genetic_variance != false + if Mi.genetic_variance.val != false if (mme.nModels == 1 || is_nnbayes_partial) && mme.MCMCinfo.RRM == false - @printf("%-30s %20.3f\n","genetic variances (genomic):",Mi.genetic_variance) + @printf("%-30s %20.3f\n","genetic variances (genomic):",Mi.genetic_variance.val) else @printf("%-30s\n","genetic variances (genomic):") - Base.print_matrix(stdout,round.(Mi.genetic_variance,digits=3)) + Base.print_matrix(stdout,round.(Mi.genetic_variance.val,digits=3)) println() end end if !(Mi.method in ["GBLUP"]) if (mme.nModels == 1 || is_nnbayes_partial) && mme.MCMCinfo.RRM == false - @printf("%-30s %20.3f\n","marker effect variances:",Mi.G) + @printf("%-30s %20.3f\n","marker effect variances:",Mi.G.val) else @printf("%-30s\n","marker effect variances:") - Base.print_matrix(stdout,round.(Mi.G,digits=3)) + Base.print_matrix(stdout,round.(Mi.G.val,digits=3)) println() end end @@ -561,19 +575,20 @@ function getMCMCinfo(mme) end @printf("%-30s %20s\n","estimatePi",Mi.estimatePi ? "true" : "false") end - @printf("%-30s %20s\n","estimateScale",Mi.estimateScale ? "true" : "false") + @printf("%-30s %20s\n","estimate_scale",Mi.estimate_scale ? "true" : "false") end end end printstyled("\nDegree of freedom for hyper-parameters:\n\n",bold=true) - @printf("%-30s %20.3f\n","residual variances:",mme.df.residual) + @printf("%-30s %20.3f\n","residual variances:",mme.R.df) for randomeffect in mme.rndTrmVec if randomeffect.randomType != "A" - @printf("%-30s %20.3f\n","random effect variances:",randomeffect.df) + @printf("%-30s %20.3f\n","random effect variances:",randomeffect.Gi.df) end end if mme.pedTrmVec!=0 - @printf("%-30s %20.3f\n","polygenic effect variances:",mme.df.polygenic) + # polygenic_pos = findfirst(i -> i.randomType=="A", mme.rndTrmVec) + @printf("%-30s %20.3f\n","polygenic effect variances:",mme.rndTrmVec[polygenic_pos].Gi.df) end if mme.M!=0 for Mi in mme.M diff --git a/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl b/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl index 0085eaef..44af600b 100644 --- a/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl +++ b/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl @@ -14,10 +14,8 @@ function MCMC_BayesianAlphabet(mme,df) has_binary_trait = "categorical(binary)" ∈ mme.traits_type has_censored_trait = "censored" ∈ mme.traits_type missing_phenotypes = mme.MCMCinfo.missing_phenotypes - constraint = mme.MCMCinfo.constraint causal_structure = mme.causal_structure is_multi_trait = mme.nModels != 1 - is_mega_trait = mme.MCMCinfo.mega_trait is_nnbayes_partial = mme.nonlinear_function != false && mme.is_fully_connected==false is_activation_fcn = mme.is_activation_fcn nonlinear_function = mme.nonlinear_function @@ -37,8 +35,8 @@ function MCMC_BayesianAlphabet(mme,df) #mme.sol (its starting values were set in runMCMC) mme.solMean, mme.solMean2 = zero(mme.sol),zero(mme.sol) #residual variance - mme.meanVare = zero(mme.R) - mme.meanVare2 = zero(mme.R) + mme.meanVare = zero(mme.R.val) + mme.meanVare2 = zero(mme.R.val) #polygenic effects (pedigree), e.g, Animal+ Maternal if mme.pedTrmVec != 0 @@ -52,16 +50,16 @@ function MCMC_BayesianAlphabet(mme,df) Mi.mArray,Mi.mRinvArray,Mi.mpRinvm = mGibbs.xArray,mGibbs.xRinvArray,mGibbs.xpRinvx if Mi.method=="BayesB" #α=β.*δ - Mi.G = fill(Mi.G,Mi.nMarkers) #a scalar in BayesC but a vector in BayeB + Mi.G.val = fill(Mi.G.val,Mi.nMarkers) #a scalar in BayesC but a vector in BayeB end if Mi.method=="BayesL" #in the MTBayesLasso paper if mme.nModels == 1 - Mi.G /= 8 #mme.M.G is the scale Matrix, Sigma - Mi.scale /= 8 + Mi.G.val /= 8 #mme.M.G.val is the scale Matrix, Sigma + Mi.G.scale /= 8 gammaDist = Gamma(1, 8) #8 is the scale parameter of the Gamma distribution (1/8 is the rate parameter) else - Mi.G /= 4*(Mi.ntraits+1) - Mi.scale /= 4*(Mi.ntraits+1) + Mi.G.val /= 4*(Mi.ntraits+1) + Mi.G.scale /= 4*(Mi.ntraits+1) gammaDist = Gamma((Mi.ntraits+1)/2, 8) #8 (1/8): the scale (rate) parameter end Mi.gammaArray = rand(gammaDist,Mi.nMarkers) @@ -74,12 +72,13 @@ function MCMC_BayesianAlphabet(mme,df) Mi.meanAlpha = [zero(Mi.α[traiti]) for traiti = 1:Mi.ntraits] #marker effects Mi.meanAlpha2 = [zero(Mi.α[traiti]) for traiti = 1:Mi.ntraits] #marker effects Mi.meanDelta = [zero(Mi.δ[traiti]) for traiti = 1:Mi.ntraits] #inclusion indicator for marker effects - Mi.meanVara = zero(mme.R) #posterir mean of variance for marker effect - Mi.meanVara2 = zero(mme.R) #variable to save variance for marker effect - Mi.meanScaleVara = zero(mme.R) #variable to save Scale parameter for prior of marker effect variance - Mi.meanScaleVara2 = zero(mme.R) #variable to save Scale parameter for prior of marker effect variance + Mi.meanVara = zero(mme.R.val) #posterir mean of variance for marker effect + Mi.meanVara2 = zero(mme.R.val) #variable to save variance for marker effect + Mi.meanScaleVara = zero(mme.R.val) #variable to save Scale parameter for prior of marker effect variance + Mi.meanScaleVara2 = zero(mme.R.val) #variable to save Scale parameter for prior of marker effect variance if is_multi_trait - if is_mega_trait + # if is_mega_trait + if Mi.G.constraint==true Mi.π = zeros(Mi.ntraits) Mi.mean_pi = zeros(Mi.ntraits) Mi.mean_pi2 = zeros(Mi.ntraits) @@ -151,6 +150,9 @@ function MCMC_BayesianAlphabet(mme,df) if nonlinear_function != false mme.weights_NN = vcat(mean(mme.ySparse),zeros(mme.nModels)) end + if mme.pedTrmVec!=0 + polygenic_pos = findfirst(i -> i.randomType=="A", mme.rndTrmVec) + end @showprogress "running MCMC ..." for iter=1:chain_length ######################################################################## # 0. Categorical and censored traits @@ -184,7 +186,7 @@ function MCMC_BayesianAlphabet(mme,df) if is_multi_trait Gibbs(mme.mmeLhs,mme.sol,mme.mmeRhs) else - Gibbs(mme.mmeLhs,mme.sol,mme.mmeRhs,mme.R) + Gibbs(mme.mmeLhs,mme.sol,mme.mmeRhs,mme.R.val) end ycorr[:] = ycorr - mme.X*mme.sol @@ -198,53 +200,54 @@ function MCMC_BayesianAlphabet(mme,df) # Marker Effects ######################################################################## if Mi.method in ["BayesC","BayesB","BayesA"] - locus_effect_variances = (Mi.method == "BayesC" ? fill(Mi.G,Mi.nMarkers) : Mi.G) + locus_effect_variances = (Mi.method == "BayesC" ? fill(Mi.G.val,Mi.nMarkers) : Mi.G.val) if is_multi_trait && !is_nnbayes_partial - if is_mega_trait - megaBayesABC!(Mi,wArray,mme.R,locus_effect_variances) + if Mi.G.constraint==true + megaBayesABC!(Mi,wArray,mme.R.val,locus_effect_variances) else - MTBayesABC!(Mi,wArray,mme.R,locus_effect_variances,mme.nModels) + MTBayesABC!(Mi,wArray,mme.R.val,locus_effect_variances,mme.nModels) end elseif is_nnbayes_partial - BayesABC!(Mi,wArray[i],mme.R[i,i],locus_effect_variances) #this can be parallelized (conflict with others) + BayesABC!(Mi,wArray[i],mme.R.val[i,i],locus_effect_variances) #this can be parallelized (conflict with others) else - BayesABC!(Mi,ycorr,mme.R,locus_effect_variances) + BayesABC!(Mi,ycorr,mme.R.val,locus_effect_variances) end elseif Mi.method =="RR-BLUP" if is_multi_trait && !is_nnbayes_partial - if is_mega_trait - megaBayesC0!(Mi,wArray,mme.R) + if Mi.G.constraint==true + megaBayesC0!(Mi,wArray,mme.R.val) else - MTBayesC0!(Mi,wArray,mme.R) + MTBayesC0!(Mi,wArray,mme.R.val) end elseif is_nnbayes_partial - BayesC0!(Mi,wArray[i],mme.R[i,i]) + BayesC0!(Mi,wArray[i],mme.R.val[i,i]) else - BayesC0!(Mi,ycorr,mme.R) + BayesC0!(Mi,ycorr,mme.R.val) end elseif Mi.method == "BayesL" if is_multi_trait && !is_nnbayes_partial - if is_mega_trait #problem with sampleGammaArray - megaBayesL!(Mi,wArray,mme.R) + #problem with sampleGammaArray + if Mi.G.constraint==true + megaBayesL!(Mi,wArray,mme.R.val) else - MTBayesL!(Mi,wArray,mme.R) + MTBayesL!(Mi,wArray,mme.R.val) end elseif is_nnbayes_partial - BayesC0!(Mi,wArray[i],mme.R[i,i]) + BayesC0!(Mi,wArray[i],mme.R.val[i,i]) else - BayesL!(Mi,ycorr,mme.R) + BayesL!(Mi,ycorr,mme.R.val) end elseif Mi.method == "GBLUP" if is_multi_trait && !is_nnbayes_partial - if is_mega_trait - megaGBLUP!(Mi,wArray,mme.R,invweights) + if Mi.G.constraint==true + megaGBLUP!(Mi,wArray,mme.R.val,invweights) else - MTGBLUP!(Mi,wArray,ycorr,mme.R,invweights) + MTGBLUP!(Mi,wArray,ycorr,mme.R.val,invweights) end elseif is_nnbayes_partial - GBLUP!(Mi,wArray[i],mme.R[i,i],invweights) + GBLUP!(Mi,wArray[i],mme.R.val[i,i],invweights) else - GBLUP!(Mi,ycorr,mme.R,invweights) + GBLUP!(Mi,ycorr,mme.R.val,invweights) end end ######################################################################## @@ -252,7 +255,7 @@ function MCMC_BayesianAlphabet(mme,df) ######################################################################## if Mi.estimatePi == true if is_multi_trait && !is_nnbayes_partial - if is_mega_trait + if Mi.G.constraint==true Mi.π = [samplePi(sum(Mi.δ[i]), Mi.nMarkers) for i in 1:mme.nModels] else samplePi(Mi.δ,Mi.π) #samplePi(deltaArray,Mi.π,labels) @@ -264,20 +267,20 @@ function MCMC_BayesianAlphabet(mme,df) ######################################################################## # Variance of Marker Effects ######################################################################## - if Mi.estimateVariance == true #methd specific estimate_variance - sample_marker_effect_variance(Mi,constraint) + if Mi.estimate_variance == true #methd specific estimate_variance + sample_marker_effect_variance(Mi) if mme.MCMCinfo.double_precision == false && Mi.method != "BayesB" - Mi.G = Float32.(Mi.G) + Mi.G.val = Float32.(Mi.G.val) end end ######################################################################## # Scale Parameter in Priors for Marker Effect Variances ######################################################################## - if Mi.estimateScale == true + if Mi.estimate_scale == true if !is_multi_trait - a = size(Mi.G,1)*Mi.df/2 + 1 - b = sum(Mi.df ./ (2*Mi.G)) + 1 - Mi.scale = rand(Gamma(a,1/b)) + a = size(Mi.G.val,1)*Mi.G.df/2 + 1 + b = sum(Mi.G.df ./ (2*Mi.G.val)) + 1 + Mi.G.scale = rand(Gamma(a,1/b)) end end end @@ -295,26 +298,26 @@ function MCMC_BayesianAlphabet(mme,df) # 3.2 Residual Variance ######################################################################## if is_multi_trait - mme.R = sample_variance(wArray, length(mme.obsID), - mme.df.residual, mme.scaleR, - invweights,constraint; + mme.R.val = sample_variance(wArray, length(mme.obsID), + mme.R.df, mme.R.scale, + invweights,mme.R.constraint; binary_trait_index=has_binary_trait ? findall(x->x=="categorical(binary)", mme.traits_type) : false) - Ri = kron(inv(mme.R),spdiagm(0=>invweights)) + Ri = kron(inv(mme.R.val),spdiagm(0=>invweights)) else #single trait - if !has_categorical_trait && !has_binary_trait # fixed mme.R=1 for single categorical/binary trait - mme.ROld = mme.R - mme.R = sample_variance(ycorr,length(ycorr), mme.df.residual, mme.scaleR, invweights) + if !has_categorical_trait && !has_binary_trait # fixed =1 for single categorical/binary trait + mme.ROld = mme.R.val + mme.R.val = sample_variance(ycorr,length(ycorr), mme.R.df, mme.R.scale, invweights) end end if mme.MCMCinfo.double_precision == false - mme.R = Float32.(mme.R) + mme.R.val = Float32.(mme.R.val) end end ######################################################################## # 4. Causal Relationships among Phenotypes (Structure Equation Model) ######################################################################## if is_multi_trait && causal_structure != false - sample4λ,sample4λ_vec = get_Λ(Y,mme.R,ycorr,Λy,mme.ySparse,causal_structure) #no missing phenotypes + sample4λ,sample4λ_vec = get_Λ(Y,mme.R.val,ycorr,Λy,mme.ySparse,causal_structure) #no missing phenotypes end ######################################################################## # 5. Latent Traits (NNBayes) @@ -328,15 +331,16 @@ function MCMC_BayesianAlphabet(mme,df) if update_priors_frequency !=0 && iter%update_priors_frequency==0 if mme.M!=0 && methods != "BayesB" if is_multi_trait - mme.M[1].scale = mme.M[1].meanVara*(mme.df.marker-mme.M[1].ntraits-1) + mme.M[1].scale = mme.M[1].meanVara*(mme.M[1].G.df-mme.M[1].ntraits-1) else - mme.M[1].scale = mme.M[1].meanVara*(mme.df.marker-2)/mme.df.marker + mme.M[1].scale = mme.M[1].meanVara*(mme.M[1].G.df-2)/mme.M[1].G.df end end if mme.pedTrmVec != 0 - mme.scalePed = mme.G0Mean*(mme.df.polygenic - size(mme.pedTrmVec,1) - 1) + polygenic_pos = findfirst(i -> i.randomType=="A", mme.rndTrmVec) + mme.scalePed = mme.G0Mean*(mme.rndTrmVec[polygenic_pos].Gi.df - size(mme.pedTrmVec,1) - 1) end - mme.scaleR = mme.meanVare*(mme.df.residual-2)/mme.df.residual + mme.R.scale = mme.meanVare*(mme.R.df-2)/mme.R.df println("\n Update priors from posteriors.") end ######################################################################## @@ -347,7 +351,12 @@ function MCMC_BayesianAlphabet(mme,df) nsamples = (iter-burnin)/output_samples_frequency output_posterior_mean_variance(mme,nsamples) #mean and variance of posterior distribution - output_MCMC_samples(mme,mme.R,(mme.pedTrmVec!=0 ? inv(mme.Gi) : false),outfile) + if mme.pedTrmVec!=0 + polygenic_effects_variance = inv(mme.rndTrmVec[polygenic_pos].Gi.val) + else + polygenic_effects_variance=false + end + output_MCMC_samples(mme,mme.R.val,polygenic_effects_variance,outfile) if causal_structure != false writedlm(causal_structure_outfile,sample4λ_vec',',') end diff --git a/src/1.JWAS/src/MCMC/deprecated.jl b/src/1.JWAS/src/MCMC/deprecated.jl index 8db9ae28..1dd3ed19 100644 --- a/src/1.JWAS/src/MCMC/deprecated.jl +++ b/src/1.JWAS/src/MCMC/deprecated.jl @@ -1,7 +1,7 @@ M = mme.M[1].genotypes X = M XPX = X'X -lhs = XPX+I*mme.R[1,1]/mme.M[1].G[1,1] +lhs = XPX+I*mme.R.val[1,1]/mme.M[1].G[1,1] Ch = cholesky(lhs) iL = inv(Ch.L) iLhs = inv(Ch) diff --git a/src/1.JWAS/src/Nonlinear/nnbayes_check.jl b/src/1.JWAS/src/Nonlinear/nnbayes_check.jl index e5d3c003..58b4789b 100644 --- a/src/1.JWAS/src/Nonlinear/nnbayes_check.jl +++ b/src/1.JWAS/src/Nonlinear/nnbayes_check.jl @@ -135,7 +135,6 @@ end # below function is to modify mme from multi-trait model to multiple single trait models # coded by Hao function nnbayes_mega_trait(mme) - #mega_trait if mme.nModels == 1 error("more than 1 trait is required for MegaLMM analysis.") @@ -143,12 +142,12 @@ function nnbayes_mega_trait(mme) mme.MCMCinfo.constraint = true ##sample from scale-inv-⁠χ2, not InverseWishart - mme.df.residual = mme.df.residual - mme.nModels - mme.scaleR = diag(mme.scaleR/(mme.df.residual - 1))*(mme.df.residual-2)/mme.df.residual #diag(R_prior_mean)*(ν-2)/ν + mme.R.df = mme.R.df - mme.nModels + mme.R.scale = diag(mme.R.scale/(mme.R.df - 1))*(mme.R.df-2)/mme.R.df #diag(R_prior_mean)*(ν-2)/ν if mme.M != 0 for Mi in mme.M - Mi.df = Mi.df - mme.nModels - Mi.scale = diag(Mi.scale/(Mi.df - 1))*(Mi.df-2)/Mi.df + Mi.G.df = Mi.G.df - mme.nModels + Mi.G.scale = diag(Mi.G.scale/(Mi.G.df - 1))*(Mi.G.df-2)/Mi.G.df end end @@ -157,9 +156,9 @@ end # below function is to modify essential parameters for partial connected NN function nnbayes_partial_para_modify2(mme) for Mi in mme.M - Mi.scale = Mi.scale[1] - Mi.G = Mi.G[1,1] - Mi.genetic_variance=Mi.genetic_variance[1,1] + Mi.G.scale = Mi.G.scale[1] + Mi.G.val = Mi.G.val[1,1] + Mi.genetic_variance.val=Mi.genetic_variance.val[1,1] end end @@ -206,3 +205,14 @@ function nnlmm_initialize_missing(mme,df) full_omics = n_observed_omics .== n_omics #indicator for ind with full omics mme.incomplete_omics = vec(.!full_omics) #indicator for ind with no/partial omics end + + +function nnmm_print_info_input_to_middle_layer(mme) + is_mega_trait = mme.R.constraint==true && mme.M[1].G.constraint==true #is_mega_trait when no residual and marker effect covariances + if is_mega_trait + printstyled(" - Bayesian Alphabet: multiple independent single-trait Bayesian models are used to sample marker effect. \n",bold=false,color=:green) + printstyled(" - Multi-threaded parallelism: $nThread threads are used to run single-trait models in parallel. \n",bold=false,color=:green) + else + printstyled(" - Bayesian Alphabet: multi-trait Bayesian models are used to sample marker effect. \n",bold=false,color=:green) + end +end \ No newline at end of file diff --git a/src/1.JWAS/src/Nonlinear/nonlinear.jl b/src/1.JWAS/src/Nonlinear/nonlinear.jl index 50420c1b..f77ede4b 100644 --- a/src/1.JWAS/src/Nonlinear/nonlinear.jl +++ b/src/1.JWAS/src/Nonlinear/nonlinear.jl @@ -28,7 +28,7 @@ function sample_latent_traits(yobs,mme,ycorr,nonlinear_function) if mme.is_activation_fcn == true #Neural Network with activation function if sum(incomplete_omics) != 0 #at least 1 ind with incomplete omics #step 1. sample latent trait (only for individuals with incomplete omics data - ylats_new = hmc_one_iteration(10,0.1,ylats_old[incomplete_omics,:],yobs[incomplete_omics],mme.weights_NN,mme.R,σ2_yobs,ycorr2[incomplete_omics,:],nonlinear_function) + ylats_new = hmc_one_iteration(10,0.1,ylats_old[incomplete_omics,:],yobs[incomplete_omics],mme.weights_NN,mme.R.val,σ2_yobs,ycorr2[incomplete_omics,:],nonlinear_function) #step 2. update ylats with sampled latent traits ylats_old[incomplete_omics,:] = ylats_new #step 3. for individuals with partial omics data, put back the partial real omics. diff --git a/src/1.JWAS/src/RRM/MCMC_BayesianAlphabet_RRM.jl b/src/1.JWAS/src/RRM/MCMC_BayesianAlphabet_RRM.jl index cbf5b662..02784678 100644 --- a/src/1.JWAS/src/RRM/MCMC_BayesianAlphabet_RRM.jl +++ b/src/1.JWAS/src/RRM/MCMC_BayesianAlphabet_RRM.jl @@ -25,10 +25,10 @@ function MCMC_BayesianAlphabet_RRM(mme,df; Mi.genotypes = Z*Mi.genotypes Mi.obsID = unique(df[!,1]) Mi.nObs = length(Mi.obsID) - Mi.meanVara = zero(Mi.G) #variable to save variance for marker effect - Mi.meanVara2 = zero(Mi.G) - Mi.meanScaleVara = zero(Mi.G) #variable to save Scale parameter for prior of marker effect variance - Mi.meanScaleVara2 = zero(Mi.G) + Mi.meanVara = zero(Mi.G.val) #variable to save variance for marker effect + Mi.meanVara2 = zero(Mi.G.val) + Mi.meanScaleVara = zero(Mi.G.val) #variable to save Scale parameter for prior of marker effect variance + Mi.meanScaleVara2 = zero(Mi.G.val) end end @@ -101,13 +101,18 @@ function MCMC_BayesianAlphabet_RRM(mme,df; ############################################################################ # MCMC (starting values for sol (zeros); mme.RNew; G0 are used) ############################################################################ + + if mme.pedTrmVec!=0 + polygenic_pos = findfirst(i -> i.randomType=="A", mme.rndTrmVec) + end + @showprogress "running MCMC for" for iter=1:nIter ######################################################################## # 1.1. Non-Marker Location Parameters ######################################################################## ycorr[:] = ycorr + mme.X*mme.sol mme.mmeRhs = mme.X'ycorr - Gibbs(mme.mmeLhs,mme.sol,mme.mmeRhs,mme.R) + Gibbs(mme.mmeLhs,mme.sol,mme.mmeRhs,mme.R.val) ycorr[:] = ycorr - mme.X*mme.sol @@ -120,12 +125,12 @@ function MCMC_BayesianAlphabet_RRM(mme,df; if mme.M != 0 for Mi in mme.M if Mi.method in ["BayesC","BayesB","BayesA"] - locus_effect_variances = (Mi.method == "BayesC" ? fill(Mi.G,Mi.nMarkers) : Mi.G) + locus_effect_variances = (Mi.method == "BayesC" ? fill(Mi.G.val,Mi.nMarkers) : Mi.G.val) BayesABCRRM!(Mi.mArray,Mi.mpRinvm,wArray,yfull, Mi.β, Mi.δ, Mi.α, - mme.R,locus_effect_variances,Mi.π, + mme.R.val,locus_effect_variances,Mi.π, Φ, whichzeros, Mi.mΦΦArray) end end @@ -157,8 +162,8 @@ function MCMC_BayesianAlphabet_RRM(mme,df; ######################################################################## # 2.3 Residual Variance ######################################################################## - mme.ROld = mme.R - mme.R = sample_variance(ycorr, length(ycorr), mme.df.residual, mme.scaleR) + mme.ROld = mme.R.val + mme.R.val = sample_variance(ycorr, length(ycorr), mme.R.df, mme.R.scale) ######################################################################## # 2.4 Marker Effects Variance @@ -167,7 +172,7 @@ function MCMC_BayesianAlphabet_RRM(mme,df; for Mi in mme.M if Mi.method in ["RR-BLUP","BayesC","GBLUP"] data = (Mi.method == "BayesC" ? Mi.β : Mi.α) - Mi.G =sample_variance(data, Mi.nMarkers, Mi.df, Mi.scale, false, false) + Mi.G.val =sample_variance(data, Mi.nMarkers, Mi.G.df, Mi.G.scale, false, false) end end end @@ -176,10 +181,10 @@ function MCMC_BayesianAlphabet_RRM(mme,df; ######################################################################## if mme.M != 0 for Mi in mme.M - if Mi.estimateScale == true - a = size(Mi.G,1)*Mi.df/2 + 1 - b = sum(Mi.df ./ (2*Mi.G )) + 1 - Mi.scale = rand(Gamma(a,1/b)) + if Mi.estimate_scale == true + a = size(Mi.G.val,1)*Mi.G.df/2 + 1 + b = sum(Mi.G.df ./ (2*Mi.G.val )) + 1 + Mi.G.scale = rand(Gamma(a,1/b)) end end end @@ -187,12 +192,18 @@ function MCMC_BayesianAlphabet_RRM(mme,df; # 3.1 Save MCMC samples ######################################################################## if iter>burnin && (iter-burnin)%output_samples_frequency == 0 - output_MCMC_samples(mme,mme.R,(mme.pedTrmVec!=0 ? inv(mme.Gi) : false),outfile) + if mme.pedTrmVec!=0 + # polygenic_pos = findfirst(i -> i.randomType=="A", mme.rndTrmVec) + polygenic_effects_variance = inv(mme.rndTrmVec[polygenic_pos].Gi.val) + else + polygenic_effects_variance=false + end + output_MCMC_samples(mme,mme.R.val,polygenic_effects_variance,outfile) nsamples = (iter-burnin)/output_samples_frequency mme.solMean += (mme.sol - mme.solMean)/nsamples mme.solMean2 += (mme.sol .^2 - mme.solMean2)/nsamples - mme.meanVare += (mme.R - mme.meanVare)/nsamples - mme.meanVare2 += (mme.R .^2 - mme.meanVare2)/nsamples + mme.meanVare += (mme.R.val- mme.meanVare)/nsamples + mme.meanVare2 += (mme.R.val.^2 - mme.meanVare2)/nsamples if mme.pedTrmVec != 0 mme.G0Mean += (inv(mme.Gi) - mme.G0Mean )/nsamples @@ -212,13 +223,13 @@ function MCMC_BayesianAlphabet_RRM(mme,df; end end if Mi.method != "BayesB" - Mi.meanVara += (Mi.G - Mi.meanVara)/nsamples - Mi.meanVara2 += (Mi.G.^2 - Mi.meanVara2)/nsamples + Mi.meanVara += (Mi.G.val - Mi.meanVara)/nsamples + Mi.meanVara2 += (Mi.G.val.^2 - Mi.meanVara2)/nsamples end - if Mi.estimateScale == true - Mi.meanScaleVara += (Mi.scale - Mi.meanScaleVara)/nsamples - Mi.meanScaleVara2 += (Mi.scale .^2 - Mi.meanScaleVara2)/nsamples + if Mi.estimate_scale == true + Mi.meanScaleVara += (Mi.G.scale - Mi.meanScaleVara)/nsamples + Mi.meanScaleVara2 += (Mi.G.scale .^2 - Mi.meanScaleVara2)/nsamples end end end diff --git a/src/1.JWAS/src/build_MME.jl b/src/1.JWAS/src/build_MME.jl index 246d4b89..a6917a20 100644 --- a/src/1.JWAS/src/build_MME.jl +++ b/src/1.JWAS/src/build_MME.jl @@ -38,9 +38,15 @@ R = [6.72 24.84 models = build_model(model_equations,R); ``` """ -function build_model(model_equations::AbstractString, R = false; df = 4.0, +function build_model(model_equations::AbstractString, + ## residual variance: + R = false; df = 4.0, + estimate_variance=true, estimate_scale=false, + constraint=false, #for multi-trait only, constraint=true means no residual covariance among traits + ## nnmm: num_hidden_nodes = false, nonlinear_function = false, latent_traits=false, #nonlinear_function(x1,x2) = x1+x2 user_σ2_yobs = false, user_σ2_weightsNN = false, + ## censored, categorical traits: censored_trait = false, categorical_trait = false) if R != false && !isposdef(map(AbstractFloat,R)) @@ -107,10 +113,10 @@ function build_model(model_equations::AbstractString, R = false; df = 4.0, genotypei.ntraits = is_nnbayes_partial ? 1 : nModels genotypei.trait_names = is_nnbayes_partial ? trait_names : string.(lhsVec) if nModels != 1 - genotypei.df = genotypei.df + nModels + genotypei.G.df = genotypei.G.df + nModels end - if !is_nnbayes_partial && (genotypei.G != false || genotypei.genetic_variance != false) - if size(genotypei.G,1) != nModels && size(genotypei.genetic_variance,1) != nModels + if !is_nnbayes_partial && (genotypei.G.val != false || genotypei.genetic_variance.val != false) + if size(genotypei.G.val,1) != nModels && size(genotypei.genetic_variance.val,1) != nModels error("The genomic covariance matrix is not a ",nModels," by ",nModels," matrix.") end end @@ -121,11 +127,27 @@ function build_model(model_equations::AbstractString, R = false; df = 4.0, end #create mme with genotypes - filter!(x->x.random_type != "genotypes",modelTerms) - filter!(x->x[2].random_type != "genotypes",dict) - mme = MME(nModels,modelVec,modelTerms,dict,lhsVec,R == false ? R : Float32.(R),Float32(df)) + filter!(x->x.random_type != "genotypes",modelTerms) #remove "genotypes" from modelTerms + filter!(x->x[2].random_type != "genotypes",dict) #remove "genotypes" from dict + + + #set scale and df for residual variance + if nModels == 1 + scale_R = R*(df - 2)/df + df_R = df + else + scale_R = R*(df - 1) + df_R = df + nModels + end + + #initialize mme + mme = MME(nModels,modelVec,modelTerms,dict,lhsVec, + Variance(R==false ? R : Float32.(R), #val + Float32(df_R), #df + R==false ? R : scale_R, #scale + estimate_variance, estimate_scale, constraint)) if length(genotypes) != 0 - mme.M = genotypes + mme.M = genotypes #add genotypes into mme end #NNBayes: @@ -343,7 +365,8 @@ function getMME(mme::MME, df::DataFrame) #such that no imputation of missing phenotypes is required. #mixed model equations is obtained below for multi-trait PBLUP #with known residual covariance matrix and missing phenotypes. - if mme.MCMCinfo.mega_trait == true #multiple single trait + # if mme.MCMCinfo.mega_trait == true #multiple single trait + if mme.R.constraint == true #tj: Hao, please confirm! now we do not have mege_trait option. We only have constraint option for variances Ri = Diagonal(repeat(mme.invweights,mme.nModels)) else #multi-trait Ri = mkRi(mme,df,mme.invweights) @@ -354,13 +377,13 @@ function getMME(mme::MME, df::DataFrame) #Random effects parts in MME if mme.nModels == 1 - #random_term.GiNew*mme.R - random_term.GiOld*mme.ROld + #random_term.GiNew*mme.R.val - random_term.GiOld*mme.ROld for random_term in mme.rndTrmVec #trick - random_term.GiOld = zero(random_term.GiOld) + random_term.GiOld.val = zero(random_term.GiOld.val) end addVinv(mme) for random_term in mme.rndTrmVec #trick - random_term.GiOld = copy(random_term.GiNew) + random_term.GiOld.val = copy(random_term.GiNew.val) end else addVinv(mme) diff --git a/src/1.JWAS/src/categorical_and_censored_trait/categorical_and_censored_trait.jl b/src/1.JWAS/src/categorical_and_censored_trait/categorical_and_censored_trait.jl index c155ab95..c242e3c8 100644 --- a/src/1.JWAS/src/categorical_and_censored_trait/categorical_and_censored_trait.jl +++ b/src/1.JWAS/src/categorical_and_censored_trait/categorical_and_censored_trait.jl @@ -33,7 +33,7 @@ function categorical_censored_traits_setup!(mme,df) ############################################################################ nInd = length(mme.obsID) nTrait = mme.nModels - R = mme.R + R = mme.R.val starting_value = mme.sol cmean = mme.X*starting_value[1:size(mme.mmeLhs,1)] #maker effects defaulting to all zeros @@ -81,7 +81,7 @@ function categorical_censored_traits_setup!(mme,df) ################################################################################## for i in 1:nInd if lower_bound[t][i] != upper_bound[t][i] - ySparse[i,t] = rand(truncated(Normal(cmean[i,t], sqrt(R[t,t])), lower_bound[t][i], upper_bound[t][i])) #mme.R has been fixed to 1.0 for category single-trait analysis + ySparse[i,t] = rand(truncated(Normal(cmean[i,t], sqrt(R[t,t])), lower_bound[t][i], upper_bound[t][i])) #mme.R.val has been fixed to 1.0 for category single-trait analysis else ySparse[i,t] = lower_bound[t][i] #mme.ySparse will also change since reshape is a reference, not copy end @@ -176,7 +176,7 @@ function sample_liabilities!(mme,ycorr,lower_bound,upper_bound) nInd = length(mme.obsID) nTrait = mme.nModels is_multi_trait = nTrait>1 - R = mme.R + R = mme.R.val cmean = reshape(cmean, nInd,nTrait) ySparse = reshape(mme.ySparse,nInd,nTrait) #mme.ySparse will also be updated since reshape is a reference, not copy ycorr_ref = reshape(ycorr, nInd,nTrait) #ycorr will be updated since reshape is a reference, not copy diff --git a/src/1.JWAS/src/input_data_validation.jl b/src/1.JWAS/src/input_data_validation.jl index 938af8d9..1172c243 100644 --- a/src/1.JWAS/src/input_data_validation.jl +++ b/src/1.JWAS/src/input_data_validation.jl @@ -35,7 +35,7 @@ function errors_args(mme) println("BayesA is equivalent to BayesB with known π=0. BayesB with known π=0 runs.") end if Mi.method == "GBLUP" - if Mi.genetic_variance == false && Mi.G != false + if Mi.genetic_variance.val == false && Mi.G.val != false error("Please provide values for the genetic variance for GBLUP analysis") end if mme.MCMCinfo.single_step_analysis == true @@ -241,47 +241,47 @@ function set_default_priors_for_variance_components(mme,df) #genetic variance or marker effect variance if mme.M!=0 for Mi in mme.M - if Mi.G == false && Mi.genetic_variance == false + if Mi.G.val == false && Mi.genetic_variance.val == false printstyled("Prior information for genomic variance is not provided and is generated from the data.\n",bold=false,color=:green) if mme.nModels==1 && mme.MCMCinfo.RRM == false - Mi.genetic_variance = varg[1,1] + Mi.genetic_variance.val = varg[1,1] elseif mme.nModels==1 && mme.MCMCinfo.RRM != false - Mi.genetic_variance = diagm(0=>fill(varg[1,1],size(mme.MCMCinfo.RRM,2))) + Mi.genetic_variance.val = diagm(0=>fill(varg[1,1],size(mme.MCMCinfo.RRM,2))) elseif mme.nModels>1 - Mi.genetic_variance = varg - end #mme.M.G and its scale parameter will be reset in function set_marker_hyperparameters_variances_and_pi + Mi.genetic_variance.val = varg + end #mme.M.G.val and its scale parameter will be reset in function set_marker_hyperparameters_variances_and_pi end end end #residual effects - if mme.nModels==1 && isposdef(mme.R) == false #single-trait + if mme.nModels==1 && isposdef(mme.R.val) == false #single-trait printstyled("Prior information for residual variance is not provided and is generated from the data.\n",bold=false,color=:green) is_categorical_or_binary_trait = mme.traits_type==["categorical"] || mme.traits_type==["categorical(binary)"] - mme.R = mme.ROld = is_categorical_or_binary_trait ? 1.0 : vare[1,1] #residual variance known to be 1.0 in single trait categorical analysis - mme.scaleR = mme.R*(mme.df.residual-2)/mme.df.residual - elseif mme.nModels>1 && isposdef(mme.R) == false #multi-trait + mme.R.val = mme.ROld = is_categorical_or_binary_trait ? 1.0 : vare[1,1] #residual variance known to be 1.0 in single trait categorical analysis + mme.R.scale = mme.R.val*(mme.R.df-2)/mme.R.df + elseif mme.nModels>1 && isposdef(mme.R.val) == false #multi-trait printstyled("Prior information for residual variance is not provided and is generated from the data.\n",bold=false,color=:green) - mme.R = mme.ROld = vare - mme.scaleR = mme.R*(mme.df.residual - mme.nModels - 1) + mme.R.val = mme.ROld = vare + mme.R.scale = mme.R.val*(mme.R.df - mme.nModels - 1) end #random effects if length(mme.rndTrmVec) != 0 for randomEffect in mme.rndTrmVec - if isposdef(randomEffect.Gi) == false + if isposdef(randomEffect.Gi.val) == false printstyled("Prior information for random effect variance is not provided and is generated from the data.\n",bold=false,color=:green) - myvarout = [split(i,":")[1] for i in randomEffect.term_array] - myvarin = string.(mme.lhsVec) - Zdesign = mkmat_incidence_factor(myvarout,myvarin) + myvarout = [split(i,":")[1] for i in randomEffect.term_array] #e.g., ["y1:x2","y2:x2"] -> ["y1","y2"] + myvarin = string.(mme.lhsVec) #e.g., ["y1","y2","y3"] + Zdesign = mkmat_incidence_factor(myvarout,myvarin) #design matrix (sparse) if randomEffect.randomType == "A" G = diagm(Zdesign*diag(varg)) else G = diagm(Zdesign*diag(vare)) end - randomEffect.Gi = randomEffect.GiOld = randomEffect.GiNew = Symmetric(inv(G)) - randomEffect.scale = G*(randomEffect.df-length(randomEffect.term_array)-1) + randomEffect.Gi.val = randomEffect.GiOld.val = randomEffect.GiNew.val = Symmetric(inv(G)) + randomEffect.Gi.scale = randomEffect.GiOld.scale = randomEffect.GiNew.scale = G*(randomEffect.Gi.df-length(randomEffect.term_array)-1) if randomEffect.randomType == "A" - mme.Gi = randomEffect.Gi - mme.scalePed = randomEffect.scale + mme.Gi = randomEffect.Gi.val + mme.scalePed = randomEffect.Gi.scale end end end @@ -447,3 +447,38 @@ function init_mixed_model_equations(mme,df,starting_value) end end end + + +#multi-trait constraint on R: modify df and scale for residual variance +function R_constraint!(mme) + #print + printstyled("Constraint on residual variance-covariance matrix (i.e., zero covariance)\n",bold=false,color=:green) + #check + if mme.nModels == 1 && mme.R.constraint==true + error("constraint==true is for multi-trait only") + end + #modify df and scale + mme.R.df = mme.R.df - mme.nModels + mme.R.scale = Diagonal(mme.R.scale/(mme.R.df - 1))*(mme.R.df-2)/mme.R.df #Diagonal(R_prior_mean)*(ν-2)/ν, a diagonal matrix +end + +#multi-trait constraint on G: modify df and scale for marker effect variance +function G_constraint!(mme) + if length(mme.M) > 0 + for Mi in mme.M + #print + geno_name = Mi.name + printstyled("Constraint on marker effect variance-covariance matrix (i.e., zero covariance) for $geno_name \n",bold=false,color=:green) + #check + if mme.nModels == 1 && Mi.G.constraint==true + error("constraint==true is for multi-trait only") + end + #modify df and scale + Mi.G.df = Mi.G.df - mme.nModels + Mi.G.scale = Diagonal(Mi.G.scale/(Mi.G.df - 1))*(Mi.G.df-2)/Mi.G.df #a diagonal matrix + end + end +end + + +#TO-DO: multi-trait constraint on non-genetic random effects: modify df and scale \ No newline at end of file diff --git a/src/1.JWAS/src/iterative_solver/solver.jl b/src/1.JWAS/src/iterative_solver/solver.jl index 8c226fb8..6710878a 100644 --- a/src/1.JWAS/src/iterative_solver/solver.jl +++ b/src/1.JWAS/src/iterative_solver/solver.jl @@ -40,7 +40,7 @@ function solve(mme::MME, return [getNames(mme) Gibbs(A,x,b, maxiter,printout_frequency=printout_frequency)] elseif solver=="Gibbs" && mme.nModels==1 - return [getNames(mme) Gibbs(A,x,b,mme.R, + return [getNames(mme) Gibbs(A,x,b,mme.R.val, maxiter,printout_frequency=printout_frequency)] elseif solver=="default" println("To solve the equations, please choose a solver. (run ?solve for help)") diff --git a/src/1.JWAS/src/markers/BayesianAlphabet/GBLUP.jl b/src/1.JWAS/src/markers/BayesianAlphabet/GBLUP.jl index 6ba84ca7..f6460d3f 100644 --- a/src/1.JWAS/src/markers/BayesianAlphabet/GBLUP.jl +++ b/src/1.JWAS/src/markers/BayesianAlphabet/GBLUP.jl @@ -32,12 +32,12 @@ end function megaGBLUP!(Mi::Genotypes,wArray,vare,Rinv) Threads.@threads for i in 1:length(wArray) #ntraits - GBLUP!(Mi.genotypes,Mi.α[i],Mi.D,wArray[i],vare[i,i],Mi.G[i,i],Rinv,Mi.nObs) + GBLUP!(Mi.genotypes,Mi.α[i],Mi.D,wArray[i],vare[i,i],Mi.G.val[i,i],Rinv,Mi.nObs) end end function GBLUP!(Mi::Genotypes,ycorr,vare,Rinv) #single-trait - GBLUP!(Mi.genotypes,Mi.α[1],Mi.D,ycorr,vare,Mi.G[1,1],Rinv,Mi.nObs) + GBLUP!(Mi.genotypes,Mi.α[1],Mi.D,ycorr,vare,Mi.G.val[1,1],Rinv,Mi.nObs) end function GBLUP!(genotypes,α,D,ycorr,vare,vara,Rinv,nObs) @@ -51,7 +51,7 @@ end function MTGBLUP!(Mi::Genotypes,ycorr_array,ycorr,vare,Rinv) iR0 = inv(vare) - iGM = inv(Mi.G) + iGM = inv(Mi.G.val) for trait = 1:Mi.ntraits ycorr_array[trait][:] = ycorr_array[trait] + Mi.genotypes*Mi.α[trait] end diff --git a/src/1.JWAS/src/markers/BayesianAlphabet/MTBayesABC.jl b/src/1.JWAS/src/markers/BayesianAlphabet/MTBayesABC.jl index 1e4045a0..20de78f3 100644 --- a/src/1.JWAS/src/markers/BayesianAlphabet/MTBayesABC.jl +++ b/src/1.JWAS/src/markers/BayesianAlphabet/MTBayesABC.jl @@ -97,14 +97,14 @@ function MTBayesABC_samplerII!(xArray, betaArray, deltaArray, alphaArray, - vare, #mme.R, t-by-t + vare, #mme.R.val, t-by-t varEffects, # vector of length #SNPs, each element is a t-by-t covariance matrix BigPi) #genotypes.π nMarkers = length(xArray) ntraits = length(alphaArray) - Rinv = inv(vare) #inv(mme.R) + Rinv = inv(vare) #inv(mme.R.val) Ginv = inv.(varEffects) β = zeros(typeof(betaArray[1][1]),ntraits) diff --git a/src/1.JWAS/src/markers/readgenotypes.jl b/src/1.JWAS/src/markers/readgenotypes.jl index e4f402ee..28dc07da 100644 --- a/src/1.JWAS/src/markers/readgenotypes.jl +++ b/src/1.JWAS/src/markers/readgenotypes.jl @@ -31,16 +31,16 @@ function add_genotypes(mme::MME,M::Union{AbstractString,Array{Float64,2},Array{F genotypei.ntraits = mme.nModels genotypei.trait_names = string.(mme.lhsVec) if mme.nModels != 1 - genotypei.df = genotypei.df + mme.nModels + genotypei.G.df = genotypei.G.df + mme.nModels end genotypes = [] push!(genotypes,genotypei) mme.M = genotypes if mme.nModels == 1 #?move to set_marker_hyperparameters_variances_and_pi? - mme.df.marker = Float32(df) + mme.M[1].G.df = Float32(df) else νG0 = Float32(df) + mme.nModels - mme.df.marker = νG0 #final df for inverse wishart + mme.M[1].G.df = νG0 #final df for inverse wishart end end ################################################################################ @@ -54,7 +54,7 @@ end get_genotypes(file::Union{AbstractString,Array{Float64,2},Array{Float32,2},Array{Any,2},DataFrames.DataFrame},G=false; separator=',',header=true,rowID=false, center=true,quality_control=false, - method = "RR-BLUP",Pi = 0.0,estimatePi = true,estimateScale=false, + method = "RR-BLUP",Pi = 0.0,estimatePi = true,estimate_scale=false, G_is_marker_variance = false,df = 4.0) * Get marker informtion from a genotype file/matrix. This file needs to be column-wise sorted by marker positions. * If a text file is provided, the file format should be: @@ -79,15 +79,22 @@ end a dictionary such as `Pi=Dict([1.0; 1.0]=>0.7,[1.0; 0.0]=>0.1,[0.0; 1.0]=>0.1,[0.0; 0.0]=>0.1)`, defaulting to `all markers have effects (Pi = 0.0)` in single-trait analysis and `all markers have effects on all traits (Pi=Dict([1.0; 1.0]=>1.0,[0.0; 0.0]=>0.0))` in multi-trait analysis. `Pi` is estimated if `estimatePi` = true, , defaulting to `false`. -* Scale parameter for prior of marker effect variance is estimated if `estimateScale` = true, defaulting to `false`. +* Scale parameter for prior of marker effect variance is estimated if `estimate_scale` = true, defaulting to `false`. """ function get_genotypes(file::Union{AbstractString,Array{Float64,2},Array{Float32,2},Array{Any,2},DataFrames.DataFrame},G=false; - method = "BayesC",Pi = 0.0,estimatePi = true, estimateVariance=true, estimateScale=false, + ## method: + method = "BayesC",Pi = 0.0,estimatePi = true, + ## variance: + G_is_marker_variance = false, df = 4.0, + estimate_variance=true, estimate_scale=false, + constraint = false, #for multi-trait only, constraint=true means no genetic covariance among traits + ## format: separator=',',header=true,rowID=false, - center=true,G_is_marker_variance = false,df = 4.0, - starting_value=false, - quality_control=true, MAF = 0.01, missing_value = 9.0) + ## quality control: + quality_control=true, MAF = 0.01, missing_value = 9.0, + ## others: + center=true,starting_value=false) #Read the genotype file if typeof(file) <: AbstractString printstyled("The delimiterd in ",split(file,['/','\\'])[end]," is \'",separator,"\'. ",bold=false,color=:green) @@ -204,17 +211,16 @@ function get_genotypes(file::Union{AbstractString,Array{Float64,2},Array{Float32 end genotypes = Genotypes(obsID,markerID,nObs,nMarkers,p,sum2pq,center,genotypes,isGRM) - if G_is_marker_variance == true - genotypes.G = G - else - genotypes.genetic_variance = G - end + + genotypes.G = Variance(G_is_marker_variance ? G : false, df,false,estimate_variance,estimate_scale,constraint) + genotypes.genetic_variance = Variance(G_is_marker_variance ? false : G, df,false,estimate_variance,estimate_scale,constraint) + genotypes.method = method genotypes.estimatePi = estimatePi genotypes.π = Pi - genotypes.df = df #It will be modified base on number of traits in build_model() - genotypes.estimateScale = estimateScale - genotypes.estimateVariance = estimateVariance + # genotypes.df = df #It will be modified base on number of traits in build_model() + # genotypes.estimate_scale = estimate_scale + # genotypes.estimate_variance = estimate_variance writedlm("IDs_for_individuals_with_genotypes.txt",genotypes.obsID) println("Genotype informatin:") diff --git a/src/1.JWAS/src/markers/tools4genotypes.jl b/src/1.JWAS/src/markers/tools4genotypes.jl index 88152f8e..0dc25437 100644 --- a/src/1.JWAS/src/markers/tools4genotypes.jl +++ b/src/1.JWAS/src/markers/tools4genotypes.jl @@ -153,44 +153,44 @@ function set_marker_hyperparameters_variances_and_pi(mme::MME) Mi.π = Pi end #(2) marker effect variances - if Mi.G == false + if Mi.G.val == false if Mi.method!="GBLUP" genetic2marker(Mi,Mi.π) println() if mme.nModels != 1 || mme.MCMCinfo.RRM != false - if !isposdef(Mi.G) #also work for scalar + if !isposdef(Mi.G.val) #also work for scalar error("Marker effects covariance matrix is not postive definite! Please modify the argument: Pi.") end println("The prior for marker effects covariance matrix is calculated from genetic covariance matrix and Π.") println("The mean of the prior for the marker effects covariance matrix is:") if mme.MCMCinfo.printout_model_info==true - Base.print_matrix(stdout,round.(Mi.G,digits=6)) + Base.print_matrix(stdout,round.(Mi.G.val,digits=6)) end else - if !isposdef(Mi.G) #positive scalar (>0) + if !isposdef(Mi.G.val) #positive scalar (>0) error("Marker effects variance is negative!") end println("The prior for marker effects variance is calculated from the genetic variance and π.") print("The mean of the prior for the marker effects variance is: ") - print(round.(Mi.G,digits=6)) + print(round.(Mi.G.val,digits=6)) end print("\n\n\n") elseif Mi.method == "GBLUP" - Mi.G = Mi.genetic_variance + Mi.G.val = Mi.genetic_variance.val end end #(3) scale parameter for marker effect variance if Mi.ntraits == 1 && mme.MCMCinfo.RRM == false - Mi.scale = Mi.G*(Mi.df-Mi.ntraits-1)/Mi.df + Mi.G.scale = Mi.G.val*(Mi.G.df-Mi.ntraits-1)/Mi.G.df else - Mi.scale = Mi.G*(Mi.df-Mi.ntraits-1) + Mi.G.scale = Mi.G.val*(Mi.G.df-Mi.ntraits-1) end end end end function genetic2marker(M::Genotypes,Pi::Dict) - ntraits = size(M.genetic_variance,1) + ntraits = size(M.genetic_variance.val,1) denom = zeros(ntraits,ntraits) for i in 1:ntraits for j in i:ntraits @@ -200,9 +200,9 @@ function genetic2marker(M::Genotypes,Pi::Dict) denom[j,i] = denom[i,j] end end - M.G = M.genetic_variance ./ denom + M.G.val = M.genetic_variance.val ./ denom end function genetic2marker(M::Genotypes,π::Float64) - M.G = M.genetic_variance/((1-π)*M.sum2pq) + M.G.val = M.genetic_variance.val/((1-π)*M.sum2pq) end diff --git a/src/1.JWAS/src/output.jl b/src/1.JWAS/src/output.jl index 978c28ad..f76a456f 100644 --- a/src/1.JWAS/src/output.jl +++ b/src/1.JWAS/src/output.jl @@ -145,7 +145,7 @@ function output_result(mme,output_folder, if Mi.estimatePi == true output["pi_"*Mi.name] = dict2dataframe(Mi.mean_pi,Mi.mean_pi2) end - if Mi.estimateScale == true + if Mi.estimate_scale == true output["ScaleEffectVar"*Mi.name] = matrix2dataframe(string.(mme.lhsVec),Mi.meanScaleVara,Mi.meanScaleVara2) end end @@ -373,7 +373,7 @@ function output_MCMC_samples_setup(mme,nIter,output_samples_frequency,file_name= #add headers mytraits=map(string,mme.lhsVec) - if mme.MCMCinfo.mega_trait == false + if mme.R.constraint == false varheader = repeat(mytraits,inner=length(mytraits)).*"_".*repeat(mytraits,outer=length(mytraits)) else varheader = transubstrarr(map(string,mme.lhsVec)) @@ -411,6 +411,8 @@ function output_MCMC_samples_setup(mme,nIter,output_samples_frequency,file_name= writedlm(outfile["EBV_"*string(mme.lhsVec[traiti])],transubstrarr(mme.output_ID),',') end if mme.MCMCinfo.output_heritability == true && mme.MCMCinfo.single_step_analysis == false + varheader = repeat(mytraits,inner=length(mytraits)).*"_".*repeat(mytraits,outer=length(mytraits)) + writedlm(outfile["genetic_variance"],transubstrarr(varheader),',') if mme.MCMCinfo.RRM == false writedlm(outfile["heritability"],transubstrarr(map(string,mme.lhsVec)),',') @@ -436,10 +438,10 @@ function output_MCMC_samples(mme,vRes,G0, #random effects variances for effect in mme.rndTrmVec trmStri = join(effect.term_array, "_") - writedlm(outfile[trmStri*"_variances"],vec(inv(effect.Gi))',',') + writedlm(outfile[trmStri*"_variances"],vec(inv(effect.Gi.val))',',') end - if mme.MCMCinfo.mega_trait != false + if mme.R.constraint == true vRes=diag(vRes) end writedlm(outfile["residual_variance"],(typeof(vRes) <: Number) ? vRes : vec(vRes)' ,',') @@ -456,14 +458,14 @@ function output_MCMC_samples(mme,vRes,G0, writedlm(outfile["marker_effects_"*Mi.name*"_"*geno_names[traiti]],Mi.α[traiti]',',') end - if Mi.G != false && mme.MCMCinfo.mega_trait == false #Do not save marker effect variances in mega-trait analysis + if Mi.G.val != false && mme.nonlinear_function == false #do not save marker effect variances in NNMM to save space if mme.nModels == 1 - writedlm(outfile["marker_effects_variances"*"_"*Mi.name],Mi.G',',') + writedlm(outfile["marker_effects_variances"*"_"*Mi.name],Mi.G.val',',') else if Mi.method == "BayesB" - writedlm(outfile["marker_effects_variances"*"_"*Mi.name],hcat([x for x in Mi.G]...),',') + writedlm(outfile["marker_effects_variances"*"_"*Mi.name],hcat([x for x in Mi.G.val]...),',') else - writedlm(outfile["marker_effects_variances"*"_"*Mi.name],Mi.G,',') + writedlm(outfile["marker_effects_variances"*"_"*Mi.name],Mi.G.val,',') end end end @@ -486,14 +488,15 @@ function output_MCMC_samples(mme,vRes,G0, if mme.MCMCinfo.output_heritability == true && mme.MCMCinfo.single_step_analysis == false #single-trait: a scalar ; multi-trait: a matrix; mega-trait: a vector - mygvar = cov(EBVmat) #this might be slow in megatrats - if mme.MCMCinfo.mega_trait != false - mygvar=diag(mygvar) + if mme.M[1].G.constraint==true + mygvar = Diagonal(vec(var(EBVmat,dims=1))) + else + mygvar = cov(EBVmat) end genetic_variance = (ntraits == 1 ? mygvar : vec(mygvar)') if mme.MCMCinfo.RRM == false - heritability = (ntraits == 1 ? mygvar/(mygvar+vRes) : - (typeof(mygvar)<:Vector ? (mygvar./(mygvar+vRes))' : (diag(mygvar)./(diag(mygvar)+diag(vRes)))')) + vRes = mme.R.constraint==true ? Diagonal(vRes) : vRes #change to diagonal matrix to avoid error + heritability = (ntraits == 1 ? mygvar/(mygvar+vRes) : (diag(mygvar)./(diag(mygvar)+diag(vRes)))') writedlm(outfile["heritability"],heritability,',') end writedlm(outfile["genetic_variance"],genetic_variance,',') @@ -554,12 +557,13 @@ end function output_posterior_mean_variance(mme,nsamples) mme.solMean += (mme.sol - mme.solMean)/nsamples mme.solMean2 += (mme.sol .^2 - mme.solMean2)/nsamples - mme.meanVare += (mme.R - mme.meanVare)/nsamples - mme.meanVare2 += (mme.R .^2 - mme.meanVare2)/nsamples + mme.meanVare += (mme.R.val - mme.meanVare)/nsamples + mme.meanVare2 += (mme.R.val .^2 - mme.meanVare2)/nsamples if mme.pedTrmVec != 0 - mme.G0Mean += (inv(mme.Gi) - mme.G0Mean )/nsamples - mme.G0Mean2 += (inv(mme.Gi) .^2 - mme.G0Mean2 )/nsamples + polygenic_pos = findfirst(i -> i.randomType=="A", mme.rndTrmVec) + mme.G0Mean += (inv(mme.rndTrmVec[polygenic_pos].Gi.val) - mme.G0Mean )/nsamples + mme.G0Mean2 += (inv(mme.rndTrmVec[polygenic_pos].Gi.val) .^2 - mme.G0Mean2 )/nsamples end if mme.M != 0 for Mi in mme.M @@ -569,7 +573,7 @@ function output_posterior_mean_variance(mme,nsamples) Mi.meanDelta[trait] += (Mi.δ[trait] - Mi.meanDelta[trait])/nsamples end if Mi.estimatePi == true - if Mi.ntraits == 1 || mme.MCMCinfo.mega_trait + if Mi.ntraits == 1 || mme.M[1].G.constraint==true #may need to change for multiple M Mi.mean_pi += (Mi.π-Mi.mean_pi)/nsamples Mi.mean_pi2 += (Mi.π .^2-Mi.mean_pi2)/nsamples else @@ -580,12 +584,12 @@ function output_posterior_mean_variance(mme,nsamples) end end if Mi.method != "BayesB" - Mi.meanVara += (Mi.G - Mi.meanVara)/nsamples - Mi.meanVara2 += (Mi.G .^2 - Mi.meanVara2)/nsamples + Mi.meanVara += (Mi.G.val - Mi.meanVara)/nsamples + Mi.meanVara2 += (Mi.G.val .^2 - Mi.meanVara2)/nsamples end - if Mi.estimateScale == true - Mi.meanScaleVara += (Mi.scale - Mi.meanScaleVara)/nsamples - Mi.meanScaleVara2 += (Mi.scale .^2 - Mi.meanScaleVara2)/nsamples + if Mi.estimate_scale == true + Mi.meanScaleVara += (Mi.G.scale - Mi.meanScaleVara)/nsamples + Mi.meanScaleVara2 += (Mi.G.scale .^2 - Mi.meanScaleVara2)/nsamples end end end diff --git a/src/1.JWAS/src/random_effects.jl b/src/1.JWAS/src/random_effects.jl index bb0b416c..129a264e 100644 --- a/src/1.JWAS/src/random_effects.jl +++ b/src/1.JWAS/src/random_effects.jl @@ -37,13 +37,20 @@ G = [6.72 2.84 set_random(model,"animal",ped,G) ``` """ -function set_random(mme::MME,randomStr::AbstractString,ped::PedModule.Pedigree,G=false;df=4.0) +function set_random(mme::MME,randomStr::AbstractString,ped::PedModule.Pedigree, + ## variance: + G=false; + df=4.0, + estimate_variance=true, estimate_scale=false, + constraint=false #for multi-trait only, constraint=true means no covariance for this random effects among traits + ) if mme.ped == 0 mme.ped = deepcopy(ped) else error("Pedigree information is already added. Polygenic effects can only be set for one time") end - set_random(mme,randomStr,G,Vinv=mme.ped,df=df) + set_random(mme,randomStr,G,Vinv=mme.ped,df=df, + estimate_variance=estimate_variance,estimate_scale=estimate_scale,constraint=constraint) end ############################################################################ #Set specific ModelTerm as random (Not specific for IID or A(pedigree)) @@ -83,7 +90,10 @@ G = 0.6 set_random(model,"litter",G,Vinv=inv(V),names=[a1;a2;a3]) ``` """ -function set_random(mme::MME,randomStr::AbstractString,G=false;Vinv=0,names=[],df=4.0) +function set_random(mme::MME,randomStr::AbstractString,G=false;Vinv=0,names=[],df=4.0, + estimate_variance=true,estimate_scale=false, + constraint=false #for multi-trait only, constraint=true means no covariance for this random effects among traits + ) ############################################################################ #Pre-Check ############################################################################ @@ -94,6 +104,11 @@ function set_random(mme::MME,randomStr::AbstractString,G=false;Vinv=0,names=[],d end names = string.(names) df = Float32(df) + + if constraint != false #check constraint + error("Constraint for set_random is not supported now.") + end + ############################################################################ #add trait names (model equation number) to variables; #e.g., "litter"=>"y1:litter";"ϵ"=>"y1:ϵ" @@ -149,8 +164,8 @@ function set_random(mme::MME,randomStr::AbstractString,G=false;Vinv=0,names=[],d mme.Gi = Gi ν, k = Float32(df), size(mme.pedTrmVec,1) νG0 = ν + k - mme.df.polygenic = νG0 #final df for this inverse wisahrt - mme.scalePed = G*(mme.df.polygenic - k - 1) + df_polygenic = νG0 #final df for this inverse wisahrt + scale_polygenic = G*(df_polygenic - k - 1) random_type = "A" elseif Vinv != 0 random_type = "V" @@ -161,10 +176,13 @@ function set_random(mme::MME,randomStr::AbstractString,G=false;Vinv=0,names=[],d random_type = "I" end term_array = res - df = df+length(term_array) - scale = G*(df-length(term_array)-1) #G*(df-2)/df #from inv χ to inv-wishat + df = random_type == "A" ? df_polygenic : df+length(term_array) + scale = random_type == "A" ? scale_polygenic : G*(df-length(term_array)-1) #G*(df-2)/df #from inv χ to inv-wishat Vinv = Float32.(Vinv) - randomEffect = RandomEffect(term_array,Gi,GiOld,GiNew,df,scale,Vinv,names,random_type) + Gi_struct = Variance(Gi, df, scale, estimate_variance, estimate_scale, constraint) + GiOld_struct = deepcopy(Gi_struct) + GiNew_struct = deepcopy(Gi_struct) + randomEffect = RandomEffect(term_array,Gi_struct,GiOld_struct,GiNew_struct, Vinv,names,random_type) push!(mme.rndTrmVec,randomEffect) nothing end @@ -187,9 +205,9 @@ end #NOTE: SINGLE TRAIT #The equation Ai*(GiNew*R - GiOld*ROld) is used to update Ai part in LHS #The 1st time to add Ai to set up MME, -#mme.GiOld == zeros(G),mme.GiNew == inv(G),mme.R == mme.Rold= R +#mme.GiOld == zeros(G),mme.GiNew == inv(G), mme.R.val == mme.Rold= R #After that, if variances are constant, -#mme.GiOld == mme.GiNew; mme.R == mme.Rold +#mme.GiOld == mme.GiNew; mme.R.val == mme.Rold #If sampling genetic variances, mme.Ginew is updated with a new sample, then #LHS is update as Ai*(GiNew*R - GiOld*ROld), then GiOld = Ginew #if sample residual variances, similar approaches to update the LHS is used. @@ -208,7 +226,7 @@ function addVinv(mme::MME) startPosj = randTrmj.startPos endPosj = startPosj + randTrmj.nLevels - 1 #lamda version (single trait) or not (multi-trait) - myaddVinv = (mme.nModels!=1) ? (Vi*random_term.Gi[i,j]) : (Vi*(random_term.GiNew[i,j]*mme.R - random_term.GiOld[i,j]*mme.ROld)) + myaddVinv = (mme.nModels!=1) ? (Vi*random_term.Gi.val[i,j]) : (Vi*(random_term.GiNew.val[i,j]*mme.R.val - random_term.GiOld.val[i,j]*mme.ROld)) mme.mmeLhs[startPosi:endPosi,startPosj:endPosj] = mme.mmeLhs[startPosi:endPosi,startPosj:endPosj] + myaddVinv end diff --git a/src/1.JWAS/src/residual.jl b/src/1.JWAS/src/residual.jl index 1b9e96d0..084a99c4 100644 --- a/src/1.JWAS/src/residual.jl +++ b/src/1.JWAS/src/residual.jl @@ -13,7 +13,7 @@ end #make tricky Ri (big) allowing NA in phenotypes and fixed effects #make ResVar, dictionary for Rinv, no sample Missing residuals function mkRi(mme::MME,df::DataFrame,Rinv) - resVar = ResVar(mme.R,Dict()) + resVar = ResVar(mme.R.val,Dict()) tstMsng = .!ismissing.(Matrix(df[!,mme.lhsVec])) if mme.missingPattern == false mme.missingPattern = tstMsng @@ -62,9 +62,9 @@ function sampleMissingResiduals(mme,resVec) mybool = mybool .& (msngPtrn[:,i].==notmissing[i]) end #create cov and var matrices for BLP imputation - Ri = inv(Symmetric(mme.R[notmissing,notmissing])) - Rc = mme.R[.!notmissing,notmissing] - L = (cholesky(Symmetric(mme.R[.!notmissing,.!notmissing] - Rc*Ri*Rc')).U)#scalar cholesky + Ri = inv(Symmetric(mme.R.val[notmissing,notmissing])) + Rc = mme.R.val[.!notmissing,notmissing] + L = (cholesky(Symmetric(mme.R.val[.!notmissing,.!notmissing] - Rc*Ri*Rc')).U)#scalar cholesky #imputation mydata[mybool,.!notmissing]= mydata[mybool,notmissing]*Ri*Rc' + randn(sum(mybool),sum(.!notmissing))*L end @@ -84,8 +84,8 @@ end # msng = .!notMsng # resi = resVec[yIndex .+ i][notMsng] # Ri = mme.resVar.RiDict[notMsng][notMsng,notMsng] -# Rc = mme.R[msng,notMsng] -# L = (cholesky(Symmetric(mme.R[msng,msng] - Rc*Ri*Rc')).U)' +# Rc = mme.R.val[msng,notMsng] +# L = (cholesky(Symmetric(mme.R.val[msng,msng] - Rc*Ri*Rc')).U)' # resVec[(yIndex .+ i)[msng]] = Rc*Ri*resi + L*randn(sum(msng)) # #resVec[yIndex+i][msng] = Rc*Ri*resi + L*randn(nMsng) this does not work! # end diff --git a/src/1.JWAS/src/types.jl b/src/1.JWAS/src/types.jl index 2dd86c96..a50e483d 100644 --- a/src/1.JWAS/src/types.jl +++ b/src/1.JWAS/src/types.jl @@ -46,6 +46,25 @@ mutable struct ModelTerm end end +################################################################################ +# struct for variance/covariance +# e.g. Variance(1.5,4.0,1.3, true, false, true) +# - marker effect variances (??locus-specific variance: how to reduce memory) +# - residual variances +# - variance for non-genetic effects +################################################################################ +mutable struct Variance + val::Union{AbstractFloat, AbstractArray, Bool} #value of the variance, e.g., single-trait: 1.5, two-trait: [1.3 0.4; 0.4 0.8] + df::Union{AbstractFloat,Bool} #degrees of freedom, e.g., 4.0 + scale::Union{AbstractFloat, AbstractArray, Bool} #scale, e.g., single-trait: 1.0, two-trait: [1.0 0; 0 1.0] + + estimate_variance::Bool #estimate_variance=true means estimate variance at each MCMC iteration + estimate_scale::Bool #estimate_scale=true means estimate scale at each MCMC iteration + constraint::Bool #constraint=true means in multi-trait analysis, covariance is zero +end + + + ################################################################################ #A class for residual covariance matrix for all observations of size (nob*nModel) #where Ri is modified based on missing pattern (number of Ri= 2^ntraits-1) @@ -66,11 +85,11 @@ end ################################################################################ mutable struct RandomEffect #Better to be a dict? key: term_array::Array{AbstractString,1}?? term_array::Array{AbstractString,1} - Gi #covariance matrix (multi-trait) #::Array{Float64,2} - GiOld #specific for lambda version of MME (single-trait) #::Array{Float64,2} - GiNew #specific for lambda version of MME (single-trait) #::Array{Float64,2} - df::AbstractFloat - scale #::Array{Float64,2} + Gi #covariance matrix (multi-trait) #the "Variance" object + GiOld #specific for lambda version of MME (single-trait) #Variance.val has type ::Array{Float64,2} + GiNew #specific for lambda version of MME (single-trait) #Variance.val has type ::Array{Float64,2} + # df::AbstractFloat + # scale #::Array{Float64,2} Vinv # 0, identity matrix names #[] General IDs and Vinv matrix (order is important now)(modelterm.names) randomType::String @@ -91,15 +110,15 @@ mutable struct Genotypes nLoci #number of markers included in the model ntraits #number of traits included in the model - genetic_variance #genetic variance - G #marker effect variance; ST->Float64;MT->Array{Float64,2} - scale #scale parameter for marker effect variance (G) - df #degree of freedom + genetic_variance #genetic variance, type: Variance struct + G #marker effect variance; the "Variance" object, for Variance.val: ST->Float64;MT->Array{Float64,2} +# scale #scale parameter for marker effect variance (G) +# df #degree of freedom method #prior for marker effects (Bayesian ALphabet, GBLUP ...) estimatePi - estimateVariance - estimateScale + estimate_variance + estimate_scale mArray #a collection of matrices used in Bayesian Alphabet mRinvArray #a collection of matrices used in Bayesian Alphabet @@ -129,7 +148,7 @@ mutable struct Genotypes Genotypes(a1,a2,a3,a4,a5,a6,a7,a8,a9)=new(false,false, a1,a2,a3,a4,a5,a6,a7,a8,a4,false, - false,false,false,false, + Variance(false,false,false,true,false,false),Variance(false,false,false,true,false,false), #false,false, false,true,true,false, false,false,false,false,false,false, false,false,false,false, @@ -137,12 +156,12 @@ mutable struct Genotypes false,a9) end -mutable struct DF - residual::AbstractFloat - polygenic::AbstractFloat #df+size(mme.pedTrmVec,1) - marker::AbstractFloat - random::AbstractFloat -end +# mutable struct DF +# residual::AbstractFloat +# polygenic::AbstractFloat #df+size(mme.pedTrmVec,1) +# marker::AbstractFloat +# random::AbstractFloat +# end mutable struct MCMCinfo heterogeneous_residuals @@ -154,8 +173,8 @@ mutable struct MCMCinfo single_step_analysis fitting_J_vector missing_phenotypes - constraint - mega_trait + # constraint + # mega_trait estimate_variance update_priors_frequency outputEBV @@ -210,11 +229,11 @@ mutable struct MME #may merge pedTrmVec here #RESIDUAL EFFECTS - R #residual covariance matrix (multi-trait) ::Array{Union{Float64,Float32},2} + R::Variance #residual covariance matrix (multi-trait) ::Array{Union{Float64,Float32},2} missingPattern #for impuation of missing residual resVar #for impuation of missing residual ROld #initilized to 0 ?? #residual variance (single-trait) for - scaleR #scale parameters + # scaleR #scale parameters meanVare meanVare2 @@ -226,7 +245,7 @@ mutable struct MME outputSamplesVec::Array{ModelTerm,1} #for which location parameters to save MCMC samples - df::DF #prior degree of freedom + # df::DF #prior degree of freedom output_ID output_genotypes @@ -257,25 +276,17 @@ mutable struct MME traits_type #by default all traits are continuous thresholds #thresholds for categorial&binary traits. Dictionary: 1=>[-Inf,0,Inf], where 1 means the 1st trait - function MME(nModels,modelVec,modelTerms,dict,lhsVec,R,ν) - if nModels == 1 - scaleR = R*(ν-2)/ν - νR0 = ν - else - ν,k = ν, nModels - νR0 = ν + k - scaleR = R*(νR0 - k - 1) - end + function MME(nModels,modelVec,modelTerms,dict,lhsVec,R) #MME(nModels,modelVec,modelTerms,dict,lhsVec,R,ν) return new(nModels,modelVec,modelTerms,dict,lhsVec,[], 0,0,[],0,0, 0,0,zeros(1,1),zeros(1,1),zeros(1,1),zeros(1,1),false,false, [], - R,0,0,R,scaleR,false,false, + R,0,0,R,false,false, #R,0,0,R,scaleR,false,false, [], 0, 1, [], - DF(νR0,4,4,4), + # DF(νR0,4,4,4), 0,0,Dict{String,Any}(), 0, 0, diff --git a/src/1.JWAS/src/variance_components.jl b/src/1.JWAS/src/variance_components.jl index a746b278..36e0339e 100644 --- a/src/1.JWAS/src/variance_components.jl +++ b/src/1.JWAS/src/variance_components.jl @@ -90,9 +90,9 @@ function sample_variance(ycorr_array, nobs, df, scale, invweights, constraint; b R = sample_from_conditional_inverse_Wishart(df + nobs, convert(Array,Symmetric(inv(scale + SSE))), binary_trait_index) end else #diagonal elements only, from scale-inv-⁠χ2 - R = zeros(ntraits,ntraits) + R = Diagonal(zeros(ntraits)) for traiti = 1:ntraits - R[traiti,traiti]= (SSE[traiti,traiti]+df*scale[traiti])/rand(Chisq(nobs+df)) + R[traiti,traiti]= (SSE[traiti,traiti]+df*scale[traiti,traiti])/rand(Chisq(nobs+df)) end end return R @@ -120,13 +120,13 @@ function sampleVCs(mme::MME,sol::Union{Array{Float64,1},Array{Float32,1}}) end end q = mme.modelTermDict[term_array[1]].nLevels - G0 = rand(InverseWishart(random_term.df + q, convert(Array,Symmetric(random_term.scale + S)))) + G0 = rand(InverseWishart(random_term.Gi.df + q, convert(Array,Symmetric(random_term.Gi.scale + S)))) if mme.MCMCinfo.double_precision == false G0 = Float32.(G0) end - random_term.GiOld = copy(random_term.GiNew) - random_term.GiNew = copy(inv(G0)) - random_term.Gi = copy(inv(G0)) + random_term.GiOld.val = copy(random_term.GiNew.val) + random_term.GiNew.val = copy(inv(G0)) + random_term.Gi.val = copy(inv(G0)) if random_term.randomType == "A" mme.Gi = random_term.Gi end @@ -135,7 +135,7 @@ end ######################################################################## # Marker Effects Variance ######################################################################## -function sample_marker_effect_variance(Mi,constraint=false) +function sample_marker_effect_variance(Mi) if Mi.method == "BayesL" invweights = 1 ./ Mi.gammaArray elseif Mi.method == "GBLUP" @@ -146,27 +146,27 @@ function sample_marker_effect_variance(Mi,constraint=false) if Mi.ntraits == 1 if Mi.method in ["BayesC","BayesL","RR-BLUP","GBLUP"] nloci = Mi.method == "BayesC" ? sum(Mi.δ[1]) : Mi.nMarkers - Mi.G = sample_variance(Mi.α[1], nloci, Mi.df, Mi.scale, invweights) + Mi.G.val = sample_variance(Mi.α[1], nloci, Mi.G.df, Mi.G.scale, invweights) if Mi.method == "BayesL" - sampleGammaArray!(Mi.gammaArray,Mi.α,Mi.G) # MH sampler of gammaArray (Appendix C in paper) + sampleGammaArray!(Mi.gammaArray,Mi.α,Mi.G.val) # MH sampler of gammaArray (Appendix C in paper) end elseif Mi.method == "BayesB" for j=1:Mi.nMarkers - Mi.G[j] = sample_variance(Mi.β[1][j],1,Mi.df, Mi.scale) + Mi.G.val[j] = sample_variance(Mi.β[1][j],1,Mi.G.df, Mi.G.scale) end end else if Mi.method in ["RR-BLUP","BayesC","BayesL","GBLUP"] data = (Mi.method == "BayesC" ? Mi.β : Mi.α) - Mi.G =sample_variance(data, Mi.nMarkers, Mi.df, Mi.scale, invweights, constraint) + Mi.G.val =sample_variance(data, Mi.nMarkers, Mi.G.df, Mi.G.scale, invweights, Mi.G.constraint) if Mi.method == "BayesL" - sampleGammaArray!(Mi.gammaArray,Mi.α,Mi.G) #MH sampler of gammaArray (Appendix C in paper) + sampleGammaArray!(Mi.gammaArray,Mi.α,Mi.G.val) #MH sampler of gammaArray (Appendix C in paper) end elseif Mi.method == "BayesB" #potential slowdown (scalar multiplication is used instead of matrices) marker_effects_matrix = reduce(hcat,Mi.β)' for i = 1:Mi.nMarkers data = marker_effects_matrix[:,i] - Mi.G[i] = sample_variance(data, 1, Mi.df, Mi.scale, false, constraint) + Mi.G.val[i] = sample_variance(data, 1, Mi.G.df, Mi.G.scale, false, Mi.G.constraint) end end end