From 944968ebb0b01a0f24eac70e6a7f2d650ce5df27 Mon Sep 17 00:00:00 2001 From: tj Date: Thu, 18 May 2023 14:19:26 -0700 Subject: [PATCH 1/8] mega trait: fix the scale of inverse Wishart --- src/1.JWAS/src/variance_components.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/1.JWAS/src/variance_components.jl b/src/1.JWAS/src/variance_components.jl index a746b278..102b0ed0 100644 --- a/src/1.JWAS/src/variance_components.jl +++ b/src/1.JWAS/src/variance_components.jl @@ -92,7 +92,7 @@ function sample_variance(ycorr_array, nobs, df, scale, invweights, constraint; b else #diagonal elements only, from scale-inv-⁠χ2 R = zeros(ntraits,ntraits) for traiti = 1:ntraits - R[traiti,traiti]= (SSE[traiti,traiti]+df*scale[traiti])/rand(Chisq(nobs+df)) + R[traiti,traiti]= (SSE[traiti,traiti]+scale[traiti])/rand(Chisq(nobs+df)) #scale is a vector in mega_trait, where t-th entry is the scale for t-th trait end end return R From 4e6cc439d25452e0c5a30ccd575e9331f40d4bcb Mon Sep 17 00:00:00 2001 From: tj Date: Wed, 7 Jun 2023 13:56:42 -0700 Subject: [PATCH 2/8] GWAS: remove rounding for "prop_genvar" --- src/3.GWAS/src/GWAS.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/3.GWAS/src/GWAS.jl b/src/3.GWAS/src/GWAS.jl index 713156e3..abf2a654 100644 --- a/src/3.GWAS/src/GWAS.jl +++ b/src/3.GWAS/src/GWAS.jl @@ -174,7 +174,7 @@ function GWAS(mme,map_file,marker_effects_file::AbstractString...; end winVarProps[isnan.(winVarProps)] .= 0.0 #replace NaN caused by situations no markers are included in the model WPPA, prop_genvar = vec(mean(winVarProps .> threshold,dims=1)), vec(mean(winVarProps,dims=1)) - prop_genvar = round.(prop_genvar*100,digits=2) + prop_genvar = prop_genvar*100 winVarmean = vec(mean(winVar,dims=1)) winVarstd = vec(std(winVar,dims=1)) From 270c3a0432cef3ae0ae9b6e7d53d339a32d86fe3 Mon Sep 17 00:00:00 2001 From: tj Date: Sun, 30 Jul 2023 20:52:57 -0700 Subject: [PATCH 3/8] variance module unfinished --- src/1.JWAS/src/JWAS.jl | 84 ++++++----- src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl | 132 ++++++++++-------- src/1.JWAS/src/MCMC/deprecated.jl | 2 +- src/1.JWAS/src/Nonlinear/nnbayes_check.jl | 26 ++-- src/1.JWAS/src/Nonlinear/nonlinear.jl | 2 +- .../src/RRM/MCMC_BayesianAlphabet_RRM.jl | 55 +++++--- src/1.JWAS/src/build_MME.jl | 47 +++++-- .../categorical_and_censored_trait.jl | 6 +- src/1.JWAS/src/input_data_validation.jl | 91 +++++++++--- src/1.JWAS/src/iterative_solver/solver.jl | 2 +- .../src/markers/BayesianAlphabet/GBLUP.jl | 6 +- .../markers/BayesianAlphabet/MTBayesABC.jl | 4 +- src/1.JWAS/src/markers/readgenotypes.jl | 40 +++--- src/1.JWAS/src/markers/tools4genotypes.jl | 22 +-- src/1.JWAS/src/output.jl | 53 ++++--- src/1.JWAS/src/random_effects.jl | 42 ++++-- src/1.JWAS/src/residual.jl | 12 +- src/1.JWAS/src/types.jl | 78 ++++++----- src/1.JWAS/src/variance_components.jl | 28 ++-- 19 files changed, 453 insertions(+), 279 deletions(-) diff --git a/src/1.JWAS/src/JWAS.jl b/src/1.JWAS/src/JWAS.jl index 3adde701..bd3cb9aa 100644 --- a/src/1.JWAS/src/JWAS.jl +++ b/src/1.JWAS/src/JWAS.jl @@ -108,7 +108,7 @@ export dataset methods = "conventional (no markers)", Pi = 0.0, estimatePi = false, - estimateScale = false) + estimate_scale = false) **Run MCMC for Bayesian Linear Mixed Models with or without estimation of variance components.** @@ -160,9 +160,9 @@ function runMCMC(mme::MME,df; pedigree = false, #parameters for single-step analysis fitting_J_vector = true, #parameters for single-step analysis causal_structure = false, - mega_trait = mme.nonlinear_function == false ? false : true, #NNBayes -> default mega_trait=true + # mega_trait = mme.nonlinear_function == false ? false : true, #NNBayes -> default mega_trait=true missing_phenotypes = mme.nonlinear_function == false ? true : false, #NN-MM -> missing hidden nodes will be sampled - constraint = false, + # constraint = false, RRM = false, # RRM -> false or a matrix (Phi: orthogonalized time covariates) #Genomic Prediction outputEBV = true, @@ -181,7 +181,7 @@ function runMCMC(mme::MME,df; methods = "conventional (no markers)", Pi = 0.0, estimatePi = false, - estimateScale = false, + estimate_scale = false, categorical_trait = false, #this has been moved to build_model() censored_trait = false) #this has been moved to build_model() @@ -215,6 +215,11 @@ function runMCMC(mme::MME,df; end end end + mme.R.constraint=true + for Mi in mme.M + Mi.G.constraint=true + end + nnmm_print_info_input_to_middle_layer(mme) end ############################################################################ #for deprecated JWAS fucntions @@ -225,7 +230,7 @@ function runMCMC(mme::MME,df; Mi.name = "geno" Mi.π = Pi Mi.estimatePi = estimatePi - Mi.estimateScale = estimateScale + Mi.estimate_scale = estimate_scale Mi.method = methods end end @@ -272,7 +277,7 @@ function runMCMC(mme::MME,df; mme.MCMCinfo = MCMCinfo(heterogeneous_residuals, chain_length,burnin,output_samples_frequency, printout_model_info,printout_frequency, single_step_analysis, - fitting_J_vector,missing_phenotypes,constraint,mega_trait,estimate_variance, + fitting_J_vector,missing_phenotypes,estimate_variance, update_priors_frequency,outputEBV,output_heritability,prediction_equation, seed,double_precision,output_folder,RRM) ############################################################################ @@ -322,26 +327,28 @@ function runMCMC(mme::MME,df; if mme.M != 0 for Mi in mme.M Mi.genotypes = map(Float64,Mi.genotypes) - Mi.G = map(Float64,Mi.G) + Mi.G.val = map(Float64,Mi.G.val) Mi.α = map(Float64,Mi.α) end end for random_term in mme.rndTrmVec random_term.Vinv = map(Float64,random_term.Vinv) - random_term.GiOld = map(Float64,random_term.GiOld) - random_term.GiNew = map(Float64,random_term.GiNew) - random_term.Gi = map(Float64,random_term.Gi) + random_term.GiOld.val = map(Float64,random_term.GiOld.val) + random_term.GiNew.val = map(Float64,random_term.GiNew.val) + random_term.Gi.val = map(Float64,random_term.Gi.val) end mme.Gi = map(Float64,mme.Gi) end - # NNBayes mega trait: from multi-trait to multiple single-trait - if mme.MCMCinfo.mega_trait == true - printstyled(" - Bayesian Alphabet: multiple independent single-trait Bayesian models are used to sample marker effect. \n",bold=false,color=:green) - printstyled(" - Multi-threaded parallelism: $nThread threads are used to run single-trait models in parallel. \n",bold=false,color=:green) - nnbayes_mega_trait(mme) - elseif mme.nonlinear_function != false #only print for NNBayes - printstyled(" - Bayesian Alphabet: multi-trait Bayesian models are used to sample marker effect. \n",bold=false,color=:green) + + + + #constraint on covariance matrix: modify df and scale + if mme.R.constraint==true + R_constraint!(mme) + end + if mme.M[1].G.constraint==true + G_constraint!(mme) end # NNBayes: modify parameters for partial connected NN @@ -489,7 +496,14 @@ function getMCMCinfo(mme) @printf("%-30s %20s\n","starting_value",mme.sol != false ? "true" : "false") @printf("%-30s %20d\n","printout_frequency",MCMCinfo.printout_frequency) @printf("%-30s %20d\n","output_samples_frequency",MCMCinfo.output_samples_frequency) - @printf("%-30s %20s\n","constraint",MCMCinfo.constraint ? "true" : "false") + # @printf("%-30s %20s\n","constraint",MCMCinfo.constraint ? "true" : "false") + @printf("%-30s %19s\n","constraint on residual variance",mme.R.constraint ? "true" : "false") + if length(mme.M)>0 + for Mi in mme.M + geno_name = Mi.name + @printf("%-30s %5s\n","constraint on marker effect variance for $geno_name",Mi.G.constraint ? "true" : "false") + end + end @printf("%-30s %20s\n","missing_phenotypes",MCMCinfo.missing_phenotypes ? "true" : "false") @printf("%-30s %20d\n","update_priors_frequency",MCMCinfo.update_priors_frequency) @printf("%-30s %20s\n","seed",MCMCinfo.seed) @@ -498,23 +512,26 @@ function getMCMCinfo(mme) for i in mme.rndTrmVec thisterm= join(i.term_array, ",") if mme.nModels == 1 - @printf("%-30s %20s\n","random effect variances ("*thisterm*"):",Float64.(round.(inv(i.GiNew),digits=3))) + @printf("%-30s %20s\n","random effect variances ("*thisterm*"):",Float64.(round.(inv(i.GiNew.val),digits=3))) else @printf("%-30s\n","random effect variances ("*thisterm*"):") - Base.print_matrix(stdout,round.(inv(i.GiNew),digits=3)) + Base.print_matrix(stdout,round.(inv(i.GiNew.val),digits=3)) println() end end + if mme.pedTrmVec!=0 + polygenic_pos = findfirst(i -> i.randomType=="A", mme.rndTrmVec) + end if mme.pedTrmVec!=0 @printf("%-30s\n","genetic variances (polygenic):") - Base.print_matrix(stdout,round.(inv(mme.Gi),digits=3)) + Base.print_matrix(stdout,round.(inv(mme.rndTrmVec[polygenic_pos].Gi.val),digits=3)) println() end if mme.nModels == 1 - @printf("%-30s %20.3f\n","residual variances:", mme.R) + @printf("%-30s %20.3f\n","residual variances:", mme.R.val) else @printf("%-30s\n","residual variances:") - Base.print_matrix(stdout,round.(mme.R,digits=3)) + Base.print_matrix(stdout,round.(mme.R.val,digits=3)) println() end @@ -527,21 +544,21 @@ function getMCMCinfo(mme) @printf("%-30s %20s\n","Genomic Category", Mi.name) @printf("%-30s %20s\n","Method",Mi.method) for Mi in mme.M - if Mi.genetic_variance != false + if Mi.genetic_variance.val != false if (mme.nModels == 1 || is_nnbayes_partial) && mme.MCMCinfo.RRM == false - @printf("%-30s %20.3f\n","genetic variances (genomic):",Mi.genetic_variance) + @printf("%-30s %20.3f\n","genetic variances (genomic):",Mi.genetic_variance.val) else @printf("%-30s\n","genetic variances (genomic):") - Base.print_matrix(stdout,round.(Mi.genetic_variance,digits=3)) + Base.print_matrix(stdout,round.(Mi.genetic_variance.val,digits=3)) println() end end if !(Mi.method in ["GBLUP"]) if (mme.nModels == 1 || is_nnbayes_partial) && mme.MCMCinfo.RRM == false - @printf("%-30s %20.3f\n","marker effect variances:",Mi.G) + @printf("%-30s %20.3f\n","marker effect variances:",Mi.G.val) else @printf("%-30s\n","marker effect variances:") - Base.print_matrix(stdout,round.(Mi.G,digits=3)) + Base.print_matrix(stdout,round.(Mi.G.val,digits=3)) println() end end @@ -561,22 +578,23 @@ function getMCMCinfo(mme) end @printf("%-30s %20s\n","estimatePi",Mi.estimatePi ? "true" : "false") end - @printf("%-30s %20s\n","estimateScale",Mi.estimateScale ? "true" : "false") + @printf("%-30s %20s\n","estimate_scale",Mi.estimate_scale ? "true" : "false") end end end printstyled("\nDegree of freedom for hyper-parameters:\n\n",bold=true) - @printf("%-30s %20.3f\n","residual variances:",mme.df.residual) + @printf("%-30s %20.3f\n","residual variances:",mme.R.df) for randomeffect in mme.rndTrmVec if randomeffect.randomType != "A" - @printf("%-30s %20.3f\n","random effect variances:",randomeffect.df) + @printf("%-30s %20.3f\n","random effect variances:",randomeffect.Gi.df) end end if mme.pedTrmVec!=0 - @printf("%-30s %20.3f\n","polygenic effect variances:",mme.df.polygenic) + # polygenic_pos = findfirst(i -> i.randomType=="A", mme.rndTrmVec) + @printf("%-30s %20.3f\n","polygenic effect variances:",mme.rndTrmVec[polygenic_pos].Gi.df) end if mme.M!=0 - @printf("%-30s %20.3f\n","marker effect variances:",mme.df.marker) + @printf("%-30s %20.3f\n","marker effect variances:",mme.M[1].G.df) end @printf("\n\n\n") end diff --git a/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl b/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl index 0085eaef..3a3b89b2 100644 --- a/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl +++ b/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl @@ -14,10 +14,10 @@ function MCMC_BayesianAlphabet(mme,df) has_binary_trait = "categorical(binary)" ∈ mme.traits_type has_censored_trait = "censored" ∈ mme.traits_type missing_phenotypes = mme.MCMCinfo.missing_phenotypes - constraint = mme.MCMCinfo.constraint + # constraint = mme.MCMCinfo.constraint causal_structure = mme.causal_structure is_multi_trait = mme.nModels != 1 - is_mega_trait = mme.MCMCinfo.mega_trait + # is_mega_trait = mme.R.constraint==true && mme.M[1].G.constraint==true #is_mega_trait when no residual and marker effect covariances is_nnbayes_partial = mme.nonlinear_function != false && mme.is_fully_connected==false is_activation_fcn = mme.is_activation_fcn nonlinear_function = mme.nonlinear_function @@ -37,8 +37,8 @@ function MCMC_BayesianAlphabet(mme,df) #mme.sol (its starting values were set in runMCMC) mme.solMean, mme.solMean2 = zero(mme.sol),zero(mme.sol) #residual variance - mme.meanVare = zero(mme.R) - mme.meanVare2 = zero(mme.R) + mme.meanVare = zero(mme.R.val) + mme.meanVare2 = zero(mme.R.val) #polygenic effects (pedigree), e.g, Animal+ Maternal if mme.pedTrmVec != 0 @@ -52,16 +52,16 @@ function MCMC_BayesianAlphabet(mme,df) Mi.mArray,Mi.mRinvArray,Mi.mpRinvm = mGibbs.xArray,mGibbs.xRinvArray,mGibbs.xpRinvx if Mi.method=="BayesB" #α=β.*δ - Mi.G = fill(Mi.G,Mi.nMarkers) #a scalar in BayesC but a vector in BayeB + Mi.G.val = fill(Mi.G.val,Mi.nMarkers) #a scalar in BayesC but a vector in BayeB end if Mi.method=="BayesL" #in the MTBayesLasso paper if mme.nModels == 1 - Mi.G /= 8 #mme.M.G is the scale Matrix, Sigma - Mi.scale /= 8 + Mi.G.val /= 8 #mme.M.G.val is the scale Matrix, Sigma + Mi.G.scale /= 8 gammaDist = Gamma(1, 8) #8 is the scale parameter of the Gamma distribution (1/8 is the rate parameter) else - Mi.G /= 4*(Mi.ntraits+1) - Mi.scale /= 4*(Mi.ntraits+1) + Mi.G.val /= 4*(Mi.ntraits+1) + Mi.G.scale /= 4*(Mi.ntraits+1) gammaDist = Gamma((Mi.ntraits+1)/2, 8) #8 (1/8): the scale (rate) parameter end Mi.gammaArray = rand(gammaDist,Mi.nMarkers) @@ -74,12 +74,13 @@ function MCMC_BayesianAlphabet(mme,df) Mi.meanAlpha = [zero(Mi.α[traiti]) for traiti = 1:Mi.ntraits] #marker effects Mi.meanAlpha2 = [zero(Mi.α[traiti]) for traiti = 1:Mi.ntraits] #marker effects Mi.meanDelta = [zero(Mi.δ[traiti]) for traiti = 1:Mi.ntraits] #inclusion indicator for marker effects - Mi.meanVara = zero(mme.R) #posterir mean of variance for marker effect - Mi.meanVara2 = zero(mme.R) #variable to save variance for marker effect - Mi.meanScaleVara = zero(mme.R) #variable to save Scale parameter for prior of marker effect variance - Mi.meanScaleVara2 = zero(mme.R) #variable to save Scale parameter for prior of marker effect variance + Mi.meanVara = zero(mme.R.val) #posterir mean of variance for marker effect + Mi.meanVara2 = zero(mme.R.val) #variable to save variance for marker effect + Mi.meanScaleVara = zero(mme.R.val) #variable to save Scale parameter for prior of marker effect variance + Mi.meanScaleVara2 = zero(mme.R.val) #variable to save Scale parameter for prior of marker effect variance if is_multi_trait - if is_mega_trait + # if is_mega_trait + if Mi.G.constraint==true Mi.π = zeros(Mi.ntraits) Mi.mean_pi = zeros(Mi.ntraits) Mi.mean_pi2 = zeros(Mi.ntraits) @@ -151,6 +152,9 @@ function MCMC_BayesianAlphabet(mme,df) if nonlinear_function != false mme.weights_NN = vcat(mean(mme.ySparse),zeros(mme.nModels)) end + if mme.pedTrmVec!=0 + polygenic_pos = findfirst(i -> i.randomType=="A", mme.rndTrmVec) + end @showprogress "running MCMC ..." for iter=1:chain_length ######################################################################## # 0. Categorical and censored traits @@ -184,7 +188,7 @@ function MCMC_BayesianAlphabet(mme,df) if is_multi_trait Gibbs(mme.mmeLhs,mme.sol,mme.mmeRhs) else - Gibbs(mme.mmeLhs,mme.sol,mme.mmeRhs,mme.R) + Gibbs(mme.mmeLhs,mme.sol,mme.mmeRhs,mme.R.val) end ycorr[:] = ycorr - mme.X*mme.sol @@ -198,53 +202,57 @@ function MCMC_BayesianAlphabet(mme,df) # Marker Effects ######################################################################## if Mi.method in ["BayesC","BayesB","BayesA"] - locus_effect_variances = (Mi.method == "BayesC" ? fill(Mi.G,Mi.nMarkers) : Mi.G) + locus_effect_variances = (Mi.method == "BayesC" ? fill(Mi.G.val,Mi.nMarkers) : Mi.G.val) if is_multi_trait && !is_nnbayes_partial - if is_mega_trait - megaBayesABC!(Mi,wArray,mme.R,locus_effect_variances) + # if is_mega_trait + if Mi.G.constraint==true + megaBayesABC!(Mi,wArray,mme.R.val,locus_effect_variances) else - MTBayesABC!(Mi,wArray,mme.R,locus_effect_variances,mme.nModels) + MTBayesABC!(Mi,wArray,mme.R.val,locus_effect_variances,mme.nModels) end elseif is_nnbayes_partial - BayesABC!(Mi,wArray[i],mme.R[i,i],locus_effect_variances) #this can be parallelized (conflict with others) + BayesABC!(Mi,wArray[i],mme.R.val[i,i],locus_effect_variances) #this can be parallelized (conflict with others) else - BayesABC!(Mi,ycorr,mme.R,locus_effect_variances) + BayesABC!(Mi,ycorr,mme.R.val,locus_effect_variances) end elseif Mi.method =="RR-BLUP" if is_multi_trait && !is_nnbayes_partial - if is_mega_trait - megaBayesC0!(Mi,wArray,mme.R) + # if is_mega_trait + if Mi.G.constraint==true + megaBayesC0!(Mi,wArray,mme.R.val) else - MTBayesC0!(Mi,wArray,mme.R) + MTBayesC0!(Mi,wArray,mme.R.val) end elseif is_nnbayes_partial - BayesC0!(Mi,wArray[i],mme.R[i,i]) + BayesC0!(Mi,wArray[i],mme.R.val[i,i]) else - BayesC0!(Mi,ycorr,mme.R) + BayesC0!(Mi,ycorr,mme.R.val) end elseif Mi.method == "BayesL" if is_multi_trait && !is_nnbayes_partial - if is_mega_trait #problem with sampleGammaArray - megaBayesL!(Mi,wArray,mme.R) + # if is_mega_trait #problem with sampleGammaArray + if Mi.G.constraint==true + megaBayesL!(Mi,wArray,mme.R.val) else - MTBayesL!(Mi,wArray,mme.R) + MTBayesL!(Mi,wArray,mme.R.val) end elseif is_nnbayes_partial - BayesC0!(Mi,wArray[i],mme.R[i,i]) + BayesC0!(Mi,wArray[i],mme.R.val[i,i]) else - BayesL!(Mi,ycorr,mme.R) + BayesL!(Mi,ycorr,mme.R.val) end elseif Mi.method == "GBLUP" if is_multi_trait && !is_nnbayes_partial - if is_mega_trait - megaGBLUP!(Mi,wArray,mme.R,invweights) + # if is_mega_trait + if Mi.G.constraint==true + megaGBLUP!(Mi,wArray,mme.R.val,invweights) else - MTGBLUP!(Mi,wArray,ycorr,mme.R,invweights) + MTGBLUP!(Mi,wArray,ycorr,mme.R.val,invweights) end elseif is_nnbayes_partial - GBLUP!(Mi,wArray[i],mme.R[i,i],invweights) + GBLUP!(Mi,wArray[i],mme.R.val[i,i],invweights) else - GBLUP!(Mi,ycorr,mme.R,invweights) + GBLUP!(Mi,ycorr,mme.R.val,invweights) end end ######################################################################## @@ -252,7 +260,8 @@ function MCMC_BayesianAlphabet(mme,df) ######################################################################## if Mi.estimatePi == true if is_multi_trait && !is_nnbayes_partial - if is_mega_trait + # if is_mega_trait + if Mi.G.constraint==true Mi.π = [samplePi(sum(Mi.δ[i]), Mi.nMarkers) for i in 1:mme.nModels] else samplePi(Mi.δ,Mi.π) #samplePi(deltaArray,Mi.π,labels) @@ -264,20 +273,20 @@ function MCMC_BayesianAlphabet(mme,df) ######################################################################## # Variance of Marker Effects ######################################################################## - if Mi.estimateVariance == true #methd specific estimate_variance - sample_marker_effect_variance(Mi,constraint) + if Mi.estimate_variance == true #methd specific estimate_variance + sample_marker_effect_variance(Mi) if mme.MCMCinfo.double_precision == false && Mi.method != "BayesB" - Mi.G = Float32.(Mi.G) + Mi.G.val = Float32.(Mi.G.val) end end ######################################################################## # Scale Parameter in Priors for Marker Effect Variances ######################################################################## - if Mi.estimateScale == true + if Mi.estimate_scale == true if !is_multi_trait - a = size(Mi.G,1)*Mi.df/2 + 1 - b = sum(Mi.df ./ (2*Mi.G)) + 1 - Mi.scale = rand(Gamma(a,1/b)) + a = size(Mi.G.val,1)*Mi.G.df/2 + 1 + b = sum(Mi.G.df ./ (2*Mi.G.val)) + 1 + Mi.G.scale = rand(Gamma(a,1/b)) end end end @@ -295,26 +304,26 @@ function MCMC_BayesianAlphabet(mme,df) # 3.2 Residual Variance ######################################################################## if is_multi_trait - mme.R = sample_variance(wArray, length(mme.obsID), - mme.df.residual, mme.scaleR, - invweights,constraint; + mme.R.val = sample_variance(wArray, length(mme.obsID), + mme.R.df, mme.R.scale, + invweights,mme.R.constraint; binary_trait_index=has_binary_trait ? findall(x->x=="categorical(binary)", mme.traits_type) : false) - Ri = kron(inv(mme.R),spdiagm(0=>invweights)) + Ri = kron(inv(mme.R.val),spdiagm(0=>invweights)) else #single trait - if !has_categorical_trait && !has_binary_trait # fixed mme.R=1 for single categorical/binary trait - mme.ROld = mme.R - mme.R = sample_variance(ycorr,length(ycorr), mme.df.residual, mme.scaleR, invweights) + if !has_categorical_trait && !has_binary_trait # fixed =1 for single categorical/binary trait + mme.ROld = mme.R.val + mme.R.val = sample_variance(ycorr,length(ycorr), mme.R.df, mme.R.scale, invweights) end end if mme.MCMCinfo.double_precision == false - mme.R = Float32.(mme.R) + mme.R.val = Float32.(mme.R.val) end end ######################################################################## # 4. Causal Relationships among Phenotypes (Structure Equation Model) ######################################################################## if is_multi_trait && causal_structure != false - sample4λ,sample4λ_vec = get_Λ(Y,mme.R,ycorr,Λy,mme.ySparse,causal_structure) #no missing phenotypes + sample4λ,sample4λ_vec = get_Λ(Y,mme.R.val,ycorr,Λy,mme.ySparse,causal_structure) #no missing phenotypes end ######################################################################## # 5. Latent Traits (NNBayes) @@ -328,15 +337,16 @@ function MCMC_BayesianAlphabet(mme,df) if update_priors_frequency !=0 && iter%update_priors_frequency==0 if mme.M!=0 && methods != "BayesB" if is_multi_trait - mme.M[1].scale = mme.M[1].meanVara*(mme.df.marker-mme.M[1].ntraits-1) + mme.M[1].scale = mme.M[1].meanVara*(mme.M[1].G.df-mme.M[1].ntraits-1) else - mme.M[1].scale = mme.M[1].meanVara*(mme.df.marker-2)/mme.df.marker + mme.M[1].scale = mme.M[1].meanVara*(mme.M[1].G.df-2)/mme.M[1].G.df end end if mme.pedTrmVec != 0 - mme.scalePed = mme.G0Mean*(mme.df.polygenic - size(mme.pedTrmVec,1) - 1) + polygenic_pos = findfirst(i -> i.randomType=="A", mme.rndTrmVec) + mme.scalePed = mme.G0Mean*(mme.rndTrmVec[polygenic_pos].Gi.df - size(mme.pedTrmVec,1) - 1) end - mme.scaleR = mme.meanVare*(mme.df.residual-2)/mme.df.residual + mme.R.scale = mme.meanVare*(mme.R.df-2)/mme.R.df println("\n Update priors from posteriors.") end ######################################################################## @@ -347,7 +357,13 @@ function MCMC_BayesianAlphabet(mme,df) nsamples = (iter-burnin)/output_samples_frequency output_posterior_mean_variance(mme,nsamples) #mean and variance of posterior distribution - output_MCMC_samples(mme,mme.R,(mme.pedTrmVec!=0 ? inv(mme.Gi) : false),outfile) + if mme.pedTrmVec!=0 + # polygenic_pos = findfirst(i -> i.randomType=="A", mme.rndTrmVec) + polygenic_effects_variance = inv(mme.rndTrmVec[polygenic_pos].Gi.val) + else + polygenic_effects_variance=false + end + output_MCMC_samples(mme,mme.R.val,polygenic_effects_variance,outfile) if causal_structure != false writedlm(causal_structure_outfile,sample4λ_vec',',') end diff --git a/src/1.JWAS/src/MCMC/deprecated.jl b/src/1.JWAS/src/MCMC/deprecated.jl index 8db9ae28..1dd3ed19 100644 --- a/src/1.JWAS/src/MCMC/deprecated.jl +++ b/src/1.JWAS/src/MCMC/deprecated.jl @@ -1,7 +1,7 @@ M = mme.M[1].genotypes X = M XPX = X'X -lhs = XPX+I*mme.R[1,1]/mme.M[1].G[1,1] +lhs = XPX+I*mme.R.val[1,1]/mme.M[1].G[1,1] Ch = cholesky(lhs) iL = inv(Ch.L) iLhs = inv(Ch) diff --git a/src/1.JWAS/src/Nonlinear/nnbayes_check.jl b/src/1.JWAS/src/Nonlinear/nnbayes_check.jl index e5d3c003..58b4789b 100644 --- a/src/1.JWAS/src/Nonlinear/nnbayes_check.jl +++ b/src/1.JWAS/src/Nonlinear/nnbayes_check.jl @@ -135,7 +135,6 @@ end # below function is to modify mme from multi-trait model to multiple single trait models # coded by Hao function nnbayes_mega_trait(mme) - #mega_trait if mme.nModels == 1 error("more than 1 trait is required for MegaLMM analysis.") @@ -143,12 +142,12 @@ function nnbayes_mega_trait(mme) mme.MCMCinfo.constraint = true ##sample from scale-inv-⁠χ2, not InverseWishart - mme.df.residual = mme.df.residual - mme.nModels - mme.scaleR = diag(mme.scaleR/(mme.df.residual - 1))*(mme.df.residual-2)/mme.df.residual #diag(R_prior_mean)*(ν-2)/ν + mme.R.df = mme.R.df - mme.nModels + mme.R.scale = diag(mme.R.scale/(mme.R.df - 1))*(mme.R.df-2)/mme.R.df #diag(R_prior_mean)*(ν-2)/ν if mme.M != 0 for Mi in mme.M - Mi.df = Mi.df - mme.nModels - Mi.scale = diag(Mi.scale/(Mi.df - 1))*(Mi.df-2)/Mi.df + Mi.G.df = Mi.G.df - mme.nModels + Mi.G.scale = diag(Mi.G.scale/(Mi.G.df - 1))*(Mi.G.df-2)/Mi.G.df end end @@ -157,9 +156,9 @@ end # below function is to modify essential parameters for partial connected NN function nnbayes_partial_para_modify2(mme) for Mi in mme.M - Mi.scale = Mi.scale[1] - Mi.G = Mi.G[1,1] - Mi.genetic_variance=Mi.genetic_variance[1,1] + Mi.G.scale = Mi.G.scale[1] + Mi.G.val = Mi.G.val[1,1] + Mi.genetic_variance.val=Mi.genetic_variance.val[1,1] end end @@ -206,3 +205,14 @@ function nnlmm_initialize_missing(mme,df) full_omics = n_observed_omics .== n_omics #indicator for ind with full omics mme.incomplete_omics = vec(.!full_omics) #indicator for ind with no/partial omics end + + +function nnmm_print_info_input_to_middle_layer(mme) + is_mega_trait = mme.R.constraint==true && mme.M[1].G.constraint==true #is_mega_trait when no residual and marker effect covariances + if is_mega_trait + printstyled(" - Bayesian Alphabet: multiple independent single-trait Bayesian models are used to sample marker effect. \n",bold=false,color=:green) + printstyled(" - Multi-threaded parallelism: $nThread threads are used to run single-trait models in parallel. \n",bold=false,color=:green) + else + printstyled(" - Bayesian Alphabet: multi-trait Bayesian models are used to sample marker effect. \n",bold=false,color=:green) + end +end \ No newline at end of file diff --git a/src/1.JWAS/src/Nonlinear/nonlinear.jl b/src/1.JWAS/src/Nonlinear/nonlinear.jl index 50420c1b..f77ede4b 100644 --- a/src/1.JWAS/src/Nonlinear/nonlinear.jl +++ b/src/1.JWAS/src/Nonlinear/nonlinear.jl @@ -28,7 +28,7 @@ function sample_latent_traits(yobs,mme,ycorr,nonlinear_function) if mme.is_activation_fcn == true #Neural Network with activation function if sum(incomplete_omics) != 0 #at least 1 ind with incomplete omics #step 1. sample latent trait (only for individuals with incomplete omics data - ylats_new = hmc_one_iteration(10,0.1,ylats_old[incomplete_omics,:],yobs[incomplete_omics],mme.weights_NN,mme.R,σ2_yobs,ycorr2[incomplete_omics,:],nonlinear_function) + ylats_new = hmc_one_iteration(10,0.1,ylats_old[incomplete_omics,:],yobs[incomplete_omics],mme.weights_NN,mme.R.val,σ2_yobs,ycorr2[incomplete_omics,:],nonlinear_function) #step 2. update ylats with sampled latent traits ylats_old[incomplete_omics,:] = ylats_new #step 3. for individuals with partial omics data, put back the partial real omics. diff --git a/src/1.JWAS/src/RRM/MCMC_BayesianAlphabet_RRM.jl b/src/1.JWAS/src/RRM/MCMC_BayesianAlphabet_RRM.jl index cbf5b662..02784678 100644 --- a/src/1.JWAS/src/RRM/MCMC_BayesianAlphabet_RRM.jl +++ b/src/1.JWAS/src/RRM/MCMC_BayesianAlphabet_RRM.jl @@ -25,10 +25,10 @@ function MCMC_BayesianAlphabet_RRM(mme,df; Mi.genotypes = Z*Mi.genotypes Mi.obsID = unique(df[!,1]) Mi.nObs = length(Mi.obsID) - Mi.meanVara = zero(Mi.G) #variable to save variance for marker effect - Mi.meanVara2 = zero(Mi.G) - Mi.meanScaleVara = zero(Mi.G) #variable to save Scale parameter for prior of marker effect variance - Mi.meanScaleVara2 = zero(Mi.G) + Mi.meanVara = zero(Mi.G.val) #variable to save variance for marker effect + Mi.meanVara2 = zero(Mi.G.val) + Mi.meanScaleVara = zero(Mi.G.val) #variable to save Scale parameter for prior of marker effect variance + Mi.meanScaleVara2 = zero(Mi.G.val) end end @@ -101,13 +101,18 @@ function MCMC_BayesianAlphabet_RRM(mme,df; ############################################################################ # MCMC (starting values for sol (zeros); mme.RNew; G0 are used) ############################################################################ + + if mme.pedTrmVec!=0 + polygenic_pos = findfirst(i -> i.randomType=="A", mme.rndTrmVec) + end + @showprogress "running MCMC for" for iter=1:nIter ######################################################################## # 1.1. Non-Marker Location Parameters ######################################################################## ycorr[:] = ycorr + mme.X*mme.sol mme.mmeRhs = mme.X'ycorr - Gibbs(mme.mmeLhs,mme.sol,mme.mmeRhs,mme.R) + Gibbs(mme.mmeLhs,mme.sol,mme.mmeRhs,mme.R.val) ycorr[:] = ycorr - mme.X*mme.sol @@ -120,12 +125,12 @@ function MCMC_BayesianAlphabet_RRM(mme,df; if mme.M != 0 for Mi in mme.M if Mi.method in ["BayesC","BayesB","BayesA"] - locus_effect_variances = (Mi.method == "BayesC" ? fill(Mi.G,Mi.nMarkers) : Mi.G) + locus_effect_variances = (Mi.method == "BayesC" ? fill(Mi.G.val,Mi.nMarkers) : Mi.G.val) BayesABCRRM!(Mi.mArray,Mi.mpRinvm,wArray,yfull, Mi.β, Mi.δ, Mi.α, - mme.R,locus_effect_variances,Mi.π, + mme.R.val,locus_effect_variances,Mi.π, Φ, whichzeros, Mi.mΦΦArray) end end @@ -157,8 +162,8 @@ function MCMC_BayesianAlphabet_RRM(mme,df; ######################################################################## # 2.3 Residual Variance ######################################################################## - mme.ROld = mme.R - mme.R = sample_variance(ycorr, length(ycorr), mme.df.residual, mme.scaleR) + mme.ROld = mme.R.val + mme.R.val = sample_variance(ycorr, length(ycorr), mme.R.df, mme.R.scale) ######################################################################## # 2.4 Marker Effects Variance @@ -167,7 +172,7 @@ function MCMC_BayesianAlphabet_RRM(mme,df; for Mi in mme.M if Mi.method in ["RR-BLUP","BayesC","GBLUP"] data = (Mi.method == "BayesC" ? Mi.β : Mi.α) - Mi.G =sample_variance(data, Mi.nMarkers, Mi.df, Mi.scale, false, false) + Mi.G.val =sample_variance(data, Mi.nMarkers, Mi.G.df, Mi.G.scale, false, false) end end end @@ -176,10 +181,10 @@ function MCMC_BayesianAlphabet_RRM(mme,df; ######################################################################## if mme.M != 0 for Mi in mme.M - if Mi.estimateScale == true - a = size(Mi.G,1)*Mi.df/2 + 1 - b = sum(Mi.df ./ (2*Mi.G )) + 1 - Mi.scale = rand(Gamma(a,1/b)) + if Mi.estimate_scale == true + a = size(Mi.G.val,1)*Mi.G.df/2 + 1 + b = sum(Mi.G.df ./ (2*Mi.G.val )) + 1 + Mi.G.scale = rand(Gamma(a,1/b)) end end end @@ -187,12 +192,18 @@ function MCMC_BayesianAlphabet_RRM(mme,df; # 3.1 Save MCMC samples ######################################################################## if iter>burnin && (iter-burnin)%output_samples_frequency == 0 - output_MCMC_samples(mme,mme.R,(mme.pedTrmVec!=0 ? inv(mme.Gi) : false),outfile) + if mme.pedTrmVec!=0 + # polygenic_pos = findfirst(i -> i.randomType=="A", mme.rndTrmVec) + polygenic_effects_variance = inv(mme.rndTrmVec[polygenic_pos].Gi.val) + else + polygenic_effects_variance=false + end + output_MCMC_samples(mme,mme.R.val,polygenic_effects_variance,outfile) nsamples = (iter-burnin)/output_samples_frequency mme.solMean += (mme.sol - mme.solMean)/nsamples mme.solMean2 += (mme.sol .^2 - mme.solMean2)/nsamples - mme.meanVare += (mme.R - mme.meanVare)/nsamples - mme.meanVare2 += (mme.R .^2 - mme.meanVare2)/nsamples + mme.meanVare += (mme.R.val- mme.meanVare)/nsamples + mme.meanVare2 += (mme.R.val.^2 - mme.meanVare2)/nsamples if mme.pedTrmVec != 0 mme.G0Mean += (inv(mme.Gi) - mme.G0Mean )/nsamples @@ -212,13 +223,13 @@ function MCMC_BayesianAlphabet_RRM(mme,df; end end if Mi.method != "BayesB" - Mi.meanVara += (Mi.G - Mi.meanVara)/nsamples - Mi.meanVara2 += (Mi.G.^2 - Mi.meanVara2)/nsamples + Mi.meanVara += (Mi.G.val - Mi.meanVara)/nsamples + Mi.meanVara2 += (Mi.G.val.^2 - Mi.meanVara2)/nsamples end - if Mi.estimateScale == true - Mi.meanScaleVara += (Mi.scale - Mi.meanScaleVara)/nsamples - Mi.meanScaleVara2 += (Mi.scale .^2 - Mi.meanScaleVara2)/nsamples + if Mi.estimate_scale == true + Mi.meanScaleVara += (Mi.G.scale - Mi.meanScaleVara)/nsamples + Mi.meanScaleVara2 += (Mi.G.scale .^2 - Mi.meanScaleVara2)/nsamples end end end diff --git a/src/1.JWAS/src/build_MME.jl b/src/1.JWAS/src/build_MME.jl index 246d4b89..6ca68941 100644 --- a/src/1.JWAS/src/build_MME.jl +++ b/src/1.JWAS/src/build_MME.jl @@ -38,9 +38,15 @@ R = [6.72 24.84 models = build_model(model_equations,R); ``` """ -function build_model(model_equations::AbstractString, R = false; df = 4.0, +function build_model(model_equations::AbstractString, + ## residual variance: + R = false; df = 4.0, + estimate_variance=true, estimate_scale=false, + constraint=false, #for multi-trait only, constraint=true means no residual covariance among traits + ## nnmm: num_hidden_nodes = false, nonlinear_function = false, latent_traits=false, #nonlinear_function(x1,x2) = x1+x2 user_σ2_yobs = false, user_σ2_weightsNN = false, + ## censored, categorical traits: censored_trait = false, categorical_trait = false) if R != false && !isposdef(map(AbstractFloat,R)) @@ -107,10 +113,10 @@ function build_model(model_equations::AbstractString, R = false; df = 4.0, genotypei.ntraits = is_nnbayes_partial ? 1 : nModels genotypei.trait_names = is_nnbayes_partial ? trait_names : string.(lhsVec) if nModels != 1 - genotypei.df = genotypei.df + nModels + genotypei.G.df = genotypei.G.df + nModels end - if !is_nnbayes_partial && (genotypei.G != false || genotypei.genetic_variance != false) - if size(genotypei.G,1) != nModels && size(genotypei.genetic_variance,1) != nModels + if !is_nnbayes_partial && (genotypei.G.val != false || genotypei.genetic_variance.val != false) + if size(genotypei.G.val,1) != nModels && size(genotypei.genetic_variance.val,1) != nModels error("The genomic covariance matrix is not a ",nModels," by ",nModels," matrix.") end end @@ -121,11 +127,27 @@ function build_model(model_equations::AbstractString, R = false; df = 4.0, end #create mme with genotypes - filter!(x->x.random_type != "genotypes",modelTerms) - filter!(x->x[2].random_type != "genotypes",dict) - mme = MME(nModels,modelVec,modelTerms,dict,lhsVec,R == false ? R : Float32.(R),Float32(df)) + filter!(x->x.random_type != "genotypes",modelTerms) #remove "genotypes" from modelTerms + filter!(x->x[2].random_type != "genotypes",dict) #remove "genotypes" from dict + + + #set scale and df for residual variance + if nModels == 1 + scale_R = R*(df - 2)/df + df_R = df + else + scale_R = R*(df - 1) + df_R = df + nModels + end + + #initialize mme + mme = MME(nModels,modelVec,modelTerms,dict,lhsVec, + Variance(R==false ? R : Float32.(R), #val + Float32(df_R), #df + R==false ? R : scale_R, #scale + estimate_variance, estimate_scale, constraint)) if length(genotypes) != 0 - mme.M = genotypes + mme.M = genotypes #add genotypes into mme end #NNBayes: @@ -343,7 +365,8 @@ function getMME(mme::MME, df::DataFrame) #such that no imputation of missing phenotypes is required. #mixed model equations is obtained below for multi-trait PBLUP #with known residual covariance matrix and missing phenotypes. - if mme.MCMCinfo.mega_trait == true #multiple single trait + # if mme.MCMCinfo.mega_trait == true #multiple single trait + if mme.R.constraint == true #tj: Hao, please confirm!!! Ri = Diagonal(repeat(mme.invweights,mme.nModels)) else #multi-trait Ri = mkRi(mme,df,mme.invweights) @@ -354,13 +377,13 @@ function getMME(mme::MME, df::DataFrame) #Random effects parts in MME if mme.nModels == 1 - #random_term.GiNew*mme.R - random_term.GiOld*mme.ROld + #random_term.GiNew*mme.R.val - random_term.GiOld*mme.ROld for random_term in mme.rndTrmVec #trick - random_term.GiOld = zero(random_term.GiOld) + random_term.GiOld.val = zero(random_term.GiOld.val) end addVinv(mme) for random_term in mme.rndTrmVec #trick - random_term.GiOld = copy(random_term.GiNew) + random_term.GiOld.val = copy(random_term.GiNew.val) end else addVinv(mme) diff --git a/src/1.JWAS/src/categorical_and_censored_trait/categorical_and_censored_trait.jl b/src/1.JWAS/src/categorical_and_censored_trait/categorical_and_censored_trait.jl index c155ab95..c242e3c8 100644 --- a/src/1.JWAS/src/categorical_and_censored_trait/categorical_and_censored_trait.jl +++ b/src/1.JWAS/src/categorical_and_censored_trait/categorical_and_censored_trait.jl @@ -33,7 +33,7 @@ function categorical_censored_traits_setup!(mme,df) ############################################################################ nInd = length(mme.obsID) nTrait = mme.nModels - R = mme.R + R = mme.R.val starting_value = mme.sol cmean = mme.X*starting_value[1:size(mme.mmeLhs,1)] #maker effects defaulting to all zeros @@ -81,7 +81,7 @@ function categorical_censored_traits_setup!(mme,df) ################################################################################## for i in 1:nInd if lower_bound[t][i] != upper_bound[t][i] - ySparse[i,t] = rand(truncated(Normal(cmean[i,t], sqrt(R[t,t])), lower_bound[t][i], upper_bound[t][i])) #mme.R has been fixed to 1.0 for category single-trait analysis + ySparse[i,t] = rand(truncated(Normal(cmean[i,t], sqrt(R[t,t])), lower_bound[t][i], upper_bound[t][i])) #mme.R.val has been fixed to 1.0 for category single-trait analysis else ySparse[i,t] = lower_bound[t][i] #mme.ySparse will also change since reshape is a reference, not copy end @@ -176,7 +176,7 @@ function sample_liabilities!(mme,ycorr,lower_bound,upper_bound) nInd = length(mme.obsID) nTrait = mme.nModels is_multi_trait = nTrait>1 - R = mme.R + R = mme.R.val cmean = reshape(cmean, nInd,nTrait) ySparse = reshape(mme.ySparse,nInd,nTrait) #mme.ySparse will also be updated since reshape is a reference, not copy ycorr_ref = reshape(ycorr, nInd,nTrait) #ycorr will be updated since reshape is a reference, not copy diff --git a/src/1.JWAS/src/input_data_validation.jl b/src/1.JWAS/src/input_data_validation.jl index 938af8d9..245d5ae9 100644 --- a/src/1.JWAS/src/input_data_validation.jl +++ b/src/1.JWAS/src/input_data_validation.jl @@ -35,7 +35,7 @@ function errors_args(mme) println("BayesA is equivalent to BayesB with known π=0. BayesB with known π=0 runs.") end if Mi.method == "GBLUP" - if Mi.genetic_variance == false && Mi.G != false + if Mi.genetic_variance.val == false && Mi.G.val != false error("Please provide values for the genetic variance for GBLUP analysis") end if mme.MCMCinfo.single_step_analysis == true @@ -241,47 +241,47 @@ function set_default_priors_for_variance_components(mme,df) #genetic variance or marker effect variance if mme.M!=0 for Mi in mme.M - if Mi.G == false && Mi.genetic_variance == false + if Mi.G.val == false && Mi.genetic_variance.val == false printstyled("Prior information for genomic variance is not provided and is generated from the data.\n",bold=false,color=:green) if mme.nModels==1 && mme.MCMCinfo.RRM == false - Mi.genetic_variance = varg[1,1] + Mi.genetic_variance.val = varg[1,1] elseif mme.nModels==1 && mme.MCMCinfo.RRM != false - Mi.genetic_variance = diagm(0=>fill(varg[1,1],size(mme.MCMCinfo.RRM,2))) + Mi.genetic_variance.val = diagm(0=>fill(varg[1,1],size(mme.MCMCinfo.RRM,2))) elseif mme.nModels>1 - Mi.genetic_variance = varg - end #mme.M.G and its scale parameter will be reset in function set_marker_hyperparameters_variances_and_pi + Mi.genetic_variance.val = varg + end #mme.M.G.val and its scale parameter will be reset in function set_marker_hyperparameters_variances_and_pi end end end #residual effects - if mme.nModels==1 && isposdef(mme.R) == false #single-trait + if mme.nModels==1 && isposdef(mme.R.val) == false #single-trait printstyled("Prior information for residual variance is not provided and is generated from the data.\n",bold=false,color=:green) is_categorical_or_binary_trait = mme.traits_type==["categorical"] || mme.traits_type==["categorical(binary)"] - mme.R = mme.ROld = is_categorical_or_binary_trait ? 1.0 : vare[1,1] #residual variance known to be 1.0 in single trait categorical analysis - mme.scaleR = mme.R*(mme.df.residual-2)/mme.df.residual - elseif mme.nModels>1 && isposdef(mme.R) == false #multi-trait + mme.R.val = mme.ROld = is_categorical_or_binary_trait ? 1.0 : vare[1,1] #residual variance known to be 1.0 in single trait categorical analysis + mme.R.scale = mme.R.val*(mme.R.df-2)/mme.R.df + elseif mme.nModels>1 && isposdef(mme.R.val) == false #multi-trait printstyled("Prior information for residual variance is not provided and is generated from the data.\n",bold=false,color=:green) - mme.R = mme.ROld = vare - mme.scaleR = mme.R*(mme.df.residual - mme.nModels - 1) + mme.R.val = mme.ROld = vare + mme.R.scale = mme.R.val*(mme.R.df - mme.nModels - 1) end #random effects if length(mme.rndTrmVec) != 0 for randomEffect in mme.rndTrmVec - if isposdef(randomEffect.Gi) == false + if isposdef(randomEffect.Gi.val) == false printstyled("Prior information for random effect variance is not provided and is generated from the data.\n",bold=false,color=:green) - myvarout = [split(i,":")[1] for i in randomEffect.term_array] - myvarin = string.(mme.lhsVec) - Zdesign = mkmat_incidence_factor(myvarout,myvarin) + myvarout = [split(i,":")[1] for i in randomEffect.term_array] #e.g., ["y1:x2","y2:x2"] -> ["y1","y2"] + myvarin = string.(mme.lhsVec) #e.g., ["y1","y2","y3"] + Zdesign = mkmat_incidence_factor(myvarout,myvarin) #design matrix (sparse) if randomEffect.randomType == "A" G = diagm(Zdesign*diag(varg)) else G = diagm(Zdesign*diag(vare)) end - randomEffect.Gi = randomEffect.GiOld = randomEffect.GiNew = Symmetric(inv(G)) - randomEffect.scale = G*(randomEffect.df-length(randomEffect.term_array)-1) + randomEffect.Gi.val = randomEffect.GiOld.val = randomEffect.GiNew.val = Symmetric(inv(G)) + randomEffect.Gi.scale = randomEffect.GiOld.scale = randomEffect.GiNew.scale = G*(randomEffect.Gi.df-length(randomEffect.term_array)-1) if randomEffect.randomType == "A" - mme.Gi = randomEffect.Gi - mme.scalePed = randomEffect.scale + mme.Gi = randomEffect.Gi.val + mme.scalePed = randomEffect.Gi.scale end end end @@ -447,3 +447,54 @@ function init_mixed_model_equations(mme,df,starting_value) end end end + + +#multi-trait constraint on R: modify df and scale for residual variance +function R_constraint!(mme) + #print + printstyled("Constraint on residual variance matrix (i.e., zero covariance)\n",bold=false,color=:green) + #check + if mme.nModels == 1 && mme.R.constraint==true + error("constraint==true is for multi-trait only") + end + #modify df and scale + mme.R.df = mme.R.df - mme.nModels + mme.R.scale = Diagonal(mme.R.scale/(mme.R.df - 1))*(mme.R.df-2)/mme.R.df #Diagonal(R_prior_mean)*(ν-2)/ν, a diagonal matrix +end + +#multi-trait constraint on G: modify df and scale for marker effect variance +function G_constraint!(mme) + if length(mme.M) > 0 + for Mi in mme.M + #print + geno_name = Mi.name + printstyled("Constraint on marker effect variance matrix (i.e., zero covariance) for $geno_name \n",bold=false,color=:green) + #check + if mme.nModels == 1 && Mi.G.constraint==true + error("constraint==true is for multi-trait only") + end + #modify df and scale + Mi.G.df = Mi.G.df - mme.nModels + Mi.G.scale = Diagonal(Mi.G.scale/(Mi.G.df - 1))*(Mi.G.df-2)/Mi.G.df #a diagonal matrix + end + end +end + + +#multi-trait constraint on non-genetic random effects: modify df and scale +function A_constraint!(mme) + if mme > 0 + for Mi in mme.M + #print + geno_name = Mi.name + printstyled("Constraint on marker effect variance matrix (i.e., zero covariance) for $geno_name \n",bold=false,color=:green) + #check + if mme.nModels == 1 && Mi.G.constraint==true + error("constraint==true is for multi-trait only") + end + #modify df and scale + Mi.G.df = Mi.G.df - mme.nModels + Mi.G.scale = Diagonal(Mi.G.scale/(Mi.G.df - 1))*(Mi.G.df-2)/Mi.G.df #a diagonal matrix + end + end +end \ No newline at end of file diff --git a/src/1.JWAS/src/iterative_solver/solver.jl b/src/1.JWAS/src/iterative_solver/solver.jl index 8c226fb8..6710878a 100644 --- a/src/1.JWAS/src/iterative_solver/solver.jl +++ b/src/1.JWAS/src/iterative_solver/solver.jl @@ -40,7 +40,7 @@ function solve(mme::MME, return [getNames(mme) Gibbs(A,x,b, maxiter,printout_frequency=printout_frequency)] elseif solver=="Gibbs" && mme.nModels==1 - return [getNames(mme) Gibbs(A,x,b,mme.R, + return [getNames(mme) Gibbs(A,x,b,mme.R.val, maxiter,printout_frequency=printout_frequency)] elseif solver=="default" println("To solve the equations, please choose a solver. (run ?solve for help)") diff --git a/src/1.JWAS/src/markers/BayesianAlphabet/GBLUP.jl b/src/1.JWAS/src/markers/BayesianAlphabet/GBLUP.jl index 6ba84ca7..f6460d3f 100644 --- a/src/1.JWAS/src/markers/BayesianAlphabet/GBLUP.jl +++ b/src/1.JWAS/src/markers/BayesianAlphabet/GBLUP.jl @@ -32,12 +32,12 @@ end function megaGBLUP!(Mi::Genotypes,wArray,vare,Rinv) Threads.@threads for i in 1:length(wArray) #ntraits - GBLUP!(Mi.genotypes,Mi.α[i],Mi.D,wArray[i],vare[i,i],Mi.G[i,i],Rinv,Mi.nObs) + GBLUP!(Mi.genotypes,Mi.α[i],Mi.D,wArray[i],vare[i,i],Mi.G.val[i,i],Rinv,Mi.nObs) end end function GBLUP!(Mi::Genotypes,ycorr,vare,Rinv) #single-trait - GBLUP!(Mi.genotypes,Mi.α[1],Mi.D,ycorr,vare,Mi.G[1,1],Rinv,Mi.nObs) + GBLUP!(Mi.genotypes,Mi.α[1],Mi.D,ycorr,vare,Mi.G.val[1,1],Rinv,Mi.nObs) end function GBLUP!(genotypes,α,D,ycorr,vare,vara,Rinv,nObs) @@ -51,7 +51,7 @@ end function MTGBLUP!(Mi::Genotypes,ycorr_array,ycorr,vare,Rinv) iR0 = inv(vare) - iGM = inv(Mi.G) + iGM = inv(Mi.G.val) for trait = 1:Mi.ntraits ycorr_array[trait][:] = ycorr_array[trait] + Mi.genotypes*Mi.α[trait] end diff --git a/src/1.JWAS/src/markers/BayesianAlphabet/MTBayesABC.jl b/src/1.JWAS/src/markers/BayesianAlphabet/MTBayesABC.jl index 1e4045a0..20de78f3 100644 --- a/src/1.JWAS/src/markers/BayesianAlphabet/MTBayesABC.jl +++ b/src/1.JWAS/src/markers/BayesianAlphabet/MTBayesABC.jl @@ -97,14 +97,14 @@ function MTBayesABC_samplerII!(xArray, betaArray, deltaArray, alphaArray, - vare, #mme.R, t-by-t + vare, #mme.R.val, t-by-t varEffects, # vector of length #SNPs, each element is a t-by-t covariance matrix BigPi) #genotypes.π nMarkers = length(xArray) ntraits = length(alphaArray) - Rinv = inv(vare) #inv(mme.R) + Rinv = inv(vare) #inv(mme.R.val) Ginv = inv.(varEffects) β = zeros(typeof(betaArray[1][1]),ntraits) diff --git a/src/1.JWAS/src/markers/readgenotypes.jl b/src/1.JWAS/src/markers/readgenotypes.jl index e4f402ee..28dc07da 100644 --- a/src/1.JWAS/src/markers/readgenotypes.jl +++ b/src/1.JWAS/src/markers/readgenotypes.jl @@ -31,16 +31,16 @@ function add_genotypes(mme::MME,M::Union{AbstractString,Array{Float64,2},Array{F genotypei.ntraits = mme.nModels genotypei.trait_names = string.(mme.lhsVec) if mme.nModels != 1 - genotypei.df = genotypei.df + mme.nModels + genotypei.G.df = genotypei.G.df + mme.nModels end genotypes = [] push!(genotypes,genotypei) mme.M = genotypes if mme.nModels == 1 #?move to set_marker_hyperparameters_variances_and_pi? - mme.df.marker = Float32(df) + mme.M[1].G.df = Float32(df) else νG0 = Float32(df) + mme.nModels - mme.df.marker = νG0 #final df for inverse wishart + mme.M[1].G.df = νG0 #final df for inverse wishart end end ################################################################################ @@ -54,7 +54,7 @@ end get_genotypes(file::Union{AbstractString,Array{Float64,2},Array{Float32,2},Array{Any,2},DataFrames.DataFrame},G=false; separator=',',header=true,rowID=false, center=true,quality_control=false, - method = "RR-BLUP",Pi = 0.0,estimatePi = true,estimateScale=false, + method = "RR-BLUP",Pi = 0.0,estimatePi = true,estimate_scale=false, G_is_marker_variance = false,df = 4.0) * Get marker informtion from a genotype file/matrix. This file needs to be column-wise sorted by marker positions. * If a text file is provided, the file format should be: @@ -79,15 +79,22 @@ end a dictionary such as `Pi=Dict([1.0; 1.0]=>0.7,[1.0; 0.0]=>0.1,[0.0; 1.0]=>0.1,[0.0; 0.0]=>0.1)`, defaulting to `all markers have effects (Pi = 0.0)` in single-trait analysis and `all markers have effects on all traits (Pi=Dict([1.0; 1.0]=>1.0,[0.0; 0.0]=>0.0))` in multi-trait analysis. `Pi` is estimated if `estimatePi` = true, , defaulting to `false`. -* Scale parameter for prior of marker effect variance is estimated if `estimateScale` = true, defaulting to `false`. +* Scale parameter for prior of marker effect variance is estimated if `estimate_scale` = true, defaulting to `false`. """ function get_genotypes(file::Union{AbstractString,Array{Float64,2},Array{Float32,2},Array{Any,2},DataFrames.DataFrame},G=false; - method = "BayesC",Pi = 0.0,estimatePi = true, estimateVariance=true, estimateScale=false, + ## method: + method = "BayesC",Pi = 0.0,estimatePi = true, + ## variance: + G_is_marker_variance = false, df = 4.0, + estimate_variance=true, estimate_scale=false, + constraint = false, #for multi-trait only, constraint=true means no genetic covariance among traits + ## format: separator=',',header=true,rowID=false, - center=true,G_is_marker_variance = false,df = 4.0, - starting_value=false, - quality_control=true, MAF = 0.01, missing_value = 9.0) + ## quality control: + quality_control=true, MAF = 0.01, missing_value = 9.0, + ## others: + center=true,starting_value=false) #Read the genotype file if typeof(file) <: AbstractString printstyled("The delimiterd in ",split(file,['/','\\'])[end]," is \'",separator,"\'. ",bold=false,color=:green) @@ -204,17 +211,16 @@ function get_genotypes(file::Union{AbstractString,Array{Float64,2},Array{Float32 end genotypes = Genotypes(obsID,markerID,nObs,nMarkers,p,sum2pq,center,genotypes,isGRM) - if G_is_marker_variance == true - genotypes.G = G - else - genotypes.genetic_variance = G - end + + genotypes.G = Variance(G_is_marker_variance ? G : false, df,false,estimate_variance,estimate_scale,constraint) + genotypes.genetic_variance = Variance(G_is_marker_variance ? false : G, df,false,estimate_variance,estimate_scale,constraint) + genotypes.method = method genotypes.estimatePi = estimatePi genotypes.π = Pi - genotypes.df = df #It will be modified base on number of traits in build_model() - genotypes.estimateScale = estimateScale - genotypes.estimateVariance = estimateVariance + # genotypes.df = df #It will be modified base on number of traits in build_model() + # genotypes.estimate_scale = estimate_scale + # genotypes.estimate_variance = estimate_variance writedlm("IDs_for_individuals_with_genotypes.txt",genotypes.obsID) println("Genotype informatin:") diff --git a/src/1.JWAS/src/markers/tools4genotypes.jl b/src/1.JWAS/src/markers/tools4genotypes.jl index 88152f8e..0dc25437 100644 --- a/src/1.JWAS/src/markers/tools4genotypes.jl +++ b/src/1.JWAS/src/markers/tools4genotypes.jl @@ -153,44 +153,44 @@ function set_marker_hyperparameters_variances_and_pi(mme::MME) Mi.π = Pi end #(2) marker effect variances - if Mi.G == false + if Mi.G.val == false if Mi.method!="GBLUP" genetic2marker(Mi,Mi.π) println() if mme.nModels != 1 || mme.MCMCinfo.RRM != false - if !isposdef(Mi.G) #also work for scalar + if !isposdef(Mi.G.val) #also work for scalar error("Marker effects covariance matrix is not postive definite! Please modify the argument: Pi.") end println("The prior for marker effects covariance matrix is calculated from genetic covariance matrix and Π.") println("The mean of the prior for the marker effects covariance matrix is:") if mme.MCMCinfo.printout_model_info==true - Base.print_matrix(stdout,round.(Mi.G,digits=6)) + Base.print_matrix(stdout,round.(Mi.G.val,digits=6)) end else - if !isposdef(Mi.G) #positive scalar (>0) + if !isposdef(Mi.G.val) #positive scalar (>0) error("Marker effects variance is negative!") end println("The prior for marker effects variance is calculated from the genetic variance and π.") print("The mean of the prior for the marker effects variance is: ") - print(round.(Mi.G,digits=6)) + print(round.(Mi.G.val,digits=6)) end print("\n\n\n") elseif Mi.method == "GBLUP" - Mi.G = Mi.genetic_variance + Mi.G.val = Mi.genetic_variance.val end end #(3) scale parameter for marker effect variance if Mi.ntraits == 1 && mme.MCMCinfo.RRM == false - Mi.scale = Mi.G*(Mi.df-Mi.ntraits-1)/Mi.df + Mi.G.scale = Mi.G.val*(Mi.G.df-Mi.ntraits-1)/Mi.G.df else - Mi.scale = Mi.G*(Mi.df-Mi.ntraits-1) + Mi.G.scale = Mi.G.val*(Mi.G.df-Mi.ntraits-1) end end end end function genetic2marker(M::Genotypes,Pi::Dict) - ntraits = size(M.genetic_variance,1) + ntraits = size(M.genetic_variance.val,1) denom = zeros(ntraits,ntraits) for i in 1:ntraits for j in i:ntraits @@ -200,9 +200,9 @@ function genetic2marker(M::Genotypes,Pi::Dict) denom[j,i] = denom[i,j] end end - M.G = M.genetic_variance ./ denom + M.G.val = M.genetic_variance.val ./ denom end function genetic2marker(M::Genotypes,π::Float64) - M.G = M.genetic_variance/((1-π)*M.sum2pq) + M.G.val = M.genetic_variance.val/((1-π)*M.sum2pq) end diff --git a/src/1.JWAS/src/output.jl b/src/1.JWAS/src/output.jl index 0e80a73f..5a519b50 100644 --- a/src/1.JWAS/src/output.jl +++ b/src/1.JWAS/src/output.jl @@ -130,7 +130,7 @@ function output_result(mme,output_folder, if Mi.estimatePi == true output["pi_"*Mi.name] = dict2dataframe(Mi.mean_pi,Mi.mean_pi2) end - if Mi.estimateScale == true + if Mi.estimate_scale == true output["ScaleEffectVar"*Mi.name] = matrix2dataframe(string.(mme.lhsVec),Mi.meanScaleVara,Mi.meanScaleVara2) end end @@ -358,7 +358,7 @@ function output_MCMC_samples_setup(mme,nIter,output_samples_frequency,file_name= #add headers mytraits=map(string,mme.lhsVec) - if mme.MCMCinfo.mega_trait == false + if mme.R.constraint == false varheader = repeat(mytraits,inner=length(mytraits)).*"_".*repeat(mytraits,outer=length(mytraits)) else varheader = transubstrarr(map(string,mme.lhsVec)) @@ -396,6 +396,11 @@ function output_MCMC_samples_setup(mme,nIter,output_samples_frequency,file_name= writedlm(outfile["EBV_"*string(mme.lhsVec[traiti])],transubstrarr(mme.output_ID),',') end if mme.MCMCinfo.output_heritability == true && mme.MCMCinfo.single_step_analysis == false + if mme.M[1].G.constraint == false #may need change when there are multiple M + varheader = repeat(mytraits,inner=length(mytraits)).*"_".*repeat(mytraits,outer=length(mytraits)) + else + varheader = transubstrarr(map(string,mme.lhsVec)) + end writedlm(outfile["genetic_variance"],transubstrarr(varheader),',') if mme.MCMCinfo.RRM == false writedlm(outfile["heritability"],transubstrarr(map(string,mme.lhsVec)),',') @@ -421,10 +426,10 @@ function output_MCMC_samples(mme,vRes,G0, #random effects variances for effect in mme.rndTrmVec trmStri = join(effect.term_array, "_") - writedlm(outfile[trmStri*"_variances"],vec(inv(effect.Gi))',',') + writedlm(outfile[trmStri*"_variances"],vec(inv(effect.Gi.val))',',') end - if mme.MCMCinfo.mega_trait != false + if mme.R.constraint == true vRes=diag(vRes) end writedlm(outfile["residual_variance"],(typeof(vRes) <: Number) ? vRes : vec(vRes)' ,',') @@ -441,14 +446,14 @@ function output_MCMC_samples(mme,vRes,G0, writedlm(outfile["marker_effects_"*Mi.name*"_"*geno_names[traiti]],Mi.α[traiti]',',') end - if Mi.G != false && mme.MCMCinfo.mega_trait == false #Do not save marker effect variances in mega-trait analysis + if Mi.G.val != false && Mi.G.constraint == false #Do not save marker effect variances with constraint if mme.nModels == 1 - writedlm(outfile["marker_effects_variances"*"_"*Mi.name],Mi.G',',') + writedlm(outfile["marker_effects_variances"*"_"*Mi.name],Mi.G.val',',') else if Mi.method == "BayesB" - writedlm(outfile["marker_effects_variances"*"_"*Mi.name],hcat([x for x in Mi.G]...),',') + writedlm(outfile["marker_effects_variances"*"_"*Mi.name],hcat([x for x in Mi.G.val]...),',') else - writedlm(outfile["marker_effects_variances"*"_"*Mi.name],Mi.G,',') + writedlm(outfile["marker_effects_variances"*"_"*Mi.name],Mi.G.val,',') end end end @@ -471,14 +476,15 @@ function output_MCMC_samples(mme,vRes,G0, if mme.MCMCinfo.output_heritability == true && mme.MCMCinfo.single_step_analysis == false #single-trait: a scalar ; multi-trait: a matrix; mega-trait: a vector - mygvar = cov(EBVmat) #this might be slow in megatrats - if mme.MCMCinfo.mega_trait != false - mygvar=diag(mygvar) + if mme.M[1].G.constraint==true + mygvar = Diagonal(vec(var(EBVmat,dims=1))) + else + mygvar = cov(EBVmat) end genetic_variance = (ntraits == 1 ? mygvar : vec(mygvar)') if mme.MCMCinfo.RRM == false - heritability = (ntraits == 1 ? mygvar/(mygvar+vRes) : - (typeof(mygvar)<:Vector ? (mygvar./(mygvar+vRes))' : (diag(mygvar)./(diag(mygvar)+diag(vRes)))')) + vRes = mme.R.constraint==true ? Diagonal(vRes) : vRes #change to diagonal matrix to avoid error + heritability = (ntraits == 1 ? mygvar/(mygvar+vRes) : (diag(mygvar)./(diag(mygvar)+diag(vRes)))') writedlm(outfile["heritability"],heritability,',') end writedlm(outfile["genetic_variance"],genetic_variance,',') @@ -539,12 +545,13 @@ end function output_posterior_mean_variance(mme,nsamples) mme.solMean += (mme.sol - mme.solMean)/nsamples mme.solMean2 += (mme.sol .^2 - mme.solMean2)/nsamples - mme.meanVare += (mme.R - mme.meanVare)/nsamples - mme.meanVare2 += (mme.R .^2 - mme.meanVare2)/nsamples + mme.meanVare += (mme.R.val - mme.meanVare)/nsamples + mme.meanVare2 += (mme.R.val .^2 - mme.meanVare2)/nsamples if mme.pedTrmVec != 0 - mme.G0Mean += (inv(mme.Gi) - mme.G0Mean )/nsamples - mme.G0Mean2 += (inv(mme.Gi) .^2 - mme.G0Mean2 )/nsamples + polygenic_pos = findfirst(i -> i.randomType=="A", mme.rndTrmVec) + mme.G0Mean += (inv(mme.rndTrmVec[polygenic_pos].Gi.val) - mme.G0Mean )/nsamples + mme.G0Mean2 += (inv(mme.rndTrmVec[polygenic_pos].Gi.val) .^2 - mme.G0Mean2 )/nsamples end if mme.M != 0 for Mi in mme.M @@ -554,7 +561,7 @@ function output_posterior_mean_variance(mme,nsamples) Mi.meanDelta[trait] += (Mi.δ[trait] - Mi.meanDelta[trait])/nsamples end if Mi.estimatePi == true - if Mi.ntraits == 1 || mme.MCMCinfo.mega_trait + if Mi.ntraits == 1 || mme.M[1].G.constraint==true #may need to change for multiple M Mi.mean_pi += (Mi.π-Mi.mean_pi)/nsamples Mi.mean_pi2 += (Mi.π .^2-Mi.mean_pi2)/nsamples else @@ -565,12 +572,12 @@ function output_posterior_mean_variance(mme,nsamples) end end if Mi.method != "BayesB" - Mi.meanVara += (Mi.G - Mi.meanVara)/nsamples - Mi.meanVara2 += (Mi.G .^2 - Mi.meanVara2)/nsamples + Mi.meanVara += (Mi.G.val - Mi.meanVara)/nsamples + Mi.meanVara2 += (Mi.G.val .^2 - Mi.meanVara2)/nsamples end - if Mi.estimateScale == true - Mi.meanScaleVara += (Mi.scale - Mi.meanScaleVara)/nsamples - Mi.meanScaleVara2 += (Mi.scale .^2 - Mi.meanScaleVara2)/nsamples + if Mi.estimate_scale == true + Mi.meanScaleVara += (Mi.G.scale - Mi.meanScaleVara)/nsamples + Mi.meanScaleVara2 += (Mi.G.scale .^2 - Mi.meanScaleVara2)/nsamples end end end diff --git a/src/1.JWAS/src/random_effects.jl b/src/1.JWAS/src/random_effects.jl index bb0b416c..5c3636b6 100644 --- a/src/1.JWAS/src/random_effects.jl +++ b/src/1.JWAS/src/random_effects.jl @@ -37,13 +37,20 @@ G = [6.72 2.84 set_random(model,"animal",ped,G) ``` """ -function set_random(mme::MME,randomStr::AbstractString,ped::PedModule.Pedigree,G=false;df=4.0) +function set_random(mme::MME,randomStr::AbstractString,ped::PedModule.Pedigree, + ## variance: + G=false; + df=4.0, + estimate_variance=true, estimate_scale=false, + constraint=false #for multi-trait only, constraint=true means no covariance for this random effects among traits + ) if mme.ped == 0 mme.ped = deepcopy(ped) else error("Pedigree information is already added. Polygenic effects can only be set for one time") end - set_random(mme,randomStr,G,Vinv=mme.ped,df=df) + set_random(mme,randomStr,G,Vinv=mme.ped,df=df, + estimate_variance=estimate_variance,estimate_scale=estimate_scale,constraint=constraint) end ############################################################################ #Set specific ModelTerm as random (Not specific for IID or A(pedigree)) @@ -83,7 +90,10 @@ G = 0.6 set_random(model,"litter",G,Vinv=inv(V),names=[a1;a2;a3]) ``` """ -function set_random(mme::MME,randomStr::AbstractString,G=false;Vinv=0,names=[],df=4.0) +function set_random(mme::MME,randomStr::AbstractString,G=false;Vinv=0,names=[],df=4.0, + estimate_variance=true,estimate_scale=false, + constraint=false #for multi-trait only, constraint=true means no covariance for this random effects among traits + ) ############################################################################ #Pre-Check ############################################################################ @@ -94,6 +104,9 @@ function set_random(mme::MME,randomStr::AbstractString,G=false;Vinv=0,names=[],d end names = string.(names) df = Float32(df) + #check for constraint + #?now + ############################################################################ #add trait names (model equation number) to variables; #e.g., "litter"=>"y1:litter";"ϵ"=>"y1:ϵ" @@ -116,6 +129,8 @@ function set_random(mme::MME,randomStr::AbstractString,G=false;Vinv=0,names=[],d error(trm," is not found in model equation.") end end # "y1:litter"; "y2:litter"; "y1:group" + # + # e.g. set model.modelTerms[4].random_type ############################################################################ #Set type of model terms ############################################################################ @@ -139,7 +154,7 @@ function set_random(mme::MME,randomStr::AbstractString,G=false;Vinv=0,names=[],d end #Gi : multi-trait; #GiOld & GiNew : single-trait, allow multiple correlated effects in single-trait - Gi = GiOld = GiNew = (G == false ? false : Symmetric(inv(Float32.(G)))) + Gi = GiOld = GiNew = (G == false ? false : Symmetric(inv(Float32.(G)))) #not reference ############################################################################ #return random_effct type ############################################################################ @@ -149,8 +164,8 @@ function set_random(mme::MME,randomStr::AbstractString,G=false;Vinv=0,names=[],d mme.Gi = Gi ν, k = Float32(df), size(mme.pedTrmVec,1) νG0 = ν + k - mme.df.polygenic = νG0 #final df for this inverse wisahrt - mme.scalePed = G*(mme.df.polygenic - k - 1) + df_polygenic = νG0 #final df for this inverse wisahrt + scale_polygenic = G*(df_polygenic - k - 1) random_type = "A" elseif Vinv != 0 random_type = "V" @@ -161,10 +176,13 @@ function set_random(mme::MME,randomStr::AbstractString,G=false;Vinv=0,names=[],d random_type = "I" end term_array = res - df = df+length(term_array) - scale = G*(df-length(term_array)-1) #G*(df-2)/df #from inv χ to inv-wishat + df = random_type == "A" ? df_polygenic : df+length(term_array) + scale = random_type == "A" ? scale_polygenic : G*(df-length(term_array)-1) #G*(df-2)/df #from inv χ to inv-wishat Vinv = Float32.(Vinv) - randomEffect = RandomEffect(term_array,Gi,GiOld,GiNew,df,scale,Vinv,names,random_type) + Gi_struct = Variance(Gi, df, scale, estimate_variance, estimate_scale, constraint) + GiOld_struct = deepcopy(Gi_struct) + GiNew_struct = deepcopy(Gi_struct) + randomEffect = RandomEffect(term_array,Gi_struct,GiOld_struct,GiNew_struct, Vinv,names,random_type) push!(mme.rndTrmVec,randomEffect) nothing end @@ -187,9 +205,9 @@ end #NOTE: SINGLE TRAIT #The equation Ai*(GiNew*R - GiOld*ROld) is used to update Ai part in LHS #The 1st time to add Ai to set up MME, -#mme.GiOld == zeros(G),mme.GiNew == inv(G),mme.R == mme.Rold= R +#mme.GiOld == zeros(G),mme.GiNew == inv(G), mme.R.val == mme.Rold= R #After that, if variances are constant, -#mme.GiOld == mme.GiNew; mme.R == mme.Rold +#mme.GiOld == mme.GiNew; mme.R.val == mme.Rold #If sampling genetic variances, mme.Ginew is updated with a new sample, then #LHS is update as Ai*(GiNew*R - GiOld*ROld), then GiOld = Ginew #if sample residual variances, similar approaches to update the LHS is used. @@ -208,7 +226,7 @@ function addVinv(mme::MME) startPosj = randTrmj.startPos endPosj = startPosj + randTrmj.nLevels - 1 #lamda version (single trait) or not (multi-trait) - myaddVinv = (mme.nModels!=1) ? (Vi*random_term.Gi[i,j]) : (Vi*(random_term.GiNew[i,j]*mme.R - random_term.GiOld[i,j]*mme.ROld)) + myaddVinv = (mme.nModels!=1) ? (Vi*random_term.Gi.val[i,j]) : (Vi*(random_term.GiNew.val[i,j]*mme.R.val - random_term.GiOld.val[i,j]*mme.ROld)) mme.mmeLhs[startPosi:endPosi,startPosj:endPosj] = mme.mmeLhs[startPosi:endPosi,startPosj:endPosj] + myaddVinv end diff --git a/src/1.JWAS/src/residual.jl b/src/1.JWAS/src/residual.jl index 1b9e96d0..084a99c4 100644 --- a/src/1.JWAS/src/residual.jl +++ b/src/1.JWAS/src/residual.jl @@ -13,7 +13,7 @@ end #make tricky Ri (big) allowing NA in phenotypes and fixed effects #make ResVar, dictionary for Rinv, no sample Missing residuals function mkRi(mme::MME,df::DataFrame,Rinv) - resVar = ResVar(mme.R,Dict()) + resVar = ResVar(mme.R.val,Dict()) tstMsng = .!ismissing.(Matrix(df[!,mme.lhsVec])) if mme.missingPattern == false mme.missingPattern = tstMsng @@ -62,9 +62,9 @@ function sampleMissingResiduals(mme,resVec) mybool = mybool .& (msngPtrn[:,i].==notmissing[i]) end #create cov and var matrices for BLP imputation - Ri = inv(Symmetric(mme.R[notmissing,notmissing])) - Rc = mme.R[.!notmissing,notmissing] - L = (cholesky(Symmetric(mme.R[.!notmissing,.!notmissing] - Rc*Ri*Rc')).U)#scalar cholesky + Ri = inv(Symmetric(mme.R.val[notmissing,notmissing])) + Rc = mme.R.val[.!notmissing,notmissing] + L = (cholesky(Symmetric(mme.R.val[.!notmissing,.!notmissing] - Rc*Ri*Rc')).U)#scalar cholesky #imputation mydata[mybool,.!notmissing]= mydata[mybool,notmissing]*Ri*Rc' + randn(sum(mybool),sum(.!notmissing))*L end @@ -84,8 +84,8 @@ end # msng = .!notMsng # resi = resVec[yIndex .+ i][notMsng] # Ri = mme.resVar.RiDict[notMsng][notMsng,notMsng] -# Rc = mme.R[msng,notMsng] -# L = (cholesky(Symmetric(mme.R[msng,msng] - Rc*Ri*Rc')).U)' +# Rc = mme.R.val[msng,notMsng] +# L = (cholesky(Symmetric(mme.R.val[msng,msng] - Rc*Ri*Rc')).U)' # resVec[(yIndex .+ i)[msng]] = Rc*Ri*resi + L*randn(sum(msng)) # #resVec[yIndex+i][msng] = Rc*Ri*resi + L*randn(nMsng) this does not work! # end diff --git a/src/1.JWAS/src/types.jl b/src/1.JWAS/src/types.jl index 2dd86c96..b2d82c7c 100644 --- a/src/1.JWAS/src/types.jl +++ b/src/1.JWAS/src/types.jl @@ -46,6 +46,25 @@ mutable struct ModelTerm end end +################################################################################ +# struct for variance/covariance +# e.g. Variance(1.5,4.0,1.3, true, false, true) +# - marker effect variances (??locus-specific variance: how to reduce memory) +# - residual variances +# - variance for non-genetic effects +################################################################################ +mutable struct Variance + val::Union{AbstractFloat, AbstractArray, Bool} #value of the variance, e.g., single-trait: 1.5, two-trait: [1.3 0.4; 0.4 0.8] + df::Union{AbstractFloat,Bool} #degrees of freedom, e.g., 4.0 + scale::Union{AbstractFloat, AbstractArray, Bool} #scale, e.g., single-trait: 1.0, two-trait: [1.0 0; 0 1.0] + + estimate_variance::Bool #estimate_variance=true means estimate variance at each MCMC iteration + estimate_scale::Bool #estimate_scale=true means estimate scale at each MCMC iteration + constraint::Bool #constraint=true means in multi-trait analysis, covariance is zero +end + + + ################################################################################ #A class for residual covariance matrix for all observations of size (nob*nModel) #where Ri is modified based on missing pattern (number of Ri= 2^ntraits-1) @@ -54,7 +73,7 @@ end #or missing phenotypes are not imputed at each step of MCMC (no marker effects). ################################################################################ mutable struct ResVar - R0::Union{Array{Float64,2},Array{Float32,2}} + R0::Union{Array{Float64,2},Array{Float32,2}} #2-by-2 co RiDict::Dict{BitArray{1},Union{Array{Float64,2},Array{Float32,2}}} end @@ -66,11 +85,11 @@ end ################################################################################ mutable struct RandomEffect #Better to be a dict? key: term_array::Array{AbstractString,1}?? term_array::Array{AbstractString,1} - Gi #covariance matrix (multi-trait) #::Array{Float64,2} + Gi #covariance matrix (multi-trait) #::Array{Float64,2}-> Variance struct GiOld #specific for lambda version of MME (single-trait) #::Array{Float64,2} GiNew #specific for lambda version of MME (single-trait) #::Array{Float64,2} - df::AbstractFloat - scale #::Array{Float64,2} + # df::AbstractFloat + # scale #::Array{Float64,2} Vinv # 0, identity matrix names #[] General IDs and Vinv matrix (order is important now)(modelterm.names) randomType::String @@ -91,15 +110,15 @@ mutable struct Genotypes nLoci #number of markers included in the model ntraits #number of traits included in the model - genetic_variance #genetic variance - G #marker effect variance; ST->Float64;MT->Array{Float64,2} - scale #scale parameter for marker effect variance (G) - df #degree of freedom + genetic_variance #genetic variance, type: Variance struct + G #marker effect variance; ST->Float64;MT->Array{Float64,2}, type: Variance struct +# scale #scale parameter for marker effect variance (G) +# df #degree of freedom method #prior for marker effects (Bayesian ALphabet, GBLUP ...) estimatePi - estimateVariance - estimateScale + estimate_variance + estimate_scale mArray #a collection of matrices used in Bayesian Alphabet mRinvArray #a collection of matrices used in Bayesian Alphabet @@ -129,7 +148,7 @@ mutable struct Genotypes Genotypes(a1,a2,a3,a4,a5,a6,a7,a8,a9)=new(false,false, a1,a2,a3,a4,a5,a6,a7,a8,a4,false, - false,false,false,false, + Variance(false,false,false,true,false,false),Variance(false,false,false,true,false,false), #false,false, false,true,true,false, false,false,false,false,false,false, false,false,false,false, @@ -137,12 +156,12 @@ mutable struct Genotypes false,a9) end -mutable struct DF - residual::AbstractFloat - polygenic::AbstractFloat #df+size(mme.pedTrmVec,1) - marker::AbstractFloat - random::AbstractFloat -end +# mutable struct DF +# residual::AbstractFloat +# polygenic::AbstractFloat #df+size(mme.pedTrmVec,1) +# marker::AbstractFloat +# random::AbstractFloat +# end mutable struct MCMCinfo heterogeneous_residuals @@ -154,8 +173,8 @@ mutable struct MCMCinfo single_step_analysis fitting_J_vector missing_phenotypes - constraint - mega_trait + # constraint + # mega_trait estimate_variance update_priors_frequency outputEBV @@ -210,11 +229,11 @@ mutable struct MME #may merge pedTrmVec here #RESIDUAL EFFECTS - R #residual covariance matrix (multi-trait) ::Array{Union{Float64,Float32},2} + R::Variance #residual covariance matrix (multi-trait) ::Array{Union{Float64,Float32},2} missingPattern #for impuation of missing residual resVar #for impuation of missing residual ROld #initilized to 0 ?? #residual variance (single-trait) for - scaleR #scale parameters + # scaleR #scale parameters meanVare meanVare2 @@ -226,7 +245,7 @@ mutable struct MME outputSamplesVec::Array{ModelTerm,1} #for which location parameters to save MCMC samples - df::DF #prior degree of freedom + # df::DF #prior degree of freedom output_ID output_genotypes @@ -257,25 +276,18 @@ mutable struct MME traits_type #by default all traits are continuous thresholds #thresholds for categorial&binary traits. Dictionary: 1=>[-Inf,0,Inf], where 1 means the 1st trait - function MME(nModels,modelVec,modelTerms,dict,lhsVec,R,ν) - if nModels == 1 - scaleR = R*(ν-2)/ν - νR0 = ν - else - ν,k = ν, nModels - νR0 = ν + k - scaleR = R*(νR0 - k - 1) - end + function MME(nModels,modelVec,modelTerms,dict,lhsVec,R) #MME(nModels,modelVec,modelTerms,dict,lhsVec,R,ν) + return new(nModels,modelVec,modelTerms,dict,lhsVec,[], 0,0,[],0,0, 0,0,zeros(1,1),zeros(1,1),zeros(1,1),zeros(1,1),false,false, [], - R,0,0,R,scaleR,false,false, + R,0,0,R,false,false, #R,0,0,R,scaleR,false,false, [], 0, 1, [], - DF(νR0,4,4,4), + # DF(νR0,4,4,4), 0,0,Dict{String,Any}(), 0, 0, diff --git a/src/1.JWAS/src/variance_components.jl b/src/1.JWAS/src/variance_components.jl index 102b0ed0..da3849ec 100644 --- a/src/1.JWAS/src/variance_components.jl +++ b/src/1.JWAS/src/variance_components.jl @@ -90,9 +90,9 @@ function sample_variance(ycorr_array, nobs, df, scale, invweights, constraint; b R = sample_from_conditional_inverse_Wishart(df + nobs, convert(Array,Symmetric(inv(scale + SSE))), binary_trait_index) end else #diagonal elements only, from scale-inv-⁠χ2 - R = zeros(ntraits,ntraits) + R = Diagonal(zeros(ntraits)) for traiti = 1:ntraits - R[traiti,traiti]= (SSE[traiti,traiti]+scale[traiti])/rand(Chisq(nobs+df)) #scale is a vector in mega_trait, where t-th entry is the scale for t-th trait + R[traiti,traiti]= (SSE[traiti,traiti]+scale[traiti,traiti])/rand(Chisq(nobs+df)) #scale is a vector in mega_trait, where t-th entry is the scale for t-th trait end end return R @@ -105,6 +105,7 @@ function sampleVCs(mme::MME,sol::Union{Array{Float64,1},Array{Float32,1}}) myI = SparseMatrixCSC{mme.MCMCinfo.double_precision ? Float64 : Float32}(I, mme.modelTermDict[term_array[1]].nLevels, mme.modelTermDict[term_array[1]].nLevels) Vi = (random_term.Vinv!=0) ? random_term.Vinv : myI S = zeros(length(term_array),length(term_array)) + @show size(S) for i = 1:length(term_array) termi = term_array[i] randTrmi = mme.modelTermDict[termi] @@ -112,6 +113,7 @@ function sampleVCs(mme::MME,sol::Union{Array{Float64,1},Array{Float32,1}}) endPosi = startPosi + randTrmi.nLevels - 1 for j = i:length(term_array) termj = term_array[j] + @show termi,termj randTrmj = mme.modelTermDict[termj] startPosj = randTrmj.startPos endPosj = startPosj + randTrmj.nLevels - 1 @@ -120,13 +122,13 @@ function sampleVCs(mme::MME,sol::Union{Array{Float64,1},Array{Float32,1}}) end end q = mme.modelTermDict[term_array[1]].nLevels - G0 = rand(InverseWishart(random_term.df + q, convert(Array,Symmetric(random_term.scale + S)))) + G0 = rand(InverseWishart(random_term.Gi.df + q, convert(Array,Symmetric(random_term.Gi.scale + S)))) if mme.MCMCinfo.double_precision == false G0 = Float32.(G0) end - random_term.GiOld = copy(random_term.GiNew) - random_term.GiNew = copy(inv(G0)) - random_term.Gi = copy(inv(G0)) + random_term.GiOld.val = copy(random_term.GiNew.val) + random_term.GiNew.val = copy(inv(G0)) + random_term.Gi.val = copy(inv(G0)) if random_term.randomType == "A" mme.Gi = random_term.Gi end @@ -135,7 +137,7 @@ end ######################################################################## # Marker Effects Variance ######################################################################## -function sample_marker_effect_variance(Mi,constraint=false) +function sample_marker_effect_variance(Mi) if Mi.method == "BayesL" invweights = 1 ./ Mi.gammaArray elseif Mi.method == "GBLUP" @@ -146,27 +148,27 @@ function sample_marker_effect_variance(Mi,constraint=false) if Mi.ntraits == 1 if Mi.method in ["BayesC","BayesL","RR-BLUP","GBLUP"] nloci = Mi.method == "BayesC" ? sum(Mi.δ[1]) : Mi.nMarkers - Mi.G = sample_variance(Mi.α[1], nloci, Mi.df, Mi.scale, invweights) + Mi.G.val = sample_variance(Mi.α[1], nloci, Mi.G.df, Mi.G.scale, invweights) if Mi.method == "BayesL" - sampleGammaArray!(Mi.gammaArray,Mi.α,Mi.G) # MH sampler of gammaArray (Appendix C in paper) + sampleGammaArray!(Mi.gammaArray,Mi.α,Mi.G.val) # MH sampler of gammaArray (Appendix C in paper) end elseif Mi.method == "BayesB" for j=1:Mi.nMarkers - Mi.G[j] = sample_variance(Mi.β[1][j],1,Mi.df, Mi.scale) + Mi.G.val[j] = sample_variance(Mi.β[1][j],1,Mi.G.df, Mi.G.scale) end end else if Mi.method in ["RR-BLUP","BayesC","BayesL","GBLUP"] data = (Mi.method == "BayesC" ? Mi.β : Mi.α) - Mi.G =sample_variance(data, Mi.nMarkers, Mi.df, Mi.scale, invweights, constraint) + Mi.G.val =sample_variance(data, Mi.nMarkers, Mi.G.df, Mi.G.scale, invweights, Mi.G.constraint) if Mi.method == "BayesL" - sampleGammaArray!(Mi.gammaArray,Mi.α,Mi.G) #MH sampler of gammaArray (Appendix C in paper) + sampleGammaArray!(Mi.gammaArray,Mi.α,Mi.G.val) #MH sampler of gammaArray (Appendix C in paper) end elseif Mi.method == "BayesB" #potential slowdown (scalar multiplication is used instead of matrices) marker_effects_matrix = reduce(hcat,Mi.β)' for i = 1:Mi.nMarkers data = marker_effects_matrix[:,i] - Mi.G[i] = sample_variance(data, 1, Mi.df, Mi.scale, false, constraint) + Mi.G.val[i] = sample_variance(data, 1, Mi.G.df, Mi.G.scale, false, Mi.G.constraint) end end end From 86b469b79536dbb8251a6ec7938e97a764a40b87 Mon Sep 17 00:00:00 2001 From: zhaotianjing Date: Mon, 5 Feb 2024 10:58:30 -0600 Subject: [PATCH 4/8] todo: constraint on non-genetic random effects --- src/1.JWAS/src/input_data_validation.jl | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/src/1.JWAS/src/input_data_validation.jl b/src/1.JWAS/src/input_data_validation.jl index 245d5ae9..f1693f91 100644 --- a/src/1.JWAS/src/input_data_validation.jl +++ b/src/1.JWAS/src/input_data_validation.jl @@ -481,20 +481,4 @@ function G_constraint!(mme) end -#multi-trait constraint on non-genetic random effects: modify df and scale -function A_constraint!(mme) - if mme > 0 - for Mi in mme.M - #print - geno_name = Mi.name - printstyled("Constraint on marker effect variance matrix (i.e., zero covariance) for $geno_name \n",bold=false,color=:green) - #check - if mme.nModels == 1 && Mi.G.constraint==true - error("constraint==true is for multi-trait only") - end - #modify df and scale - Mi.G.df = Mi.G.df - mme.nModels - Mi.G.scale = Diagonal(Mi.G.scale/(Mi.G.df - 1))*(Mi.G.df-2)/Mi.G.df #a diagonal matrix - end - end -end \ No newline at end of file +#TO-DO: multi-trait constraint on non-genetic random effects: modify df and scale \ No newline at end of file From 1fd0cdc61caaef6bbff1eed97965d895d119bc28 Mon Sep 17 00:00:00 2001 From: zhaotianjing Date: Mon, 5 Feb 2024 11:27:12 -0600 Subject: [PATCH 5/8] printed info --- src/1.JWAS/src/input_data_validation.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/1.JWAS/src/input_data_validation.jl b/src/1.JWAS/src/input_data_validation.jl index f1693f91..1172c243 100644 --- a/src/1.JWAS/src/input_data_validation.jl +++ b/src/1.JWAS/src/input_data_validation.jl @@ -452,7 +452,7 @@ end #multi-trait constraint on R: modify df and scale for residual variance function R_constraint!(mme) #print - printstyled("Constraint on residual variance matrix (i.e., zero covariance)\n",bold=false,color=:green) + printstyled("Constraint on residual variance-covariance matrix (i.e., zero covariance)\n",bold=false,color=:green) #check if mme.nModels == 1 && mme.R.constraint==true error("constraint==true is for multi-trait only") @@ -468,7 +468,7 @@ function G_constraint!(mme) for Mi in mme.M #print geno_name = Mi.name - printstyled("Constraint on marker effect variance matrix (i.e., zero covariance) for $geno_name \n",bold=false,color=:green) + printstyled("Constraint on marker effect variance-covariance matrix (i.e., zero covariance) for $geno_name \n",bold=false,color=:green) #check if mme.nModels == 1 && Mi.G.constraint==true error("constraint==true is for multi-trait only") From a99e4145cda8024104ea11d5cc20dc9debdff61d Mon Sep 17 00:00:00 2001 From: zhaotianjing Date: Tue, 6 Feb 2024 11:02:54 -0600 Subject: [PATCH 6/8] modify --- src/1.JWAS/src/JWAS.jl | 13 +++++-------- src/1.JWAS/src/build_MME.jl | 2 +- src/1.JWAS/src/random_effects.jl | 10 +++++----- src/1.JWAS/src/types.jl | 11 +++++------ src/1.JWAS/src/variance_components.jl | 4 +--- 5 files changed, 17 insertions(+), 23 deletions(-) diff --git a/src/1.JWAS/src/JWAS.jl b/src/1.JWAS/src/JWAS.jl index bd3cb9aa..1aa186b4 100644 --- a/src/1.JWAS/src/JWAS.jl +++ b/src/1.JWAS/src/JWAS.jl @@ -340,15 +340,12 @@ function runMCMC(mme::MME,df; mme.Gi = map(Float64,mme.Gi) end - - - - #constraint on covariance matrix: modify df and scale + #constraint on covariance matrix if mme.R.constraint==true - R_constraint!(mme) + R_constraint!(mme) #modify mme.R.df and mme.R.scale; scale is a diagonal matrix end if mme.M[1].G.constraint==true - G_constraint!(mme) + G_constraint!(mme) #modify Mi.G.df and Mi.G.scale; scale is a diagonal matrix end # NNBayes: modify parameters for partial connected NN @@ -520,11 +517,11 @@ function getMCMCinfo(mme) end end if mme.pedTrmVec!=0 - polygenic_pos = findfirst(i -> i.randomType=="A", mme.rndTrmVec) + polygenic_pos = findfirst(i -> i.randomType=="A", mme.rndTrmVec) #print all polygenic models terms? end if mme.pedTrmVec!=0 @printf("%-30s\n","genetic variances (polygenic):") - Base.print_matrix(stdout,round.(inv(mme.rndTrmVec[polygenic_pos].Gi.val),digits=3)) + Base.print_matrix(stdout,round.(inv(mme.rndTrmVec[polygenic_pos].Gi.val),digits=3)) #print all polygenic models terms? println() end if mme.nModels == 1 diff --git a/src/1.JWAS/src/build_MME.jl b/src/1.JWAS/src/build_MME.jl index 6ca68941..a6917a20 100644 --- a/src/1.JWAS/src/build_MME.jl +++ b/src/1.JWAS/src/build_MME.jl @@ -366,7 +366,7 @@ function getMME(mme::MME, df::DataFrame) #mixed model equations is obtained below for multi-trait PBLUP #with known residual covariance matrix and missing phenotypes. # if mme.MCMCinfo.mega_trait == true #multiple single trait - if mme.R.constraint == true #tj: Hao, please confirm!!! + if mme.R.constraint == true #tj: Hao, please confirm! now we do not have mege_trait option. We only have constraint option for variances Ri = Diagonal(repeat(mme.invweights,mme.nModels)) else #multi-trait Ri = mkRi(mme,df,mme.invweights) diff --git a/src/1.JWAS/src/random_effects.jl b/src/1.JWAS/src/random_effects.jl index 5c3636b6..129a264e 100644 --- a/src/1.JWAS/src/random_effects.jl +++ b/src/1.JWAS/src/random_effects.jl @@ -104,8 +104,10 @@ function set_random(mme::MME,randomStr::AbstractString,G=false;Vinv=0,names=[],d end names = string.(names) df = Float32(df) - #check for constraint - #?now + + if constraint != false #check constraint + error("Constraint for set_random is not supported now.") + end ############################################################################ #add trait names (model equation number) to variables; @@ -129,8 +131,6 @@ function set_random(mme::MME,randomStr::AbstractString,G=false;Vinv=0,names=[],d error(trm," is not found in model equation.") end end # "y1:litter"; "y2:litter"; "y1:group" - # - # e.g. set model.modelTerms[4].random_type ############################################################################ #Set type of model terms ############################################################################ @@ -154,7 +154,7 @@ function set_random(mme::MME,randomStr::AbstractString,G=false;Vinv=0,names=[],d end #Gi : multi-trait; #GiOld & GiNew : single-trait, allow multiple correlated effects in single-trait - Gi = GiOld = GiNew = (G == false ? false : Symmetric(inv(Float32.(G)))) #not reference + Gi = GiOld = GiNew = (G == false ? false : Symmetric(inv(Float32.(G)))) ############################################################################ #return random_effct type ############################################################################ diff --git a/src/1.JWAS/src/types.jl b/src/1.JWAS/src/types.jl index b2d82c7c..a50e483d 100644 --- a/src/1.JWAS/src/types.jl +++ b/src/1.JWAS/src/types.jl @@ -73,7 +73,7 @@ end #or missing phenotypes are not imputed at each step of MCMC (no marker effects). ################################################################################ mutable struct ResVar - R0::Union{Array{Float64,2},Array{Float32,2}} #2-by-2 co + R0::Union{Array{Float64,2},Array{Float32,2}} RiDict::Dict{BitArray{1},Union{Array{Float64,2},Array{Float32,2}}} end @@ -85,9 +85,9 @@ end ################################################################################ mutable struct RandomEffect #Better to be a dict? key: term_array::Array{AbstractString,1}?? term_array::Array{AbstractString,1} - Gi #covariance matrix (multi-trait) #::Array{Float64,2}-> Variance struct - GiOld #specific for lambda version of MME (single-trait) #::Array{Float64,2} - GiNew #specific for lambda version of MME (single-trait) #::Array{Float64,2} + Gi #covariance matrix (multi-trait) #the "Variance" object + GiOld #specific for lambda version of MME (single-trait) #Variance.val has type ::Array{Float64,2} + GiNew #specific for lambda version of MME (single-trait) #Variance.val has type ::Array{Float64,2} # df::AbstractFloat # scale #::Array{Float64,2} Vinv # 0, identity matrix @@ -111,7 +111,7 @@ mutable struct Genotypes ntraits #number of traits included in the model genetic_variance #genetic variance, type: Variance struct - G #marker effect variance; ST->Float64;MT->Array{Float64,2}, type: Variance struct + G #marker effect variance; the "Variance" object, for Variance.val: ST->Float64;MT->Array{Float64,2} # scale #scale parameter for marker effect variance (G) # df #degree of freedom @@ -277,7 +277,6 @@ mutable struct MME thresholds #thresholds for categorial&binary traits. Dictionary: 1=>[-Inf,0,Inf], where 1 means the 1st trait function MME(nModels,modelVec,modelTerms,dict,lhsVec,R) #MME(nModels,modelVec,modelTerms,dict,lhsVec,R,ν) - return new(nModels,modelVec,modelTerms,dict,lhsVec,[], 0,0,[],0,0, 0,0,zeros(1,1),zeros(1,1),zeros(1,1),zeros(1,1),false,false, diff --git a/src/1.JWAS/src/variance_components.jl b/src/1.JWAS/src/variance_components.jl index da3849ec..36e0339e 100644 --- a/src/1.JWAS/src/variance_components.jl +++ b/src/1.JWAS/src/variance_components.jl @@ -92,7 +92,7 @@ function sample_variance(ycorr_array, nobs, df, scale, invweights, constraint; b else #diagonal elements only, from scale-inv-⁠χ2 R = Diagonal(zeros(ntraits)) for traiti = 1:ntraits - R[traiti,traiti]= (SSE[traiti,traiti]+scale[traiti,traiti])/rand(Chisq(nobs+df)) #scale is a vector in mega_trait, where t-th entry is the scale for t-th trait + R[traiti,traiti]= (SSE[traiti,traiti]+df*scale[traiti,traiti])/rand(Chisq(nobs+df)) end end return R @@ -105,7 +105,6 @@ function sampleVCs(mme::MME,sol::Union{Array{Float64,1},Array{Float32,1}}) myI = SparseMatrixCSC{mme.MCMCinfo.double_precision ? Float64 : Float32}(I, mme.modelTermDict[term_array[1]].nLevels, mme.modelTermDict[term_array[1]].nLevels) Vi = (random_term.Vinv!=0) ? random_term.Vinv : myI S = zeros(length(term_array),length(term_array)) - @show size(S) for i = 1:length(term_array) termi = term_array[i] randTrmi = mme.modelTermDict[termi] @@ -113,7 +112,6 @@ function sampleVCs(mme::MME,sol::Union{Array{Float64,1},Array{Float32,1}}) endPosi = startPosi + randTrmi.nLevels - 1 for j = i:length(term_array) termj = term_array[j] - @show termi,termj randTrmj = mme.modelTermDict[termj] startPosj = randTrmj.startPos endPosj = startPosj + randTrmj.nLevels - 1 From ea23d8cee3c7631ea033d9d1d9cda05721b85ae5 Mon Sep 17 00:00:00 2001 From: zhaotianjing Date: Tue, 6 Feb 2024 14:36:09 -0600 Subject: [PATCH 7/8] fix output file --- src/1.JWAS/src/output.jl | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/1.JWAS/src/output.jl b/src/1.JWAS/src/output.jl index 5a519b50..a8a1a13a 100644 --- a/src/1.JWAS/src/output.jl +++ b/src/1.JWAS/src/output.jl @@ -396,11 +396,8 @@ function output_MCMC_samples_setup(mme,nIter,output_samples_frequency,file_name= writedlm(outfile["EBV_"*string(mme.lhsVec[traiti])],transubstrarr(mme.output_ID),',') end if mme.MCMCinfo.output_heritability == true && mme.MCMCinfo.single_step_analysis == false - if mme.M[1].G.constraint == false #may need change when there are multiple M - varheader = repeat(mytraits,inner=length(mytraits)).*"_".*repeat(mytraits,outer=length(mytraits)) - else - varheader = transubstrarr(map(string,mme.lhsVec)) - end + varheader = repeat(mytraits,inner=length(mytraits)).*"_".*repeat(mytraits,outer=length(mytraits)) + writedlm(outfile["genetic_variance"],transubstrarr(varheader),',') if mme.MCMCinfo.RRM == false writedlm(outfile["heritability"],transubstrarr(map(string,mme.lhsVec)),',') @@ -446,7 +443,7 @@ function output_MCMC_samples(mme,vRes,G0, writedlm(outfile["marker_effects_"*Mi.name*"_"*geno_names[traiti]],Mi.α[traiti]',',') end - if Mi.G.val != false && Mi.G.constraint == false #Do not save marker effect variances with constraint + if Mi.G.val != false && mme.nonlinear_function == false #do not save marker effect variances in NNMM to save space if mme.nModels == 1 writedlm(outfile["marker_effects_variances"*"_"*Mi.name],Mi.G.val',',') else From 5550fee8eb074c7e940b3628787c63e31e297a6f Mon Sep 17 00:00:00 2001 From: zhaotianjing Date: Tue, 6 Feb 2024 15:02:34 -0600 Subject: [PATCH 8/8] clean comment --- src/1.JWAS/src/JWAS.jl | 4 ++-- src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl | 9 +-------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/src/1.JWAS/src/JWAS.jl b/src/1.JWAS/src/JWAS.jl index 1aa186b4..7df09bb2 100644 --- a/src/1.JWAS/src/JWAS.jl +++ b/src/1.JWAS/src/JWAS.jl @@ -517,11 +517,11 @@ function getMCMCinfo(mme) end end if mme.pedTrmVec!=0 - polygenic_pos = findfirst(i -> i.randomType=="A", mme.rndTrmVec) #print all polygenic models terms? + polygenic_pos = findfirst(i -> i.randomType=="A", mme.rndTrmVec) end if mme.pedTrmVec!=0 @printf("%-30s\n","genetic variances (polygenic):") - Base.print_matrix(stdout,round.(inv(mme.rndTrmVec[polygenic_pos].Gi.val),digits=3)) #print all polygenic models terms? + Base.print_matrix(stdout,round.(inv(mme.rndTrmVec[polygenic_pos].Gi.val),digits=3)) println() end if mme.nModels == 1 diff --git a/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl b/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl index 3a3b89b2..44af600b 100644 --- a/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl +++ b/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl @@ -14,10 +14,8 @@ function MCMC_BayesianAlphabet(mme,df) has_binary_trait = "categorical(binary)" ∈ mme.traits_type has_censored_trait = "censored" ∈ mme.traits_type missing_phenotypes = mme.MCMCinfo.missing_phenotypes - # constraint = mme.MCMCinfo.constraint causal_structure = mme.causal_structure is_multi_trait = mme.nModels != 1 - # is_mega_trait = mme.R.constraint==true && mme.M[1].G.constraint==true #is_mega_trait when no residual and marker effect covariances is_nnbayes_partial = mme.nonlinear_function != false && mme.is_fully_connected==false is_activation_fcn = mme.is_activation_fcn nonlinear_function = mme.nonlinear_function @@ -204,7 +202,6 @@ function MCMC_BayesianAlphabet(mme,df) if Mi.method in ["BayesC","BayesB","BayesA"] locus_effect_variances = (Mi.method == "BayesC" ? fill(Mi.G.val,Mi.nMarkers) : Mi.G.val) if is_multi_trait && !is_nnbayes_partial - # if is_mega_trait if Mi.G.constraint==true megaBayesABC!(Mi,wArray,mme.R.val,locus_effect_variances) else @@ -217,7 +214,6 @@ function MCMC_BayesianAlphabet(mme,df) end elseif Mi.method =="RR-BLUP" if is_multi_trait && !is_nnbayes_partial - # if is_mega_trait if Mi.G.constraint==true megaBayesC0!(Mi,wArray,mme.R.val) else @@ -230,7 +226,7 @@ function MCMC_BayesianAlphabet(mme,df) end elseif Mi.method == "BayesL" if is_multi_trait && !is_nnbayes_partial - # if is_mega_trait #problem with sampleGammaArray + #problem with sampleGammaArray if Mi.G.constraint==true megaBayesL!(Mi,wArray,mme.R.val) else @@ -243,7 +239,6 @@ function MCMC_BayesianAlphabet(mme,df) end elseif Mi.method == "GBLUP" if is_multi_trait && !is_nnbayes_partial - # if is_mega_trait if Mi.G.constraint==true megaGBLUP!(Mi,wArray,mme.R.val,invweights) else @@ -260,7 +255,6 @@ function MCMC_BayesianAlphabet(mme,df) ######################################################################## if Mi.estimatePi == true if is_multi_trait && !is_nnbayes_partial - # if is_mega_trait if Mi.G.constraint==true Mi.π = [samplePi(sum(Mi.δ[i]), Mi.nMarkers) for i in 1:mme.nModels] else @@ -358,7 +352,6 @@ function MCMC_BayesianAlphabet(mme,df) output_posterior_mean_variance(mme,nsamples) #mean and variance of posterior distribution if mme.pedTrmVec!=0 - # polygenic_pos = findfirst(i -> i.randomType=="A", mme.rndTrmVec) polygenic_effects_variance = inv(mme.rndTrmVec[polygenic_pos].Gi.val) else polygenic_effects_variance=false