From 792f7793cac6745921dcd22542f0e403b01b521b Mon Sep 17 00:00:00 2001 From: ShouvikGhosh2048 Date: Mon, 9 Dec 2024 09:51:49 +0000 Subject: [PATCH 1/3] Store the samples instead of the chain/distribution --- src/CRRao.jl | 2 +- src/bayesian/getter.jl | 55 +++++---------------- src/bayesian/linear_regression.jl | 14 +++++- src/bayesian/logistic_regression.jl | 9 +++- src/bayesian/negativebinomial_regression.jl | 9 +++- src/bayesian/poisson_regression.jl | 9 +++- src/fitmodel.jl | 35 +++---------- src/print.jl | 13 ++--- 8 files changed, 57 insertions(+), 89 deletions(-) diff --git a/src/CRRao.jl b/src/CRRao.jl index 19b6c49..27d046b 100644 --- a/src/CRRao.jl +++ b/src/CRRao.jl @@ -396,7 +396,7 @@ export LinearRegression, LogisticRegression, PoissonRegression, NegBinomRegressi export Prior_Ridge, Prior_Laplace, Prior_Cauchy, Prior_TDist, Prior_HorseShoe, Prior_Gauss export CRRaoLink, Logit, Probit, Cloglog, Cauchit, fit export coef, coeftable, r2, adjr2, loglikelihood, aic, bic, sigma, predict, residuals, cooksdistance, BPTest, pvalue -export FrequentistRegression, BayesianRegressionMCMC, BayesianRegressionVI +export FrequentistRegression, BayesianRegression include("random_number_generator.jl") include("general_stats.jl") diff --git a/src/bayesian/getter.jl b/src/bayesian/getter.jl index b02bddf..15f37f4 100644 --- a/src/bayesian/getter.jl +++ b/src/bayesian/getter.jl @@ -1,60 +1,27 @@ -function predict(container::BayesianRegressionMCMC{:LinearRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) +function predict(container::BayesianRegression{:LinearRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) X = modelmatrix(container.formula, newdata) - - params = get_params(container.chain[prediction_chain_start:end,:,:]) - W = params.β - if isa(W, Tuple) - W = reduce(hcat, W) - end - #predictions = params.α' .+ X * W' - predictions = X * W' - return vec(mean(predictions, dims=2)) -end - -function predict(container::BayesianRegressionVI{:LinearRegression}, newdata::DataFrame, number_of_samples::Int64 = 1000) - X = modelmatrix(container.formula, newdata) - - W = rand(CRRao_rng, container.dist, number_of_samples) - W = W[union(container.symbol_to_range[:β]...), :] + W = container.samples[:, prediction_chain_start:end] predictions = X * W return vec(mean(predictions, dims=2)) end -function predict(container::BayesianRegressionMCMC{:LogisticRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) +function predict(container::BayesianRegression{:LogisticRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) X = modelmatrix(container.formula, newdata) - - params = get_params(container.chain[prediction_chain_start:end,:,:]) - W = params.β - if isa(W, Tuple) - W = reduce(hcat, W) - end - #z = params.α' .+ X * W' - z = X * W' + W = container.samples[:, prediction_chain_start:end] + z = X * W return vec(mean(container.link.link_function.(z), dims=2)) end -function predict(container::BayesianRegressionMCMC{:NegativeBinomialRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) +function predict(container::BayesianRegression{:NegativeBinomialRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) X = modelmatrix(container.formula, newdata) - - params = get_params(container.chain[prediction_chain_start:end,:,:]) - W = params.β - if isa(W, Tuple) - W = reduce(hcat, W) - end - #z = params.α' .+ X * W' - z = X * W' + W = container.samples[:, prediction_chain_start:end] + z = X * W return vec(mean(exp.(z), dims=2)) end -function predict(container::BayesianRegressionMCMC{:PoissonRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) +function predict(container::BayesianRegression{:PoissonRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) X = modelmatrix(container.formula, newdata) - - params = get_params(container.chain[prediction_chain_start:end,:,:]) - W = params.β - if isa(W, Tuple) - W = reduce(hcat, W) - end - #z = params.α' .+ X * W' - z = X * W' + W = container.samples[:, prediction_chain_start:end] + z = X * W return vec(mean(exp.(z), dims=2)) end \ No newline at end of file diff --git a/src/bayesian/linear_regression.jl b/src/bayesian/linear_regression.jl index d50dc3e..ffe7e85 100644 --- a/src/bayesian/linear_regression.jl +++ b/src/bayesian/linear_regression.jl @@ -6,7 +6,14 @@ function linear_reg_mcmc(formula::FormulaTerm, data::DataFrame, turingModel::Fun @warn "Simulation size should generally be atleast 500." end chain = sample(CRRao_rng, turingModel(X, y), NUTS(), sim_size) - return BayesianRegressionMCMC(:LinearRegression, chain, formula) + params = get_params(chain[:,:,:]) + samples = params.β + if isa(samples, Tuple) + samples = reduce(hcat, samples) + end + samples = samples' + + return BayesianRegression(:LinearRegression, samples, formula) end function linear_reg_vi(formula::FormulaTerm, data::DataFrame, turingModel::Function, max_iter::Int64) @@ -16,10 +23,13 @@ function linear_reg_vi(formula::FormulaTerm, data::DataFrame, turingModel::Funct if max_iter < 500 @warn "Max iterations should generally be atleast 500." end + model = turingModel(X, y) dist = vi(model, ADVI(100, max_iter)) + samples = rand(CRRao_rng, dist, max_iter) _, symbol_to_range = bijector(model, Val(true)) - return BayesianRegressionVI(:LinearRegression, dist, formula, symbol_to_range) + samples = samples[union(symbol_to_range[:β]...), :] + return BayesianRegression(:LinearRegression, samples, formula) end """ diff --git a/src/bayesian/logistic_regression.jl b/src/bayesian/logistic_regression.jl index d2f122f..682717c 100644 --- a/src/bayesian/logistic_regression.jl +++ b/src/bayesian/logistic_regression.jl @@ -6,7 +6,14 @@ function logistic_reg(formula::FormulaTerm, data::DataFrame, Link::CRRaoLink, tu @warn "Simulation size should generally be atleast 500." end chain = sample(CRRao_rng, turingModel(X, y), NUTS(), sim_size) - return BayesianRegressionMCMC(:LogisticRegression, chain, formula, Link) + params = get_params(chain[:,:,:]) + samples = params.β + if isa(samples, Tuple) + samples = reduce(hcat, samples) + end + samples = samples' + + return BayesianRegression(:LogisticRegression, samples, formula, Link) end """ diff --git a/src/bayesian/negativebinomial_regression.jl b/src/bayesian/negativebinomial_regression.jl index 7e220c7..7070cee 100644 --- a/src/bayesian/negativebinomial_regression.jl +++ b/src/bayesian/negativebinomial_regression.jl @@ -11,7 +11,14 @@ function negativebinomial_reg( @warn "Simulation size should generally be atleast 500." end chain = sample(CRRao_rng, turingModel(X, y), NUTS(), sim_size) - return BayesianRegressionMCMC(:NegativeBinomialRegression, chain, formula) + params = get_params(chain[:,:,:]) + samples = params.β + if isa(samples, Tuple) + samples = reduce(hcat, samples) + end + samples = samples' + + return BayesianRegression(:NegativeBinomialRegression, samples, formula) end """ diff --git a/src/bayesian/poisson_regression.jl b/src/bayesian/poisson_regression.jl index 1bec20e..3f175cd 100644 --- a/src/bayesian/poisson_regression.jl +++ b/src/bayesian/poisson_regression.jl @@ -6,7 +6,14 @@ function poisson_reg(formula::FormulaTerm, data::DataFrame, turingModel::Functio @warn "Simulation size should generally be atleast 500." end chain = sample(CRRao_rng, turingModel(X, y), NUTS(), sim_size) - return BayesianRegressionMCMC(:PoissonRegression, chain, formula) + params = get_params(chain[:,:,:]) + samples = params.β + if isa(samples, Tuple) + samples = reduce(hcat, samples) + end + samples = samples' + + return BayesianRegression(:PoissonRegression, samples, formula) end """ diff --git a/src/fitmodel.jl b/src/fitmodel.jl index df45575..ec7b7a5 100644 --- a/src/fitmodel.jl +++ b/src/fitmodel.jl @@ -22,48 +22,25 @@ FrequentistRegression(RegressionType::Symbol, model, formula, link = GLM.Identit """ ```julia -BayesianRegressionMCMC{RegressionType} +BayesianRegression{RegressionType} ``` Type to represent bayesian regression models (using MCMC) returned by `fit` functions. This type is used internally by the package to represent all bayesian regression models using MCMC. `RegressionType` is a `Symbol` representing the model class. """ -struct BayesianRegressionMCMC{RegressionType} <: RegressionModel - chain +struct BayesianRegression{RegressionType} <: RegressionModel + samples formula::FormulaTerm link end """ ```julia -BayesianRegressionMCMC(::Symbol, chain) +BayesianRegression(::Symbol, samples) ``` -Constructor for `BayesianRegressionMCMC`. `model` can be any regression model. Used by `fit` functions to return a bayesian regression model container. +Constructor for `BayesianRegression`. `model` can be any regression model. Used by `fit` functions to return a bayesian regression model container. """ -BayesianRegressionMCMC(RegressionType::Symbol, chain, formula, link = Identity()) = BayesianRegressionMCMC{RegressionType}(chain, formula, link) - -""" -```julia -BayesianRegressionVI{RegressionType} -``` - -Type to represent bayesian regression models (using VI) returned by `fit` functions. This type is used internally by the package to represent all bayesian regression models using VI. `RegressionType` is a `Symbol` representing the model class. -""" -struct BayesianRegressionVI{RegressionType} <: RegressionModel - dist - formula::FormulaTerm - symbol_to_range - link -end - -""" -```julia -BayesianRegressionVI(::Symbol, dist) -``` - -Constructor for `BayesianRegressionVI`. `model` can be any regression model. Used by `fit` functions to return a bayesian regression model container. -""" -BayesianRegressionVI(RegressionType::Symbol, dist, formula, symbol_to_range, link = Identity()) = BayesianRegressionVI{RegressionType}(dist, formula, symbol_to_range, link) +BayesianRegression(RegressionType::Symbol, samples, formula, link = Identity()) = BayesianRegression{RegressionType}(samples, formula, link) # Print Messages include("print.jl") diff --git a/src/print.jl b/src/print.jl index 2b57ed7..f216e5b 100644 --- a/src/print.jl +++ b/src/print.jl @@ -27,16 +27,9 @@ function Base.show(io::IO, container::FrequentistRegression{:PoissonRegression}) end # Bayesian Models -function Base.show(io::IO, container::BayesianRegressionMCMC) +function Base.show(io::IO, container::BayesianRegression) println(io, "Formula: ", container.formula) println(io, "Link: ", container.link) - print(io, "Chain: ") - show(io, MIME("text/plain"), container.chain) -end - -function Base.show(io::IO, container::BayesianRegressionVI) - println(io, "Formula: ", container.formula) - println(io, "Link: ", container.link) - print(io, "Distribution: ") - show(io, MIME("text/plain"), container.dist) + print(io, "Samples: ") + show(io, MIME("text/plain"), container.samples) end \ No newline at end of file From 84631dbdb43a6238617848d83be34ebc30228936 Mon Sep 17 00:00:00 2001 From: ShouvikGhosh2048 Date: Mon, 9 Dec 2024 18:43:34 +0530 Subject: [PATCH 2/3] Use common bayesian struct --- src/CRRao.jl | 40 +++++++- src/bayesian/getter.jl | 4 +- src/bayesian/linear_regression.jl | 104 ++++++-------------- test/numerical/bayesian/LinearRegression.jl | 24 +++-- 4 files changed, 86 insertions(+), 86 deletions(-) diff --git a/src/CRRao.jl b/src/CRRao.jl index 27d046b..e795faf 100644 --- a/src/CRRao.jl +++ b/src/CRRao.jl @@ -392,9 +392,47 @@ end Cauchit() = Cauchit(Cauchit_Link) +""" +```julia +BayesianAlgorithm +``` + +Abstract type representing bayesian algorithms which are used to dispatch to appropriate calls. +""" +abstract type BayesianAlgorithm end + +""" +```julia +MCMC <: BayesianAlgorithm +``` + +A type representing MCMC algorithms. +""" +struct MCMC <: BayesianAlgorithm + sim_size::Int64 + prediction_chain_start::Int64 +end + +MCMC() = MCMC(1000, 200) + +""" +```julia +VI <: BayesianAlgorithm +``` + +A type representing variational inference algorithms. +""" +struct VI <: BayesianAlgorithm + distribution_sample_count::Int64 + vi_max_iters::Int64 + vi_samples_per_step::Int64 +end + +VI() = VI(1000, 10000, 100) + export LinearRegression, LogisticRegression, PoissonRegression, NegBinomRegression, Boot_Residual export Prior_Ridge, Prior_Laplace, Prior_Cauchy, Prior_TDist, Prior_HorseShoe, Prior_Gauss -export CRRaoLink, Logit, Probit, Cloglog, Cauchit, fit +export CRRaoLink, Logit, Probit, Cloglog, Cauchit, fit, MCMC, VI export coef, coeftable, r2, adjr2, loglikelihood, aic, bic, sigma, predict, residuals, cooksdistance, BPTest, pvalue export FrequentistRegression, BayesianRegression diff --git a/src/bayesian/getter.jl b/src/bayesian/getter.jl index 15f37f4..86a5b50 100644 --- a/src/bayesian/getter.jl +++ b/src/bayesian/getter.jl @@ -1,6 +1,6 @@ -function predict(container::BayesianRegression{:LinearRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) +function predict(container::BayesianRegression{:LinearRegression}, newdata::DataFrame) X = modelmatrix(container.formula, newdata) - W = container.samples[:, prediction_chain_start:end] + W = container.samples predictions = X * W return vec(mean(predictions, dims=2)) end diff --git a/src/bayesian/linear_regression.jl b/src/bayesian/linear_regression.jl index ffe7e85..959bf7a 100644 --- a/src/bayesian/linear_regression.jl +++ b/src/bayesian/linear_regression.jl @@ -1,12 +1,12 @@ -function linear_reg_mcmc(formula::FormulaTerm, data::DataFrame, turingModel::Function, sim_size::Int64) - formula = apply_schema(formula, schema(formula, data),RegressionModel) +function linear_reg(formula::FormulaTerm, data::DataFrame, turingModel::Function, algorithm::MCMC) + formula = apply_schema(formula, schema(formula, data), RegressionModel) y, X = modelcols(formula, data) - if sim_size < 500 + if algorithm.sim_size < 500 @warn "Simulation size should generally be atleast 500." end - chain = sample(CRRao_rng, turingModel(X, y), NUTS(), sim_size) - params = get_params(chain[:,:,:]) + chain = sample(CRRao_rng, turingModel(X, y), NUTS(), algorithm.sim_size) + params = get_params(chain[algorithm.prediction_chain_start:end,:,:]) samples = params.β if isa(samples, Tuple) samples = reduce(hcat, samples) @@ -16,17 +16,17 @@ function linear_reg_mcmc(formula::FormulaTerm, data::DataFrame, turingModel::Fun return BayesianRegression(:LinearRegression, samples, formula) end -function linear_reg_vi(formula::FormulaTerm, data::DataFrame, turingModel::Function, max_iter::Int64) - formula = apply_schema(formula, schema(formula, data),RegressionModel) +function linear_reg(formula::FormulaTerm, data::DataFrame, turingModel::Function, algorithm::VI) + formula = apply_schema(formula, schema(formula, data), RegressionModel) y, X = modelcols(formula, data) - if max_iter < 500 + if algorithm.vi_max_iters < 500 @warn "Max iterations should generally be atleast 500." end model = turingModel(X, y) - dist = vi(model, ADVI(100, max_iter)) - samples = rand(CRRao_rng, dist, max_iter) + dist = vi(model, ADVI(algorithm.vi_samples_per_step, algorithm.vi_max_iters)) + samples = rand(CRRao_rng, dist, algorithm.distribution_sample_count) _, symbol_to_range = bijector(model, Val(true)) samples = samples[union(symbol_to_range[:β]...), :] return BayesianRegression(:LinearRegression, samples, formula) @@ -39,9 +39,8 @@ fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_Ridge, - use_vi::Bool = false, + algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 0.01, - sim_size::Int64 = use_vi ? 20000 : 1000, ) ``` @@ -108,9 +107,8 @@ function fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_Ridge, - use_vi::Bool = false, + algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 0.01, - sim_size::Int64 = use_vi ? 20000 : 1000 ) @model LinearRegression(X, y) = begin p = size(X, 2) @@ -129,11 +127,7 @@ function fit( y ~ MvNormal(X * β, σ) end - if use_vi - return linear_reg_vi(formula, data, LinearRegression, sim_size) - else - return linear_reg_mcmc(formula, data, LinearRegression, sim_size) - end + return linear_reg(formula, data, LinearRegression, algorithm) end """ @@ -143,9 +137,8 @@ fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_Laplace, - use_vi::Bool = false, + algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 0.01, - sim_size::Int64 = use_vi ? 20000 : 1000, ) ``` @@ -210,9 +203,8 @@ function fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_Laplace, - use_vi::Bool = false, + algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 0.01, - sim_size::Int64 = use_vi ? 20000 : 1000 ) @model LinearRegression(X, y) = begin p = size(X, 2) @@ -230,11 +222,7 @@ function fit( y ~ MvNormal(X * β, σ) end - if use_vi - return linear_reg_vi(formula, data, LinearRegression, sim_size) - else - return linear_reg_mcmc(formula, data, LinearRegression, sim_size) - end + return linear_reg(formula, data, LinearRegression, algorithm) end """ @@ -244,8 +232,7 @@ fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_Cauchy, - use_vi::Bool = false, - sim_size::Int64 = use_vi ? 20000 : 1000, + algorithm::BayesianAlgorithm = MCMC(), ) ``` @@ -309,8 +296,7 @@ function fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_Cauchy, - use_vi::Bool = false, - sim_size::Int64 = use_vi ? 20000 : 1000 + algorithm::BayesianAlgorithm = MCMC(), ) @model LinearRegression(X, y) = begin p = size(X, 2) @@ -325,11 +311,7 @@ function fit( y ~ MvNormal(X * β, σ) end - if use_vi - return linear_reg_vi(formula, data, LinearRegression, sim_size) - else - return linear_reg_mcmc(formula, data, LinearRegression, sim_size) - end + return linear_reg(formula, data, LinearRegression, algorithm) end """ @@ -339,9 +321,8 @@ fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_TDist, + algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 2.0, - use_vi::Bool = false, - sim_size::Int64 = use_vi ? 20000 : 1000, ) ``` @@ -409,9 +390,8 @@ function fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_TDist, - use_vi::Bool = false, + algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 2.0, - sim_size::Int64 = use_vi ? 20000 : 1000 ) @model LinearRegression(X, y) = begin p = size(X, 2) @@ -429,11 +409,7 @@ function fit( y ~ MvNormal(X * β, σ) end - if use_vi - return linear_reg_vi(formula, data, LinearRegression, sim_size) - else - return linear_reg_mcmc(formula, data, LinearRegression, sim_size) - end + return linear_reg(formula, data, LinearRegression, algorithm) end @@ -444,8 +420,7 @@ fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_HorseShoe, - use_vi::Bool = false, - sim_size::Int64 = use_vi ? 20000 : 1000, + algorithm::BayesianAlgorithm = MCMC(), ) ``` @@ -506,8 +481,7 @@ function fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_HorseShoe, - use_vi::Bool = false, - sim_size::Int64 = use_vi ? 20000 : 1000 + algorithm::BayesianAlgorithm = MCMC(), ) @model LinearRegression(X, y) = begin p = size(X, 2) @@ -528,11 +502,7 @@ function fit( y ~ MvNormal( X * β, σ) end - if use_vi - return linear_reg_vi(formula, data, LinearRegression, sim_size) - else - return linear_reg_mcmc(formula, data, LinearRegression, sim_size) - end + return linear_reg(formula, data, LinearRegression, algorithm) end """ @@ -544,8 +514,7 @@ fit( prior::Prior_Gauss, alpha_prior_mean::Float64, beta_prior_mean::Vector{Float64}, - use_vi::Bool = false, - sim_size::Int64 = use_vi ? 20000 : 1000, + algorithm::BayesianAlgorithm = MCMC(), ) ``` @@ -598,8 +567,7 @@ function fit( prior::Prior_Gauss, alpha_prior_mean::Float64, beta_prior_mean::Vector{Float64}, - use_vi::Bool = false, - sim_size::Int64 = use_vi ? 20000 : 1000 + algorithm::BayesianAlgorithm = MCMC(), ) @model LinearRegression(X, y) = begin p = size(X, 2) @@ -625,11 +593,7 @@ function fit( y ~ MvNormal(X * β, σ) end - if use_vi - return linear_reg_vi(formula, data, LinearRegression, sim_size) - else - return linear_reg_mcmc(formula, data, LinearRegression, sim_size) - end + return linear_reg(formula, data, LinearRegression, algorithm) end @@ -644,8 +608,7 @@ fit( alpha_prior_sd::Float64, beta_prior_mean::Vector{Float64}, beta_prior_sd::Vector{Float64}, - use_vi::Bool = false, - sim_size::Int64 = use_vi ? 20000 : 1000, + algorithm::BayesianAlgorithm = MCMC(), ) ``` @@ -699,8 +662,7 @@ function fit( alpha_prior_sd::Float64, beta_prior_mean::Vector{Float64}, beta_prior_sd::Vector{Float64}, - use_vi::Bool = false, - sim_size::Int64 = use_vi ? 20000 : 1000 + algorithm::BayesianAlgorithm = MCMC(), ) @model LinearRegression(X, y) = begin p = size(X, 2) @@ -728,9 +690,5 @@ function fit( y ~ MvNormal(X * β, σ) end - if use_vi - return linear_reg_vi(formula, data, LinearRegression, sim_size) - else - return linear_reg_mcmc(formula, data, LinearRegression, sim_size) - end + return linear_reg(formula, data, LinearRegression, algorithm) end \ No newline at end of file diff --git a/test/numerical/bayesian/LinearRegression.jl b/test/numerical/bayesian/LinearRegression.jl index f1285dc..339e3f3 100644 --- a/test/numerical/bayesian/LinearRegression.jl +++ b/test/numerical/bayesian/LinearRegression.jl @@ -12,24 +12,28 @@ for (prior, test_mean) in tests # MCMC CRRao.set_rng(StableRNG(123)) model = fit(@formula(MPG ~ HP + WT + Gear), mtcars, LinearRegression(), prior) - prediction = predict(model, mtcars) - @test mean(prediction) - 2 * std(prediction) <= test_mean && test_mean <= mean(prediction) + 2 * std(prediction) + mcmc_prediction = predict(model, mtcars) + @test mean(mcmc_prediction) - 2 * std(mcmc_prediction) <= test_mean && test_mean <= mean(mcmc_prediction) + 2 * std(mcmc_prediction) # VI CRRao.set_rng(StableRNG(123)) - model = fit(@formula(MPG ~ HP + WT + Gear), mtcars, LinearRegression(), prior, true) - prediction = predict(model, mtcars) - @test mean(prediction) - 2 * std(prediction) <= test_mean && test_mean <= mean(prediction) + 2 * std(prediction) + model = fit(@formula(MPG ~ HP + WT + Gear), mtcars, LinearRegression(), prior, VI()) + vi_prediction = predict(model, mtcars) + @test mean(vi_prediction) - 2 * std(vi_prediction) <= test_mean && test_mean <= mean(vi_prediction) + 2 * std(vi_prediction) + + @test maximum(abs.(mcmc_prediction - vi_prediction)) <= 5.0 end gauss_test = 20.0796026428345 CRRao.set_rng(StableRNG(123)) model = fit(@formula(MPG ~ HP + WT + Gear), mtcars, LinearRegression(), Prior_Gauss(), 30.0, [0.0,-3.0,1.0]) -prediction = predict(model, mtcars) -@test mean(prediction) - 2 * std(prediction) <= gauss_test && gauss_test <= mean(prediction) + 2 * std(prediction) +mcmc_prediction = predict(model, mtcars) +@test mean(mcmc_prediction) - 2 * std(mcmc_prediction) <= gauss_test && gauss_test <= mean(mcmc_prediction) + 2 * std(mcmc_prediction) CRRao.set_rng(StableRNG(123)) -model = fit(@formula(MPG ~ HP + WT + Gear), mtcars, LinearRegression(), Prior_Gauss(), 30.0, [0.0,-3.0,1.0], true) -prediction = predict(model, mtcars) -@test mean(prediction) - 2 * std(prediction) <= gauss_test && gauss_test <= mean(prediction) + 2 * std(prediction) \ No newline at end of file +model = fit(@formula(MPG ~ HP + WT + Gear), mtcars, LinearRegression(), Prior_Gauss(), 30.0, [0.0,-3.0,1.0], VI()) +vi_prediction = predict(model, mtcars) +@test mean(vi_prediction) - 2 * std(vi_prediction) <= gauss_test && gauss_test <= mean(vi_prediction) + 2 * std(vi_prediction) + +@test maximum(abs.(mcmc_prediction - vi_prediction)) <= 5.0 \ No newline at end of file From f1947a3cfeda72bdbcb6449f827b2dcf86da78e5 Mon Sep 17 00:00:00 2001 From: ShouvikGhosh2048 Date: Mon, 9 Dec 2024 14:08:30 +0000 Subject: [PATCH 3/3] Update docs --- docs/src/api/bayesian_regression.md | 22 +++++++++++++++------- src/CRRao.jl | 2 +- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/docs/src/api/bayesian_regression.md b/docs/src/api/bayesian_regression.md index 018096c..c9e9ad2 100644 --- a/docs/src/api/bayesian_regression.md +++ b/docs/src/api/bayesian_regression.md @@ -4,34 +4,42 @@ BayesianRegression ``` +## Bayesian Algorithms + +```@docs +BayesianAlgorithm +MCMC +VI +``` + ## Linear Regression ### Linear Regression with User Specific Gaussian Prior ```@docs -fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Gauss, alpha_prior_mean::Float64, beta_prior_mean::Vector{Float64}, sim_size::Int64 = 1000) -fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Gauss, alpha_prior_mean::Float64, alpha_prior_sd::Float64, beta_prior_mean::Vector{Float64}, beta_prior_sd::Vector{Float64}, sim_size::Int64 = 1000) +fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Gauss, alpha_prior_mean::Float64, beta_prior_mean::Vector{Float64}, algorithm::BayesianAlgorithm = MCMC()) +fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Gauss, alpha_prior_mean::Float64, alpha_prior_sd::Float64, beta_prior_mean::Vector{Float64}, beta_prior_sd::Vector{Float64}, algorithm::BayesianAlgorithm = MCMC()) ``` ### Linear Regression with Ridge Prior ```@docs -fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Ridge, h::Float64 = 0.01, sim_size::Int64 = 1000) +fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Ridge, algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 0.01) ``` ### Linear Regression with Laplace Prior ```@docs -fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Laplace, h::Float64 = 0.01, sim_size::Int64 = 1000) +fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Laplace, algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 0.01) ``` ### Linear Regression with Cauchy Prior ```@docs -fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Cauchy, sim_size::Int64 = 1000) +fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Cauchy, algorithm::BayesianAlgorithm = MCMC()) ``` ### Linear Regression with T-distributed Prior ```@docs -fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_TDist, h::Float64 = 2.0, sim_size::Int64 = 1000) +fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_TDist, algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 2.0) ``` ### Linear Regression with Horse Shoe Prior ```@docs -fit(formula::FormulaTerm,data::DataFrame,modelClass::LinearRegression,prior::Prior_HorseShoe,sim_size::Int64 = 1000) +fit(formula::FormulaTerm,data::DataFrame,modelClass::LinearRegression,prior::Prior_HorseShoe,algorithm::BayesianAlgorithm = MCMC()) ``` ## Logistic Regression diff --git a/src/CRRao.jl b/src/CRRao.jl index e795faf..4f54037 100644 --- a/src/CRRao.jl +++ b/src/CRRao.jl @@ -432,7 +432,7 @@ VI() = VI(1000, 10000, 100) export LinearRegression, LogisticRegression, PoissonRegression, NegBinomRegression, Boot_Residual export Prior_Ridge, Prior_Laplace, Prior_Cauchy, Prior_TDist, Prior_HorseShoe, Prior_Gauss -export CRRaoLink, Logit, Probit, Cloglog, Cauchit, fit, MCMC, VI +export CRRaoLink, Logit, Probit, Cloglog, Cauchit, fit, BayesianAlgorithm, MCMC, VI export coef, coeftable, r2, adjr2, loglikelihood, aic, bic, sigma, predict, residuals, cooksdistance, BPTest, pvalue export FrequentistRegression, BayesianRegression