Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(Draft) Support WLS #14

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions docs/mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ repo_name: dbt_linreg
docs_dir: src
nav:
- Home: index.md

theme:
name: material
palette:
Expand Down Expand Up @@ -49,5 +50,3 @@ extra:
link: https://github.com/dwreeves/dbt_linreg
- icon: fontawesome/brands/linkedin
link: https://www.linkedin.com/in/daniel-reeves-27700545/
- icon: fontawesome/brands/twitter
link: https://twitter.com/mueblesfeos
Binary file removed docs/src/img/dbt-linreg-banner.png
Binary file not shown.
16 changes: 16 additions & 0 deletions integration_tests/models/collinear_matrix_weights_chol.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{#
  Weighted regression fixture: regresses y on x1..x4 from the
  collinear_matrix model, weighting rows by abs(x5), using method='chol'
  and returning coefficients in the 'long' format.
  The model is currently switched off via enabled=False.
#}
{{ config(materialized="table", enabled=False) }}

select *
from {{
    dbt_linreg.ols(
        table=ref('collinear_matrix'),
        endog='y',
        exog=['x1', 'x2', 'x3', 'x4'],
        format='long',
        weights='abs(x5)',
        method='chol'
    )
}}
16 changes: 16 additions & 0 deletions integration_tests/models/collinear_matrix_weights_fwl.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{#
  Weighted regression fixture: regresses y on x1..x4 from the
  collinear_matrix model, weighting rows by abs(x5), using method='fwl'
  and returning coefficients in the 'long' format.
  The model is currently switched off via enabled=False.
#}
{{ config(materialized="table", enabled=False) }}

select *
from {{
    dbt_linreg.ols(
        table=ref('collinear_matrix'),
        endog='y',
        exog=['x1', 'x2', 'x3', 'x4'],
        format='long',
        weights='abs(x5)',
        method='fwl'
    )
}}
17 changes: 17 additions & 0 deletions integration_tests/models/collinear_matrix_weights_ridge_chol.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{#
  Weighted ridge regression fixture: regresses y on x1..x4 from the
  collinear_matrix model with alpha=0.6, weighting rows by abs(x5),
  using method='chol' and returning coefficients in the 'long' format.
  The model is currently switched off via enabled=False.
#}
{{ config(materialized="table", enabled=False) }}

select *
from {{
    dbt_linreg.ols(
        table=ref('collinear_matrix'),
        endog='y',
        exog=['x1', 'x2', 'x3', 'x4'],
        format='long',
        alpha=0.6,
        weights='abs(x5)',
        method='chol'
    )
}}
17 changes: 17 additions & 0 deletions integration_tests/models/collinear_matrix_weights_ridge_fwl.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{#
  Weighted ridge regression fixture: regresses y on x1..x4 from the
  collinear_matrix model with alpha=0.6, weighting rows by abs(x5),
  using method='fwl' and returning coefficients in the 'long' format.
  The model is currently switched off via enabled=False.
#}
{{ config(materialized="table", enabled=False) }}

select *
from {{
    dbt_linreg.ols(
        table=ref('collinear_matrix'),
        endog='y',
        exog=['x1', 'x2', 'x3', 'x4'],
        format='long',
        alpha=0.6,
        weights='abs(x5)',
        method='fwl'
    )
}}
29 changes: 29 additions & 0 deletions integration_tests/tests/test_collinear_matrix_weights_chol.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{{ config(enabled=False) }}
{#
  Compares the output of collinear_matrix_weights_chol against hard-coded
  reference values, rounded to 7 decimal places. The query returns one row
  per variable that mismatches or is missing on either side of the join,
  so an empty result means the model agrees with the reference.
#}
with

expected as (

    select
        'const' as variable_name,
        78.32986168327125 as coefficient,
        2.2478951158027343 as standard_error,
        34.84587031334835 as t_statistic
    union all
    select
        'x1' as variable_name,
        9.690057328206695 as coefficient,
        0.5903103592025547 as standard_error,
        16.41519105525594 as t_statistic
    union all
    select
        'x2' as variable_name,
        6.5995521027081505 as coefficient,
        1.1251104763856294 as standard_error,
        5.865692517510758 as t_statistic
    union all
    select
        'x3' as variable_name,
        19.439295801040092 as coefficient,
        0.5784265337086496 as standard_error,
        33.60719930395096 as t_statistic
    union all
    select
        'x4' as variable_name,
        3.786031479906997 as coefficient,
        0.16143609506528953 as standard_error,
        23.452199326153263 as t_statistic

)

select fitted.variable_name
from {{ ref('collinear_matrix_weights_chol') }} as fitted
full outer join expected
    on fitted.variable_name = expected.variable_name
where
    -- A NULL on either side means a row is missing or a statistic was not computed.
    fitted.coefficient is null
    or fitted.standard_error is null
    or fitted.t_statistic is null
    or expected.coefficient is null
    -- Otherwise every statistic must agree to 7 decimal places.
    or round(fitted.coefficient, 7) <> round(expected.coefficient, 7)
    or round(fitted.standard_error, 7) <> round(expected.standard_error, 7)
    or round(fitted.t_statistic, 7) <> round(expected.t_statistic, 7)
29 changes: 29 additions & 0 deletions integration_tests/tests/test_collinear_matrix_weights_fwl.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{{ config(enabled=False) }}
{#
  Compares the output of collinear_matrix_weights_fwl against hard-coded
  reference values, rounded to 7 decimal places. The query returns one row
  per variable that mismatches or is missing on either side of the join,
  so an empty result means the model agrees with the reference.
#}
with

expected as (

    select
        'const' as variable_name,
        78.32986168327125 as coefficient,
        2.2478951158027343 as standard_error,
        34.84587031334835 as t_statistic
    union all
    select
        'x1' as variable_name,
        9.690057328206695 as coefficient,
        0.5903103592025547 as standard_error,
        16.41519105525594 as t_statistic
    union all
    select
        'x2' as variable_name,
        6.5995521027081505 as coefficient,
        1.1251104763856294 as standard_error,
        5.865692517510758 as t_statistic
    union all
    select
        'x3' as variable_name,
        19.439295801040092 as coefficient,
        0.5784265337086496 as standard_error,
        33.60719930395096 as t_statistic
    union all
    select
        'x4' as variable_name,
        3.786031479906997 as coefficient,
        0.16143609506528953 as standard_error,
        23.452199326153263 as t_statistic

)

select fitted.variable_name
from {{ ref('collinear_matrix_weights_fwl') }} as fitted
full outer join expected
    on fitted.variable_name = expected.variable_name
where
    -- A NULL on either side means a row is missing or a statistic was not computed.
    fitted.coefficient is null
    or fitted.standard_error is null
    or fitted.t_statistic is null
    or expected.coefficient is null
    -- Otherwise every statistic must agree to 7 decimal places.
    or round(fitted.coefficient, 7) <> round(expected.coefficient, 7)
    or round(fitted.standard_error, 7) <> round(expected.standard_error, 7)
    or round(fitted.t_statistic, 7) <> round(expected.t_statistic, 7)
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{{ config(enabled=False) }}
{#
  Compares the coefficients of collinear_matrix_weights_ridge_chol against
  hard-coded reference values. The query returns one row per variable that
  deviates, has the wrong sign, or is present on only one side of the join,
  so an empty result means the model agrees with the reference.
#}
/* Ridge regression coefficients do not match exactly.
   Instead, a threshold of no more than 0.01% deviation is enforced. */
{% set THRESHOLD = 0.0001 %}
with

expected as (

    select 'const' as variable_name, 93.43172535198633 as coefficient
    union all
    select 'x1' as variable_name, 5.301810664300932 as coefficient
    union all
    select 'x2' as variable_name, 8.3991554645256 as coefficient
    union all
    select 'x3' as variable_name, 17.327608839976932 as coefficient
    union all
    select 'x4' as variable_name, 4.577399536301482 as coefficient

)

select base.variable_name
from {{ ref('collinear_matrix_weights_ridge_chol') }} as base
full outer join expected
    on base.variable_name = expected.variable_name
where
    -- Relative deviation is measured on the log scale; sign is checked
    -- separately because abs() discards it.
    abs(log(abs(base.coefficient)) - log(abs(expected.coefficient))) > {{ THRESHOLD }}
    or sign(base.coefficient) != sign(expected.coefficient)
    -- Expected variable missing from the model output.
    or base.coefficient is null
    -- Model produced a variable that has no reference value. Without this
    -- guard the comparisons above evaluate to NULL and the extra row would
    -- slip through unnoticed.
    or expected.coefficient is null
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{{ config(enabled=False) }}
{#
  Compares the coefficients of collinear_matrix_weights_ridge_fwl against
  hard-coded reference values. The query returns one row per variable that
  deviates, has the wrong sign, or is present on only one side of the join,
  so an empty result means the model agrees with the reference.
#}
/* Ridge regression coefficients do not match exactly.
   Instead, a threshold of no more than 0.01% deviation is enforced. */
{% set THRESHOLD = 0.0001 %}
with

expected as (

    select 'const' as variable_name, 93.43172535198633 as coefficient
    union all
    select 'x1' as variable_name, 5.301810664300932 as coefficient
    union all
    select 'x2' as variable_name, 8.3991554645256 as coefficient
    union all
    select 'x3' as variable_name, 17.327608839976932 as coefficient
    union all
    select 'x4' as variable_name, 4.577399536301482 as coefficient

)

select base.variable_name
from {{ ref('collinear_matrix_weights_ridge_fwl') }} as base
full outer join expected
    on base.variable_name = expected.variable_name
where
    -- Relative deviation is measured on the log scale; sign is checked
    -- separately because abs() discards it.
    abs(log(abs(base.coefficient)) - log(abs(expected.coefficient))) > {{ THRESHOLD }}
    or sign(base.coefficient) != sign(expected.coefficient)
    -- Expected variable missing from the model output.
    or base.coefficient is null
    -- Model produced a variable that has no reference value. Without this
    -- guard the comparisons above evaluate to NULL and the extra row would
    -- slip through unnoticed.
    or expected.coefficient is null
3 changes: 3 additions & 0 deletions macros/linear_regression/ols.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
exog=None,
x=None,
y=None,
weights=None,
add_constant=True,
format='wide',
format_options=None,
Expand Down Expand Up @@ -142,6 +143,7 @@
table=table,
endog=endog,
exog=exog,
weights=weights,
add_constant=add_constant,
format=format,
format_options=format_options,
Expand All @@ -156,6 +158,7 @@
table=table,
endog=endog,
exog=exog,
weights=weights,
add_constant=add_constant,
format=format,
format_options=format_options,
Expand Down
1 change: 1 addition & 0 deletions macros/linear_regression/ols_impl_chol/_ols_impl_chol.sql
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@
{% macro _ols_chol(table,
endog,
exog,
weights=None,
add_constant=True,
format=None,
format_options=None,
Expand Down
3 changes: 3 additions & 0 deletions macros/linear_regression/ols_impl_fwl/_ols_impl_fwl.sql
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@
{% macro _ols_fwl(table,
endog,
exog,
weights=None,
add_constant=True,
format=None,
format_options=None,
Expand All @@ -107,6 +108,7 @@
table=table,
endog=endog,
exog=exog,
weights=weights,
add_constant=add_constant,
format=format,
format_options=format_options,
Expand All @@ -118,6 +120,7 @@
table=table,
endog=endog,
exog=exog,
weights=weights,
add_constant=add_constant,
format=format,
format_options=format_options,
Expand Down
1 change: 1 addition & 0 deletions macros/linear_regression/ols_impl_special/_ols_0var.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{% macro _ols_0var(table,
endog,
exog,
weights=None,
add_constant=True,
format=None,
format_options=None,
Expand Down
1 change: 1 addition & 0 deletions macros/linear_regression/ols_impl_special/_ols_1var.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{% macro _ols_1var(table,
endog,
exog,
weights=None,
add_constant=True,
format=None,
format_options=None,
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ dbt-postgres = "^1.6.6"
pyyaml = "*"

[tool.ruff]
line-length = 100
line-length = 120

[tool.ruff.lint]
select = ["F", "E", "W", "I001"]
Expand Down
26 changes: 18 additions & 8 deletions scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def click_option_size(**kwargs):
)


@click.group("main")
@click.group("main", context_settings=dict(help_option_names=["-h", "--help"]))
def cli():
"""CLI for manually testing the code base."""

Expand All @@ -217,9 +217,14 @@ def cli():
type=click.FLOAT,
show_default=True,
help="Alpha for the regression.")
@click.option("--weights", "-w",
default=None,
type=click.STRING,
show_default=True,
help="Weight column for regression.")
@click_option_size()
@click_option_seed()
def regress(table: str, const: bool, columns: int, alpha: float, size: int, seed: int):
def regress(table: str, const: bool, columns: int, alpha: float, size: int, seed: int, weights: str):
"""
Run regression on integration test cases.

Expand Down Expand Up @@ -253,21 +258,26 @@ def _run_model(cond=None):
cond = slice(None)
y = test_case.df.loc[cond, test_case.y_col]
x_mat = test_case.df.loc[cond, x_cols]
kw = {}
if weights:
kw["weights"] = np.abs(test_case.df[weights])
if alpha:
if const:
alpha_arr = [0, *([alpha] * (len(x_mat.columns) - 1))]
else:
alpha_arr = [alpha] * len(x_mat.columns)
model = sm.OLS(
model = sm.WLS(
y,
x_mat
).fit_regularized(L1_wt=0, alpha=alpha_arr)
x_mat,
**kw,
).fit_regularized(L1_wt=0.00000001, alpha=alpha_arr)
else:
model = sm.OLS(y, x_mat).fit()
model = sm.WLS(y, x_mat, **kw).fit()
res_df = pd.DataFrame(index=x_mat.columns)
res_df["coef"] = model.params
res_df["stderr"] = model.bse
res_df["tstat"] = res_df["coef"] / res_df["stderr"]
if not alpha:
res_df["stderr"] = model.bse
res_df["tstat"] = res_df["coef"] / res_df["stderr"]
click.echo(
tabulate(
res_df,
Expand Down
Loading