-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplots.py
89 lines (76 loc) · 2.78 KB
/
plots.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
"""Create desired plots."""
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
def scatter_plots(
    variable_dict: dict,
    y_name: str,
    folder_name: str,
    variables_df: pd.DataFrame,
):
    """Create scatter plots of outcome on continuous/discrete predictors.

    For every entry of ``variable_dict`` marked as a predictor (``"x"``)
    whose type is ``"Continuous"`` or ``"Discrete"``, plot ``y_name``
    against that predictor and save the figure to
    ``<folder_name>/<x_name>_scatter.png``.

    Args:
        variable_dict: Mapping from column name to an object exposing the
            ``get_x_or_y`` and ``get_type`` attributes.
        y_name: Name of the outcome column in ``variables_df``.
        folder_name: Directory the PNG files are written to.
        variables_df: Data frame holding the outcome and predictor columns.
    """
    scatter_types = ("Continuous", "Discrete")
    for x_name in variable_dict:
        variable = variable_dict[x_name]
        # NOTE(review): get_x_or_y / get_type are compared as plain
        # attributes (not called); presumably they are properties -- confirm
        # against the variable class.
        if variable.get_x_or_y != "x":
            continue
        if variable.get_type not in scatter_types:
            continue

        # dropna() returns a new frame; keep the result so that rows with a
        # missing outcome or predictor are really excluded.
        pair_df = variables_df[[y_name, x_name]].dropna()

        # Start from an empty figure so points and titles from the previous
        # predictor do not leak into this plot.
        plt.clf()
        plt.scatter(pair_df[x_name], pair_df[y_name], color="black")
        plt.title(f"{y_name} vs {x_name}")
        plt.xlabel(f"{x_name}")
        plt.ylabel(f"{y_name}")
        plt.savefig(folder_name + f"/{x_name}_scatter.png", dpi=300)
def cor_mtx(folder_name: str, variables_df: pd.DataFrame):
    """Plot correlation matrix.

    Computes the pairwise Pearson correlation of the columns of
    ``variables_df`` (rounded to two decimals), draws it as an annotated
    heatmap and saves it to ``<folder_name>/correlation_matrix.png``.

    Args:
        folder_name: Directory the PNG file is written to.
        variables_df: Data frame whose columns are correlated.
    """
    corr = variables_df.corr(
        method="pearson",
        min_periods=1,
    ).round(2)

    # The heatmap draws onto the current figure; clear it first so artists
    # left behind by earlier plots do not end up in the saved image.
    plt.clf()
    sns.heatmap(corr, annot=True)
    plt.title("Correlation Matrix")
    plt.savefig(folder_name + "/correlation_matrix.png", dpi=300)
def boxplots(
    variable_dict: dict,
    y_name: str,
    folder_name: str,
    variables_df: pd.DataFrame,
):
    """Create boxplots of outcome on categorical/binary predictors.

    For every entry of ``variable_dict`` marked as a predictor (``"x"``)
    whose type is ``"Categorical"`` or ``"Binary"``, draw a boxplot of
    ``y_name`` grouped by that predictor and save it to
    ``<folder_name>/<x_name>_boxplot.png``.

    Args:
        variable_dict: Mapping from column name to an object exposing the
            ``get_x_or_y`` and ``get_type`` attributes.
        y_name: Name of the outcome column in ``variables_df``.
        folder_name: Directory the PNG files are written to.
        variables_df: Data frame holding the outcome and predictor columns.
    """
    boxplot_types = ("Categorical", "Binary")
    for x_name in variable_dict:
        variable = variable_dict[x_name]
        # NOTE(review): get_x_or_y / get_type are compared as plain
        # attributes (not called); presumably they are properties -- confirm
        # against the variable class.
        if variable.get_x_or_y != "x":
            continue
        if variable.get_type not in boxplot_types:
            continue

        # dropna() returns a new frame; keep the result and plot from it so
        # the filtered two-column frame is what actually gets drawn.
        pair_df = variables_df[[y_name, x_name]].dropna()

        # Reset the current figure so consecutive boxplots do not overlap.
        plt.clf()
        sns.boxplot(x=x_name, y=y_name, data=pair_df)
        plt.title(f"{y_name} by {x_name}")
        plt.savefig(folder_name + f"/{x_name}_boxplot.png", dpi=300)
def hist_plot(
    variable_dict: dict, folder_name: str, variables_df: pd.DataFrame
):
    """Create histogram plots of variables.

    For every entry of ``variable_dict`` whose type is ``"Continuous"`` or
    ``"Discrete"``, draw a histogram with a KDE overlay of the matching
    column and save it to ``<folder_name>/<var_name>_histogram.png``.

    Args:
        variable_dict: Mapping from column name to an object exposing the
            ``get_type`` attribute.
        folder_name: Directory the PNG files are written to.
        variables_df: Data frame holding the columns to plot.
    """
    histogram_types = ("Continuous", "Discrete")
    for var_name in variable_dict:
        if variable_dict[var_name].get_type not in histogram_types:
            continue

        # displot is a figure-level function: it opens its own new figure,
        # which becomes the current one. Clearing beforehand would only
        # touch an unrelated figure, so no clf() is issued here.
        sns.displot(variables_df[var_name], kde=True)
        plt.xlabel(f"{var_name}")
        plt.ylabel("Frequency")
        plt.savefig(folder_name + f"/{var_name}_histogram.png", dpi=300)

        # Close the figure displot created; otherwise one figure per
        # variable stays open for the lifetime of the process.
        plt.close()
def pair_plot(
    variable_dict: dict, folder_name: str, variables_df: pd.DataFrame
):
    """Save a pairplot of the continuous variables.

    Selects every column whose entry in ``variable_dict`` has type
    ``"Continuous"``, draws the seaborn pairplot of those columns and
    writes it to ``<folder_name>/pairplot.png``.

    Args:
        variable_dict: Mapping from column name to an object exposing the
            ``get_type`` attribute.
        folder_name: Directory the PNG file is written to.
        variables_df: Data frame holding the columns to plot.
    """
    continuous_columns = [
        name
        for name in variable_dict
        if variable_dict[name].get_type == "Continuous"
    ]
    sns.pairplot(variables_df[continuous_columns])
    plt.savefig(folder_name + "/pairplot.png", dpi=300)