-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathunivariate_analysis.py
48 lines (38 loc) · 1.38 KB
/
univariate_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# univariate analysis in factory design pattern
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
class UnivariateAnalysis:
    """Print summary statistics and draw one plot for a single column.

    Each column is routed to a categorical report (value counts + count
    plot) or a numerical report (histogram with KDE) based on its dtype.

    Args:
        data: Table indexed by column name; each column must expose
            ``dtype``, ``describe()`` and ``value_counts()``. The usage in
            this file passes the result of ``pd.read_csv``.
    """

    def __init__(self, data):
        self.data = data

    def _is_categorical(self, column):
        """Return True when *column* should get the categorical report.

        Comparing the dtype against ``'object'`` alone misses pandas'
        ``category`` dtype, the dedicated string dtype and ``bool``
        columns, which would then be sent to the histogram/KDE path.
        All other dtypes (ints, floats, datetimes, ...) stay numerical.

        Raises:
            KeyError: propagated from ``self.data[column]`` when the column
                does not exist.
        """
        dtype = self.data[column].dtype
        return bool(
            isinstance(dtype, pd.CategoricalDtype)
            # Mixed-content object columns are not always reported as
            # "string" by pandas, so keep the explicit object check.
            or pd.api.types.is_object_dtype(dtype)
            or pd.api.types.is_string_dtype(dtype)
            or pd.api.types.is_bool_dtype(dtype)
        )

    def analyze(self, column):
        """Run the categorical or numerical report that fits *column*."""
        if self._is_categorical(column):
            self.analyze_categorical(column)
        else:
            self.analyze_numerical(column)

    def analyze_categorical(self, column):
        """Print describe() and value counts for *column*, then show a count plot."""
        print(f"Categorical Analysis for {column}")
        print(self.data[column].describe())
        print(self.data[column].value_counts())
        sns.countplot(x=column, data=self.data)
        plt.title(f"Count Plot for {column}")
        plt.show()

    def analyze_numerical(self, column):
        """Print describe() for *column*, then show a histogram with a KDE curve."""
        print(f"Numerical Analysis for {column}")
        print(self.data[column].describe())
        sns.histplot(self.data[column], kde=True)
        plt.title(f"Histogram for {column}")
        plt.show()
class UnivariateAnalysisFactory:
    """Factory entry point for building univariate analysis objects."""

    @staticmethod
    def create_analysis(data):
        """Wrap *data* in a new ``UnivariateAnalysis`` and return it."""
        analysis = UnivariateAnalysis(data)
        return analysis
# Instructions to use it
# The example below runs only when this file is executed as a script, so
# importing the module for its classes does not try to read a CSV file or
# open plot windows.
if __name__ == "__main__":
    # Load your dataset
    data = pd.read_csv("your_dataset.csv")
    # Create a univariate analysis object using the factory
    analysis = UnivariateAnalysisFactory.create_analysis(data)
    # Analyze specific columns
    analysis.analyze("column1")
    analysis.analyze("column2")