-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrating_analysis.py
59 lines (42 loc) · 2.15 KB
/
rating_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Read the hospital table from the CSV in the current working directory.
with open("Hospital General Information.csv") as csv_file:
    input_file = pd.read_csv(csv_file)

# List the available column names, one per line (iterating a DataFrame
# yields its column labels).
for column_name in input_file:
    print(column_name)
# Synthetic sample: 50 uniform draws scaled to [0, 100), 25 copies of 50,
# and 10 outliers on each side of that range.
# NOTE(review): none of these arrays feeds the hospital plots in this script;
# this looks like leftover demo data -- confirm before removing.
np.random.seed(19680801)  # fixed seed so the draws repeat from run to run
spread = 100 * np.random.rand(50)
center = np.full(25, 50.0)
flier_high = 100 + 100 * np.random.rand(10)
flier_low = -100 * np.random.rand(10)
data = np.concatenate([spread, center, flier_high, flier_low])
print(data.dtype)
# Convert the rating column to numbers; entries that cannot be parsed
# become NaN because of errors='coerce'.
numeric_rating = pd.to_numeric(input_file['Hospital overall rating'], errors='coerce')
input_file['Hospital overall rating'] = numeric_rating

# Keep only the rows that have a rating and both comparison values.
required_columns = [
    'Hospital overall rating',
    'Mortality national comparison',
    'Readmission national comparison',
]
input_file = input_file.dropna(subset=required_columns)

# Preview the first few mortality comparison values.
mortality_preview = input_file['Mortality national comparison'].head()
print(mortality_preview)
def _plot_rating_by_comparison(frame, column, title, filename):
    """Draw, save and show a box plot of overall rating per comparison group.

    Rows of ``frame`` are split into three groups according to whether
    ``column`` contains "Below the national average", "Same as the national
    average" or "Above the national average". Each group's
    'Hospital overall rating' values become one box, in that order.

    Args:
        frame: DataFrame holding ``column`` and 'Hospital overall rating'.
        column: Name of the national-comparison column to group by.
        title: Title placed above the plot.
        filename: Path the figure is written to before it is displayed.
    """
    # (tick label, phrase searched for in the comparison column)
    groups = (
        ('Below', 'Below the national average'),
        ('Same', 'Same as the national average'),
        ('Above', 'Above the national average'),
    )
    comparison = frame[column]
    # regex=False: the phrases are literal text, not patterns.
    # na=False: a missing comparison value simply matches no group instead
    # of producing a NaN mask that .loc would reject.
    ratings = [
        frame.loc[comparison.str.contains(phrase, regex=False, na=False),
                  'Hospital overall rating']
        for _, phrase in groups
    ]

    fig, ax = plt.subplots()
    ax.boxplot(ratings)
    # Without explicit labels the boxes are only numbered 1, 2, 3, which
    # does not tell the reader which group each box represents.
    ax.set_xticklabels([label for label, _ in groups])
    ax.set(xlabel='Comparison to National Average', ylabel='Rating',
           title=title)
    ax.grid()
    # Save before show(): the figure may be discarded once its window closes.
    fig.savefig(filename)
    plt.show()


_plot_rating_by_comparison(input_file, 'Mortality national comparison',
                           'Mortality National Comparison', "Mortality.png")
_plot_rating_by_comparison(input_file, 'Readmission national comparison',
                           'Readmission National Comparison', "Readmission.png")