-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhr_data.sql
219 lines (189 loc) · 4.83 KB
/
hr_data.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
CREATE DATABASE hr;
USE hr;
SELECT *
FROM hr_data;
SELECT termdate
FROM hr_data
ORDER BY termdate DESC;
-- fix termdate format
-- format the UTC datetime values
-- update date/time to date format (yyyy-MM-dd)
UPDATE hr_data
SET termdate = FORMAT(CONVERT(DATETIME, LEFT(termdate,19), 120), 'yyyy-MM-dd');
-- create new column named new_termdate
-- update from nvachar to date
ALTER TABLE hr_data
ADD new_termdate DATE;
-- copy converted timevalues from termdate to new_termdate
UPDATE hr_data
SET new_termdate = CASE
WHEN termdate IS NOT NULL AND ISDATE(termdate) = 1
THEN CAST(termdate AS DATETIME)
ELSE NULL
END;
-- create new column age
ALTER TABLE hr_data
ADD age nvarchar(50);
-- populate new colum with age
UPDATE hr_data
SET age = DATEDIFF(YEAR, birthdate, GETDATE());
SELECT age
FROM hr_data;
-- QUESTIONS
-- 1. What is the age distribution of employees in the company?
-- age distribution
SELECT
MIN(age) AS youngest,
MAX(age) AS oldest
FROM hr_data;
-- age group distribution
SELECT
age_group,
COUNT(*) AS count
FROM (
SELECT
CASE
WHEN age >= 21 AND age <= 30 THEN '21 to 30'
WHEN age >= 31 AND age <= 40 THEN '31 to 40'
WHEN age >= 41 AND age <= 50 THEN '41-50'
ELSE '50+'
END AS age_group
FROM hr_data
WHERE new_termdate IS NULL
) AS Subquery
GROUP BY age_group
ORDER BY age_group ASC;
-- age group by gender
SELECT
age_group, gender,
COUNT(*) AS count
FROM (
SELECT
CASE
WHEN age >= 21 AND age <= 30 THEN '21 to 30'
WHEN age >= 31 AND age <= 40 THEN '31 to 40'
WHEN age >= 41 AND age <= 50 THEN '41-50'
ELSE '50+'
END AS age_group, gender
FROM hr_data
WHERE new_termdate IS NULL
) AS Subquery
GROUP BY age_group, gender
ORDER BY age_group, gender ASC;
-- 2. What is the gender breakdown of employees in the company?
SELECT gender,
COUNT(gender) AS count
FROM hr_data
WHERE new_termdate IS NULL
GROUP BY gender
ORDER BY gender ASC;
-- 3. How does the gender distribution vary across departments and job titles?
--gender department
SELECT department, gender,
COUNT(gender) AS count
FROM hr_data
WHERE new_termdate IS NULL
GROUP BY department, gender
ORDER BY department, gender ASC;
--job title
SELECT department, jobtitle, gender,
COUNT(gender) AS count
FROM hr_data
WHERE new_termdate IS NULL
GROUP BY department, jobtitle, gender
ORDER BY department, jobtitle, gender ASC;
-- 4. What is the race/ethnicity breakdown of employees in the company?
SELECT race,
COUNT(*) AS count
FROM hr_data
WHERE new_termdate IS NULL
GROUP BY race
ORDER BY count DESC;
-- 5. What is the average length of employment for employees who have been terminated?
SELECT
AVG(DATEDIFF(year, hire_date, new_termdate)) AS tenure
FROM hr_data
WHERE new_termdate IS NOT NULL AND new_termdate <= GETDATE();
-- 6. Which department has the highest turnover rate?
-- get total count
-- get terminated count
-- terminated count/total count
SELECT
department,
total_count,
terminated_count,
(round(CAST(terminated_count AS FLOAT)/total_count, 2)*100) AS turnover_rate
FROM
(SELECT
department,
count(*) AS total_count,
SUM(CASE
WHEN new_termdate IS NOT NULL AND new_termdate <= getdate()
THEN 1 ELSE 0
END
) AS terminated_count
FROM hr_data
GROUP BY department
) AS Subquery
ORDER BY turnover_rate DESC;
-- 7. What is the tenure distribution for each department?
SELECT
AVG(DATEDIFF(year, hire_date, new_termdate)) AS tenure
FROM hr_data
WHERE new_termdate IS NOT NULL AND new_termdate <= GETDATE()
GROUP BY department
ORDER BY tenure DESC;
-- 8. How many employees work at headquarters versus remote locations?
SELECT
location,
count(*) AS count
FROM hr_data
WHERE new_termdate IS NULL
GROUP BY location;
-- 9. What is the distribution of employees across locations by city and state?
--city
SELECT
location_city,
count(*) AS count
FROM hr_data
WHERE new_termdate IS NULL
GROUP BY location_city
ORDER BY count DESC;
--state
SELECT
location_state,
count(*) AS count
FROM hr_data
WHERE new_termdate IS NULL
GROUP BY location_state
ORDER BY count DESC;
-- 10. How is the distribution of job titles across the company?
SELECT
jobtitle,
count(*) AS count
FROM hr_data
WHERE new_termdate IS NULL
GROUP BY jobtitle
ORDER BY count DESC;
-- 11. How has the company's employee count changed over time based on hire and term dates?
-- calculate hires
-- calculate terminations
-- ((hires-terminations)/hires)*100 percent hire change
SELECT
hire_year,
hires,
terminations,
hires - terminations AS net_change,
(round(CAST(hires-terminations AS FLOAT)/hires, 2)) * 100 AS percent_hire_change
FROM
(SELECT
YEAR(hire_date) AS hire_year,
count(*) AS hires,
SUM(CASE
WHEN new_termdate is not null and new_termdate <= GETDATE() THEN 1 ELSE 0
END
) AS terminations
FROM hr_data
GROUP BY YEAR(hire_date)
) AS subquery
ORDER BY percent_hire_change ASC, hire_year ASC;