-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimdb_movies.sqbpro
115 lines (101 loc) · 3.04 KB
/
imdb_movies.sqbpro
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
-- Let's take a look at the highest grossing genre
SELECT
Genre,
SUM(Gross) AS Total_Gross
FROM imdb_top_1000
GROUP BY Genre
ORDER BY Total_GROSS DESC
-- The query above does return a numerical descending list since it is a text column type. Let's replace empty cells with 0, remove the commas, and then change the column type.
UPDATE imdb_top_1000
SET Gross = ifnull(Gross,0);
-- Saw that blanks cells didn't populate with zeros. This means we have to use NULLIF and IFNULL functions to convert empty strings to 0
UPDATE imdb_top_1000
SET Gross = IFNULL(NULLIF(Gross, ''),0);
-- Remove the commas
UPDATE imdb_top_1000
SET Gross = REPLACE(Gross, ',','');
-- Let's take a look.
Select Gross
FROM imdb_top_1000-- As you can see, the commas are gone and the blanks are filled with 0. We can perform these two actions in one query. For the sake of the exercsie, we had to break it down into two.
-- Now we can change the column type with a few clicks. Click on Database Structure -> Click on the table -> Edit -> Modify Table -> Scroll down to Gross and change text field to integer field
-- Let's take a look at the highest grossing genre again
SELECT
Genre,
SUM(Gross) AS Total_Gross
FROM imdb_top_1000
GROUP BY Genre
ORDER BY Total_GROSS DESC
-- We are going to perform more analysis now.
-- Highest Grossing Movies of all time with their ratings and meta score
SELECT
Series_Title,
Gross,
IMDB_Rating,
Meta_score
FROM imdb_top_1000
ORDER by Gross DESC
Limit 10;
-- Which actors has a high average rating
SELECT
Star1,
ROUND(AVG(IMDB_Rating),1) AS avg_rating
FROM imdb_top_1000GROUP BY Star1
ORDER BY avg_rating DESC
-- Count of Genres
SELECT
Genre,
COUNT(*) as genre_count
FROM imdb_top_1000
GROUP BY Genre
ORDER by genre_count DESC;
-- Director's gross
SELECT
Director,
COUNT(*) AS Movie_Count,
SUM(Gross) AS Total_Gross,
MAX(GROSS) AS max_Gross,
ROUND(AVG(Gross),2) AS avg_Gross
FROM imdb_top_1000
GROUP BY Director
ORDER BY Total_Gross DESC;
-- Movie count of Star1
SELECT
Star1,
COUNT(*) AS MovieCount
FROM imdb_top_1000
WHERE Star1 IS NOT NULL
GROUP BY Star1
ORDER BY MovieCount DESC;
-- Which actor prefer which Genre more?
SELECT Star1, Genre, COUNT(*) AS MovieCount
FROM imdb_top_1000
WHERE Star1 IS NOT NULL
AND Genre IS NOT NULL
GROUP BY Star1
ORDER BY MovieCount DESC;
-- Which combination of actors are getting good IMDB_Rating maximum time?
SELECT Star1, Star2, Star3, Star4, AVG(IMDB_Rating) AS AvgRating, COUNT(*) AS MovieCount
FROM imdb_top_1000
WHERE Star1 IS NOT NULL
AND Star2 IS NOT NULL
AND Star3 IS NOT NULL
AND Star4 IS NOT NULL
GROUP BY Star1, Star2, Star3, Star4
HAVING COUNT(*) >= 1
ORDER BY AvgRating DESC, MovieCount DESC;
-- Which combination of actors are getting good gross?
SELECT
Star1,
Star2,
Star3,
Star4,
SUM(Gross) AS Total_Gross
FROM imdb_top_1000
WHERE Star1 IS NOT NULL
AND Star2 IS NOT NULL
AND Star3 IS NOT NULL
AND Star4 IS NOT NULL
GROUP BY Star1, Star2, Star3, Star4
ORDER BY Total_Gross DESC;