-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmovie.py
109 lines (87 loc) · 3.81 KB
/
movie.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import pandas as pd
import streamlit as st
import openpyxl
# --- PAGE SETUP AND FILTER WIDGETS
# Streamlit renders elements in call order, so the order of the calls below is
# the order in which they appear on the page.
st.set_page_config(page_title='Find your movie')

logo = "logo.png"
st.image(logo, caption="", width=150, use_column_width=False)
st.header('Helping you focus more on the chill then the Netflix')
st.subheader('Please select your criterias')

# Release-year ranges offered to the user: label -> (first year, last year).
year_ranges = {
    '1901 to 1990': (1901, 1990),
    '1990 to 2000': (1990, 2000),
    '2000 to 2010': (2000, 2010),
    '2010 to 2023': (2010, 2023),
}

# Multiselect for the content type.
type_options = ['MOVIE', 'SHOW']
type_selection = st.multiselect('Type of content:', options=type_options)

# Range slider for the runtime; the default spans the whole 15-200 range.
runtime_selection = st.slider('Runtime in minutes:', 15, 200, value=(15, 200))

# Range slider for the IMDB score, in steps of 0.1; the default spans 1.0-10.0.
imdb_score_selection = st.slider('IMDB score:', 1.0, 10.0, value=(1.0, 10.0), step=0.1)

# Multiselect for genres.
genres_options = [
    'drama', 'comedy', 'thriller', 'action', 'documentation', 'romance',
    'family', 'animation', 'scifi', 'fantasy', 'horror', 'european',
    'music', 'reality', 'sport', 'war', 'western',
]
genres_selection = st.multiselect('Movie Genres:', options=genres_options)

# Multiselect for the release-year range; the choices are the labels defined above.
year_selection = st.multiselect('Release Year Range', options=list(year_ranges))

# Multiselect for platforms.
platforms_options = ['netflix', 'hbomax', 'amazonprime', 'paramount', 'appletv', 'disney']
platforms_selection = st.multiselect('Platforms:', options=platforms_options)
# Read spreadsheet columns A through AP of the 'Titles' sheet into a DataFrame.
df = pd.read_excel(
    'titles_expanded_platforms.xlsx',
    sheet_name='Titles',
    usecols='A:AP',
)
# --- FILTER DATAFRAME BASED ON SELECTION
# One boolean row mask is built up step by step. Different criteria are
# combined with AND; several choices inside one multiselect are combined with
# OR (a row matches if it satisfies any chosen value). An empty multiselect
# applies no restriction for that criterion.

# Runtime and IMDB score always apply; `between` includes both slider bounds.
mask = (df['runtime'].between(*runtime_selection)
        & df['imdb_score'].between(*imdb_score_selection))

# --- GENRES: keep a row when any of its genre_1..genre_12 columns holds a
# selected genre.
genre_columns = [f'genre_{position}' for position in range(1, 13)]
if genres_selection:
    mask &= df[genre_columns].isin(genres_selection).any(axis=1)

# --- TYPE: `isin` keeps rows of any selected type. ANDing one equality test
# per selected type would match nothing once both types are chosen.
if type_selection:
    mask &= df['type'].isin(type_selection)

# --- RELEASE YEAR: OR the selected ranges together (bounds come from
# `year_ranges`, both ends inclusive), then AND the union into the mask.
if year_selection:
    year_mask = pd.Series(False, index=df.index)
    for range_label in year_selection:
        year_mask |= df['release_year'].between(*year_ranges[range_label])
    mask &= year_mask

# --- PLATFORMS: keep a row when any selected platform column equals 1.
# The union is held under its own name so it never overwrites `mask`.
selected_platforms = [name for name in platforms_selection if name in platforms_options]
if selected_platforms:
    platform_mask = df[selected_platforms].eq(1).any(axis=1)
    mask &= platform_mask

# Apply the mask once and reuse the result for both the count and the table.
filtered_df = df[mask]
number_of_result = filtered_df.shape[0]

## --- GROUP DATAFRAME AFTER SELECTION
st.subheader('Our selection !')
# Grouping leaves one row per distinct (title, type, platforms) combination.
df_grouped = filtered_df.groupby(by=['title', 'type', 'platforms']).count().reset_index()
# The two bare expressions below are intentional: Streamlit "magic" renders a
# standalone expression on the page. Shows the first 15 grouped rows, then the
# number of matching rows before grouping.
df_grouped[['title', 'type', 'platforms']].head(15)
number_of_result