-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrapingNightlife.py
95 lines (71 loc) · 3.5 KB
/
scrapingNightlife.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import requests
import pandas as pd
import datetime
from bs4 import BeautifulSoup
from sqlalchemy import create_engine
# Saved event pages to scrape, listed in the order their results are printed.
EVENT_PAGE_PATHS = (
    "Clubs_Scraping/FUCKING MONDAY.html",  # 1 - Fucking Monday
    "Clubs_Scraping/TEATROKAPITAL.html",   # 2 - Teatro Kapital
    "Clubs_Scraping/ICON.html",            # 3 - Icon
    "Clubs_Scraping/toyroom.html",         # 4 - Toy Room
    "Clubs_Scraping/commo.html",           # 5 - Commo
)


def _require(node, description, file_path):
    """Return *node*, raising ``ValueError`` if the lookup found nothing.

    BeautifulSoup's ``find`` returns ``None`` when no element matches; failing
    here names the file and the missing element instead of surfacing later as
    an ``AttributeError`` on ``None``.
    """
    if node is None:
        raise ValueError("{}: could not find {}".format(file_path, description))
    return node


def extract_event_info(file_path):
    """Read one saved event page and extract its headline details.

    Args:
        file_path: Path to a UTF-8 encoded HTML file.

    Returns:
        A ``(title, date_time, music_types)`` tuple, where ``title`` and
        ``date_time`` are whitespace-stripped strings and ``music_types`` is
        a list with the stripped text of every ``<li>`` in the tags container.

    Raises:
        OSError: If the file cannot be opened or read.
        ValueError: If the event-details container, the title, the date or
            the tags container is missing from the page.
    """
    # Load the HTML content from the saved file.
    with open(file_path, 'r', encoding='utf-8') as file:
        html_content = file.read()

    # Parse the HTML content with BeautifulSoup.
    soup = BeautifulSoup(html_content, 'lxml')

    # Everything of interest lives inside the event details container.
    event_details = _require(
        soup.find('div', class_='event-details'),
        "the <div class='event-details'> container",
        file_path,
    )

    title_tag = _require(
        event_details.find('h1', itemprop='name'),
        "the <h1 itemprop='name'> title",
        file_path,
    )
    date_tag = _require(
        event_details.find('h6', itemprop='date'),
        "the <h6 itemprop='date'> date",
        file_path,
    )
    tags_box = _require(
        event_details.find('div', class_='tags tags-left'),
        "the <div class='tags tags-left'> tag list",
        file_path,
    )

    title = title_tag.get_text(strip=True)
    date_time = date_tag.get_text(strip=True)
    music_types = [li.get_text(strip=True) for li in tags_box.find_all('li')]
    return title, date_time, music_types


# Print one (title, date_time, music_types) tuple per saved page.
for file_path in EVENT_PAGE_PATHS:
    title, date_time, music_types = extract_event_info(file_path)
    print((title, date_time, music_types))