diff --git a/plugin.audio.tripler/LICENSE.txt b/plugin.audio.tripler/LICENSE.txt index e14c37141c..2ec82aaa2e 100644 --- a/plugin.audio.tripler/LICENSE.txt +++ b/plugin.audio.tripler/LICENSE.txt @@ -1,3 +1,5 @@ +Copyright (c) 2023 Simon Mollema + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -14,4 +16,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. \ No newline at end of file +THE SOFTWARE. diff --git a/plugin.audio.tripler/README.md b/plugin.audio.tripler/README.md new file mode 100644 index 0000000000..0d083ab9d8 --- /dev/null +++ b/plugin.audio.tripler/README.md @@ -0,0 +1,88 @@ +# plugin.audio.tripler + +A Kodi plugin for Triple R: an independent community radio station in Melbourne, Australia. + +![Triple R Logo](resources/icon.png) + +----- + +## Frequently Asked Questions + +### What is Triple R? + +For more than 40 years Triple R has shaped and inspired the culture of Melbourne. Since its inception as an educational broadcaster in 1976, Triple R has become Australia's most influential community radio station with nearly 21,000 paid subscribers and broadcasting live to over 1,000,000 listeners per month across FM and digital (DAB+ digital radio, podcasts and online). + +Broadcasting on 102.7FM and 3RRR Digital, the Triple R grid houses over 60 diverse programs. Music shows cover every genre imaginable from pop to punk rock, from R&B and electro to jazz, hip hop, country and metal. 
Specialist talks programs delve into topics as varied as the environment, human rights, politics, medical issues, gardening, cultural ventures and local interests. + +### What does this plugin do? + +This plugin aims to provide as much content as possible from the [Triple R Website, rrr.org.au,](https://www.rrr.org.au) in the Kodi media player. Currently provided are the following: + +- Listen Live! +- Browse by Program +- Browse by Date +- Latest Segments +- Audio Archives +- Album Of The Week +- Soundscapes +- Events +- Subscriber Giveaways (for logged in users only!) + +----- + +## Installation + +If you don't already use [Kodi](https://kodi.tv/), download and install that first. + +### Release Version + +If you would like to use the release version with automatic updates, please install [the published release in the Kodi repository](https://kodi.tv/addons/matrix/plugin.audio.tripler/) with the following steps: + +- Open Kodi. +- Navigate to Add-ons > Install from Repository. +- Navigate to Kodi Add-on repository > Music add-ons > Triple R. +- Select "Install". + +### Latest Development Version + +If you would instead like to use the latest development version, [download the zip file](https://github.com/molzy/plugin.audio.tripler/archive/refs/heads/scraper.zip) to the same computer that is running Kodi. Afterwards, follow these steps: + +- Open Kodi. +- From the main menu, navigate to Settings > System > Add-ons. +- Enable "Unknown Sources". +- Go back to the main menu, and navigate to Add-ons > Install from Zip File. +- If you are prompted to confirm, select "Yes" on the prompt. +- Navigate to the downloaded zip file on your filesystem. +- Select "OK". 
+ +----- + +## Screenshots + +### Menu +![Plugin Menu](resources/screenshots/menu.jpg) + +### Browse By Program +![Browse By Program](resources/screenshots/browse-by-program.jpg) + +### Broadcast + Playlist +![Broadcast + Playlist](resources/screenshots/broadcast-playlist.jpg) + +### Album Of The Week +![Album Of The Week](resources/screenshots/album-of-the-week.jpg) + +### Browse By Date +![Browse By Date](resources/screenshots/browse-by-date.jpg) + +### Soundscape +![Soundscape](resources/screenshots/soundscape.jpg) + +----- + +## License + +This plugin was initially forked from a Triple R plugin written by [Damon Toumbourou](https://github.com/DamonToumbourou/plugin.audio.tripler). + +The plugin was then fully rewritten by Simon Mollema. + +The plugin is released under the terms of [the MIT license](LICENSE.txt). diff --git a/plugin.audio.tripler/addon.py b/plugin.audio.tripler/addon.py index e5257416d3..cd5ba36cfe 100644 --- a/plugin.audio.tripler/addon.py +++ b/plugin.audio.tripler/addon.py @@ -1,4 +1,4 @@ import resources.lib.tripler as tripler if __name__ == '__main__': - tripler.instance.run() + result = tripler.instance.parse() diff --git a/plugin.audio.tripler/addon.xml b/plugin.audio.tripler/addon.xml index 24a4fb356a..02d8e2a1ce 100644 --- a/plugin.audio.tripler/addon.xml +++ b/plugin.audio.tripler/addon.xml @@ -1,9 +1,13 @@ - + - - - + + + + + + + audio @@ -11,20 +15,30 @@ all en - Listen to podcasts avaliable from the Triple R: Independent Melbourne Radio station. + Listen to Triple R: an independent community radio station in Melbourne, Australia. For more than 40 years Triple R has shaped and inspired the culture of Melbourne. Since its inception as an educational broadcaster in 1976, Triple R has become Australia's most influential community radio station with nearly 21,000 paid subscribers and broadcasting live to over 1,000,000 listeners per month across FM and digital (DAB+ digital radio, podcasts and online). 
Broadcasting on 102.7FM and 3RRR Digital, the Triple R grid houses over 60 diverse programs. Music shows cover every genre imaginable from pop to punk rock, from R&B and electro to jazz, hip hop, country and metal. Specialist talks programs delve into topics as varied as the environment, human rights, politics, medical issues, gardening, cultural ventures and local interests. MIT xbmc@molzy.com https://github.com/molzy/plugin.audio.tripler - Version 2.0.0 -- Rewritten for Matrix / Python 3. -- Now scraping media information directly from new Triple R website. - -Version 1.0.1 -- Initial working version + Version 3.0.0 +- Rewritten with more flexible parser +- More content added from website +- Improved user interface and menus +- Browsing programs by name and date now possible +- Broadcast track playlists are available +- Support for playing music through Bandcamp, YouTube and indigiTUBE +- Support for searching for music content by title and artist +- Subscriber-only giveaways can now be entered by signing in + resources/icon.png resources/fanart.png + resources/screenshots/menu.jpg + resources/screenshots/browse-by-program.jpg + resources/screenshots/browse-by-date.jpg + resources/screenshots/broadcast-playlist.jpg + resources/screenshots/album-of-the-week.jpg + resources/screenshots/soundscape.jpg diff --git a/plugin.audio.tripler/resources/language/resource.language.en_gb/strings.po b/plugin.audio.tripler/resources/language/resource.language.en_gb/strings.po index 7e3796d36a..9e760fcf82 100644 --- a/plugin.audio.tripler/resources/language/resource.language.en_gb/strings.po +++ b/plugin.audio.tripler/resources/language/resource.language.en_gb/strings.po @@ -25,6 +25,10 @@ msgctxt "Addon Description" msgid "For more than 40 years Triple R has shaped and inspired the culture of Melbourne. 
Since its inception as an educational broadcaster in 1976, Triple R has become Australia's most influential community radio station with nearly 21,000 paid subscribers and broadcasting live to over 1,000,000 listeners per month across FM and digital (DAB+ digital radio, podcasts and online). Broadcasting on 102.7FM and 3RRR Digital, the Triple R grid houses over 60 diverse programs. Music shows cover every genre imaginable from pop to punk rock, from R&B and electro to jazz, hip hop, country and metal. Specialist talks programs delve into topics as varied as the environment, human rights, politics, medical issues, gardening, cultural ventures and local interests." msgstr "" +msgctxt "#30999" +msgid "" +msgstr "" + #Plugin name msgctxt "#30000" @@ -32,23 +36,15 @@ msgid "Triple R" msgstr "" msgctxt "#30001" -msgid "Triple R - Live Stream" -msgstr "" - -msgctxt "#30002" -msgid "On Demand - Segments" -msgstr "" - -msgctxt "#30003" -msgid "On Demand - Episodes" +msgid "Listen Live!" msgstr "" msgctxt "#30004" -msgid "On Demand - Audio Archives" +msgid "> Next Page" msgstr "" msgctxt "#30005" -msgid "> Next Page" +msgid ">> Last Page" msgstr "" msgctxt "#30006" @@ -56,5 +52,297 @@ msgid "aired %s" msgstr "" msgctxt "#30007" -msgid "Aired on %s." +msgid "Dated %s, Melbourne time." 
+msgstr "" + +msgctxt "#30008" +msgid "Play with %s" +msgstr "" + +msgctxt "#30009" +msgid "Settings" +msgstr "" + +# Settings - Account + +msgctxt "#30010" +msgid "Subscriber Account" +msgstr "" + +msgctxt "#30011" +msgid "Use Account" +msgstr "" + +msgctxt "#30012" +msgid "Email Address" +msgstr "" + +msgctxt "#30013" +msgid "Sign In" +msgstr "" + +msgctxt "#30014" +msgid "Sign Out" +msgstr "" + +msgctxt "#30015" +msgid "Enter Email Address" +msgstr "" + +msgctxt "#30016" +msgid "Enter Password" +msgstr "" + +msgctxt "#30017" +msgid "Full Name" +msgstr "" + +msgctxt "#30020" +msgid "Sign in to your Triple R account using your email address and password" +msgstr "" + +msgctxt "#30021" +msgid "Sign out of your Triple R account" +msgstr "" + +# Settings - Image Quality + +msgctxt "#30022" +msgid "Image Quality" +msgstr "" + +msgctxt "#30023" +msgid "Set the desired quality level for album art and band images" +msgstr "" + +msgctxt "#30024" +msgid "High" +msgstr "" + +msgctxt "#30025" +msgid "Medium" +msgstr "" + +msgctxt "#30026" +msgid "Low" +msgstr "" + +# Main Menu + +msgctxt "#30032" +msgid "Browse by Program" +msgstr "" + +msgctxt "#30033" +msgid "Browse by Date" +msgstr "" + +msgctxt "#30034" +msgid "Latest Programs" +msgstr "" + +msgctxt "#30035" +msgid "Latest Segments" +msgstr "" + +msgctxt "#30036" +msgid "Audio Archives" +msgstr "" + +msgctxt "#30037" +msgid "Album Of The Week" +msgstr "" + +msgctxt "#30038" +msgid "Soundscapes" +msgstr "" + +msgctxt "#30039" +msgid "Events" +msgstr "" + +msgctxt "#30040" +msgid "Subscriber Giveaways" +msgstr "" + +msgctxt "#30041" +msgid "Search" +msgstr "" + +msgctxt "#30042" +msgid "Videos" +msgstr "" + +# Broadcasts + +msgctxt "#30049" +msgid "Broadcast" +msgstr "" + +msgctxt "#30050" +msgid "Full Broadcast" +msgstr "" + +msgctxt "#30051" +msgid "Segment" +msgstr "" + +msgctxt "#30052" +msgid "Track Search" +msgstr "" + +# Date Selection + +msgctxt "#30059" +msgid "Next Day (%s)" +msgstr "" + +msgctxt "#30060" +msgid 
"Today (%s)" +msgstr "" + +msgctxt "#30061" +msgid "Previous Day (%s)" +msgstr "" + +msgctxt "#30062" +msgid "%s (%s)" +msgstr "" + +msgctxt "#30063" +msgid "This Month (%s)" +msgstr "" + +msgctxt "#30064" +msgid "This Year (%s)" +msgstr "" + +msgctxt "#30065" +msgid "Select Date (%s)" +msgstr "" + +# Searching + +msgctxt "#30066" +msgid "Search playlists instead" +msgstr "" + +msgctxt "#30067" +msgid "Enter Search Query" +msgstr "" + +msgctxt "#30068" +msgid "Enter Playlist Search Query" +msgstr "" + +# Subscription + +msgctxt "#30069" +msgid "Enter Giveaway" +msgstr "" + +msgctxt "#30070" +msgid "Select to enter this giveaway." +msgstr "" + +msgctxt "#30071" +msgid "Giveaway Entered" +msgstr "" + +msgctxt "#30072" +msgid "Thanks for being a Triple R subscriber!" +msgstr "" + +msgctxt "#30073" +msgid "Cannot Enter Giveaway" +msgstr "" + +msgctxt "#30074" +msgid "Already entered!" +msgstr "" + +msgctxt "#30075" +msgid "Is your subscription active?" +msgstr "" + +msgctxt "#30076" +msgid "Subscribe and sign in to enter!" +msgstr "" + +msgctxt "#30077" +msgid "%s signed in" +msgstr "" + +msgctxt "#30078" +msgid "Thanks for subscribing!" +msgstr "" + +msgctxt "#30079" +msgid "%s signed out" +msgstr "" + +msgctxt "#30081" +msgid "Subscribe To Listen" +msgstr "" + +msgctxt "#30082" +msgid "https://www.rrr.org.au/subscribe - Subscribe and sign in to enter the below giveaways!" +msgstr "" + +msgctxt "#30083" +msgid "https://www.rrr.org.au/subscribe - Subscribe and sign in to listen to all audio archives!" +msgstr "" + +msgctxt "#30084" +msgid "Subscribers Only" +msgstr "" + +msgctxt "#30085" +msgid "Sign in failure" +msgstr "" + +msgctxt "#30086" +msgid "%s could not sign in" +msgstr "" + +msgctxt "#30087" +msgid "Sign out failure" +msgstr "" + +msgctxt "#30088" +msgid "%s could not sign out" +msgstr "" + +# Context Menus + +msgctxt "#30100" +msgid "%s using the context menu." 
+msgstr "" + +msgctxt "#30101" +msgid "View playlist" +msgstr "" + +msgctxt "#30102" +msgid "Search for this track" +msgstr "" + +msgctxt "#30103" +msgid "Search for this artist" +msgstr "" + +msgctxt "#30104" +msgid "Search for '%s'" +msgstr "" + +msgctxt "#30105" +msgid "Search for '%s' on Triple R" +msgstr "" + +msgctxt "#30106" +msgid "Search for '%s' on Bandcamp" +msgstr "" + +msgctxt "#30107" +msgid "Search for '%s' on YouTube" msgstr "" diff --git a/plugin.audio.tripler/resources/lib/media.py b/plugin.audio.tripler/resources/lib/media.py new file mode 100644 index 0000000000..d066e9e3d0 --- /dev/null +++ b/plugin.audio.tripler/resources/lib/media.py @@ -0,0 +1,152 @@ +import re + +class Media: + RE_BANDCAMP_ALBUM_ID = re.compile(r'https://bandcamp.com/EmbeddedPlayer/.*album=(?P[^/]+)') + RE_BANDCAMP_ALBUM_ART = re.compile(r'"art_id":(\w+)') + BANDCAMP_ALBUM_PLUGIN_BASE_URL = 'plugin://plugin.audio.kxmxpxtx.bandcamp/?mode=list_songs' + BANDCAMP_ALBUM_PLUGIN_FORMAT = '{}&album_id={}&item_type=a' + BANDCAMP_ALBUM_ART_URL = 'https://bandcamp.com/api/mobile/24/tralbum_details?band_id=1&tralbum_type=a&tralbum_id={}' + + RE_BANDCAMP_ALBUM_LINK_ID = re.compile(r'(?Phttps?://[^/\.]+\.bandcamp.com/album/[\w\-]+)') + RE_BANDCAMP_BAND_LINK_ID = re.compile(r'(?Phttps?://[^/\.]+\.bandcamp.com/)$') + + RE_BANDCAMP_TRACK_ID = re.compile(r'(?Phttps?://[^/\.]+\.bandcamp.com/(track|album)/[\w\-]+)') + BANDCAMP_TRACK_PLUGIN_BASE_URL = 'plugin://plugin.audio.kxmxpxtx.bandcamp/?mode=url' + BANDCAMP_TRACK_PLUGIN_FORMAT = '{}&url={}' + RE_BANDCAMP_TRACK_ART = re.compile(r'art_id":(?P\d+),') + RE_BANDCAMP_TRACK_BAND_ART = re.compile(r'data-band="[^"]*image_id":(?P\d+)}"') + + RE_BANDCAMP_ART_QUALITY_SEARCH = r'/img/(?P[^_]+)_(?P\d+)\.jpg' + + RE_SOUNDCLOUD_PLAYLIST_ID = re.compile(r'.+soundcloud\.com/playlists/(?P[^&]+)') + SOUNDCLOUD_PLUGIN_BASE_URL = 'plugin://plugin.audio.soundcloud/' + SOUNDCLOUD_PLUGIN_FORMAT = '{}?action=call&call=/playlists/{}' + + RE_YOUTUBE_VIDEO_ID = 
re.compile(r'^(?:(?:https?:)?\/\/)?(?:(?:www|m)\.)?(?:youtube(?:-nocookie)?\.com|youtu.be)(?:\/(?:[\w\-]+\?v=|embed\/|v\/)?)(?P[\w\-]+)(?!.*list)\S*$') + YOUTUBE_PLUGIN_BASE_URL = 'plugin://plugin.video.youtube/play/' + YOUTUBE_VIDEO_PLUGIN_FORMAT = '{}?video_id={}&play=1' + YOUTUBE_VIDEO_ART_URL_FORMAT = 'https://i.ytimg.com/vi/{}/hqdefault.jpg' + + RE_YOUTUBE_PLAYLIST_ID = re.compile(r'^(?:(?:https?:)?\/\/)?(?:(?:www|m)\.)?(?:youtube(?:-nocookie)?\.com|youtu.be)\/.+\?.*list=(?P[\w\-]+)') + YOUTUBE_PLAYLIST_PLUGIN_FORMAT = '{}?playlist_id={}&order=default&play=1' + YOUTUBE_PLAYLIST_ART_URL = 'https://youtube.com/oembed?url=https%3A//www.youtube.com/playlist%3Flist%3D{}&format=json' + + RE_INDIGITUBE_ALBUM_ID = re.compile(r'https://www.indigitube.com.au/embed/album/(?P[^"]+)') + INDIGITUBE_ALBUM_PLUGIN_BASE_URL = 'plugin://plugin.audio.indigitube/?mode=list_songs' + INDIGITUBE_ALBUM_PLUGIN_FORMAT = '{}&album_id={}' + + RE_SPOTIFY_ALBUM_ID = re.compile(r'.+spotify\.com(\/embed)?\/album\/(?P[^&?\/]+)') + RE_SPOTIFY_PLAYLIST_ID = re.compile(r'.+spotify\.com(\/embed)?\/playlist\/(?P[^&]+)') + + RE_APPLE_ALBUM_ID = re.compile(r'.+music\.apple\.com\/au\/album\/(?P.+)') + APPLE_ALBUM_URL = 'https://music.apple.com/au/album/{}' + + EXT_SEARCH_PLUGIN_FORMAT = 'plugin://plugin.audio.tripler/tracks/ext_search?q={search}' + + RE_MEDIA_URLS = { + 'bandcamp': { + 're': RE_BANDCAMP_ALBUM_ID, + 'base': BANDCAMP_ALBUM_PLUGIN_BASE_URL, + 'format': BANDCAMP_ALBUM_PLUGIN_FORMAT, + 'name': 'Bandcamp', + }, + 'bandcamp_track': { + 're': RE_BANDCAMP_TRACK_ID, + 'base': BANDCAMP_TRACK_PLUGIN_BASE_URL, + 'format': BANDCAMP_TRACK_PLUGIN_FORMAT, + 'name': 'Bandcamp', + }, + 'bandcamp_link': { + 're': RE_BANDCAMP_ALBUM_LINK_ID, + 'base': BANDCAMP_TRACK_PLUGIN_BASE_URL, + 'format': BANDCAMP_TRACK_PLUGIN_FORMAT, + 'name': 'Bandcamp', + }, + 'bandcamp_band_link': { + 're': RE_BANDCAMP_BAND_LINK_ID, + 'format': EXT_SEARCH_PLUGIN_FORMAT, + 'name': 'Bandcamp Band Search', + }, + 'soundcloud': { + 
're': RE_SOUNDCLOUD_PLAYLIST_ID, + 'base': SOUNDCLOUD_PLUGIN_BASE_URL, + 'format': SOUNDCLOUD_PLUGIN_FORMAT, + 'name': 'SoundCloud', + }, + 'youtube': { + 're': RE_YOUTUBE_VIDEO_ID, + 'base': YOUTUBE_PLUGIN_BASE_URL, + 'format': YOUTUBE_VIDEO_PLUGIN_FORMAT, + 'name': 'YouTube', + }, + 'youtube_playlist': { + 're': RE_YOUTUBE_PLAYLIST_ID, + 'base': YOUTUBE_PLUGIN_BASE_URL, + 'format': YOUTUBE_PLAYLIST_PLUGIN_FORMAT, + 'name': 'YouTube', + }, + 'indigitube': { + 're': RE_INDIGITUBE_ALBUM_ID, + 'base': INDIGITUBE_ALBUM_PLUGIN_BASE_URL, + 'format': INDIGITUBE_ALBUM_PLUGIN_FORMAT, + 'name': 'indigiTUBE', + }, + 'spotify': { + 're': RE_SPOTIFY_ALBUM_ID, + 'format': EXT_SEARCH_PLUGIN_FORMAT, + 'name': 'Album Search', + }, + 'spotify_playlist': { + 're': RE_SPOTIFY_PLAYLIST_ID, + 'format': EXT_SEARCH_PLUGIN_FORMAT, + 'name': 'Playlist Search', + }, + 'apple': { + 're': RE_APPLE_ALBUM_ID, + 'format': EXT_SEARCH_PLUGIN_FORMAT, + 'name': 'Album Search', + }, + } + + def __init__(self, quality): + self.quality = quality + + def parse_media_id(self, plugin, media_id, search=''): + info = self.RE_MEDIA_URLS.get(plugin, {}) + if info: + return info.get('format', '').format(info.get('base', ''), media_id, search=search) + else: + return '' + + def parse_art(self, art): + if art and 'f4.bcbits.com' in art: + band = '/img/a' not in art + quality = self._bandcamp_band_quality() if band else self._bandcamp_album_quality() + art = re.sub(self.RE_BANDCAMP_ART_QUALITY_SEARCH, f'/img/\g_{quality}.jpg', art) + if art and '/600x600bf-60.jpg' in art: + art = art.replace('/600x600bf-60.jpg', self._apple_album_quality()) + return art + + def _bandcamp_band_quality(self): + if self.quality == 0: + return 1 # full resolution + if self.quality == 1: + return 10 # 1200px wide + if self.quality == 2: + return 25 # 700px wide + + def _bandcamp_album_quality(self): + if self.quality == 0: + return 5 # 700px wide + if self.quality == 1: + return 2 # 350px wide + if self.quality == 2: + return 9 # 
210px wide + + def _apple_album_quality(self): + if self.quality == 0: + return '/600x600bf.jpg' + if self.quality == 1: + return '/600x600bf-60.jpg' + if self.quality == 2: + return '/300x300bb-60.jpg' diff --git a/plugin.audio.tripler/resources/lib/scraper.py b/plugin.audio.tripler/resources/lib/scraper.py new file mode 100644 index 0000000000..749df8a3f3 --- /dev/null +++ b/plugin.audio.tripler/resources/lib/scraper.py @@ -0,0 +1,2230 @@ +#!/usr/bin/env python +import bs4, html, time, json, re, sys +from concurrent.futures import ThreadPoolExecutor +from datetime import datetime, timedelta + +from urllib.request import Request, urlopen +from urllib.parse import parse_qs, urlencode +from urllib.error import URLError + +DATE_FORMAT = '%Y-%m-%d' + +URL_BASE = 'https://www.rrr.org.au' + +USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36' + +ignore_on_air = False + + +def get(resource_path): + return urlopen_ua(Scraper.url_for(resource_path)) + +def urlopen_ua(url): + return urlopen(Request(url, headers={'User-Agent': USER_AGENT}), timeout=5) + +def get_json(url): + return urlopen_ua(url).read().decode() + +def get_json_obj(url): + return json.loads(get_json(url)) + +def strptime(s, fmt): + return datetime.fromtimestamp(time.mktime(time.strptime(s, fmt))) + + +class Resource: + def __init__(self, itemobj): + self._itemobj = itemobj + + def id(self): + return self.path.split('/')[-1] + + @property + def path(self): + return Scraper.resource_path_for(self._itemobj.find('a').attrs['href']) + + RE_CAMEL = re.compile(r'(?[^/]+?)', pattern) + + '(?:[?](?P.+))?' 
+ + '$' + ) + + @classmethod + def resource_path_regex(cls): + return cls.regex_from(cls.RESOURCE_PATH_PATTERN) + + @classmethod + def match_resource_path(cls, path): + return cls.regex_from(cls.RESOURCE_PATH_PATTERN).match(path) + + @classmethod + def match_website_path(cls, path): + return cls.regex_from(cls.WEBSITE_PATH_PATTERN).match(path) + + @classmethod + def matching_resource_path(cls, resource_path): + if cls.match_resource_path(resource_path): + return cls(resource_path) + + + def __init__(self, resource_path): + self.resource_path = resource_path + m = self.__class__.resource_path_regex().match(self.resource_path) + if m: + self.groupdict = m.groupdict() + + def soup(self): + return bs4.BeautifulSoup(get(self.resource_path), 'html.parser') + + def url(self): + return f'{URL_BASE}{self.website_path()}' + + def website_path(self): + template = self.__class__.WEBSITE_PATH_PATTERN + + if self.groupdict.get('query_params'): + template += '?{query_params}' + + return template.format_map(self.groupdict) + + def pagination(self, pagekey='page', selfval=1, nextval=None, lastval=None): + resource_path = self.resource_path.split('?') + if len(resource_path) > 1: + resource_params = parse_qs(resource_path[-1]) + if not resource_params.get(pagekey): + resource_params[pagekey] = selfval + else: + resource_params[pagekey] = resource_params[pagekey][0] + else: + resource_params = {pagekey: selfval} + + template = resource_path[0] + '?{}' + links = {} + + links['self'] = template.format(urlencode(resource_params)) + + if nextval: + resource_params[pagekey] = nextval + else: + resource_params[pagekey] = int(resource_params[pagekey]) + 1 + links['next'] = template.format(urlencode(resource_params)) + + links_last = None + if lastval: + resource_params[pagekey] = lastval + links['last'] = template.format(urlencode(resource_params)) + + return links + + + +class ProgramsScraper(Scraper): + RESOURCE_PATH_PATTERN = '/programs' + WEBSITE_PATH_PATTERN = '/explore/programs' + + 
def generate(self): + return { + 'data': [ + Program(item).to_dict() + for item in self.soup().findAll('div', class_='card clearfix') + ], + 'links': { + 'self': self.__class__.RESOURCE_PATH_PATTERN + }, + } + + +class ProgramScraper(Scraper): + RESOURCE_PATH_PATTERN = '/programs/{program_id}' + WEBSITE_PATH_PATTERN = '/explore/programs/{program_id}' + + def generate(self): + soup = self.soup() + programtitle = soup.find(class_='page-banner__heading') + if programtitle: + title = programtitle.text + + thumbnail, background = None, None + programimage = soup.find(class_='card__background-image') + if programimage: + programimagesrc = re.search(r"https://[^']+", programimage.attrs.get('style')) + if programimagesrc: + thumbnail = programimagesrc[0] + + programbg = soup.find(class_='banner__image') + if programbg: + background = programbg.attrs.get('src') + + textbody = '\n'.join(( + soup.find(class_='page-banner__summary').text, + soup.find(class_='page-banner__time').text + )) + + # Aarrgh the website dragons strike again! 
+ def map_path(path): + m = re.match('^/explore/(?P[^/]+?)/(?P[^/]+?)#episode-selector', path) + if m: + d = m.groupdict() + if d['collection'] == 'programs': + return f"/explore/{d['collection']}/{d['program']}/episodes/page" + elif d['collection'] == 'podcasts': + return f"/explore/{d['collection']}/{d['program']}/episodes" + + collections = [ + { + 'type': 'collection', + 'id': Scraper.resource_path_for(map_path(anchor.attrs['href'])), + 'attributes': { + 'title': ' - '.join((title, anchor.text)), + 'thumbnail': thumbnail, + 'background': background, + 'textbody': textbody, + }, + 'links': { + 'self': Scraper.resource_path_for(map_path(anchor.attrs['href'])), + } + } + for anchor in soup.find_all('a', class_='program-nav__anchor') + ] + highlights = soup.find('a', string=re.compile('highlights')) + if highlights: + collections.append( + { + 'type': 'collection', + 'id': Scraper.resource_path_for(highlights.attrs['href']), + 'attributes': { + 'title': ' - '.join((title, 'Segments')), + 'thumbnail': thumbnail, + 'background': background, + 'textbody': textbody, + }, + 'links': { + 'self': Scraper.resource_path_for(highlights.attrs['href']), + } + } + ) + return { + 'data': collections, + } + + +class AudioItemGenerator: + def generate(self): + return { + 'data': [ + item for item in [ + AudioItem.factory(div) + for div in self.soup().findAll(class_='card__text') + ] + ], + 'links': self.pagination() + } + +class ProgramBroadcastsScraper(Scraper): + RESOURCE_PATH_PATTERN = '/programs/{program_id}/broadcasts' + WEBSITE_PATH_PATTERN = '/explore/programs/{program_id}/episodes/page' + + def generate(self): + soup = self.soup() + programtitle = soup.find(class_='page-banner__heading') + if programtitle: + title = programtitle.text + + thumbnail, background = None, None + programimage = soup.find(class_='card__background-image') + if programimage: + programimagesrc = re.search(r"https://[^']+", programimage.attrs.get('style')) + if programimagesrc: + thumbnail = 
programimagesrc[0] + + programbg = soup.find(class_='banner__image') + if programbg: + background = programbg.attrs.get('src') + + textbody = '\n'.join(( + soup.find(class_='page-banner__summary').text, + soup.find(class_='page-banner__time').text + )) + + # Aarrgh the website dragons strike again! + def map_path(path): + m = re.match('^/explore/(?P[^/]+?)/(?P[^/]+?)#episode-selector', path) + if m: + d = m.groupdict() + if d['collection'] == 'programs': + return f"/explore/{d['collection']}/{d['program']}/episodes/page" + elif d['collection'] == 'podcasts': + return f"/explore/{d['collection']}/{d['program']}/episodes" + + collections = [ + { + 'type': 'collection', + 'id': Scraper.resource_path_for(map_path(anchor.attrs['href'])), + 'attributes': { + 'title': ' - '.join((title, anchor.text)), + 'thumbnail': thumbnail, + 'background': background, + 'textbody': textbody, + }, + 'links': { + 'self': Scraper.resource_path_for(map_path(anchor.attrs['href'])), + } + } + for anchor in soup.find_all('a', class_='program-nav__anchor') + ] + + # hackety - hack - hack - hack ... 
just blindly turn "Broadcasts" into "Segments" while nobody is looking + collections[0]['id'] = collections[0]['id'].replace('broadcasts', 'segments') + collections[0]['links']['self'] = collections[0]['id'] + collections[0]['attributes']['title'] = collections[0]['attributes']['title'].replace('Broadcasts', 'Segments') + + broadcasts = [ + item for item in [ + BroadcastCollection(div).to_dict() + for div in self.soup().findAll(class_='card') + ] + ] + + images = { + 'background': background, + } + [b['attributes'].update(images) for b in broadcasts] + + collections = [item for item in (collections[::-1] + broadcasts) if item] + + return { + 'data': collections, + 'links': self.pagination(), + } + + + +class ProgramPodcastsScraper(Scraper, AudioItemGenerator): + RESOURCE_PATH_PATTERN = '/programs/{program_id}/podcasts' + WEBSITE_PATH_PATTERN = '/explore/podcasts/{program_id}/episodes' + + +class ProgramSegmentsScraper(Scraper, AudioItemGenerator): + RESOURCE_PATH_PATTERN = '/programs/{program_id}/segments' + WEBSITE_PATH_PATTERN = '/explore/programs/{program_id}/highlights' + + +class OnDemandSegmentsScraper(Scraper, AudioItemGenerator): + RESOURCE_PATH_PATTERN = '/segments' + WEBSITE_PATH_PATTERN = '/on-demand/segments' + + +class OnDemandBroadcastsScraper(Scraper, AudioItemGenerator): + RESOURCE_PATH_PATTERN = '/broadcasts' + WEBSITE_PATH_PATTERN = '/on-demand/episodes' + + +class ArchivesScraper(Scraper, AudioItemGenerator): + RESOURCE_PATH_PATTERN = '/archives' + WEBSITE_PATH_PATTERN = '/on-demand/archives' + + +class ArchiveScraper(Scraper): + RESOURCE_PATH_PATTERN = '/archives/{item}' + WEBSITE_PATH_PATTERN = '/on-demand/archives/{item}' + + def generate(self): + item = self.soup().find(class_='adaptive-banner__audio-component') + return { + 'data': AudioItem.factory(item) + } + + +class ExternalMedia: + RE_BANDCAMP_ALBUM_ID = re.compile(r'https://bandcamp.com/EmbeddedPlayer/.*album=(?P[^/]+)') + RE_BANDCAMP_ALBUM_ART = re.compile(r'"art_id":(\w+)') + 
BANDCAMP_ALBUM_ART_URL = 'https://bandcamp.com/api/mobile/24/tralbum_details?band_id=1&tralbum_type=a&tralbum_id={}' + + RE_BANDCAMP_ALBUM_LINK_ID = re.compile(r'(?Phttps?://[^/\.]+\.bandcamp.com/album/[\w\-]+)') + RE_BANDCAMP_BAND_LINK_ID = re.compile(r'(?Phttps?://[^/\.]+\.bandcamp.com/)$') + + RE_BANDCAMP_TRACK_ID = re.compile(r'(?Phttps?://[^/\.]+\.bandcamp.com/track/[\w\-]+)') + RE_BANDCAMP_TRACK_ART = re.compile(r'art_id":(?P\d+),') + RE_BANDCAMP_TRACK_TITLE = re.compile(r'\

\s+(?P[^\n]*)\s+\<\/h2\>') + RE_BANDCAMP_TRACK_ARTIST = re.compile(r'data-band="[^"]*;name":"(?P<artist>[^&]+)"') + RE_BANDCAMP_TRACK_DURATION = re.compile(r'duration":(?P<duration>[\d\.]+),') + RE_BANDCAMP_TRACK_BAND_ART = re.compile(r'data-band="[^"]*image_id":(?P<band_art_id>\d+)}"') + + RE_SOUNDCLOUD_PLAYLIST_ID = re.compile(r'.+soundcloud\.com/playlists/(?P<media_id>[^&]+)') + + RE_YOUTUBE_VIDEO_ID = re.compile(r'^(?:(?:https?:)?\/\/)?(?:(?:www|m)\.)?(?:youtube(?:-nocookie)?\.com|youtu.be)(?:\/(?:[\w\-]+\?v=|embed\/|v\/)?)(?P<media_id>[\w\-]+)(?!.*list)\S*$') + RE_YOUTUBE_VIDEO_ART_ID = re.compile(r'^https:\/\/i\.ytimg\.com\/vi\/(?P<media_id>[\w\-]+)\/hqdefault\.jpg$') + RE_YOUTUBE_VIDEO_TITLE = re.compile(r'"videoDetails":{[^}]*,"title":"(?P<title>[^"]+)"') + RE_YOUTUBE_VIDEO_ARTIST = re.compile(r'<link itemprop="name" content="(?P<artist>[^"]+)"') + RE_YOUTUBE_VIDEO_DESC = re.compile(r'"attributedDescription":{"content":"(?P<textbody>[^{]*)","') + RE_YOUTUBE_VIDEO_DURATION = re.compile(r'itemprop="duration" content="PT(?P<hours>[\d]+H)?(?P<minutes>[\d]+M)?(?P<seconds>[\d]+S)?"') + YOUTUBE_VIDEO_DURATION_URL = 'https://www.youtube.com/watch?v={}' + YOUTUBE_VIDEO_ART_URL_FORMAT = 'https://i.ytimg.com/vi/{}/hqdefault.jpg' + + RE_YOUTUBE_PLAYLIST_ID = re.compile(r'^(?:(?:https?:)?\/\/)?(?:(?:www|m)\.)?(?:youtube(?:-nocookie)?\.com|youtu.be)\/.+\?.*list=(?P<media_id>[\w\-]+)') + YOUTUBE_PLAYLIST_ART_URL = 'https://www.youtube.com/playlist?list={}' + RE_YOUTUBE_PLAYLIST_ART = re.compile(r'og:image" content="(?P<art_url>[^"]+)"><meta property="og:image:width" content="640"') + RE_YOUTUBE_PLAYLIST_ART_LQ = re.compile(r'og:image" content="(?P<art_url>[^?]+)[^"]+"') + RE_YOUTUBE_PLAYLIST_TITLE = re.compile(r'<meta property="og:title" content="(?P<title>[^"]+)"') + RE_YOUTUBE_PLAYLIST_ARTIST = re.compile(r'"shortBylineText":{"runs":\[{"text":"(?P<artist>[^"]+)"') + RE_YOUTUBE_PLAYLIST_DURATION = re.compile(r'"lengthText":[^}]+}},"simpleText":"(?P<duration>[^"]+)"}') + 
+ RE_INDIGITUBE_ALBUM_ID = re.compile(r'https://www.indigitube.com.au/embed/album/(?P<media_id>[^"]+)') + INDIGITUBE_ACCESS_KEY = 'access_token=%242a%2410%24x2Zy%2FTgIAOC0UUMi3NPKc.KY49e%2FZLUJFOpBCNYAs8D72UUnlI526' + INDIGITUBE_ALBUM_URL = 'https://api.appbooks.com/content/album/{}?' + INDIGITUBE_ACCESS_KEY + INDIGITUBE_ALBUM_ART_URL = 'https://api.appbooks.com/get/{}/file/file.jpg?w=512&quality=90&' + INDIGITUBE_ACCESS_KEY + '&ext=.jpg' + + RE_SPOTIFY_ALBUM_ID = re.compile(r'.+spotify\.com(\/embed)?\/album\/(?P<media_id>[^&?\/]+)') + RE_SPOTIFY_PLAYLIST_ID = re.compile(r'.+spotify\.com(\/embed)?\/playlist\/(?P<media_id>[^&]+)') + RE_SPOTIFY_ALBUM_ART = re.compile(r'\-\-image\-src:url\((\&\#x27\;|\')(?P<art_url>[^\&\']+)(\&\#x27\;|\')') + RE_SPOTIFY_DURATION = re.compile(r'<\/h4><div class="[^"]+">(?P<duration>[^<]+)</div></li>') + + RE_APPLE_ALBUM_ID = re.compile(r'.+music\.apple\.com\/au\/album\/(?P<media_id>.+)') + APPLE_ALBUM_URL = 'https://music.apple.com/au/album/{}' + RE_APPLE_ALBUM_ART = re.compile(r'meta name="twitter:image" content="(?P<art_url>[^"]+)">') + RE_APPLE_DURATION = re.compile(r'meta property="music:song:duration" content="PT(?P<hours>[\d]+H)?(?P<minutes>[\d]+M)?(?P<seconds>[\d]+S)?">') + + RE_MEDIA_URLS = { + 'bandcamp': { + 're': RE_BANDCAMP_ALBUM_ID, + }, + 'bandcamp_link': { + 're': RE_BANDCAMP_ALBUM_LINK_ID, + }, + 'bandcamp_band_link': { + 're': RE_BANDCAMP_BAND_LINK_ID, + }, + 'bandcamp_track': { + 're': RE_BANDCAMP_TRACK_ID, + }, + 'soundcloud': { + 're': RE_SOUNDCLOUD_PLAYLIST_ID, + }, + 'youtube': { + 're': RE_YOUTUBE_VIDEO_ID, + }, + 'youtube_art': { + 're': RE_YOUTUBE_VIDEO_ART_ID, + }, + 'youtube_playlist': { + 're': RE_YOUTUBE_PLAYLIST_ID, + }, + 'indigitube': { + 're': RE_INDIGITUBE_ALBUM_ID, + }, + 'spotify': { + 're': RE_SPOTIFY_ALBUM_ID, + }, + 'spotify_playlist': { + 're': RE_SPOTIFY_PLAYLIST_ID, + }, + 'apple': { + 're': RE_APPLE_ALBUM_ID, + }, + } + + fetch_yt_video = False + + def media_items(self, iframes, 
fetch_album_art=False, fetch_yt_video=False): + matches = [] + self.fetch_yt_video = fetch_yt_video + + for iframe in iframes: + if not iframe.get('src'): + continue + media_id = None + for plugin, info in self.RE_MEDIA_URLS.items(): + plugin_match = re.match(info.get('re'), iframe.get('src')) + if plugin_match: + media_id = plugin_match.groupdict().get('media_id') + if media_id: + break + + matches.append({ + 'media_id': media_id, + 'src': iframe.get('src'), + 'attrs': iframe.get('attrs') if iframe.get('attrs') else {}, + 'plugin': plugin if plugin_match else None, + }) + + if fetch_album_art: + executor = ThreadPoolExecutor(max_workers=3) + art_exec = [executor.submit(self.get_album_art, match=match) for match in matches] + matches = [match.result() for match in art_exec] + + return matches + + def get_album_art(self, match={}): + result = match + media_id, plugin = match['media_id'], match['plugin'] + album_art = {} + if plugin == 'bandcamp': + album_art = self.bandcamp_album_art(media_id) + elif plugin == 'bandcamp_link': + album_art = self.bandcamp_track_art(media_id) + elif plugin == 'bandcamp_band_link': + album_art = self.bandcamp_band_art(media_id) + elif plugin == 'bandcamp_track': + album_art = self.bandcamp_track_art(media_id) + elif plugin == 'indigitube': + album_art = self.indigitube_album_art(media_id) + elif plugin == 'spotify' or plugin == 'spotify_playlist': + album_art = self.spotify_album_art(match['src']) + elif plugin == 'apple': + album_art = self.apple_album_art(media_id) + elif plugin == 'youtube_playlist': + album_art = self.youtube_playlist_art(media_id) + elif plugin == 'youtube' or plugin == 'youtube_art': + result['plugin'] = 'youtube' + if self.fetch_yt_video: + album_art = self.youtube_video_duration(media_id) + album_art['art'] = self.YOUTUBE_VIDEO_ART_URL_FORMAT.format(media_id) + + result['thumbnail'] = album_art.get('art') + result['background'] = album_art.get('band') + result['duration'] = album_art.get('duration') + if 
'attrs' not in result.keys(): + result['attrs'] = {} + if 'title' not in result['attrs'].keys() and album_art.get('title'): + result['attrs']['title'] = album_art.get('title') + if 'artist' not in result['attrs'].keys() and album_art.get('artist'): + result['attrs']['artist'] = album_art.get('artist') + if 'textbody' not in result['attrs'].keys() and album_art.get('textbody'): + result['attrs']['textbody'] = album_art.get('textbody') + return result + + def get_sum_duration(self, duration_matches): + durations = [int(x.split(':')[0]) * 60 + int(x.split(':')[1]) for x in duration_matches] + return sum(durations) + + def get_pt_duration(self, duration): + result = 0 + if duration['hours']: + result += int(duration['hours'][:-1]) * 3600 + if duration['minutes']: + result += int(duration['minutes'][:-1]) * 60 + if duration['seconds']: + result += int(duration['seconds'][:-1]) + return result + + def bandcamp_album_art(self, album_id): + api_url = self.BANDCAMP_ALBUM_ART_URL.format(album_id) + try: + json_obj = get_json_obj(api_url) + except URLError as e: + return {} + + art_id = json_obj.get('art_id') + band_id = json_obj.get('band', {}).get('image_id') + + duration = 0.0 + for track in json_obj.get('tracks', []): + duration += float(track.get('duration', '0')) + + result = {} + if art_id: + result['art'] = f'https://f4.bcbits.com/img/a{art_id}_5.jpg' + if band_id: + result['band'] = f'https://f4.bcbits.com/img/{band_id}_20.jpg' + if duration: + result['duration'] = int(duration) + return result + + def bandcamp_track_art(self, track_url): + try: + track_page = get_json(track_url) + except URLError as e: + return {} + + art_match = re.search(self.RE_BANDCAMP_TRACK_ART, track_page) + band_match = re.search(self.RE_BANDCAMP_TRACK_BAND_ART, track_page) + title_match = re.search(self.RE_BANDCAMP_TRACK_TITLE, track_page) + artist_match = re.search(self.RE_BANDCAMP_TRACK_ARTIST, track_page) + duration_matches = re.finditer(self.RE_BANDCAMP_TRACK_DURATION, track_page) + 
result = {} + if art_match: + art_id = art_match.groupdict().get('art_id') + result['art'] = f'https://f4.bcbits.com/img/a{art_id}_5.jpg' + if band_match: + band_id = band_match.groupdict().get('band_art_id') + result['band'] = f'https://f4.bcbits.com/img/{band_id}_20.jpg' + if title_match: + result['title'] = title_match.groupdict().get('title', '').strip() + if artist_match: + result['artist'] = artist_match.groupdict().get('artist') + + duration = 0.0 + for match in duration_matches: + duration += float(match.groupdict().get('duration', '0')) + result['duration'] = int(duration) + + return result + + def bandcamp_band_art(self, track_url): + try: + track_page = get_json(track_url) + except URLError as e: + return {} + + band_match = re.search(self.RE_BANDCAMP_TRACK_BAND_ART, track_page) + artist_match = re.search(self.RE_BANDCAMP_TRACK_ARTIST, track_page) + result = {} + if band_match: + band_id = band_match.groupdict().get('band_art_id') + result['band'] = f'https://f4.bcbits.com/img/{band_id}_20.jpg' + result['art'] = result['band'] + if artist_match: + result['artist'] = artist_match.groupdict().get('artist') + result['title'] = result['artist'] + + return result + + def indigitube_album_art(self, album_id): + api_url = self.INDIGITUBE_ALBUM_URL.format(album_id) + result = {} + + try: + json_obj = get_json_obj(api_url) + except URLError as e: + return result + + data = json_obj.get('data', {}) + + art_id = data.get('coverImage', {}).get('_id') + if art_id: + result['art'] = self.INDIGITUBE_ALBUM_ART_URL.format(art_id) + description = json_obj.get('data', {}).get('description', '') + if description: + result['textbody'] = re.compile(r'<[^>]+>').sub('', description) + result['title'] = json_obj.get('title') + result['artist'] = json_obj.get('realms', [{}])[0].get('title') + + return result + + def youtube_video_duration(self, video_id): + video_url = self.YOUTUBE_VIDEO_DURATION_URL.format(video_id) + try: + video_page = get_json(video_url) + except URLError as 
e: + return {} + + duration_match = re.search(self.RE_YOUTUBE_VIDEO_DURATION, video_page) + title_match = re.search(self.RE_YOUTUBE_VIDEO_TITLE, video_page) + artist_match = re.search(self.RE_YOUTUBE_VIDEO_ARTIST, video_page) + desc_match = re.search(self.RE_YOUTUBE_VIDEO_DESC, video_page) + result = {'duration': 0} + if duration_match: + gd = duration_match.groupdict() + result['duration'] = self.get_pt_duration(gd) + if title_match: + result['title'] = title_match.groupdict().get('title', '').strip() + if artist_match: + result['artist'] = artist_match.groupdict().get('artist', '').strip() + if desc_match: + result['textbody'] = html.unescape(desc_match.groupdict().get('textbody', '').strip()) + + return result + + def youtube_playlist_art(self, playlist_id): + api_url = self.YOUTUBE_PLAYLIST_ART_URL.format(playlist_id) + try: + playlist_page = get_json(api_url) + except URLError as e: + return {} + + art_match = re.search(self.RE_YOUTUBE_PLAYLIST_ART, playlist_page) + duration_matches = re.findall(self.RE_YOUTUBE_PLAYLIST_DURATION, playlist_page) + title_match = re.search(self.RE_YOUTUBE_PLAYLIST_TITLE, playlist_page) + artist_match = re.search(self.RE_YOUTUBE_PLAYLIST_ARTIST, playlist_page) + + result = {} + if art_match: + result['art'] = art_match.groupdict().get('art_url').replace('&', '&') + '&ext=.jpg' + else: + art_match = re.search(self.RE_YOUTUBE_PLAYLIST_ART_LQ, playlist_page) + result['art'] = art_match.groupdict().get('art_url') + if duration_matches: + result['duration'] = self.get_sum_duration(duration_matches) + if title_match: + result['title'] = html.unescape(title_match.groupdict().get('title', '').strip()) + if artist_match: + result['artist'] = html.unescape(artist_match.groupdict().get('artist', '').strip()) + return result + + def spotify_album_art(self, src): + api_url = src + try: + spotify_page = get_json(api_url) + except URLError as e: + return {} + + art_match = re.search(self.RE_SPOTIFY_ALBUM_ART, spotify_page) + duration_matches = 
re.findall(self.RE_SPOTIFY_DURATION, spotify_page) + + result = {} + if art_match: + result['art'] = art_match.groupdict().get('art_url') + if duration_matches: + result['duration'] = self.get_sum_duration(duration_matches) + return result + + def apple_album_art(self, album_id): + api_url = self.APPLE_ALBUM_URL.format(album_id) + try: + album_page = get_json(api_url) + except URLError as e: + return {} + + art_match = re.search(self.RE_APPLE_ALBUM_ART, album_page) + duration_match = re.finditer(self.RE_APPLE_DURATION, album_page) + result = {'duration': 0} + + if art_match: + result['art'] = art_match.groupdict().get('art_url') + for duration in duration_match: + gd = duration.groupdict() + result['duration'] += self.get_pt_duration(duration) + + return result + +class FeaturedAlbumScraper(Scraper, ExternalMedia): + RESOURCE_PATH_PATTERN = '/featured_albums/{album_id}' + WEBSITE_PATH_PATTERN = '/explore/album-of-the-week/{album_id}' + + @property + def path(self): + return self.resource_path + + def generate(self): + pagesoup = self.soup() + + iframes = [ + { + 'src': iframe.attrs.get('src'), + 'attrs': None + } + for iframe in pagesoup.findAll('iframe') + if iframe.attrs.get('src') + ] + album_urls = self.media_items(iframes, fetch_album_art=True, fetch_yt_video=True) + + album_copy = '\n'.join([p.text for p in pagesoup.find(class_='feature-album__copy').findAll("p", recursive=False)]) + album_image = pagesoup.find(class_='audio-summary__album-artwork') + album_info = pagesoup.find(class_='album-banner__copy') + album_title = album_info.find(class_='album-banner__heading', recursive=False).text + album_artist = album_info.find(class_='album-banner__artist', recursive=False).text + + album_type = 'featured_album' + album_id = self.resource_path.split('/')[-1] + background = None + duration = None + + for album in [album for album in album_urls if album.get('plugin')]: + album_type = album.get('plugin') + album_id = album.get('media_id') + background = 
album.get('background') + duration = album.get('duration') + + data = [ + { + 'type': album_type, + 'id': album_id, + 'attributes': { + 'title': album_title, + 'artist': album_artist, + 'textbody': album_copy, + 'duration': duration, + }, + 'links': { + 'self': self.path, + } + } + ] + + if album_image: + data[0]['attributes']['thumbnail'] = album_image.attrs.get('src') + + if background: + data[0]['attributes']['background'] = background + + return { + 'data': data, + } + + + +class FeaturedAlbumsScraper(Scraper): + RESOURCE_PATH_PATTERN = '/featured_albums' + WEBSITE_PATH_PATTERN = '/explore/album-of-the-week' + + def generate(self): + return { + 'data': [ + FeaturedAlbum(item).to_dict() + for item in self.soup().findAll('div', class_='card clearfix') + ], + 'links': self.pagination() + } + + +class NewsItemsScraper(Scraper): + RESOURCE_PATH_PATTERN = '/news_items' + WEBSITE_PATH_PATTERN = '/explore/news-articles' + + def generate(self): + return { + 'data': [ + News(item).to_dict() + for item in self.soup().findAll(class_='list-view__item') + ], + 'links': self.pagination(), + } + + +class NewsItemScraper(Scraper): + RESOURCE_PATH_PATTERN = '/news_items/{item}' + WEBSITE_PATH_PATTERN = '/explore/news-articles/{item}' + + +class ProgramBroadcastScraper(Scraper): + RESOURCE_PATH_PATTERN = '/programs/{program_id}/broadcasts/{item}' + WEBSITE_PATH_PATTERN = '/explore/programs/{program_id}/episodes/{item}' + + def generate(self): + soup = self.soup() + programbg = soup.find(class_='banner__image') + programbg = programbg.attrs.get('src') if programbg else None + + broadcast = ProgramBroadcast( + soup.find(class_='audio-summary') + ).to_dict() + broadcast['attributes']['textbody'] = soup.find(class_='page-banner__summary').text + + segments = [ + ProgramBroadcastSegment(item).to_dict() + for item in soup.findAll(class_='episode-detail__highlights-item') + ] + + tracks = [ + ProgramBroadcastTrack(item).to_dict() + for item in soup.findAll(class_='audio-summary__track 
clearfix') + ] + + items = [] + for item in ([broadcast] + segments + tracks): + if not item: + continue + if programbg and not item.get('attributes', {}).get('background'): + item['attributes']['background'] = programbg + items.append(item) + + return { + 'data': items + } + + +class ProgramPodcastScraper(Scraper): + RESOURCE_PATH_PATTERN = '/programs/{program_id}/podcasts/{item}' + WEBSITE_PATH_PATTERN = '/explore/podcasts/{program_id}/episodes/{item}' + + def generate(self): + return {'data': []} + + +class ProgramSegmentScraper(Scraper): + RESOURCE_PATH_PATTERN = '/segments/{item}' + WEBSITE_PATH_PATTERN = '/on-demand/segments/{item}' + + def generate(self): + return {'data': []} + + +class ScheduleScraper(Scraper): + RESOURCE_PATH_PATTERN = '/schedule' + WEBSITE_PATH_PATTERN = '/explore/schedule' + + def generate(self): + soup = self.soup() + date = soup.find(class_='calendar__hidden-input').attrs.get('value') + prevdate, nextdate = [x.find('a').attrs.get('href').split('=')[-1] for x in soup.findAll(class_='page-nav__item')] + return { + 'data': [ + ScheduleItem(item).to_dict() + for item in self.soup().findAll(class_='list-view__item') + ], + 'links': self.pagination(pagekey='date', selfval=date, nextval=prevdate), + } + + +class SearchScraper(Scraper): + RESOURCE_PATH_PATTERN = '/search' + WEBSITE_PATH_PATTERN = '/search' + + def generate(self): + return { + 'data': [ + SearchItem(item).to_dict() + for item in self.soup().findAll(class_='search-result') + ], + 'links': self.pagination(), + } + + +class SoundscapesScraper(Scraper): + RESOURCE_PATH_PATTERN = '/soundscapes' + WEBSITE_PATH_PATTERN = '/explore/soundscape' + + def generate(self): + return { + 'data': [ + Soundscape(item).to_dict() + for item in self.soup().findAll(class_='list-view__item') + ], + 'links': self.pagination() + } + + +class SoundscapeScraper(Scraper, ExternalMedia): + RESOURCE_PATH_PATTERN = '/soundscapes/{item}' + WEBSITE_PATH_PATTERN = '/explore/soundscape/{item}' + + def 
generate(self): + pagesoup = self.soup() + + iframes = [] + section = pagesoup.find('section', class_='copy') + for heading in section.findAll(['h1', 'h2', 'h3', 'h4', 'p'], recursive=False): + iframe = heading.find_next_sibling() + while iframe != None and iframe.find('iframe') == None: + iframe = iframe.find_next_sibling() + if iframe == None or len(heading.text) < 2: + continue + + aotw = len(heading.text.split('**')) > 1 + + attrs = { + 'id': ' '.join(heading.text.split('**')[0].split(' - ')), + 'title': heading.text.split('**')[0].split(' - ')[-1].split(' – ')[-1], + 'artist': heading.text.split(' - ')[0].split(' – ')[0], + 'featured_album': heading.text.split('**')[1] if aotw else '', + } + media = { + 'src': iframe.find('iframe').attrs.get('src'), + 'attrs': attrs, + } + if aotw: + iframes.insert(0, media) + else: + iframes.append(media) + + media_items = self.media_items(iframes, fetch_album_art=True, fetch_yt_video=True) + soundscape_date = pagesoup.find(class_='news-item__title').text.split(' - ')[-1] + + data = [] + for media in media_items: + dataitem = {} + attributes = { + 'subtitle': soundscape_date, + 'artist': media.get('attrs').get('artist'), + 'thumbnail': media.get('thumbnail'), + } + + if media.get('background'): + attributes['background'] = media.get('background') + + if media.get('duration'): + attributes['duration'] = media.get('duration') + + if media.get('plugin'): + # dataitem['id'] = media.get('attrs').get('id', '').replace(' ', '-').lower() + dataitem['id'] = media.get('media_id') + dataitem['type'] = media.get('plugin') + attributes['title'] = media.get('attrs').get('title') + # attributes['url'] = media.get('url') + else: + dataitem['id'] = '' + attributes['title'] = media.get('attrs').get('title') + + if media.get('attrs').get('textbody'): + attributes['textbody'] = media.get('attrs').get('textbody', '').strip() + else: + attributes['textbody'] = '{}\n{}\n'.format( + media.get('attrs').get('title'), + 
media.get('attrs').get('featured_album') + ).strip() + + dataitem['attributes'] = attributes + + data.append(dataitem) + + return { + 'data': data, + } + + +class Program(Resource): + @property + def path(self): + return f"{Scraper.resource_path_for(self._itemobj.find('a').attrs['href'])}/broadcasts?page=1" + + def id(self): + return self.path.split("/")[2] + + @property + def title(self): + return self._itemobj.find('h1', class_='card__title' ).find('a').text + + @property + def textbody(self): + return self._itemobj.find('p').text + + def attributes(self): + return { + 'title': self.title, + 'thumbnail': self.thumbnail, + 'textbody': self.textbody, + } + + +class Topic(Resource): + @property + def title(self): + return self._itemobj.find('a').text + + def attributes(self): + return { + 'title': self.title + } + + +class TopicsScraper(Scraper): + RESOURCE_PATH_PATTERN = '/topics' + WEBSITE_PATH_PATTERN = '/' + + def generate(self): + return { + 'data': [ + Topic(item).to_dict() + for item in self.soup().findAll(class_='topic-list__item') + ], + 'links': { + 'self': self.__class__.RESOURCE_PATH_PATTERN + }, + } + + +class TopicScraper(Scraper): + RESOURCE_PATH_PATTERN = '/topics/{topic}' + WEBSITE_PATH_PATTERN = '/topics/{topic}' + + def generate(self): + return { + 'data': [ + SearchItem(item).to_dict() + for item in self.soup().findAll(class_='search-result') + ], + 'links': self.pagination(), + } + + +class TracksSearchScraper(Scraper): + RESOURCE_PATH_PATTERN = '/tracks/search' + WEBSITE_PATH_PATTERN = '/tracks/search' + + def generate(self): + return { + 'data': [ + BroadcastTrack(item).to_dict() + for item in self.soup().findAll(class_='search-result') + ], + } + + +class TrackScraper(Scraper): + RESOURCE_PATH_PATTERN = '/tracks/{track_id}' + WEBSITE_PATH_PATTERN = '/tracks/{track_id}' + + def generate(self): + return {'data': []} + + +class Track(Resource): + def __init__(self, path, artist, title): + self._path = path + self.artist = artist + self.title = 
title + + @property + def path(self): + return self._path + + def id(self): + return self.path.split('/')[-1] + + def attributes(self): + return { + 'title': self.title, + 'artist': self.artist, + } + + +class EventsScraper(Scraper): + RESOURCE_PATH_PATTERN = '/events' + WEBSITE_PATH_PATTERN = '/events' + + def generate(self): + return { + 'data': [ + Event(item).to_dict() + for item in self.soup().findAll('div', class_='card') + ], + 'links': self.pagination() + } + + +class EventScraper(Scraper, ExternalMedia): + RESOURCE_PATH_PATTERN = '/events/{item}' + WEBSITE_PATH_PATTERN = '/events/{item}' + + @property + def path(self): + return self.resource_path + + def generate(self): + item = self.soup().find(class_='event') + venue = item.find(class_='event__venue-address-details') + eventdetails = item.find(class_='event__details-copy').get_text(' ').strip() + copy = item.find(class_='copy') + textbody = copy.get_text('\n') + + flag_label = item.find(class_='flag-label') + if flag_label: + event_type = flag_label.text.replace(' ', '-').lower() + else: + # event_type = None + event_type = 'event' + + result = { + 'data': [ + { + 'type': event_type, + 'id': Resource.id(self), + 'attributes': { + 'title': item.find(class_='event__title').text, + 'venue': venue.get_text(' ') if venue else '', + 'textbody': '\n'.join((eventdetails, textbody)), + }, + 'links': { + 'self': self.resource_path, + } + } + ], + } + + for link in copy.find_all(['a', 'iframe']): + link_href = { + 'src': link.attrs.get('href', link.attrs.get('src')), + } + media = self.media_items([link_href], fetch_album_art=True, fetch_yt_video=True)[0] + if media.get('plugin'): + dataitem = {} + if media.get('plugin'): + dataitem['id'] = media.get('media_id') + dataitem['type'] = media.get('plugin') + else: + dataitem['id'] = '' + + dataitem['attributes'] = { + 'thumbnail': media.get('thumbnail'), + 'background': media.get('background'), + 'duration': media.get('duration'), + 'title': 
media.get('attrs').get('title'), + 'textbody': media.get('attrs').get('textbody', media.get('attrs').get('title')), + 'artist': media.get('attrs').get('artist'), + } + + result['data'].append(dataitem) + + return result + + +class GiveawaysScraper(Scraper): + RESOURCE_PATH_PATTERN = '/giveaways' + WEBSITE_PATH_PATTERN = '/subscriber-giveaways' + + def generate(self): + return { + 'data': [ + Giveaway(item).to_dict() + for item in self.soup().findAll(class_='list-view__item') + ], + } + + +class GiveawayScraper(Scraper): + RESOURCE_PATH_PATTERN = '/giveaways/{giveaway}' + WEBSITE_PATH_PATTERN = '/subscriber-giveaways/{giveaway}' + + @property + def path(self): + return self.resource_path + + def generate(self): + item = self.soup().find(class_='subscriber_giveaway') + banner = self.soup().find(class_='compact-banner') + closes = banner.find(class_='compact-banner__date').text + textbody = item.find(class_='subscriber-giveaway__copy').get_text(' ') + + return { + 'data': [ + { + 'type': 'giveaway', + 'id': Resource.id(self), + 'attributes': { + 'title': banner.find(class_='compact-banner__heading').text, + 'textbody': f'{closes}\n\n{textbody}', + 'thumbnail': item.find(class_='summary-inset__artwork').attrs.get('src'), + }, + 'links': { + 'self': '/'.join((self.resource_path, 'entries')), + } + } + ], + } + + +class VideoScraper(Scraper): + RESOURCE_PATH_PATTERN = '/videos/{item}' + WEBSITE_PATH_PATTERN = '/explore/videos/{item}' + + def generate(self): + return {'data': []} + + +class VideosScraper(Scraper, ExternalMedia): + RESOURCE_PATH_PATTERN = '/videos' + WEBSITE_PATH_PATTERN = '/explore/videos' + + def generate(self): + pagesoup = self.soup() + + images = [] + for card in pagesoup.findAll(class_='card'): + img = card.find('img', class_='scalable-image__image') + carddate = card.find('span', class_='card__meta') + cardurl = card.find('a', class_='card__anchor') + #time.strptime(carddate.text, '%d %B %Y') + + attrs = { + 'id': cardurl.attrs.get('href', 
'/').split('/')[-1], + 'title': img.attrs.get('alt'), + 'date': carddate.text, + } + media = { + 'src': img.attrs.get('data-src'), + 'attrs': attrs, + } + images.append(media) + + media_items = self.media_items(images, fetch_album_art=True) + + data = [] + for media in media_items: + dataitem = {} + attributes = { + 'subtitle': media.get('attrs').get('date'), + 'artist': media.get('attrs').get('artist'), + 'thumbnail': media.get('thumbnail'), + } + + if media.get('background'): + attributes['background'] = media.get('background') + + if media.get('duration'): + attributes['duration'] = media.get('duration') + + if media.get('plugin'): + dataitem['id'] = media.get('media_id') + dataitem['type'] = media.get('plugin') + attributes['title'] = media.get('attrs').get('title') + else: + dataitem['id'] = '' + attributes['title'] = media.get('attrs').get('title') + + attributes['textbody'] = media.get('attrs').get('title').strip() + + dataitem['attributes'] = attributes + + data.append(dataitem) + + return { + 'data': data, + 'links': self.pagination(), + } + + + +### Scrapers ############################################## + +class FeaturedAlbum(Resource): + @property + def title(self): + return self._itemobj.find('h1', class_='card__title' ).find('a').text + + @property + def subtitle(self): + return self._itemobj.find(class_='card__meta').text + + @property + def textbody(self): + return self._itemobj.find('p').text + + def attributes(self): + return { + 'title': self.title, + 'subtitle': self.subtitle, + 'thumbnail': self.thumbnail, + 'textbody': self.textbody, + } + + +class Giveaway(Resource): + @property + def title(self): + return self._itemobj.find('span').text + + @property + def textbody(self): + return self._itemobj.find('p').text + + def attributes(self): + return { + 'title': self.title, + 'textbody': self.textbody, + 'thumbnail': self.thumbnail, + } + + +class News(Resource): + @property + def title(self): + return 
self._itemobj.find(class_='list-view__title').text + + @property + def type(self): + return 'news_item' + + @property + def textbody(self): + return self._itemobj.find(class_='list-view__summary').text + + def attributes(self): + return { + 'title': self.title, + 'textbody': self.textbody, + } + + +class Soundscape(Resource): + @property + def title(self): + return self._itemobj.find('span').text.replace(':', '').replace('Triple R ', '') + + @property + def subtitle(self): + return self._itemobj.find('span').text.split(' - ')[-1] + + @property + def textbody(self): + return self._itemobj.find('p').text + + def attributes(self): + return { + 'title': self.title, + 'subtitle': self.subtitle, + 'textbody': self.textbody, + 'thumbnail': self.thumbnail, + } + + +class Event(Resource): + @property + def _itemtitle(self): + return self._itemobj.find(class_='card__title').find('a').text + + @property + def title(self): + if self.label: + return ' - '.join((self._itemtitle, self._itemdate, self.label)) + else: + return ' - '.join((self._itemtitle, self._itemdate)) + + @property + def label(self): + label = self._itemobj.find(class_='card__label') + return label.text if label else '' + + @property + def _itemtype(self): + return self._itemobj.find(class_='card__meta').find('div').text + + @property + def type(self): + return self._itemtype.replace(' ', '-').lower() + + @property + def img(self): + return self._itemobj.find('a', class_='card__anchor').find('img') + + @property + def _itemdate(self): + meta = self._itemobj.find('span', class_='card__meta') + metadiv = meta.findAll('div') + if len(metadiv) > 0: + return metadiv[0].text + else: + return meta.text if meta else '' + + @property + def venue(self): + meta = self._itemobj.find('span', class_='card__meta') + metadiv = meta.findAll('div') + if len(metadiv) > 1: + return metadiv[1].text + + @property + def textbody(self): + venue = self.venue + return '\n'.join((self._itemtitle, 'Date: ' + self._itemdate, ('Venue:\n' + 
venue) if venue else '', '', self._itemtype)) + + def attributes(self): + return { + 'title': self.title, + 'thumbnail': self.thumbnail, + 'venue': self.venue, + 'textbody': self.textbody, + } + + +class ScheduleItem: + def __init__(self, itemobj): + self._itemobj = itemobj + self._audio_item = AudioItem.factory(itemobj) + + @property + def path(self): + path = Scraper.resource_path_for(self._itemobj.find('a').attrs['href']) + segments = path.split('?')[0].split('/') + if 'programs' in segments and 'broadcasts' not in segments: + path += '/broadcasts?page=1' + + return path + + @property + def start(self): + return self._itemobj.attrs.get('data-timeslot-start') + + @property + def end(self): + return self._itemobj.attrs.get('data-timeslot-end') + + @property + def _on_air_status(self): + if self.start and self.end and '+' in self.start: + start = self.start.split('+') + end = self.end.split('+') + td = timedelta(hours=int(start[1][:2])) + try: + start = strptime(start[0], '%Y-%m-%dT%H:%M:%S') - td + end = strptime(end[0], '%Y-%m-%dT%H:%M:%S') - td + return start, end + except (ValueError, TypeError) as e: + pass + return None, None + + @property + def textbody(self): + return self._itemobj.find('p').text + + @property + def duration(self): + if self.audio_item: + return self.audio_item.get('attributes').get('duration') + + @property + def content(self): + content = json.loads(self._itemobj.find(class_='hide-from-all').attrs['data-content']) + content['title'] = content.pop('name') + + if self.audio_item: + content['type'] = 'broadcast_index' + content['title'] = self.audio_item.get('attributes').get('title') + else: + if '/broadcasts?page=1' not in self.path: + content['type'] = 'broadcast_index' + elif content['type'] == 'programs': + content['type'] = 'program' + else: + content['type'] = 'scheduled' + + start, end = self._on_air_status + if (not ignore_on_air) and start and end: + localtime = datetime.utcnow() + if start < localtime and end > localtime: + 
flag_label = self._itemobj.find(class_='flag-label__on-air').next_sibling + if flag_label: + content['on_air'] = flag_label.string + img = self._itemobj.find(class_='list-view__image') + if img: + content['thumbnail'] = img.attrs.get('data-src') + + return content + + @property + def audio_item(self): + return self._audio_item or {} + + def to_dict(self): + attrs = { + **self.content, + 'start': self.start, + 'end': self.end, + 'textbody': self.textbody, + 'duration': self.duration, + } + itemid = attrs.pop('id') + itemtype = attrs.pop('type') + + return { + 'type': itemtype, + 'id': itemid, + 'attributes': attrs, + 'links': { + 'self': self.path + } + } + + +class ItemType: + def from_label(val): + default = "_".join(val.lower().split()) + return { + 'album_of_the_week': 'featured_album', + 'audio_archive': 'archive', + 'broadcast_episode': 'broadcast', + 'news': 'news_item', + 'podcast_episode': 'podcast', + }.get(default, default) + + +class SearchItem(Resource): + @property + def type(self): + return ItemType.from_label(self._itemobj.find(class_='flag-label').text) + + @property + def title(self): + return self._itemobj.find(class_='search-result__title').text + + @property + def textbody(self): + body = self._itemobj.find(class_='search-result__body') + if body: + return "\n\n".join([item.text for item in body.children]) + + def attributes(self): + return { + **Resource.attributes(self), + 'textbody': self.textbody, + } + + +class BroadcastTrack(Resource): + def id(self): + return f'{SearchItem.id(self)}.{self.track.id()}' + + @property + def title(self): + return f'{self.track.artist} - {self.track.title} (Broadcast on {self.broadcast_date} by {self.program_title})' + + RE = re.compile(r'Played (?P<played_date>[^/]+) by (?P<played_by>.+)View all plays$') + @property + def played(self): + return self.RE.match(self._itemobj.find(class_='search-result__meta-info').text) + + @property + def broadcast_date(self): + return time.strftime(DATE_FORMAT, 
time.strptime(self.played['played_date'], '%A %d %b %Y')) + + @property + def program_title(self): + return self.played['played_by'] + + @property + def track(self): + return Track( + Scraper.resource_path_for(self._itemobj.find(class_='search-result__meta-links').find('a').attrs['href']), + self._itemobj.find(class_='search-result__track-artist').text, + self._itemobj.find(class_='search-result__track-title').text, + ) + + def attributes(self): + return { + 'broadcast_date': self.broadcast_date, + 'program_title': self.program_title, + } + + def relationships(self): + return { + 'broadcast': { + 'links': { + # TODO - FIXME: + # Nb. this shouldn't be `self.path` as this class is a BroadcastTrack not a Broadcast + # which _also_ means that BroadcastTrack shouldn't have a `links.self` + 'related': self.path + }, + 'data': { + 'type': 'broadcast', + 'id': Resource.id(self), + }, + }, + 'track': { + 'links': { + 'related': self.track.path, + }, + 'data': { + 'type': self.track.type, + 'id': self.track.id(), + }, + }, + } + + def included(self): + return [ + self.track.to_dict(), + ] + + +class PlayableResource(Resource): + @property + def _playable(self): + view_playable_div = self._itemobj.find(lambda tag:tag.name == 'div' and 'data-view-playable' in tag.attrs) + if view_playable_div: + return json.loads(view_playable_div.attrs['data-view-playable'])['items'][0] + else: + return {} + + @property + def _data(self): + return self._playable.get('data', {}) + + @property + def _audio_data(self): + return self._data.get('audio_file', {}) + + @property + def _on_air_toggle(self): + dataview = self._itemobj.attrs.get('data-view-on-air-toggle') + if dataview: + return json.loads(dataview) + + @property + def _on_air_status(self): + toggle = self._on_air_toggle + if toggle: + start = toggle.get('startTime').split('+') + end = toggle.get('endTime').split('+') + td = timedelta(hours=int(start[1][:2])) + try: + start = strptime(start[0], '%Y-%m-%dT%H:%M:%S') - td + end = 
strptime(end[0], '%Y-%m-%dT%H:%M:%S') - td + return start, end + except (ValueError, TypeError) as e: + pass + return None, None + + @property + def type(self): + t = self._playable.get('type') + if t == 'clip': + return 'segment' + if t == 'broadcast_episode': + return 'broadcast' + else: + return t + + def id(self): + if self._playable: + return str(self._playable.get('source_id')) + + @property + def path(self): + return + + @property + def title(self): + if self._data: + return self._data.get('title') + else: + start, end = self._on_air_status + localtime = datetime.utcnow() + title = None + + if start and end and self._on_air_toggle: + if start > localtime: + title = self._itemobj.find(class_=self._on_air_toggle.get('upcomingEl')[1:]) + if start < localtime and end > localtime: + title = self._itemobj.find(class_=self._on_air_toggle.get('onAirEl')[1:]) + if end < localtime: + title = self._itemobj.find(class_=self._on_air_toggle.get('offAirEl')[1:]) + elif self._on_air_toggle: + title = self._itemobj.find(class_=self._on_air_toggle.get('offAirEl')[1:]) + + return title.find('span').text if title else None + + @property + def subtitle(self): + return self._data.get('subtitle') + + @property + def textbody(self): + return None + + @property + def _itemtime(self): + if self.subtitle: + try: + return time.strptime(self.subtitle, '%d %B %Y') + except ValueError: + return + + @property + def date(self): + if self._itemtime: + return time.strftime(DATE_FORMAT, self._itemtime) + + @property + def year(self): + if self._itemtime: + return self._itemtime[0] + + @property + def aired(self): + return self.date + + @property + def duration(self): + if self._audio_data: + return round(self._audio_data.get('duration', 0)) + elif self._data: + return round(self._data.get('duration', 0)) + + @property + def url(self): + if self._data and self._data.get('timestamp'): + return f"https://ondemand.rrr.org.au/getclip?bw=h&l={self.duration}&m=r&p=1&s={self._data.get('timestamp')}" + 
elif self._audio_data and self._audio_data.get('path'): + return self._audio_data.get('path') + else: + start, end = self._on_air_status + localtime = datetime.utcnow() + + if start and end: + if start < localtime and end > localtime: + return 'https://ondemand.rrr.org.au/stream/ws-hq.m3u' + + @property + def thumbnail(self): + if self._data: + return self._data.get('image', {}).get('path') + else: + img = self._itemobj.find(class_='audio-summary__image') + if img: + return img.attrs.get('data-src') + + def attributes(self): + return { + 'title': self.title, + 'subtitle': self.subtitle, + 'textbody': self.textbody, + 'date': self.date, + 'year': self.year, + 'aired': self.aired, + 'duration': self.duration, + 'url': self.url, + 'thumbnail': self.thumbnail, + } + + +class ProgramBroadcast(PlayableResource): + ''' + <div data-view-playable=' + { + "component":"episode_player", + "formattedDuration":"02:00:00", + "shareURL":"https://www.rrr.org.au/explore/programs/the-international-pop-underground/episodes/22347-the-international-pop-underground-19-october-2022", + "sharedMomentBaseURL":"https://www.rrr.org.au/shared/broadcast-episode/22347", + "items":[ + { + "type":"broadcast_episode", + "source_id":22347, + "player_item_id":269091, + "data":{ + "title":"The International Pop Underground – 19 October 2022", + "subtitle":"19 October 2022", + "timestamp":"20221019200000", + "duration":7200, + "platform_id":1, + "image":{ + "title":"International Pop Underground program image" + "path":"https://cdn-images-w3.rrr.org.au/81wyES6vU8Hyr8MdSUu_kY6cBGA=/300x300/https://s3.ap-southeast-2.amazonaws.com/assets-w3.rrr.org.au/assets/041/aa8/63b/041aa863b5c3655493e6771ea91c13bb55e94d24/International%20Pop%20Underground.jpg" + } + } + } + ] + }" + ''' + + + +class ProgramBroadcastSegment(PlayableResource): + ''' + <div data-view-playable=' + { + "component": "player_buttons", + "size": "normal", + "items": [ + { + "type": "clip", + "source_id": 3021, + "player_item_id": 270803, + 
"data": { + "title": "International Pop Underground: Guatemalan Cellist/Songwriter Mabe Fratti Seeks Transcendence", + "subtitle": "19 October 2022", + "platform_id": 1, + "timestamp": "20221019211747", + "duration": 1097, + "image": { + "title": "Mabe Fratti", + "path": "https://cdn-images-w3.rrr.org.au/1v6kamv_8_4xheocBJCa6FKZY_8=/300x300/https://s3.ap-southeast-2.amazonaws.com/assets-w3.rrr.org.au/assets/3a7/61f/143/3a761f1436b97a186be0cf578962436d9c5404a8/Mabe-Fratti.jpg" + } + } + } + ] + } + '><div class="d-flex"> + ''' + + + +class ProgramBroadcastTrack(Resource, ExternalMedia): + _media = {} + + def id(self): + if self.media: + return self.media + else: + return re.sub(r'[\[\]\{\}\(\)\.\/\\,\:\;]', '', f'{self.artist}-{self.title}'.lower().replace(' ', '-')) + + @property + def type(self): + if self.media: + return self._media.get('plugin') + else: + return super().type + + @property + def artist(self): + return self._itemobj.find(class_='audio-summary__track-artist').text.strip() + + @property + def broadcast_artist(self): + params = { 'q': self.artist } + return '/tracks/search?' + urlencode(params) + + @property + def broadcast_track(self): + params = { 'q': f'{self.title} - {self.artist}' } + return '/tracks/search?' 
+ urlencode(params) + + @property + def title(self): + return self._itemobj.find(class_='audio-summary__track-title').text.strip() + + def _get_media(self): + if not self._media: + href = self._itemobj.find(class_='audio-summary__track-title').attrs.get('href') + if href: + self._media = self.media_items([{'src': href}], fetch_album_art=True)[0] + return self._media if self._media else {} + + @property + def media(self): + return self._get_media().get('media_id') + + @property + def thumbnail(self): + return self._get_media().get('thumbnail') + + @property + def background(self): + return self._get_media().get('background') + + @property + def duration(self): + return self._get_media().get('duration') + + def attributes(self): + attr = { + 'artist': self.artist, + 'title': self.title, + } + if self.thumbnail: + attr['thumbnail'] = self.thumbnail + if self.background: + attr['background'] = self.background + if self.duration: + attr['duration'] = self.duration + return attr + + def links(self): + return { + 'broadcast_artist': self.broadcast_artist, + 'broadcast_track': self.broadcast_track, + } + + +class BroadcastCollection(Resource): + @property + def type(self): + return 'broadcast_index' + + def id(self): + return self.path + + @property + def _playable(self): + view_playable_div = self._itemobj.find(lambda tag:tag.name == 'div' and 'data-view-playable' in tag.attrs) + if view_playable_div: + return json.loads(view_playable_div.attrs['data-view-playable'])['items'][0] + else: + return {} + + @property + def _data(self): + return self._playable.get('data', {}) + + @property + def duration(self): + if self._data: + return round(self._data.get('duration')) + + @property + def title(self): + return self._itemobj.find(class_='card__title').text + + @property + def thumbnail(self): + programimage = self._itemobj.find(class_='card__background-image') + if programimage: + programimagesrc = re.search(r"https://[^']+", programimage.attrs.get('style')) + if 
programimagesrc: + return programimagesrc[0] + + programimage = self._itemobj.find(class_='scalable-image__image') + if programimage: + return programimage.attrs.get('data-src') + + @property + def textbody(self): + cardbody = self._itemobj.find(class_='card__meta') + if cardbody: + return cardbody.text + + def attributes(self): + return { + 'title': self.title, + 'textbody': self.textbody, + 'thumbnail': self.thumbnail, + 'duration': self.duration, + } + + + +class AudioItem: + + @classmethod + def factory(cls, item): + cardbody = item.find(class_='card__body') + if cardbody: + textbody = cardbody.text + else: + cardbody = item.find(class_='card__meta') + if cardbody: + textbody = cardbody.text + else: + textbody = '' + + view_playable_div = item.find(lambda tag:tag.name == 'div' and 'data-view-playable' in tag.attrs) + if view_playable_div: + view_playable = view_playable_div.attrs['data-view-playable'] + itemobj = json.loads(view_playable)['items'][0] + + if 'data-view-account-toggle' in view_playable_div.parent.parent.attrs: + itemobj['subscription_required'] = True + else: + itemobj['subscription_required'] = False + + if itemobj['type'] == 'clip': + obj = Segment(item, itemobj, textbody) + elif itemobj['type'] == 'broadcast_episode': + obj = Broadcast(item, itemobj, textbody) + elif itemobj['type'] == 'audio_archive_item': + obj = Archive(item, itemobj, textbody) + elif itemobj['type'] == 'podcast_episode': + obj = Podcast(item, itemobj, textbody) + else: + obj = AudioItem(item, itemobj, textbody) + return obj.to_dict() + else: + # Should we _also_ have a NonPlayable AudioItem ? 
+ return None + + + def __init__(self, item, itemobj, textbody): + self._item = item + self._itemobj = itemobj + self._itemdata = itemobj['data'] + self.textbody = textbody + + @property + def resource_path(self): + card_anchor = self._item.find(class_='card__anchor') + if card_anchor: + return Scraper.resource_path_for(card_anchor.attrs['href']) + + @property + def type(self): + return self.__class__.__name__.lower() + + @property + def subscription_required(self): + return self._itemobj.get('subscription_required') + + @property + def id(self): + return str(self._itemobj['source_id']) + + @property + def title(self): + return self._itemdata['title'] + + @property + def subtitle(self): + return self._itemdata['subtitle'] + + @property + def _itemtime(self): + return time.strptime(self._itemdata['subtitle'], '%d %B %Y') + + @property + def date(self): + return time.strftime(DATE_FORMAT, self._itemtime) + + @property + def year(self): + return self._itemtime[0] + + @property + def aired(self): + return self.date + + @property + def duration(self): + duration = self._itemobj.get('data', {}).get('duration', {}) + if not duration: + audio_file = self._itemdata.get('audio_file') + if audio_file: + duration = audio_file['duration'] + else: + duration = 0 + return round(duration) + + @property + def thumbnail(self): + return self._itemdata['image']['path'] if 'image' in self._itemdata.keys() else '' + + @property + def url(self): + audio_file = self._itemdata.get('audio_file') + if audio_file: + return audio_file['path'] + else: + ts = self._itemdata['timestamp'] + l = self.duration + return 'https://ondemand.rrr.org.au/getclip?bw=h&l={}&m=r&p=1&s={}'.format(l, ts) + + def to_dict(self): + item = { + 'type': self.type, + 'id': self.id, + 'attributes': { + 'title': self.title, + 'subtitle': self.subtitle, + 'textbody': self.textbody, + 'date': self.date, + 'year': self.year, + 'aired': self.aired, + 'duration': self.duration, + 'url': self.url, + 'thumbnail': 
self.thumbnail, + }, + 'links': { + 'self': self.resource_path, + } + } + if self.subscription_required: + item['links']['subscribe'] = '/subscribe' + return item + + +class Archive(AudioItem): + '' + +class Broadcast(AudioItem): + '' + +class Segment(AudioItem): + '' + +class Podcast(AudioItem): + '' + + +if __name__ == "__main__": + print(json.dumps(Scraper.call(sys.argv[1]))) diff --git a/plugin.audio.tripler/resources/lib/tripler.py b/plugin.audio.tripler/resources/lib/tripler.py index 02ffe3b94a..123ebfc084 100644 --- a/plugin.audio.tripler/resources/lib/tripler.py +++ b/plugin.audio.tripler/resources/lib/tripler.py @@ -1,169 +1,630 @@ from bs4 import BeautifulSoup -import json, time, sys, os -from xbmcswift2 import Plugin, ListItem, xbmcgui +from datetime import datetime, timedelta +import time, sys, os, json, re +import pytz from xbmcaddon import Addon +import xbmcgui +import xbmcplugin +import xbmc -IS_PY3 = sys.version_info[0] > 2 -if IS_PY3: - from urllib.request import Request, urlopen -else: - from urllib2 import Request, urlopen +from resources.lib.scraper import Scraper +from resources.lib.website import TripleRWebsite +from resources.lib.media import Media + +from urllib.parse import parse_qs, urlencode, unquote_plus, quote_plus class TripleR(): def __init__(self): - self.plugin = Plugin() - respath = os.path.join(Addon().getAddonInfo('path'), 'resources') - self.icon = os.path.join(respath, 'icon.png') - self.fanart = os.path.join(respath, 'fanart.png') - self.nextpage = self.plugin.get_string(30005) + self.matrix = '19.' 
in xbmc.getInfoLabel('System.BuildVersion') + self.handle = int(sys.argv[1]) + self.id = 'plugin.audio.tripler' + self.url = 'plugin://' + self.id + self.tz = pytz.timezone('Australia/Melbourne') + self.addon = Addon() + self.dialog = xbmcgui.Dialog() + self._respath = os.path.join(self.addon.getAddonInfo('path'), 'resources') + self.icon = os.path.join(self._respath, 'icon.png') + self.fanart = os.path.join(self._respath, 'fanart.png') + self.website = TripleRWebsite(os.path.join(self._respath, 'cookies.lwp')) + self._signed_in = -1 + self.supported_plugins = Media.RE_MEDIA_URLS.keys() + quality = self.addon.getSetting('image_quality') + self.quality = int(quality) if quality else 1 + self.media = Media(self.quality) + + self.nextpage = self.get_string(30004) + self.lastpage = self.get_string(30005) + + def get_string(self, string_id): + return self.addon.getLocalizedString(string_id) + + def _notify(self, title, message): + xbmc.log(f'TripleR plugin notification: {title} - {message}', xbmc.LOGDEBUG) + self.dialog.notification(title, message, icon=self.icon) + + def parse(self): + args = parse_qs(sys.argv[2][1:]) + segments = sys.argv[0].split('/')[3:] + xbmc.log("TripleR plugin called: " + str(sys.argv), xbmc.LOGDEBUG) + + if 'schedule' in segments and args.get('picker'): + date = self.select_date(args.get('picker')[0]) + if date: + args['date'] = date + + k_title = args.get('k_title', [None])[0] + if k_title: + xbmcplugin.setPluginCategory(self.handle, k_title) + del args['k_title'] + + if args.get('picker'): + del args['picker'] + + if 'search' in segments and not args.get('q'): + search = self.search(tracks=('tracks' in segments)) + if search: + args['q'] = search + else: + return + + if 'ext_search' in segments: + self.ext_search(args) + return + + path = '/' + '/'.join(segments) + if args: + path += '?' 
+ urlencode(args, doseq=True) - def run(self): - self.plugin.run() + if len(segments[0]) < 1: + return self.main_menu() + elif 'subscribe' in segments: + self._notify(self.get_string(30084), self.get_string(30083)) + elif 'settings' in segments: + self.login() + Addon().openSettings() + elif 'sign-in' in segments: + if self.sign_in(): + xbmc.executebuiltin("Container.Refresh") + elif 'sign-out' in segments: + self.sign_out() + xbmc.executebuiltin("Container.Refresh") + elif 'entries' in segments: + if self.addon.getSettingBool('authenticated'): + self.subscriber_giveaway(path=path) + else: + self._notify(self.get_string(30073), self.get_string(30076)) + elif 'play' in args: + self.play_stream(handle=self.handle, args=args, segments=segments) + return None + else: + scraped = Scraper.call(path) + parsed = self.parse_programs(**scraped, args=args, segments=segments, k_title=k_title) + if parsed: + return parsed def main_menu(self): items = [ - { - 'label': self.plugin.get_string(30001), - 'path': "https://ondemand.rrr.org.au/stream/ws-hq.m3u", - 'thumbnail': self.icon, - 'properties': { - 'StationName': self.plugin.get_string(30000), - 'fanart_image': self.fanart - }, - 'info': { - 'mediatype': 'music' - }, - 'is_playable': True - }, - {'label': self.plugin.get_string(30002), 'path': self.plugin.url_for(segment_menu, page=1)}, - {'label': self.plugin.get_string(30003), 'path': self.plugin.url_for(program_menu, page=1)}, - {'label': self.plugin.get_string(30004), 'path': self.plugin.url_for(audio_archives, page=1)}, + self.livestream_item(), + {'label': self.get_string(30032), 'path': self.url + '/programs', 'icon': 'DefaultPartyMode.png'}, + {'label': self.get_string(30033), 'path': self.url + '/schedule', 'icon': 'DefaultYear.png'}, + # {'label': self.get_string(30034), 'path': self.url + '/broadcasts', 'icon': 'DefaultPlaylist.png'}, + {'label': self.get_string(30035), 'path': self.url + '/segments', 'icon': 'DefaultPlaylist.png'}, + {'label': 
self.get_string(30036), 'path': self.url + '/archives', 'icon': 'DefaultPlaylist.png'}, + {'label': self.get_string(30037), 'path': self.url + '/featured_albums', 'icon': 'DefaultMusicAlbums.png'}, + {'label': self.get_string(30038), 'path': self.url + '/soundscapes', 'icon': 'DefaultSets.png'}, + {'label': self.get_string(30042), 'path': self.url + '/videos', 'icon': 'DefaultMusicVideos.png'}, + {'label': self.get_string(30039), 'path': self.url + '/events', 'icon': 'DefaultPVRGuide.png'}, + {'label': self.get_string(30040), 'path': self.url + '/giveaways', 'icon': 'DefaultAddonsRecentlyUpdated.png'}, + {'label': self.get_string(30041), 'path': self.url + '/search', 'icon': 'DefaultMusicSearch.png'}, ] - listitems = [ListItem.from_dict(**item) for item in items] - return listitems - - def segment_menu(self, page): - programs = self.get_programs("segments", page) - items = self.parse_programs(programs, page) - if len(items) > 0: - items.append({'label': self.nextpage, 'path': self.plugin.url_for(segment_menu, page=int(page) + 1)}) - return items - - def program_menu(self, page): - programs = self.get_programs("episodes", page) - items = self.parse_programs(programs, page) - if len(items) > 0: - items.append({'label': self.nextpage, 'path': self.plugin.url_for(program_menu, page=int(page) + 1)}) - return items - - def audio_archives(self, page): - programs = self.get_programs("archives", page) - items = self.parse_programs(programs, page) - if len(items) > 0: - items.append({'label': self.nextpage, 'path': self.plugin.url_for(audio_archives, page=int(page) + 1)}) - return items - - def get_programs(self, collection, page): - output_final = [] - - url = "https://www.rrr.org.au/on-demand/{}?page={}".format(collection, page) - ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36" - req = Request(url, headers={'User-Agent': ua}) - html = urlopen(req) - soup = BeautifulSoup(html, 'html.parser') - - divs 
= soup.findAll(class_='card__text') - - for item in divs: - cardbody = item.find(class_='card__body') - if not cardbody: - continue - textbody = ' '.join(cardbody.strings) - if len(item.contents) < 3: + if self.login(): + emailaddress = self.addon.getSetting('emailaddress') + fullname = self.addon.getSetting('fullname') + name = fullname if fullname else emailaddress + items.append( + { + 'label': f'{self.get_string(30014)} ({name})', + 'path': self.url + '/sign-out', + 'icon': 'DefaultUser.png', + } + ) + else: + items.append( + { + 'label': self.get_string(30013), + 'path': self.url + '/sign-in', + 'icon': 'DefaultUser.png', + } + ) + + listitems = [] + + for item in items: + path = self._k_title(item['path'], item['label']) + li = xbmcgui.ListItem(item['label'], '', path, True) + li.setArt( + { + 'icon': item['icon'], + 'fanart': self.fanart, + } + ) + if 'properties' in item: + li.setProperties(item['properties']) + listitems.append((path, li, item.get('properties') == None)) + + xbmcplugin.addDirectoryItems(self.handle, listitems, len(listitems)) + xbmcplugin.addSortMethod(self.handle, xbmcplugin.SORT_METHOD_UNSORTED) + xbmcplugin.endOfDirectory(self.handle) + + def livestream_item(self): + item = { + 'label': self.get_string(30001), + 'path': 'https://ondemand.rrr.org.au/stream/ws-hq.m3u', + 'icon': self.icon, + 'properties': { + 'StationName': self.get_string(30000), + 'IsPlayable': 'true' + }, + } + return item + + def _sub_item(self, text): + path = self.url + '/settings' + li = xbmcgui.ListItem(text, '', path, True) + li.setArt({'thumbnail': os.path.join(self._respath, 'qr-subscribe.png')}) + return (path, li, True) + + def _k_title(self, url, title): + if title: + return url + ('?' if '?' 
not in url else '&') + 'k_title=' + quote_plus(title) + else: + return url + + def select_date(self, self_date): + self_date_str = '/'.join([i for i in self_date.split('-')[::-1]]) + dialog_title = self.get_string(30065) % (self.get_string(30033)) + picked_date_str = self.dialog.input(dialog_title, defaultt=str(self_date_str), type=xbmcgui.INPUT_DATE) + + if picked_date_str: + date_str = '-'.join([i.zfill(2) for i in picked_date_str.replace(' ', '').split('/')[::-1]]) + current = datetime(*(time.strptime(date_str, '%Y-%m-%d')[0:6]), tzinfo=self.tz) + daydelta = datetime.now(self.tz) - current - timedelta(hours=6) + if daydelta.days != 0: + return date_str + + return None + + def context_item(self, label, path, plugin=None): + plugin = plugin if plugin else self.id + return (self.get_string(label), f'Container.Update(plugin://{plugin}{path})') + + def play_stream(self, handle, args, segments): + li = xbmcgui.ListItem( + label = args.get('title', [''])[0], + path = unquote_plus(args.get('play', [''])[0]), + offscreen=True, + ) + li.setArt( + { + 'thumb': unquote_plus(args.get('thumbnail', [''])[0]).replace(' ', '%20'), + 'fanart': unquote_plus(args.get('fanart', [''])[0]).replace(' ', '%20'), + } + ) + if not self.matrix: + vi = li.getVideoInfoTag() + vi.setTitle(args.get('title', [''])[0]) + vi.setMediaType('song') + else: + li.setInfo('video', + { + 'title': args.get('title', [''])[0], + 'mediatype': 'song', + } + ) + xbmcplugin.setResolvedUrl(self.handle, True, li) + + def parse_programs(self, data, args, segments, links=None, k_title=None): + items = [] + + for menuitem in data: + if menuitem is None: continue - if 'data-view-playable' not in item.contents[-1].attrs: + m_id, m_type = menuitem.get('id', ''), menuitem.get('type', '') + m_links = menuitem.get('links', {}) + m_self = m_links.get('self', '/') + m_sub = m_links.get('subscribe') + m_playlist = m_links.get('playlist') + attributes = menuitem.get('attributes', {}) + if attributes is None: continue - 
viewplayable = item.contents[-1].attrs['data-view-playable'] - mediaurl = '' - try: - itemobj = json.loads(viewplayable)['items'][0] - itemdata = itemobj['data'] - if itemobj['type'] == 'clip': - ts = itemdata['timestamp'] - l = int(itemdata['duration']) - mediaurl = 'https://ondemand.rrr.org.au/getclip?bw=h&l={}&m=r&p=1&s={}'.format(l, ts) - elif itemobj['type'] == 'broadcast_episode': - ts = itemdata['timestamp'] - mediaurl = 'https://ondemand.rrr.org.au/getclip?bw=h&l=0&m=r&p=1&s={}'.format(ts) + + textbody = attributes.get('textbody', '') + thumbnail = attributes.get('thumbnail', '') + fanart = attributes.get('background', self.fanart) + pathurl = None + + if attributes.get('subtitle') and not ('soundscapes' in segments and len(segments) > 1): + textbody = '\n'.join((self.get_string(30007) % (attributes.get('subtitle')), textbody)) + + if attributes.get('venue'): + textbody = '\n'.join((attributes['venue'], textbody)) + + if m_type in self.supported_plugins: + title = attributes.get('title', '') + artist = attributes.get('artist') + if artist: + title = f'{artist} - {title}' + pathurl = self.media.parse_media_id(m_type, m_id, quote_plus(title.split('(')[0].strip())) + + name = Media.RE_MEDIA_URLS[m_type].get('name') + title = f'{title} ({name})' + textbody = self.get_string(30008) % (name) + '\n' + textbody + + if 'bandcamp' in m_type or 'apple' in m_type: + thumbnail = self.media.parse_art(thumbnail) + if fanart != self.fanart: + fanart = self.media.parse_art(fanart) + + if not thumbnail: + thumbnail = 'DefaultMusicSongs.png' + + if m_type in ['bandcamp_track', 'youtube']: + is_playable = True else: - if 'audio_file' not in list(itemdata.keys()): - continue - mediaurl = itemdata['audio_file']['path'] - - itemtime = time.strptime(itemdata['subtitle'], '%d %B %Y') - itemtimestr = time.strftime('%Y-%m-%d', itemtime) - output_final.append({ - 'id': itemobj['source_id'], - 'title': itemdata['title'], - 'desc': '\n'.join((self.plugin.get_string(30007), '%s')) % 
(itemdata['subtitle'], textbody), - 'date': time.strftime('%d.%m.%Y', itemtime), - 'year': int(itemtimestr[0:4]), - 'aired': itemtimestr, - 'duration': int(itemdata['duration']) if 'duration' in list(itemdata.keys()) else 0, - 'url': mediaurl, - 'art': itemdata['image']['path'] if 'image' in list(itemdata.keys()) else '' - }) - except: - continue + is_playable = False + else: + title = attributes.get('title', '') + artist = attributes.get('artist') + pathurl = attributes.get('url') + if artist: + title = f'{artist} - {title}' + if m_type == 'broadcast' and pathurl: + title = f'{title} ({self.get_string(30050)})' + if m_type == 'broadcast_index' and 'schedule' in segments: + title = f'{title} ({self.get_string(30049)})' + if m_type == 'segment': + title = f'{title} ({self.get_string(30051)})' + on_air = attributes.get('on_air') + if on_air: + title = f'{title} ({on_air})' + is_playable = True - return output_final + if m_type == 'program_broadcast_track': + title = f'{title} ({self.get_string(30052)})' + thumbnail = 'DefaultMusicSongs.png' + ext_search = m_links.get('broadcast_track').replace('search', 'ext_search') + pathurl = self._k_title(self.url + ext_search, attributes.get('title')) + is_playable = False - def parse_programs(self, programs, page): - items = [] + icon = thumbnail - for program in programs: - item = { - 'label': program['title'], - 'label2': self.plugin.get_string(30006) % (program['aired']), - 'info_type': 'video', - 'info': { - 'count': program['id'], - 'title': program['title'], - 'plot': program['desc'], - 'date': program['date'], - 'year': program['year'], - 'premiered': program['aired'], - 'aired': program['aired'], - 'duration': program['duration'], - 'mediatype': 'song' - }, - 'properties': { - 'StationName': self.plugin.get_string(30000), - 'fanart_image': self.fanart - }, - 'path': program['url'], - 'thumbnail': program['art'], - 'is_playable': True - } - listitem = ListItem.from_dict(**item) - items.append(listitem) + if m_sub: + if 
not self.login() or not self.subscribed(): + icon = 'OverlayLocked.png' + title = f'{self.get_string(30081)} - {title}' + textbody = f'{self.get_string(30081)}\n{textbody}' + pathurl = self.url + m_sub + is_playable = False + else: + title = f'{self.get_string(30084)} - {title}' - return items + if m_type == 'giveaway' and 'entries' in m_self.split('/'): + title += ' ({})'.format(self.get_string(30069)) + textbody = '\n'.join((self.get_string(30070), textbody)) -instance = TripleR() + if attributes.get('start') and attributes.get('end'): + datestart = datetime.fromisoformat(attributes['start']) + dateend = datetime.fromisoformat(attributes['end']) + start = datetime.strftime(datestart, '%H:%M') + end = datetime.strftime(dateend, '%H:%M') + textbody = f'{start} - {end}\n{textbody}' + title = ' - '.join((start, end, title)) + + if attributes.get('aired'): + aired = self.get_string(30006) % (attributes['aired']) + else: + aired = attributes.get('date', '') + + if pathurl: + is_playable = not pathurl.startswith('plugin://') + if is_playable: + encodedurl = quote_plus(pathurl) + pathurl = '{}/{}?play={}&title={}&thumbnail={}&fanart={}'.format( + self.url, + '/'.join(segments), + quote_plus(encodedurl), + quote_plus(title), + quote_plus(thumbnail), + quote_plus(fanart), + ) + mediatype = 'song' + info_type = 'video' + else: + pathurl = self._k_title(self.url + m_self, attributes.get('title')) + is_playable = False + mediatype = '' + info_type = 'video' + + date, year = attributes.get('date', ''), attributes.get('year', '') + if date: + date = time.strftime('%d.%m.%Y', time.strptime(date, '%Y-%m-%d')) + year = date[0] + else: + # prevents log entries regarding empty date string + date = time.strftime('%d.%m.%Y', time.localtime()) + + + li = xbmcgui.ListItem(title, aired, pathurl, True) + li.setArt( + { + 'icon': icon, + 'thumb': thumbnail, + 'fanart': fanart, + } + ) + li.setProperties({ + 'StationName': self.get_string(30000), + 'IsPlayable': 'true' if is_playable else 
'false', + }) + + context_menu = [] + + if m_playlist: + textbody += f'\n\n{self.get_string(30100)}' % (self.get_string(30101)) + context_menu.append(self.context_item(30101, m_playlist)) + + if 'broadcast_track' in m_links: + if m_type != 'program_broadcast_track': + textbody += f'\n{self.get_string(30100)}' % (self.get_string(30102)) + ext_search = m_links.get('broadcast_track').replace('search', 'ext_search') + context_menu.append(self.context_item(30102, ext_search)) + + if context_menu: + li.addContextMenuItems(context_menu) + + if not self.matrix: + vi = li.getVideoInfoTag() + # vi.setDbId((abs(hash(m_id)) % 2147083647) + 400000) + vi.setTitle(title) + vi.setPlot(textbody) + vi.setDateAdded(date) + if year.isdecimal(): + vi.setYear(int(year)) + vi.setFirstAired(aired) + vi.setPremiered(aired) + if attributes.get('duration', 0) > 0: + vi.setDuration(attributes.get('duration')) + if mediatype: + vi.setMediaType(mediatype) + else: # Matrix v19.0 + vi = { + 'title': title, + 'plot': textbody, + 'date': date, + 'year': year, + 'premiered': aired, + 'aired': aired, + } + + if attributes.get('duration', 0) > 0: + vi['duration'] = attributes.get('duration') + if mediatype: + vi['mediatype'] = mediatype + + li.setInfo('video', vi) + + items.append((pathurl, li, not is_playable)) + + + if 'schedule' in segments: + self_date = links.get('self', '?date=').split('?date=')[-1] + next_date = links.get('next', '?date=').split('?date=')[-1] + + if links.get('next'): + path = self.url + self._k_title(links['next'], k_title) + li = xbmcgui.ListItem(self.get_string(30061) % (next_date), '', path, True) + items.insert(0, (path, li, True)) + + path = self.url + self._k_title(f'/schedule?picker={self_date}', k_title) + li = xbmcgui.ListItem(self.get_string(30065) % (self_date), '', path, True) + li.setArt({'icon': 'DefaultPVRGuide.png'}) + items.insert(0, (path, li, True)) + + elif 'giveaways' in segments: + if not self.login() or not self.subscribed(): + items.insert(0, 
self._sub_item(self.get_string(30082))) + + elif links and links.get('next'): + if len(items) > 0: + if links.get('next'): + path = self.url + self._k_title(links['next'], k_title) + li = xbmcgui.ListItem(self.nextpage, '', path, True) + items.append((path, li, True)) + if links.get('last'): + path = self.url + self._k_title(links['last'], k_title) + li = xbmcgui.ListItem(self.lastpage, '', path, True) + items.append((path, li, True)) -@instance.plugin.route('/') -def main_menu(): - return instance.main_menu() -@instance.plugin.route('/segment_menu/<page>') -def segment_menu(page): - return instance.segment_menu(page) + if 'archives' in segments: + if not self.login() or not self.subscribed(): + items.insert(0, self._sub_item(self.get_string(30083))) -@instance.plugin.route('/program_menu/<page>') -def program_menu(page): - return instance.program_menu(page) + elif 'search' in segments and 'tracks' not in segments: + link = links.get('self').split('?page=')[0] + path = self.url + '/tracks' + link + li = xbmcgui.ListItem(self.get_string(30066), '', path, True) + li.setArt({'icon': 'DefaultMusicSearch.png'}) + items.insert(0, (path, li, True)) -@instance.plugin.route('/audio_archives/<page>') -def audio_archives(page): - return instance.audio_archives(page) + xbmcplugin.addSortMethod(self.handle, xbmcplugin.SORT_METHOD_UNSORTED, labelMask='%L', label2Mask='%D') + if len(segments) > 3 and 'broadcasts' in segments[2]: + # broadcast playlist + xbmcplugin.setContent(self.handle, 'episodes') + elif 'segments' in segments or 'archives' in segments: + # any segment or archive listing + xbmcplugin.setContent(self.handle, 'episodes') + elif len(segments) == 3 and 'broadcasts' in segments: + # index of broadcasts + xbmcplugin.setContent(self.handle, 'songs') + elif len(segments) == 2 and 'soundscapes' in segments: + # soundscape + xbmcplugin.setContent(self.handle, 'songs') + elif len(segments) == 2 and 'featured_albums' in segments: + # featured albums + 
xbmcplugin.setContent(self.handle, 'songs') + else: + xbmcplugin.setContent(self.handle, '') + + xbmcplugin.addDirectoryItems(self.handle, items, len(items)) + xbmcplugin.endOfDirectory(self.handle) + + def search(self, tracks=False): + prompt = self.get_string(30068 if tracks else 30067) + return self.dialog.input(prompt, type=xbmcgui.INPUT_ALPHANUM) + + def ext_search(self, args): + q = args.get('q', ['']) + title = q[0] + opts = q + if ' - ' in q[0]: + qsplit = q[0].split(' - ') + opts.append(qsplit[0]) + opts.append(qsplit[1]) + + yt_addon = 'special://home/addons/plugin.video.youtube/' + yt_icon = yt_addon + ('icon.png' if self.matrix else 'resources/media/icon.png') + + options = [] + for opt in opts: + query = urlencode({'q': [opt]}, doseq=True) + options.append({ + 'label': self.get_string(30105) % opt, + 'path': self.url + '/tracks/search?' + query, + 'icon': self.icon, + }) + for opt in opts: + query_sub = urlencode({'query': [opt]}, doseq=True) + options.append({ + 'label': self.get_string(30106) % opt, + 'path': 'plugin://plugin.audio.kxmxpxtx.bandcamp/?mode=search&action=search&' + query_sub, + 'icon': 'special://home/addons/plugin.audio.kxmxpxtx.bandcamp/icon.png', + }) + for opt in opts: + query = urlencode({'q': [opt]}, doseq=True) + options.append({ + 'label': self.get_string(30107) % opt, + 'path': 'plugin://plugin.video.youtube/kodion/search/query/?' 
+ query, + 'icon': yt_icon, + }) + + listitems = [] + for item in options: + li = xbmcgui.ListItem(item['label'], '', item['path'], True) + li.setArt( + { + 'thumb': item.get('icon', 'DefaultMusicSearch.png'), + 'icon': 'DefaultMusicSearch.png', + 'fanart': self.fanart, + } + ) + listitems.append((item['path'], li, True)) + + xbmcplugin.setPluginCategory(self.handle, self.get_string(30104) % title) + xbmcplugin.addDirectoryItems(self.handle, listitems, len(listitems)) + xbmcplugin.addSortMethod(self.handle, xbmcplugin.SORT_METHOD_UNSORTED) + xbmcplugin.endOfDirectory(self.handle) + + def sign_in(self): + emailaddress = self.dialog.input(self.get_string(30015), type=xbmcgui.INPUT_ALPHANUM) + if emailaddress == '': + return False + password = self.dialog.input(self.get_string(30016), type=xbmcgui.INPUT_ALPHANUM, option=xbmcgui.ALPHANUM_HIDE_INPUT) + if password == '': + return False + return self.login(prompt=True, emailaddress=emailaddress, password=password) + + def login(self, prompt=False, emailaddress=None, password=None): + if self._signed_in != -1: + return self._signed_in + if self.addon.getSettingBool('authenticated') and self.website.logged_in(): + return True + + emailSetting = self.addon.getSetting('emailaddress') + if emailaddress is None: + emailaddress = emailSetting + + logged_in = self.website.login(emailaddress, password) + + if logged_in: + if prompt: + self._notify(self.get_string(30077) % (emailaddress), self.get_string(30078)) + if not self.addon.getSettingBool('authenticated'): + self.addon.setSetting('subscribed-check', '0') + self.addon.setSettingBool('authenticated', True) + self.subscribed() + + if emailSetting == '': + self.addon.setSetting('emailaddress', emailaddress) + for cookie in logged_in: + if cookie.name == 'account': + fullname = json.loads(unquote_plus(cookie.value)).get('name') + if fullname: + self.addon.setSetting('fullname', fullname) + self._signed_in = logged_in + else: + if prompt: + self._notify(self.get_string(30085), 
def sign_out(self, emailaddress=None):
    """Log out of the website and clear the account settings.

    :param emailaddress: address used in the notification text; defaults to
        the stored ``emailaddress`` setting.
    :return: ``True`` when ``self.website.logout()`` succeeded (settings are
        then reset and ``self._signed_in`` set back to ``-1``), else ``False``.
    """
    if emailaddress is None:
        emailaddress = self.addon.getSetting('emailaddress')

    if not self.website.logout():
        if emailaddress:
            self._notify(self.get_string(30087), self.get_string(30088) % (emailaddress))
        return False

    self.addon.setSettingBool('authenticated', False)
    self.addon.setSetting('subscribed-check', '0')
    self.addon.setSettingInt('subscribed', 0)
    self._signed_in = -1
    if emailaddress:
        self._notify(self.get_string(30079) % (emailaddress), self.get_string(30078))
    self.addon.setSetting('emailaddress', '')
    self.addon.setSetting('fullname', '')
    return True


def subscribed(self):
    """Return whether the signed-in account has an active subscription.

    The website answer is cached in the ``subscribed`` setting and reused
    for 15 minutes, timed by the ``subscribed-check`` setting (epoch seconds
    stored as a string).

    :return: ``False`` when the ``authenticated`` setting is off; otherwise
        the cached or freshly fetched subscription state.
    """
    if not self.addon.getSettingBool('authenticated'):
        return False

    try:
        last_check = int(self.addon.getSetting('subscribed-check'))
    except (TypeError, ValueError):
        # The setting may be empty or otherwise non-numeric; treat that as
        # "never checked" so the website is queried.
        last_check = 0

    now = int(time.time())
    if now - last_check < (15 * 60):
        return self.addon.getSettingInt('subscribed') == 1

    subscribed = self.website.subscribed()
    self.addon.setSettingInt('subscribed', 1 if subscribed else 0)
    self.addon.setSetting('subscribed-check', str(now))
    return subscribed


def subscriber_giveaway(self, path):
    """Enter the giveaway at ``path`` and notify the user of the outcome.

    :param path: resource path handed to ``self.website.enter``.
    """
    if not self.login():
        self._notify(self.get_string(30073), self.get_string(30076))
        return

    source = self.website.enter(path)
    # A failed request may hand back an error object rather than page text;
    # substring tests on it would raise, so report it as a failed entry.
    if not isinstance(source, str):
        source = ''

    if 'Thank you! You have been entered' in source:
        self._notify(self.get_string(30071), self.get_string(30072))
    elif 'already entered this giveaway' in source:
        self._notify(self.get_string(30073), self.get_string(30074))
    else:
        self._notify(self.get_string(30073), self.get_string(30075))
class TripleRWebsite:
    """Cookie-backed HTTP client for the subscriber pages of www.rrr.org.au.

    Cookies are held in an ``LWPCookieJar`` and persisted to ``cookiepath``
    so that a session can be resumed without the password.
    """

    def __init__(self, cookiepath):
        """Remember where cookies are persisted and start with an empty jar."""
        self._cookiepath = cookiepath
        self.cj = http.cookiejar.LWPCookieJar()

    def _loadcj(self):
        """Load persisted cookies; return True only if a cookie file was read."""
        if not os.path.isfile(self._cookiepath):
            return False
        try:
            self.cj.load(self._cookiepath)
        except OSError:
            # http.cookiejar.LoadError is an OSError: a corrupt or unreadable
            # file is treated the same as having no saved session.
            return False
        return True

    def _delcj(self):
        """Forget all cookies, in memory and on disk (a missing file is fine)."""
        self.cj = http.cookiejar.LWPCookieJar()
        try:
            os.remove(self._cookiepath)
        except OSError:
            pass

    def request(self, url, data=None):
        """Fetch ``url`` with the cookie jar attached.

        :param url: address to request.
        :param data: optional urlencoded body; when non-empty it is encoded
            to bytes and sent with the request.
        :return: the decoded response body, or the ``HTTPError`` instance
            (returned, not raised) when the server answers with an HTTP error.
        """
        req = Request(url, data.encode() if data else None)
        req.add_header('User-Agent', USER_AGENT)

        opener = build_opener(HTTPCookieProcessor(self.cj))

        try:
            response = opener.open(req)
        except HTTPError as e:
            return e

        # Close the response even if reading or decoding fails.
        with response:
            return response.read().decode()

    def login(self, emailaddress, password):
        """Resume a saved session or sign in with credentials.

        With ``password`` set to ``None`` and a saved cookie file present,
        the account page is fetched and checked for ``emailaddress``.
        Otherwise, when both values are non-empty, the sign-in form is
        posted and the cookies are saved on success.

        :return: the cookie jar on success, ``False`` otherwise.
        """
        if password is None and self._loadcj():
            account_url = 'https://www.rrr.org.au/account'
            source = self.request(account_url)
            if self._check_login(source, emailaddress):
                return self.cj
            return False

        if emailaddress and password:
            login_url = 'https://www.rrr.org.au/sign-in'
            # NOTE(review): without doseq=True the '_csrf' list is sent as its
            # string representation, not as two fields - left as-is; confirm
            # against what the site expects before changing.
            login_data = urlencode(
                {
                    'subscriber_account[email]': emailaddress,
                    'subscriber_account[password]': password,
                    '_csrf': ['', 'javascript-disabled'],
                }
            )

            source = self.request(login_url, data=login_data)

            if self._check_login(source, emailaddress):
                self.cj.save(self._cookiepath)
                return self.cj

        return False

    def _check_login(self, source, emailaddress):
        """Return True when page text ``source`` mentions ``emailaddress``.

        The comparison is case-insensitive.  An empty address or a
        non-text ``source`` (such as an ``HTTPError`` returned by
        ``request``) never counts as logged in.
        """
        if not emailaddress or not isinstance(source, str):
            return False
        return emailaddress.lower() in source.lower()

    def logout(self):
        """Post the sign-out form and drop the saved cookies.

        :return: ``True`` when the request returned a non-empty body or an
            HTTP 500 (which this client counts as signed out); ``False`` for
            any other HTTP error or an empty body.
        """
        logout_url = 'https://www.rrr.org.au/sign-out'
        logout_data = urlencode(
            {
                '_csrf': ['', 'javascript-disabled'],
            }
        )
        source = self.request(logout_url, data=logout_data)
        if isinstance(source, HTTPError):
            if source.code != 500:
                return False
        elif not source:
            return False
        # Every path reported as success must also discard the cookies;
        # otherwise the stale session file would be resumed on the next login.
        self._delcj()
        return True

    def logged_in(self):
        """Return True when a saved cookie file could be loaded."""
        return self._loadcj()

    def subscribed(self):
        """Ask the website whether the account is active.

        :return: ``True`` on an HTTP 500 response, ``False`` on any other
            HTTP error, otherwise the result of ``_check_subscription``.
        """
        check_url = 'https://www.rrr.org.au/account/check-active.json'
        source = self.request(check_url)
        if isinstance(source, HTTPError):
            return source.code == 500
        return self._check_subscription(source)

    def _check_subscription(self, source):
        """Return True when ``source`` contains an ``"active"`` key set to true."""
        import re

        if not isinstance(source, str):
            return False
        # Tie 'true' to the "active" key itself so that an unrelated true
        # value elsewhere in the payload cannot be mistaken for it.
        return re.search(r'"active"\s*:\s*"?true\b', source) is not None

    def enter(self, resource_path):
        """Post a giveaway entry and return the result of ``request``.

        The URL is ``https://www.rrr.org.au/subscriber-`` followed by
        ``resource_path`` without its first character.
        """
        entry_url = ''.join(('https://www.rrr.org.au/subscriber-', resource_path[1:]))
        entry_data = urlencode(
            {
                'entry[null]': '',
                '_csrf': ['', 'javascript-disabled'],
            }
        )

        return self.request(entry_url, entry_data)
files /dev/null and b/plugin.audio.tripler/resources/screenshots/album-of-the-week.jpg differ diff --git a/plugin.audio.tripler/resources/screenshots/broadcast-playlist.jpg b/plugin.audio.tripler/resources/screenshots/broadcast-playlist.jpg new file mode 100644 index 0000000000..0f2cbf4e69 Binary files /dev/null and b/plugin.audio.tripler/resources/screenshots/broadcast-playlist.jpg differ diff --git a/plugin.audio.tripler/resources/screenshots/browse-by-date.jpg b/plugin.audio.tripler/resources/screenshots/browse-by-date.jpg new file mode 100644 index 0000000000..169766e2ec Binary files /dev/null and b/plugin.audio.tripler/resources/screenshots/browse-by-date.jpg differ diff --git a/plugin.audio.tripler/resources/screenshots/browse-by-program.jpg b/plugin.audio.tripler/resources/screenshots/browse-by-program.jpg new file mode 100644 index 0000000000..7cb817ddae Binary files /dev/null and b/plugin.audio.tripler/resources/screenshots/browse-by-program.jpg differ diff --git a/plugin.audio.tripler/resources/screenshots/menu.jpg b/plugin.audio.tripler/resources/screenshots/menu.jpg new file mode 100644 index 0000000000..160792df8f Binary files /dev/null and b/plugin.audio.tripler/resources/screenshots/menu.jpg differ diff --git a/plugin.audio.tripler/resources/screenshots/soundscape.jpg b/plugin.audio.tripler/resources/screenshots/soundscape.jpg new file mode 100644 index 0000000000..24172ebe9d Binary files /dev/null and b/plugin.audio.tripler/resources/screenshots/soundscape.jpg differ diff --git a/plugin.audio.tripler/resources/settings.xml b/plugin.audio.tripler/resources/settings.xml new file mode 100644 index 0000000000..6e63302c33 --- /dev/null +++ b/plugin.audio.tripler/resources/settings.xml @@ -0,0 +1,112 @@ +<?xml version="1.0" ?> +<settings version="1"> + <section id="plugin.audio.tripler"> + <category id="subscriber account" label="30010" help=""> + <group id="1"> + <setting id="authenticated" type="boolean" label="30999" help=""> + <level>4</level> + 
<default>false</default> + </setting> + <setting id="fullname" type="string" label="30017" help=""> + <level>0</level> + <default/> + <enable>false</enable> + <constraints> + <allowempty>true</allowempty> + </constraints> + <dependencies> + <dependency type="visible"> + <condition operator="!is" setting="authenticated">false</condition> + </dependency> + </dependencies> + <control type="edit" format="string"> + <heading>30017</heading> + </control> + </setting> + <setting id="emailaddress" type="string" label="30012" help=""> + <level>0</level> + <default/> + <enable>false</enable> + <constraints> + <allowempty>true</allowempty> + </constraints> + <dependencies> + <dependency type="visible"> + <condition operator="!is" setting="authenticated">false</condition> + </dependency> + </dependencies> + <control type="edit" format="string"> + <heading>30012</heading> + </control> + </setting> + <setting id="subscribed" type="integer" label="30075" help="30075"> + <level>0</level> + <default>0</default> + <enable>false</enable> + <constraints> + <options> + <option label="No">0</option> + <option label="Yes">1</option> + </options> + </constraints> + <dependencies> + <dependency type="visible"> + <condition operator="!is" setting="authenticated">false</condition> + </dependency> + </dependencies> + <control type="list" format="string"> + <heading>30075</heading> + </control> + </setting> + <setting id="subscribed-check" type="string" label="30999" help=""> + <level>4</level> + <default/> + <constraints> + <allowempty>true</allowempty> + </constraints> + <control type="edit" format="string"> + <heading>30999</heading> + </control> + </setting> + <setting id="sign-in" type="action" label="30013" help="30020"> + <level>0</level> + <dependencies> + <dependency type="visible"> + <condition operator="!is" setting="authenticated">true</condition> + </dependency> + </dependencies> + <control type="button" format="action"> + 
<data>RunPlugin("plugin://plugin.audio.tripler/sign-in")</data> + </control> + </setting> + <setting id="sign-out" type="action" label="30014" help="30021"> + <level>0</level> + <dependencies> + <dependency type="visible"> + <condition operator="!is" setting="authenticated">false</condition> + </dependency> + </dependencies> + <control type="button" format="action"> + <data>RunPlugin("plugin://plugin.audio.tripler/sign-out")</data> + </control> + </setting> + </group> + <group id="2"> + <setting id="image_quality" type="integer" label="30022" help="30023"> + <level>0</level> + <default>1</default> + <constraints> + <options> + <option label="30024">0</option> + <option label="30025">1</option> + <option label="30026">2</option> + </options> + </constraints> + <control type="list" format="string"> + <heading>30022</heading> + </control> + </setting> + </group> + </category> + </section> +</settings>