Merge branch 'yt-dlp:master' into networking/curl-impersonate

coletdjnz · Mar 4, 2024 · a59d09e · a59d09e
2 parents c60b1b5 + ac340d0
commit a59d09e
Show file tree

Hide file tree

Showing 27 changed files with 824 additions and 340 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -164,7 +164,7 @@ jobs:
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
         with:
-          name: build-${{ github.job }}
+          name: build-bin-${{ github.job }}
           path: |
             yt-dlp
             yt-dlp.tar.gz
@@ -227,7 +227,7 @@ jobs:
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
         with:
-          name: build-linux_${{ matrix.architecture }}
+          name: build-bin-linux_${{ matrix.architecture }}
           path: | # run-on-arch-action designates armv7l as armv7
             repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}
           compression-level: 0
@@ -287,7 +287,7 @@ jobs:
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
         with:
-          name: build-${{ github.job }}
+          name: build-bin-${{ github.job }}
           path: |
             dist/yt-dlp_macos
             dist/yt-dlp_macos.zip
@@ -340,7 +340,7 @@ jobs:
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
         with:
-          name: build-${{ github.job }}
+          name: build-bin-${{ github.job }}
           path: |
             dist/yt-dlp_macos_legacy
           compression-level: 0
@@ -389,7 +389,7 @@ jobs:
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
         with:
-          name: build-${{ github.job }}
+          name: build-bin-${{ github.job }}
           path: |
             dist/yt-dlp.exe
             dist/yt-dlp_min.exe
@@ -437,7 +437,7 @@ jobs:
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
         with:
-          name: build-${{ github.job }}
+          name: build-bin-${{ github.job }}
           path: |
             dist/yt-dlp_x86.exe
           compression-level: 0
@@ -457,7 +457,7 @@ jobs:
       - uses: actions/download-artifact@v4
         with:
           path: artifact
-          pattern: build-*
+          pattern: build-bin-*
           merge-multiple: true
 
       - name: Make SHA2-SUMS files
@@ -500,3 +500,4 @@ jobs:
             _update_spec
             SHA*SUMS*
           compression-level: 0
+          overwrite: true
diff --git a/test/test_websockets.py b/test/test_websockets.py
@@ -192,8 +192,8 @@ def test_raise_http_error(self, handler, status):
 
     @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
     @pytest.mark.parametrize('params,extensions', [
-        ({'timeout': 0.00001}, {}),
-        ({}, {'timeout': 0.00001}),
+        ({'timeout': sys.float_info.min}, {}),
+        ({}, {'timeout': sys.float_info.min}),
     ])
     def test_timeout(self, handler, params, extensions):
         with handler(**params) as rh:

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
@@ -694,7 +694,6 @@ def process_color_policy(stream):
         self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
         self._load_cookies(self.params['http_headers'].get('Cookie'))  # compat
         self.params['http_headers'].pop('Cookie', None)
-        self._request_director = self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
 
         if auto_init and auto_init != 'no_verbose_header':
             self.print_debug_header()
@@ -976,6 +975,7 @@ def __exit__(self, *args):
     def close(self):
         self.save_cookies()
         self._request_director.close()
+        del self._request_director
 
     def trouble(self, message=None, tb=None, is_error=True):
         """Determine action to take when a download problem appears.
@@ -4198,6 +4198,10 @@ def build_request_director(self, handlers, preferences=None):
             director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
         return director
 
+    @functools.cached_property
+    def _request_director(self):
+        return self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
+
     def encode(self, s):
         if isinstance(s, bytes):
             return s  # Already encoded

diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
@@ -16,7 +16,7 @@
 import re
 import traceback
 
-from .compat import compat_shlex_quote
+from .compat import compat_os_name, compat_shlex_quote
 from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
 from .downloader.external import get_external_downloader
 from .extractor import list_extractor_classes
@@ -1027,7 +1027,28 @@ def _real_main(argv=None):
             if pre_process:
                 return ydl._download_retcode
 
-            ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
+            args = sys.argv[1:] if argv is None else argv
+            ydl.warn_if_short_id(args)
+
+            # Show a useful error message and wait for keypress if not launched from shell on Windows
+            if not args and compat_os_name == 'nt' and getattr(sys, 'frozen', False):
+                import ctypes.wintypes
+                import msvcrt
+
+                kernel32 = ctypes.WinDLL('Kernel32')
+
+                buffer = (1 * ctypes.wintypes.DWORD)()
+                attached_processes = kernel32.GetConsoleProcessList(buffer, 1)
+                # If we only have a single process attached, then the executable was double clicked
+                # When using `pyinstaller` with `--onefile`, two processes get attached
+                is_onefile = hasattr(sys, '_MEIPASS') and os.path.basename(sys._MEIPASS).startswith('_MEI')
+                if attached_processes == 1 or is_onefile and attached_processes == 2:
+                    print(parser._generate_error_message(
+                        'Do not double-click the executable, instead call it from a command line.\n'
+                        'Please read the README for further information on how to use yt-dlp: '
+                        'https://github.com/yt-dlp/yt-dlp#readme'))
+                    msvcrt.getch()
+                    _exit(2)
             parser.error(
                 'You must provide at least one URL.\n'
                 'Type yt-dlp --help to see a list of all options.')

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
@@ -444,6 +444,7 @@
 from .dailymotion import (
     DailymotionIE,
     DailymotionPlaylistIE,
+    DailymotionSearchIE,
     DailymotionUserIE,
 )
 from .dailywire import (
@@ -2499,6 +2500,7 @@
     Zee5SeriesIE,
 )
 from .zeenews import ZeeNewsIE
+from .zenporn import ZenPornIE
 from .zetland import ZetlandDKArticleIE
 from .zhihu import ZhihuIE
 from .zingmp3 import (

diff --git a/yt_dlp/extractor/altcensored.py b/yt_dlp/extractor/altcensored.py
@@ -22,7 +22,7 @@ class AltCensoredIE(InfoExtractor):
             'title': "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?",
             'display_id': 'k0srjLSkga8.webm',
             'release_date': '20180403',
-            'creator': 'Virginie Vota',
+            'creators': ['Virginie Vota'],
             'release_year': 2018,
             'upload_date': '20230318',
             'uploader': '[email protected]',
@@ -32,7 +32,7 @@ class AltCensoredIE(InfoExtractor):
             'duration': 926.09,
             'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg',
             'view_count': int,
-            'categories': ['News & Politics'],
+            'categories': ['News & Politics'],  # FIXME
         }
     }]
 
@@ -62,14 +62,21 @@ class AltCensoredChannelIE(InfoExtractor):
             'title': 'Virginie Vota',
             'id': 'UCFPTO55xxHqFqkzRZHu4kcw',
         },
-        'playlist_count': 91
+        'playlist_count': 85,
     }, {
         'url': 'https://altcensored.com/channel/UC9CcJ96HKMWn0LZlcxlpFTw',
         'info_dict': {
             'title': 'yukikaze775',
             'id': 'UC9CcJ96HKMWn0LZlcxlpFTw',
         },
-        'playlist_count': 4
+        'playlist_count': 4,
+    }, {
+        'url': 'https://altcensored.com/channel/UCfYbb7nga6-icsFWWgS-kWw',
+        'info_dict': {
+            'title': 'Mister Metokur',
+            'id': 'UCfYbb7nga6-icsFWWgS-kWw',
+        },
+        'playlist_count': 121,
     }]
 
     def _real_extract(self, url):
@@ -78,7 +85,7 @@ def _real_extract(self, url):
             url, channel_id, 'Download channel webpage', 'Unable to get channel webpage')
         title = self._html_search_meta('altcen_title', webpage, 'title', fatal=False)
         page_count = int_or_none(self._html_search_regex(
-            r'<a[^>]+href="/channel/\w+/page/(\d+)">(?:\1)</a>',
+            r'<a[^>]+href="/channel/[\w-]+/page/(\d+)">(?:\1)</a>',
             webpage, 'page count', default='1'))
 
         def page_func(page_num):

diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py
@@ -300,7 +300,7 @@ def _real_extract(self, url):
             is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig'))
             if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in):
                 entry['formats'].append({
-                    'url': 'https://archive.org/download/' + identifier + '/' + f['name'],
+                    'url': 'https://archive.org/download/' + identifier + '/' + urllib.parse.quote(f['name']),
                     'format': f.get('format'),
                     'width': int_or_none(f.get('width')),
                     'height': int_or_none(f.get('height')),

diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
@@ -1996,7 +1996,7 @@ def _extract_video_metadata(self, url, video_id, season_id):
                 'title': get_element_by_class(
                     'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
                 'description': get_element_by_class(
-                    'bstar-meta__desc', webpage) or self._html_search_meta('og:description'),
+                    'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
             }, self._search_json_ld(webpage, video_id, default={}))
 
     def _get_comments_reply(self, root_id, next_id=0, display_id=None):

diff --git a/yt_dlp/extractor/cctv.py b/yt_dlp/extractor/cctv.py
@@ -88,6 +88,20 @@ class CCTVIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+    }, {
+        # videoCenterId: "id"
+        'url': 'http://news.cctv.com/2024/02/21/ARTIcU5tKIOIF2myEGCATkLo240221.shtml',
+        'info_dict': {
+            'id': '5c846c0518444308ba32c4159df3b3e0',
+            'ext': 'mp4',
+            'title': '《平“语”近人——习近平喜欢的典故》第三季 第5集：风物长宜放眼量',
+            'uploader': 'yangjuan',
+            'timestamp': 1708554940,
+            'upload_date': '20240221',
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         # var ids = ["id"]
         'url': 'http://www.ncpa-classic.com/clt/more/416/index.shtml',
@@ -128,7 +142,7 @@ def _real_extract(self, url):
 
         video_id = self._search_regex(
             [r'var\s+guid\s*=\s*["\']([\da-fA-F]+)',
-             r'videoCenterId["\']\s*,\s*["\']([\da-fA-F]+)',
+             r'videoCenterId(?:["\']\s*,|:)\s*["\']([\da-fA-F]+)',
              r'changePlayer\s*\(\s*["\']([\da-fA-F]+)',
              r'load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)',
              r'var\s+initMyAray\s*=\s*["\']([\da-fA-F]+)',

diff --git a/yt_dlp/extractor/chzzk.py b/yt_dlp/extractor/chzzk.py
@@ -2,7 +2,7 @@
 
 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
+    UserNotLive,
     float_or_none,
     int_or_none,
     parse_iso8601,
@@ -40,7 +40,7 @@ def _real_extract(self, url):
             note='Downloading channel info', errnote='Unable to download channel info')['content']
 
         if live_detail.get('status') == 'CLOSE':
-            raise ExtractorError('The channel is not currently live', expected=True)
+            raise UserNotLive(video_id=channel_id)
 
         live_playback = self._parse_json(live_detail['livePlaybackJson'], channel_id)
 

diff --git a/yt_dlp/extractor/cloudflarestream.py b/yt_dlp/extractor/cloudflarestream.py
@@ -4,27 +4,25 @@
 
 
 class CloudflareStreamIE(InfoExtractor):
+    _SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?'
     _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
-    _EMBED_RE = r'embed\.%s/embed/[^/]+\.js\?.*?\bvideo=' % _DOMAIN_RE
+    _EMBED_RE = rf'embed\.{_DOMAIN_RE}/embed/[^/]+\.js\?.*?\bvideo='
     _ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
-    _VALID_URL = r'''(?x)
-                    https?://
-                        (?:
-                            (?:watch\.)?%s/|
-                            %s
-                        )
-                        (?P<id>%s)
-                    ''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE)
-    _EMBED_REGEX = [fr'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1']
+    _VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
+    _EMBED_REGEX = [
+        rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1',
+        rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
+    ]
     _TESTS = [{
         'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
         'info_dict': {
             'id': '31c9291ab41fac05471db4e73aa11717',
             'ext': 'mp4',
             'title': '31c9291ab41fac05471db4e73aa11717',
+            'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
         },
         'params': {
-            'skip_download': True,
+            'skip_download': 'm3u8',
         },
     }, {
         'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
@@ -35,6 +33,21 @@ class CloudflareStreamIE(InfoExtractor):
     }, {
         'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e',
         'only_matching': True,
+    }, {
+        'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe',
+        'only_matching': True,
+    }]
+    _WEBPAGE_TESTS = [{
+        'url': 'https://upride.cc/incident/shoulder-pass-at-light/',
+        'info_dict': {
+            'id': 'eaef9dea5159cf968be84241b5cedfe7',
+            'ext': 'mp4',
+            'title': 'eaef9dea5159cf968be84241b5cedfe7',
+            'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
+        },
+        'params': {
+            'skip_download': 'm3u8',
+        },
     }]
 
     def _real_extract(self, url):