ytdl-org · dirkf · Oct 25, 2021 · Oct 25, 2021
diff --git a/test/test_unicode_literals.py b/test/test_unicode_literals.py
@@ -51,7 +51,9 @@ def test_all_files(self):
                     r'(?:(?:#.*?|\s*)\n)*from __future__ import (?:[a-z_]+,\s*)*unicode_literals',
                     'unicode_literals import  missing in %s' % fn)
 
-                m = re.search(r'(?<=\s)u[\'"](?!\)|,|$)', code)
+                # match explicit unicode literal on a line not starting with #|'|"
+                # and preceded by a space or =
+                m = re.search(r'(?m)(?:^\s*?[^#\s"\'].*?)(?<=\s|=)u[\'"](?!\)|,|$)', code)
                 if m is not None:
                     self.assertTrue(
                         m is None,

diff --git a/test/test_utils.py b/test/test_utils.py
@@ -63,6 +63,7 @@
     pkcs1pad,
     read_batch_urls,
     sanitize_filename,
+    sanitize_open,
     sanitize_path,
     sanitize_url,
     expand_path,
@@ -118,6 +119,16 @@
 
 
 class TestUtil(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.tearDown()  # remove any *.test files already in the current directory
+
+    @classmethod
+    def tearDown(cls):  # a classmethod, so setUpClass() can call it as well
+        for tf in os.listdir('.'):
+            if os.path.splitext(tf)[1] == '.test':  # e.g. the file test_sanitize_open creates
+                os.remove(tf)
+
     def test_timeconvert(self):
         self.assertTrue(timeconvert('') is None)
         self.assertTrue(timeconvert('bougrg') is None)
@@ -231,6 +242,16 @@ def test_sanitize_path(self):
         self.assertEqual(sanitize_path('./abc'), 'abc')
         self.assertEqual(sanitize_path('./../abc'), '..\\abc')
 
+    def test_sanitize_open(self):
+        long_name = " I'm a lumberjack ".join(['I sleep all night and I work all day %d' % n for n in range(50)])
+        result = sanitize_open(  # returns (stream, definitive_file_name)
+            '%s%s.test' % ('.\\' if sys.platform == 'win32' else './', long_name, ),
+            open_mode='w')  # a plain write mode, so the name may be shortened
+        result[0].close()  # only the returned file name is checked
+        self.assertEqual(  # NOTE(review): expected name presumably depends on the filesystem's name-length limit
+            result[1][2:] if result[1].startswith('./') else result[1],  # ignore a leading './'
+            "I sleep all night and I work all day 0 I'm a lumberjack I sleep all night and I work[...] night and I work all day 48 I'm a lumberjack I sleep all night and I work all day 49.test")
+
     def test_sanitize_url(self):
         self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar')
         self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')

diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
@@ -2997,6 +2997,10 @@ def resf(tpl, *args, **kwargs):
     def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
         return ctypes.WINFUNCTYPE(*args, **kwargs)
 
+try:
+    import reprlib as compat_reprlib
+except ImportError:
+    import repr as compat_reprlib
 
 __all__ = [
     'compat_HTMLParseError',
@@ -3032,6 +3036,7 @@ def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
     'compat_parse_qs',
     'compat_print',
     'compat_realpath',
+    'compat_reprlib',
     'compat_setenv',
     'compat_shlex_quote',
     'compat_shlex_split',

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
@@ -3,6 +3,7 @@
 
 from __future__ import unicode_literals
 
+import ast
 import base64
 import binascii
 import calendar
@@ -53,6 +54,7 @@
     compat_kwargs,
     compat_os_name,
     compat_parse_qs,
+    compat_reprlib,
     compat_shlex_quote,
     compat_str,
     compat_struct_pack,
@@ -2036,6 +2038,43 @@ def clean_html(html):
     return html.strip()
 
 
+def eviscerate(text, width, placeholder=' [...]'):
+    """Return text cut down to width characters, with placeholder standing
+    in for the characters dropped from its middle (unchanged if it fits).
+    """
+    if len(text) <= width:
+        return text
+    keep = width - len(placeholder)
+    if keep <= 0:
+        # no room for the placeholder: fall back to plain truncation
+        return text[:max(0, width)]
+    # slice the text itself, not its repr(), so escapes can't be split
+    head = keep // 2
+    return text[:head] + placeholder + text[len(text) - (keep - head):]
+
+
+def reduce_filename(path, reduction=0.5, min_length=20, ellipsis='[...]'):
+    """Try to reduce the filename by a specified reduction factor
+
+    Arguments:
+    path -- the path name to reduce
+    reduction -- factor by which to reduce its filename component
+    min_length -- give up if the filename (sans extension) is shorter
+    ellipsis -- placeholder for removed text
+
+    Returns path name with reduced filename, or None
+    """
+    fname = os.path.split(path)
+    fname = list(fname[:1] + os.path.splitext(fname[1]))  # [dir, stem, ext]
+    fname[1] = fname[1].replace(ellipsis, ' ')  # any earlier placeholder becomes a space
+    flen = len(fname[1])
+    if flen < min_length:
+        # give up
+        return None
+    fname[1] = eviscerate(fname[1], int(1 + reduction * flen), placeholder=ellipsis)
+    return os.path.join(fname[0], ''.join(fname[1:]))
+
+
 def sanitize_open(filename, open_mode):
     """Try to open the given filename, and slightly tweak it if this fails.
 
@@ -2046,26 +2085,54 @@ def sanitize_open(filename, open_mode):
 
     It returns the tuple (stream, definitive_file_name).
     """
+    def openfile(filename, open_mode):
+        stream = open(encodeFilename(filename), open_mode)
+        return (stream, filename)
+
     try:
         if filename == '-':
             if sys.platform == 'win32':
                 import msvcrt
                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
-        stream = open(encodeFilename(filename), open_mode)
-        return (stream, filename)
+        return openfile(filename, open_mode)
     except (IOError, OSError) as err:
         if err.errno in (errno.EACCES,):
             raise
 
-        # In case of error, try to remove win32 forbidden chars
-        alt_filename = sanitize_path(filename)
-        if alt_filename == filename:
+        if 'w' not in open_mode or '+' in open_mode:
+            # only mung filename when creating the file
             raise
+
+        org_err = err
+
+        # In case of error, try to remove win32 forbidden chars
+        if err.errno in (errno.EINVAL, ):
+            alt_filename = sanitize_path(filename)
+            if alt_filename != filename:
+                try:
+                    return openfile(alt_filename, open_mode)
+                except (IOError, OSError) as new_err:
+                    err = new_err
         else:
-            # An exception here should be caught in the caller
-            stream = open(encodeFilename(alt_filename), open_mode)
-            return (stream, alt_filename)
+            alt_filename = filename
+
+        # Windows: an over-long file name can be detected by the CreateFile()
+        # API, and then get EINVAL, or by the filesystem, and then perhaps
+        # ENAMETOOLONG
+        # POSIX: ENAMETOOLONG in general
+        while err.errno in (errno.ENAMETOOLONG, errno.EINVAL, ):
+            alt_filename = reduce_filename(alt_filename)
+            if not alt_filename:
+                break
+            try:
+                return openfile(alt_filename, open_mode)
+            except (IOError, OSError) as new_err:
+                err = new_err
+
+        # Reduction didn't help; give up and report what initially went wrong
+        # This exception should be caught in the caller
+        raise org_err
 
 
 def timeconvert(timestr):