Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[utils] Shorten proposed file name on create if too long #29989

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion test/test_unicode_literals.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@ def test_all_files(self):
r'(?:(?:#.*?|\s*)\n)*from __future__ import (?:[a-z_]+,\s*)*unicode_literals',
'unicode_literals import missing in %s' % fn)

m = re.search(r'(?<=\s)u[\'"](?!\)|,|$)', code)
# match explicit unicode literal on a line not starting with #|'|"
# and preceded by a space or =
m = re.search(r'(?m)(?:^\s*?[^#\s"\'].*?)(?<=\s|=)u[\'"](?!\)|,|$)', code)
if m is not None:
self.assertTrue(
m is None,
Expand Down
21 changes: 21 additions & 0 deletions test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
pkcs1pad,
read_batch_urls,
sanitize_filename,
sanitize_open,
sanitize_path,
sanitize_url,
expand_path,
Expand Down Expand Up @@ -118,6 +119,16 @@


class TestUtil(unittest.TestCase):
@classmethod
def setUpClass(cls):
    """Run the class-level tearDown() once before any test in the class."""
    cls.tearDown()

@classmethod
def tearDown(cls):
    """Remove every entry of the current directory whose extension is '.test'."""
    leftovers = [
        name for name in os.listdir('.')
        if os.path.splitext(name)[1] == '.test'
    ]
    for name in leftovers:
        os.remove(name)

def test_timeconvert(self):
self.assertTrue(timeconvert('') is None)
self.assertTrue(timeconvert('bougrg') is None)
Expand Down Expand Up @@ -231,6 +242,16 @@ def test_sanitize_path(self):
self.assertEqual(sanitize_path('./abc'), 'abc')
self.assertEqual(sanitize_path('./../abc'), '..\\abc')

def test_sanitize_open(self):
    """Creating a file with an over-long name yields a name cut in the middle."""
    verse = 'I sleep all night and I work all day %d'
    long_name = " I'm a lumberjack ".join([verse % n for n in range(50)])
    prefix = '.\\' if sys.platform == 'win32' else './'

    stream, final_name = sanitize_open(
        '%s%s.test' % (prefix, long_name),
        open_mode='w')
    stream.close()

    # compare without the leading './' if it is still there
    if final_name.startswith('./'):
        final_name = final_name[2:]
    self.assertEqual(
        final_name,
        "I sleep all night and I work all day 0 I'm a lumberjack I sleep all night and I work[...] night and I work all day 48 I'm a lumberjack I sleep all night and I work all day 49.test")

def test_sanitize_url(self):
self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar')
self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
Expand Down
5 changes: 5 additions & 0 deletions youtube_dl/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2997,6 +2997,10 @@ def resf(tpl, *args, **kwargs):
def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
    """Call ctypes.WINFUNCTYPE with the given arguments and return its result."""
    return ctypes.WINFUNCTYPE(*args, **kwargs)

try:
import reprlib as compat_reprlib
except ImportError:
import repr as compat_reprlib

__all__ = [
'compat_HTMLParseError',
Expand Down Expand Up @@ -3032,6 +3036,7 @@ def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
'compat_parse_qs',
'compat_print',
'compat_realpath',
'compat_reprlib',
'compat_setenv',
'compat_shlex_quote',
'compat_shlex_split',
Expand Down
83 changes: 75 additions & 8 deletions youtube_dl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from __future__ import unicode_literals

import ast
import base64
import binascii
import calendar
Expand Down Expand Up @@ -53,6 +54,7 @@
compat_kwargs,
compat_os_name,
compat_parse_qs,
compat_reprlib,
compat_shlex_quote,
compat_str,
compat_struct_pack,
Expand Down Expand Up @@ -2036,6 +2038,43 @@ def clean_html(html):
return html.strip()


def eviscerate(text, width, placeholder=' [...]'):
    """Shorten text to at most width characters by cutting out its middle.

    Arguments:
    text -- the string to shorten
    width -- the maximum length of the result
    placeholder -- marker inserted in place of the removed middle part

    Returns text unchanged if it already fits in width. Otherwise returns
    the start and the end of text joined by placeholder, exactly width
    characters long. If width leaves no room for any text next to
    placeholder, text is simply cut off at width.
    """

    if len(text) <= width:
        return text

    # number of characters of the original text that survive
    keep = width - len(placeholder)
    if keep <= 0:
        return text[:max(width, 0)]

    # with an odd count the extra character goes to the tail
    head = keep // 2
    tail = keep - head
    # slice from an explicit index: text[-0:] would be the whole text
    return text[:head] + placeholder + text[len(text) - tail:]


def reduce_filename(path, reduction=0.5, min_length=20, ellipsis='[...]'):
    """Try to reduce the filename by a specified reduction factor

    Arguments:
    path -- the path name to reduce
    reduction -- factor by which to reduce its filename component
    min_length -- filenames (without extension) shorter than this
                  are not reduced any further
    ellipsis -- placeholder for removed text

    Returns path name with reduced filename, or None if the filename
    is shorter than min_length or would not get any shorter
    """

    dirname, basename = os.path.split(path)
    stem, ext = os.path.splitext(basename)
    # drop placeholders left by an earlier reduction so they don't pile up
    stem = stem.replace(ellipsis, ' ')
    stem_len = len(stem)
    if stem_len < min_length:
        # give up
        return None
    width = int(1 + reduction * stem_len)
    if width >= stem_len:
        # no progress possible: a caller retrying in a loop must stop here
        return None
    stem = eviscerate(stem, width, placeholder=ellipsis)
    return os.path.join(dirname, stem + ext)


def sanitize_open(filename, open_mode):
"""Try to open the given filename, and slightly tweak it if this fails.

Expand All @@ -2046,26 +2085,54 @@ def sanitize_open(filename, open_mode):

It returns the tuple (stream, definitive_file_name).
"""
def openfile(filename, open_mode):
stream = open(encodeFilename(filename), open_mode)
return (stream, filename)

try:
if filename == '-':
if sys.platform == 'win32':
import msvcrt
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
stream = open(encodeFilename(filename), open_mode)
return (stream, filename)
return openfile(filename, open_mode)
except (IOError, OSError) as err:
if err.errno in (errno.EACCES,):
raise

# In case of error, try to remove win32 forbidden chars
alt_filename = sanitize_path(filename)
if alt_filename == filename:
if 'w' not in open_mode or '+' in open_mode:
# only mung filename when creating the file
raise

org_err = err

# In case of error, try to remove win32 forbidden chars
if err.errno in (errno.EINVAL, ):
alt_filename = sanitize_path(filename)
if alt_filename != filename:
try:
return openfile(alt_filename, open_mode)
except (IOError, OSError) as new_err:
err = new_err
else:
# An exception here should be caught in the caller
stream = open(encodeFilename(alt_filename), open_mode)
return (stream, alt_filename)
alt_filename = filename

# Windows: an over-long file name can be detected by the CreateFile()
# API, and then get EINVAL, or by the filesystem, and then perhaps
# ENAMETOOLONG
# POSIX: ENAMETOOLONG in general
while err.errno in (errno.ENAMETOOLONG, errno.EINVAL, ):
alt_filename = reduce_filename(alt_filename)
if not alt_filename:
break
try:
return openfile(alt_filename, open_mode)
except (IOError, OSError) as new_err:
err = new_err

# Reduction didn't help; give up and report what initially went wrong
# This exception should be caught in the caller
raise org_err


def timeconvert(timestr):
Expand Down