From 4644ac5527e48a1a8c48dc790621c73913e6dbf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 30 Sep 2014 22:27:53 +0700 Subject: [PATCH 1/2] [core] Decode environment variables with filesystem encoding (Fixes #3854, Fixes #3217, Fixes #2918) Introduces compat versions of os.getenv and os.path.expanduser --- test/test_utils.py | 13 ++++++++++ youtube_dl/YoutubeDL.py | 3 ++- youtube_dl/__init__.py | 3 ++- youtube_dl/cache.py | 3 ++- youtube_dl/options.py | 14 ++++++----- youtube_dl/utils.py | 53 +++++++++++++++++++++++++++++++++++++---- 6 files changed, 76 insertions(+), 13 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 97551ce9c..19c9ba7f8 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -44,6 +44,9 @@ from youtube_dl.utils import ( limit_length, escape_rfc3986, escape_url, + get_filesystem_encoding, + compat_getenv, + compat_expanduser, ) @@ -331,5 +334,15 @@ class TestUtil(unittest.TestCase): ) self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0') + def test_compat_getenv(self): + test_str = 'тест' + os.environ['YOUTUBE-DL-TEST'] = test_str.encode(get_filesystem_encoding()) + self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str) + + def test_compat_expanduser(self): + test_str = 'C:\Documents and Settings\тест\Application Data' + os.environ['HOME'] = test_str.encode(get_filesystem_encoding()) + self.assertEqual(compat_expanduser('~'), test_str) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 4a9610355..34a1e3b5c 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -24,6 +24,7 @@ if os.name == 'nt': from .utils import ( compat_cookiejar, + compat_expanduser, compat_http_client, compat_str, compat_urllib_error, @@ -447,7 +448,7 @@ class YoutubeDL(object): template_dict = collections.defaultdict(lambda: 'NA', template_dict) outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) - tmpl = os.path.expanduser(outtmpl) + tmpl = compat_expanduser(outtmpl) filename = tmpl % template_dict return filename except ValueError as err: diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 7f2b4dfcc..e73bc5c37 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -94,6 +94,7 @@ from .options import ( parseOpts, ) from .utils import ( + compat_expanduser, compat_getpass, compat_print, DateRange, @@ -285,7 +286,7 @@ def _real_main(argv=None): u' template'.format(outtmpl)) any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson - download_archive_fn = os.path.expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive + download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive ydl_opts = { 'usenetrc': opts.usenetrc, diff --git a/youtube_dl/cache.py b/youtube_dl/cache.py index 79ff09f78..ac5925d32 100644 --- a/youtube_dl/cache.py +++ b/youtube_dl/cache.py @@ -9,6 +9,7 @@ import shutil import traceback from .utils import ( + compat_expanduser, write_json_file, ) @@ -22,7 +23,7 @@ class Cache(object): if res is None: cache_root = os.environ.get('XDG_CACHE_HOME', '~/.cache') res = os.path.join(cache_root, 'youtube-dl') - return os.path.expanduser(res) + return compat_expanduser(res) def _get_cache_fn(self, section, key, dtype): assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \ diff --git a/youtube_dl/options.py b/youtube_dl/options.py index f651337ad..e6f9f33a2 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -6,6 +6,8 @@ import shlex import sys from .utils import ( + compat_expanduser, + compat_getenv, get_term_width, write_string, ) @@ -27,19 +29,19 @@ def parseOpts(overrideArguments=None): return res def _readUserConf(): - xdg_config_home = os.environ.get('XDG_CONFIG_HOME') + xdg_config_home = compat_getenv('XDG_CONFIG_HOME') if xdg_config_home: userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config') if not os.path.isfile(userConfFile): userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf') else: - userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config') + userConfFile = os.path.join(compat_expanduser('~'), '.config', 'youtube-dl', 'config') if not os.path.isfile(userConfFile): - userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf') + userConfFile = os.path.join(compat_expanduser('~'), '.config', 'youtube-dl.conf') userConf = _readOptions(userConfFile, None) if userConf is None: - appdata_dir = os.environ.get('appdata') + appdata_dir = compat_getenv('appdata') if appdata_dir: userConf = _readOptions( os.path.join(appdata_dir, 'youtube-dl', 'config'), @@ -51,11 +53,11 @@ def parseOpts(overrideArguments=None): if userConf is None: userConf = _readOptions( - os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'), + os.path.join(compat_expanduser('~'), 'youtube-dl.conf'), default=None) if userConf is None: userConf = _readOptions( - os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'), + os.path.join(compat_expanduser('~'), 'youtube-dl.conf.txt'), default=None) if userConf is None: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index f05747097..afe32ae05 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -203,6 +203,48 @@ def compat_ord(c): if type(c) is int: return c else: return ord(c) + +# Environment variables should be decoded with filesystem encoding +# otherwise this results in issues like #3854 #2918 #3217 +if sys.version_info >= (3, 0): + compat_getenv = os.getenv + compat_expanduser = os.path.expanduser +else: + def compat_getenv(key, default=None): + env = os.getenv(key, default) + if env: + env = env.decode(get_filesystem_encoding()) + return env + + def compat_expanduser(path): + """Expand ~ and ~user constructs. + + If user or $HOME is unknown, do nothing.""" + if path[:1] != '~': + return path + i, n = 1, len(path) + while i < n and path[i] not in '/\\': + i += 1 + + if 'HOME' in os.environ: + userhome = compat_getenv('HOME') + elif 'USERPROFILE' in os.environ: + userhome = compat_getenv('USERPROFILE') + elif not 'HOMEPATH' in os.environ: + return path + else: + try: + drive = compat_getenv('HOMEDRIVE') + except KeyError: + drive = '' + userhome = os.path.join(drive, compat_getenv('HOMEPATH')) + + if i != 1: # ~user + userhome = os.path.join(os.path.dirname(userhome), path[1:i]) + + return userhome + path[i:] + + # This is not clearly defined otherwise compiled_regex_type = type(re.compile('')) @@ -1204,11 +1246,14 @@ class locked_file(object): return self.f.read(*args) +def get_filesystem_encoding(): + encoding = sys.getfilesystemencoding() + return encoding if encoding is not None else 'utf-8' + + def shell_quote(args): quoted_args = [] - encoding = sys.getfilesystemencoding() - if encoding is None: - encoding = 'utf-8' + encoding = get_filesystem_encoding() for a in args: if isinstance(a, bytes): # We may get a filename encoded with 'encodeFilename' @@ -1258,7 +1303,7 @@ def format_bytes(bytes): def get_term_width(): - columns = os.environ.get('COLUMNS', None) + columns = compat_getenv('COLUMNS', None) if columns: return int(columns) From fc66e4a0d59d064518c3f18d65d1f4d87de8fb8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 1 Oct 2014 19:48:55 +0700 Subject: [PATCH 2/2] [utils] Add posix expanduser implementation and clarify the original source --- youtube_dl/utils.py | 88 +++++++++++++++++++++++++++++++-------------- 1 file changed, 61 insertions(+), 27 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index afe32ae05..526d2cc02 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -204,45 +204,79 @@ def compat_ord(c): else: return ord(c) -# Environment variables should be decoded with filesystem encoding -# otherwise this results in issues like #3854 #2918 #3217 if sys.version_info >= (3, 0): compat_getenv = os.getenv compat_expanduser = os.path.expanduser else: + # Environment variables should be decoded with filesystem encoding. + # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918) + def compat_getenv(key, default=None): env = os.getenv(key, default) if env: env = env.decode(get_filesystem_encoding()) return env - def compat_expanduser(path): - """Expand ~ and ~user constructs. - - If user or $HOME is unknown, do nothing.""" - if path[:1] != '~': - return path - i, n = 1, len(path) - while i < n and path[i] not in '/\\': - i += 1 - - if 'HOME' in os.environ: - userhome = compat_getenv('HOME') - elif 'USERPROFILE' in os.environ: - userhome = compat_getenv('USERPROFILE') - elif not 'HOMEPATH' in os.environ: - return path - else: - try: - drive = compat_getenv('HOMEDRIVE') - except KeyError: - drive = '' - userhome = os.path.join(drive, compat_getenv('HOMEPATH')) + # HACK: The default implementations of os.path.expanduser from cpython do not decode + # environment variables with filesystem encoding. We will work around this by + # providing adjusted implementations. + # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib + # for different platforms with correct environment variables decoding. + + if os.name == 'posix': + def compat_expanduser(path): + """Expand ~ and ~user constructions. If user or $HOME is unknown, + do nothing.""" + if not path.startswith('~'): + return path + i = path.find('/', 1) + if i < 0: + i = len(path) + if i == 1: + if 'HOME' not in os.environ: + import pwd + userhome = pwd.getpwuid(os.getuid()).pw_dir + else: + userhome = compat_getenv('HOME') + else: + import pwd + try: + pwent = pwd.getpwnam(path[1:i]) + except KeyError: + return path + userhome = pwent.pw_dir + userhome = userhome.rstrip('/') + return (userhome + path[i:]) or '/' + elif os.name == 'nt' or os.name == 'ce': + def compat_expanduser(path): + """Expand ~ and ~user constructs. + + If user or $HOME is unknown, do nothing.""" + if path[:1] != '~': + return path + i, n = 1, len(path) + while i < n and path[i] not in '/\\': + i = i + 1 + + if 'HOME' in os.environ: + userhome = compat_getenv('HOME') + elif 'USERPROFILE' in os.environ: + userhome = compat_getenv('USERPROFILE') + elif not 'HOMEPATH' in os.environ: + return path + else: + try: + drive = compat_getenv('HOMEDRIVE') + except KeyError: + drive = '' + userhome = os.path.join(drive, compat_getenv('HOMEPATH')) - if i != 1: # ~user - userhome = os.path.join(os.path.dirname(userhome), path[1:i]) + if i != 1: #~user + userhome = os.path.join(os.path.dirname(userhome), path[1:i]) - return userhome + path[i:] + return userhome + path[i:] + else: + compat_expanduser = os.path.expanduser # This is not clearly defined otherwise