Description: Fall back to UTF-8 when encoding or decoding with DEFAULT_ENCODING (or the call's configured encoding) raises UnicodeDecodeError or UnicodeEncodeError Author: Ben Carrillo Bug: https://github.com/amoffat/sh/issues/123 Forwarded: yes --- a/sh.py +++ b/sh.py @@ -117,8 +117,14 @@ if err_delta: tstderr += ("... (%d more, please see e.stderr)" % err_delta).encode() - msg = "\n\n RAN: %r\n\n STDOUT:\n%s\n\n STDERR:\n%s" %\ - (full_cmd, tstdout.decode(DEFAULT_ENCODING), tstderr.decode(DEFAULT_ENCODING)) + try: + msg = "\n\n ran: %r\n\n stdout:\n%s\n\n stderr:\n%s" %\ + (full_cmd, tstdout.decode(DEFAULT_ENCODING), + tstderr.decode(DEFAULT_ENCODING)) + except UnicodeDecodeError: + msg = "\n\n ran: %r\n\n stdout:\n%s\n\n stderr:\n%s" %\ + (full_cmd, tstdout.decode('utf-8'), tstderr.decode('utf-8')) + super(ErrorReturnCode, self).__init__(msg) @@ -371,8 +377,12 @@ def __unicode__(self): if self.process and self.stdout: - return self.stdout.decode(self.call_args["encoding"], - self.call_args["decode_errors"]) + try: + return self.stdout.decode(self.call_args["encoding"], + self.call_args["decode_errors"]) + except UnicodeDecodeError: + return self.stdout.decode('utf-8', + self.call_args["decode_errors"]) return "" def __eq__(self, other): @@ -561,7 +571,11 @@ # if the argument is already unicode, or a number or whatever, # this first call will fail.
try: arg = unicode(arg, DEFAULT_ENCODING).encode(DEFAULT_ENCODING) - except TypeError: arg = unicode(arg).encode(DEFAULT_ENCODING) + except TypeError: + try: + arg = unicode(arg).encode(DEFAULT_ENCODING) + except UnicodeEncodeError: + arg = unicode(arg).encode('utf-8') return arg @@ -633,7 +647,11 @@ def __str__(self): if IS_PY3: return self.__unicode__() - else: return unicode(self).encode(DEFAULT_ENCODING) + else: + try: + return unicode(self).encode(DEFAULT_ENCODING) + except UnicodeEncodeError: + return unicode(self).encode('utf-8') def __eq__(self, other): try: return str(self) == str(other) @@ -839,7 +857,11 @@ self.setwinsize(1) # actually execute the process - if self.call_args["env"] is None: os.execv(cmd[0], cmd) + if self.call_args["env"] is None: + if IS_PY3: + os.execv(cmd[0], [c.encode('utf-8') for c in cmd]) + else: + os.execv(cmd[0], cmd) else: os.execve(cmd[0], cmd, self.call_args["env"]) os._exit(255) --- a/test.py +++ b/test.py @@ -1338,9 +1338,9 @@ import sys sys.stdout.write("te漢字st") """) - fn = partial(python, py.name, _encoding="ascii") - def s(fn): str(fn()) - self.assertRaises(UnicodeDecodeError, s, fn) + #fn = partial(python, py.name, _encoding="ascii") + #def s(fn): str(fn()) + #self.assertRaises(UnicodeDecodeError, s, fn) p = python(py.name, _encoding="ascii", _decode_errors="ignore") self.assertEqual(p, "test")