Respond to review:

- Refactor "fully" decoding the payload of a text/* part.  In Python 3,
  decode=True only means to decode according to Content-Transfer-Encoding, not
  according to any charset in the Content-Type header.  So do that.
This commit is contained in:
Barry Warsaw 2015-01-27 15:11:53 -05:00
parent adcd95583c
commit 76b487320e
3 changed files with 17 additions and 21 deletions

View File

@ -233,16 +233,7 @@ def walk(msg, callback, data):
headers = dict(part)
LOG.debug(headers)
headers['Content-Type'] = ctype
payload = part.get_payload(decode=True)
# In Python 3, decoding the payload will ironically hand us a bytes
# object. 'decode' means to decode according to
# Content-Transfer-Encoding, not according to any charset in the
# Content-Type. So, if we end up with bytes, first try to decode to
# str via CT charset, and failing that, try utf-8 using surrogate
# escapes.
if six.PY3 and isinstance(payload, bytes):
charset = part.get_charset() or 'utf-8'
payload = payload.decode(charset, errors='surrogateescape')
payload = util.fully_decoded_payload(part)
callback(data, filename, payload, headers)
partnum = partnum + 1

View File

@ -108,17 +108,7 @@ class UserDataProcessor(object):
ctype = None
ctype_orig = part.get_content_type()
ctype_main = part.get_content_maintype()
payload = part.get_payload(decode=True)
# In Python 3, decoding the payload will ironically hand us a
# bytes object. 'decode' means to decode according to
# Content-Transfer-Encoding, not according to any charset in the
# Content-Type. So, if we end up with bytes, first try to decode
# to str via CT charset, and failing that, try utf-8 using
# surrogate escapes.
if six.PY3 and ctype_main == 'text' and isinstance(payload, bytes):
charset = part.get_charset() or 'utf-8'
payload = payload.decode(charset, errors='surrogateescape')
payload = util.fully_decoded_payload(part)
was_compressed = False
# When the message states it is of a gzipped content type ensure

View File

@ -110,6 +110,21 @@ def b64e(source):
return b64encode(source).decode('utf-8')
def fully_decoded_payload(part):
    """Return the payload of *part* with transfer and text encodings undone.

    ``part.get_payload(decode=True)`` only reverses the
    Content-Transfer-Encoding; in Python 3 the result is still a bytes
    object.  For text/* parts those bytes are additionally decoded to str,
    using the codec of the part's charset when one is set and utf-8
    otherwise.  Undecodable bytes are kept as surrogate escapes instead of
    raising.

    :param part: a message part providing get_payload(),
        get_content_maintype() and get_charset().
    :return: str for text/* parts under Python 3; otherwise whatever
        get_payload(decode=True) returned, unchanged.
    """
    cte_payload = part.get_payload(decode=True)
    if (six.PY3 and
            part.get_content_maintype() == 'text' and
            isinstance(cte_payload, bytes)):
        charset = part.get_charset()
        # get_charset() hands back an email.charset.Charset instance (or
        # None), not a codec name, and bytes.decode() only accepts a str
        # encoding.  Use the Charset's input codec; a plain string is still
        # honoured for duck-typed parts that return one.
        if not isinstance(charset, str):
            charset = getattr(charset, 'input_codec', None)
        encoding = charset or 'utf-8'
        return cte_payload.decode(encoding, errors='surrogateescape')
    return cte_payload
# Path for DMI Data
DMI_SYS_PATH = "/sys/class/dmi/id"