Respond to review:
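- Refactor "fully" decoding the payload of a text/* part into a shared helper, util.fully_decoded_payload(). In Python 3, get_payload(decode=True) only decodes according to Content-Transfer-Encoding, not according to any charset in the Content-Type header, so the helper performs the additional charset decoding for text parts.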
This commit is contained in:
parent
adcd95583c
commit
76b487320e
@ -233,16 +233,7 @@ def walk(msg, callback, data):
|
||||
headers = dict(part)
|
||||
LOG.debug(headers)
|
||||
headers['Content-Type'] = ctype
|
||||
payload = part.get_payload(decode=True)
|
||||
# In Python 3, decoding the payload will ironically hand us a bytes
|
||||
# object. 'decode' means to decode according to
|
||||
# Content-Transfer-Encoding, not according to any charset in the
|
||||
# Content-Type. So, if we end up with bytes, first try to decode to
|
||||
# str via CT charset, and failing that, try utf-8 using surrogate
|
||||
# escapes.
|
||||
if six.PY3 and isinstance(payload, bytes):
|
||||
charset = part.get_charset() or 'utf-8'
|
||||
payload = payload.decode(charset, errors='surrogateescape')
|
||||
payload = util.fully_decoded_payload(part)
|
||||
callback(data, filename, payload, headers)
|
||||
partnum = partnum + 1
|
||||
|
||||
|
@ -108,17 +108,7 @@ class UserDataProcessor(object):
|
||||
|
||||
ctype = None
|
||||
ctype_orig = part.get_content_type()
|
||||
ctype_main = part.get_content_maintype()
|
||||
payload = part.get_payload(decode=True)
|
||||
# In Python 3, decoding the payload will ironically hand us a
|
||||
# bytes object. 'decode' means to decode according to
|
||||
# Content-Transfer-Encoding, not according to any charset in the
|
||||
# Content-Type. So, if we end up with bytes, first try to decode
|
||||
# to str via CT charset, and failing that, try utf-8 using
|
||||
# surrogate escapes.
|
||||
if six.PY3 and ctype_main == 'text' and isinstance(payload, bytes):
|
||||
charset = part.get_charset() or 'utf-8'
|
||||
payload = payload.decode(charset, errors='surrogateescape')
|
||||
payload = util.fully_decoded_payload(part)
|
||||
was_compressed = False
|
||||
|
||||
# When the message states it is of a gzipped content type ensure
|
||||
|
@ -110,6 +110,21 @@ def b64e(source):
|
||||
return b64encode(source).decode('utf-8')
|
||||
|
||||
|
||||
def fully_decoded_payload(part):
    """Return the payload of a MIME part, decoded as far as possible.

    ``part.get_payload(decode=True)`` only undoes the
    Content-Transfer-Encoding; on Python 3 that hands back a bytes object.
    For ``text/*`` parts those bytes are additionally decoded to str using
    the charset attached to the part. If no charset is attached, or decoding
    with it fails, UTF-8 is used instead. Bytes that cannot be decoded are
    preserved through surrogate escapes rather than raising.

    :param part: a message part offering ``get_payload``,
        ``get_content_maintype`` and ``get_charset``.
    :return: str for text parts on Python 3; otherwise whatever
        ``get_payload(decode=True)`` returned, unchanged.
    """
    cte_payload = part.get_payload(decode=True)
    if not (six.PY3 and
            part.get_content_maintype() == 'text' and
            isinstance(cte_payload, bytes)):
        return cte_payload

    # get_charset() yields an email.charset.Charset instance (or None), not
    # a codec name, and bytes.decode() only accepts a name. Use the
    # Charset's input_codec when present; a plain string is passed through
    # unchanged so duck-typed parts keep working.
    # NOTE(review): get_charset() reflects a charset set via set_charset();
    # a charset= parameter parsed from the Content-Type header is exposed by
    # get_content_charset() instead -- confirm which one is intended here.
    charset = part.get_charset()
    encoding = getattr(charset, 'input_codec', charset) or 'utf-8'
    try:
        return cte_payload.decode(encoding, errors='surrogateescape')
    except (LookupError, UnicodeError):
        # Unknown codec, or one that cannot honour surrogateescape: fall
        # back to UTF-8, which never fails with surrogate escapes.
        return cte_payload.decode('utf-8', errors='surrogateescape')
|
||||
|
||||
|
||||
# Path for DMI Data
|
||||
DMI_SYS_PATH = "/sys/class/dmi/id"
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user