Respond to review:

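- Refactor "fully" decoding the payload of a text/* part into a shared helper, util.fully_decoded_payload(). In Python 3, get_payload(decode=True) only decodes according to Content-Transfer-Encoding, not according to any charset in the Content-Type header, so the helper performs that second, bytes-to-str decoding step itself.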
2015-01-27 15:11:53 -05:00 · 2015-01-27 15:11:53 -05:00 · 76b487320e
commit 76b487320e
parent adcd95583c
3 changed files with 17 additions and 21 deletions
--- a/cloudinit/handlers/init.py
+++ b/cloudinit/handlers/init.py
@ -233,16 +233,7 @@ def walk(msg, callback, data):
        headers = dict(part)
        LOG.debug(headers)
        headers['Content-Type'] = ctype
-        payload = part.get_payload(decode=True)
-        # In Python 3, decoding the payload will ironically hand us a bytes
-        # object.  'decode' means to decode according to
-        # Content-Transfer-Encoding, not according to any charset in the
-        # Content-Type.  So, if we end up with bytes, first try to decode to
-        # str via CT charset, and failing that, try utf-8 using surrogate
-        # escapes.
-        if six.PY3 and isinstance(payload, bytes):
-            charset = part.get_charset() or 'utf-8'
-            payload = payload.decode(charset, errors='surrogateescape')
+        payload = util.fully_decoded_payload(part)
        callback(data, filename, payload, headers)
        partnum = partnum + 1

--- a/cloudinit/user_data.py
+++ b/cloudinit/user_data.py
@ -108,17 +108,7 @@ class UserDataProcessor(object):

            ctype = None
            ctype_orig = part.get_content_type()
-            ctype_main = part.get_content_maintype()
-            payload = part.get_payload(decode=True)
-            # In Python 3, decoding the payload will ironically hand us a
-            # bytes object.  'decode' means to decode according to
-            # Content-Transfer-Encoding, not according to any charset in the
-            # Content-Type.  So, if we end up with bytes, first try to decode
-            # to str via CT charset, and failing that, try utf-8 using
-            # surrogate escapes.
-            if six.PY3 and ctype_main == 'text' and isinstance(payload, bytes):
-                charset = part.get_charset() or 'utf-8'
-                payload = payload.decode(charset, errors='surrogateescape')
+            payload = util.fully_decoded_payload(part)
            was_compressed = False

            # When the message states it is of a gzipped content type ensure
--- a/cloudinit/util.py
+++ b/cloudinit/util.py
@ -110,6 +110,21 @@ def b64e(source):
    return b64encode(source).decode('utf-8')


+def fully_decoded_payload(part):
+    """Return part's payload; text/* bytes are also decoded to str on Py3."""
+    # get_payload(decode=True) only undoes the Content-Transfer-Encoding and
+    # yields bytes.  get_charset() returns an email.charset.Charset (or
+    # None), not a codec name, so decode with its input_codec and default to
+    # utf-8; surrogateescape keeps undecodable bytes instead of raising.
+    cte_payload = part.get_payload(decode=True)
+    if (six.PY3 and part.get_content_maintype() == 'text' and
+            isinstance(cte_payload, bytes)):
+        charset = part.get_charset()
+        encoding = (charset and charset.input_codec) or 'utf-8'
+        return cte_payload.decode(encoding, errors='surrogateescape')
+    return cte_payload
+
+
 # Path for DMI Data
 DMI_SYS_PATH = "/sys/class/dmi/id"