cloud-init/cloudinit/user_data.py

288 lines
10 KiB
Python

# vi: ts=4 expandtab
#
# Copyright (C) 2012 Canonical Ltd.
# Copyright (C) 2012 Hewlett-Packard Development Company, L.P.
# Copyright (C) 2012 Yahoo! Inc.
#
# Author: Scott Moser <scott.moser@canonical.com>
# Author: Juerg Haefliger <juerg.haefliger@hp.com>
# Author: Joshua Harlow <harlowja@yahoo-inc.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3, as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
import email
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from cloudinit import handlers
from cloudinit import log as logging
from cloudinit import url_helper
from cloudinit import util
LOG = logging.getLogger(__name__)
# Constants copied in from the handler module
NOT_MULTIPART_TYPE = handlers.NOT_MULTIPART_TYPE
PART_FN_TPL = handlers.PART_FN_TPL
OCTET_TYPE = handlers.OCTET_TYPE
# Saves typing errors
CONTENT_TYPE = 'Content-Type'
# Various special content types that cause special actions
TYPE_NEEDED = ["text/plain", "text/x-not-multipart"]
INCLUDE_TYPES = ['text/x-include-url', 'text/x-include-once-url']
ARCHIVE_TYPES = ["text/cloud-config-archive"]
UNDEF_TYPE = "text/plain"
ARCHIVE_UNDEF_TYPE = "text/cloud-config"
# Msg header used to track attachments
ATTACHMENT_FIELD = 'Number-Attachments'
# Only the following content types can have there launch index examined
# in there payload, evey other content type can still provide a header
EXAMINE_FOR_LAUNCH_INDEX = ["text/cloud-config"]
class UserDataProcessor(object):
def __init__(self, paths):
self.paths = paths
def process(self, blob):
accumulating_msg = MIMEMultipart()
self._process_msg(convert_string(blob), accumulating_msg)
return accumulating_msg
def _process_msg(self, base_msg, append_msg):
for part in base_msg.walk():
if is_skippable(part):
continue
ctype = None
ctype_orig = part.get_content_type()
payload = part.get_payload(decode=True)
if not ctype_orig:
ctype_orig = UNDEF_TYPE
if ctype_orig in TYPE_NEEDED:
ctype = handlers.type_from_starts_with(payload)
if ctype is None:
ctype = ctype_orig
if ctype != ctype_orig:
if CONTENT_TYPE in part:
part.replace_header(CONTENT_TYPE, ctype)
else:
part[CONTENT_TYPE] = ctype
if ctype in INCLUDE_TYPES:
self._do_include(payload, append_msg)
continue
if ctype in ARCHIVE_TYPES:
self._explode_archive(payload, append_msg)
continue
# Should this be happening, shouldn't
# the part header be modified and not the base?
if CONTENT_TYPE in base_msg:
base_msg.replace_header(CONTENT_TYPE, ctype)
else:
base_msg[CONTENT_TYPE] = ctype
self._attach_part(append_msg, part)
def _attach_launch_index(self, msg):
header_idx = msg.get('Launch-Index', None)
payload_idx = None
if msg.get_content_type() in EXAMINE_FOR_LAUNCH_INDEX:
try:
# See if it has a launch-index field
# that might affect the final header
payload = util.load_yaml(msg.get_payload(decode=True))
if payload:
payload_idx = payload.get('launch-index')
except:
pass
# Header overrides contents, for now (?) or the other way around?
if header_idx is not None:
payload_idx = header_idx
# Nothing found in payload, use header (if anything there)
if payload_idx is None:
payload_idx = header_idx
if payload_idx is not None:
try:
msg.add_header('Launch-Index', str(int(payload_idx)))
except (ValueError, TypeError):
pass
def _get_include_once_filename(self, entry):
entry_fn = util.hash_blob(entry, 'md5', 64)
return os.path.join(self.paths.get_ipath_cur('data'),
'urlcache', entry_fn)
def _process_before_attach(self, msg, attached_id):
if not msg.get_filename():
msg.add_header('Content-Disposition',
'attachment', filename=PART_FN_TPL % (attached_id))
self._attach_launch_index(msg)
def _do_include(self, content, append_msg):
# Include a list of urls, one per line
# also support '#include <url here>'
# or #include-once '<url here>'
include_once_on = False
for line in content.splitlines():
lc_line = line.lower()
if lc_line.startswith("#include-once"):
line = line[len("#include-once"):].lstrip()
# Every following include will now
# not be refetched.... but will be
# re-read from a local urlcache (if it worked)
include_once_on = True
elif lc_line.startswith("#include"):
line = line[len("#include"):].lstrip()
# Disable the include once if it was on
# if it wasn't, then this has no effect.
include_once_on = False
if line.startswith("#"):
continue
include_url = line.strip()
if not include_url:
continue
include_once_fn = None
content = None
if include_once_on:
include_once_fn = self._get_include_once_filename(include_url)
if include_once_on and os.path.isfile(include_once_fn):
content = util.load_file(include_once_fn)
else:
resp = url_helper.readurl(include_url)
if include_once_on and resp.ok():
util.write_file(include_once_fn, str(resp), mode=0600)
if resp.ok():
content = str(resp)
else:
LOG.warn(("Fetching from %s resulted in"
" a invalid http code of %s"),
include_url, resp.code)
if content is not None:
new_msg = convert_string(content)
self._process_msg(new_msg, append_msg)
def _explode_archive(self, archive, append_msg):
entries = util.load_yaml(archive, default=[], allowed=(list, set))
for ent in entries:
# ent can be one of:
# dict { 'filename' : 'value', 'content' :
# 'value', 'type' : 'value' }
# filename and type not be present
# or
# scalar(payload)
if isinstance(ent, (str, basestring)):
ent = {'content': ent}
if not isinstance(ent, (dict)):
# TODO(harlowja) raise?
continue
content = ent.get('content', '')
mtype = ent.get('type')
if not mtype:
mtype = handlers.type_from_starts_with(content,
ARCHIVE_UNDEF_TYPE)
maintype, subtype = mtype.split('/', 1)
if maintype == "text":
msg = MIMEText(content, _subtype=subtype)
else:
msg = MIMEBase(maintype, subtype)
msg.set_payload(content)
if 'filename' in ent:
msg.add_header('Content-Disposition',
'attachment', filename=ent['filename'])
if 'launch-index' in ent:
msg.add_header('Launch-Index', str(ent['launch-index']))
for header in list(ent.keys()):
if header in ('content', 'filename', 'type', 'launch-index'):
continue
msg.add_header(header, ent['header'])
self._attach_part(append_msg, msg)
def _multi_part_count(self, outer_msg, new_count=None):
"""
Return the number of attachments to this MIMEMultipart by looking
at its 'Number-Attachments' header.
"""
if ATTACHMENT_FIELD not in outer_msg:
outer_msg[ATTACHMENT_FIELD] = '0'
if new_count is not None:
outer_msg.replace_header(ATTACHMENT_FIELD, str(new_count))
fetched_count = 0
try:
fetched_count = int(outer_msg.get(ATTACHMENT_FIELD))
except (ValueError, TypeError):
outer_msg.replace_header(ATTACHMENT_FIELD, str(fetched_count))
return fetched_count
def _attach_part(self, outer_msg, part):
"""
Attach a message to an outer message. outermsg must be a MIMEMultipart.
Modifies a header in the outer message to keep track of number of attachments.
"""
part_count = self._multi_part_count(outer_msg)
self._process_before_attach(part, part_count + 1)
outer_msg.attach(part)
self._multi_part_count(outer_msg, part_count + 1)
def is_skippable(part):
# multipart/* are just containers
part_maintype = part.get_content_maintype() or ''
if part_maintype.lower() == 'multipart':
return True
return False
# Coverts a raw string into a mime message
def convert_string(raw_data, headers=None):
if not raw_data:
raw_data = ''
if not headers:
headers = {}
data = util.decomp_gzip(raw_data)
if "mime-version:" in data[0:4096].lower():
msg = email.message_from_string(data)
for (key, val) in headers.iteritems():
if key in msg:
msg.replace_header(key, val)
else:
msg[key] = val
else:
mtype = headers.get(CONTENT_TYPE, NOT_MULTIPART_TYPE)
maintype, subtype = mtype.split("/", 1)
msg = MIMEBase(maintype, subtype, *headers)
msg.set_payload(data)
return msg