Moved the common user data classes and functionality back to this file since for now it seems to make organizational sense to put it here.

This commit is contained in:
Joshua Harlow 2012-06-15 17:38:32 -07:00
parent 71ae0da583
commit a2a0bb8998
1 changed files with 383 additions and 0 deletions

383
cloudinit/user_data.py Normal file
View File

@ -0,0 +1,383 @@
# vi: ts=4 expandtab
#
# Copyright (C) 2012 Canonical Ltd.
# Copyright (C) 2012 Hewlett-Packard Development Company, L.P.
# Copyright (C) 2012 Yahoo! Inc.
#
# Author: Scott Moser <scott.moser@canonical.com>
# Author: Juerg Haefliger <juerg.haefliger@hp.com>
# Author: Joshua Harlow <harlowja@yahoo-inc.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3, as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import email
import glob
import os

from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

from cloudinit import importer
from cloudinit import log as logging
from cloudinit import url_helper
from cloudinit import util

from cloudinit.settings import (PER_ALWAYS, PER_INSTANCE, FREQUENCIES)
LOG = logging.getLogger(__name__)

# Special content types that signal the start and end of processing
CONTENT_END = "__end__"
CONTENT_START = "__begin__"
CONTENT_SIGNALS = [CONTENT_START, CONTENT_END]

# Used when a part-handler type is encountered
# to allow for registration of new types.
PART_CONTENT_TYPES = ["text/part-handler"]
PART_HANDLER_FN_TMPL = 'part-handler-%03d'

# For parts without filenames
PART_FN_TPL = 'part-%03d'

# Used as the content type when a message is not multipart
# and it doesn't contain its own content-type
NOT_MULTIPART_TYPE = "text/x-not-multipart"

# Fallback content type for parts that do not report one
# (defined exactly once; it used to be assigned twice in this module)
OCTET_TYPE = 'application/octet-stream'

# Different file beginnings to their content type
INCLUSION_TYPES_MAP = {
    '#include': 'text/x-include-url',
    '#include-once': 'text/x-include-once-url',
    '#!': 'text/x-shellscript',
    '#cloud-config': 'text/cloud-config',
    '#upstart-job': 'text/upstart-job',
    '#part-handler': 'text/part-handler',
    '#cloud-boothook': 'text/cloud-boothook',
    '#cloud-config-archive': 'text/cloud-config-archive',
}

# Sorted longest first, so that a marker which is a prefix of another
# (e.g. '#include' and '#include-once') never shadows the longer one.
INCLUSION_SRCH = sorted(INCLUSION_TYPES_MAP, key=len, reverse=True)

# Various special content types
UNDEF_TYPE = "text/plain"
# Content types whose real type has to be sniffed from the payload start
TYPE_NEEDED = [UNDEF_TYPE, NOT_MULTIPART_TYPE]
INCLUDE_TYPES = ['text/x-include-url', 'text/x-include-once-url']
ARCHIVE_TYPES = ["text/cloud-config-archive"]
ARCHIVE_UNDEF_TYPE = "text/cloud-config"

# Msg header used to track attachments
ATTACHMENT_FIELD = 'Number-Attachments'
class UserDataProcessor(object):
    """Flattens raw user data into one MIME multipart message.

    Includes (lists of urls) are fetched and processed recursively, archives
    (yaml lists of entries) are exploded into individual parts, and every
    other part is attached to the resulting message as is.
    """

    def __init__(self, paths):
        # Only used (in this class) to locate the include-once url cache.
        self.paths = paths

    def process(self, blob):
        """Convert the raw user data blob into a new MIMEMultipart message.

        Returns the accumulated multipart message holding all found parts.
        """
        base_msg = convert_string(blob)
        process_msg = MIMEMultipart()
        self._process_msg(base_msg, process_msg)
        return process_msg

    def _process_msg(self, base_msg, append_msg):
        """Walk base_msg and attach its (expanded) parts onto append_msg."""
        for part in base_msg.walk():
            # multipart/* are just containers
            if part.get_content_maintype() == 'multipart':
                continue

            ctype = None
            ctype_orig = part.get_content_type()
            payload = part.get_payload(decode=True)

            if not ctype_orig:
                ctype_orig = UNDEF_TYPE

            # Generic types carry no information, so sniff the payload start.
            if ctype_orig in TYPE_NEEDED:
                ctype = type_from_starts_with(payload)

            if ctype is None:
                ctype = ctype_orig

            if ctype in INCLUDE_TYPES:
                self._do_include(payload, append_msg)
                continue

            if ctype in ARCHIVE_TYPES:
                self._explode_archive(payload, append_msg)
                continue

            # Record the detected type on the part being attached. Writing it
            # onto base_msg (as was done before) would clobber the header of
            # the enclosing container and leave the part itself unchanged.
            if 'Content-Type' in part:
                part.replace_header('Content-Type', ctype)
            else:
                part['Content-Type'] = ctype

            self._attach_part(append_msg, part)

    def _get_include_once_filename(self, entry):
        """Return the cache file path used for an include-once url."""
        # NOTE(review): presumably an md5 based digest of the url, see
        # util.hash_blob for the exact meaning of the arguments.
        entry_fn = util.hash_blob(entry, 'md5', 64)
        return os.path.join(self.paths.get_ipath_cur('data'),
                            'urlcache', entry_fn)

    def _do_include(self, content, append_msg):
        """Fetch every url listed in content and process what was fetched.

        content is just a list of urls, one per line, the forms
        '#include <url here>' and '#include-once <url here>' are also
        supported. A line starting with '#include-once' (with or without an
        url) turns caching on for itself and the lines that follow it, a
        line starting with '#include' turns it off again. Other lines that
        start with '#' and blank lines are ignored.
        """
        # Sticky across lines, so that the usual layout of a lone
        # '#include-once' header line followed by urls does enable caching.
        include_once_on = False
        for line in content.splitlines():
            if line.startswith("#include-once"):
                line = line[len("#include-once"):].lstrip()
                include_once_on = True
            elif line.startswith("#include"):
                line = line[len("#include"):].lstrip()
                include_once_on = False
            if line.startswith("#"):
                continue
            include_url = line.strip()
            if not include_url:
                # Blank line, or a marker line that carried no url.
                continue

            cache_fn = self._get_include_once_filename(include_url)
            if include_once_on and os.path.isfile(cache_fn):
                included = util.load_file(cache_fn)
            else:
                (included, st) = url_helper.readurl(include_url)
                # The status only exists when a fetch really happened, so it
                # must not be looked at when the cache was used.
                if not url_helper.ok_http_code(st):
                    included = ''
                elif include_once_on:
                    util.write_file(cache_fn, included, mode=0o600)

            new_msg = convert_string(included)
            self._process_msg(new_msg, append_msg)

    def _explode_archive(self, archive, append_msg):
        """Attach each entry of a yaml archive as its own message part."""
        entries = util.load_yaml(archive, default=[], allowed=[list, set])
        for ent in entries:
            # ent can be one of:
            #  dict { 'filename' : 'value', 'content' :
            #       'value', 'type' : 'value' }
            #    filename and type may not be present
            # or
            #  scalar(payload)
            if isinstance(ent, (str, basestring)):
                ent = {'content': ent}
            if not isinstance(ent, dict):
                # TODO raise?
                continue

            content = ent.get('content', '')
            mtype = ent.get('type')
            if not mtype:
                mtype = type_from_starts_with(content, ARCHIVE_UNDEF_TYPE)

            maintype, subtype = mtype.split('/', 1)
            if maintype == "text":
                msg = MIMEText(content, _subtype=subtype)
            else:
                msg = MIMEBase(maintype, subtype)
                msg.set_payload(content)

            if 'filename' in ent:
                msg.add_header('Content-Disposition', 'attachment',
                               filename=ent['filename'])

            # Any remaining key is passed through as a message header.
            for header in ent.keys():
                if header in ('content', 'filename', 'type'):
                    continue
                # Look up the value by the loop variable, the literal key
                # 'header' (used before) is not what is wanted here.
                msg.add_header(header, ent[header])

            self._attach_part(append_msg, msg)

    def _multi_part_count(self, outer_msg, new_count=None):
        """
        Return the number of attachments to this MIMEMultipart by looking
        at its 'Number-Attachments' header.

        If new_count is provided the header is first updated to that value.
        A missing or unparsable header is (re)set to zero.
        """
        if ATTACHMENT_FIELD not in outer_msg:
            outer_msg[ATTACHMENT_FIELD] = '0'

        if new_count is not None:
            outer_msg.replace_header(ATTACHMENT_FIELD, str(new_count))

        fetched_count = 0
        try:
            fetched_count = int(outer_msg.get(ATTACHMENT_FIELD))
        except (ValueError, TypeError):
            outer_msg.replace_header(ATTACHMENT_FIELD, str(fetched_count))
        return fetched_count

    def _attach_part(self, outer_msg, part):
        """
        Attach a part to an outer message. outer_msg must be a MIMEMultipart.
        Modifies a header in the message to keep track of number of
        attachments.
        """
        cur = self._multi_part_count(outer_msg)
        if not part.get_filename():
            # Parts without a filename get a generated, numbered one.
            fn = PART_FN_TPL % (cur + 1)
            part.add_header('Content-Disposition', 'attachment', filename=fn)
        outer_msg.attach(part)
        self._multi_part_count(outer_msg, cur + 1)
class PartHandler(object):
    """Base class for part handlers.

    Subclasses are expected to provide list_types() and _handle_part(),
    both of which raise NotImplementedError here.
    """

    def __init__(self, frequency, version=2):
        self.frequency = frequency
        self.handler_version = version

    def __repr__(self):
        name = util.obj_name(self)
        handled = self.list_types()
        return "%s: [%s]" % (name, handled)

    def list_types(self):
        """Return the content types handled (must be overridden)."""
        raise NotImplementedError()

    def handle_part(self, data, ctype, filename, payload, frequency):
        """Public entry point, delegates directly to _handle_part()."""
        result = self._handle_part(data, ctype, filename, payload, frequency)
        return result

    def _handle_part(self, data, ctype, filename, payload, frequency):
        """Process a single part (must be overridden)."""
        raise NotImplementedError()
def fixup_module(mod, def_freq=PER_INSTANCE):
    """Fill in the handler attributes that a handler module is missing.

    The attributes 'handler_version', 'list_types', 'frequency' and
    'handle_part' are only set when not already present on mod. An existing
    frequency that is not one of FREQUENCIES is reported with a warning but
    is otherwise left untouched.

    Returns the same (possibly modified) mod object.
    """
    if not hasattr(mod, "handler_version"):
        setattr(mod, "handler_version", 1)
    if not hasattr(mod, 'list_types'):
        def empty_types():
            return []
        setattr(mod, 'list_types', empty_types)
    if not hasattr(mod, 'frequency'):
        setattr(mod, 'frequency', def_freq)
    else:
        freq = mod.frequency
        if freq and freq not in FREQUENCIES:
            LOG.warning("Module %s has an unknown frequency %s", mod, freq)
    if not hasattr(mod, 'handle_part'):
        # The optional trailing argument lets this placeholder be called
        # both the version 1 way (4 arguments) and the newer way which
        # also passes the frequency (5 arguments).
        def empty_handler(_data, _ctype, _filename, _payload,
                          _frequency=None):
            pass
        setattr(mod, 'handle_part', empty_handler)
    return mod
def run_part(mod, data, ctype, filename, payload, frequency):
    """Call mod.handle_part() if the handler frequency allows it.

    The handler is only called when its own frequency is PER_ALWAYS, or when
    both its frequency and the given frequency are PER_INSTANCE. Handlers
    with handler_version 1 are called without the frequency argument.

    Errors raised by the handler are logged and not propagated.
    """
    mod_freq = mod.frequency
    if not (mod_freq == PER_ALWAYS or
            (frequency == PER_INSTANCE and mod_freq == PER_INSTANCE)):
        return
    mod_ver = mod.handler_version
    try:
        if mod_ver == 1:
            mod.handle_part(data, ctype, filename, payload)
        else:
            mod.handle_part(data, ctype, filename, payload, frequency)
    except Exception:
        # Deliberately best effort, a broken handler should not stop the
        # processing of the other parts. Only Exception is caught so that
        # SystemExit and KeyboardInterrupt are not silently swallowed.
        util.logexc(LOG, ("Failed calling mod %s (%s, %s, %s)"
                          " with frequency %s"),
                    mod, ctype, filename,
                    mod_ver, frequency)
def call_begin(mod, data, frequency):
    """Send the CONTENT_START signal (no filename, no payload) to mod."""
    run_part(mod=mod, data=data, ctype=CONTENT_START,
             filename=None, payload=None, frequency=frequency)
def call_end(mod, data, frequency):
    """Send the CONTENT_END signal (no filename, no payload) to mod."""
    run_part(mod=mod, data=data, ctype=CONTENT_END,
             filename=None, payload=None, frequency=frequency)
def walker_handle_handler(pdata, _ctype, _filename, payload):
    """Write out a part-handler payload as a module and register it.

    The payload is written to a numbered python file inside
    pdata['handlerdir'], imported, fixed up, registered in pdata['handlers']
    and then sent the begin signal. pdata['handlercount'] is only advanced
    when all of that worked, failures are logged and not propagated.
    """
    curcount = pdata['handlercount']
    modname = PART_HANDLER_FN_TMPL % (curcount)
    frequency = pdata['frequency']
    modfname = os.path.join(pdata['handlerdir'], modname)
    if not modfname.endswith(".py"):
        modfname = "%s.py" % (modfname)
    # TODO: Check if path exists??
    # 0o600 is the octal spelling accepted by python 2.6+ and python 3
    util.write_file(modfname, payload, 0o600)
    handlers = pdata['handlers']
    try:
        mod = fixup_module(importer.import_module(modname))
        handlers.register(mod)
        call_begin(mod, pdata['data'], frequency)
        pdata['handlercount'] = curcount + 1
    except Exception:
        # Best effort, a handler that can not be loaded is logged and
        # skipped. SystemExit/KeyboardInterrupt are left to propagate.
        util.logexc(LOG, "Failed at registered python file: %s", modfname)
def walker_callback(pdata, ctype, filename, payload):
    """Dispatch one walked part to whatever is able to handle it.

    Part-handler content is registered as a new handler, content with a
    registered handler is run through that handler, and anything else is
    dropped (with a warning when it is non-multipart user data).
    """
    # New handler code being provided
    if ctype in PART_CONTENT_TYPES:
        walker_handle_handler(pdata, ctype, filename, payload)
        return

    registry = pdata['handlers']
    if ctype in registry:
        run_part(registry[ctype], pdata['data'], ctype, filename,
                 payload, pdata['frequency'])
        return

    # Nothing handles this type, only the non-multipart case is reported
    if ctype != NOT_MULTIPART_TYPE:
        return

    # Extract the first line or 24 bytes for displaying in the log
    first_line = payload.split("\n", 1)[0]
    start = first_line[:24]
    # start is always a prefix of payload, so they only differ
    # when something was cut off
    if start != payload:
        details = "starting '%s...'" % start.encode("string-escape")
    else:
        details = repr(payload)
    LOG.warning("Unhandled non-multipart userdata: %s", details)
# Callback is a function that will be called with
# (data, content_type, filename, payload)
def walk(msg, callback, data):
    """Invoke callback once for every non-container part of msg.

    The callback receives (data, content_type, filename, payload) where the
    payload is the decoded payload of the part. Parts without a filename are
    given a generated one based on their position among the visited parts.
    """
    # multipart/* are just containers, so they are filtered out up front
    leaves = (entry for entry in msg.walk()
              if entry.get_content_maintype() != 'multipart')
    for partnum, part in enumerate(leaves):
        ctype = part.get_content_type()
        if ctype is None:
            ctype = OCTET_TYPE
        filename = part.get_filename() or (PART_FN_TPL % (partnum))
        payload = part.get_payload(decode=True)
        callback(data, ctype, filename, payload)
# Converts a raw string into a MIME message
def convert_string(raw_data, headers=None):
    """Build an email message object out of a raw user data string.

    Data that looks like a MIME message (a 'mime-version:' header somewhere
    in its first 4096 characters) is parsed as one. Anything else becomes
    the payload of a single part whose type is the 'Content-Type' entry of
    headers, or NOT_MULTIPART_TYPE when that is not given.

    headers is an optional dict of header names to values, each one replaces
    the same named header of the resulting message or is added to it.

    Returns the created message.
    """
    if not raw_data:
        raw_data = ''
    if not headers:
        headers = {}
    # NOTE(review): presumably decompresses compressed input and returns
    # other input unchanged, confirm against util.decomp_str.
    data = util.decomp_str(raw_data)
    if "mime-version:" in data[0:4096].lower():
        msg = email.message_from_string(data)
    else:
        mtype = headers.get("Content-Type", NOT_MULTIPART_TYPE)
        maintype, subtype = mtype.split("/", 1)
        # MIMEBase only accepts content-type parameters as keyword
        # arguments, the header names were being passed positionally before
        # which raised a TypeError for any non-empty headers.
        msg = MIMEBase(maintype, subtype)
        msg.set_payload(data)
    # Applied for both kinds of messages so that the provided headers are
    # honored no matter how the message was created.
    for (key, val) in headers.items():
        if key in msg:
            msg.replace_header(key, val)
        else:
            msg[key] = val
    return msg
def type_from_starts_with(payload, default=None):
    """Return the content type matching the start of payload.

    The known start markers are tried in INCLUSION_SRCH order and the type
    of the first one that payload starts with is returned, default is
    returned when none of them match.
    """
    matching_types = (INCLUSION_TYPES_MAP[marker]
                      for marker in INCLUSION_SRCH
                      if payload.startswith(marker))
    return next(matching_types, default)