From e645ac2acff34c7e58af2b4715b5bcdd2dffa31a Mon Sep 17 00:00:00 2001 From: Wayne Warren Date: Tue, 31 May 2016 07:27:11 -0700 Subject: [PATCH] Move macro expansion into YamlParser. Introduce the registry.MacroRegistry class to handle: * registration of macro types via setuptools' entrypoints * registration of individual macros for lookup by component list type * expansion of macros references during YAML "parsing" As a consequence there is a reduction in performance due to moving the expansion of macros from inline with XML generation, to requiring multiple passes over macro component lists. This decrease in efficiency results in approx ~30-50% increase in unit test time. Since this will allow for jobs to be expanded from templates/macros in parallel with future changes, it is a reasonable short term trade-off as the most computationally expensive task is updating the definitions on the remote master Change-Id: I292c6b1f8472370282205426cd8ceb847eb969bd --- jenkins_jobs/local_yaml.py | 34 +++- jenkins_jobs/parser.py | 13 ++ jenkins_jobs/registry.py | 265 ++++++++++++++++++++++++---- setup.cfg | 13 ++ tests/xml_config/test_xml_config.py | 5 +- 5 files changed, 288 insertions(+), 42 deletions(-) diff --git a/jenkins_jobs/local_yaml.py b/jenkins_jobs/local_yaml.py index 18b7802c0..49faf65ce 100644 --- a/jenkins_jobs/local_yaml.py +++ b/jenkins_jobs/local_yaml.py @@ -143,6 +143,7 @@ Examples: .. literalinclude:: /../../tests/yamlparser/fixtures/jinja01.yaml.inc """ +import copy import functools import io import logging @@ -290,6 +291,32 @@ class LocalLoader(OrderedConstructor, LocalAnchorLoader): def _escape(self, data): return re.sub(r'({|})', r'\1\1', data) + def __deepcopy__(self, memo): + """ + Make a deep copy of a LocalLoader excluding the uncopyable self.stream. + + This is achieved by performing a shallow copy of self, setting the + stream attribute to None and then performing a deep copy of the shallow + copy. + + (As this method will be called again on that deep copy, we also set a + sentinel attribute on the shallow copy to ensure that we don't recurse + infinitely.) + """ + assert self.done, 'Unsafe to copy an in-progress loader' + if getattr(self, '_copy', False): + # This is a shallow copy for an in-progress deep copy, remove the + # _copy marker and return self + del self._copy + return self + # Make a shallow copy + shallow = copy.copy(self) + shallow.stream = None + shallow._copy = True + deep = copy.deepcopy(shallow, memo) + memo[id(self)] = deep + return deep + class LocalDumper(OrderedRepresenter, yaml.Dumper): def __init__(self, *args, **kwargs): @@ -440,11 +467,12 @@ class CustomLoader(object): class Jinja2Loader(CustomLoader): """A loader for Jinja2-templated files.""" def __init__(self, contents): - self._template = jinja2.Template(contents) - self._template.environment.undefined = jinja2.StrictUndefined + self._contents = contents def format(self, **kwargs): - return self._template.render(kwargs) + _template = jinja2.Template(self._contents) + _template.environment.undefined = jinja2.StrictUndefined + return _template.render(kwargs) class CustomLoaderCollection(object): diff --git a/jenkins_jobs/parser.py b/jenkins_jobs/parser.py index 3594f8b93..5dee3cac3 100644 --- a/jenkins_jobs/parser.py +++ b/jenkins_jobs/parser.py @@ -25,6 +25,7 @@ import os from jenkins_jobs.constants import MAGIC_MANAGE_STRING from jenkins_jobs.errors import JenkinsJobsException from jenkins_jobs.formatter import deep_format +from jenkins_jobs.registry import MacroRegistry import jenkins_jobs.local_yaml as local_yaml from jenkins_jobs import utils @@ -81,6 +82,8 @@ class YamlParser(object): self.keep_desc = jjb_config.yamlparser['keep_descriptions'] self.path = jjb_config.yamlparser['include_path'] + self._macro_registry = MacroRegistry() + def load_files(self, fn): # handle deprecated behavior, and check that it's not a file like @@ -218,6 +221,11 @@ class YamlParser(object): job["description"] = description + \ self._get_managed_string().lstrip() + def _register_macros(self): + for component_type in self._macro_registry.component_types: + for macro in self.data.get(component_type, {}).values(): + self._macro_registry.register(component_type, macro) + def expandYaml(self, registry, jobs_glob=None): changed = True while changed: @@ -227,7 +235,11 @@ class YamlParser(object): if module.handle_data(self.data): changed = True + self._register_macros() + for default in self.data.get('defaults', {}).values(): + self._macro_registry.expand_macros(default) for job in self.data.get('job', {}).values(): + self._macro_registry.expand_macros(job) if jobs_glob and not matches(job['name'], jobs_glob): logger.debug("Ignoring job {0}".format(job['name'])) continue @@ -389,6 +401,7 @@ class YamlParser(object): "params '%s'", template_name, template, params) raise + self._macro_registry.expand_macros(expanded, params) job_name = expanded.get('name') if jobs_glob and not matches(job_name, jobs_glob): continue diff --git a/jenkins_jobs/registry.py b/jenkins_jobs/registry.py index b90e36a1f..9321ad1cf 100644 --- a/jenkins_jobs/registry.py +++ b/jenkins_jobs/registry.py @@ -15,6 +15,7 @@ # Manage Jenkins plugin module registry. +import copy import logging import operator import pkg_resources @@ -31,8 +32,225 @@ __all__ = [ logger = logging.getLogger(__name__) +class MacroRegistry(object): + + _component_to_component_list_mapping = {} + _component_list_to_component_mapping = {} + _macros_by_component_type = {} + _macros_by_component_list_type = {} + + def __init__(self): + + for entrypoint in pkg_resources.iter_entry_points( + group='jenkins_jobs.macros'): + Mod = entrypoint.load() + self._component_list_to_component_mapping[ + Mod.component_list_type] = Mod.component_type + self._component_to_component_list_mapping[ + Mod.component_type] = Mod.component_list_type + self._macros_by_component_type[ + Mod.component_type] = {} + self._macros_by_component_list_type[ + Mod.component_list_type] = {} + + self._mask_warned = {} + + @property + def _nonempty_component_list_types(self): + return [clt for clt in self._macros_by_component_list_type + if len(self._macros_by_component_list_type[clt]) != 0] + + @property + def component_types(self): + return self._macros_by_component_type.keys() + + def _is_macro(self, component_name, component_list_type): + return (component_name in + self._macros_by_component_list_type[component_list_type]) + + def register(self, component_type, macro): + macro_name = macro["name"] + clt = self._component_to_component_list_mapping[component_type] + self._macros_by_component_type[component_type][macro_name] = macro + self._macros_by_component_list_type[clt][macro_name] = macro + + def expand_macros(self, jobish, template_data=None): + """Create a copy of the given job-like thing, expand macros in place on + the copy, and return that object to calling context. + + :arg dict jobish: A job-like JJB data structure. Could be anything that + might provide JJB "components" that get expanded to XML configuration. + This includes "job", "job-template", and "default" DSL items. This + argument is not modified in place, but rather copied so that the copy + may be returned to calling context. + + :arg dict template_data: If jobish is a job-template, use the same + template data used to fill in job-template variables to fill in macro + variables. + """ + for component_list_type in self._nonempty_component_list_types: + self._expand_macros_for_component_list_type( + jobish, component_list_type, template_data) + + def _expand_macros_for_component_list_type(self, + jobish, + component_list_type, + template_data=None): + """In-place expansion of macros on jobish. + + :arg dict jobish: A job-like JJB data structure. Could be anything that + might provide JJB "components" that get expanded to XML configuration. + This includes "job", "job-template", and "default" DSL items. This + argument is not modified in place, but rather copied so that the copy + may be returned to calling context. + + :arg str component_list_type: A string value indicating which type of + component we are expanding macros for. + + :arg dict template_data: If jobish is a job-template, use the same + template data used to fill in job-template variables to fill in macro + variables. + """ + if (jobish.get("project-type", None) == "pipeline" + and component_list_type == "scm"): + # Pipeline projects have an atypical scm type, eg: + # + # - job: + # name: whatever + # project-type: pipeline + # pipeline-scm: + # script-path: nonstandard-scriptpath.groovy + # scm: + # - macro_name + # + # as opposed to the more typical: + # + # - job: + # name: whatever2 + # scm: + # - macro_name + # + # So we treat that case specially here. + component_list = jobish.get("pipeline-scm", {}).get("scm", []) + else: + component_list = jobish.get(component_list_type, []) + + component_substitutions = [] + for component in component_list: + macro_component_list = self._maybe_expand_macro( + component, component_list_type, template_data) + + if macro_component_list is not None: + # Since macros can contain other macros, we need to recurse + # into the newly-expanded macro component list to expand any + # macros that might be hiding in there. In order to do this we + # have to make the macro component list look like a job by + # embedding it in a dictionary like so. + self._expand_macros_for_component_list_type( + {component_list_type: macro_component_list}, + component_list_type, + template_data) + + component_substitutions.append( + (component, macro_component_list)) + + for component, macro_component_list in component_substitutions: + component_index = component_list.index(component) + component_list.remove(component) + i = 0 + for macro_component in macro_component_list: + component_list.insert(component_index + i, macro_component) + i += 1 + + def _maybe_expand_macro(self, + component, + component_list_type, + template_data=None): + """For a given component, if it refers to a macro, return the + components defined for that macro with template variables (if any) + interpolated in. + + :arg str component_list_type: A string value indicating which type of + component we are expanding macros for. + + :arg dict template_data: If component is a macro and contains template + variables, use the same template data used to fill in job-template + variables to fill in macro variables. + """ + component_copy = copy.deepcopy(component) + + if isinstance(component, dict): + # The component is a singleton dictionary of name: + # dict(args) + component_name, component_data = next(iter(component_copy.items())) + else: + # The component is a simple string name, eg "run-tests". + component_name, component_data = component_copy, None + + if template_data: + # Address the case where a macro name contains a variable to be + # interpolated by template variables. + component_name = deep_format(component_name, template_data, True) + + # Check that the component under consideration actually is a + # macro. + if not self._is_macro(component_name, component_list_type): + return None + + # Warn if the macro shadows an actual module type name for this + # component list type. + if ModuleRegistry.is_module_name(component_name, component_list_type): + self._mask_warned[component_name] = True + logger.warning( + "You have a macro ('%s') defined for '%s' " + "component list type that is masking an inbuilt " + "definition" % (component_name, component_list_type)) + + macro_component_list = self._get_macro_components(component_name, + component_list_type) + + # If macro instance contains component_data, interpolate that + # into macro components. + if component_data: + + # Also use template_data, but prefer data obtained directly from + # the macro instance. + if template_data: + template_data = copy.deepcopy(template_data) + template_data.update(component_data) + + macro_component_list = deep_format( + macro_component_list, template_data, False) + else: + macro_component_list = deep_format( + macro_component_list, component_data, False) + + return macro_component_list + + def _get_macro_components(self, macro_name, component_list_type): + """Return the list of components that a macro expands into. For example: + + - wrapper: + name: timeout-wrapper + wrappers: + - timeout: + fail: true + elastic-percentage: 150 + elastic-default-timeout: 90 + type: elastic + + Provides a single "wrapper" type (corresponding to the "wrappers" list + type) component named "timeout" with the values shown above. + + The macro_name argument in this case would be "timeout-wrapper". + """ + macro_component_list = self._macros_by_component_list_type[ + component_list_type][macro_name][component_list_type] + return copy.deepcopy(macro_component_list) + + class ModuleRegistry(object): - entry_points_cache = {} + _entry_points_cache = {} def __init__(self, jjb_config, plugins_list=None): self.modules = [] @@ -129,8 +347,7 @@ class ModuleRegistry(object): def set_parser_data(self, parser_data): self.__parser_data = parser_data - def dispatch(self, component_type, xml_parent, - component, template_data={}): + def dispatch(self, component_type, xml_parent, component): """This is a method that you can call from your implementation of Base.gen_xml or component. It allows modules to define a type of component, and benefit from extensibility via Python @@ -140,8 +357,6 @@ class ModuleRegistry(object): (e.g., `builder`) :arg YAMLParser parser: the global YAML Parser :arg Element xml_parent: the parent XML element - :arg dict template_data: values that should be interpolated into - the component definition See :py:class:`jenkins_jobs.modules.base.Base` for how to register components of a module. @@ -160,25 +375,13 @@ class ModuleRegistry(object): if isinstance(component, dict): # The component is a singleton dictionary of name: dict(args) name, component_data = next(iter(component.items())) - if template_data: - # Template data contains values that should be interpolated - # into the component definition - try: - component_data = deep_format( - component_data, template_data, - self.jjb_config.yamlparser['allow_empty_variables']) - except Exception: - logging.error( - "Failure formatting component ('%s') data '%s'", - name, component_data) - raise else: # The component is a simple string name, eg "run-tests" name = component component_data = {} # Look for a component function defined in an entry point - eps = ModuleRegistry.entry_points_cache.get(component_list_type) + eps = self._entry_points_cache.get(component_list_type) if eps is None: module_eps = [] # auto build entry points by inferring from base component_types @@ -230,29 +433,21 @@ class ModuleRegistry(object): eps[module_ep.name] = module_ep # cache both sets of entry points - ModuleRegistry.entry_points_cache[component_list_type] = eps + self._entry_points_cache[component_list_type] = eps logger.debug("Cached entry point group %s = %s", component_list_type, eps) - # check for macro first - component = self.parser_data.get(component_type, {}).get(name) - if component: - if name in eps and name not in self.masked_warned: - self.masked_warned[name] = True - logger.warning( - "You have a macro ('%s') defined for '%s' " - "component type that is masking an inbuilt " - "definition" % (name, component_type)) - - for b in component[component_list_type]: - # Pass component_data in as template data to this function - # so that if the macro is invoked with arguments, - # the arguments are interpolated into the real defn. - self.dispatch(component_type, xml_parent, b, component_data) - elif name in eps: + if name in eps: func = eps[name].load() func(self, xml_parent, component_data) else: raise JenkinsJobsException("Unknown entry point or macro '{0}' " "for component type: '{1}'.". format(name, component_type)) + + @classmethod + def is_module_name(self, name, component_list_type): + eps = self._entry_points_cache.get(component_list_type) + if not eps: + return False + return (name in eps) diff --git a/setup.cfg b/setup.cfg index a3713f7bc..eaeda0fcb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -85,3 +85,16 @@ jenkins_jobs.modules = triggers=jenkins_jobs.modules.triggers:Triggers wrappers=jenkins_jobs.modules.wrappers:Wrappers zuul=jenkins_jobs.modules.zuul:Zuul +jenkins_jobs.macros = + builder=jenkins_jobs.modules.builders:Builders + general=jenkins_jobs.modules.general:General + hipchat=jenkins_jobs.modules.hipchat_notif:HipChat + metadata=jenkins_jobs.modules.metadata:Metadata + notification=jenkins_jobs.modules.notifications:Notifications + parameter=jenkins_jobs.modules.parameters:Parameters + property=jenkins_jobs.modules.properties:Properties + publisher=jenkins_jobs.modules.publishers:Publishers + reporter=jenkins_jobs.modules.reporters:Reporters + scm=jenkins_jobs.modules.scm:SCM + trigger=jenkins_jobs.modules.triggers:Triggers + wrapper=jenkins_jobs.modules.wrappers:Wrappers diff --git a/tests/xml_config/test_xml_config.py b/tests/xml_config/test_xml_config.py index 0f084edd3..c3016fa6a 100644 --- a/tests/xml_config/test_xml_config.py +++ b/tests/xml_config/test_xml_config.py @@ -52,9 +52,6 @@ class TestXmlJobGeneratorExceptions(base.BaseTestCase): reg = registry.ModuleRegistry(config) reg.set_parser_data(yp.data) - job_data_list, view_data_list = yp.expandYaml(reg) - xml_generator = xml_config.XmlJobGenerator(reg) - self.assertRaises(Exception, xml_generator.generateXML, job_data_list) - self.assertIn("Failure formatting component", self.logger.output) + self.assertRaises(errors.JenkinsJobsException, yp.expandYaml, reg) self.assertIn("Problem formatting with args", self.logger.output)