Retire scientific-wg repo

As discussed in the TC meeting [1], the TC is retiring the
scientific-wg repo.

[1] https://meetings.opendev.org/meetings/tc/2021/tc.2021-06-17-15.00.log.html#l-98

Change-Id: I23bfbb89fbf08286f231fa7d9cabfb04d3bf85fc
Ghanshyam Mann 2021-06-23 15:30:11 -05:00
parent 75707c3f03
commit 43ed86f2bc
38 changed files with 9 additions and 3454 deletions

.gitignore

@@ -1,7 +0,0 @@
doc/build/
.tox/
.idea
AUTHORS
ChangeLog
.testrepository/
*.pyc

.testr.conf

@@ -1,4 +0,0 @@
[DEFAULT]
test_command=OS_STDOUT_CAPTURE=1 OS_STDERR_CAPTURE=1 OS_TEST_TIMEOUT=60 ${PYTHON:-python} -m subunit.run discover -t ./ . $LISTOPT $IDOPTION
test_id_option=--load-list $IDFILE
test_list_option=--list

.zuul.yaml

@@ -1,7 +0,0 @@
- project:
    check:
      jobs:
        - openstack-tox-py27
    gate:
      jobs:
        - openstack-tox-py27

README.rst

@@ -1,37 +1,10 @@
========================
Scientific Working Group
========================
This project is no longer maintained.
The contents of this repository are still available in the Git
source code management system. To see the contents of this
repository before it reached its end of life, please check out the
previous commit with "git checkout HEAD^1".
This repository is for storing `Scientific Working Group
<https://wiki.openstack.org/wiki/Scientific_working_group>`_ resources.
Principally, this includes the text for the WG's book,
`The Crossroads of Cloud and HPC: OpenStack for Scientific Research
<https://www.openstack.org/assets/science/OpenStack-CloudandHPC6x9Booklet-v4-online.pdf>`_
All documents are in RST format.
Building
========
Python Tox is used to automate the creation of virtual environments for
building the working group's documentation resources.
Prerequisites
-------------
To get started, you need to install all necessary tools:
* `virtualenv`
* `pip` (use the latest from `https://bootstrap.pypa.io/get-pip.py`)
* `tox`
Run the build
-------------
To build the resources::
$ tox -e docs
This will generate build artifacts in ``doc/build``. To view the generated
HTML artifacts navigate to
`file:///path/to/scientific-wg/doc/build/html/index.html`.
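A quick-start sketch of the full sequence, assuming a working Python
installation and the usual OpenDev location for this repository (the
clone URL is an assumption, not taken from this README)::

   $ pip install --user virtualenv tox
   $ git clone https://opendev.org/openstack/scientific-wg
   $ cd scientific-wg
   $ tox -e docs   # HTML output lands in doc/build/html/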
For any further questions, please email
openstack-discuss@lists.openstack.org or join #openstack-dev on
OFTC.

doc/source/conf.py

@@ -1,436 +0,0 @@
# -*- coding: utf-8 -*-
#
# scale_rnd documentation build configuration file, created by
# sphinx-quickstart on Mon Nov 23 13:22:23 2015.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys
import os
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.insert(0, os.path.abspath('../..'))
# -- General configuration ------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'rst2pdf.pdfbuilder',
'sphinx.ext.autodoc',
'sphinxcontrib.httpdomain',
'oslosphinx',
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix of source filenames.
source_suffix = '.rst'
# The encoding of source files.
#source_encoding = 'utf-8-sig'
# The master toctree document.
master_doc = 'index'
# General information about the project.
project = u'Performance Docs'
copyright = u'2015, OpenStack Foundation'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '0.1'
# The full version, including alpha/beta/rc tags.
release = '0.1'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = []
# The reST default role (used for this markup: `text`) to use for all
# documents.
#default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []
# If true, keep warnings as "system message" paragraphs in the built documents.
#keep_warnings = False
# -- Options for HTML output ----------------------------------------------
# on_rtd is whether we are on readthedocs.org, this line of code grabbed from docs.readthedocs.org
on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}
# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []
# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None
# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
#html_extra_path = []
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}
# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}
# If false, no module index is generated.
#html_domain_indices = True
# If false, no index is generated.
#html_use_index = True
# If true, the index is split into individual pages for each letter.
#html_split_index = False
# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True
# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''
# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None
# Output file base name for HTML help builder.
htmlhelp_basename = 'performance_docs'
# -- Options for LaTeX output ---------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
#'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
#'preamble': '',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
('index', 'performance_docs.tex', u'Performance Documentation',
u'OpenStack Foundation', 'manual'),
]
# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False
# If true, show page references after internal links.
#latex_show_pagerefs = False
# If true, show URL addresses after external links.
#latex_show_urls = False
# Documents to append as an appendix to all manuals.
#latex_appendices = []
# If false, no module index is generated.
#latex_domain_indices = True
# -- Options for manual page output ---------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
('index', 'performance_docs', u'Performance Documentation',
[u'OpenStack Foundation'], 1)
]
# If true, show URL addresses after external links.
#man_show_urls = False
# -- Options for Texinfo output -------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
('index', 'performance_docs', u'Performance Documentation',
u'OpenStack Foundation', 'performance_docs', 'OpenStack performance testing plans, results and investigations.',
'Miscellaneous'),
]
# Documents to append as an appendix to all manuals.
#texinfo_appendices = []
# If false, no module index is generated.
#texinfo_domain_indices = True
# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'
# If true, do not generate a @detailmenu in the "Top" node's menu.
#texinfo_no_detailmenu = False
# -- Options for Epub output ----------------------------------------------
# Bibliographic Dublin Core info.
epub_title = u'performance_docs'
epub_author = u'OpenStack Foundation'
epub_publisher = u'OpenStack Foundation'
epub_copyright = u'2015, OpenStack Foundation'
# The basename for the epub file. It defaults to the project name.
#epub_basename = u'scale_rnd'
# The HTML theme for the epub output. Since the default themes are not optimized
# for small screen space, using the same theme for HTML and epub output is
# usually not wise. This defaults to 'epub', a theme designed to save visual
# space.
#epub_theme = 'epub'
# The language of the text. It defaults to the language option
# or en if the language is not set.
#epub_language = ''
# The scheme of the identifier. Typical schemes are ISBN or URL.
#epub_scheme = ''
# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#epub_identifier = ''
# A unique identification for the text.
#epub_uid = ''
# A tuple containing the cover image and cover page html template filenames.
#epub_cover = ()
# A sequence of (type, uri, title) tuples for the guide element of content.opf.
#epub_guide = ()
# HTML files that should be inserted before the pages created by sphinx.
# The format is a list of tuples containing the path and title.
#epub_pre_files = []
# HTML files shat should be inserted after the pages created by sphinx.
# The format is a list of tuples containing the path and title.
#epub_post_files = []
# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']
# The depth of the table of contents in toc.ncx.
#epub_tocdepth = 3
# Allow duplicate toc entries.
#epub_tocdup = True
# Choose between 'default' and 'includehidden'.
#epub_tocscope = 'default'
# Fix unsupported image types using the PIL.
#epub_fix_images = False
# Scale large images.
#epub_max_image_width = 0
# How to display URL addresses: 'footnote', 'no', or 'inline'.
#epub_show_urls = 'inline'
# If false, no index is generated.
#epub_use_index = True
# -- Options for PDF output --------------------------------------------------
# Grouping the document tree into PDF files. List of tuples
# (source start file, target name, title, author, options).
#
# If there is more than one author, separate them with \\.
# For example: r'Guido van Rossum\\Fred L. Drake, Jr., editor'
#
# The options element is a dictionary that lets you override
# this config per-document.
# For example,
# ('index', u'MyProject', u'My Project', u'Author Name',
# dict(pdf_compressed = True))
# would mean that specific document would be compressed
# regardless of the global pdf_compressed setting.
pdf_documents = [
('index', 'openstack_performance_docs', 'Performance Documentation', 'OpenStack Foundation'),
]
# A comma-separated list of custom stylesheets. Example:
pdf_stylesheets = ['pdf']
# A list of folders to search for stylesheets. Example:
pdf_style_path = ['.', '_styles', 'source/_styles', 'doc/source/_styles']
# Create a compressed PDF
# Use True/False or 1/0
# Example: compressed=True
#pdf_compressed = False
# A colon-separated list of folders to search for fonts. Example:
# pdf_font_path = ['/usr/share/fonts', '/usr/share/texmf-dist/fonts/']
# Language to be used for hyphenation support
#pdf_language = "en_US"
# Mode for literal blocks wider than the frame. Can be
# overflow, shrink or truncate
#pdf_fit_mode = "shrink"
# Section level that forces a break page.
# For example: 1 means top-level sections start in a new page
# 0 means disabled
#pdf_break_level = 1
# When a section starts in a new page, force it to be 'even', 'odd',
# or just use 'any'
#pdf_breakside = 'any'
# Insert footnotes where they are defined instead of
# at the end.
#pdf_inline_footnotes = True
# verbosity level. 0 1 or 2
#pdf_verbosity = 0
# If false, no index is generated.
#pdf_use_index = True
# If false, no modindex is generated.
#pdf_use_modindex = True
# If false, no coverpage is generated.
#pdf_use_coverpage = True
# Name of the cover page template to use
pdf_cover_template = 'pdf_cover.tmpl'
# Documents to append as an appendix to all manuals.
#pdf_appendices = []
# Enable experimental feature to split table cells. Use it
# if you get "DelayedTable too big" errors
#pdf_splittables = False
# Set the default DPI for images
#pdf_default_dpi = 72
# Enable rst2pdf extension modules (default is only vectorpdf)
# you need vectorpdf if you want to use sphinx's graphviz support
#pdf_extensions = ['vectorpdf']
# Page template name for "regular" pages
pdf_page_template = 'cutePage'
# Show Table Of Contents at the beginning?
#pdf_use_toc = True
# How many levels deep should the table of contents be?
pdf_toc_depth = 9999
# Add section number to section references
pdf_use_numbered_links = False
# Background images fitting mode
pdf_fit_background_mode = 'scale'
pdf_default_dpi = 72

19 binary image files under doc/source/images/ were deleted (sizes 8.8 KiB to 1.6 MiB); contents not shown.

doc/source/index.rst

@@ -1,76 +0,0 @@
========================
Scientific Working Group
========================
Purpose
=======
The Scientific Working Group is dedicated to representing and
advancing the use-cases and needs of research and high-performance
computing atop OpenStack. It's also a great forum for cross-institutional
collaboration. If you are (or would like to) run OpenStack to support
researchers/scientists/academics and/or HPC/HTC, then please join!
Objectives
==========
* HPC/HTC Infrastructure
* Research Data Infrastructure
* Application Infrastructure
* Social Infrastructure
* Create opportunities for the scientific community to engage the wider OpenStack community, i.e. industry
Communication
=============
The default communication of our members is via the following open
community mailing lists:
* user-committee@lists.openstack.org for coordination of working
group activities. Please prefix email subject lines with the tag
"[scientific-wg]".
* openstack-operators@lists.openstack.org for operational discussion
of scientific OpenStack deployment issues. Please prefix email
subject lines with the tag "[scientific]".
Please use the hashtag "scientific-wg" for tagging any etherpad
URLs, code, blog posts, scientific research and/or other social
publishing platforms.
The Working Group also maintains a `wiki page`_.
The Working Group has `weekly IRC meetings`_ in alternating time zones.
.. _wiki page: https://wiki.openstack.org/wiki/Scientific_working_group
.. _weekly IRC meetings: http://eavesdrop.openstack.org/#Scientific_Working_Group
Members
=======
No formal membership is required. Please introduce yourself and/or
fellow colleagues to this working group using one of the mailing
lists above, or by attending one of the IRC meetings.
This group is open to all members of the scientific OpenStack
community and supporting vendors.
Documents
=========
The Scientific Working Group maintains a guide to meeting the requirements of
scientific computing workloads on OpenStack, titled
*The Crossroads of Cloud and HPC: OpenStack for Scientific Research*.
Contents:
.. toctree::
:maxdepth: 2
openstack-and-virtualised-hpc
openstack-and-hpc-network
openstack-and-hpc-infrastructure
openstack-and-hpc-workloads
openstack-and-high-performance-data
openstack-and-federated-identity

doc/source/openstack-and-federated-identity.rst

@@ -1,130 +0,0 @@
OpenStack and Federated Identity Management
###########################################
Scientific research depends on the free flow of ideas, and the free
flow of ideas depends upon the free flow of people. Scientific
collaborations and research groups are often composed of users from
different institutions across different countries.
To support convenient and effective collaboration, compute resources
managed by one institution should be seamlessly accessible to
collaborators from other institutions, and vice versa. This is the
core principle of identity federation.
OpenStack is uniquely positioned. Universities and research
institutions use OpenStack to deliver research computing services.
Through improved support for identity federation, OpenStack can
support collaboration between institutions.
Federation Terminology
======================
* **National Research and Education Networks (NREN)** is a collective term used
for research federations.
* **Home Organisation** is the institution to which a user is affiliated, and in
federated use cases is where a user will be authenticated.
* **Security Assertion Markup Language (SAML)** ...
* **Open ID Connect (OIDC)** ...
* **Identity Provider (IdP)** ...
* ...
The Concepts of Identity Federation
===================================
NRENs use OpenStack to offer cloud compute services to authorized
users. The users may belong to universities, research institutions,
industrial partners or more generally to any organisation within
an identity federation.
We have three actors: the NREN running the public cloud, the user
and the user's home organisation, acting as an identity provider.
Federated public clouds are built upon SAML-based Identity Federations,
leveraging Universities and Research Institutions as Identity
Providers. National Identity Federations are currently used to
authenticate and authorize millions of users, and enable single
sign-on across thousands of services. OpenStack-based public cloud
computing is provided to users through federated identity login.
OpenStack services and dashboards need to be configured as Service
Providers within identity federations and are accessed like all other
federated services.
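For illustration, registering an external SAML identity provider and its
attribute mapping with Keystone might look like the following sketch; the
provider name, remote ID and rules file are placeholders, not taken from
this document::

   $ openstack identity provider create \
         --remote-id https://idp.example.org/idp/shibboleth example-idp
   $ openstack mapping create --rules federation-rules.json example-mapping
   $ openstack federation protocol create saml2 \
         --mapping example-mapping --identity-provider example-idp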
The Challenges of Identity Federation
-------------------------------------
Federated research computing infrastructure needs to enable users
from other institutions within an identity federation to authenticate
and get authorized for access to use infrastructure resources. How is
this process controlled?
* *A user claims to be a user from another institution. How is it proven that
they are who they say they are?*
* *The user has successfully authenticated at their home organisation.
How are they authorized to use compute resources at this institution?
Is the affiliation to a project (and role within that project)
recorded at the home organisation or at the institution providing
the compute resources?*
* *A federated user wants to run periodic background jobs. How can
they do that without having to interactively submit their password?*
* *How does an institution monitor and account for the usage of a
user who has no presence in the institution's user database?*
* *How is a federated user contacted when required by an organisation,
and how would disciplinary actions be taken against a federated
user violating the terms of service?*
* *A federated user has already used a lot of compute resources as
a guest of this institution. How does the institution decide and
enforce the user's limits?*
* *A federated user has just left her home organisation, or left
the project on which she was collaborating with this institution.
How are active resources assigned to this user and project dealt with?*
* *A user is part of an industrial research project in which their
organisation pays for access to the resources. How is billing applied to the
resources used across the identity federation?*
* *How does an institution enable its users to be authenticated and
authorized to use resources at other institutions within an
identity federation?*
Federated Identity Management in OpenStack
==========================================
Case Study 1
============
Case Study 2
============
Further Reading
===============
* The INDIGO Identity and Access Management (IAM) service: https://github.com/indigo-iam/iam
* INDIGO Keystone OpenID-Connect integration guide: https://github.com/indigo-dc/openid-keystone-doc
* ...
Acknowledgements
================
* **Person 1** from organisation 1
* **Person 2** from organisation 2
.. figure:: images/cc-by-sa.png
:width: 100
:alt: Creative commons licensing
This document is provided as open source with a Creative Commons license
with Attribution + Share-Alike (CC-BY-SA)

doc/source/openstack-and-high-performance-data.rst

@@ -1,479 +0,0 @@
OpenStack and High Performance Data
###################################
What can data requirements mean in an HPC context? The range of use cases
is almost boundless. With considerable generalisation we can consider
some broad criteria for requirements, which expose the inherent tensions
between HPC-centric and cloud-centric storage offerings:
* The **data access** model: data objects could be stored and retrieved
using file-based, block-based, object-based or stream-based access.
HPC storage tends to focus on a model of file-based shared data storage
(with an emerging trend for object-based storage proposed for achieving
new pinnacles of scalability). Conversely cloud infrastructure favours
block-based storage models, often backed with and extended by object-based
storage. Support for data storage through shared filesystems is still
maturing in OpenStack.
* The **data sharing** model: applications may request the same data
from many clients, or the clients may make data accesses that are
segregated from one another. This distinction can have significant
consequences for storage architecture. Cloud storage and HPC storage
are both highly distributed, but often differ in the way in which data
access is parallelised. Providing high-performance access for many
clients to a shared dataset can be a niche requirement specific to HPC.
Cloud-centric storage architectures typically focus on delivering high
aggregate throughput on many discrete data accesses.
* The level of **data persistence**. An HPC-style tiered data storage
architecture does not need to incorporate data redundancy at every level
of the hierarchy. This can improve performance for tiers caching data
closer to the processor.
The cloud model offers capabilities that enable new possibilities for HPC:
* **Automated provisioning**. Software-defined infrastructure automates the
provisioning and configuration of compute resources, including storage.
Users and group administrators are able to create and configure storage
resources to their specific requirements at the exact time they are
needed.
* **Multi-tenancy**. HPC storage does not offer multi-tenancy with the level
of segregation that cloud can provide. A virtualised storage resource
can be reserved for the private use of a single user, or could be shared
between a controlled group of collaborating users, or could even be
accessible by all users.
* **Data isolation**. Sensitive data requires careful data management.
Medical informatics workloads may contain patient genomes. Engineering
simulations may contain data that is trade secret. OpenStack's
segregation model is stronger than ownership and permissions on a
POSIX-compliant shared filesystem, and also provides finer-grained
access control.
There is clear value in increased flexibility - but at what cost in
performance? In more demanding environments, HPC storage tends to focus
on and be tuned for delivering the requirements of a confined subset
of workloads. This is the opposite approach to the conventional cloud
model, in which assumptions may not be possible about the storage access
patterns of the supported workloads.
This study will describe some of these divergences in greater detail, and
demonstrate how OpenStack can integrate with HPC storage infrastructure.
Finally some methods of achieving high performance data management on
cloud-native storage infrastructure will be discussed.
File-based Data: HPC Parallel Filesystems in OpenStack
======================================================
Conventionally in HPC, file-based data services are delivered
by parallel filesystems such as Lustre and Spectrum Scale (GPFS).
A parallel filesystem is a shared resource. Typically it is mounted on
all compute nodes in a system and available to all users of a system.
Parallel filesystems excel at providing low-latency, high-bandwidth
access to data.
Parallel filesystems can be integrated into an OpenStack environment in
a variety of configuration models.
Provisioned Client Model
------------------------
Access to an external parallel filesystem is provided through an OpenStack
provider network. OpenStack compute instances - virtualised or bare
metal - mount the site filesystem as clients.
This use case is fairly well established. In the virtualised use case,
performance is achieved through use of SR-IOV (with only a moderate
level of overhead). In the case of Lustre, with a layer-2 VLAN provider
network the o2ib client drivers can use RoCE to perform Lustre data
transport using RDMA.
Cloud-hosted clients on a parallel filesystem raise issues with root in
a cloud compute context. On cloud infrastructure, privileged accesses
from a client do not have the same degree of trust as on conventional HPC
infrastructure. Lustre approaches this issue by introducing Kerberos
authentication for filesystem mounts and subsequent file accesses.
Kerberos credentials for Lustre filesystems can be supplied to OpenStack
instances upon creation as instance metadata.
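A rough sketch of the provisioned client model follows; the network
names, VLAN segment, Lustre NID and filesystem name are illustrative
assumptions only::

   $ # operator: create the layer-2 provider network carrying LNet traffic
   $ openstack network create --provider-network-type vlan \
         --provider-physical-network datacentre --provider-segment 120 \
         lustre-net
   $ # inside a deployed instance: mount the site filesystem over o2ib (RDMA)
   $ sudo mount -t lustre 10.20.0.1@o2ib:/scratch /mnt/scratch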
Provisioned Filesystem Model
----------------------------
There are use cases where the dynamic provisioning of software-defined
parallel filesystems has considerable appeal. There have been
proof-of-concept demonstrations of provisioning Lustre filesystems from
scratch using OpenStack compute, storage and network resources.
The OpenStack Manila project aims to provision and manage shared
filesystems as an OpenStack service. IBM's Spectrum Scale integrates
with Manila to re-export GPFS parallel filesystems using the user-space
Ganesha NFS server.
Currently these projects demonstrate functionality over performance.
In future evolutions the overhead of dynamically provisioned parallel
filesystems on OpenStack infrastructure may be reduced.
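As an illustration of the Manila workflow, provisioning and exporting a
share might look like this sketch (share name, size and client subnet
are placeholders)::

   $ manila create NFS 100 --name genomics-share --share-type default
   $ manila access-allow genomics-share ip 10.30.0.0/24 --access-level rw
   $ manila show genomics-share   # export_locations lists the NFS mount path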
A Parallel Data Substrate for OpenStack Services
------------------------------------------------
IBM positions Spectrum Scale as a distributed data service for
underpinning OpenStack services such as Cinder, Glance, Swift and Manila.
More information about using Spectrum Scale in this manner can be found
in IBM Research's red paper on the subject (listed in the Further
Reading section).
Applying HPC Technologies to Enhance Data IO
============================================
A recurring theme throughout this study has been the use of remote DMA
for efficient data transfer in HPC environments. The advantages of this
technology are especially pertinent in data intensive environments.
OpenStack's flexibility enables the introduction of RDMA protocols
for many cloud infrastructure operations to reduce latency, increase
bandwidth and enhance processor efficiency:
Cinder block data IO can be performed using iSER (iSCSI extensions
for RDMA). iSER is a drop-in replacement for iSCSI that is easy to
configure and set up. Through providing tightly-coupled IO resources
using RDMA technologies, the functional equivalent of HPC-style burst
buffers can be added to the storage tiers of cloud infrastructure.
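A minimal sketch of enabling iSER for an LVM-backed Cinder backend; the
backend section name and service unit are assumptions that vary by
distribution and release::

   $ crudini --set /etc/cinder/cinder.conf lvm volume_driver \
         cinder.volume.drivers.lvm.LVMVolumeDriver
   $ crudini --set /etc/cinder/cinder.conf lvm iscsi_protocol iser
   $ sudo systemctl restart openstack-cinder-volume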
Ceph data transfers can be performed using the Accelio RDMA transport.
This technology was demonstrated some years ago but does not appear
to have achieved production levels of stability or gained significant
mainstream adoption.
The NOWLAB group at Ohio State University have developed extensions to
data analytics platforms such as HBase, Hadoop, Spark and Memcached to
optimise data movements using RDMA.
Optimising Ceph Storage for Data-Intensive Workloads
====================================================
The versatility of Ceph embodies the cloud-native approach to storage,
and consequently Ceph has become a popular choice of storage technology
for OpenStack infrastructure. A single Ceph deployment can support
various protocols and data access models.
Ceph is capable of delivering strong read bandwidth. For large reads
from OpenStack block devices, Ceph is able to parallelise the delivery
of the read data across multiple OSDs.
Ceph's data consistency model commits writes to multiple OSDs before
a write transaction is completed. By default a write is replicated
three times. This can result in higher latency and lower performance
on write bandwidth.
Ceph can run on clusters of commodity hardware configurations. However,
in order to maximise the performance (or price performance) of a Ceph
cluster some design rules of thumb can be applied:
Use separate physical network interfaces for external storage network and
internal storage management. On the NICs and switches, enable Ethernet
flow control and raise the MTU to support jumbo frames.
Each drive used for Ceph storage is managed by an OSD process.
A Ceph storage node usually contains multiple drives (and multiple
OSD processes).
The best price/performance and highest density is achieved using fat
storage nodes, typically containing 72 HDDs. These work well for
large scale deployments, but can lead to very costly units of failure
in smaller deployments. Node configurations of 12-32 HDDs are usually
found in deployments of intermediate scale.
Ceph storage nodes usually contain a higher-speed write journal, which is
dedicated to service of a number of HDDs. An SSD journal can typically
feed 6 HDDs while an NVMe flash device can typically feed up to 20 HDDs.
About 10G of external storage network bandwidth balances the read
bandwidth of up to 15 HDDs. The internal storage management network
should be similarly scaled.
A rule of thumb for RAM is to provide 0.5GB-1GB of RAM per TB per
OSD daemon.
On multi-socket storage nodes, close attention should be paid to NUMA
considerations. The PCI storage devices attached to each socket should
be working together. Journal devices should be connected with HDDs
attached to HBAs on the same socket. IRQ affinity should be confined
to cores on the same socket. Associated OSD processes should be pinned
to the same cores.
For tiered storage applications in which data can be regenerated from
other storage, the replication count can safely be reduced from 3 to
2 copies.
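As a sketch of two of these rules of thumb in practice (interface name,
pool name and placement-group counts are illustrative)::

   $ # jumbo frames on the internal replication network interface
   $ sudo ip link set dev ens2f1 mtu 9000
   $ # a pool for regenerable, tiered data, dropped from three replicas to two
   $ ceph osd pool create scratch 2048 2048 replicated
   $ ceph osd pool set scratch size 2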
The Cancer Genome Collaboratory: Large-scale Genomics on OpenStack
==================================================================
Genome datasets can be hundreds of terabytes in size, sometimes requiring
weeks or months to download and significant resources to store and
process.
.. image:: images/high_performance_data-oicr_logo.jpg
:width: 300
:align: right
:alt: OICR logo
The Ontario Institute for Cancer Research built the Cancer Genome
Collaboratory (or simply The Collaboratory) as a biomedical research
resource built upon OpenStack infrastructure. The Collaboratory aims
to facilitate research on the world's largest and most comprehensive
cancer genome dataset, currently produced by the International Cancer
Genome Consortium (ICGC).
By making the ICGC data available in cloud compute form in the
Collaboratory, researchers can bring their analysis methods to the cloud,
yielding benefits from the high availability, scalability and economy
offered by OpenStack, and avoiding the large investment in compute
resources and the time needed to download the data.
An OpenStack Architecture for Genomics
--------------------------------------
The Collaboratory's requirements for the project were to build a cloud
computing environment providing 3000 compute cores and 10-15 PB of raw
data stored in a scalable and highly-available storage. The project
has also met constraints of budget, data security, confined data centre
space, power and connectivity. In selecting the storage architecture,
capacity was considered to be more important than latency and performance.
Each rack hosts 16 compute nodes using 2U high-density chassis, and
between 6 and 8 Ceph storage nodes. Hosting a mix of compute and storage
nodes in each rack keeps some of the Nova-Ceph traffic in the same rack,
while also lowering the power requirement for these high density racks
(2 x 60A circuits are provided to each rack).
As of September 2016, Collaboratory has 72 compute nodes (2600 CPU
cores, Hyper-Threaded) with a physical configuration optimized for large
data-intensive workflows: 32 or 40 CPU cores and a large amount of RAM
(256 GB per node). The workloads make extensive use of high performance
local disk, incorporating hardware RAID10 across 6 x 2TB SAS drives.
The networking is provided by Brocade ICX 7750-48C top-of-rack switches
that use 6x40Gb cables to interconnect the racks in a ring stack topology,
providing 240 Gbps non-blocking redundant inter-rack connectivity,
at a 2:1 oversubscription ratio.
The Collaboratory is deployed using entirely community-supported free
software. The OpenStack control plane is Ubuntu 14.04 and deployment
configuration is based on Ansible. The Collaboratory was initially
deployed using OpenStack Juno and a year later upgraded to Kilo and
then Liberty.
Collaboratory deploys a standard HA stack based on Haproxy/Keepalived and
Mariadb-Galera using three controller nodes. The controller nodes also
perform the role of Ceph-mon and Neutron L3-agents, using three separate
RAID1 sets of SSD drives for MySQL, Ceph-mon and Mongodb processes.
The compute nodes have 10G Ethernet with GRE and SDN capabilities
for virtualized networking. The Ceph nodes use 2x10G NICs bonded for
client traffic and 2x10G NICs bonded for storage replication traffic.
The Controller nodes have 4x10G NICs in an active-active bond (802.3ad)
using layer3+4 hashing for better link utilisation. The OpenStack tenant
routers are highly-available with two routers distributed across the three
controllers. The configuration does not use Neutron DVR out of concern
for limiting the number of servers directly attached to the Internet.
The public VLAN is carried only on the trunk ports facing the controllers
and the monitoring server.
Optimising Ceph for Genomics Workloads
--------------------------------------
Upon workload start, the instances usually download data stored in Ceph's
object storage. OICR developed a download client that controls access
to sensitive ICGC protected data through managed tokens. Downloading a
100GB file stored in Ceph takes around 18 minutes, with another 10-12
minutes used to automatically check its integrity (md5sum), and is mostly
limited by the instance's local disk.
The ICGC storage system adds a layer of control on top of Ceph's
object storage. Currently this is a 2-node cluster behind an Haproxy
instance serving the ICGC storage client. The server component uses
OICR's authorization and metadata systems to provide secure access to
related objects stored in Ceph. By using OAuth-based access tokens,
researchers can be given access to the Ceph data without having to
configure Ceph permissions. Access to individual project groups can
also be implemented in this layer.
Each Ceph storage node consists of 36 OSD drives (4, 6 or 8 TB) in
a large Ceph cluster currently providing 4 PB of raw storage, using
three replica pools. The radosgw pool has 90% of the Ceph space being
reserved for storing protected ICGC datasets, including the very large
whole genome aligned reads for almost 2000 donors. The remaining 10% of
Ceph space is used as a scalable and highly-available backend for Glance
and Cinder. Ceph radosgw was tuned for the specific genomic workloads,
mostly by increasing read-ahead on the OSD nodes, 65 MB as rados object
stripe for Radosgw and 8 MB for RBD.
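The read-ahead part of that tuning can be applied at the block layer on
each OSD node, roughly as follows; the 8 MiB value mirrors the RBD figure
above and is illustrative rather than the Collaboratory's exact setting::

   $ for q in /sys/block/sd*/queue/read_ahead_kb; do echo 8192 | sudo tee "$q"; done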
Further Considerations and Future Directions
--------------------------------------------
In the course of the development of the OpenStack infrastructure at the
Collaboratory, several issues have been encountered and addressed:
The instances used in cancer research are usually short lived
(hours/days/weeks), but with high resource requirements in terms of CPU
cores, memory and disk allocation. As a consequence of this pattern of
usage the Collaboratory OpenStack infrastructure does not support live
migration as a standard operating procedure.
The Collaboratory have encountered a few problems caused by Radosgw bugs
involving overlapping multipart uploads. However, these were detected by
the Collaboratory's monitoring system, and did not result in data loss.
The Collaboratory created a monitoring system that uses automated Rally
tests to monitor end-to-end functionality, and also download a random
large S3 object (around 100 GB) to confirm data integrity and monitor
object storage performance.
Because of the mix of very large (BAM), medium (VCF) and very small
(XML, JSON) files, the Ceph OSD nodes have an imbalanced load, and the
Collaboratory has to regularly monitor and rebalance data.
Currently, the Collaboratory is hosting 500TB of data from 2,000 donors.
Over the next 2 years, OICR will increase the number of ICGC genomes
available in the Collaboratory, with the goal of having the entire ICGC
data set of 25,000 donors estimated to be 5PB when the project completes
in 2018.
Although in a closed beta phase with only a few research labs having
accounts, there were more than 19,000 instances started in the last 18
months, with almost 7,000 in the last three months. One project that
uses the Collaboratory heavily is the PanCancer Analysis of Whole Genomes
(PCAWG), which characterizes the somatic, and germline variants from
over 2,800 ICGC cancer whole genomes in 20 primary tumour sites.
In conclusion, the Collaboratory environment has been running well for
OICR and its partners. George Mihaiescu, senior cloud architect at OICR,
has many future plans for OpenStack and the Collaboratory:
“We hope to add new OpenStack projects to the Collaboratory's offering
of services, with Ironic and Heat being the first candidates. We would
also like to provide new compute node configurations with RAID0 instead
of RAID10, or even SSD based local storage for improved IO performance.”
CLIMB: OpenStack, Parallel Filesystems and Microbial Bioinformatics
===================================================================
The Cloud Infrastructure for Microbial Bioinformatics (CLIMB) is a
collaboration between four UK universities (Swansea, Warwick, Cardiff
and Birmingham), and is funded by the UK's Medical Research Council.
CLIMB provides compute and storage as a free service to academic
microbiologists in the UK. After an extended period of testing, the
CLIMB service was formally launched in July 2016.
.. image:: images/high_performance_data-climb.jpg
:width: 400
:align: right
:alt: CLIMB hardware
CLIMB is a federation of 4 sites, configured as OpenStack regions.
Each site has an approximately equivalent configuration of compute nodes,
network and storage.
The compute node hardware configuration is tailored to support the
memory-intensive demands of bioinformatics workloads. The system as
a whole comprises 7680 CPU cores, in fat 4-socket compute nodes with
512GB RAM. Each site also has three large memory nodes with 3TB of RAM
and 192 hyper-threaded cores.
The infrastructure is managed and deployed using xCAT cluster management
software. The system runs the Kilo release of OpenStack, with packages
from the RDO distribution. Configuration management is automated
using Salt.
Each site has 500 TB of GPFS storage. Every hypervisor is a GPFS client,
and uses an infiniband fabric to access the GPFS filesystem. GPFS is
used for scratch storage space in the hypervisors.
For longer term data storage, to share datasets and VMs, and to provide
block storage for running VMs, CLIMB deploys a storage solution based
on Ceph. The Ceph storage is replicated between sites. Each site has 27
Dell R730XD nodes for Ceph storage servers. Each storage server contains
16x 4TB HDDs for Ceph OSDs, giving a total raw storage capacity of 6912TB.
After 3-way replication this yields a usable capacity of 2304TB.
On two sites Ceph is used as the storage back end for Swift, Cinder
and Glance. At Birmingham GPFS is used for Cinder and Glance, with
plans to migrate to Ceph.
In addition to the infiniband network, a Brocade 10G Ethernet fabric is
used, in conjunction with dual-redundant Brocade Vyatta virtual routers
to manage cross-site connectivity.
In the course of deploying and trialling the CLIMB system, a number of
issues have been encountered and overcome.
* The Vyatta software routers were initially underperforming with
consequential impact on inter-site bandwidth.
* Some performance issues have been encountered due to NUMA topology
awareness not being passed through to VMs.
* Stability problems with Broadcom 10GBaseT drivers in the controllers
led to reliability issues. (Thankfully the HA failover mechanisms were
found to work as required).
* Problems with interactions between Ceph and Dell hardware RAID cards.
* Issues with Infiniband and GPFS configuration.
CLIMB has future plans for developing their OpenStack infrastructure,
including:
* Migrating from regions to Nova cells as the federation model between
sites.
* Integrating OpenStack Manila for exporting shared filesystems from
GPFS to guest VMs.
Further Reading
===============
An IBM research study on integrating GPFS
(Spectrum Scale) within OpenStack environments:
http://www.redbooks.ibm.com/redpapers/pdfs/redp5331.pdf
A 2015 presentation from ATOS on using Kerberos authentication in Lustre:
http://cdn.opensfs.org/wp-content/uploads/2015/04/Lustre-and-Kerberos_Buisson.pdf
Glyn Bowden of HPE and Alex Macdonald from SNIA discuss OpenStack
storage (including the Provisioned Filesystem Model using Lustre):
https://www.brighttalk.com/webcast/663/168821
The High-Performance Big Data team at Ohio State University:
http://hibd.cse.ohio-state.edu
A useful talk from the 2016 Austin OpenStack Summit on Ceph design:
https://www.openstack.org/videos/video/designing-for-high-performance-ceph-at-scale
The Ontario Institute for Cancer Research Collaboratory:
http://www.cancercollaboratory.org
Further details on the International Cancer Genome Consortium:
http://icgc.org/
Dr Tom Connor presented CLIMB at the 2016 Austin OpenStack summit:
https://www.openstack.org/videos/video/the-cloud-infrastructure-for-microbial-bioinformatics-breaking-biological-silos-using-openstack
Acknowledgements
================
This document was written by Stig Telfer of `StackHPC Ltd <https://www.stackhpc.com>`_ with the support
of Cambridge University, with contributions, guidance and feedback from
subject matter experts:
* **George Mihaiescu**, **Bob Tiernay**, **Andy Yang**, **Junjun Zhang**,
**Francois Gerthoffert**, **Christina Yung**, **Vincent Ferretti**
from the Ontario Institute for Cancer Research. The authors wish
to acknowledge the funding support from the Discovery Frontiers:
Advancing Big Data Science in Genomics Research program (grant
no. RGPGR/448167-2013, The Cancer Genome Collaboratory), which
is jointly funded by the Natural Sciences and Engineering Research
Council (NSERC) of Canada, the Canadian Institutes of Health Research
(CIHR), Genome Canada, and the Canada Foundation for Innovation (CFI),
and with in-kind support from the Ontario Research Fund of the Ministry
of Research, Innovation and Science.
* **Dr Tom Connor** from Cardiff University and the CLIMB collaboration.
.. figure:: images/cc-by-sa.png
:width: 100
:alt: Creative commons licensing
This document is provided as open source with a Creative Commons license
with Attribution + Share-Alike (CC-BY-SA)

doc/source/openstack-and-hpc-infrastructure.rst

@@ -1,665 +0,0 @@
OpenStack and HPC Infrastructure Management
###########################################
In this section we discuss the emerging OpenStack use case for management
of HPC infrastructure. We introduce Ironic, the OpenStack bare metal
service and describe some of the differences, advantages and limitations
of managing HPC infrastructure as a bare metal OpenStack cloud.
Compared with OpenStack, established approaches to HPC infrastructure
management are very different. Conventional solutions offer much higher
scale, and much lower management plane overhead. However, they are also
inflexible, difficult to use and slow to evolve.
Through differences in the approach taken by cloud infrastructure
management, OpenStack brings new flexibility to HPC infrastructure
management:
* OpenStack's integrated support for multi-tenant infrastructure
introduces segregation between users and projects that require isolation.
* The cloud model enables the infrastructure deployed for different projects
to use entirely different software stacks.
* The software-defined orchestration of deployments is assumed.
This approach, sometimes referred to as “infrastructure as code”,
ensures that infrastructure is deployed and configured according to a
prescriptive formula, often maintained under source control in the same
manner as source code.
* The range of platforms supported by Ironic is highly diverse. Just about
any hardware can and has been used in this context.
* The collaborative open development model of OpenStack ensures that
community support is quick and easy to obtain.
The “infrastructure as code” concept is also gaining traction among
some HPC infrastructure management platforms that are adopting proven
tools and techniques from the cloud infrastructure ecosystem.
Deploying HPC Infrastructure at Scale
=====================================
HPC infrastructure deployment is not the same as cloud deployment.
A cloud assumes large numbers of users, each administering a small
number of instances compared to the overall size of the system. In a
multi-tenant environment, each user may use different software images.
Without coordination between the tenants, it would be very unlikely for
more than a few instances to be deployed at any one time. The software
architecture of the cloud deployment process is designed around this
assumption.
Conversely, HPC infrastructure deployment has markedly different
properties:
* A single user (the cluster administrator). HPC infrastructure is a
managed service, not user-administered.
* A single software image. All user applications will run in a single
common environment.
* Large proportions of the HPC cluster will be deployed simultaneously.
* Many HPC infrastructures use diskless compute nodes that network-boot
a common software image.
In the terminology of the cloud world, a typical HPC infrastructure
deployment might even be considered a “black swan event”. Cloud
deployment strategies do not exploit the simplifying assumptions that
deployments are usually across many nodes using the same image and for
the same user. Consequently, OpenStack Ironic deployments tend to scale
to the low thousands of compute nodes with current software releases
and best-practice configurations. Network booting a common image is a
capability that only recently has become possible in OpenStack and has
yet to become an established practice.
Bare Metal Management Using OpenStack Ironic
============================================
Using Ironic, bare metal compute nodes are automatically provisioned at
a user's request. Once the compute allocation is released, the bare
metal hardware is automatically decommissioned ready for its next use.
Ironic requires no presence on the compute node instances that it manages.
The software-defined infrastructure configuration that would typically
be applied in the hypervisor environment must instead be applied in
the hardware objects that interface with the bare metal compute node:
local disks, network ports, etc.
Support for a Wide Range of Hardware
------------------------------------
A wide range of hardware is supported, from full-featured BMCs on
enterprise server equipment down to devices whose power can only be
controlled through an SNMP-enabled data centre power strip.
An inventory of compute nodes is registered with Ironic and stored in
Ironic's node database. Ironic records configuration details and
current hardware state, including (see the sketch after this list):
* Physical properties of the compute node, including CPU count, RAM size
and disk capacity.
* The MAC address of the network interface used for provisioning instance
software images.
* The hardware drivers used to control and interact with the compute node.
* Details needed by those drivers to address this specific compute node
(for example, BMC IP address and login credentials).
* The current power state and provisioning state of the compute node,
including whether it is in active service.
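A hedged sketch of registering such a node with the command-line client;
driver names differ between releases, and every value shown is a
placeholder::

   $ openstack baremetal node create --name cn001 --driver ipmi \
         --driver-info ipmi_address=10.0.0.101 \
         --driver-info ipmi_username=admin \
         --driver-info ipmi_password=secret \
         --property cpus=32 --property memory_mb=131072 --property local_gb=400
   $ openstack baremetal port create 52:54:00:ab:cd:01 --node $NODE_UUID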
Inventory Grooming through Hardware Inspection
----------------------------------------------
A node is initially registered with a minimal set of identifying
credentials - sufficient to power it on and boot a ramdisk. Ironic
generates a detailed hardware profile of every compute node through a
process called Hardware Inspection.
Hardware inspection uses this minimal bootstrap configuration provided
during node registration. During the inspection phase a custom ramdisk
is booted which probes the hardware configuration and gathers data.
The data is posted back to Ironic to update the node inventory. Large
amounts of additional hardware profile data are also made available for
later analysis.
The inspection process can optionally run benchmarks to identify
performance anomalies across a group of nodes. Anomalies in the hardware
inspection dataset of a group of nodes can be analysed using a tool
called Cardiff. Performance anomalies, once identified, can often be
traced to configuration anomalies. This process helps to isolate and
eliminate potential issues before a new system enters production.
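With the ironic-inspector client installed, triggering inspection for a
registered node might look like this sketch (the node identifier is a
placeholder)::

   $ openstack baremetal introspection start $NODE_UUID
   $ openstack baremetal introspection status $NODE_UUID
   $ # retrieve the gathered hardware profile for offline analysis
   $ openstack baremetal introspection data save $NODE_UUID > cn001-facts.json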
Bare Metal and Network Isolation
--------------------------------
The ability for Ironic to support multi-tenant network isolation is a
new capability, first released in OpenStack's Newton release cycle.
This capability requires some mapping of the network switch ports
connected to each compute node. The mapping of an Ironic network port to
its link partner switch port is maintained with identifiers for switch
and switch port. These are stored as attributes in the Ironic network
port object. Currently the generation of the network mapping is not
automated by Ironic.
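The mapping is recorded as attributes on the Ironic port, for example
(all identifiers below are placeholders)::

   $ openstack baremetal port create 52:54:00:ab:cd:02 --node $NODE_UUID \
         --local-link-connection switch_id=00:1e:08:aa:bb:cc \
         --local-link-connection switch_info=leaf1 \
         --local-link-connection port_id=Ethernet1/12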
Multi-tenant networking is implemented through configuring state in the
attached switch port. The state could be the access port VLAN ID for
a VLAN network, or VTEP state for a VXLAN network. Currently only a
subset of Neutron drivers are able to perform the physical switch port
state manipulations needed by Ironic. Switches with VXLAN VTEP support
and controllable through the OVSDB protocol are likely to be supported.
Ironic maintains two private networks of its own, dedicated to node
provisioning and to node cleaning; both are defined in Neutron as provider
networks. When a node is deployed, its network port is placed into the
provisioning network. Upon successful deployment the node is connected
to the virtual tenant network for active service. Finally, when the node
is destroyed it is placed on the cleaning network. Maintaining distinct
networks for each role enhances security, and the logical separation of
traffic enables different QoS attributes to be assigned for each network.
Current Limitations of Ironic Multi-tenant Networking
-----------------------------------------------------
In HPC hardware configurations, compute nodes are attached to multiple
networks. Separate networks dedicated to management and high-speed data
communication are typical.
Current versions of Ironic do not have adequate support for attaching
nodes to multiple physical networks. Multiple physical interfaces can
be defined for a node, and a node can be attached to multiple Neutron
networks. However, it is not possible to attach specific physical
interfaces to specific networks.
Consequently, with current capabilities only a single network interface
should be managed by Ironic. Other physical networks would be managed
outside of OpenStack's purview, but will not benefit from OpenStack's
multi-tenant network capabilities as a result. Furthermore, Ironic only
supports a single network per physical port: all network switch ports
for Ironic nodes are access ports. Trunk ports are not yet supported
although this feature is in the development backlog.
Remote Console Management
-------------------------
Many server management products include support for remote consoles,
both serial and video. Ironic includes drivers for serial consoles,
built upon support in the underlying hardware.
Recently-developed capabilities within Ironic have seen bare metal
consoles integrated with OpenStack Nova's framework for managing
virtual consoles. Ironic's node kernel boot parameters are extended
with a serial console port, which is then redirected by the BMC to
serial-over-LAN. Server consoles can be presented in the Horizon web
interface in the same manner as virtualised server consoles.
Currently this capability is only supported for IPMI-based server
management.
Security and Integrity
----------------------
When bare metal compute is sold as an openly-accessible service,
privileged access is granted to a bare metal system. There is substantial
scope for a malicious user to embed malware payloads in the BIOS and
device firmware of the system.
Ironic counters this threat in several ways:
* **Node Cleaning**: Ironic's node state machine includes states where
hardware state is reset and consistency checks can be run to detect
attempted malware injection. Ironics default hardware manager does
not support these hardware-specific checks. However, custom hardware
drivers can be developed to include BIOS configuration settings and
firmware integrity tests.
* **Network Isolation**: Through using separate networks for node provisioning,
active tenant service and node cleaning, the opportunities for a
compromised system to probe and infect other systems across the network
are greatly reduced.
* **Trusted Boot**: a Trusted Platform Module (TPM), and a chain of trust
built upon it, can be used to verify the integrity of the boot process.
These techniques are used today to secure public cloud deployments of
Ironic-administered bare metal compute.
None of these capabilities is enabled by default. Hardening Ironic's
security model requires expertise and some amount of effort.
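As a sketch of the custom clean step approach mentioned above, an Ironic
Python Agent hardware manager can advertise additional clean steps to run
during cleaning. The class name, step name, priority and check logic below
are hypothetical, with the integrity check itself left as a stub::

   from ironic_python_agent import hardware


   class FirmwareIntegrityHardwareManager(hardware.HardwareManager):
       """Hypothetical hardware manager adding a firmware integrity check."""

       HARDWARE_MANAGER_NAME = 'FirmwareIntegrityHardwareManager'
       HARDWARE_MANAGER_VERSION = '1.0'

       def evaluate_hardware_support(self):
           # Prefer this manager over the generic one on matching hardware.
           return hardware.HardwareSupport.SERVICE_PROVIDER

       def get_clean_steps(self, node, ports):
           # Advertise an extra step for Ironic to run during automated cleaning.
           return [{'step': 'verify_firmware_integrity',
                    'priority': 90,
                    'interface': 'deploy',
                    'reboot_requested': False,
                    'abortable': True}]

       def verify_firmware_integrity(self, node, ports):
           # Stub: compare BIOS and firmware versions or checksums against a
           # known-good manifest, and raise an error on any mismatch.
           pass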
Provisioning at Scale
---------------------
The cloud usage model makes different assumptions from HPC. A cloud
is expected to support a large number of individual users. At any
time, each user is assumed to make comparatively small changes to their
compute resource usage. The HPC infrastructure use case is dramatically
different. HPC infrastructure typically runs a single software image
across the entire compute partition, and is likely to be deployed
in a single operation.
Ironic's current deployment models do not scale as well as the models
used by conventional HPC infrastructure management platforms. xCAT uses
a hierarchy of subordinate service nodes to fan out an iSCSI-based
image deployment. The Rocks cluster toolkit uses BitTorrent to distribute
RPM packages to all nodes. In the Rocks model, each deployment target
is a torrent peer. The capacity of the deployment infrastructure grows
alongside the number of targets being deployed.
However, the technologies for content distribution and caching that are
widely adopted by the cloud can be incorporated to address this issue.
Caching proxy servers can be used to speed up deployment at scale.
With appropriate configuration choices, Ironic can scale to handle
deployment to multiple thousands of servers.
.. figure:: images/hpc_infrastructure-ironic.png
:width: 600
:alt: Ironic node deployment flow diagram
*An overview of Ironic's node deployment process when using the Ironic
Python Agent ramdisk and Swift URLs for image retrieval. This strategy
demonstrates good scalability, but the disk image being deployed cannot be
larger than the RAM available on the node.*
Building Upon Ironic to Convert Infrastructure into HPC Platforms
=================================================================
The strengths of cloud infrastructure tooling become apparent once Ironic
has completed deployment. From this point a set of unconfigured compute
nodes must converge into the HPC compute platform required to meet the
user's needs. A rich ecosystem of flexible tools is available for
this purpose.
See the section
`OpenStack and HPC Workload Management <openstack-and-hpc-workloads.html>`_
for further details of some of the available approaches.
Chameleon: An Experimental Testbed for Computer Science
-------------------------------------------------------
.. figure:: images/hpc_infrastructure-chameleon_logo.jpg
:width: 400
:alt: Chameleon logo
Chameleon is an infrastructure project implementing an experimental
testbed for Computer Science led by the University of Chicago, with the Texas
Advanced Computing Center (TACC), the University of Texas at San Antonio
(UTSA), Northwestern University and Ohio State University as partners.
The Chameleon project is funded by the National Science Foundation.
The current system comprises ~600 nodes split between sites at TACC in
Austin and University of Chicago. The sites are interconnected with a
100G network. The compute nodes are divided into twelve racks, referred
to as “standard cloud units” (SCUs), each comprising 42 compute nodes, 4 storage
nodes with sixteen 2 TB hard drives each, and 10G Ethernet connecting all nodes
to an SDN-enabled top-of-rack switch. Each SCU has 40G Ethernet uplinks
into the Chameleon core network fabric. Onto this largely homogeneous
framework were grafted heterogeneous elements allowing for different
types of experimentation. One SCU has Mellanox ConnectX-3 Infiniband.
Two compute nodes were set up as storage hierarchy nodes with 512 GB
of memory, two Intel P3700 NVMe drives of 2.0 TB each, four Intel S3610 SSDs of
1.6 TB each, and four 15K SAS HDDs of 600 GB each. Two additional nodes
are equipped with NVIDIA Tesla K80 accelerators and two with NVIDIA
Tesla M40 accelerators.
In the near term, additional heterogeneous cloud units for experimentation
with alternate processors and networks will be incorporated, including
FPGAs, Intel Atom microservers and ARM microservers. Compute nodes with
GPU accelerators have already been added to Chameleon.
Chameleon's public launch was at the end of July 2015; since then
it has supported over 200 research projects in computer science and
cloud computing.
The system is designed to be deeply reconfigurable and adaptive, to
produce a wide range of flexible configurations for computer science
research. Chameleon uses the OpenStack Blazar project to manage advance
reservation of compute resources for research projects.
Chameleon deploys OpenStack packages from RDO, orchestrated using
OpenStack Puppet modules. Chameleon's management services currently
run CentOS 7 and OpenStack Liberty. Through Ironic a large proportion
of the compute nodes are provided to researchers as bare metal (a
few SCUs are dedicated to virtualised compute instances using KVM).
Chameleon's Ironic configuration uses the popular driver pairing of
PXE-driven iSCSI deployment and IPMItool power management.
Ironic's capabilities have expanded dramatically in the year since
Chameleon first went into production, and many of the new capabilities
will be integrated into this project.
The Chameleon project's wish list for Ironic capabilities includes:
* Ironic-Cinder integration, orchestrating the attachment of network block
devices to bare metal instances. This capability has been under active
development in Ironic and at the time of writing it is nearing completion.
* Network isolation, placing different research projects onto different
VLANs to minimise their interference with one another. Chameleon hosts
projects researching radically different forms of networking, which must
be segregated.
* Bare metal consoles, enabling researchers to interact with their allocated
compute nodes at the bare metal level.
* BIOS parameter management, enabling researchers to (safely) change
BIOS parameters, and then to restore default parameters at the end of
an experiment.
Pierre Riteau, DevOps lead for the Chameleon project, sees Chameleon as
an exciting use case for Ironic, which is currently developing many of
these features:
“With the Ironic project, OpenStack provides a modern bare-metal
provisioning system benefiting from an active upstream community, with
each new release bringing additional capabilities. Leveraging Ironic
and the rest of the OpenStack ecosystem, we were able to launch Chameleon
in a very short time.”
“However, the Ironic software is still maturing, and can lack in
features or scalability compared to some other bare-metal provisioning
software, especially in an architecture without a scalable Swift
installation.”
“Based on our experience, we recommend getting familiar with the
other core OpenStack projects when deploying Ironic. Although Ironic
can be run as standalone using Bifrost, when deployed as part of an
OpenStack it interacts closely with Nova, Neutron, Glance, and Swift.
And as with all bare-metal provisioning systems, it is crucial to
have serial console access to compute nodes in order to troubleshoot
deployment failures, which can be caused by all sorts of hardware issues
and software misconfigurations.”
“We see the future of OpenStack in this area as providing a fully
featured system capable of efficiently managing data centre resources,
from provisioning operating systems to rolling out firmware upgrades
and identifying performance anomalies.”
BRIDGES: A Next-Generation HPC Resource for Data Analytics
----------------------------------------------------------
Bridges is a supercomputer at the Pittsburgh Supercomputing Center funded
by the National Science Foundation. It is designed as a uniquely flexible
HPC resource, intended to support both traditional and non-traditional
workflows. The name reflects the system's aim to “bridge the research
community with HPC and Big Data.”
Bridges supports a diverse range of use cases, including graph analytics,
machine learning and genomics. As a flexible resource, Bridges supports
traditional SLURM-based batch workloads, Docker containers and interactive
web-based workflows.
Bridges has 800 compute nodes, 48 of which have dual-GPU accelerators
from Nvidia. There are also 46 high-memory nodes, including 4 with
12TB of RAM each. The entire system is interconnected with an Omnipath
high-performance 100G network fabric.
Bridges is deployed using community-supported free software. The
OpenStack control plane is CentOS 7 and Red Hat RDO (a freely available
packaging of OpenStack for Red Hat systems). OpenStack deployment
configuration is based on the PackStack project. Bridges was deployed
using OpenStack Liberty and is scheduled to be upgraded to OpenStack
Mitaka in the near future.
Most of the nodes are deployed in a bare metal configuration using Ironic.
Puppet is used to select the software role of a compute node at boot
time, avoiding the need to re-image. For example, a configuration for
MPI, Hadoop or virtualisation could be selected according to workload
requirements.
OmniPath networking is delivered using the OFED driver stack. Compute
nodes use IP over OPA for general connectivity. HPC apps use RDMA verbs
to take full advantage of OmniPath's capabilities.
.. figure:: images/hpc_infrastructure-bridges.png
:width: 600
:alt: PSC BRIDGES network architecture
*Visualisation of the Bridges OmniPath network topology. 800 general-purpose
compute nodes and GPU nodes are arrayed along the bottom of the
topology. Special purpose compute nodes, storage and control plane nodes
are arrayed across the top of the topology. 42 compute nodes connect to
each OmniPath ToR switch (in yellow), creating a “compute island”,
with 7:1 oversubscription into the upper stages of the network.*
Bridges Exposes Issues at Scale
-------------------------------
The Bridges system is a very large deployment for Ironic. While there are
no exact numbers, Ironic has been quoted to scale to thousands of nodes.
Coherency issues between Nova Scheduler and Ironic could arise if
too many nodes were deployed simultaneously. Introducing delays
during the scripting of the "nova boot" commands kept things in check.
Node deployments were held to five concurrently-building instances, with
subsequent instances staggered by 25 seconds, resulting in automated
deployment of the entire machine taking 1-2 days.
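The throttling described above could be scripted along the following lines
with the openstacksdk library. This is a sketch of the approach rather than
PSC's actual tooling; the cloud, image, flavor and network names are
hypothetical::

   import time

   import openstack

   conn = openstack.connect(cloud='mycloud')           # hypothetical cloud name
   image = conn.image.find_image('centos7-compute')    # hypothetical image
   flavor = conn.compute.find_flavor('bm.hpc')         # hypothetical flavor
   network = conn.network.find_network('cluster-net')  # hypothetical network

   MAX_BUILDING = 5       # at most five instances building at any time
   STAGGER_SECONDS = 25   # delay between successive boot requests

   for i in range(800):
       # Hold off while too many instances remain in the BUILD state.
       while len(list(conn.compute.servers(status='BUILD'))) >= MAX_BUILDING:
           time.sleep(STAGGER_SECONDS)
       conn.compute.create_server(
           name='compute-%04d' % i,
           image_id=image.id,
           flavor_id=flavor.id,
           networks=[{'uuid': network.id}],
       )
       time.sleep(STAGGER_SECONDS)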
Within Ironic the periodic polling of driver power states is serialised.
BMCs can be very slow to respond, and this can cause the time taken
to poll all power states in series to grow quite large. On Bridges,
the polling takes approximately 8 minutes to complete. This can also
lead to apparent inconsistencies of state between Nova and Ironic, and
the admin team works around this issue by enforcing a “settling time”
between deleting a node and reprovisioning it.
Benefiting from OpenStack and Contributing Back
-----------------------------------------------
The team at PSC have found benefits from using OpenStack for HPC system
management:
* The ability to manage system image creation using OpenStack tools such
as diskimage-builder.
* Ironic's automation of the management of PXE node booting.
* The prescriptive repeatable deployment process developed by the team
using Ironic and Puppet.
Robert Budden, senior cluster systems developer at PSC, has many future
plans for OpenStack and Bridges:
* Using other OpenStack services such as Magnum (Containerised workloads),
Sahara (Hadoop on the fly) and Trove (database as a service).
* Developing Ironic support for network boot over OmniPath.
* Diskless boot of extremely large memory nodes using Ironic's Cinder
integration.
* Deployment of a containerised OpenStack control plane using Kolla.
* Increased convergence between bare metal and virtualised OpenStack
deployments.
Robert adds:
“One of the great things is that as OpenStack improves, Bridges can
improve. As these new projects come online, we can incorporate those
features and the Bridges architecture can grow with the community."
“A big thing for me is to contribute back. I'm a developer by nature,
I want to fix some of the bugs and scaling issues that I've seen and
push these back to the OpenStack community.”
A $200 Million OpenStack-Powered Supercomputer
----------------------------------------------
In 2014 and 2015 the US Department of Energy announced three new
giant supercomputers, totalling $525 million, to be procured under the
CORAL (Collaboration of Oak Ridge, Argonne and Livermore) initiative.
Argonne National Laboratory's $200 million system, Aurora, features a
peak performance of 180 PFLOPs delivered by over 50,000 compute nodes.
Aurora is expected to be 18 times more powerful than Argonne's current
flagship supercomputer (Mira).
Aurora is to be deployed in 2018 by Intel, in partnership with Cray.
Aurora exemplifies the full capabilities of Intel's Scalable Systems
Framework initiative. Whilst Intel are providing the processors,
memory technology and fabric interconnect, Cray's long experience
and technical expertise in system integration are also fundamental to
Aurora's successful delivery.
.. figure:: images/hpc_infrastructure-aurora.jpg
:width: 600
:alt: Aurora floorplan render
Cray's Vision of the OpenStack-Powered Supercomputer
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Cray today sells a wide range of products for supercomputing, storage
and high-performance data analytics. Aside from the company's core
offering of supercomputer systems, much of Cray's product line has come
through acquisition. As a result of this history, the system
management of each product is different, with different capabilities
and different limitations.
The system management software that powers Cray's supercomputers has
developed through long experience to become highly scalable and efficient.
The software stack is bespoke and specialised to deliver this single
capability. In some ways, its inflexible excellence represents the
antithesis of OpenStack and software-defined cloud infrastructure.
Faced with these challenges, and with customer demands for open management
interfaces, in 2013 Cray initiated a development programme for a unified
and open solution for system management across the product range.
Cray's architects quickly settled on OpenStack. OpenStack relieves
the Cray engineering team of the generic aspects of system management
and frees them up to focus on problems specific to the demanding nature
of the products.
Successful OpenStack development strategies strongly favour an open
approach. Cray teams have worked with OpenStack developer communities to
bring forward the capabilities required for effective HPC infrastructure
management, for example:
* **Enhanced Ironic deployment**, using the Bareon ramdisk derived from the Fuel
deployment project. Cray management servers require complex deployment
configurations featuring multiple partitions and system images.
* **Diskless Ironic deployment**, through active participation in the
development of Cinder and Ironic integration.
* **Ironic multi-tenant networking**, through submission of bug fixes and
demonstration use cases.
* **Containerised OpenStack deployment**, through participation in the OpenStack
Kolla project.
* **Scalable monitoring infrastructure**, through participation in the Monasca
project.
Fundamental challenges still remain for Cray to deliver
OpenStack-orchestrated system management for supercomputer systems on
the scale of Aurora. Kitrick Sheets, senior principal engineer at Cray
and architect of Cray's OpenStack strategy, comments:
“Cray has spent many years developing infrastructure management
capabilities for high performance computing environments. The emergence
of cloud computing and OpenStack has provided a foundation for common
infrastructure management APIs. The abstractions provided within the
framework of OpenStack provide the ability to support familiar outward
interfaces for users who are accustomed to emerging elastic computing
environments while supporting the ability to provide features and
functions required for the support of HPC-class workloads. Normalizing
the user and administrator interfaces also has the advantage of increasing
software portability, thereby increasing the pace of innovation.”
“While OpenStack presents many advantages for the management of HPC
environments, there are many opportunities for improvement to support the
high performance, large scale use cases. Areas such as bulk deployment
of large collections of nodes, low-overhead state management, scalable
telemetry, etc. are a few of these. Cray will continue to work with
the community on these and other areas directly related to support of
current and emerging HPC hardware and software ecosystems.”
“We believe that additional focus on performance and scale which
drive toward the support of the highest-end systems will pay dividends
on systems of all sizes. In addition, as system sizes increase,
the incidents of hardware and software component failures become more
frequent, requiring increased resilience of services to support continual
operation. The community's efforts toward live service updates is one
area that will move us much further down that path.”
“OpenStack provides significant opportunities for providing core
management capabilities for diverse hardware and software ecosystems.
We look forward to continuing our work with the community to enhance
and extend OpenStack to address the unique challenges presented by high
performance computing environments.”
Most of the Benefits of Software-Defined Infrastructure...
==========================================================
In the space of HPC infrastructure management, OpenStack's attraction
is centred on the prospect of having all the benefits of software-defined
infrastructure while paying none of the performance overhead.
To date there is no single solution that can provide this. However,
a workable compromise can be struck in various ways:
* Fully-virtualised infrastructure provides all capabilities of cloud with
much of the performance overhead of cloud.
* Virtualised infrastructure using techniques such as SR-IOV and PCI
pass-through dramatically improves performance for network and IO
intensive workloads, but imposes some constraints on the flexibility of
software-defined infrastructure.
* Bare metal infrastructure management using Ironic incurs no performance
overhead, but has further restrictions on flexibility.
Each of these strategies is continually improving. Fully-virtualised
infrastructure using OpenStack private cloud provides control over
performance-sensitive parameters like resource over-commitment and
hypervisor tuning. It is anticipated that infrastructure using hardware
device pass-through optimisations will soon be capable of supporting cloud
capabilities like live migration. Ironic's bare metal infrastructure
management is continually developing new ways of presenting physical
compute resources as though they were virtual.
OpenStack has already arrived in the HPC infrastructure management
ecosystem. Projects using Ironic for HPC infrastructure management
have already demonstrated success. As it matures, its proposition
of software-defined infrastructure without the overhead will become
increasingly compelling.
A Rapidly Developing Project
----------------------------
While it is rapidly becoming popular, Ironic is a relatively young
project within OpenStack. Some areas are still being actively developed.
For sites seeking to deploy Ironic-administered compute hardware, some
limitations remain. However, Ironic has a rapid pace of progress,
and new capabilities are released with every OpenStack release cycle.
HPC infrastructure management using OpenStack Ironic has been demonstrated
at over 800 nodes, while Ironic is claimed to scale to managing thousands
of nodes. However, new problems become apparent at scale. Currently,
large deployments using Ironic should plan for an investment in the
skill set of the administration team and active participation within
the Ironic developer community.
Further Reading
===============
A clear and helpful introduction into the workings of Ironic in greater
detail: http://docs.openstack.org/developer/ironic/deploy/user-guide.html
Deployment of Ironic as a standalone tool:
http://docs.openstack.org/developer/bifrost/readme.html
Kate Keahey from University of Chicago presented an architecture
show-and-tell on Chameleon at the OpenStack Austin summit in April 2016:
https://www.openstack.org/videos/video/chameleon-an-experimental-testbed-for-computer-science-as-application-of-cloud-computing-1
Chameleon Cloud's home page is at: https://www.chameleoncloud.org
Robert Budden presented an architecture show-and-tell
on Bridges at the OpenStack Austin summit in April 2016:
https://www.openstack.org/videos/video/deploying-openstack-for-the-national-science-foundations-newest-supercomputers
Further information on Bridges is available at its home page at PSC:
http://www.psc.edu/index.php/bridges
Argonne National Lab's home page for Aurora: http://aurora.alcf.anl.gov
A presentation from Intel giving an overview of Aurora:
http://www.intel.com/content/dam/www/public/us/en/documents/presentation/intel-argonne-aurora-announcement-presentation.pdf
Intel's Scalable System Framework: http://www.intel.co.uk/content/www/uk/en/high-performance-computing/product-solutions.html
Acknowledgements
================
This document was originally written by Stig Telfer of `StackHPC Ltd <https://www.stackhpc.com>`_ with the support
of Cambridge University, with contributions, guidance and feedback from
subject matter experts:
* **Pierre Riteau**, University of Chicago and Chameleon DevOps lead.
* **Kate Keahey**, University of Chicago and Chameleon Science Director.
* **Robert Budden**, Senior Cluster Systems Developer, Pittsburgh Supercomputing Center.
* **Kitrick Sheets**, Senior Principal Engineer, Cray Inc.
.. figure:: images/cc-by-sa.png
:width: 100
:alt: Creative commons licensing
This document is provided as open source with a Creative Commons license
with Attribution + Share-Alike (CC-BY-SA)
OpenStack and HPC Network Fabrics
#################################
HPC and cloud infrastructure are not built to the same requirements.
As much as anything else, networking exemplifies the divergent criteria
between HPC applications and the typical workloads served by cloud
infrastructure.
With sweeping generalisations, one typically assumes an HPC
parallel workload is tightly-coupled, and a cloud-native workload is
loosely-coupled. A typical HPC parallel workload might be computational
fluid dynamics using a partitioned geometric grid. The application
code is likely to be structured in a bulk synchronous parallel model,
comprising phases of compute and data exchange between neighbouring
workers. Progress is made in lock-step, and is blocked until all workers
complete each phase.
Compare this with a typical cloud-native application, which might be
a microservice architecture consisting of a number of communicating
sequential processes. The overall application structure is completely
different, and workers do not have the same degree of dependency upon
other workers in order to make progress.
The different requirements of HPC and cloud-native applications have led
to different architectural choices being made at every level in order to
deliver optimal and cost-effective solutions for each target application.
A cloud environment experiences workload diversity to a far greater extent
than seen in HPC. This has become the quintessential driving force of
software-defined infrastructure. Cloud environments are designed to
be flexible and adaptable. As a result, cloud infrastructure has the
flexibility to accommodate HPC requirements.
The flexibility of cloud infrastructure is delivered through layers
of abstraction. OpenStack's focus is on defining the intent of
the multi-tenant cloud infrastructure. Dedicated network management
applications decide on the implementation. As an orchestrator, OpenStack
delegates knowledge of physical network connectivity to the network
management platforms to which it connects.
OpenStack's surging momentum has ensured that support already exists
for all but the most exotic of HPC network architectures. This article
will describe several solutions for delivering HPC networking in an
OpenStack cloud.
Using SR-IOV for Virtualised HPC Networking
===========================================
SR-IOV is a technology that demonstrates how software-defined
infrastructure can introduce new flexibility in the management of HPC
resources, whilst retaining the high-performance benefits. With current
generation devices, there is a slight increase in I/O latency when using
SR-IOV virtual functions. However, this overhead is negligible for all
but the most latency-sensitive of applications.
The Process Flow for Using SR-IOV
---------------------------------
The Nova compute hypervisor is configured at boot time with kernel flags
to support extensions for SR-IOV hardware management.
The network kernel device driver is configured to create virtual
functions. These are present alongside the physical function. When they
are not assigned to a guest workload instance, the virtual functions
are visible in the device tree of the hypervisor.
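On a Linux hypervisor, for example, the number of virtual functions is
typically set through a standard sysfs attribute; the interface name and VF
count in this sketch are hypothetical::

   # Expose eight virtual functions on the 'enp3s0f0' physical function.
   PF = 'enp3s0f0'
   NUM_VFS = 8

   with open('/sys/class/net/%s/device/sriov_numvfs' % PF, 'w') as f:
       f.write(str(NUM_VFS))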
The OpenStack services are configured with identifiers or addresses
of devices configured to support SR-IOV. This is most easily done by
identifying the physical function (for example using its network device
name, PCI bus address, or PCI vendor/device IDs). All virtual functions
associated with this device will be made available for virtualised
compute instances. The configuration that identifies SR-IOV devices is
known as the whitelist.
To use an SR-IOV virtual function for networking in an instance,
a special direct-bound network port is created and connected with
the VM. This causes one of the virtual functions to be configured and
passed-through from the hypervisor into the VM.
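A sketch of this sequence using the openstacksdk library follows; the
network, image and flavor names are hypothetical, and the SR-IOV devices are
assumed to have already been whitelisted on the hypervisors::

   import openstack

   conn = openstack.connect(cloud='mycloud')  # hypothetical cloud name
   network = conn.network.find_network('hpc-vlan-net')  # hypothetical network

   # A vnic_type of 'direct' asks Neutron for an SR-IOV virtual function
   # rather than a port on the software vSwitch.
   port = conn.network.create_port(
       name='mpi-rank-0-sriov',
       network_id=network.id,
       binding_vnic_type='direct',
   )

   conn.compute.create_server(
       name='mpi-rank-0',
       image_id=conn.image.find_image('centos7-hpc').id,
       flavor_id=conn.compute.find_flavor('hpc.large').id,
       networks=[{'port': port.id}],
   )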
Support for launching an instance using SR-IOV network interfaces from
OpenStack's Horizon web interface was introduced in the OpenStack
Mitaka release (April 2016). Prior to this, it was only possible to
launch instances using SR-IOV ports through a sequence of command-line
invocations (or through direct interaction with the OpenStack APIs).
The Limitations of Using SR-IOV in Cloud Infrastructure
-------------------------------------------------------
SR-IOV places some limitations on the cloud computing model that can be
detrimental to the overall flexibility of the infrastructure:
* Current SR-IOV hardware implementations support flat (unsegregated)
and VLAN network separation but not VXLAN for tenant networks. This
limitation can constrain the configuration options for the network fabric.
Layer-3 IP-based fabrics using technologies such as ECMP are unlikely
to interoperate with VLAN-based network separation.
* Live migration of VMs connected using SR-IOV is not possible with current
hardware and software. The capability is being actively developed for
Mellanox SR-IOV NICs. It is not confirmed whether live migration of
RDMA applications will be possible.
* SR-IOV devices bypass OpenStack's security groups, and consequently
should only be used for networks that are not externally connected.
Virtualisation-aware MPI for Tightly-Coupled Cloud Workloads
============================================================
The MVAPICH2 library implements MPI-3 (based on the MPI 3.1 standard)
using the IB verbs low-level message passing primitives. MVAPICH2 was
created and developed by the `Network-Based Computing Laboratory
<http://nowlab.cse.ohio-state.edu/>`_ (NOWLAB) at The Ohio State
University, and has been freely available for download for 15 years.
Over that time MVAPICH2 has been continuously developed and now runs on
systems as big as 500,000 cores.
An Infiniband NIC with SR-IOV capability was first developed by Mellanox
in the ConnectX-3 generation of its product, unlocking the possibility
of achieving near-native Infiniband performance in a virtualised
environment. MVAPICH2-Virt was introduced in 2015 to deliver HPC levels
of performance on cloud infrastructure. The techniques adopted by
MVAPICH2-Virt currently support KVM and Docker based cloud environments.
MVAPICH2-Virt introduces Inter-VM Shared Memory (IVSHMEM) support to KVM
hypervisors, increasing performance between co-resident VMs. In order
to run MVAPICH2-Virt based applications on top of OpenStack-based cloud
environments easily, several extensions to set up SR-IOV and IVSHMEM
devices in VMs have been developed for OpenStack's Nova compute manager.
MVAPICH2-Virt has two principal optimisation strategies for KVM-based
cloud environments:
* Dynamic locality awareness for MPI communication among co-resident
VMs. A new communication channel, IVSHMEM, introduces a memory-space
communication mechanism between different VMs co-resident on the same
hypervisor. Inter-node communication continues to use the SR-IOV
virtual function.
* Tuning of MPI performance for both SR-IOV and IVSHMEM channels.
.. figure:: images/hpc_fabrics-ivshmem.png
:width: 400
:alt: IV-SHMEM device for intra-hypervisor communication
Similarly, MVAPICH2-Virt has two principal optimisation strategies for
Docker based cloud environments:
* Dynamic locality awareness for MPI communication among co-resident
containers. All intra-node MPI communication can go through either the
IPC-SHM-enabled channel or the CMA channel, regardless of whether the
processes are in the same container or in different ones. Inter-node,
inter-container MPI communication will leverage the InfiniBand HCA channel.
* Tuning of MPI performance for all different channels, including IPC-SHM,
CMA, and InfiniBand HCA.
With these strategies in effect, the performance overhead of KVM and
Docker based virtualisation on standard MPI benchmarks and applications
is less than 10%.
"The novel designs introduced in MVAPICH2-Virt take advantage of the
latest advances in virtualisation technologies and promise to design
next-generation HPC cloud environments with good performance", says
Prof. DK Panda and Dr. Xiaoyi Lu of NOWLAB.
OpenStack as an environment for supporting MPI-based HPC workloads has
many benefits, such as fast VM or container deployment for setting up
MPI job execution environments, security, enabling resource sharing,
providing privileged access in virtualised environments, and supporting
high-performance networking technologies (e.g. SR-IOV).
Currently, OpenStack does not yet fully support or work seamlessly
with technologies common in HPC environments, such as IVSHMEM, SLURM and
PBS. But with several extensions proposed by NOWLAB researchers, running
MPI-based HPC workloads on top of OpenStack-managed environments is
a promising approach for building efficient clouds.
The future directions of MVAPICH2-Virt include:
* Further support for different kinds of virtualised environments
* Further improvements to MPI application performance in cloud environments
through novel designs
* Support for live migration of MPI applications in SR-IOV and IVSHMEM enabled VMs
Infiniband and other Non-Ethernet Fabrics
=========================================
Infiniband is the dominant fabric interconnect for HPC clusters. Of the
TOP500 list published in June 2016, 41% of entries use Infiniband.
In part, OpenStack's flexibility comes from avoiding many rigid
assumptions in infrastructure management. However, OpenStack networking
does have some expectations of an Ethernet and IP-centric network
architecture, which can present challenges for the network architectures
often used in HPC. The Neutron driver for Infiniband circumvents these
assumptions by applying Neutron's layer-3 network configuration to an
IP-over-IB interface, and mapping Neutron's layer-2 network segmentation
ID to Infiniband pkeys.
However, Neutron is limited to an allocation of 126 pkeys, which imposes
a restrictive upper limit on the number of distinct tenant networks an
OpenStack Infiniband cloud can support.
A technical lead with experience of using OpenStack on Infiniband reports
mixed experiences from an evaluation performed in 2015. The overall
result led him to conclude that HPC fabrics such as Infiniband are
only worthwhile in an OpenStack environment if one is also using RDMA
communication protocols in the client workload:
"MPI jobs were never a targeted application for our system. Rather, the
goal for our OpenStack was to accommodate all the scientific audiences
for whom big HPC clusters, and batch job schedulers, weren't a best fit.
So, no hard, fast requirement for a low-latency medium. What we realized
was that it's complex. It may be hard to keep it running in production.
IPoIB on FDR, in unconnected mode, is slower than 10Gbps Ethernet,
and if you're not making use of RDMA, then you're really just kind of
hurting yourself. Getting data in and out is tricky. All the big data
we have is on a physically separate IB fabric, and no one wanted to span
those fabrics, and doing something involving IP routing would break down
the usefulness of RDMA."
IP-over-IB performance and scalability has improved substantially in
subsequent hardware and software releases. A modern Infiniband host
channel adaptor with a current driver stack operating in connected mode
can sustain 35-40 Gbits/sec in a single TCP stream on FDR Infiniband.
The Canadian HPC4Health consortium have deployed a federation of OpenStack
private clouds using a Mellanox FDR Infiniband network fabric.
Intel's Omnipath network architecture is starting to emerge in the
Scientific OpenStack community. At the Pittsburgh Supercomputing Center,
the BRIDGES system entered production in early 2016 for HPC and data
analytics workloads. It comprises over 800 compute nodes with an
Omnipath fabric interconnect. In its current product generation,
Omnipath does not support SR-IOV. BRIDGES is a bare metal system,
managed using OpenStack Ironic. The Omnipath network is managed
independently of OpenStack.
BRIDGES is described in greater detail in the section OpenStack and HPC
Infrastructure Management.
An RDMA-Centric Bioinformatics Cloud at Cambridge University
============================================================
Cambridge University's Research Computing Services group has a long
track record as a user of RDMA technologies such as Infiniband and
Lustre across all its HPC infrastructure platforms. When scoping a new
bioinformatics compute resource in 2015, the desire to combine this
proven HPC technology with a flexible self-service platform led to a
requirements specification for an RDMA-centric OpenStack cloud.
Bioinformatics workloads can be IO-intensive in nature, and can also
feature IO access patterns that are highly sensitive to IO latency.
Whilst this class of workload is typically a weakness of virtualised
infrastructure, the effects are mitigated through use of HPC technologies
such as RDMA and virtualisation technologies such as SR-IOV to maximise
efficiency and minimise overhead.
The added complexity of introducing HPC networking technologies is
considerable, but remains hidden from the bioinformatics users of
the system. Block-based IO via RDMA is delivered to the kernel of the
KVM hypervisor. The compute instances simply see a paravirtualised
block device. File-based IO via RDMA is delivered using the Lustre
filesystem client drivers running in the VM instances. Through use
of SR-IOV virtual functions, this is identical to a bare metal compute
node in a conventional HPC configuration. Similarly, MPI communication
is performed on the virtualised network interfaces with no discernible
difference for the user of the compute instance.
.. figure:: images/hpc_fabrics-node_stack.png
:width: 400
:alt: Node software stack
*Software architecture of the compute node of an RDMA-centric
OpenStack cloud.*
The cloud contains 80 compute nodes, 3 management nodes and a number
of storage nodes of various kinds. The system runs Red Hat OpenStack
Platform (OSP) and is deployed using Red Hat's TripleO-based process.
All the HPC-centric features of the system have been implemented
using custom configuration and extensions to TripleO. Post-deployment
configuration management is performed using Ansible-OpenStack playbooks,
resulting in a devops approach for managing an HPC system.
To deploy a system with RDMA networking enabled in the compute node
hypervisor, overcloud management QCOW2 images are created with OpenFabrics
installed. Cinder is configured to use iSER (iSCSI Extensions for RDMA)
as a transport protocol.
The cloud uses a combination of Mellanox 50G Ethernet NICs and 100G
Ethernet switches for its HPC network fabric. RDMA support using RoCEv1
requires layer-2 network connectivity. Consequently OpenStack's
networking is configured to use VLANs for control plane traffic and HPC
tenant networking. VXLAN is used for other classes of tenant networking.
A multi-path layer-2 network fabric is created using multi-chassis LAGs.
Traffic is distributed across multiple physical links whilst presenting a
single logical link for the Ethernet network topology. Port memberships
of the tenant network VLANs are managed dynamically using the NEO
network management platform from Mellanox, which integrates with
OpenStack Neutron.
.. figure:: images/hpc_fabrics-cambridge.png
:width: 600
:alt: Cambridge system overview
The Forces Driving HPC and Cloud Diverge in Network Management
==============================================================
At the pinnacle of HPC, ultimate performance is achieved through
exploiting full knowledge of all hardware details: the microarchitecture
of a processor, the I/O subsystem of a server - or the physical location
within a network. HPC network management delivers performance by enabling
workload placement with awareness of the network topology.
The cloud model succeeds because of its abstraction. Cloud infrastructure
commits to delivering a virtualised flat network to its instances.
All details of the underlying physical topology are obscured. Where an
HPC network management system can struggle to handle changes in physical
network topology, cloud infrastructure adapts.
OpenStack provides a limited solution to locality-aware placement, through
use of availability zones (AZ). By defining an AZ per top-of-rack
switch, a user can request instances be scheduled to be co-resident
on the same edge switch. However, this can be a clumsy interface for
launching instances on a large private cloud, and AZs cannot be nested
to provide multiple levels of locality for co-locating larger workloads.
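As a sketch, assuming one availability zone has been defined per rack,
placement onto a particular edge switch might be requested as follows; the
zone, image, flavor and network names are hypothetical, and it is assumed
that the SDK accepts an availability zone argument at create time::

   import openstack

   conn = openstack.connect(cloud='mycloud')  # hypothetical cloud name

   conn.compute.create_server(
       name='cfd-worker-01',
       image_id=conn.image.find_image('centos7-hpc').id,
       flavor_id=conn.compute.find_flavor('hpc.large').id,
       networks=[{'uuid': conn.network.find_network('tenant-net').id}],
       # 'rack12' is a hypothetical AZ mapped to one top-of-rack switch.
       availability_zone='rack12',
   )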
OpenStack depends on other network management platforms for physical
network knowledge, and delegates to them all aspects of physical
network management. Network management and monitoring packages such
as Observium and Mellanox NEO are complementary to the functionality
provided by OpenStack.
Another key theme in HPC network management is the gathering of
network-centric performance telemetry.
While HPC does not deliver on all of its promise in this area, there is
greater focus within HPC network management on the ability to collect
telemetry data on the performance of a network for optimising the
workload.
Cloud and HPC take very different approaches in this sector.
In general, HPC performance monitoring is done at the application level.
HPC application performance analysis typically follows a model in which
runtime trace data is gathered during execution for later aggregation
and visualisation. This approach avoids overhead when monitoring is
not required and minimises the overhead when monitoring is active.
When application monitoring is active, leading packages such as OVIS
minimise overhead by using RDMA for aggregation of runtime telemetry data.
Application performance visualisation is performed using tools such
as VAMPIR. All these HPC-derived application performance monitoring
tools will also work for applications running within an OpenStack/HPC
environment.
At a system level, HPC network performance analysis is more limited in
scope, but developments such as PAVE at Lawrence Livermore and more
recently INAM^2 from Ohio State University are able to demonstrate
a more holistic capability to identify adverse interactions between
applications sharing a network, in addition to performance bottlenecks
within an application itself.
The pace of development of cloud infrastructure monitoring is
faster, and in many cases is derived from open-source equivalents of
hyperscaler-developed capabilities. Twitter's Zipkin is a distributed
application performance monitoring framework derived from conceptual
details from Google's Dapper. LinkedIn developed and published
Kafka, a distributed near-real-time message log. However, the layers of
abstraction that give cloud its flexibility can prevent cloud monitoring
from providing performance insights from the physical domain that inform
performance in the virtual domain.
At OpenStack Paris in November 2014 Intel demonstrated Apex Lake,
a project which aims to provide performance telemetry across these
abstraction boundaries - including across virtual/physical network
abstractions. Some of these features may have been incorporated into
Intel's open source Snap telemetry/monitoring framework.
In its present situation, through use of SR-IOV network devices,
cloud network infrastructure has demonstrated that it is capable of
achieving performance levels that are typically within 1-9% of bare metal.
OpenStack can be viewed as the integration and orchestration of existing
technology platforms. The physical network performance telemetry of
cloud network infrastructure is delegated to the technology platforms
upon which it is built. In future, projects such as INAM^2 on the HPC
side and Apex Lake on the cloud side may lead to a telemetry monitoring
framework capable of presenting performance data from virtual and physical
domains in the context of one another.
Further Reading
===============
This Intel white paper provides a useful introduction to SR-IOV:
http://www.intel.in/content/dam/doc/white-paper/pci-sig-single-root-io-virtualization-support-in-virtualization-technology-for-connectivity-paper.pdf
A step-by-step guide to setting up Mellanox Infiniband
with a Red Hat variant of Linux and OpenStack Mitaka:
https://wiki.openstack.org/wiki/Mellanox-Neutron-Mitaka-Redhat-InfiniBand
A presentation by Professor DK Panda from NOWLAB at Ohio State University
on MVAPICH2-Virt: https://youtu.be/m0p2fibwukY
Further information on MVAPICH2-Virt can be found here:
http://mvapich.cse.ohio-state.edu
Some papers from the team at NOWLAB describing MVAPICH2-Virt in greater depth:
* [HiPC'14] High Performance MPI Library over SR-IOV Enabled InfiniBand Clusters.
Jie Zhang, Xiaoyi Lu, Jithin Jose, Rong Shi, Mingzhe Li, and Dhabaleswar
K. (DK) Panda. Proceedings of the 21st annual IEEE International
Conference on High Performance Computing (HiPC), 2014.
* [CCGrid'15] MVAPICH2 over OpenStack with SR-IOV: An Efficient Approach
to Build HPC Clouds. Jie Zhang, Xiaoyi Lu, Mark Arnold, and Dhabaleswar
K. (DK) Panda. Proceedings of the 15th IEEE/ACM International Symposium
on Cluster, Cloud and Grid Computing (CCGrid), 2015.
The OVIS HPC application performance monitoring framework:
https://ovis.ca.sandia.gov/mediawiki/index.php/Main_Page
PAVE - Performance Analysis and Visualisation
at Exascale at Lawrence Livermore:
http://computation.llnl.gov/projects/pave-performance-analysis-visualization-exascale
The introduction of INAM^2 for real-time
Infiniband network performance monitoring:
http://mvapich.cse.ohio-state.edu/static/media/publications/abstract/subramoni-isc16-inam.pdf
Further information on INAM^2 can be found here:
http://mvapich.cse.ohio-state.edu/tools/osu-inam/
Open Zipkin is a distributed application performance monitoring framework
developed at Twitter, based on Google's paper on their Dapper monitoring
framework: http://zipkin.io
Intel Snap is a new monitoring framework for virtualised infrastructure:
http://snap-telemetry.io
Observium is a network mapping and monitoring platform built upon SNMP:
http://observium.org/
A useful discussion on the value of high-resolution network telemetry
for researching issues with maximum latency in a cloud environment:
https://engineering.linkedin.com/performance/who-moved-my-99th-percentile-latency
Acknowledgements
================
This document was originally written by Stig Telfer of `StackHPC Ltd <https://www.stackhpc.com>`_ with the support
of Cambridge University, with contributions, guidance and feedback from
subject matter experts:
* **Professor DK Panda** and **Dr. Xiaoyi Lu** from NOWLAB, Ohio State University.
* **Jonathan Mills** from NASA Goddard Spaceflight Center.
.. figure:: images/cc-by-sa.png
:width: 100
:alt: Creative commons licensing
This document is provided as open source with a Creative Commons license
with Attribution + Share-Alike (CC-BY-SA)
OpenStack and HPC Workload Management
#####################################
Workload Management in HPC and Cloud
====================================
The approach taken for managing workloads is a major difference between
conventional use cases of HPC and cloud.
A conventional approach to HPC workload management is likely to involve
one or more head nodes of an HPC cluster for login, development,
compilation and job submission services. Parallel workloads would
be submitted from a head node to job batch queues of the workload
manager, which control access to parallel partitions of compute nodes.
Such partitions may equate to mappings of types of compute nodes and the
specific resources (CPU, memory, storage and networking) that applications
require. Each compute node runs a workload manager agent which configures
resources, launches application processes and monitors utilisation.
Pain Points in Conventional HPC Workload Management
---------------------------------------------------
On a large multi-user HPC system the login node is a continual source
of noisy neighbour problems. Inconsiderate users may, for example,
consume system resources by performing giant compilations with wide
task parallelism, opening giant logfiles from their task executions,
or running recursive finds across the filesystem in search of forgotten files.
An HPC system must often support a diverse mix of workloads. Different
workloads may have a wide range of dependencies. With increasing
diversity comes an increasing test matrix, which increases the toil
involved in making any changes. How can an administrator be sure of the
effects of any change to the software packages installed? What must
be done to support a new version of an ISV application? What are the
side-effects of updating the version of a dependency? What if a security
update leads to a dependency conflict? As the flexibility of an HPC
software environment grows, so too does the complexity of maintaining it.
In an environment where data is sensitive, local scratch space and
parallel filesystems for HPC workloads can often have default access
permissions with an undesirable level of openness. Data security can be
problematic in a shared HPC resource in which the tenants are not trusted.
The Case for Workload Management on OpenStack Infrastructure
------------------------------------------------------------
The flexibility of OpenStack can ease a number of pain points of HPC
cluster administration:
* With software-defined OpenStack infrastructure, a new compute node or
head node is created through software processes - not a trip to the
data centre. Through intelligent orchestrated automated provisioning,
the administrative burden of managing changes to resource configuration
can be eliminated. And from a user's perspective, a self-service
process for resizing their resource allocation is much more responsive
and devolves control to the user.
* Through OpenStack it becomes a simple process to automatically provision
and manage any number of login nodes and compute nodes. The multi-tenancy
access control of cloud infrastructure ensures that compute resources
allocated to a project are only seen and accessible to members of that
project. OpenStack does not pretend to change the behaviour of noisy
neighbours, but it helps to remove the strangers from a neighbourhood.
* OpenStack's design ethos is the embracing (not replacing) of data
centre diversity. Supporting a diverse mix of HPC workloads is not
materially different from supporting the breadth of cloud-native
application platforms. One of the most significant advances of cloud
computing has been in the effective management of software images. Once a
user project has dedicated workload management resources allocated to it,
the software environment of those compute resources can be tailored to
the specific needs of that project without infringing on any conflicting
requirements of other users.
* The cloud multi-tenancy implemented by OpenStack enforces segregation
so that tenants are only visible to one another through the interfaces
that they choose to expose. The isolation of tenants applies to all
forms of resources - compute, networking and storage. The fine-grained
control over what is shared (and what is not shared) results in greater
data security than a conventional multi-user HPC system.
All of this can be done using conventional HPC infrastructure and
conventional management techniques, but to do so would demand using
industry best practices as a baseline, and require the continual
attention of a number of competent system administrators to keep it
running smoothly, securely and to the satisfaction of the users.
Organisations working on the convergence of HPC and cloud often refer
to this subject as cluster-as-a-service. How can a cloud resource
be equipped with the interfaces familiar to users of batch-queued
conventional HPC resources?
Delivering an HPC Platform upon OpenStack Infrastructure
========================================================
HPC usually entails a platform, not an infrastructure. How is OpenStack
orchestrated to provision an HPC cluster and workload manager?
Addressing this market are proprietary products and open source projects.
The tools available in the OpenStack ecosystem also ensure that a
home-grown cluster orchestration solution is readily attainable.
An example of each approach is included here.
Broadly, the cluster deployment workflow would follow these steps (a
simplified sketch in code follows the list):
#. The creation of the HPC cluster can be instigated through the command
line. In some projects a custom panel for managing clusters is added
to Horizon, the OpenStack web dashboard.
#. Resources for the cluster must be allocated from the OpenStack
infrastructure. Compute node instances, networks, ports, routers,
images and volumes must all be assigned to the new cluster.
#. One or more head nodes must be deployed to manage the cluster node
instances, provide access for end users and workload management.
The head node may boot a customised image (or volume snapshot) with
the HPC cluster management software installed. Alternatively, it may
boot a stock cloud image and install the required software packages as
a secondary phase of deployment.
#. Once the head nodes are deployed with base OS and HPC cluster
management packages, an amount of site-specific and deployment-specific
configuration must be applied. This can be applied through instance
metadata or a configuration management language such as Ansible or Puppet.
A Heat-orchestrated deployment can use a combination of instance metadata
and a configuration management language (usually Puppet, though more recently
Ansible also provides this capability).
#. A number of cluster node instances must be deployed. The process of
node deployment can follow different paths. Typically the cluster nodes
would be deployed in the same manner as the head nodes by booting from
OpenStack images or volumes, and applying post-deployment configuration.
#. The head nodes and cluster nodes will share one or more networks, and
the cluster nodes will register with the HPC workload management service
deployed on the head nodes.
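A highly simplified sketch of steps 3 to 6 above, using the openstacksdk
library's higher-level interface: the image, flavor and network names are
hypothetical, and the cloud-init payloads stand in for the real
post-deployment configuration performed by Ansible, Puppet or similar::

   import openstack

   conn = openstack.connect(cloud='mycloud')  # hypothetical cloud name

   HEAD_INIT = "#cloud-config\npackages:\n  - slurm-slurmctld\n"
   NODE_INIT = "#cloud-config\npackages:\n  - slurm-slurmd\n"

   # Head node: login services, workload manager controller, shared /home.
   conn.create_server(
       name='cluster-head',
       image='centos7-cloud',   # hypothetical stock cloud image
       flavor='hpc.small',      # hypothetical flavor name
       network='cluster-net',   # hypothetical tenant network
       userdata=HEAD_INIT,
       wait=True,
   )

   # Cluster compute nodes: register with the controller once configured.
   for i in range(8):
       conn.create_server(
           name='cluster-node-%02d' % i,
           image='centos7-cloud',
           flavor='hpc.small',
           network='cluster-net',
           userdata=NODE_INIT,
       )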
Open Platforms for Cluster-as-a-Service
---------------------------------------
The simplest implementation is arguably ElastiCluster, developed and
released as GPL open source by a research computing services group at
the University of Zurich. ElastiCluster supports OpenStack, Google
Compute Engine and Amazon EC2 as back-end cloud infrastructure, and
can deploy (among others) clusters offering SLURM, Grid Engine, Hadoop,
Spark and Ceph.
ElastiCluster is somewhat simplistic and its capabilities are
comparatively limited. For example, it doesn't currently support Keystone
v3 authentication - a requirement for deployments where a private
cloud is divided into a number of administrative domains. A cluster
is defined using an INI-format configuration template. When creating
a SLURM cluster, virtual cluster compute nodes and a single head node
are provisioned as VMs from the OpenStack infrastructure. The compute
nodes are interconnected using a named OpenStack virtual network.
All post-deployment configuration is carried out using Ansible playbooks.
The head node is a SLURM controller, login node and NFS file server for
/home mounting onto the compute nodes.
Trinity from ClusterVision uses OpenStack to manage bare metal
infrastructure, and creates a dynamic HPC-as-a-Service platform comprising
SLURM workload management and Docker containers (running on bare metal)
for the virtual cluster compute nodes. Management of virtual clusters is
more user-friendly in Trinity than in ElastiCluster. A custom panel has
been added to the OpenStack Horizon dashboard to enable users to create,
manage and monitor their virtual clusters.
Trinity is developed as open source, but has a very small group of
developers. The bus factor of this project has been exposed by
the recent departure from ClusterVision of Trinity's core contributor.
Bright Computing Cluster-on-Demand
----------------------------------
Bright Computing has developed its proprietary products for HPC
cluster management and adapted them for installation, configuration and
administration of OpenStack private clouds. The product is capable of
partitioning a system into a mix of bare metal HPC compute and OpenStack
private cloud.
.. figure:: images/hpc_workload-bright_caas.png
:width: 600
:alt: Bright Computing Cluster-on-Demand
Bright Computing also provides an OpenStack distribution with
Bright-themed OpenStack web interface and an additional panel for
management of Cluster-on-Demand deployments.
Cluster-on-Demand uses OpenStack Heat for orchestrating the allocation and
provisioning of virtualised cluster resources. When a virtual cluster is
created, the Nova flavors (virtualised hardware templates) for head node
and cluster compute node are specified. OpenStack networking details
are also provided. Bright OpenStack is capable of deploying OpenStack
with SR-IOV support, and Cluster-on-Demand is capable of booting cluster
compute nodes with SR-IOV networking.
Cluster-on-Demand deployment begins with pre-built generic head
node images. Those can then be quickly instantiated (via optional
copy-on-write semantics) and automatically customized to users'
requirements. Bright's deployment solution differs slightly from other
approaches by using Bright Cluster Manager on the virtualised head node
to deploy the virtual cluster nodes as though they were bare metal.
This approach neatly nests the usage model of Bright Cluster Manager
within a virtualised environment, preserving the familiar workflow of
bare metal deployment. However, as a result it does not exploit the
efficiencies of cloud infrastructure for compute node deployment at scale.
A virtualised cluster of “typical” size can be deployed on-demand from
scratch in several minutes, at which point it is ready to accept HPC jobs.
Bright provide configurations for a wide range of workload managers, big
data services (Spark, Hadoop), deep learning tools, or even virtualised
OpenStack clouds (OpenStack on OpenStack). Bright Cluster-on-Demand
can also dynamically burst to public clouds (AWS) when more resources
are needed (e.g. GPU nodes) or during heavy load spikes.
Cluster-on-Demand focuses on delivering the flexibility advantages of
self-service cluster provisioning, but can also deliver performance with
minimised virtualisation overhead through use of SR-IOV.
A distinctive feature of Bright OpenStack is the ability to easily deploy
virtualised HPC compute nodes next to physical ones, and run HPC workloads
in an environment spanning a mixture of physical and virtual compute nodes.
Doing so provides the administrator with considerable additional flexibility.
For example, it allows the assignment of high priority HPC job queues
to physical compute nodes, and low priority job queues, or long running
jobs, to virtual compute nodes. This in turn allows the VMs to be
live migrated across the datacentre (e.g. due to hardware maintenance)
without impacting the long-running HPC jobs hosted on them.
Extending SLURM and OpenStack to Orchestrate MVAPICH2-Virt Configuration
------------------------------------------------------------------------
The NOWLAB group at Ohio State University has developed a virtualised
variant of their MPI library, MVAPICH2-Virt. MVAPICH2-Virt is described
in greater detail in the section OpenStack and HPC Network Fabrics.
NOWLAB has also developed plugins for SLURM, called SLURM-V, to extend
SLURM with virtualization-oriented capabilities such as submitting jobs to
dynamically created VMs with isolated SR-IOV and inter-VM shared memory
(IVSHMEM) resources. Through the MVAPICH2-Virt runtime, the workload is
able to take advantage of the configured SR-IOV and IVSHMEM resources
efficiently. The NOWLAB model is slightly different from the approach
taken in Cluster-as-a-Service, in that a MVAPICH2-Virt based workload
launches into a group of VMs provisioned specifically for that workload.
"The model we chose to create VMs for the lifetime of each job seems
a clear way of managing virtualized resources for HPC workloads. This
approach can avoid having long-lived VMs on compute nodes, which makes
the HPC resources always in the virtualised state. Through the SLURM-V
model, both bare-metal and VM based jobs can be launched on the same set
of compute nodes since the VMs are provisioned and configured dynamically
only when the jobs need virtualised environments", says Prof. DK Panda
and Dr. Xiaoyi Lu of NOWLAB.
The IVSHMEM component runs as a software device driver in the host kernel.
Every parallel workload has a separate instance of the IVSHMEM device
for communication between co-resident VMs. The IVSHMEM device is mapped
into the workload VMs as a paravirtualised device. The NOWLAB team has
developed extensions to Nova to add the connection of the IVSHMEM device
on VM creation, and recover the resources again on VM deletion.
Users can also hotplug/unplug the IVSHMEM device to/from specified
running virtual machines. The NOWLAB team provides a tool with
MVAPICH2-Virt (details can be found in the `MVAPICH2-Virt userguide
<http://mvapich.cse.ohio-state.edu/userguide/virt/#_support_for_integration_with_openstack_for_vms>`_)
to hotplug an IVSHMEM device to a virtual machine and unplug an IVSHMEM
device from a virtual machine.
The SLURM-V extensions have been developed to work with KVM directly.
However, the NOWLAB group has extended the project to enable SLURM-V
to make OpenStack API calls to orchestrate the creation of workload VMs.
In this model of usage, SLURM-V uses OpenStack to allocate VM instances,
isolate networks and attach SR-IOV and IVSHMEM devices to workload VMs.
OpenStack already provides scalable and efficient mechanisms for the
creation, deployment and reclamation of VMs across a large number of
physical nodes.
SLURM-V is likely to be one of many sources competing for
OpenStack-managed resources. If other cloud users consume all resources,
leaving SLURM-V unable to launch sufficient workload VMs, then newly
submitted jobs will be queued in SLURM to wait for available resources.
As soon as one job completes and the corresponding resources are
reclaimed, SLURM will find another job in the queue to execute based on
the configured scheduling policy and resource requirements of jobs.
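
A workload manager integrating with OpenStack in this way might first
check the project's remaining compute capacity before deciding whether to
launch the VMs for a job or leave it queued. The sketch below is a rough
illustration; it assumes the SDK exposes the project's absolute compute
limits under attribute names such as ``total_cores`` and
``total_cores_used``.

.. code-block:: python

    # Hedged sketch: decide whether a job's VMs can be launched now, or
    # whether the job should remain queued until resources are reclaimed.
    # Assumes these attribute names on the absolute compute limits.
    import openstack

    def can_launch(conn, vcpus_needed, instances_needed):
        limits = conn.compute.get_limits().absolute
        free_cores = limits.total_cores - limits.total_cores_used
        free_instances = limits.instances - limits.instances_used
        return (free_cores >= vcpus_needed and
                free_instances >= instances_needed)

    conn = openstack.connect(cloud='my-cloud')
    if can_launch(conn, vcpus_needed=64, instances_needed=4):
        print('capacity available: launch workload VMs and dispatch job')
    else:
        print('insufficient capacity: job remains queued in SLURM')
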
Combining the Strengths of Cloud with HPC Workload Management
-------------------------------------------------------------
At Los Alamos National Lab, there is a desire to increase the flexibility
of the user environment of their HPC clusters.
To simplify their workload, administrators want every software image
to be the same, everywhere. LANL systems standardise on a custom
Linux distribution, based on Red Hat 6 and tailored for their demanding
requirements. Sustaining the evolution of that system to keep it current
with upstream development whilst maintaining local code branches is an
ongoing challenge.
The users demand ever increasing flexibility, but have requirements
that are sometimes contradictory. Some users have applications with
complex package dependencies that are out of date or not installed in
the LANL distribution. Some modern build systems assume internet access
at build time, which is not available on LANL HPC clusters. Conversely,
some production applications are built from a code base that is decades
old, and has dependencies on very old versions of libraries. Not all
software updates are backwards compatible.
Tim Randles, a senior Linux administrator and OpenStack architect at the
Lab, uses OpenStack and containers to provide solutions. Woodchuck is
LANL's third-generation system aimed at accommodating these conflicting
needs. The 192-node system has a physical configuration optimised for
data-intensive analytics: a large amount of RAM per CPU core, local disk
for scratch space for platforms such as HDFS, and 10G Ethernet with VXLAN
and SDN capabilities for virtualised networking.
Reid Priedhorsky at LANL has developed an unprivileged containerised
runtime environment, dubbed “Charliecloud”, upon which users can
run applications packaged using Docker tools. This enables users to
develop and build their packages on their (comparatively open) laptops
or workstations, pulling in the software dependencies they require.
One issue arising from this development cycle is that in a
security-conscious network such as LANL, the process of transferring
application container images to the HPC cluster involves copying large
amounts of data through several hops. This process was soon found to
have drawbacks:
* It quickly became time-consuming and frustrating.
* It could not be incorporated into continuous integration frameworks.
* The application container images were being stored for long periods of
time on Lustre-backed scratch space, which has a short data retention
policy, was occasionally unreliable and not backed up.
Tim's solution was to use OpenStack Glance as a portal between the
user's development environment on their workstation and the HPC cluster.
Compared with the previous approach, the Glance API was accessible from
both the users' workstations and the HPC cluster management environment.
The images stored in Glance were backed up, and OpenStack's user model
provided greater flexibility than traditional Unix users and groups,
enabling fine-grained control over the sharing of application images.
.. figure:: images/hpc_workload-slurm_glance.png
:width: 600
:alt: Integration of Glance and SLURM
Tim developed SLURM plugins to interact with Glance for validating the
image and the user's right to access it. When the job was scheduled for
execution, the user and image were both revalidated, and the application
image was downloaded and deployed ready for launch in the Charliecloud
environment.
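
In outline, the workflow resembles the ``openstacksdk`` sketch below,
which is not the LANL plugin code: the image name and file paths are
placeholders, and the use of ``download_image`` with an open file object
assumes a reasonably recent SDK release.

.. code-block:: python

    # Sketch of using Glance as the portal for user-defined application
    # images. Image names and paths are placeholders.
    import openstack

    conn = openstack.connect(cloud='my-cloud')

    # On the workstation: upload a Charliecloud/Docker image tarball.
    conn.create_image('alice/py-analytics-2016-08',
                      filename='py-analytics.tar.gz',
                      disk_format='raw',
                      container_format='bare',
                      wait=True)

    # On the cluster (for example in a job prolog): look up and fetch it.
    image = conn.image.find_image('alice/py-analytics-2016-08')
    with open('/var/tmp/py-analytics.tar.gz', 'wb') as f:
        conn.image.download_image(image, output=f)
    # Glance image membership can then govern which projects the image
    # is shared with.
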
Future plans for this work include using Neutron to create and manage
virtual tenant networks for each workload, and releasing the plugins
developed as open source contributions to SLURM's codebase.
HPC and Cloud Converge at the University of Melbourne
-----------------------------------------------------
Research compute clusters are typically designed according to the demands
of a small group of influential researchers representing an ideal use
case. Once built, however, the distribution of use cases can change as
a broader group of researchers come on board. These new use cases may
not match the expected ideal, and in some cases conflict. If job queues
and computation times stretch out, it can drive the proliferation of
isolated department-level clusters which are more expensive to maintain,
lack scale, and are all too often orphaned when the responsible researcher
moves on.
.. figure:: images/hpc_workload-spartan_logo.png
:width: 400
:alt: Spartan logo
Introducing Spartan
~~~~~~~~~~~~~~~~~~~
In 2016 the University of Melbourne launched a new cluster called Spartan.
It takes an empirical approach, driven by the job profiles observed in
its predecessor, Edward, in the prior year. In particular, single-core
and low-memory jobs dominated: 76% of jobs were single core, and 97% used
less than 4 GB of memory. High core-count, task-parallel jobs were often delayed
due to competition with these single core jobs, leading to research
funds being directed towards department level resources. National peak
facilities were often rejected as an option due to their long queue
times and restrictive usage requirements.
Spartan takes advantage of the availability of an existing and very large
research cloud (NeCTAR) to allow additional computation capacity, and
the provisioning of common login and management infrastructure. This is
combined with a small but more powerful partition of tightly coupled
bare-metal compute nodes, and specialist high-memory and GPU partitions.
This hybrid arrangement offers the following advantages:
* Users with data parallel jobs have access to the much larger research
cloud and can soak up the spare cycles available with cloud bursting,
reducing their job wait time.
* Users with task parallel jobs have access to optimised bare-metal HPC,
supported by high-speed networking and storage.
* The larger task parallel jobs remain segregated from less
resource-intensive data parallel jobs, reducing contention.
* Job demands can be continually monitored, and the cloud and bare metal
partitions selectively expanded as and when the need arises.
* Departments and research groups can co-invest in Spartan. If they need
more processing time or a certain type of hardware, they can attach it
directly to Spartan and have priority access. This avoids the added
overheads of administering their own system, including the software
environment, login and management nodes.
* Management nodes can be readily migrated to new hardware, allowing us
to upgrade or replace hardware without bringing the entire cluster down.
* Spartan can continue beyond the life of its original hardware, as
different partitions are resized or replaced, a common management and
usage platform remains.
Spartan does not have extraordinary hardware or software, and its
peak performance does not exceed that of other HPC systems. Instead,
it seeks to segregate compute loads into partitions with different
performance characteristics according to their demands. This will
result in shorter queues, better utilisation, cost-effectiveness, and,
above all, faster time to results for our research community.
Job and Resource Management
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Previous HPC systems at the University utilised Moab Workload Manager
for job scheduling and Terascale Open-source Resource and QUEue Manager
(TORQUE) as a resource manager. The Spartan team adopted the SLURM
Workload Manager for the following reasons:
* Existing community of users at nearby Victorian Life Sciences Compute
Initiative (VLSCI) facility.
* Similar syntax to the PBS scripts used on Edward, simplifying user
transition.
* Highly configurable through add-on modules.
* Importantly, support for cloud bursting, for example to the Amazon
  Elastic Compute Cloud (EC2) or, in Spartan's case, the NeCTAR research
  cloud (a minimal resume-script sketch follows this list).
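
SLURM's bursting support works through its power-saving interface: a
site-provided resume program is invoked with the names of nodes to bring
up, and a matching suspend program tears them down again. A minimal,
illustrative resume script for an OpenStack cloud might look like the
sketch below; it assumes a plain comma-separated node list (production
scripts expand bracketed host ranges, for example with ``scontrol show
hostnames``), and the cloud, image, flavor and network names are
placeholders.

.. code-block:: python

    #!/usr/bin/env python
    # Minimal sketch of a SLURM ResumeProgram that bursts to OpenStack.
    # Assumes plain comma-separated node names; production scripts also
    # update SLURM with the addresses of the newly booted nodes.
    import sys
    import openstack

    conn = openstack.connect(cloud='nectar')   # placeholder cloud name

    for node in sys.argv[1].split(','):
        conn.create_server(node,
                           image='cloud-compute-node',
                           flavor='m2.xlarge',
                           network='spartan-cloud-net',
                           wait=True,
                           auto_ip=False)
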
Account Management
~~~~~~~~~~~~~~~~~~
Integration with a central staff and student Active Directory was
initially considered, but ultimately rejected due to the verbose usernames
required (i.e. email addresses). The Spartan team reverted to using
an LDAP-based system as had been the case with previous clusters, and
a custom user management application.
Application Environment
~~~~~~~~~~~~~~~~~~~~~~~
EasyBuild was used as a build and installation framework, with the LMod
environmental modules system selected to manage application loading by
users. These tools tightly integrate, binding the specific toolchains
and compilation environment to the applications loaded by users.
EasyBuild's abstraction in its scripts sometimes required additional
administrative overhead, and not all software had a pre-canned script
ready for modification, meaning some scripts had to be written from scratch.
Training
~~~~~~~~
Training has been a particular focus of the implementation of Spartan.
Previous HPC training for researchers was limited, with only 38
researcher/days of training conducted in the 2012-2014 period.
The Spartan team now engage in weekly training, rotating across the
following sessions:
* Introductory, targeting researchers with little or no HPC or Linux
experience.
* Transition, targeting existing Edward users who need to port their jobs
to Spartan.
* Shell scripting.
* Parallel programming.
The team collaborate closely with researchers to drive this curriculum,
serving a range of experience levels, research disciplines, and software
applications.
The Future
~~~~~~~~~~
Bernard Meade, Spartan project sponsor, adds:
“The future configuration of Spartan will be driven by how it is
actually used. We continue to monitor what applications are run,
how long they take, and what resources they require. While Spartan
has considerable elasticity on the cloud side, we're also able to
incrementally invest in added bare-metal and specialist nodes (high
memory, GPU) as the need arises. Given the diversity in HPC job
characteristics will only grow, we believe this agile approach is the
best means to serve the research community.”
Cloud Infrastructure Does Not (yet) Provide All the Answers
===========================================================
OpenStack Control Plane Responsiveness and Job Startup
------------------------------------------------------
Implementations of HPC workload management that create new VMs for worker
nodes for every job in the batch queue can have a significant impact
on the overall utilisation of the system if the jobs in the queue are
comparatively short-lived:
* Job startup time can be substantially increased. A fast boot for a VM
  is of the order of 20 seconds. Similarly, job cleanup time can add
  further overhead while the VM is destroyed and its resources reclaimed.
* A high churn of VM creation and deletion can add considerable load to
the OpenStack control plane.
The Cluster-as-a-Service pattern of virtualised workload managers does
not typically create VMs for every workload. However, the OpenStack
control plane can still have an impact on job startup time, for example
if the application image must be retrieved and distributed, or a virtual
tenant network must be created. Empirical tests have measured the time
to create a virtual tenant network to grow linearly with the number of
ports in the network, which could have an impact on the startup time
for large parallel workloads.
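
Such measurements are straightforward to reproduce with the SDK. The
sketch below times virtual network setup as the port count grows; the
network names and address range are arbitrary, and the absolute figures
will depend entirely on the deployment.

.. code-block:: python

    # Micro-benchmark sketch: time tenant network setup as the number of
    # ports grows. Names and CIDR are arbitrary.
    import time
    import openstack

    conn = openstack.connect(cloud='my-cloud')

    for nports in (8, 32, 128):
        start = time.time()
        net = conn.network.create_network(name='bench-%d' % nports)
        conn.network.create_subnet(network_id=net.id, ip_version=4,
                                   cidr='10.42.0.0/16')
        ports = [conn.network.create_port(network_id=net.id)
                 for _ in range(nports)]
        print('%d ports: %.1f s' % (nports, time.time() - start))
        for port in ports:
            conn.network.delete_port(port)
        conn.network.delete_network(net)
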
Workload Managers Optimise Placement Decisions
----------------------------------------------
A sophisticated workload manager can use awareness of physical network
topology to optimise application performance through placing the workload
on physical nodes with close network proximity.
On a private cloud system such as OpenStack, the management of
the physical network is delegated to a network management platform.
OpenStack avoids physical network knowledge and focuses on defining the
intended state, leaving physical network management platforms to apply
architecture-specific configuration.
In a Cluster-as-a-Service use case, there are two scheduling operations
where topology-aware placement could be usefully applied:
* When the virtual cluster compute node instances are created, their
placement is determined by the OpenStack Nova scheduler.
* When a queued job in the workload manager is being allocated to virtual
cluster compute nodes.
Through use of Availability Zones, OpenStack Nova can be configured to
perform a simple form of topology-aware workload placement, but without
any hierarchical grouping of nodes. Nova's scheduler filter API
provides a mechanism which could be used for implementing topology-aware
placement in a more intelligent fashion.
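
As an illustration of the shape such a filter could take, the sketch
below passes only hosts attached to a preferred leaf switch. The switch
map is a hypothetical stand-in for a real source of topology data, this
is not an existing Nova filter, and the ``host_passes`` signature shown
is that of recent Nova releases.

.. code-block:: python

    # Sketch of a custom Nova scheduler filter for topology-aware
    # placement. The switch map below is hypothetical.
    from nova.scheduler import filters

    SWITCH_MAP = {'cn001': 'leaf1', 'cn002': 'leaf1', 'cn003': 'leaf2'}
    PREFERRED_SWITCH = 'leaf1'

    class LeafSwitchFilter(filters.BaseHostFilter):
        """Pass only hosts attached to the preferred leaf switch."""

        def host_passes(self, host_state, spec_obj):
            return SWITCH_MAP.get(host_state.host) == PREFERRED_SWITCH
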
OpenStack's Flexibility is Stretched by the Economics of Utilisation
----------------------------------------------------------------------
With its decoupled execution model, batch queue job submission is an
ideal use case for off-peak compute resources. The AWS spot market
auctions time on idle cores for opportunistic usage at up to a 90%
discount from the on-demand price.
There is no direct equivalent to the AWS spot market in OpenStack.
More generally, management of pricing and billing is considered outside of
OpenStack's scope. OpenStack does not currently have the capabilities
required for supporting opportunistic spot usage.
However, work is underway to implement the software capabilities
necessary for supporting preemptible spot instances, and it is hoped
that OpenStack will support this use case in due course. At that point,
Cluster-as-a-Service deployments could grow or shrink in response to
the availability of under-utilised compute resources on an OpenStack
private cloud.
The Difficulty of Future Resource Commitments
---------------------------------------------
HPC facilities typically have a greater degree of administrative oversight
and coordination than a private cloud, enabling users to request exclusive
advance reservations of large sections of an HPC system to perform
occasional large-scale workloads.
In a private cloud, there is no direct mainstream equivalent to this.
However, the Blazar project aims to extend OpenStack compute with support
for resource reservations. Blazar works by changing the management
of resource allocation for a segregated block of nodes. Within the
partition of nodes allocated to Blazar, resources can only be managed
through advance reservations.
A significant drawback of Blazar is that it does not support the
intermingling of reservations with on-demand usage. Without the ability
to gracefully preempt running instances, Blazar can only support advance
reservations by segregating a number of nodes exclusively for that mode
of usage.
Summary
=======
OpenStack delivers new capabilities to flexibly manage compute clusters
as on-demand resources. The ability to define a compute cluster
and workload manager through code, data and configuration plays to
OpenStack's strengths.
With the exception of some niche high-end requirements, OpenStack can
be configured to deliver Cluster-as-a-Service with minimal performance
overhead compared with a conventional bare metal HPC resource.
Further Reading
===============
The ElastiCluster project from University of Zurich is
open source. Online documentation is available here:
https://elasticluster.readthedocs.io/en/latest/index.html
The Trinity project from ClusterVision is developed as open source:
http://clustervision.com/solutions/trinity/
Bright Computing presented their proprietary
Bright OpenStack and Cluster-as-a-Service products
at the OpenStack Austin summit in April 2016:
https://www.openstack.org/videos/video/bright-computing-high-performance-computing-hpc-and-big-data-on-demand-with-cluster-as-a-service-caas
NOWLAB's publication on Slurm-V: Extending Slurm
for Building Efficient HPC Cloud with SR-IOV and IVShmem:
http://link.springer.com/chapter/10.1007/978-3-319-43659-3_26
Tim Randles from Los Alamos presented his work on
integrating SLURM with Glance on the HPC/Research speaker
track at the OpenStack Austin summit in April 2016:
https://www.openstack.org/videos/video/glance-and-slurm-user-defined-image-management-on-hpc-clusters
The Spartan OpenStack/HPC system at the University of Melbourne:
http://newsroom.melbourne.edu/news/new-age-computing-launched-university-melbourne
http://insidehpc.com/2016/07/spartan-hpc-service/
Topology-aware placement in SLURM is described here:
http://slurm.schedmd.com/topology.html
Some research describing a method of adding
topology-aware placement to the OpenStack Nova scheduler:
http://charm.cs.illinois.edu/newPapers/13-01/paper.pdf
HPC resource management at CERN and some current
OpenStack pain points are described in detail here:
http://openstack-in-production.blogspot.co.uk/2016/04/resource-management-at-cern.html
OpenStack Pre-emptible Instances Extension (OPIE) from Indigo Datacloud
is available here: https://github.com/indigo-dc/opie
Acknowledgements
================
This document was originally written by Stig Telfer of `StackHPC Ltd <https://www.stackhpc.com>`_ with the support
of Cambridge University, with contributions, guidance and feedback from
subject matter experts:
* **Piotr Wachowicz**, Cloud Integration Lead at Bright Computing
* **Professor DK Panda** and **Dr. Xiaoyi Lu** from NOWLAB, Ohio State University.
* **Tim Randles** from Los Alamos National Laboratory.
* **Lev Lafayette**, **Bernard Meade**, **David Perry**, **Greg Sauter**
and **Daniel Tosello** from the University of Melbourne.
.. figure:: images/cc-by-sa.png
:width: 100
:alt: Creative commons licensing
This document is provided as open source with a Creative Commons license
with Attribution + Share-Alike (CC-BY-SA)
View File
@ -1,410 +0,0 @@
OpenStack and Virtualised HPC
#############################
Doubts over the adoption of OpenStack are likely to centre on the impact
of infrastructure virtualisation. From the skeptical perspective of an
HPC architect, why OpenStack?
* *I have heard the hype*
* *I am skeptical to some degree*
* *I need evidence of benefit*
In this section, we will describe the different forms of overhead that
can be introduced by virtualisation, and provide technical details
of solutions that mitigate, eliminate or bypass the overheads of
software-defined infrastructure.
The Overhead of Virtualisation
==============================
Analysis typically shows that the overhead of virtualisation for
applications that are CPU or memory intensive is minimal.
Similarly, applications that depend on high-bandwidth I/O or network
communication for bulk data transfers can achieve levels of performance
that are close to equivalent bare metal configurations.
Where a significant performance impact is observed, it can often be
ascribed to overcommitment of hardware resources or “noisy neighbours”
- issues that could equally apply in non-virtualised configurations.
However, there remains a substantial class of applications whose
performance is significantly impacted by virtualisation. Some of the
causes of that performance overhead are described here.
Increased Software Overhead on I/O Operations
---------------------------------------------
Factors such as storage IOPs and network message latency are often
critical for HPC application performance.
HPC applications that are sensitive to these factors are poor performers
in a conventional virtualised environment. Fully-virtualised environments
incur additional overhead per I/O operation that can impact performance
for applications that depend on such patterns of I/O.
The additional overhead is mitigated through paravirtualisation, in
which the guest OS includes support for running within a virtualised
environment. The guest OS cooperates with the host OS to improve
the overhead of hardware device management. Direct hardware device
manipulation is performed in the host OS, keeping the micro-management
of hardware closer to the physical device. The hypervisor then presents
a more efficient software interface to a simpler driver in the guest OS.
Performance improves through streamlining interactions between guest OS
and the virtual hardware devices presented to it.
Hardware Offload in a Virtualised Network
-----------------------------------------
All modern Ethernet NICs provide hardware offload of IP, TCP and
other protocols. To varying degrees, these free up CPU cycles from the
transformations necessary between data in user buffers and packets on
the wire (and vice versa).
In a virtualised environment, the network traffic of a guest VM passes
from a virtualised network device into the software-defined network
infrastructure running in the hypervisor. Packet processing is usually
considerably more complex than a typical HPC configuration. Hardware
offload capabilities are often unable to operate or are ineffective
in this mode. As a result, networking performance in a virtualised
environment can be less performant and more CPU-intensive than an
equivalent bare metal environment.
Increased Jitter in Virtualised Network Latency
-----------------------------------------------
To varying degrees, virtualised environments generate increased system
noise effects. These effects result in a longer tail on latency
distribution for interrupts and I/O operations.
A bulk synchronous parallel workload, iterating in lock-step, moves at
the speed of the slowest worker. If the slowest worker is determined
by jitter effects in I/O latency, overall application progress becomes
affected by the increased system noise of a virtualised environment.
Using OpenStack to Deliver Virtualised HPC
==========================================
There is considerable development activity in the area of virtualisation.
New levels of performance and capability are continually being introduced
at all levels: processor architecture, hypervisor, operating system and
cloud orchestration.
Best Practice for Virtualised System Performance
------------------------------------------------
The twice-yearly cadence of OpenStack software releases leads to rapid
development of new capabilities, which improve its performance and
flexibility.
Across the OpenStack operators community, there is a continual
collaborative process of testing and improvement of hypervisor
efficiency. Empirical studies of different configurations of tuning
parameters are frequently published and reviewed. Clear improvements
are collected into a curated guide on hypervisor performance tuning
best practice.
OpenStack's Nova compute service supports exposing many hypervisor
features for raising virtualised performance; a flavor-based sketch
follows the list below. For example:
* Enabling processor architecture extensions for virtualisation.
* Controlling hypervisor techniques for efficiently managing many
guests, such as Kernel Same-page Merging (KSM). This can add CPU
overhead in return for varying degrees of improvement in memory usage
by de-duplicating identical pages. For supporting memory-intensive
workloads, KSM can be configured to prevent merging between NUMA
nodes. For performance-critical HPC, it can be disabled altogether.
* Pinning virtual cores to physical cores.
* Passing through the NUMA topology of the physical host to the guest
enables the guest to perform NUMA-aware memory allocation and task
scheduling optimisations.
* Passing through the specific processor model of the physical CPUs
can enable use of model-specific architectural extensions and runtime
microarchitectural optimisations in high-performance scientific libraries.
* Backing guest memory with huge pages reduces the impact of host
Translation Lookaside Buffer (TLB) misses.
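
Several of these techniques are requested per flavor through extra specs,
as in the sketch below; the flavor name and sizing are placeholders, and
host-level settings such as the CPU model and KSM remain in ``nova.conf``.

.. code-block:: python

    # Sketch: an HPC-oriented flavor carrying hypervisor tuning hints.
    # The flavor name and sizing are placeholders.
    import openstack

    conn = openstack.connect(cloud='my-cloud')

    flavor = conn.create_flavor('hpc.pinned.16', ram=65536, vcpus=16,
                                disk=40)
    conn.set_flavor_specs(flavor.id, {
        'hw:cpu_policy': 'dedicated',   # pin vCPUs to physical cores
        'hw:numa_nodes': '2',           # give the guest a NUMA topology
        'hw:mem_page_size': 'large',    # back guest RAM with huge pages
    })
    # CPU model passthrough (cpu_mode=host-passthrough) and KSM tuning are
    # host-side settings, not flavor extra specs.
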
By using optimisation techniques such as these, the overhead of
virtualisation for CPU-bound and memory-bound workloads is typically
reduced to one to two percent of bare metal performance. More information
can be found in the Further Reading for this section, below.
Conversely, by constraining the virtual architecture more narrowly,
these tuning parameters make VM migration more difficult in a cloud
infrastructure consisting of heterogeneous hypervisor hardware; in
particular, this may preclude live migration.
Hardware Support for I/O Virtualisation
---------------------------------------
Hardware devices that support Single Root I/O Virtualization (SR-IOV)
enable the hardware resources of the physical function of a device to be
presented as many virtual functions. Each of these can be individually
configured and passed through into a different VM. In this way, the
hardware resources of a network card can provide performance with close
to no additional overhead, simultaneously serving the diverse needs of
many VMs.
Through direct access to physical hardware, SR-IOV networking places
some limitations on software-defined infrastructure. It is not typically
possible to apply security group policies to a network interface mapped
to an SR-IOV virtual function. This may raise security concerns for
externally accessible networks, but should not prevent SR-IOV networking
being used internally for high-performance communication between the
processes of an OpenStack hosted parallel workload.
Recent empirical studies have found that using SR-IOV for high-performance
networking can reduce the overhead of virtualisation typically to 1-9%
of bare metal performance for network-bound HPC workloads. Links to
some examples can be found in the Further Reading section below.
Using Physical Devices in a Virtualised Environment
---------------------------------------------------
Some classes of HPC applications make intensive use of hardware
acceleration in the form of GPUs, Xeon Phi, etc.
Specialised compute hardware in the form of PCI devices can be included
in software-defined infrastructure by pass-through. The device is
mapped directly into the device tree of a guest VM, providing that VM
with exclusive access to the device.
A virtual machine that makes specific requirements for hardware
accelerators can be scheduled to a hypervisor with the resources
available, and the VM is composed by passing through the hardware
it needs from the environment of the host.
The resource management model of GPU devices does not adapt to SR-IOV.
A GPU device is passed-through to a guest VM in its entirety. A host
system with multiple GPUs can pass-through different devices to different
systems. Similarly, multiple GPU devices can be passed-through into
a single instance and GPUdirect peer-to-peer data transfers can be
performed between GPU devices and also with RDMA-capable NICs.
Device pass-through, however, can have a performance impact on virtualised
memory management. The IOMMU configuration required for pass-through
restricts the use of transparent huge pages. Memory must, therefore,
be pinned in a guest VM using pass-through devices. This can limit the
flexibility of software-defined infrastructure to over-commit virtualised
resources (although over-committed resources are generally unlikely to
be worthwhile in an HPC use case). Static huge pages can still be used
to provide a boost to virtual memory performance.
The performance overhead of virtualised GPU-intensive scientific
workloads has been found to be as little as 1% of bare metal performance.
More information can be found in the Further Reading section below.
.. figure:: images/hpc_virtualisation-3_forms.png
:width: 600
:alt: Three forms of virtualised hardware
*Different strategies for efficient handling of hardware devices. Here a
network card is used as example. In paravirtualisation a virtual
network device is created in software that is designed for the most
efficient software interface. In PCI-passthrough a physical device is
transferred exclusively from the hypervisor to a guest VM. In SR-IOV,
a physical device creates a number of virtual functions, sharing the
physical resources. Virtual functions can be passed-through to a guest
VM leaving the physical device behind in the hypervisor.*
OS-level Virtualisation: Containers
-----------------------------------
The overheads of virtualisation are almost eliminated by moving to a
different model of compute abstraction. Containers, popularised by
Docker, package an application plus its dependencies as a lightweight
self-contained execution environment instead of an entire virtual machine.
The simplified execution model brings benefits in memory usage and
I/O overhead.
Currently, HPC networking using RDMA can be performed within containers,
but with limitations. The OFED software stack lacks awareness of network
namespaces and cgroups, which prevents per-container control and isolation
of RDMA resources. However, containers configured with host networking
can use RDMA.
Bare Metal Virtualisation: OpenStack's Project Ironic
--------------------------------------------------------
OpenStack's software-defined infrastructure does not need to be virtual.
Ironic is a virtualisation driver. Through some artful abstraction it
presents bare metal compute nodes as though they were virtualised compute
resources. Ironic's design philosophy results in zero overhead to the
performance of the compute node, whilst providing many of the benefits
of software-defined infrastructure management.
Through Ironic, a user gains bare metal performance from their compute
hardware, but retains the flexibility to run any software image they
choose.
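
From an operator's point of view, the same SDK that drives virtual
machines can enumerate Ironic-managed nodes, as in the brief sketch
below; it assumes the bare metal service is deployed and the credentials
carry sufficient privilege.

.. code-block:: python

    # Sketch: list Ironic-managed bare metal nodes and their states.
    import openstack

    conn = openstack.connect(cloud='my-cloud')

    for node in conn.baremetal.nodes(details=True):
        print('{:<20} provision={:<12} power={}'.format(
            node.name or node.id, node.provision_state, node.power_state))
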
The Ironic project is developing rapidly, with new capabilities being
introduced with every release. OpenStack's latest release delivers
some compelling new functionality:
* Serial consoles
* Volume attachment
* Multi-tenant networking
Complex image deployment (over multiple disks, for example) is an
evolving capability.
Using Ironic has some limitations:
* Ironic bare metal instances cannot be dynamically intermingled with
virtualised instances. However, they can be organised as separate cells
or regions within the same OpenStack private cloud.
* Some standard virtualisation features could never be supported, such as
overcommitment and migration.
See the section `OpenStack and HPC Infrastructure Management <openstack-and-hpc-infrastructure.html>`_
for further details about Ironic.
Virtualised HPC on OpenStack at Monash University
=================================================
From its inception in 2012, Australian scientific research has benefited
from the NeCTAR Research Cloud federation. Now comprising eight
institutions from across the country, NeCTAR was an early adopter of
OpenStack, and has been at the forefront of development of the project
from that moment.
NeCTAR's federated cloud compute infrastructure supports a wide range of
scientific research with diverse requirements. Monash Advanced Research
Computing Hybrid (MonARCH) was commissioned in 2015/2016 to provide a
flexible and dynamic HPC resource.
.. figure:: images/hpc_virtualisation-NeCTAR_Australia.png
:width: 400
:alt: NeCTAR federation across Australia
MonARCH has 35 dual-socket Haswell-based compute nodes and 820 CPU
cores. MonARCH exploits cloud-bursting techniques to grow elastically by
using resources from across the NeCTAR federation. The infrastructure
uses a fabric of 56G Mellanox Ethernet for a converged, high-speed
network. The cloud control plane is running Ubuntu Trusty and the KVM
hypervisor. OpenStack Liberty (as of Q3 2016) was deployed using Ubuntu
distribution packages (including selected patches as maintained by NeCTAR
Core Services), orchestrated and configured using Puppet.
.. figure:: images/hpc_virtualisation-racmon.png
:width: 400
:alt: RACmon group at Monash University
MonARCH makes extensive use of SR-IOV for accessing its HPC network
fabric. The high-speed network is configured to use VLANs for virtual
tenant networking, enabling layer-2 RoCEv1 (RDMA over Converged
Ethernet). RDMA is used in guest instances in support of tightly coupled
parallel MPI workloads, and for high-speed access to 300TB of Lustre
storage.
Following MonARCH, Monash University recently built a mixed CPU &
GPU cluster called M3, the latest system for the MASSIVE (Multi-modal
Australian ScienceS Imaging and Visualisation Environment) project. Within
M3, there are 1700 Haswell CPU cores along with 16 quad-GPU compute nodes
and an octo-GPU compute node, based upon the NVIDIA K80 dual-GPU. Staff
at Monash University's R@CMon cloud research group have integrated
SR-IOV networking and GPU pass-through into their compute instances.
.. figure:: images/hpc_virtualisation-gpu_passthrough.png
:width: 400
:alt: GPU passthrough in a virtualised environment
Specific high-performance OpenStack flavors are defined to require
pass-through of one or more dedicated GPUs. This enables one to four
GPU instances to run concurrently on a dual-K80 compute node, e.g.,
to support CUDA accelerated HPC workloads and/or multiple interactive
visualisation virtual-workstations.
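
The flavor definitions behind this arrangement typically pair a host-side
PCI alias declared in ``nova.conf`` with a ``pci_passthrough:alias`` extra
spec on the flavor. A minimal sketch with placeholder names and sizes,
not Monash's actual flavors, follows.

.. code-block:: python

    # Sketch: flavors requesting one or two passed-through GPUs via a PCI
    # alias (the 'gpu' alias itself is defined host-side in nova.conf).
    import openstack

    conn = openstack.connect(cloud='my-cloud')

    for name, gpus in (('m3.1gpu', 1), ('m3.2gpu', 2)):
        flavor = conn.create_flavor(name, ram=49152, vcpus=12, disk=80)
        conn.set_flavor_specs(flavor.id,
                              {'pci_passthrough:alias': 'gpu:%d' % gpus})
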
Blair Bethwaite, senior HPC consultant at Monash University, said:
“Using OpenStack brings us a high degree of flexibility in the HPC
environment. Applying cloud provisioning and management techniques
also helps to make the HPC-stack more generic, manageable and quick to
deploy. Plus, we benefit from the constant innovation from the OpenStack
community, with the ability to pick and choose new services and projects
from the ecosystem. OpenStack's flexibility in the SDN space also
offers compelling new avenues to integrate researchers' personal or
lab servers with the HPC service.
“However, before racing out to procure your next HPC platform driven
by OpenStack, I'd recommend evaluating your potential workloads
and carefully planning and testing the appropriate mix of hardware
capabilities, particularly acceleration features. KVM, OpenStack's most
popular hypervisor, can certainly perform adequately for HPC—in recent
testing we are getting 98 percent on average and up to 99.9 percent
of bare metal in Linpack tests—but a modern HPC system is likely to
require some subset of bare metal infrastructure. If I was planning a
new deployment today I'd seriously consider including Ironic so that a
mix of bare metal and virtual cloud nodes can be provisioned and managed
consistently. As Ironic is maturing and becoming more feature-complete,
I expect to see many more highly integrated deployments and reference
architectures emerging in the years to come.”
Optimising for “Time to Paper” using HPC on OpenStack
=====================================================
When evaluating OpenStack as a candidate for HPC infrastructure for
research computing, the “time to paper” metric of the scientists
using the resource should be included in consideration.
Skeptics of using cloud compute for HPC infrastructure inevitably cite
the various overheads of virtualisation in the case against OpenStack.
With a rapidly-developing technology, these arguments can often be
outdated. Furthermore, cloud infrastructure presents a diminishing
number of trade-offs in return for an increasing number of compelling
new capabilities.
Unlike conventional HPC system management, OpenStack provides, for
example:
* **Standardisation** as users can interact with the system through a
user-friendly web interface, a command line interface or a software API.
* **Flexibility** and agility as users allocate compute resources as required
and have exclusive use of the virtual resources. There is fine-grained
control of the extent to which physical resources are shared.
* Users can **self-serve** and boot a software image of their choosing without
requiring operator assistance. It is even possible for users to create
their own software images to run - a powerful advantage that eliminates
toil for the administrators and delay for the users.
* Additional **security** as users have a higher degree of separation from
each other. They cannot observe other users and are isolated from one
another on the network.
Through careful consideration, an HPC-aware configuration of OpenStack is
capable of realising all the benefits of software-defined infrastructure
whilst incurring minimal overhead. In its various forms, virtualisation
strikes a balance between new capabilities and consequential overhead.
Further Reading
===============
* The OpenStack Hypervisor Tuning Guide is a living document detailing best practice for virtualised performance: https://wiki.openstack.org/wiki/Documentation/HypervisorTuningGuide
* CERN's OpenStack in Production blog is a good example of the continual community process of hypervisor tuning: http://openstack-in-production.blogspot.co.uk/
* As an example of the continuous evolution of hypervisor development, the MIKELANGELO project is currently working on optimisations for reducing the latency of virtualised IO using their sKVM project: https://www.mikelangelo-project.eu/2015/10/how-skvm-will-beat-the-io-performance-of-kvm/
* The OpenStack Foundation has published a detailed white paper on using containers within OpenStack: https://www.openstack.org/assets/pdf-downloads/Containers-and-OpenStack.pdf
* An informative paper describing recent developments enabling GPUdirect peer-to-peer transfers between GPUs and RDMA-enabled NICs: http://grids.ucs.indiana.edu/ptliupages/publications/15-md-gpudirect%20(3).pdf
* Whilst the focus of this paper is on comparing virtualisation strategies on the ARM architecture, the background information is accessible and the comparisons made with the x86 architecture are insightful: http://www.cs.columbia.edu/~cdall/pubs/isca2016-dall.pdf
* For more information about MonARCH at Monash University, see the R@CMon blog: https://rcblog.erc.monash.edu.au/
Acknowledgements
================
This document was originally written by Stig Telfer of `StackHPC Ltd <https://www.stackhpc.com>`_ with the support
of Cambridge University, with contributions, guidance and feedback from
subject matter experts:
* **Professor DK Panda** and **Dr. Xiaoyi Lu** from NOWLAB, Ohio State University.
* **Blair Bethwaite**, Senior HPC Consultant at Monash University.
.. figure:: images/cc-by-sa.png
:width: 100
:alt: Creative commons licensing
This document is provided as open source with a Creative Commons license
with Attribution + Share-Alike (CC-BY-SA)
View File
@ -1,6 +0,0 @@
oslosphinx>=4.7.0 # Apache-2.0
rst2pdf
six>=1.9.0
sphinx>=1.2.1,!=1.3b1,<1.4 # BSD
sphinxcontrib-httpdomain # BSD
sphinx_rtd_theme
View File
@ -1,19 +0,0 @@
[metadata]
name = scientific_wg
summary = OpenStack Scientific WG documents
description-file =
README.rst
author = OpenStack
classifier =
Environment :: OpenStack
Intended Audience :: Developers
Intended Audience :: Information Technology
License :: OSI Approved :: Apache Software License
[files]
packages = scientific_wg
[build_sphinx]
all_files = 1
build-dir = doc/build
source-dir = doc/source
View File
@ -1,27 +0,0 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# THIS FILE IS MANAGED BY THE GLOBAL REQUIREMENTS REPO - DO NOT EDIT
import setuptools
# In python < 2.7.4, a lazy loading of package `pbr` will break
# setuptools if some other modules registered functions in `atexit`.
# solution from: http://bugs.python.org/issue15881#msg170215
try:
import multiprocessing # noqa
except ImportError:
pass
setuptools.setup(
setup_requires=['pbr>=1.8'],
pbr=True)
View File
@ -1,6 +0,0 @@
hacking>=0.11.0,<0.12 # Apache-2.0
testrepository>=0.0.18 # Apache-2.0/BSD
testtools>=1.4.0 # MIT
os-testr>=0.8.0 # Apache-2.0
View File
View File
@ -1,18 +0,0 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import testtools
class TestFake(testtools.TestCase):
def test_fake(self):
pass
22
tox.ini
View File
@ -1,22 +0,0 @@
[tox]
envlist = docs,py27
skipsdist = True
[testenv]
basepython = python2.7
setenv = VIRTUAL_ENV={envdir}
LANG=en_US.UTF-8
LANGUAGE=en_US:en
LC_ALL=C
deps =
-r{toxinidir}/requirements.txt
-r{toxinidir}/test-requirements.txt
commands = ostestr {posargs}
[testenv:venv]
commands = {posargs}
[testenv:docs]
commands =
python setup.py build_sphinx