From 1ec79db9106ec6e4a8d22e57daeaa58dbc630dc0 Mon Sep 17 00:00:00 2001 From: guo shasha <1623740225@qq.com> Date: Tue, 14 Aug 2018 14:39:40 +0800 Subject: [PATCH] Move specs from cyborg library to cyborg-specs library Change-Id: I1d7bc34762a2a105487b2f9664cb3fa12a53eb00 --- doc/source/conf.py | 0 specs/index.rst | 40 ++ specs/pike/approved/cyborg-agent.rst | 166 ++++++ specs/pike/approved/cyborg-api-proposal.rst | 414 +++++++++++++++ specs/pike/approved/cyborg-conductor.rst | 143 ++++++ .../pike/approved/cyborg-driver-proposal.rst | 163 ++++++ .../approved/cyborg-fpga-driver-proposal.rst | 193 +++++++ .../approved/cyborg-fpga-model-proposal.rst | 346 +++++++++++++ specs/queens/approved/cyborg-internal-api.rst | 265 ++++++++++ .../approved/cyborg-nova-interaction.rst | 187 +++++++ .../approved/cyborg-spdk-driver-proposal.rst | 221 ++++++++ specs/rocky/approved/compute-node.rst | 413 +++++++++++++++ .../approved/cyborg-agent-driver-api.rst | 222 ++++++++ .../approved/cyborg-fpga-bitstream-spec.rst | 253 +++++++++ .../cyborg-fpga-programming-proposal.rst | 200 +++++++ specs/rocky/approved/cyborg-nova-sched.rst | 486 ++++++++++++++++++ specs/rocky/approved/resource-quotas.rst | 204 ++++++++ specs/template.rst | 6 +- specs/tests/__init__.py | 0 specs/tests/base.py | 23 - specs/tests/test_specs.py | 28 - 21 files changed, 3919 insertions(+), 54 deletions(-) mode change 100755 => 100644 doc/source/conf.py create mode 100644 specs/index.rst create mode 100644 specs/pike/approved/cyborg-agent.rst create mode 100644 specs/pike/approved/cyborg-api-proposal.rst create mode 100644 specs/pike/approved/cyborg-conductor.rst create mode 100644 specs/pike/approved/cyborg-driver-proposal.rst create mode 100644 specs/queens/approved/cyborg-fpga-driver-proposal.rst create mode 100644 specs/queens/approved/cyborg-fpga-model-proposal.rst create mode 100644 specs/queens/approved/cyborg-internal-api.rst create mode 100644 specs/queens/approved/cyborg-nova-interaction.rst create mode 100644 
specs/queens/approved/cyborg-spdk-driver-proposal.rst create mode 100644 specs/rocky/approved/compute-node.rst create mode 100644 specs/rocky/approved/cyborg-agent-driver-api.rst create mode 100644 specs/rocky/approved/cyborg-fpga-bitstream-spec.rst create mode 100644 specs/rocky/approved/cyborg-fpga-programming-proposal.rst create mode 100644 specs/rocky/approved/cyborg-nova-sched.rst create mode 100644 specs/rocky/approved/resource-quotas.rst delete mode 100644 specs/tests/__init__.py delete mode 100644 specs/tests/base.py delete mode 100644 specs/tests/test_specs.py diff --git a/doc/source/conf.py b/doc/source/conf.py old mode 100755 new mode 100644 diff --git a/specs/index.rst b/specs/index.rst new file mode 100644 index 0000000..a96a3c8 --- /dev/null +++ b/specs/index.rst @@ -0,0 +1,40 @@ +Cyborg Specs +============ + +Template +-------- +.. toctree:: + :maxdepth: 1 + + template + +Rocky +----- +This section has a list of specs for the Rocky release. + +.. toctree:: + :maxdepth: 1 + :glob: + + rocky/approved/* + +Queens +------ +This section has a list of specs for the Queens release. + +.. toctree:: + :maxdepth: 1 + :glob: + + queens/approved/* + +Pike +---- +This section has a list of specs for the Pike release. + + +.. toctree:: + :maxdepth: 1 + :glob: + + pike/approved/* diff --git a/specs/pike/approved/cyborg-agent.rst b/specs/pike/approved/cyborg-agent.rst new file mode 100644 index 0000000..d08273a --- /dev/null +++ b/specs/pike/approved/cyborg-agent.rst @@ -0,0 +1,166 @@ +.. + This work is licensed under a Creative Commons Attribution 3.0 Unported + License. + + http://creativecommons.org/licenses/by/3.0/legalcode + +========================================== + Cyborg Agent Proposal +========================================== + +https://blueprints.launchpad.net/openstack-cyborg/+spec/cyborg-agent + +This spec proposes the responsibilities and initial design of the +Cyborg Agent. 
+ +Problem description +=================== + +Cyborg requires an agent on the compute hosts to manage several +responsibilities, including locating accelerators, monitoring their +status, and orchestrating driver operations. + +Use Cases +--------- + +Use of accelerators attached to virtual machine instances in OpenStack + +Proposed change +=============== + +Cyborg Agent resides on various compute hosts and monitors them for +accelerators. On its first run Cyborg Agent will run the detect +accelerator functions of all its installed drivers. The resulting list +of accelerators available on the host will be reported to the conductor +where it will be stored into the database and listed during API requests. +By default accelerators will be inserted into the database in an inactive +state. It will be up to the operators to manually set an accelerator to +'ready' at which point cyborg agent will be responsible for calling the +driver's install function and ensuring that the accelerator is ready for use. + +In order to mirror the current Nova model of using the placement API each Agent +will send updates on its resources directly to the placement API endpoint +as well as to the conductor for usage aggregation. This should keep placement +API up to date on accelerators and their usage. + +Alternatives +------------ + +There are lots of alternate ways to lay out the communication between the Agent +and the API endpoint or the driver. Almost all of them involve exactly where +we draw the line between the driver, Conductor, and Agent. I've written my +proposal with the goal of having the Agent act mostly as a monitoring tool, +reporting to the cloud operator or other Cyborg components to take action. +A more active role for Cyborg Agent is possible but either requires significant +synchronization with the Conductor or potentially steps on the toes of +operators.
+ +Data model impact +----------------- + +Cyborg Agent will create new entries in the database for accelerators it +detects; it will also update those entries with the current status of the +accelerator at a high level. More temporary data like the current usage of +a given accelerator will be broadcast via a message passing system and won't +be stored. + +Cyborg Agent will retain a local cache of this data with the goal of not losing +accelerator state on system interruption or loss of connection. + + +REST API impact +--------------- + +TODO once we firm up who's responsible for what. + +Security impact +--------------- + +Monitoring capability might be useful to an attacker, but without root +this is a fairly minor concern. + +Notifications impact +-------------------- + +Notifying users that their accelerators are ready? + +Other end user impact +--------------------- + +Interaction details around adding/removing/setting up accelerators +are TBD. + +Performance Impact +------------------ + +Agent heartbeat for updated accelerator performance stats might make +scaling to many accelerator hosts a challenge for the Cyborg endpoint +and database. Perhaps we should consider doing an active 'load census' +before scheduling instances? But that just moves the problem from constant +load to issues with a bootstorm. + + +Other deployer impact +--------------------- + +By not placing the drivers with the Agent we keep the deployment footprint +pretty small. We do add development complexity and security concerns sending +them over the wire though.
+ +Developer impact +---------------- + +TBD + +Implementation +============== + +Assignee(s) +----------- + +Primary assignee: + + +Other contributors: + + +Work Items +---------- + +* Agent implementation + +Dependencies +============ + +* Cyborg Driver Spec +* Cyborg API Spec +* Cyborg Conductor Spec + +Testing +======= + +CI infrastructure with a set of accelerators, drivers, and hardware will be +required for testing the Agent installation and operation regularly. + +Documentation Impact +==================== + +Little to none. Perhaps an on-compute config file that may need to be +documented. But I think it's best to avoid local configuration where possible. + +References +========== + +Other Cyborg Specs + +History +======= + + +.. list-table:: Revisions + :header-rows: 1 + + * - Release + - Description + * - Pike + - Introduced diff --git a/specs/pike/approved/cyborg-api-proposal.rst b/specs/pike/approved/cyborg-api-proposal.rst new file mode 100644 index 0000000..3d0397c --- /dev/null +++ b/specs/pike/approved/cyborg-api-proposal.rst @@ -0,0 +1,414 @@ +.. + This work is licensed under a Creative Commons Attribution 3.0 Unported + License. + + http://creativecommons.org/licenses/by/3.0/legalcode + +=================== +Cyborg API proposal +=================== + +https://blueprints.launchpad.net/openstack-cyborg/+spec/cyborg-api + +This spec proposes to provide the initial API design for Cyborg. + +Problem description +=================== + +Cyborg as a common management framework for dedicated devices (hardware/ +software accelerators, high-speed storage, etc) needs a RESTful API to expose +the basic functionalities. + +Use Cases +--------- + +* As a user I want to be able to spawn a VM with dedicated hardware, so + that I can utilize provided hardware. +* As a compute service I need to know how requested resource should be + attached to the VM. +* As a scheduler service I'd like to know on which resource provider + requested resource can be found.
+ +Proposed change +=============== + +In general we want to develop the APIs that support basic life cycle management +for Cyborg. + +Life Cycle Management Phases +---------------------------- + +For cyborg, LCM phases include typical create, retrieve, update, delete +operations. It should be noted that deprovisioning mainly refers to the +detach (delete) operation, which deactivates an acceleration capability but +preserves the resource itself for future usage. For Cyborg, from a functional +point of view, the LCM includes provision, attach, update, list, and detach. +There is no notion of deprovisioning for Cyborg API in a sense that we +decommission or disconnect an entire accelerator device from the bus. + +Difference between Provision and Attach/Detach +---------------------------------------------- + +Note that while the APIs support provisioning via CRUD operations, +attach/detach are considered different: + +* Provision operations (create) will involve api-> + conductor->agent->driver workflow, whereas attach/detach (update/delete) + could be taken care of at the driver layer without the involvement of the + aforementioned workflow. This is similar to the difference between creating + a volume and attaching/detaching a volume in Cinder. + +* The attach/detach in Cyborg API will mainly involve DB status + modification. + +Difference between Attach/Detach To VM and Host +----------------------------------------------- + +Moreover there are also differences when we attach an accelerator to a VM or +a host, similar to Cinder. + +* When the attachment happens to a VM, we are expecting that Nova could call + the virt driver to perform the action for the instance. In this case Nova + needs to support the acc-attach and acc-detach action. + +* When the attachment happens to a host, we are expecting that Cyborg could + take care of the action itself via Cyborg driver.
Althrough currently there + is the generic driver to accomplish the job, we should consider a os-brick + like standalone lib for accelerator attach/detach operations. + +Alternatives +------------ + +* For attaching an accelerator to a VM, we could let Cyborg perform the action + itself, however it runs into the risk of tight-coupling with Nova of which + Cyborg needs to get instance related information. +* For attaching an accelerator to a host, we could consider to use Ironic + drivers however it might not bode well with the standalone accelerator rack + scenarios where accelerators are not attached to server at all. + +Data model impact +----------------- + +A new table in the API database will be created:: + + CREATE TABLE accelerators ( + accelerator_id INT NOT NULL, + device_type STRING NOT NULL, + acc_type STRING NOT NULL, + acc_capability STRING NOT NULL, + vendor_id STRING, + product_id STRING, + remotable INT, + ); + +Note that there is an ongoing discussion on nested resource +provider new data structures that will impact Cyborg DB imp- +lementation. For code implementation it should be aligned +with resource provider db requirement as much as possible. 
+ + +REST API impact +--------------- + +The API changes add resource endpoints to: + +* `GET` a list of all the accelerators +* `GET` a single accelerator for a given id +* `POST` create a new accelerator resource +* `PUT` an update to an existing accelerator spec +* `PUT` attach an accelerator to a VM or a host +* `DELETE` detach an existing accelerator for a given id + +The following new REST API call will be created: + +'GET /accelerators' +************************* + +Return a list of accelerators managed by Cyborg + +Example message body of the response to the GET operation:: + + 200 OK + Content-Type: application/json + + { + "accelerator":[ + { + "uuid":"8e45a2ea-5364-4b0d-a252-bf8becaa606e", + "acc_specs": + { + "remote":0, + "num":1, + "device_type":"CRYPTO" + "acc_capability": + { + "num":2 + "ipsec": + { + "aes": + { + "3des":50, + "num":1, + } + } + } + } + }, + { + "uuid":"eaaf1c04-ced2-40e4-89a2-87edded06d64", + "acc_specs": + { + "remote":0, + "num":1, + "device_type":"CRYPTO" + "acc_capability": + { + "num":2 + "ipsec": + { + "aes": + { + "3des":40, + "num":1, + } + } + } + } + } + ] + } + +'GET /accelerators/{uuid}' +************************** + +Retrieve a certain accelerator info indetified by '{uuid}' + +Example GET Request:: + + GET /accelerators/8e45a2ea-5364-4b0d-a252-bf8becaa606e + + 200 OK + Content-Type: application/json + + { + "uuid":"8e45a2ea-5364-4b0d-a252-bf8becaa606e", + "acc_specs":{ + "remote":0, + "num":1, + "device_type":"CRYPTO" + "acc_capability":{ + "num":2 + "ipsec":{ + "aes":{ + "3des":50, + "num":1, + } + } + } + } + } + +If the accelerator does not exist a `404 Not Found` must be +returned. 
+ +'POST /accelerators/{uuid}' +*************************** + +Create a new accelerator + +Example POST Request:: + + Content-type: application/json + + { + "name": "IPSec Card", + "uuid": "8e45a2ea-5364-4b0d-a252-bf8becaa606e" + } + +The body of the request must match the following JSONSchema document:: + + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "uuid": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "name" + ] + "additionalProperties": False + } + +The response body is empty. The headers include a location header +pointing to the created accelerator resource:: + + 201 Created + Location: /accelerators/8e45a2ea-5364-4b0d-a252-bf8becaa606e + +A `409 Conflict` response code will be returned if another accelerator +exists with the provided name. + +'PUT /accelerators/{uuid}/{acc_spec}' +************************************* + +Update the spec for the accelerator identified by `{uuid}`. + +Example:: + + PUT /accelerator/8e45a2ea-5364-4b0d-a252-bf8becaa606e + + Content-type: application/json + + { + "acc_specs":{ + "remote":0, + "num":1, + "device_type":"CRYPTO" + "acc_capability":{ + "num":2 + "ipsec":{ + "aes":{ + "3des":50, + "num":1, + } + } + } + } + } + +The returned HTTP response code will be one of the following: + +* `200 OK` if the spec is successfully updated +* `404 Not Found` if the accelerator identified by `{uuid}` was + not found +* `400 Bad Request` for bad or invalid syntax +* `409 Conflict` if another process updated the same spec. + + +'PUT /accelerators/{uuid}' +************************** + +Attach the accelerator identified by `{uuid}`. 
+ +Example:: + + PUT /accelerator/8e45a2ea-5364-4b0d-a252-bf8becaa606e + + Content-type: application/json + + { + "name": "IPSec Card", + "uuid": "8e45a2ea-5364-4b0d-a252-bf8becaa606e" + } + +The returned HTTP response code will be one of the following: + +* `200 OK` if the accelerator is successfully attached +* `404 Not Found` if the accelerator identified by `{uuid}` was + not found +* `400 Bad Request` for bad or invalid syntax +* `409 Conflict` if another process attach the same accelerator. + + +'DELETE /accelerator/{uuid}' +**************************** + +Detach the accelerator identified by `{uuid}`. + +The body of the request and the response is empty. + +The returned HTTP response code will be one of the following: + +* `204 No Content` if the request was successful and the accelerator was + detached. +* `404 Not Found` if the accelerator identified by `{uuid}` was + not found. +* `409 Conflict` if there exist allocations records for any of the + accelerator resource that would be detached as a result of detaching + the accelerator. + + +Security impact +--------------- + +None + +Notifications impact +-------------------- + +None + +Other end user impact +--------------------- + +None + +Performance Impact +------------------ + +None + +Other deployer impact +--------------------- + +None + +Developer impact +---------------- + +Developers can use this REST API after it has been implemented. + +Implementation +============== + +Assignee(s) +----------- + +Primary assignee: + zhipengh + +Work Items +---------- + +* Implement the APIs specified in this spec +* Proposal to Nova about the new accelerator + attach/detach api +* Implement the DB specified in this spec + + +Dependencies +============ + +None. + +Testing +======= + +* Unit tests will be added to Cyborg API. + +Documentation Impact +==================== + +None + +References +========== + +None + +History +======= + + +.. 
list-table:: Revisions + :header-rows: 1 + + * - Release + - Description + * - Pike + - Introduced diff --git a/specs/pike/approved/cyborg-conductor.rst b/specs/pike/approved/cyborg-conductor.rst new file mode 100644 index 0000000..d250964 --- /dev/null +++ b/specs/pike/approved/cyborg-conductor.rst @@ -0,0 +1,143 @@ +.. + This work is licensed under a Creative Commons Attribution 3.0 Unported + License. + + http://creativecommons.org/licenses/by/3.0/legalcode + +========================================== + Cyborg Conductor Proposal +========================================== + +https://blueprints.launchpad.net/openstack-cyborg/+spec/cyborg-agent + +This spec proposes the responsibilities and initial design of the +Cyborg Conductor. + +Problem description +=================== + +Cyborg requires a conductor on the controller hosts to manage the cyborg +system state and coalesce database operations. + +Use Cases +--------- + +Use of accelerators attached to virtual machine instances in OpenStack + +Proposed change +=============== + +Cyborg Conductor will reside on the control node and will be +responsible for stateful actions taken by Cyborg, acting as both a cache to +the database and as a method of combining reads and writes to the database. +All other Cyborg components will go through the conductor for database +operations. + +Alternatives +------------ + +Having each Cyborg Agent instance hit the database on its own is a possible +alternative, and it may even be feasible if the accelerator load monitoring +rate is very low and the vast majority of operations are reads. But since we +intend to store metadata about accelerator usage updated regularly this model +probably will not scale well. + +Data model impact +----------------- + +Using the conductor 'properly' will result in little or no per instance state +and stateful operations moving through the conductor with the exception of +some local caching where it can be guaranteed to work well.
+ +REST API impact +--------------- + +N/A + +Security impact +--------------- + +Negligible + +Notifications impact +-------------------- + +N/A + +Other end user impact +--------------------- + +Faster Cyborg operation and less database load. + +Performance Impact +------------------ + +Generally positive so long as we don't overload the messaging bus trying +to pass things to the Conductor to write out. + +Other deployer impact +--------------------- + +Conductor must be installed and configured on the controllers. + + +Developer impact +---------------- + +None for API users; internally, heavy use of message passing will +be required if we want to keep all system state in the controllers. + + +Implementation +============== + +Assignee(s) +----------- + +Primary assignee: + jkilpatr + +Other contributors: + None + +Work Items +---------- + +* Implementation +* Integration with API and Agent + +Dependencies +============ + +* Cyborg API spec +* Cyborg Agent spec + +Testing +======= + +It should be possible to fully test this component using unit tests and +functional CI using the dummy driver. + +Documentation Impact +==================== + +Some configuration values tuning save out rate and other parameters on the +controller will need to be documented for end users. + +References +========== + +Cyborg API Spec +Cyborg Agent Spec + +History +======= + + +.. list-table:: Revisions + :header-rows: 1 + + * - Release + - Description + * - Pike + - Introduced diff --git a/specs/pike/approved/cyborg-driver-proposal.rst b/specs/pike/approved/cyborg-driver-proposal.rst new file mode 100644 index 0000000..63c1759 --- /dev/null +++ b/specs/pike/approved/cyborg-driver-proposal.rst @@ -0,0 +1,163 @@ +.. + This work is licensed under a Creative Commons Attribution 3.0 Unported + License.
+ + http://creativecommons.org/licenses/by/3.0/legalcode + +============================== +Cyborg Generic Driver Proposal +============================== + +https://blueprints.launchpad.net/openstack-cyborg/+spec/generic-driver-cyborg + +This spec proposes to provide the initial design for Cyborg's generic driver. + +Problem description +=================== + +This blueprint proposes to add a generic driver for openstack-cyborg. +The goal is to provide users & operators with a reliable generic +implementation that is hardware agnostic and provides basic +accelerator functionality. + +Use Cases +--------- + +* As an admin user and a non-admin user with elevated privileges, I should be + able to identify and discover attached accelerator backends. +* As an admin user and a non-admin user with elevated privileges, I should be + able to view services on each attached backend after the agent has + discovered services on each backend. +* As an admin user and a non-admin user, I should be able to list and update + attached accelerators by driver by querying nova with the Cyborg-API. +* As an admin user and a non-admin user with elevated privileges, I should be + able to install accelerator generic driver. +* As an admin user and a non-admin user with elevated privileges, I should be + able to uninstall accelerator generic driver. +* As an admin user and a non-admin user with elevated privileges, I should be + able to issue attach command to the instance via the driver which gets + routed to Nova via the Cyborg API. +* As an admin user and a non-admin user with elevated privileges, I should be + able to issue detach command to the instance via the driver which gets + routed to Nova via the Cyborg API. 
+ +Proposed change +=============== + +* Cyborg needs a reference implementation that can be used as a model for + future driver implementations and that will be referred to as the generic + driver implementation +* Develop the generic driver implementation that supports CRUD operations for + accelerators for single backend and multi backend setup scenarios. + + +Alternatives +------------ + +None + +Data model impact +----------------- + +* The generic driver will update the central database when any CRUD or + attach/detach operations take place + +REST API impact +--------------- + +This blueprint proposes to add the following APIs: + +* cyborg install-driver +* cyborg uninstall-driver +* cyborg attach-instance +* cyborg detach-instance +* cyborg service-list +* cyborg driver-list +* cyborg update-driver +* cyborg discover-services + +Security impact +--------------- + +None + +Notifications impact +-------------------- + +None + +Other end user impact +--------------------- + +None + +Performance Impact +------------------ + +None + +Other deployer impact +--------------------- + +None + +Developer impact +---------------- + +Developers will have access to a reference generic implementation which +can be used to build vendor-specific drivers. + +Implementation +============== + +Assignee(s) +----------- + +Primary assignee: + Rushil Chugh + +Work Items +---------- + +This change would entail the following: + +* Add a feature to identify and discover attached accelerator backends. +* Add a feature to list services running on the backend +* Add a feature to attach accelerators to the generic backend. +* Add a feature to detach accelerators from the generic backend. +* Add a feature to list accelerators attached to the generic backend. +* Add a feature to modify accelerators attached to the generic backend. +* Defining a reference implementation detailing the flow of requests between + the cyborg-api, cyborg-conductor and nova-compute services. 
+ +Dependencies +============ + +Dependent on Cyborg API and Agent implementations. + +Testing +======= + +* Unit tests will be added to test the Cyborg generic driver. + +Documentation Impact +==================== + +None + +References +========== + +None + +History +======= + + +.. list-table:: Revisions + :header-rows: 1 + + * - Release + - Description + * - Pike + - Introduced diff --git a/specs/queens/approved/cyborg-fpga-driver-proposal.rst b/specs/queens/approved/cyborg-fpga-driver-proposal.rst new file mode 100644 index 0000000..26d8c6b --- /dev/null +++ b/specs/queens/approved/cyborg-fpga-driver-proposal.rst @@ -0,0 +1,193 @@ +.. + This work is licensed under a Creative Commons Attribution 3.0 Unported + License. + + http://creativecommons.org/licenses/by/3.0/legalcode + +=========================== +Cyborg FPGA Driver Proposal +=========================== + +https://blueprints.launchpad.net/openstack-cyborg/+spec/cyborg-fpga-driver + +This spec proposes to provide the initial design for Cyborg's FPGA driver. + +Problem description +=================== + +A Field Programmable Gate Array (FPGA) is an integrated circuit designed to be +configured by a customer or a designer after manufacturing. The advantage lies +in that they are sometimes significantly faster for some applications because +of their parallel nature and optimality in terms of the number of gates used +for a certain process. Hence, using FPGA for application acceleration in cloud +has become desirable. + +There is a management framework in Cyborg [1]_ for heterogeneous accelerators, +tracking and deploying FPGAs. This spec will add an FPGA driver for Cyborg to +manage specific FPGA devices.
+ +Use Cases +--------- + +* When Cyborg agent starts or does resource checking periodically, the Cyborg + FPGA driver should enumerate the list of the FPGA devices, and report the + details of all available FPGA accelerators on the host, such as BDF(Bus, + Device, Function), PID(Product id), VID(Vendor id), IMAGE_ID and PF(Physical + Function)/VF(Virtual Function) type. + +* When user uses empty FPGA regions as their accelerators, Cyborg agent will + call driver's program() interface. Cyborg agent should provide BDF + of PF/VF, and local image path to the driver. More details can be found in + ref [2]_. + +* When there may be more than one vendor's FPGA card on a host, or on different + hosts in the cluster, Cyborg agent can discover the vendors easily and + intelligently by Cyborg FPGA driver, and call the correct driver to execute + its operations, such as discover() and program(). + + +Proposed changes +================ + +In general, the goal is to develop a Cyborg FPGA driver that supports +discover/program interfaces for FPGA accelerator framework. + +The driver should include the following functions: +1. discover() +driver reports devices as follows:: + + [{ + "vendor": "0x8086", + "product": "bcc0", + "pr_num": 1, + "devices": "0000:be:00:0", + "path": "/sys/class/fpga/intel-fpga-dev.0", + "regions": [ + {"vendor": "0x8086", + "product": "bcc1", + "regions": 1, + "devices": "0000:be:00:1", + "path": "/sys/class/fpga/intel-fpga-dev.1" + }] + }] + + pr_num: partial reconfiguration region numbers. + +2. program(device_path, image) + program the image to a PR region specified by device_path. + device_path: the sys path of accelerator device. + image: The local path of programming image. + +Image Format +---------------------------- + +Alternatives +------------ + +None + +Data model impact +----------------- + +FPGA driver will not touch Data model. +The Cyborg Agent can call FPGA driver to update the database +during the discover/program operations.
+ +REST API impact +--------------- + +The related FPGA accelerator APIs is out of scope for this spec. +The FPGA management framework for Cyborg [1]_ will alter the proposal. + +Security impact +--------------- + +None + +Notifications impact +-------------------- + +None + +Other end user impact +--------------------- + +None + +Performance Impact +------------------ + +None + +Other deployer impact +--------------------- + +Deployers should install the specific FPGA management stack that the driver +depends on. + +Please see ref [2]_ for details. + +Developer impact +---------------- + +There will be some developer impact vis-à-vis new functionality that +will be available to devs. + +Implementation +============== + +Assignee(s) +----------- + +Primary assignee: + Shaohe Feng + Dolpher Du + +Work Items +---------- + +* Implement the cyborg-fpga-driver in this spec. + +Dependencies +============ + +* Cyborg API Spec +* Cyborg Agent Spec +* Cyborg Driver Spec +* Cyborg Conductor Spec + +Testing +======== + +* Unit tests will be added to test Cyborg FPGA driver. +* Functional tests will be added to test Cyborg FPGA driver. + +Documentation Impact +==================== + +Document FPGA driver in the Cyborg project + +References +========== + +* Cyborg API Spec +* Cyborg Agent Spec +* Cyborg Driver Spec +* Cyborg Conductor Spec + + +History +======= + +.. list-table:: Revisions + :header-rows: 1 + + * - Release + - Description + * - Queens + - Introduced + +References +========== +.. [1] https://blueprints.launchpad.net/openstack-cyborg/+spec/cyborg-fpga-modelling +.. [2] https://01.org/OPAE diff --git a/specs/queens/approved/cyborg-fpga-model-proposal.rst b/specs/queens/approved/cyborg-fpga-model-proposal.rst new file mode 100644 index 0000000..8184add --- /dev/null +++ b/specs/queens/approved/cyborg-fpga-model-proposal.rst @@ -0,0 +1,346 @@ +.. + This work is licensed under a Creative Commons Attribution 3.0 Unported + License. 
+ + http://creativecommons.org/licenses/by/3.0/legalcode + +========================================== + Cyborg FPGA Model Proposal +========================================== + +Blueprint url is not available yet +https://blueprints.launchpad.net/openstack-cyborg/+spec/cyborg-fpga-modelling + +This spec proposes the DB modelling schema for tracking reprogrammable +resources. + +Problem description +=================== + +A field-programmable gate array (FPGA) is an integrated circuit designed to be +configured by a customer or a designer after manufacturing. Their advantage +lies in that they are sometimes significantly faster for some applications +because of their parallel nature and optimality in terms of the number of gates +used for a certain process. Hence, using FPGA for application acceleration in +cloud has become desirable. For Cyborg, as a management framework for +heterogeneous accelerators, tracking and deploying FPGAs are much needed +features. + + +Use Cases +--------- + +When user requests FPGA resources, scheduler will use placement agent [1]_ to +select appropriate hosts that have the requested FPGA resources. + +When a FPGA type resource is allocated to a VM, Cyborg needs to track down +which exact device has been assigned in the database. On the other hand, when +the resource is released, Cyborg will need to detach and free the exact +resource. + +When a new device is plugged in to the system(host), Cyborg needs to discover +it and store it into the database. + +Proposed change +=============== + +We need to add 2 more tables to Cyborg database, one for tracking all the +deployables and one for arbitrary key-value pairs of deployable-associated +attributes. These tables are named as Deployables and Attributes. + +Deployables table consists of all the common attributes columns as well as +a parent_id and a root_id. The parent_id will point to the associated parent +deployable and the root_id will point to the associated root deployable.
+By doing this, we can form a nested tree structure to represent different +hierarchies. In addition, there will a foreign key named accelerator_id +reference to the accelerators table. For the case where FPGA has not been +loaded any bitstreams on it, they will still be tracked as a Deployable but +no other Deployables referencing to it. For instance, a network of +FPGA hierarchies can be formed using deployables in following scheme:: + + ------------------- + ------------------->|Deployable - FPGA|<-------------------- + | ------------------- | + | /\ | + | root_id / \ parent_id/root_id | + | / \ | + | ----------------- ----------------- | + | |Deployable - PF| |Deployable - PF| | + | ----------------- ----------------- | + | /\ | + | / \ parent_id root_id | + | / \ | + ----------------- ----------------- | + |Deployable - VF| |Deployable - VF| ----------------------- + ----------------- ----------------- + + +Attributes table consists of a key and a value columns to represent arbitrary +k-v pairs. + +For instance, bitstream_id and function kpi can be tracked in this table. +In addition, a foreign key deployable_id refers to the Deployables table and +a parent_attribute_id to form nested structured attribute relationships. + +Cyborg needs to have object classes to represent different types of +deployables(e.g. FPGA, Physical Functions, Virtual Functions etc). + +Cyborg Agent needs to add feature to discover the FPGA resources from FPGA +driver and report them to the Cyborg DB through the conductor. + +Conductor needs to add couple of sets of APIs for different types of deployable +resources. + +Alternatives +------------ + +Alternativly, instead of having a flat table to track arbitrary hierarchies, we +can use two different tables in Cyborg database, one for physical functions and +one for virtual functions. physical_functions should have a foreign key +constraint to reference the id in Accelerators table. 
In addition, +virtual_functions should have a foreign key constraint to reference the id +in physical_functions. + +The problems with this design are as follows. First, it can only track up to +3 hierarchies of resources. In case we need to add another layer, a lot of +migaration work will be required. Second, even if we only need to add some new +attribute to the existing resource type, we need to create new migration +scripts for them. Overall the maintenance work is tedious. + +Data model impact +----------------- +As discussed in previous sections, two tables will be added: Deployables and +Attributes:: + + + CREATE TABLE Deployables + ( + id INTEGER NOT NULL , /*Primary Key*/ + parent_id INTEGER , /*Pointer to the parent deployable's primary key*/ + root_id INTEGER , /*Pointer to the root deployable's primary key*/ + name VARCHAR2 (32 BYTE) , /*Name of the deployable*/ + pcie_address VARCHAR2 (32 BYTE) , /*pcie address which can be used for passthrough*/ + uuid VARCHAR2 (32 BYTE) , /*uuid v4 format for the deployable itself*/ + node_id VARCHAR2 (32 BYTE) , /*uuid v4 format to identify which host this deployable is located*/ + board VARCHAR2 (16 BYTE) , /*Identify the model of the deployable(e.g. KU115)*/ + vendor VARCHAR2 (16 BYTE) , /*Identify the vendor of the deployable(e.g. Xilinx)*/ + version VARCHAR2 (32 BYTE) , /*Identify the version of the deployable(e.g. 1.2a)*/ + type VARCHAR2 (32) , /*Identify the type of the deployable(e.g. FPGA/PF/VF)*/ + assignable CHAR (1) , /*Represent if the deployable can be assigned to users*/ + instance_id VARCHAR2 (32 BYTE) , /*Represent which instance this deployable has been assigned to*/ + availability INTEGER NOT NULL, /*enum type to represent the status of the deployable(e.g. 
acclocated/claimed)*/ + accelerator_id INTEGER NOT NULL /*foreign key references to the accelerator table*/ + ) ; + ALTER TABLE Deployables ADD CONSTRAINT Deployables_PK PRIMARY KEY ( id ) ; + ALTER TABLE Deployables ADD CONSTRAINT Deployables_accelerators_FK FOREIGN KEY ( accelerator_id ) REFERENCES accelerators ( id ) ; + + + CREATE TABLE Attributes + ( + id INTEGER NOT NULL , /*Primary Key*/ + deployable_id INTEGER NOT NULL , /*foreign key references to the Deployables table*/ + KEY CLOB , /*Attribute Key*/ + value CLOB , /*Attribute Value*/ + parent_attribute_id INTEGER /*Pointer to the parent attribute's primary key*/ + ) ; + ALTER TABLE Attributes ADD CONSTRAINT Attributes_PK PRIMARY KEY ( id ) ; + ALTER TABLE Attributes ADD CONSTRAINT Attributes_Deployables_FK FOREIGN KEY ( deployable_id ) REFERENCES Deployables ( id ) ON + DELETE CASCADE ; + + +RPC API impact +--------------- +Two sets of conductor APIs need to be added. 1 set for physical functions, +1 set for virtual functions + +Physical function apis:: + + def physical_function_create(context, values) + def physical_function_get_all_by_filters(context, filters, sort_key='created_at', sort_dir='desc', limit=None, marker=None, columns_to_join=None) + def physical_function_update(context, uuid, values, expected=None) + def physical_function_destroy(context, uuid) + +Virtual function apis:: + + def virtual_function_create(context, values) + def virtual_function_get_all_by_filters(context, filters, sort_key='created_at', sort_dir='desc', limit=None, marker=None, columns_to_join=None) + def virtual_function_update(context, uuid, values, expected=None) + def virtual_function_destroy(context, uuid) + +REST API impact +--------------- +Since these tables are not exposed to users for modifying/adding/deleting, +Cyborg will only add two extra REST APIs to allow user query information +related to deployables and their attributes. 
+ +API for retrieving Deployable's information:: + + Url: {base_url}/accelerators/deployable/{uuid} + Method: GET + URL Params: + GET: uuid --> get deplyable by uuid + + Data Params: + None + + Success Response: + GET: + Code: 200 + Content: { deployable: {id : 12, parent_id: 11, root_id: 10, ....}} + + Error Response + Code: 401 UNAUTHORIZED + Content: { error : "Log in" } + OR + Code: 422 Unprocessable Entry + Content: { error : "deployable uuid invalid" } + + Sample Call: + To get the deployable with uuid=2864a139-c2cd-4f9f-abf3-44eb3f09b83c + $.ajax({ + url: "/accelerators/deployable/2864a139-c2cd-4f9f-abf3-44eb3f09b83c", + dataType: "json", + type : "get", + success : function(r) { + console.log(r); + } + }); + +API for retrieving list of Deployables with filters/attirbutes:: + + Url: {base_url}/accelerators/deployable + Method: GET + URL Params: + None + + Data Params: + k-v pairs for filtering + + Success Response: + GET: + Code: 200 + Content: { deployables: [{id : 12, parent_id: 11, root_id: 10, ....}]} + + Error Response + Code: 401 UNAUTHORIZED + Content: { error : "Log in" } + OR + Code: 422 Unprocessable Entry + Content: { error : "deployable uuid invalid" } + + Sample Call: + To get a list of FPGAs with no bitstream loaded. 
+ $.ajax({ + url: "/accelerators/deployable", + data: { + "bitstream_id": None, + "type": "FPGA" + }, + dataType: "json", + type : "get", + success : function(r) { + console.log(r); + } + }); + +API for retrieving Deployable attributes' information:: + + Url: {base_url}/accelerators/deployable/{uuid}/attribute/{key} + Method: GET + URL Params: + GET: uuid --> uuid for the associated deployable + key --> key for the associated deployable + + Data Params: + None + + Success Response: + GET: + Code: 200 + Content: { attribute: {key : value}} + + Error Response + Code: 401 UNAUTHORIZED + Content: { error : "Log in" } + OR + Code: 422 Unprocessable Entry + Content: { error : "attirbute key invalid" } + + Sample Call: + To get the value of key=kpi for deployable with id=2864a139-c2cd-4f9f-abf3-44eb3f09b83c + $.ajax({ + url: "/accelerators/deployable/2864a139-c2cd-4f9f-abf3-44eb3f09b83c/attribute/kpi", + dataType: "json", + type : "get", + success : function(r) { + console.log(r); + } + }); + +Security impact +--------------- +None + +Notifications impact +-------------------- +None + +Other end user impact +--------------------- +None + +Performance Impact +------------------ +None + +Other deployer impact +--------------------- +None + +Developer impact +---------------- + +There will be new functionalities available to the dev because of this work. + + +Implementation +============== + +Assignee(s) +----------- +Primary assignee: + Li Liu + +Work Items +---------- +* Create migration scripts to add two more tables to the database +* Create models in sqlalchemy as well as related conductor APIs +* Create corespoinding objects +* Create Conductor APIs to allow resourece reporting + + +Dependencies +============ + +Testing +======= +* Unit tests will be added test Cyborg generic driver. + +Documentation Impact +==================== +Document FPGA Modelling in the Cyborg project + +References +========== +.. 
[1] https://docs.openstack.org/nova/latest/user/placement.html + +History +======= + +.. list-table:: Revisions + :header-rows: 1 + + * - Release + - Description + * - Queens + - Introduced diff --git a/specs/queens/approved/cyborg-internal-api.rst b/specs/queens/approved/cyborg-internal-api.rst new file mode 100644 index 0000000..410087d --- /dev/null +++ b/specs/queens/approved/cyborg-internal-api.rst @@ -0,0 +1,265 @@ +.. + This work is licensed under a Creative Commons Attribution 3.0 Unported + License. + + http://creativecommons.org/licenses/by/3.0/legalcode + +========================================== + Cyborg Internal API spec +========================================== + +This document loosely specifies the API calls between +the components of Cyborg. Driver, Agent, Conductor, and API endpoint. + +These API's are internal and therefore may change from version to version +without warning or backwards compatibility. This document is kept as a +developer reference to be edited before any internally braking changes +are made. + +Problem description +=================== + +Developers writing one component of Cyborg need to know how to talk to another +component of Cyborg, hopefully without having to go spelunking in the code +of that component. + + +Use Cases +--------- + +Happier Cyborg developers + +Proposed change +=============== + +Versioning internal API's + +Alternatives +------------ + +A mess + +Data model impact +----------------- + +A fixed internal API should help keep data models consistent. 
+ +REST API impact +--------------- + +The API changes add resource endpoints to: + +Driver: + +* `POST` start accelerator discovery FROM: Agent +* `GET` get a list of discovered accelerators and their properties FROM: Agent + +Agent: + +* `POST` register driver FROM: Driver +* `POST` start accelerator discovery across all drivers FROM: Conductor +* `GET` get a list of all accelerators across all drivers FROM: Conductor + +Conductor: +* `POST` register agent FROM: Agent + + +The following new REST API call will be created: + +Driver 'POST /discovery' +*************************** + +Trigger the discovery and setup process for a specific driver + +.. code-block:: ini + + Content-Type: application/json + + { + "status":"IN-PROGRESS" + } + +Driver 'GET /hardware' +************************** + +Gets a list of hardware, not accelerators, accelerators are +ready to use entires available by the public API. Hardware are +physical devices on nodes that may or may not be ready to use or +even fully supported. + +.. code-block:: ini + + 200 OK + Content-Type: application/json + + { + "hardware":[ + { + "uuid":"8e45a2ea-5364-4b0d-a252-bf8becaa606e", + "acc_specs": + { + "remote":0, + "num":1, + "device_type":"CRYPTO" + "acc_capability": + { + "num":2 + "ipsec": + { + "aes": + { + "3des":50, + "num":1, + } + } + } + } + "acc_status": + { + "setup_required":true, + "reboot_equired":false + } + }] + } + + +Driver 'POST /hello' +*************************** + +Registers that a driver has been installed on the machine and is ready to use. +As well as it's endpoint and hardware support. + +.. 
code-block:: ini + + Content-Type: application/json + + { + "status":"READY", + "endpoint":"localhost:1337", + "type":"CRYPTO" + } + +Agent 'POST /discovery' +*************************** + +Trigger the discovery and setup process for all registered drivers + +See driver example + + +Agent 'GET /hardware' +*************************** + +Get list of hardware across all drivers on the node + +see driver example + + +Conductor 'POST /hello' +*************************** + +Registers that an Agent has been installed on the machine and is ready to use. + +.. code-block:: ini + + Content-Type: application/json + + { + "status":"READY", + "endpoint":"compute-whatever:1337", + } + + +Security impact +--------------- + +Care must be taken to secure the internal endpoints from malicious calls + + +Notifications impact +-------------------- + +N/A + +Other end user impact +--------------------- + +This change might have an impact on python-cyborgclient + +Performance Impact +------------------ + +In this model the Agent takes care of wrangling however many drivers are on +a compute and the Conductor takes care of wrangling all the agents to present +a coherent answer to the API quickly and easily. I don't include +API <-> Conductor calls yet because I assume the API will be for the most part +working from the database while the Conductor tries to keep that database up to +date and takes the occasional setup call. + + +Other deployer impact +--------------------- + +In this model we won't really know when we're missing an agent. If one has +reported in previously and then goes away we can have an alarm for that. But +if an agent never reports in we just have to assume no instance exists by that +name. This means making sure the Cyborg Drivers/Agent's are installed and +running is the responsibility of the deployment tool. 
+ +Developer impact +---------------- + +More internal communication in Cyborg + +Implementation +============== + +Assignee(s) +----------- + + +Primary assignee: + jkilpatr + +Other contributors: + zhuli + +Work Items +---------- + +N/A + + +Dependencies +============ + +N/A + + +Testing +======= + +N/A + + +Documentation Impact +==================== + +N/A + +References +========== + +N/A + + +History +======= + +.. list-table:: Revisions + :header-rows: 1 + + * - Release Name + - Description + * - Queens + - Introduced diff --git a/specs/queens/approved/cyborg-nova-interaction.rst b/specs/queens/approved/cyborg-nova-interaction.rst new file mode 100644 index 0000000..a67af0f --- /dev/null +++ b/specs/queens/approved/cyborg-nova-interaction.rst @@ -0,0 +1,187 @@ +.. + This work is licensed under a Creative Commons Attribution 3.0 Unported + License. + + http://creativecommons.org/licenses/by/3.0/legalcode + +======================= +Cyborg-Nova interaction +======================= + +https://blueprints.launchpad.net/cyborg/+spec/cyborg-nova-interaction + +Cyborg, as a service for managing accelerators of any kind needs to cooperate +with Nova on two planes: Cyborg should be able to inform Nova about the +resources through placement API[1], so that scheduler can leverage user +requests for particular functionality into assignment of specific resource +using resource provider which possess an accelerator, and second, Cyborg should +be able to provide information on how Nova compute can attach particular +resource to VM. + +In a nutshell, this blueprint will define how information between Nova and +Cyborg will be exchanged. + +Problem description +=================== + +Currently in OpenStack the use of non-standard accelerator hardware is +supported in that features exist across many of the core servers that allow +these resources to be allocated, passed through, and eventually used. 
+ +What remains a challenge though is the lack of an integrated workflow; there +is no way to configure many of the accelerator features without significant +by hand effort and service disruptions that go against the goals of having +a easy, stable, and flexible cloud. + +Cyborg exists to bring these disjoint efforts together into a more standard +workflow. While many components of this workflow already exist, some don't +and will need to be written expressly for this goal. + +Use Cases +--------- + +All possible use cases were briefly described in backlog Nova spec [2]. It +might be distinguished two main use case groups for which accelerators might be +used: + +* Accelerator might be attached to the VM, where workload demands acceleration. + That can be achieved by passing whole PCI device, certain host device from + ``/dev/`` filesystem, passing Virtual Function, etc. +* Accelerator might be utilized by infrastructure, like accelerating virtual + switches (i.e. Open vSwitch), and than utilized via appropriate service (like + Neutron for example). + + +Proposed Workflow +================= + +Using a method not relevant to this proposal Cyborg Agent inspects hardware +and finds accelerators that it is interested in setting up for use. + +These accelerators are registered into the Cyborg Database and the Cyborg +Conductor is now responsible for using the Nova placement API to create +corresponding traits and resources. + +One of the primary responsibilities of the Cyborg conductor is to keep the +placement API in sync with reality. For example if here is a device with +a virtual function or a FPGA with a given program Cyborg may be tasked with +changing the virtual function on the NIC or the program on the FPGA. At which +point the previously specified traits and resources need to be updated. +Likewise Cyborg will be watching monitoring Nova's instances to ensure that +doing this doesn't pull resources out from under an allocated instance. 
+ +At a high level what we need to be able to do is the following + +1. Add a PCI device to Nova's whitelist live + (config only / needs implementation) +2. Add information about this device to the placement API + (existing / being worked) +3. Hotplug and unplug PCI devices from instances + (existing / not sure how well maintained) + + +Alternatives +------------ + +Don't use Cyborg, struggle with bouncing services and grub config changes +yourself. + +Data model impact +----------------- + +N/A + +REST API impact +--------------- + +N/A + +Security impact +--------------- + +N/A + +Notifications impact +-------------------- + +N/A + +Other end user impact +--------------------- + +N/A + +Performance Impact +------------------ + +N/A + +Other deployer impact +--------------------- + +N/A + +Developer impact +---------------- + +N/A + +Implementation +============== + +Assignee(s) +----------- + +Primary assignee: + None + +Work Items +---------- + +* Implementation of Cyborg service +* Implementation of Cyborg agent +* Blueprint for changes in Nova +* Implementation of the POC which exposes functionality and interoperability + between Cyborg and Nova + +Dependencies +============ + +This design depends on the changes which may or may not be accepted in Nova +project. Other than that is ongoing work on Nested resource providers: +http://specs.openstack.org/openstack/nova-specs/specs/ocata/approved/nested-resource-providers.html +Which would be an essential feature in Placement API, which will be leveraged +by Cyborg. + + +Testing +======= + +There would be a need to provide another gate, which would provide an +accelerator for tests. 
+ +Documentation Impact +==================== + +* Document new nova api for whitelisting +* Document developer and user interaction with the workflow +* Document placement api standard identifiers + +References +========== + +* [1] https://docs.openstack.org/developer/nova/placement.html +* [2] https://review.openstack.org/#/c/318047/ +* [3] https://github.com/openstack/nova/blob/390c7e420f3880a352c3934b9331774f7afdadcc/nova/compute/resource_tracker.py#L751 + + +History +======= + +.. list-table:: Revisions + :header-rows: 1 + + * - Release Name + - Description + * - Queens + - Introduced diff --git a/specs/queens/approved/cyborg-spdk-driver-proposal.rst b/specs/queens/approved/cyborg-spdk-driver-proposal.rst new file mode 100644 index 0000000..b7a1879 --- /dev/null +++ b/specs/queens/approved/cyborg-spdk-driver-proposal.rst @@ -0,0 +1,221 @@ +.. + This work is licensed under a Creative Commons Attribution 3.0 Unported + License. + + http://creativecommons.org/licenses/by/3.0/legalcode + +=========================== +Cyborg SPDK Driver Proposal +=========================== + +https://blueprints.launchpad.net/openstack-cyborg/+spec/cyborg-spdk-driver + +This spec proposes to provide the initial design for Cyborg's SPDK driver. + +Problem description +=================== + +SPDK is a high performance kit and provides a user space, polled-mode, +asynchronous, lockless NVMe driver for storage acceleration on the +backend. Our goal is to add a SPDK driver for Cyborg to manage SPDK, +and further improve storage performance. + +Use Cases +--------- + +* When Cinder uses Ceph as its backend, the user should be able to + use the Cyborg SPDK driver to discover the SPDK accelerator backend, + enumerate the list of the Ceph nodes that have installed the SPDK. +* When Cinder directly uses SPDK's BlobStore as its backend, the user + should be able to accomplish the same life cycle management operations + for SPDK as mentioned above. 
After enumerating the SPDK, the user can + attach (install) SPDK on that node. When the task completes, the user + can also detach the SPDK from the node. Last but not least the user + should be able to update the latest and available SPDK. + +Proposed change +=============== + +In general, the goal is to develop the Cyborg SPDK driver that supports +discover/list/update/attach/detach operations for SPDK framework. + +SPDK framework +-------------- + +The SPDK framework comprises of the following components:: + + +-----------userspace--------+ +--------------+ + | +------+ +------+ +------+ | | +-----------+ | + +---+ | |DPDK | |NVMe | |NVMe | | | | Ceph | | + | N +-+-+NIC | |Target| |Driver+-+-+ |NVMe Device| | + | I | | |Driver| | | | | | | +-----------+ | + | C | | +------+ +------+ +------+ | | +-----------+ | + +---+ | +------------------------+ | | | Blobstore | | + | | DPDK Libraries | | | |NVMe Device| | + | +------------------------+ | | +-----------+ | + +----------------------------+ +---------------+ + +BlobStore NVMe Device Format +---------------------------- + +BlobStore owns the entire NVMe device including metadata management +and data management, which defines three basic units of disk space (like +logical block, page, cluster). The NVMe device is divided into clusters +starting from the first logical block. + +LBA 0 LBA N ++-----------+-----------+-----+-----------+ +| Cluster 0 | Cluster 1 | ... | Cluster N | ++-----------+-----------+-----+-----------+ + +Cluster0 has special format which consists of pages. Page0 is the +first page of Cluster0. Super Block contains the basic information of +BlobStore. + ++--------+-------------------+ +| Page 0 | Page 1 ... Page N | ++--------+-------------------+ +| Super | Metadata Region | +| Block | | ++--------+-------------------+ + +Each blob is allocated a non-contiguous set of pages. These pages form +a linked list. 
+In general, the BlobStore adopts direct operation of bare metal device and +avoids the filesystem, which improves efficiency. + +Life Cycle Management Phases +---------------------------- +* We should be able to add a judgement whether the backend node has SPDK kit + in generic driver module. If true, initialize the DPDK environment (such as + hugepage). +* Import the generic driver module, and then we should be able to + discover (probe) the system for SPDK. +* Determined by the backend storage scenario, enumerate (list) the optimal + SPDK node, returning a boolean value to judge whether the SPDK should be + attached. +* After the node where SPDK will be running is attached, we can now send a + request about the information of namespaces, and then create an I/O queue + pair to submit read/write requests to a namespace. +* When Ceph is used as the backend, as the latest Ceph (such as Luminous) + uses the BlueStore to be the storage engine, BlueStore and BlobStore are + very similar things. We will not be able to use BlobStore to accelerate + Ceph, but we can use Ioat and poller to boost speed for storage. +* When SPDK is used as the backend, we should be able to use BlobStore to + improve performance. +* Whenever user requests, we should be able to detach the SPDK device. +* Whenever user requests, we should be able to update SPDK to the latest and + stable release. + +Alternatives +------------ + +None + +Data model impact +----------------- + +* The Cyborg SPDK driver will notify Cyborg Agent to update the database + when discover/list/update/attach/detach operations take place. 
+ +REST API impact +--------------- + +This blueprint proposes to add the following APIs: + +* cyborg discover-driver(driver_type) +* cyborg driver-list(driver_type) +* cyborg install-driver(driver_id, driver_type) +* cyborg attach-instance +* cyborg detach-instance +* cyborg uninstall-driver(driver_id, driver_type) +* cyborg update-driver + +Security impact +--------------- + +None + +Notifications impact +-------------------- + +None + +Other end user impact +--------------------- + +None + +Performance Impact +------------------ + +The SPDK can provide a user space, polled-mode, asynchronous, +lockless NVMe driver for storage acceleration on the backend. + +Other deployer impact +--------------------- + +Deployers can call SPDK from the nodes which have installed SPDK +after the driver has been implemented. + +Developer impact +---------------- + +None + +Implementation +============== + +Assignee(s) +----------- + +Primary assignee: + luwei he + +Work Items +---------- + +* Implement the cyborg-spdk-driver in this spec. +* Propose SPDK to py-spdk. The py-spdk is designed as a SPDK client + which provides the python binding. + + +Dependencies +============ + +* Cyborg API Spec +* Cyborg Agent Spec +* Cyborg Driver Spec +* Cyborg Conductor Spec + +Testing +======== + +* Unit tests will be added to test Cyborg SPDK driver. +* Functional tests will be added to test Cyborg SPDK driver. For example: + discover-->list-->attach,whether the workflow can be passed successfully. + +Documentation Impact +==================== + +Document SPDK driver in the Cyborg project + +References +========== + +* Cyborg API Spec +* Cyborg Agent Spec +* Cyborg Driver Spec +* Cyborg Conductor Spec + + +History +======= + +.. 
list-table:: Revisions + :header-rows: 1 + + * - Release + - Description + * - Queens + - Introduced diff --git a/specs/rocky/approved/compute-node.rst b/specs/rocky/approved/compute-node.rst new file mode 100644 index 0000000..bcbc484 --- /dev/null +++ b/specs/rocky/approved/compute-node.rst @@ -0,0 +1,413 @@ +.. + This work is licensed under a Creative Commons Attribution 3.0 Unported + License. + + http://creativecommons.org/licenses/by/3.0/legalcode + +============================================== +Cyborg-Nova-Glance Interaction in Compute Node +============================================== + +Cyborg is a service for managing accelerators, such as FPGAs, GPUs, etc. For +scheduling an instance that needs accelerators, Cyborg needs to work with Nova +at three levels: + +* Representation and Discovery: Cyborg shall represent accelerators + as resources in Placement. When a device is discovered, Cyborg + updates resource inventories in Placement. + +* Instance placement/scheduling: Cyborg may provide a weigher + that prioritizes hosts based on available accelerator resources. + +* Attaching accelerators to instances. In the compute node, Cyborg + shall define a workflow based on interacting with Nova through a + new os-acc library (like os-vif and os-brick). + +The first two aspects are addressed in [#CyborgNovaSched]_. This spec +addresses the attachment of accelerators to instances, via os-acc. For +FPGAs, Cyborg also needs to interact with Glance for fetching bitstreams. +Some aspects of that are covered in [#BitstreamSpec]_. This spec will +address the interaction of Cyborg and Glance in the compute node. + +This spec is common to all accelerators, including GPUs, High Precision +Time Synchronization (HPTS) cards, etc. Since FPGAs have more aspects +to be considered than other devices, some sections may focus on +FPGA-specific factors. The spec calls out the FPGA-specific aspects. 
+ +Smart NICs based on FPGAs fall into two categories: those which +expose the FPGA explicitly to the host, and those that do not. Cyborg's +current scope includes the former. This spec includes such devices, +though the Cyborg-Neutron interaction is out of scope. + +The scope of this spec is Rocky release. + +Terminology +=========== +* Accelerator: The unit that can be assigned to an instance for + offloading specific functionality. For non-FPGA devices, it is either the + device itself or a virtualized version of it (e.g. vGPUs). For FPGAs, an + accelerator is either the entire device, a region within the device or a + function. + +* Bitstream: An FPGA image, usually a binary file, possibly with + vendor-specific metadata. A bitstream may implement one or more functions. + +* Function: A specific functionality, such as matrix multiplication or video + transcoding, usually represented as a string or UUID. This term may be used + with multi-function devices, including FPGAs and other fixed function + hardware like Intel QuickAssist. + +* Region: A part of the FPGA which can be programmed without disrupting + other parts of that FPGA. If an FPGA does not support Partial + Reconfiguration, the entire device constitutes one region. A region + may implement one or more functions. + +Here is an example diagram for an FPGA with multiple regions, and multiple +functions in a region:: + + PCI A PCI B + | | + +-------|--------|-------------------+ + | | | | + | +----|--------|---+ +--------+ | + | | +--|--+ +---|-+ | | | | + | | | Fn A| | Fn B| | | | | + | | +-----+ +-----+ | | | | + | +-----------------+ +--------+ | + | Region 1 Region 2 | + | | + +------------------------------------+ + +Problem description +=================== +Once Nova has picked a compute node for placement of an instance that needs +accelerators, the following steps needs to happen: + +* Nova compute on that node has to invoke Cyborg Agent for handling the needed + accelerators. 
This needs to happen through a library, named os-acc, patterned + after os-vif (Neutron) and os-brick (Cinder). + +* Cyborg Agent may call Glance to fetch a bitstream, either by id or based on + tags. + +* Cyborg Agent may need to call into a Cyborg driver to program said bitstream. + +* Cyborg Agent needs to call into a Cyborg driver to prepare a device and/or + obtain an attach handle (e.g. PCI BDF) that can be attached to the instance. + +* Cyborg Agent returns enough information to Nova compute via os-acc for the + instance to be launched. + +The behavior of each of these steps needs to be specified. + +In addition, the OpenStack Compute API [#ServerConcepts]_ specifies the +operations that can be done on an instance. The behavior with respect to +accelerators must be defined for each of these operations. That in turn is +related to when Nova compute calls os-acc. + +Use Cases +--------- +Please see [#CyborgNovaSched]_. We intend to support FPGAaaS with +request time programming, and AFaaS (both pre-programmed and +orchestrator-programmed scenarios). + +Cyborg will discover accelerator resources whenever the Cyborg agent starts up. +PCI hot plug can be supported past Rocky release. + +Cyborg must support all instance operations mentioned in OpenStack Compute API +[#ServerConcepts]_ in Rocky, except booting off a snapshot and live migration. + +Proposed change +=============== + +OpenStack Server API Behavior +----------------------------- +The OpenStack Compute API [#ServerConcepts]_ mentions the list of operations +that can be performed on an instance. Of these, some will not be supported by +Cyborg in Rocky. The list of supported operations (with +the intended behaviors) are as follows: + +* When an instance is started, the accelerators requested by that instance’s + flavor must be attached to the instance. On termination, those resources are + released. 
+ +* When an instance is paused, suspended or locked, the accelerator resources + are left intact, and not detached from the instance. So, when the instance is + unpaused, resumed or unlocked, there is nothing to do. + +* When an instance is shelved, the accelerator resources are detached. On an + unshelve, it is expected that the build operation will go through the + scheduler again, so it is equivalent to an instance start. + +* When an instance is deleted, the accelerator resources are detached. On a + restore, it is expected that the build operation will go through the + scheduler again, so it is equivalent to an instance start. + +* Reboot: The accelerator resources are left intact. It is up to the instance + software to rediscover attached resources. + +* Rebuild: Prior to the instance image replacement, all device access must be + quiesced, i.e., accesses to devices from that instance must be completed and + further accesses must be prohibited. The mechanics of such quiescing are + outside the scope of this document. With that precondition, accelerator + resources are left attached to the instance during the rebuild. + +* Resize (with change of flavor): It is equivalent to a termination followed by + re-scheduling and restart. The accelerator resources are detached on + termination, and re-attached when the instance is scheduled again. + +* Cold migration: It is equivalent to a termination followed by re-scheduling + and restart. The accelerator resources are detached on termination, and + re-attached when the instance is scheduled again. + +* Evacuate: This is a forcible rebuild by the administrator. As the semantics + of evacuation are left open even without accelerators, Cyborg’s behavior is + also left undefined. + +* Set administrator password, trigger crash dump: These are supported and not + no-ops for accelerators.
+ +The following instance operations are not supported in this release: + +* Booting off a snapshot: The snapshot may have been taken when the attached + accelerators were in a particular state. When booting off a previous + snapshot, the current configuration and state of accelerators may not match + the snapshot. So, this is unsupported. + +* Live migration: Until a mechanism is defined to migrate accelerator state + along with the instance, this is unsupported. + +os_acc Structure +---------------- +Cyborg will develop a new library named os-acc. That library will offer the +APIs listed later in this section. Nova Compute calls these APIs if it sees +that the requested flavor refers to CUSTOM_ACCELERATOR resource class, except +for the initialize() call, which is called unconditionally. Nova Compute calls +these APIs asynchronously, as suggested below:: + + with ThreadPoolExecutor(max_workers=1) as executor: + future = executor.submit(os_acc., *args) + # do other stuff + try: + data = future.result() + except: + # handle exceptions + +The APIs of os-acc are as below: + +* initialize() + + * Called once at start of day. Waits for Cyborg Agent to be ready to accept + requests, i.e., all devices enumerated and traits published. + + * Returns None on success. + + * Throws ``CyborgAgentUnavailable`` exception if Cyborg Agent cannot be + contacted. + +* plug(instance_info, selected_rp, flavor_extra_specs) + + * Parameters are all read-only. Here are their descriptions: + + * instance_info: dictionary containing instance UUID, instance name, + project/tenant ID and VM image UUID. The instance name is needed for + better logging, the project/tenant ID may be passed to some accelerator + policy engine in the future and the VM image UUID may be used to query + Glance for metadata about accelerator requirements that may be stored + with the VM image. + + * selected_rp: Information about the selected resource provider is + passed as a dictionary. 
+ + * flavor_extra_specs: the extra_specs field in the flavor, including + resource classes, traits and other fields interpreted by Cyborg. + + * Called by Nova compute when an instance is started, unshelved, or + restored and after a resize or cold migration. + + * Called before an instance is built, i.e., before the specification of + the instance is created. For libvirt-based hypervisors, this means + the call happens before the instance’s domain XML is created. + + * As part of this call, Cyborg Agent may fetch bitstreams from Glance and + initiate programming. It may fetch the bitstream specified in the + request’s flavor extra specs, if any. If the request refers to a + function ID/name, Cyborg Agent would query Glance to find bitstreams + that provide the flavor and match the chosen device, and would then + fetch the needed bitstream. + + * As part of this call, Cyborg Agent will locate the Deployable corresponding + to the chosen RP, locate the attach handles (e.g. PCI BDF) needed, update + its internal data structures in a persistent way, and return the needed + information back to Nova. + + * Returns an array, with one entry per requested accelerator, each entry + being a dictionary. The dictionary is structured as below for Rocky: + + | { “pci_id”: } + +* unplug(instance_info) + + * Parameters are all read-only. Here are their descriptions: + + * instance_info: dictionary containing instance UUID and instance + name. The instance name is needed for better logging. + + * Called when an instance is stopped, shelved, or deleted and before + a resize or cold migration. + + * As part of this call, Cyborg Agent will clean up internal resources, call + the appropriate Cyborg driver to clean up the device resources and update + its data structures persistently. + + * Returns the number of accelerators that were released. Errors may cause + exceptions to be thrown. 
+ +Workflows +--------- +The pseudocode for each os-acc API can be expressed as below:: + + def initialize(): + # checks that all devices are discovered and their traits published + # waits if any discovery operation is ongoing + return None + + def plug(instance_info, rp, extra_specs): + validate_params(....) + glance = glanceclient.Client(...) + driver = # select Cyborg driver for chosen rp + rp_deployable = # get deployable for RP + if extra_specs refers to ``CUSTOM_FPGA__REGION_`` and + extra_specs refers to ``bitstream:``: + bitstream = glance.images.data(image_uuid) + driver.program(bitstream, rp_deployable, …) + if extra_specs refers to ``CUSTOM_FPGA__FUNCTION_`` and + extra_specs refers to function UUID/name: + region_type_uuid = # fetch from selected RP + bitstreams = glance.images.list(...) + # queries Glance by function UUID/name property and region type + # UUID to get matching bitstreams + if len(bitstreams) > 1: + error(...) # bitstream choice policy is outside Cyborg + driver.program(bitstream, rp_deployable, …) + pci_bdf = driver.allocate_handle(...) + # update Cyborg DB with instance_info and BDF usage + return { “pci_id”: pci bdf } + + def unplug(instance_info): + bdf_list = # fetch BDF usage from Cyborg DB for instance + # update Cyborg DB to mark those BDFs as free + return len(bdf_list) + +Alternatives +------------ + +N/A + +Data model impact +----------------- + +None + + +REST API impact +--------------- + +None + +Security impact +--------------- + +None + +Notifications impact +-------------------- + +None + +Other end user impact +--------------------- + +None + +Performance Impact +------------------ + +None + +Other deployer impact +--------------------- + +None + +Developer impact +---------------- + +None + +Implementation +============== + +Assignee(s) +----------- + +None + +Work Items +---------- + +* Decide how to associate multiple functions/bitstreams in extra specs + with multiple devices in the flavor. 
+ +* Decide specific changes needed in Cyborg conductor, db, agent and drivers. + +* Others: TBD + +Dependencies +============ + +* Nested Resource Provider support in Nova + +* `Nova Granular Requests + `_ + +Testing +======= + +For each vendor driver supported in this release, we need to integrate the +corresponding FPGA type(s) in the CI infrastructure. + +Documentation Impact +==================== + +The behavior with respect to accelerators during various instance operations +(reboot, pause, etc.) must be documented. The procedure to upload a bitstream, +including applying Glance properties, must also be documented. + +References +========== + +.. [#CyborgNovaSched] `Cyborg Nova Scheduling Specification + `_ + +.. [#Bitstreamspec] `Cyborg bitstream metadata standardization spec + `_ + +.. [#ServerConcepts] `OpenStack Server API Concepts + `_ + +History +======= + +Optional section intended to be used each time the spec is updated to describe +new design, API or any database schema updated. Useful to let reader understand +what's happened along the time. + +.. list-table:: Revisions + :header-rows: 1 + + * - Release Name + - Description + * - Rocky + - Introduced + diff --git a/specs/rocky/approved/cyborg-agent-driver-api.rst b/specs/rocky/approved/cyborg-agent-driver-api.rst new file mode 100644 index 0000000..6ce14ad --- /dev/null +++ b/specs/rocky/approved/cyborg-agent-driver-api.rst @@ -0,0 +1,222 @@ +.. + This work is licensed under a Creative Commons Attribution 3.0 Unported + License. + + http://creativecommons.org/licenses/by/3.0/legalcode + +========================================== +Cyborg Agent-Driver API +========================================== + +Cyborg agent interacts with each Cyborg driver in the compute node to +discover available devices. This spec defines how the agent-driver API +is structured. + +No change is proposed to the way the agent discovers the drivers on +start or restart. 
+ +This spec is common to all accelerators, including GPUs, High Precision +Time Synchronization (HPTS) cards, etc. Since FPGAs have more aspects to +be considered than other devices, some sections may focus on FPGA-specific +factors. The spec calls out the FPGA-specific aspects. + +The scope of this spec is Rocky release, but the API has been designed +to be extensible for future releases. Accordingly, the spec calls out +the Rocky-specific aspects. + +Problem description +=================== + +The [#Cyborg_Nova_scheduling_spec]_ specifies that devices are +represented using Resource Providers (RPs), Resource Classes (RCs) +and traits. The information needed to create them has to come from +the Cyborg driver to the Cyborg agent, which in turn needs to +push it to the Cyborg Conductor. + +The main challenge is discovering the device topology for FPGAs. +An FPGA may have one or more Partial Reconfiguration regions, +and those regions may have one or more accelerators nested inside them. +Further, it may have local memory that is either partitioned or +shared among the regions. + +Use Cases +--------- + +* Devices of different types (GPUs, FPGAs, HPTS cards, Quick Assist) are + present in the same host. + +* FPGAs of different types, possibly from different vendors, are present + in the same host. + +* An FPGA may have one or more regions. Each region may have one + or more accelerators. + + * In Rocky, we may support only one region per FPGA, and only one + accelerator per region. + +* For Rocky, it is proposed that local memory need not be exposed as + a resource to orchestration. That is because, since there is only + one region per FPGA, an instance attached to that region will be + able to access all the memory, no matter how much there is. For + non-FPGA devices like GPUs, there does not seem to be a requirement + to expose video RAM. + +Cyborg will assume and handle the following component relationships: + +* One product (e.g.
Intel PAC Arria 10) may correspond to multiple + PCI vendor/device IDs. + +* One PCI vendor/device ID may correspond to different region type IDs. + This could be either because there are multiple regions in the same device + or because there are different versions/revisions of the same device. + +* But the same region type ID will never show up in products with + different PCI IDs. + +Proposed change +=============== + +Today, the Cyborg agent invokes the discover() API for each driver +that it finds. The discover() API returns a dictionary indexed by +the PCI BDF of a device. The value element in the key-value pair of +the dictionary contains the components and characteristics +of the device with that BDF. + +We propose to retain the same model, but enhance the dictionary to +include enough information to create the resource providers and traits +needed to populate Placement. Here are the additional proposed keys +in the device dictionary for each PF: + +| ``"type": `` # One of GPU, FPGA, etc. +| ``"vendor": `` +| ``"product": `` + +Also, in the ``regions`` entry for each PF, it is proposed to add +the following keys: + +| ``"region-type-uuid": `` # Optional, default: NULL +| ``"bitstream-id": `` # Glance/other UUID, optional, default: NULL +| ``"function-uuid": `` # Optional, default: NULL + +When the agent receives this dictionary for a device, it will do +the following: + +* If there is nested RP support, create an RP for the device and each + region within. + +* Create a device type trait: ``CUSTOM___``. + Apply it to the device RP (if nRP support exists) or the compute node RP. + + * E.g. CUSTOM_FPGA_INTEL_PAC_ARRIA10. + + * NOTE: The agent will convert all characters to upper case, replace + spaces with underscores, and check for conformance to custom trait + syntax (see [#Custom_traits]_) + +* Create region type traits for each region, of the form: + ``CUSTOM___REGION_``. 
Apply them to the + corresponding region RP (if nRP support exists) or the compute node RP. + + * E.g. CUSTOM_FPGA_INTEL_REGION_ + + * NOTE: For UUIDs, the agent will convert all hexadecimal digits to upper + case, replace hyphens with underscores and validate all characters. + +* Create function type traits for each function in each region, of the form: + ``CUSTOM___FUNCTION_``. Apply them to the + corresponding region RP (if nRP support exists) or the compute node RP. + + * E.g. CUSTOM_FPGA_INTEL_FUNCTION_ + +Alternatives +------------ + +N/A + +Data model impact +----------------- + +Add the new fields to the database under Deployables and Attributes. + +REST API impact +--------------- + +None + +Security impact +--------------- + +None + +Notifications impact +-------------------- + +None + +Other end user impact +--------------------- + +None + +Performance Impact +------------------ + +None + +Other deployer impact +--------------------- + +None + +Developer impact +---------------- + +None + +Implementation +============== + +Assignee(s) +----------- + +None + +Work Items +---------- + +Dependencies +============ + +None + +Testing +======= + +Need to update unit tests to check for the newly added fields. + +Documentation Impact +==================== + +None + +References +========== + +.. [#Cyborg_Nova_scheduling_spec] `Cyborg/Nova Scheduling spec `_ + +.. [#Custom_traits] `Custom Traits `_ + +History +======= + +Optional section intended to be used each time the spec is updated to describe +new design, API or any database schema updated. Useful to let reader +understand what's happened along the time. + +.. 
list-table:: Revisions + :header-rows: 1 + + * - Release Name + - Description + * - Rocky + - Introduced diff --git a/specs/rocky/approved/cyborg-fpga-bitstream-spec.rst b/specs/rocky/approved/cyborg-fpga-bitstream-spec.rst new file mode 100644 index 0000000..8df39ae --- /dev/null +++ b/specs/rocky/approved/cyborg-fpga-bitstream-spec.rst @@ -0,0 +1,253 @@ +.. + This work is licensed under a Creative Commons Attribution 3.0 Unported + License. + + http://creativecommons.org/licenses/by/4.0/legalcode + +==================================================== + Cyborg FPGA Bitstream metadata spec +==================================================== + +Blueprint url: +https://blueprints.launchpad.net/openstack-cyborg/+spec/cyborg-fpga-bitstream-metadata-spec + +This spec proposes the FPGA Bitstream metadata specifications for bitstream +management + +Problem description +=================== + +A field-programmable gate array (FPGA) is an integrated circuit designed to be +configured by a customer or a designer after manufacturing. Their advantage +lies in that they are sometimes significantly faster for some applications +because of their parallel nature and optimality in terms of the number of +gates used for a certain process. Hence, using FPGA for application +acceleration in cloud has become desirable. One of the encountered problems is +when it comes to bitstream management, it is difficult to map bitstreams to +their appropriate FPGA boards or reconfigurable regions. The aim of this +proposal is to provide a standardized set of metadata which should be +encapsulated together with bitstream storage. + +Use Cases +--------- + +When user requests to reprogram a FPGA board with certain functionality in the +cloud environment, he or she will need to retrieve a suitable bitstream from +the storage. In order to find the suitable one, bitstreams need to be +categorized based on some properties defined in metadata. 
+ +Proposed change +=============== + +For each metadata, it will be stored as a row in this Glance's image_properties +in key-value pair format: column [name] holds the key whereas column [value] +holds the value. Note: no database schema change is required. This is a +standardization document to guide how to use existing Glance table for FPGA +bitstreams. + +Given this, Cyborg will standardize the key convention as follows: + ++--------------+---------+-----------+--------------------------------------+ +| name | value | nullable | description | ++--------------+---------+-----------+--------------------------------------+ +| bs-name | aes-128| False | name of the bitstream(not unique) | ++--------------+---------+-----------+--------------------------------------+ +| bs-uuid | {uuid} | False | The uuid generated during synthesis | ++--------------+---------+-----------+--------------------------------------+ +| vendor | Xilinx | False | Vendor of the card | ++--------------+---------+-----------+--------------------------------------+ +| board | KU115 | False | Board type for this bitstream to load| ++--------------+---------+-----------+--------------------------------------+ +| shell_id | {uuid} | True | Required shell bs-uuid for the bs | ++--------------+---------+-----------+--------------------------------------+ +| version | 1.0 | False | Device version number | ++--------------+---------+-----------+--------------------------------------+ +| driver | SDX | True | Type of driver for this bitstream | ++--------------+---------+-----------+--------------------------------------+ +| driver_ver | 1.0 | False | Driver version | ++--------------+---------+-----------+--------------------------------------+ +| driver_path | /path/ | False | Where to retrieve the driver binary | ++--------------+---------+-----------+--------------------------------------+ +| topology | {CLOB} | False | Function Topology |
++--------------+---------+-----------+--------------------------------------+ +| description | desc | True | Description | ++--------------+---------+-----------+--------------------------------------+ +| region_uuid | {uuid} | True | The uuid for target region type | ++--------------+---------+-----------+--------------------------------------+ +| function_uuid| {uuid} | False | The uuid for bs function type | ++--------------+---------+-----------+--------------------------------------+ +| function_name| nic-40 | True | The function name for this bitstream | ++--------------+---------+-----------+--------------------------------------+ + +Here are the details regarding some defined keys. + +[shell_id] +This field is optional. If loading this PR bitstream requires a shell image, +this field specifies the shell bitstream's uuid. If this field is null, it means +this bitstream is a shell bitstream. + +[driver] +This specifies the path to a package of scripts/binaries to be installed in +order to use the loaded bitstream(e.g. insmod some kernel driver/git clone +some remote source code, etc) + +[region_uuid] +This value specifies the type of region that is required to load this +bitstream. This type is a uuid generated during the shell bitstream synthesis. + +[function_uuid] +This value specifies the type of function for this bitstream. It helps the +upstream scheduler to match traits with appropriate bitstream. + +[topology] +This field describes the topology of function structures after the bitstream is +loaded on the FPGA. In particular, it uses JSON format to visualize how +physical functions, virtual functions are co-related to each other. It is +vendor driver's responsibility to interpret this and prepare the proper report +for Cyborg Agent.
For instance:: + + { + "pf_num": 2, + "vf_num": 2, + "pf": [ + { + "name": "pf_1", + "capability": "", + "kpi": "", + "pci_offset": "0", + "vf": [ + { + "name": "vf_1", + "pci_offset": "1" + } + ] + }, + { + "name": "pf_2", + "capability": "", + "kpi": "", + "pci_offset": "2", + "vf": [ + { + "name": "vf_2", + "pci_offset": "3" + } + ] + } + ] + } + +This JSON template guides Cyborg Agent to populate vf/pf/deployable list in +Cyborg. + +Given the above JSON topology, Cyborg Driver should be able to interpret the +accelerator structure as follows:: + + ============= + =Accelerator= + ============= + | + ============ + =Deployable= + ============ + /\ + / \ + =================== =================== + = Deployable pf_1 = = Deployable pf_2 = + =================== =================== + | | + | | + =================== =================== + = Deployable vf_1 = = Deployable vf_2 = + =================== =================== + +Notes: 1. Topology is not mandatory to fill in, as long as vendor driver can +figure out what resources to report after the bitstream is loaded. 2. The JSON +provided here is only a reference template. It does not have to be PCI-centric +etc. and up to vendors how to define it for their products. 3. A root +deployable should be created in the graph. In addition, the pfs and vfs here +are all instances of deployable. Please refer to the DB objects specs +regarding physical_function and virtual_function. + + +Finally, all of the FPGA bitstreams should be TAGGED as "FPGA" in Glance. +This helps distinguish between normal VM images and bitstream images +during filtering.
+ +Alternatives +------------ + + +Data model impact +----------------- + +RPC API impact +--------------- + +REST API impact +--------------- + +Security impact +--------------- +None + +Notifications impact +-------------------- +None + +Other end user impact +--------------------- +None + +Performance Impact +------------------ +None + +Other deployer impact +--------------------- +None + +Developer impact +---------------- +Accelerator vendors should implement the logic in program() api to populate +the loaded topology + + +Implementation +============== + +Assignee(s) +----------- +Primary assignee: + Li Liu + Shaohe Feng + +Work Items +---------- +* Provide example JSON format for bitstream +* Provide example implementation of vendor driver + +Dependencies +============ + +Testing +======= + +Documentation Impact +==================== +None + +References +========== +None + +History +======= + +.. list-table:: Revisions + :header-rows: 1 + + * - Release Name + - Description + * - Rocky + - Introduced diff --git a/specs/rocky/approved/cyborg-fpga-programming-proposal.rst b/specs/rocky/approved/cyborg-fpga-programming-proposal.rst new file mode 100644 index 0000000..71bbe73 --- /dev/null +++ b/specs/rocky/approved/cyborg-fpga-programming-proposal.rst @@ -0,0 +1,200 @@ +.. + This work is licensed under a Creative Commons Attribution 3.0 Unported + License. 
+ + http://creativecommons.org/licenses/by/3.0/legalcode + +==================================================== + Cyborg FPGA Programming Service Proposal +==================================================== + +Blueprint url is not available yet +https://blueprints.launchpad.net/openstack-cyborg/+spec/cyborg-fpga-programming-ability + +This spec proposes a Programming Service to be added to Cyborg to allow users to +dynamically change the functions loaded on FPGA in cloud environment + +Problem description +=================== + +A field-programmable gate array (FPGA) is an integrated circuit designed to be +configured by a customer or a designer after manufacturing. Their advantage +lies in that they are sometimes significantly faster for some applications +because of their parallel nature and optimality in terms of the number of +gates used for a certain process. In addition, FPGA can be reprogrammed based +on different applications. Hence, using FPGA for application acceleration in +cloud has become desirable. For Cyborg as a management framework for +heterogeneous accelerators, tracking, deploying and reprogramming FPGAs are +much needed features. Since the FPGA modelling has already been proposed in +another document, this spec will be focused on proposing Reprogramming +Service for FPGAs in Cyborg. + +Use Cases +--------- + +In the scenario of OpenCL, user loads the accelerators on FPGA for their +application. When different applications are executing on OpenCL environment, +the accelerators will be changed from time to time. It will not be feasible +to login to each host and change the FPGA configuration manually by lab admin. +Instead, through the reprogramming service, users can manage the functions +of FPGA using a set of REST APIs. + +Similarly, during the maintenance of FPGA, admin needs to update/migrate +shells and bitstreams on FPGAs within data center. Cyborg Reprogramming +Service will allow them to use the APIs from a centralized console.
+ +Since this is a pure proposal for programming APIs, it would not focus on +what the upstream use case/runtime is. Those details will be in separate +specs when needed. + +Proposed change +=============== +First of all, Cyborg needs to add extra REST APIs to allow others to invoke +the programming service. The REST api should have following format:: + + Url: {base_url}/fpga/{deployable_uuid} + Method: POST + URL Params: + None + + Data Params: + glance_bitstream_uuid + + Success Response: + POST: + Code: 200 + Body: { "msg" : "bitstream has been loaded successfully"} + + Error Response + Code: 401 UNAUTHORIZED + Body: { error : "Log in" } + OR + Code: 422 Unprocessable Entry + Body: { error : "User is not authorized to use the resource" } + + Sample Call: + To program fpga resource with deployable_uuid=2864a139-c2cd-4f9f-abf3-44eb3f09b83c + with bitstream with uuid=0b955a5b-f5dd-49d0-8c4f-28729427d303 + $.ajax({ + url: "/fpga/2864a139-c2cd-4f9f-abf3-44eb3f09b83c", + data: { + "glance_bitstream_uuid": "0b955a5b-f5dd-49d0-8c4f-28729427d303" + }, + dataType: "json", + type : "post", + success : function(r) { + console.log(r); + } + }); + +Second, implement the service in Cyborg which does three tasks: 1. identify +the host location of the requested FPGA/Partial Reconfiguration(PR) Region(e.g. +on which host is the board located). 2. Check if the user(API caller, +OpenStack Login User, etc) has the privilege to use the given bitstream, +FPGA, or host. 3. If the previous checks pass, Cyborg will send the program +notification to the target host with requested FPGA. + +Third, implement notification callee in Cyborg Agent. This should be a rpc +call with following signature:: + + int program_fpga_with_bitstream(deployable_uuid, bitstream_uuid) + +The function takes both deployable_uuid and bitstream_uuid as input.
It uses +deployable_uuid to identify which specific FPGA/PR region is going to be +programmed and uses bitstream_uuid to retrieve bitstream from the bitstream +storage service (Glance in the context of OpenStack). In addition, this is a +synchronous call, meaning it will wait for the programming task to be completed and +then return a status code as integer. The return code should have following +interpretation: + ++------+--------------------------------------------------------+ +| code | meaning | ++------+--------------------------------------------------------+ +| 0 | program successfully | ++------+--------------------------------------------------------+ +| 1 | failed with unknown errors | ++------+--------------------------------------------------------+ +| 2 | invalid deployable_uuid(target fpga not found) | ++------+--------------------------------------------------------+ +| 3 | invalid bitstream_uuid(bitstream can not be downloaded)| ++------+--------------------------------------------------------+ + +Alternatives +------------ + + + +Data model impact +----------------- + + +REST API impact +--------------- +A rest api will be added to the Cyborg service as we discussed previously. +It should not impact any of the existing rest apis + +Security impact +--------------- +The access to FPGA/PR region and bitstreams should be carefully checked. + +Notifications impact +-------------------- +None + +Other end user impact +--------------------- +None + +Performance Impact +------------------ +None + +Other deployer impact +--------------------- +None + +Developer impact +---------------- +On the Cyborg Agent side, it relies on program() api implemented by vendor.
+ + +Implementation +============== + +Assignee(s) +----------- +Primary assignee: + Li Liu + +Work Items +---------- +* Implement the cyborg program service rest api +* Implement the cyborg program service +* Implement the notification call in Cyborg Agent, which invokes vendor driver + + +Dependencies +============ + +Testing +======= + +Documentation Impact +==================== +The Cyborg-Nova interaction related specs need to be aware of the change of the +accelerators when FPGAs are being reprogrammed. + +References +========== +None + +History +======= + +.. list-table:: Revisions + :header-rows: 1 + + * - Release Name + - Description + * - Rocky + - Introduced diff --git a/specs/rocky/approved/cyborg-nova-sched.rst b/specs/rocky/approved/cyborg-nova-sched.rst new file mode 100644 index 0000000..73654ae --- /dev/null +++ b/specs/rocky/approved/cyborg-nova-sched.rst @@ -0,0 +1,486 @@ +.. + This work is licensed under a Creative Commons Attribution 3.0 Unported + License.
In the compute node, Cyborg shall + define a workflow based on interacting with Nova through a new os-acc + library (similar to os-vif and os-brick). + +This spec addresses the first two aspects. There is another spec to +address the attachment of accelerators to instances [#os-acc]_. +Cyborg also needs to handle some aspects for FPGAs without involving +Nova, specifically FPGA programming and bitstream management. They +will be covered in other specs. This spec is independent of those specs. + +This spec is common to all accelerators, including GPUs, High Precision +Time Synchronization (HPTS) cards, etc. Since FPGAs have more aspects to +be considered than other devices, some sections may focus on FPGA-specific +factors. The spec calls out the FPGA-specific aspects. + +Smart NICs based on FPGAs fall into two categories: those which expose +the FPGA explicitly to the host, and those that do not. Cyborg's scope +includes the former. This spec includes such devices, though the +Cyborg-Neutron interaction is out of scope. + +The scope of this spec is Rocky release. + +Terminology +=========== +* Accelerator: The unit that can be assigned to an instance for + offloading specific functionality. For non-FPGA devices, it is either the + device itself or a virtualized version of it (e.g. vGPUs). For FPGAs, an + accelerator is either the entire device, a region within the device or a + function. + +* Bitstream: An FPGA image, usually a binary file, possibly with + vendor-specific metadata. A bitstream may implement one or more functions. + +* Function: A specific functionality, such as matrix multiplication or video + transcoding, usually represented as a string or UUID. This term may be used + with multi-function devices, including FPGAs and other fixed function + hardware like Intel QuickAssist. + +* Region: A part of the FPGA which can be programmed without disrupting + other parts of that FPGA. 
If an FPGA does not support Partial + Reconfiguration, the entire device constitutes one region. A region + may implement one or more functions. + +Here is an example diagram for an FPGA with multiple regions, and multiple +functions in a region:: + + PCI A PCI B + | | + +-------|--------|-------------------+ + | | | | + | +----|--------|---+ +--------+ | + | | +--|--+ +---|-+ | | | | + | | | Fn A| | Fn B| | | | | + | | +-----+ +-----+ | | | | + | +-----------------+ +--------+ | + | Region 1 Region 2 | + | | + +------------------------------------+ + +Problem description +=================== +Cyborg's representation and handling of accelerators needs to be consistent +with Nova's Placement API. Specifically, they must be modeled in terms of +Resource Providers (RPs), Resource Classes (RCs) and Traits. + +Though PCI Express is entrenched in the data center, some accelerators +may be exposed to the host via some other protocol. Even with PCI, the +connections between accelerator components and PCI functions +may vary across devices. Accordingly, Cyborg should not represent +accelerators as PCI functions. + +For instances that need accelerators, we need to define a way for Cyborg +to be included seamlessly in the Nova scheduling workflow. + +Use Cases +--------- +We need to satisfy the following use cases for the tenant role: + +* Device as a Service (DaaS): The flavor asks for a device. + + * FPGA variation: The flavor asks for a device to which specific + bitstream(s) can be applied. There are three variations, the first + two of which delegate bitstream programming to Cyborg for secure + programming: + + * Request-time Programming: The flavor specifies a bitstream. (Cyborg + applies the bitstream before instance bringup. This is similar to + AWS flow.) + + * Run-time Programming: The instance may request one or more + bitstreams dynamically. (Cyborg receives the request and does + the programming.) 
+ + * Direct Programming: The instance directly programs the FPGA + region assigned to it, without delegating it to Cyborg. The + security questions that this raises need to be addressed in + the future. (This is listed only for completeness; this is not + going to be addressed in Rocky, or even future releases till + the security concerns are fully addressed.) + +* Accelerated Function as a Service (AFaaS): The flavor asks for a + function (e.g. ipsec) attached to the instance. The operator may + satisfy this use case in two ways: + + * Pre-programmed: Do not allow orchestration to modify any function, + for any of these reasons: + + * Only fixed function hardware is available. (E.g. ASICs.) + + * Operational simplicity. + + * Assure tenants of programming security, by doing all programming offline + through some audited process. + + * For FPGAs, allow orchestration to program as needed, to maximize + flexibility and availability of resources. + +An operator must be able to provide both Device as a Service and Accelerated +Function as a Service in the same cluster, to serve all +kinds of users: those who are device-agnostic, those using 3rd party +bitstreams, and those using their own bitstreams (incl. developers). + +The goal for Cyborg is to provide the mechanisms to enable all these use +cases. + +In this spec, we do not consider bitstream developer or device developer +roles. Also, we assume that each accelerator device is dedicated to a +compute node, rather than shared among several nodes. + +Proposed change +=============== + +Representation +-------------- + + * Cyborg will represent a generic accelerator for a device type as a + custom Resource Class (RC) for that type, of the form + CUSTOM_ACCELERATOR_. E.g. CUSTOM_ACCELERATOR_GPU, + CUSTOM_ACCELERATOR_FPGA, etc. This helps in defining separate quotas + for different device types. 
+ + * Device-local memory is the memory available to the device alone, + usually in the form of DDR, QDR or High Bandwidth Memory in the + PCIe board along with the device. It can also be represented as an + RC of the form CUSTOM_ACCELERATOR_MEMORY_. E.g. + CUSTOM_ACCELERATOR_MEMORY_DDR. A single PCIe board may have more + than one type of memory. + + * In addition, each device/region is represented as a Resource Provider + (RP). This enables traits to be applied to it and other RPs/RCs to + be contained within it. So, a device RP provides one or more instances + of that device type's RC. This depends on nested RP support in + Nova [#nRP]_. + + * For FPGAs, both the device and the regions within it will be + represented as RPs. This allows the hierarchy within an FPGA + to be naturally modelled as an RP hierarchy. + + * Using Nested RPs is the preferred way. But, until Nova + supports nested RPs, Cyborg shall associate the + RCs and traits (described below) with the compute node RPs. This + requires that all devices on a single host must share the same + traits. If nested RP support becomes usable after Rocky release, + the operator needs to handle the upgrade as below: + + * Terminate all instances using accelerators. + + * Remove all Cyborg traits and inventory on all compute node RPs, + perhaps by running a script. + + * Perform the Cyborg upgrade. Post-upgrade, the new agent/driver(s) + will create RPs for the devices and publish the traits + and inventory. + + * Cyborg will associate a Device Type trait with each device, of the + form CUSTOM_-. E.g. CUSTOM_GPU_AMD or + CUSTOM_FPGA_XILINX. This trait is intended to help match the + software drivers/libraries in the instance image. This is meant to + be used in a flavor when a single driver/library in the instance + image can handle most or all of device types from a vendor. + + * For FPGAs, this trait and others will be applied to the region + RPs which are children of the device RPs as well. 
+ + * Cyborg will associate a Device Family trait with each device as + needed, of the form CUSTOM___. + E.g. CUSTOM_FPGA_INTEL_ARRIA10. + This is not a product name, but the name of a device family, used to + match software in the instance image with the device family. This is + a refinement of the Device Type Trait. It is meant to be used in + a flavor when there are different drivers/libraries for different + device families. Since it may be tough to forecast whether a new + device family will need a new driver/library, it may make sense to + associate both these traits with the same device RP. + + * For FPGAs, Cyborg will associate a region type trait with each region + (or with the FPGA itself if there is no Partial Reconfiguration + support), of the form CUSTOM_FPGA_REGION___. + E.g. CUSTOM_FPGA_REGION_INTEL_. This is needed for Device as a + Service with FPGAs. + + * For FPGAs, Cyborg may associate a function type trait with a region + when the region gets programmed, of the form + CUSTOM_FPGA_FUNCTION__. E.g. + CUSTOM_FPGA_FUNCTION_INTEL_. + This is needed for AFaaS use case. This is updated when Cyborg + reprograms a region as part of AFaaS request. + + * For FPGAs, Cyborg should associate a CUSTOM_PROGRAMMABLE trait with + every region. This is needed to lay the groundwork for + multi-function accelerators in the future. Flavors should ask for + this trait, except in the pre-programmed case. + + * For FPGAs, since they may implement a wide variety of functionality, + we may also attach a Functionality Trait. + E.g. CUSTOM_FPGA_COMPUTE, CUSTOM_FPGA_NETWORK, CUSTOM_FPGA_STORAGE. + + * The Cyborg agent needs to get enough information from the Cyborg driver + to create the RPs, RCs and traits. In particular, it needs to get the + device type string, region IDs and function IDs from the driver. This + requires the driver/agent interface to be enhanced [#drv-api]_. 
+ + * The modeling in Placement represents generic virtual accelerators as + resource classes, and devices/regions as RPs. This is PCI-agnostic. + However, many FPGA implementations use PCI Express in general, and + SR-IOV in particular. In those cases, it is expected that Cyborg will + pass PCI VFs to instances via PCI Passthrough, and retain the PCI PF + in the host for management. + +Flavors +------- + For the sake of illustrating how the device representation in Nova + can be used, and for completeness, we now show how to define flavors + for various use cases. Please see [#flavor]_ for more details. + + * A flavor that needs device access always asks for one or more instances + of 'resources:CUSTOM_ACCELERATOR_'. In addition, it + needs to specify the right traits. + + * Example flavor for DaaS: + + | ``resources:CUSTOM_ACCELERATOR_HPTS=1`` + | ``trait:CUSTOM_HPTS_ZTE=required`` + + NOTE: For FPGAs, the flavor should also include CUSTOM_PROGRAMMABLE trait. + + * Example flavor for AFaaS Pre-programmed: + + | ``resources:CUSTOM_ACCELERATOR_FPGA=1`` + | ``trait:CUSTOM_FPGA_INTEL_ARRIA10=required`` + | ``trait:CUSTOM_FPGA_FUNCTION_INTEL_=required`` + + * Example flavor for AFaaS Orchestration-Programmed: + + | ``resources:CUSTOM_ACCELERATOR_FPGA=1`` + | ``trait:CUSTOM_FPGA_INTEL_ARRIA10=required`` + | ``trait:CUSTOM_PROGRAMMABLE=required`` + | ``function:CUSTOM_FPGA_FUNCTION_INTEL_=required`` + (Not interpreted by Nova.) + + * NOTE: When Nova supports preferred traits, we can use that instead + of the 'function' keyword in extra specs. + + * NOTE: For Cyborg to fetch the bitstream for this function, it + is assumed that the operator has configured the function UUID + as a property of the bitstream image in Glance.
+ + * Another example flavor for AFaaS Orchestration-Programmed which + refers to a function by name instead of UUID for ease of use: + + | ``resources:CUSTOM_ACCELERATOR_FPGA=1`` + | ``trait:CUSTOM_FPGA_INTEL_ARRIA10=required`` + | ``trait:CUSTOM_PROGRAMMABLE=required`` + | ``function_name:=required`` + (Not interpreted by Nova.) + + * NOTE: This assumes the operator has configured the function name + as a property of the bitstream image in Glance. The FPGA + hardware is not expected to expose function names, and so + Cyborg will not represent function names as traits. + + * A flavor may ask for other RCs, such as local memory. + + * A flavor may ask for multiple accelerators, using the granular resource + request syntax. Cyborg can tie function and bitstream fields in + the extra_specs to resources/traits using an extension of the granular + resource request syntax (see References) which is not interpreted by Nova. + + | ``resourcesN: CUSTOM_ACCELERATOR_FPGA=1`` + | ``traitsN: CUSTOM_FPGA_INTEL_ARRIA10=required`` + | ``othersN: function:CUSTOM_FPGA_FUNCTION_INTEL_=required`` + +Scheduling workflow +-------------------- +We now look at the scheduling flow when each device implements only +one function. Devices with multiple functions are outside the scope for now. + + * A request spec with a flavor comes to Nova conductor/scheduler. + + * Placement API returns the list of RPs which contain the requested + resources with matching traits. (With nested RP support, the returned + RPs are device/region RPs. Without it, they are compute node RPs.) + + * FPGA-specific: For AFaaS orchestration-programmed use case, Placement + will return matching devices but they may not have the requested + function. So, Cyborg may provide a weigher which checks the + allocation candidates to see which ones have the required function trait, + and ranks them higher. This requires no change to Cyborg DB. + + * The request_spec goes to compute node (ignoring Cells for now). 
+ + NOTE: When one device/region implements multiple functions and + orchestration-driven programming is desired, the inventory of that + device needs to be adjusted. + This can be addressed later and is not a priority for Rocky release. + See References. + + * Nova compute calls os-acc/Cyborg [#os-acc]_. + + * FPGA-specific: If the request spec asks for a function X in extra specs, + but X is not present in the selected region RP, Cyborg should program + that region. + + * Cyborg should associate RPs/RCs and PFs/VFs with Deployables in its + internal DB. It can use such mappings associating the requested resource + (device/function) with some attach handle that can be used to + attach the resource to an instance (such as a PCI function). + +NOTE : This flow is PCI-agnostic: no PCI whitelists involved. + +Handling Multiple Functions Per Device +-------------------------------------- + +Alternatives +------------ + +N/A + +Data model impact +----------------- + +Following changes are needed in Cyborg. + +* Do not publish PCI functions as resources in Nova. Instead, publish + RC/RP info to Nova, and keep RP-PCI mapping internally. + +* Cyborg should associate RPs/RCs and PFs/VFs with Deployables in its + internal DB. + +* Driver/agent interface needs to report device/region types so that + RCs can be created. + +* Deployables table should track which RP corresponds to each Deployable. + +REST API impact +--------------- + +None + +Security impact +--------------- + +This change allows tenants to initiate FPGA bitstream programming. To mitigate +the security impact, it is proposed that only 2 methods are offered for +programming (flavor asks for a bitstream, or the running instance asks for +specific bitstreams) and both are handled through Cyborg. There is no direct +access from an instance to an FPGA. 
+ +Notifications impact +-------------------- + +None + +Other end user impact +--------------------- + +None + +Performance Impact +------------------ + +Other deployer impact +--------------------- + +None + +Developer impact +---------------- + +None + +Implementation +============== + +Assignee(s) +----------- + +None + +Work Items +---------- + +* Decide specific changes needed in Cyborg conductor, db, agent and drivers. + +Dependencies +============ + +* `Nested Resource Providers + `_ + +* `Nova Granular Requests + `_ + +NOTE: the granular requests feature is needed to define a flavor that requests +non-identical accelerators, but is not needed for Cyborg development in Rocky. + +Testing +======= + +For each vendor driver supported in this release, we need to integrate the +corresponding FPGA type(s) in the CI infrastructure. + +Documentation Impact +==================== + +None + +References +========== + +.. [#os-acc] `Specification for Compute Node `_ + +.. [#nRP] `Nested RPs in Rocky `_ + +.. [#drv-api] `Specification for Cyborg Agent-Driver API `_ + +.. [#flavor] `Custom Resource Classes in Flavors `_ + +.. [#qspec] `Cyborg Nova Queens Spec `_ + +.. [#ptg] `Rocky PTG Etherpad for Cyborg Nova Interaction `_ + +.. [#multifn] `Detailed Cyborg/Nova scheduling `_ + +.. [#mails] `Openstack-dev email discussion `_ + + + +History +======= + +Optional section intended to be used each time the spec is updated to describe +new design, API or any database schema updated. Useful to let reader know +what happened over time. + +.. list-table:: Revisions + :header-rows: 1 + + * - Release Name + - Description + * - Rocky + - Introduced diff --git a/specs/rocky/approved/resource-quotas.rst b/specs/rocky/approved/resource-quotas.rst new file mode 100644 index 0000000..b4fad8a --- /dev/null +++ b/specs/rocky/approved/resource-quotas.rst @@ -0,0 +1,204 @@ +.. + This work is licensed under a Creative Commons Attribution 3.0 Unported + License. 
+ + http://creativecommons.org/licenses/by/3.0/legalcode + +================================ +Quota Usage for Cyborg Resources +================================ + +Launchpad blueprint: +https://blueprints.launchpad.net/openstack-cyborg/+spec/cyborg-resource-quota + +There are multiple ways to slice an OpenStack cloud. Imposing quota on these +various slices puts a limitation on the amount of resources that can be +consumed which helps to guarantee "fairness" or fair distribution of resources +at the creation time. If a particular project needs more resources, the +concept of quota gives the ability to increase the resource count on-demand, +given that the system constraints are not exceeded. + + +Problem description +=================== +At present in Cyborg we don't have the concept of Quota on acceleration +resources, so users can consume as many resources as they want. +Quotas are tied closely to physical resources and billable entities, hence from +Cyborg's perspective, it helps to limit the allocation and consumption +of a particular kind of resource to a certain value. + +In place of implementing quota like other services, we want to enable +the unified limit which is provided by Keystone to manage our quota limit[1]. +With unified limits, all limits will be set in Keystone and enforced by +oslo.limit. So we decided to implement the quota usage part first. +Once the oslo.limit is ready for other services, Cyborg will invoke oslo.limit +to get the limit information and do limit check etc. + +This spec aims at the implementation of quota usage in Cyborg. As the +oslo.limit is not finished yet, we can directly set the value of limit +manually, and reserve the function calling oslo.limit with a "pass" inside. + + +Use cases +--------- +Alice is an admin. She would like to have a feature which will give her +details of Cyborg acceleration resource consumptions so that she can manage her +resources appropriately.
+ +She might run into the following scenarios: + +* Ability to know current resource consumption. + +* Ability to prohibit overuse by a project. + +* Prevent situations where users in a project get starved because users in + other projects consume all the resources. "Quota Management" would help to + guarantee "fairness". + +* Prevent DoS-style attacks, abuse or errors by users, which lead to an + excessive amount of resource allocation. + + +Proposed change +=============== +Proposed changes are introducing a Quota_Usage Table which primarily stores +the quota usage assigned for each resource in a project, and a Reservation +Table to store every modification of resource usage. + +When a new resource allocation request comes, the 'reserved' field in the Quota +usages table will be updated. This acceleration resource is being used to set +up a VM. For example, the fpga quota hardlimit is 5 and 3 fpgas have +already been used, then two new fpga requests come in. Since we have 3 fpgas +already used, the 'used' field will be set to 3. Now the 'reserved' +field will be set to 2 until the fpga attachment is successful. Once +the attachment is done this field will be reset to 0, and the 'used' +count will be updated from 3 to 5. So at this moment, hardlimit is 5, used +is 5 and reserved is 0. If one more request comes in, this request +will be rejected since there is not enough quota available. + +In general, + +Resource quota available = Resource hard_limit - [ +(Resource reserved + Resources already allocated for project)] + +In this spec, we just focus on the update of quota usage and we will not check +if a user has already exceeded the quota limit. The limit management will be +set in Keystone in the future and we just need to invoke the oslo.limit. + +Alternatives +------------ +At present there is no quota infrastructure in Cyborg.
+ +Adding a Quota Management layer at the Orchestration layer could be an +alternative. However, our approach will give a finer view of resource +consumptions at the IaaS layer which can be used while provisioning Cyborg +resources. + +Data model impact +----------------- +New quota usages and reservation tables will be added to the Cyborg database to +store quota consumption for each resource in a project. + +Quota usages table: + ++---------------+--------------+------+-----+---------+----------------+ +| Field | Type | Null | Key | Default | Extra | ++---------------+--------------+------+-----+---------+----------------+ +| created_at | datetime | YES | | NULL | | +| updated_at | datetime | YES | | NULL | | +| id | int(11) | NO | PRI | NULL | auto_increment | +| project_id | varchar(255) | YES | MUL | NULL | | +| resource | varchar(255) | NO | | NULL | | +| reserved | int(11) | NO | | NULL | | +| used | int(11) | NO | | NULL | | ++---------------+--------------+------+-----+---------+----------------+ + +Quota reservation table: + ++------------+--------------+------+-----+---------+----------------+ +| Field | Type | Null | Key | Default | Extra | ++------------+--------------+------+-----+---------+----------------+ +| created_at | datetime | YES | | NULL | | +| updated_at | datetime | YES | | NULL | | +| deleted_at | datetime | YES | | NULL | | +| deleted | tinyint(1) | YES | | NULL | | +| id | int(11) | NO | PRI | NULL | auto_increment | +| uuid | varchar(36) | NO | | NULL | | +| usage_id | int(11) | NO | MUL | NULL | | +| project_id | varchar(255) | YES | MUL | NULL | | +| resource | varchar(255) | YES | | NULL | | +| delta | int(11) | NO | | NULL | | +| expire | datetime | YES | | NULL | | ++------------+--------------+------+-----+---------+----------------+ + +We will also introduce a QuotaEngine class which represents the set of +recognized quotas and a DbQuotaDriver class which performs checks to enforce +quotas and also allows obtaining quota
information. + +REST API impact +--------------- +Not sure if we need to expose GET quota usage before oslo.limit settles down. + +Security impact +--------------- +None + +Notifications impact +-------------------- +None + +Other end user impact +--------------------- +None + +Performance Impact +------------------ +None + +Other deployer impact +--------------------- +None + +Developer impact +---------------- +None + +Implementation +============== + +Assignee(s) +----------- + +Primary assignee: +Xinran WANG + +Other contributors: +None + +Work Items +---------- + +* Introduce Quota usages and Reservation tables in the Cyborg database. +* Update these two tables during allocation and deallocation of resources. +* Reserve a placeholder function which will invoke oslo.limit with a "pass" + inside. +* Add a rollback mechanism for when allocation fails. + +Dependencies +============ +None + +Testing +======= + +* Each commit will be accompanied by unit tests. +* Gate functional tests will also be covered. + +Documentation Impact +==================== +None + +References +========== + +[1] https://review.openstack.org/#/c/540803 diff --git a/specs/template.rst b/specs/template.rst index 8760027..1f1c1bb 100644 --- a/specs/template.rst +++ b/specs/template.rst @@ -20,7 +20,7 @@ respectively. Some notes about the cyborg-spec and blueprint process: * Not all blueprints need a spec. For more information see - http://docs.openstack.org/developer/nova/blueprints.html#specs + http://docs.openstack.org/developer/cyborg/blueprints.html#specs * The aim of this document is first to define the problem we need to solve, and second agree the overall approach to solve that problem. @@ -99,8 +99,8 @@ If this is one part of a larger effort make it clear where this piece ends. In other words, what's the scope of this effort?
At this point, if you would like to just get feedback on if the problem and -proposed change fit in Cyborg, you can stop here and post this for review to get -preliminary feedback. If so please say: +proposed change fit in Cyborg, you can stop here and post this for review to +get preliminary feedback. If so please say: Posting to get preliminary feedback on the scope of this spec. Alternatives diff --git a/specs/tests/__init__.py b/specs/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/specs/tests/base.py b/specs/tests/base.py deleted file mode 100644 index 1c30cdb..0000000 --- a/specs/tests/base.py +++ /dev/null @@ -1,23 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2010-2011 OpenStack Foundation -# Copyright (c) 2013 Hewlett-Packard Development Company, L.P. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -from oslotest import base - - -class TestCase(base.BaseTestCase): - - """Test case base class for all unit tests.""" diff --git a/specs/tests/test_specs.py b/specs/tests/test_specs.py deleted file mode 100644 index d4ea7e2..0000000 --- a/specs/tests/test_specs.py +++ /dev/null @@ -1,28 +0,0 @@ -# -*- coding: utf-8 -*- - -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -""" -test_specs ----------------------------------- - -Tests for `specs` module. -""" - -from specs.tests import base - - -class TestSpecs(base.TestCase): - - def test_something(self): - pass