Added hardware reboot
We need to control services via shaker, and we also need the ability to manage power via IPMI / VM power-control commands.
parent c8bdd15ab4
commit 223c950f45
@@ -10,9 +10,9 @@ def run_command():
     r = request.get_json(force=True)
     process = subprocess.Popen(r["command"].split(), stdout=subprocess.PIPE)
     output = process.communicate()[0]

     return output


 if __name__ == '__main__':
     app.run(host="0.0.0.0", debug=True)
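For illustration, a minimal client-side sketch of how this endpoint is meant to be called; the host and port are assumptions (the agent above runs on Flask's default port 5000):

    import json

    import requests

    # Ask the shaker agent to execute a shell command and return its stdout.
    r = requests.post("http://localhost:5000/run_command",
                      headers={"Content-Type": "application/json"},
                      data=json.dumps({"command": "uptime"}))
    print(r.text)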
@@ -6,7 +6,7 @@ from rally import osclients

 LOG = logging.getLogger(__name__)


-@base.context(name="cloud_nodes", order=1000)
+@base.context(name="cloud_nodes", order=800)
 class CloudNodesContext(base.Context):
     """This context allows to define the list of nodes in the cloud"""
@@ -18,6 +18,10 @@ class CloudNodesContext(base.Context):
             "controllers": {
                 "type": "array",
                 "default": []
             },
+            "power_control_node": {
+                "type": "object",
+                "default": {}
+            }
         }
     }
@@ -25,6 +29,8 @@ class CloudNodesContext(base.Context):
     def setup(self):
         """This method is called before the task start"""
         self.context["controllers"] = self.config.get("controllers", [])
+        power_control_node = self.config.get("power_control_node", {})
+        self.context["power_control_node"] = power_control_node

     def cleanup(self):
         """This method is called after the task finish"""
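For reference, this is the shape of configuration the context now accepts; the fragment mirrors the sample task file at the end of this commit:

    "cloud_nodes": {
        "controllers": [
            {
                "shaker_agent_id": "1",
                "hardware_power_on_cmd": "VBoxManage startvm fuel-slave-1",
                "hardware_power_off_cmd": "VBoxManage controlvm fuel-slave-1 poweroff",
                "power_off_timeout": 20,
                "power_on_timeout": 30
            }
        ],
        "power_control_node": {
            "shaker_agent_id": "localhost"
        }
    }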
@@ -6,7 +6,7 @@ from rally import osclients

 LOG = logging.getLogger(__name__)


-@base.context(name="recover_cloud", order=999)
+@base.context(name="recover_cloud", order=900)
 class CloudNodesContext(base.Context):
     """This context allows to recover cloud after disaster tests"""
@@ -14,32 +14,56 @@ class CloudNodesContext(base.Context):
         "type": "object",
         "$schema": consts.JSON_SCHEMA,
         "additionalProperties": False,
-        "properties": {}
+        "properties": {
+            "checks": {
+                "type": "array",
+                "default": []
+            }
+        }
     }

     ACTIONS = {
         "stop rabbitmq service": {
             "do": "/etc/init.d/rabbitmq-server stop",
             "undo": "/etc/init.d/rabbitmq-server start"
         },
         "ban rabbitmq service with pcs": {
             "do": "pcs resource ban rabbitmq",
             "undo": "pcs resource clear rabbitmq"
         }
     }

+    def check_rabbitmq_cluster_status(self, controllers):
+        command = "rabbitmqctl cluster_status"
+
+        nodes = []
+        for controller in controllers:
+            output = self.run_command(controller["shaker_agent_id"], command)
+            for line in output.splitlines():
+                if "nodes" in line and "running_nodes" not in line:
+                    nodes = [node for node in line.split("'")
+                             if "rabbit" in node]
+                if "running_nodes" in line:
+                    active_nodes = [node for node in line.split("'")
+                                    if "rabbit" in node]
+                    for node in nodes:
+                        if node not in active_nodes:
+                            return False
+        return True
+
+    def run_command(self, node, command, recover_command=None,
+                    recover_timeout=0):
+        if recover_command is not None:
+            action = {"node": node, "command": command,
+                      "timeout": recover_timeout}
+            self.context["recover_commands"].append(action)
+
+        r = requests.post("http://{0}/run_command".format(node),
+                          headers={"Content-Type": "application/json"},
+                          data=json.dumps({"command": command}))
+
+        return r.text
+
     def setup(self):
         """This method is called before the task start"""
         self.context["actions"] = self.ACTIONS

         # done_actions contains the shaker agent id and the name of the
         # action that was executed, for example:
         # self.context["done_actions"] = [{"node": "node-1", "command": "ls"}]
         self.context["done_actions"] = []
+        self.context["recover_commands"] = []
+        self.context["checks"] = self.config.get("checks", [])

     def cleanup(self):
         """This method is called after the task finish"""
         for action in self.context["done_actions"]:
             # TODO: we need to import shaker somehow :)
             shaker.run_command_on_node(action["node"],
                                        self.ACTIONS[action["command"]]["undo"])
+        for action in self.context["recover_commands"]:
+            self.run_command(action["node"], action["command"])
+            time.sleep(action.get("timeout", 0))
+
+        controllers = self.context["controllers"]
+        if "rabbitmq_cluster_status" in self.context["checks"]:
+            if self.check_rabbitmq_cluster_status(controllers) is False:
+                raise Exception("RabbitMQ cluster wasn't recovered")
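The cluster check above scrapes node names out of the Erlang terms printed by rabbitmqctl; here is a minimal sketch of that parsing against one sample line (the output format is an assumption based on a typical "rabbitmqctl cluster_status" run, where hyphenated node names come out single-quoted):

    # Sample line as printed by "rabbitmqctl cluster_status" (assumed format):
    line = "{running_nodes,['rabbit@node-1','rabbit@node-2']},"
    # Splitting on single quotes leaves the node atoms at the odd positions.
    active_nodes = [node for node in line.split("'") if "rabbit" in node]
    print(active_nodes)  # ['rabbit@node-1', 'rabbit@node-2']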
@@ -1,5 +1,7 @@
 import json
 import requests
+import time

 from rally.benchmark.scenarios import base

@@ -11,22 +13,28 @@ class BaseDisasterScenario(base.Scenario):
                           image=self.context["shaker_image"],
                           flavor=self.context["default_flavor"],
                           {"auto_assign_nic": True})
         return vm

-    def execute_command_on_shaker_node(self, node, command):
-        cmd = {"command": command}
-        r = requests.post("http://{0}/run_command".format(node),
-                          headers={"Content-Type": "application/json"},
-                          data=json.dumps(cmd))
-
-        return r.text
-
-    def run_command(self, node, command):
-        return self.execute_command_on_shaker_node(node, command)
-
-    def run_disaster_command(self, node, command):
-        do = self.context["actions"][command]["do"]
-
-        done = {"node": node, "command": command}
-        self.context["done_actions"].append(done)
-
-        self.execute_command_on_shaker_node(node, command)
+    def run_command(self, node, command, recover_command=None,
+                    recover_timeout=0):
+        if recover_command is not None:
+            action = {"node": node, "command": command,
+                      "timeout": recover_timeout}
+            self.context["recover_commands"].append(action)
+
+        r = requests.post("http://{0}/run_command".format(node),
+                          headers={"Content-Type": "application/json"},
+                          data=json.dumps({"command": command}))
+
+        return r.text
+
+    def power_off_controller(self, controller_id):
+        control_node = self.context["power_control_node"]
+        controller = self.context["controllers"][controller_id]
+
+        self.run_command(control_node["shaker_agent_id"],
+                         command=controller["hardware_power_off_cmd"],
+                         recover_command=controller["hardware_power_on_cmd"],
+                         recover_timeout=controller["power_on_timeout"])
+        time.sleep(controller["power_off_timeout"])
+
+    def power_off_main_controller(self):
+        pass
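The sample task below drives VirtualBox VMs, but since the commit message also mentions IPMI, a controller entry can carry ipmitool commands instead; a sketch with a made-up BMC address and credentials:

    # Hypothetical controller entry for real hardware managed over IPMI.
    controller = {
        "shaker_agent_id": "1",
        "hardware_power_off_cmd": "ipmitool -H 10.20.0.2 -U admin -P admin chassis power off",
        "hardware_power_on_cmd": "ipmitool -H 10.20.0.2 -U admin -P admin chassis power on",
        "power_off_timeout": 20,
        "power_on_timeout": 120
    }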
@@ -1,35 +1,36 @@
+import random
+import time
+
 import base_disaster_scenario
 from rally.benchmark.scenarios import base


-class BaseDisasterScenario(base_disaster_scenario.BaseDisasterScenario):
+class RabbitMQDisasterScenarios(base_disaster_scenario.BaseDisasterScenario):

     @base.scenario()
-    def test_rabbitmq_failover01(self):
-        """ Test Scenario:
-
-        1. Deploy OpenStack cloud with 3 controllers
-        2. Stop RabbitMQ services on all controllers
-        3. Start RabbitMQ on one controller
-        4. Create VM 10 times, create networks, volumes, upload images,
-           create users and etc.
-        5. Start all RabbitMQ services and repeat step #4
-        """
-        for i in xrange(0, 3):
-            self.run_disaster_command(self.context["controllers"][i],
-                                      "stop rabbitmq service")
-        self.run_command(self.context["controllers"][0],
-                         "/etc/init.d/rabbitmq-server start")
-
-        # (tnurlygayanov): TODO:
-        # Need to write the functions which will verify that cloud
-        # works fine: create/delete several VMs, networks, images,
-        # volumes and etc.
-        if i in xrange(0, 10):
-            self.boot_vm("test{0}".format(i))
-        for i in xrange(0, 3):
-            self.run_command(self.context["controllers"][i],
-                             "/etc/init.d/rabbitmq-server start")
+    def power_off_one_controller(self):
+        """Power off one controller and verify the cloud.
+
+        Setup:
+        OpenStack cloud with at least 3 controllers.
+
+        Scenario:
+        1. Power off one controller
+        2. Verify the cloud: create a VM 10 times, create networks,
+           volumes, upload images
+        """
+        controller_id = random.randint(
+            0, len(self.context["controllers"]) - 1)
+        self.power_off_controller(controller_id)
+
+        vm_list = []
+        for i in xrange(0, 10):
+            vm = self.boot_vm("test{0}".format(i))
+            vm_list.append(vm)
+
+        timeout = 300
+        active_vms = []
+        while timeout > 0 and len(active_vms) < 10:
+            active_vms = [vm for vm in vm_list if vm.state == "ACTIVE"]
+            time.sleep(1)
+            timeout -= 1
+
+        if len(active_vms) < 10:
+            raise Exception("Can't boot VMs")
@@ -1,13 +1,41 @@
 {
-    "shaker_controller.sample_print": [
+    "RabbitMQDisasterScenarios.power_off_one_controller": [
         {
             "runner": {
                 "type": "serial",
                 "times": 5
             },
             "context": {
+                "recover_cloud": {
+                    "checks": ["rabbitmq_cluster_status"]
+                },
                 "cloud_nodes": {
-                    "controllers": ["1", "2", "3"]
+                    "controllers": [
+                        {
+                            "shaker_agent_id": "1",
+                            "hardware_power_on_cmd": "VBoxManage startvm fuel-slave-1",
+                            "hardware_power_off_cmd": "VBoxManage controlvm fuel-slave-1 poweroff",
+                            "power_off_timeout": 20,
+                            "power_on_timeout": 30
+                        },
+                        {
+                            "shaker_agent_id": "2",
+                            "hardware_power_on_cmd": "VBoxManage startvm fuel-slave-2",
+                            "hardware_power_off_cmd": "VBoxManage controlvm fuel-slave-2 poweroff",
+                            "power_off_timeout": 20,
+                            "power_on_timeout": 30
+                        },
+                        {
+                            "shaker_agent_id": "3",
+                            "hardware_power_on_cmd": "VBoxManage startvm fuel-slave-3",
+                            "hardware_power_off_cmd": "VBoxManage controlvm fuel-slave-3 poweroff",
+                            "power_off_timeout": 20,
+                            "power_on_timeout": 30
+                        }
+                    ],
+                    "power_control_node": {
+                        "shaker_agent_id": "localhost"
+                    }
                 }
             }
         }
     ]
 }