Add support for multi-controller HA deployments.

Change-Id: I819cec71cdbc8df7a85bd4f41f36493b34c8bdcc
This commit is contained in:
Emma Gordon 2015-11-30 12:02:12 +00:00
parent 8c16541bdf
commit e6c8fab962
8 changed files with 148 additions and 61 deletions

View File

@ -6,29 +6,55 @@ import yaml
from pluginutils import NODES_CONFIG
# Directory that holds the helper scripts launched by this module.
# NOTE(review): the "##REPLACE_ON_INSTALL##" placeholder is presumably
# rewritten with the real path when the plugin is installed - confirm.
SCRIPTS_LOCATION = "##REPLACE_ON_INSTALL##/"

# Helper scripts, both resolved relative to SCRIPTS_LOCATION so the path is
# defined in exactly one place.
RECONFIGURE_ROUTE_REFLECTOR = SCRIPTS_LOCATION + "calico_route_reflector.sh"
UPDATE_ETCD_CLUSTER = SCRIPTS_LOCATION + "update_etcd_cluster.sh"
def _get_configured_nodes(roles):
    """Return the node entries in NODES_CONFIG whose role is in ``roles``.

    ``roles`` is any container usable with ``in`` (a list of role strings
    in the visible callers).  The result is a list of the matching entries
    from the config's "nodes" sequence, in file order.
    """
    with open(NODES_CONFIG, "r") as f:
        config = yaml.safe_load(f)

    return [node for node in config["nodes"] if node["role"] in roles]


def _get_configured_compute_nodes():
    """Return the configured nodes whose role is "compute".

    Thin wrapper around _get_configured_nodes so the no-argument name
    stays available.
    """
    return _get_configured_nodes(["compute"])
def _get_compute_nodes():
    """Return the configured nodes whose role is "compute"."""
    compute_roles = ["compute"]
    return _get_configured_nodes(compute_roles)
def _get_control_nodes():
    """Return all controller nodes, with every role reported as "controller".

    Both "controller" and "primary-controller" entries are selected; the
    primary's role string is rewritten in the returned entries.
    """
    control_roles = ["controller", "primary-controller"]
    nodes = _get_configured_nodes(control_roles)

    # The rewrite only touches this script's in-memory copy of the node list;
    # the Fuel deployment itself keeps its primary-controller role.  Here we
    # only care whether a node is compute or control, not which control node
    # is the primary.
    primaries = (entry for entry in nodes
                 if entry["role"] == "primary-controller")
    for primary in primaries:
        primary["role"] = "controller"

    return nodes
class DeploymentChangeHandler(pyinotify.ProcessEvent):
    """Re-run the helper scripts when the configured node lists change.

    The handler remembers the compute and control node lists it last saw
    and compares them with freshly loaded lists on every modify event.
    """

    def __init__(self):
        super(DeploymentChangeHandler, self).__init__()
        # Snapshot of the deployment at start-up; later events are diffed
        # against these lists.
        self.compute_nodes = _get_compute_nodes()
        self.control_nodes = _get_control_nodes()

    def process_IN_MODIFY(self, event):
        """Reload the node lists and run the scripts the change requires.

        ``event`` is not inspected; any modify notification triggers a full
        reload of both lists.
        """
        current_compute_nodes = _get_compute_nodes()
        current_control_nodes = _get_control_nodes()

        if current_control_nodes != self.control_nodes:
            # A control-node change needs both the route reflector config
            # and the etcd cluster membership rebuilt.
            subprocess.call(RECONFIGURE_ROUTE_REFLECTOR)
            subprocess.call(UPDATE_ETCD_CLUSTER)
        elif current_compute_nodes != self.compute_nodes:
            # Only compute nodes changed: the route reflector alone is
            # reconfigured.
            subprocess.call(RECONFIGURE_ROUTE_REFLECTOR)

        # Record the new state after every event so a change is acted on
        # once rather than on each subsequent notification.
        self.compute_nodes = current_compute_nodes
        self.control_nodes = current_control_nodes
if __name__ == "__main__":

View File

@ -10,7 +10,7 @@ set -x
echo "Hi, I'm a compute node!"
this_node_address=$(python get_node_ip.py `hostname`)
controller_node_address=$(python get_controller_ip.py)
controller_node_addresses=$(python get_node_ips_by_role.py controller)
# Get APT key for binaries.projectcalico.org.
@ -52,6 +52,12 @@ apt-get update
apt-get -y install etcd
for controller_address in ${controller_node_addresses[@]}
do
initial_cluster+="${controller_address}=http://${controller_address}:2380,"
done
initial_cluster=${initial_cluster::-1} # remove trailing comma
service etcd stop
rm -rf /var/lib/etcd/*
awk '/exec \/usr\/bin\/etcd/{while(getline && $0 != ""){}}1' /etc/init/etcd.conf > tmp
@ -60,7 +66,7 @@ cat << EXEC_CMD >> /etc/init/etcd.conf
exec /usr/bin/etcd -proxy on \\
-listen-client-urls http://127.0.0.1:4001 \\
-advertise-client-urls http://127.0.0.1:7001 \\
-initial-cluster controller=http://${controller_node_address}:2380
-initial-cluster ${initial_cluster}
EXEC_CMD
service etcd start
@ -143,7 +149,7 @@ apt-get -y install calico-compute bird
# script. You should consult the relevant documentation for your chosen BGP
# stack.
calico-gen-bird-conf.sh $this_node_address $controller_node_address 64511
calico-gen-bird-mesh-conf.sh $this_node_address 64511 ${controller_node_addresses[@]}
# Edit the /etc/calico/felix.cfg file:
# Change the MetadataAddr setting to 127.0.0.1.

View File

@ -10,6 +10,7 @@ set -x
echo "Hi, I'm a controller node!"
this_node_address=$(python get_node_ip.py `hostname`)
controller_node_addresses=$(python get_node_ips_by_role.py controller)
# Get APT key for binaries.projectcalico.org.
@ -51,19 +52,26 @@ apt-get update
apt-get -y install etcd
for controller_address in ${controller_node_addresses[@]}
do
initial_cluster+="${controller_address}=http://${controller_address}:2380,"
done
initial_cluster=${initial_cluster::-1} # remove trailing comma
service etcd stop
rm -rf /var/lib/etcd/*
awk '/exec \/usr\/bin\/etcd/{while(getline && $0 != ""){}}1' /etc/init/etcd.conf > tmp
mv tmp /etc/init/etcd.conf
cat << EXEC_CMD >> /etc/init/etcd.conf
exec /usr/bin/etcd -name controller \\
exec /usr/bin/etcd -name ${this_node_address} \\
-advertise-client-urls "http://${this_node_address}:2379,http://${this_node_address}:4001" \\
-listen-client-urls "http://0.0.0.0:2379,http://0.0.0.0:4001" \\
-listen-peer-urls "http://0.0.0.0:2380" \\
-initial-advertise-peer-urls "http://${this_node_address}:2380" \\
-initial-cluster-token fuel-cluster-1 \\
-initial-cluster controller=http://${this_node_address}:2380 \\
-initial-cluster ${initial_cluster} \\
-initial-cluster-state new
EXEC_CMD
service etcd start

View File

@ -8,8 +8,10 @@ set -x
echo "Hi, I'm a route_reflector node!"
this_node_address=$(python get_node_ip.py `hostname`)
controller_node_addresses=$(python get_node_ips_by_role.py controller)
bgp_peers=$(python get_rr_peers.py)
client_peers=$(python get_node_ips_by_role.py compute)
route_reflector_peers=("${controller_node_addresses[@]/$this_node_address}")
# Generate basic config for a BIRD BGP route reflector.
cat > /etc/bird/bird.conf <<EOF
@ -38,24 +40,33 @@ protocol device {
}
EOF
# Add a BGP protocol stanza for each compute node.
for node in $bgp_peers; do
if [ $node != $this_node_address ]; then
cat >> /etc/bird/bird.conf <<EOF
# Add a BGP protocol stanza for all peers.
for node in ${client_peers[@]} ${route_reflector_peers[@]}; do
cat >> /etc/bird/bird.conf <<EOF
protocol bgp {
description "$node";
local as 64511;
neighbor $node as 64511;
multihop;
EOF
if [[ "${client_peers[@]}" =~ "${node}" ]]; then
cat >> /etc/bird/bird.conf <<EOF
description "Client $node";
rr client;
EOF
else
cat >> /etc/bird/bird.conf <<EOF
description "Route Reflector $node";
EOF
fi
cat >> /etc/bird/bird.conf <<EOF
rr cluster id 1.2.3.4;
import all;
export all;
source address ${this_node_address};
}
EOF
fi
done
# Restart BIRD with the new config.

View File

@ -1,16 +0,0 @@
#!/usr/bin/env python
# Copyright 2015 Metaswitch Networks
import yaml
# Print the internal address of the first node whose role is
# "primary-controller", or "None" when the config has no such node.
with open("/etc/compute.yaml", "r") as config_file:
    deployment = yaml.safe_load(config_file)

# next() stops at the first match, so later entries are never inspected.
controller_ip = next(
    (entry["internal_address"] for entry in deployment["nodes"]
     if entry["role"] == "primary-controller"),
    None
)

print(controller_ip)

View File

@ -0,0 +1,32 @@
#!/usr/bin/env python
# Copyright 2015 Metaswitch Networks
import argparse
import yaml
from pluginutils import NODES_CONFIG
def main(node_roles):
    """Return the internal addresses of nodes whose role is in ``node_roles``.

    The node list is read from NODES_CONFIG; addresses are returned in the
    order the nodes appear in that file.
    """
    with open(NODES_CONFIG, "r") as config_file:
        config = yaml.safe_load(config_file)

    matching_ips = []
    for entry in config["nodes"]:
        if entry["role"] in node_roles:
            matching_ips.append(entry["internal_address"])
    return matching_ips
if __name__ == "__main__":
    # Command line: a single positional role, either "compute" or
    # "controller".  Matching addresses are printed space-separated on one
    # line; nothing is printed when there are none.
    parser = argparse.ArgumentParser()
    parser.add_argument("node_role", choices=["compute", "controller"])
    requested_role = parser.parse_args().node_role

    roles = [requested_role]
    if requested_role == "controller":
        # Asking for controllers also selects the primary controller.
        roles.append("primary-controller")

    node_ips = main(roles)
    if node_ips:
        print(" ".join(node_ips))

View File

@ -1,22 +0,0 @@
#!/usr/bin/env python
# Copyright 2015 Metaswitch Networks
import yaml
from pluginutils import NODES_CONFIG
def main():
    """Return the internal addresses of every node whose role is "compute"."""
    with open(NODES_CONFIG, "r") as config_file:
        config = yaml.safe_load(config_file)

    # Compute nodes are the only peers of the route reflector.
    peer_ips = []
    for entry in config["nodes"]:
        if entry["role"] == "compute":
            peer_ips.append(entry["internal_address"])
    return peer_ips
if __name__ == "__main__":
    # Print the peer addresses space-separated on one line; print nothing
    # when there are no peers.
    addresses = main()
    if addresses:
        print(" ".join(addresses))

View File

@ -0,0 +1,42 @@
#!/bin/bash
# Copyright 2015 Metaswitch Networks
# Rewrite this node's etcd upstart stanza so that its initial cluster lists
# every controller node, restart etcd with an empty data directory, and
# retry the restart a few times until the cluster reports healthy.

this_node_address=$(python get_node_ip.py "$(hostname)")

# get_node_ips_by_role.py prints the addresses space-separated on one line;
# read them into a real array so the loop below does not rely on
# word-splitting of a scalar.
read -r -a controller_node_addresses <<< "$(python get_node_ips_by_role.py controller)"

# Build "<addr>=http://<addr>:2380,..." - each member is named after its own
# address, matching the -name given to etcd below.
initial_cluster=""
for node_address in "${controller_node_addresses[@]}"
do
  initial_cluster+="${node_address}=http://${node_address}:2380,"
done
initial_cluster=${initial_cluster%,} # remove trailing comma

# With no controllers there is nothing valid to configure; stop before the
# existing etcd data is wiped.
if [[ -z "${initial_cluster}" ]]; then
  echo "No controller node addresses found; leaving etcd untouched." >&2
  exit 1
fi

service etcd stop
rm -rf /var/lib/etcd/*

# Drop the existing exec stanza from the upstart job: everything from the
# "exec /usr/bin/etcd" line up to the next blank line (or end of file).
awk '/exec \/usr\/bin\/etcd/{while(getline && $0 != ""){}}1' /etc/init/etcd.conf > tmp
mv tmp /etc/init/etcd.conf

# Append the replacement stanza.  The heredoc is unquoted on purpose: the
# variables expand now and each "\\" is written out as a single backslash
# line continuation.
cat << EXEC_CMD >> /etc/init/etcd.conf
exec /usr/bin/etcd -name ${this_node_address} \\
-advertise-client-urls "http://${this_node_address}:2379,http://${this_node_address}:4001" \\
-listen-client-urls "http://0.0.0.0:2379,http://0.0.0.0:4001" \\
-listen-peer-urls "http://0.0.0.0:2380" \\
-initial-advertise-peer-urls "http://${this_node_address}:2380" \\
-initial-cluster-token fuel-cluster-1 \\
-initial-cluster ${initial_cluster} \\
-initial-cluster-state new
EXEC_CMD

service etcd start

# Make up to five health checks, restarting etcd after each failure.  The
# comparison is arithmetic; "<" inside [[ ]] would compare the operands as
# strings.
retry_count=0
while (( retry_count < 5 )); do
  if etcdctl cluster-health; then
    break
  fi
  retry_count=$((retry_count + 1))
  service etcd restart
  sleep 2
done