Initial support for adding cluster nodes to an existing cluster

We add a function which will detect if rerunning the manifest would
mean adding any cluster nodes. For those nodes we trigger the cluster
node addition via:
pcs cluster auth newnode
pcs cluster node add newnode
pcs cluster start newnode

Initial run with one node in cluster_members:
Online: [ foobar-0 ]

Full list of resources:

 stonith-fence_amt-stonith-fence-1      (stonith:fence_amt):    Stopped
 Docker container: test_bundle [docker.io/sdelrio/docker-minimal-nginx]
   test_bundle-docker-0 (ocf::heartbeat:docker):        Started foobar-0
 ip-172.16.11.97        (ocf::heartbeat:IPaddr2):       Started foobar-0
 Clone Set: rabbitmq-clone [rabbitmq]
     Started: [ foobar-0 ]

Rerun with two additional nodes and by setting ::deep_compare
hiera keys to true so that resources get updated:
Online: [ foobar-0 foobar-1 foobar-2 ]

Full list of resources:

 stonith-fence_amt-stonith-fence-1      (stonith:fence_amt):    Started foobar-1
 ip-172.16.11.97        (ocf::heartbeat:IPaddr2):       Started foobar-2
 Clone Set: rabbitmq-clone [rabbitmq]
     Started: [ foobar-0 foobar-1 foobar-2 ]
 stonith-fence_amt-stonith-fence-2      (stonith:fence_amt):    Started foobar-0
 stonith-fence_amt-stonith-fence-3      (stonith:fence_amt):    Started foobar-1
 Docker container set: test_bundle [docker.io/sdelrio/docker-minimal-nginx]
   test_bundle-docker-0 (ocf::heartbeat:docker):        Started foobar-0
   test_bundle-docker-1 (ocf::heartbeat:docker):        Started foobar-2
   test_bundle-docker-2 (ocf::heartbeat:docker):        Started foobar-1

Ran about 50 scaleup tests and this node addition code has worked all
the time. We have intentionally not yet added removal support as that
needs to have a use-case and a lot of testing.

For this scaleup to work properly we need a fix for the firewall
ordering issue, otherwise this could be racy when stonith is configured.
(i.e. we'll need I01e681a6305e2708bf364781a2032265b146d065 if this
review ever gets backported).

Change-Id: Iac31035da98bd68a5481d97ee3765a99563db49f
This commit is contained in:
Michele Baldessari 2018-05-10 11:29:55 +02:00 committed by Alex Schultz
parent b11f579a5f
commit cd20731d96
3 changed files with 113 additions and 0 deletions

View File

@ -0,0 +1,54 @@
module Puppet::Parser::Functions
  # pcmk_nodes_added(cluster_members, crm_node_list = nil) -> Array<String>
  #
  # Returns the node names listed in +cluster_members+ that the running
  # pacemaker cluster does not know about yet (in the order they appear in
  # +cluster_members+). An empty array is returned when there is no cluster
  # to compare against: the crm_node executable is missing, `crm_node -l`
  # fails, or the supplied crm_node output is empty.
  #
  # Raises Puppet::Error (via +fail+) when +cluster_members+ is not a String
  # or when +crm_node_list+ is given but is not a String.
  newfunction(
    :pcmk_nodes_added,
    type: :rvalue,
    arity: -1,
    doc: <<-eof
Input data cluster_members string separated by a space:
* String 'n1 n2 n3'
* Output of `crm_node -l` (only used to ease unit testing) (optional)
Output forms:
* array - output the plain array of nodes that have been added compared
          to the running cluster. It returns an empty array in case the
          cluster is not set up or if crm_node returns an error
    eof
  ) do |args|
    nodes, crm_node_list = args

    # Validate the arguments before anything else so that bad input is
    # reported consistently, whether or not pacemaker is installed here.
    unless nodes.is_a?(String)
      fail "Got unsupported nodes input data: #{nodes.inspect}"
    end
    if crm_node_list && !crm_node_list.is_a?(String)
      fail "Got unsupported crm_node_list #{crm_node_list.inspect}"
    end

    if crm_node_list
      # The caller supplied the `crm_node -l` output (unit testing); an empty
      # output means the cluster is not set up, hence nothing was added.
      return [] if crm_node_list.empty?
      crm_nodes_output = crm_node_list
    else
      # no point in shelling out if the crm_node executable does not exist
      return [] if Facter::Util::Resolution.which('crm_node').nil?
      # A typical crm_node -l output is like the following:
      # [root@foobar-0 ~]# crm_node -l
      # 3 foobar-2 member
      # 1 foobar-0 member
      # 2 foobar-1 lost
      crm_nodes_output = `crm_node -l`
      # if the command fails we certainly did not add any nodes
      return [] unless $?.exitstatus == 0
    end

    # Nodes in either of these states are already part of the cluster.
    valid_states = %w(member lost)
    crm_nodes = []
    crm_nodes_output.each_line do |line|
      # Each line is "<id> <node> <state>"; blank or short lines yield a nil
      # state and are skipped.
      _id, node, state = line.split(' ')
      crm_nodes << node if state && valid_states.include?(state.downcase)
    end

    nodes_added = nodes.split(' ') - crm_nodes
    Puppet.debug("pcmk_nodes_added: #{nodes_added}")
    nodes_added
  end
end

View File

@ -176,6 +176,34 @@ class pacemaker::corosync(
}
if $setup_cluster {
# Detect if we are trying to add some nodes by comparing
# $cluster_members and the actual running nodes in the cluster.
# pcmk_nodes_added returns the entries of $cluster_members that are not
# yet listed by the running cluster (an empty array when there is none).
$nodes_added = pcmk_nodes_added($cluster_members)
# If we're rerunning this puppet manifest and $cluster_members
# contains more nodes than the currently running cluster
if count($nodes_added) > 0 {
$nodes_added.each |$node_to_add| {
# Step 1: add the new node to the existing cluster. Skipped when
# `pcs status` already shows the node on its "Online:" line. On success
# it notifies the start exec below.
exec {"Adding Cluster node: ${node_to_add} to Cluster ${cluster_name}":
unless => "${::pacemaker::pcs_bin} status 2>&1 | grep -e \"^Online:.* ${node_to_add} .*\"",
command => "${::pacemaker::pcs_bin} cluster node add ${node_to_add} --wait",
timeout => $cluster_start_timeout,
tries => $cluster_start_tries,
try_sleep => $cluster_start_try_sleep,
notify => Exec["node-cluster-start-${node_to_add}"],
tag => 'pacemaker-scaleup',
}
# Step 2: start cluster services on the new node. refreshonly means this
# only runs when triggered by the node-add exec above, and it is still
# guarded by the same "already online" check.
exec {"node-cluster-start-${node_to_add}":
unless => "${::pacemaker::pcs_bin} status 2>&1 | grep -e \"^Online:.* ${node_to_add} .*\"",
command => "${::pacemaker::pcs_bin} cluster start ${node_to_add} --wait",
timeout => $cluster_start_timeout,
tries => $cluster_start_tries,
try_sleep => $cluster_start_try_sleep,
refreshonly => true,
tag => 'pacemaker-scaleup',
}
}
# Order every exec tagged 'pacemaker-auth' before all scale-up execs.
# NOTE(review): the 'pacemaker-auth' execs are declared outside this hunk;
# presumably they run `pcs cluster auth` for the nodes - confirm.
Exec <|tag == 'pacemaker-auth'|> -> Exec <|tag == 'pacemaker-scaleup'|>
}
if ! $cluster_members_rrp {
$cluster_members_rrp_real = $cluster_members

View File

@ -0,0 +1,31 @@
require 'spec_helper'
describe 'pcmk_nodes_added' do
  # Each entry pairs a cluster_members string with the nodes expected to be
  # reported as added when ctr-0, ctr-1 and ctr-2 are already in the cluster.
  scaleup_cases = [
    ['ctr-0 ctr-1 ctr-2', []],
    ['ctr-0 ctr-1 ctr-2 ctr-3', ['ctr-3']],
    ['ctr-1 ctr-3 ctr-2', ['ctr-3']],
    ['', []]
  ]

  context 'interface' do
    it { is_expected.not_to eq(nil) }

    it do
      is_expected.to run.with_params(1)
        .and_raise_error(Puppet::Error, /Got unsupported nodes input data/)
    end

    it do
      is_expected.to run.with_params('foo', [])
        .and_raise_error(Puppet::Error, /Got unsupported crm_node_list/)
    end
  end

  # An empty crm_node output stands for a cluster that does not exist yet.
  it 'returns no added nodes because cluster is not set up' do
    ['foo', 'foo bar', ''].each do |members|
      is_expected.to run.with_params(members, '').and_return([])
    end
  end

  # All three known nodes are reported as "member".
  it 'returns added nodes when cluster is fully up' do
    crm_listing = "\n3 ctr-2 member\n2 ctr-1 member\n1 ctr-0 member\n"
    scaleup_cases.each do |members, added|
      is_expected.to run.with_params(members, crm_listing).and_return(added)
    end
  end

  # A "lost" node still counts as belonging to the cluster.
  it 'returns added nodes when cluster is not fully up' do
    crm_listing = "\n3 ctr-2 lost\n2 ctr-1 member\n1 ctr-0 member\n"
    scaleup_cases.each do |members, added|
      is_expected.to run.with_params(members, crm_listing).and_return(added)
    end
  end
end