Initial support for adding cluster nodes to an existing cluster
We add a function which will detect if rerunning the manifest would mean adding any cluster nodes. For those nodes we trigger the cluster node addition via: pcs cluster auth newnode pcs cluster node add newnode pcs cluster start newnode Initial run with one node in cluster_members: Online: [ foobar-0 ] Full list of resources: stonith-fence_amt-stonith-fence-1 (stonith:fence_amt): Stopped Docker container: test_bundle [docker.io/sdelrio/docker-minimal-nginx] test_bundle-docker-0 (ocf::heartbeat:docker): Started foobar-0 ip-172.16.11.97 (ocf::heartbeat:IPaddr2): Started foobar-0 Clone Set: rabbitmq-clone [rabbitmq] Started: [ foobar-0 ] Rerun with two additional nodes and by setting ::deep_compare hiera keys to true so that resources get updated: Online: [ foobar-0 foobar-1 foobar-2 ] Full list of resources: stonith-fence_amt-stonith-fence-1 (stonith:fence_amt): Started foobar-1 ip-172.16.11.97 (ocf::heartbeat:IPaddr2): Started foobar-2 Clone Set: rabbitmq-clone [rabbitmq] Started: [ foobar-0 foobar-1 foobar-2 ] stonith-fence_amt-stonith-fence-2 (stonith:fence_amt): Started foobar-0 stonith-fence_amt-stonith-fence-3 (stonith:fence_amt): Started foobar-1 Docker container set: test_bundle [docker.io/sdelrio/docker-minimal-nginx] test_bundle-docker-0 (ocf::heartbeat:docker): Started foobar-0 test_bundle-docker-1 (ocf::heartbeat:docker): Started foobar-2 test_bundle-docker-2 (ocf::heartbeat:docker): Started foobar-1 Ran about 50 scaleup tests and this node addition code has worked all the time. We have intentionally not yet added removal support as that needs to have a use-case and a lot of testing. For this scaleup to work properly we need a fix for the firewall ordering issue, otherwise this could be racy when stonith is configured. (i.e. we'll need I01e681a6305e2708bf364781a2032265b146d065 if this review ever gets backported). Change-Id: Iac31035da98bd68a5481d97ee3765a99563db49f
This commit is contained in:
parent
b11f579a5f
commit
cd20731d96
|
@ -0,0 +1,54 @@
|
|||
module Puppet::Parser::Functions
  # Parser function: given the desired cluster_members string, return the
  # array of node names that are not yet part of the running pacemaker
  # cluster (i.e. the nodes a rerun of the manifest would have to add).
  newfunction(
    :pcmk_nodes_added,
    type: :rvalue,
    arity: -1,
    doc: <<-eof
    Input data cluster_members string separated by a space:
    * String 'n1 n2 n3'
    * Output of `crm_node -l` (only used to ease unit testing) (optional)

    Output forms:
    * array - output the plain array of nodes that have been added compared
              to the running cluster. It returns an empty array in case the
              cluster is not set up or if crm_node returns an error
    eof
  ) do |args|
    nodes = args[0]
    crm_node_list = args[1]
    unless nodes.is_a? String
      fail "Got unsupported nodes input data: #{nodes.inspect}"
    end
    if crm_node_list && !crm_node_list.kind_of?(String)
      fail "Got unsupported crm_node_list #{crm_node_list.inspect}"
    end

    if crm_node_list
      # An empty injected node list means the cluster is not set up yet.
      return [] if crm_node_list.empty?
      crm_nodes_output = crm_node_list
    else
      # BUGFIX: only require the crm_node executable when we actually have
      # to shell out to it. The previous top-of-function early return also
      # short-circuited the injected-string path used by the unit tests,
      # making the function always return [] on hosts without pacemaker.
      return [] if Facter::Util::Resolution.which('crm_node').nil?
      # A typical crm_node -l output is like the following:
      # [root@foobar-0 ~]# crm_node -l
      # 3 foobar-2 member
      # 1 foobar-0 member
      # 2 foobar-1 lost
      crm_nodes_output = `crm_node -l`
      # if the command fails we certainly did not add any nodes
      return [] if $?.exitstatus != 0
    end

    # Keep every node the cluster already knows about, whether currently
    # reachable ('member') or not ('lost'); lost nodes must not be re-added.
    valid_states = %w(member lost)
    crm_nodes = crm_nodes_output.lines.map { |line|
      _id, node, state = line.split(' ')
      node if state && valid_states.include?(state.downcase)
    }.compact

    nodes_added = nodes.split(' ') - crm_nodes
    Puppet.debug("pcmk_nodes_added: #{nodes_added}")
    nodes_added
  end
end
|
|
@ -176,6 +176,34 @@ class pacemaker::corosync(
|
|||
}
|
||||
|
||||
if $setup_cluster {
  # Compare $cluster_members with the nodes actually present in the
  # running cluster to detect whether this run is a scale-up.
  $nodes_added = pcmk_nodes_added($cluster_members)
  # $nodes_added is non-empty only when this manifest is rerun with
  # $cluster_members listing more nodes than the running cluster has.
  if count($nodes_added) > 0 {
    $nodes_added.each |$new_node| {
      exec {"Adding Cluster node: ${new_node} to Cluster ${cluster_name}":
        command   => "${::pacemaker::pcs_bin} cluster node add ${new_node} --wait",
        unless    => "${::pacemaker::pcs_bin} status 2>&1 | grep -e \"^Online:.* ${new_node} .*\"",
        timeout   => $cluster_start_timeout,
        tries     => $cluster_start_tries,
        try_sleep => $cluster_start_try_sleep,
        notify    => Exec["node-cluster-start-${new_node}"],
        tag       => 'pacemaker-scaleup',
      }
      # Started only when notified by the node-add exec above.
      exec {"node-cluster-start-${new_node}":
        command     => "${::pacemaker::pcs_bin} cluster start ${new_node} --wait",
        unless      => "${::pacemaker::pcs_bin} status 2>&1 | grep -e \"^Online:.* ${new_node} .*\"",
        timeout     => $cluster_start_timeout,
        tries       => $cluster_start_tries,
        try_sleep   => $cluster_start_try_sleep,
        refreshonly => true,
        tag         => 'pacemaker-scaleup',
      }
    }
  }
  # New nodes must be authenticated before they can be added or started.
  Exec <|tag == 'pacemaker-auth'|> -> Exec <|tag == 'pacemaker-scaleup'|>
}
|
||||
|
||||
if ! $cluster_members_rrp {
|
||||
$cluster_members_rrp_real = $cluster_members
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
require 'spec_helper'

describe 'pcmk_nodes_added' do
  context 'interface' do
    it { is_expected.not_to eq(nil) }

    it 'rejects non-string nodes input' do
      is_expected.to run.with_params(1).and_raise_error(Puppet::Error, /Got unsupported nodes input data/)
    end

    it 'rejects a non-string crm_node list' do
      is_expected.to run.with_params('foo', []).and_raise_error(Puppet::Error, /Got unsupported crm_node_list/)
    end
  end

  it 'returns no added nodes because cluster is not set up' do
    ['foo', 'foo bar', ''].each do |members|
      is_expected.to run.with_params(members, '').and_return([])
    end
  end

  # Same expectations whether every known node is reachable ('member')
  # or some are currently unreachable ('lost').
  {
    'fully up'     => "\n3 ctr-2 member\n2 ctr-1 member\n1 ctr-0 member\n",
    'not fully up' => "\n3 ctr-2 lost\n2 ctr-1 member\n1 ctr-0 member\n",
  }.each do |condition, crm_output|
    it "returns added nodes when cluster is #{condition}" do
      is_expected.to run.with_params('ctr-0 ctr-1 ctr-2', crm_output).and_return([])
      is_expected.to run.with_params('ctr-0 ctr-1 ctr-2 ctr-3', crm_output).and_return(['ctr-3'])
      is_expected.to run.with_params('ctr-1 ctr-3 ctr-2', crm_output).and_return(['ctr-3'])
      is_expected.to run.with_params('', crm_output).and_return([])
    end
  end
end
|
Loading…
Reference in New Issue