307 lines
11 KiB
Ruby
307 lines
11 KiB
Ruby
require 'digest'
|
|
require 'rexml/document'
|
|
|
|
# Constants that represent the state of a resource/constraint
|
|
PCMK_NOCHANGENEEDED = 0
|
|
PCMK_NOTEXISTS = 1
|
|
PCMK_CHANGENEEDED = 2
|
|
|
|
# Base temporary CIB backup folder
|
|
PCMK_TMP_BASE = "/var/lib/pacemaker/cib"
|
|
|
|
# Ruby 2.5 has dropped Dir::Tmpname.make_tmpname
|
|
# https://github.com/ruby/ruby/commit/25d56ea7b7b52dc81af30c92a9a0e2d2dab6ff27
|
|
def pcmk_tmpname((prefix, suffix), n)
|
|
#Dir::Tmpname.make_tmpname (prefix, suffix), n
|
|
prefix = (String.try_convert(prefix) or
|
|
raise ArgumentError, "unexpected prefix: #{prefix.inspect}")
|
|
suffix &&= (String.try_convert(suffix) or
|
|
raise ArgumentError, "unexpected suffix: #{suffix.inspect}")
|
|
t = Time.now.strftime("%Y%m%d")
|
|
path = "#{prefix}#{t}-#{$$}-#{rand(0x100000000).to_s(36)}".dup
|
|
path << "-#{n}" if n
|
|
path << suffix if suffix
|
|
path
|
|
end
|
|
|
|
def delete_cib(cib)
|
|
FileUtils.rm(cib, :force => true)
|
|
FileUtils.rm("#{cib}.orig", :force => true)
|
|
end
|
|
|
|
# backs up the current cib and returns the temporary file name where it
|
|
# was stored. Besides the temporary file it also makes an identical copy
|
|
# called temporary file + ".orig"
|
|
def backup_cib()
|
|
# We use the pacemaker CIB folder because of its restricted access permissions
|
|
cib = pcmk_tmpname("#{PCMK_TMP_BASE}/puppet-cib-backup", nil)
|
|
cmd = "/usr/sbin/pcs cluster cib #{cib}"
|
|
output = `#{cmd}`
|
|
ret = $?
|
|
if not ret.success?
|
|
msg = "backup_cib: Running: #{cmd} failed with code: #{ret.exitstatus} -> #{output}"
|
|
FileUtils.rm(cib, :force => true)
|
|
raise Puppet::Error, msg
|
|
end
|
|
Puppet.debug("backup_cib: #{cmd} returned #{output}")
|
|
FileUtils.cp cib, "#{cib}.orig"
|
|
return cib
|
|
end
|
|
|
|
# Pushes the cib file back to the cluster and removes the cib files
|
|
# returns the pcs cluster cib-push return code. If the cib file and its
|
|
# original counterpart are the exact same push_cib() is a no-op.
|
|
# The pcs cluster-cib syntax with "diff-against" is used only if pcs supports
|
|
# it (it helps to minimize the chances that a cib-push might fail due
|
|
# to us trying to push a too old CIB)
|
|
def push_cib(cib)
|
|
cib_digest = Digest::SHA2.file(cib)
|
|
cib_orig_digest = Digest::SHA2.file("#{cib}.orig")
|
|
if cib_digest == cib_orig_digest
|
|
Puppet.debug("push_cib: #{cib} and #{cib}.orig were identical, skipping")
|
|
delete_cib(cib)
|
|
return 0
|
|
end
|
|
has_diffagainst = `/usr/sbin/pcs cluster cib-push --help`.include? 'diff-against'
|
|
cmd = "/usr/sbin/pcs cluster cib-push #{cib}"
|
|
if has_diffagainst
|
|
cmd += " diff-against=#{cib}.orig"
|
|
end
|
|
output = `#{cmd}`
|
|
ret = $?
|
|
delete_cib(cib)
|
|
if not ret.success?
|
|
msg = "push_cib: Running: #{cmd} failed with code: #{ret.exitstatus} -> #{output}"
|
|
Puppet.debug("push_cib failed: #{msg}")
|
|
end
|
|
|
|
Puppet.debug("push_cib: #{cmd} returned #{ret.exitstatus} -> #{output}")
|
|
return ret.exitstatus
|
|
end
|
|
|
|
def pcs(name, resource_name, cmd, tries=1, try_sleep=0,
|
|
verify_on_create=false, post_success_sleep=0)
|
|
if name.start_with?("create") && verify_on_create
|
|
return pcs_create_with_verify(name, resource_name, cmd, tries, try_sleep)
|
|
end
|
|
max_tries = name.include?('show') ? 1 : tries
|
|
max_tries.times do |try|
|
|
cib = backup_cib()
|
|
try_text = max_tries > 1 ? "try #{try+1}/#{max_tries}: " : ''
|
|
Puppet.debug("#{try_text}/usr/sbin/pcs -f #{cib} #{cmd}")
|
|
pcs_out = `/usr/sbin/pcs -f #{cib} #{cmd} 2>&1`
|
|
if name.include?('show')
|
|
delete_cib(cib)
|
|
# return output for good exit or false for failure.
|
|
return $?.exitstatus == 0 ? pcs_out : false
|
|
end
|
|
if $?.exitstatus == 0
|
|
# If push_cib failed, we stay in the loop and keep trying
|
|
if push_cib(cib) == 0
|
|
sleep post_success_sleep
|
|
return pcs_out
|
|
end
|
|
end
|
|
Puppet.debug("Error: #{pcs_out}")
|
|
if try == max_tries-1
|
|
delete_cib(cib)
|
|
pcs_out_line = pcs_out.lines.first ? pcs_out.lines.first.chomp! : ''
|
|
raise Puppet::Error, "pcs -f #{cib} #{name} failed: #{pcs_out_line}"
|
|
end
|
|
if try_sleep > 0
|
|
Puppet.debug("Sleeping for #{try_sleep} seconds between tries")
|
|
sleep try_sleep
|
|
end
|
|
end
|
|
end
|
|
|
|
def pcs_create_with_verify(name, resource_name, cmd, tries=1, try_sleep=0)
|
|
max_tries = tries
|
|
max_tries.times do |try|
|
|
try_text = max_tries > 1 ? "try #{try+1}/#{max_tries}: " : ''
|
|
Puppet.debug("#{try_text}/usr/sbin/pcs #{cmd}")
|
|
pcs_out = `/usr/sbin/pcs #{cmd} 2>&1`
|
|
if $?.exitstatus == 0
|
|
sleep try_sleep
|
|
cmd_show = "/usr/sbin/pcs resource show " + resource_name
|
|
Puppet.debug("Verifying with: "+cmd_show)
|
|
`#{cmd_show}`
|
|
if $?.exitstatus == 0
|
|
return pcs_out
|
|
else
|
|
Puppet.debug("Warning: verification of pcs resource creation failed")
|
|
end
|
|
else
|
|
Puppet.debug("Error: #{pcs_out}")
|
|
sleep try_sleep
|
|
end
|
|
if try == max_tries-1
|
|
pcs_out_line = pcs_out.lines.first ? pcs_out.lines.first.chomp! : ''
|
|
raise Puppet::Error, "pcs #{name} failed: #{pcs_out_line}"
|
|
end
|
|
end
|
|
end
|
|
|
|
def not_empty_string(p)
|
|
p && p.kind_of?(String) && ! p.empty?
|
|
end
|
|
|
|
# Returns the pcs command to create the location rule
|
|
def build_pcs_location_rule_cmd(resource)
|
|
# The name that pcs will create is location-<name>[-{clone,master}]
|
|
location_rule = resource[:location_rule]
|
|
location_cmd = 'constraint location '
|
|
if resource.propertydefined?(:bundle)
|
|
location_cmd += resource[:bundle]
|
|
else
|
|
location_cmd += resource[:name]
|
|
if resource.propertydefined?(:clone_params)
|
|
location_cmd += '-clone'
|
|
elsif resource.propertydefined?(:master_params)
|
|
location_cmd += '-master'
|
|
end
|
|
end
|
|
location_cmd += ' rule'
|
|
if location_rule['resource_discovery']
|
|
location_cmd += " resource-discovery=#{location_rule['resource_discovery']}"
|
|
end
|
|
if location_rule['score']
|
|
location_cmd += " score=#{location_rule['score']}"
|
|
end
|
|
if location_rule['score_attribute']
|
|
location_cmd += " score-attribure=#{location_rule['score_attribute']}"
|
|
end
|
|
if location_rule['expression']
|
|
location_cmd += " " + location_rule['expression'].join(' ')
|
|
end
|
|
Puppet.debug("build_pcs_location_rule_cmd: #{location_cmd}")
|
|
location_cmd
|
|
end
|
|
|
|
# This method runs a pcs command on an offline cib
|
|
# Much simpler logic compared to pcs()
|
|
# return output for good exit or false for failure.
|
|
def pcs_offline(cmd, cib)
|
|
Puppet.debug("pcs_offline: /usr/sbin/pcs -f #{cib} #{cmd}")
|
|
pcs_out = `/usr/sbin/pcs -f #{cib} #{cmd}`
|
|
return $?.exitstatus == 0 ? pcs_out : false
|
|
end
|
|
|
|
# This is a loop that simply tries to push a CIB a number of time
|
|
# on to the live cluster. It does not remove the CIB except in the Error
|
|
# case. Returns nothing in case of success and errors out in case of errors
|
|
def push_cib_offline(cib, tries=1, try_sleep=0, post_success_sleep=0)
|
|
tries.times do |try|
|
|
try_text = tries > 1 ? "try #{try+1}/#{tries}: " : ''
|
|
Puppet.debug("pcs_cib_offline push #{try_text}")
|
|
if push_cib(cib) == 0
|
|
sleep post_success_sleep
|
|
return
|
|
end
|
|
Puppet.debug("Error: #{pcs_out}")
|
|
if try == tries-1
|
|
delete_cib(cib)
|
|
raise Puppet::Error, "push_cib_offline for #{cib} failed"
|
|
end
|
|
if try_sleep > 0
|
|
Puppet.debug("Sleeping for #{try_sleep} seconds between tries")
|
|
sleep try_sleep
|
|
end
|
|
end
|
|
end
|
|
|
|
# The following function will take a resource_name an xml graph file as generated by crm_simulate and
|
|
# will return true if the resource_name is contained in the transition graph (i.e. the cluster would
|
|
# restart the resource) and false if not (i.e. the cluster would not restart the resource)
|
|
def pcmk_graph_contain_id?(resource_name, graph_file, is_bundle=false)
|
|
graph = File.new(graph_file)
|
|
graph_doc = REXML::Document.new graph
|
|
xpath_query = '/transition_graph//primitive/@id'
|
|
ids = []
|
|
REXML::XPath.each(graph_doc, xpath_query) do |element|
|
|
id = element.to_s
|
|
# if we are a bundle we compare the start of the strings
|
|
# because the primitive id will be in the form of galera-bundle-1 as opposed to galera-bundle
|
|
if is_bundle then
|
|
if id.start_with?(resource_name) then
|
|
return true
|
|
end
|
|
else
|
|
if id == resource_name then
|
|
return true
|
|
end
|
|
end
|
|
end
|
|
return false
|
|
end
|
|
|
|
# This given a cib and a resource name, this method returns true if pacemaker
|
|
# will restart the resource false if no action will be taken by pacemaker
|
|
# Note that we need to leverage crm_simulate instead of crm_diff due to:
|
|
# https://bugzilla.redhat.com/show_bug.cgi?id=1561617
|
|
def pcmk_restart_resource?(resource_name, cib, is_bundle=false)
|
|
tmpfile = pcmk_tmpname("#{PCMK_TMP_BASE}/puppet-cib-simulate", nil)
|
|
cmd = "/usr/sbin/crm_simulate -x #{cib} -s -G#{tmpfile}"
|
|
crm_out = `#{cmd}`
|
|
if $?.exitstatus != 0
|
|
FileUtils.rm(tmpfile, :force => true)
|
|
delete_cib(cib)
|
|
raise Puppet::Error, "#{cmd} failed with: #{crm_out}"
|
|
end
|
|
# Now in tmpfile we have the xml of the changes to the cluster
|
|
# If tmpfile only contains one empy <transition_graph> no changes took place
|
|
ret = pcmk_graph_contain_id?(resource_name, tmpfile, is_bundle)
|
|
FileUtils.rm(tmpfile, :force => true)
|
|
return ret
|
|
end
|
|
|
|
# This method takes a resource and a creation command and does the following
|
|
# 1. Deletes the resource from the offline CIB
|
|
# 2. Recreates the resource on the offline CIB
|
|
# 3. Verifies if the pacemaker will restart the resource and returns true if the answer is a yes
|
|
def pcmk_resource_has_changed?(resource, cmd_create, is_bundle=false)
|
|
cib = backup_cib()
|
|
cmd_delete = "resource delete #{resource[:name]}"
|
|
ret = pcs_offline(cmd_delete, cib)
|
|
if ret == false
|
|
delete_cib(cib)
|
|
raise Puppet::Error, "#{cmd_delete} returned error. This should never happen."
|
|
end
|
|
ret = pcs_offline(cmd_create, cib)
|
|
if ret == false
|
|
delete_cib(cib)
|
|
raise Puppet::Error, "#{cmd_create} returned error. This should never happen."
|
|
end
|
|
ret = pcmk_restart_resource?(resource[:name], cib, is_bundle)
|
|
Puppet.debug("pcmk_resource_has_changed returned #{ret}")
|
|
delete_cib(cib)
|
|
return ret
|
|
end
|
|
|
|
# This function will update a resource by making a cib backup
|
|
# removing the resource and readding it and the push the CIB
|
|
# to the cluster
|
|
def pcmk_update_resource(resource, cmd_create)
|
|
cib = backup_cib()
|
|
cmd_delete = "resource delete #{resource[:name]}"
|
|
ret = pcs_offline(cmd_delete, cib)
|
|
if ret == false
|
|
delete_cib(cib)
|
|
raise Puppet::Error, "#{cmd_delete} returned error. This should never happen."
|
|
end
|
|
ret = pcs_offline(cmd_create, cib)
|
|
if ret == false
|
|
delete_cib(cib)
|
|
raise Puppet::Error, "#{cmd_create} returned error. This should never happen."
|
|
end
|
|
if resource[:location_rule] then
|
|
cmd_location = build_pcs_location_rule_cmd(resource)
|
|
ret = pcs_offline(cmd_location, cib)
|
|
if ret == false
|
|
delete_cib(cib)
|
|
raise Puppet::Error, "#{cmd_location} returned error. This should never happen."
|
|
end
|
|
end
|
|
push_cib_offline(cib, resource[:tries], resource[:try_sleep], resource[:post_success_sleep])
|
|
end
|