Fix race during major-upgrade-pacemaker step

Currently when we call the major-upgrade step we do the following:
"""
...
if [[ -n $(is_bootstrap_node) ]]; then
    check_clean_cluster
fi
...
if [[ -n $(is_bootstrap_node) ]]; then
    migrate_full_to_ng_ha
fi
...
for service in $(services_to_migrate); do
    manage_systemd_service stop "${service%%-clone}"
    ...
done
"""

The problem with the above code is that it is open to the following race
condition:
1. The code runs first on a non-bootstrap controller node, so we start
stopping a bunch of services
2. Pacemaker notices that those services are down and marks them as
stopped
3. The code then runs on the bootstrap node (controller-0), where the
check_clean_cluster function fails and exits
4. Eventually the script on the non-bootstrap controller node also times
out and exits, because the cluster never shut down (the shutdown never
actually started, since we failed at step 3)

Let's make sure we first call the HA NG migration as a separate heat
step, and only afterwards start shutting down the systemd services on
all nodes.
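
Schematically, the new ordering looks like this (an illustrative sketch
of the resulting split, not the literal diff below):
"""
# heat step 1: cluster checks and HA NG migration, bootstrap node only
if [[ -n $(is_bootstrap_node) ]]; then
    check_clean_cluster
fi
...
if [[ -n $(is_bootstrap_node) ]]; then
    migrate_full_to_ng_ha
fi

# heat step 2: only after step 1 has completed on every node do we
# stop the systemd services
for service in $(services_to_migrate); do
    manage_systemd_service stop "${service%%-clone}"
    ...
done
"""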

We also need to move the STONITH_STATE variable into a file, because it
is used across two different scripts (1 and 2) and we need to persist
that state between them.
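
Concretely, the step 1 script now records the state in a flag file and
the step 2 script consumes it (taken from the diffs below):
"""
# step 1 script, before disabling stonith
STONITH_STATE=$(pcs property show stonith-enabled | grep "stonith-enabled" | awk '{ print $2 }')
rm -f /var/tmp/stonith-true
if [ $STONITH_STATE == "true" ]; then
    touch /var/tmp/stonith-true
fi
pcs property set stonith-enabled=false

# step 2 script, once the upgrade is done
if [ -f /var/tmp/stonith-true ]; then
    pcs -f /var/lib/pacemaker/cib/cib.xml property set stonith-enabled=true
fi
rm -f /var/tmp/stonith-true
"""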

Co-Authored-By: Athlan-Guyot Sofer <sathlang@redhat.com>

Closes-Bug: #1640407
Change-Id: Ifb9b9e633fcc77604cca2590071656f4b2275c60
commit dde12b075f (parent 465324cb6a)
Michele Baldessari, 2016-11-09 09:05:08 +01:00
8 changed files with 321 additions and 269 deletions

extraconfig/tasks/major_upgrade_check.sh

@@ -18,14 +18,8 @@ check_pcsd()
fi
}
check_disk_for_mysql_dump()
mysql_need_update()
{
# Where to back up the current database if mysql needs to be upgraded
MYSQL_BACKUP_DIR=/var/tmp/mysql_upgrade_osp
MYSQL_TEMP_UPGRADE_BACKUP_DIR=/var/lib/mysql-temp-upgrade-backup
# Spare disk ratio for extra safety
MYSQL_BACKUP_SIZE_RATIO=1.2
# Shall we upgrade mysql data directory during the stack upgrade?
if [ "$mariadb_do_major_upgrade" = "auto" ]; then
ret=$(is_mysql_upgrade_needed)
@@ -40,6 +34,17 @@ check_disk_for_mysql_dump()
else
DO_MYSQL_UPGRADE=1
fi
}
check_disk_for_mysql_dump()
{
# Where to back up the current database if mysql needs to be upgraded
MYSQL_BACKUP_DIR=/var/tmp/mysql_upgrade_osp
MYSQL_TEMP_UPGRADE_BACKUP_DIR=/var/lib/mysql-temp-upgrade-backup
# Spare disk ratio for extra safety
MYSQL_BACKUP_SIZE_RATIO=1.2
mysql_need_update
if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
if [ $DO_MYSQL_UPGRADE -eq 1 ]; then

extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh

@@ -2,8 +2,6 @@
set -eu
cluster_sync_timeout=1800
check_cluster
check_pcsd
if [[ -n $(is_bootstrap_node) ]]; then
@@ -19,6 +17,11 @@ check_disk_for_mysql_dump
# at the end of this script
if [[ -n $(is_bootstrap_node) ]]; then
STONITH_STATE=$(pcs property show stonith-enabled | grep "stonith-enabled" | awk '{ print $2 }')
# We create this empty file if stonith was set to true so we can re-enable stonith in step 2
rm -f /var/tmp/stonith-true
if [ $STONITH_STATE == "true" ]; then
touch /var/tmp/stonith-true
fi
pcs property set stonith-enabled=false
fi
@@ -31,178 +34,3 @@ if [[ -n $(is_bootstrap_node) ]]; then
rabbitmq_mitaka_newton_upgrade
fi
# After migrating the cluster to HA-NG the services not under pacemaker's control
# are still up and running. We need to stop them explicitly, otherwise during the yum
# upgrade the rpm %post sections will try to do a systemctl try-restart <service>, which
# is going to take a long time because rabbit is down. By having the service stopped
# systemctl try-restart is a noop
for service in $(services_to_migrate); do
manage_systemd_service stop "${service%%-clone}"
# So the reason for not reusing check_resource_systemd is that
# I have observed systemctl is-active returning unknown with at least
# one service that was stopped (See LP 1627254)
timeout=600
tstart=$(date +%s)
tend=$(( $tstart + $timeout ))
check_interval=3
while (( $(date +%s) < $tend )); do
if [[ "$(systemctl is-active ${service%%-clone})" = "active" ]]; then
echo "$service still active, sleeping $check_interval seconds."
sleep $check_interval
else
# we do not care if it is inactive, unknown or failed as long as it is
# not running
break
fi
done
done
# In case the mysql package is updated, the database on disk must be
# upgraded as well. This typically needs to happen during major
# version upgrades (e.g. 5.5 -> 5.6, 5.5 -> 10.1...)
#
# Because in-place upgrades are not supported across 2+ major versions
# (e.g. 5.5 -> 10.1), we rely on logical upgrades via a dump/restore cycle
# https://bugzilla.redhat.com/show_bug.cgi?id=1341968
#
# The default is to determine automatically if upgrade is needed based
# on mysql package versioning, but this can be overridden manually
# to support specific upgrade scenarios
if [[ -n $(is_bootstrap_node) ]]; then
if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
mysqldump $backup_flags > "$MYSQL_BACKUP_DIR/openstack_database.sql"
cp -rdp /etc/my.cnf* "$MYSQL_BACKUP_DIR"
fi
pcs resource disable redis
check_resource redis stopped 600
pcs resource disable rabbitmq
check_resource rabbitmq stopped 600
pcs resource disable galera
check_resource galera stopped 600
pcs resource disable openstack-cinder-volume
check_resource openstack-cinder-volume stopped 600
# Disable all VIPs before stopping the cluster, so that pcs doesn't use one as a source address:
# https://bugzilla.redhat.com/show_bug.cgi?id=1330688
for vip in $(pcs resource show | grep ocf::heartbeat:IPaddr2 | grep Started | awk '{ print $1 }'); do
pcs resource disable $vip
check_resource $vip stopped 60
done
pcs cluster stop --all
fi
# Swift isn't controlled by pacemaker
systemctl_swift stop
tstart=$(date +%s)
while systemctl is-active pacemaker; do
sleep 5
tnow=$(date +%s)
if (( tnow-tstart > cluster_sync_timeout )) ; then
echo_error "ERROR: cluster shutdown timed out"
exit 1
fi
done
# The reason we do an sql dump *and* move the old dir out of
# the way is that it gives us an extra level of safety in case
# something goes wrong during the upgrade. Once the restore is
# successful we go ahead and remove it. If the directory exists
# we bail out as it means the upgrade process had issues in the last
# run.
if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
if [ -d $MYSQL_TEMP_UPGRADE_BACKUP_DIR ]; then
echo_error "ERROR: mysql backup dir already exist"
exit 1
fi
mv /var/lib/mysql $MYSQL_TEMP_UPGRADE_BACKUP_DIR
fi
# Special-case OVS for https://bugs.launchpad.net/tripleo/+bug/1635205
if [[ -n $(rpm -q --scripts openvswitch | awk '/postuninstall/,/*/' | grep "systemctl.*try-restart") ]]; then
echo "Manual upgrade of openvswitch - restart in postun detected"
mkdir OVS_UPGRADE || true
pushd OVS_UPGRADE
echo "Attempting to downloading latest openvswitch with yumdownloader"
yumdownloader --resolve openvswitch
echo "Updating openvswitch with nopostun option"
rpm -U --replacepkgs --nopostun ./*.rpm
popd
else
echo "Skipping manual upgrade of openvswitch - no restart in postun detected"
fi
yum -y install python-zaqarclient # needed for os-collect-config
yum -y -q update
# We need to ensure at least those two configuration settings, otherwise
# mariadb 10.1+ won't activate galera replication.
# wsrep_cluster_address must only be set though, its value does not
# matter because it's overridden by the galera resource agent.
cat >> /etc/my.cnf.d/galera.cnf <<EOF
[mysqld]
wsrep_on = ON
wsrep_cluster_address = gcomm://localhost
EOF
if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
# Scripts run via heat have no HOME variable set and this confuses
# mysqladmin
export HOME=/root
mkdir /var/lib/mysql || /bin/true
chown mysql:mysql /var/lib/mysql
chmod 0755 /var/lib/mysql
restorecon -R /var/lib/mysql/
mysql_install_db --datadir=/var/lib/mysql --user=mysql
chown -R mysql:mysql /var/lib/mysql/
if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
mysqld_safe --wsrep-new-cluster &
# We have a populated /root/.my.cnf with root/password here so
# we need to temporarily rename it because the newly created
# db is empty and no root password is set
mv /root/.my.cnf /root/.my.cnf.temporary
timeout 60 sh -c 'while ! mysql -e "" &> /dev/null; do sleep 1; done'
mysql -u root < "$MYSQL_BACKUP_DIR/openstack_database.sql"
mv /root/.my.cnf.temporary /root/.my.cnf
mysqladmin -u root shutdown
# The import was successful so we may remove the folder
rm -r "$MYSQL_BACKUP_DIR"
fi
fi
# If we reached here without error we can safely blow away the original
# mysql dir from every controller
# TODO: What if the upgrade fails on the bootstrap node, but not on
# this controller. Data may be lost.
if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
rm -r $MYSQL_TEMP_UPGRADE_BACKUP_DIR
fi
# Let's reset the stonith back to true if it was true, before starting the cluster
if [[ -n $(is_bootstrap_node) ]]; then
if [ $STONITH_STATE == "true" ]; then
pcs -f /var/lib/pacemaker/cib/cib.xml property set stonith-enabled=true
fi
fi
# Pin messages sent to compute nodes to kilo, these will be upgraded later
crudini --set /etc/nova/nova.conf upgrade_levels compute "$upgrade_level_nova_compute"
# https://bugzilla.redhat.com/show_bug.cgi?id=1284047
# Change-Id: Ib3f6c12ff5471e1f017f28b16b1e6496a4a4b435
crudini --set /etc/ceilometer/ceilometer.conf DEFAULT rpc_backend rabbit
# https://bugzilla.redhat.com/show_bug.cgi?id=1284058
# Ifd1861e3df46fad0e44ff9b5cbd58711bbc87c97 Swift Ceilometer middleware no longer exists
crudini --set /etc/swift/proxy-server.conf pipeline:main pipeline "catch_errors healthcheck cache ratelimit tempurl formpost authtoken keystone staticweb proxy-logging proxy-server"
# LP: 1615035, required only for M/N upgrade.
crudini --set /etc/nova/nova.conf DEFAULT scheduler_host_manager host_manager
# LP: 1627450, required only for M/N upgrade
crudini --set /etc/nova/nova.conf DEFAULT scheduler_driver filter_scheduler
crudini --set /etc/sahara/sahara.conf DEFAULT plugins ambari,cdh,mapr,vanilla,spark,storm

extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh

@@ -2,67 +2,186 @@
set -eu
cluster_form_timeout=600
cluster_settle_timeout=1800
galera_sync_timeout=600
cluster_sync_timeout=1800
# After migrating the cluster to HA-NG the services not under pacemaker's control
# are still up and running. We need to stop them explicitly, otherwise during the yum
# upgrade the rpm %post sections will try to do a systemctl try-restart <service>, which
# is going to take a long time because rabbit is down. By having the service stopped
# systemctl try-restart is a noop
for service in $(services_to_migrate); do
manage_systemd_service stop "${service%%-clone}"
# So the reason for not reusing check_resource_systemd is that
# I have observed systemctl is-active returning unknown with at least
# one service that was stopped (See LP 1627254)
timeout=600
tstart=$(date +%s)
tend=$(( $tstart + $timeout ))
check_interval=3
while (( $(date +%s) < $tend )); do
if [[ "$(systemctl is-active ${service%%-clone})" = "active" ]]; then
echo "$service still active, sleeping $check_interval seconds."
sleep $check_interval
else
# we do not care if it is inactive, unknown or failed as long as it is
# not running
break
fi
done
done
# In case the mysql package is updated, the database on disk must be
# upgraded as well. This typically needs to happen during major
# version upgrades (e.g. 5.5 -> 5.6, 5.5 -> 10.1...)
#
# Because in-place upgrades are not supported across 2+ major versions
# (e.g. 5.5 -> 10.1), we rely on logical upgrades via a dump/restore cycle
# https://bugzilla.redhat.com/show_bug.cgi?id=1341968
#
# The default is to determine automatically if upgrade is needed based
# on mysql package versioning, but this can be overridden manually
# to support specific upgrade scenarios
# Calling this function will set the DO_MYSQL_UPGRADE variable which is used
# later
mysql_need_update
if [[ -n $(is_bootstrap_node) ]]; then
pcs cluster start --all
tstart=$(date +%s)
while pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; do
sleep 5
tnow=$(date +%s)
if (( tnow-tstart > cluster_form_timeout )) ; then
echo_error "ERROR: timed out forming the cluster"
exit 1
fi
done
if ! timeout -k 10 $cluster_settle_timeout crm_resource --wait; then
echo_error "ERROR: timed out waiting for cluster to finish transition"
exit 1
if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
mysqldump $backup_flags > "$MYSQL_BACKUP_DIR/openstack_database.sql"
cp -rdp /etc/my.cnf* "$MYSQL_BACKUP_DIR"
fi
for vip in $(pcs resource show | grep ocf::heartbeat:IPaddr2 | grep Stopped | awk '{ print $1 }'); do
pcs resource enable $vip
check_resource_pacemaker $vip started 60
pcs resource disable redis
check_resource redis stopped 600
pcs resource disable rabbitmq
check_resource rabbitmq stopped 600
pcs resource disable galera
check_resource galera stopped 600
pcs resource disable openstack-cinder-volume
check_resource openstack-cinder-volume stopped 600
# Disable all VIPs before stopping the cluster, so that pcs doesn't use one as a source address:
# https://bugzilla.redhat.com/show_bug.cgi?id=1330688
for vip in $(pcs resource show | grep ocf::heartbeat:IPaddr2 | grep Started | awk '{ print $1 }'); do
pcs resource disable $vip
check_resource $vip stopped 60
done
pcs cluster stop --all
fi
start_or_enable_service galera
check_resource galera started 600
start_or_enable_service redis
check_resource redis started 600
# We need mongod, which is now a systemd service, up and running before calling
# ceilometer-dbsync. There is still a race here: mongod might not be up on all nodes
# so ceilometer-dbsync will fail a couple of times before that. As it retries indefinitely
# we should be good.
# Due to LP Bug https://bugs.launchpad.net/tripleo/+bug/1627254 we are using systemctl directly for now
systemctl start mongod
check_resource mongod started 600
# Swift isn't controlled by pacemaker
systemctl_swift stop
tstart=$(date +%s)
while systemctl is-active pacemaker; do
sleep 5
tnow=$(date +%s)
if (( tnow-tstart > cluster_sync_timeout )) ; then
echo_error "ERROR: cluster shutdown timed out"
exit 1
fi
done
# The reason we do an sql dump *and* move the old dir out of
# the way is that it gives us an extra level of safety in case
# something goes wrong during the upgrade. Once the restore is
# successful we go ahead and remove it. If the directory exists
# we bail out as it means the upgrade process had issues in the last
# run.
if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
if [ -d $MYSQL_TEMP_UPGRADE_BACKUP_DIR ]; then
echo_error "ERROR: mysql backup dir already exist"
exit 1
fi
mv /var/lib/mysql $MYSQL_TEMP_UPGRADE_BACKUP_DIR
fi
# Special-case OVS for https://bugs.launchpad.net/tripleo/+bug/1635205
if [[ -n $(rpm -q --scripts openvswitch | awk '/postuninstall/,/*/' | grep "systemctl.*try-restart") ]]; then
echo "Manual upgrade of openvswitch - restart in postun detected"
mkdir OVS_UPGRADE || true
pushd OVS_UPGRADE
echo "Attempting to downloading latest openvswitch with yumdownloader"
yumdownloader --resolve openvswitch
echo "Updating openvswitch with nopostun option"
rpm -U --replacepkgs --nopostun ./*.rpm
popd
else
echo "Skipping manual upgrade of openvswitch - no restart in postun detected"
fi
yum -y install python-zaqarclient # needed for os-collect-config
yum -y -q update
# We need to ensure at least those two configuration settings, otherwise
# mariadb 10.1+ won't activate galera replication.
# wsrep_cluster_address must only be set though, its value does not
# matter because it's overridden by the galera resource agent.
cat >> /etc/my.cnf.d/galera.cnf <<EOF
[mysqld]
wsrep_on = ON
wsrep_cluster_address = gcomm://localhost
EOF
if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
# Scripts run via heat have no HOME variable set and this confuses
# mysqladmin
export HOME=/root
mkdir /var/lib/mysql || /bin/true
chown mysql:mysql /var/lib/mysql
chmod 0755 /var/lib/mysql
restorecon -R /var/lib/mysql/
mysql_install_db --datadir=/var/lib/mysql --user=mysql
chown -R mysql:mysql /var/lib/mysql/
if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
mysqld_safe --wsrep-new-cluster &
# We have a populated /root/.my.cnf with root/password here so
# we need to temporarily rename it because the newly created
# db is empty and no root password is set
mv /root/.my.cnf /root/.my.cnf.temporary
timeout 60 sh -c 'while ! mysql -e "" &> /dev/null; do sleep 1; done'
mysql -u root < "$MYSQL_BACKUP_DIR/openstack_database.sql"
mv /root/.my.cnf.temporary /root/.my.cnf
mysqladmin -u root shutdown
# The import was successful so we may remove the folder
rm -r "$MYSQL_BACKUP_DIR"
fi
fi
# If we reached here without error we can safely blow away the original
# mysql dir from every controller
# TODO: What if the upgrade fails on the bootstrap node, but not on
# this controller. Data may be lost.
if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
rm -r $MYSQL_TEMP_UPGRADE_BACKUP_DIR
fi
# Let's reset the stonith back to true if it was true, before starting the cluster
if [[ -n $(is_bootstrap_node) ]]; then
tstart=$(date +%s)
while ! clustercheck; do
sleep 5
tnow=$(date +%s)
if (( tnow-tstart > galera_sync_timeout )) ; then
echo_error "ERROR galera sync timed out"
exit 1
fi
done
# Run all the db syncs
# TODO: check if this can be triggered in puppet and removed from here
ceilometer-dbsync --config-file=/etc/ceilometer/ceilometer.conf
cinder-manage db sync
glance-manage --config-file=/etc/glance/glance-registry.conf db_sync
heat-manage --config-file /etc/heat/heat.conf db_sync
keystone-manage db_sync
neutron-db-manage --config-file /etc/neutron/neutron.conf --config-file /etc/neutron/plugin.ini upgrade head
nova-manage db sync
nova-manage api_db sync
nova-manage db online_data_migrations
sahara-db-manage --config-file /etc/sahara/sahara.conf upgrade head
if [ -f /var/tmp/stonith-true ]; then
pcs -f /var/lib/pacemaker/cib/cib.xml property set stonith-enabled=true
fi
rm -f /var/tmp/stonith-true
fi
# Pin messages sent to compute nodes to kilo, these will be upgraded later
crudini --set /etc/nova/nova.conf upgrade_levels compute "$upgrade_level_nova_compute"
# https://bugzilla.redhat.com/show_bug.cgi?id=1284047
# Change-Id: Ib3f6c12ff5471e1f017f28b16b1e6496a4a4b435
crudini --set /etc/ceilometer/ceilometer.conf DEFAULT rpc_backend rabbit
# https://bugzilla.redhat.com/show_bug.cgi?id=1284058
# Ifd1861e3df46fad0e44ff9b5cbd58711bbc87c97 Swift Ceilometer middleware no longer exists
crudini --set /etc/swift/proxy-server.conf pipeline:main pipeline "catch_errors healthcheck cache ratelimit tempurl formpost authtoken keystone staticweb proxy-logging proxy-server"
# LP: 1615035, required only for M/N upgrade.
crudini --set /etc/nova/nova.conf DEFAULT scheduler_host_manager host_manager
# LP: 1627450, required only for M/N upgrade
crudini --set /etc/nova/nova.conf DEFAULT scheduler_driver filter_scheduler
crudini --set /etc/sahara/sahara.conf DEFAULT plugins ambari,cdh,mapr,vanilla,spark,storm

extraconfig/tasks/major_upgrade_controller_pacemaker_3.sh

@@ -2,16 +2,67 @@
set -eu
start_or_enable_service rabbitmq
check_resource rabbitmq started 600
cluster_form_timeout=600
cluster_settle_timeout=1800
galera_sync_timeout=600
if [[ -n $(is_bootstrap_node) ]]; then
pcs cluster start --all
tstart=$(date +%s)
while pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; do
sleep 5
tnow=$(date +%s)
if (( tnow-tstart > cluster_form_timeout )) ; then
echo_error "ERROR: timed out forming the cluster"
exit 1
fi
done
if ! timeout -k 10 $cluster_settle_timeout crm_resource --wait; then
echo_error "ERROR: timed out waiting for cluster to finish transition"
exit 1
fi
for vip in $(pcs resource show | grep ocf::heartbeat:IPaddr2 | grep Stopped | awk '{ print $1 }'); do
pcs resource enable $vip
check_resource_pacemaker $vip started 60
done
fi
start_or_enable_service galera
check_resource galera started 600
start_or_enable_service redis
check_resource redis started 600
start_or_enable_service openstack-cinder-volume
check_resource openstack-cinder-volume started 600
# We need mongod, which is now a systemd service, up and running before calling
# ceilometer-dbsync. There is still a race here: mongod might not be up on all nodes
# so ceilometer-dbsync will fail a couple of times before that. As it retries indefinitely
# we should be good.
# Due to LP Bug https://bugs.launchpad.net/tripleo/+bug/1627254 we are using systemctl directly for now
systemctl start mongod
check_resource mongod started 600
# start httpd so keystone is available for gnocchi
# upgrade to run.
systemctl start httpd
if [[ -n $(is_bootstrap_node) ]]; then
tstart=$(date +%s)
while ! clustercheck; do
sleep 5
tnow=$(date +%s)
if (( tnow-tstart > galera_sync_timeout )) ; then
echo_error "ERROR galera sync timed out"
exit 1
fi
done
# Swift isn't controlled by pacemaker
systemctl_swift start
# Run all the db syncs
# TODO: check if this can be triggered in puppet and removed from here
ceilometer-dbsync --config-file=/etc/ceilometer/ceilometer.conf
cinder-manage db sync
glance-manage --config-file=/etc/glance/glance-registry.conf db_sync
heat-manage --config-file /etc/heat/heat.conf db_sync
keystone-manage db_sync
neutron-db-manage --config-file /etc/neutron/neutron.conf --config-file /etc/neutron/plugin.ini upgrade head
nova-manage db sync
nova-manage api_db sync
nova-manage db online_data_migrations
sahara-db-manage --config-file /etc/sahara/sahara.conf upgrade head
fi

extraconfig/tasks/major_upgrade_controller_pacemaker_4.sh

@@ -2,7 +2,16 @@
set -eu
if [[ -n $(is_bootstrap_node) ]]; then
# run gnocchi upgrade
gnocchi-upgrade
fi
start_or_enable_service rabbitmq
check_resource rabbitmq started 600
start_or_enable_service redis
check_resource redis started 600
start_or_enable_service openstack-cinder-volume
check_resource openstack-cinder-volume started 600
# start httpd so keystone is available for gnocchi
# upgrade to run.
systemctl start httpd
# Swift isn't controlled by pacemaker
systemctl_swift start

extraconfig/tasks/major_upgrade_controller_pacemaker_5.sh

@@ -2,14 +2,7 @@
set -eu
# We need to start the systemd services we explicitly stopped at step _1.sh
# FIXME: Should we let puppet during the convergence step do the service enabling or
# should we add it here?
services=$(services_to_migrate)
if [[ ${keep_sahara_services_on_upgrade} =~ [Ff]alse ]] ; then
services=${services%%openstack-sahara*}
if [[ -n $(is_bootstrap_node) ]]; then
# run gnocchi upgrade
gnocchi-upgrade
fi
for service in $services; do
manage_systemd_service start "${service%%-clone}"
check_resource_systemd "${service%%-clone}" started 600
done

extraconfig/tasks/major_upgrade_controller_pacemaker_6.sh

@@ -0,0 +1,15 @@
#!/bin/bash
set -eu
# We need to start the systemd services we explicitly stopped at step _1.sh
# FIXME: Should we let puppet during the convergence step do the service enabling or
# should we add it here?
services=$(services_to_migrate)
if [[ ${keep_sahara_services_on_upgrade} =~ [Ff]alse ]] ; then
services=${services%%openstack-sahara*}
fi
for service in $services; do
manage_systemd_service start "${service%%-clone}"
check_resource_systemd "${service%%-clone}" started 600
done

extraconfig/tasks/major_upgrade_pacemaker.yaml

@@ -113,7 +113,20 @@ resources:
config:
list_join:
- ''
- - get_file: pacemaker_common_functions.sh
- - str_replace:
template: |
#!/bin/bash
upgrade_level_nova_compute='UPGRADE_LEVEL_NOVA_COMPUTE'
params:
UPGRADE_LEVEL_NOVA_COMPUTE: {get_param: UpgradeLevelNovaCompute}
- str_replace:
template: |
#!/bin/bash
mariadb_do_major_upgrade='MYSQL_MAJOR_UPGRADE'
params:
MYSQL_MAJOR_UPGRADE: {get_param: MySqlMajorUpgrade}
- get_file: pacemaker_common_functions.sh
- get_file: major_upgrade_check.sh
- get_file: major_upgrade_pacemaker_migrations.sh
- get_file: major_upgrade_controller_pacemaker_2.sh
@@ -170,13 +183,7 @@ resources:
config:
list_join:
- ''
- - str_replace:
template: |
#!/bin/bash
keep_sahara_services_on_upgrade='KEEP_SAHARA_SERVICES_ON_UPGRADE'
params:
KEEP_SAHARA_SERVICES_ON_UPGRADE: {get_param: KeepSaharaServicesOnUpgrade}
- get_file: pacemaker_common_functions.sh
- - get_file: pacemaker_common_functions.sh
- get_file: major_upgrade_pacemaker_migrations.sh
- get_file: major_upgrade_controller_pacemaker_5.sh
@@ -187,3 +194,28 @@ resources:
servers: {get_param: [servers, Controller]}
config: {get_resource: ControllerPacemakerUpgradeConfig_Step5}
input_values: {get_param: input_values}
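# New final step: start the systemd services that were stopped in step 2,
# once the cluster is back up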
ControllerPacemakerUpgradeConfig_Step6:
type: OS::Heat::SoftwareConfig
properties:
group: script
config:
list_join:
- ''
- - str_replace:
template: |
#!/bin/bash
keep_sahara_services_on_upgrade='KEEP_SAHARA_SERVICES_ON_UPGRADE'
params:
KEEP_SAHARA_SERVICES_ON_UPGRADE: {get_param: KeepSaharaServicesOnUpgrade}
- get_file: pacemaker_common_functions.sh
- get_file: major_upgrade_pacemaker_migrations.sh
- get_file: major_upgrade_controller_pacemaker_6.sh
ControllerPacemakerUpgradeDeployment_Step6:
type: OS::Heat::SoftwareDeploymentGroup
depends_on: ControllerPacemakerUpgradeDeployment_Step5
properties:
servers: {get_param: [servers, Controller]}
config: {get_resource: ControllerPacemakerUpgradeConfig_Step6}
input_values: {get_param: input_values}