etcd registration rework with proper locking
Plus small changes in clustercheckcron Change-Id: Id9039514d034adab5abe627f2de93a1df6d86f85
This commit is contained in:
parent
bc59213349
commit
ada422b39e
|
@ -15,8 +15,11 @@
|
|||
# 'clustercheckpassword!';
|
||||
set -e
|
||||
|
||||
# Forward logs to docker log collector
|
||||
exec 1>/proc/1/fd/2 2>/proc/1/fd/2
|
||||
|
||||
if [[ $1 == '-h' || $1 == '--help' ]];then
|
||||
echo "Usage: $0 <user> <pass> <available_when_donor=0|1> <log_file> <available_when_readonly=0|1> <defaults_extra_file>"
|
||||
echo "Usage: $0 <available_when_donor=0|1> <log_file> <available_when_readonly=0|1> <defaults_extra_file>"
|
||||
exit
|
||||
fi
|
||||
|
||||
|
@ -24,10 +27,11 @@ MYSQL_USERNAME=monitor
|
|||
MYSQL_PASSWORD={{ percona.monitor_password }}
|
||||
DISCOVERY_SERVICE={{ address("etcd", etcd.client_port) }}
|
||||
CLUSTER_NAME={{ percona.cluster_name }}
|
||||
AVAILABLE_WHEN_DONOR=${3:-0}
|
||||
ERR_FILE="${4:-/dev/null}"
|
||||
AVAILABLE_WHEN_READONLY=${5:-1}
|
||||
DEFAULTS_EXTRA_FILE=${6:-/etc/my.cnf}
|
||||
AVAILABLE_WHEN_DONOR=${1:-0}
|
||||
AVAILABLE_WHEN_READONLY=${2:-1}
|
||||
DEFAULTS_EXTRA_FILE=${3:-/etc/my.cnf}
|
||||
CURL="curl -sS"
|
||||
FIRST_RUN=1
|
||||
|
||||
# CLUSTER_NAME to be set in environment
|
||||
# DISCOVERY_SERVICE to be set in environment
|
||||
|
@ -54,11 +58,19 @@ hostname=$(hostname)
|
|||
|
||||
while true
|
||||
do
|
||||
|
||||
if [ $FIRST_RUN -eq 1 ]; then
|
||||
sleep 30
|
||||
FIRST_RUN=0
|
||||
fi
|
||||
#
|
||||
# Perform the query to check the wsrep_local_state
|
||||
#
|
||||
|
||||
# Race condition: we need to wait until MySQL is ready
|
||||
|
||||
WSREP_STATUS=($($MYSQL_CMDLINE -e "SHOW GLOBAL STATUS LIKE 'wsrep_%';" \
|
||||
2>${ERR_FILE} | grep -A 1 -E 'wsrep_local_state$|wsrep_cluster_status$' \
|
||||
| grep -A 1 -E 'wsrep_local_state$|wsrep_cluster_status$' \
|
||||
| sed -n -e '2p' -e '5p' | tr '\n' ' '))
|
||||
|
||||
if [[ ${WSREP_STATUS[1]} == 'Primary' && ( ${WSREP_STATUS[0]} -eq 4 || \
|
||||
|
@ -67,28 +79,33 @@ then
|
|||
|
||||
# Check only when set to 0 to avoid latency in response.
|
||||
if [[ $AVAILABLE_WHEN_READONLY -eq 0 ]];then
|
||||
READ_ONLY=$($MYSQL_CMDLINE -e "SHOW GLOBAL VARIABLES LIKE 'read_only';" \
|
||||
2>${ERR_FILE} | tail -1 2>>${ERR_FILE})
|
||||
READ_ONLY=$($MYSQL_CMDLINE -e "SHOW GLOBAL VARIABLES LIKE 'read_only';")
|
||||
|
||||
if [[ "${READ_ONLY}" == "ON" ]];then
|
||||
# Percona XtraDB Cluster node local state is 'Synced', but it is in
|
||||
# read-only mode. The variable AVAILABLE_WHEN_READONLY is set to 0.
|
||||
# => return HTTP 503
|
||||
# Shell return-code is 1
|
||||
curl http://$DISCOVERY_SERVICE/v2/keys/pxc-cluster/$CLUSTER_NAME/$ipaddr/?recursive=true -XDELETE >> $ERR_FILE 2>&1
|
||||
date
|
||||
echo "Read-only node. Destroying"
|
||||
$CURL http://$DISCOVERY_SERVICE/v2/keys/pxc-cluster/$CLUSTER_NAME/$ipaddr/?recursive=true -XDELETE
|
||||
fi
|
||||
|
||||
fi
|
||||
# Percona XtraDB Cluster node local state is 'Synced' => return HTTP 200
|
||||
# Shell return-code is 0
|
||||
curl http://$DISCOVERY_SERVICE/v2/keys/pxc-cluster/$CLUSTER_NAME/$ipaddr/ipaddr -XPUT -d value="$ipaddr" -d ttl=30 >> $ERR_FILE 2>&1
|
||||
curl http://$DISCOVERY_SERVICE/v2/keys/pxc-cluster/$CLUSTER_NAME/$ipaddr/hostname -XPUT -d value="$hostname" -d ttl=30 >> $ERR_FILE 2>&1
|
||||
curl http://$DISCOVERY_SERVICE/v2/keys/pxc-cluster/$CLUSTER_NAME/$ipaddr -XPUT -d ttl=30 -d dir=true -d prevExist=true >> $ERR_FILE 2>&1
|
||||
date
|
||||
echo "Node is fine. Updating TTL"
|
||||
$CURL http://$DISCOVERY_SERVICE/v2/keys/pxc-cluster/$CLUSTER_NAME/$ipaddr/ipaddr -XPUT -d value="$ipaddr" -d ttl=30
|
||||
$CURL http://$DISCOVERY_SERVICE/v2/keys/pxc-cluster/$CLUSTER_NAME/$ipaddr/hostname -XPUT -d value="$hostname" -d ttl=30
|
||||
$CURL http://$DISCOVERY_SERVICE/v2/keys/pxc-cluster/$CLUSTER_NAME/$ipaddr -XPUT -d ttl=30 -d dir=true -d prevExist=true
|
||||
else
|
||||
# Percona XtraDB Cluster node local state is not 'Synced' => return HTTP
|
||||
# 503
|
||||
# Shell return-code is 1
|
||||
curl http://$DISCOVERY_SERVICE/v2/keys/pxc-cluster/$CLUSTER_NAME/$ipaddr/?recursive=true -XDELETE >> $ERR_FILE 2>&1
|
||||
date
|
||||
echo "Node state is not Synced. Destroying."
|
||||
$CURL http://$DISCOVERY_SERVICE/v2/keys/pxc-cluster/$CLUSTER_NAME/$ipaddr/?recursive=true -XDELETE
|
||||
fi
|
||||
|
||||
sleep 5
|
||||
|
|
|
@ -0,0 +1,123 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import functools
|
||||
import logging
|
||||
import os
|
||||
import socket
|
||||
import sys
|
||||
import time
|
||||
|
||||
import etcd
|
||||
|
||||
# Retry policy used by the ``retry`` decorator below: total number of
# attempts and the pause (in seconds) between them.
CONNECTION_ATTEMPTS = 3
CONNECTION_DELAY = 5

# Root of this cluster's registration tree in etcd (rendered by Jinja2).
ETCD_PATH = "/pxc-cluster/{{ percona.cluster_name }}"

# Identity under which this node registers itself.
HOSTNAME = socket.getfqdn()
IPADDR = socket.gethostbyname(HOSTNAME)

# Logging goes through the default basicConfig handler (stderr), which keeps
# stdout free for the join list printed by main().
LOG_FORMAT = "%(asctime)s.%(msecs)03d - %(levelname)s - %(message)s"
LOG_DATEFMT = "%Y-%m-%d %H:%M:%S"
logging.basicConfig(datefmt=LOG_DATEFMT, format=LOG_FORMAT)
LOG = logging.getLogger(__name__)
LOG.setLevel(logging.DEBUG)
|
||||
|
||||
|
||||
def retry(f):
    """Decorate *f* so that etcd failures are retried a bounded number of times.

    The wrapped callable is attempted up to ``CONNECTION_ATTEMPTS`` times in
    total. Every attempt except the last one swallows ``etcd.EtcdException``,
    logs a warning and sleeps ``CONNECTION_DELAY`` seconds; the last attempt
    is made unguarded so its exception propagates to the caller.
    """
    @functools.wraps(f)
    def wrap(*args, **kwargs):
        pause = CONNECTION_DELAY
        # All attempts but the final one are allowed to fail quietly.
        for _ in range(CONNECTION_ATTEMPTS - 1):
            try:
                return f(*args, **kwargs)
            except etcd.EtcdException as e:
                LOG.warning('Etcd is not ready: %s', str(e))
                LOG.warning('Retrying in %d seconds...', pause)
                time.sleep(pause)
        # Final attempt: let any error surface.
        return f(*args, **kwargs)
    return wrap
|
||||
|
||||
|
||||
def get_etcd_client():
|
||||
|
||||
return etcd.Client(host="{{ address("etcd") }}",
|
||||
port={{ etcd.client_port.cont }},
|
||||
allow_reconnect=True,
|
||||
read_timeout=2)
|
||||
|
||||
|
||||
@retry
def fetch_status(etcd_client):
    """Return the entries currently registered under ``ETCD_PATH``.

    Reads ``ETCD_PATH`` from etcd and returns the key of every leaf it
    reports, with the ``ETCD_PATH + "/"`` prefix removed. The root key itself
    is skipped. The resulting list is also logged.

    NOTE(review): presumably each entry is a node IP address, since
    ``_etcd_create_dir`` registers nodes as ``ETCD_PATH/<IPADDR>`` -- confirm
    against the etcd client's ``leaves`` semantics for a non-recursive read.
    """
    root = ETCD_PATH
    prefix = root + "/"
    members = []
    for node in etcd_client.read(root).leaves:
        node_key = str(node.key)
        if node_key == root:
            continue
        members.append(node_key.replace(prefix, ''))
    LOG.info("Current cluster state is: %s", members)
    return members
|
||||
|
||||
|
||||
def _etcd_set(etcd_client, data, ttl):
    """Store one attribute of this node in etcd.

    :param etcd_client: etcd client used for the write
    :param data: pair whose first item is the attribute name and whose second
                 item is the value to store
    :param ttl: time-to-live passed to etcd for the key

    The key is ``ETCD_PATH/<IPADDR>/<name>``.
    """
    target = os.path.join(ETCD_PATH, IPADDR, data[0])
    value = data[1]
    etcd_client.set(target, value, ttl=ttl)
    LOG.info("Set %s with value '%s'", target, value)
|
||||
|
||||
|
||||
def _etcd_create_dir(etcd_client, ttl):
    """(Re)create this node's directory ``ETCD_PATH/<IPADDR>`` with a TTL.

    If the key already exists it is treated as stale: it is deleted
    recursively and then created again. If the key is not found, the
    directory is simply created.

    :param etcd_client: etcd client used for all operations
    :param ttl: time-to-live set on the directory
    """
    node_dir = os.path.join(ETCD_PATH, IPADDR)

    def _make_dir():
        # Create the directory node and record the TTL that was applied.
        etcd_client.write(node_dir, None, ttl=ttl, dir=True)
        LOG.info("Set ttl for '%s' directory to %s", node_dir, ttl)

    try:
        etcd_client.get(node_dir)
        LOG.warning("Found stale key '%s', deleting", node_dir)
        etcd_client.delete(node_dir, recursive=True, dir=True)
        _make_dir()
    except etcd.EtcdKeyNotFound:
        _make_dir()
|
||||
|
||||
|
||||
@retry
def set_status(etcd_client, ttl=30):
    """Register this node in etcd under ``ETCD_PATH/<IPADDR>``.

    Recreates the node directory and then writes three attributes below it:
    ``ctime`` (current ``time.time()``), ``ipaddr`` and ``hostname``. The
    directory and every attribute get the same TTL.

    :param etcd_client: etcd client to use for all writes
    :param ttl: time-to-live for the directory and its keys (default 30)
    """
    # Use the client handed in by the caller. Previously the argument was
    # overwritten with a brand-new client, so the parameter was ignored and
    # a new client was constructed on every call and every retry.
    _etcd_create_dir(etcd_client, ttl)
    _etcd_set(etcd_client, ('ctime', time.time()), ttl)
    _etcd_set(etcd_client, ('ipaddr', IPADDR), ttl)
    _etcd_set(etcd_client, ('hostname', HOSTNAME), ttl)
|
||||
|
||||
|
||||
def create_join_list(status):
    """Return the comma-separated list of peers to join, excluding this node.

    :param status: list of registered node identifiers (as returned by
                   ``fetch_status``)
    :returns: the entries of *status* other than ``IPADDR`` joined with
              ``','``; an empty string when no other node is registered

    Unlike ``list.remove``, filtering does not raise ``ValueError`` when
    ``IPADDR`` is missing from *status*, and the caller's list is left
    untouched.
    """
    peers = [node for node in status if node != IPADDR]
    # ','.join([]) is already "", so no special case is needed.
    return ','.join(peers)
|
||||
|
||||
|
||||
def main(ttl):
    """Register this node in etcd under a cluster-wide lock and print peers.

    While holding the ``galera_bootstrap`` etcd lock, the node registers
    itself (``set_status``), reads the list of registered nodes
    (``fetch_status``) and prints the other nodes as a comma-separated list
    on stdout. Logging does not go to stdout, so stdout carries only that
    list.

    :param ttl: TTL used both for the lock and for the registration keys

    On any failure the error is logged and the process exits with status 1.
    Exiting 0 with empty output would be indistinguishable from "no other
    nodes registered", which the calling entrypoint treats as being the
    first node of the cluster.
    """
    lock = None
    acquired = False
    failed = False
    try:
        etcd_client = get_etcd_client()
        lock = etcd.Lock(etcd_client, 'galera_bootstrap')
        LOG.info("Locking...")
        lock.acquire(blocking=True, lock_ttl=ttl)
        acquired = True
        LOG.info("Successfuly acquired lock")
        set_status(etcd_client, ttl)
        status = fetch_status(etcd_client)
        # This output will be stdout == data
        print(create_join_list(status))
    except Exception as err:
        LOG.exception(err)
        failed = True
    finally:
        # Only release a lock that was actually taken; ``lock`` may not even
        # exist if creating the client or the lock object failed.
        if acquired:
            lock.release()
            LOG.info("Successfuly released lock")
    if failed:
        sys.exit(1)


if __name__ == "__main__":
    main(ttl=60)
|
|
@ -5,7 +5,6 @@ set -ex
|
|||
exec 1>/proc/1/fd/2 2>/proc/1/fd/2
|
||||
|
||||
MYSQL_ROOT_PASSWORD={{ db.root_password }}
|
||||
DISCOVERY_SERVICE={{ address("etcd", etcd.client_port) }}
|
||||
CLUSTER_NAME={{ percona.cluster_name }}
|
||||
XTRABACKUP_PASSWORD={{ percona.xtrabackup_password }}
|
||||
MONITOR_PASSWORD={{ percona.monitor_password }}
|
||||
|
@ -62,19 +61,18 @@ if [ ! -e "$DATADIR/init.ok" ]; then
|
|||
echo "GENERATED ROOT PASSWORD: $MYSQL_ROOT_PASSWORD"
|
||||
fi
|
||||
"${mysql[@]}" <<-EOSQL
|
||||
-- What's done in this file shouldn't be replicated
|
||||
-- or products like mysql-fabric won't work
|
||||
SET @@SESSION.SQL_LOG_BIN=0;
|
||||
CREATE USER 'root'@'%' IDENTIFIED BY '${MYSQL_ROOT_PASSWORD}' ;
|
||||
GRANT ALL ON *.* TO 'root'@'%' WITH GRANT OPTION ;
|
||||
ALTER USER 'root'@'localhost' IDENTIFIED BY '${MYSQL_ROOT_PASSWORD}';
|
||||
CREATE USER 'xtrabackup'@'localhost' IDENTIFIED BY '$XTRABACKUP_PASSWORD';
|
||||
GRANT RELOAD,PROCESS,LOCK TABLES,REPLICATION CLIENT ON *.* TO 'xtrabackup'@'localhost';
|
||||
GRANT REPLICATION CLIENT ON *.* TO monitor@'%' IDENTIFIED BY '$MONITOR_PASSWORD';
|
||||
GRANT PROCESS ON *.* TO monitor@localhost IDENTIFIED BY '$MONITOR_PASSWORD';
|
||||
DROP DATABASE IF EXISTS test ;
|
||||
FLUSH PRIVILEGES ;
|
||||
EOSQL
|
||||
-- What's done in this file shouldn't be replicated
|
||||
-- or products like mysql-fabric won't work
|
||||
SET @@SESSION.SQL_LOG_BIN=0;
|
||||
CREATE USER 'root'@'%' IDENTIFIED BY '${MYSQL_ROOT_PASSWORD}' ;
|
||||
GRANT ALL ON *.* TO 'root'@'%' WITH GRANT OPTION ;
|
||||
ALTER USER 'root'@'localhost' IDENTIFIED BY '${MYSQL_ROOT_PASSWORD}';
|
||||
CREATE USER 'xtrabackup'@'localhost' IDENTIFIED BY '$XTRABACKUP_PASSWORD';
|
||||
GRANT RELOAD,PROCESS,LOCK TABLES,REPLICATION CLIENT ON *.* TO 'xtrabackup'@'localhost';
|
||||
GRANT REPLICATION CLIENT ON *.* TO monitor@'%' IDENTIFIED BY '$MONITOR_PASSWORD';
|
||||
DROP DATABASE IF EXISTS test ;
|
||||
FLUSH PRIVILEGES ;
|
||||
EOSQL
|
||||
if [ ! -z "$MYSQL_ROOT_PASSWORD" ]; then
|
||||
mysql+=( -p"${MYSQL_ROOT_PASSWORD}" )
|
||||
fi
|
||||
|
@ -96,8 +94,8 @@ if [ ! -e "$DATADIR/init.ok" ]; then
|
|||
|
||||
if [ ! -z "$MYSQL_ONETIME_PASSWORD" ]; then
|
||||
"${mysql[@]}" <<-EOSQL
|
||||
ALTER USER 'root'@'%' PASSWORD EXPIRE;
|
||||
EOSQL
|
||||
ALTER USER 'root'@'%' PASSWORD EXPIRE;
|
||||
EOSQL
|
||||
fi
|
||||
if ! kill -s TERM "$pid" || ! wait "$pid"; then
|
||||
echo >&2 'MySQL init process failed.'
|
||||
|
@ -110,57 +108,21 @@ if [ ! -e "$DATADIR/init.ok" ]; then
|
|||
fi
|
||||
touch $DATADIR/init.ok
|
||||
|
||||
if [ -z "$DISCOVERY_SERVICE" ]; then
|
||||
cluster_join=$CLUSTER_JOIN
|
||||
available_nodes=$(/opt/ccp/bin/etcd_register.py)
|
||||
if [ -z "$available_nodes" ]; then
|
||||
echo "No available nodes found. Assuming Im first"
|
||||
else
|
||||
|
||||
echo
|
||||
echo 'Registering in the discovery service'
|
||||
echo
|
||||
|
||||
function join { local IFS="$1"; shift; echo "$*"; }
|
||||
|
||||
# Read the list of registered IP addresses
|
||||
ipaddr=$(hostname -i | awk ' { print $1 } ')
|
||||
hostname=$(hostname)
|
||||
|
||||
$CURL http://$DISCOVERY_SERVICE/v2/keys/pxc-cluster/queue/$CLUSTER_NAME -XPOST -d value=$ipaddr -d ttl=60
|
||||
|
||||
#get list of IP from queue
|
||||
i=( $($CURL http://$DISCOVERY_SERVICE/v2/keys/pxc-cluster/queue/$CLUSTER_NAME | jq -r '.node.nodes[].value') )
|
||||
|
||||
# this remove my ip from the list
|
||||
i1=${i[@]/$ipaddr}
|
||||
|
||||
# Register the current IP in the discovery service
|
||||
|
||||
# key set to expire in 30 sec. There is a cronjob that should update them regularly
|
||||
$CURL http://$DISCOVERY_SERVICE/v2/keys/pxc-cluster/$CLUSTER_NAME/$ipaddr/ipaddr -XPUT -d value="$ipaddr" -d ttl=30
|
||||
$CURL http://$DISCOVERY_SERVICE/v2/keys/pxc-cluster/$CLUSTER_NAME/$ipaddr/hostname -XPUT -d value="$hostname" -d ttl=30
|
||||
$CURL http://$DISCOVERY_SERVICE/v2/keys/pxc-cluster/$CLUSTER_NAME/$ipaddr -XPUT -d ttl=30 -d dir=true -d prevExist=true
|
||||
|
||||
i=( $($CURL http://$DISCOVERY_SERVICE/v2/keys/pxc-cluster/$CLUSTER_NAME/?quorum=true | jq -r '.node.nodes[]?.key' | awk -F'/' '{print $(NF)}') )
|
||||
# this remove my ip from the list
|
||||
i2=${i[@]/$ipaddr}
|
||||
|
||||
# Combine to arrays and remove duplicates
|
||||
OLDIFS="$IFS"
|
||||
IFS=$'\n'
|
||||
combined=(`for R in "${i1[@]}" "${i2[@]}" ; do echo "$R" ; done | sort -du`)
|
||||
IFS="$OLDIFS"
|
||||
|
||||
cluster_join=$(join , $combined )
|
||||
echo "Joining cluster $cluster_join"
|
||||
|
||||
bash /opt/ccp/bin/clustercheckcron monitor monitor 1 /var/lib/mysql/clustercheck.log 1 &
|
||||
|
||||
echo "Joining to nodes: $available_nodes"
|
||||
fi
|
||||
|
||||
bash /opt/ccp/bin/clustercheckcron 1 1 &
|
||||
|
||||
mysqld --user=mysql --wsrep_cluster_name=$CLUSTER_NAME \
|
||||
--wsrep_cluster_address="gcomm://$cluster_join" \
|
||||
--wsrep_cluster_address="gcomm://$available_nodes" \
|
||||
--wsrep_sst_method=xtrabackup-v2 \
|
||||
--wsrep_sst_auth="xtrabackup:$XTRABACKUP_PASSWORD" \
|
||||
--wsrep_node_address="$ipaddr" \
|
||||
--pxc_strict_mode=PERMISSIVE \
|
||||
$CMDARG
|
||||
|
||||
# vim: set ts=4 sw=4 tw=0 et :
|
||||
|
|
|
@ -31,6 +31,7 @@ service:
|
|||
- mycnf
|
||||
- check
|
||||
- readiness
|
||||
- galera-etcd-register
|
||||
dependencies:
|
||||
- etcd
|
||||
command: /opt/ccp/bin/entrypoint.sh
|
||||
|
@ -51,3 +52,7 @@ files:
|
|||
path: /opt/ccp/bin/percona_readiness.py
|
||||
content: percona_readiness.py.j2
|
||||
perm: "0750"
|
||||
galera-etcd-register:
|
||||
path: /opt/ccp/bin/etcd_register.py
|
||||
content: etcd_register.py.j2
|
||||
perm: "0755"
|
||||
|
|
Loading…
Reference in New Issue