diff --git a/ceilometer/openstack/common/rpc/impl_kombu.py b/ceilometer/openstack/common/rpc/impl_kombu.py index c8a6de0e30..99e4e9992b 100644 --- a/ceilometer/openstack/common/rpc/impl_kombu.py +++ b/ceilometer/openstack/common/rpc/impl_kombu.py @@ -52,6 +52,10 @@ kombu_opts = [ default='', help=('SSL certification authority file ' '(valid only if SSL enabled)')), + cfg.FloatOpt('kombu_reconnect_delay', + default=1.0, + help='How long to wait before reconnecting in response to an ' + 'AMQP consumer cancel notification.'), cfg.StrOpt('rabbit_host', default='localhost', help='The RabbitMQ broker address where a single node is used'), @@ -495,6 +499,17 @@ class Connection(object): LOG.info(_("Reconnecting to AMQP server on " "%(hostname)s:%(port)d") % params) try: + # NOTE(bogdando): when reconnecting to a RabbitMQ cluster + # with mirrored queues in use, the attempt to release the + # connection can hang "indefinitely" somewhere deep down + # in Kombu. Blocking the thread for a bit prior to + # release seems to kludge around the problem where it is + # otherwise reproduceable. + if self.conf.kombu_reconnect_delay > 0: + LOG.info(_("Delaying reconnect for %1.1f seconds...") % + self.conf.kombu_reconnect_delay) + time.sleep(self.conf.kombu_reconnect_delay) + self.connection.release() except self.connection_errors: pass