From 2a567ca60715beaf213c13f6dfd4e1af55279cda Mon Sep 17 00:00:00 2001
From: Zhen Qin
Date: Fri, 1 Sep 2017 13:38:05 -0400
Subject: [PATCH] Suppress excessive debug logs when consuming rabbit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If using rabbitmq as the rpc backend, oslo.messaging generates a large
amount of redundant timeout debug logs (several logs per second across
multiple openstack services, such as nova, heat and cinder), in the
format 'Timed out waiting for RPC response: Timeout while waiting on
RPC response - topic: "<unknown>", RPC method: "<unknown>" info:
"<unknown>"'. This is because each socket timeout exception is raised
through multiple levels of error recovery callback functions and then
logged repeatedly.

However, the accompanying value of the socket.timeout exception is
currently always “timed out”. Besides, oslo.messaging has implemented a
retry mechanism to recover from socket timeout failures. Therefore,
those logs should be suppressed, even at debug level, to save disk
space and make debugging more convenient.

Change-Id: Iafc360f8d18871cff93e7fd721d793ecdef5f4a1
Closes-Bug: #1714558
(cherry picked from commit 147186c7b4aabd2155f58f61913882f7a066bcbc)
---
 oslo_messaging/_drivers/impl_rabbit.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/oslo_messaging/_drivers/impl_rabbit.py b/oslo_messaging/_drivers/impl_rabbit.py
index 036bbd0d8..59ecf7df9 100644
--- a/oslo_messaging/_drivers/impl_rabbit.py
+++ b/oslo_messaging/_drivers/impl_rabbit.py
@@ -1033,14 +1033,13 @@ class Connection(object):
         timer = rpc_common.DecayingTimer(duration=timeout)
         timer.start()
 
-        def _raise_timeout(exc):
-            LOG.debug('Timed out waiting for RPC response: %s', exc)
+        def _raise_timeout():
             raise rpc_common.Timeout()
 
         def _recoverable_error_callback(exc):
             if not isinstance(exc, rpc_common.Timeout):
                 self._new_tags = set(self._consumers.values())
-            timer.check_return(_raise_timeout, exc)
+            timer.check_return(_raise_timeout)
 
         def _error_callback(exc):
             _recoverable_error_callback(exc)
@@ -1073,9 +1072,9 @@ class Connection(object):
             try:
                 self.connection.drain_events(timeout=poll_timeout)
                 return
-            except socket.timeout as exc:
+            except socket.timeout:
                 poll_timeout = timer.check_return(
-                    _raise_timeout, exc, maximum=self._poll_timeout)
+                    _raise_timeout, maximum=self._poll_timeout)
             except self.connection.channel_errors as exc:
                 if exc.code == 406 and exc.method_name == 'Basic.ack':
                     # NOTE(gordc): occasionally multiple workers will grab
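
For context on why it is safe to drop the LOG.debug() call, below is a
minimal, self-contained sketch of the decaying-timer retry pattern the
driver relies on. The DecayingTimer and Timeout classes here are
simplified stand-ins written for illustration and are not the actual
oslo.messaging implementations; only the shape of check_return() (the
callback fires once the overall budget is exhausted, and the returned
poll interval is capped by the 'maximum' keyword) mirrors how the
patched code uses it.

# Illustrative sketch only -- simplified stand-ins, not oslo.messaging code.
import socket
import time


class DecayingTimer(object):
    """Tracks how much of an overall timeout budget is left."""

    def __init__(self, duration=None):
        self._duration = duration
        self._started_at = None

    def start(self):
        self._started_at = time.monotonic()
        return self

    def check_return(self, timeout_callback=None, *args, **kwargs):
        maximum = kwargs.pop('maximum', None)
        if self._duration is None:
            return maximum
        left = self._duration - (time.monotonic() - self._started_at)
        if left <= 0 and timeout_callback is not None:
            # Only here, when the whole budget is spent, does the caller
            # learn about the timeout -- individual polls stay silent.
            timeout_callback(*args, **kwargs)
        return left if maximum is None else min(left, maximum)


class Timeout(Exception):
    """Raised once the overall RPC deadline is exceeded."""


def _raise_timeout():
    # After the patch the callback no longer logs the (always identical)
    # "timed out" value of socket.timeout; it just signals the deadline.
    raise Timeout()


# Poll in short intervals, shrinking the per-poll timeout as the budget decays.
timer = DecayingTimer(duration=0.3).start()
poll_timeout = 0.1
try:
    while True:
        try:
            time.sleep(poll_timeout)   # stands in for drain_events(timeout=...)
            raise socket.timeout       # pretend every poll times out
        except socket.timeout:
            # Each per-poll timeout is silently retried; no debug log needed.
            poll_timeout = timer.check_return(_raise_timeout, maximum=0.1)
except Timeout:
    print('overall RPC deadline exceeded')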