Add support for limiting dependency processing

To protect Zuul servers from accidental DoS attacks in case someone,
say, uploads a 1k change tree to gerrit, add an option to limit the
dependency processing in the Gerrit driver and in Zuul itself (since
those are the two places where we recursively process deps).

Change-Id: I568bd80bbc75284a8e63c2e414c5ac940fc1429a
This commit is contained in:
James E. Blair 2023-08-14 14:51:48 -07:00
parent bf889081d8
commit 70c34607f5
12 changed files with 212 additions and 0 deletions

View File

@ -92,6 +92,21 @@ The supported options in ``zuul.conf`` connections are:
which points back to Gerrit, project and all of its safe attributes,
and sha which is the git sha1.
.. attr:: max_dependencies
This setting can be used to limit the number of dependencies
that Zuul will consider when processing events from Gerrit. If
used, it should be set to a value that is higher than the
highest number of dependencies that are expected to be
encountered. If, when processing an event from Gerrit, Zuul
detects that the dependencies will exceed this value, Zuul will
ignore the event with no feedback to the user. This is meant
only to protect the Zuul server from resource exhaustion when
excessive dependencies are present. The default (unset) is no
limit. Note that the value ``0`` does not disable this option;
instead it limits Zuul to zero dependencies. This is distinct
from :attr:`tenant.max-dependencies`.
.. attr:: user
:default: zuul

View File

@ -338,6 +338,21 @@ configuration. Some examples of tenant definitions are:
A list of **project** items.
.. attr:: max-dependencies
This setting can be used to limit the number of dependencies
that Zuul will consider when enqueing a change in any pipeline
in this tenant. If used, it should be set to a value that is
higher than the highest number of dependencies that are expected
to be encountered. If, when enqueing a change, Zuul detects
that the dependencies will exceed this value, Zuul will not
enqueue the change and will provide no feedback to the user.
This is meant only to protect the Zuul server from resource
exhaustion when excessive dependencies are present. The default
(unset) is no limit. Note that the value ``0`` does not disable
this option; instead it limits Zuul to zero dependencies. This
is distinct from :attr:`<gerrit connection>.max_dependencies`.
.. attr:: max-nodes-per-job
:default: 5

View File

@ -0,0 +1,10 @@
---
features:
- |
Two new settings are available to protect Zuul from resource
exhaustion from processing too many dependencies among changes.
The Gerrit driver supports setting :attr:`<gerrit
connection>.max_dependencies` to limit internal dependency
processing during event processing, and a new tenant setting of
:attr:`tenant.max-dependencies` is available to limit tenant
processing while enqueing changes in pipelines.

View File

@ -0,0 +1,11 @@
- tenant:
name: tenant-one
max-dependencies: 1
source:
gerrit:
config-projects:
- common-config
untrusted-projects:
- org/project
- org/project1
- org/project2

View File

@ -0,0 +1,37 @@
[statsd]
# note, use 127.0.0.1 rather than localhost to avoid getting ipv6
# see: https://github.com/jsocol/pystatsd/issues/61
server=127.0.0.1
[scheduler]
tenant_config=main.yaml
[merger]
git_dir=/tmp/zuul-test/merger-git
git_user_email=zuul@example.com
git_user_name=zuul
[web]
root=http://zuul.example.com
[executor]
git_dir=/tmp/zuul-test/executor-git
load_multiplier=100
[connection gerrit]
driver=gerrit
server=review.example.com
user=jenkins
sshkey=fake_id_rsa_path
password=badpassword
max_dependencies=1
[connection smtp]
driver=smtp
server=localhost
port=25
default_from=zuul@example.com
default_to=you@example.com
[database]
dburi=$MYSQL_FIXTURE_DBURI$

View File

@ -1135,6 +1135,43 @@ class TestGerritDefaultBranch(ZuulTestCase):
self.assertNotEqual(new_layout, prev_layout)
class TestGerritMaxDeps(ZuulTestCase):
config_file = 'zuul-gerrit-max-deps.conf'
@simple_layout('layouts/simple.yaml')
def test_max_deps(self):
# max_dependencies for the connection is 1, so this is okay
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
self.waitUntilSettled()
self.assertHistory([
dict(name='check-job', result='SUCCESS', changes='1,1'),
])
# So is this
B = self.fake_gerrit.addFakeChange('org/project', 'master', 'B')
B.setDependsOn(A, 1)
self.fake_gerrit.addEvent(B.getPatchsetCreatedEvent(1))
self.waitUntilSettled()
self.assertHistory([
dict(name='check-job', result='SUCCESS', changes='1,1'),
dict(name='check-job', result='SUCCESS', changes='1,1 2,1'),
])
# This is not
C = self.fake_gerrit.addFakeChange('org/project', 'master', 'C')
C.setDependsOn(B, 1)
self.fake_gerrit.addEvent(C.getPatchsetCreatedEvent(1))
self.waitUntilSettled()
self.assertHistory([
dict(name='check-job', result='SUCCESS', changes='1,1'),
dict(name='check-job', result='SUCCESS', changes='1,1 2,1'),
])
class TestGerritDriver(ZuulTestCase):
# Most of the Zuul test suite tests the Gerrit driver, to some
# extent. The other classes in this file test specific methods of

View File

@ -9738,3 +9738,49 @@ class TestDynamicBranchesProject(IncludeBranchesTestCase):
dict(name='project-test', result='SUCCESS', changes='2,1'),
]
self._test_include_branches(history1, history2, history3, history4)
class TestMaxDeps(ZuulTestCase):
tenant_config_file = 'config/single-tenant/main-max-deps.yaml'
def test_max_deps(self):
# max_dependencies for the connection is 1, so this is okay
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
self.waitUntilSettled()
self.assertHistory([
dict(name='project-merge', result='SUCCESS', changes='1,1'),
dict(name='project-test1', result='SUCCESS', changes='1,1'),
dict(name='project-test2', result='SUCCESS', changes='1,1'),
], ordered=False)
# So is this
B = self.fake_gerrit.addFakeChange('org/project', 'master', 'B')
B.setDependsOn(A, 1)
self.fake_gerrit.addEvent(B.getPatchsetCreatedEvent(1))
self.waitUntilSettled()
self.assertHistory([
dict(name='project-merge', result='SUCCESS', changes='1,1'),
dict(name='project-test1', result='SUCCESS', changes='1,1'),
dict(name='project-test2', result='SUCCESS', changes='1,1'),
dict(name='project-merge', result='SUCCESS', changes='1,1 2,1'),
dict(name='project-test1', result='SUCCESS', changes='1,1 2,1'),
dict(name='project-test2', result='SUCCESS', changes='1,1 2,1'),
], ordered=False)
# This is not
C = self.fake_gerrit.addFakeChange('org/project', 'master', 'C')
C.setDependsOn(B, 1)
self.fake_gerrit.addEvent(C.getPatchsetCreatedEvent(1))
self.waitUntilSettled()
self.assertHistory([
dict(name='project-merge', result='SUCCESS', changes='1,1'),
dict(name='project-test1', result='SUCCESS', changes='1,1'),
dict(name='project-test2', result='SUCCESS', changes='1,1'),
dict(name='project-merge', result='SUCCESS', changes='1,1 2,1'),
dict(name='project-test1', result='SUCCESS', changes='1,1 2,1'),
dict(name='project-test2', result='SUCCESS', changes='1,1 2,1'),
], ordered=False)

View File

@ -1925,6 +1925,7 @@ class TenantParser(object):
def getSchema(self):
tenant = {vs.Required('name'): str,
'max-dependencies': int,
'max-nodes-per-job': int,
'max-job-timeout': int,
'source': self.validateTenantSources(),
@ -1960,6 +1961,8 @@ class TenantParser(object):
tenant = model.Tenant(conf['name'])
pcontext = ParseContext(self.connections, self.scheduler,
tenant, ansible_manager)
if conf.get('max-dependencies') is not None:
tenant.max_dependencies = conf['max-dependencies']
if conf.get('max-nodes-per-job') is not None:
tenant.max_nodes_per_job = conf['max-nodes-per-job']
if conf.get('max-job-timeout') is not None:

View File

@ -85,6 +85,10 @@ class HTTPBadRequestException(Exception):
pass
class GerritEventProcessingException(Exception):
pass
class GerritChangeCache(AbstractChangeCache):
log = logging.getLogger("zuul.driver.GerritChangeCache")
@ -227,6 +231,8 @@ class GerritEventConnector(threading.Thread):
"GerritEventProcessing", links=[link]):
try:
self._handleEvent(event)
except GerritEventProcessingException as e:
self.log.warning("Skipping event due to %s", e)
finally:
self.event_queue.ack(event)
if self._stopped:
@ -446,6 +452,10 @@ class GerritConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection):
self.port = int(self.connection_config.get('port', 29418))
self.keyfile = self.connection_config.get('sshkey', None)
self.keepalive = int(self.connection_config.get('keepalive', 60))
self.max_dependencies = self.connection_config.get(
'max_dependencies', None)
if self.max_dependencies is not None:
self.max_dependencies = int(self.max_dependencies)
self.event_source = self.EVENT_SOURCE_NONE
# TODO(corvus): Document this when the checks api is stable;
# it's not useful without it.
@ -805,6 +815,12 @@ class GerritConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection):
log.debug("Change %s is in history", change)
return change
if (self.max_dependencies is not None and
history and
len(history) > self.max_dependencies):
raise GerritEventProcessingException(
f"Change {change} has too many dependencies")
log.info("Updating %s", change)
data = self.queryChange(change.number, event=event)
@ -868,6 +884,8 @@ class GerritConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection):
dep not in needed_by_changes):
git_needed_by_changes.append(dep.cache_key)
needed_by_changes.add(dep.cache_key)
except GerritEventProcessingException:
raise
except Exception:
log.exception("Failed to get git-needed change %s,%s",
dep_num, dep_ps)
@ -893,6 +911,8 @@ class GerritConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection):
and dep not in needed_by_changes):
compat_needed_by_changes.append(dep.cache_key)
needed_by_changes.add(dep.cache_key)
except GerritEventProcessingException:
raise
except Exception:
log.exception("Failed to get commit-needed change %s,%s",
dep_num, dep_ps)
@ -928,6 +948,8 @@ class GerritConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection):
and dep not in needed_by_changes):
compat_needed_by_changes.append(dep.cache_key)
needed_by_changes.add(dep.cache_key)
except GerritEventProcessingException:
raise
except Exception:
log.exception("Failed to get commit-needed change %s,%s",
dep_num, dep_ps)

View File

@ -216,6 +216,14 @@ class DependentPipelineManager(SharedQueuePipelineManager):
node = dependency_graph.setdefault(change, [])
node.append(needed_change)
if (self.pipeline.tenant.max_dependencies is not None and
dependency_graph is not None and
(len(dependency_graph) >
self.pipeline.tenant.max_dependencies)):
log.debug(" Dependency graph for change %s is too large",
change)
return True, []
with self.getChangeQueue(needed_change,
event) as needed_change_queue:
if needed_change_queue != change_queue:

View File

@ -113,6 +113,13 @@ class IndependentPipelineManager(PipelineManager):
node = dependency_graph.setdefault(change, [])
node.append(needed_change)
if (self.pipeline.tenant.max_dependencies is not None and
dependency_graph is not None and
len(dependency_graph) > self.pipeline.tenant.max_dependencies):
log.debug(" Dependency graph for change %s is too large",
change)
return True, []
if self.isChangeAlreadyInQueue(needed_change, change_queue):
log.debug(" Needed change is already ahead in the queue")
continue

View File

@ -8572,6 +8572,7 @@ class Tenant(object):
self.name = name
self.max_nodes_per_job = 5
self.max_job_timeout = 10800
self.max_dependencies = None
self.exclude_unprotected_branches = False
self.default_base_job = None
self.layout = None