Add support for limiting dependency processing
To protect Zuul servers from accidental DoS attacks in case someone, say, uploads a 1k change tree to gerrit, add an option to limit the dependency processing in the Gerrit driver and in Zuul itself (since those are the two places where we recursively process deps). Change-Id: I568bd80bbc75284a8e63c2e414c5ac940fc1429a
This commit is contained in:
parent
bf889081d8
commit
70c34607f5
|
@ -92,6 +92,21 @@ The supported options in ``zuul.conf`` connections are:
|
|||
which points back to Gerrit, project and all of its safe attributes,
|
||||
and sha which is the git sha1.
|
||||
|
||||
.. attr:: max_dependencies
|
||||
|
||||
This setting can be used to limit the number of dependencies
|
||||
that Zuul will consider when processing events from Gerrit. If
|
||||
used, it should be set to a value that is higher than the
|
||||
highest number of dependencies that are expected to be
|
||||
encountered. If, when processing an event from Gerrit, Zuul
|
||||
detects that the dependencies will exceed this value, Zuul will
|
||||
ignore the event with no feedback to the user. This is meant
|
||||
only to protect the Zuul server from resource exhaustion when
|
||||
excessive dependencies are present. The default (unset) is no
|
||||
limit. Note that the value ``0`` does not disable this option;
|
||||
instead it limits Zuul to zero dependencies. This is distinct
|
||||
from :attr:`tenant.max-dependencies`.
|
||||
|
||||
.. attr:: user
|
||||
:default: zuul
|
||||
|
||||
|
|
|
@ -338,6 +338,21 @@ configuration. Some examples of tenant definitions are:
|
|||
|
||||
A list of **project** items.
|
||||
|
||||
.. attr:: max-dependencies
|
||||
|
||||
This setting can be used to limit the number of dependencies
|
||||
that Zuul will consider when enqueing a change in any pipeline
|
||||
in this tenant. If used, it should be set to a value that is
|
||||
higher than the highest number of dependencies that are expected
|
||||
to be encountered. If, when enqueing a change, Zuul detects
|
||||
that the dependencies will exceed this value, Zuul will not
|
||||
enqueue the change and will provide no feedback to the user.
|
||||
This is meant only to protect the Zuul server from resource
|
||||
exhaustion when excessive dependencies are present. The default
|
||||
(unset) is no limit. Note that the value ``0`` does not disable
|
||||
this option; instead it limits Zuul to zero dependencies. This
|
||||
is distinct from :attr:`<gerrit connection>.max_dependencies`.
|
||||
|
||||
.. attr:: max-nodes-per-job
|
||||
:default: 5
|
||||
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
---
|
||||
features:
|
||||
- |
|
||||
Two new settings are available to protect Zuul from resource
|
||||
exhaustion from processing too many dependencies among changes.
|
||||
The Gerrit driver supports setting :attr:`<gerrit
|
||||
connection>.max_dependencies` to limit internal dependency
|
||||
processing during event processing, and a new tenant setting of
|
||||
:attr:`tenant.max-dependencies` is available to limit tenant
|
||||
processing while enqueing changes in pipelines.
|
|
@ -0,0 +1,11 @@
|
|||
- tenant:
|
||||
name: tenant-one
|
||||
max-dependencies: 1
|
||||
source:
|
||||
gerrit:
|
||||
config-projects:
|
||||
- common-config
|
||||
untrusted-projects:
|
||||
- org/project
|
||||
- org/project1
|
||||
- org/project2
|
|
@ -0,0 +1,37 @@
|
|||
[statsd]
|
||||
# note, use 127.0.0.1 rather than localhost to avoid getting ipv6
|
||||
# see: https://github.com/jsocol/pystatsd/issues/61
|
||||
server=127.0.0.1
|
||||
|
||||
[scheduler]
|
||||
tenant_config=main.yaml
|
||||
|
||||
[merger]
|
||||
git_dir=/tmp/zuul-test/merger-git
|
||||
git_user_email=zuul@example.com
|
||||
git_user_name=zuul
|
||||
|
||||
[web]
|
||||
root=http://zuul.example.com
|
||||
|
||||
[executor]
|
||||
git_dir=/tmp/zuul-test/executor-git
|
||||
load_multiplier=100
|
||||
|
||||
[connection gerrit]
|
||||
driver=gerrit
|
||||
server=review.example.com
|
||||
user=jenkins
|
||||
sshkey=fake_id_rsa_path
|
||||
password=badpassword
|
||||
max_dependencies=1
|
||||
|
||||
[connection smtp]
|
||||
driver=smtp
|
||||
server=localhost
|
||||
port=25
|
||||
default_from=zuul@example.com
|
||||
default_to=you@example.com
|
||||
|
||||
[database]
|
||||
dburi=$MYSQL_FIXTURE_DBURI$
|
|
@ -1135,6 +1135,43 @@ class TestGerritDefaultBranch(ZuulTestCase):
|
|||
self.assertNotEqual(new_layout, prev_layout)
|
||||
|
||||
|
||||
class TestGerritMaxDeps(ZuulTestCase):
|
||||
config_file = 'zuul-gerrit-max-deps.conf'
|
||||
|
||||
@simple_layout('layouts/simple.yaml')
|
||||
def test_max_deps(self):
|
||||
# max_dependencies for the connection is 1, so this is okay
|
||||
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
|
||||
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.assertHistory([
|
||||
dict(name='check-job', result='SUCCESS', changes='1,1'),
|
||||
])
|
||||
|
||||
# So is this
|
||||
B = self.fake_gerrit.addFakeChange('org/project', 'master', 'B')
|
||||
B.setDependsOn(A, 1)
|
||||
self.fake_gerrit.addEvent(B.getPatchsetCreatedEvent(1))
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.assertHistory([
|
||||
dict(name='check-job', result='SUCCESS', changes='1,1'),
|
||||
dict(name='check-job', result='SUCCESS', changes='1,1 2,1'),
|
||||
])
|
||||
|
||||
# This is not
|
||||
C = self.fake_gerrit.addFakeChange('org/project', 'master', 'C')
|
||||
C.setDependsOn(B, 1)
|
||||
self.fake_gerrit.addEvent(C.getPatchsetCreatedEvent(1))
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.assertHistory([
|
||||
dict(name='check-job', result='SUCCESS', changes='1,1'),
|
||||
dict(name='check-job', result='SUCCESS', changes='1,1 2,1'),
|
||||
])
|
||||
|
||||
|
||||
class TestGerritDriver(ZuulTestCase):
|
||||
# Most of the Zuul test suite tests the Gerrit driver, to some
|
||||
# extent. The other classes in this file test specific methods of
|
||||
|
|
|
@ -9738,3 +9738,49 @@ class TestDynamicBranchesProject(IncludeBranchesTestCase):
|
|||
dict(name='project-test', result='SUCCESS', changes='2,1'),
|
||||
]
|
||||
self._test_include_branches(history1, history2, history3, history4)
|
||||
|
||||
|
||||
class TestMaxDeps(ZuulTestCase):
|
||||
tenant_config_file = 'config/single-tenant/main-max-deps.yaml'
|
||||
|
||||
def test_max_deps(self):
|
||||
# max_dependencies for the connection is 1, so this is okay
|
||||
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
|
||||
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.assertHistory([
|
||||
dict(name='project-merge', result='SUCCESS', changes='1,1'),
|
||||
dict(name='project-test1', result='SUCCESS', changes='1,1'),
|
||||
dict(name='project-test2', result='SUCCESS', changes='1,1'),
|
||||
], ordered=False)
|
||||
|
||||
# So is this
|
||||
B = self.fake_gerrit.addFakeChange('org/project', 'master', 'B')
|
||||
B.setDependsOn(A, 1)
|
||||
self.fake_gerrit.addEvent(B.getPatchsetCreatedEvent(1))
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.assertHistory([
|
||||
dict(name='project-merge', result='SUCCESS', changes='1,1'),
|
||||
dict(name='project-test1', result='SUCCESS', changes='1,1'),
|
||||
dict(name='project-test2', result='SUCCESS', changes='1,1'),
|
||||
dict(name='project-merge', result='SUCCESS', changes='1,1 2,1'),
|
||||
dict(name='project-test1', result='SUCCESS', changes='1,1 2,1'),
|
||||
dict(name='project-test2', result='SUCCESS', changes='1,1 2,1'),
|
||||
], ordered=False)
|
||||
|
||||
# This is not
|
||||
C = self.fake_gerrit.addFakeChange('org/project', 'master', 'C')
|
||||
C.setDependsOn(B, 1)
|
||||
self.fake_gerrit.addEvent(C.getPatchsetCreatedEvent(1))
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.assertHistory([
|
||||
dict(name='project-merge', result='SUCCESS', changes='1,1'),
|
||||
dict(name='project-test1', result='SUCCESS', changes='1,1'),
|
||||
dict(name='project-test2', result='SUCCESS', changes='1,1'),
|
||||
dict(name='project-merge', result='SUCCESS', changes='1,1 2,1'),
|
||||
dict(name='project-test1', result='SUCCESS', changes='1,1 2,1'),
|
||||
dict(name='project-test2', result='SUCCESS', changes='1,1 2,1'),
|
||||
], ordered=False)
|
||||
|
|
|
@ -1925,6 +1925,7 @@ class TenantParser(object):
|
|||
|
||||
def getSchema(self):
|
||||
tenant = {vs.Required('name'): str,
|
||||
'max-dependencies': int,
|
||||
'max-nodes-per-job': int,
|
||||
'max-job-timeout': int,
|
||||
'source': self.validateTenantSources(),
|
||||
|
@ -1960,6 +1961,8 @@ class TenantParser(object):
|
|||
tenant = model.Tenant(conf['name'])
|
||||
pcontext = ParseContext(self.connections, self.scheduler,
|
||||
tenant, ansible_manager)
|
||||
if conf.get('max-dependencies') is not None:
|
||||
tenant.max_dependencies = conf['max-dependencies']
|
||||
if conf.get('max-nodes-per-job') is not None:
|
||||
tenant.max_nodes_per_job = conf['max-nodes-per-job']
|
||||
if conf.get('max-job-timeout') is not None:
|
||||
|
|
|
@ -85,6 +85,10 @@ class HTTPBadRequestException(Exception):
|
|||
pass
|
||||
|
||||
|
||||
class GerritEventProcessingException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class GerritChangeCache(AbstractChangeCache):
|
||||
log = logging.getLogger("zuul.driver.GerritChangeCache")
|
||||
|
||||
|
@ -227,6 +231,8 @@ class GerritEventConnector(threading.Thread):
|
|||
"GerritEventProcessing", links=[link]):
|
||||
try:
|
||||
self._handleEvent(event)
|
||||
except GerritEventProcessingException as e:
|
||||
self.log.warning("Skipping event due to %s", e)
|
||||
finally:
|
||||
self.event_queue.ack(event)
|
||||
if self._stopped:
|
||||
|
@ -446,6 +452,10 @@ class GerritConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection):
|
|||
self.port = int(self.connection_config.get('port', 29418))
|
||||
self.keyfile = self.connection_config.get('sshkey', None)
|
||||
self.keepalive = int(self.connection_config.get('keepalive', 60))
|
||||
self.max_dependencies = self.connection_config.get(
|
||||
'max_dependencies', None)
|
||||
if self.max_dependencies is not None:
|
||||
self.max_dependencies = int(self.max_dependencies)
|
||||
self.event_source = self.EVENT_SOURCE_NONE
|
||||
# TODO(corvus): Document this when the checks api is stable;
|
||||
# it's not useful without it.
|
||||
|
@ -805,6 +815,12 @@ class GerritConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection):
|
|||
log.debug("Change %s is in history", change)
|
||||
return change
|
||||
|
||||
if (self.max_dependencies is not None and
|
||||
history and
|
||||
len(history) > self.max_dependencies):
|
||||
raise GerritEventProcessingException(
|
||||
f"Change {change} has too many dependencies")
|
||||
|
||||
log.info("Updating %s", change)
|
||||
data = self.queryChange(change.number, event=event)
|
||||
|
||||
|
@ -868,6 +884,8 @@ class GerritConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection):
|
|||
dep not in needed_by_changes):
|
||||
git_needed_by_changes.append(dep.cache_key)
|
||||
needed_by_changes.add(dep.cache_key)
|
||||
except GerritEventProcessingException:
|
||||
raise
|
||||
except Exception:
|
||||
log.exception("Failed to get git-needed change %s,%s",
|
||||
dep_num, dep_ps)
|
||||
|
@ -893,6 +911,8 @@ class GerritConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection):
|
|||
and dep not in needed_by_changes):
|
||||
compat_needed_by_changes.append(dep.cache_key)
|
||||
needed_by_changes.add(dep.cache_key)
|
||||
except GerritEventProcessingException:
|
||||
raise
|
||||
except Exception:
|
||||
log.exception("Failed to get commit-needed change %s,%s",
|
||||
dep_num, dep_ps)
|
||||
|
@ -928,6 +948,8 @@ class GerritConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection):
|
|||
and dep not in needed_by_changes):
|
||||
compat_needed_by_changes.append(dep.cache_key)
|
||||
needed_by_changes.add(dep.cache_key)
|
||||
except GerritEventProcessingException:
|
||||
raise
|
||||
except Exception:
|
||||
log.exception("Failed to get commit-needed change %s,%s",
|
||||
dep_num, dep_ps)
|
||||
|
|
|
@ -216,6 +216,14 @@ class DependentPipelineManager(SharedQueuePipelineManager):
|
|||
node = dependency_graph.setdefault(change, [])
|
||||
node.append(needed_change)
|
||||
|
||||
if (self.pipeline.tenant.max_dependencies is not None and
|
||||
dependency_graph is not None and
|
||||
(len(dependency_graph) >
|
||||
self.pipeline.tenant.max_dependencies)):
|
||||
log.debug(" Dependency graph for change %s is too large",
|
||||
change)
|
||||
return True, []
|
||||
|
||||
with self.getChangeQueue(needed_change,
|
||||
event) as needed_change_queue:
|
||||
if needed_change_queue != change_queue:
|
||||
|
|
|
@ -113,6 +113,13 @@ class IndependentPipelineManager(PipelineManager):
|
|||
node = dependency_graph.setdefault(change, [])
|
||||
node.append(needed_change)
|
||||
|
||||
if (self.pipeline.tenant.max_dependencies is not None and
|
||||
dependency_graph is not None and
|
||||
len(dependency_graph) > self.pipeline.tenant.max_dependencies):
|
||||
log.debug(" Dependency graph for change %s is too large",
|
||||
change)
|
||||
return True, []
|
||||
|
||||
if self.isChangeAlreadyInQueue(needed_change, change_queue):
|
||||
log.debug(" Needed change is already ahead in the queue")
|
||||
continue
|
||||
|
|
|
@ -8572,6 +8572,7 @@ class Tenant(object):
|
|||
self.name = name
|
||||
self.max_nodes_per_job = 5
|
||||
self.max_job_timeout = 10800
|
||||
self.max_dependencies = None
|
||||
self.exclude_unprotected_branches = False
|
||||
self.default_base_job = None
|
||||
self.layout = None
|
||||
|
|
Loading…
Reference in New Issue