Use the GitHub default branch as the default branch

This supplies a per-project default value for Zuul's default-branch
based on what the default branch is set to in GitHub.  This means
that if users omit the default-branch setting on a Zuul project
stanza, Zuul will automatically use the correct value.

If the value in GitHub is changed, an event is emitted which allows
us to automatically reconfigure the tenant.

This could be expanded to other drivers that support an indication
of which branch is default.

Change-Id: I660376ecb3f382785d3bf96459384cfafef200c9
This commit is contained in:
James E. Blair 2023-08-15 17:41:31 -07:00
parent 210ca5d235
commit 57a9c13197
19 changed files with 401 additions and 22 deletions

View File

@ -31,7 +31,7 @@ pragma directives may not be set and then unset within the same file.
:attr:`job.branches`. This attribute overrides that behavior.
This can be useful if a project has multiple branches, yet the
jobs defined in the master branch should apply to all branches.
jobs defined in the default branch should apply to all branches.
The behavior may also be configured by a Zuul administrator
using

View File

@ -104,6 +104,15 @@ pipeline.
(regardless of in which branch the definition appears). It may
not appear in a :ref:`project-template` definition.
This setting also affects the order in which configuration
objects are processed. Zuul will process the default branch
first before any other branches.
The GitHub driver will automatically use the default branch as
specified in GitHub for the repository as a default value for
this setting. It may be overridden by setting this value
explicitly.
.. attr:: merge-mode
:default: (driver specific)

View File

@ -131,3 +131,9 @@ Version 15
:Prior Zuul version: 9.0.0
:Description: Adds ansible_split_streams to FrozenJob.
Affects schedulers and executors.
Version 16
----------
:Prior Zuul version: 9.0.0
:Description: Adds default_branch to the branch cache.
Affects schedulers.

View File

@ -0,0 +1,7 @@
---
features:
- |
GitHub repositories will now automatically use the default branch
as specified on GitHub as the project's default branch. This may
still be overridden in Zuul using the
:attr:`project.default-branch` setting.

View File

@ -2992,6 +2992,15 @@ class FakeGithubConnection(githubconnection.GithubConnection):
}
return (name, data)
def getRepositoryEvent(self, repository, action, changes):
    """Build a fake GitHub ``repository`` webhook event.

    :param repository: Value stored under the payload's 'repository' key.
    :param action: Value stored under the payload's 'action' key.
    :param changes: Value stored under the payload's 'changes' key.

    :returns: A ``(name, data)`` tuple whose name is 'repository'.
    """
    payload = dict(action=action, changes=changes, repository=repository)
    return ('repository', payload)
def emitEvent(self, event, use_zuulweb=False):
"""Emulates sending the GitHub webhook event to the connection."""
name, data = event

View File

@ -242,6 +242,7 @@ class FakeRepository(object):
'allow_merge_commit': True,
'allow_squash_merge': True,
'allow_rebase_merge': True,
'default_branch': 'master',
}
# fail the next commit requests with 404

View File

@ -1272,3 +1272,31 @@ class TestDefaultBranch(ZuulTestCase):
md = layout.getProjectMetadata(
'review.example.com/org/regex-override-project-develop')
self.assertEqual('develop', md.default_branch)
@simple_layout('layouts/default-branch.yaml', driver='github')
def test_default_branch_upstream(self):
    # Change the default branch reported by the fake GitHub repo, drop
    # the cached branch data and reconfigure so the new value is read.
    self.waitUntilSettled()
    fake_repo = self.fake_github.getGithubClient().repo_from_project(
        'org/project-default')
    fake_repo._repodata['default_branch'] = 'foobar'
    github_connection = self.scheds.first.connections.connections['github']
    github_connection.clearBranchCache()
    self.scheds.execute(lambda app: app.sched.reconfigure(app.config))
    self.waitUntilSettled()

    tenant = self.scheds.first.sched.abide.tenants.get('tenant-one')
    layout = tenant.layout

    # (canonical project name, expected default branch), checked in
    # this order.
    expectations = (
        ('github.com/org/project-default', 'foobar'),
        ('github.com/org/regex-default-project-empty', 'master'),
        ('github.com/org/regex-default-project-develop', 'develop'),
        ('github.com/org/regex-override-project-empty', 'regex'),
        ('github.com/org/regex-override-project-develop', 'develop'),
    )
    for canonical_name, expected_branch in expectations:
        md = layout.getProjectMetadata(canonical_name)
        self.assertEqual(expected_branch, md.default_branch)

View File

@ -2751,3 +2751,42 @@ class TestGithubDriverEnterpriseCache(ZuulGithubAppTestCase):
r'.*I shouldnt be seen.*',
re.DOTALL)))
self.assertEqual(len(A.comments), 0)
class TestGithubDefaultBranch(ZuulTestCase):
    # Covers the GitHub "repository" webhook with action "edited": the
    # test asserts that a default-branch change shows up in the project
    # metadata and that the tenant layout is replaced.
    config_file = 'zuul-github-driver.conf'
    tenant_config_file = 'config/unprotected-branches/main.yaml'
    scheduler_count = 1

    def test_default_branch_changed(self):
        """Test the repository edited event"""
        canonical_name = 'github.com/org/project1'

        def current_layout():
            tenants = self.scheds.first.sched.abide.tenants
            return tenants.get('tenant-one').layout

        self.waitUntilSettled()
        fake_repo = self.fake_github.getGithubClient().repo_from_project(
            'org/project1')

        layout_before = current_layout()
        metadata = layout_before.getProjectMetadata(canonical_name)
        self.assertEqual('master', metadata.default_branch)
        uuid_before = layout_before.uuid

        # Flip the default branch in the fake upstream repo, then
        # deliver the matching webhook.
        fake_repo._repodata['default_branch'] = 'foobar'
        webhook = self.fake_github.getRepositoryEvent(
            {'full_name': 'org/project1', 'default_branch': 'foobar'},
            'edited',
            {'default_branch': {'from': 'master'}},
        )
        self.fake_github.emitEvent(webhook)
        self.waitUntilSettled()

        layout_after = current_layout()
        metadata = layout_after.getProjectMetadata(canonical_name)
        self.assertEqual('foobar', metadata.default_branch)
        uuid_after = layout_after.uuid
        self.assertNotEqual(uuid_after, uuid_before)

View File

@ -452,6 +452,50 @@ class TestGithubModelUpgrade(ZuulTestCase):
str(loading_errors[0].error))
class TestDefaultBranchUpgrade(ZuulTestCase):
    # Model upgrade test for the default_branch entry in the branch
    # cache (model API 15 -> 16).
    config_file = "zuul-gerrit-github.conf"
    scheduler_count = 1

    @model_version(15)
    @simple_layout('layouts/default-branch.yaml', driver='github')
    def test_default_branch(self):
        def current_default_branch():
            # Always re-read the tenant layout; reconfigurations may
            # have replaced it.
            tenant = self.scheds.first.sched.abide.tenants.get(
                'tenant-one')
            md = tenant.layout.getProjectMetadata(
                'github.com/org/project-default')
            return md.default_branch

        self.waitUntilSettled()
        fake_repo = self.fake_github.getGithubClient().repo_from_project(
            'org/project-default')
        fake_repo._repodata['default_branch'] = 'foobar'
        self.scheds.execute(lambda app: app.sched.reconfigure(app.config))
        self.waitUntilSettled()

        # On the old model version the value comes from the
        # defaultdict fallback.
        self.assertEqual('master', current_default_branch())

        # Upgrade our component
        self.model_test_component_info.model_api = 16

        # A smart reconfiguration should not clear the cache, so the
        # defaultdict fallback should still be in effect and nothing
        # should change.
        self.scheds.first.smartReconfigure()
        self.assertEqual('master', current_default_branch())

        # A full reconfiguration should cause the branch cache to be
        # queried and updated for real.
        self.scheds.first.fullReconfigure()
        self.waitUntilSettled()
        self.assertEqual('foobar', current_default_branch())
class TestDeduplication(ZuulTestCase):
config_file = "zuul-gerrit-github.conf"
tenant_config_file = "config/circular-dependencies/main.yaml"

View File

@ -2034,9 +2034,11 @@ class TenantParser(object):
min_ltime = -1
branches = sorted(tpc.project.source.getProjectBranches(
tpc.project, tenant, min_ltime))
if 'master' in branches:
branches.remove('master')
branches = ['master'] + branches
default_branch = tpc.project.source.getProjectDefaultBranch(
tpc.project, tenant, min_ltime)
if default_branch in branches:
branches.remove(default_branch)
branches = [default_branch] + branches
static_branches = []
always_dynamic_branches = []
for b in branches:
@ -2784,6 +2786,10 @@ class TenantParser(object):
if project_metadata.merge_mode is None:
mode = project.source.getProjectDefaultMergeMode(project)
project_metadata.merge_mode = model.MERGER_MAP[mode]
if project_metadata.default_branch is None:
default_branch = project.source.getProjectDefaultBranch(
project, tenant)
project_metadata.default_branch = default_branch
tpc = tenant.project_configs[project.canonical_name]
if tpc.merge_modes is not None:
source_context = model.SourceContext(

View File

@ -185,6 +185,22 @@ class ZKBranchCacheMixin:
"""
return model.ALL_MERGE_MODES
def _fetchProjectDefaultBranch(self, project):
    """Perform a remote query to determine the project's default branch.

    Connection subclasses should implement this method if they are
    able to determine the upstream default branch for a project.  The
    default implementation returns 'master' for now and will likely
    change to return something else if and when the git default changes.

    :param model.Project project:
        The project.

    :returns: The name of the default branch.
    """
    return 'master'
def clearConnectionCacheOnBranchEvent(self, event):
"""Update event and clear connection cache if needed.
@ -250,6 +266,10 @@ class ZKBranchCacheMixin:
merge_modes = self._fetchProjectMergeModes(project)
self._branch_cache.setProjectMergeModes(
project.name, merge_modes)
default_branch = self._fetchProjectDefaultBranch(project)
self._branch_cache.setProjectDefaultBranch(
project.name, default_branch)
self.log.info("Got branches for %s" % project.name)
def getProjectBranches(self, project, tenant, min_ltime=-1):
@ -365,6 +385,63 @@ class ZKBranchCacheMixin:
return merge_modes
def getProjectDefaultBranch(self, project, tenant, min_ltime=-1):
    """Return the project's default branch, querying upstream on a miss.

    The branch cache is consulted first.  A cached value of None marks
    an earlier fetch that failed, and is treated as "try again".

    :param zuul.model.Project project:
        The project whose default branch is wanted.
    :param zuul.model.Tenant tenant:
        The related tenant (not used by this implementation).
    :param int min_ltime:
        Passed to the branch cache as the minimum ltime for the lookup.

    :returns: The name of the default branch.
    :raises ReadOnlyBranchCacheError: if the cache has no entry at all
        and this connection is read-only.
    :raises RuntimeError: if the cache holds None (a failed fetch) and
        this connection is read-only.
    """
    cached = None
    if self._branch_cache:
        try:
            cached = self._branch_cache.getProjectDefaultBranch(
                project.name, min_ltime)
        except LookupError:
            # No scheduler has attempted a fetch yet; a read-only
            # connection is not allowed to be the first.
            if self.read_only:
                raise ReadOnlyBranchCacheError(
                    "Will not fetch default branch as read-only is set")

    if cached is not None:
        return cached
    if self.read_only:
        # A previous fetch attempt failed and stored None; being
        # read-only, we cannot retry it here.
        raise RuntimeError(
            "Will not fetch default branch as read-only is set")

    # Cache miss, or a remembered failure: ask the remote system.
    try:
        fetched = self._fetchProjectDefaultBranch(project)
    except Exception:
        # Store None so that read-only users can tell a fetch was
        # attempted and failed, while any later reader that finds None
        # will try again.
        if self._branch_cache:
            self._branch_cache.setProjectDefaultBranch(project.name, None)
        raise

    self.log.info("Got default branch for %s: %s", project.name,
                  fetched)
    if self._branch_cache:
        self._branch_cache.setProjectDefaultBranch(project.name, fetched)
    return fetched
def checkBranchCache(self, project_name: str, event,
protected: bool = None) -> None:
"""Clear the cache for a project when a branch event is processed

View File

@ -1,4 +1,5 @@
# Copyright 2015 Hewlett-Packard Development Company, L.P.
# Copyright 2023 Acme Gating, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
@ -705,6 +706,35 @@ class GithubEventProcessor(object):
return events
def _event_repository(self):
project_name = self.body['repository']['full_name']
project = self.connection.source.getProject(project_name)
events = []
if self.body.get('action') == 'edited':
if 'default_branch' in self.body.get('changes', {}):
default_branch = self.body['repository']['default_branch']
self.log.debug('Updating default branch for %s to %s',
project, default_branch)
self.connection._branch_cache.setProjectDefaultBranch(
project.name, default_branch)
event = self._repository_to_event(project_name, default_branch)
event.action = 'edited'
events.append(event)
return events
def _repository_to_event(self, project_name, branch):
    """Create the trigger event describing a default-branch change.

    :param str project_name: Stored on the event as ``project_name``.
    :param str branch: Stored as ``branch`` and, prefixed with
        'refs/heads/', as ``ref``.

    :returns: A GithubTriggerEvent of type 'repository' with
        ``default_branch_changed`` set to True.
    """
    trigger_event = GithubTriggerEvent()
    # Where the event came from.
    trigger_event.connection_name = self.connection.connection_name
    trigger_event.trigger_name = 'github'
    # What it is about.
    trigger_event.project_name = project_name
    trigger_event.default_branch_changed = True
    trigger_event.type = 'repository'
    head_ref = f'refs/heads/{branch}'
    trigger_event.ref = head_ref
    trigger_event.branch = branch
    return trigger_event
def _branch_protection_rule_to_event(self, project_name, branch):
event = GithubTriggerEvent()
event.connection_name = self.connection.connection_name
@ -1851,6 +1881,26 @@ class GithubConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection):
return merge_modes
def _fetchProjectDefaultBranch(self, project):
github = self.getGithubClient(project.name)
url = github.session.build_url('repos', project.name)
headers = {'Accept': 'application/vnd.github.loki-preview+json'}
resp = github.session.get(url, headers=headers)
if resp.status_code == 403:
self.log.error(str(resp))
rate_limit = github.rate_limit()
if rate_limit['resources']['core']['remaining'] == 0:
self.log.warning("Rate limit exceeded")
return None
elif resp.status_code == 404:
raise Exception("Got status code 404 when fetching "
"project %s" % project.name)
resp = resp.json()
return resp['default_branch']
def isBranchProtected(self, project_name: str, branch_name: str,
zuul_event_id=None) -> Optional[bool]:
github = self.getGithubClient(

View File

@ -1,6 +1,7 @@
# Copyright 2015 Hewlett-Packard Development Company, L.P.
# Copyright 2017 IBM Corp.
# Copyright 2017 Red Hat, Inc.
# Copyright 2023 Acme Gating, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
@ -112,6 +113,7 @@ class GithubTriggerEvent(TriggerEvent):
self.commits = []
self.body_edited = None
self.branch_protection_changed = None
self.default_branch_changed = None
def toDict(self):
d = super().toDict()
@ -125,6 +127,7 @@ class GithubTriggerEvent(TriggerEvent):
d["commits"] = self.commits
d["body_edited"] = self.body_edited
d["branch_protection_changed"] = self.branch_protection_changed
d["default_branch_changed"] = self.default_branch_changed
return d
def updateFromDict(self, d):
@ -139,10 +142,14 @@ class GithubTriggerEvent(TriggerEvent):
self.commits = d["commits"]
self.body_edited = d["body_edited"]
self.branch_protection_changed = d.get("branch_protection_changed")
self.default_branch_changed = d.get("default_branch_changed")
def isBranchProtectionChanged(self):
return bool(self.branch_protection_changed)
def isDefaultBranchChanged(self):
    """Return True when ``default_branch_changed`` is set to a truthy value."""
    return bool(self.default_branch_changed)
def isPatchsetCreated(self):
if self.type == 'pull_request':
return self.action in ['opened', 'changed']

View File

@ -1,4 +1,5 @@
# Copyright 2014 Puppet Labs Inc
# Copyright 2023 Acme Gating, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
@ -138,6 +139,18 @@ class GithubSource(BaseSource):
def getProjectMergeModes(self, project, tenant, min_ltime=-1):
return self.connection.getProjectMergeModes(project, tenant, min_ltime)
def getProjectDefaultBranch(self, project, tenant, min_ltime=-1):
    """Return the default branch the connection reports for the project.

    Any exception raised by the connection, or a None result, falls
    back to the parent class's default.

    :param project: The project to look up.
    :param tenant: The related tenant.
    :param int min_ltime: Passed through to the connection.

    :returns: The name of the default branch.
    """
    # Callers need an answer no matter what, so a failed lookup is
    # deliberately treated the same as "unknown" rather than propagated.
    try:
        upstream = self.connection.getProjectDefaultBranch(
            project, tenant, min_ltime)
    except Exception:
        upstream = None

    if upstream is not None:
        return upstream
    return super().getProjectDefaultBranch(project, tenant, min_ltime)
def getProjectBranchCacheLtime(self):
return self.connection._branch_cache.ltime

View File

@ -7277,6 +7277,9 @@ class TriggerEvent(AbstractEvent):
def isBranchProtectionChanged(self):
return False
def isDefaultBranchChanged(self):
    """Base implementation: always returns False."""
    return False
def _repr(self):
flags = [str(self.type)]
if self.project_name:
@ -7499,21 +7502,10 @@ class ProjectMetadata:
def __init__(self):
self.merge_mode = None
self._default_branch = None
self.default_branch = None
self.is_template = False
self.queue_name = None
def isDefaultBranchSet(self):
return self._default_branch is not None
@property
def default_branch(self):
return self._default_branch or "master"
@default_branch.setter
def default_branch(self, default_branch):
self._default_branch = default_branch
def toDict(self):
return {
'merge_mode': self.merge_mode,
@ -8173,7 +8165,7 @@ class Layout(object):
md = self.project_metadata[project_config.name]
if md.merge_mode is None and project_config.merge_mode is not None:
md.merge_mode = project_config.merge_mode
if (not md.isDefaultBranchSet() and
if (md.default_branch is None and
project_config.default_branch is not None):
md.default_branch = project_config.default_branch
if (

View File

@ -14,4 +14,4 @@
# When making ZK schema changes, increment this and add a record to
# doc/source/developer/model-changelog.rst
MODEL_API = 15
MODEL_API = 16

View File

@ -2504,9 +2504,10 @@ class Scheduler(threading.Thread):
not tpc.includesBranch(event.branch)):
reconfigure_tenant = False
# But if the event is that branch protection status has
# changed, do reconfigure.
if (event.isBranchProtectionChanged()):
# But if the event is that branch protection status or the
# default branch has changed, do reconfigure.
if (event.isBranchProtectionChanged() or
event.isDefaultBranchChanged()):
reconfigure_tenant = True
if reconfigure_tenant:

View File

@ -198,6 +198,22 @@ class BaseSource(object, metaclass=abc.ABCMeta):
return model.ALL_MERGE_MODES
def getProjectDefaultBranch(self, project, tenant, min_ltime=-1):
    """Return the default branch for this project.

    If users do not specify the default branch for a project, this
    branch will be used.  It may be a driver-specific default, or
    the driver may use data from the remote system to provide a
    project-specific default.

    This method is called very frequently, and should generally
    return quickly.  The connection is expected to cache default
    branches for all projects queried.

    This base implementation ignores its arguments and returns
    'master'.
    """
    return 'master'
@abc.abstractmethod
def getProjectBranchCacheLtime(self):
"""Return the current ltime of the project branch cache."""

View File

@ -68,7 +68,8 @@ class BranchCacheZKObject(ShardedZKObject):
super().__init__()
self._set(protected={},
remainder={},
merge_modes={})
merge_modes={},
default_branch={})
def serialize(self, context):
data = {
@ -79,6 +80,10 @@ class BranchCacheZKObject(ShardedZKObject):
# safe to move into the dict above at any time.
if (COMPONENT_REGISTRY.model_api >= 11):
data["merge_modes"] = self.merge_modes
# This is mostly here to enable unit tests of upgrades, it's
# safe to move into the dict above at any time.
if (COMPONENT_REGISTRY.model_api >= 16):
data["default_branch"] = self.default_branch
return json.dumps(data, sort_keys=True).encode("utf8")
def deserialize(self, raw, context):
@ -87,6 +92,10 @@ class BranchCacheZKObject(ShardedZKObject):
if "merge_modes" not in data:
data["merge_modes"] = collections.defaultdict(
lambda: model.ALL_MERGE_MODES)
# MODEL_API < 16
if "default_branch" not in data:
data["default_branch"] = collections.defaultdict(
lambda: 'master')
return data
def _save(self, context, data, create=False):
@ -137,11 +146,13 @@ class BranchCache:
self.cache.protected.clear()
self.cache.remainder.clear()
self.cache.merge_modes.clear()
self.cache.default_branch.clear()
else:
for p in projects:
self.cache.protected.pop(p, None)
self.cache.remainder.pop(p, None)
self.cache.merge_modes.pop(p, None)
self.cache.default_branch.pop(p, None)
def getProjectBranches(self, project_name, exclude_unprotected,
min_ltime=-1, default=RAISE_EXCEPTION):
@ -336,6 +347,69 @@ class BranchCache:
with self.cache.activeContext(self.zk_context):
self.cache.merge_modes[project_name] = merge_modes
def getProjectDefaultBranch(self, project_name,
                            min_ltime=-1, default=RAISE_EXCEPTION):
    """Look up the cached default branch of a project.

    Three outcomes are possible:

    1. No entry for the project: ``default`` is returned if one was
       supplied, otherwise LookupError is raised.
    2. The entry is None: recorded by the setter as the marker for a
       failed fetch.
    3. The entry is a branch name.

    :param str project_name:
        The project for which the default branch is returned.
    :param int min_ltime:
        The cache is refreshed first when its ltime is lower than this.
    :param any default:
        Optional value to return if no cache entry exists.

    :returns: The name of the default branch, or None if there was an
        error when fetching it.
    :raises LookupError: if there is no entry and no ``default``.
    """
    if self.ltime < min_ltime:
        with locked(self.rlock):
            self.cache.refresh(self.zk_context)

    # Plain subscript access on purpose: it is the only lookup that
    # lets a defaultdict-backed mapping supply its fallback value.
    try:
        return self.cache.default_branch[project_name]
    except KeyError:
        if default is not RAISE_EXCEPTION:
            return default
        raise LookupError(
            f"No default branch for project {project_name}")
def setProjectDefaultBranch(self, project_name, default_branch):
    """Record the upstream default branch of a project in the cache.

    Passing None stores the sentinel meaning "the fetch failed".

    :param str project_name:
        The project the default branch belongs to.
    :param str default_branch:
        The default branch or None.
    """
    with locked(self.wlock), self.cache.activeContext(self.zk_context):
        self.cache.default_branch[project_name] = default_branch
@property
def ltime(self):
return self.cache._zstat.last_modified_transaction_id