From aa4825cb53fc4d0b0740f4b4834cf702fcd19f66 Mon Sep 17 00:00:00 2001 From: Alex Schultz Date: Tue, 28 Jul 2020 14:42:59 -0600 Subject: [PATCH] Handle redirects for blobs better This patch adds a new function that checks if a response was a redirect for an a request and removes the Authorization header that we usually send if it is not one of our trusted hosts. This prevents authorization keys from going to insecure places. This is similar logic that exists in the moby registry code[0]. Additionally improves the cachability of blobs from docker.io because they are redirects to files that exist on a CDN that doesn't actually require authentication. The upstream CI registry caching system doesn't cache any requests with the Authorization header per the apache cache documentation[1]. [0] https://github.com/moby/moby/blob/a072d726c753c3b02232e6a7b08d7e7ce79fffa5/registry/registry.go#L140-L174 [1] https://httpd.apache.org/docs/2.4/caching.html Change-Id: I415eec5d307ac73456aa556db9d61ceac1eaa565 Partial-Bug: #1889122 (cherry picked from commit d3af31414747f28508e236b1fea8db7601fa2e7b) --- tripleo_common/image/image_uploader.py | 76 +++++++++++++++++-- .../tests/image/test_image_uploader.py | 76 ++++++++++++++++++- 2 files changed, 144 insertions(+), 8 deletions(-) diff --git a/tripleo_common/image/image_uploader.py b/tripleo_common/image/image_uploader.py index 9ac45be35..a1d9f98b2 100644 --- a/tripleo_common/image/image_uploader.py +++ b/tripleo_common/image/image_uploader.py @@ -49,6 +49,8 @@ LOG = logging.getLogger(__name__) SECURE_REGISTRIES = ( 'trunk.registry.rdoproject.org', + 'registry.redhat.io', + 'registry.access.redhat.com', 'docker.io', 'registry-1.docker.io', ) @@ -244,6 +246,55 @@ class RegistrySessionHelper(object): request.raise_for_status() + @staticmethod + def check_redirect_trusted(request_response, request_session, + stream=True, timeout=30): + """Check if we've been redirected to a trusted source + + Because we may be using auth, we may not want to leak authentication + keys to an untrusted source. If we get a redirect, we need to check + that the redirect url is one of our sources that we trust. Otherwise + we drop the Authorization header from the redirect request. We'll + add the header back into the request session after performing the + request to ensure that future usage of the session. + + :param: request_response: Response object of the request to check + :param: request_session: Session to use when redirecting + :param: stream: Should we stream the response of the redirect + :param: tiemout: Timeout for the redirect request + """ + # we're not a redirect, just return the original response + if not (request_response.status_code >= 300 + and request_response.status_code < 400): + return request_response + # parse the destination location + redir_url = parse.urlparse(request_response.headers['Location']) + # close the response since we're going to replace it + request_response.close() + auth_header = request_session.headers.pop('Authorization', None) + # ok we got a redirect, let's check where we are going + if len([h for h in SECURE_REGISTRIES if h in redir_url.netloc]) > 0: + # we're going to a trusted location, add the header back and + # return response + request_session.headers.update({'Authorization': auth_header}) + request_response = request_session.get(redir_url.geturl(), + stream=stream, + timeout=timeout) + else: + # we didn't trust the place we're going, request without auth but + # add the auth back to the request session afterwards + request_response = request_session.get(redir_url.geturl(), + stream=stream, + timeout=timeout) + request_session.headers.update({'Authorization': auth_header}) + + request_response.encoding = 'utf-8' + # recheck status here to make sure we didn't get a 401 from + # our redirect host path. + RegistrySessionHelper.check_status(session=request_session, + request=request_response) + return request_response + @staticmethod def _action(action, request_session, *args, **kwargs): """ Perform a session action and retry if auth fails @@ -780,8 +831,12 @@ class BaseImageUploader(object): session, config_url, headers=config_headers, - timeout=30 + timeout=30, + allow_redirects=False ) + # check if the blob is a redirect + config_r = RegistrySessionHelper.check_redirect_trusted( + config_r, session, stream=False) config = config_r.json() image, tag = cls._image_tag_from_url(image_url) @@ -1747,12 +1802,19 @@ class PythonImageUploader(BaseImageUploader): chunk_size = None LOG.info("[%s] Fetching layer %s from %s" % (image, digest, source_blob_url)) - with session.get( - source_blob_url, stream=True, timeout=30) as blob_req: - # TODO(aschultz): unsure if necessary or if only when using .text + with session.get(source_blob_url, + stream=True, + timeout=30, + allow_redirects=False) as blob_req: blob_req.encoding = 'utf-8' + # raise for status here to ensure we didn't got a 401 RegistrySessionHelper.check_status(session=session, request=blob_req) + # Requests to docker.io redirect to CDN for the actual content + # so we need to check if our initial blob request is a redirect + # and follow as necessary. + blob_req = RegistrySessionHelper.check_redirect_trusted(blob_req, + session) for data in blob_req.iter_content(chunk_size): LOG.debug("[%s] Read %i bytes for %s" % (image, len(data), digest)) @@ -1906,8 +1968,12 @@ class PythonImageUploader(BaseImageUploader): r = RegistrySessionHelper.get( source_session, source_config_url, - timeout=30 + timeout=30, + allow_redirects=False ) + # check if the blob was a redirect + r = RegistrySessionHelper.check_redirect_trusted( + r, source_session, stream=False) config_str = cls._get_response_text(r) manifest['config']['size'] = len(config_str) diff --git a/tripleo_common/tests/image/test_image_uploader.py b/tripleo_common/tests/image/test_image_uploader.py index d75c52461..76f23ceb3 100644 --- a/tripleo_common/tests/image/test_image_uploader.py +++ b/tripleo_common/tests/image/test_image_uploader.py @@ -76,6 +76,66 @@ class TestRegistrySessionHelper(base.TestCase): session_reauth_mock.assert_called_once_with() raise_for_status_mock.assert_called_once() + def test_check_redirect_trusted_no_redirect(self): + get_mock = mock.Mock() + session = mock.Mock() + session.headers = {'Authorization': 'foo'} + session.auth_args = {} + session.get = get_mock + resp = mock.Mock() + resp.status_code = 200 + + r = image_uploader.RegistrySessionHelper.check_redirect_trusted( + resp, session) + + self.assertEqual(resp, r) + + def test_check_redirect_trusted_is_trusted(self): + get_result = mock.Mock() + get_result.status_code = 200 + get_mock = mock.Mock() + get_mock.return_value = get_result + session = mock.Mock() + session.headers = {'Authorization': 'foo'} + session.auth_args = {} + session.get = get_mock + resp = mock.Mock() + resp.headers = {'Location': 'https://registry.redhat.io/v2'} + resp.status_code = 307 + + r = image_uploader.RegistrySessionHelper.check_redirect_trusted( + resp, session) + + self.assertNotEqual(resp, r) + self.assertEqual(get_result, r) + get_mock.assert_called_once_with('https://registry.redhat.io/v2', + stream=True, + timeout=30) + self.assertEqual(session.headers['Authorization'], 'foo') + + def test_check_redirect_trusted_not_trusted(self): + get_result = mock.Mock() + get_result.status_code = 200 + get_mock = mock.Mock() + get_mock.return_value = get_result + session = mock.Mock() + session.headers = {'Authorization': 'foo'} + session.auth_args = {} + session.get = get_mock + resp = mock.Mock() + resp.headers = {'Location': 'http://172.16.12.12:8787/'} + resp.status_code = 307 + + r = image_uploader.RegistrySessionHelper.check_redirect_trusted( + resp, session, False, 12) + + self.assertNotEqual(resp, r) + self.assertEqual(get_result, r) + get_mock.assert_called_once_with('http://172.16.12.12:8787/', + stream=False, + timeout=12) + self.assertEqual(session.headers['Authorization'], 'foo') + @mock.patch('tripleo_common.image.image_uploader.RegistrySessionHelper' '.check_status') def test_action(self, mock_status): @@ -2074,6 +2134,8 @@ class TestPythonImageUploader(base.TestCase): 'docker' ) + @mock.patch('tripleo_common.image.image_uploader.' + 'RegistrySessionHelper.check_redirect_trusted') @mock.patch('tripleo_common.image.image_uploader.' 'PythonImageUploader._copy_manifest_config_to_registry') @mock.patch('tripleo_common.image.image_uploader.' @@ -2084,7 +2146,8 @@ class TestPythonImageUploader(base.TestCase): 'PythonImageUploader.' '_copy_layer_registry_to_registry') def test_copy_registry_to_registry(self, _copy_layer, _upload_url, - mock_get, mock_copy_manifest): + mock_get, mock_copy_manifest, + mock_trusted): source_url = urlparse('docker://docker.io/t/nova-api:latest') target_url = urlparse('docker://192.168.2.1:5000/t/nova-api:latest') _upload_url.return_value = 'https://192.168.2.1:5000/v2/upload' @@ -2092,7 +2155,10 @@ class TestPythonImageUploader(base.TestCase): source_session = mock.Mock() target_session = mock.Mock() - mock_get.return_value.text = '{}' + mock_resp = mock.Mock() + mock_resp.text = '{}' + mock_get.return_value = mock_resp + mock_trusted.return_value = mock_resp manifest = json.dumps({ 'mediaType': image_uploader.MEDIA_MANIFEST_V2, @@ -2120,7 +2186,8 @@ class TestPythonImageUploader(base.TestCase): mock_get.assert_called_once_with( source_session, 'https://registry-1.docker.io/v2/t/nova-api/blobs/sha256:1234', - timeout=30 + timeout=30, + allow_redirects=False ) target_manifest = { 'config': { @@ -2136,6 +2203,9 @@ class TestPythonImageUploader(base.TestCase): 'distribution.manifest.v2+json', } + mock_trusted.assert_called_once_with(mock_resp, + source_session, + stream=False) mock_copy_manifest.assert_has_calls([ mock.call( target_url=target_url,