proxy: Use the right ranges when going to multiple object servers

When the proxy times out talking to a backend server (say, because it was under heavy load and having trouble servicing the request), we catch the ChunkReadTimeout and try to get the rest from another server. The client by and large doesn't care; there may be a brief pause in the download while the proxy gets the new connection, but all the bytes arrive and in the right order: (1) GET from node1, serve bytes 0 through N, timeout; (2) GET from node2, serve bytes N through end. When we calculate the range for the new request, we check to see if we already have one from the previous request -- if one exists, we adjust it based on the bytes sent to the client thus far. This works fine for single failures, but if we need to go back *again* we double up the offset and send the client incomplete, bad data: (1) GET from node1, serve bytes 0 through N, timeout; (2) GET from node2, serve bytes N through M, timeout; (3) GET from node3, serve bytes N + M through end -- leaving the client missing bytes M through N + M. We should adjust the range based on the number of bytes pulled from the *backend* rather than delivered to the *frontend*. This just requires that we reset our book-keeping after adjusting the Range header. Change-Id: Ie153d01479c4242c01f48bf0ada78c2f9b6c8ff0 Closes-Bug: 1717401 (cherry picked from commit 6b19ca7a7d)
2017-09-15 22:52:26 +00:00 · 2017-09-15 22:52:26 +00:00 · cbc1459615
parent 6b4e6aeffa
commit cbc1459615
2 changed files with 26 additions and 7 deletions
--- a/swift/proxy/controllers/base.py
+++ b/swift/proxy/controllers/base.py
@ -772,14 +772,16 @@ class ResumingGetter(object):
                           this request. This will change the Range header
                           so that the next req will start where it left off.

-        :raises ValueError: if invalid range header
        :raises HTTPRequestedRangeNotSatisfiable: if begin + num_bytes
                                                  > end of range + 1
        :raises RangeAlreadyComplete: if begin + num_bytes == end of range + 1
        """
-        if 'Range' in self.backend_headers:
-            req_range = Range(self.backend_headers['Range'])
+        try:
+            req_range = Range(self.backend_headers.get('Range'))
+        except ValueError:
+            req_range = None

+        if req_range:
            begin, end = req_range.ranges[0]
            if begin is None:
                # this is a -50 range req (last 50 bytes of file)
@ -803,6 +805,9 @@ class ResumingGetter(object):
        else:
            self.backend_headers['Range'] = 'bytes=%d-' % num_bytes

+        # Reset so if we need to do this more than once, we don't double-up
+        self.bytes_used_from_backend = 0
+
    def pop_range(self):
        """
        Remove the first byterange from our Range header.
--- a/test/unit/proxy/controllers/test_base.py
+++ b/test/unit/proxy/controllers/test_base.py
@ -875,18 +875,32 @@ class TestFuncs(unittest.TestCase):

        node = {'ip': '1.2.3.4', 'port': 6200, 'device': 'sda'}

-        source1 = TestSource(['abcd', '1234', 'abc', None])
-        source2 = TestSource(['efgh5678'])
+        data = ['abcd', '1234', 'efgh', '5678', 'lots', 'more', 'data']
+
+        # NB: content length on source1 should be correct
+        # but that reversed piece never makes it to the client
+        source1 = TestSource(data[:2] + [data[2][::-1], None] + data[3:])
+        source2 = TestSource(data[2:4] + ['nope', None])
+        source3 = TestSource(data[4:])
        req = Request.blank('/v1/a/c/o')
        handler = GetOrHeadHandler(
            self.app, req, 'Object', None, None, None, {},
            client_chunk_size=8)

+        range_headers = []
+        sources = [(source2, node), (source3, node)]
+
+        def mock_get_source_and_node():
+            range_headers.append(handler.backend_headers['Range'])
+            return sources.pop(0)
+
        app_iter = handler._make_app_iter(req, node, source1)
        with mock.patch.object(handler, '_get_source_and_node',
-                               lambda: (source2, node)):
+                               side_effect=mock_get_source_and_node):
            client_chunks = list(app_iter)
-        self.assertEqual(client_chunks, ['abcd1234', 'efgh5678'])
+        self.assertEqual(range_headers, ['bytes=8-27', 'bytes=16-27'])
+        self.assertEqual(client_chunks, [
+            'abcd1234', 'efgh5678', 'lotsmore', 'data'])

    def test_client_chunk_size_resuming_chunked(self):