Merge "Change object_audit_location_generator() to yield for a single policy."

This commit is contained in:
Zuul 2018-03-22 06:18:19 +00:00 committed by Gerrit Code Review
commit 9aca9ad780
4 changed files with 114 additions and 134 deletions

View File

@ -27,7 +27,7 @@ from eventlet import Timeout
from swift.obj import diskfile, replicator
from swift.common.utils import (
get_logger, ratelimit_sleep, dump_recon_cache, list_from_csv, listdir,
unlink_paths_older_than, readconf, config_auto_int_value)
unlink_paths_older_than, readconf, config_auto_int_value, round_robin_iter)
from swift.common.exceptions import DiskFileQuarantined, DiskFileNotExist,\
DiskFileDeleted, DiskFileExpired
from swift.common.daemon import Daemon
@ -120,18 +120,17 @@ class AuditorWorker(object):
total_quarantines = 0
total_errors = 0
time_auditing = 0
# TODO: we should move audit-location generation to the storage policy,
# as we may (conceivably) have a different filesystem layout for each.
# We'd still need to generate the policies to audit from the actual
# directories found on-disk, and have appropriate error reporting if we
# find a directory that doesn't correspond to any known policy. This
# will require a sizable refactor, but currently all diskfile managers
# can find all diskfile locations regardless of policy -- so for now
# just use Policy-0's manager.
all_locs = (self.diskfile_router[POLICIES[0]]
# get AuditLocations for each policy
loc_generators = []
for policy in POLICIES:
loc_generators.append(
self.diskfile_router[policy]
.object_audit_location_generator(
device_dirs=device_dirs,
policy, device_dirs=device_dirs,
auditor_type=self.auditor_type))
all_locs = round_robin_iter(loc_generators)
for location in all_locs:
loop_time = time.time()
self.failsafe_object_audit(location)
@ -192,8 +191,11 @@ class AuditorWorker(object):
self.logger.info(
_('Object audit stats: %s') % json.dumps(self.stats_buckets))
# Unset remaining partitions to not skip them in the next run
diskfile.clear_auditor_status(self.devices, self.auditor_type)
for policy in POLICIES:
# Unset remaining partitions to not skip them in the next run
self.diskfile_router[policy].clear_auditor_status(
policy,
self.auditor_type)
def record_stats(self, obj_size):
"""

View File

@ -453,18 +453,20 @@ class AuditLocation(object):
return str(self.path)
def object_audit_location_generator(devices, mount_check=True, logger=None,
device_dirs=None, auditor_type="ALL"):
def object_audit_location_generator(devices, datadir, mount_check=True,
logger=None, device_dirs=None,
auditor_type="ALL"):
"""
Given a devices path (e.g. "/srv/node"), yield an AuditLocation for all
objects stored under that directory if device_dirs isn't set. If
device_dirs is set, only yield AuditLocation for the objects under the
entries in device_dirs. The AuditLocation only knows the path to the hash
directory, not to the .data file therein (if any). This is to avoid a
double listdir(hash_dir); the DiskFile object will always do one, so
we don't.
objects stored under that directory for the given datadir (policy),
if device_dirs isn't set. If device_dirs is set, only yield AuditLocation
for the objects under the entries in device_dirs. The AuditLocation only
knows the path to the hash directory, not to the .data file therein
(if any). This is to avoid a double listdir(hash_dir); the DiskFile object
will always do one, so we don't.
:param devices: parent directory of the devices to be audited
:param datadir: objects directory
:param mount_check: flag to check if a mount check should be performed
on devices
:param logger: a logger object
@ -480,6 +482,7 @@ def object_audit_location_generator(devices, mount_check=True, logger=None,
# randomize devices in case of process restart before sweep completed
shuffle(device_dirs)
base, policy = split_policy_string(datadir)
for device in device_dirs:
if not check_drive(devices, device, mount_check):
if logger:
@ -487,55 +490,37 @@ def object_audit_location_generator(devices, mount_check=True, logger=None,
'Skipping %s as it is not %s', device,
'mounted' if mount_check else 'a dir')
continue
# loop through object dirs for all policies
device_dir = os.path.join(devices, device)
try:
dirs = os.listdir(device_dir)
except OSError as e:
if logger:
logger.debug(
_('Skipping %(dir)s: %(err)s') % {'dir': device_dir,
'err': e.strerror})
datadir_path = os.path.join(devices, device, datadir)
if not os.path.exists(datadir_path):
continue
for dir_ in dirs:
if not dir_.startswith(DATADIR_BASE):
continue
partitions = get_auditor_status(datadir_path, logger, auditor_type)
for pos, partition in enumerate(partitions):
update_auditor_status(datadir_path, logger,
partitions[pos:], auditor_type)
part_path = os.path.join(datadir_path, partition)
try:
base, policy = split_policy_string(dir_)
except PolicyError as e:
if logger:
logger.warning(_('Directory %(directory)r does not map '
'to a valid policy (%(error)s)') % {
'directory': dir_, 'error': e})
suffixes = listdir(part_path)
except OSError as e:
if e.errno != errno.ENOTDIR:
raise
continue
datadir_path = os.path.join(devices, device, dir_)
partitions = get_auditor_status(datadir_path, logger, auditor_type)
for pos, partition in enumerate(partitions):
update_auditor_status(datadir_path, logger,
partitions[pos:], auditor_type)
part_path = os.path.join(datadir_path, partition)
for asuffix in suffixes:
suff_path = os.path.join(part_path, asuffix)
try:
suffixes = listdir(part_path)
hashes = listdir(suff_path)
except OSError as e:
if e.errno != errno.ENOTDIR:
raise
continue
for asuffix in suffixes:
suff_path = os.path.join(part_path, asuffix)
try:
hashes = listdir(suff_path)
except OSError as e:
if e.errno != errno.ENOTDIR:
raise
continue
for hsh in hashes:
hsh_path = os.path.join(suff_path, hsh)
yield AuditLocation(hsh_path, device, partition,
policy)
for hsh in hashes:
hsh_path = os.path.join(suff_path, hsh)
yield AuditLocation(hsh_path, device, partition,
policy)
update_auditor_status(datadir_path, logger, [], auditor_type)
update_auditor_status(datadir_path, logger, [], auditor_type)
def get_auditor_status(datadir_path, logger, auditor_type):
@ -589,15 +574,13 @@ def update_auditor_status(datadir_path, logger, partitions, auditor_type):
{'auditor_status': auditor_status, 'err': e})
def clear_auditor_status(devices, auditor_type="ALL"):
for device in os.listdir(devices):
for dir_ in os.listdir(os.path.join(devices, device)):
if not dir_.startswith("objects"):
continue
datadir_path = os.path.join(devices, device, dir_)
auditor_status = os.path.join(
datadir_path, "auditor_status_%s.json" % auditor_type)
remove_file(auditor_status)
def clear_auditor_status(devices, datadir, auditor_type="ALL"):
device_dirs = listdir(devices)
for device in device_dirs:
datadir_path = os.path.join(devices, device, datadir)
auditor_status = os.path.join(
datadir_path, "auditor_status_%s.json" % auditor_type)
remove_file(auditor_status)
def strip_self(f):
@ -1340,15 +1323,22 @@ class BaseDiskFileManager(object):
pipe_size=self.pipe_size,
use_linkat=self.use_linkat, **kwargs)
def object_audit_location_generator(self, device_dirs=None,
def clear_auditor_status(self, policy, auditor_type="ALL"):
datadir = get_data_dir(policy)
clear_auditor_status(self.devices, datadir, auditor_type)
def object_audit_location_generator(self, policy, device_dirs=None,
auditor_type="ALL"):
"""
Yield an AuditLocation for all objects stored under device_dirs.
:param policy: the StoragePolicy instance
:param device_dirs: directory of target device
:param auditor_type: either ALL or ZBF
"""
return object_audit_location_generator(self.devices, self.mount_check,
datadir = get_data_dir(policy)
return object_audit_location_generator(self.devices, datadir,
self.mount_check,
self.logger, device_dirs,
auditor_type)

View File

@ -852,7 +852,7 @@ class TestAuditor(unittest.TestCase):
self.auditor.run_audit(**kwargs)
self.assertFalse(os.path.isdir(quarantine_path))
del(kwargs['zero_byte_fps'])
clear_auditor_status(self.devices)
clear_auditor_status(self.devices, 'objects')
self.auditor.run_audit(**kwargs)
self.assertTrue(os.path.isdir(quarantine_path))

View File

@ -375,17 +375,6 @@ class TestObjectAuditLocationGenerator(unittest.TestCase):
"fcd938702024c25fef6c32fef05298eb"))
os.makedirs(os.path.join(tmpdir, "sdp", "objects-1", "9970", "ca5",
"4a943bc72c2e647c4675923d58cf4ca5"))
os.makedirs(os.path.join(tmpdir, "sdq", "objects-2", "9971", "8eb",
"fcd938702024c25fef6c32fef05298eb"))
os.makedirs(os.path.join(tmpdir, "sdq", "objects-99", "9972",
"8eb",
"fcd938702024c25fef6c32fef05298eb"))
# the bad
os.makedirs(os.path.join(tmpdir, "sdq", "objects-", "1135",
"6c3",
"fcd938702024c25fef6c32fef05298eb"))
os.makedirs(os.path.join(tmpdir, "sdq", "objects-fud", "foo"))
os.makedirs(os.path.join(tmpdir, "sdq", "objects-+1", "foo"))
self._make_file(os.path.join(tmpdir, "sdp", "objects", "1519",
"fed"))
@ -404,26 +393,18 @@ class TestObjectAuditLocationGenerator(unittest.TestCase):
"4f9eee668b66c6f0250bfa3c7ab9e51e"))
logger = debug_logger()
locations = [(loc.path, loc.device, loc.partition, loc.policy)
for loc in diskfile.object_audit_location_generator(
devices=tmpdir, mount_check=False,
logger=logger)]
locations.sort()
loc_generators = []
datadirs = ["objects", "objects-1"]
for datadir in datadirs:
loc_generators.append(
diskfile.object_audit_location_generator(
devices=tmpdir, datadir=datadir, mount_check=False,
logger=logger))
# expect some warnings about those bad dirs
warnings = logger.get_lines_for_level('warning')
self.assertEqual(set(warnings), set([
("Directory 'objects-' does not map to a valid policy "
"(Unknown policy, for index '')"),
("Directory 'objects-2' does not map to a valid policy "
"(Unknown policy, for index '2')"),
("Directory 'objects-99' does not map to a valid policy "
"(Unknown policy, for index '99')"),
("Directory 'objects-fud' does not map to a valid policy "
"(Unknown policy, for index 'fud')"),
("Directory 'objects-+1' does not map to a valid policy "
"(Unknown policy, for index '+1')"),
]))
all_locs = itertools.chain(*loc_generators)
locations = [(loc.path, loc.device, loc.partition, loc.policy) for
loc in all_locs]
locations.sort()
expected = \
[(os.path.join(tmpdir, "sdp", "objects-1", "9970", "ca5",
@ -448,12 +429,19 @@ class TestObjectAuditLocationGenerator(unittest.TestCase):
self.assertEqual(locations, expected)
# Reset status file for next run
diskfile.clear_auditor_status(tmpdir)
for datadir in datadirs:
diskfile.clear_auditor_status(tmpdir, datadir)
# now without a logger
locations = [(loc.path, loc.device, loc.partition, loc.policy)
for loc in diskfile.object_audit_location_generator(
devices=tmpdir, mount_check=False)]
for datadir in datadirs:
loc_generators.append(
diskfile.object_audit_location_generator(
devices=tmpdir, datadir=datadir, mount_check=False,
logger=logger))
all_locs = itertools.chain(*loc_generators)
locations = [(loc.path, loc.device, loc.partition, loc.policy) for
loc in all_locs]
locations.sort()
self.assertEqual(locations, expected)
@ -470,7 +458,7 @@ class TestObjectAuditLocationGenerator(unittest.TestCase):
locations = [
(loc.path, loc.device, loc.partition, loc.policy)
for loc in diskfile.object_audit_location_generator(
devices=tmpdir, mount_check=True)]
devices=tmpdir, datadir="objects", mount_check=True)]
locations.sort()
self.assertEqual(
@ -485,7 +473,8 @@ class TestObjectAuditLocationGenerator(unittest.TestCase):
locations = [
(loc.path, loc.device, loc.partition, loc.policy)
for loc in diskfile.object_audit_location_generator(
devices=tmpdir, mount_check=True, logger=logger)]
devices=tmpdir, datadir="objects", mount_check=True,
logger=logger)]
debug_lines = logger.get_lines_for_level('debug')
self.assertEqual([
'Skipping sdq as it is not mounted',
@ -502,7 +491,7 @@ class TestObjectAuditLocationGenerator(unittest.TestCase):
locations = [
(loc.path, loc.device, loc.partition, loc.policy)
for loc in diskfile.object_audit_location_generator(
devices=tmpdir, mount_check=False)]
devices=tmpdir, datadir="objects", mount_check=False)]
self.assertEqual(
locations,
@ -516,30 +505,22 @@ class TestObjectAuditLocationGenerator(unittest.TestCase):
locations = [
(loc.path, loc.device, loc.partition, loc.policy)
for loc in diskfile.object_audit_location_generator(
devices=tmpdir, mount_check=False, logger=logger)]
devices=tmpdir, datadir="objects", mount_check=False,
logger=logger)]
debug_lines = logger.get_lines_for_level('debug')
self.assertEqual([
'Skipping garbage as it is not a dir',
], debug_lines)
logger.clear()
with mock_check_drive(isdir=True):
locations = [
(loc.path, loc.device, loc.partition, loc.policy)
for loc in diskfile.object_audit_location_generator(
devices=tmpdir, mount_check=False, logger=logger)]
debug_lines = logger.get_lines_for_level('debug')
self.assertEqual([
'Skipping %s: Not a directory' % os.path.join(
tmpdir, "garbage"),
], debug_lines)
logger.clear()
with mock_check_drive() as mocks:
mocks['ismount'].side_effect = lambda path: (
False if path.endswith('garbage') else True)
locations = [
(loc.path, loc.device, loc.partition, loc.policy)
for loc in diskfile.object_audit_location_generator(
devices=tmpdir, mount_check=True, logger=logger)]
devices=tmpdir, datadir="objects", mount_check=True,
logger=logger)]
debug_lines = logger.get_lines_for_level('debug')
self.assertEqual([
'Skipping garbage as it is not mounted',
@ -550,10 +531,10 @@ class TestObjectAuditLocationGenerator(unittest.TestCase):
# so that errors get logged and a human can see what's going wrong;
# only normal FS corruption should be skipped over silently.
def list_locations(dirname):
def list_locations(dirname, datadir):
return [(loc.path, loc.device, loc.partition, loc.policy)
for loc in diskfile.object_audit_location_generator(
devices=dirname, mount_check=False)]
devices=dirname, datadir=datadir, mount_check=False)]
real_listdir = os.listdir
@ -570,30 +551,34 @@ class TestObjectAuditLocationGenerator(unittest.TestCase):
"2607", "b54",
"fe450ec990a88cc4b252b181bab04b54"))
with mock.patch('os.listdir', splode_if_endswith("sdf/objects")):
self.assertRaises(OSError, list_locations, tmpdir)
self.assertRaises(OSError, list_locations, tmpdir, "objects")
with mock.patch('os.listdir', splode_if_endswith("2607")):
self.assertRaises(OSError, list_locations, tmpdir)
self.assertRaises(OSError, list_locations, tmpdir, "objects")
with mock.patch('os.listdir', splode_if_endswith("b54")):
self.assertRaises(OSError, list_locations, tmpdir)
self.assertRaises(OSError, list_locations, tmpdir, "objects")
def test_auditor_status(self):
with temptree([]) as tmpdir:
os.makedirs(os.path.join(tmpdir, "sdf", "objects", "1", "a", "b"))
os.makedirs(os.path.join(tmpdir, "sdf", "objects", "2", "a", "b"))
datadir = "objects"
# Pretend that some time passed between each partition
with mock.patch('os.stat') as mock_stat, \
mock_check_drive(isdir=True):
mock_stat.return_value.st_mtime = time() - 60
# Auditor starts, there are two partitions to check
gen = diskfile.object_audit_location_generator(tmpdir, False)
gen = diskfile.object_audit_location_generator(tmpdir,
datadir,
False)
gen.next()
gen.next()
# Auditor stopped for some reason without raising StopIteration in
# the generator and restarts. There is now only one remaining
# partition to check
gen = diskfile.object_audit_location_generator(tmpdir, False)
gen = diskfile.object_audit_location_generator(tmpdir, datadir,
False)
with mock_check_drive(isdir=True):
gen.next()
@ -602,17 +587,19 @@ class TestObjectAuditLocationGenerator(unittest.TestCase):
# There are no partitions to check if the auditor restarts another
# time and the status files have not been cleared
gen = diskfile.object_audit_location_generator(tmpdir, False)
gen = diskfile.object_audit_location_generator(tmpdir, datadir,
False)
with mock_check_drive(isdir=True):
self.assertRaises(StopIteration, gen.next)
# Reset status file
diskfile.clear_auditor_status(tmpdir)
diskfile.clear_auditor_status(tmpdir, datadir)
# If the auditor restarts another time, we expect to
# check two partitions again, because the remaining
# partitions were empty and a new listdir was executed
gen = diskfile.object_audit_location_generator(tmpdir, False)
gen = diskfile.object_audit_location_generator(tmpdir, datadir,
False)
with mock_check_drive(isdir=True):
gen.next()
gen.next()
@ -985,7 +972,8 @@ class DiskFileManagerMixin(BaseDiskFileTestMixin):
self.df_mgr.logger.increment.assert_called_with('async_pendings')
def test_object_audit_location_generator(self):
locations = list(self.df_mgr.object_audit_location_generator())
locations = list(
self.df_mgr.object_audit_location_generator(POLICIES[0]))
self.assertEqual(locations, [])
def test_replication_one_per_device_deprecation(self):