From 38b13bb40b879adcc06cef0b0fa629e4b00f7122 Mon Sep 17 00:00:00 2001 From: Fernando Ferraz Date: Tue, 19 Jul 2022 05:33:58 -0300 Subject: [PATCH] NetApp ONTAP: Fix SnapMirror snapshots not being cleaned up Replica promote is retaining unneeded snapshots from previous SnapMirror relationships and increasing the amount of space consumed from snapshots in the storage system. This patch fixes the issue by calling the snapmirror release operation after resync completes its transferring, which allows the SnapMirror software to properly cleanup unneeded resources. Closes-Bug: #1982808 Change-Id: I516fb3575e30d18d971d6a1b7f3b9ad7120c3bbd --- .../dataontap/cluster_mode/data_motion.py | 30 +++++++++++ .../netapp/dataontap/cluster_mode/lib_base.py | 9 ++++ .../cluster_mode/test_data_motion.py | 54 +++++++++++++++++++ .../dataontap/cluster_mode/test_lib_base.py | 42 +++++++++++++++ ...shots-not-cleaned-up-63cc98cd468adbd1.yaml | 9 ++++ 5 files changed, 144 insertions(+) create mode 100644 releasenotes/notes/bug-1982808-netapp-fix-snapmirror-snapshots-not-cleaned-up-63cc98cd468adbd1.yaml diff --git a/manila/share/drivers/netapp/dataontap/cluster_mode/data_motion.py b/manila/share/drivers/netapp/dataontap/cluster_mode/data_motion.py index 95cc69932c..6617e50fb7 100644 --- a/manila/share/drivers/netapp/dataontap/cluster_mode/data_motion.py +++ b/manila/share/drivers/netapp/dataontap/cluster_mode/data_motion.py @@ -861,3 +861,33 @@ class DataMotionSession(object): msg = _("Unable to release the snapmirror from source volume %s. " "Retries exhausted. Aborting") % src_volume_name raise exception.NetAppException(message=msg) + + def cleanup_previous_snapmirror_relationships(self, replica, replica_list): + """Cleanup previous snapmirrors relationships for replica.""" + LOG.debug("Cleaning up old snapmirror relationships for replica %s.", + replica['id']) + src_vol_name, src_vserver, src_backend = ( + self.get_backend_info_for_share(replica)) + src_client = get_client_for_backend(src_backend, + vserver_name=src_vserver) + + # replica_list may contain the replica we are trying to clean up + destinations = (r for r in replica_list if r['id'] != replica['id']) + + for destination in destinations: + dest_vol_name, dest_vserver, _ = ( + self.get_backend_info_for_share(destination)) + try: + src_client.release_snapmirror_vol( + src_vserver, src_vol_name, dest_vserver, dest_vol_name) + except netapp_api.NaApiError as e: + if (e.code == netapp_api.EOBJECTNOTFOUND or + e.code == netapp_api.ESOURCE_IS_DIFFERENT or + "(entry doesn't exist)" in e.message): + LOG.debug( + 'Snapmirror destination %s no longer exists for ' + 'replica %s.', destination['id'], replica['id']) + else: + LOG.exception( + 'Error releasing snapmirror destination %s for ' + 'replica %s.', destination['id'], replica['id']) diff --git a/manila/share/drivers/netapp/dataontap/cluster_mode/lib_base.py b/manila/share/drivers/netapp/dataontap/cluster_mode/lib_base.py index 2b78ca71df..0c076e2ae7 100644 --- a/manila/share/drivers/netapp/dataontap/cluster_mode/lib_base.py +++ b/manila/share/drivers/netapp/dataontap/cluster_mode/lib_base.py @@ -2660,6 +2660,15 @@ class NetAppCmodeFileStorageLibrary(object): share_name)): return constants.REPLICA_STATE_OUT_OF_SYNC + # NOTE(sfernand): When promoting replicas, the previous source volume + # and its destinations are put in an 'out of sync' state and must be + # cleaned up once to avoid retaining unused snapshots from the previous + # relationship. Replicas already 'in-sync' won't try another cleanup + # attempt. + if replica['replica_state'] == constants.REPLICA_STATE_OUT_OF_SYNC: + dm_session.cleanup_previous_snapmirror_relationships( + replica, replica_list) + return constants.REPLICA_STATE_IN_SYNC def promote_replica(self, context, replica_list, replica, access_rules, diff --git a/manila/tests/share/drivers/netapp/dataontap/cluster_mode/test_data_motion.py b/manila/tests/share/drivers/netapp/dataontap/cluster_mode/test_data_motion.py index 2164e06c00..c366743434 100644 --- a/manila/tests/share/drivers/netapp/dataontap/cluster_mode/test_data_motion.py +++ b/manila/tests/share/drivers/netapp/dataontap/cluster_mode/test_data_motion.py @@ -1217,3 +1217,57 @@ class NetAppCDOTDataMotionSessionTestCase(test.TestCase): src_mock_client.release_snapmirror_vol.assert_called_once_with( fake.VSERVER1, fake.SHARE_NAME, fake.VSERVER2, fake.SHARE_NAME2, relationship_info_only=False) + + @ddt.data([{'id': 'src_share'}, {'id': 'dst_share'}], + [{'id': 'dst_share'}]) + def test_cleanup_previous_snapmirror_relationships(self, replica_list): + mock_src_client = mock.Mock() + src_backend_info = ('src_share', 'src_vserver', 'src_backend') + dst_backend_info = ('dst_share', 'dst_vserver', 'dst_backend') + self.mock_object(self.dm_session, 'get_backend_info_for_share', + mock.Mock(side_effect=[src_backend_info, + dst_backend_info])) + self.mock_object(data_motion, 'get_client_for_backend', + mock.Mock(return_value=mock_src_client)) + self.mock_object(mock_src_client, 'release_snapmirror_vol') + + result = self.dm_session.cleanup_previous_snapmirror_relationships( + {'id': 'src_share'}, replica_list) + + data_motion.get_client_for_backend.assert_called_once_with( + 'src_backend', vserver_name='src_vserver') + self.dm_session.get_backend_info_for_share.assert_has_calls([ + mock.call({'id': 'src_share'}), + mock.call({'id': 'dst_share'}) + ]) + mock_src_client.release_snapmirror_vol.assert_called_once_with( + 'src_vserver', 'src_share', 'dst_vserver', 'dst_share') + + self.assertIsNone(result) + + @ddt.data(netapp_api.NaApiError(), + netapp_api.NaApiError(code=netapp_api.EOBJECTNOTFOUND), + netapp_api.NaApiError(code=netapp_api.ESOURCE_IS_DIFFERENT), + netapp_api.NaApiError(code='some_random_code', + message="(entry doesn't exist)"), + netapp_api.NaApiError(code='some_random_code', + message='(actually, entry does exist!)')) + def test_cleanup_previous_snapmirror_relationships_does_not_exist( + self, release_exception): + mock_src_client = mock.Mock() + self.mock_object(self.dm_session, 'get_backend_info_for_share', + mock.Mock(return_value=( + mock.Mock(), mock.Mock(), mock.Mock()))) + self.mock_object(data_motion, 'get_client_for_backend', + mock.Mock(return_value=mock_src_client)) + self.mock_object(mock_src_client, 'release_snapmirror_vol', + mock.Mock(side_effect=release_exception)) + + replica = {'id': 'src_share'} + replica_list = [replica, {'id': 'dst_share'}] + + result = self.dm_session.cleanup_previous_snapmirror_relationships( + replica, replica_list) + + mock_src_client.release_snapmirror_vol.assert_called() + self.assertIsNone(result) diff --git a/manila/tests/share/drivers/netapp/dataontap/cluster_mode/test_lib_base.py b/manila/tests/share/drivers/netapp/dataontap/cluster_mode/test_lib_base.py index e8495a1414..e9663e64d3 100644 --- a/manila/tests/share/drivers/netapp/dataontap/cluster_mode/test_lib_base.py +++ b/manila/tests/share/drivers/netapp/dataontap/cluster_mode/test_lib_base.py @@ -4257,6 +4257,46 @@ class NetAppFileStorageLibraryTestCase(test.TestCase): fake.SHARE, None, [], share_server=None) + (self.mock_dm_session.cleanup_previous_snapmirror_relationships + .assert_not_called()) + self.assertEqual(constants.REPLICA_STATE_IN_SYNC, result) + + def test_update_replica_state_replica_change_to_in_sycn(self): + fake_snapmirror = { + 'mirror-state': 'snapmirrored', + 'relationship-status': 'idle', + 'last-transfer-end-timestamp': '%s' % float(time.time()) + } + # fake SHARE has replica_state set to active already + active_replica = fake.SHARE + out_of_sync_replica = copy.deepcopy(fake.SHARE) + out_of_sync_replica['replica_state'] = ( + constants.REPLICA_STATE_OUT_OF_SYNC) + replica_list = [out_of_sync_replica, active_replica] + vserver_client = mock.Mock() + self.mock_object(vserver_client, 'volume_exists', + mock.Mock(return_value=True)) + self.mock_object(self.library, + '_get_vserver', + mock.Mock(return_value=(fake.VSERVER1, + vserver_client))) + self.mock_dm_session.get_snapmirrors = mock.Mock( + return_value=[fake_snapmirror]) + mock_config = mock.Mock() + mock_config.safe_get = mock.Mock(return_value=0) + self.mock_object(data_motion, 'get_backend_configuration', + mock.Mock(return_value=mock_config)) + self.mock_object(self.library, + '_is_readable_replica', + mock.Mock(return_value=False)) + + result = self.library.update_replica_state( + None, replica_list, out_of_sync_replica, + None, [], share_server=None) + + # Expect a snapmirror cleanup as replica was in out of sync state + (self.mock_dm_session.cleanup_previous_snapmirror_relationships + .assert_called_once_with(out_of_sync_replica, replica_list)) self.assertEqual(constants.REPLICA_STATE_IN_SYNC, result) def test_update_replica_state_backend_volume_absent(self): @@ -4303,6 +4343,8 @@ class NetAppFileStorageLibraryTestCase(test.TestCase): fake.SHARE, None, snapshots, share_server=None) + (self.mock_dm_session.cleanup_previous_snapmirror_relationships + .assert_not_called()) self.assertEqual(constants.REPLICA_STATE_IN_SYNC, result) def test_update_replica_state_missing_snapshot(self): diff --git a/releasenotes/notes/bug-1982808-netapp-fix-snapmirror-snapshots-not-cleaned-up-63cc98cd468adbd1.yaml b/releasenotes/notes/bug-1982808-netapp-fix-snapmirror-snapshots-not-cleaned-up-63cc98cd468adbd1.yaml new file mode 100644 index 0000000000..765b52f78a --- /dev/null +++ b/releasenotes/notes/bug-1982808-netapp-fix-snapmirror-snapshots-not-cleaned-up-63cc98cd468adbd1.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + NetApp driver `bug #1982808 + `_: Fixed issue + preventing the storage system from proper clean up unused SnapMirror + snapshots after a replica promote, significantly increasing the amount + of space consumed in ONTAP volumes by snapshots. +