From 8fc4b88519ddd4face0fe095627a0055cff32ba9 Mon Sep 17 00:00:00 2001 From: Sandy Walsh Date: Wed, 15 Apr 2015 13:39:03 -0700 Subject: [PATCH] Skip tarball and just gzip the archive file. It's just a single file, so it makes sense to just gzip it. Change-Id: I0103269a9e365fdbcb30c1389cf6f3eabfce1a23 --- setup.cfg | 2 +- shoebox/roll_manager.py | 27 +++++++++++++-------------- test/integration/test_json_tarball.py | 18 ++++++------------ test/test_roll_manager.py | 15 ++++++++++----- 4 files changed, 30 insertions(+), 32 deletions(-) diff --git a/setup.cfg b/setup.cfg index c1d9db4..6bf4ce2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = shoebox -version = 0.6 +version = 0.7 author = Dark Secret Software Inc. author-email = admin@darksecretsoftware.com summary = data archiving library diff --git a/shoebox/roll_manager.py b/shoebox/roll_manager.py index 151f9c4..9356a14 100644 --- a/shoebox/roll_manager.py +++ b/shoebox/roll_manager.py @@ -15,10 +15,10 @@ import datetime import fnmatch +import gzip import hashlib import os import os.path -import tarfile import notification_utils @@ -144,16 +144,16 @@ class WritingRollManager(RollManager): class WritingJSONRollManager(object): """No archiver. No roll checker. Just write 1 file line per json payload. - Once the file gets big enough, .tar.gz the file and move + Once the file gets big enough, gzip the file and move into the destination_directory. Expects an external tool like rsync to move the file. - A SHA-256 of the payload may be included in the tarball filename.""" + A SHA-256 of the payload may be included in the archive filename.""" def __init__(self, *args, **kwargs): self.filename_template = args[0] self.directory = kwargs.get('directory', '.') self.destination_directory = kwargs.get('destination_directory', '.') self.roll_size_mb = int(kwargs.get('roll_size_mb', 1000)) - minutes = kwargs.get('roll_minutes', 15) + minutes = kwargs.get('roll_minutes', 60) self.roll_after = datetime.timedelta(minutes=minutes) # Look in the working directory for any files. Move them to the @@ -205,17 +205,16 @@ class WritingJSONRollManager(object): f.close() return sha256.hexdigest() - def _tar_working_file(self, filename): - # tar all the files in working directory into an archive - # in destination_directory. + def _gzip_working_file(self, filename): + # gzip the working file in the destination_directory. crc = self._get_file_sha(filename) - # No contextmgr for tarfile in 2.6 :( - fn = self._make_filename(crc, self.destination_directory) + ".tar.gz" - tar = tarfile.open(fn, "w:gz") - just_name = os.path.basename(filename) - tar.add(filename, arcname=just_name) - tar.close() + fn = self._make_filename(crc, self.destination_directory) + ".gz" + + with open(filename, 'r') as file_in: + file_out = gzip.open(fn, 'wb') + file_out.writelines(file_in) + file_out.close() def _clean_working_directory(self, filename): os.remove(filename) @@ -223,7 +222,7 @@ class WritingJSONRollManager(object): def _do_roll(self, filename): self.close() - self._tar_working_file(filename) + self._gzip_working_file(filename) self._clean_working_directory(filename) def write(self, metadata, json_payload): diff --git a/test/integration/test_json_tarball.py b/test/integration/test_json_tarball.py index 54ba4e1..9df7d24 100644 --- a/test/integration/test_json_tarball.py +++ b/test/integration/test_json_tarball.py @@ -1,10 +1,10 @@ import datetime +import gzip import hashlib import json import mock import os import shutil -import tarfile import unittest import notification_utils @@ -59,20 +59,14 @@ class TestDirectory(unittest.TestCase): if os.path.isfile(full): self.fail("Working directory not empty.") - # Extract the tarballs ... + # Read the gzip files ... total = 0 for f in os.listdir(DESTDIR): - tar = tarfile.open(os.path.join(DESTDIR, f), "r:gz") - names = tar.getnames() - tar.extractall(path=EXTRACTDIR) - tar.close() + archive = gzip.open(os.path.join(DESTDIR, f), 'rb') + file_content = archive.read().split('\n') + archive.close() - for item in names: - full = os.path.join(EXTRACTDIR, item) - num = 0 - with open(full, "r") as handle: - for line in handle: - num += 1 + num = len(file_content) - 1 total += num print "In %s: %d of %d Remaining: %d" % (f, num, actual, actual - total) diff --git a/test/test_roll_manager.py b/test/test_roll_manager.py index 3be2c0d..2db62d3 100644 --- a/test/test_roll_manager.py +++ b/test/test_roll_manager.py @@ -144,15 +144,20 @@ class TestJSONRollManager(unittest.TestCase): @mock.patch( "shoebox.roll_manager.WritingJSONRollManager._archive_working_files") - def test_tar_working_file(self, awf): + def test_gzip_working_file(self, awf): rm = roll_manager.WritingJSONRollManager("template.foo") with mock.patch.object(rm, "_get_file_sha") as gfs: gfs.return_value = "aabbcc" - with mock.patch.object(roll_manager.tarfile, 'open') as tar: - tar.return_value = mock.MagicMock() - rm._tar_working_file("foo") - self.assertTrue(tar.called) + + open_name = '%s.open' % roll_manager.__name__ + with mock.patch(open_name, create=True) as mock_open: + handle = mock.MagicMock() + mock_open.return_value = handle + with mock.patch.object(roll_manager.gzip, 'open') as gzip: + gzip.return_value = mock.MagicMock() + rm._gzip_working_file("foo") + self.assertTrue(gzip.called) @mock.patch( "shoebox.roll_manager.WritingJSONRollManager._archive_working_files")