Optionally reduce memory footprint
This patch adds an option to reduce the program's memory footprint at runtime and updates the README file accordingly. Instead of loading all contents into memory and then sorting the entries, it goes through the files entry by entry, always emitting the oldest pending one. Reduced-memory operation increases the time to merge logs by approximately 25%. The option is `-m` or `--min-memory`.
This commit is contained in:
parent
c0e26d3789
commit
8459bad993
21
README.rst
21
README.rst
|
@ -18,9 +18,18 @@ Limitations
|
|||
This tool is not able to properly (or meaningfully) merge logs if your servers
|
||||
are not time synced to a common time source.
|
||||
|
||||
This is a naive implementation, not smart at all, instead of runtime comparing
|
||||
input dates as they come from log files, we create a big memory list with
|
||||
all log lines, sort them, and spit them out. This can be improved.
|
||||
By default os-log-merger uses a memory hogging implementation because it
|
||||
provides a considerable time reduction to complete the merging. This
|
||||
implementation loads all file contents into memory, sorts them, and then
|
||||
proceeds to output the merged result.
|
||||
|
||||
For operation on memory-constrained systems and with log files of considerable
|
||||
size, os-log-merger can operate in a memory-conservative mode where log entries
|
||||
will be read from files one by one and sorted as they come.
|
||||
|
||||
This memory reduction has an impact on processing speed, and will increase the
|
||||
time to process the files by approximately 25%.
|
||||
|
||||
|
||||
How to install
|
||||
~~~~~~~~~~~~~~
|
||||
|
@ -52,3 +61,9 @@ The previous example would produce something like this::
|
|||
|
||||
References to http url files instead of local files is also supported. Files
|
||||
will be cached locally to avoid re-downloading on next runs.
|
||||
|
||||
Limit memory usage
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
We can disable the default speed-optimized operation for those cases where we want
|
||||
to favor a small memory footprint by using option `-m` (`--min-memory`).
|
||||
|
|
|
@ -127,19 +127,25 @@ class OpenStackLog:
|
|||
except IndexError:
|
||||
return None
|
||||
|
||||
def log_entries(self):
|
||||
entry = None
|
||||
def __iter__(self):
|
||||
self.entry = None
|
||||
self.next_entry = None
|
||||
return self
|
||||
|
||||
def _readline(self, entry):
|
||||
while True:
|
||||
line = self._file.readline()
|
||||
if line == "":
|
||||
break
|
||||
return entry, None
|
||||
|
||||
try:
|
||||
new_entry = self._extract_with_date(line)
|
||||
if new_entry is None:
|
||||
continue
|
||||
if entry:
|
||||
yield entry
|
||||
return entry, new_entry
|
||||
entry = new_entry
|
||||
|
||||
except ValueError:
|
||||
# it's a non-dated line, just append to the entry
|
||||
# extra info
|
||||
|
@ -148,33 +154,86 @@ class OpenStackLog:
|
|||
entry = (date_object, filename, pid, level,
|
||||
rest + EXTRALINES_PADDING + line)
|
||||
|
||||
if entry:
|
||||
yield entry
|
||||
def __next__(self):
|
||||
return self.next()
|
||||
|
||||
def next(self):
|
||||
self.entry, self.next_entry = self._readline(self.next_entry)
|
||||
if self.entry is None:
|
||||
raise StopIteration()
|
||||
return self.entry
|
||||
|
||||
def peek(self):
|
||||
return self.entry
|
||||
|
||||
def __cmp__(self, other):
|
||||
if other.peek() is None or self.peek() is None:
|
||||
if self.peek() is None:
|
||||
return 0 if other.peek() is None else 1
|
||||
return -1
|
||||
|
||||
if (other.peek() or self.peek()) is None:
|
||||
return 0 if self.peek() is None else -1
|
||||
return cmp(self.peek()[0], other.peek()[0])
|
||||
|
||||
|
||||
def process_logs_limit_memory_usage(logs):
    """Lazily merge several logs, yielding their entries oldest first.

    Only one pending entry per log is held at any time, so memory use is
    bounded by the number of logs rather than by their total size.

    :param logs: iterable of log objects; iterating each one must yield
                 entry tuples whose first item is the sortable date.
    :returns: generator of entry tuples ordered by date. The output is
              globally ordered only if every log is itself ordered by date.
              Entries with equal dates come out in the order their logs
              appear in ``logs``.
    """
    # Function-scope import keeps this block self-contained.
    import heapq

    # Heap items are (date, log position, entry, iterator). The position is
    # unique per log, so ties on the date never fall through to comparing
    # entries or iterators, and equal dates keep the order of ``logs``.
    heap = []
    for position, log in enumerate(logs):
        entries = iter(log)
        # Use a default instead of letting StopIteration escape: an empty
        # log must simply be skipped, not abort the whole merge.
        first = next(entries, None)
        if first is not None:
            heap.append((first[0], position, first, entries))
    heapq.heapify(heap)

    while heap:
        _, position, entry, entries = heap[0]
        yield entry
        following = next(entries, None)
        if following is None:
            # This log reached EOF; there is no point in checking it again.
            heapq.heappop(heap)
        else:
            heapq.heapreplace(
                heap, (following[0], position, following, entries))
|
||||
|
||||
|
||||
def process_logs_memory_hog(logs):
    """Load every entry of every log into memory and yield them by date.

    :param logs: iterable of log objects; iterating each one must yield
                 entry tuples whose first item is the sortable date.
    :returns: generator of all entries ordered by date. The sort is stable,
              so entries with equal dates keep the order in which they
              were read.
    """
    # read all the logs
    all_entries = [entry for log in logs for entry in log]

    # Sort in place: sorted() would allocate a second list as large as the
    # first one, raising peak memory on an already memory-heavy path.
    all_entries.sort(key=lambda log_entry: log_entry[0])
    for entry in all_entries:
        yield entry
|
||||
|
||||
|
||||
def process_logs(cfg):
|
||||
all_entries = []
|
||||
filename_alias = {}
|
||||
logs = []
|
||||
for filename in cfg.logfiles:
|
||||
path, alias, is_url = get_path_and_alias(filename,
|
||||
cfg.log_base,
|
||||
cfg.log_postfix)
|
||||
filename_alias[path] = (filename, alias, is_url)
|
||||
|
||||
# read the log
|
||||
oslog = OpenStackLog(path)
|
||||
for entry in oslog.log_entries():
|
||||
all_entries.append(entry)
|
||||
logs.append(OpenStackLog(path))
|
||||
|
||||
alias = generate_aliases(filename_alias, cfg)
|
||||
|
||||
sorted_entries = sorted(all_entries, key=lambda log_entry: log_entry[0])
|
||||
for entry in sorted_entries:
|
||||
if cfg.limit_memory:
|
||||
method = process_logs_limit_memory_usage
|
||||
else:
|
||||
method = process_logs_memory_hog
|
||||
|
||||
for entry in method(logs):
|
||||
(date_object, filename, pid, level, rest) = entry
|
||||
print (' '.join(
|
||||
[date_object.strftime("%Y-%m-%d %H:%M:%S.%f"),
|
||||
'[%s]' % alias[filename], pid,
|
||||
level, rest]).rstrip('\n'))
|
||||
print (' '.join([date_object.strftime("%Y-%m-%d %H:%M:%S.%f"),
|
||||
'[%s]' % alias[filename],
|
||||
pid, level, rest]).rstrip('\n'))
|
||||
|
||||
|
||||
def get_path_and_alias(filename, log_base, log_postfix):
|
||||
|
@ -397,6 +456,9 @@ one has not been provided:'
|
|||
parser.add_argument('--alias-level', '-a', type=int, default=0,
|
||||
dest='alias_level',
|
||||
help='Level of smart alias naming (0-3)')
|
||||
parser.add_argument('--min-memory', '-m', default=False,
|
||||
action='store_true', dest='limit_memory',
|
||||
help='Limit memory usage')
|
||||
|
||||
return parser.parse_args()
|
||||
|
||||
|
|
Loading…
Reference in New Issue