Dedup index entries
When we generate indexes we query both disk and swift. If any files had overlapping paths between the two, we generated duplicate index entries for the same paths. This happened because we stored the entries in lists, which allow duplicates. Fix this by using sets until we need to sort (sets are unordered, so we have to convert to a list at that point). This removes any duplicates and makes the index pages more correct. Change-Id: I6dfa3b30819d6633c3e483d3a386bdce3e26b572
This commit is contained in:
parent
082d0b942a
commit
a6fa3bfd2d
|
@ -210,24 +210,26 @@ class IndexIterableBuffer(collections.Iterable):
|
||||||
self.file_headers = {}
|
self.file_headers = {}
|
||||||
self.file_headers['Content-type'] = 'text/html'
|
self.file_headers['Content-type'] = 'text/html'
|
||||||
|
|
||||||
|
# Use sets here to dedup. We can have duplicates
|
||||||
|
# if disk and swift based paths have overlap.
|
||||||
|
file_set = self.disk_list() | self.swift_list()
|
||||||
# file_list is a list of tuples (relpath, name)
|
# file_list is a list of tuples (relpath, name)
|
||||||
self.file_list = self.disk_list() + self.swift_list()
|
self.file_list = sorted(file_set, key=lambda tup: tup[0])
|
||||||
self.file_list = sorted(self.file_list, key=lambda tup: tup[0])
|
|
||||||
|
|
||||||
def disk_list(self):
|
def disk_list(self):
|
||||||
file_list = []
|
file_set = set()
|
||||||
if os.path.isdir(self.logpath):
|
if os.path.isdir(self.logpath):
|
||||||
for f in os.listdir(self.logpath):
|
for f in os.listdir(self.logpath):
|
||||||
if os.path.isdir(os.path.join(self.logpath, f)):
|
if os.path.isdir(os.path.join(self.logpath, f)):
|
||||||
f = f + '/' if f[-1] != '/' else f
|
f = f + '/' if f[-1] != '/' else f
|
||||||
file_list.append((
|
file_set.add((
|
||||||
os.path.join('/', self.logname, f),
|
os.path.join('/', self.logname, f),
|
||||||
f
|
f
|
||||||
))
|
))
|
||||||
return file_list
|
return file_set
|
||||||
|
|
||||||
def swift_list(self):
|
def swift_list(self):
|
||||||
file_list = []
|
file_set = set()
|
||||||
if self.config.has_section('swift'):
|
if self.config.has_section('swift'):
|
||||||
try:
|
try:
|
||||||
swift_config = dict(self.config.items('swift'))
|
swift_config = dict(self.config.items('swift'))
|
||||||
|
@ -246,7 +248,7 @@ class IndexIterableBuffer(collections.Iterable):
|
||||||
fname
|
fname
|
||||||
else:
|
else:
|
||||||
fname = os.path.relpath(f['name'], self.logname)
|
fname = os.path.relpath(f['name'], self.logname)
|
||||||
file_list.append((
|
file_set.add((
|
||||||
os.path.join('/', self.logname, fname),
|
os.path.join('/', self.logname, fname),
|
||||||
fname
|
fname
|
||||||
))
|
))
|
||||||
|
@ -256,7 +258,7 @@ class IndexIterableBuffer(collections.Iterable):
|
||||||
sys.stderr.write('logname: %s\n' % self.logname)
|
sys.stderr.write('logname: %s\n' % self.logname)
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
|
||||||
return file_list
|
return file_set
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
env = jinja2.Environment(
|
env = jinja2.Environment(
|
||||||
|
@ -299,13 +301,13 @@ def get_file_generator(environ, root_path, config=None):
|
||||||
os.path.join(logname, 'index.html'), config)
|
os.path.join(logname, 'index.html'), config)
|
||||||
|
|
||||||
if not file_generator or not file_generator.obj:
|
if not file_generator or not file_generator.obj:
|
||||||
if config.has_section('general'):
|
if (config.has_section('general') and
|
||||||
if config.has_option('general', 'generate_folder_index'):
|
config.has_option('general', 'generate_folder_index') and
|
||||||
if config.getboolean('general', 'generate_folder_index'):
|
config.getboolean('general', 'generate_folder_index')):
|
||||||
index_generator = IndexIterableBuffer(logname, logpath,
|
index_generator = IndexIterableBuffer(logname, logpath,
|
||||||
config)
|
config)
|
||||||
if len(index_generator.file_list) > 0:
|
if len(index_generator.file_list) > 0:
|
||||||
return index_generator
|
return index_generator
|
||||||
raise NoSuchFile()
|
raise NoSuchFile()
|
||||||
|
|
||||||
return file_generator
|
return file_generator
|
||||||
|
|
Loading…
Reference in New Issue