fuel-docs/lunr.py

58 lines
1.6 KiB
Python
Executable File

#!/usr/bin/env python
import glob, ntpath, random, json
from bs4 import BeautifulSoup
# Generated pages that are left out of the search index.
exclude = [
    '_build/html/search.html',
    '_build/html/index.html',
    '_build/html/index_content.html',
    '_build/html/contents.html',
    '_build/html/genindex.html',
    '_build/html/terminology.html',
]

# Match on base names rather than full path strings: glob joins its results
# with the platform separator, so on Windows a hit looks like
# '_build/html\\search.html' and would never equal the forward-slash entries
# above. ntpath.basename() splits on both separators.
_excluded_names = set(ntpath.basename(path) for path in exclude)
files = [
    path for path in glob.glob('_build/html/*.html')
    if ntpath.basename(path) not in _excluded_names
]

# Collects one search record (dict) per indexed heading.
doc = []
def formatheading(filename, headings, guide, type):
    """Build the search-index record for one heading and its section.

    The heading's parent element is treated as the section. Nested headings
    are removed from that element before its text is extracted, so the body
    holds only the section's own text. The removal happens in place: the
    parsed tree that ``headings`` belongs to is modified by this call.

    Args:
        filename: Page file name used as the base of the record URL.
        headings: Heading element; must provide ``text`` and ``parent``.
        guide: Guide title stored with the record.
        type: ``'h2'`` removes every ``h2`` plus the parent element of every
            ``h3`` found in the section; any other value removes the ``h3``
            and ``h4`` elements themselves.

    Returns:
        dict with the keys ``title``, ``guide``, ``url`` and ``body``.
    """
    # NOTE(review): the final character is dropped unconditionally --
    # presumably the permalink marker appended to generated headings;
    # confirm against the HTML theme in use.
    title = headings.text[:-1]
    section = headings.parent

    # Link to the section anchor when there is one; a parent without an id
    # falls back to the page itself instead of raising KeyError.
    anchor = section.attrs.get('id')
    url = filename if anchor is None else filename + '#' + anchor

    if type == 'h2':
        for tag in section.find_all('h2'):
            tag.replace_with('')
        # Subsections are dropped whole (heading and content), not just
        # their heading element.
        for tag in section.find_all('h3'):
            tag.parent.replace_with('')
    else:
        for level in ('h3', 'h4'):
            for tag in section.find_all(level):
                tag.replace_with('')

    body = section.get_text(" ", strip=True)
    return {
        "title": title,
        "guide": guide,
        "url": url,
        "body": body.replace('\n', ' '),
    }
# Turn every remaining page into search records, then write the index.
for file in files:
    filename = ntpath.basename(file)

    # Read the page once and release the handle immediately. Raw bytes are
    # handed to BeautifulSoup so it can work out the document encoding
    # itself instead of relying on the locale default.
    with open(file, 'rb') as page:
        markup = page.read()

    # formatheading() removes elements from the tree it is given, so each
    # heading level gets its own independent parse of the page.
    h2 = BeautifulSoup(markup, 'html.parser')
    h3 = BeautifulSoup(markup, 'html.parser')

    # Reset for every page: a page without an h1 must not inherit the guide
    # title of the previous page (or hit an unbound name on the first one).
    guide = ''
    for title in h2.find_all('h1'):
        # Last h1 wins; the trailing character is dropped as for headings.
        guide = title.text[:-1]

    # h3 records are appended before h2 records for each page.
    for soup, level in ((h3, 'h3'), (h2, 'h2')):
        for headings in soup.find_all(level):
            result = formatheading(filename, headings, guide, level)
            # Sections with no text of their own are not indexed.
            if result['body']:
                doc.append(result)

with open('_build/html/_static/data.json', 'w') as outfile:
    json.dump(doc, outfile)