From d49fa250e33b5e496f42996cce8e66e7a02b7375 Mon Sep 17 00:00:00 2001 From: Jim Phillips Date: Mon, 14 Sep 2015 11:42:38 -0400 Subject: [PATCH] add lunr.py to parse HTML to create JSON for Lunr search Change-Id: I44aff67391fa6f925b8cf09f787d7ec251faef0c --- Makefile | 7 +- _templates/mirantis/static/search.html | 413 +++++++++++++++++++++++++ common_conf.py | 2 +- lunr.py | 58 ++++ requirements.txt | 1 + 5 files changed, 479 insertions(+), 2 deletions(-) create mode 100644 _templates/mirantis/static/search.html create mode 100755 lunr.py diff --git a/Makefile b/Makefile index f40a8be80..52c824bf5 100644 --- a/Makefile +++ b/Makefile @@ -61,7 +61,7 @@ images: $(PDFs) all: clean html dirhtml singlehtml latexpdf pdf -html: images +rawhtml: images $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." @@ -177,6 +177,11 @@ doctest: @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." +html: rawhtml + ./lunr.py + @echo + @echo "Lunr search index complete" + SPELL = aspell ASPELLOPTS = --dont-backup -d en --personal=.aspell_en.wordlist RSTS := $(shell find pages/ -type f -name '*.rst') diff --git a/_templates/mirantis/static/search.html b/_templates/mirantis/static/search.html new file mode 100644 index 000000000..d29711022 --- /dev/null +++ b/_templates/mirantis/static/search.html @@ -0,0 +1,413 @@ + + + + + + + + + Search — Mirantis OpenStack v6.1 | Documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +

Search

+
+ +

+ Please activate JavaScript to enable the search + functionality. +

+
+

+ From here you can search these documents. Enter your search + words into the box below and click "search". Note that the search + function will automatically search for all of the words. Pages + containing fewer words won't appear in the result list. +

+ + +
+
+ +
+ + +
+ +
+
+ +
+ +
+
+
+
+ +
+
+ + + + + + +Top + + + + + + + + + + +
\ No newline at end of file
diff --git a/common_conf.py b/common_conf.py
index 6be563d06..d7bcf8ce9 100644
--- a/common_conf.py
+++ b/common_conf.py
@@ -197,7 +197,7 @@ html_favicon = '_static/mirantis_icon.ico'
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = ['_static']
 
-html_extra_path = ['_templates/mirantis/static/index.html']
+html_extra_path = ['_templates/mirantis/static/index.html', '_templates/mirantis/static/search.html']
 
 # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
 # using the given strftime format.
diff --git a/lunr.py b/lunr.py
new file mode 100755
index 000000000..fb0e06a76
--- /dev/null
+++ b/lunr.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+
+import glob, ntpath, random, json
+from bs4 import BeautifulSoup
+
+exclude = ['_build/html/search.html', '_build/html/index.html', '_build/html/index_content.html', '_build/html/contents.html', '_build/html/genindex.html', '_build/html/terminology.html']
+files = glob.glob('_build/html/*.html')
+
+for remove in exclude:
+    if remove in files: files.remove(remove)
+
+doc = []
+
+def formatheading(filename, headings, guide, type):
+    title = headings.text[:-1]
+    parent = headings.parent
+    url = filename + '#' + headings.parent.attrs['id']
+
+    if type == 'h2':
+        for tag in parent.find_all('h2'):
+            tag.replaceWith('')
+        for tag in parent.find_all('h3'):
+            tag.parent.replaceWith('')
+    else:
+        for tag in parent.find_all('h3'):
+            tag.replaceWith('')
+        for tag in parent.find_all('h4'):
+            tag.replaceWith('')
+
+    body = parent.get_text(" ", strip=True)
+
+    return {
+        "title": title,
+        "guide": guide,
+        "url": url,
+        "body": body.replace('\n', ' ')
+    }
+
+for file in files:
+    filename = ntpath.basename(file)
+    h2 = BeautifulSoup(open(file), 'html.parser')
+    h3 = BeautifulSoup(open(file), 'html.parser')
+
+    for title in h2.findAll('h1'):
+        guide = title.text[:-1]
+
+    for headings in h3.findAll('h3'):
+        result = formatheading(filename, headings, guide, 'h3')
+        if result['body']:
+            doc.append(result)
+
+    for headings in h2.findAll('h2'):
+        result = formatheading(filename, headings, guide, 'h2')
+        if result['body']:
+            doc.append(result)
+
+with open('_build/html/_static/data.json', 'w') as outfile:
+    json.dump(doc, outfile)
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 1d03b008a..53c4b9cfb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,3 +6,4 @@ sphinxcontrib-actdiag
 sphinxcontrib-seqdiag
 sphinxcontrib-nwdiag
 sphinxcontrib-plantuml
+beautifulsoup4==4.4.0