Factor out HtmlTreeBranch and related code from template parser.

This code is not directly related to the template parser, so it
can safely live in its own file.

The only significant change to the code is to the signature of
`html_branches` so that it can be called without requiring a file.
Since `html_branches` is only used in html_grep, html_grep has been
updated to reflect this change.

Fixes: #1774.
This commit is contained in:
Gordon P. Hemsley
2016-09-11 14:23:29 -04:00
parent 4d3350bd7b
commit 331617efab
5 changed files with 269 additions and 204 deletions

View File

@@ -3,14 +3,14 @@ from __future__ import print_function
from collections import defaultdict
from six.moves import range
from .template_parser import html_branches, Token, HtmlTreeBranch
from .html_branches import html_branches, HtmlTreeBranch
def show_all_branches(fns):
# type: (List[str]) -> None
for fn in fns:
print(fn)
text = open(fn).read()
branches = html_branches(text)
branches = html_branches(text, fn=fn)
for branch in branches:
print(branch.text())
print('---')
@@ -27,7 +27,8 @@ class Grepper(object):
all_branches = [] # type: List[HtmlTreeBranch]
for fn in fns:
branches = html_branches(fn)
text = open(fn).read()
branches = html_branches(text, fn=fn)
all_branches += branches
self.word_dict = defaultdict(set) # type: Dict[str, Set[HtmlTreeBranch]]