css parser: Show line numbers for errors.

This is a fairly major overhaul of the CSS parser to support
line numbers in error messages.

Basically, instead of passing "slices" of tokens around, we now pass
indexes into the token array to all of our sub-parsers, which gives
them access to previous tokens in certain cases. This is particularly
important for errors where something is missing (as opposed to being
wrong).
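
To illustrate the trade-off, a toy sketch (hypothetical helper names,
not the parser's real functions): once a sub-parser is handed a slice,
the token just before that slice is out of reach, but with start/end
indexes it can still be blamed in the error message.

    # Toy sketch of slice-passing vs. index-passing (hypothetical names).
    from collections import namedtuple

    Token = namedtuple('Token', ['s', 'line'])  # stand-in for the parser's Token class

    def value_with_slice(tokens):
        # The caller handed us tokens[start:end]; if the slice is empty,
        # the token that should precede the value is not visible from here.
        if not tokens:
            raise Exception('Missing value (no line number to report)')
        return tokens[0]

    def value_with_indexes(tokens, start, end):
        # Same parse, but the full token list stays in scope, so we can
        # blame the token just before the gap and report its line number.
        if start >= end:
            prev = tokens[start - 1]
            raise Exception('Missing value near line %d' % (prev.line,))
        return tokens[start]

    # For tokens of "bottom:" with nothing after the colon:
    #   tokens = [Token('bottom', 5), Token(':', 5)]
    #   value_with_slice(tokens[2:2])     -> no token left to point at
    #   value_with_indexes(tokens, 2, 2)  -> "Missing value near line 5"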

In testing this out I found a few more places to catch errors.
Steve Howell
2017-02-01 06:31:24 -08:00
committed by Tim Abbott
parent a03d816983
commit 2a07b204bf
3 changed files with 113 additions and 89 deletions

View File

@@ -32,8 +32,12 @@ def check_our_files():
         try:
             validate(fn)
         except CssParserException as e:
-            print('CssParserException raised while parsing file %s' % (fn,))
-            print(e)
+            msg = '''
+                ERROR! Some CSS seems to be misformatted.
+                {}
+                See line {} in file {}
+                '''.format(e.msg, e.token.line, fn)
+            print(msg)
             sys.exit(1)

 if __name__ == '__main__':

View File

@@ -13,27 +13,36 @@ class Token(object):
         self.col = col

 class CssParserException(Exception):
-    # TODO: Have callers pass in line numbers.
-    pass
+    def __init__(self, msg, token):
+        # type: (str, Token) -> None
+        self.msg = msg
+        self.token = token
+
+    def __str__(self):
+        # type: () -> str
+        return self.msg

-def find_end_brace(tokens, i):
-    # type: (List[Token], int) -> int
+def find_end_brace(tokens, i, end):
+    # type: (List[Token], int, int) -> int
     depth = 0
-    while i < len(tokens):
+    while i < end:
         s = tokens[i].s
         if s == '{':
             depth += 1
         elif s == '}':
             if depth == 0:
-                raise CssParserException('unexpected }')
+                raise CssParserException('unexpected }', tokens[i])
             elif depth == 1:
                 break
             depth -= 1
         i += 1
+    else:
+        raise CssParserException('missing }', tokens[i-1])
     return i

-def get_whitespace_and_comments(tokens, i, line=None):
-    # type: (List[Token], int, int) -> Tuple[int, str]
+def get_whitespace_and_comments(tokens, i, end, line=None):
+    # type: (List[Token], int, int, int) -> Tuple[int, str]
     def is_fluff_token(token):
         # type: (Token) -> bool
@@ -52,7 +61,7 @@ def get_whitespace_and_comments(tokens, i, line=None):
         return False

     text = ''
-    while (i < len(tokens)) and is_fluff_token(tokens[i]):
+    while (i < end) and is_fluff_token(tokens[i]):
         s = tokens[i].s
         text += s
         i += 1
@@ -63,27 +72,29 @@ def get_whitespace_and_comments(tokens, i, line=None):
 ############### Begin parsing here

-def parse_sections(tokens):
-    # type: (List[Token]) -> CssSectionList
-    i = 0
+def parse_sections(tokens, start, end):
+    # type: (List[Token], int, int) -> CssSectionList
+    i = start
     sections = []
-    while i < len(tokens):
-        start, pre_fluff = get_whitespace_and_comments(tokens, i)
-        i = find_end_brace(tokens, start)
-
-        section_tokens = tokens[start:i+1]
-        i, post_fluff = get_whitespace_and_comments(tokens, i+1)
-
-        if section_tokens:
-            section = parse_section(
-                tokens=section_tokens,
-                pre_fluff=pre_fluff,
-                post_fluff=post_fluff
-            )
-            sections.append(section)
-        else:
-            raise CssParserException('unexpected empty section')
+    while i < end:
+        start, pre_fluff = get_whitespace_and_comments(tokens, i, end)
+        if start >= end:
+            raise CssParserException('unexpected empty section', tokens[end-1])
+        i = find_end_brace(tokens, start, end)
+        section_end = i + 1
+        i, post_fluff = get_whitespace_and_comments(tokens, i+1, end)
+        section = parse_section(
+            tokens=tokens,
+            start=start,
+            end=section_end,
+            pre_fluff=pre_fluff,
+            post_fluff=post_fluff
+        )
+        sections.append(section)

     section_list = CssSectionList(
         tokens=tokens,
@@ -91,16 +102,15 @@ def parse_sections(tokens):
     )
     return section_list

-def parse_section(tokens, pre_fluff, post_fluff):
-    # type: (List[Token], str, str) -> Union[CssNestedSection, CssSection]
-    assert not ws(tokens[0].s)
-    assert tokens[-1].s == '}' # caller should strip trailing fluff
+def parse_section(tokens, start, end, pre_fluff, post_fluff):
+    # type: (List[Token], int, int, str, str) -> Union[CssNestedSection, CssSection]
+    assert not ws(tokens[start].s)
+    assert tokens[end-1].s == '}' # caller should strip trailing fluff

-    first_token = tokens[0].s
+    first_token = tokens[start].s
     if first_token in ('@media', '@keyframes') or first_token.startswith('@-'):
-        i, selector_list = parse_selectors_section(tokens) # not technically selectors
-        body_tokens = tokens[i+1:-1]
-        section_list = parse_sections(body_tokens)
+        i, selector_list = parse_selectors_section(tokens, start, end) # not technically selectors
+        section_list = parse_sections(tokens, i+1, end-1)
         nested_section = CssNestedSection(
             tokens=tokens,
             selector_list=selector_list,
@@ -110,8 +120,8 @@ def parse_section(tokens, pre_fluff, post_fluff):
         )
         return nested_section
     else:
-        i, selector_list = parse_selectors_section(tokens)
-        declaration_block = parse_declaration_block(tokens[i:])
+        i, selector_list = parse_selectors_section(tokens, start, end)
+        declaration_block = parse_declaration_block(tokens, i, end)
         section = CssSection(
             tokens=tokens,
             selector_list=selector_list,
@@ -121,35 +131,34 @@ def parse_section(tokens, pre_fluff, post_fluff):
         )
         return section

-def parse_selectors_section(tokens):
-    # type: (List[Token]) -> Tuple[int, CssSelectorList]
-    start, pre_fluff = get_whitespace_and_comments(tokens, 0)
+def parse_selectors_section(tokens, start, end):
+    # type: (List[Token], int, int) -> Tuple[int, CssSelectorList]
+    start, pre_fluff = get_whitespace_and_comments(tokens, start, end)
     assert pre_fluff == ''
     i = start
     text = ''
-    while i < len(tokens) and tokens[i].s != '{':
+    while i < end and tokens[i].s != '{':
         s = tokens[i].s
         text += s
         i += 1
-    selector_list = parse_selectors(tokens[start:i])
+    selector_list = parse_selectors(tokens, start, i)
     return i, selector_list

-def parse_selectors(tokens):
-    # type: (List[Token]) -> CssSelectorList
-    i = 0
-    start = i
+def parse_selectors(tokens, start, end):
+    # type: (List[Token], int, int) -> CssSelectorList
+    i = start
     selectors = []
-    while i < len(tokens):
+    while i < end:
         s = tokens[i].s
         if s == ',':
-            selector = parse_selector(tokens[start:i])
+            selector = parse_selector(tokens, start, i)
             selectors.append(selector)
             i += 1
             start = i
         if s.startswith('/*'):
-            raise CssParserException('Comments in selector section are not allowed')
+            raise CssParserException('Comments in selector section are not allowed', tokens[i])
         i += 1
-    selector = parse_selector(tokens[start:i])
+    selector = parse_selector(tokens, start, i)
     selectors.append(selector)
     selector_list = CssSelectorList(
         tokens=tokens,
@@ -157,12 +166,12 @@ def parse_selectors(tokens):
     )
     return selector_list

-def parse_selector(tokens):
-    # type: (List[Token]) -> CssSelector
-    i, pre_fluff = get_whitespace_and_comments(tokens, 0)
+def parse_selector(tokens, start, end):
+    # type: (List[Token], int, int) -> CssSelector
+    i, pre_fluff = get_whitespace_and_comments(tokens, start, end)
     levels = []
     last_i = None
-    while i < len(tokens):
+    while i < end:
         token = tokens[i]
         i += 1
         if not ws(token.s[0]):
@@ -170,10 +179,10 @@ def parse_selector(tokens):
             levels.append(token)

     if last_i is None:
-        raise CssParserException('Missing selector')
+        raise CssParserException('Missing selector', tokens[-1])

     assert last_i is not None
-    start, post_fluff = get_whitespace_and_comments(tokens, last_i)
+    start, post_fluff = get_whitespace_and_comments(tokens, last_i, end)
     selector = CssSelector(
         tokens=tokens,
         pre_fluff=pre_fluff,
@@ -182,21 +191,20 @@ def parse_selector(tokens):
     )
     return selector

-def parse_declaration_block(tokens):
-    # type: (List[Token]) -> CssDeclarationBlock
-    assert tokens[0].s == '{' # caller should strip leading fluff
-    assert tokens[-1].s == '}' # caller should strip trailing fluff
-    tokens = tokens[1:-1]
-    i = 0
+def parse_declaration_block(tokens, start, end):
+    # type: (List[Token], int, int) -> CssDeclarationBlock
+    assert tokens[start].s == '{' # caller should strip leading fluff
+    assert tokens[end-1].s == '}' # caller should strip trailing fluff
+    i = start + 1
     declarations = []
-    while i < len(tokens):
+    while i < end-1:
         start = i
-        i, _ = get_whitespace_and_comments(tokens, i)
-        while (i < len(tokens)) and (tokens[i].s != ';'):
+        i, _ = get_whitespace_and_comments(tokens, i, end)
+        while (i < end) and (tokens[i].s != ';'):
             i += 1
-        if i < len(tokens):
-            i, _ = get_whitespace_and_comments(tokens, i+1, line=tokens[i].line)
-        declaration = parse_declaration(tokens[start:i])
+        if i < end:
+            i, _ = get_whitespace_and_comments(tokens, i+1, end, line=tokens[i].line)
+        declaration = parse_declaration(tokens, start, i)
         declarations.append(declaration)

     declaration_block = CssDeclarationBlock(
@@ -205,26 +213,25 @@ def parse_declaration_block(tokens):
     )
     return declaration_block

-def parse_declaration(tokens):
-    # type: (List[Token]) -> CssDeclaration
-    i, pre_fluff = get_whitespace_and_comments(tokens, 0)
-    try:
-        css_property = tokens[i].s
-    except IndexError:
-        raise CssParserException('Empty declaration')
+def parse_declaration(tokens, start, end):
+    # type: (List[Token], int, int) -> CssDeclaration
+    i, pre_fluff = get_whitespace_and_comments(tokens, start, end)
+    if (i >= end) or (tokens[i].s == '}'):
+        raise CssParserException('Empty declaration or missing semicolon', tokens[i-1])
+
+    css_property = tokens[i].s
     if tokens[i+1].s != ':':
-        # print(css_property)
-        raise CssParserException('We expect a colon here')
+        raise CssParserException('We expect a colon here', tokens[i])
     i += 2
     start = i
-    while (i < len(tokens)) and (tokens[i].s != ';'):
+    while (i < end) and (tokens[i].s != ';') and (tokens[i].s != '}'):
         i += 1
-    css_value = parse_value(tokens[start:i])
-    semicolon = (i < len(tokens)) and (tokens[i].s == ';')
+    css_value = parse_value(tokens, start, i)
+    semicolon = (i < end) and (tokens[i].s == ';')
     if semicolon:
         i += 1
-    _, post_fluff = get_whitespace_and_comments(tokens, i)
+    _, post_fluff = get_whitespace_and_comments(tokens, i, end)

     declaration = CssDeclaration(
         tokens=tokens,
         pre_fluff=pre_fluff,
@@ -235,11 +242,14 @@ def parse_declaration(tokens):
     )
     return declaration

-def parse_value(tokens):
-    # type: (List[Token]) -> CssValue
-    i, pre_fluff = get_whitespace_and_comments(tokens, 0)
-    value = tokens[i]
-    i, post_fluff = get_whitespace_and_comments(tokens, i+1)
+def parse_value(tokens, start, end):
+    # type: (List[Token], int, int) -> CssValue
+    i, pre_fluff = get_whitespace_and_comments(tokens, start, end)
+    if i < end:
+        value = tokens[i]
+    else:
+        raise CssParserException('Missing value', tokens[i-1])
+    i, post_fluff = get_whitespace_and_comments(tokens, i+1, end)
     return CssValue(
         tokens=tokens,
         value=value,
@@ -377,7 +387,7 @@ class CssValue(object):
 def parse(text):
     # type: (str) -> CssSectionList
     tokens = tokenize(text)
-    section_list = parse_sections(tokens=tokens)
+    section_list = parse_sections(tokens, 0, len(tokens))
     return section_list

 #### Begin tokenizer section here
@@ -469,7 +479,7 @@ def tokenize(text):
             while (state.i < len(text)) and not looking_at('*/'):
                 state.i += 1
             if not looking_at('*/'):
-                raise CssParserException('unclosed comment')
+                raise CssParserException('unclosed comment', tokens[-1])
             s = text[old_i:state.i+2]
             state.i = old_i
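
For reference, a minimal sketch of how a caller consumes the reworked
exception (the import path and sample CSS below are assumptions, not
part of this commit):

    # Hypothetical caller; the module path is an assumption.
    from css_parser import parse, CssParserException

    try:
        parse('h1 {\n    color red;\n}\n')  # missing colon after "color"
    except CssParserException as e:
        # e.msg carries the human-readable error and e.token the offending
        # token, so callers can report both the message and the line number.
        print('%s (line %d)' % (e.msg, e.token.line))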

View File

@@ -193,6 +193,16 @@ class ParserTestSadPath(unittest.TestCase):
         error = 'Missing selector'
         self._assert_error(my_css, error)

+    def test_missing_value(self):
+        # type: () -> None
+        my_css = '''
+            h1
+            {
+                bottom:
+            }'''
+        error = 'Missing value'
+        self._assert_error(my_css, error)
+
     def test_disallow_comments_in_selectors(self):
         # type: () -> None
         my_css = '''