Speed up custom checkers in ./tools/lint.

The main change here is to make the loop over lines the inner
loop and the loop over rules the outer loop.  This keeps regexes
in cache, and it also avoids some O(N) checks.

This is a significant speedup for me, reducing time from 16s
to 11s.
This commit is contained in:
Steve Howell
2017-06-20 19:17:00 -04:00
committed by showell
parent ca3b9f3059
commit 572ea370ea

View File

@@ -15,12 +15,12 @@ def build_custom_checkers(by_lang):
def custom_check_file(fn, rules, skip_rules=None, max_length=None): def custom_check_file(fn, rules, skip_rules=None, max_length=None):
# type: (str, RuleList, Optional[Any], Optional[int]) -> bool # type: (str, RuleList, Optional[Any], Optional[int]) -> bool
failed = False failed = False
lineFlag = False
line_tups = []
for i, line in enumerate(open(fn)): for i, line in enumerate(open(fn)):
line_newline_stripped = line.strip('\n') line_newline_stripped = line.strip('\n')
line_fully_stripped = line_newline_stripped.strip() line_fully_stripped = line_newline_stripped.strip()
skip = False skip = False
lineFlag = True
for rule in skip_rules or []: for rule in skip_rules or []:
if re.match(rule, line): if re.match(rule, line):
skip = True skip = True
@@ -28,12 +28,14 @@ def build_custom_checkers(by_lang):
continue continue
if skip: if skip:
continue continue
tup = (i, line, line_newline_stripped, line_fully_stripped)
line_tups.append(tup)
rules_to_apply = []
fn_dirname = os.path.dirname(fn)
for rule in rules: for rule in rules:
exclude_list = rule.get('exclude', set()) exclude_list = rule.get('exclude', set())
if fn in exclude_list or os.path.dirname(fn) in exclude_list: if fn in exclude_list or fn_dirname in exclude_list:
continue
exclude_list = rule.get('exclude_line', set())
if (fn, line_fully_stripped) in exclude_list:
continue continue
if rule.get("include_only"): if rule.get("include_only"):
found = False found = False
@@ -42,6 +44,16 @@ def build_custom_checkers(by_lang):
found = True found = True
if not found: if not found:
continue continue
rules_to_apply.append(rule)
for rule in rules_to_apply:
exclude_list = rule.get('exclude_line', set())
pattern = rule['pattern']
for (i, line, line_newline_stripped, line_fully_stripped) in line_tups:
if (fn, line_fully_stripped) in exclude_list:
continue
try: try:
line_to_check = line_fully_stripped line_to_check = line_fully_stripped
if rule.get('strip') is not None: if rule.get('strip') is not None:
@@ -49,13 +61,16 @@ def build_custom_checkers(by_lang):
line_to_check = line_newline_stripped line_to_check = line_newline_stripped
else: else:
raise Exception("Invalid strip rule") raise Exception("Invalid strip rule")
if re.search(rule['pattern'], line_to_check): if re.search(pattern, line_to_check):
sys.stdout.write(rule['description'] + ' at %s line %s:\n' % (fn, i+1)) sys.stdout.write(rule['description'] + ' at %s line %s:\n' % (fn, i+1))
print(line) print(line)
failed = True failed = True
except Exception: except Exception:
print("Exception with %s at %s line %s" % (rule['pattern'], fn, i+1)) print("Exception with %s at %s line %s" % (rule['pattern'], fn, i+1))
traceback.print_exc() traceback.print_exc()
lastLine = None
for (i, line, line_newline_stripped, line_fully_stripped) in line_tups:
if isinstance(line, bytes): if isinstance(line, bytes):
line_length = len(line.decode("utf-8")) line_length = len(line.decode("utf-8"))
else: else:
@@ -68,9 +83,11 @@ def build_custom_checkers(by_lang):
print("Line too long (%s) at %s line %s: %s" % (len(line), fn, i+1, line_newline_stripped)) print("Line too long (%s) at %s line %s: %s" % (len(line), fn, i+1, line_newline_stripped))
failed = True failed = True
lastLine = line lastLine = line
if lineFlag and '\n' not in lastLine:
if lastLine and ('\n' not in lastLine):
print("No newline at the end of file. Fix with `sed -i '$a\\' %s`" % (fn,)) print("No newline at the end of file. Fix with `sed -i '$a\\' %s`" % (fn,))
failed = True failed = True
return failed return failed
whitespace_rules = [ whitespace_rules = [