mirror of
				https://github.com/zulip/zulip.git
				synced 2025-11-04 05:53:43 +00:00 
			
		
		
		
	log-search: Allow multiple search terms.
This allows AND'ing multiple terms together.
This commit is contained in:
		
				
					committed by
					
						
						Tim Abbott
					
				
			
			
				
	
			
			
			
						parent
						
							bd73e7d411
						
					
				
				
					commit
					258b658cc0
				
			@@ -8,7 +8,7 @@ import re
 | 
			
		||||
import signal
 | 
			
		||||
import sys
 | 
			
		||||
from enum import Enum, auto
 | 
			
		||||
from typing import Callable, List, TextIO, Tuple
 | 
			
		||||
from typing import List, Set, TextIO, Tuple
 | 
			
		||||
 | 
			
		||||
ZULIP_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 | 
			
		||||
sys.path.append(ZULIP_PATH)
 | 
			
		||||
@@ -20,6 +20,7 @@ setup_path()
 | 
			
		||||
os.environ["DJANGO_SETTINGS_MODULE"] = "zproject.settings"
 | 
			
		||||
 | 
			
		||||
from django.conf import settings
 | 
			
		||||
from typing_extensions import Protocol
 | 
			
		||||
 | 
			
		||||
from scripts.lib.zulip_tools import BOLD, CYAN, ENDC, FAIL, GRAY, OKBLUE
 | 
			
		||||
 | 
			
		||||
@@ -58,7 +59,9 @@ def parser() -> argparse.ArgumentParser:
 | 
			
		||||
 | 
			
		||||
    filtering = parser.add_argument_group("Filtering")
 | 
			
		||||
    filtering.add_argument(
 | 
			
		||||
        "filter", help="IP address, hostname, user-id, path, or status code to search for"
 | 
			
		||||
        "filter_terms",
 | 
			
		||||
        help="IP address, hostname, user-id, path, or status code to search for; multiple are AND'ed together",
 | 
			
		||||
        nargs="+",
 | 
			
		||||
    )
 | 
			
		||||
    filtering.add_argument(
 | 
			
		||||
        "--all-lines",
 | 
			
		||||
@@ -161,10 +164,17 @@ class FilterType(Enum):
 | 
			
		||||
    STATUS = auto()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class FilterFunc(Protocol):
 | 
			
		||||
    def __call__(
 | 
			
		||||
        self, m: re.Match, t: str = ...  # type: ignore[type-arg]  # Requires Python 3.9
 | 
			
		||||
    ) -> bool:
 | 
			
		||||
        ...
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def main() -> None:
 | 
			
		||||
    args = parser().parse_args()
 | 
			
		||||
 | 
			
		||||
    (filter_type, filter_func) = parse_filters(args)
 | 
			
		||||
    (filter_types, filter_funcs) = parse_filters(args)
 | 
			
		||||
    logfile_names = parse_logfile_names(args)
 | 
			
		||||
 | 
			
		||||
    try:
 | 
			
		||||
@@ -173,7 +183,8 @@ def main() -> None:
 | 
			
		||||
                for logline in logfile:
 | 
			
		||||
                    # As a performance optimization, just do a substring
 | 
			
		||||
                    # check before we parse the line fully
 | 
			
		||||
                    if args.filter not in logline.lower():
 | 
			
		||||
                    lowered = logline.lower()
 | 
			
		||||
                    if not all(f in lowered for f in args.filter_terms):
 | 
			
		||||
                        continue
 | 
			
		||||
 | 
			
		||||
                    if args.nginx:
 | 
			
		||||
@@ -185,11 +196,11 @@ def main() -> None:
 | 
			
		||||
                        if args.nginx:
 | 
			
		||||
                            print(f"! Failed to parse:\n{logline}", file=sys.stderr)
 | 
			
		||||
                        continue
 | 
			
		||||
                    if passes_filters(filter_func, match, args):
 | 
			
		||||
                    if passes_filters(filter_funcs, match, args):
 | 
			
		||||
                        print_line(
 | 
			
		||||
                            match,
 | 
			
		||||
                            args,
 | 
			
		||||
                            filter_type=filter_type,
 | 
			
		||||
                            filter_types=filter_types,
 | 
			
		||||
                        )
 | 
			
		||||
    except KeyboardInterrupt:
 | 
			
		||||
        sys.exit(signal.SIGINT + 128)
 | 
			
		||||
@@ -228,67 +239,79 @@ def parse_logfile_names(args: argparse.Namespace) -> List[str]:
 | 
			
		||||
 | 
			
		||||
def parse_filters(
 | 
			
		||||
    args: argparse.Namespace,
 | 
			
		||||
) -> Tuple[FilterType, Callable[[re.Match], bool]]:  # type: ignore[type-arg]  # Requires Python 3.9
 | 
			
		||||
) -> Tuple[Set[FilterType], List[FilterFunc]]:
 | 
			
		||||
    # The heuristics below are not intended to be precise -- they
 | 
			
		||||
    # certainly count things as "IPv4" or "IPv6" addresses that are
 | 
			
		||||
    # invalid.  However, we expect the input here to already be
 | 
			
		||||
    # reasonably well-formed.
 | 
			
		||||
 | 
			
		||||
    filter = args.filter
 | 
			
		||||
    filter_types = set()
 | 
			
		||||
    filter_funcs = []
 | 
			
		||||
    filter_terms = []
 | 
			
		||||
 | 
			
		||||
    if re.match(r"[1-5][0-9][0-9]$", filter):
 | 
			
		||||
        filter_func = lambda m: m["code"] == filter
 | 
			
		||||
        filter_type = FilterType.STATUS
 | 
			
		||||
        if not args.nginx and filter == "502":
 | 
			
		||||
            logging.warning("Adding --nginx -- 502's do not appear in Django logs.")
 | 
			
		||||
            args.nginx = True
 | 
			
		||||
    elif re.match(r"[1-5]xx$", filter):
 | 
			
		||||
        filter = filter[0]
 | 
			
		||||
        filter_func = lambda m: m["code"].startswith(filter)
 | 
			
		||||
        filter_type = FilterType.STATUS
 | 
			
		||||
    elif re.match(r"\d+$", filter):
 | 
			
		||||
        if args.nginx:
 | 
			
		||||
            raise parser().error("Cannot parse user-ids with nginx logs; try without --nginx")
 | 
			
		||||
        filter_func = lambda m: m["user_id"] == filter
 | 
			
		||||
        filter_type = FilterType.USER_ID
 | 
			
		||||
    elif re.match(r"\d{1,3}(\.\d{1,3}){3}$", filter):
 | 
			
		||||
        filter_func = lambda m: m["ip"] == filter
 | 
			
		||||
        filter_type = FilterType.CLIENT_IP
 | 
			
		||||
    elif re.match(r"([a-f0-9:]+:+){1,7}[a-f0-9]+$", filter):
 | 
			
		||||
        filter_func = lambda m: m["ip"] == filter
 | 
			
		||||
        filter_type = FilterType.CLIENT_IP
 | 
			
		||||
    elif re.match(r"[a-z0-9]([a-z0-9-]*[a-z0-9])?$", filter.lower()):
 | 
			
		||||
        filter = filter.lower()
 | 
			
		||||
        if args.nginx:
 | 
			
		||||
            filter_func = lambda m: m["hostname"].startswith(filter + ".")
 | 
			
		||||
    for filter_term in args.filter_terms:
 | 
			
		||||
        if re.match(r"[1-5][0-9][0-9]$", filter_term):
 | 
			
		||||
            filter_func = lambda m, t=filter_term: m["code"] == t
 | 
			
		||||
            filter_type = FilterType.STATUS
 | 
			
		||||
            if not args.nginx and filter_term == "502":
 | 
			
		||||
                logging.warning("Adding --nginx -- 502's do not appear in Django logs.")
 | 
			
		||||
                args.nginx = True
 | 
			
		||||
        elif re.match(r"[1-5]xx$", filter_term):
 | 
			
		||||
            filter_term = filter_term[0]
 | 
			
		||||
            filter_func = lambda m, t=filter_term: m["code"].startswith(t)
 | 
			
		||||
            filter_type = FilterType.STATUS
 | 
			
		||||
        elif re.match(r"\d+$", filter_term):
 | 
			
		||||
            if args.nginx:
 | 
			
		||||
                raise parser().error("Cannot parse user-ids with nginx logs; try without --nginx")
 | 
			
		||||
            filter_func = lambda m, t=filter_term: m["user_id"] == t
 | 
			
		||||
            filter_type = FilterType.USER_ID
 | 
			
		||||
        elif re.match(r"\d{1,3}(\.\d{1,3}){3}$", filter_term):
 | 
			
		||||
            filter_func = lambda m, t=filter_term: m["ip"] == t
 | 
			
		||||
            filter_type = FilterType.CLIENT_IP
 | 
			
		||||
        elif re.match(r"([a-f0-9:]+:+){1,7}[a-f0-9]+$", filter_term):
 | 
			
		||||
            filter_func = lambda m, t=filter_term: m["ip"] == t
 | 
			
		||||
            filter_type = FilterType.CLIENT_IP
 | 
			
		||||
        elif re.match(r"[a-z0-9]([a-z0-9-]*[a-z0-9])?$", filter_term.lower()):
 | 
			
		||||
            filter_term = filter_term.lower()
 | 
			
		||||
            if args.nginx:
 | 
			
		||||
                filter_func = lambda m, t=filter_term: m["hostname"].startswith(t + ".")
 | 
			
		||||
            else:
 | 
			
		||||
                filter_func = lambda m, t=filter_term: m["hostname"] == t
 | 
			
		||||
            filter_type = FilterType.HOSTNAME
 | 
			
		||||
        elif re.match(r"[a-z0-9-]+(\.[a-z0-9-]+)+$", filter_term.lower()) and re.search(
 | 
			
		||||
            r"[a-z-]", filter_term.lower()
 | 
			
		||||
        ):
 | 
			
		||||
            if not args.nginx:
 | 
			
		||||
                raise parser().error("Cannot parse full domains with Python logs; try --nginx")
 | 
			
		||||
            filter_term = filter_term.lower()
 | 
			
		||||
            filter_func = lambda m, t=filter_term: m["hostname"] == t
 | 
			
		||||
            filter_type = FilterType.HOSTNAME
 | 
			
		||||
        elif re.match(r"/\S*$", filter_term):
 | 
			
		||||
            filter_func = lambda m, t=filter_term: m["path"] == t
 | 
			
		||||
            filter_type = FilterType.PATH
 | 
			
		||||
            args.all_lines = True
 | 
			
		||||
        else:
 | 
			
		||||
            filter_func = lambda m: m["hostname"] == filter
 | 
			
		||||
        filter_type = FilterType.HOSTNAME
 | 
			
		||||
    elif re.match(r"[a-z0-9-]+(\.[a-z0-9-]+)+$", filter.lower()) and re.search(
 | 
			
		||||
        r"[a-z-]", filter.lower()
 | 
			
		||||
    ):
 | 
			
		||||
        if not args.nginx:
 | 
			
		||||
            raise parser().error("Cannot parse full domains with Python logs; try --nginx")
 | 
			
		||||
        filter = filter.lower()
 | 
			
		||||
        filter_func = lambda m: m["hostname"] == filter
 | 
			
		||||
        filter_type = FilterType.HOSTNAME
 | 
			
		||||
    elif re.match(r"/\S*$", filter):
 | 
			
		||||
        filter_func = lambda m: m["path"] == filter
 | 
			
		||||
        filter_type = FilterType.PATH
 | 
			
		||||
        args.all_lines = True
 | 
			
		||||
    else:
 | 
			
		||||
        raise RuntimeError(
 | 
			
		||||
            f"Can't parse {filter} as an IP, hostname, user-id, path, or status code."
 | 
			
		||||
        )
 | 
			
		||||
    return (filter_type, filter_func)
 | 
			
		||||
            raise RuntimeError(
 | 
			
		||||
                f"Can't parse {filter_term} as an IP, hostname, user-id, path, or status code."
 | 
			
		||||
            )
 | 
			
		||||
        if filter_type in filter_types:
 | 
			
		||||
            parser().error("Supplied the same type of value more than once, which cannot match!")
 | 
			
		||||
        filter_types.add(filter_type)
 | 
			
		||||
        filter_funcs.append(filter_func)
 | 
			
		||||
        filter_terms.append(filter_term)
 | 
			
		||||
 | 
			
		||||
    # Push back the modified raw strings, so we can use them for fast substring searches
 | 
			
		||||
    args.filter_terms = filter_terms
 | 
			
		||||
 | 
			
		||||
    return (filter_types, filter_funcs)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def passes_filters(
 | 
			
		||||
    string_filter: Callable[[re.Match], bool],  # type: ignore[type-arg]  # Requires Python 3.9
 | 
			
		||||
    string_filters: List[FilterFunc],
 | 
			
		||||
    match: re.Match,  # type: ignore[type-arg]  # Requires Python 3.9
 | 
			
		||||
    args: argparse.Namespace,
 | 
			
		||||
) -> bool:
 | 
			
		||||
    if not string_filter(match):
 | 
			
		||||
    if not all(f(match) for f in string_filters):
 | 
			
		||||
        return False
 | 
			
		||||
 | 
			
		||||
    if args.all_lines:
 | 
			
		||||
@@ -318,7 +341,7 @@ def passes_filters(
 | 
			
		||||
def print_line(
 | 
			
		||||
    match: re.Match,  # type: ignore[type-arg]  # Requires Python 3.9
 | 
			
		||||
    args: argparse.Namespace,
 | 
			
		||||
    filter_type: FilterType,
 | 
			
		||||
    filter_types: Set[FilterType],
 | 
			
		||||
) -> None:
 | 
			
		||||
    if args.full_line:
 | 
			
		||||
        print(match.group(0))
 | 
			
		||||
@@ -350,7 +373,7 @@ def print_line(
 | 
			
		||||
        indicator = "!"
 | 
			
		||||
        color = FAIL
 | 
			
		||||
    url = f"{BOLD}{match['path']}"
 | 
			
		||||
    if filter_type != FilterType.HOSTNAME:
 | 
			
		||||
    if FilterType.HOSTNAME not in filter_types:
 | 
			
		||||
        hostname = match["hostname"]
 | 
			
		||||
        if hostname is None:
 | 
			
		||||
            hostname = "???." + settings.EXTERNAL_HOST
 | 
			
		||||
@@ -370,8 +393,8 @@ def print_line(
 | 
			
		||||
    parts = [
 | 
			
		||||
        ts,
 | 
			
		||||
        f"{duration:>5}ms",
 | 
			
		||||
        f"{user_id:7}" if not args.nginx and filter_type != FilterType.USER_ID else None,
 | 
			
		||||
        f"{match['ip']:39}" if filter_type != FilterType.CLIENT_IP else None,
 | 
			
		||||
        f"{user_id:7}" if not args.nginx and FilterType.USER_ID not in filter_types else None,
 | 
			
		||||
        f"{match['ip']:39}" if FilterType.CLIENT_IP not in filter_types else None,
 | 
			
		||||
        indicator + match["code"],
 | 
			
		||||
        f"{match['method']:6}",
 | 
			
		||||
        url,
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user