mirror of
				https://github.com/zulip/zulip.git
				synced 2025-11-03 21:43:21 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			93 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			93 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from dataclasses import dataclass
 | 
						|
from datetime import timedelta
 | 
						|
from typing import Any, Literal
 | 
						|
 | 
						|
from django.utils.timezone import now as timezone_now
 | 
						|
from typing_extensions import override
 | 
						|
 | 
						|
from analytics.lib.counts import ALL_COUNT_STATS, CountStat
 | 
						|
from analytics.models import installation_epoch
 | 
						|
from scripts.lib.zulip_tools import atomic_nagios_write
 | 
						|
from zerver.lib.management import ZulipBaseCommand
 | 
						|
from zerver.lib.timestamp import TimeZoneNotUTCError, floor_to_day, floor_to_hour, verify_UTC
 | 
						|
from zerver.models import Realm
 | 
						|
 | 
						|
# Names for the numeric state codes 0-3, in order.
# NOTE(review): these look like the standard Nagios plugin return codes;
# nothing in this file reads the table, so confirm against callers.
states = dict(enumerate(("OK", "WARNING", "CRITICAL", "UNKNOWN")))
 | 
						|
 | 
						|
 | 
						|
@dataclass
class NagiosResult:
    """Outcome of a check: a status level paired with a descriptive message."""

    # Status level; restricted by the Literal type to these four lowercase values.
    status: Literal["ok", "warning", "critical", "unknown"]
    # Human-readable text explaining the status.
    message: str
 | 
						|
 | 
						|
 | 
						|
class Command(ZulipBaseCommand):
    """Management command that checks how recently each CountStat was filled."""

    help = """Checks FillState table.

    Run as a cron job that runs every hour."""

    @override
    def handle(self, *args: Any, **options: Any) -> None:
        """Compute the FillState check result and hand it to atomic_nagios_write.

        The result is written under the check name "check-analytics-state".
        """
        fill_state = self.get_fill_state()
        atomic_nagios_write("check-analytics-state", fill_state.status, fill_state.message)

    def get_fill_state(self) -> NagiosResult:
        """Inspect the last successful fill of every stat in ALL_COUNT_STATS.

        Returns:
            A NagiosResult whose status is:
            * "ok" when no realms exist, or when no stat is past its
              warning threshold;
            * "warning" when at least one stat is past its warning threshold
              and none is past its critical threshold;
            * "critical" when any stat is past its critical threshold, or as
              soon as a stat's last fill time is not in UTC or does not sit
              on the stat's day/hour boundary (these two checks return
              immediately, without examining the remaining stats).
        """
        if not Realm.objects.exists():
            return NagiosResult(status="ok", message="No realms exist, so not checking FillState.")

        warning_unfilled_properties: list[str] = []
        critical_unfilled_properties: list[str] = []
        for property_name, stat in ALL_COUNT_STATS.items():
            last_fill = stat.last_successful_fill()
            if last_fill is None:
                # No successful fill recorded yet: measure from installation_epoch().
                last_fill = installation_epoch()
            try:
                verify_UTC(last_fill)
            except TimeZoneNotUTCError:
                return NagiosResult(
                    status="critical", message=f"FillState not in UTC for {property_name}"
                )

            # Thresholds depend on the stat's frequency.  Judging by the
            # messages below, the warning threshold presumably corresponds to
            # one missed fill and the critical threshold to two or more.
            if stat.frequency == CountStat.DAY:
                floor_function = floor_to_day
                warning_threshold = timedelta(hours=26)
                critical_threshold = timedelta(hours=50)
            else:  # CountStat.HOUR
                floor_function = floor_to_hour
                warning_threshold = timedelta(minutes=90)
                critical_threshold = timedelta(minutes=150)

            # A fill time must sit exactly on a day/hour boundary.
            if floor_function(last_fill) != last_fill:
                return NagiosResult(
                    status="critical",
                    message=f"FillState not on {stat.frequency} boundary for {property_name}",
                )

            time_to_last_fill = timezone_now() - last_fill
            if time_to_last_fill > critical_threshold:
                critical_unfilled_properties.append(property_name)
            elif time_to_last_fill > warning_threshold:
                warning_unfilled_properties.append(property_name)

        if not critical_unfilled_properties and not warning_unfilled_properties:
            return NagiosResult(status="ok", message="FillState looks fine.")

        # Only mention a category when it has entries, so the message never
        # contains an empty list such as "Missed filling  once.".
        message_parts: list[str] = []
        if warning_unfilled_properties:
            missed_once = ", ".join(warning_unfilled_properties)
            message_parts.append(f"Missed filling {missed_once} once.")
        if critical_unfilled_properties:
            missed_twice = ", ".join(critical_unfilled_properties)
            message_parts.append(f"Missed filling {missed_twice} at least twice.")
        message = " ".join(message_parts)

        if critical_unfilled_properties:
            return NagiosResult(status="critical", message=message)
        return NagiosResult(status="warning", message=message)
 |