mirror of
				https://github.com/zulip/zulip.git
				synced 2025-11-03 21:43:21 +00:00 
			
		
		
		
	analytics: Store realm disk space used as a CountStat.
Fixes #29632. The issue description explains this well: We currently recalculate `currently_used_upload_space_bytes` on every file upload, by dint of calling `flush_used_upload_space_cache` on save/delete, and then immediately calling `user_profile.realm.currently_used_upload_space_bytes()` in `notify_attachment_update`. Since this walks the Attachment table, the recalculation can take seconds in large realms. Switch this to using a CountStat, so we don't need to walk significant chunks of the Attachment table when we upload an attachment. This will also give us a historical daily graph of usage.
This commit is contained in:
		
				
					committed by
					
						
						Tim Abbott
					
				
			
			
				
	
			
			
			
						parent
						
							4c4a443002
						
					
				
				
					commit
					9406bfbc0a
				
			@@ -485,6 +485,40 @@ def sql_data_collector(
 | 
				
			|||||||
    return DataCollector(output_table, pull_function)
 | 
					    return DataCollector(output_table, pull_function)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def count_upload_space_used_by_realm_query(realm: Optional[Realm]) -> QueryFn:
 | 
				
			||||||
 | 
					    if realm is None:
 | 
				
			||||||
 | 
					        realm_clause: Composable = SQL("")
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        realm_clause = SQL("zerver_attachment.realm_id = {} AND").format(Literal(realm.id))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Note: This query currently has to go through the entire table,
 | 
				
			||||||
 | 
					    # summing all the sizes of attachments for every realm. This can be improved
 | 
				
			||||||
 | 
					    # by having a query which looks at the latest CountStat for each realm,
 | 
				
			||||||
 | 
					    # and sums it with only the new attachments.
 | 
				
			||||||
 | 
					    # There'd be additional complexity added by the fact that attachments can
 | 
				
			||||||
 | 
					    # also be deleted. Partially this can be accounted for by subtracting
 | 
				
			||||||
 | 
					    # ArchivedAttachment sizes, but there's still the issue of attachments
 | 
				
			||||||
 | 
					    # which can be directly deleted via the API.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return lambda kwargs: SQL(
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					            INSERT INTO analytics_realmcount (realm_id, property, end_time, value)
 | 
				
			||||||
 | 
					            SELECT
 | 
				
			||||||
 | 
					                zerver_attachment.realm_id,
 | 
				
			||||||
 | 
					                %(property)s,
 | 
				
			||||||
 | 
					                %(time_end)s,
 | 
				
			||||||
 | 
					                COALESCE(SUM(zerver_attachment.size), 0)
 | 
				
			||||||
 | 
					            FROM
 | 
				
			||||||
 | 
					                zerver_attachment
 | 
				
			||||||
 | 
					            WHERE
 | 
				
			||||||
 | 
					                {realm_clause}
 | 
				
			||||||
 | 
					                zerver_attachment.create_time < %(time_end)s
 | 
				
			||||||
 | 
					            GROUP BY
 | 
				
			||||||
 | 
					                zerver_attachment.realm_id
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					    ).format(**kwargs, realm_clause=realm_clause)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def do_pull_minutes_active(
 | 
					def do_pull_minutes_active(
 | 
				
			||||||
    property: str, start_time: datetime, end_time: datetime, realm: Optional[Realm] = None
 | 
					    property: str, start_time: datetime, end_time: datetime, realm: Optional[Realm] = None
 | 
				
			||||||
) -> int:
 | 
					) -> int:
 | 
				
			||||||
@@ -863,6 +897,11 @@ def get_count_stats(realm: Optional[Realm] = None) -> Dict[str, CountStat]:
 | 
				
			|||||||
            CountStat.DAY,
 | 
					            CountStat.DAY,
 | 
				
			||||||
            interval=TIMEDELTA_MAX,
 | 
					            interval=TIMEDELTA_MAX,
 | 
				
			||||||
        ),
 | 
					        ),
 | 
				
			||||||
 | 
					        CountStat(
 | 
				
			||||||
 | 
					            "upload_quota_used_bytes::day",
 | 
				
			||||||
 | 
					            sql_data_collector(RealmCount, count_upload_space_used_by_realm_query(realm), None),
 | 
				
			||||||
 | 
					            CountStat.DAY,
 | 
				
			||||||
 | 
					        ),
 | 
				
			||||||
        # Messages read stats.  messages_read::hour is the total
 | 
					        # Messages read stats.  messages_read::hour is the total
 | 
				
			||||||
        # number of messages read, whereas
 | 
					        # number of messages read, whereas
 | 
				
			||||||
        # messages_read_interactions::hour tries to count the total
 | 
					        # messages_read_interactions::hour tries to count the total
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -76,6 +76,7 @@ from zerver.models import (
 | 
				
			|||||||
)
 | 
					)
 | 
				
			||||||
from zerver.models.clients import get_client
 | 
					from zerver.models.clients import get_client
 | 
				
			||||||
from zerver.models.groups import SystemGroups
 | 
					from zerver.models.groups import SystemGroups
 | 
				
			||||||
 | 
					from zerver.models.messages import Attachment
 | 
				
			||||||
from zerver.models.scheduled_jobs import NotificationTriggers
 | 
					from zerver.models.scheduled_jobs import NotificationTriggers
 | 
				
			||||||
from zerver.models.users import get_user, is_cross_realm_bot_email
 | 
					from zerver.models.users import get_user, is_cross_realm_bot_email
 | 
				
			||||||
from zilencer.models import (
 | 
					from zilencer.models import (
 | 
				
			||||||
@@ -190,6 +191,18 @@ class AnalyticsTestCase(ZulipTestCase):
 | 
				
			|||||||
            kwargs[key] = kwargs.get(key, value)
 | 
					            kwargs[key] = kwargs.get(key, value)
 | 
				
			||||||
        return Message.objects.create(**kwargs)
 | 
					        return Message.objects.create(**kwargs)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def create_attachment(
 | 
				
			||||||
 | 
					        self, user_profile: UserProfile, filename: str, size: int, create_time: datetime
 | 
				
			||||||
 | 
					    ) -> Attachment:
 | 
				
			||||||
 | 
					        return Attachment.objects.create(
 | 
				
			||||||
 | 
					            file_name=filename,
 | 
				
			||||||
 | 
					            path_id=f"foo/bar/{filename}",
 | 
				
			||||||
 | 
					            owner=user_profile,
 | 
				
			||||||
 | 
					            realm=user_profile.realm,
 | 
				
			||||||
 | 
					            size=size,
 | 
				
			||||||
 | 
					            create_time=create_time,
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # kwargs should only ever be a UserProfile or Stream.
 | 
					    # kwargs should only ever be a UserProfile or Stream.
 | 
				
			||||||
    def assert_table_count(
 | 
					    def assert_table_count(
 | 
				
			||||||
        self,
 | 
					        self,
 | 
				
			||||||
@@ -546,6 +559,41 @@ class TestCountStats(AnalyticsTestCase):
 | 
				
			|||||||
        self.assertTableState(UserCount, [], [])
 | 
					        self.assertTableState(UserCount, [], [])
 | 
				
			||||||
        self.assertTableState(StreamCount, [], [])
 | 
					        self.assertTableState(StreamCount, [], [])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_upload_quota_used_bytes(self) -> None:
 | 
				
			||||||
 | 
					        stat = COUNT_STATS["upload_quota_used_bytes::day"]
 | 
				
			||||||
 | 
					        self.current_property = stat.property
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        user1 = self.create_user()
 | 
				
			||||||
 | 
					        user2 = self.create_user()
 | 
				
			||||||
 | 
					        user_second_realm = self.create_user(realm=self.second_realm)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        self.create_attachment(user1, "file1", 100, self.TIME_LAST_HOUR)
 | 
				
			||||||
 | 
					        attachment2 = self.create_attachment(user2, "file2", 200, self.TIME_LAST_HOUR)
 | 
				
			||||||
 | 
					        self.create_attachment(user_second_realm, "file3", 10, self.TIME_LAST_HOUR)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        do_fill_count_stat_at_hour(stat, self.TIME_ZERO)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        self.assertTableState(
 | 
				
			||||||
 | 
					            RealmCount,
 | 
				
			||||||
 | 
					            ["value", "subgroup", "realm"],
 | 
				
			||||||
 | 
					            [[300, None, self.default_realm], [10, None, self.second_realm]],
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Delete an attachment and run the CountStat job again the next day.
 | 
				
			||||||
 | 
					        attachment2.delete()
 | 
				
			||||||
 | 
					        do_fill_count_stat_at_hour(stat, self.TIME_ZERO + self.DAY)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        self.assertTableState(
 | 
				
			||||||
 | 
					            RealmCount,
 | 
				
			||||||
 | 
					            ["value", "subgroup", "realm", "end_time"],
 | 
				
			||||||
 | 
					            [
 | 
				
			||||||
 | 
					                [300, None, self.default_realm, self.TIME_ZERO],
 | 
				
			||||||
 | 
					                [10, None, self.second_realm, self.TIME_ZERO],
 | 
				
			||||||
 | 
					                [100, None, self.default_realm, self.TIME_ZERO + self.DAY],
 | 
				
			||||||
 | 
					                [10, None, self.second_realm, self.TIME_ZERO + self.DAY],
 | 
				
			||||||
 | 
					            ],
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_active_users_by_is_bot_for_realm_constraint(self) -> None:
 | 
					    def test_active_users_by_is_bot_for_realm_constraint(self) -> None:
 | 
				
			||||||
        # For single Realm
 | 
					        # For single Realm
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -870,12 +870,26 @@ class Realm(models.Model):  # type: ignore[django-manager-missing] # django-stub
 | 
				
			|||||||
        lambda realm: get_realm_used_upload_space_cache_key(realm.id), timeout=3600 * 24 * 7
 | 
					        lambda realm: get_realm_used_upload_space_cache_key(realm.id), timeout=3600 * 24 * 7
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
    def currently_used_upload_space_bytes(realm) -> int:  # noqa: N805
 | 
					    def currently_used_upload_space_bytes(realm) -> int:  # noqa: N805
 | 
				
			||||||
 | 
					        from analytics.models import RealmCount, installation_epoch
 | 
				
			||||||
        from zerver.models import Attachment
 | 
					        from zerver.models import Attachment
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        used_space = Attachment.objects.filter(realm=realm).aggregate(Sum("size"))["size__sum"]
 | 
					        try:
 | 
				
			||||||
        if used_space is None:
 | 
					            latest_count_stat = RealmCount.objects.filter(
 | 
				
			||||||
            return 0
 | 
					                realm=realm, property="upload_quota_used_bytes::day"
 | 
				
			||||||
        return used_space
 | 
					            ).latest("end_time")
 | 
				
			||||||
 | 
					            last_recorded_used_space = latest_count_stat.value
 | 
				
			||||||
 | 
					            last_recorded_date = latest_count_stat.end_time
 | 
				
			||||||
 | 
					        except RealmCount.DoesNotExist:
 | 
				
			||||||
 | 
					            last_recorded_used_space = 0
 | 
				
			||||||
 | 
					            last_recorded_date = installation_epoch()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        newly_used_space = Attachment.objects.filter(
 | 
				
			||||||
 | 
					            realm=realm, create_time__gte=last_recorded_date
 | 
				
			||||||
 | 
					        ).aggregate(Sum("size"))["size__sum"]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if newly_used_space is None:
 | 
				
			||||||
 | 
					            return last_recorded_used_space
 | 
				
			||||||
 | 
					        return last_recorded_used_space + newly_used_space
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def ensure_not_on_limited_plan(self) -> None:
 | 
					    def ensure_not_on_limited_plan(self) -> None:
 | 
				
			||||||
        if self.plan_type == Realm.PLAN_TYPE_LIMITED:
 | 
					        if self.plan_type == Realm.PLAN_TYPE_LIMITED:
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -9,12 +9,14 @@ from urllib.parse import quote
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
import orjson
 | 
					import orjson
 | 
				
			||||||
from django.conf import settings
 | 
					from django.conf import settings
 | 
				
			||||||
 | 
					from django.utils.timezone import now as timezone_now
 | 
				
			||||||
from PIL import Image
 | 
					from PIL import Image
 | 
				
			||||||
from typing_extensions import override
 | 
					from typing_extensions import override
 | 
				
			||||||
from urllib3 import encode_multipart_formdata
 | 
					from urllib3 import encode_multipart_formdata
 | 
				
			||||||
from urllib3.fields import RequestField
 | 
					from urllib3.fields import RequestField
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import zerver.lib.upload
 | 
					import zerver.lib.upload
 | 
				
			||||||
 | 
					from analytics.models import RealmCount
 | 
				
			||||||
from zerver.actions.create_realm import do_create_realm
 | 
					from zerver.actions.create_realm import do_create_realm
 | 
				
			||||||
from zerver.actions.message_send import internal_send_private_message
 | 
					from zerver.actions.message_send import internal_send_private_message
 | 
				
			||||||
from zerver.actions.realm_icon import do_change_icon_source
 | 
					from zerver.actions.realm_icon import do_change_icon_source
 | 
				
			||||||
@@ -23,7 +25,7 @@ from zerver.actions.realm_settings import do_change_realm_plan_type, do_set_real
 | 
				
			|||||||
from zerver.actions.user_settings import do_delete_avatar_image
 | 
					from zerver.actions.user_settings import do_delete_avatar_image
 | 
				
			||||||
from zerver.lib.attachments import validate_attachment_request
 | 
					from zerver.lib.attachments import validate_attachment_request
 | 
				
			||||||
from zerver.lib.avatar import avatar_url, get_avatar_field
 | 
					from zerver.lib.avatar import avatar_url, get_avatar_field
 | 
				
			||||||
from zerver.lib.cache import cache_get, get_realm_used_upload_space_cache_key
 | 
					from zerver.lib.cache import cache_delete, cache_get, get_realm_used_upload_space_cache_key
 | 
				
			||||||
from zerver.lib.create_user import copy_default_settings
 | 
					from zerver.lib.create_user import copy_default_settings
 | 
				
			||||||
from zerver.lib.initial_password import initial_password
 | 
					from zerver.lib.initial_password import initial_password
 | 
				
			||||||
from zerver.lib.realm_icon import realm_icon_url
 | 
					from zerver.lib.realm_icon import realm_icon_url
 | 
				
			||||||
@@ -1844,6 +1846,22 @@ class UploadSpaceTests(UploadSerializeMixin, ZulipTestCase):
 | 
				
			|||||||
        self.assertEqual(None, cache_get(get_realm_used_upload_space_cache_key(self.realm.id)))
 | 
					        self.assertEqual(None, cache_get(get_realm_used_upload_space_cache_key(self.realm.id)))
 | 
				
			||||||
        self.assert_length(data2, self.realm.currently_used_upload_space_bytes())
 | 
					        self.assert_length(data2, self.realm.currently_used_upload_space_bytes())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        now = timezone_now()
 | 
				
			||||||
 | 
					        RealmCount.objects.create(
 | 
				
			||||||
 | 
					            realm=self.realm,
 | 
				
			||||||
 | 
					            property="upload_quota_used_bytes::day",
 | 
				
			||||||
 | 
					            end_time=now,
 | 
				
			||||||
 | 
					            value=len(data2),
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        # Purge the cache since we want to actually execute the function.
 | 
				
			||||||
 | 
					        cache_delete(get_realm_used_upload_space_cache_key(self.realm.id))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        self.assert_length(data2, self.realm.currently_used_upload_space_bytes())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        data3 = b"even-more-data!"
 | 
				
			||||||
 | 
					        upload_message_attachment("dummy3.txt", len(data3), "text/plain", data3, self.user_profile)
 | 
				
			||||||
 | 
					        self.assertEqual(len(data2) + len(data3), self.realm.currently_used_upload_space_bytes())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class DecompressionBombTests(ZulipTestCase):
 | 
					class DecompressionBombTests(ZulipTestCase):
 | 
				
			||||||
    @override
 | 
					    @override
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user