from django.db import migrations
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
from django.db.migrations.state import StateApps
from django.db.models import Count, Sum


def clear_duplicate_counts(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:
    """This is a preparatory migration for our Analytics tables.

    The backstory is that Django's unique_together indexes do not properly
    handle the subgroup=None corner case (allowing duplicate rows that have a
    subgroup of None), which meant that in race conditions, rather than updating
    an existing row for the property/(realm, stream, user)/time with subgroup=None,
    Django would create a duplicate row.

    In the next migration, we'll add a proper constraint to fix this bug, but
    we need to fix any existing problematic rows before we can add that constraint.

    We fix this in an appropriate fashion for each type of CountStat object; mainly
    this means deleting the extra rows, but for LoggingCountStat objects, we need to
    additionally combine the sums.
    """
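    # A rough sketch of the kind of conditional constraint the next migration is
    # expected to add (an assumption for illustration only; the exact fields and
    # constraint names may differ), shown here for RealmCount:
    #
    #     models.UniqueConstraint(
    #         fields=["realm", "property", "subgroup", "end_time"],
    #         condition=Q(subgroup__isnull=False),
    #         name="unique_realm_count",
    #     )
    #     models.UniqueConstraint(
    #         fields=["realm", "property", "end_time"],
    #         condition=Q(subgroup__isnull=True),
    #         name="unique_realm_count_null_subgroup",
    #     )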
    count_tables = dict(
        realm=apps.get_model("analytics", "RealmCount"),
        user=apps.get_model("analytics", "UserCount"),
        stream=apps.get_model("analytics", "StreamCount"),
        installation=apps.get_model("analytics", "InstallationCount"),
    )

    for name, count_table in count_tables.items():
        value = [name, "property", "end_time"]
        if name == "installation":
            value = ["property", "end_time"]
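        # Find the groups of key fields that have more than one row with
        # subgroup=None, along with each group's row count and combined value.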
        counts = (
            count_table.objects.filter(subgroup=None)
            .values(*value)
            .annotate(Count("id"), Sum("value"))
            .filter(id__count__gt=1)
        )

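        # Collapse each group of duplicates down to a single row: keep the first
        # row and delete the rest.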
        for count in counts:
            count.pop("id__count")
            total_value = count.pop("value__sum")
            # Restrict to subgroup=None here as well; otherwise rows with a
            # non-None subgroup that happen to share the same key fields would
            # be swept up and deleted as "duplicates".
            duplicate_counts = list(count_table.objects.filter(**count, subgroup=None))
            first_count = duplicate_counts[0]
            if count["property"] in ["invites_sent::day", "active_users_log:is_bot:day"]:
                # For LoggingCountStat objects, the right fix is to combine the totals;
                # for other CountStat objects, we expect the duplicates to have the same
                # value, so all we need to do is delete them.
                first_count.value = total_value
                first_count.save()
            to_cleanup = duplicate_counts[1:]
            for duplicate_count in to_cleanup:
                duplicate_count.delete()


class Migration(migrations.Migration):
    dependencies = [
        ("analytics", "0014_remove_fillstate_last_modified"),
    ]

    operations = [
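        # reverse_code is a no-op: unapplying this migration does not (and cannot)
        # restore the deleted duplicate rows.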
        migrations.RunPython(clear_duplicate_counts, reverse_code=migrations.RunPython.noop),
    ]