mirror of
				https://github.com/zulip/zulip.git
				synced 2025-11-04 05:53:43 +00:00 
			
		
		
		
	This commit upgrades the 0015_clear_duplicate_counts migration to also remove duplicate counts in StreamCount, UserCount, and InstallationCount. Fixes https://github.com/zulip/docker-zulip/issues/266
		
			
				
	
	
		
			60 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			60 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from django.db import migrations
 | 
						|
from django.db.backends.postgresql.schema import DatabaseSchemaEditor
 | 
						|
from django.db.migrations.state import StateApps
 | 
						|
from django.db.models import Count, Sum
 | 
						|
 | 
						|
 | 
						|
def clear_duplicate_counts(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None:
    """This is a preparatory migration for our Analytics tables.

    The backstory is that Django's unique_together indexes do not properly
    handle the subgroup=None corner case (allowing duplicate rows that have a
    subgroup of None), which meant that in race conditions, rather than updating
    an existing row for the property/(realm, stream, user)/time with subgroup=None, Django would
    create a duplicate row.

    In the next migration, we'll add a proper constraint to fix this bug, but
    we need to fix any existing problematic rows before we can add that constraint.

    We fix this in an appropriate fashion for each type of CountStat object; mainly
    this means deleting the extra rows, but for LoggingCountStat objects, we need to
    additionally combine the sums.

    Only rows with subgroup=None are ever considered duplicates: both the
    grouping query and the per-group lookup are restricted to subgroup=None, so
    rows that carry a real subgroup are never modified or deleted.  Within each
    group of duplicates, the row with the lowest id is the one that is kept.

    Args:
        apps: Historical app registry, used to look up the analytics models.
        schema_editor: Unused; present because RunPython passes it.
    """
    count_tables = dict(realm=apps.get_model('analytics', 'RealmCount'),
                        user=apps.get_model('analytics', 'UserCount'),
                        stream=apps.get_model('analytics', 'StreamCount'),
                        installation=apps.get_model('analytics', 'InstallationCount'))

    # Properties whose duplicate rows must be summed rather than just dropped.
    summed_properties = ["invites_sent::day", "active_users_log:is_bot:day"]

    for name, count_table in count_tables.items():
        # InstallationCount has no per-object foreign key, so its rows are
        # grouped by property/end_time only.
        value = [name, 'property', 'end_time']
        if name == 'installation':
            value = ['property', 'end_time']
        counts = count_table.objects.filter(subgroup=None).values(*value).annotate(
            Count('id'), Sum('value')).filter(id__count__gt=1)

        for count in counts:
            # Strip the aggregates so that `count` holds only the grouping
            # fields and can be used directly as filter kwargs.
            count.pop('id__count')
            total_value = count.pop('value__sum')
            # Restrict to subgroup=None here as well; otherwise rows with a
            # non-null subgroup sharing the same key would be picked up (and
            # deleted) as if they were duplicates.  Order by id so the row we
            # keep is deterministic.
            duplicate_counts = list(
                count_table.objects.filter(subgroup=None, **count).order_by('id'))
            first_count = duplicate_counts[0]
            if count['property'] in summed_properties:
                # For LoggingCountStat objects, the right fix is to combine the totals;
                # for other CountStat objects, we expect the duplicates to have the same value.
                # And so all we need to do is delete them.
                first_count.value = total_value
                first_count.save()
            to_cleanup = duplicate_counts[1:]
            for duplicate_count in to_cleanup:
                duplicate_count.delete()
 | 
						|
 | 
						|
class Migration(migrations.Migration):
    """Run clear_duplicate_counts as a data migration; reversing is a no-op."""

    dependencies = [('analytics', '0014_remove_fillstate_last_modified')]

    operations = [
        migrations.RunPython(
            clear_duplicate_counts,
            reverse_code=migrations.RunPython.noop,
        ),
    ]
 |