mirror of
				https://github.com/zulip/zulip.git
				synced 2025-11-03 21:43:21 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			185 lines
		
	
	
		
			8.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			185 lines
		
	
	
		
			8.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
# Generated by Django 4.0.7 on 2022-09-30 20:30
 | 
						|
 | 
						|
import ast
 | 
						|
 | 
						|
import orjson
 | 
						|
from django.db import migrations, transaction
 | 
						|
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
 | 
						|
from django.db.migrations.state import StateApps
 | 
						|
from django.db.models import F, JSONField, Model
 | 
						|
from django.db.models.functions import Cast, JSONObject
 | 
						|
 | 
						|
# This migration is mostly the same as
# backfill_remote_realmauditlog_extradata_to_json_field in zilencer.

# Keys used inside extra_data_json for USER_FULL_NAME_CHANGED entries:
# OLD_VALUE maps to the previous full name, NEW_VALUE to the new one
# (unknown during backfill, hence backfilled as None below).
OLD_VALUE = "1"
NEW_VALUE = "2"
# Numeric RealmAuditLog.event_type codes this migration special-cases
# (values mirror the model's constants; the migration cannot import them).
USER_FULL_NAME_CHANGED = 124
REALM_DISCOUNT_CHANGED = 209
# Number of audit log ids processed per transaction-wrapped batch.
BATCH_SIZE = 5000

# Printed for REALM_DISCOUNT_CHANGED rows whose extra_data was stringified
# with str(); they are skipped (not backfilled) and must be fixed by hand.
DISCOUNT_DATA_TEMPLATE = """Audit log entry {id} with event type REALM_DISCOUNT_CHANGED is skipped.
The data consistency needs to be manually checked.
  Discount data to remove after the upcoming JSONField migration:
{data_to_remove}
  Discount data to keep after the upcoming JSONField migration:
{data_to_keep}
"""

# Printed when a row's pre-existing extra_data_json disagrees with the value
# derived from extra_data; the old value is preserved under
# "inconsistent_old_extra_data*" keys for later inspection.
OVERWRITE_TEMPLATE = """Audit log entry with id {id} has extra_data_json been inconsistently overwritten.
  The old value is:
{old_value}
  The new value is:
{new_value}
"""
 | 
						|
 | 
						|
 | 
						|
@transaction.atomic
def do_bulk_backfill_extra_data(
    audit_log_model: type[Model], id_lower_bound: int, id_upper_bound: int
) -> None:
    """Backfill extra_data_json from the legacy extra_data text column for
    audit log rows with id in [id_lower_bound, id_upper_bound] (inclusive).

    Three classes of rows are handled:
    1. USER_FULL_NAME_CHANGED rows, whose extra_data is a bare name string;
    2. rows whose extra_data is valid JSON (written with orjson.dumps);
    3. rows whose extra_data is a str()-formatted Python dict, re-parsed
       with ast.literal_eval.
    Rows whose pre-existing extra_data_json disagrees with the derived value
    are flagged: the conflicting values are stored under
    "inconsistent_old_extra_data*" keys and a warning is printed.

    Wrapped in a transaction so a batch is applied all-or-nothing.
    """
    # First handle the special case for audit logs with the
    # USER_FULL_NAME_CHANGED event, which stores the full name not as
    # str(dict()) but a plain str. Note that we only update the entries where
    # extra_data_json has the default value, because we do not want to override
    # existing audit log entries with a NEW_VALUE of None for extra_data_json.
    # We do not need to skip existing entries for other parts of backfilling
    # because we have double-write implemented so that the backfilled value
    # will still be consistent.
    audit_log_model._default_manager.filter(
        event_type=USER_FULL_NAME_CHANGED,
        id__range=(id_lower_bound, id_upper_bound),
        extra_data_json={},
        # extra_data used to keep track of the old name. As a result, we know
        # nothing about what NEW_VALUE would be, especially if the name has been
        # changed multiple times. extra_data_json is a JSONObject whose
        # OLD_VALUE and NEW_VALUE is mapped from the value of the extra_data
        # field (which is just an old full name string) and None, respectively.
        # Documentation for JSONObject:
        # https://docs.djangoproject.com/en/5.0/ref/models/database-functions/#jsonobject
    ).update(extra_data_json=JSONObject(**{OLD_VALUE: "extra_data", NEW_VALUE: None}))

    # Collected (id, extra_data, old extra_data_json, new extra_data_json)
    # tuples for rows whose existing extra_data_json conflicts with the value
    # about to be backfilled.
    inconsistent_extra_data_json: list[tuple[int, str, object, object]] = []
    # A dict converted with str() will start with an open bracket followed by a
    # single quote, as opposed to a JSON-encoded value, which will use a
    # _double_ quote. We use this to filter out those entries with malformed
    # extra_data to be handled later. This should only update rows with
    # extra_data populated with orjson.dumps.

    # The first query below checks for entries that would have extra_data_json
    # being overwritten by the migration with a value inconsistent with its
    # previous value.
    inconsistent_extra_data_json.extend(
        audit_log_model._default_manager.filter(
            extra_data__isnull=False, id__range=(id_lower_bound, id_upper_bound)
        )
        .annotate(new_extra_data_json=Cast("extra_data", output_field=JSONField()))
        .exclude(extra_data__startswith="{'")
        .exclude(event_type=USER_FULL_NAME_CHANGED)
        .exclude(extra_data_json={})
        .exclude(extra_data_json=F("new_extra_data_json"))
        .values_list("id", "extra_data", "extra_data_json", "new_extra_data_json")
    )
    # Bulk-backfill the JSON-valued rows in the database with a single UPDATE,
    # casting extra_data directly to JSON. Rows already flagged as
    # inconsistent in a previous run are skipped.
    (
        audit_log_model._default_manager.filter(
            extra_data__isnull=False,
            id__range=(id_lower_bound, id_upper_bound),
            extra_data_json__inconsistent_old_extra_data__isnull=True,
        )
        .exclude(extra_data__startswith="{'")
        .exclude(event_type=USER_FULL_NAME_CHANGED)
        .update(extra_data_json=Cast("extra_data", output_field=JSONField()))
    )

    # Remaining rows store a str()-formatted Python dict; these must be
    # parsed in Python, row by row.
    python_valued_audit_log_entries = audit_log_model._default_manager.filter(
        extra_data__startswith="{'",
        id__range=(id_lower_bound, id_upper_bound),
        extra_data_json__inconsistent_old_extra_data__isnull=True,
    )
    for audit_log_entry in python_valued_audit_log_entries:
        # extra_data for entries that store dict stringified with builtins.str()
        # are converted back with ast.literal_eval for safety and efficiency.
        # str()'d extra_data with the REALM_DISCOUNT_CHANGED event type is not
        # handled by this migration. We expect that all such entries are
        # manually converted beforehand or an error will occur during the
        # migration, because ast.literal_eval does not allow the evaluation of
        # Decimal.
        old_value = audit_log_entry.extra_data_json  # type: ignore[attr-defined] # The migration cannot depend on zerver.models, which contains the real type of the RealmAuditLog model, so it cannot be properly typed.
        if audit_log_entry.event_type == REALM_DISCOUNT_CHANGED:  # type: ignore[attr-defined] # Explained above.
            print(
                DISCOUNT_DATA_TEMPLATE.format(
                    id=audit_log_entry.id,  # type: ignore[attr-defined] # Explained above.
                    data_to_remove=audit_log_entry.extra_data,  # type: ignore[attr-defined] # Explained above.
                    data_to_keep=old_value,
                )
            )
            continue
        new_value = ast.literal_eval(audit_log_entry.extra_data)  # type: ignore[attr-defined] # Explained above.
        # An existing extra_data_json that is neither empty nor equal to the
        # parsed value is a conflict; record it for the fix-up pass below.
        if old_value not in ({}, new_value):
            inconsistent_extra_data_json.append(
                (audit_log_entry.id, audit_log_entry.extra_data, old_value, new_value)  # type: ignore[attr-defined] # Explained above.
            )
        audit_log_entry.extra_data_json = new_value  # type: ignore[attr-defined] # Explained above.
    audit_log_model._default_manager.bulk_update(
        python_valued_audit_log_entries, fields=["extra_data_json"]
    )

    if inconsistent_extra_data_json:
        # Re-write the conflicting rows so the new value wins but the
        # conflicting old values remain inspectable in the JSON itself.
        audit_log_entries = []
        for (
            audit_log_entry_id,
            old_extra_data,
            old_extra_data_json,
            new_extra_data_json,
        ) in inconsistent_extra_data_json:
            audit_log_entry = audit_log_model._default_manager.get(id=audit_log_entry_id)
            assert isinstance(old_extra_data_json, dict)
            if "inconsistent_old_extra_data" in old_extra_data_json:
                # Skip entries that have been backfilled and detected as
                # anomalies before.
                continue
            assert isinstance(new_extra_data_json, dict)
            audit_log_entry.extra_data_json = {  # type: ignore[attr-defined] # Explained above.
                **new_extra_data_json,
                "inconsistent_old_extra_data": old_extra_data,
                "inconsistent_old_extra_data_json": old_extra_data_json,
            }
            audit_log_entries.append(audit_log_entry)
            print(
                OVERWRITE_TEMPLATE.format(
                    id=audit_log_entry_id,
                    old_value=orjson.dumps(old_extra_data_json).decode(),
                    new_value=orjson.dumps(new_extra_data_json).decode(),
                )
            )
        audit_log_model._default_manager.bulk_update(audit_log_entries, fields=["extra_data_json"])
 | 
						|
 | 
						|
 | 
						|
def backfill_extra_data(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:
    """Drive the backfill over all RealmAuditLog rows that have extra_data.

    Work is split into id ranges of BATCH_SIZE + 1 rows each (both bounds
    inclusive), with each range handled in its own transaction by
    do_bulk_backfill_extra_data.
    """
    audit_log_model = apps.get_model("zerver", "RealmAuditLog")
    rows_to_backfill = audit_log_model.objects.filter(extra_data__isnull=False)
    # Nothing to do on a fresh install; also guards earliest()/latest()
    # below, which raise on an empty queryset.
    if not rows_to_backfill.exists():
        return

    batch_start = rows_to_backfill.earliest("id").id
    last_id = rows_to_backfill.latest("id").id
    while batch_start <= last_id:
        batch_end = min(batch_start + BATCH_SIZE, last_id)
        do_bulk_backfill_extra_data(audit_log_model, batch_start, batch_end)
        # Ranges are inclusive, so the next batch starts one past the
        # previous batch's upper bound.
        batch_start += BATCH_SIZE + 1
 | 
						|
 | 
						|
 | 
						|
class Migration(migrations.Migration):
    # Do not wrap the whole migration in one transaction: the backfill
    # manages its own per-batch transactions via the @transaction.atomic
    # decorator on do_bulk_backfill_extra_data, keeping each transaction
    # small on large installations.
    atomic = False

    dependencies = [
        ("zerver", "0459_remove_invalid_characters_from_user_group_name"),
    ]

    operations = [
        migrations.RunPython(
            # Reverse is a no-op (the backfill only copies data forward);
            # elidable=True lets this data migration be optimized away when
            # migrations are squashed.
            backfill_extra_data, reverse_code=migrations.RunPython.noop, elidable=True
        ),
    ]
 |