Remove unrelated content

This commit is contained in:
zandercymatics 2025-01-06 12:43:48 -07:00
parent d0183d4d14
commit 6086b9587f
No known key found for this signature in database
GPG key ID: FF4636ABEC9682B7
2 changed files with 0 additions and 118 deletions

View file

@ -1,112 +0,0 @@
import logging
from django.core.management import BaseCommand
from registrar.models import Suborganization, DomainRequest, DomainInformation
from registrar.management.commands.utility.terminal_helper import TerminalColors, TerminalHelper
logger = logging.getLogger(__name__)
class Command(BaseCommand):
    """Management command that merges suborganizations whose names differ only by whitespace or case.

    For every group of suborganizations sharing the same normalized name (and the
    same portfolio, city and state_territory), one record is kept and the others
    are deleted after DomainRequest / DomainInformation references are repointed
    to the kept record.
    """

    help = "Clean up duplicate suborganizations that differ only by spaces and capitalization"

    def handle(self, **kwargs):
        """Find duplicate suborganizations, preview them, and delete them once confirmed.

        Groups whose non-name fields disagree are skipped with a warning so that
        they can be inspected manually. Any exception raised while updating
        references or deleting is logged rather than propagated.
        """
        # Maps normalized name -> {"keep": Suborganization, "delete": [Suborganization, ...]}
        duplicates = {}
        all_suborgs = Suborganization.objects.all()
        for suborg in all_suborgs:
            # Normalize name by collapsing whitespace and converting to lowercase.
            # split() with no arguments already drops leading/trailing whitespace
            # (str has no trim() method).
            normalized_name = " ".join(suborg.name.split()).lower()

            # First occurrence of this name
            if normalized_name not in duplicates:
                duplicates[normalized_name] = {"keep": suborg, "delete": []}
                continue

            # Compare with our current best
            duplicate_record = duplicates[normalized_name]
            current_best = duplicate_record["keep"]

            # Check if all other fields match.
            # If they don't, we should inspect this record manually.
            fields_to_compare = ["portfolio", "city", "state_territory"]
            fields_match = all(
                getattr(suborg, field) == getattr(current_best, field) for field in fields_to_compare
            )
            if not fields_match:
                logger.warning(
                    f"{TerminalColors.YELLOW}"
                    f"\nSkipping potential duplicate: {suborg.name} (id: {suborg.id})"
                    f"\nData mismatch with {current_best.name} (id: {current_best.id})"
                    f"{TerminalColors.ENDC}"
                )
                continue

            # Determine if new suborg is better than current best.
            # A candidate wins if it has fewer spaces OR more capitals.
            new_has_fewer_spaces = suborg.name.count(" ") < current_best.name.count(" ")
            new_capitals = sum(1 for c in suborg.name if c.isupper())
            best_capitals = sum(1 for c in current_best.name if c.isupper())
            new_has_more_capitals = new_capitals > best_capitals
            if new_has_fewer_spaces or new_has_more_capitals:
                # New suborg is better - demote the old one to the delete list
                duplicate_record["delete"].append(current_best)
                duplicate_record["keep"] = suborg
            else:
                # New suborg is not better - mark the new one for deletion
                duplicate_record["delete"].append(suborg)

        # Filter out entries without duplicates
        duplicates = {k: v for k, v in duplicates.items() if v.get("delete")}
        if not duplicates:
            logger.info("No duplicate suborganizations found.")
            return

        # Show preview of changes
        preview = "The following duplicates will be removed:\n"
        for data in duplicates.values():
            best = data.get("keep")
            preview += f"\nKeeping: '{best.name}' (id: {best.id})"
            for duplicate in data.get("delete"):
                preview += f"\nRemoving: '{duplicate.name}' (id: {duplicate.id})"
            preview += "\n"

        # Get confirmation and execute deletions.
        # NOTE(review): presumably the helper exits the process when the user
        # declines (system_exit_on_terminate=True) - confirm against TerminalHelper.
        if TerminalHelper.prompt_for_execution(
            system_exit_on_terminate=True,
            prompt_message=preview,
            prompt_title="Clean up duplicate suborganizations?",
            verify_message="*** WARNING: This will delete suborganizations! ***",
        ):
            try:
                # Update all references to point to the right suborg before deletion
                for record in duplicates.values():
                    best_record = record.get("keep")
                    delete_ids = [dupe.id for dupe in record.get("delete")]

                    # Update domain requests
                    DomainRequest.objects.filter(sub_organization_id__in=delete_ids).update(
                        sub_organization=best_record
                    )

                    # Update domain information
                    DomainInformation.objects.filter(sub_organization_id__in=delete_ids).update(
                        sub_organization=best_record
                    )

                ids_to_delete = [dupe.id for data in duplicates.values() for dupe in data["delete"]]

                # Bulk delete all duplicates
                delete_count, _ = Suborganization.objects.filter(id__in=ids_to_delete).delete()
                logger.info(
                    f"{TerminalColors.OKGREEN}Successfully deleted {delete_count} suborganizations{TerminalColors.ENDC}"
                )
            except Exception as e:
                # Top-level boundary of the command: report the failure instead of crashing.
                logger.error(
                    f"{TerminalColors.FAIL}Failed to clean up suborganizations: {str(e)}{TerminalColors.ENDC}"
                )

View file

@ -343,9 +343,3 @@ def value_of_attribute(obj, attribute_name: str):
if callable(value):
value = value()
return value
def normalize_string(string_to_normalize: str) -> str:
    """Normalizes a given string. Returns a string without extra spaces, in all lowercase.

    Leading and trailing whitespace is removed and every internal run of
    whitespace is collapsed to a single space before lowercasing.
    """
    # str has no trim() method; split() with no arguments already discards
    # leading/trailing whitespace, so no explicit strip() is needed.
    new_string = " ".join(string_to_normalize.split())
    return new_string.lower()