mirror of
https://github.com/cisagov/manage.get.gov.git
synced 2025-08-03 16:32:15 +02:00
Remove unrelated content
This commit is contained in:
parent
d0183d4d14
commit
6086b9587f
2 changed files with 0 additions and 118 deletions
|
@ -1,112 +0,0 @@
|
|||
import logging
|
||||
from django.core.management import BaseCommand
|
||||
from registrar.models import Suborganization, DomainRequest, DomainInformation
|
||||
from registrar.management.commands.utility.terminal_helper import TerminalColors, TerminalHelper
|
||||
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Merge suborganizations whose names differ only by whitespace or capitalization.

    For every group of same-named records (after normalization) one record is
    kept, references held by DomainRequest and DomainInformation are re-pointed
    to it, and the remaining records are deleted after an interactive
    confirmation.
    """

    help = "Clean up duplicate suborganizations that differ only by spaces and capitalization"

    def handle(self, **kwargs):
        """Find duplicates, show a preview, and delete them once confirmed."""
        duplicates = self._find_duplicates(Suborganization.objects.all())
        if not duplicates:
            logger.info("No duplicate suborganizations found.")
            return

        # NOTE(review): presumably returns a truthy value only when the operator
        # confirms; verify against TerminalHelper.prompt_for_execution.
        confirmed = TerminalHelper.prompt_for_execution(
            system_exit_on_terminate=True,
            prompt_message=self._build_preview(duplicates),
            prompt_title="Clean up duplicate suborganizations?",
            verify_message="*** WARNING: This will delete suborganizations! ***",
        )
        if not confirmed:
            return

        try:
            delete_count = self._merge_and_delete(duplicates)
        except Exception as e:
            # Top-level boundary of the command: report the failure instead of crashing.
            logger.error(f"{TerminalColors.FAIL}Failed to clean up suborganizations: {str(e)}{TerminalColors.ENDC}")
        else:
            logger.info(
                f"{TerminalColors.OKGREEN}Successfully deleted {delete_count} suborganizations{TerminalColors.ENDC}"
            )

    def _find_duplicates(self, suborgs):
        """Group suborganizations by normalized name.

        Returns a dict mapping each normalized name that has at least one
        duplicate to ``{"keep": <record>, "delete": [<records>]}``.
        """
        # Fields that must agree before two same-named records count as duplicates.
        fields_to_compare = ["portfolio", "city", "state_territory"]
        groups = {}

        for suborg in suborgs:
            # Collapse whitespace runs and ignore case. A bare split() already
            # drops leading/trailing whitespace (str has no trim() method).
            normalized_name = " ".join(suborg.name.split()).lower()

            group = groups.get(normalized_name)
            if group is None:
                # First occurrence of this name becomes the provisional keeper.
                groups[normalized_name] = {"keep": suborg, "delete": []}
                continue

            current_best = group["keep"]

            # If any other field differs, the record needs manual inspection.
            fields_match = all(
                getattr(suborg, field) == getattr(current_best, field) for field in fields_to_compare
            )
            if not fields_match:
                logger.warning(
                    f"{TerminalColors.YELLOW}"
                    f"\nSkipping potential duplicate: {suborg.name} (id: {suborg.id})"
                    f"\nData mismatch with {current_best.name} (id: {current_best.id})"
                    f"{TerminalColors.ENDC}"
                )
                continue

            # The fewest spaces and most capitals wins.
            new_has_fewer_spaces = suborg.name.count(" ") < current_best.name.count(" ")
            new_has_more_capitals = self._count_capitals(suborg.name) > self._count_capitals(current_best.name)
            if new_has_fewer_spaces or new_has_more_capitals:
                # New suborg is better - demote the old one to the delete list.
                group["delete"].append(current_best)
                group["keep"] = suborg
            else:
                # New suborg is not better - it is the one to delete.
                group["delete"].append(suborg)

        # Drop names that turned out to have no duplicates.
        return {name: group for name, group in groups.items() if group["delete"]}

    @staticmethod
    def _count_capitals(name):
        """Return the number of uppercase characters in ``name``."""
        return sum(1 for char in name if char.isupper())

    def _build_preview(self, duplicates):
        """Return the human-readable summary shown in the confirmation prompt."""
        parts = ["The following duplicates will be removed:\n"]
        for group in duplicates.values():
            best = group["keep"]
            parts.append(f"\nKeeping: '{best.name}' (id: {best.id})")
            parts.extend(f"\nRemoving: '{dupe.name}' (id: {dupe.id})" for dupe in group["delete"])
            parts.append("\n")
        return "".join(parts)

    def _merge_and_delete(self, duplicates):
        """Re-point references to each kept record, then delete the duplicates.

        Returns the first element of the tuple returned by ``QuerySet.delete()``.
        """
        from django.db import transaction

        # Run everything in one transaction so a failure part-way through does
        # not leave references re-pointed while the duplicates still exist.
        with transaction.atomic():
            ids_to_delete = []
            for group in duplicates.values():
                best_record = group["keep"]
                delete_ids = [dupe.id for dupe in group["delete"]]
                ids_to_delete.extend(delete_ids)

                # Update all references to point to the kept suborg before deletion.
                DomainRequest.objects.filter(sub_organization_id__in=delete_ids).update(
                    sub_organization=best_record
                )
                DomainInformation.objects.filter(sub_organization_id__in=delete_ids).update(
                    sub_organization=best_record
                )

            # Bulk delete all duplicates.
            delete_count, _ = Suborganization.objects.filter(id__in=ids_to_delete).delete()

        return delete_count
|
|
@ -343,9 +343,3 @@ def value_of_attribute(obj, attribute_name: str):
|
|||
if callable(value):
|
||||
value = value()
|
||||
return value
|
||||
|
||||
|
||||
def normalize_string(string_to_normalize: str) -> str:
    """Normalizes a given string. Returns a string without extra spaces, in all lowercase.

    Leading and trailing whitespace is removed and every internal run of
    whitespace is collapsed to a single space.
    """
    # A bare split() splits on any whitespace run and discards leading/trailing
    # whitespace, so no separate strip is needed (str has no trim() method).
    new_string = " ".join(string_to_normalize.split())
    return new_string.lower()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue