mirror of
https://github.com/cisagov/manage.get.gov.git
synced 2025-08-14 13:34:10 +02:00
Remove unrelated content
This commit is contained in:
parent
d0183d4d14
commit
6086b9587f
2 changed files with 0 additions and 118 deletions
|
@ -1,112 +0,0 @@
|
||||||
import logging
|
|
||||||
from django.core.management import BaseCommand
|
|
||||||
from registrar.models import Suborganization, DomainRequest, DomainInformation
|
|
||||||
from registrar.management.commands.utility.terminal_helper import TerminalColors, TerminalHelper
|
|
||||||
|
|
||||||
|
|
||||||
# Module-level logger, named after this module, used by the command below
# for its informational, warning and error output.
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
    """Remove suborganizations whose names differ only by spaces and capitalization.

    Suborganizations are grouped by a normalized name (whitespace collapsed,
    lowercased). Within each group one record is kept; the others are deleted
    after every DomainRequest and DomainInformation row that references them
    has been re-pointed to the kept record. Changes are only applied when
    TerminalHelper.prompt_for_execution returns a truthy value.
    """

    help = "Clean up duplicate suborganizations that differ only by spaces and capitalization"

    def handle(self, **kwargs):
        """Entry point: find duplicates, show a preview, and delete them on confirmation."""
        duplicates = self._find_duplicates(Suborganization.objects.all())
        if not duplicates:
            logger.info("No duplicate suborganizations found.")
            return

        # Get confirmation and execute deletions
        if TerminalHelper.prompt_for_execution(
            system_exit_on_terminate=True,
            prompt_message=self._build_preview(duplicates),
            prompt_title="Clean up duplicate suborganizations?",
            verify_message="*** WARNING: This will delete suborganizations! ***",
        ):
            self._merge_and_delete(duplicates)

    @staticmethod
    def _normalize_name(name):
        """Return `name` with whitespace runs collapsed to one space, in lowercase."""
        # str.split() with no arguments already drops leading/trailing
        # whitespace, so no separate strip step is needed. (The previous
        # `.trim()` call does not exist on Python strings and raised
        # AttributeError.)
        return " ".join(name.split()).lower()

    @staticmethod
    def _count_capitals(name):
        """Return the number of uppercase characters in `name`."""
        return sum(1 for char in name if char.isupper())

    def _find_duplicates(self, suborgs):
        """Group `suborgs` by normalized name and pick one record to keep per group.

        Returns a dict mapping each normalized name that has at least one
        duplicate to {"keep": <suborg>, "delete": [<suborg>, ...]}. Names
        without duplicates are omitted.
        """
        # Check if all other fields match.
        # If they don't, we should inspect this record manually.
        fields_to_compare = ["portfolio", "city", "state_territory"]

        duplicates = {}
        for suborg in suborgs:
            normalized_name = self._normalize_name(suborg.name)
            duplicate_record = duplicates.get(normalized_name)

            # First occurrence of this name
            if duplicate_record is None:
                duplicates[normalized_name] = {"keep": suborg, "delete": []}
                continue

            # Compare with our current best
            current_best = duplicate_record["keep"]
            fields_match = all(
                getattr(suborg, field) == getattr(current_best, field) for field in fields_to_compare
            )
            if not fields_match:
                logger.warning(
                    f"{TerminalColors.YELLOW}"
                    f"\nSkipping potential duplicate: {suborg.name} (id: {suborg.id})"
                    f"\nData mismatch with {current_best.name} (id: {current_best.id})"
                    f"{TerminalColors.ENDC}"
                )
                continue

            # Determine if new suborg is better than current best.
            # Fewer spaces or more capitals is enough for the new record to win.
            new_has_fewer_spaces = suborg.name.count(" ") < current_best.name.count(" ")
            new_has_more_capitals = self._count_capitals(suborg.name) > self._count_capitals(current_best.name)
            if new_has_fewer_spaces or new_has_more_capitals:
                # New suborg is better - demote the old one to the delete list
                duplicate_record["delete"].append(current_best)
                duplicate_record["keep"] = suborg
            else:
                # New suborg is not better - queue the new one for deletion
                duplicate_record["delete"].append(suborg)

        # Filter out entries without duplicates
        return {name: record for name, record in duplicates.items() if record["delete"]}

    @staticmethod
    def _build_preview(duplicates):
        """Return the text listing, per group, the record kept and the records removed."""
        parts = ["The following duplicates will be removed:\n"]
        for record in duplicates.values():
            best = record["keep"]
            parts.append(f"\nKeeping: '{best.name}' (id: {best.id})")
            parts.extend(
                f"\nRemoving: '{duplicate.name}' (id: {duplicate.id})" for duplicate in record["delete"]
            )
            parts.append("\n")
        return "".join(parts)

    @staticmethod
    def _merge_and_delete(duplicates):
        """Re-point references to the kept records, then bulk delete the duplicates.

        Any exception raised along the way is logged and not re-raised.
        """
        try:
            ids_to_delete = []

            # Update all references to point to the right suborg before deletion
            for record in duplicates.values():
                best_record = record["keep"]
                delete_ids = [dupe.id for dupe in record["delete"]]
                ids_to_delete.extend(delete_ids)

                # Update domain requests
                DomainRequest.objects.filter(sub_organization_id__in=delete_ids).update(
                    sub_organization=best_record
                )

                # Update domain information
                DomainInformation.objects.filter(sub_organization_id__in=delete_ids).update(
                    sub_organization=best_record
                )

            # Bulk delete all duplicates
            delete_count, _ = Suborganization.objects.filter(id__in=ids_to_delete).delete()
            logger.info(
                f"{TerminalColors.OKGREEN}Successfully deleted {delete_count} suborganizations{TerminalColors.ENDC}"
            )
        except Exception as e:
            logger.error(f"{TerminalColors.FAIL}Failed to clean up suborganizations: {str(e)}{TerminalColors.ENDC}")
|
|
|
@ -343,9 +343,3 @@ def value_of_attribute(obj, attribute_name: str):
|
||||||
if callable(value):
|
if callable(value):
|
||||||
value = value()
|
value = value()
|
||||||
return value
|
return value
|
||||||
|
|
||||||
|
|
||||||
def normalize_string(string_to_normalize: str) -> str:
    """Normalizes a given string. Returns a string without extra spaces, in all lowercase.

    Leading and trailing whitespace is removed and every internal run of
    whitespace is collapsed to a single space before lowercasing.
    """
    # str.split() with no arguments discards leading/trailing whitespace and
    # splits on any whitespace run, so no separate strip step is needed.
    # (Python strings have no .trim() method; calling it raised AttributeError.)
    return " ".join(string_to_normalize.split()).lower()
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue