diff --git a/src/registrar/management/commands/clean_duplicate_suborgs.py b/src/registrar/management/commands/clean_duplicate_suborgs.py
new file mode 100644
index 000000000..a5c7a87f0
--- /dev/null
+++ b/src/registrar/management/commands/clean_duplicate_suborgs.py
@@ -0,0 +1,126 @@
+import logging
+from django.core.management import BaseCommand
+from django.db import transaction
+from registrar.models import Suborganization, DomainRequest, DomainInformation
+from registrar.management.commands.utility.terminal_helper import TerminalColors, TerminalHelper
+
+
+logger = logging.getLogger(__name__)
+
+
+class Command(BaseCommand):
+    help = "Clean up duplicate suborganizations that differ only by spaces and capitalization"
+
+    def handle(self, **kwargs):
+        """Merge suborganizations whose names differ only by whitespace or letter case.
+
+        Groups records by normalized name, previews which record is kept and which
+        are removed, and after confirmation repoints DomainRequest and
+        DomainInformation rows to the kept record before deleting the duplicates.
+        """
+        # Find duplicates
+        duplicates = {}
+        all_suborgs = Suborganization.objects.all()
+
+        for suborg in all_suborgs:
+            # Normalize name by removing extra spaces and converting to lowercase
+            normalized_name = " ".join(suborg.name.split()).lower()
+
+            # First occurrence of this name
+            if normalized_name not in duplicates:
+                duplicates[normalized_name] = {
+                    "keep": suborg,
+                    "delete": []
+                }
+                continue
+
+            # Compare with our current best
+            current_best = duplicates[normalized_name]["keep"]
+
+            # Check if all other fields match.
+            # If they don't, we should inspect this record manually.
+            fields_to_compare = ["portfolio", "city", "state_territory"]
+            fields_match = all(
+                getattr(suborg, field) == getattr(current_best, field)
+                for field in fields_to_compare
+            )
+            if not fields_match:
+                logger.warning(
+                    f"{TerminalColors.YELLOW}"
+                    f"\nSkipping potential duplicate: {suborg.name} (id: {suborg.id})"
+                    f"\nData mismatch with {current_best.name} (id: {current_best.id})"
+                    f"{TerminalColors.ENDC}"
+                )
+                continue
+
+            # Determine if new suborg is better than current best.
+            # The fewest spaces and most capitals wins.
+            new_has_fewer_spaces = suborg.name.count(" ") < current_best.name.count(" ")
+            new_capitals = sum(1 for c in suborg.name if c.isupper())
+            current_capitals = sum(1 for c in current_best.name if c.isupper())
+            new_has_more_capitals = new_capitals > current_capitals
+            # TODO: also compare how many words start with a capital letter.
+
+            if new_has_fewer_spaces or new_has_more_capitals:
+                # New suborg is better - demote the old one to the delete list
+                duplicates[normalized_name]["delete"].append(current_best)
+                duplicates[normalized_name]["keep"] = suborg
+            else:
+                # New suborg is not better - keep the current best and delete the new one
+                duplicates[normalized_name]["delete"].append(suborg)
+
+        # Filter out entries without duplicates
+        duplicates = {k: v for k, v in duplicates.items() if v.get("delete")}
+        if not duplicates:
+            logger.info("No duplicate suborganizations found.")
+            return
+
+        # Show preview of changes
+        preview = "The following duplicates will be removed:\n"
+        for data in duplicates.values():
+            best = data.get("keep")
+            preview += f"\nKeeping: '{best.name}' (id: {best.id})"
+
+            for duplicate in data.get("delete"):
+                preview += f"\nRemoving: '{duplicate.name}' (id: {duplicate.id})"
+            preview += "\n"
+
+        # Get confirmation and execute deletions
+        if TerminalHelper.prompt_for_execution(
+            system_exit_on_terminate=True,
+            prompt_message=preview,
+            prompt_title="Clean up duplicate suborganizations?",
+            verify_message="*** WARNING: This will delete suborganizations! ***"
+        ):
+            try:
+                # Reassign and delete in one transaction so a failure part-way
+                # through rolls back instead of leaving references half-migrated.
+                with transaction.atomic():
+                    # Update all references to point to the right suborg before deletion
+                    for record in duplicates.values():
+                        best_record = record.get("keep")
+                        delete_ids = [dupe.id for dupe in record.get("delete")]
+
+                        # Update domain requests
+                        DomainRequest.objects.filter(
+                            sub_organization_id__in=delete_ids
+                        ).update(sub_organization=best_record)
+
+                        # Update domain information
+                        DomainInformation.objects.filter(
+                            sub_organization_id__in=delete_ids
+                        ).update(sub_organization=best_record)
+
+                    ids_to_delete = [
+                        dupe.id
+                        for data in duplicates.values()
+                        for dupe in data["delete"]
+                    ]
+
+                    # Bulk delete all duplicates
+                    delete_count, _ = Suborganization.objects.filter(id__in=ids_to_delete).delete()
+
+                logger.info(f"{TerminalColors.OKGREEN}Successfully deleted {delete_count} suborganizations{TerminalColors.ENDC}")
+
+            except Exception as e:
+                logger.error(f"{TerminalColors.FAIL}Failed to clean up suborganizations: {str(e)}{TerminalColors.ENDC}")
diff --git a/src/registrar/management/commands/create_federal_portfolio.py b/src/registrar/management/commands/create_federal_portfolio.py
index 9cf4d36ea..b8a0ed091 100644
--- a/src/registrar/management/commands/create_federal_portfolio.py
+++ b/src/registrar/management/commands/create_federal_portfolio.py
@@ -104,7 +104,11 @@ class Command(BaseCommand):
         also create new suborganizations"""
         portfolio, created = self.create_portfolio(federal_agency)
         if created:
-            self.create_suborganizations(portfolio, federal_agency)
+            valid_agencies = DomainInformation.objects.filter(
+                federal_agency=federal_agency, organization_name__isnull=False
+            )
+            org_names = set(valid_agencies.values_list("organization_name", flat=True))
+            self.create_suborganizations(portfolio, federal_agency, org_names)
             if parse_domains or both:
                 self.handle_portfolio_domains(portfolio, federal_agency)
 
@@ -155,13 +159,8 @@ class Command(BaseCommand):
 
         return portfolio, True
 
-    def create_suborganizations(self, portfolio: Portfolio, federal_agency: FederalAgency):
+    def create_suborganizations(self, portfolio: Portfolio, federal_agency: FederalAgency, org_names: set):
         """Create Suborganizations tied to the given portfolio based on DomainInformation objects"""
-        valid_agencies = DomainInformation.objects.filter(
-            federal_agency=federal_agency, organization_name__isnull=False
-        )
-        org_names = set(valid_agencies.values_list("organization_name", flat=True))
-
         if not org_names:
             message = (
                 "Could not add any suborganizations."
@@ -232,6 +231,26 @@ class Command(BaseCommand):
             domain_request.portfolio = portfolio
             if domain_request.organization_name in suborgs:
                 domain_request.sub_organization = suborgs.get(domain_request.organization_name)
+            else:
+                # Fill in the requesting suborg fields if we have the data to do so
+                if domain_request.organization_name and domain_request.city and domain_request.state_territory:
+                    domain_request.requested_suborganization = domain_request.organization_name
+                    domain_request.suborganization_city = domain_request.city
+                    domain_request.suborganization_state_territory = domain_request.state_territory
+                else:
+                    message = f"No suborganization data found whatsoever for {domain_request}."
+                    TerminalHelper.colorful_logger(logger.warning, TerminalColors.YELLOW, message)
+
             self.updated_portfolios.add(portfolio)
 
-        DomainRequest.objects.bulk_update(domain_requests, ["portfolio", "sub_organization"])
+        # Persist every field assigned in the loop above, including the requested-suborg fields
+        DomainRequest.objects.bulk_update(
+            domain_requests,
+            [
+                "portfolio",
+                "sub_organization",
+                "requested_suborganization",
+                "suborganization_city",
+                "suborganization_state_territory",
+            ],
+        )