Wrap up PR

This commit is contained in:
zandercymatics 2025-01-09 14:22:47 -07:00
parent 028d033ff2
commit db75d31a0b
No known key found for this signature in database
GPG key ID: FF4636ABEC9682B7
6 changed files with 252 additions and 37 deletions

View file

@ -918,3 +918,38 @@ Example (only requests): `./manage.py create_federal_portfolio --branch "executi
- Parameters #1-#2: Either `--agency_name` or `--branch` must be specified. Not both.
- Parameters #2-#3, you cannot use `--both` while using these. You must specify either `--parse_requests` or `--parse_domains` seperately. While all of these parameters are optional in that you do not need to specify all of them,
you must specify at least one to run this script.
## Patch suborganizations
This script deletes some duplicate suborganization data that exists in our database (one-time use).
It works in two ways:
1. If the only name difference between two suborg records is extra spaces or a capitalization difference,
then we delete all duplicate records of this type.
2. If the suborg name is one we manually specify to delete via the script.
Before it deletes records, it goes through each DomainInformation and DomainRequest object and updates the reference to "sub_organization" to match the non-duplicative record.
### Running on sandboxes
#### Step 1: Login to CloudFoundry
```cf login -a api.fr.cloud.gov --sso```
#### Step 2: SSH into your environment
```cf ssh getgov-{space}```
Example: `cf ssh getgov-za`
#### Step 3: Create a shell instance
```/tmp/lifecycle/shell```
#### Step 4: Upload your csv to the desired sandbox
[Follow these steps](#use-scp-to-transfer-data-to-sandboxes) to upload the federal_cio csv to a sandbox of your choice.
#### Step 5: Running the script
To create a specific portfolio:
```./manage.py patch_suborganizations```
### Running locally
#### Step 1: Running the script
```docker-compose exec app ./manage.py patch_suborganizations```

View file

@ -80,18 +80,33 @@ class Command(BaseCommand):
else:
raise CommandError(f"Cannot find '{branch}' federal agencies in our database.")
all_suborganizations = []
all_domains = []
all_domain_requests = []
for federal_agency in agencies:
message = f"Processing federal agency '{federal_agency.agency}'..."
TerminalHelper.colorful_logger(logger.info, TerminalColors.MAGENTA, message)
try:
# C901 'Command.handle' is too complex (12)
self.handle_populate_portfolio(federal_agency, parse_domains, parse_requests, both)
suborgs, domains, requests = self.handle_populate_portfolio(
federal_agency, parse_domains, parse_requests, both
)
all_suborganizations.extend(suborgs)
all_domains.extend(domains)
all_domain_requests.extend(requests)
except Exception as exec:
self.failed_portfolios.add(federal_agency)
logger.error(exec)
message = f"Failed to create portfolio '{federal_agency.agency}'"
TerminalHelper.colorful_logger(logger.info, TerminalColors.FAIL, message)
# Post process steps
# Add suborg info to created or existing suborgs.
if all_suborganizations:
self.post_process_suborganization_fields(all_suborganizations, all_domains, all_domain_requests)
message = f"Added city and state_territory information to {len(all_suborganizations)} suborgs."
TerminalHelper.colorful_logger(logger.info, TerminalColors.MAGENTA, message)
TerminalHelper.log_script_run_summary(
self.updated_portfolios,
self.failed_portfolios,
@ -105,47 +120,46 @@ class Command(BaseCommand):
"""Attempts to create a portfolio. If successful, this function will
also create new suborganizations"""
portfolio, created = self.create_portfolio(federal_agency)
suborganizations = Suborganization.objects.none()
domains = DomainInformation.objects.filter(federal_agency=federal_agency, portfolio__isnull=True)
domain_requests = DomainRequest.objects.filter(federal_agency=federal_agency, portfolio__isnull=True).exclude(
status__in=[
DomainRequest.DomainRequestStatus.STARTED,
DomainRequest.DomainRequestStatus.INELIGIBLE,
DomainRequest.DomainRequestStatus.REJECTED,
]
)
suborganizations = self.create_suborganizations(portfolio, federal_agency)
domains = []
domain_requests = []
if created:
suborganizations = self.create_suborganizations(portfolio, federal_agency)
if parse_domains or both:
domains = self.handle_portfolio_domains(portfolio, federal_agency, domains)
domains = self.handle_portfolio_domains(portfolio, federal_agency)
if parse_requests or both:
domain_requests = self.handle_portfolio_requests(portfolio, federal_agency, domain_requests)
domain_requests = self.handle_portfolio_requests(portfolio, federal_agency)
# Post process steps
# Add suborg info to created or existing suborgs. Get the refreshed queryset for each.
self.post_process_suborganization_fields(suborganizations.all(), domains.all(), domain_requests.all())
return suborganizations, domains, domain_requests
def post_process_suborganization_fields(self, suborganizations, domains, requests):
"""Post-process suborganization fields by pulling data from related domains and requests.
This function updates suborganization city and state_territory fields based on
related domain information and domain request information.
"""
# Exclude domains and requests where the org name is the same,
# and where we are missing some crucial information.
domains = domains.exclude(
domains = DomainInformation.objects.filter(id__in=[domain.id for domain in domains]).exclude(
portfolio__isnull=True,
organization_name__isnull=True,
sub_organization__isnull=True,
organization_name__iexact=F("portfolio__organization_name"),
).in_bulk(field_name="organization_name")
)
domains_dict = {domain.organization_name: domain for domain in domains}
requests = requests.exclude(
requests = DomainRequest.objects.filter(id__in=[request.id for request in requests]).exclude(
portfolio__isnull=True,
organization_name__isnull=True,
sub_organization__isnull=True,
organization_name__iexact=F("portfolio__organization_name"),
).in_bulk(field_name="organization_name")
)
requests_dict = {request.organization_name: request for request in requests}
for suborg in suborganizations:
domain = domains.get(suborg.name, None)
request = requests.get(suborg.name, None)
domain = domains_dict.get(suborg.name, None)
request = requests_dict.get(suborg.name, None)
# PRIORITY:
# 1. Domain info
@ -266,11 +280,19 @@ class Command(BaseCommand):
TerminalHelper.colorful_logger(logger.warning, TerminalColors.YELLOW, "No suborganizations added")
return new_suborgs
def handle_portfolio_requests(self, portfolio: Portfolio, federal_agency: FederalAgency, domain_requests):
def handle_portfolio_requests(self, portfolio: Portfolio, federal_agency: FederalAgency):
"""
Associate portfolio with domain requests for a federal agency.
Updates all relevant domain request records.
"""
domain_requests = DomainRequest.objects.filter(federal_agency=federal_agency, portfolio__isnull=True).exclude(
status__in=[
DomainRequest.DomainRequestStatus.STARTED,
DomainRequest.DomainRequestStatus.INELIGIBLE,
DomainRequest.DomainRequestStatus.REJECTED,
]
)
if not domain_requests.exists():
message = f"""
Portfolio '{portfolio}' not added to domain requests: no valid records found.
@ -295,14 +317,14 @@ class Command(BaseCommand):
message = f"Added portfolio '{portfolio}' to {len(domain_requests)} domain requests."
TerminalHelper.colorful_logger(logger.info, TerminalColors.OKGREEN, message)
# Return a fresh copy of the queryset
return domain_requests.all()
return domain_requests
def handle_portfolio_domains(self, portfolio: Portfolio, federal_agency: FederalAgency, domain_infos):
def handle_portfolio_domains(self, portfolio: Portfolio, federal_agency: FederalAgency):
"""
Associate portfolio with domains for a federal agency.
Updates all relevant domain information records.
"""
domain_infos = DomainInformation.objects.filter(federal_agency=federal_agency, portfolio__isnull=True)
if not domain_infos.exists():
message = f"""
Portfolio '{portfolio}' not added to domains: no valid records found.
@ -323,5 +345,4 @@ class Command(BaseCommand):
message = f"Added portfolio '{portfolio}' to {len(domain_infos)} domains."
TerminalHelper.colorful_logger(logger.info, TerminalColors.OKGREEN, message)
# Return a fresh copy of the queryset
return domain_infos.all()
return domain_infos

View file

@ -19,12 +19,12 @@ class Command(BaseCommand):
# For extra_records_to_prune: the key gets deleted, the value gets kept.
extra_records_to_prune = {
normalize_string("Assistant Secretary for Preparedness and Response Office of the Secretary"): {
"keep": "Assistant Secretary for Preparedness and Response, Office of the Secretary"
"replace_with": "Assistant Secretary for Preparedness and Response, Office of the Secretary"
},
normalize_string("US Geological Survey"): {"keep": "U.S. Geological Survey"},
normalize_string("USDA/OC"): {"keep": "USDA, Office of Communications"},
normalize_string("GSA, IC, OGP WebPortfolio"): {"keep": "GSA, IC, OGP Web Portfolio"},
normalize_string("USDA/ARS/NAL"): {"keep": "USDA, ARS, NAL"},
normalize_string("US Geological Survey"): {"replace_with": "U.S. Geological Survey"},
normalize_string("USDA/OC"): {"replace_with": "USDA, Office of Communications"},
normalize_string("GSA, IC, OGP WebPortfolio"): {"replace_with": "GSA, IC, OGP Web Portfolio"},
normalize_string("USDA/ARS/NAL"): {"replace_with": "USDA, ARS, NAL"},
}
# First: Group all suborganization names by their "normalized" names (finding duplicates).
@ -61,8 +61,8 @@ class Command(BaseCommand):
if normalized_name in extra_records_to_prune:
# The 'keep' field expects a Suborganization but we just pass in a string, so this is just a workaround.
# This assumes that there is only one item in the name_group array (see usda/oc example). Should be fine, given our data.
hardcoded_record_name = extra_records_to_prune[normalized_name]["keep"]
name_group = name_groups.get(normalized_name(hardcoded_record_name), [])
hardcoded_record_name = extra_records_to_prune[normalized_name]["replace_with"]
name_group = name_groups.get(normalize_string(hardcoded_record_name))
keep = name_group[0] if name_group else None
records_to_prune[normalized_name] = {"keep": keep, "delete": duplicate_suborgs}
# Delete duplicates (extra spaces or casing differences)

View file

@ -355,7 +355,7 @@ def normalize_string(string_to_normalize, lowercase=True):
return new_string.lower() if lowercase else new_string
def count_capitals(text: str, leading_only: bool):
def count_capitals(text: str, leading_only: bool):
"""Counts capital letters in a string.
Args:
text (str): The string to analyze.

View file

@ -39,6 +39,7 @@ from epplibwrapper import (
ErrorCode,
responses,
)
from registrar.models.suborganization import Suborganization
from registrar.models.utility.portfolio_helper import UserPortfolioPermissionChoices, UserPortfolioRoleChoices
from registrar.models.user_domain_role import UserDomainRole
@ -905,6 +906,7 @@ class MockDb(TestCase):
DomainInformation.objects.all().delete()
DomainRequest.objects.all().delete()
UserDomainRole.objects.all().delete()
Suborganization.objects.all().delete()
Portfolio.objects.all().delete()
UserPortfolioPermission.objects.all().delete()
User.objects.all().delete()

View file

@ -1,4 +1,5 @@
import copy
from io import StringIO
import boto3_mocking # type: ignore
from datetime import date, datetime, time
from django.core.management import call_command
@ -32,7 +33,7 @@ import tablib
from unittest.mock import patch, call, MagicMock, mock_open
from epplibwrapper import commands, common
from .common import MockEppLib, less_console_noise, completed_domain_request, MockSESClient
from .common import MockEppLib, less_console_noise, completed_domain_request, MockSESClient, MockDbForIndividualTests
from api.tests.common import less_console_noise_decorator
@ -1731,3 +1732,159 @@ class TestCreateFederalPortfolio(TestCase):
self.assertEqual(existing_portfolio.organization_name, self.federal_agency.agency)
self.assertEqual(existing_portfolio.notes, "Old notes")
self.assertEqual(existing_portfolio.creator, self.user)
def test_post_process_suborganization_fields(self):
"""Test suborganization field updates from domain and request data.
Also tests the priority order for updating city and state_territory:
1. Domain information fields
2. Domain request suborganization fields
3. Domain request standard fields
"""
# Create test data with different field combinations
self.domain_info.organization_name = "super"
self.domain_info.city = "Domain City "
self.domain_info.state_territory = "NY"
self.domain_info.save()
self.domain_request_2.organization_name = "super"
self.domain_request.suborganization_city = "Request Suborg City"
self.domain_request.suborganization_state_territory = "CA"
self.domain_request.city = "Request City"
self.domain_request.state_territory = "TX"
self.domain_request.save()
# Create another request/info pair without domain info data
self.domain_info_2.organization_name = "creative"
self.domain_info_2.city = None
self.domain_info_2.state_territory = None
self.domain_info_2.save()
self.domain_request_2.organization_name = "creative"
self.domain_request_2.suborganization_city = "Second Suborg City"
self.domain_request_2.suborganization_state_territory = "WA"
self.domain_request_2.city = "Second City"
self.domain_request_2.state_territory = "OR"
self.domain_request_2.save()
# Create a third request/info pair without suborg data
self.domain_info_3.organization_name = "names"
self.domain_info_3.city = None
self.domain_info_3.state_territory = None
self.domain_info_3.save()
self.domain_request_3.organization_name = "names"
self.domain_request_3.suborganization_city = None
self.domain_request_3.suborganization_state_territory = None
self.domain_request_3.city = "Third City"
self.domain_request_3.state_territory = "FL"
self.domain_request_3.save()
# Test running the script with both, and just with parse_requests
self.run_create_federal_portfolio(agency_name="Test Federal Agency", parse_requests=True, parse_domains=True)
self.run_create_federal_portfolio(
agency_name="Executive Agency 1",
parse_requests=True,
)
self.domain_info.refresh_from_db()
self.domain_request.refresh_from_db()
self.domain_info_2.refresh_from_db()
self.domain_request_2.refresh_from_db()
self.domain_info_3.refresh_from_db()
self.domain_request_3.refresh_from_db()
# Verify suborganizations were created with correct field values
# Should use domain info values
suborg_1 = Suborganization.objects.get(name=self.domain_info.organization_name)
self.assertEqual(suborg_1.city, "Domain City")
self.assertEqual(suborg_1.state_territory, "NY")
# Should use domain request suborg values
suborg_2 = Suborganization.objects.get(name=self.domain_info_2.organization_name)
self.assertEqual(suborg_2.city, "Second Suborg City")
self.assertEqual(suborg_2.state_territory, "WA")
# Should use domain request standard values
suborg_3 = Suborganization.objects.get(name=self.domain_info_3.organization_name)
self.assertEqual(suborg_3.city, "Third City")
self.assertEqual(suborg_3.state_territory, "FL")
class TestPatchSuborganizations(MockDbForIndividualTests):
"""Tests for the patch_suborganizations management command."""
@less_console_noise_decorator
def run_patch_suborganizations(self):
"""Helper method to run the patch_suborganizations command."""
with patch(
"registrar.management.commands.utility.terminal_helper.TerminalHelper.prompt_for_execution",
return_value=True,
):
call_command("patch_suborganizations")
@less_console_noise_decorator
def test_space_and_case_duplicates(self):
"""Test cleaning up duplicates that differ by spaces and case.
Should keep the version with:
1. Fewest spaces
2. Most leading capitals
"""
Suborganization.objects.create(name="Test Organization ", portfolio=self.portfolio_1)
Suborganization.objects.create(name="test organization", portfolio=self.portfolio_1)
Suborganization.objects.create(name="Test Organization", portfolio=self.portfolio_1)
# Create an unrelated record to test that it doesn't get deleted, too
Suborganization.objects.create(name="unrelated org", portfolio=self.portfolio_1)
self.run_patch_suborganizations()
self.assertEqual(Suborganization.objects.count(), 2)
self.assertEqual(Suborganization.objects.filter(name__in=["unrelated org", "Test Organization"]).count(), 2)
@less_console_noise_decorator
def test_hardcoded_record(self):
"""Tests that our hardcoded records update as we expect them to"""
# Create orgs with old and new name formats
old_name = "USDA/OC"
new_name = "USDA, Office of Communications"
Suborganization.objects.create(name=old_name, portfolio=self.portfolio_1)
Suborganization.objects.create(name=new_name, portfolio=self.portfolio_1)
self.run_patch_suborganizations()
# Verify only the new one remains
self.assertEqual(Suborganization.objects.count(), 1)
remaining = Suborganization.objects.first()
self.assertEqual(remaining.name, new_name)
@less_console_noise_decorator
def test_reference_updates(self):
"""Test that references are updated on domain info and domain request before deletion."""
# Create suborganizations
keep_org = Suborganization.objects.create(name="Test Organization", portfolio=self.portfolio_1)
delete_org = Suborganization.objects.create(name="test organization ", portfolio=self.portfolio_1)
unrelated_org = Suborganization.objects.create(name="awesome", portfolio=self.portfolio_1)
# We expect these references to update
self.domain_request_1.sub_organization = delete_org
self.domain_information_1.sub_organization = delete_org
self.domain_request_1.save()
self.domain_information_1.save()
# But not these ones
self.domain_request_2.sub_organization = unrelated_org
self.domain_information_2.sub_organization = unrelated_org
self.domain_request_2.save()
self.domain_information_2.save()
self.run_patch_suborganizations()
self.domain_request_1.refresh_from_db()
self.domain_information_1.refresh_from_db()
self.domain_request_2.refresh_from_db()
self.domain_information_2.refresh_from_db()
self.assertEqual(self.domain_request_1.sub_organization, keep_org)
self.assertEqual(self.domain_information_1.sub_organization, keep_org)
self.assertEqual(self.domain_request_2.sub_organization, unrelated_org)
self.assertEqual(self.domain_information_2.sub_organization, unrelated_org)