Simplify Load_Organization_data

zandercymatics 2023-11-17 15:14:02 -07:00
parent 2f9896686b
commit b875a4583d
No known key found for this signature in database
GPG key ID: FF4636ABEC9682B7
4 changed files with 120 additions and 172 deletions

View file

@@ -105,10 +105,7 @@ class Command(BaseCommand):
         if not proceed:
             return None
 
-        logger.info(
-            f"{TerminalColors.MAGENTA}"
-            "Loading organization data onto TransitionDomain tables..."
-        )
+        logger.info(f"{TerminalColors.MAGENTA}" "Loading organization data onto TransitionDomain tables...")
 
         load = OrganizationDataLoader(args)
         domain_information_to_update = load.update_organization_data_for_all()
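Both the old and new spellings of these logger calls rely on Python's implicit concatenation of adjacent string literals, so collapsing them onto one line leaves the logged message unchanged. A minimal, standalone sketch; the ANSI codes below are stand-ins, not the real TerminalColors values:

MAGENTA = "\033[35m"  # assumption: TerminalColors.MAGENTA is an ANSI escape along these lines
ENDC = "\033[0m"  # assumption: TerminalColors.ENDC resets the color

single_line = f"{MAGENTA}" "Loading organization data onto TransitionDomain tables..." f"{ENDC}"
multi_line = (
    f"{MAGENTA}"
    "Loading organization data onto TransitionDomain tables..."
    f"{ENDC}"
)
assert single_line == multi_line  # adjacent literals are joined at compile time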
@@ -135,11 +132,7 @@ class Command(BaseCommand):
             return None
 
         if len(domain_information_to_update) == 0:
-            logger.error(
-                f"{TerminalColors.MAGENTA}"
-                "No DomainInformation objects exist"
-                f"{TerminalColors.ENDC}"
-            )
+            logger.error(f"{TerminalColors.MAGENTA}" "No DomainInformation objects exist" f"{TerminalColors.ENDC}")
             return None
 
         logger.info(
@@ -148,125 +141,93 @@ class Command(BaseCommand):
             f"{TerminalColors.ENDC}"
         )
         self.update_domain_information(domain_information_to_update, args.debug)
 
     def update_domain_information(self, desired_objects: List[TransitionDomain], debug):
         di_to_update = []
         di_failed_to_update = []
-        # These are fields that we COULD update, but fields we choose not to update.
-        # For instance, if the user already entered data - lets not corrupt that.
         di_skipped = []
 
-        # Grab each TransitionDomain we want to change. Store it.
-        # Fetches all TransitionDomains in one query.
+        # Grab each TransitionDomain we want to change.
         transition_domains = TransitionDomain.objects.filter(
             username__in=[item.username for item in desired_objects],
-            domain_name__in=[item.domain_name for item in desired_objects]
+            domain_name__in=[item.domain_name for item in desired_objects],
         ).distinct()
 
         if len(desired_objects) != len(transition_domains):
             raise Exception("Could not find all desired TransitionDomains")
 
         # Then, for each domain_name grab the associated domain object.
-        # Fetches all Domains in one query.
-        domains = Domain.objects.filter(
-            name__in=[td.domain_name for td in transition_domains]
-        )
+        domains = Domain.objects.filter(name__in=[td.domain_name for td in transition_domains])
+
+        # Create dictionary for faster lookup
+        domains_dict = {d.name: d for d in domains}
 
         # Start with all DomainInformation objects
         filtered_domain_informations = DomainInformation.objects.all()
 
-        changed_fields = [
-            "address_line1",
-            "city",
-            "state_territory",
-            "zipcode",
-        ]
-
-        # Chain filter calls for each field. This checks to see if the end user
-        # made a change to ANY field in changed_fields. If they did, don't update their information.
-        # We assume that if they made a change, we don't want to interfere with that.
-        for field in changed_fields:
-            # For each changed_field, check if no data exists
-            filtered_domain_informations = filtered_domain_informations.filter(**{f'{field}__isnull': True})
-
         # Then, use each domain object to map domain <--> DomainInformation
         # Fetches all DomainInformations in one query.
+        # If any related organization fields have been updated,
+        # we can assume that they modified this information themselves - thus we should not update it.
         domain_informations = filtered_domain_informations.filter(
-            domain__in=domains
+            domain__in=domains,
+            address_line1__isnull=True,
+            city__isnull=True,
+            state_territory__isnull=True,
+            zipcode__isnull=True,
         )
-
-        # Create dictionaries for faster lookup
-        domains_dict = {d.name: d for d in domains}
         domain_informations_dict = {di.domain.name: di for di in domain_informations}
 
         for item in transition_domains:
-            try:
-                should_update = True
-                # Grab the current Domain. This ensures we are pointing towards the right place.
-                if item.domain_name not in domains_dict:
-                    logger.error(f"Could not add {item.domain_name}. Domain does not exist.")
-                    di_failed_to_update.append(item)
-                    continue
-                current_domain = domains_dict[item.domain_name]
-                # Based on the current domain, grab the right DomainInformation object.
-                if current_domain.name in domain_informations_dict:
-                    current_domain_information = domain_informations_dict[current_domain.name]
-                    current_domain_information.address_line1 = item.address_line
-                    current_domain_information.city = item.city
-                    current_domain_information.state_territory = item.state_territory
-                    current_domain_information.zipcode = item.zipcode
-                    if debug:
-                        logger.info(f"Updating {current_domain.name}...")
-                else:
-                    logger.info(
-                        f"{TerminalColors.YELLOW}"
-                        f"Domain {current_domain.name} was updated by a user. Cannot update."
-                        f"{TerminalColors.ENDC}"
-                    )
-                    should_update = False
-            except Exception as err:
-                logger.error(err)
-                di_failed_to_update.append(item)
-            else:
-                if should_update:
-                    di_to_update.append(current_domain_information)
-                else:
-                    # TODO either update to name for all,
-                    # or have this filter to the right field
-                    di_skipped.append(item)
+            if item.domain_name not in domains_dict:
+                logger.error(f"Could not add {item.domain_name}. Domain does not exist.")
+                di_failed_to_update.append(item)
+                continue
+
+            current_domain = domains_dict[item.domain_name]
+            if current_domain.name not in domain_informations_dict:
+                logger.info(
+                    f"{TerminalColors.YELLOW}"
+                    f"Domain {current_domain.name} was updated by a user. Cannot update."
+                    f"{TerminalColors.ENDC}"
+                )
+                di_skipped.append(item)
+                continue
+
+            # Based on the current domain, grab the right DomainInformation object.
+            current_domain_information = domain_informations_dict[current_domain.name]
+
+            # Update fields
+            current_domain_information.address_line1 = item.address_line
+            current_domain_information.city = item.city
+            current_domain_information.state_territory = item.state_territory
+            current_domain_information.zipcode = item.zipcode
+
+            di_to_update.append(current_domain_information)
+            if debug:
+                logger.info(f"Updated {current_domain.name}...")
 
-        if len(di_failed_to_update) > 0:
+        if di_failed_to_update:
+            failed = [item.domain_name for item in di_failed_to_update]
             logger.error(
-                f"{TerminalColors.FAIL}"
-                "Failed to update. An exception was encountered "
-                f"on the following TransitionDomains: {[item for item in di_failed_to_update]}"
-                f"{TerminalColors.ENDC}"
+                f"""{TerminalColors.FAIL}
+                Failed to update. An exception was encountered on the following TransitionDomains: {failed}
+                {TerminalColors.ENDC}"""
             )
             raise Exception("Failed to update DomainInformations")
 
-        skipped_count = len(di_skipped)
-        if skipped_count > 0:
-            logger.info(f"Skipped updating {skipped_count} fields. User-supplied data exists")
-
-        if not debug:
-            logger.info(
-                f"Ready to update {len(di_to_update)} TransitionDomains."
-            )
-        else:
-            logger.info(
-                f"Ready to update {len(di_to_update)} TransitionDomains: {[item for item in di_to_update]}"
-            )
+        if di_skipped:
+            logger.info(f"Skipped updating {len(di_skipped)} fields. User-supplied data exists")
 
-        logger.info(
-            f"{TerminalColors.MAGENTA}"
-            "Beginning mass DomainInformation update..."
-            f"{TerminalColors.ENDC}"
-        )
+        self.bulk_update_domain_information(di_to_update, debug)
+
+    def bulk_update_domain_information(self, di_to_update, debug):
+        if debug:
+            logger.info(f"Updating these TransitionDomains: {[item for item in di_to_update]}")
+
+        logger.info(f"Ready to update {len(di_to_update)} TransitionDomains.")
+
+        logger.info(f"{TerminalColors.MAGENTA}" "Beginning mass DomainInformation update..." f"{TerminalColors.ENDC}")
 
         changed_fields = [
             "address_line1",
@@ -283,15 +244,9 @@ class Command(BaseCommand):
             page = paginator.page(page_num)
             DomainInformation.objects.bulk_update(page.object_list, changed_fields)
 
-        if not debug:
-            logger.info(
-                f"{TerminalColors.OKGREEN}"
-                f"Updated {len(di_to_update)} DomainInformations."
-                f"{TerminalColors.ENDC}"
-            )
-        else:
-            logger.info(
-                f"{TerminalColors.OKGREEN}"
-                f"Updated {len(di_to_update)} DomainInformations: {[item for item in di_to_update]}"
-                f"{TerminalColors.ENDC}"
-            )
+        if debug:
+            logger.info(f"Updated these DomainInformations: {[item for item in di_to_update]}")
+
+        logger.info(
+            f"{TerminalColors.OKGREEN}" f"Updated {len(di_to_update)} DomainInformations." f"{TerminalColors.ENDC}"
+        )
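The hunk above only reshapes the logging around the existing Paginator-driven `bulk_update` loop. For reference, a minimal sketch of that batching pattern; the helper name and the batch size are assumptions, not values taken from the command:

from django.core.paginator import Paginator

from registrar.models import DomainInformation  # assumed import path


def bulk_update_in_batches(objects_to_update, changed_fields, batch_size=1000):
    """Write modified instances back in pages rather than in one enormous UPDATE."""
    paginator = Paginator(objects_to_update, batch_size)
    for page_num in paginator.page_range:
        page = paginator.page(page_num)
        # bulk_update() writes only the listed fields and does not call save() or send signals.
        DomainInformation.objects.bulk_update(page.object_list, changed_fields)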

View file

@@ -751,26 +751,28 @@ class FileDataHolder:
         full_filename = date + "." + filename_without_date
         return (full_filename, can_infer)
 
 
 class OrganizationDataLoader:
     """Saves organization data onto Transition Domains. Handles file parsing."""
 
     def __init__(self, options: TransitionDomainArguments):
         # Globally stores event logs and organizes them
         self.parse_logs = FileTransitionLog()
 
         self.debug = options.debug
 
         options.pattern_map_params = [
             (
                 EnumFilenames.DOMAIN_ADDITIONAL,
                 options.domain_additional_filename,
                 DomainAdditionalData,
                 "domainname",
             ),
             (
                 EnumFilenames.ORGANIZATION_ADHOC,
                 options.organization_adhoc_filename,
                 OrganizationAdhoc,
                 "orgid",
             ),
         ]
 
         # Reads and parses organization data
         self.parsed_data = ExtraTransitionDomain(options)
@@ -779,15 +781,13 @@ class OrganizationDataLoader:
         self.tds_to_update = []
         self.tds_failed_to_update = []
 
     def update_organization_data_for_all(self):
         """Updates org data for all TransitionDomains"""
         all_transition_domains = TransitionDomain.objects.all()
         if len(all_transition_domains) < 1:
             raise Exception(
-                f"{TerminalColors.FAIL}"
-                "No TransitionDomains exist. Cannot update."
-                f"{TerminalColors.ENDC}"
+                f"{TerminalColors.FAIL}" "No TransitionDomains exist. Cannot update." f"{TerminalColors.ENDC}"
             )
 
         # Store all actions we want to perform in tds_to_update
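This hunk only collapses the exception message onto one line. A side note on the guard itself (not something the commit changes): `len(queryset) < 1` evaluates the whole queryset, whereas `QuerySet.exists()` asks the database a cheaper question. A sketch under the same assumed import:

from registrar.models import TransitionDomain  # assumed import path

if not TransitionDomain.objects.exists():
    raise Exception("No TransitionDomains exist. Cannot update.")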
@@ -822,26 +822,20 @@ class OrganizationDataLoader:
         if len(self.tds_failed_to_update) > 0:
             logger.error(
                 "Failed to update. An exception was encountered "
                 f"on the following TransitionDomains: {[item for item in self.tds_failed_to_update]}"
             )
             raise Exception("Failed to update TransitionDomains")
 
         if not self.debug:
-            logger.info(
-                f"Ready to update {len(self.tds_to_update)} TransitionDomains."
-            )
+            logger.info(f"Ready to update {len(self.tds_to_update)} TransitionDomains.")
         else:
             logger.info(
                 f"Ready to update {len(self.tds_to_update)} TransitionDomains: {[item for item in self.tds_failed_to_update]}"
             )
 
     def bulk_update_transition_domains(self, update_list):
-        logger.info(
-            f"{TerminalColors.MAGENTA}"
-            "Beginning mass TransitionDomain update..."
-            f"{TerminalColors.ENDC}"
-        )
+        logger.info(f"{TerminalColors.MAGENTA}" "Beginning mass TransitionDomain update..." f"{TerminalColors.ENDC}")
 
         changed_fields = [
             "address_line",
@@ -905,7 +899,7 @@ class OrganizationDataLoader:
         self.log_add_or_changed_values(EnumFilenames.AUTHORITY_ADHOC, changed_fields, domain_name)
 
         return transition_domain
 
     def get_org_info(self, domain_name) -> OrganizationAdhoc:
         """Maps an id given in get_domain_data to a organization_adhoc
         record which has its corresponding definition"""
@@ -914,17 +908,17 @@ class OrganizationDataLoader:
             return None
         org_id = domain_info.orgid
         return self.get_organization_adhoc(org_id)
 
     def get_organization_adhoc(self, desired_id) -> OrganizationAdhoc:
         """Grabs a corresponding row within the ORGANIZATION_ADHOC file,
         based off a desired_id"""
         return self.get_object_by_id(EnumFilenames.ORGANIZATION_ADHOC, desired_id)
 
     def get_domain_data(self, desired_id) -> DomainAdditionalData:
         """Grabs a corresponding row within the DOMAIN_ADDITIONAL file,
         based off a desired_id"""
         return self.get_object_by_id(EnumFilenames.DOMAIN_ADDITIONAL, desired_id)
 
     def get_object_by_id(self, file_type: EnumFilenames, desired_id):
         """Returns a field in a dictionary based off the type and id.
@@ -1032,9 +1026,7 @@ class ExtraTransitionDomain:
         # metadata about each file and associate it with an enum.
         # That way if we want the data located at the agency_adhoc file,
         # we can just call EnumFilenames.AGENCY_ADHOC.
-        if (
-            options.pattern_map_params is None or options.pattern_map_params == []
-        ):
+        if options.pattern_map_params is None or options.pattern_map_params == []:
             options.pattern_map_params = [
                 (
                     EnumFilenames.AGENCY_ADHOC,
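The condition itself is untouched here; only the wrapping parentheses are dropped. As an aside (not what the commit does), because both `None` and `[]` are falsy, the same guard could be expressed as a single truthiness check. A small self-contained sketch with a hypothetical helper:

def ensure_pattern_map_params(params, default):
    """None and an empty list are both falsy, so one truthiness check covers both cases."""
    return params if params else default


assert ensure_pattern_map_params(None, ["default"]) == ["default"]
assert ensure_pattern_map_params([], ["default"]) == ["default"]
assert ensure_pattern_map_params(["custom"], ["default"]) == ["custom"]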

View file

@@ -1,5 +1,5 @@
 from dataclasses import dataclass, field
-from typing import List, Optional
+from typing import Optional
 
 from registrar.management.commands.utility.epp_data_containers import EnumFilenames

View file

@@ -18,6 +18,7 @@ from unittest.mock import patch
 from .common import less_console_noise
 
 
 class TestOrganizationMigration(TestCase):
     def setUp(self):
         """ """
@@ -65,7 +66,7 @@ class TestOrganizationMigration(TestCase):
     def run_transfer_domains(self):
         call_command("transfer_transition_domains_to_domains")
 
     def run_load_organization_data(self):
         # noqa here (E501) because splitting this up makes it
         # confusing to read.
@@ -162,7 +163,7 @@ class TestOrganizationMigration(TestCase):
         # == Second, try adding org data to it == #
         self.run_load_organization_data()
 
         # == Third, test that we've loaded data as we expect == #
         transition_domains = TransitionDomain.objects.filter(domain_name="fakewebsite2.gov")
         # Should return three objects (three unique emails)
@@ -171,33 +172,32 @@ class TestOrganizationMigration(TestCase):
         # Lets test the first one
         transition = transition_domains.first()
         expected_transition_domain = TransitionDomain(
-            username='alexandra.bobbitt5@test.com',
-            domain_name='fakewebsite2.gov',
-            status='on hold',
+            username="alexandra.bobbitt5@test.com",
+            domain_name="fakewebsite2.gov",
+            status="on hold",
             email_sent=True,
-            organization_type='Federal',
-            organization_name='Fanoodle',
-            federal_type='Executive',
-            federal_agency='Department of Commerce',
+            organization_type="Federal",
+            organization_name="Fanoodle",
+            federal_type="Executive",
+            federal_agency="Department of Commerce",
             epp_creation_date=datetime.date(2004, 5, 7),
             epp_expiration_date=datetime.date(2023, 9, 30),
-            first_name='Seline',
-            middle_name='testmiddle2',
-            last_name='Tower',
+            first_name="Seline",
+            middle_name="testmiddle2",
+            last_name="Tower",
             title=None,
-            email='stower3@answers.com',
-            phone='151-539-6028',
-            address_line='93001 Arizona Drive',
-            city='Columbus',
-            state_territory='Oh',
-            zipcode='43268'
+            email="stower3@answers.com",
+            phone="151-539-6028",
+            address_line="93001 Arizona Drive",
+            city="Columbus",
+            state_territory="Oh",
+            zipcode="43268",
         )
         expected_transition_domain.id = transition.id
 
         self.assertEqual(transition, expected_transition_domain)
 
     def test_load_organization_data_domain_information(self):
-        self.maxDiff = None
         # == First, parse all existing data == #
         self.run_load_domains()
         self.run_transfer_domains()
@@ -205,14 +205,14 @@ class TestOrganizationMigration(TestCase):
         # == Second, try adding org data to it == #
         self.run_load_organization_data()
 
         # == Third, test that we've loaded data as we expect == #
         _domain = Domain.objects.filter(name="fakewebsite2.gov").get()
         domain_information = DomainInformation.objects.filter(domain=_domain).get()
 
-        self.assertEqual(domain_information.address_line1, '93001 Arizona Drive')
-        self.assertEqual(domain_information.city, 'Columbus')
-        self.assertEqual(domain_information.state_territory, 'Oh')
-        self.assertEqual(domain_information.zipcode, '43268')
+        self.assertEqual(domain_information.address_line1, "93001 Arizona Drive")
+        self.assertEqual(domain_information.city, "Columbus")
+        self.assertEqual(domain_information.state_territory, "Oh")
+        self.assertEqual(domain_information.zipcode, "43268")
 
     def test_load_organization_data_integrity(self):
         """Validates data integrity with the load_org_data command"""
@@ -222,7 +222,7 @@ class TestOrganizationMigration(TestCase):
         # Second, try adding org data to it
         self.run_load_organization_data()
 
         # Third, test that we didn't corrupt any data
         expected_total_transition_domains = 9
         expected_total_domains = 5
@@ -245,6 +245,7 @@ class TestOrganizationMigration(TestCase):
             expected_missing_domain_invitations,
         )
 
 
 class TestMigrations(TestCase):
     def setUp(self):
         """ """
@@ -308,7 +309,7 @@ class TestMigrations(TestCase):
             migrationJSON=self.migration_json_filename,
             disablePrompts=True,
         )
 
     def run_load_organization_data(self):
         # noqa here (E501) because splitting this up makes it
         # confusing to read.