Simplify Load_Organization_data

zandercymatics 2023-11-17 15:14:02 -07:00
parent 2f9896686b
commit b875a4583d
No known key found for this signature in database
GPG key ID: FF4636ABEC9682B7
4 changed files with 120 additions and 172 deletions


@@ -105,10 +105,7 @@ class Command(BaseCommand):
if not proceed:
return None
logger.info(
f"{TerminalColors.MAGENTA}"
"Loading organization data onto TransitionDomain tables..."
)
logger.info(f"{TerminalColors.MAGENTA}" "Loading organization data onto TransitionDomain tables...")
load = OrganizationDataLoader(args)
domain_information_to_update = load.update_organization_data_for_all()
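
The collapsed logger.info call above leans on Python joining adjacent string literals, including an f-string next to a plain string, into one string at compile time. A minimal sketch of that behavior, using an assumed ANSI magenta escape code in place of TerminalColors.MAGENTA:

# Adjacent literals are concatenated before the call runs, so both forms are identical.
MAGENTA = "\033[95m"  # assumed escape code, for illustration only
multi_line = (
    f"{MAGENTA}"
    "Loading organization data onto TransitionDomain tables..."
)
one_line = f"{MAGENTA}" "Loading organization data onto TransitionDomain tables..."
assert multi_line == one_line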
@@ -135,11 +132,7 @@ class Command(BaseCommand):
return None
if len(domain_information_to_update) == 0:
logger.error(
f"{TerminalColors.MAGENTA}"
"No DomainInformation objects exist"
f"{TerminalColors.ENDC}"
)
logger.error(f"{TerminalColors.MAGENTA}" "No DomainInformation objects exist" f"{TerminalColors.ENDC}")
return None
logger.info(
@@ -152,121 +145,89 @@ class Command(BaseCommand):
def update_domain_information(self, desired_objects: List[TransitionDomain], debug):
di_to_update = []
di_failed_to_update = []
# These are fields that we COULD update but choose not to.
# For instance, if the user already entered data, let's not corrupt it.
di_skipped = []
# Grab each TransitionDomain we want to change. Store it.
# Fetches all TransitionDomains in one query.
# Grab each TransitionDomain we want to change.
transition_domains = TransitionDomain.objects.filter(
username__in=[item.username for item in desired_objects],
domain_name__in=[item.domain_name for item in desired_objects]
domain_name__in=[item.domain_name for item in desired_objects],
).distinct()
if len(desired_objects) != len(transition_domains):
raise Exception("Could not find all desired TransitionDomains")
# Then, for each domain_name grab the associated domain object.
# Fetches all Domains in one query.
domains = Domain.objects.filter(
name__in=[td.domain_name for td in transition_domains]
)
domains = Domain.objects.filter(name__in=[td.domain_name for td in transition_domains])
# Create dictionary for faster lookup
domains_dict = {d.name: d for d in domains}
# Start with all DomainInformation objects
filtered_domain_informations = DomainInformation.objects.all()
changed_fields = [
"address_line1",
"city",
"state_territory",
"zipcode",
]
# Chain filter calls for each field. This checks to see if the end user
# made a change to ANY field in changed_fields. If they did, don't update their information.
# We assume that if they made a change, we don't want to interfere with that.
for field in changed_fields:
# For each changed_field, check if no data exists
filtered_domain_informations = filtered_domain_informations.filter(**{f'{field}__isnull': True})
# Then, use each domain object to map domain <--> DomainInformation
# Fetches all DomainInformations in one query.
# If any related organization fields have been updated,
# we can assume the user modified this information themselves, so we should not update it.
domain_informations = filtered_domain_informations.filter(
domain__in=domains
domain__in=domains,
address_line1__isnull=True,
city__isnull=True,
state_territory__isnull=True,
zipcode__isnull=True,
)
# Create dictionaries for faster lookup
domains_dict = {d.name: d for d in domains}
domain_informations_dict = {di.domain.name: di for di in domain_informations}
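
The single filter call above replaces the removed loop that chained one .filter(field__isnull=True) per entry in changed_fields; for plain columns on one model the two spellings select the same rows. A minimal sketch of that equivalence, assuming a Django shell where DomainInformation is importable:

changed_fields = ["address_line1", "city", "state_territory", "zipcode"]

# Removed approach: chain one filter() call per field.
chained = DomainInformation.objects.all()
for field in changed_fields:
    chained = chained.filter(**{f"{field}__isnull": True})

# New approach: one filter() call carrying every condition.
combined = DomainInformation.objects.filter(
    **{f"{field}__isnull": True for field in changed_fields}
)

# Both AND the conditions together, so the row sets match.
assert set(chained.values_list("pk", flat=True)) == set(combined.values_list("pk", flat=True))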
for item in transition_domains:
try:
should_update = True
# Grab the current Domain. This ensures we are pointing towards the right place.
if item.domain_name not in domains_dict:
logger.error(f"Could not add {item.domain_name}. Domain does not exist.")
di_failed_to_update.append(item)
continue
current_domain = domains_dict[item.domain_name]
# Based on the current domain, grab the right DomainInformation object.
if current_domain.name in domain_informations_dict:
current_domain_information = domain_informations_dict[current_domain.name]
current_domain_information.address_line1 = item.address_line
current_domain_information.city = item.city
current_domain_information.state_territory = item.state_territory
current_domain_information.zipcode = item.zipcode
if debug:
logger.info(f"Updating {current_domain.name}...")
else:
if current_domain.name not in domain_informations_dict:
logger.info(
f"{TerminalColors.YELLOW}"
f"Domain {current_domain.name} was updated by a user. Cannot update."
f"{TerminalColors.ENDC}"
)
should_update = False
except Exception as err:
logger.error(err)
di_failed_to_update.append(item)
else:
if should_update:
di_to_update.append(current_domain_information)
else:
# TODO either update to name for all,
# or have this filter to the right field
di_skipped.append(item)
continue
if len(di_failed_to_update) > 0:
# Based on the current domain, grab the right DomainInformation object.
current_domain_information = domain_informations_dict[current_domain.name]
# Update fields
current_domain_information.address_line1 = item.address_line
current_domain_information.city = item.city
current_domain_information.state_territory = item.state_territory
current_domain_information.zipcode = item.zipcode
di_to_update.append(current_domain_information)
if debug:
logger.info(f"Updated {current_domain.name}...")
if di_failed_to_update:
failed = [item.domain_name for item in di_failed_to_update]
logger.error(
f"{TerminalColors.FAIL}"
"Failed to update. An exception was encountered "
f"on the following TransitionDomains: {[item for item in di_failed_to_update]}"
f"{TerminalColors.ENDC}"
f"""{TerminalColors.FAIL}
Failed to update. An exception was encountered on the following TransitionDomains: {failed}
{TerminalColors.ENDC}"""
)
raise Exception("Failed to update DomainInformations")
skipped_count = len(di_skipped)
if skipped_count > 0:
logger.info(f"Skipped updating {skipped_count} fields. User-supplied data exists")
if di_skipped:
logger.info(f"Skipped updating {len(di_skipped)} fields. User-supplied data exists")
if not debug:
logger.info(
f"Ready to update {len(di_to_update)} TransitionDomains."
)
else:
logger.info(
f"Ready to update {len(di_to_update)} TransitionDomains: {[item for item in di_to_update]}"
)
self.bulk_update_domain_information(di_to_update, debug)
logger.info(
f"{TerminalColors.MAGENTA}"
"Beginning mass DomainInformation update..."
f"{TerminalColors.ENDC}"
)
def bulk_update_domain_information(self, di_to_update, debug):
if debug:
logger.info(f"Updating these TransitionDomains: {[item for item in di_to_update]}")
logger.info(f"Ready to update {len(di_to_update)} TransitionDomains.")
logger.info(f"{TerminalColors.MAGENTA}" "Beginning mass DomainInformation update..." f"{TerminalColors.ENDC}")
changed_fields = [
"address_line1",
@@ -283,15 +244,9 @@ class Command(BaseCommand):
page = paginator.page(page_num)
DomainInformation.objects.bulk_update(page.object_list, changed_fields)
if not debug:
if debug:
logger.info(f"Updated these DomainInformations: {[item for item in di_to_update]}")
logger.info(
f"{TerminalColors.OKGREEN}"
f"Updated {len(di_to_update)} DomainInformations."
f"{TerminalColors.ENDC}"
)
else:
logger.info(
f"{TerminalColors.OKGREEN}"
f"Updated {len(di_to_update)} DomainInformations: {[item for item in di_to_update]}"
f"{TerminalColors.ENDC}"
f"{TerminalColors.OKGREEN}" f"Updated {len(di_to_update)} DomainInformations." f"{TerminalColors.ENDC}"
)
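
The hunk above walks Django's Paginator so bulk_update writes the rows page by page rather than in one statement. A minimal sketch of that batching pattern as a free-standing helper (the name batched_bulk_update and the page size are assumptions, not part of the commit):

from django.core.paginator import Paginator

def batched_bulk_update(model, objects, fields, batch_size=1000):
    """Persist already-modified instances of `model` in fixed-size pages."""
    paginator = Paginator(objects, batch_size)
    for page_num in paginator.page_range:
        page = paginator.page(page_num)
        # One UPDATE per page instead of one query per object.
        model.objects.bulk_update(page.object_list, fields)

Under those assumptions the command would call batched_bulk_update(DomainInformation, di_to_update, changed_fields).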


@@ -751,8 +751,10 @@ class FileDataHolder:
full_filename = date + "." + filename_without_date
return (full_filename, can_infer)
class OrganizationDataLoader:
"""Saves organization data onto Transition Domains. Handles file parsing."""
def __init__(self, options: TransitionDomainArguments):
# Globally stores event logs and organizes them
self.parse_logs = FileTransitionLog()
@@ -785,9 +787,7 @@ class OrganizationDataLoader:
all_transition_domains = TransitionDomain.objects.all()
if len(all_transition_domains) < 1:
raise Exception(
f"{TerminalColors.FAIL}"
"No TransitionDomains exist. Cannot update."
f"{TerminalColors.ENDC}"
f"{TerminalColors.FAIL}" "No TransitionDomains exist. Cannot update." f"{TerminalColors.ENDC}"
)
# Store all actions we want to perform in tds_to_update
@@ -828,20 +828,14 @@ class OrganizationDataLoader:
raise Exception("Failed to update TransitionDomains")
if not self.debug:
logger.info(
f"Ready to update {len(self.tds_to_update)} TransitionDomains."
)
logger.info(f"Ready to update {len(self.tds_to_update)} TransitionDomains.")
else:
logger.info(
f"Ready to update {len(self.tds_to_update)} TransitionDomains: {[item for item in self.tds_failed_to_update]}"
)
def bulk_update_transition_domains(self, update_list):
logger.info(
f"{TerminalColors.MAGENTA}"
"Beginning mass TransitionDomain update..."
f"{TerminalColors.ENDC}"
)
logger.info(f"{TerminalColors.MAGENTA}" "Beginning mass TransitionDomain update..." f"{TerminalColors.ENDC}")
changed_fields = [
"address_line",
@@ -1032,9 +1026,7 @@ class ExtraTransitionDomain:
# metadata about each file and associate it with an enum.
# That way if we want the data located at the agency_adhoc file,
# we can just call EnumFilenames.AGENCY_ADHOC.
if (
options.pattern_map_params is None or options.pattern_map_params == []
):
if options.pattern_map_params is None or options.pattern_map_params == []:
options.pattern_map_params = [
(
EnumFilenames.AGENCY_ADHOC,
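
The comment above describes keeping per-file metadata keyed by an EnumFilenames member, so the agency_adhoc data is reachable through EnumFilenames.AGENCY_ADHOC alone. A minimal sketch of that lookup shape, with a stand-in enum and hypothetical field names and filenames rather than the real containers:

from dataclasses import dataclass
from enum import Enum

class EnumFilenames(Enum):  # stand-in for the enum in epp_data_containers
    AGENCY_ADHOC = "agency_adhoc"
    DOMAIN_ADHOC = "domain_adhoc"

@dataclass
class FileMetadata:  # hypothetical per-file record
    filename: str
    can_infer: bool = False

pattern_map = {
    EnumFilenames.AGENCY_ADHOC: FileMetadata("agency_adhoc.csv"),
    EnumFilenames.DOMAIN_ADHOC: FileMetadata("domain_adhoc.csv"),
}

# Callers ask for data by enum member, never by raw filename.
agency_metadata = pattern_map[EnumFilenames.AGENCY_ADHOC]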


@@ -1,5 +1,5 @@
from dataclasses import dataclass, field
from typing import List, Optional
from typing import Optional
from registrar.management.commands.utility.epp_data_containers import EnumFilenames


@@ -18,6 +18,7 @@ from unittest.mock import patch
from .common import less_console_noise
class TestOrganizationMigration(TestCase):
def setUp(self):
""" """
@@ -171,33 +172,32 @@ class TestOrganizationMigration(TestCase):
# Lets test the first one
transition = transition_domains.first()
expected_transition_domain = TransitionDomain(
username='alexandra.bobbitt5@test.com',
domain_name='fakewebsite2.gov',
status='on hold',
username="alexandra.bobbitt5@test.com",
domain_name="fakewebsite2.gov",
status="on hold",
email_sent=True,
organization_type='Federal',
organization_name='Fanoodle',
federal_type='Executive',
federal_agency='Department of Commerce',
organization_type="Federal",
organization_name="Fanoodle",
federal_type="Executive",
federal_agency="Department of Commerce",
epp_creation_date=datetime.date(2004, 5, 7),
epp_expiration_date=datetime.date(2023, 9, 30),
first_name='Seline',
middle_name='testmiddle2',
last_name='Tower',
first_name="Seline",
middle_name="testmiddle2",
last_name="Tower",
title=None,
email='stower3@answers.com',
phone='151-539-6028',
address_line='93001 Arizona Drive',
city='Columbus',
state_territory='Oh',
zipcode='43268'
email="stower3@answers.com",
phone="151-539-6028",
address_line="93001 Arizona Drive",
city="Columbus",
state_territory="Oh",
zipcode="43268",
)
expected_transition_domain.id = transition.id
self.assertEqual(transition, expected_transition_domain)
def test_load_organization_data_domain_information(self):
self.maxDiff = None
# == First, parse all existing data == #
self.run_load_domains()
self.run_transfer_domains()
@@ -209,10 +209,10 @@ class TestOrganizationMigration(TestCase):
_domain = Domain.objects.filter(name="fakewebsite2.gov").get()
domain_information = DomainInformation.objects.filter(domain=_domain).get()
self.assertEqual(domain_information.address_line1, '93001 Arizona Drive')
self.assertEqual(domain_information.city, 'Columbus')
self.assertEqual(domain_information.state_territory, 'Oh')
self.assertEqual(domain_information.zipcode, '43268')
self.assertEqual(domain_information.address_line1, "93001 Arizona Drive")
self.assertEqual(domain_information.city, "Columbus")
self.assertEqual(domain_information.state_territory, "Oh")
self.assertEqual(domain_information.zipcode, "43268")
def test_load_organization_data_integrity(self):
"""Validates data integrity with the load_org_data command"""
@@ -245,6 +245,7 @@ class TestOrganizationMigration(TestCase):
expected_missing_domain_invitations,
)
class TestMigrations(TestCase):
def setUp(self):
""" """