fix create_suborg step and consolidate

zandercymatics 2025-03-21 10:54:54 -06:00
parent 65e70704eb
commit 46a720abb3
2 changed files with 81 additions and 91 deletions


@@ -37,12 +37,14 @@ class Command(BaseCommand):
         logger.info(f"{TerminalColors.BOLD}{no_changes_message}{TerminalColors.ENDC}")

     def has_changes(self) -> bool:
-        num_changes = [len(self.create), len(self.update), len(self.skip), len(self.fail)]
-        return any([num_change > 0 for num_change in num_changes])
+        changes = [self.create, self.update, self.skip, self.fail]
+        return any([change for change in changes if change])

     def bulk_create(self):
         try:
-            ScriptDataHelper.bulk_create_fields(self.model_class, self.create, quiet=True)
+            res = ScriptDataHelper.bulk_create_fields(self.model_class, self.create, return_created=True, quiet=True)
+            self.create = res
+            return res
         except Exception as err:
             # In this case, just swap the fail and add lists
             self.fail = self.create.copy()
@@ -51,7 +53,9 @@ class Command(BaseCommand):

     def bulk_update(self, fields_to_update):
         try:
-            ScriptDataHelper.bulk_update_fields(self.model_class, self.update, fields_to_update, quiet=True)
+            res = ScriptDataHelper.bulk_update_fields(self.model_class, self.update, fields_to_update, quiet=True)
+            self.update = res
+            return res
         except Exception as err:
             # In this case, just swap the fail and update lists
             self.fail = self.update.copy()
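For reference, a minimal sketch of how these tracker methods are meant to be consumed after this change. The class name ChangeTracker stands in for the helper that owns self.create / self.update / self.fail in this file; that name and the Portfolio example data are illustrative assumptions, not part of the commit:

    # Hypothetical usage sketch of the change-tracker pattern above.
    tracker = ChangeTracker(model_class=Portfolio)
    tracker.create.append(Portfolio(organization_name="Example Agency"))

    if tracker.has_changes():
        # bulk_create() now returns the saved rows and reassigns tracker.create,
        # so later steps work with persisted objects that have primary keys.
        saved = tracker.bulk_create()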
@@ -167,12 +171,11 @@ class Command(BaseCommand):
             organization_name__in=agencies.values_list("agency", flat=True), organization_name__isnull=False
         )
         existing_portfolios_set = {normalize_string(p.organization_name): p for p in existing_portfolios}
-        agencies_set = {normalize_string(agency.agency): agency for agency in agencies}
-        for federal_agency in agencies_set.values():
+        agencies_dict = {normalize_string(agency.agency): agency for agency in agencies}
+        for federal_agency in agencies_dict.values():
             portfolio_name = normalize_string(federal_agency.agency, lowercase=False)
             portfolio = existing_portfolios_set.get(portfolio_name, None)
-            new_portfolio = portfolio is None
-            if new_portfolio:
+            if portfolio is None:
                 portfolio = Portfolio(
                     organization_name=portfolio_name,
                     federal_agency=federal_agency,
@@ -183,36 +186,30 @@ class Command(BaseCommand):
                 )
                 self.portfolio_changes.create.append(portfolio)
                 logger.info(f"{TerminalColors.OKGREEN}Created portfolio '{portfolio}'.{TerminalColors.ENDC}")
-
-            if skip_existing_portfolios and not new_portfolio:
+            elif skip_existing_portfolios:
                 message = f"Portfolio '{portfolio}' already exists. Skipped."
                 logger.info(f"{TerminalColors.YELLOW}{message}{TerminalColors.ENDC}")
-                if portfolio:
-                    self.portfolio_changes.skip.append(portfolio)
+                self.portfolio_changes.skip.append(portfolio)

         # Create portfolios
-        self.portfolio_changes.bulk_create()
+        portfolios_to_use = self.portfolio_changes.bulk_create()

         # After create, get the list of all portfolios to use
         portfolios_to_use = set(self.portfolio_changes.create)
         if not skip_existing_portfolios:
             portfolios_to_use.update(set(existing_portfolios))
+        portfolios_to_use_dict = {normalize_string(p.organization_name): p for p in portfolios_to_use}

         # == Handle suborganizations == #
-        for portfolio in portfolios_to_use:
-            created_suborgs = []
-            org_name = normalize_string(portfolio.organization_name)
-            federal_agency = agencies_set.get(org_name)
-            if portfolio:
-                created_suborgs = self.create_suborganizations(portfolio, federal_agency)
-                Suborganization.objects.bulk_create(created_suborgs)
-                self.suborganization_changes.create.extend(created_suborgs)
+        created_suborgs = self.create_suborganizations(portfolios_to_use_dict, agencies_dict)
+        if created_suborgs:
+            self.suborganization_changes.create.extend(created_suborgs.values())
+            self.suborganization_changes.bulk_create()

         # == Handle domains, requests, and managers == #
-        for portfolio in portfolios_to_use:
-            org_name = normalize_string(portfolio.organization_name)
-            federal_agency = agencies_set.get(org_name)
+        for portfolio_org_name, portfolio in portfolios_to_use_dict.items():
+            federal_agency = agencies_dict.get(portfolio_org_name)

             if parse_domains:
                 self.handle_portfolio_domains(portfolio, federal_agency)
@@ -318,85 +315,66 @@ class Command(BaseCommand):
             display_as_str=True,
         )

-    def create_suborganizations(self, portfolio, federal_agency):
+    def create_suborganizations(self, portfolio_dict, agency_dict):
         """Create Suborganizations tied to the given portfolio based on DomainInformation objects"""
-        base_filter = Q(
-            organization_name__isnull=False,
-        ) & ~Q(organization_name__iexact=F("portfolio__organization_name"))
-        domains = federal_agency.domaininformation_set.filter(base_filter)
-        requests = federal_agency.domainrequest_set.filter(base_filter)
-        existing_orgs = Suborganization.objects.all()
+        created_suborgs = {}
+
+        portfolios = portfolio_dict.values()
+        agencies = agency_dict.values()
+        existing_suborgs = Suborganization.objects.filter(portfolio__in=portfolios)
+        suborg_dict = {normalize_string(org.name): org for org in existing_suborgs}
+
+        domains = DomainInformation.objects.filter(
+            # Org name must not be null, and must not be the portfolio name
+            Q(
+                organization_name__isnull=False,
+            ) & ~Q(organization_name__iexact=F("portfolio__organization_name")),
+            # Only get relevant data to the agency/portfolio we are targeting
+            Q(federal_agency__in=agencies) | Q(portfolio__in=portfolios),
+        )
+        requests = DomainRequest.objects.filter(
+            # Org name must not be null, and must not be the portfolio name
+            Q(
+                organization_name__isnull=False,
+            ) & ~Q(organization_name__iexact=F("portfolio__organization_name")),
+            # Only get relevant data to the agency/portfolio we are targeting
+            Q(federal_agency__in=agencies) | Q(portfolio__in=portfolios),
+        )

         # Normalize all suborg names so we don't add duplicate data unintentionally.
-        # Get all suborg names that we COULD add
-        org_names_normalized = {}
-        for domain in domains:
-            org_name = normalize_string(domain.organization_name)
-            if org_name not in org_names_normalized:
-                org_names_normalized[org_name] = domain.organization_name
-
-        # Get all suborg names that presently exist
-        existing_org_names_normalized = {}
-        for org in existing_orgs:
-            org_name = normalize_string(org.name)
-            if org_name not in existing_org_names_normalized:
-                existing_org_names_normalized[org_name] = org.name
-
-        # Subtract existing names from ones we COULD add.
-        # We don't want to add existing names.
-        new_org_names = {}
-        for norm_name, name in org_names_normalized.items():
-            if norm_name not in existing_org_names_normalized:
-                new_org_names[norm_name] = name
-
-        # Add new suborgs assuming they aren't duplicates and don't already exist in the db.
-        created_suborgs = []
-        for norm_name, name in new_org_names.items():
-            norm_portfolio_name = normalize_string(portfolio.organization_name)
-            if norm_name != norm_portfolio_name:
-                suborg = Suborganization(name=name, portfolio=portfolio)
-                created_suborgs.append(suborg)
+        for portfolio_name, portfolio in portfolio_dict.items():
+            for domain in domains:
+                if normalize_string(domain.federal_agency.agency) != portfolio_name:
+                    continue
+
+                org_name = domain.organization_name
+                norm_org_name = normalize_string(domain.organization_name)
+                # If the suborg already exists or if we've already added it, don't add it again.
+                if norm_org_name not in suborg_dict and norm_org_name not in created_suborgs:
+                    suborg = Suborganization(name=org_name, portfolio=portfolio)
+                    created_suborgs[norm_org_name] = suborg

         # Add location information to suborgs.
         # This can vary per domain and request, so this is a seperate step.
-        # First: Filter domains and requests by those that have data
-        valid_domains = domains.filter(
-            city__isnull=False,
-            state_territory__isnull=False,
-            portfolio__isnull=False,
-            sub_organization__isnull=False,
-        )
-        valid_requests = requests.filter(
-            (
-                Q(city__isnull=False, state_territory__isnull=False)
-                | Q(suborganization_city__isnull=False, suborganization_state_territory__isnull=False)
-            ),
-            portfolio__isnull=False,
-            sub_organization__isnull=False,
-        )
-
-        # Second: Group domains and requests by normalized organization name.
-        # This means that later down the line we can account for "duplicate" org names.
+        # First: Group domains and requests by normalized organization name.
         domains_dict = {}
         requests_dict = {}
-        for domain in valid_domains:
-            print(f"what is the org name? {domain.organization_name}")
+        for domain in domains:
             normalized_name = normalize_string(domain.organization_name)
             domains_dict.setdefault(normalized_name, []).append(domain)

-        for request in valid_requests:
-            print(f"what is the org name for requests? {request.organization_name}")
+        for request in requests:
             normalized_name = normalize_string(request.organization_name)
             requests_dict.setdefault(normalized_name, []).append(request)

-        # Fourth: Process each suborg to add city / state territory info
-        for suborg in created_suborgs:
-            self.set_suborganization_location(suborg, domains_dict, requests_dict)
+        # Second: Process each suborg to add city / state territory info
+        for norm_name, suborg in created_suborgs.items():
+            self.set_suborganization_location(norm_name, suborg, domains_dict, requests_dict)

         return created_suborgs

-    def set_suborganization_location(self, suborg, domains_dict, requests_dict):
+    def set_suborganization_location(self, normalized_suborg_name, suborg, domains_dict, requests_dict):
         """Updates a single suborganization's location data if valid.

         Args:
@@ -407,18 +385,19 @@ class Command(BaseCommand):
         Priority matches parent method. Updates are skipped if location data conflicts
         between multiple records of the same type.
         """
-        normalized_suborg_name = normalize_string(suborg.name)
         domains = domains_dict.get(normalized_suborg_name, [])
         requests = requests_dict.get(normalized_suborg_name, [])
-        print(f"domains: {domains}")
-        print(f"requests: {requests}")

         # Try to get matching domain info
         domain = None
         if domains:
             reference = domains[0]
             use_location_for_domain = all(
-                d.city == reference.city and d.state_territory == reference.state_territory for d in domains
+                d.city
+                and d.state_territory
+                and d.city == reference.city
+                and d.state_territory == reference.state_territory
+                for d in domains
             )
             if use_location_for_domain:
                 domain = reference
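The consolidation above leans on grouping and de-duplicating records by a normalized organization name. Below is a small standalone sketch of that pattern; only the call signature of normalize_string appears in the diff, so its body here is an assumption for illustration:

    def normalize_string(value, lowercase=True):
        # Assumed behavior: collapse repeated whitespace and optionally lowercase.
        value = " ".join(value.split())
        return value.lower() if lowercase else value

    names = ["Dept. of  Examples", "dept. of examples", "Office of Samples"]
    groups = {}
    for name in names:
        # Key by the normalized form so spelling/casing variants land in one bucket.
        groups.setdefault(normalize_string(name), []).append(name)
    # groups == {"dept. of examples": ["Dept. of  Examples", "dept. of examples"],
    #            "office of samples": ["Office of Samples"]}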


@@ -52,6 +52,8 @@ class ScriptDataHelper:

         Usage:
             ScriptDataHelper.bulk_update_fields(Domain, page.object_list, ["first_ready"])
+
+        Returns: A queryset of the updated objets
         """
         if not quiet:
             logger.info(f"{TerminalColors.YELLOW} Bulk updating fields... {TerminalColors.ENDC}")
@@ -63,7 +65,7 @@ class ScriptDataHelper:
             model_class.objects.bulk_update(page.object_list, fields_to_update)

     @staticmethod
-    def bulk_create_fields(model_class, update_list, batch_size=1000, quiet=False):
+    def bulk_create_fields(model_class, update_list, batch_size=1000, return_created=False, quiet=False):
         """
         This function performs a bulk create operation on a specified Django model class in batches.
         It uses Django's Paginator to handle large datasets in a memory-efficient manner.
@@ -80,13 +82,22 @@ class ScriptDataHelper:
         or large field values, you may need to decrease this value to prevent out-of-memory errors.

         Usage:
             ScriptDataHelper.bulk_add_fields(Domain, page.object_list)
+
+        Returns: A queryset of the added objects
         """
         if not quiet:
             logger.info(f"{TerminalColors.YELLOW} Bulk adding fields... {TerminalColors.ENDC}")
+        created_objs = []
         paginator = Paginator(update_list, batch_size)
         for page_num in paginator.page_range:
             page = paginator.page(page_num)
-            model_class.objects.bulk_create(page.object_list)
+            all_created = model_class.objects.bulk_create(page.object_list)
+            if return_created:
+                created_objs.extend([created.id for created in all_created])
+
+        if return_created:
+            return model_class.objects.filter(id__in=created_objs)
+        return None

 class PopulateScriptTemplate(ABC):
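For context, a hedged usage sketch of the new return_created flag in bulk_create_fields. The Suborganization objects, the portfolio variable, and the surrounding Django setup are illustrative assumptions, not part of the commit:

    # Illustrative only: assumes an existing `portfolio` row and some new suborg names.
    new_names = ["Example Suborg A", "Example Suborg B"]
    to_add = [Suborganization(name=name, portfolio=portfolio) for name in new_names]

    created_qs = ScriptDataHelper.bulk_create_fields(
        Suborganization, to_add, return_created=True, quiet=True
    )
    # created_qs is a queryset re-fetched by the ids collected during bulk_create,
    # which is what bulk_create() in the first file assigns back to self.create.
    # With return_created=False (the default) the helper still returns None.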