Mirror of https://github.com/cisagov/manage.get.gov.git
fix create_suborg step and consolidate

parent 65e70704eb
commit 46a720abb3

2 changed files with 81 additions and 91 deletions

@@ -37,12 +37,14 @@ class Command(BaseCommand):
         logger.info(f"{TerminalColors.BOLD}{no_changes_message}{TerminalColors.ENDC}")
 
     def has_changes(self) -> bool:
-        num_changes = [len(self.create), len(self.update), len(self.skip), len(self.fail)]
-        return any([num_change > 0 for num_change in num_changes])
+        changes = [self.create, self.update, self.skip, self.fail]
+        return any([change for change in changes if change])
 
     def bulk_create(self):
         try:
-            ScriptDataHelper.bulk_create_fields(self.model_class, self.create, quiet=True)
+            res = ScriptDataHelper.bulk_create_fields(self.model_class, self.create, return_created=True, quiet=True)
+            self.create = res
+            return res
         except Exception as err:
             # In this case, just swap the fail and add lists
             self.fail = self.create.copy()
@@ -51,7 +53,9 @@ class Command(BaseCommand):
 
     def bulk_update(self, fields_to_update):
         try:
-            ScriptDataHelper.bulk_update_fields(self.model_class, self.update, fields_to_update, quiet=True)
+            res = ScriptDataHelper.bulk_update_fields(self.model_class, self.update, fields_to_update, quiet=True)
+            self.update = res
+            return res
         except Exception as err:
             # In this case, just swap the fail and update lists
             self.fail = self.update.copy()
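
These two hunks change the tracker's bulk_create() and bulk_update() from fire-and-forget calls into calls that capture what was actually persisted: the helper's return value replaces self.create / self.update and is also handed back to the caller, so later steps work with saved rows instead of unsaved instances. A minimal sketch of that pattern; the class name and fields below are illustrative, and ScriptDataHelper is the utility modified at the end of this commit:

class ChangeTrackerSketch:  # illustrative name, not the class used in the command
    def __init__(self, model_class):
        self.model_class = model_class
        self.create, self.update, self.skip, self.fail = [], [], [], []

    def has_changes(self) -> bool:
        # Truthiness works for both lists and querysets, which matters once
        # self.create has been swapped for the queryset returned by bulk_create.
        changes = [self.create, self.update, self.skip, self.fail]
        return any(change for change in changes if change)

    def bulk_create(self):
        try:
            res = ScriptDataHelper.bulk_create_fields(
                self.model_class, self.create, return_created=True, quiet=True
            )
            self.create = res  # downstream code now sees persisted rows
            return res
        except Exception:
            # mirror the command: on failure, treat the whole pending batch as failed
            self.fail = self.create.copy()

The handle flow further down relies on this: portfolios_to_use = self.portfolio_changes.bulk_create() only works because bulk_create now returns something.
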
@@ -167,12 +171,11 @@ class Command(BaseCommand):
             organization_name__in=agencies.values_list("agency", flat=True), organization_name__isnull=False
         )
         existing_portfolios_set = {normalize_string(p.organization_name): p for p in existing_portfolios}
-        agencies_set = {normalize_string(agency.agency): agency for agency in agencies}
-        for federal_agency in agencies_set.values():
+        agencies_dict = {normalize_string(agency.agency): agency for agency in agencies}
+        for federal_agency in agencies_dict.values():
             portfolio_name = normalize_string(federal_agency.agency, lowercase=False)
             portfolio = existing_portfolios_set.get(portfolio_name, None)
-            new_portfolio = portfolio is None
-            if new_portfolio:
+            if portfolio is None:
                 portfolio = Portfolio(
                     organization_name=portfolio_name,
                     federal_agency=federal_agency,
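
A quick illustration of why this hunk keys the agency lookup by normalize_string(...): variant spellings of the same name collapse onto one key, so existing_portfolios_set.get(portfolio_name) and agencies_dict lookups match regardless of case or stray whitespace. The normalize_string behavior below is an assumption (collapse whitespace, lowercase unless lowercase=False), used only for the demo:

def normalize_string(value: str, lowercase: bool = True) -> str:
    # assumed behavior of the repo helper, for illustration only
    value = " ".join(value.split())
    return value.lower() if lowercase else value

agencies = ["Department of Example", "DEPARTMENT OF  EXAMPLE ", "Other Agency"]
agencies_dict = {normalize_string(a): a for a in agencies}
print(sorted(agencies_dict))  # ['department of example', 'other agency'] -- duplicates collapse onto one key
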
@@ -183,36 +186,30 @@ class Command(BaseCommand):
                 )
                 self.portfolio_changes.create.append(portfolio)
                 logger.info(f"{TerminalColors.OKGREEN}Created portfolio '{portfolio}'.{TerminalColors.ENDC}")
-            if skip_existing_portfolios and not new_portfolio:
+            elif skip_existing_portfolios:
                 message = f"Portfolio '{portfolio}' already exists. Skipped."
                 logger.info(f"{TerminalColors.YELLOW}{message}{TerminalColors.ENDC}")
-                if portfolio:
-                    self.portfolio_changes.skip.append(portfolio)
+                self.portfolio_changes.skip.append(portfolio)
 
         # Create portfolios
-        self.portfolio_changes.bulk_create()
+        portfolios_to_use = self.portfolio_changes.bulk_create()
 
         # After create, get the list of all portfolios to use
         portfolios_to_use = set(self.portfolio_changes.create)
         if not skip_existing_portfolios:
             portfolios_to_use.update(set(existing_portfolios))
 
+        portfolios_to_use_dict = {normalize_string(p.organization_name): p for p in portfolios_to_use}
 
         # == Handle suborganizations == #
-        for portfolio in portfolios_to_use:
-            created_suborgs = []
-            org_name = normalize_string(portfolio.organization_name)
-            federal_agency = agencies_set.get(org_name)
-            if portfolio:
-                created_suborgs = self.create_suborganizations(portfolio, federal_agency)
-                Suborganization.objects.bulk_create(created_suborgs)
-                self.suborganization_changes.create.extend(created_suborgs)
+        created_suborgs = self.create_suborganizations(portfolios_to_use_dict, agencies_dict)
+        if created_suborgs:
+            self.suborganization_changes.create.extend(created_suborgs.values())
+            self.suborganization_changes.bulk_create()
 
         # == Handle domains, requests, and managers == #
-        for portfolio in portfolios_to_use:
-            org_name = normalize_string(portfolio.organization_name)
-            federal_agency = agencies_set.get(org_name)
+        for portfolio_org_name, portfolio in portfolios_to_use_dict.items():
+            federal_agency = agencies_dict.get(portfolio_org_name)
 
             if parse_domains:
                 self.handle_portfolio_domains(portfolio, federal_agency)
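
The net effect of this hunk is that suborganization creation moves out of the per-portfolio loop: candidates are collected once across all portfolios, keyed so duplicates are dropped, and then persisted through a single change-tracker bulk create. A pure-Python sketch of that shape; SuborgStub and persist_batch are stand-ins for the Suborganization model and the tracker's bulk_create():

from dataclasses import dataclass


@dataclass
class SuborgStub:  # stand-in for the Suborganization model
    name: str
    portfolio: str


def persist_batch(objs):  # stand-in for suborganization_changes.bulk_create()
    print(f"bulk-creating {len(objs)} suborganizations in one pass")


candidates = {
    "Portfolio A": ["Division of X", "DIVISION OF X", "Division of Y"],
    "Portfolio B": ["Division of Z"],
}

created = {}
for portfolio, names in candidates.items():
    for name in names:
        # keyed on a normalized form, so the same name is only queued once per batch
        created.setdefault(name.lower(), SuborgStub(name=name, portfolio=portfolio))

persist_batch(list(created.values()))  # prints: bulk-creating 3 suborganizations in one pass
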
@@ -318,85 +315,66 @@ class Command(BaseCommand):
             display_as_str=True,
         )
 
-    def create_suborganizations(self, portfolio, federal_agency):
+    def create_suborganizations(self, portfolio_dict, agency_dict):
         """Create Suborganizations tied to the given portfolio based on DomainInformation objects"""
-        base_filter = Q(
-            organization_name__isnull=False,
-        ) & ~Q(organization_name__iexact=F("portfolio__organization_name"))
+        created_suborgs = {}
 
-        domains = federal_agency.domaininformation_set.filter(base_filter)
-        requests = federal_agency.domainrequest_set.filter(base_filter)
-        existing_orgs = Suborganization.objects.all()
+        portfolios = portfolio_dict.values()
+        agencies = agency_dict.values()
+        existing_suborgs = Suborganization.objects.filter(portfolio__in=portfolios)
+        suborg_dict = {normalize_string(org.name): org for org in existing_suborgs}
+
+        domains = DomainInformation.objects.filter(
+            # Org name must not be null, and must not be the portfolio name
+            Q(
+                organization_name__isnull=False,
+            ) & ~Q(organization_name__iexact=F("portfolio__organization_name")),
+            # Only get relevant data to the agency/portfolio we are targeting
+            Q(federal_agency__in=agencies) | Q(portfolio__in=portfolios),
+        )
+        requests = DomainRequest.objects.filter(
+            # Org name must not be null, and must not be the portfolio name
+            Q(
+                organization_name__isnull=False,
+            ) & ~Q(organization_name__iexact=F("portfolio__organization_name")),
+            # Only get relevant data to the agency/portfolio we are targeting
+            Q(federal_agency__in=agencies) | Q(portfolio__in=portfolios),
+        )
 
         # Normalize all suborg names so we don't add duplicate data unintentionally.
-        # Get all suborg names that we COULD add
-        org_names_normalized = {}
-        for domain in domains:
-            org_name = normalize_string(domain.organization_name)
-            if org_name not in org_names_normalized:
-                org_names_normalized[org_name] = domain.organization_name
+        for portfolio_name, portfolio in portfolio_dict.items():
+            for domain in domains:
+                if normalize_string(domain.federal_agency.agency) != portfolio_name:
+                    continue
 
-        # Get all suborg names that presently exist
-        existing_org_names_normalized = {}
-        for org in existing_orgs:
-            org_name = normalize_string(org.name)
-            if org_name not in existing_org_names_normalized:
-                existing_org_names_normalized[org_name] = org.name
+                org_name = domain.organization_name
+                norm_org_name = normalize_string(domain.organization_name)
+                # If the suborg already exists or if we've already added it, don't add it again.
+                if norm_org_name not in suborg_dict and norm_org_name not in created_suborgs:
+                    suborg = Suborganization(name=org_name, portfolio=portfolio)
+                    created_suborgs[norm_org_name] = suborg
 
-        # Subtract existing names from ones we COULD add.
-        # We don't want to add existing names.
-        new_org_names = {}
-        for norm_name, name in org_names_normalized.items():
-            if norm_name not in existing_org_names_normalized:
-                new_org_names[norm_name] = name
-
-        # Add new suborgs assuming they aren't duplicates and don't already exist in the db.
-        created_suborgs = []
-        for norm_name, name in new_org_names.items():
-            norm_portfolio_name = normalize_string(portfolio.organization_name)
-            if norm_name != norm_portfolio_name:
-                suborg = Suborganization(name=name, portfolio=portfolio)
-                created_suborgs.append(suborg)
-
         # Add location information to suborgs.
         # This can vary per domain and request, so this is a seperate step.
-        # First: Filter domains and requests by those that have data
-        valid_domains = domains.filter(
-            city__isnull=False,
-            state_territory__isnull=False,
-            portfolio__isnull=False,
-            sub_organization__isnull=False,
-        )
-        valid_requests = requests.filter(
-            (
-                Q(city__isnull=False, state_territory__isnull=False)
-                | Q(suborganization_city__isnull=False, suborganization_state_territory__isnull=False)
-            ),
-            portfolio__isnull=False,
-            sub_organization__isnull=False,
-        )
-
-        # Second: Group domains and requests by normalized organization name.
-        # This means that later down the line we can account for "duplicate" org names.
+        # First: Group domains and requests by normalized organization name.
         domains_dict = {}
         requests_dict = {}
-        for domain in valid_domains:
-            print(f"what is the org name? {domain.organization_name}")
+        for domain in domains:
             normalized_name = normalize_string(domain.organization_name)
             domains_dict.setdefault(normalized_name, []).append(domain)
 
-        for request in valid_requests:
-            print(f"what is the org name for requests? {request.organization_name}")
+        for request in requests:
             normalized_name = normalize_string(request.organization_name)
             requests_dict.setdefault(normalized_name, []).append(request)
 
-        # Fourth: Process each suborg to add city / state territory info
-        for suborg in created_suborgs:
-            self.set_suborganization_location(suborg, domains_dict, requests_dict)
+        # Second: Process each suborg to add city / state territory info
+        for norm_name, suborg in created_suborgs.items():
+            self.set_suborganization_location(norm_name, suborg, domains_dict, requests_dict)
 
         return created_suborgs
 
-    def set_suborganization_location(self, suborg, domains_dict, requests_dict):
+    def set_suborganization_location(self, normalized_suborg_name, suborg, domains_dict, requests_dict):
         """Updates a single suborganization's location data if valid.
 
         Args:
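
For reference on the rewritten queries above: Django ANDs positional Q arguments passed to .filter(), and the ~Q(organization_name__iexact=F("portfolio__organization_name")) clause drops rows whose organization name merely repeats the parent portfolio's name. A small illustration of the composition (requires Django installed; the empty lists stand in for the real agencies/portfolios values):

from django.db.models import F, Q

# Name must be present and must differ (case-insensitively) from the row's own
# portfolio.organization_name.
name_condition = Q(organization_name__isnull=False) & ~Q(
    organization_name__iexact=F("portfolio__organization_name")
)
# Restrict to the agencies/portfolios this run targets.
scope_condition = Q(federal_agency__in=[]) | Q(portfolio__in=[])  # placeholder iterables

# Passing both positionally, e.g. DomainInformation.objects.filter(name_condition, scope_condition),
# applies them as (name_condition) AND (scope_condition).
print(name_condition)
print(scope_condition)
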
@@ -407,18 +385,19 @@ class Command(BaseCommand):
         Priority matches parent method. Updates are skipped if location data conflicts
         between multiple records of the same type.
         """
-        normalized_suborg_name = normalize_string(suborg.name)
         domains = domains_dict.get(normalized_suborg_name, [])
         requests = requests_dict.get(normalized_suborg_name, [])
-        print(f"domains: {domains}")
-        print(f"requests: {requests}")
 
         # Try to get matching domain info
         domain = None
         if domains:
             reference = domains[0]
             use_location_for_domain = all(
-                d.city == reference.city and d.state_territory == reference.state_territory for d in domains
+                d.city
+                and d.state_territory
+                and d.city == reference.city
+                and d.state_territory == reference.state_territory
+                for d in domains
             )
             if use_location_for_domain:
                 domain = reference
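
Because the earlier hunk removed the valid_domains / valid_requests pre-filtering, the completeness check now lives inside the all(...) itself: a location is only copied onto a suborganization when every candidate record has both city and state_territory set and they all agree. A self-contained sketch of that predicate, with a stand-in dataclass for the domain rows:

from dataclasses import dataclass
from typing import Optional


@dataclass
class Row:  # stand-in for DomainInformation records; field names mirror the diff
    city: Optional[str]
    state_territory: Optional[str]


def consistent_location(rows) -> bool:
    if not rows:
        return False
    reference = rows[0]
    return all(
        r.city
        and r.state_territory
        and r.city == reference.city
        and r.state_territory == reference.state_territory
        for r in rows
    )


print(consistent_location([Row("Austin", "TX"), Row("Austin", "TX")]))  # True
print(consistent_location([Row(None, None), Row(None, None)]))          # False: blank-but-matching no longer qualifies
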
@@ -52,6 +52,8 @@ class ScriptDataHelper:
 
         Usage:
         ScriptDataHelper.bulk_update_fields(Domain, page.object_list, ["first_ready"])
+
+        Returns: A queryset of the updated objets
         """
         if not quiet:
             logger.info(f"{TerminalColors.YELLOW} Bulk updating fields... {TerminalColors.ENDC}")
@@ -63,7 +65,7 @@ class ScriptDataHelper:
             model_class.objects.bulk_update(page.object_list, fields_to_update)
 
     @staticmethod
-    def bulk_create_fields(model_class, update_list, batch_size=1000, quiet=False):
+    def bulk_create_fields(model_class, update_list, batch_size=1000, return_created=False, quiet=False):
         """
         This function performs a bulk create operation on a specified Django model class in batches.
         It uses Django's Paginator to handle large datasets in a memory-efficient manner.
@@ -80,13 +82,22 @@ class ScriptDataHelper:
         or large field values, you may need to decrease this value to prevent out-of-memory errors.
         Usage:
         ScriptDataHelper.bulk_add_fields(Domain, page.object_list)
+
+        Returns: A queryset of the added objects
         """
         if not quiet:
             logger.info(f"{TerminalColors.YELLOW} Bulk adding fields... {TerminalColors.ENDC}")
+
+        created_objs = []
         paginator = Paginator(update_list, batch_size)
         for page_num in paginator.page_range:
             page = paginator.page(page_num)
-            model_class.objects.bulk_create(page.object_list)
+            all_created = model_class.objects.bulk_create(page.object_list)
+            if return_created:
+                created_objs.extend([created.id for created in all_created])
+        if return_created:
+            return model_class.objects.filter(id__in=created_objs)
+        return None
 
 
 class PopulateScriptTemplate(ABC):
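
With return_created=True, bulk_create_fields collects the ids of the rows it inserted and returns a fresh queryset via model_class.objects.filter(id__in=...); with the default return_created=False it still returns None. A usage fragment (not a standalone script: it assumes the registrar's Django environment is configured, and the field values are illustrative):

# Fragment only -- assumes Django is set up and the registrar models are importable.
new_domains = [Domain(name=f"example-{i}.gov") for i in range(3)]  # illustrative values
created_qs = ScriptDataHelper.bulk_create_fields(Domain, new_domains, return_created=True)

if created_qs is not None:
    # A DB-backed QuerySet, so normal chaining works; this is also what the
    # updated change tracker stores back into self.create.
    print(created_qs.count())                             # 3
    print(list(created_qs.values_list("id", flat=True)))  # the newly assigned primary keys

Re-querying by id rather than returning the in-memory list is what lets callers treat the result as an ordinary queryset afterwards.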