mirror of
https://github.com/cisagov/manage.get.gov.git
synced 2025-08-13 13:09:41 +02:00
Merge pull request #1669 from cisagov/za/1511-csv-slow
(On getgov-za) Ticket #1511 - CSV export takes a long time
This commit is contained in:
commit
db26d5b2a2
3 changed files with 123 additions and 41 deletions
|
@ -182,8 +182,6 @@ class LoadExtraTransitionDomain:
|
||||||
# STEP 5: Parse creation and expiration data
|
# STEP 5: Parse creation and expiration data
|
||||||
updated_transition_domain = self.parse_creation_expiration_data(domain_name, transition_domain)
|
updated_transition_domain = self.parse_creation_expiration_data(domain_name, transition_domain)
|
||||||
|
|
||||||
# Check if the instance has changed before saving
|
|
||||||
updated_transition_domain.save()
|
|
||||||
updated_transition_domains.append(updated_transition_domain)
|
updated_transition_domains.append(updated_transition_domain)
|
||||||
logger.info(f"{TerminalColors.OKCYAN}" f"Successfully updated {domain_name}" f"{TerminalColors.ENDC}")
|
logger.info(f"{TerminalColors.OKCYAN}" f"Successfully updated {domain_name}" f"{TerminalColors.ENDC}")
|
||||||
|
|
||||||
|
@ -199,6 +197,28 @@ class LoadExtraTransitionDomain:
|
||||||
)
|
)
|
||||||
failed_transition_domains.append(domain_name)
|
failed_transition_domains.append(domain_name)
|
||||||
|
|
||||||
|
updated_fields = [
|
||||||
|
"organization_name",
|
||||||
|
"organization_type",
|
||||||
|
"federal_type",
|
||||||
|
"federal_agency",
|
||||||
|
"first_name",
|
||||||
|
"middle_name",
|
||||||
|
"last_name",
|
||||||
|
"email",
|
||||||
|
"phone",
|
||||||
|
"epp_creation_date",
|
||||||
|
"epp_expiration_date",
|
||||||
|
]
|
||||||
|
|
||||||
|
batch_size = 1000
|
||||||
|
# Create a Paginator object. Running bulk_update on the full dataset
|
||||||
|
# is too memory intensive for our current app config, so we update the data in chunks instead.
|
||||||
|
paginator = Paginator(updated_transition_domains, batch_size)
|
||||||
|
for page_num in paginator.page_range:
|
||||||
|
page = paginator.page(page_num)
|
||||||
|
TransitionDomain.objects.bulk_update(page.object_list, updated_fields)
|
||||||
|
|
||||||
failed_count = len(failed_transition_domains)
|
failed_count = len(failed_transition_domains)
|
||||||
if failed_count == 0:
|
if failed_count == 0:
|
||||||
if self.debug:
|
if self.debug:
|
||||||
|
|
|
@ -910,10 +910,15 @@ class Domain(TimeStampedModel, DomainHelper):
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
def get_security_email(self):
|
def get_security_email(self):
|
||||||
logger.info("get_security_email-> getting the contact ")
|
logger.info("get_security_email-> getting the contact")
|
||||||
secContact = self.security_contact
|
|
||||||
if secContact is not None:
|
security = PublicContact.ContactTypeChoices.SECURITY
|
||||||
return secContact.email
|
security_contact = self.generic_contact_getter(security)
|
||||||
|
|
||||||
|
# If we get a valid value for security_contact, pull its email
|
||||||
|
# Otherwise, just return nothing
|
||||||
|
if security_contact is not None and isinstance(security_contact, PublicContact):
|
||||||
|
return security_contact.email
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@ -1121,7 +1126,6 @@ class Domain(TimeStampedModel, DomainHelper):
|
||||||
If you wanted to setup getter logic for Security, you would call:
|
If you wanted to setup getter logic for Security, you would call:
|
||||||
cache_contact_helper(PublicContact.ContactTypeChoices.SECURITY),
|
cache_contact_helper(PublicContact.ContactTypeChoices.SECURITY),
|
||||||
or cache_contact_helper("security").
|
or cache_contact_helper("security").
|
||||||
|
|
||||||
"""
|
"""
|
||||||
# registrant_contact(s) are an edge case. They exist on
|
# registrant_contact(s) are an edge case. They exist on
|
||||||
# the "registrant" property as opposed to contacts.
|
# the "registrant" property as opposed to contacts.
|
||||||
|
|
|
@ -3,10 +3,13 @@ import logging
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from registrar.models.domain import Domain
|
from registrar.models.domain import Domain
|
||||||
from registrar.models.domain_information import DomainInformation
|
from registrar.models.domain_information import DomainInformation
|
||||||
from registrar.models.public_contact import PublicContact
|
|
||||||
from django.db.models import Value
|
|
||||||
from django.db.models.functions import Coalesce
|
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
|
from django.core.paginator import Paginator
|
||||||
|
from django.db.models import F, Value, CharField
|
||||||
|
from django.db.models.functions import Concat, Coalesce
|
||||||
|
|
||||||
|
from registrar.models.public_contact import PublicContact
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@ -20,50 +23,77 @@ def write_header(writer, columns):
|
||||||
|
|
||||||
|
|
||||||
def get_domain_infos(filter_condition, sort_fields):
|
def get_domain_infos(filter_condition, sort_fields):
|
||||||
domain_infos = DomainInformation.objects.filter(**filter_condition).order_by(*sort_fields)
|
domain_infos = (
|
||||||
return domain_infos
|
DomainInformation.objects.select_related("domain", "authorizing_official")
|
||||||
|
.filter(**filter_condition)
|
||||||
|
.order_by(*sort_fields)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Do a mass concat of the first and last name fields for authorizing_official.
|
||||||
|
# The old code built this string in Python for every row, which was slow. Precomputing
|
||||||
|
# it here as a query annotation is vastly more efficient.
|
||||||
|
domain_infos_cleaned = domain_infos.annotate(
|
||||||
|
ao=Concat(
|
||||||
|
Coalesce(F("authorizing_official__first_name"), Value("")),
|
||||||
|
Value(" "),
|
||||||
|
Coalesce(F("authorizing_official__last_name"), Value("")),
|
||||||
|
output_field=CharField(),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return domain_infos_cleaned
|
||||||
|
|
||||||
|
|
||||||
def write_row(writer, columns, domain_info):
|
def parse_row(columns, domain_info: DomainInformation, security_emails_dict=None):
|
||||||
security_contacts = domain_info.domain.contacts.filter(contact_type=PublicContact.ContactTypeChoices.SECURITY)
|
"""Given a set of columns, generate a new row from cleaned column data"""
|
||||||
|
|
||||||
# For linter
|
# Domain should never be none when parsing this information
|
||||||
ao = " "
|
if domain_info.domain is None:
|
||||||
if domain_info.authorizing_official:
|
raise ValueError("Domain is none")
|
||||||
first_name = domain_info.authorizing_official.first_name or ""
|
|
||||||
last_name = domain_info.authorizing_official.last_name or ""
|
|
||||||
ao = first_name + " " + last_name
|
|
||||||
|
|
||||||
security_email = " "
|
domain = domain_info.domain # type: ignore
|
||||||
if security_contacts:
|
|
||||||
security_email = security_contacts[0].email
|
# Grab the security email from a preset dictionary.
|
||||||
|
# If nothing exists in the dictionary, grab from .contacts.
|
||||||
|
if security_emails_dict is not None and domain.name in security_emails_dict:
|
||||||
|
_email = security_emails_dict.get(domain.name)
|
||||||
|
security_email = _email if _email is not None else " "
|
||||||
|
else:
|
||||||
|
# If the dictionary doesn't contain that data, let's filter for it manually.
|
||||||
|
# This is a last resort as this is a more expensive operation.
|
||||||
|
security_contacts = domain.contacts.filter(contact_type=PublicContact.ContactTypeChoices.SECURITY)
|
||||||
|
_email = security_contacts[0].email if security_contacts else None
|
||||||
|
security_email = _email if _email is not None else " "
|
||||||
|
|
||||||
invalid_emails = {"registrar@dotgov.gov", "dotgov@cisa.dhs.gov"}
|
|
||||||
# These are default emails that should not be displayed in the csv report
|
# These are default emails that should not be displayed in the csv report
|
||||||
if security_email is not None and security_email.lower() in invalid_emails:
|
invalid_emails = {"registrar@dotgov.gov", "dotgov@cisa.dhs.gov"}
|
||||||
|
if security_email.lower() in invalid_emails:
|
||||||
security_email = "(blank)"
|
security_email = "(blank)"
|
||||||
|
|
||||||
|
if domain_info.federal_type:
|
||||||
|
domain_type = f"{domain_info.get_organization_type_display()} - {domain_info.get_federal_type_display()}"
|
||||||
|
else:
|
||||||
|
domain_type = domain_info.get_organization_type_display()
|
||||||
|
|
||||||
# create a dictionary of fields which can be included in output
|
# create a dictionary of fields which can be included in output
|
||||||
FIELDS = {
|
FIELDS = {
|
||||||
"Domain name": domain_info.domain.name,
|
"Domain name": domain.name,
|
||||||
"Domain type": domain_info.get_organization_type_display() + " - " + domain_info.get_federal_type_display()
|
"Domain type": domain_type,
|
||||||
if domain_info.federal_type
|
|
||||||
else domain_info.get_organization_type_display(),
|
|
||||||
"Agency": domain_info.federal_agency,
|
"Agency": domain_info.federal_agency,
|
||||||
"Organization name": domain_info.organization_name,
|
"Organization name": domain_info.organization_name,
|
||||||
"City": domain_info.city,
|
"City": domain_info.city,
|
||||||
"State": domain_info.state_territory,
|
"State": domain_info.state_territory,
|
||||||
"AO": ao,
|
"AO": domain_info.ao, # type: ignore
|
||||||
"AO email": domain_info.authorizing_official.email if domain_info.authorizing_official else " ",
|
"AO email": domain_info.authorizing_official.email if domain_info.authorizing_official else " ",
|
||||||
"Security contact email": security_email,
|
"Security contact email": security_email,
|
||||||
"Status": domain_info.domain.get_state_display(),
|
"Status": domain.get_state_display(),
|
||||||
"Expiration date": domain_info.domain.expiration_date,
|
"Expiration date": domain.expiration_date,
|
||||||
"Created at": domain_info.domain.created_at,
|
"Created at": domain.created_at,
|
||||||
"First ready": domain_info.domain.first_ready,
|
"First ready": domain.first_ready,
|
||||||
"Deleted": domain_info.domain.deleted,
|
"Deleted": domain.deleted,
|
||||||
}
|
}
|
||||||
|
|
||||||
writer.writerow([FIELDS.get(column, "") for column in columns])
|
row = [FIELDS.get(column, "") for column in columns]
|
||||||
|
return row
|
||||||
|
|
||||||
|
|
||||||
def write_body(
|
def write_body(
|
||||||
|
@ -78,13 +108,41 @@ def write_body(
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Get the domainInfos
|
# Get the domainInfos
|
||||||
domain_infos = get_domain_infos(filter_condition, sort_fields)
|
all_domain_infos = get_domain_infos(filter_condition, sort_fields)
|
||||||
|
|
||||||
all_domain_infos = list(domain_infos)
|
# Store all security emails to avoid EPP calls or excessive filters
|
||||||
|
sec_contact_ids = all_domain_infos.values_list("domain__security_contact_registry_id", flat=True)
|
||||||
|
security_emails_dict = {}
|
||||||
|
public_contacts = (
|
||||||
|
PublicContact.objects.only("email", "domain__name")
|
||||||
|
.select_related("domain")
|
||||||
|
.filter(registry_id__in=sec_contact_ids)
|
||||||
|
)
|
||||||
|
|
||||||
# Write rows to CSV
|
# Populate a dictionary of domain names and their security contacts
|
||||||
for domain_info in all_domain_infos:
|
for contact in public_contacts:
|
||||||
write_row(writer, columns, domain_info)
|
domain: Domain = contact.domain
|
||||||
|
if domain is not None and domain.name not in security_emails_dict:
|
||||||
|
security_emails_dict[domain.name] = contact.email
|
||||||
|
else:
|
||||||
|
logger.warning("csv_export -> Domain was none for PublicContact")
|
||||||
|
|
||||||
|
# Reduce the memory overhead when performing the write operation
|
||||||
|
paginator = Paginator(all_domain_infos, 1000)
|
||||||
|
for page_num in paginator.page_range:
|
||||||
|
page = paginator.page(page_num)
|
||||||
|
rows = []
|
||||||
|
for domain_info in page.object_list:
|
||||||
|
try:
|
||||||
|
row = parse_row(columns, domain_info, security_emails_dict)
|
||||||
|
rows.append(row)
|
||||||
|
except ValueError:
|
||||||
|
# This should not happen. If it does, just skip this row.
|
||||||
|
# It indicates that DomainInformation.domain is None.
|
||||||
|
logger.error("csv_export -> Error when parsing row, domain was None")
|
||||||
|
continue
|
||||||
|
|
||||||
|
writer.writerows(rows)
|
||||||
|
|
||||||
|
|
||||||
def export_data_type_to_csv(csv_file):
|
def export_data_type_to_csv(csv_file):
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue