Domain invitation import script

This commit is contained in:
Neil Martinsen-Burrell 2023-04-17 15:34:32 -05:00
parent 3a80cb24ce
commit 267acd1c29
No known key found for this signature in database
GPG key ID: 6A3C818CC10D0184
3 changed files with 90 additions and 2 deletions

View file

@ -63,3 +63,17 @@ an invitation in the system for each email address listed in the
Verisign system, and they use the same email address with Login.gov, then they
will end up with access to the same domains in the new registrar that they
were associated with in the Verisign system.
A management command that does this needs to process two data files, one for
the contact information and one for the domain/contact association, so we
can't use stdin the way that we did before. Instead, we can use the fact that
Docker Compose mounts the `src/` directory inside of the container at `/app`.
Then, data files that are inside of the `src/` directory can be accessed
inside the Docker container.
An example script using this technique is in
`src/registrar/management/commands/load_domain_invitations.py`.
```shell
docker compose run app ./manage.py load_domain_invitations /app/escrow_domain_contacts.daily.dotgov.GOV.txt /app/escrow_contacts.daily.dotgov.GOV.txt
```

View file

@ -0,0 +1,75 @@
"""Load domain invitations for existing domains and their contacts."""
import csv
import logging
from collections import defaultdict
from django.core.management import BaseCommand
from registrar.models import Domain, DomainInvitation
logger = logging.getLogger(__name__)
class Command(BaseCommand):
    """Create a DomainInvitation for every (contact email, domain) pair.

    Two delimited data files are read: one associating domains with contact
    user IDs, and one holding the contact records themselves (including the
    contact's email address). For every contact that is associated with at
    least one domain, an invitation with status ``INVITED`` is created for
    each of that contact's domains.
    """

    help = "Load invitations for existing domains and their users."

    def add_arguments(self, parser):
        """Add our two filename arguments and the optional delimiter."""
        parser.add_argument(
            "domain_contacts_filename",
            help="Data file with domain contact information",
        )
        parser.add_argument(
            "contacts_filename", help="Data file with contact information"
        )
        parser.add_argument("--sep", default="|", help="Delimiter character")

    def handle(self, domain_contacts_filename, contacts_filename, **options):
        """Load the data files and create the DomainInvitations.

        Args:
            domain_contacts_filename: path to the domain/contact association
                file; each row is ``domain, userid, role``.
            contacts_filename: path to the contacts file; the user ID is in
                the first field and the email address in the seventh.
            **options: parsed command-line options; only ``sep`` (the field
                delimiter) is used here.

        NOTE(review): the `Domain.objects.get` lookup is not guarded, so a
        domain name without a matching `Domain` record will presumably raise
        and abort the run. Nothing is written before the final
        `bulk_create`, so such a failure leaves no partial data behind.
        """
        sep = options.get("sep")

        # We read the domain file first and hold it in memory.
        # There are three contacts per domain, so there should be at
        # most 3*N different contacts here.
        contact_domains = defaultdict(list)  # each contact has a list of domains
        logger.info("Reading domain-contacts data file %s", domain_contacts_filename)
        # newline="" lets the csv module do its own newline handling, as the
        # csv documentation recommends for file objects passed to a reader.
        with open(domain_contacts_filename, "r", newline="") as domain_file:
            for row in csv.reader(domain_file, delimiter=sep):
                if not row:
                    # csv.reader yields [] for blank lines; nothing to index
                    continue
                # fields are just domain, userid, role
                # lowercase the domain names now
                contact_domains[row[1]].append(row[0].lower())
        logger.info("Loaded domains for %d contacts", len(contact_domains))

        # Now we have a mapping of user IDs to lists of domains for that user.
        # Iterate over the contacts list and, for contacts in our mapping,
        # create the domain invitations for their email address.
        logger.info("Reading contacts data file %s", contacts_filename)
        to_create = []
        skipped = 0
        # Several contacts usually share a domain, so remember each Domain we
        # have already fetched instead of querying for it again.
        domains_by_name = {}
        with open(contacts_filename, "r", newline="") as contacts_file:
            for row in csv.reader(contacts_file, delimiter=sep):
                if not row:
                    # blank line, not a contact record
                    continue
                # userid is in the first field, email is the seventh
                userid = row[0]
                if userid not in contact_domains:
                    # this user has no domains, skip them
                    skipped += 1
                    continue
                email_address = row[6].lower()
                for domain_name in contact_domains[userid]:
                    if domain_name not in domains_by_name:
                        domains_by_name[domain_name] = Domain.objects.get(
                            name=domain_name
                        )
                    to_create.append(
                        DomainInvitation(
                            email=email_address,
                            domain=domains_by_name[domain_name],
                            status=DomainInvitation.INVITED,
                        )
                    )

        logger.info("Creating %d invitations", len(to_create))
        DomainInvitation.objects.bulk_create(to_create)
        logger.info(
            "Created %d domain invitations, ignored %d contacts",
            len(to_create),
            skipped,
        )

View file

@ -54,8 +54,7 @@ class Command(BaseCommand):
# accumulate model objects so we can `bulk_create` them all at once. # accumulate model objects so we can `bulk_create` them all at once.
domains = [] domains = []
for row in reader: for row in reader:
name = row["Name"] name = row["Name"].lower() # we typically use lowercase domains
logger.info("Processing domain %s", name)
# Ensure that there is a `Domain` object for each domain name in # Ensure that there is a `Domain` object for each domain name in
# this file and that it is active. There is a uniqueness # this file and that it is active. There is a uniqueness