diff --git a/docs/operations/data_migration.md b/docs/operations/data_migration.md
index 84d7dee15..89958a887 100644
--- a/docs/operations/data_migration.md
+++ b/docs/operations/data_migration.md
@@ -63,3 +63,17 @@ an invitation in the system for each email address listed in the Verisign
 system, and they use the same email address with Login.gov, then they will end
 up with access to the same domains in the new registrar that they were
 associated with in the Verisign system.
+
+A management command that does this needs to process two data files, one for
+the contact information and one for the domain/contact association, so we
+can't use stdin the way that we did before. Instead, we can use the fact that
+Docker Compose mounts the `src/` directory inside of the container at `/app`.
+Then, data files that are inside of the `src/` directory can be accessed
+inside the Docker container.
+
+An example script using this technique is in
+`src/registrar/management/commands/load_domain_invitations.py`.
+
+```shell
+docker compose run app ./manage.py load_domain_invitations /app/escrow_domain_contacts.daily.dotgov.GOV.txt /app/escrow_contacts.daily.dotgov.GOV.txt
+```
diff --git a/src/registrar/management/commands/load_domain_invitations.py b/src/registrar/management/commands/load_domain_invitations.py
new file mode 100644
index 000000000..aa9b6ac27
--- /dev/null
+++ b/src/registrar/management/commands/load_domain_invitations.py
@@ -0,0 +1,116 @@
+"""Load domain invitations for existing domains and their contacts."""
+
+import csv
+import logging
+
+from collections import defaultdict
+
+from django.core.management import BaseCommand
+
+from registrar.models import Domain, DomainInvitation
+
+logger = logging.getLogger(__name__)
+
+
+class Command(BaseCommand):
+    """Create invitations from a domain-contacts file and a contacts file."""
+
+    help = "Load invitations for existing domains and their users."
+
+    def add_arguments(self, parser):
+        """Add our two filename arguments."""
+        parser.add_argument(
+            "domain_contacts_filename",
+            help="Data file with domain contact information",
+        )
+        parser.add_argument(
+            "contacts_filename", help="Data file with contact information"
+        )
+
+        parser.add_argument("--sep", default="|", help="Delimiter character")
+
+    def handle(self, domain_contacts_filename, contacts_filename, **options):
+        """Load the data files and create the DomainInvitations.
+
+        Contacts that have no domains are counted and ignored. Rows that are
+        too short to parse, and domain names with no matching `Domain`, are
+        logged and skipped instead of aborting the whole load.
+        """
+        sep = options.get("sep")
+
+        # We read the domain file first and hold it in memory.
+        # There are three contacts per domain, so there should be at
+        # most 3*N different contacts here.
+        contact_domains = defaultdict(list)  # each contact has a list of domains
+        logger.info("Reading domain-contacts data file %s", domain_contacts_filename)
+        # the csv module asks for files to be opened with newline=""
+        with open(domain_contacts_filename, "r", newline="") as domain_file:
+            for row in csv.reader(domain_file, delimiter=sep):
+                if len(row) < 2:
+                    # blank or truncated line, nothing to associate
+                    continue
+                # fields are just domain, userid, role
+                # lowercase the domain names now
+                contact_domains[row[1]].append(row[0].lower())
+        logger.info("Loaded domains for %d contacts", len(contact_domains))
+
+        # now we have a mapping of user IDs to lists of domains for that user
+        # iterate over the contacts list and for contacts in our mapping,
+        # create the domain invitations for their email address
+        logger.info("Reading contacts data file %s", contacts_filename)
+        to_create = []
+        skipped = 0  # contacts that have no domains
+        malformed = 0  # contact rows with no email field
+        missing = 0  # invitations dropped because the domain does not exist
+        # domain name -> Domain, or None when there is no such Domain, so each
+        # name is only looked up once however many contacts share it
+        domains = {}
+        with open(contacts_filename, "r", newline="") as contacts_file:
+            for row in csv.reader(contacts_file, delimiter=sep):
+                if not row:
+                    # csv.reader yields an empty list for a blank line
+                    continue
+                # userid is in the first field, email is the seventh
+                userid = row[0]
+                if userid not in contact_domains:
+                    # this user has no domains, skip them
+                    skipped += 1
+                    continue
+                if len(row) < 7:
+                    logger.warning("No email field for contact %s, skipping", userid)
+                    malformed += 1
+                    continue
+                email_address = row[6].lower()
+                for domain_name in contact_domains[userid]:
+                    if domain_name not in domains:
+                        try:
+                            domains[domain_name] = Domain.objects.get(name=domain_name)
+                        except Domain.DoesNotExist:
+                            logger.warning("No domain named %s", domain_name)
+                            domains[domain_name] = None
+                    domain = domains[domain_name]
+                    if domain is None:
+                        missing += 1
+                        continue
+                    to_create.append(
+                        DomainInvitation(
+                            email=email_address,
+                            domain=domain,
+                            status=DomainInvitation.INVITED,
+                        )
+                    )
+
+        logger.info("Creating %d invitations", len(to_create))
+        DomainInvitation.objects.bulk_create(to_create)
+        logger.info(
+            "Created %d domain invitations, ignored %d contacts",
+            len(to_create),
+            skipped,
+        )
+        if malformed or missing:
+            logger.warning(
+                "Skipped %d contact rows without an email and %d invitations "
+                "for unknown domains",
+                malformed,
+                missing,
+            )
diff --git a/src/registrar/management/commands/load_domains_data.py b/src/registrar/management/commands/load_domains_data.py
index 4f1300a3c..08fd5b4e3 100644
--- a/src/registrar/management/commands/load_domains_data.py
+++ b/src/registrar/management/commands/load_domains_data.py
@@ -54,8 +54,7 @@ class Command(BaseCommand):
         # accumulate model objects so we can `bulk_create` them all at once.
         domains = []
         for row in reader:
-            name = row["Name"]
-            logger.info("Processing domain %s", name)
+            name = row["Name"].lower()  # we typically use lowercase domains
 
             # Ensure that there is a `Domain` object for each domain name in
             # this file and that it is active. There is a uniqueness