diff --git a/docs/operations/data_migration.md b/docs/operations/data_migration.md new file mode 100644 index 000000000..84d7dee15 --- /dev/null +++ b/docs/operations/data_migration.md @@ -0,0 +1,65 @@ +# Registrar Data Migration + +There is an existing registrar/registry at Verisign. They will provide us with an +export of the data from that system. The goal of our data migration is to take +the provided data and use it to create, as much as possible, a _matching_ state +in our registrar. + +There is no way to make our registrar _identical_ to the Verisign system +because we have a different data model and workflow model. Instead, we should +focus our migration efforts on creating a state in our new registrar that will +primarily allow users of the system to perform the tasks that they want to do. + +## Users + +One of the major differences with the existing registrar/registry is that our +system uses Login.gov for authentication. Any person with an identity-verified +Login.gov account can make an account on the new registrar, and the first time +that person logs in through Login.gov, we make a corresponding account in our +user table. Because we cannot know the Universally Unique Identifier (UUID) for a +person's Login.gov account, we cannot pre-create user accounts for individuals +in our new registrar based on the data from Verisign. + +## Domains + +Our registrar keeps track of domains. The authoritative source for domain +information is the registry, but the registrar needs a copy of that +information to make connections between registry users and the domains that +they manage. The registrar stores very few fields about a domain other than +its name, so it could be straightforward to import the exported list of domains +from Verisign's `escrow_domains.daily.dotgov.GOV.txt`. It doesn't appear that +that table stores a flag for active or inactive, so every domain in the file +can be imported into our system as `is_active=True`. 
+ +An example Django management command that can load the delimited text file +from the daily escrow is in +`src/registrar/management/commands/load_domains_data.py`. It uses Django's +object-relational mapper (ORM) to create Django objects for the domains and +then write them to the database in a single bulk operation. To run the command +locally for testing, using Docker Compose: + +```shell +docker compose run -T app ./manage.py load_domains_data < /tmp/escrow_domains.daily.dotgov.GOV.txt +``` + +## User access to domains + +The Verisign data contains an `escrow_domain_contacts.daily.dotgov.txt` file +that links each domain to three different types of contacts: `billing`, +`tech`, and `admin`. The ID of the contact in this linking table corresponds +to the ID of a contact in the `escrow_contacts.daily.dotgov.txt` file. The +contacts file contains an email address for each contact. + +The new registrar associates user accounts (authenticated with Login.gov) with +domains using a `UserDomainRole` linking table. New users can be granted roles +on domains by creating a `DomainInvitation` that links an email address with a +domain. When a new user finishes authenticating with Login.gov and their email +address matches an invitation, then they are given the appropriate role on the +invitation's domain. + +For the purposes of migration, we can prime the invitation system by creating +an invitation in the system for each email address listed in the +`domain_contacts` file. This means that if a person is currently a user in the +Verisign system, and they use the same email address with Login.gov, then they +will end up with access to the same domains in the new registrar that they +were associated with in the Verisign system. 
# src/registrar/management/commands/load_domains_data.py
"""Load domains from registry export."""

import csv
import logging
import sys

from django.core.management.base import BaseCommand
from django.db.transaction import atomic  # NOTE(review): currently unused here

from registrar.models import Domain


logger = logging.getLogger(__name__)


def _domain_dict_reader(file_object, **kwargs):
    """A csv DictReader with the correct field names for escrow_domains data.

    The export has no header row, so the column names are supplied here.
    All keyword arguments are sent on to the DictReader function call.
    """
    # field names are from escrow_manifests without "f"
    return csv.DictReader(
        file_object,
        fieldnames=[
            "Name",
            "Roid",
            "IdnTableId",
            "Registrant",
            "ClID",
            "CrRr",
            "CrID",
            "CrDate",
            "UpRr",
            "UpID",
            "UpDate",
            "ExDate",
            "TrDate",
        ],
        **kwargs,
    )


class Command(BaseCommand):
    """Management command that creates active `Domain` rows from stdin."""

    help = "Load domain data from a delimited text file on stdin."

    def add_arguments(self, parser):
        """Register the optional ``--sep`` field-separator argument."""
        parser.add_argument(
            "--sep", default="|", help="Separator character for data file"
        )

    def handle(self, *args, **options):
        """Read domain rows from stdin and bulk-create the missing ones.

        A `Domain(name=..., is_active=True)` is created for every name in
        the input that is not already present as an active domain. Names
        that are blank, already active in the database, or repeated within
        the input are skipped so that the final `bulk_create` does not trip
        the uniqueness constraint on active domains.
        """
        separator_character = options.get("sep")
        reader = _domain_dict_reader(sys.stdin, delimiter=separator_character)

        # Fetch the active names once rather than issuing an `exists()` query
        # per input row. Names queued below are added to this set as well, so
        # a name repeated in the input is only created once.
        # NOTE(review): membership is an exact string comparison, like the
        # `name=` lookup it replaces on a case-sensitive database — confirm.
        known_names = set(
            Domain.objects.filter(is_active=True).values_list("name", flat=True)
        )

        # accumulate model objects so we can `bulk_create` them all at once.
        domains = []
        for row in reader:
            name = row["Name"]
            if not name.strip():
                # A row without a name cannot become a meaningful Domain.
                logger.warning(
                    "Skipping line %d: no domain name", reader.line_num
                )
                continue
            logger.info("Processing domain %s", name)

            if name in known_names:
                # don't do anything, this domain is here (or already queued)
                continue
            known_names.add(name)
            domains.append(Domain(name=name, is_active=True))

        logger.info("Creating %d new domains", len(domains))
        Domain.objects.bulk_create(domains)