From 43e619dc1a541f70097f52481ec82fa46b3e38b4 Mon Sep 17 00:00:00 2001 From: CocoByte Date: Thu, 19 Oct 2023 17:52:49 -0600 Subject: [PATCH 1/7] scaffolded script Signed-off-by: CocoByte --- .../commands/test_domain_migration.py | 107 ++++++++++++++++++ .../management/commands/utility/enums.py | 0 2 files changed, 107 insertions(+) create mode 100644 src/registrar/management/commands/test_domain_migration.py create mode 100644 src/registrar/management/commands/utility/enums.py diff --git a/src/registrar/management/commands/test_domain_migration.py b/src/registrar/management/commands/test_domain_migration.py new file mode 100644 index 000000000..7bdda7643 --- /dev/null +++ b/src/registrar/management/commands/test_domain_migration.py @@ -0,0 +1,107 @@ +import logging +import argparse +import sys + +from django_fsm import TransitionNotAllowed # type: ignore + +from django.core.management import BaseCommand + +from registrar.models import TransitionDomain +from registrar.models import Domain +from registrar.models import DomainInvitation + +logger = logging.getLogger(__name__) + + +class termColors: + """Colors for terminal outputs + (makes reading the logs WAY easier)""" + + HEADER = "\033[95m" + OKBLUE = "\033[94m" + OKCYAN = "\033[96m" + OKGREEN = "\033[92m" + YELLOW = "\033[93m" + FAIL = "\033[91m" + ENDC = "\033[0m" + BOLD = "\033[1m" + UNDERLINE = "\033[4m" + BackgroundLightYellow = "\033[103m" + + +class Command(BaseCommand): + help = """ """ + + def add_arguments(self, parser): + parser.add_argument("--debug", action=argparse.BooleanOptionalAction) + + parser.add_argument( + "--limitParse", + default=0, + help="Sets max number of entries to load, set to 0 to load all entries", + ) + + def print_debug_mode_statements( + self, debug_on: bool, debug_max_entries_to_parse: int + ): + """Prints additional terminal statements to indicate if --debug + or --limitParse are in use""" + self.print_debug( + debug_on, + f"""{termColors.OKCYAN} + ----------DEBUG MODE 
ON---------- + Detailed print statements activated. + {termColors.ENDC} + """, + ) + self.print_debug( + debug_max_entries_to_parse > 0, + f"""{termColors.OKCYAN} + ----------LIMITER ON---------- + Parsing of entries will be limited to + {debug_max_entries_to_parse} lines per file.") + Detailed print statements activated. + {termColors.ENDC} + """, + ) + + def print_debug(self, print_condition: bool, print_statement: str): + """This function reduces complexity of debug statements + in other functions. + It uses the logger to write the given print_statement to the + terminal if print_condition is TRUE""" + # DEBUG: + if print_condition: + logger.info(print_statement) + + + def handle( + self, + **options, + ): + """ + Do a diff between the transition_domain and the following tables: + domain, domain_information and the domain_invitation. + + It should: + - Print any domains that exist in the transition_domain table + but not in their corresponding domain, domain information or + domain invitation tables. 
+ - Print which table this domain is missing from + - Check for duplicate entries in domain or + domain_information tables and print which are + duplicates and in which tables + + (ONLY RUNS with full script option) + - Emails should be sent to the appropriate users + note that all moved domains should now be accessible + on django admin for an analyst + + OPTIONS: + -- (run all other scripts: + 1 - imports for trans domains + 2 - transfer to domain & domain invitation + 3 - send domain invite) + ** Triggers table reset ** + """ + diff --git a/src/registrar/management/commands/utility/enums.py b/src/registrar/management/commands/utility/enums.py new file mode 100644 index 000000000..e69de29bb From 70388d6185c9d725caa96611ab362b8a2cd78a66 Mon Sep 17 00:00:00 2001 From: zandercymatics <141044360+zandercymatics@users.noreply.github.com> Date: Mon, 23 Oct 2023 11:15:41 -0600 Subject: [PATCH 2/7] Update data_migration.md --- docs/operations/data_migration.md | 76 +++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/docs/operations/data_migration.md b/docs/operations/data_migration.md index 192db0db8..0a446c7ae 100644 --- a/docs/operations/data_migration.md +++ b/docs/operations/data_migration.md @@ -84,6 +84,82 @@ FILE 1: **escrow_domain_contacts.daily.gov.GOV.txt** -> has the map of domain na FILE 2: **escrow_contacts.daily.gov.GOV.txt** -> has the mapping of contact id to contact email address (which is what we care about for sending domain invitations) FILE 3: **escrow_domain_statuses.daily.gov.GOV.txt** -> has the map of domains and their statuses +## Load migration data onto a production or sandbox environment +**WARNING:** All files uploaded in this manner are temporary, i.e. they will be deleted when the app is restaged. +Do not use this method to store data you want to keep around permanently. + +### STEP 1: Use scp to transfer data +CloudFoundry supports scp as means of transferring data locally to our environment. 
If you are dealing with a batch of files, try sending across a tar.gz and unpacking that. + +**Login to Cloud.gov** + +```bash +cf login -a api.fr.cloud.gov --sso +``` + +**Target your workspace** + +```bash +cf target -o cisa-dotgov -s {SANDBOX_NAME} +``` +*SANDBOX_NAME* - Name of your sandbox, ex: za or ab + +**Run the scp command** + +Use the following command to transfer the desired file: +```shell +scp -P 2222 -o User=cf:$(cf curl /v3/apps/$(cf app {FULL_NAME_OF_YOUR_SANDBOX_HERE} --guid)/processes | jq -r '.resources[] +| select(.type=="web") | .guid')/0 {LOCAL_PATH_TO_FILE} ssh.fr.cloud.gov:tmp/{DESIRED_NAME_OF_FILE} +``` +The items in curly braces are the values that you will manually replace. +These are as follows: +* FULL_NAME_OF_YOUR_SANDBOX_HERE - Name of your sandbox, ex: getgov-za +* LOCAL_PATH_TO_FILE - Path to the file you want to copy, ex: src/tmp/escrow_contacts.daily.gov.GOV.txt +* DESIRED_NAME_OF_FILE - Use this to specify the filename and type, ex: test.txt or escrow_contacts.daily.gov.GOV.txt + +NOTE: If you wish to change which directory these files are uploaded to, you can change `ssh.fr.cloud.gov:tmp/` to `ssh.fr.cloud.gov:{DIRECTORY_YOU_WANT}/`, but be aware that this makes data migration more tricky than it has to be. + +**Get a temp auth code** + +The scp command requires a temporary authentication code. Open a new terminal instance (while keeping the current one open), +and enter the following command: +```shell +cf ssh-code +``` +Copy this code into the password prompt from earlier. + +NOTE: You can use different utilities to copy this onto the clipboard for you. If you are on Windows, try the command `cf ssh-code | clip`. On Mac, this will be `cf ssh-code | pbcopy` + +### STEP 2: Transfer uploaded files to the getgov directory +Due to the nature of how Cloud.gov operates, the getgov directory is dynamically generated whenever the app is built under the tmp/ folder. 
We can directly upload files to the tmp/ folder but cannot target the generated getgov folder directly, as we need to spin up a shell to access this. From here, we can move those uploaded files into the getgov directory using the `cat` command. Note that you will have to repeat this for each file you want to move, so it is better to use a tar.gz for multiple files and unpack it inside of the `migrationdata` folder. + +**SSH into your sandbox** + +```shell +cf ssh {FULL_NAME_OF_YOUR_SANDBOX_HERE} +``` + +**Open a shell** + +```shell +/tmp/lifecycle/shell +``` + +**Move the desired file into the correct directory** + +```shell +cat ../tmp/{filename} > datamigration/{filename} +``` + + +### STEP 3: Load Transition Domain data into TransitionDomain table +Run the following script to transfer the existing data on our .txt files to our DB. +```shell +./manage.py load_transition_domain migrationdata/escrow_domain_contacts.daily.gov.GOV.txt migrationdata/escrow_contacts.daily.gov.GOV.txt migrationdata/escrow_domain_statuses.daily.gov.GOV.txt +``` + +## Load migration data onto our local environments + Transferring this data from these files into our domain tables happens in two steps; ***IMPORTANT: only run the following locally, to avoid publicizing PII in our public repo.*** From e17bb11f4c15666f9cd01122accd8100d264a057 Mon Sep 17 00:00:00 2001 From: zandercymatics <141044360+zandercymatics@users.noreply.github.com> Date: Mon, 23 Oct 2023 12:42:02 -0600 Subject: [PATCH 3/7] Create README.md --- src/migrationdata/README.md | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 src/migrationdata/README.md diff --git a/src/migrationdata/README.md b/src/migrationdata/README.md new file mode 100644 index 000000000..81190ee3f --- /dev/null +++ b/src/migrationdata/README.md @@ -0,0 +1,8 @@ +## Purpose +Use this folder for storing files for the migration process. Should otherwise be empty on local dev environments unless necessary. 
This folder must exist due to the nature of how data is stored on cloud.gov and the nature of the data we want to send. + +## How do I migrate registrar data? +This process is detailed in [data_migration.md](../../docs/operations/data_migration.md) + +## What kind of files can I store here? +The intent is for PII data or otherwise, but this can exist in any format. Do note that the data contained in this folder is temporary: it will be deleted when the app is restaged. This is ideal for migration files as they write to our DB, but not for something you need to permanently hold onto. \ No newline at end of file From 10841bb97403481f4dff876ef2555890c540ab2f Mon Sep 17 00:00:00 2001 From: zandercymatics <141044360+zandercymatics@users.noreply.github.com> Date: Mon, 23 Oct 2023 14:51:55 -0600 Subject: [PATCH 4/7] Script for moving files --- docs/operations/data_migration.md | 13 +++- .../commands/cat_files_into_getgov.py | 61 +++++++++++++++++++ 2 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 src/registrar/management/commands/cat_files_into_getgov.py diff --git a/docs/operations/data_migration.md b/docs/operations/data_migration.md index 0a446c7ae..6e8a58b73 100644 --- a/docs/operations/data_migration.md +++ b/docs/operations/data_migration.md @@ -145,10 +145,21 @@ cf ssh {FULL_NAME_OF_YOUR_SANDBOX_HERE} /tmp/lifecycle/shell ``` +From this directory, run the following command: +```shell +./manage.py cat_files_into_getgov --file_extension txt +``` + +NOTE: This will look for all files in /tmp with the .txt extension, but this can +be changed if you are dealing with different extensions. + +#### Manual method +If the `cat_files_into_getgov.py` script isn't working, follow these steps instead. 
+ **Move the desired file into the correct directory** ```shell -cat ../tmp/{filename} > datamigration/{filename} +cat ../tmp/{filename} > migrationdata/{filename} ``` diff --git a/src/registrar/management/commands/cat_files_into_getgov.py b/src/registrar/management/commands/cat_files_into_getgov.py new file mode 100644 index 000000000..17964f236 --- /dev/null +++ b/src/registrar/management/commands/cat_files_into_getgov.py @@ -0,0 +1,61 @@ +"""Loads files from /tmp into our sandboxes""" +import glob +import csv +import logging + +import os + +from django.core.management import BaseCommand + + +logger = logging.getLogger(__name__) + + +class Command(BaseCommand): + help = "Runs the cat command on files from /tmp into the getgov directory." + + def add_arguments(self, parser): + """Add our two filename arguments.""" + parser.add_argument( + "--file_extension", + default="txt", + help="What file extensions to look for, like txt or gz", + ) + parser.add_argument("--directory", default="migrationdata", help="Desired directory") + + def handle(self, **options): + file_extension: str = options.get("file_extension").lstrip('.') + directory = options.get("directory") + + # file_extension is always coerced as str, Truthy is OK to use here. + if not file_extension or not isinstance(file_extension, str): + raise ValueError(f"Invalid file extension '{file_extension}'") + + matching_extensions = glob.glob(f'../tmp/*.{file_extension}') + if not matching_extensions: + logger.error(f"No files with the extension {file_extension} found") + + for src_file_path in matching_extensions: + filename = os.path.basename(src_file_path) + do_command = True + exit_status: int + + desired_file_path = f'{directory}/{filename}' + if os.path.exists(desired_file_path): + replace = input(f'{desired_file_path} already exists. Do you want to replace it? 
(y/n) ') + if replace.lower() != 'y': + do_command = False + + if do_command: + copy_from = f"../tmp/{filename}" + self.cat(copy_from, desired_file_path) + exit_status = os.system(f'cat ../tmp/{filename} > {desired_file_path}') + + if exit_status == 0: + logger.info(f"Successfully copied {filename}") + else: + logger.info(f"Failed to copy {filename}") + + def cat(self, copy_from, copy_to): + exit_status = os.system(f'cat {copy_from} > {copy_to}') + return exit_status \ No newline at end of file From c713b8958f0ce0585f969d0530b080a4e0bab609 Mon Sep 17 00:00:00 2001 From: CocoByte Date: Mon, 23 Oct 2023 14:54:02 -0600 Subject: [PATCH 5/7] added test_domain_migration script and terminal_helper. Still needs work Signed-off-by: CocoByte --- .../commands/test_domain_migration.py | 223 +++++++++++++++--- .../management/commands/utility/enums.py | 0 .../commands/utility/terminal_helper.py | 50 ++++ 3 files changed, 235 insertions(+), 38 deletions(-) delete mode 100644 src/registrar/management/commands/utility/enums.py create mode 100644 src/registrar/management/commands/utility/terminal_helper.py diff --git a/src/registrar/management/commands/test_domain_migration.py b/src/registrar/management/commands/test_domain_migration.py index 7bdda7643..19631140a 100644 --- a/src/registrar/management/commands/test_domain_migration.py +++ b/src/registrar/management/commands/test_domain_migration.py @@ -9,59 +9,76 @@ from django.core.management import BaseCommand from registrar.models import TransitionDomain from registrar.models import Domain from registrar.models import DomainInvitation +from registrar.models.domain_information import DomainInformation + +from registrar.management.commands.utility.terminal_helper import TerminalColors +from registrar.management.commands.utility.terminal_helper import TerminalHelper + +from registrar.management.commands.load_transition_domain import Command as load_transition_domain_command logger = logging.getLogger(__name__) - -class termColors: - 
"""Colors for terminal outputs - (makes reading the logs WAY easier)""" - - HEADER = "\033[95m" - OKBLUE = "\033[94m" - OKCYAN = "\033[96m" - OKGREEN = "\033[92m" - YELLOW = "\033[93m" - FAIL = "\033[91m" - ENDC = "\033[0m" - BOLD = "\033[1m" - UNDERLINE = "\033[4m" - BackgroundLightYellow = "\033[103m" - - class Command(BaseCommand): help = """ """ def add_arguments(self, parser): + """ + OPTIONAL ARGUMENTS: + --debug + A boolean (default to true), which activates additional print statements + """ + + parser.add_argument("--runLoaders", + help="Runs all scripts (in sequence) for transition domain migrations", + action=argparse.BooleanOptionalAction) + + # The file arguments have default values for running in the sandbox + parser.add_argument( + "--loaderDirectory", + default="migrationData/", + help="The location of the files used for load_transition_domain migration script" + ) + parser.add_argument( + "domain_contacts_filename", + default="escrow_domain_contacts.daily.gov.GOV.txt", + help="Data file with domain contact information" + ) + parser.add_argument( + "contacts_filename", + default="escrow_contacts.daily.gov.GOV.txt", + help="Data file with contact information", + ) + parser.add_argument( + "domain_statuses_filename", + default="escrow_domain_statuses.daily.gov.GOV.txt", + help="Data file with domain status information" + ) + + parser.add_argument("--sep", default="|", help="Delimiter character") + parser.add_argument("--debug", action=argparse.BooleanOptionalAction) parser.add_argument( - "--limitParse", - default=0, - help="Sets max number of entries to load, set to 0 to load all entries", + "--limitParse", default=0, help="Sets max number of entries to load" + ) + + parser.add_argument( + "--resetTable", + help="Deletes all data in the TransitionDomain table", + action=argparse.BooleanOptionalAction, ) def print_debug_mode_statements( - self, debug_on: bool, debug_max_entries_to_parse: int + self, debug_on: bool ): """Prints additional terminal 
statements to indicate if --debug or --limitParse are in use""" self.print_debug( debug_on, - f"""{termColors.OKCYAN} + f"""{TerminalColors.OKCYAN} ----------DEBUG MODE ON---------- Detailed print statements activated. - {termColors.ENDC} - """, - ) - self.print_debug( - debug_max_entries_to_parse > 0, - f"""{termColors.OKCYAN} - ----------LIMITER ON---------- - Parsing of entries will be limited to - {debug_max_entries_to_parse} lines per file.") - Detailed print statements activated. - {termColors.ENDC} + {TerminalColors.ENDC} """, ) @@ -74,21 +91,133 @@ class Command(BaseCommand): if print_condition: logger.info(print_statement) + def compare_tables(self, debug_on): + logger.info( + f"""{TerminalColors.OKCYAN} + ============= BEGINNING ANALYSIS =============== + {TerminalColors.ENDC} + """ + ) + + #TODO: would filteredRelation be faster? + for transition_domain in TransitionDomain.objects.all():# DEBUG: + transition_domain_name = transition_domain.domain_name + transition_domain_email = transition_domain.username + + self.print_debug( + debug_on, + f"{TerminalColors.OKCYAN}Checking: {transition_domain_name} {TerminalColors.ENDC}", # noqa + ) + + missing_domains = [] + duplicate_domains = [] + missing_domain_informations = [] + missing_domain_invites = [] + + # Check Domain table + matching_domains = Domain.objects.filter(name=transition_domain_name) + # Check Domain Information table + matching_domain_informations = DomainInformation.objects.filter(domain__name=transition_domain_name) + # Check Domain Invitation table + matching_domain_invitations = DomainInvitation.objects.filter(email=transition_domain_email.lower(), + domain__name=transition_domain_name) + + if len(matching_domains) == 0: + missing_domains.append(transition_domain_name) + elif len(matching_domains) > 1: + duplicate_domains.append(transition_domain_name) + if len(matching_domain_informations) == 0: + missing_domain_informations.append(transition_domain_name) + if 
len(matching_domain_invitations) == 0: + missing_domain_invites.append(transition_domain_name) + + total_missing_domains = len(missing_domains) + total_duplicate_domains = len(duplicate_domains) + total_missing_domain_informations = len(missing_domain_informations) + total_missing_domain_invitations = len(missing_domain_invites) + + missing_domains_as_string = "{}".format(", ".join(map(str, missing_domains))) + duplicate_domains_as_string = "{}".format(", ".join(map(str, duplicate_domains))) + missing_domain_informations_as_string = "{}".format(", ".join(map(str, missing_domain_informations))) + missing_domain_invites_as_string = "{}".format(", ".join(map(str, missing_domain_invites))) + + logger.info( + f"""{TerminalColors.OKGREEN} + ============= FINISHED ANALYSIS =============== + + {total_missing_domains} Missing Domains: + (These are transition domains that are missing from the Domain Table) + {TerminalColors.YELLOW}{missing_domains_as_string}{TerminalColors.OKGREEN} + + {total_duplicate_domains} Duplicate Domains: + (These are transition domains which have duplicate entries in the Domain Table) + {TerminalColors.YELLOW}{duplicate_domains_as_string}{TerminalColors.OKGREEN} + + {total_missing_domain_informations} Domains Information Entries missing: + (These are transition domains which have no entries in the Domain Information Table) + {TerminalColors.YELLOW}{missing_domain_informations_as_string}{TerminalColors.OKGREEN} + + {total_missing_domain_invitations} Domain Invitations missing: + (These are transition domains which have no entires in the Domain Invitation Table) + {TerminalColors.YELLOW}{missing_domain_invites_as_string}{TerminalColors.OKGREEN} + {TerminalColors.ENDC} + """ + ) + def run_migration_scripts(self, + file_location, + domain_contacts_filename, + contacts_filename, + domain_statuses_filename): + + files_are_correct = TerminalHelper.query_yes_no( + f""" + {TerminalColors.YELLOW} + PLEASE CHECK: + The loader scripts expect to find the 
following files: + - domain contacts: {domain_contacts_filename} + - contacts: {contacts_filename} + - domain statuses: {domain_statuses_filename} + + The files should be at the following directory; + {file_location} + + Does this look correct?{TerminalColors.ENDC}""" + ) + + if not files_are_correct: + # prompt the user to provide correct file inputs + logger.info(f""" + {TerminalColors.YELLOW} + PLEASE Re-Run the script with the correct file location and filenames: + EXAMPLE: + + + """) + return + load_transition_domain_command.handle( + domain_contacts_filename, + contacts_filename, + domain_statuses_filename + ) + def handle( self, + # domain_contacts_filename, + # contacts_filename, + # domain_statuses_filename, **options, ): """ - Do a diff between the transition_domain and the following tables: + Does a diff between the transition_domain and the following tables: domain, domain_information and the domain_invitation. - It should: - - Print any domains that exist in the transition_domain table + Produces the following report (printed to the terminal): + #1 - Print any domains that exist in the transition_domain table but not in their corresponding domain, domain information or domain invitation tables. - - Print which table this domain is missing from - - Check for duplicate entries in domain or + #2 - Print which table this domain is missing from + #3- Check for duplicate entries in domain or domain_information tables and print which are duplicates and in which tables @@ -105,3 +234,21 @@ class Command(BaseCommand): ** Triggers table reset ** """ + # Get --debug argument + debug_on = options.get("debug") + # Get --runLoaders argument + run_loaders_on = options.get("runLoaders") + + # Analyze tables for corrupt data... + self.compare_tables(debug_on) + + # Run migration scripts if specified by user... 
+ if run_loaders_on: + file_location = options.get("loaderDirectory") + # domain_contacts_filename = options.get("domain_contacts_filename") + # contacts_filename = options.get("contacts_filename") + # domain_statuses_filename = options.get("domain_statuses_filename") + # self.run_migration_scripts(file_location, + # domain_contacts_filename, + # contacts_filename, + # domain_statuses_filename) \ No newline at end of file diff --git a/src/registrar/management/commands/utility/enums.py b/src/registrar/management/commands/utility/enums.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/registrar/management/commands/utility/terminal_helper.py b/src/registrar/management/commands/utility/terminal_helper.py new file mode 100644 index 000000000..da2a3a54a --- /dev/null +++ b/src/registrar/management/commands/utility/terminal_helper.py @@ -0,0 +1,50 @@ +import logging + +logger = logging.getLogger(__name__) + +class TerminalColors: + """Colors for terminal outputs + (makes reading the logs WAY easier)""" + + HEADER = "\033[95m" + OKBLUE = "\033[94m" + OKCYAN = "\033[96m" + OKGREEN = "\033[92m" + YELLOW = "\033[93m" + FAIL = "\033[91m" + ENDC = "\033[0m" + BOLD = "\033[1m" + UNDERLINE = "\033[4m" + BackgroundLightYellow = "\033[103m" + +class TerminalHelper: + + def query_yes_no(question: str, default="yes") -> bool: + """Ask a yes/no question via raw_input() and return their answer. + + "question" is a string that is presented to the user. + "default" is the presumed answer if the user just hits . + It must be "yes" (the default), "no" or None (meaning + an answer is required of the user). + + The "answer" return value is True for "yes" or False for "no". 
+ """ + valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False} + if default is None: + prompt = " [y/n] " + elif default == "yes": + prompt = " [Y/n] " + elif default == "no": + prompt = " [y/N] " + else: + raise ValueError("invalid default answer: '%s'" % default) + + while True: + logger.info(question + prompt) + choice = input().lower() + if default is not None and choice == "": + return valid[default] + elif choice in valid: + return valid[choice] + else: + logger.info("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n") \ No newline at end of file From d5c0ac7a0c7cda5a28b8ee20aa3782f3d4912134 Mon Sep 17 00:00:00 2001 From: CocoByte Date: Mon, 23 Oct 2023 22:22:54 -0600 Subject: [PATCH 6/7] attempt 1 for executing external functions -- note, this doesn't work Signed-off-by: CocoByte --- .../commands/load_transition_domain.py | 50 +++++--- .../commands/test_domain_migration.py | 117 ++++++++++++------ 2 files changed, 114 insertions(+), 53 deletions(-) diff --git a/src/registrar/management/commands/load_transition_domain.py b/src/registrar/management/commands/load_transition_domain.py index 206589c33..1212870b5 100644 --- a/src/registrar/management/commands/load_transition_domain.py +++ b/src/registrar/management/commands/load_transition_domain.py @@ -297,29 +297,20 @@ class Command(BaseCommand): ) TransitionDomain.objects.all().delete() - def handle( # noqa: C901 - self, + def parse_files(self, # noqa: C901 domain_contacts_filename, contacts_filename, domain_statuses_filename, - **options, - ): - """Parse the data files and create TransitionDomains.""" - sep = options.get("sep") + reset_table, + sep, + debug_on, + debug_max_entries_to_parse): # If --resetTable was used, prompt user to confirm # deletion of table data - if options.get("resetTable"): + if reset_table: self.prompt_table_reset() - # Get --debug argument - debug_on = options.get("debug") - - # Get --LimitParse argument - debug_max_entries_to_parse = int( - 
options.get("limitParse") - ) # set to 0 to parse all entries - # print message to terminal about which args are in use self.print_debug_mode_statements(debug_on, debug_max_entries_to_parse) @@ -522,3 +513,32 @@ class Command(BaseCommand): duplicate_domain_user_combos, duplicate_domains, users_without_email ) self.print_summary_status_findings(domains_without_status, outlier_statuses) + + def handle( + self, + domain_contacts_filename, + contacts_filename, + domain_statuses_filename, + **options, + ): + """Parse the data files and create TransitionDomains.""" + # Get --sep argument + sep = options.get("sep") + + # Get --resetTable argument + reset_table = options.get("resetTable") + + # Get --debug argument + debug_on = options.get("debug") + + # Get --limitParse argument + debug_max_entries_to_parse = int( + options.get("limitParse") + ) # set to 0 to parse all entries + self.parse_files(domain_contacts_filename, + contacts_filename, + domain_statuses_filename, + sep, + reset_table, + debug_on, + debug_max_entries_to_parse) diff --git a/src/registrar/management/commands/test_domain_migration.py b/src/registrar/management/commands/test_domain_migration.py index 19631140a..f5154e0fe 100644 --- a/src/registrar/management/commands/test_domain_migration.py +++ b/src/registrar/management/commands/test_domain_migration.py @@ -35,26 +35,39 @@ class Command(BaseCommand): # The file arguments have default values for running in the sandbox parser.add_argument( "--loaderDirectory", - default="migrationData/", + default="migrationData", help="The location of the files used for load_transition_domain migration script" ) parser.add_argument( - "domain_contacts_filename", - default="escrow_domain_contacts.daily.gov.GOV.txt", - help="Data file with domain contact information" - ) - parser.add_argument( - "contacts_filename", - default="escrow_contacts.daily.gov.GOV.txt", - help="Data file with contact information", - ) - parser.add_argument( - "domain_statuses_filename", - 
default="escrow_domain_statuses.daily.gov.GOV.txt", - help="Data file with domain status information" + "--loaderFilenames", + default="escrow_domain_contacts.daily.gov.GOV.txt escrow_contacts.daily.gov.GOV.txt escrow_domain_statuses.daily.gov.GOV.txt", + help="""The files used for load_transition_domain migration script. + Must appear IN ORDER and separated by spaces: + domain_contacts_filename.txt contacts_filename.txt domain_statuses_filename.txt + + where... + - domain_contacts_filename is the Data file with domain contact information + - contacts_filename is the Data file with contact information + - domain_statuses_filename is the Data file with domain status information""" ) - parser.add_argument("--sep", default="|", help="Delimiter character") + # parser.add_argument( + # "domain_contacts_filename", + # default="escrow_domain_contacts.daily.gov.GOV.txt", + # help="Data file with domain contact information" + # ) + # parser.add_argument( + # "contacts_filename", + # default="escrow_contacts.daily.gov.GOV.txt", + # help="Data file with contact information", + # ) + # parser.add_argument( + # "domain_statuses_filename", + # default="escrow_domain_statuses.daily.gov.GOV.txt", + # help="Data file with domain status information" + # ) + + parser.add_argument("--sep", default="|", help="Delimiter character for the loader files") parser.add_argument("--debug", action=argparse.BooleanOptionalAction) @@ -165,23 +178,40 @@ class Command(BaseCommand): ) def run_migration_scripts(self, - file_location, - domain_contacts_filename, - contacts_filename, - domain_statuses_filename): + options): + + file_location = options.get("loaderDirectory")+"/" + filenames = options.get("loaderFilenames").split() + if len(filenames) < 3: + filenames_as_string = "{}".format(", ".join(map(str, filenames))) + logger.info(f""" + {TerminalColors.FAIL} + --loaderFilenames expected 3 filenames to follow it, + but only {len(filenames)} were given: + {filenames_as_string} + + PLEASE MODIFY THE 
SCRIPT AND TRY RUNNING IT AGAIN + ============= TERMINATING ============= + {TerminalColors.ENDC} + """) + return + domain_contacts_filename = filenames[0] + contacts_filename = filenames[1] + domain_statuses_filename = filenames[2] files_are_correct = TerminalHelper.query_yes_no( f""" {TerminalColors.YELLOW} - PLEASE CHECK: - The loader scripts expect to find the following files: + *** IMPORTANT: VERIFY THE FOLLOWING *** + + The migration scripts are looking in directory.... + {file_location} + + ....for the following files: - domain contacts: {domain_contacts_filename} - contacts: {contacts_filename} - domain statuses: {domain_statuses_filename} - The files should be at the following directory; - {file_location} - Does this look correct?{TerminalColors.ENDC}""" ) @@ -190,22 +220,37 @@ class Command(BaseCommand): logger.info(f""" {TerminalColors.YELLOW} PLEASE Re-Run the script with the correct file location and filenames: - EXAMPLE: + EXAMPLE: + docker compose run -T app ./manage.py test_domain_migration --runLoaders --loaderDirectory /app/tmp --loaderFilenames escrow_domain_contacts.daily.gov.GOV.txt escrow_contacts.daily.gov.GOV.txt escrow_domain_statuses.daily.gov.GOV.txt """) return - load_transition_domain_command.handle( - domain_contacts_filename, - contacts_filename, - domain_statuses_filename - ) + + # Get --sep argument + sep = options.get("sep") + + # Get --resetTable argument + reset_table = options.get("resetTable") + + # Get --debug argument + debug_on = options.get("debug") + + # Get --limitParse argument + debug_max_entries_to_parse = int( + options.get("limitParse") + ) # set to 0 to parse all entries + load_transition_domain_command.parse_files(load_transition_domain_command, + domain_contacts_filename, + contacts_filename, + domain_statuses_filename, + sep, + reset_table, + debug_on, + debug_max_entries_to_parse) def handle( self, - # domain_contacts_filename, - # contacts_filename, - # domain_statuses_filename, **options, ): """ @@ -244,11 
+289,7 @@ class Command(BaseCommand): # Run migration scripts if specified by user... if run_loaders_on: - file_location = options.get("loaderDirectory") # domain_contacts_filename = options.get("domain_contacts_filename") # contacts_filename = options.get("contacts_filename") # domain_statuses_filename = options.get("domain_statuses_filename") - # self.run_migration_scripts(file_location, - # domain_contacts_filename, - # contacts_filename, - # domain_statuses_filename) \ No newline at end of file + self.run_migration_scripts(options) \ No newline at end of file From cb3cfe3a6d5c34bf0c2ba149bacec196eda3620f Mon Sep 17 00:00:00 2001 From: CocoByte Date: Tue, 24 Oct 2023 00:23:45 -0600 Subject: [PATCH 7/7] finished options for running loader scripts. Added boilerplate for simulating logins --- .../commands/load_transition_domain.py | 50 ++---- .../commands/test_domain_migration.py | 156 +++++++++++++++--- .../commands/utility/terminal_helper.py | 11 +- 3 files changed, 162 insertions(+), 55 deletions(-) diff --git a/src/registrar/management/commands/load_transition_domain.py b/src/registrar/management/commands/load_transition_domain.py index 1212870b5..206589c33 100644 --- a/src/registrar/management/commands/load_transition_domain.py +++ b/src/registrar/management/commands/load_transition_domain.py @@ -297,20 +297,29 @@ class Command(BaseCommand): ) TransitionDomain.objects.all().delete() - def parse_files(self, # noqa: C901 + def handle( # noqa: C901 + self, domain_contacts_filename, contacts_filename, domain_statuses_filename, - reset_table, - sep, - debug_on, - debug_max_entries_to_parse): + **options, + ): + """Parse the data files and create TransitionDomains.""" + sep = options.get("sep") # If --resetTable was used, prompt user to confirm # deletion of table data - if reset_table: + if options.get("resetTable"): self.prompt_table_reset() + # Get --debug argument + debug_on = options.get("debug") + + # Get --LimitParse argument + debug_max_entries_to_parse = 
int( + options.get("limitParse") + ) # set to 0 to parse all entries + # print message to terminal about which args are in use self.print_debug_mode_statements(debug_on, debug_max_entries_to_parse) @@ -513,32 +522,3 @@ class Command(BaseCommand): duplicate_domain_user_combos, duplicate_domains, users_without_email ) self.print_summary_status_findings(domains_without_status, outlier_statuses) - - def handle( - self, - domain_contacts_filename, - contacts_filename, - domain_statuses_filename, - **options, - ): - """Parse the data files and create TransitionDomains.""" - # Get --sep argument - sep = options.get("sep") - - # Get --resetTable argument - reset_table = options.get("resetTable") - - # Get --debug argument - debug_on = options.get("debug") - - # Get --limitParse argument - debug_max_entries_to_parse = int( - options.get("limitParse") - ) # set to 0 to parse all entries - self.parse_files(domain_contacts_filename, - contacts_filename, - domain_statuses_filename, - sep, - reset_table, - debug_on, - debug_max_entries_to_parse) diff --git a/src/registrar/management/commands/test_domain_migration.py b/src/registrar/management/commands/test_domain_migration.py index f5154e0fe..bc9efa9df 100644 --- a/src/registrar/management/commands/test_domain_migration.py +++ b/src/registrar/management/commands/test_domain_migration.py @@ -1,10 +1,13 @@ import logging import argparse -import sys +import os + +from django.test import Client from django_fsm import TransitionNotAllowed # type: ignore from django.core.management import BaseCommand +from django.contrib.auth import get_user_model from registrar.models import TransitionDomain from registrar.models import Domain @@ -31,8 +34,12 @@ class Command(BaseCommand): parser.add_argument("--runLoaders", help="Runs all scripts (in sequence) for transition domain migrations", action=argparse.BooleanOptionalAction) + + parser.add_argument("--triggerLogins", + help="Simulates a user login for each user in domain invitation", + 
action=argparse.BooleanOptionalAction) - # The file arguments have default values for running in the sandbox + # The following file arguments have default values for running in the sandbox parser.add_argument( "--loaderDirectory", default="migrationData", @@ -177,9 +184,79 @@ class Command(BaseCommand): """ ) + def run_load_transition_domain_script(self, + file_location, + domain_contacts_filename, + contacts_filename, + domain_statuses_filename, + sep, + reset_table, + debug_on, + debug_max_entries_to_parse): + load_transition_domain_command_string = "./manage.py load_transition_domain " + load_transition_domain_command_string += file_location+domain_contacts_filename + " " + load_transition_domain_command_string += file_location+contacts_filename + " " + load_transition_domain_command_string += file_location+domain_statuses_filename + " " + + if sep is not None and sep != "|": + load_transition_domain_command_string += f"--sep {sep} " + + if reset_table: + load_transition_domain_command_string += "--resetTable " + + if debug_on: + load_transition_domain_command_string += "--debug " + + if debug_max_entries_to_parse > 0: + load_transition_domain_command_string += f"--limitParse {debug_max_entries_to_parse} " + + proceed_load_transition_domain = TerminalHelper.query_yes_no( + f"""{TerminalColors.OKCYAN} + ===================================== + Running load_transition_domain script + ===================================== + + {load_transition_domain_command_string} + {TerminalColors.FAIL} + Proceed? + {TerminalColors.ENDC}""" + ) + + if not proceed_load_transition_domain: + return + logger.info(f"""{TerminalColors.OKCYAN} + ==== EXECUTING... 
==== + {TerminalColors.ENDC}""") + os.system(f"{load_transition_domain_command_string}") + + def run_transfer_script(self, debug_on): + command_string = "./manage.py transfer_transition_domains_to_domains " + + if debug_on: + command_string += "--debug " + + + proceed_load_transition_domain = TerminalHelper.query_yes_no( + f"""{TerminalColors.OKCYAN} + ===================================================== + Running transfer_transition_domains_to_domains script + ===================================================== + + {command_string} + {TerminalColors.FAIL} + Proceed? + {TerminalColors.ENDC}""" + ) + + if not proceed_load_transition_domain: + return + logger.info(f"""{TerminalColors.OKCYAN} + ==== EXECUTING... ==== + {TerminalColors.ENDC}""") + os.system(f"{command_string}") + def run_migration_scripts(self, options): - file_location = options.get("loaderDirectory")+"/" filenames = options.get("loaderFilenames").split() if len(filenames) < 3: @@ -201,7 +278,7 @@ class Command(BaseCommand): files_are_correct = TerminalHelper.query_yes_no( f""" - {TerminalColors.YELLOW} + {TerminalColors.OKCYAN} *** IMPORTANT: VERIFY THE FOLLOWING *** The migration scripts are looking in directory.... 
@@ -210,8 +287,9 @@ class Command(BaseCommand): ....for the following files: - domain contacts: {domain_contacts_filename} - contacts: {contacts_filename} - - domain statuses: {domain_statuses_filename} + - domain statuses: {domain_statuses_filename}y + {TerminalColors.FAIL} Does this look correct?{TerminalColors.ENDC}""" ) @@ -240,14 +318,38 @@ class Command(BaseCommand): debug_max_entries_to_parse = int( options.get("limitParse") ) # set to 0 to parse all entries - load_transition_domain_command.parse_files(load_transition_domain_command, - domain_contacts_filename, - contacts_filename, - domain_statuses_filename, - sep, - reset_table, - debug_on, - debug_max_entries_to_parse) + + self.run_load_transition_domain_script(file_location, + domain_contacts_filename, + contacts_filename, + domain_statuses_filename, + sep, + reset_table, + debug_on, + debug_max_entries_to_parse) + + self.run_transfer_script(debug_on) + + def simulate_user_logins(self, debug_on): + logger.info(f"""{TerminalColors.OKCYAN} + ================== + SIMULATING LOGINS + ================== + {TerminalColors.ENDC} + """) + for invite in DomainInvitation.objects.all(): + #DEBUG: + TerminalHelper.print_debug(debug_on,f"""{TerminalColors.OKCYAN}Processing invite: {invite}{TerminalColors.ENDC}""") + # get a user with this email address + User = get_user_model() + try: + user = User.objects.get(email=invite.email) + #DEBUG: + TerminalHelper.print_debug(debug_on,f"""{TerminalColors.OKCYAN}Logging in user: {user}{TerminalColors.ENDC}""") + Client.force_login(user) + except User.DoesNotExist: + #TODO: how should we handle this? 
+ logger.warn(f"""{TerminalColors.FAIL}No user found {invite.email}{TerminalColors.ENDC}""") def handle( self, @@ -283,13 +385,29 @@ class Command(BaseCommand): debug_on = options.get("debug") # Get --runLoaders argument run_loaders_on = options.get("runLoaders") + # Get --triggerLogins argument + simulate_user_login_enabled = options.get("triggerLogins") - # Analyze tables for corrupt data... - self.compare_tables(debug_on) + prompt_continuation_of_analysis = False # Run migration scripts if specified by user... if run_loaders_on: - # domain_contacts_filename = options.get("domain_contacts_filename") - # contacts_filename = options.get("contacts_filename") - # domain_statuses_filename = options.get("domain_statuses_filename") - self.run_migration_scripts(options) \ No newline at end of file + self.run_migration_scripts(options) + prompt_continuation_of_analysis = True + + # Simulate user login for each user in domain invitation if specified by user + if simulate_user_login_enabled: + self.simulate_user_logins(debug_on) + prompt_continuation_of_analysis = True + + analyze_tables = True + if prompt_continuation_of_analysis: + analyze_tables = TerminalHelper.query_yes_no( + f"""{TerminalColors.FAIL} + Proceed with table analysis? + {TerminalColors.ENDC}""" + ) + + # Analyze tables for corrupt data... 
+ if analyze_tables: + self.compare_tables(debug_on) \ No newline at end of file diff --git a/src/registrar/management/commands/utility/terminal_helper.py b/src/registrar/management/commands/utility/terminal_helper.py index da2a3a54a..ec7580e21 100644 --- a/src/registrar/management/commands/utility/terminal_helper.py +++ b/src/registrar/management/commands/utility/terminal_helper.py @@ -47,4 +47,13 @@ class TerminalHelper: elif choice in valid: return valid[choice] else: - logger.info("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n") \ No newline at end of file + logger.info("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n") + + def print_debug(print_condition: bool, print_statement: str): + """This function reduces complexity of debug statements + in other functions. + It uses the logger to write the given print_statement to the + terminal if print_condition is TRUE""" + # DEBUG: + if print_condition: + logger.info(print_statement) \ No newline at end of file