"""Data migration:
    1 - generates a report of data integrity across all
    transition domain related tables
    2 - allows users to run all migration scripts for
    transition domain data
"""

import logging
import argparse
import sys
import os

from django.test import Client

from django_fsm import TransitionNotAllowed  # type: ignore

from django.core.management import BaseCommand
from django.core.management import call_command

from registrar.models import (
    Domain,
    DomainInformation,
    DomainInvitation,
    TransitionDomain,
    User,
)

from registrar.management.commands.utility.terminal_helper import (
    TerminalColors,
    TerminalHelper,
)

logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Orchestrates the transition domain migration scripts and reports
    on the integrity of the resulting tables."""

    help = (
        "Optionally runs all transition domain migration scripts (in "
        "sequence), then prints a data integrity report comparing the "
        "transition domain table against the domain, domain information "
        "and domain invitation tables."
    )

    def add_arguments(self, parser):
        """Register the command line arguments for this command.

        All boolean flags use argparse.BooleanOptionalAction without an
        explicit default, so they are off (None) unless passed.

        OPTIONAL ARGUMENTS:
        --runMigrations
        Boolean flag. Triggers running all scripts (in sequence)
        for transition domain migrations.

        --triggerLogins
        Boolean flag. Intended to simulate a user login for each user
        in domain invitation (currently ignored by handle()).

        --migrationDirectory
        The location of the files used for the load_transition_domain
        migration script.
        EXAMPLE USAGE:
        > --migrationDirectory /app/tmp

        --migrationFilenames
        The files used for the load_transition_domain migration script.
        Must appear IN ORDER and comma-delimited:
        EXAMPLE USAGE:
        > --migrationFilenames domain_contacts_filename.txt,contacts_filename.txt,domain_statuses_filename.txt
        where...
        - domain_contacts_filename is the Data file with domain contact
          information
        - contacts_filename is the Data file with contact information
        - domain_statuses_filename is the Data file with domain status
          information

        --sep
        Delimiter for the migration scripts to correctly parse the given
        text files. (usually this can remain at default value of |)

        --debug
        Boolean flag. Activates additional print statements.

        --prompt
        Boolean flag. Activates terminal prompts that allow the user to
        step through each portion of this script.

        --limitParse
        Used by the migration scripts (load_transition_domain) to set the
        limit for the number of data entries to insert. Set to 0 (or just
        don't use this argument) to parse every entry. This was provided
        primarily for testing purposes.

        --resetTable
        Used by the migration scripts to trigger a prompt for deleting all
        table entries. Useful for testing purposes, but USE WITH CAUTION.
        """  # noqa - line length, impacts readability

        parser.add_argument(
            "--runMigrations",
            help="Runs all scripts (in sequence) for transition domain migrations",
            action=argparse.BooleanOptionalAction,
        )

        # --triggerLogins
        # A boolean flag, which triggers running
        # simulations of user logins for each user in domain invitation
        parser.add_argument(
            "--triggerLogins",
            help="Simulates a user login for each user in domain invitation",
            action=argparse.BooleanOptionalAction,
        )

        # The following file arguments have default values for running in the sandbox
        parser.add_argument(
            "--migrationDirectory",
            default="migrationData",
            help="The location of the files used for load_transition_domain migration script",
        )
        parser.add_argument(
            "--migrationFilenames",
            # Split only for line length; the value is one comma-joined string.
            default=(
                "escrow_domain_contacts.daily.gov.GOV.txt,"
                "escrow_contacts.daily.gov.GOV.txt,"
                "escrow_domain_statuses.daily.gov.GOV.txt"
            ),
            help="""The files used for load_transition_domain migration script.
            Must appear IN ORDER and separated by commas:
            domain_contacts_filename.txt,contacts_filename.txt,domain_statuses_filename.txt

            where...
            - domain_contacts_filename is the Data file with domain contact information
            - contacts_filename is the Data file with contact information
            - domain_statuses_filename is the Data file with domain status information""",
        )

        parser.add_argument(
            "--sep", default="|", help="Delimiter character for the migration files"
        )

        parser.add_argument("--debug", action=argparse.BooleanOptionalAction)

        parser.add_argument("--prompt", action=argparse.BooleanOptionalAction)

        parser.add_argument(
            "--limitParse", default=0, help="Sets max number of entries to load"
        )

        parser.add_argument(
            "--resetTable",
            help="Deletes all data in the TransitionDomain table",
            action=argparse.BooleanOptionalAction,
        )

    def compare_tables(self, debug_on: bool):
        """Does a diff between the transition_domain and the following tables:
        domain, domain_information and the domain_invitation.

        Produces the following report (printed to the terminal):
        #1 - Print any domains that exist in the transition_domain table
             but not in their corresponding domain, domain information or
             domain invitation tables.
        #2 - Print which table this domain is missing from
        #3 - Check for duplicate entries in the domain table and print
             which are duplicates

        debug_on -- when truthy, prints a line per transition domain and
        per problem found, in addition to the final summary.
        """

        logger.info(
            f"""{TerminalColors.OKCYAN}
            ============= BEGINNING ANALYSIS ===============
            {TerminalColors.ENDC}
            """
        )

        # TODO: would filteredRelation be faster?

        missing_domains = []
        duplicate_domains = []
        missing_domain_informations = []
        missing_domain_invites = []
        for transition_domain in TransitionDomain.objects.all():
            transition_domain_name = transition_domain.domain_name
            transition_domain_email = transition_domain.username

            # DEBUG:
            TerminalHelper.print_conditional(
                debug_on,
                f"{TerminalColors.OKCYAN}Checking: {transition_domain_name} {TerminalColors.ENDC}",  # noqa
            )

            # Check Domain table. Only the number of matches matters
            # (0 = missing, >1 = duplicate), so count in the database
            # instead of loading every row.
            matching_domain_count = Domain.objects.filter(
                name=transition_domain_name
            ).count()
            # Check Domain Information table (existence is all we need)
            has_domain_information = DomainInformation.objects.filter(
                domain__name=transition_domain_name
            ).exists()
            # Check Domain Invitation table; the lookup uses the
            # lower-cased transition domain username as the email.
            has_domain_invitation = DomainInvitation.objects.filter(
                email=transition_domain_email.lower(),
                domain__name=transition_domain_name,
            ).exists()

            if matching_domain_count == 0:
                TerminalHelper.print_conditional(
                    debug_on,
                    f"""{TerminalColors.YELLOW}Missing Domain{TerminalColors.ENDC}""",
                )
                missing_domains.append(transition_domain_name)
            elif matching_domain_count > 1:
                TerminalHelper.print_conditional(
                    debug_on,
                    f"""{TerminalColors.YELLOW}Duplicate Domain{TerminalColors.ENDC}""",
                )
                duplicate_domains.append(transition_domain_name)
            if not has_domain_information:
                TerminalHelper.print_conditional(
                    debug_on,
                    f"""{TerminalColors.YELLOW}Missing Domain Information{TerminalColors.ENDC}""",
                )
                missing_domain_informations.append(transition_domain_name)
            if not has_domain_invitation:
                TerminalHelper.print_conditional(
                    debug_on,
                    f"""{TerminalColors.YELLOW}Missing Domain Invitation{TerminalColors.ENDC}""",
                )
                missing_domain_invites.append(transition_domain_name)

        total_missing_domains = len(missing_domains)
        total_duplicate_domains = len(duplicate_domains)
        total_missing_domain_informations = len(missing_domain_informations)
        total_missing_domain_invitations = len(missing_domain_invites)

        missing_domains_as_string = ", ".join(map(str, missing_domains))
        duplicate_domains_as_string = ", ".join(map(str, duplicate_domains))
        missing_domain_informations_as_string = ", ".join(
            map(str, missing_domain_informations)
        )
        missing_domain_invites_as_string = ", ".join(map(str, missing_domain_invites))

        logger.info(
            f"""{TerminalColors.OKGREEN}
            ============= FINISHED ANALYSIS ===============

            {total_missing_domains} Missing Domains:
            (These are transition domains that are missing from the Domain Table)
            {TerminalColors.YELLOW}{missing_domains_as_string}{TerminalColors.OKGREEN}

            {total_duplicate_domains} Duplicate Domains:
            (These are transition domains which have duplicate entries in the Domain Table)
            {TerminalColors.YELLOW}{duplicate_domains_as_string}{TerminalColors.OKGREEN}

            {total_missing_domain_informations} Domain Information Entries missing:
            (These are transition domains which have no entries in the Domain Information Table)
            {TerminalColors.YELLOW}{missing_domain_informations_as_string}{TerminalColors.OKGREEN}

            {total_missing_domain_invitations} Domain Invitations missing:
            (These are transition domains which have no entries in the Domain Invitation Table)
            {TerminalColors.YELLOW}{missing_domain_invites_as_string}{TerminalColors.OKGREEN}
            {TerminalColors.ENDC}
            """  # noqa
        )

    def run_load_transition_domain_script(
        self,
        file_location: str,
        domain_contacts_filename: str,
        contacts_filename: str,
        domain_statuses_filename: str,
        sep: str,
        reset_table: bool,
        debug_on: bool,
        prompts_enabled: bool,
        debug_max_entries_to_parse: int,
    ):
        """Runs the load_transition_domain script.

        file_location is prepended verbatim to each filename (the caller
        is expected to include the trailing path separator). When
        prompts_enabled is truthy the equivalent shell command is shown
        through TerminalHelper.prompt_for_execution before the script is
        invoked via call_command.
        """

        # Build each path once; used for both the displayed command and
        # the actual call below.
        domain_contacts_path = f"{file_location+domain_contacts_filename}"
        contacts_path = f"{file_location+contacts_filename}"
        domain_statuses_path = f"{file_location+domain_statuses_filename}"

        # Create the command string (for display in the prompt only)
        command_script = "load_transition_domain"
        command_string = (
            f"./manage.py {command_script} "
            f"{domain_contacts_path} "
            f"{contacts_path} "
            f"{domain_statuses_path} "
        )
        # Only show --sep when it differs from the default
        if sep is not None and sep != "|":
            command_string += f"--sep {sep} "
        if reset_table:
            command_string += "--resetTable "
        if debug_on:
            command_string += "--debug "
        if debug_max_entries_to_parse > 0:
            command_string += f"--limitParse {debug_max_entries_to_parse} "

        # Execute the command string
        if prompts_enabled:
            system_exit_on_terminate = True
            TerminalHelper.prompt_for_execution(
                system_exit_on_terminate,
                command_string,
                "Running load_transition_domain script",
            )

        # TODO: make this somehow run inside TerminalHelper prompt
        call_command(
            command_script,
            domain_contacts_path,
            contacts_path,
            domain_statuses_path,
            sep=sep,
            resetTable=reset_table,
            debug=debug_on,
            limitParse=debug_max_entries_to_parse,
        )

    def run_transfer_script(self, debug_on: bool, prompts_enabled: bool):
        """Runs the transfer_transition_domains_to_domains script.

        When prompts_enabled is truthy the equivalent shell command is
        shown through TerminalHelper.prompt_for_execution first.
        """
        # Create the command string (for display in the prompt only).
        # The trailing space keeps appended flags separated from the
        # command name.
        command_script = "transfer_transition_domains_to_domains"
        command_string = f"./manage.py {command_script} "
        if debug_on:
            command_string += "--debug "

        # Execute the command string
        if prompts_enabled:
            system_exit_on_terminate = True
            TerminalHelper.prompt_for_execution(
                system_exit_on_terminate,
                command_string,
                "Running transfer_transition_domains_to_domains script",
            )

        # TODO: make this somehow run inside TerminalHelper prompt
        # NOTE(review): debug_on is shown in the prompt but is not
        # forwarded to the command here -- confirm whether
        # transfer_transition_domains_to_domains accepts a debug option
        # before passing it along.
        call_command(command_script)

    def run_send_invites_script(self, debug_on: bool, prompts_enabled: bool):
        """Runs the send_domain_invitations script (with emails enabled).

        debug_on is accepted for signature parity with the other run_*
        helpers; it is not used here.
        """
        # Create the command string (for display in the prompt only)
        command_script = "send_domain_invitations"
        command_string = f"./manage.py {command_script} -s"

        # Execute the command string
        if prompts_enabled:
            system_exit_on_terminate = True
            TerminalHelper.prompt_for_execution(
                system_exit_on_terminate,
                command_string,
                "Running send_domain_invitations script",
            )

        # TODO: make this somehow run inside TerminalHelper prompt
        call_command(command_script, send_emails=True)

    def run_migration_scripts(
        self,
        file_location,
        domain_contacts_filename,
        contacts_filename,
        domain_statuses_filename,
        sep,
        reset_table,
        debug_on,
        prompts_enabled,
        debug_max_entries_to_parse,
    ):
        """Runs the following migration scripts (in order):
        1 - imports for trans domains
        2 - transfer to domain & domain invitation

        When prompts_enabled is truthy the user is first asked to confirm
        the directory and filenames; if they decline, nothing is run and
        this method returns early.
        """

        if prompts_enabled:
            # Allow the user to inspect the filepath
            # data given in the arguments, and prompt
            # the user to verify this info before proceeding
            files_are_correct = TerminalHelper.query_yes_no(
                f"""
                {TerminalColors.OKCYAN}
                *** IMPORTANT:  VERIFY THE FOLLOWING ***

                The migration scripts are looking in directory....
                {file_location}

                ....for the following files:
                - domain contacts: {domain_contacts_filename}
                - contacts: {contacts_filename}
                - domain statuses: {domain_statuses_filename}

                {TerminalColors.FAIL}
                Does this look correct?{TerminalColors.ENDC}"""
            )

            # If the user rejected the filepath information
            # as incorrect, prompt the user to provide
            # correct file inputs in their original command
            # prompt and exit this subroutine
            if not files_are_correct:
                # The example uses comma-separated filenames because
                # handle() splits --migrationFilenames on ",".
                logger.info(
                    f"""
                    {TerminalColors.YELLOW}
                    PLEASE Re-Run the script with the correct file location and filenames:

                    EXAMPLE:
                    docker compose run -T app ./manage.py test_domain_migration --runMigrations --migrationDirectory /app/tmp --migrationFilenames escrow_domain_contacts.daily.gov.GOV.txt,escrow_contacts.daily.gov.GOV.txt,escrow_domain_statuses.daily.gov.GOV.txt
                    """
                )  # noqa
                # NOTE(review): this only leaves this method; handle()
                # carries on with its remaining steps -- confirm that is
                # the intended behavior.
                return

        # Proceed executing the migration scripts
        self.run_load_transition_domain_script(
            file_location,
            domain_contacts_filename,
            contacts_filename,
            domain_statuses_filename,
            sep,
            reset_table,
            debug_on,
            prompts_enabled,
            debug_max_entries_to_parse,
        )
        self.run_transfer_script(debug_on, prompts_enabled)

    def simulate_user_logins(self, debug_on):
        """Simulates logins for users (this will add
        Domain Information objects to our tables)

        Currently a no-op: the implementation below is commented out
        pending its move to a unit test.
        """

        # logger.info(f""
        #     f"{TerminalColors.OKCYAN}"
        #     f"================== SIMULATING LOGINS =================="
        #     f"{TerminalColors.ENDC}")

        # for invite in DomainInvitation.objects.all(): #TODO: move to unit test
        #     #DEBUG:
        #     TerminalHelper.print_conditional(debug_on,
        #         f"{TerminalColors.OKCYAN}"
        #         f"Processing invite: {invite}"
        #         f"{TerminalColors.ENDC}")
        #     # get a user with this email address
        #     user, user_created = User.objects.get_or_create(
        #         email=invite.email, username=invite.email)
        #     #DEBUG:
        #     TerminalHelper.print_conditional(user_created,
        #         f"""{TerminalColors.OKCYAN}No user found
        #         (creating temporary user object){TerminalColors.ENDC}""")
        #     TerminalHelper.print_conditional(debug_on,
        #         f"""{TerminalColors.OKCYAN}Executing first-time login
        #         for user: {user}{TerminalColors.ENDC}""")
        #     user.first_login()
        #     if user_created:
        #         logger.info(f"""{TerminalColors.YELLOW}(Deleting temporary
        #         user object){TerminalColors.ENDC}""")
        #         user.delete()

    def handle(
        self,
        **options,
    ):
        """
        Does the following;
        1 - run migration scripts
        2 - simulate logins
        3 - send domain invitations (Emails should be sent to the appropriate users
        note that all moved domains should now be accessible
        on django admin for an analyst)
        4 - analyze the data for transition domains
        and generate a report

        Steps 1-3 only run when --runMigrations is given; step 2 is
        currently hard-disabled. Step 4 always runs unless the user
        declines it at a prompt.
        """

        # SETUP
        # Grab all arguments relevant to
        # orchestrating which parts of this script
        # should execute. Print some indicators to
        # the terminal so the user knows what is
        # enabled.

        # Get arguments
        debug_on = options.get("debug")
        prompts_enabled = options.get("prompt")
        run_migrations_enabled = options.get("runMigrations")
        simulate_user_login_enabled = (
            False  # TODO: delete? Moving to unit test... options.get("triggerLogins")
        )

        TerminalHelper.print_conditional(
            debug_on,
            f"""{TerminalColors.OKCYAN}
            ----------DEBUG MODE ON----------
            Detailed print statements activated.
            {TerminalColors.ENDC}
            """,
        )
        TerminalHelper.print_conditional(
            run_migrations_enabled,
            f"""{TerminalColors.OKCYAN}
            ----------RUNNING MIGRATIONS ON----------
            All migration scripts will be run before
            analyzing the data.
            {TerminalColors.ENDC}
            """,
        )
        # Gate this banner on the login-simulation flag itself, so it is
        # only shown when logins will actually be simulated.
        TerminalHelper.print_conditional(
            simulate_user_login_enabled,
            f"""{TerminalColors.OKCYAN}
            ----------TRIGGER LOGINS ON----------
            Will be simulating user logins
            {TerminalColors.ENDC}
            """,
        )
        # If a user decides to run all migration
        # scripts, they may or may not wish to
        # proceed with analysis of the data depending
        # on the results of the migration.
        # Provide a breakpoint for them to decide
        # whether to continue or not.
        # The same will happen if simulating user
        # logins (to allow users to run only that
        # portion of the script if desired)
        prompt_continuation_of_analysis = False

        # STEP 1 -- RUN MIGRATIONS
        # Run migration scripts if specified by user
        if run_migrations_enabled:
            # grab arguments for running migrations
            sep = options.get("sep")
            reset_table = options.get("resetTable")
            debug_max_entries_to_parse = int(options.get("limitParse"))

            # Grab filepath information from the arguments
            file_location = options.get("migrationDirectory") + "/"
            # Strip each entry so "a.txt, b.txt" does not yield a
            # filename with a leading space.
            filenames = [
                filename.strip()
                for filename in options.get("migrationFilenames").split(",")
            ]
            if len(filenames) < 3:
                filenames_as_string = ", ".join(map(str, filenames))
                logger.info(
                    f"""
                    {TerminalColors.FAIL}
                    --migrationFilenames expected 3 filenames to follow it,
                    but only {len(filenames)} were given:
                    {filenames_as_string}

                    PLEASE MODIFY THE SCRIPT AND TRY RUNNING IT AGAIN
                    ============= TERMINATING =============
                    {TerminalColors.ENDC}
                    """
                )
                sys.exit()
            # Order matters: contacts per domain, contacts, statuses.
            domain_contacts_filename = filenames[0]
            contacts_filename = filenames[1]
            domain_statuses_filename = filenames[2]

            # Run migration scripts
            self.run_migration_scripts(
                file_location,
                domain_contacts_filename,
                contacts_filename,
                domain_statuses_filename,
                sep,
                reset_table,
                debug_on,
                prompts_enabled,
                debug_max_entries_to_parse,
            )
            prompt_continuation_of_analysis = True

        # STEP 2 -- SIMULATE LOGINS
        # Simulate user login for each user in domain
        # invitation, only when running migration scripts
        # AND login simulation is enabled (it is currently
        # hard-coded off above, so this step never runs).
        if run_migrations_enabled and simulate_user_login_enabled:
            if prompts_enabled:
                simulate_user_login_enabled = TerminalHelper.query_yes_no(
                    f"""{TerminalColors.FAIL}
                    Proceed with simulating user logins?
                    {TerminalColors.ENDC}"""
                )
                if not simulate_user_login_enabled:
                    return
            self.simulate_user_logins(debug_on)
            prompt_continuation_of_analysis = True

        # STEP 3 -- SEND INVITES
        # Invites are sent whenever migrations ran, unless the user
        # declines at the prompt.
        proceed_with_sending_invites = run_migrations_enabled
        if prompts_enabled and run_migrations_enabled:
            proceed_with_sending_invites = TerminalHelper.query_yes_no(
                f"""{TerminalColors.FAIL}
                Proceed with sending user invites for all transition domains?
                (Y = proceed, N = skip)
                {TerminalColors.ENDC}"""
            )
        if proceed_with_sending_invites:
            self.run_send_invites_script(debug_on, prompts_enabled)
            prompt_continuation_of_analysis = True

        # STEP 4 -- ANALYZE TABLES & GENERATE REPORT
        # Analyze tables for corrupt data...
        if prompt_continuation_of_analysis and prompts_enabled:
            # ^ (only prompt if we ran an earlier step)
            analyze_tables = TerminalHelper.query_yes_no(
                f"""{TerminalColors.FAIL}
                Proceed with table analysis?
                (Y = proceed, N = exit)
                {TerminalColors.ENDC}"""
            )
            if not analyze_tables:
                return
        self.compare_tables(debug_on)