mirror of
https://github.com/cisagov/manage.get.gov.git
synced 2025-07-21 18:25:58 +02:00
Agency Extractor Script created
This commit is contained in:
parent
55e2b6cbba
commit
265fd83fa4
1 changed files with 119 additions and 0 deletions
119
src/registrar/management/commands/agency_data_extractor.py
Normal file
119
src/registrar/management/commands/agency_data_extractor.py
Normal file
|
@ -0,0 +1,119 @@
|
||||||
|
import argparse
|
||||||
|
import csv
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from django.core.management import BaseCommand
|
||||||
|
|
||||||
|
from registrar.management.commands.utility.terminal_helper import (
|
||||||
|
TerminalColors,
|
||||||
|
TerminalHelper,
|
||||||
|
)
|
||||||
|
from registrar.models.domain_application import DomainApplication
|
||||||
|
|
||||||
|
# Module-level logger, named after this module; the command below uses it
# for its progress messages and for the final summary of findings.
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# DEV SHORTCUT:
|
||||||
|
# Example command for running this script:
|
||||||
|
# docker compose run -T app ./manage.py agency_data_extractor 20231009.agency.adhoc.dotgov.txt --dir /app/tmp --debug
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Management command that diffs an agency data file against DomainApplication.AGENCIES.

    Nothing is written anywhere: the command reads the agency names from the
    given delimited file, compares them with the agency list currently defined
    on DomainApplication, and logs a summary of the differences.
    """

    help = """Extracts agency names from an agency data file and compares
    them against the agencies currently in our system
    (prints a summary of agencies to add and to possibly remove)."""

    def add_arguments(self, parser):
        """Register the command-line arguments.

        Arguments:
            agency_data_filename: (positional) name of the agency data file.
            --dir: directory the data file is read from (default "migrationdata").
            --sep: delimiter character used in the data file (default "|").
            --debug / --no-debug: print additional debug statements.
        """
        parser.add_argument("agency_data_filename", help="Data file with agency information")
        parser.add_argument("--dir", default="migrationdata", help="Desired directory")
        parser.add_argument("--sep", default="|", help="Delimiter character")

        parser.add_argument(
            "--debug",
            help="Prints additional debug statements to the terminal",
            action=argparse.BooleanOptionalAction,
        )

    def extract_agencies(self, agency_data_filepath: str, sep: str, debug: bool) -> list[str]:
        """Extract all the agency names from the provided agency file.

        The agency name is taken from the second column (index 1) of every row.

        Args:
            agency_data_filepath: path of the delimited agency data file.
            sep: delimiter character separating the columns.
            debug: when true, each agency name is echoed as it is read.

        Returns:
            The agency names in file order (duplicates are kept).

        Raises:
            OSError: if the file cannot be opened.
        """
        agency_names = []
        logger.info(f"{TerminalColors.OKCYAN}Reading agency data file {agency_data_filepath}{TerminalColors.ENDC}")
        # newline="" lets the csv module do its own newline handling, as the
        # csv documentation recommends for files passed to csv.reader.
        with open(agency_data_filepath, "r", newline="") as agency_data_file:
            for row in csv.reader(agency_data_file, delimiter=sep):
                # csv.reader yields [] for a blank line; such rows (and any
                # row lacking a second column) have no agency name to read.
                if len(row) < 2:
                    if row:
                        logger.warning(f"Skipping row without an agency name column: {row}")
                    continue
                agency_name = row[1]
                TerminalHelper.print_conditional(debug, f"Checking: {agency_name}")
                agency_names.append(agency_name)
        logger.info(f"{TerminalColors.OKCYAN}Checked {len(agency_names)} agencies{TerminalColors.ENDC}")
        return agency_names

    def compare_lists(self, new_agency_list: list[str], current_agency_list: list[str], debug: bool):
        """Log the differences between the new and the current agency lists.

        Provides the equivalent of an outer-join on the two lists, printed to
        the terminal: agencies only in the new list, and agencies only in the
        current list.

        Args:
            new_agency_list: agency names read from the provided agency file.
            current_agency_list: agency names currently in our system.
            debug: when true, each difference is echoed as it is found.
        """
        # Sets give O(1) membership tests; the result lists below still keep
        # the order (and any duplicates) of the list they are drawn from.
        current_lookup = set(current_agency_list)
        new_lookup = set(new_agency_list)

        # 1 - Get all new agencies that we don't already have (We might want to ADD these to our list)
        new_agencies = []
        for agency in new_agency_list:
            if agency not in current_lookup:
                new_agencies.append(agency)
                TerminalHelper.print_conditional(
                    debug, f"{TerminalColors.YELLOW}Found new agency: {agency}{TerminalColors.ENDC}"
                )

        # 2 - Get all current agencies that are NOT in the new list (These might be eligible for removal)
        possibly_unused_agencies = []
        for agency in current_agency_list:
            if agency not in new_lookup:
                possibly_unused_agencies.append(agency)
                TerminalHelper.print_conditional(
                    debug, f"{TerminalColors.YELLOW}Possibly unused agency detected: {agency}{TerminalColors.ENDC}"
                )

        # Print the summary of findings
        # 1 - Print the list of agencies in the NEW list, which we do not already have
        # 2 - Print the list of agencies that we currently have, which are NOT in the new list
        #     (these might be eligible for removal?)
        # TODO: would we ever want to remove existing agencies?
        new_agencies_as_string = ",\n ".join(map(str, new_agencies))
        possibly_unused_agencies_as_string = ",\n ".join(map(str, possibly_unused_agencies))

        logger.info(f"""
        {TerminalColors.OKGREEN}
        ======================== SUMMARY OF FINDINGS ============================
        {len(new_agency_list)} AGENCIES WERE PROVIDED in the agency file.
        {len(current_agency_list)} AGENCIES ARE CURRENTLY IN OUR SYSTEM.

        {len(new_agency_list)-len(new_agencies)} AGENCIES MATCHED
        (These are agencies that are in the given agency file AND in our system already)

        {len(new_agencies)} AGENCIES TO ADD:
        These agencies were in the provided agency file, but are not in our system.
        {TerminalColors.YELLOW}{new_agencies_as_string}
        {TerminalColors.OKGREEN}

        {len(possibly_unused_agencies)} AGENCIES TO (POSSIBLY) REMOVE:
        These agencies are in our system, but not in the provided agency file:
        {TerminalColors.YELLOW}{possibly_unused_agencies_as_string}
        {TerminalColors.ENDC}
        """)

    def handle(
        self,
        agency_data_filename,
        **options,
    ):
        """Parse the agency data file and report how it differs from our agency list.

        Args:
            agency_data_filename: name of the agency data file inside --dir.
            **options: remaining parsed options; "sep", "debug" and "dir" are read.
        """
        # Imported here so the block does not depend on a file-level import.
        import os

        # Get all the arguments
        sep = options.get("sep")
        debug = options.get("debug")
        directory = options.get("dir")

        agency_data_file = os.path.join(directory, agency_data_filename)

        new_agencies = self.extract_agencies(agency_data_file, sep, debug)
        existing_agencies = DomainApplication.AGENCIES
        self.compare_lists(new_agencies, existing_agencies, debug)
|
Loading…
Add table
Add a link
Reference in a new issue