Parsing agency, documentation

This commit is contained in:
zandercymatics 2023-10-30 10:44:25 -06:00
parent d70e5a2d77
commit a74b9f4c3c
No known key found for this signature in database
GPG key ID: FF4636ABEC9682B7
6 changed files with 210 additions and 87 deletions

View file

@ -1,8 +1,8 @@
## Purpose ## Purpose
Use this folder for storing files for the migration process. Should otherwise be empty on local dev environments unless necessary. This folder must exist due to the nature of how data is stored on cloud.gov and the nature of the data we want to send. Use this folder for storing files for the migration process. Should otherwise be empty on local dev environments unless necessary. This folder must exist due to the nature of how data is stored on cloud.gov and the nature of the data we typically want to send.
## How do I migrate registrar data? ## How do I migrate registrar data?
This process is detailed in [data_migration.md](../../docs/operations/data_migration.md) This process is detailed in [data_migration.md](../../docs/operations/data_migration.md)
## What kind of files can I store here? ## What kind of files can I store here?
The intent is for PII data or otherwise, but this can exist in any format. Do note that the data contained in this folder will be temporary, so it will be lost after the app is restaged. This is ideal for migration files as they write to our DB, but not for something you need to permanently hold onto. The intent is for PII data or otherwise, but this can exist in any format. Do note that the data contained in this folder will be temporary, so it will be lost after the app is restaged (as long as nothing is committed). This is ideal for migration files as they write to our DB, but not for something you need to permanently hold onto.

View file

@ -13,6 +13,7 @@ from registrar.models.transition_domain import TransitionDomain
from .utility.extra_transition_domain import ExtraTransitionDomain from .utility.extra_transition_domain import ExtraTransitionDomain
from .utility.epp_data_containers import ( from .utility.epp_data_containers import (
AgencyAdhoc, AgencyAdhoc,
AuthorityAdhoc,
DomainAdditionalData, DomainAdditionalData,
DomainTypeAdhoc, DomainTypeAdhoc,
OrganizationAdhoc, OrganizationAdhoc,
@ -30,6 +31,17 @@ class LogCode(Enum):
class FileTransitionLog: class FileTransitionLog:
"""Container for storing event logs. Used to lessen
the complexity of storing multiple logs across multiple
variables.
self.logs: dict -> {
EnumFilenames.DOMAIN_ADHOC: List[LogItem],
EnumFilenames.AGENCY_ADHOC: List[LogItem],
EnumFilenames.ORGANIZATION_ADHOC: List[LogItem],
EnumFilenames.DOMAIN_ADDITIONAL: List[LogItem],
}
"""
def __init__(self): def __init__(self):
self.logs = { self.logs = {
EnumFilenames.DOMAIN_ADHOC: [], EnumFilenames.DOMAIN_ADHOC: [],
@ -39,16 +51,24 @@ class FileTransitionLog:
} }
class LogItem:
    """Plain record for a single log event.

    Holds the file type the event belongs to, a code describing its
    severity (ex. LogCode.ERROR), and the message to display.
    """

    def __init__(self, file_type, code, message):
        self.file_type, self.code, self.message = file_type, code, message
def add_log(self, file_type, code, message):
    """Adds a log item to self.logs.

    file_type -> Which list to add to,
    ex. EnumFilenames.DOMAIN_ADHOC
    code -> Log severity or other metadata, ex. LogCode.ERROR
    message -> Message to display
    """
    # self.logs maps each file_type to a *list* of LogItem, and
    # display_logs iterates over that list. Append to it instead of
    # replacing it with a single item, so earlier entries are kept.
    # setdefault keeps the old behavior of accepting an unseen file_type.
    self.logs.setdefault(file_type, []).append(
        self.LogItem(file_type, code, message)
    )
def create_log_item(self, file_type, code, message, add_to_list=True): def create_log_item(self, file_type, code, message, add_to_list=True):
"""Creates and returns an LogItem object. """Creates and returns an LogItem object.
@ -63,6 +83,9 @@ class FileTransitionLog:
return log return log
def display_logs(self, file_type): def display_logs(self, file_type):
"""Displays all logs in the given file_type in EnumFilenames.
Will log with the correct severity depending on code.
"""
for log in self.logs.get(file_type): for log in self.logs.get(file_type):
match log.code: match log.code:
case LogCode.ERROR: case LogCode.ERROR:
@ -129,24 +152,24 @@ class Command(BaseCommand):
domain_name = transition_domain.domain_name domain_name = transition_domain.domain_name
updated_transition_domain = transition_domain updated_transition_domain = transition_domain
# STEP 1: Parse domain type data # STEP 1: Parse organization data
updated_transition_domain = self.parse_domain_type_data(
domain_name, transition_domain
)
self.parse_logs(EnumFilenames.DOMAIN_ADHOC)
# STEP 2: Parse agency data - TODO
updated_transition_domain = self.parse_agency_data(
domain_name, transition_domain
)
self.parse_logs(EnumFilenames.AGENCY_ADHOC)
# STEP 3: Parse organization data
updated_transition_domain = self.parse_org_data( updated_transition_domain = self.parse_org_data(
domain_name, transition_domain domain_name, transition_domain
) )
self.parse_logs.display_logs(EnumFilenames.ORGANIZATION_ADHOC) self.parse_logs.display_logs(EnumFilenames.ORGANIZATION_ADHOC)
# STEP 2: Parse domain type data
updated_transition_domain = self.parse_domain_type_data(
domain_name, transition_domain
)
self.parse_logs.display_logs(EnumFilenames.DOMAIN_ADHOC)
# STEP 3: Parse agency data - TODO
updated_transition_domain = self.parse_agency_data(
domain_name, transition_domain
)
self.parse_logs.display_logs(EnumFilenames.AGENCY_ADHOC)
# STEP 4: Parse expiration data - TODO # STEP 4: Parse expiration data - TODO
updated_transition_domain = self.parse_expiration_data( updated_transition_domain = self.parse_expiration_data(
domain_name, transition_domain domain_name, transition_domain
@ -159,40 +182,59 @@ class Command(BaseCommand):
def parse_expiration_data(self, domain_name, transition_domain):
    """Placeholder for expiration parsing (the calling step is still a TODO).

    Makes no changes and hands back the transition_domain it was given.
    """
    unchanged = transition_domain
    return unchanged
def parse_agency_data(self, domain_name, transition_domain) -> TransitionDomain:
    """Fills in transition_domain.federal_agency from the agency data.

    Looks up the agency record for domain_name and, when that agency is
    both active and federal, copies its name onto
    transition_domain.federal_agency. Every outcome is logged under
    EnumFilenames.AGENCY_ADHOC, which is the list the caller displays
    right after this step.

    domain_name -> Domain whose agency should be looked up
    transition_domain -> TransitionDomain object to update

    Returns the same transition_domain, unchanged if no usable agency
    record exists.
    Raises ValueError if transition_domain is not a TransitionDomain.
    """
    if not isinstance(transition_domain, TransitionDomain):
        raise ValueError("Not a valid object, must be TransitionDomain")

    info = self.get_agency_info(domain_name)
    if info is None:
        self.parse_logs.create_log_item(
            EnumFilenames.AGENCY_ADHOC,
            LogCode.INFO,
            f"Could not add federal_agency on {domain_name}, no data exists.",
        )
        return transition_domain

    # Record whether a value was already present *before* overwriting it,
    # so the final log can tell "added" apart from "updated".
    agency_exists = (
        transition_domain.federal_agency is not None
        and transition_domain.federal_agency.strip() != ""
    )

    # The flags are optional strings; a missing flag is treated the same
    # as any value other than "y" instead of raising on None.lower().
    if (info.active or "").lower() != "y":
        self.parse_logs.create_log_item(
            EnumFilenames.AGENCY_ADHOC,
            LogCode.ERROR,
            f"Could not add inactive agency {info.agencyname} on {domain_name}",
        )
        return transition_domain

    if (info.isfederal or "").lower() != "y":
        self.parse_logs.create_log_item(
            EnumFilenames.AGENCY_ADHOC,
            LogCode.ERROR,
            f"Could not add non-federal agency {info.agencyname} on {domain_name}",
        )
        return transition_domain

    transition_domain.federal_agency = info.agencyname

    # Logs if we either added to this property,
    # or modified it.
    self._add_or_change_message(
        EnumFilenames.AGENCY_ADHOC,
        "federal_agency",
        transition_domain.federal_agency,
        domain_name,
        agency_exists,
    )

    return transition_domain
def parse_domain_type_data(self, domain_name, transition_domain: TransitionDomain): def parse_domain_type_data(self, domain_name, transition_domain: TransitionDomain) -> TransitionDomain:
"""Parses the DomainType file.
This file has definitions for organization_type and federal_type.
Logs whether each value was newly added or replaced an existing one.
"""
if not isinstance(transition_domain, TransitionDomain): if not isinstance(transition_domain, TransitionDomain):
raise ValueError("Not a valid object, must be TransitionDomain") raise ValueError("Not a valid object, must be TransitionDomain")
@ -212,8 +254,8 @@ class Command(BaseCommand):
if domain_type.count != 1 or domain_type.count != 2: if domain_type.count != 1 or domain_type.count != 2:
raise ValueError("Found invalid data in DOMAIN_ADHOC") raise ValueError("Found invalid data in DOMAIN_ADHOC")
# Then, just grab the agency type. # Then, just grab the organization type.
new_federal_agency = domain_type[0].strip() new_organization_type = domain_type[0].strip()
# Check if this domain_type is active or not. # Check if this domain_type is active or not.
# If not, we don't want to add this. # If not, we don't want to add this.
@ -228,7 +270,7 @@ class Command(BaseCommand):
# Are we updating data that already exists, # Are we updating data that already exists,
# or are we adding new data in its place? # or are we adding new data in its place?
federal_agency_exists = ( federal_agency_exists = (
transition_domain.federal_agency is not None transition_domain.organization_type is not None
and transition_domain.federal_agency.strip() != "" and transition_domain.federal_agency.strip() != ""
) )
federal_type_exists = ( federal_type_exists = (
@ -237,13 +279,14 @@ class Command(BaseCommand):
) )
# If we get two records, then we know it is federal. # If we get two records, then we know it is federal.
# needs to be lowercase for federal type
is_federal = domain_type.count() == 2 is_federal = domain_type.count() == 2
if is_federal: if is_federal:
new_federal_type = domain_type[1].strip() new_federal_type = domain_type[1].strip()
transition_domain.federal_agency = new_federal_agency transition_domain.organization_type = new_organization_type
transition_domain.federal_type = new_federal_type transition_domain.federal_type = new_federal_type
else: else:
transition_domain.federal_agency = new_federal_agency transition_domain.organization_type = new_organization_type
transition_domain.federal_type = None transition_domain.federal_type = None
# Logs if we either added to this property, # Logs if we either added to this property,
@ -266,7 +309,7 @@ class Command(BaseCommand):
return transition_domain return transition_domain
def parse_org_data(self, domain_name, transition_domain: TransitionDomain): def parse_org_data(self, domain_name, transition_domain: TransitionDomain) -> TransitionDomain:
if not isinstance(transition_domain, TransitionDomain): if not isinstance(transition_domain, TransitionDomain):
raise ValueError("Not a valid object, must be TransitionDomain") raise ValueError("Not a valid object, must be TransitionDomain")
@ -275,23 +318,23 @@ class Command(BaseCommand):
self.parse_logs.create_log_item( self.parse_logs.create_log_item(
EnumFilenames.ORGANIZATION_ADHOC, EnumFilenames.ORGANIZATION_ADHOC,
LogCode.INFO, LogCode.INFO,
f"Could not add organization_type on {domain_name}, no data exists.", f"Could not add organization_name on {domain_name}, no data exists.",
) )
return transition_domain return transition_domain
desired_property_exists = ( desired_property_exists = (
transition_domain.organization_type is not None transition_domain.organization_name is not None
and transition_domain.organization_type.strip() != "" and transition_domain.organization_name.strip() != ""
) )
transition_domain.organization_type = org_info.orgname transition_domain.organization_name = org_info.orgname
# Logs if we either added to this property, # Logs if we either added to this property,
# or modified it. # or modified it.
self._add_or_change_message( self._add_or_change_message(
EnumFilenames.ORGANIZATION_ADHOC, EnumFilenames.ORGANIZATION_ADHOC,
"organization_type", "organization_name",
transition_domain.organization_type, transition_domain.organization_name,
domain_name, domain_name,
desired_property_exists, desired_property_exists,
) )
@ -316,6 +359,7 @@ class Command(BaseCommand):
f"Updated existing {var_name} to '{changed_value}' on {domain_name}", f"Updated existing {var_name} to '{changed_value}' on {domain_name}",
) )
# Property getters, i.e. orgid or domaintypeid
def get_org_info(self, domain_name) -> OrganizationAdhoc: def get_org_info(self, domain_name) -> OrganizationAdhoc:
domain_info = self.get_domain_data(domain_name) domain_info = self.get_domain_data(domain_name)
org_id = domain_info.orgid org_id = domain_info.orgid
@ -326,43 +370,81 @@ class Command(BaseCommand):
type_id = domain_info.domaintypeid type_id = domain_info.domaintypeid
return self.get_domain_adhoc(type_id) return self.get_domain_adhoc(type_id)
def get_agency_info(self, domain_name) -> AgencyAdhoc:
    """Returns the AgencyAdhoc record for domain_name, or None if any
    step of the lookup comes back empty.

    The previous body searched the DOMAIN_ADHOC data with the domain's
    orgid, which yields a domain-type record rather than an agency.
    The agency file is keyed by agencyid, and the visible data
    containers expose that id on the authority record, so the lookup
    goes: domain -> authorityid -> authority -> agencyid -> agency.
    NOTE(review): confirm this is the intended link between the files.
    """
    domain_info = self.get_domain_data(domain_name)
    if domain_info is None:
        return None

    authority = self.get_authority_adhoc(domain_info.authorityid)
    if authority is None:
        return None

    return self.get_agency_adhoc(authority.agencyid)
def get_authority_info(self, domain_name):
    """Returns the authority record for domain_name.

    Returns None when there is no domain data for domain_name, or when
    the authority lookup itself finds nothing.
    """
    domain_info = self.get_domain_data(domain_name)
    # get_object_by_id (behind get_domain_data) returns None for unknown
    # ids, so guard before reading authorityid from the result.
    if domain_info is None:
        return None
    return self.get_authority_adhoc(domain_info.authorityid)
# Object getters, i.e. DomainAdditionalData or OrganizationAdhoc
def get_domain_data(self, desired_id) -> DomainAdditionalData:
    """Looks up the DOMAIN_ADDITIONAL entry stored under desired_id."""
    target_file = EnumFilenames.DOMAIN_ADDITIONAL
    return self.get_object_by_id(target_file, desired_id)
def get_organization_adhoc(self, desired_id) -> OrganizationAdhoc:
    """Looks up the ORGANIZATION_ADHOC entry stored under desired_id.

    Hands back whatever get_object_by_id finds for that id.
    """
    target_file = EnumFilenames.ORGANIZATION_ADHOC
    return self.get_object_by_id(target_file, desired_id)
def get_domain_adhoc(self, desired_id) -> DomainTypeAdhoc:
    """Looks up the DOMAIN_ADHOC entry stored under desired_id."""
    target_file = EnumFilenames.DOMAIN_ADHOC
    return self.get_object_by_id(target_file, desired_id)
def get_agency_adhoc(self, desired_id) -> AgencyAdhoc:
    """Looks up the AGENCY_ADHOC entry stored under desired_id."""
    target_file = EnumFilenames.AGENCY_ADHOC
    return self.get_object_by_id(target_file, desired_id)
def get_authority_adhoc(self, desired_id) -> AuthorityAdhoc:
    """Looks up the AUTHORITY_ADHOC entry stored under desired_id."""
    target_file = EnumFilenames.AUTHORITY_ADHOC
    return self.get_object_by_id(target_file, desired_id)
def get_object_by_id(self, file_type: EnumFilenames, desired_id):
    """Returns the entry stored under desired_id for the given file_type.

    vars:
        file_type: (constant) EnumFilenames -> Which data file to target.
        An example would be `EnumFilenames.DOMAIN_ADHOC`.

        desired_id: str -> Which id you want to search on.
        An example would be `"12"` or `"igorville.gov"`

    Explanation:
        Each data file is tracked under its own file_type, and each
        file_type holds a mapping of row[id_field]: object, e.g.

        EnumFilenames.AUTHORITY_ADHOC: {
            "1": AuthorityAdhoc(...),
            "2": AuthorityAdhoc(...),
            ...
        }

        Asking for desired_id "1" returns the value stored under "1",
        i.e. `AuthorityAdhoc(...)`.

    Returns None, after logging an error under file_type, when either
    the file_type or the id cannot be found.
    """
    # First level: the lookup that belongs to this file_type.
    per_file_lookup = self.domain_object.file_data.get(file_type)
    if per_file_lookup is None:
        self.parse_logs.create_log_item(
            file_type, LogCode.ERROR, f"Type {file_type} does not exist"
        )
        return None

    # Second level: the entry for this id, e.g. "igorville.gov".
    found = per_file_lookup.get(desired_id)
    if found is None:
        self.parse_logs.create_log_item(
            file_type, LogCode.ERROR, f"Id {desired_id} does not exist"
        )
    return found

View file

@ -216,17 +216,18 @@ class Command(BaseCommand):
""" """
) )
def run_load_transition_domain_script(self, def run_load_transition_domain_script(
file_location: str, self,
domain_contacts_filename: str, file_location: str,
contacts_filename: str, domain_contacts_filename: str,
domain_statuses_filename: str, contacts_filename: str,
sep: str, domain_statuses_filename: str,
reset_table: bool, sep: str,
debug_on: bool, reset_table: bool,
prompts_enabled: bool, debug_on: bool,
debug_max_entries_to_parse: int): prompts_enabled: bool,
debug_max_entries_to_parse: int
):
"""Runs the load_transition_domain script""" """Runs the load_transition_domain script"""
# Create the command string # Create the command string
command_script = "load_transition_domain" command_script = "load_transition_domain"

View file

@ -1,3 +1,10 @@
"""
A list of helper classes to facilitate handling data from verisign data exports.
Regarding our dataclasses:
Not intended to be used as models but rather as an alternative to storing as a dictionary.
By keeping it as a dataclass instead of a dictionary, we can maintain data consistency.
"""
from dataclasses import dataclass from dataclasses import dataclass
from enum import Enum from enum import Enum
from typing import Optional from typing import Optional
@ -6,7 +13,6 @@ from typing import Optional
@dataclass @dataclass
class AgencyAdhoc: class AgencyAdhoc:
"""Defines the structure given in the AGENCY_ADHOC file""" """Defines the structure given in the AGENCY_ADHOC file"""
agencyid: Optional[int] = None agencyid: Optional[int] = None
agencyname: Optional[str] = None agencyname: Optional[str] = None
active: Optional[str] = None active: Optional[str] = None
@ -16,7 +22,6 @@ class AgencyAdhoc:
@dataclass @dataclass
class DomainAdditionalData: class DomainAdditionalData:
"""Defines the structure given in the DOMAIN_ADDITIONAL file""" """Defines the structure given in the DOMAIN_ADDITIONAL file"""
domainname: Optional[str] = None domainname: Optional[str] = None
domaintypeid: Optional[int] = None domaintypeid: Optional[int] = None
authorityid: Optional[int] = None authorityid: Optional[int] = None
@ -29,7 +34,6 @@ class DomainAdditionalData:
@dataclass @dataclass
class DomainTypeAdhoc: class DomainTypeAdhoc:
"""Defines the structure given in the DOMAIN_ADHOC file""" """Defines the structure given in the DOMAIN_ADHOC file"""
domaintypeid: Optional[int] = None domaintypeid: Optional[int] = None
domaintype: Optional[str] = None domaintype: Optional[str] = None
code: Optional[str] = None code: Optional[str] = None
@ -39,7 +43,6 @@ class DomainTypeAdhoc:
@dataclass @dataclass
class OrganizationAdhoc: class OrganizationAdhoc:
"""Defines the structure given in the ORGANIZATION_ADHOC file""" """Defines the structure given in the ORGANIZATION_ADHOC file"""
orgid: Optional[int] = None orgid: Optional[int] = None
orgname: Optional[str] = None orgname: Optional[str] = None
orgstreet: Optional[str] = None orgstreet: Optional[str] = None
@ -49,12 +52,29 @@ class OrganizationAdhoc:
orgcountrycode: Optional[str] = None orgcountrycode: Optional[str] = None
@dataclass
class AuthorityAdhoc:
    """Row layout of the AUTHORITY_ADHOC file.

    Every column is optional and defaults to None.
    """

    # Identifier of the authority row itself.
    authorityid: Optional[int] = None

    # Name parts.
    firstname: Optional[str] = None
    middlename: Optional[str] = None
    lastname: Optional[str] = None

    # Contact details.
    email: Optional[str] = None
    phonenumber: Optional[str] = None

    # Id of an agency row and free-form additional info.
    agencyid: Optional[int] = None
    addlinfo: Optional[str] = None
class EnumFilenames(Enum): class EnumFilenames(Enum):
"""Returns a tuple mapping for (filetype, default_file_name). """Returns a tuple mapping for (filetype, default_file_name).
For instance, AGENCY_ADHOC = ("agency_adhoc", "agency.adhoc.dotgov.txt") For instance, AGENCY_ADHOC = ("agency_adhoc", "agency.adhoc.dotgov.txt")
""" """
# We are sourcing data from many different locations, so its better to track this
# as an Enum rather than multiple spread out variables.
# We store the "type" as [0], and we store the "default_filepath" as [1].
AGENCY_ADHOC = ("agency_adhoc", "agency.adhoc.dotgov.txt") AGENCY_ADHOC = ("agency_adhoc", "agency.adhoc.dotgov.txt")
DOMAIN_ADDITIONAL = ( DOMAIN_ADDITIONAL = (
"domain_additional", "domain_additional",
@ -62,3 +82,4 @@ class EnumFilenames(Enum):
) )
DOMAIN_ADHOC = ("domain_adhoc", "domaintypes.adhoc.dotgov.txt") DOMAIN_ADHOC = ("domain_adhoc", "domaintypes.adhoc.dotgov.txt")
ORGANIZATION_ADHOC = ("organization_adhoc", "organization.adhoc.dotgov.txt") ORGANIZATION_ADHOC = ("organization_adhoc", "organization.adhoc.dotgov.txt")
AUTHORITY_ADHOC = ("authority_adhoc", "authority.adhoc.dotgov.txt")

View file

@ -12,6 +12,7 @@ from epp_data_containers import (
DomainAdditionalData, DomainAdditionalData,
DomainTypeAdhoc, DomainTypeAdhoc,
OrganizationAdhoc, OrganizationAdhoc,
AuthorityAdhoc,
EnumFilenames, EnumFilenames,
) )
@ -67,15 +68,19 @@ class PatternMap:
date = match.group(1) date = match.group(1)
filename_without_date = match.group(2) filename_without_date = match.group(2)
# Can the supplied self.regex do a match on the filename?
can_infer = filename_without_date == default_file_name can_infer = filename_without_date == default_file_name
if not can_infer: if not can_infer:
return (self.filename, False) return (self.filename, False)
# If so, note that and return the inferred name
full_filename = date + filename_without_date full_filename = date + filename_without_date
return (full_filename, can_infer) return (full_filename, can_infer)
class ExtraTransitionDomain: class ExtraTransitionDomain:
"""Helper class to aid in storing TransitionDomain data spread across
multiple files."""
filenames = EnumFilenames filenames = EnumFilenames
strip_date_regex = re.compile(r"\d+\.(.+)") strip_date_regex = re.compile(r"\d+\.(.+)")
@ -85,16 +90,18 @@ class ExtraTransitionDomain:
domain_additional_filename=filenames.DOMAIN_ADDITIONAL[1], domain_additional_filename=filenames.DOMAIN_ADDITIONAL[1],
domain_adhoc_filename=filenames.DOMAIN_ADHOC[1], domain_adhoc_filename=filenames.DOMAIN_ADHOC[1],
organization_adhoc_filename=filenames.ORGANIZATION_ADHOC[1], organization_adhoc_filename=filenames.ORGANIZATION_ADHOC[1],
authority_adhoc_filename=filenames.AUTHORITY_ADHOC[1],
directory="migrationdata", directory="migrationdata",
seperator="|", seperator="|",
): ):
self.directory = directory self.directory = directory
self.seperator = seperator self.seperator = seperator
self.all_files = glob.glob(f"{directory}/*")
# Create a set with filenames as keys for quick lookup
self.all_files_set = {os.path.basename(file) for file in self.all_files}
self.csv_data = { _all_files = glob.glob(f"{directory}/*")
# Create a set with filenames as keys for quick lookup
self.all_files_set = {os.path.basename(file) for file in _all_files}
self.file_data = {
# (filename, default_url): metadata about the desired file # (filename, default_url): metadata about the desired file
self.filenames.AGENCY_ADHOC: PatternMap( self.filenames.AGENCY_ADHOC: PatternMap(
agency_adhoc_filename, self.strip_date_regex, AgencyAdhoc, "agencyid" agency_adhoc_filename, self.strip_date_regex, AgencyAdhoc, "agencyid"
@ -117,16 +124,22 @@ class ExtraTransitionDomain:
OrganizationAdhoc, OrganizationAdhoc,
"orgid", "orgid",
), ),
self.filenames.AUTHORITY_ADHOC: PatternMap(
authority_adhoc_filename,
self.strip_date_regex,
AuthorityAdhoc,
"authorityid",
),
} }
def parse_all_files(self, overwrite_existing_data=True): def parse_all_files(self):
"""Clears all preexisting data then parses each related CSV file. """Clears all preexisting data then parses each related CSV file.
overwrite_existing_data: bool -> Determines if we should clear overwrite_existing_data: bool -> Determines if we should clear
csv_data.data if it already exists file_data.data if it already exists
""" """
self.clear_csv_data() self.clear_file_data()
for item in self.csv_data: for item in self.file_data:
file_type: PatternMap = item.value file_type: PatternMap = item.value
filename = file_type.filename filename = file_type.filename
@ -141,8 +154,8 @@ class ExtraTransitionDomain:
# Log if we can't find the desired file # Log if we can't find the desired file
logger.error(f"Could not find file: {filename}") logger.error(f"Could not find file: {filename}")
def clear_file_data(self):
    """Resets the parsed rows of every tracked file to an empty dict."""
    # self.file_data maps EnumFilenames members to PatternMap objects.
    # Iterating the dict directly yields the enum *keys*, and `key.value`
    # is that member's (type, default filename) tuple, not the PatternMap,
    # so walk the dict's values to reach the objects that own `.data`.
    for file_type in self.file_data.values():
        file_type.data = {}

View file

@ -48,6 +48,12 @@ class TransitionDomain(TimeStampedModel):
blank=True, blank=True,
help_text="Type of organization", help_text="Type of organization",
) )
organization_name = models.TextField(
null=True,
blank=True,
help_text="Organization name",
db_index=True,
)
federal_type = models.TextField( federal_type = models.TextField(
max_length=50, max_length=50,
null=True, null=True,