Script changes

This commit is contained in:
zandercymatics 2023-10-30 14:29:56 -06:00
parent abf1cd9378
commit cb4db4f71a
No known key found for this signature in database
GPG key ID: FF4636ABEC9682B7
3 changed files with 109 additions and 51 deletions

View file

@ -51,14 +51,14 @@ class FileTransitionLog:
} }
class LogItem: class LogItem:
"""Used for storing data about logger information. """Used for storing data about logger information."""
Intended for use in""" def __init__(self, file_type, code, message, domain_name):
def __init__(self, file_type, code, message):
self.file_type = file_type self.file_type = file_type
self.code = code self.code = code
self.message = message self.message = message
self.domain_name = domain_name
def add_log(self, file_type, code, message): def add_log(self, file_type, code, message, domain_name):
"""Adds a log item to self.logs """Adds a log item to self.logs
file_type -> Which array to add to, file_type -> Which array to add to,
@ -68,18 +68,18 @@ class FileTransitionLog:
message -> Message to display message -> Message to display
""" """
self.logs[file_type] = self.LogItem(file_type, code, message) self.logs[file_type].append(self.LogItem(file_type, code, message, domain_name))
def create_log_item(self, file_type, code, message, add_to_list=True): def create_log_item(self, file_type, code, message, domain_name=None, add_to_list=True):
"""Creates and returns an LogItem object. """Creates and returns an LogItem object.
add_to_list: bool -> If enabled, add it to the logs array. add_to_list: bool -> If enabled, add it to the logs array.
""" """
log = self.LogItem(file_type, code, message) log = self.LogItem(file_type, code, message, domain_name)
if not add_to_list: if not add_to_list:
return log return log
else: else:
self.logs[file_type] = log self.logs[file_type].append(log)
return log return log
def display_logs(self, file_type): def display_logs(self, file_type):
@ -89,6 +89,7 @@ class FileTransitionLog:
for log in self.logs.get(file_type): for log in self.logs.get(file_type):
match log.code: match log.code:
case LogCode.ERROR: case LogCode.ERROR:
if log.domain_name is None:
logger.error(log.message) logger.error(log.message)
case LogCode.WARNING: case LogCode.WARNING:
logger.warning(log.message) logger.warning(log.message)
@ -110,22 +111,22 @@ class Command(BaseCommand):
) )
parser.add_argument( parser.add_argument(
"--agency_adhoc_filename", "--agency_adhoc_filename",
default=EnumFilenames.AGENCY_ADHOC[1], default=EnumFilenames.AGENCY_ADHOC.value[1],
help="Defines the filename for agency adhocs", help="Defines the filename for agency adhocs",
) )
parser.add_argument( parser.add_argument(
"--domain_additional_filename", "--domain_additional_filename",
default=EnumFilenames.DOMAIN_ADDITIONAL[1], default=EnumFilenames.DOMAIN_ADDITIONAL.value[1],
help="Defines the filename for additional domain data", help="Defines the filename for additional domain data",
) )
parser.add_argument( parser.add_argument(
"--domain_adhoc_filename", "--domain_adhoc_filename",
default=EnumFilenames.DOMAIN_ADHOC[1], default=EnumFilenames.DOMAIN_ADHOC.value[1],
help="Defines the filename for domain type adhocs", help="Defines the filename for domain type adhocs",
) )
parser.add_argument( parser.add_argument(
"--organization_adhoc_filename", "--organization_adhoc_filename",
default=EnumFilenames.ORGANIZATION_ADHOC[1], default=EnumFilenames.ORGANIZATION_ADHOC.value[1],
help="Defines the filename for domain type adhocs", help="Defines the filename for domain type adhocs",
) )
parser.add_argument("--sep", default="|", help="Delimiter character") parser.add_argument("--sep", default="|", help="Delimiter character")
@ -143,6 +144,7 @@ class Command(BaseCommand):
self.domain_object.parse_all_files() self.domain_object.parse_all_files()
except Exception as err: except Exception as err:
logger.error(f"Could not load additional data. Error: {err}") logger.error(f"Could not load additional data. Error: {err}")
raise err
else: else:
all_transition_domains = TransitionDomain.objects.all() all_transition_domains = TransitionDomain.objects.all()
if not all_transition_domains.exists(): if not all_transition_domains.exists():
@ -190,8 +192,9 @@ class Command(BaseCommand):
if info is None: if info is None:
self.parse_logs.create_log_item( self.parse_logs.create_log_item(
EnumFilenames.AGENCY_ADHOC, EnumFilenames.AGENCY_ADHOC,
LogCode.INFO, LogCode.ERROR,
f"Could not add federal_agency on {domain_name}, no data exists." f"Could not add federal_agency on {domain_name}, no data exists.",
domain_name
) )
return transition_domain return transition_domain
@ -205,6 +208,7 @@ class Command(BaseCommand):
EnumFilenames.DOMAIN_ADHOC, EnumFilenames.DOMAIN_ADHOC,
LogCode.ERROR, LogCode.ERROR,
f"Could not add inactive agency {info.agencyname} on {domain_name}", f"Could not add inactive agency {info.agencyname} on {domain_name}",
domain_name
) )
return transition_domain return transition_domain
@ -213,6 +217,7 @@ class Command(BaseCommand):
EnumFilenames.DOMAIN_ADHOC, EnumFilenames.DOMAIN_ADHOC,
LogCode.ERROR, LogCode.ERROR,
f"Could not add non-federal agency {info.agencyname} on {domain_name}", f"Could not add non-federal agency {info.agencyname} on {domain_name}",
domain_name
) )
return transition_domain return transition_domain
@ -242,8 +247,9 @@ class Command(BaseCommand):
if info is None: if info is None:
self.parse_logs.create_log_item( self.parse_logs.create_log_item(
EnumFilenames.DOMAIN_ADHOC, EnumFilenames.DOMAIN_ADHOC,
LogCode.INFO, LogCode.ERROR,
f"Could not add domain_type on {domain_name}, no data exists.", f"Could not add domain_type on {domain_name}, no data exists.",
domain_name
) )
return transition_domain return transition_domain
@ -264,6 +270,7 @@ class Command(BaseCommand):
EnumFilenames.DOMAIN_ADHOC, EnumFilenames.DOMAIN_ADHOC,
LogCode.ERROR, LogCode.ERROR,
f"Could not add inactive domain_type {domain_type[0]} on {domain_name}", f"Could not add inactive domain_type {domain_type[0]} on {domain_name}",
domain_name
) )
return transition_domain return transition_domain
@ -317,8 +324,9 @@ class Command(BaseCommand):
if org_info is None: if org_info is None:
self.parse_logs.create_log_item( self.parse_logs.create_log_item(
EnumFilenames.ORGANIZATION_ADHOC, EnumFilenames.ORGANIZATION_ADHOC,
LogCode.INFO, LogCode.ERROR,
f"Could not add organization_name on {domain_name}, no data exists.", f"Could not add organization_name on {domain_name}, no data exists.",
domain_name
) )
return transition_domain return transition_domain
@ -351,32 +359,42 @@ class Command(BaseCommand):
file_type, file_type,
LogCode.DEBUG, LogCode.DEBUG,
f"Added {file_type} as '{var_name}' on {domain_name}", f"Added {file_type} as '{var_name}' on {domain_name}",
domain_name
) )
else: else:
self.parse_logs.create_log_item( self.parse_logs.create_log_item(
file_type, file_type,
LogCode.INFO, LogCode.INFO,
f"Updated existing {var_name} to '{changed_value}' on {domain_name}", f"Updated existing {var_name} to '{changed_value}' on {domain_name}",
domain_name
) )
# Property getters, i.e. orgid or domaintypeid # Property getters, i.e. orgid or domaintypeid
def get_org_info(self, domain_name) -> OrganizationAdhoc: def get_org_info(self, domain_name) -> OrganizationAdhoc:
domain_info = self.get_domain_data(domain_name) domain_info = self.get_domain_data(domain_name)
if domain_info is None:
return None
org_id = domain_info.orgid org_id = domain_info.orgid
return self.get_organization_adhoc(org_id) return self.get_organization_adhoc(org_id)
def get_domain_type_info(self, domain_name) -> DomainTypeAdhoc: def get_domain_type_info(self, domain_name) -> DomainTypeAdhoc:
domain_info = self.get_domain_data(domain_name) domain_info = self.get_domain_data(domain_name)
if domain_info is None:
return None
type_id = domain_info.domaintypeid type_id = domain_info.domaintypeid
return self.get_domain_adhoc(type_id) return self.get_domain_adhoc(type_id)
def get_agency_info(self, domain_name) -> AgencyAdhoc: def get_agency_info(self, domain_name) -> AgencyAdhoc:
domain_info = self.get_domain_data(domain_name) domain_info = self.get_domain_data(domain_name)
if domain_info is None:
return None
type_id = domain_info.orgid type_id = domain_info.orgid
return self.get_domain_adhoc(type_id) return self.get_domain_adhoc(type_id)
def get_authority_info(self, domain_name): def get_authority_info(self, domain_name):
domain_info = self.get_domain_data(domain_name) domain_info = self.get_domain_data(domain_name)
if domain_info is None:
return None
type_id = domain_info.authorityid type_id = domain_info.authorityid
return self.get_authority_adhoc(type_id) return self.get_authority_adhoc(type_id)
@ -441,10 +459,9 @@ class Command(BaseCommand):
# Grab the value given an Id within that file_type dict. # Grab the value given an Id within that file_type dict.
# For example, "igorville.gov". # For example, "igorville.gov".
obj = desired_type.get(desired_id) obj = desired_type.data.get(desired_id)
if obj is None: if obj is None:
self.parse_logs.create_log_item( self.parse_logs.create_log_item(
file_type, LogCode.ERROR, f"Id {desired_id} does not exist" file_type, LogCode.ERROR, f"Id {desired_id} does not exist"
) )
return obj return obj

View file

@ -7,7 +7,7 @@ import logging
import os import os
from typing import List from typing import List
from epp_data_containers import ( from .epp_data_containers import (
AgencyAdhoc, AgencyAdhoc,
DomainAdditionalData, DomainAdditionalData,
DomainTypeAdhoc, DomainTypeAdhoc,
@ -45,23 +45,26 @@ class PatternMap:
regex: re.Pattern, regex: re.Pattern,
data_type: type, data_type: type,
id_field: str, id_field: str,
data: dict = {},
): ):
self.regex = regex self.regex = regex
self.data_type = data_type self.data_type = data_type
self.id_field = id_field self.id_field = id_field
self.data = data self.data = {}
self.filename = filename
self.could_infer = False
def try_infer_filename(self, current_file_name, default_file_name):
"""Tries to match a given filename to a regex,
then uses that match to generate the filename."""
# returns (filename, inferred_successfully) # returns (filename, inferred_successfully)
_infer = self._infer_filename(self.regex, filename) return self._infer_filename(self.regex, current_file_name, default_file_name)
self.filename = _infer[0]
self.could_infer = _infer[1]
def _infer_filename(self, regex: re.Pattern, default_file_name): def _infer_filename(self, regex: re.Pattern, matched_file_name, default_file_name):
if not isinstance(regex, re.Pattern): if not isinstance(regex, re.Pattern):
return (self.filename, False) return (self.filename, False)
match = regex.match(self.filename) match = regex.match(matched_file_name)
if not match: if not match:
return (self.filename, False) return (self.filename, False)
@ -74,7 +77,7 @@ class PatternMap:
return (self.filename, False) return (self.filename, False)
# If so, note that and return the inferred name # If so, note that and return the inferred name
full_filename = date + filename_without_date full_filename = date + "." + filename_without_date
return (full_filename, can_infer) return (full_filename, can_infer)
@ -82,25 +85,28 @@ class ExtraTransitionDomain:
"""Helper class to aid in storing TransitionDomain data spread across """Helper class to aid in storing TransitionDomain data spread across
multiple files.""" multiple files."""
filenames = EnumFilenames filenames = EnumFilenames
strip_date_regex = re.compile(r"\d+\.(.+)") #strip_date_regex = re.compile(r"\d+\.(.+)")
strip_date_regex = re.compile(r"(?:.*\/)?(\d+)\.(.+)")
def __init__( def __init__(
self, self,
agency_adhoc_filename=filenames.AGENCY_ADHOC[1], agency_adhoc_filename=filenames.AGENCY_ADHOC.value[1],
domain_additional_filename=filenames.DOMAIN_ADDITIONAL[1], domain_additional_filename=filenames.DOMAIN_ADDITIONAL.value[1],
domain_adhoc_filename=filenames.DOMAIN_ADHOC[1], domain_adhoc_filename=filenames.DOMAIN_ADHOC.value[1],
organization_adhoc_filename=filenames.ORGANIZATION_ADHOC[1], organization_adhoc_filename=filenames.ORGANIZATION_ADHOC.value[1],
authority_adhoc_filename=filenames.AUTHORITY_ADHOC[1], authority_adhoc_filename=filenames.AUTHORITY_ADHOC.value[1],
directory="migrationdata", directory="migrationdata",
seperator="|", seperator="|",
): ):
# Add a slash if the last character isn't one
if directory and directory[-1] != "/":
directory += "/"
self.directory = directory self.directory = directory
self.seperator = seperator self.seperator = seperator
_all_files = glob.glob(f"{directory}/*") self.all_files = glob.glob(f"{directory}*")
# Create a set with filenames as keys for quick lookup # Create a set with filenames as keys for quick lookup
self.all_files_set = {os.path.basename(file) for file in _all_files} self.all_files_set = {os.path.basename(file) for file in self.all_files}
self.file_data = { self.file_data = {
# (filename, default_url): metadata about the desired file # (filename, default_url): metadata about the desired file
self.filenames.AGENCY_ADHOC: PatternMap( self.filenames.AGENCY_ADHOC: PatternMap(
@ -132,34 +138,62 @@ class ExtraTransitionDomain:
), ),
} }
def parse_all_files(self): def parse_all_files(self, infer_filenames=True):
"""Clears all preexisting data then parses each related CSV file. """Clears all preexisting data then parses each related CSV file.
overwrite_existing_data: bool -> Determines if we should clear overwrite_existing_data: bool -> Determines if we should clear
file_data.data if it already exists file_data.data if it already exists
""" """
self.clear_file_data() self.clear_file_data()
for item in self.file_data: for name, value in self.file_data.items():
file_type: PatternMap = item.value filename = f"{value.filename}"
filename = file_type.filename
if filename in self.all_files_set: if filename in self.all_files_set:
file_type.data = self._read_csv_file( _file = f"{self.directory}{value.filename}"
self.all_files_set[filename], value.data = self._read_csv_file(
_file,
self.seperator, self.seperator,
file_type.data_type, value.data_type,
file_type.id_field, value.id_field,
) )
else: else:
if not infer_filenames:
logger.error(f"Could not find file: {filename}")
continue
logger.warning(
"Attempting to infer filename"
f" for file: {filename}."
)
for filename in self.all_files:
default_name = name.value[1]
match = value.try_infer_filename(filename, default_name)
filename = match[0]
can_infer = match[1]
if can_infer:
break
if filename in self.all_files_set:
logger.info(f"Infer success. Found file {filename}")
_file = f"{self.directory}{filename}"
value.data = self._read_csv_file(
_file,
self.seperator,
value.data_type,
value.id_field,
)
continue
# Log if we can't find the desired file # Log if we can't find the desired file
logger.error(f"Could not find file: {filename}") logger.error(f"Could not find file: {filename}")
def clear_file_data(self): def clear_file_data(self):
for item in self.file_data: for item in self.file_data.values():
file_type: PatternMap = item.value file_type: PatternMap = item
file_type.data = {} file_type.data = {}
def _read_csv_file(self, file, seperator, dataclass_type, id_field): def _read_csv_file(self, file, seperator, dataclass_type, id_field):
with open(file, "r", encoding="utf-8") as requested_file: with open(file, "r", encoding="utf-8-sig") as requested_file:
reader = csv.DictReader(requested_file, delimiter=seperator) reader = csv.DictReader(requested_file, delimiter=seperator)
return {row[id_field]: dataclass_type(**row) for row in reader} dict_data = {row[id_field]: dataclass_type(**row) for row in reader}
logger.debug(f"it is finally here {dict_data}")
return dict_data

View file

@ -5,7 +5,7 @@ from django.db import migrations, models
class Migration(migrations.Migration): class Migration(migrations.Migration):
dependencies = [ dependencies = [
("registrar", "0042_create_groups_v03"), ("registrar", "0043_domain_expiration_date"),
] ]
operations = [ operations = [
@ -31,4 +31,11 @@ class Migration(migrations.Migration):
blank=True, help_text="Type of organization", max_length=255, null=True blank=True, help_text="Type of organization", max_length=255, null=True
), ),
), ),
migrations.AddField(
model_name="transitiondomain",
name="organization_name",
field=models.TextField(
blank=True, db_index=True, help_text="Organization name", null=True
),
),
] ]