Script changes

zandercymatics 2023-10-30 14:29:56 -06:00
parent abf1cd9378
commit cb4db4f71a
3 changed files with 109 additions and 51 deletions


@@ -51,14 +51,14 @@ class FileTransitionLog:
}
class LogItem:
"""Used for storing data about logger information.
Intended for use in"""
def __init__(self, file_type, code, message):
"""Used for storing data about logger information."""
def __init__(self, file_type, code, message, domain_name):
self.file_type = file_type
self.code = code
self.message = message
self.domain_name = domain_name
def add_log(self, file_type, code, message):
def add_log(self, file_type, code, message, domain_name):
"""Adds a log item to self.logs
file_type -> Which array to add to,
@@ -68,18 +68,18 @@ class FileTransitionLog:
message -> Message to display
"""
self.logs[file_type] = self.LogItem(file_type, code, message)
self.logs[file_type].append(self.LogItem(file_type, code, message, domain_name))
def create_log_item(self, file_type, code, message, add_to_list=True):
def create_log_item(self, file_type, code, message, domain_name=None, add_to_list=True):
"""Creates and returns an LogItem object.
add_to_list: bool -> If enabled, add it to the logs array.
"""
log = self.LogItem(file_type, code, message)
log = self.LogItem(file_type, code, message, domain_name)
if not add_to_list:
return log
else:
self.logs[file_type] = log
self.logs[file_type].append(log)
return log
def display_logs(self, file_type):
@@ -89,6 +89,7 @@ class FileTransitionLog:
for log in self.logs.get(file_type):
match log.code:
case LogCode.ERROR:
if log.domain_name is None:
logger.error(log.message)
case LogCode.WARNING:
logger.warning(log.message)
@@ -110,22 +111,22 @@ class Command(BaseCommand):
)
parser.add_argument(
"--agency_adhoc_filename",
default=EnumFilenames.AGENCY_ADHOC[1],
default=EnumFilenames.AGENCY_ADHOC.value[1],
help="Defines the filename for agency adhocs",
)
parser.add_argument(
"--domain_additional_filename",
default=EnumFilenames.DOMAIN_ADDITIONAL[1],
default=EnumFilenames.DOMAIN_ADDITIONAL.value[1],
help="Defines the filename for additional domain data",
)
parser.add_argument(
"--domain_adhoc_filename",
default=EnumFilenames.DOMAIN_ADHOC[1],
default=EnumFilenames.DOMAIN_ADHOC.value[1],
help="Defines the filename for domain type adhocs",
)
parser.add_argument(
"--organization_adhoc_filename",
default=EnumFilenames.ORGANIZATION_ADHOC[1],
default=EnumFilenames.ORGANIZATION_ADHOC.value[1],
help="Defines the filename for domain type adhocs",
)
parser.add_argument("--sep", default="|", help="Delimiter character")
@@ -143,6 +144,7 @@ class Command(BaseCommand):
self.domain_object.parse_all_files()
except Exception as err:
logger.error(f"Could not load additional data. Error: {err}")
raise err
else:
all_transition_domains = TransitionDomain.objects.all()
if not all_transition_domains.exists():
@@ -190,8 +192,9 @@ class Command(BaseCommand):
if info is None:
self.parse_logs.create_log_item(
EnumFilenames.AGENCY_ADHOC,
LogCode.INFO,
f"Could not add federal_agency on {domain_name}, no data exists."
LogCode.ERROR,
f"Could not add federal_agency on {domain_name}, no data exists.",
domain_name
)
return transition_domain
@@ -205,6 +208,7 @@ class Command(BaseCommand):
EnumFilenames.DOMAIN_ADHOC,
LogCode.ERROR,
f"Could not add inactive agency {info.agencyname} on {domain_name}",
domain_name
)
return transition_domain
@@ -213,6 +217,7 @@ class Command(BaseCommand):
EnumFilenames.DOMAIN_ADHOC,
LogCode.ERROR,
f"Could not add non-federal agency {info.agencyname} on {domain_name}",
domain_name
)
return transition_domain
@@ -242,8 +247,9 @@ class Command(BaseCommand):
if info is None:
self.parse_logs.create_log_item(
EnumFilenames.DOMAIN_ADHOC,
LogCode.INFO,
LogCode.ERROR,
f"Could not add domain_type on {domain_name}, no data exists.",
domain_name
)
return transition_domain
@@ -264,6 +270,7 @@ class Command(BaseCommand):
EnumFilenames.DOMAIN_ADHOC,
LogCode.ERROR,
f"Could not add inactive domain_type {domain_type[0]} on {domain_name}",
domain_name
)
return transition_domain
@@ -317,8 +324,9 @@ class Command(BaseCommand):
if org_info is None:
self.parse_logs.create_log_item(
EnumFilenames.ORGANIZATION_ADHOC,
LogCode.INFO,
LogCode.ERROR,
f"Could not add organization_name on {domain_name}, no data exists.",
domain_name
)
return transition_domain
@@ -351,32 +359,42 @@ class Command(BaseCommand):
file_type,
LogCode.DEBUG,
f"Added {file_type} as '{var_name}' on {domain_name}",
domain_name
)
else:
self.parse_logs.create_log_item(
file_type,
LogCode.INFO,
f"Updated existing {var_name} to '{changed_value}' on {domain_name}",
domain_name
)
# Property getters, i.e. orgid or domaintypeid
def get_org_info(self, domain_name) -> OrganizationAdhoc:
domain_info = self.get_domain_data(domain_name)
if domain_info is None:
return None
org_id = domain_info.orgid
return self.get_organization_adhoc(org_id)
def get_domain_type_info(self, domain_name) -> DomainTypeAdhoc:
domain_info = self.get_domain_data(domain_name)
if domain_info is None:
return None
type_id = domain_info.domaintypeid
return self.get_domain_adhoc(type_id)
def get_agency_info(self, domain_name) -> AgencyAdhoc:
domain_info = self.get_domain_data(domain_name)
if domain_info is None:
return None
type_id = domain_info.orgid
return self.get_domain_adhoc(type_id)
def get_authority_info(self, domain_name):
domain_info = self.get_domain_data(domain_name)
if domain_info is None:
return None
type_id = domain_info.authorityid
return self.get_authority_adhoc(type_id)
@@ -441,10 +459,9 @@ class Command(BaseCommand):
# Grab the value given an Id within that file_type dict.
# For example, "igorville.gov".
obj = desired_type.get(desired_id)
obj = desired_type.data.get(desired_id)
if obj is None:
self.parse_logs.create_log_item(
file_type, LogCode.ERROR, f"Id {desired_id} does not exist"
)
return obj
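
Taken together, the logging changes in this file move self.logs from one LogItem per file type to a list of LogItems per file type, and stamp each item with the domain it was raised for so display_logs can filter on it. A minimal sketch of that shape, with LogCode stubbed out and the pre-populated logs dict replaced by setdefault for brevity:

import logging
from enum import Enum

logger = logging.getLogger(__name__)

class LogCode(Enum):  # stub for illustration; the real enum lives outside this diff
    ERROR = 1
    WARNING = 2
    INFO = 3
    DEBUG = 4

class FileTransitionLog:
    class LogItem:
        """One logged event, tagged with the domain it concerns."""
        def __init__(self, file_type, code, message, domain_name):
            self.file_type = file_type
            self.code = code
            self.message = message
            self.domain_name = domain_name

    def __init__(self):
        self.logs = {}  # file_type -> list of LogItems

    def add_log(self, file_type, code, message, domain_name):
        # Append rather than overwrite, so several domains can report against the same file.
        self.logs.setdefault(file_type, []).append(
            self.LogItem(file_type, code, message, domain_name)
        )
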


@@ -7,7 +7,7 @@ import logging
import os
from typing import List
from epp_data_containers import (
from .epp_data_containers import (
AgencyAdhoc,
DomainAdditionalData,
DomainTypeAdhoc,
@@ -45,23 +45,26 @@ class PatternMap:
regex: re.Pattern,
data_type: type,
id_field: str,
data: dict = {},
):
self.regex = regex
self.data_type = data_type
self.id_field = id_field
self.data = data
self.data = {}
self.filename = filename
self.could_infer = False
def try_infer_filename(self, current_file_name, default_file_name):
"""Tries to match a given filename to a regex,
then uses that match to generate the filename."""
# returns (filename, inferred_successfully)
_infer = self._infer_filename(self.regex, filename)
self.filename = _infer[0]
self.could_infer = _infer[1]
return self._infer_filename(self.regex, current_file_name, default_file_name)
def _infer_filename(self, regex: re.Pattern, default_file_name):
def _infer_filename(self, regex: re.Pattern, matched_file_name, default_file_name):
if not isinstance(regex, re.Pattern):
return (self.filename, False)
match = regex.match(self.filename)
match = regex.match(matched_file_name)
if not match:
return (self.filename, False)
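
Dropping the data: dict = {} parameter in favour of self.data = {} also sidesteps Python's shared-mutable-default pitfall: a default dict is built once when the function is defined, so every PatternMap constructed without an explicit data argument would have shared the same object. A short standalone illustration of the difference (not the project's code):

class Holder:
    def __init__(self, data: dict = {}):
        # The single default dict is created once and reused by every instance.
        self.data = data

class SafeHolder:
    def __init__(self):
        # A fresh dict per instance, as PatternMap now does.
        self.data = {}

a, b = Holder(), Holder()
a.data["x"] = 1
print(b.data)   # {'x': 1} -- b sees a's write through the shared default

c, d = SafeHolder(), SafeHolder()
c.data["x"] = 1
print(d.data)   # {}
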
@@ -74,7 +77,7 @@ class PatternMap:
return (self.filename, False)
# If so, note that and return the inferred name
full_filename = date + filename_without_date
full_filename = date + "." + filename_without_date
return (full_filename, can_infer)
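
try_infer_filename and _infer_filename now take the candidate file name explicitly and join the recovered pieces with an extra dot. Paired with the updated strip_date_regex defined just below, the matching works roughly like this (the sample path is made up for illustration):

import re

strip_date_regex = re.compile(r"(?:.*\/)?(\d+)\.(.+)")  # same pattern as the one below

match = strip_date_regex.match("migrationdata/20231030.agency.adhoc.dotgov.txt")
if match:
    date = match.group(1)                    # "20231030"
    filename_without_date = match.group(2)   # "agency.adhoc.dotgov.txt"
    full_filename = date + "." + filename_without_date
    # -> "20231030.agency.adhoc.dotgov.txt", the name looked up in all_files_set
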
@@ -82,25 +85,28 @@ class ExtraTransitionDomain:
"""Helper class to aid in storing TransitionDomain data spread across
multiple files."""
filenames = EnumFilenames
strip_date_regex = re.compile(r"\d+\.(.+)")
#strip_date_regex = re.compile(r"\d+\.(.+)")
strip_date_regex = re.compile(r"(?:.*\/)?(\d+)\.(.+)")
def __init__(
self,
agency_adhoc_filename=filenames.AGENCY_ADHOC[1],
domain_additional_filename=filenames.DOMAIN_ADDITIONAL[1],
domain_adhoc_filename=filenames.DOMAIN_ADHOC[1],
organization_adhoc_filename=filenames.ORGANIZATION_ADHOC[1],
authority_adhoc_filename=filenames.AUTHORITY_ADHOC[1],
agency_adhoc_filename=filenames.AGENCY_ADHOC.value[1],
domain_additional_filename=filenames.DOMAIN_ADDITIONAL.value[1],
domain_adhoc_filename=filenames.DOMAIN_ADHOC.value[1],
organization_adhoc_filename=filenames.ORGANIZATION_ADHOC.value[1],
authority_adhoc_filename=filenames.AUTHORITY_ADHOC.value[1],
directory="migrationdata",
seperator="|",
):
# Add a slash if the last character isn't one
if directory and directory[-1] != "/":
directory += "/"
self.directory = directory
self.seperator = seperator
_all_files = glob.glob(f"{directory}/*")
self.all_files = glob.glob(f"{directory}*")
# Create a set with filenames as keys for quick lookup
self.all_files_set = {os.path.basename(file) for file in _all_files}
self.all_files_set = {os.path.basename(file) for file in self.all_files}
self.file_data = {
# (filename, default_url): metadata about the desired file
self.filenames.AGENCY_ADHOC: PatternMap(
@@ -132,34 +138,62 @@ class ExtraTransitionDomain:
),
}
def parse_all_files(self):
def parse_all_files(self, infer_filenames=True):
"""Clears all preexisting data then parses each related CSV file.
overwrite_existing_data: bool -> Determines if we should clear
file_data.data if it already exists
"""
self.clear_file_data()
for item in self.file_data:
file_type: PatternMap = item.value
filename = file_type.filename
for name, value in self.file_data.items():
filename = f"{value.filename}"
if filename in self.all_files_set:
file_type.data = self._read_csv_file(
self.all_files_set[filename],
_file = f"{self.directory}{value.filename}"
value.data = self._read_csv_file(
_file,
self.seperator,
file_type.data_type,
file_type.id_field,
value.data_type,
value.id_field,
)
else:
if not infer_filenames:
logger.error(f"Could not find file: {filename}")
continue
logger.warning(
"Attempting to infer filename"
f" for file: {filename}."
)
for filename in self.all_files:
default_name = name.value[1]
match = value.try_infer_filename(filename, default_name)
filename = match[0]
can_infer = match[1]
if can_infer:
break
if filename in self.all_files_set:
logger.info(f"Infer success. Found file {filename}")
_file = f"{self.directory}{filename}"
value.data = self._read_csv_file(
_file,
self.seperator,
value.data_type,
value.id_field,
)
continue
# Log if we can't find the desired file
logger.error(f"Could not find file: {filename}")
def clear_file_data(self):
for item in self.file_data:
file_type: PatternMap = item.value
for item in self.file_data.values():
file_type: PatternMap = item
file_type.data = {}
def _read_csv_file(self, file, seperator, dataclass_type, id_field):
with open(file, "r", encoding="utf-8") as requested_file:
with open(file, "r", encoding="utf-8-sig") as requested_file:
reader = csv.DictReader(requested_file, delimiter=seperator)
return {row[id_field]: dataclass_type(**row) for row in reader}
dict_data = {row[id_field]: dataclass_type(**row) for row in reader}
logger.debug(f"it is finally here {dict_data}")
return dict_data
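
_read_csv_file now opens the export with utf-8-sig, which strips a leading UTF-8 byte-order mark so the first header is not read back with a stray "\ufeff" prefix, and keys the parsed rows by id_field. A small standalone sketch of the same pattern; the dataclass and column names are placeholders, not the project's real containers:

import csv
from dataclasses import dataclass

@dataclass
class AgencyRow:        # placeholder container for illustration
    agencyid: str = ""
    agencyname: str = ""

def read_csv_file(path, separator, dataclass_type, id_field):
    # utf-8-sig silently drops a BOM if the export has one.
    with open(path, "r", encoding="utf-8-sig") as requested_file:
        reader = csv.DictReader(requested_file, delimiter=separator)
        return {row[id_field]: dataclass_type(**row) for row in reader}

# Hypothetical usage:
# data = read_csv_file("migrationdata/20231030.agency.adhoc.dotgov.txt", "|", AgencyRow, "agencyid")
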


@@ -5,7 +5,7 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("registrar", "0042_create_groups_v03"),
("registrar", "0043_domain_expiration_date"),
]
operations = [
@@ -31,4 +31,11 @@ class Migration(migrations.Migration):
blank=True, help_text="Type of organization", max_length=255, null=True
),
),
migrations.AddField(
model_name="transitiondomain",
name="organization_name",
field=models.TextField(
blank=True, db_index=True, help_text="Organization name", null=True
),
),
]
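
The AddField operation above corresponds, at the model level, to a declaration along these lines on TransitionDomain (field options copied from the migration; the surrounding model is abbreviated):

from django.db import models

class TransitionDomain(models.Model):   # abbreviated; the real model defines many more fields
    organization_name = models.TextField(
        null=True,
        blank=True,
        help_text="Organization name",
        db_index=True,
    )
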