mirror of
https://github.com/cisagov/manage.get.gov.git
synced 2025-07-14 06:55:08 +02:00
Script changes
This commit is contained in:
parent
abf1cd9378
commit
cb4db4f71a
3 changed files with 109 additions and 51 deletions
|
@ -51,14 +51,14 @@ class FileTransitionLog:
|
|||
}
|
||||
|
||||
class LogItem:
|
||||
"""Used for storing data about logger information.
|
||||
Intended for use in"""
|
||||
def __init__(self, file_type, code, message):
|
||||
"""Used for storing data about logger information."""
|
||||
def __init__(self, file_type, code, message, domain_name):
|
||||
self.file_type = file_type
|
||||
self.code = code
|
||||
self.message = message
|
||||
self.domain_name = domain_name
|
||||
|
||||
def add_log(self, file_type, code, message):
|
||||
def add_log(self, file_type, code, message, domain_name):
|
||||
"""Adds a log item to self.logs
|
||||
|
||||
file_type -> Which array to add to,
|
||||
|
@ -68,18 +68,18 @@ class FileTransitionLog:
|
|||
|
||||
message -> Message to display
|
||||
"""
|
||||
self.logs[file_type] = self.LogItem(file_type, code, message)
|
||||
self.logs[file_type].append(self.LogItem(file_type, code, message, domain_name))
|
||||
|
||||
def create_log_item(self, file_type, code, message, add_to_list=True):
|
||||
def create_log_item(self, file_type, code, message, domain_name=None, add_to_list=True):
|
||||
"""Creates and returns an LogItem object.
|
||||
|
||||
add_to_list: bool -> If enabled, add it to the logs array.
|
||||
"""
|
||||
log = self.LogItem(file_type, code, message)
|
||||
log = self.LogItem(file_type, code, message, domain_name)
|
||||
if not add_to_list:
|
||||
return log
|
||||
else:
|
||||
self.logs[file_type] = log
|
||||
self.logs[file_type].append(log)
|
||||
return log
|
||||
|
||||
def display_logs(self, file_type):
|
||||
|
@ -89,7 +89,8 @@ class FileTransitionLog:
|
|||
for log in self.logs.get(file_type):
|
||||
match log.code:
|
||||
case LogCode.ERROR:
|
||||
logger.error(log.message)
|
||||
if log.domain_name is None:
|
||||
logger.error(log.message)
|
||||
case LogCode.WARNING:
|
||||
logger.warning(log.message)
|
||||
case LogCode.INFO:
|
||||
|
@ -110,22 +111,22 @@ class Command(BaseCommand):
|
|||
)
|
||||
parser.add_argument(
|
||||
"--agency_adhoc_filename",
|
||||
default=EnumFilenames.AGENCY_ADHOC[1],
|
||||
default=EnumFilenames.AGENCY_ADHOC.value[1],
|
||||
help="Defines the filename for agency adhocs",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--domain_additional_filename",
|
||||
default=EnumFilenames.DOMAIN_ADDITIONAL[1],
|
||||
default=EnumFilenames.DOMAIN_ADDITIONAL.value[1],
|
||||
help="Defines the filename for additional domain data",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--domain_adhoc_filename",
|
||||
default=EnumFilenames.DOMAIN_ADHOC[1],
|
||||
default=EnumFilenames.DOMAIN_ADHOC.value[1],
|
||||
help="Defines the filename for domain type adhocs",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--organization_adhoc_filename",
|
||||
default=EnumFilenames.ORGANIZATION_ADHOC[1],
|
||||
default=EnumFilenames.ORGANIZATION_ADHOC.value[1],
|
||||
help="Defines the filename for domain type adhocs",
|
||||
)
|
||||
parser.add_argument("--sep", default="|", help="Delimiter character")
|
||||
|
@ -143,6 +144,7 @@ class Command(BaseCommand):
|
|||
self.domain_object.parse_all_files()
|
||||
except Exception as err:
|
||||
logger.error(f"Could not load additional data. Error: {err}")
|
||||
raise err
|
||||
else:
|
||||
all_transition_domains = TransitionDomain.objects.all()
|
||||
if not all_transition_domains.exists():
|
||||
|
@ -190,8 +192,9 @@ class Command(BaseCommand):
|
|||
if info is None:
|
||||
self.parse_logs.create_log_item(
|
||||
EnumFilenames.AGENCY_ADHOC,
|
||||
LogCode.INFO,
|
||||
f"Could not add federal_agency on {domain_name}, no data exists."
|
||||
LogCode.ERROR,
|
||||
f"Could not add federal_agency on {domain_name}, no data exists.",
|
||||
domain_name
|
||||
)
|
||||
return transition_domain
|
||||
|
||||
|
@ -205,6 +208,7 @@ class Command(BaseCommand):
|
|||
EnumFilenames.DOMAIN_ADHOC,
|
||||
LogCode.ERROR,
|
||||
f"Could not add inactive agency {info.agencyname} on {domain_name}",
|
||||
domain_name
|
||||
)
|
||||
return transition_domain
|
||||
|
||||
|
@ -213,6 +217,7 @@ class Command(BaseCommand):
|
|||
EnumFilenames.DOMAIN_ADHOC,
|
||||
LogCode.ERROR,
|
||||
f"Could not add non-federal agency {info.agencyname} on {domain_name}",
|
||||
domain_name
|
||||
)
|
||||
return transition_domain
|
||||
|
||||
|
@ -242,8 +247,9 @@ class Command(BaseCommand):
|
|||
if info is None:
|
||||
self.parse_logs.create_log_item(
|
||||
EnumFilenames.DOMAIN_ADHOC,
|
||||
LogCode.INFO,
|
||||
LogCode.ERROR,
|
||||
f"Could not add domain_type on {domain_name}, no data exists.",
|
||||
domain_name
|
||||
)
|
||||
return transition_domain
|
||||
|
||||
|
@ -264,6 +270,7 @@ class Command(BaseCommand):
|
|||
EnumFilenames.DOMAIN_ADHOC,
|
||||
LogCode.ERROR,
|
||||
f"Could not add inactive domain_type {domain_type[0]} on {domain_name}",
|
||||
domain_name
|
||||
)
|
||||
return transition_domain
|
||||
|
||||
|
@ -317,8 +324,9 @@ class Command(BaseCommand):
|
|||
if org_info is None:
|
||||
self.parse_logs.create_log_item(
|
||||
EnumFilenames.ORGANIZATION_ADHOC,
|
||||
LogCode.INFO,
|
||||
LogCode.ERROR,
|
||||
f"Could not add organization_name on {domain_name}, no data exists.",
|
||||
domain_name
|
||||
)
|
||||
return transition_domain
|
||||
|
||||
|
@ -351,32 +359,42 @@ class Command(BaseCommand):
|
|||
file_type,
|
||||
LogCode.DEBUG,
|
||||
f"Added {file_type} as '{var_name}' on {domain_name}",
|
||||
domain_name
|
||||
)
|
||||
else:
|
||||
self.parse_logs.create_log_item(
|
||||
file_type,
|
||||
LogCode.INFO,
|
||||
f"Updated existing {var_name} to '{changed_value}' on {domain_name}",
|
||||
domain_name
|
||||
)
|
||||
|
||||
# Property getters, i.e. orgid or domaintypeid
|
||||
def get_org_info(self, domain_name) -> OrganizationAdhoc:
|
||||
domain_info = self.get_domain_data(domain_name)
|
||||
if domain_info is None:
|
||||
return None
|
||||
org_id = domain_info.orgid
|
||||
return self.get_organization_adhoc(org_id)
|
||||
|
||||
def get_domain_type_info(self, domain_name) -> DomainTypeAdhoc:
|
||||
domain_info = self.get_domain_data(domain_name)
|
||||
if domain_info is None:
|
||||
return None
|
||||
type_id = domain_info.domaintypeid
|
||||
return self.get_domain_adhoc(type_id)
|
||||
|
||||
def get_agency_info(self, domain_name) -> AgencyAdhoc:
|
||||
domain_info = self.get_domain_data(domain_name)
|
||||
if domain_info is None:
|
||||
return None
|
||||
type_id = domain_info.orgid
|
||||
return self.get_domain_adhoc(type_id)
|
||||
|
||||
def get_authority_info(self, domain_name):
|
||||
domain_info = self.get_domain_data(domain_name)
|
||||
if domain_info is None:
|
||||
return None
|
||||
type_id = domain_info.authorityid
|
||||
return self.get_authority_adhoc(type_id)
|
||||
|
||||
|
@ -441,10 +459,9 @@ class Command(BaseCommand):
|
|||
|
||||
# Grab the value given an Id within that file_type dict.
|
||||
# For example, "igorville.gov".
|
||||
obj = desired_type.get(desired_id)
|
||||
obj = desired_type.data.get(desired_id)
|
||||
if obj is None:
|
||||
self.parse_logs.create_log_item(
|
||||
file_type, LogCode.ERROR, f"Id {desired_id} does not exist"
|
||||
)
|
||||
|
||||
return obj
|
||||
|
|
|
@ -7,7 +7,7 @@ import logging
|
|||
|
||||
import os
|
||||
from typing import List
|
||||
from epp_data_containers import (
|
||||
from .epp_data_containers import (
|
||||
AgencyAdhoc,
|
||||
DomainAdditionalData,
|
||||
DomainTypeAdhoc,
|
||||
|
@ -45,23 +45,26 @@ class PatternMap:
|
|||
regex: re.Pattern,
|
||||
data_type: type,
|
||||
id_field: str,
|
||||
data: dict = {},
|
||||
):
|
||||
self.regex = regex
|
||||
self.data_type = data_type
|
||||
self.id_field = id_field
|
||||
self.data = data
|
||||
self.data = {}
|
||||
self.filename = filename
|
||||
self.could_infer = False
|
||||
|
||||
def try_infer_filename(self, current_file_name, default_file_name):
|
||||
"""Tries to match a given filename to a regex,
|
||||
then uses that match to generate the filename."""
|
||||
# returns (filename, inferred_successfully)
|
||||
_infer = self._infer_filename(self.regex, filename)
|
||||
self.filename = _infer[0]
|
||||
self.could_infer = _infer[1]
|
||||
return self._infer_filename(self.regex, current_file_name, default_file_name)
|
||||
|
||||
def _infer_filename(self, regex: re.Pattern, default_file_name):
|
||||
def _infer_filename(self, regex: re.Pattern, matched_file_name, default_file_name):
|
||||
if not isinstance(regex, re.Pattern):
|
||||
return (self.filename, False)
|
||||
|
||||
match = regex.match(self.filename)
|
||||
match = regex.match(matched_file_name)
|
||||
|
||||
if not match:
|
||||
return (self.filename, False)
|
||||
|
||||
|
@ -74,7 +77,7 @@ class PatternMap:
|
|||
return (self.filename, False)
|
||||
|
||||
# If so, note that and return the inferred name
|
||||
full_filename = date + filename_without_date
|
||||
full_filename = date + "." + filename_without_date
|
||||
return (full_filename, can_infer)
|
||||
|
||||
|
||||
|
@ -82,25 +85,28 @@ class ExtraTransitionDomain:
|
|||
"""Helper class to aid in storing TransitionDomain data spread across
|
||||
multiple files."""
|
||||
filenames = EnumFilenames
|
||||
strip_date_regex = re.compile(r"\d+\.(.+)")
|
||||
#strip_date_regex = re.compile(r"\d+\.(.+)")
|
||||
strip_date_regex = re.compile(r"(?:.*\/)?(\d+)\.(.+)")
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
agency_adhoc_filename=filenames.AGENCY_ADHOC[1],
|
||||
domain_additional_filename=filenames.DOMAIN_ADDITIONAL[1],
|
||||
domain_adhoc_filename=filenames.DOMAIN_ADHOC[1],
|
||||
organization_adhoc_filename=filenames.ORGANIZATION_ADHOC[1],
|
||||
authority_adhoc_filename=filenames.AUTHORITY_ADHOC[1],
|
||||
agency_adhoc_filename=filenames.AGENCY_ADHOC.value[1],
|
||||
domain_additional_filename=filenames.DOMAIN_ADDITIONAL.value[1],
|
||||
domain_adhoc_filename=filenames.DOMAIN_ADHOC.value[1],
|
||||
organization_adhoc_filename=filenames.ORGANIZATION_ADHOC.value[1],
|
||||
authority_adhoc_filename=filenames.AUTHORITY_ADHOC.value[1],
|
||||
directory="migrationdata",
|
||||
seperator="|",
|
||||
):
|
||||
# Add a slash if the last character isn't one
|
||||
if directory and directory[-1] != "/":
|
||||
directory += "/"
|
||||
self.directory = directory
|
||||
self.seperator = seperator
|
||||
|
||||
_all_files = glob.glob(f"{directory}/*")
|
||||
self.all_files = glob.glob(f"{directory}*")
|
||||
# Create a set with filenames as keys for quick lookup
|
||||
self.all_files_set = {os.path.basename(file) for file in _all_files}
|
||||
|
||||
self.all_files_set = {os.path.basename(file) for file in self.all_files}
|
||||
self.file_data = {
|
||||
# (filename, default_url): metadata about the desired file
|
||||
self.filenames.AGENCY_ADHOC: PatternMap(
|
||||
|
@ -132,34 +138,62 @@ class ExtraTransitionDomain:
|
|||
),
|
||||
}
|
||||
|
||||
def parse_all_files(self):
|
||||
def parse_all_files(self, infer_filenames=True):
|
||||
"""Clears all preexisting data then parses each related CSV file.
|
||||
|
||||
overwrite_existing_data: bool -> Determines if we should clear
|
||||
file_data.data if it already exists
|
||||
"""
|
||||
self.clear_file_data()
|
||||
for item in self.file_data:
|
||||
file_type: PatternMap = item.value
|
||||
filename = file_type.filename
|
||||
for name, value in self.file_data.items():
|
||||
filename = f"{value.filename}"
|
||||
|
||||
if filename in self.all_files_set:
|
||||
file_type.data = self._read_csv_file(
|
||||
self.all_files_set[filename],
|
||||
_file = f"{self.directory}{value.filename}"
|
||||
value.data = self._read_csv_file(
|
||||
_file,
|
||||
self.seperator,
|
||||
file_type.data_type,
|
||||
file_type.id_field,
|
||||
value.data_type,
|
||||
value.id_field,
|
||||
)
|
||||
else:
|
||||
if not infer_filenames:
|
||||
logger.error(f"Could not find file: {filename}")
|
||||
continue
|
||||
|
||||
logger.warning(
|
||||
"Attempting to infer filename"
|
||||
f" for file: {filename}."
|
||||
)
|
||||
for filename in self.all_files:
|
||||
default_name = name.value[1]
|
||||
match = value.try_infer_filename(filename, default_name)
|
||||
filename = match[0]
|
||||
can_infer = match[1]
|
||||
if can_infer:
|
||||
break
|
||||
|
||||
if filename in self.all_files_set:
|
||||
logger.info(f"Infer success. Found file {filename}")
|
||||
_file = f"{self.directory}{filename}"
|
||||
value.data = self._read_csv_file(
|
||||
_file,
|
||||
self.seperator,
|
||||
value.data_type,
|
||||
value.id_field,
|
||||
)
|
||||
continue
|
||||
# Log if we can't find the desired file
|
||||
logger.error(f"Could not find file: {filename}")
|
||||
|
||||
def clear_file_data(self):
|
||||
for item in self.file_data:
|
||||
file_type: PatternMap = item.value
|
||||
for item in self.file_data.values():
|
||||
file_type: PatternMap = item
|
||||
file_type.data = {}
|
||||
|
||||
def _read_csv_file(self, file, seperator, dataclass_type, id_field):
|
||||
with open(file, "r", encoding="utf-8") as requested_file:
|
||||
with open(file, "r", encoding="utf-8-sig") as requested_file:
|
||||
reader = csv.DictReader(requested_file, delimiter=seperator)
|
||||
return {row[id_field]: dataclass_type(**row) for row in reader}
|
||||
dict_data = {row[id_field]: dataclass_type(**row) for row in reader}
|
||||
logger.debug(f"it is finally here {dict_data}")
|
||||
return dict_data
|
||||
|
|
|
@ -5,7 +5,7 @@ from django.db import migrations, models
|
|||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
("registrar", "0042_create_groups_v03"),
|
||||
("registrar", "0043_domain_expiration_date"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
|
@ -31,4 +31,11 @@ class Migration(migrations.Migration):
|
|||
blank=True, help_text="Type of organization", max_length=255, null=True
|
||||
),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name="transitiondomain",
|
||||
name="organization_name",
|
||||
field=models.TextField(
|
||||
blank=True, db_index=True, help_text="Organization name", null=True
|
||||
),
|
||||
),
|
||||
]
|
Loading…
Add table
Add a link
Reference in a new issue