Migration change, script work

This commit is contained in:
zandercymatics 2023-10-27 08:29:50 -06:00
parent 00f44f2f84
commit 519595ba9b
No known key found for this signature in database
GPG key ID: FF4636ABEC9682B7
6 changed files with 107 additions and 49 deletions

1
.gitignore vendored
View file

@ -175,3 +175,4 @@ src/migrationdata/20231009.domaintypes.adhoc.dotgov.txt
src/migrationdata/20231009.domainadditionaldatalink.adhoc.dotgov.txt src/migrationdata/20231009.domainadditionaldatalink.adhoc.dotgov.txt
src/migrationdata/20231009.agency.adhoc.dotgov.txt src/migrationdata/20231009.agency.adhoc.dotgov.txt
src/migrationdata/20231009.organization.adhoc.dotgov.txt src/migrationdata/20231009.organization.adhoc.dotgov.txt
src/migrationdata/20231009.organization.adhoc.dotgov.txt

View file

@ -8,34 +8,18 @@ import os
from typing import List from typing import List
from enum import Enum from enum import Enum
from django.core.management import BaseCommand from django.core.management import BaseCommand
from .utility.extra_transition_domain import ExtraTransitionDomain
from registrar.models.transition_domain import TransitionDomain
from .utility.extra_transition_domain import ExtraTransitionDomain
from .utility.epp_data_containers import AgencyAdhoc, DomainAdditionalData, DomainTypeAdhoc, OrganizationAdhoc, EnumFilenames
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class EnumFilenames(Enum):
    """Default file names for the adhoc data files used by this command.

    Each value is the fixed tail of a file name. Real files are typically
    prefixed with a date, e.g. ``20231009.agency.adhoc.dotgov.txt``.
    """

    # Agency adhoc data.
    AGENCY_ADHOC = "agency.adhoc.dotgov.txt"
    # Additional domain data.
    DOMAIN_ADDITIONAL = "domainadditionaldatalink.adhoc.dotgov.txt"
    # Domain type adhoc data.
    DOMAIN_ADHOC = "domaintypes.adhoc.dotgov.txt"
    # Organization adhoc data.
    ORGANIZATION_ADHOC = "organization.adhoc.dotgov.txt"
class Command(BaseCommand): class Command(BaseCommand):
help = "" help = ""
filenames = EnumFilenames filenames = EnumFilenames
strip_date_regex = re.compile(r'\d+\.(.+)')
# While the prefix of these files typically includes the date,
# the rest of them following a predefined pattern. Define this here,
# and search for that to infer what is wanted.
filename_pattern_mapping = {
# filename - regex to use when encountered
filenames.AGENCY_ADHOC: strip_date_regex,
filenames.DOMAIN_ADDITIONAL: strip_date_regex,
filenames.DOMAIN_ADHOC: strip_date_regex,
filenames.ORGANIZATION_ADHOC: strip_date_regex
}
def add_arguments(self, parser): def add_arguments(self, parser):
"""Add filename arguments.""" """Add filename arguments."""
parser.add_argument( parser.add_argument(
@ -45,22 +29,22 @@ class Command(BaseCommand):
) )
parser.add_argument( parser.add_argument(
"--agency_adhoc_filename", "--agency_adhoc_filename",
default=self.filenames.AGENCY_ADHOC, default=self.filenames.AGENCY_ADHOC[1],
help="Defines the filename for agency adhocs", help="Defines the filename for agency adhocs",
) )
parser.add_argument( parser.add_argument(
"--domain_additional_filename", "--domain_additional_filename",
default=self.filenames.DOMAIN_ADDITIONAL, default=self.filenames.DOMAIN_ADDITIONAL[1],
help="Defines the filename for additional domain data", help="Defines the filename for additional domain data",
) )
parser.add_argument( parser.add_argument(
"--domain_adhoc_filename", "--domain_adhoc_filename",
default=self.filenames.DOMAIN_ADHOC, default=self.filenames.DOMAIN_ADHOC[1],
help="Defines the filename for domain type adhocs", help="Defines the filename for domain type adhocs",
) )
parser.add_argument( parser.add_argument(
"--organization_adhoc_filename", "--organization_adhoc_filename",
default=self.filenames.ORGANIZATION_ADHOC, default=self.filenames.ORGANIZATION_ADHOC[1],
help="Defines the filename for domain type adhocs", help="Defines the filename for domain type adhocs",
) )
parser.add_argument("--sep", default="|", help="Delimiter character") parser.add_argument("--sep", default="|", help="Delimiter character")
@ -79,5 +63,30 @@ class Command(BaseCommand):
except Exception as err: except Exception as err:
logger.error(f"Could not load additional data. Error: {err}") logger.error(f"Could not load additional data. Error: {err}")
else: else:
for transition_domain in TransitionDomain.objects.all():
transition_domain.organization_type
def get_organization_adhoc(self, desired_id):
    """Fetch the organization adhoc entry stored under ``desired_id``.

    Delegates to ``get_object_by_id`` with the ``ORGANIZATION_ADHOC`` file
    type and returns whatever that lookup returns.
    """
    file_type = self.filenames.ORGANIZATION_ADHOC
    return self.get_object_by_id(file_type, desired_id)
def get_domain_adhoc(self, desired_id):
    """Fetch the domain type adhoc entry stored under ``desired_id``.

    Delegates to ``get_object_by_id`` with the ``DOMAIN_ADHOC`` file type
    and returns whatever that lookup returns.
    """
    file_type = self.filenames.DOMAIN_ADHOC
    return self.get_object_by_id(file_type, desired_id)
def get_agency_adhoc(self, desired_id):
    """Fetch the agency adhoc entry stored under ``desired_id``.

    Delegates to ``get_object_by_id`` with the ``AGENCY_ADHOC`` file type
    and returns whatever that lookup returns.
    """
    file_type = self.filenames.AGENCY_ADHOC
    return self.get_object_by_id(file_type, desired_id)
def get_object_by_id(self, file_type: "EnumFilenames", desired_id):
    """Return the parsed row stored under ``desired_id`` for ``file_type``.

    ``self.domain_object.csv_data`` maps each file type to a container
    whose ``data`` attribute holds the parsed rows keyed by id, so the id
    lookup has to go through ``.data`` rather than the container itself.

    Args:
        file_type: Key identifying which file's data to search.
        desired_id: Id of the row to fetch.

    Returns:
        The matching row object, or ``None`` when either the file type or
        the id is unknown.
    """
    pattern_map = self.domain_object.csv_data.get(file_type)
    if pattern_map is None:
        # Unknown file type: report "not found" instead of failing on None.
        return None
    return pattern_map.data.get(desired_id)

View file

@ -1,4 +1,5 @@
from dataclasses import dataclass from dataclasses import dataclass
from enum import Enum
from typing import Optional from typing import Optional
@dataclass @dataclass
@ -39,3 +40,13 @@ class OrganizationAdhoc():
orgstate: Optional[str] = None orgstate: Optional[str] = None
orgzip: Optional[str] = None orgzip: Optional[str] = None
orgcountrycode: Optional[str] = None orgcountrycode: Optional[str] = None
class EnumFilenames(Enum):
    """Known adhoc data files, each stored as (filetype, default_file_name).

    For instance, AGENCY_ADHOC = ("agency_adhoc", "agency.adhoc.dotgov.txt")

    Members can be indexed like the tuple they wrap, so
    ``EnumFilenames.AGENCY_ADHOC[1]`` gives the default file name. A plain
    Enum member is not subscriptable, so without ``__getitem__`` that
    expression raises ``TypeError``.
    """

    AGENCY_ADHOC = ("agency_adhoc", "agency.adhoc.dotgov.txt")
    DOMAIN_ADDITIONAL = (
        "domain_additional",
        "domainadditionaldatalink.adhoc.dotgov.txt",
    )
    DOMAIN_ADHOC = ("domain_adhoc", "domaintypes.adhoc.dotgov.txt")
    ORGANIZATION_ADHOC = (
        "organization_adhoc",
        "organization.adhoc.dotgov.txt",
    )

    def __getitem__(self, index):
        """Return element ``index`` of this member's value tuple."""
        return self.value[index]

View file

@ -7,27 +7,34 @@ import logging
import os import os
from typing import List from typing import List
from enum import Enum from epp_data_containers import AgencyAdhoc, DomainAdditionalData, DomainTypeAdhoc, OrganizationAdhoc, EnumFilenames
from epp_data_containers import AgencyAdhoc, DomainAdditionalData, DomainTypeAdhoc, OrganizationAdhoc
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class EnumFilenames(Enum):
    """Known adhoc data files, each stored as (filetype, default_file_name).

    For instance, AGENCY_ADHOC = ("agency_adhoc", "agency.adhoc.dotgov.txt")

    Members can be indexed like the tuple they wrap, so
    ``EnumFilenames.AGENCY_ADHOC[1]`` gives the default file name. A plain
    Enum member is not subscriptable, so without ``__getitem__`` that
    expression raises ``TypeError``.
    """

    AGENCY_ADHOC = ("agency_adhoc", "agency.adhoc.dotgov.txt")
    DOMAIN_ADDITIONAL = (
        "domain_additional",
        "domainadditionaldatalink.adhoc.dotgov.txt",
    )
    DOMAIN_ADHOC = ("domain_adhoc", "domaintypes.adhoc.dotgov.txt")
    ORGANIZATION_ADHOC = (
        "organization_adhoc",
        "organization.adhoc.dotgov.txt",
    )

    def __getitem__(self, index):
        """Return element ``index`` of this member's value tuple."""
        return self.value[index]
@dataclass @dataclass
class PatternMap(): class PatternMap():
"""Helper class that holds data and metadata about a requested file.
def __init__(self, filename: str, regex, data_type, data=[]): filename: str -> The desired filename to target. If no filename is given,
it is assumed that you are passing in a filename pattern and it will look
for a filename that matches the given postfix you pass in.
regex: re.Pattern -> Defines what regex you want to use when inferring
filenames. If none, no matching occurs.
data_type: type -> Metadata about the desired type for data.
id_field: str -> Defines which field should act as the id in data.
data: dict -> The returned data. Intended to be used with data_type
to cross-reference.
"""
def __init__(self, filename: str, regex: re.Pattern, data_type: type, id_field: str, data: dict = {}):
self.regex = regex self.regex = regex
self.data_type = data_type self.data_type = data_type
self.id_field = id_field
self.data = data self.data = data
# returns (filename, inferred_successfully) # returns (filename, inferred_successfully)
@ -36,7 +43,7 @@ class PatternMap():
self.could_infer = _infer[1] self.could_infer = _infer[1]
def _infer_filename(self, regex, default_file_name): def _infer_filename(self, regex: re.Pattern, default_file_name):
if not isinstance(regex, re.Pattern): if not isinstance(regex, re.Pattern):
return (self.filename, False) return (self.filename, False)
@ -73,15 +80,20 @@ class ExtraTransitionDomain():
self.all_files_set = {os.path.basename(file) for file in self.all_files} self.all_files_set = {os.path.basename(file) for file in self.all_files}
self.csv_data = { self.csv_data = {
self.filenames.AGENCY_ADHOC: PatternMap(agency_adhoc_filename, self.strip_date_regex, AgencyAdhoc), # (filename, default_url): metadata about the desired file
self.filenames.DOMAIN_ADDITIONAL: PatternMap(domain_additional_filename, self.strip_date_regex, DomainAdditionalData), self.filenames.AGENCY_ADHOC: PatternMap(agency_adhoc_filename, self.strip_date_regex, AgencyAdhoc, "agencyid"),
self.filenames.DOMAIN_ADHOC: PatternMap(domain_adhoc_filename, self.strip_date_regex, DomainTypeAdhoc), self.filenames.DOMAIN_ADDITIONAL: PatternMap(domain_additional_filename, self.strip_date_regex, DomainAdditionalData, "domainname"),
self.filenames.ORGANIZATION_ADHOC: PatternMap(organization_adhoc_filename, self.strip_date_regex, OrganizationAdhoc) self.filenames.DOMAIN_ADHOC: PatternMap(domain_adhoc_filename, self.strip_date_regex, DomainTypeAdhoc, "domaintypeid"),
self.filenames.ORGANIZATION_ADHOC: PatternMap(organization_adhoc_filename, self.strip_date_regex, OrganizationAdhoc, "orgid")
} }
def parse_all_files(self): def parse_all_files(self, overwrite_existing_data = True):
"""Clears all preexisting data then parses each related CSV file""" """Clears all preexisting data then parses each related CSV file.
overwrite_existing_data: bool -> Determines if we should clear
csv_data.data if it already exists
"""
self.clear_csv_data() self.clear_csv_data()
for item in self.csv_data: for item in self.csv_data:
file_type: PatternMap = item.value file_type: PatternMap = item.value
@ -91,20 +103,21 @@ class ExtraTransitionDomain():
file_type.data = self._read_csv_file( file_type.data = self._read_csv_file(
self.all_files_set[filename], self.all_files_set[filename],
self.seperator, self.seperator,
file_type.data_type file_type.data_type,
file_type.id_field
) )
else: else:
# Log if we can't find the desired file # Log if we can't find the desired file
logger.warning(f"Could not find file: {filename}") logger.error(f"Could not find file: {filename}")
def clear_csv_data(self):
    """Reset the parsed rows of every tracked file to an empty dict.

    ``self.csv_data`` maps a file-type key to the object holding that
    file's metadata and parsed rows. Each holder gets its own fresh dict,
    so holders never share state.
    """
    # Iterate the dict values: the holders are the values, while the keys
    # are only the file-type identifiers and carry no ``data`` attribute.
    for file_type in self.csv_data.values():
        file_type.data = {}
def _read_csv_file(self, file, seperator, dataclass_type, id_field):
    """Parse a delimited text file into a dict of dataclass instances.

    Args:
        file: Path of the file to read (opened as UTF-8 text).
        seperator: Delimiter character separating the columns.
        dataclass_type: Callable invoked as ``dataclass_type(**row)`` for
            every row, where ``row`` maps header names to cell strings.
        id_field: Header name whose cell value becomes the dict key.

    Returns:
        A dict mapping each row's ``id_field`` value to the object built
        from that row. When an id repeats, the last row wins.

    Raises:
        KeyError: If a row has no ``id_field`` column.
    """
    # newline="" lets the csv module handle line endings itself, as its
    # documentation requires for quoted newlines and "\r\n" terminators.
    with open(file, "r", encoding="utf-8", newline="") as requested_file:
        reader = csv.DictReader(requested_file, delimiter=seperator)
        return {row[id_field]: dataclass_type(**row) for row in reader}

View file

@ -0,0 +1,19 @@
# Generated by Django 4.2.6 on 2023-10-27 14:21
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the ``organization_name`` field to the ``transitiondomain`` model."""

    # Runs after registrar migration 0043.
    dependencies = [
        ("registrar", "0043_transitiondomain_federal_agency_and_more"),
    ]

    operations = [
        migrations.AddField(
            model_name="transitiondomain",
            name="organization_name",
            # Declared with null=True and blank=True, i.e. the value is optional.
            field=models.TextField(
                blank=True, help_text="Organization name", null=True
            ),
        ),
    ]

View file

@ -48,6 +48,11 @@ class TransitionDomain(TimeStampedModel):
blank=True, blank=True,
help_text="Type of organization", help_text="Type of organization",
) )
organization_name = models.TextField(
null=True,
blank=True,
help_text="Organization name",
)
federal_type = models.TextField( federal_type = models.TextField(
max_length=50, max_length=50,
null=True, null=True,