mirror of
https://github.com/cisagov/manage.get.gov.git
synced 2025-07-15 07:25:08 +02:00
Migration change, script work
This commit is contained in:
parent
00f44f2f84
commit
519595ba9b
6 changed files with 107 additions and 49 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -175,3 +175,4 @@ src/migrationdata/20231009.domaintypes.adhoc.dotgov.txt
|
||||||
src/migrationdata/20231009.domainadditionaldatalink.adhoc.dotgov.txt
|
src/migrationdata/20231009.domainadditionaldatalink.adhoc.dotgov.txt
|
||||||
src/migrationdata/20231009.agency.adhoc.dotgov.txt
|
src/migrationdata/20231009.agency.adhoc.dotgov.txt
|
||||||
src/migrationdata/20231009.organization.adhoc.dotgov.txt
|
src/migrationdata/20231009.organization.adhoc.dotgov.txt
|
||||||
|
src/migrationdata/20231009.organization.adhoc.dotgov.txt
|
||||||
|
|
|
@ -8,34 +8,18 @@ import os
|
||||||
from typing import List
|
from typing import List
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from django.core.management import BaseCommand
|
from django.core.management import BaseCommand
|
||||||
from .utility.extra_transition_domain import ExtraTransitionDomain
|
|
||||||
|
|
||||||
|
from registrar.models.transition_domain import TransitionDomain
|
||||||
|
from .utility.extra_transition_domain import ExtraTransitionDomain
|
||||||
|
from .utility.epp_data_containers import AgencyAdhoc, DomainAdditionalData, DomainTypeAdhoc, OrganizationAdhoc, EnumFilenames
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class EnumFilenames(Enum):
|
|
||||||
AGENCY_ADHOC = "agency.adhoc.dotgov.txt"
|
|
||||||
DOMAIN_ADDITIONAL = "domainadditionaldatalink.adhoc.dotgov.txt"
|
|
||||||
DOMAIN_ADHOC = "domaintypes.adhoc.dotgov.txt"
|
|
||||||
ORGANIZATION_ADHOC = "organization.adhoc.dotgov.txt"
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(BaseCommand):
|
||||||
help = ""
|
help = ""
|
||||||
|
|
||||||
filenames = EnumFilenames
|
filenames = EnumFilenames
|
||||||
|
|
||||||
strip_date_regex = re.compile(r'\d+\.(.+)')
|
|
||||||
# While the prefix of these files typically includes the date,
|
|
||||||
# the rest of each name follows a predefined pattern. Define this here,
|
|
||||||
# and search for that to infer what is wanted.
|
|
||||||
filename_pattern_mapping = {
|
|
||||||
# filename - regex to use when encountered
|
|
||||||
filenames.AGENCY_ADHOC: strip_date_regex,
|
|
||||||
filenames.DOMAIN_ADDITIONAL: strip_date_regex,
|
|
||||||
filenames.DOMAIN_ADHOC: strip_date_regex,
|
|
||||||
filenames.ORGANIZATION_ADHOC: strip_date_regex
|
|
||||||
}
|
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
"""Add filename arguments."""
|
"""Add filename arguments."""
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
|
@ -45,22 +29,22 @@ class Command(BaseCommand):
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--agency_adhoc_filename",
|
"--agency_adhoc_filename",
|
||||||
default=self.filenames.AGENCY_ADHOC,
|
default=self.filenames.AGENCY_ADHOC[1],
|
||||||
help="Defines the filename for agency adhocs",
|
help="Defines the filename for agency adhocs",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--domain_additional_filename",
|
"--domain_additional_filename",
|
||||||
default=self.filenames.DOMAIN_ADDITIONAL,
|
default=self.filenames.DOMAIN_ADDITIONAL[1],
|
||||||
help="Defines the filename for additional domain data",
|
help="Defines the filename for additional domain data",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--domain_adhoc_filename",
|
"--domain_adhoc_filename",
|
||||||
default=self.filenames.DOMAIN_ADHOC,
|
default=self.filenames.DOMAIN_ADHOC[1],
|
||||||
help="Defines the filename for domain type adhocs",
|
help="Defines the filename for domain type adhocs",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--organization_adhoc_filename",
|
"--organization_adhoc_filename",
|
||||||
default=self.filenames.ORGANIZATION_ADHOC,
|
default=self.filenames.ORGANIZATION_ADHOC[1],
|
||||||
help="Defines the filename for domain type adhocs",
|
help="Defines the filename for domain type adhocs",
|
||||||
)
|
)
|
||||||
parser.add_argument("--sep", default="|", help="Delimiter character")
|
parser.add_argument("--sep", default="|", help="Delimiter character")
|
||||||
|
@ -79,5 +63,30 @@ class Command(BaseCommand):
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
logger.error(f"Could not load additional data. Error: {err}")
|
logger.error(f"Could not load additional data. Error: {err}")
|
||||||
else:
|
else:
|
||||||
|
for transition_domain in TransitionDomain.objects.all():
|
||||||
|
transition_domain.organization_type
|
||||||
|
|
||||||
|
def get_organization_adhoc(self, desired_id):
|
||||||
|
"""Grabs adhoc information for organizations. Returns an organization
|
||||||
|
dictionary
|
||||||
|
|
||||||
|
returns:
|
||||||
|
{
|
||||||
|
"
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
return self.get_object_by_id(self.filenames.ORGANIZATION_ADHOC, desired_id)
|
||||||
|
|
||||||
|
def get_domain_adhoc(self, desired_id):
|
||||||
|
""""""
|
||||||
|
return self.get_object_by_id(self.filenames.DOMAIN_ADHOC, desired_id)
|
||||||
|
|
||||||
|
def get_agency_adhoc(self, desired_id):
|
||||||
|
""""""
|
||||||
|
return self.get_object_by_id(self.filenames.AGENCY_ADHOC, desired_id)
|
||||||
|
|
||||||
|
def get_object_by_id(self, file_type: EnumFilenames, desired_id):
|
||||||
|
""""""
|
||||||
|
desired_type = self.domain_object.csv_data.get(file_type)
|
||||||
|
obj = desired_type.get(desired_id)
|
||||||
|
return obj
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from enum import Enum
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
@ -39,3 +40,13 @@ class OrganizationAdhoc():
|
||||||
orgstate: Optional[str] = None
|
orgstate: Optional[str] = None
|
||||||
orgzip: Optional[str] = None
|
orgzip: Optional[str] = None
|
||||||
orgcountrycode: Optional[str] = None
|
orgcountrycode: Optional[str] = None
|
||||||
|
|
||||||
|
class EnumFilenames(Enum):
|
||||||
|
"""Returns a tuple mapping for (filetype, default_file_name).
|
||||||
|
|
||||||
|
For instance, AGENCY_ADHOC = ("agency_adhoc", "agency.adhoc.dotgov.txt")
|
||||||
|
"""
|
||||||
|
AGENCY_ADHOC = ("agency_adhoc", "agency.adhoc.dotgov.txt")
|
||||||
|
DOMAIN_ADDITIONAL = ("domain_additional", "domainadditionaldatalink.adhoc.dotgov.txt")
|
||||||
|
DOMAIN_ADHOC = ("domain_adhoc", "domaintypes.adhoc.dotgov.txt")
|
||||||
|
ORGANIZATION_ADHOC = ("organization_adhoc", "organization.adhoc.dotgov.txt")
|
|
@ -7,27 +7,34 @@ import logging
|
||||||
|
|
||||||
import os
|
import os
|
||||||
from typing import List
|
from typing import List
|
||||||
from enum import Enum
|
from epp_data_containers import AgencyAdhoc, DomainAdditionalData, DomainTypeAdhoc, OrganizationAdhoc, EnumFilenames
|
||||||
from epp_data_containers import AgencyAdhoc, DomainAdditionalData, DomainTypeAdhoc, OrganizationAdhoc
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class EnumFilenames(Enum):
|
|
||||||
"""Returns a tuple mapping for (filetype, default_file_name).
|
|
||||||
|
|
||||||
For instance, AGENCY_ADHOC = ("agency_adhoc", "agency.adhoc.dotgov.txt")
|
|
||||||
"""
|
|
||||||
AGENCY_ADHOC = ("agency_adhoc", "agency.adhoc.dotgov.txt")
|
|
||||||
DOMAIN_ADDITIONAL = ("domain_additional", "domainadditionaldatalink.adhoc.dotgov.txt")
|
|
||||||
DOMAIN_ADHOC = ("domain_adhoc", "domaintypes.adhoc.dotgov.txt")
|
|
||||||
ORGANIZATION_ADHOC = ("organization_adhoc", "organization.adhoc.dotgov.txt")
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class PatternMap():
|
class PatternMap():
|
||||||
|
"""Helper class that holds data and metadata about a requested file.
|
||||||
|
|
||||||
def __init__(self, filename: str, regex, data_type, data=[]):
|
filename: str -> The desired filename to target. If no filename is given,
|
||||||
|
it is assumed that you are passing in a filename pattern and it will look
|
||||||
|
for a filename that matches the given postfix you pass in.
|
||||||
|
|
||||||
|
regex: re.Pattern -> Defines what regex you want to use when inferring
|
||||||
|
filenames. If none, no matching occurs.
|
||||||
|
|
||||||
|
data_type: type -> Metadata about the desired type for data.
|
||||||
|
|
||||||
|
id_field: str -> Defines which field should act as the id in data.
|
||||||
|
|
||||||
|
data: dict -> The returned data. Intended to be used with data_type
|
||||||
|
to cross-reference.
|
||||||
|
|
||||||
|
"""
|
||||||
|
def __init__(self, filename: str, regex: re.Pattern, data_type: type, id_field: str, data: dict = {}):
|
||||||
self.regex = regex
|
self.regex = regex
|
||||||
self.data_type = data_type
|
self.data_type = data_type
|
||||||
|
self.id_field = id_field
|
||||||
self.data = data
|
self.data = data
|
||||||
|
|
||||||
# returns (filename, inferred_successfully)
|
# returns (filename, inferred_successfully)
|
||||||
|
@ -36,7 +43,7 @@ class PatternMap():
|
||||||
self.could_infer = _infer[1]
|
self.could_infer = _infer[1]
|
||||||
|
|
||||||
|
|
||||||
def _infer_filename(self, regex, default_file_name):
|
def _infer_filename(self, regex: re.Pattern, default_file_name):
|
||||||
if not isinstance(regex, re.Pattern):
|
if not isinstance(regex, re.Pattern):
|
||||||
return (self.filename, False)
|
return (self.filename, False)
|
||||||
|
|
||||||
|
@ -73,15 +80,20 @@ class ExtraTransitionDomain():
|
||||||
self.all_files_set = {os.path.basename(file) for file in self.all_files}
|
self.all_files_set = {os.path.basename(file) for file in self.all_files}
|
||||||
|
|
||||||
self.csv_data = {
|
self.csv_data = {
|
||||||
self.filenames.AGENCY_ADHOC: PatternMap(agency_adhoc_filename, self.strip_date_regex, AgencyAdhoc),
|
# (filetype, default_file_name): metadata about the desired file
|
||||||
self.filenames.DOMAIN_ADDITIONAL: PatternMap(domain_additional_filename, self.strip_date_regex, DomainAdditionalData),
|
self.filenames.AGENCY_ADHOC: PatternMap(agency_adhoc_filename, self.strip_date_regex, AgencyAdhoc, "agencyid"),
|
||||||
self.filenames.DOMAIN_ADHOC: PatternMap(domain_adhoc_filename, self.strip_date_regex, DomainTypeAdhoc),
|
self.filenames.DOMAIN_ADDITIONAL: PatternMap(domain_additional_filename, self.strip_date_regex, DomainAdditionalData, "domainname"),
|
||||||
self.filenames.ORGANIZATION_ADHOC: PatternMap(organization_adhoc_filename, self.strip_date_regex, OrganizationAdhoc)
|
self.filenames.DOMAIN_ADHOC: PatternMap(domain_adhoc_filename, self.strip_date_regex, DomainTypeAdhoc, "domaintypeid"),
|
||||||
|
self.filenames.ORGANIZATION_ADHOC: PatternMap(organization_adhoc_filename, self.strip_date_regex, OrganizationAdhoc, "orgid")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def parse_all_files(self):
|
def parse_all_files(self, overwrite_existing_data = True):
|
||||||
"""Clears all preexisting data then parses each related CSV file"""
|
"""Clears all preexisting data then parses each related CSV file.
|
||||||
|
|
||||||
|
overwrite_existing_data: bool -> Determines if we should clear
|
||||||
|
csv_data.data if it already exists
|
||||||
|
"""
|
||||||
self.clear_csv_data()
|
self.clear_csv_data()
|
||||||
for item in self.csv_data:
|
for item in self.csv_data:
|
||||||
file_type: PatternMap = item.value
|
file_type: PatternMap = item.value
|
||||||
|
@ -91,20 +103,21 @@ class ExtraTransitionDomain():
|
||||||
file_type.data = self._read_csv_file(
|
file_type.data = self._read_csv_file(
|
||||||
self.all_files_set[filename],
|
self.all_files_set[filename],
|
||||||
self.seperator,
|
self.seperator,
|
||||||
file_type.data_type
|
file_type.data_type,
|
||||||
|
file_type.id_field
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# Log if we can't find the desired file
|
# Log if we can't find the desired file
|
||||||
logger.warning(f"Could not find file: {filename}")
|
logger.error(f"Could not find file: {filename}")
|
||||||
|
|
||||||
|
|
||||||
def clear_csv_data(self):
|
def clear_csv_data(self):
|
||||||
for item in self.csv_data:
|
for item in self.csv_data:
|
||||||
file_type: PatternMap = item.value
|
file_type: PatternMap = item.value
|
||||||
file_type.data = []
|
file_type.data = {}
|
||||||
|
|
||||||
def _read_csv_file(self, file, seperator, dataclass_type):
|
def _read_csv_file(self, file, seperator, dataclass_type, id_field):
|
||||||
with open(file, "r", encoding="utf-8") as requested_file:
|
with open(file, "r", encoding="utf-8") as requested_file:
|
||||||
reader = csv.DictReader(requested_file, delimiter=seperator)
|
reader = csv.DictReader(requested_file, delimiter=seperator)
|
||||||
return [dataclass_type(**row) for row in reader]
|
return {row[id_field]: dataclass_type(**row) for row in reader}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,19 @@
|
||||||
|
# Generated by Django 4.2.6 on 2023-10-27 14:21
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
dependencies = [
|
||||||
|
("registrar", "0043_transitiondomain_federal_agency_and_more"),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="transitiondomain",
|
||||||
|
name="organization_name",
|
||||||
|
field=models.TextField(
|
||||||
|
blank=True, help_text="Organization name", null=True
|
||||||
|
),
|
||||||
|
),
|
||||||
|
]
|
|
@ -48,6 +48,11 @@ class TransitionDomain(TimeStampedModel):
|
||||||
blank=True,
|
blank=True,
|
||||||
help_text="Type of organization",
|
help_text="Type of organization",
|
||||||
)
|
)
|
||||||
|
organization_name = models.TextField(
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
help_text="Organization name",
|
||||||
|
)
|
||||||
federal_type = models.TextField(
|
federal_type = models.TextField(
|
||||||
max_length=50,
|
max_length=50,
|
||||||
null=True,
|
null=True,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue