mirror of
https://github.com/cisagov/manage.get.gov.git
synced 2025-05-31 09:43:54 +02:00
- Fixed file arguments error (couldn’t consume multiple files)
- Fixed errors with get_or_create logic
- Fixed errors with mapping data between files
- Fixed logic for checking for existing entries
This commit is contained in:
parent
8198533bcc
commit
125690f347
3 changed files with 7879 additions and 7866 deletions
|
@ -45,55 +45,30 @@ class Command(BaseCommand):
|
||||||
"""Load the data files and create the DomainInvitations."""
|
"""Load the data files and create the DomainInvitations."""
|
||||||
sep = options.get("sep")
|
sep = options.get("sep")
|
||||||
|
|
||||||
"""
|
|
||||||
# Create mapping of userId -> domain names
|
|
||||||
# We open the domain file first and hold it in memory.
|
|
||||||
# There are three contacts per domain, so there should be at
|
|
||||||
# most 3*N different contacts here.
|
|
||||||
contact_domains = defaultdict(list) # each contact has a list of domains
|
|
||||||
logger.info("Reading domain-contacts data file %s", domain_contacts_filename)
|
|
||||||
with open(domain_contacts_filename, "r") as domain_contacts_file:
|
|
||||||
for row in csv.reader(domain_contacts_file, delimiter=sep):
|
|
||||||
# fields are just domain, userid, role
|
|
||||||
# lowercase the domain names now
|
|
||||||
contact_domains[row[1]].append(row[0].lower())
|
|
||||||
logger.info("Loaded domains for %d contacts", len(contact_domains))
|
|
||||||
|
|
||||||
# STEP 1:
|
|
||||||
# Create mapping of domain name -> userId
|
|
||||||
domains_contact = defaultdict(list) # each contact has a list of domains
|
|
||||||
logger.info("Reading domain-contacts data file %s", domain_contacts_filename)
|
|
||||||
with open(domain_contacts_filename, "r") as domain_contacts_file:
|
|
||||||
for row in csv.reader(domain_contacts_file, delimiter=sep):
|
|
||||||
# fields are just domain, userid, role
|
|
||||||
# lowercase the domain names now --NOTE: is there a reason why we do this??
|
|
||||||
domainName = row[0].lower()
|
|
||||||
userId = row[1]
|
|
||||||
domains_contact[domainName].append(userId)
|
|
||||||
logger.info("Loaded domains for %d contacts", len(domains_contact))
|
|
||||||
"""
|
|
||||||
|
|
||||||
# STEP 1:
|
# STEP 1:
|
||||||
# Create mapping of domain name -> status
|
# Create mapping of domain name -> status
|
||||||
domain_status = defaultdict() # NOTE: how to determine "most recent" status?
|
# TODO: figure out latest status
|
||||||
|
domain_status_dictionary = defaultdict(str) # NOTE: how to determine "most recent" status?
|
||||||
logger.info("Reading domain statuses data file %s", domain_statuses_filename)
|
logger.info("Reading domain statuses data file %s", domain_statuses_filename)
|
||||||
with open(domain_statuses_filename, "r") as domain_statuses_file:
|
with open(domain_statuses_filename, "r") as domain_statuses_file:
|
||||||
for row in csv.reader(domain_statuses_file, delimiter=sep):
|
for row in csv.reader(domain_statuses_file, delimiter=sep):
|
||||||
domainName = row[0].lower()
|
domainName = row[0].lower()
|
||||||
domainStatus = row[1]
|
domainStatus = row[1].lower()
|
||||||
domain_status[domainName] = domainStatus
|
# print("adding "+domainName+", "+domainStatus)
|
||||||
logger.info("Loaded statuses for %d domains", len(domain_status))
|
domain_status_dictionary[domainName] = domainStatus
|
||||||
|
logger.info("Loaded statuses for %d domains", len(domain_status_dictionary))
|
||||||
|
print(domain_status_dictionary)
|
||||||
|
|
||||||
# STEP 2:
|
# STEP 2:
|
||||||
# Create mapping of userId -> emails NOTE: is this one to many??
|
# Create mapping of userId -> emails NOTE: is this one to many??
|
||||||
user_emails = defaultdict(list) # each contact has a list of e-mails
|
user_emails_dictionary = defaultdict(list) # each contact has a list of e-mails
|
||||||
logger.info("Reading domain-contacts data file %s", domain_contacts_filename)
|
logger.info("Reading domain-contacts data file %s", domain_contacts_filename)
|
||||||
with open(contacts_filename, "r") as contacts_file:
|
with open(contacts_filename, "r") as contacts_file:
|
||||||
for row in csv.reader(contacts_file, delimiter=sep):
|
for row in csv.reader(contacts_file, delimiter=sep):
|
||||||
userId = row[0]
|
userId = row[0]
|
||||||
user_email = row[6]
|
user_email = row[6]
|
||||||
user_emails[userId].append(user_email)
|
user_emails_dictionary[userId].append(user_email)
|
||||||
logger.info("Loaded emails for %d users", len(user_emails))
|
logger.info("Loaded emails for %d users", len(user_emails_dictionary))
|
||||||
|
|
||||||
# STEP 3:
|
# STEP 3:
|
||||||
# TODO: Need to add logic for conflicting domain status entries
|
# TODO: Need to add logic for conflicting domain status entries
|
||||||
|
@ -104,22 +79,26 @@ class Command(BaseCommand):
|
||||||
|
|
||||||
# keep track of statuses that don't match our available status values
|
# keep track of statuses that don't match our available status values
|
||||||
outlier_statuses = set
|
outlier_statuses = set
|
||||||
|
total_outlier_statuses = 0
|
||||||
|
|
||||||
# keep track of domains that have no known status
|
# keep track of domains that have no known status
|
||||||
domains_without_status = set
|
domains_without_status = set
|
||||||
|
total_domains_without_status = 0
|
||||||
|
|
||||||
# keep track of users that have no e-mails
|
# keep track of users that have no e-mails
|
||||||
users_without_email = set
|
users_without_email = set
|
||||||
|
total_users_without_email = 0
|
||||||
|
|
||||||
# keep track of domains we UPDATED (instead of ones we added)
|
# keep track of domains we UPDATED (instead of ones we added)
|
||||||
total_updated_domain_entries = 0
|
total_updated_domain_entries = 0
|
||||||
|
total_new_entries = 0
|
||||||
|
|
||||||
logger.info("Reading domain-contacts data file %s", domain_contacts_filename)
|
logger.info("Reading domain-contacts data file %s", domain_contacts_filename)
|
||||||
with open(domain_contacts_filename, "r") as domain_contacts_file:
|
with open(domain_contacts_filename, "r") as domain_contacts_file:
|
||||||
for row in csv.reader(domain_contacts_file, delimiter=sep):
|
for row in csv.reader(domain_contacts_file, delimiter=sep):
|
||||||
# fields are just domain, userid, role
|
# fields are just domain, userid, role
|
||||||
# lowercase the domain names
|
# lowercase the domain names
|
||||||
domainName = row[0]
|
domainName = row[0].lower()
|
||||||
userId = row[1]
|
userId = row[1]
|
||||||
|
|
||||||
domainStatus = TransitionDomain.StatusChoices.CREATED
|
domainStatus = TransitionDomain.StatusChoices.CREATED
|
||||||
|
@ -127,38 +106,75 @@ class Command(BaseCommand):
|
||||||
emailSent = False
|
emailSent = False
|
||||||
# TODO: how to know if e-mail was sent?
|
# TODO: how to know if e-mail was sent?
|
||||||
|
|
||||||
if domainName not in domain_status:
|
if domainName not in domain_status_dictionary:
|
||||||
# this domain has no status...default to "Create"
|
# this domain has no status...default to "Create"
|
||||||
domains_without_status.add(domainName)
|
# domains_without_status.add(domainName)
|
||||||
|
# print("No status found for domain: "+domainName)
|
||||||
|
total_domains_without_status += 1
|
||||||
else:
|
else:
|
||||||
originalStatus = domain_status[domainName]
|
originalStatus = domain_status_dictionary[domainName]
|
||||||
|
# print(originalStatus)
|
||||||
if originalStatus in TransitionDomain.StatusChoices.values:
|
if originalStatus in TransitionDomain.StatusChoices.values:
|
||||||
|
# print("YAY")
|
||||||
domainStatus = originalStatus
|
domainStatus = originalStatus
|
||||||
else:
|
else:
|
||||||
# default all other statuses to "Create"
|
# default all other statuses to "Create"
|
||||||
outlier_statuses.add(originalStatus)
|
# outlier_statuses.add(originalStatus)
|
||||||
|
# print("Unknown status: "+originalStatus)
|
||||||
|
total_outlier_statuses += 1
|
||||||
|
|
||||||
if userId not in user_emails:
|
if userId not in user_emails_dictionary:
|
||||||
# this user has no e-mail...this should never happen
|
# this user has no e-mail...this should never happen
|
||||||
users_without_email.add(userId)
|
# users_without_email.add(userId)
|
||||||
break
|
# print("no e-mail found for user: "+userId)
|
||||||
|
total_users_without_email += 1
|
||||||
|
else:
|
||||||
|
userEmail = user_emails_dictionary[userId]
|
||||||
|
|
||||||
# Check to see if this domain-user pairing already exists so we don't add duplicates
|
# Check to see if this domain-user pairing already exists so we don't add duplicates
|
||||||
existingEntry = TransitionDomain.objects.get(
|
'''
|
||||||
username=userEmail, domain_name=domainName
|
newOrExistingEntry, isNew = TransitionDomain.objects.get_or_create(
|
||||||
|
username=userEmail,
|
||||||
|
domain_name=domainName
|
||||||
)
|
)
|
||||||
if existingEntry:
|
if isNew:
|
||||||
existingEntry.status = domainStatus
|
|
||||||
total_updated_domain_entries += 1
|
total_updated_domain_entries += 1
|
||||||
else:
|
else:
|
||||||
to_create.append(
|
total_new_entries += 1
|
||||||
TransitionDomain(
|
newOrExistingEntry.status = domainStatus
|
||||||
username=userEmail,
|
newOrExistingEntry.email_sent = emailSent
|
||||||
domain_name=domainName,
|
to_create.append(
|
||||||
status=domainStatus,
|
newOrExistingEntry
|
||||||
email_sent=emailSent,
|
)
|
||||||
)
|
'''
|
||||||
|
|
||||||
|
newOrExistingEntry = None
|
||||||
|
try:
|
||||||
|
newOrExistingEntry = TransitionDomain.objects.get(
|
||||||
|
username=userEmail,
|
||||||
|
domain_name=domainName
|
||||||
)
|
)
|
||||||
|
print("Updating entry: ", newOrExistingEntry)
|
||||||
|
print(" Status: ", newOrExistingEntry.status, " > ",domainStatus)
|
||||||
|
newOrExistingEntry.status = domainStatus
|
||||||
|
print(" Email Sent: ", newOrExistingEntry.email_sent, " > ", emailSent)
|
||||||
|
newOrExistingEntry.email_sent = emailSent
|
||||||
|
newOrExistingEntry.save()
|
||||||
|
except TransitionDomain.DoesNotExist:
|
||||||
|
# no matching entry, make one
|
||||||
|
newOrExistingEntry = TransitionDomain(
|
||||||
|
username=userEmail,
|
||||||
|
domain_name=domainName,
|
||||||
|
status = domainStatus,
|
||||||
|
email_sent = emailSent
|
||||||
|
)
|
||||||
|
print("Adding entry ",total_new_entries,": ", newOrExistingEntry)
|
||||||
|
to_create.append(newOrExistingEntry)
|
||||||
|
total_new_entries += 1
|
||||||
|
if total_new_entries > 10:
|
||||||
|
print("DONE")
|
||||||
|
break
|
||||||
|
|
||||||
logger.info("Creating %d transition domain entries", len(to_create))
|
logger.info("Creating %d transition domain entries", len(to_create))
|
||||||
TransitionDomain.objects.bulk_create(to_create)
|
TransitionDomain.objects.bulk_create(to_create)
|
||||||
|
|
||||||
|
@ -167,9 +183,9 @@ class Command(BaseCommand):
|
||||||
updated %d transition domain entries,
|
updated %d transition domain entries,
|
||||||
found %d users without email,
|
found %d users without email,
|
||||||
found %d unique statuses that do not map to existing status values""",
|
found %d unique statuses that do not map to existing status values""",
|
||||||
len(to_create),
|
total_new_entries,
|
||||||
total_updated_domain_entries,
|
total_updated_domain_entries,
|
||||||
len(users_without_email),
|
total_users_without_email,
|
||||||
len(outlier_statuses),
|
total_outlier_statuses,
|
||||||
)
|
)
|
||||||
# TODO: add more info to logger?
|
# TODO: add more info to logger?
|
||||||
|
|
|
@ -2,19 +2,16 @@ from django.db import models
|
||||||
|
|
||||||
from .utility.time_stamped_model import TimeStampedModel
|
from .utility.time_stamped_model import TimeStampedModel
|
||||||
|
|
||||||
|
|
||||||
class StatusChoices(models.TextChoices):
|
class StatusChoices(models.TextChoices):
|
||||||
CREATED = "created", "Created"
|
CREATED = "created", "Created"
|
||||||
HOLD = "hold", "Hold"
|
HOLD = "hold", "Hold"
|
||||||
|
|
||||||
|
|
||||||
class TransitionDomain(TimeStampedModel):
|
class TransitionDomain(TimeStampedModel):
|
||||||
"""Transition Domain model stores information about the
|
"""Transition Domain model stores information about the
|
||||||
state of a domain upon transition between registry
|
state of a domain upon transition between registry
|
||||||
providers"""
|
providers"""
|
||||||
|
|
||||||
CREATED = "created", "Created"
|
StatusChoices = StatusChoices
|
||||||
HOLD = "hold", "Hold"
|
|
||||||
|
|
||||||
username = models.TextField(
|
username = models.TextField(
|
||||||
null=False,
|
null=False,
|
||||||
|
@ -31,10 +28,7 @@ class TransitionDomain(TimeStampedModel):
|
||||||
max_length=255,
|
max_length=255,
|
||||||
null=False,
|
null=False,
|
||||||
blank=True,
|
blank=True,
|
||||||
choices=[
|
choices=StatusChoices.choices,
|
||||||
(CREATED),
|
|
||||||
(HOLD),
|
|
||||||
],
|
|
||||||
verbose_name="Status",
|
verbose_name="Status",
|
||||||
help_text="domain status during the transfer",
|
help_text="domain status during the transfer",
|
||||||
)
|
)
|
||||||
|
@ -46,4 +40,7 @@ class TransitionDomain(TimeStampedModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return self.username
|
return (
|
||||||
|
f"username: {self.username} "
|
||||||
|
f"domainName: {self.domain_name} "
|
||||||
|
)
|
||||||
|
|
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue