- Fixed file arguments error (couldn’t consume multiple files)

- Fixed errors with get_or_create logic
- Fixed errors with mapping data between files
- Fixed logic for checking for existing entries
This commit is contained in:
CocoByte 2023-09-21 01:21:26 -06:00
parent 8198533bcc
commit 125690f347
No known key found for this signature in database
GPG key ID: BBFAA2526384C97F
3 changed files with 7879 additions and 7866 deletions

View file

@ -45,55 +45,30 @@ class Command(BaseCommand):
"""Load the data files and create the DomainInvitations.""" """Load the data files and create the DomainInvitations."""
sep = options.get("sep") sep = options.get("sep")
"""
# Create mapping of userId -> domain names
# We open the domain file first and hold it in memory.
# There are three contacts per domain, so there should be at
# most 3*N different contacts here.
contact_domains = defaultdict(list) # each contact has a list of domains
logger.info("Reading domain-contacts data file %s", domain_contacts_filename)
with open(domain_contacts_filename, "r") as domain_contacts_file:
for row in csv.reader(domain_contacts_file, delimiter=sep):
# fields are just domain, userid, role
# lowercase the domain names now
contact_domains[row[1]].append(row[0].lower())
logger.info("Loaded domains for %d contacts", len(contact_domains))
# STEP 1:
# Create mapping of domain name -> userId
domains_contact = defaultdict(list) # each contact has a list of domains
logger.info("Reading domain-contacts data file %s", domain_contacts_filename)
with open(domain_contacts_filename, "r") as domain_contacts_file:
for row in csv.reader(domain_contacts_file, delimiter=sep):
# fields are just domain, userid, role
# lowercase the domain names now --NOTE: is there a reason why we do this??
domainName = row[0].lower()
userId = row[1]
domains_contact[domainName].append(userId)
logger.info("Loaded domains for %d contacts", len(domains_contact))
"""
# STEP 1: # STEP 1:
# Create mapping of domain name -> status # Create mapping of domain name -> status
domain_status = defaultdict() # NOTE: how to determine "most recent" status? # TODO: figure out latest status
domain_status_dictionary = defaultdict(str) # NOTE: how to determine "most recent" status?
logger.info("Reading domain statuses data file %s", domain_statuses_filename) logger.info("Reading domain statuses data file %s", domain_statuses_filename)
with open(domain_statuses_filename, "r") as domain_statuses_file: with open(domain_statuses_filename, "r") as domain_statuses_file:
for row in csv.reader(domain_statuses_file, delimiter=sep): for row in csv.reader(domain_statuses_file, delimiter=sep):
domainName = row[0].lower() domainName = row[0].lower()
domainStatus = row[1] domainStatus = row[1].lower()
domain_status[domainName] = domainStatus # print("adding "+domainName+", "+domainStatus)
logger.info("Loaded statuses for %d domains", len(domain_status)) domain_status_dictionary[domainName] = domainStatus
logger.info("Loaded statuses for %d domains", len(domain_status_dictionary))
print(domain_status_dictionary)
# STEP 2: # STEP 2:
# Create mapping of userId -> emails NOTE: is this one to many?? # Create mapping of userId -> emails NOTE: is this one to many??
user_emails = defaultdict(list) # each contact has a list of e-mails user_emails_dictionary = defaultdict(list) # each contact has a list of e-mails
logger.info("Reading domain-contacts data file %s", domain_contacts_filename) logger.info("Reading domain-contacts data file %s", domain_contacts_filename)
with open(contacts_filename, "r") as contacts_file: with open(contacts_filename, "r") as contacts_file:
for row in csv.reader(contacts_file, delimiter=sep): for row in csv.reader(contacts_file, delimiter=sep):
userId = row[0] userId = row[0]
user_email = row[6] user_email = row[6]
user_emails[userId].append(user_email) user_emails_dictionary[userId].append(user_email)
logger.info("Loaded emails for %d users", len(user_emails)) logger.info("Loaded emails for %d users", len(user_emails_dictionary))
# STEP 3: # STEP 3:
# TODO: Need to add logic for conflicting domain status entries # TODO: Need to add logic for conflicting domain status entries
@ -104,22 +79,26 @@ class Command(BaseCommand):
# keep track of statuses that don't match our available status values # keep track of statuses that don't match our available status values
outlier_statuses = set outlier_statuses = set
total_outlier_statuses = 0
# keep track of domains that have no known status # keep track of domains that have no known status
domains_without_status = set domains_without_status = set
total_domains_without_status = 0
# keep track of users that have no e-mails # keep track of users that have no e-mails
users_without_email = set users_without_email = set
total_users_without_email = 0
# keep track of domains we UPDATED (instead of ones we added) # keep track of domains we UPDATED (instead of ones we added)
total_updated_domain_entries = 0 total_updated_domain_entries = 0
total_new_entries = 0
logger.info("Reading domain-contacts data file %s", domain_contacts_filename) logger.info("Reading domain-contacts data file %s", domain_contacts_filename)
with open(domain_contacts_filename, "r") as domain_contacts_file: with open(domain_contacts_filename, "r") as domain_contacts_file:
for row in csv.reader(domain_contacts_file, delimiter=sep): for row in csv.reader(domain_contacts_file, delimiter=sep):
# fields are just domain, userid, role # fields are just domain, userid, role
# lowercase the domain names # lowercase the domain names
domainName = row[0] domainName = row[0].lower()
userId = row[1] userId = row[1]
domainStatus = TransitionDomain.StatusChoices.CREATED domainStatus = TransitionDomain.StatusChoices.CREATED
@ -127,38 +106,75 @@ class Command(BaseCommand):
emailSent = False emailSent = False
# TODO: how to know if e-mail was sent? # TODO: how to know if e-mail was sent?
if domainName not in domain_status: if domainName not in domain_status_dictionary:
# this domain has no status...default to "Create" # this domain has no status...default to "Create"
domains_without_status.add(domainName) # domains_without_status.add(domainName)
# print("No status found for domain: "+domainName)
total_domains_without_status += 1
else: else:
originalStatus = domain_status[domainName] originalStatus = domain_status_dictionary[domainName]
# print(originalStatus)
if originalStatus in TransitionDomain.StatusChoices.values: if originalStatus in TransitionDomain.StatusChoices.values:
# print("YAY")
domainStatus = originalStatus domainStatus = originalStatus
else: else:
# default all other statuses to "Create" # default all other statuses to "Create"
outlier_statuses.add(originalStatus) # outlier_statuses.add(originalStatus)
# print("Unknown status: "+originalStatus)
total_outlier_statuses += 1
if userId not in user_emails: if userId not in user_emails_dictionary:
# this user has no e-mail...this should never happen # this user has no e-mail...this should never happen
users_without_email.add(userId) # users_without_email.add(userId)
break # print("no e-mail found for user: "+userId)
total_users_without_email += 1
else:
userEmail = user_emails_dictionary[userId]
# Check to see if this domain-user pairing already exists so we don't add duplicates # Check to see if this domain-user pairing already exists so we don't add duplicates
existingEntry = TransitionDomain.objects.get( '''
username=userEmail, domain_name=domainName newOrExistingEntry, isNew = TransitionDomain.objects.get_or_create(
username=userEmail,
domain_name=domainName
) )
if existingEntry: if isNew:
existingEntry.status = domainStatus
total_updated_domain_entries += 1 total_updated_domain_entries += 1
else: else:
to_create.append( total_new_entries += 1
TransitionDomain( newOrExistingEntry.status = domainStatus
username=userEmail, newOrExistingEntry.email_sent = emailSent
domain_name=domainName, to_create.append(
status=domainStatus, newOrExistingEntry
email_sent=emailSent, )
) '''
newOrExistingEntry = None
try:
newOrExistingEntry = TransitionDomain.objects.get(
username=userEmail,
domain_name=domainName
) )
print("Updating entry: ", newOrExistingEntry)
print(" Status: ", newOrExistingEntry.status, " > ",domainStatus)
newOrExistingEntry.status = domainStatus
print(" Email Sent: ", newOrExistingEntry.email_sent, " > ", emailSent)
newOrExistingEntry.email_sent = emailSent
newOrExistingEntry.save()
except TransitionDomain.DoesNotExist:
# no matching entry, make one
newOrExistingEntry = TransitionDomain(
username=userEmail,
domain_name=domainName,
status = domainStatus,
email_sent = emailSent
)
print("Adding entry ",total_new_entries,": ", newOrExistingEntry)
to_create.append(newOrExistingEntry)
total_new_entries += 1
if total_new_entries > 10:
print("DONE")
break
logger.info("Creating %d transition domain entries", len(to_create)) logger.info("Creating %d transition domain entries", len(to_create))
TransitionDomain.objects.bulk_create(to_create) TransitionDomain.objects.bulk_create(to_create)
@ -167,9 +183,9 @@ class Command(BaseCommand):
updated %d transition domain entries, updated %d transition domain entries,
found %d users without email, found %d users without email,
found %d unique statuses that do not map to existing status values""", found %d unique statuses that do not map to existing status values""",
len(to_create), total_new_entries,
total_updated_domain_entries, total_updated_domain_entries,
len(users_without_email), total_users_without_email,
len(outlier_statuses), total_outlier_statuses,
) )
# TODO: add more info to logger? # TODO: add more info to logger?

View file

@ -2,19 +2,16 @@ from django.db import models
from .utility.time_stamped_model import TimeStampedModel from .utility.time_stamped_model import TimeStampedModel
class StatusChoices(models.TextChoices): class StatusChoices(models.TextChoices):
CREATED = "created", "Created" CREATED = "created", "Created"
HOLD = "hold", "Hold" HOLD = "hold", "Hold"
class TransitionDomain(TimeStampedModel): class TransitionDomain(TimeStampedModel):
"""Transition Domain model stores information about the """Transition Domain model stores information about the
state of a domain upon transition between registry state of a domain upon transition between registry
providers""" providers"""
CREATED = "created", "Created" StatusChoices = StatusChoices
HOLD = "hold", "Hold"
username = models.TextField( username = models.TextField(
null=False, null=False,
@ -31,10 +28,7 @@ class TransitionDomain(TimeStampedModel):
max_length=255, max_length=255,
null=False, null=False,
blank=True, blank=True,
choices=[ choices=StatusChoices.choices,
(CREATED),
(HOLD),
],
verbose_name="Status", verbose_name="Status",
help_text="domain status during the transfer", help_text="domain status during the transfer",
) )
@ -46,4 +40,7 @@ class TransitionDomain(TimeStampedModel):
) )
def __str__(self): def __str__(self):
return self.username return (
f"username: {self.username} "
f"domainName: {self.domain_name} "
)

File diff suppressed because it is too large Load diff