Data chunking / don't update existing data

This commit is contained in:
zandercymatics 2023-11-17 11:26:46 -07:00
parent 13b1ca0238
commit a6db4b7145
No known key found for this signature in database
GPG key ID: FF4636ABEC9682B7
2 changed files with 68 additions and 17 deletions

View file

@@ -10,7 +10,7 @@ import logging
import os
import sys
from typing import Dict
from django.core.paginator import Paginator
from registrar.models.transition_domain import TransitionDomain
from .epp_data_containers import (
@@ -850,7 +850,13 @@ class OrganizationDataLoader:
"zipcode",
]
TransitionDomain.objects.bulk_update(update_list, changed_fields)
batch_size = 1000
# Use a Paginator to apply the update in chunks of batch_size rows: a single
# bulk_update on the full dataset is too memory intensive for our current app config.
paginator = Paginator(update_list, batch_size)
for page_num in paginator.page_range:
page = paginator.page(page_num)
TransitionDomain.objects.bulk_update(page.object_list, changed_fields)
if not self.debug:
logger.info(