From 77e5c918e7cacf5cba4e216dbaeae9ccf77c261e Mon Sep 17 00:00:00 2001 From: David Kennedy Date: Tue, 4 Jun 2024 11:51:39 -0400 Subject: [PATCH 01/10] added skipEppSave command line option, may change option in future commit --- src/registrar/admin.py | 48 +++++++++++++++---- .../management/commands/import_tables.py | 9 +++- 2 files changed, 46 insertions(+), 11 deletions(-) diff --git a/src/registrar/admin.py b/src/registrar/admin.py index ce00bbc88..6573079f9 100644 --- a/src/registrar/admin.py +++ b/src/registrar/admin.py @@ -2252,18 +2252,46 @@ class PublicContactResource(resources.ModelResource): class Meta: model = models.PublicContact - def import_row(self, row, instance_loader, using_transactions=True, dry_run=False, raise_errors=None, **kwargs): - """Override kwargs skip_epp_save and set to True""" - kwargs["skip_epp_save"] = True - return super().import_row( - row, - instance_loader, - using_transactions=using_transactions, - dry_run=dry_run, - raise_errors=raise_errors, + def __init__(self): + """Sets global variables for code tidyness""" + super().__init__() + self.skip_epp_save=False + + def import_data( + self, + dataset, + dry_run=False, + raise_errors=False, + use_transactions=None, + collect_failed_rows=False, + rollback_on_validation_errors=False, + **kwargs + ): + """Override import_data to set self.skip_epp_save if in kwargs""" + self.skip_epp_save = kwargs.get('skip_epp_save', False) + return super().import_data( + dataset, + dry_run, + raise_errors, + use_transactions, + collect_failed_rows, + rollback_on_validation_errors, **kwargs, ) + # def import_row(self, row, instance_loader, using_transactions=True, dry_run=False, raise_errors=None, **kwargs): + # """Override kwargs skip_epp_save and set to True""" + # kwargs["skip_epp_save"] = True + # super().import_data() + # return super().import_row( + # row, + # instance_loader, + # using_transactions=using_transactions, + # dry_run=dry_run, + # raise_errors=raise_errors, + # **kwargs, + # ) + def save_instance(self, instance, is_create, using_transactions=True, dry_run=False): """Override save_instance setting skip_epp_save to True""" self.before_save_instance(instance, using_transactions, dry_run) @@ -2276,7 +2304,7 @@ class PublicContactResource(resources.ModelResource): # we don't have transactions and we want to do a dry_run pass else: - instance.save(skip_epp_save=True) + instance.save(skip_epp_save=self.skip_epp_save) self.after_save_instance(instance, using_transactions, dry_run) diff --git a/src/registrar/management/commands/import_tables.py b/src/registrar/management/commands/import_tables.py index 3594d3215..cbe3dc6ee 100644 --- a/src/registrar/management/commands/import_tables.py +++ b/src/registrar/management/commands/import_tables.py @@ -1,3 +1,4 @@ +import argparse import logging import os import pyzipper @@ -14,6 +15,10 @@ logger = logging.getLogger(__name__) class Command(BaseCommand): help = "Imports tables from a zip file, exported_tables.zip, containing CSV files in the tmp directory." + def add_arguments(self, parser): + """Add command line arguments.""" + parser.add_argument('--skipEppSave', default=True, action=argparse.BooleanOptionalAction) + def handle(self, **options): """Extracts CSV files from a zip archive and imports them into the respective tables""" @@ -21,6 +26,8 @@ class Command(BaseCommand): logger.error("import_tables cannot be run in production") return + self.skip_epp_save = options.get("skipEppSave") + table_names = [ "User", "Contact", @@ -73,7 +80,7 @@ class Command(BaseCommand): resource_instance = resourceclass() with open(csv_filename, "r") as csvfile: dataset = tablib.Dataset().load(csvfile.read(), format="csv") - result = resource_instance.import_data(dataset, dry_run=False, skip_epp_save=True) + result = resource_instance.import_data(dataset, dry_run=False, skip_epp_save=self.skip_epp_save) if result.has_errors(): logger.error(f"Errors occurred while importing {csv_filename}: {result.row_errors()}") From b3a272025d52791add5655d40c82c5011df9f771 Mon Sep 17 00:00:00 2001 From: David Kennedy Date: Tue, 11 Jun 2024 07:09:11 -0400 Subject: [PATCH 02/10] update import_tables documentation --- docs/operations/import_export.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/docs/operations/import_export.md b/docs/operations/import_export.md index 7ddfd5d3b..4810774e4 100644 --- a/docs/operations/import_export.md +++ b/docs/operations/import_export.md @@ -35,6 +35,7 @@ For reference, the zip file will contain the following tables in csv form: * Websites * Host * HostIP +* PublicContact After exporting the file from the target environment, scp the exported_tables.zip file from the target environment to local. Run the below commands from local. @@ -81,11 +82,18 @@ For reference, this deletes all rows from the following tables: * DraftDomain * HostIP * Host +* PublicContact #### Importing into Target Environment Once target environment is prepared, files can be imported. +If importing tables from stable environment into an OT&E sandbox, there will be a difference +between the stable's registry and the sandbox's registry. Therefore, you need to run import_tables +with --skipEppSave option set to False. If you set to False, it will attempt to save PublicContact +records to the registry on load. If this is unset, or set to True, it will load the database and not +attempt to update the registry on load. + To scp the exported_tables.zip file from local to the sandbox, run the following: Get passcode by running: @@ -107,7 +115,7 @@ cf ssh {target-app} example cleaning getgov-backup: cf ssh getgov-backup /tmp/lifecycle/backup -./manage.py import_tables +./manage.py import_tables --skipEppSave=False For reference, this imports tables in the following order: @@ -121,6 +129,7 @@ For reference, this imports tables in the following order: * DomainRequest * DomainInformation * UserDomainRole +* PublicContact Optional step: * Run fixtures to load fixture users back in \ No newline at end of file From 97ee855de6afc3b80c91e3f169d57f7f0cd29c68 Mon Sep 17 00:00:00 2001 From: David Kennedy Date: Thu, 13 Jun 2024 07:21:06 -0400 Subject: [PATCH 03/10] added FederalAgency, updated for using bulk --- docs/operations/import_export.md | 7 +++++-- src/registrar/admin.py | 14 +++++++++++++- src/registrar/management/commands/clean_tables.py | 6 ++++-- src/registrar/management/commands/export_tables.py | 1 + src/registrar/management/commands/import_tables.py | 8 +++++++- 5 files changed, 30 insertions(+), 6 deletions(-) diff --git a/docs/operations/import_export.md b/docs/operations/import_export.md index 4810774e4..b7fd50d52 100644 --- a/docs/operations/import_export.md +++ b/docs/operations/import_export.md @@ -32,6 +32,7 @@ For reference, the zip file will contain the following tables in csv form: * DomainInformation * DomainUserRole * DraftDomain +* FederalAgency * Websites * Host * HostIP @@ -76,13 +77,14 @@ For reference, this deletes all rows from the following tables: * DomainInformation * DomainRequest * Domain -* User (all but the current user) +* User * Contact * Websites * DraftDomain * HostIP * Host * PublicContact +* FederalAgency #### Importing into Target Environment @@ -115,7 +117,7 @@ cf ssh {target-app} example cleaning getgov-backup: cf ssh getgov-backup /tmp/lifecycle/backup -./manage.py import_tables --skipEppSave=False +./manage.py import_tables --no-skipEppSave For reference, this imports tables in the following order: @@ -126,6 +128,7 @@ For reference, this imports tables in the following order: * HostIP * DraftDomain * Websites +* FederalAgency * DomainRequest * DomainInformation * UserDomainRole diff --git a/src/registrar/admin.py b/src/registrar/admin.py index 9ddaf03e7..16aba7ec0 100644 --- a/src/registrar/admin.py +++ b/src/registrar/admin.py @@ -2370,6 +2370,9 @@ class PublicContactResource(resources.ModelResource): class Meta: model = models.PublicContact + use_bulk = True + batch_size = 1000 + force_init_instance = True def __init__(self): """Sets global variables for code tidyness""" @@ -2472,11 +2475,20 @@ class VerifiedByStaffAdmin(ListHeaderAdmin): super().save_model(request, obj, form, change) -class FederalAgencyAdmin(ListHeaderAdmin): +class FederalAgencyResource(resources.ModelResource): + """defines how each field in the referenced model should be mapped to the corresponding fields in the + import/export file""" + + class Meta: + model = models.FederalAgency + + +class FederalAgencyAdmin(ListHeaderAdmin, ImportExportModelAdmin): list_display = ["agency"] search_fields = ["agency"] search_help_text = "Search by agency name." ordering = ["agency"] + resource_classes = [FederalAgencyResource] class UserGroupAdmin(AuditedAdmin): diff --git a/src/registrar/management/commands/clean_tables.py b/src/registrar/management/commands/clean_tables.py index fa37c214d..f0c51390b 100644 --- a/src/registrar/management/commands/clean_tables.py +++ b/src/registrar/management/commands/clean_tables.py @@ -28,6 +28,7 @@ class Command(BaseCommand): * DomainInformation * DomainRequest * DraftDomain + * FederalAgency * Host * HostIp * PublicContact @@ -40,14 +41,15 @@ class Command(BaseCommand): table_names = [ "DomainInformation", "DomainRequest", + "FederalAgency", "PublicContact", + "HostIp", + "Host", "Domain", "User", "Contact", "Website", "DraftDomain", - "HostIp", - "Host", ] for table_name in table_names: diff --git a/src/registrar/management/commands/export_tables.py b/src/registrar/management/commands/export_tables.py index f927129fe..bd10ea7c5 100644 --- a/src/registrar/management/commands/export_tables.py +++ b/src/registrar/management/commands/export_tables.py @@ -18,6 +18,7 @@ class Command(BaseCommand): "Domain", "DomainRequest", "DomainInformation", + "FederalAgency", "UserDomainRole", "DraftDomain", "Website", diff --git a/src/registrar/management/commands/import_tables.py b/src/registrar/management/commands/import_tables.py index cbe3dc6ee..fcefb9b61 100644 --- a/src/registrar/management/commands/import_tables.py +++ b/src/registrar/management/commands/import_tables.py @@ -36,6 +36,7 @@ class Command(BaseCommand): "HostIp", "DraftDomain", "Website", + "FederalAgency", "DomainRequest", "DomainInformation", "UserDomainRole", @@ -83,7 +84,12 @@ class Command(BaseCommand): result = resource_instance.import_data(dataset, dry_run=False, skip_epp_save=self.skip_epp_save) if result.has_errors(): - logger.error(f"Errors occurred while importing {csv_filename}: {result.row_errors()}") + logger.error(f"Errors occurred while importing {csv_filename}:") + for row_error in result.row_errors(): + row_index = row_error[0] + errors = row_error[1] + for error in errors: + logger.error(f"Row {row_index} - {error.error} - {error.row}") else: logger.info(f"Successfully imported {csv_filename} into {table_name}") From 552e43409640f3abbf838394f39be6fd50426e4d Mon Sep 17 00:00:00 2001 From: David Kennedy Date: Thu, 13 Jun 2024 09:42:33 -0400 Subject: [PATCH 04/10] updated scripts to break datasets into max of 10000 rows --- .../management/commands/export_tables.py | 64 ++++++++++++----- .../management/commands/import_tables.py | 70 ++++++++++--------- 2 files changed, 85 insertions(+), 49 deletions(-) diff --git a/src/registrar/management/commands/export_tables.py b/src/registrar/management/commands/export_tables.py index bd10ea7c5..8c2cf7dc4 100644 --- a/src/registrar/management/commands/export_tables.py +++ b/src/registrar/management/commands/export_tables.py @@ -1,6 +1,9 @@ +import glob import logging +import math import os import pyzipper +import tablib from django.core.management import BaseCommand import registrar.admin @@ -37,17 +40,21 @@ class Command(BaseCommand): zip_filename = "tmp/exported_tables.zip" with pyzipper.AESZipFile(zip_filename, "w", compression=pyzipper.ZIP_DEFLATED) as zipf: for table_name in table_names: - csv_filename = f"tmp/{table_name}.csv" - if os.path.exists(csv_filename): - zipf.write(csv_filename, os.path.basename(csv_filename)) - logger.info(f"Added {csv_filename} to zip archive {zip_filename}") - # Remove the CSV files after adding them to the zip file - for table_name in table_names: - csv_filename = f"tmp/{table_name}.csv" - if os.path.exists(csv_filename): - os.remove(csv_filename) - logger.info(f"Removed temporary file {csv_filename}") + # Define the directory and the pattern + tmp_dir = 'tmp' + pattern = os.path.join(tmp_dir, f'{table_name}_*.csv') + zip_file_path = os.path.join(tmp_dir, 'exported_files.zip') + + # Find all files that match the pattern + for file_path in glob.glob(pattern): + # Add each file to the zip archive + zipf.write(file_path, os.path.basename(file_path)) + logger.info(f'Added {file_path} to {zip_file_path}') + + # Remove the file after adding to zip + os.remove(file_path) + logger.info(f'Removed {file_path}') def export_table(self, table_name): """Export a given table to a csv file in the tmp directory""" @@ -55,11 +62,36 @@ class Command(BaseCommand): try: resourceclass = getattr(registrar.admin, resourcename) dataset = resourceclass().export() - filename = f"tmp/{table_name}.csv" - with open(filename, "w") as outputfile: - outputfile.write(dataset.csv) - logger.info(f"Successfully exported {table_name} to {filename}") - except AttributeError: - logger.error(f"Resource class {resourcename} not found in registrar.admin") + if not isinstance(dataset, tablib.Dataset): + raise ValueError(f"Exported data from {resourcename} is not a tablib.Dataset") + + # Determine the number of rows per file + rows_per_file = 10000 + total_rows = len(dataset) + + # Calculate the number of files needed + num_files = math.ceil(total_rows / rows_per_file) + logger.info(f'splitting {table_name} into {num_files} files') + + # Split the dataset and export each chunk to a separate file + for i in range(num_files): + start_row = i * rows_per_file + end_row = start_row + rows_per_file + + # Create a new dataset for the chunk + chunk = tablib.Dataset(headers=dataset.headers) + for row in dataset[start_row:end_row]: + chunk.append(row) + #chunk = dataset[start_row:end_row] + + # Export the chunk to a new file + filename = f'tmp/{table_name}_{i + 1}.csv' + with open(filename, 'w') as f: + f.write(chunk.export('csv')) + + logger.info(f'Successfully exported {table_name} into {num_files} files.') + + except AttributeError as ae: + logger.error(f"Resource class {resourcename} not found in registrar.admin: {ae}") except Exception as e: logger.error(f"Failed to export {table_name}: {e}") diff --git a/src/registrar/management/commands/import_tables.py b/src/registrar/management/commands/import_tables.py index fcefb9b61..d04d0dbb2 100644 --- a/src/registrar/management/commands/import_tables.py +++ b/src/registrar/management/commands/import_tables.py @@ -1,4 +1,5 @@ import argparse +import glob import logging import os import pyzipper @@ -64,43 +65,46 @@ class Command(BaseCommand): """Import data from a CSV file into the given table""" resourcename = f"{table_name}Resource" - csv_filename = f"tmp/{table_name}.csv" - try: - if not os.path.exists(csv_filename): - logger.error(f"CSV file {csv_filename} not found.") - return - # if table_name is Contact, clean the table first - # User table is loaded before Contact, and signals create - # rows in Contact table which break the import, so need - # to be cleaned again before running import on Contact table - if table_name == "Contact": - self.clean_table(table_name) + # if table_name is Contact, clean the table first + # User table is loaded before Contact, and signals create + # rows in Contact table which break the import, so need + # to be cleaned again before running import on Contact table + if table_name == "Contact": + self.clean_table(table_name) - resourceclass = getattr(registrar.admin, resourcename) - resource_instance = resourceclass() - with open(csv_filename, "r") as csvfile: - dataset = tablib.Dataset().load(csvfile.read(), format="csv") - result = resource_instance.import_data(dataset, dry_run=False, skip_epp_save=self.skip_epp_save) + # Define the directory and the pattern for csv filenames + tmp_dir = 'tmp' + pattern = os.path.join(tmp_dir, f'{table_name}_*.csv') - if result.has_errors(): - logger.error(f"Errors occurred while importing {csv_filename}:") - for row_error in result.row_errors(): - row_index = row_error[0] - errors = row_error[1] - for error in errors: - logger.error(f"Row {row_index} - {error.error} - {error.row}") - else: - logger.info(f"Successfully imported {csv_filename} into {table_name}") + resourceclass = getattr(registrar.admin, resourcename) + resource_instance = resourceclass() - except AttributeError: - logger.error(f"Resource class {resourcename} not found in registrar.admin") - except Exception as e: - logger.error(f"Failed to import {csv_filename}: {e}") - finally: - if os.path.exists(csv_filename): - os.remove(csv_filename) - logger.info(f"Removed temporary file {csv_filename}") + # Find all files that match the pattern + for csv_filename in glob.glob(pattern): + try: + with open(csv_filename, "r") as csvfile: + dataset = tablib.Dataset().load(csvfile.read(), format="csv") + result = resource_instance.import_data(dataset, dry_run=False, skip_epp_save=self.skip_epp_save) + + if result.has_errors(): + logger.error(f"Errors occurred while importing {csv_filename}:") + for row_error in result.row_errors(): + row_index = row_error[0] + errors = row_error[1] + for error in errors: + logger.error(f"Row {row_index} - {error.error} - {error.row}") + else: + logger.info(f"Successfully imported {csv_filename} into {table_name}") + + except AttributeError: + logger.error(f"Resource class {resourcename} not found in registrar.admin") + except Exception as e: + logger.error(f"Failed to import {csv_filename}: {e}") + finally: + if os.path.exists(csv_filename): + os.remove(csv_filename) + logger.info(f"Removed temporary file {csv_filename}") def clean_table(self, table_name): """Delete all rows in the given table""" From aa0a398c06509b21e7d0cdfda2f8afeadfc4f0c9 Mon Sep 17 00:00:00 2001 From: David Kennedy Date: Fri, 14 Jun 2024 12:17:08 -0400 Subject: [PATCH 05/10] updated unit tests for the associated changes in this PR --- src/registrar/admin.py | 13 +- .../management/commands/export_tables.py | 31 ++-- .../management/commands/import_tables.py | 11 +- .../tests/test_management_scripts.py | 148 +++++++++--------- 4 files changed, 101 insertions(+), 102 deletions(-) diff --git a/src/registrar/admin.py b/src/registrar/admin.py index 16aba7ec0..46b468eb2 100644 --- a/src/registrar/admin.py +++ b/src/registrar/admin.py @@ -2370,14 +2370,15 @@ class PublicContactResource(resources.ModelResource): class Meta: model = models.PublicContact - use_bulk = True - batch_size = 1000 - force_init_instance = True + # may want to consider these bulk options in future, so left in as comments + # use_bulk = True + # batch_size = 1000 + # force_init_instance = True def __init__(self): """Sets global variables for code tidyness""" super().__init__() - self.skip_epp_save=False + self.skip_epp_save = False def import_data( self, @@ -2387,10 +2388,10 @@ class PublicContactResource(resources.ModelResource): use_transactions=None, collect_failed_rows=False, rollback_on_validation_errors=False, - **kwargs + **kwargs, ): """Override import_data to set self.skip_epp_save if in kwargs""" - self.skip_epp_save = kwargs.get('skip_epp_save', False) + self.skip_epp_save = kwargs.get("skip_epp_save", False) return super().import_data( dataset, dry_run, diff --git a/src/registrar/management/commands/export_tables.py b/src/registrar/management/commands/export_tables.py index 8c2cf7dc4..e5c940a40 100644 --- a/src/registrar/management/commands/export_tables.py +++ b/src/registrar/management/commands/export_tables.py @@ -41,20 +41,21 @@ class Command(BaseCommand): with pyzipper.AESZipFile(zip_filename, "w", compression=pyzipper.ZIP_DEFLATED) as zipf: for table_name in table_names: - # Define the directory and the pattern - tmp_dir = 'tmp' - pattern = os.path.join(tmp_dir, f'{table_name}_*.csv') - zip_file_path = os.path.join(tmp_dir, 'exported_files.zip') + # Define the tmp directory and the file pattern + tmp_dir = "tmp" + pattern = f"{table_name}_" + zip_file_path = os.path.join(tmp_dir, "exported_files.zip") # Find all files that match the pattern - for file_path in glob.glob(pattern): + matching_files = [file for file in os.listdir(tmp_dir) if file.startswith(pattern)] + for file_path in matching_files: # Add each file to the zip archive zipf.write(file_path, os.path.basename(file_path)) - logger.info(f'Added {file_path} to {zip_file_path}') - + logger.info(f"Added {file_path} to {zip_file_path}") + # Remove the file after adding to zip os.remove(file_path) - logger.info(f'Removed {file_path}') + logger.info(f"Removed {file_path}") def export_table(self, table_name): """Export a given table to a csv file in the tmp directory""" @@ -71,7 +72,8 @@ class Command(BaseCommand): # Calculate the number of files needed num_files = math.ceil(total_rows / rows_per_file) - logger.info(f'splitting {table_name} into {num_files} files') + + logger.info(f"splitting {table_name} into {num_files} files") # Split the dataset and export each chunk to a separate file for i in range(num_files): @@ -82,16 +84,15 @@ class Command(BaseCommand): chunk = tablib.Dataset(headers=dataset.headers) for row in dataset[start_row:end_row]: chunk.append(row) - #chunk = dataset[start_row:end_row] # Export the chunk to a new file - filename = f'tmp/{table_name}_{i + 1}.csv' - with open(filename, 'w') as f: - f.write(chunk.export('csv')) + filename = f"tmp/{table_name}_{i + 1}.csv" + with open(filename, "w") as f: + f.write(chunk.export("csv")) - logger.info(f'Successfully exported {table_name} into {num_files} files.') + logger.info(f"Successfully exported {table_name} into {num_files} files.") except AttributeError as ae: - logger.error(f"Resource class {resourcename} not found in registrar.admin: {ae}") + logger.error(f"Resource class {resourcename} not found in registrar.admin") except Exception as e: logger.error(f"Failed to export {table_name}: {e}") diff --git a/src/registrar/management/commands/import_tables.py b/src/registrar/management/commands/import_tables.py index d04d0dbb2..abe26830f 100644 --- a/src/registrar/management/commands/import_tables.py +++ b/src/registrar/management/commands/import_tables.py @@ -18,7 +18,7 @@ class Command(BaseCommand): def add_arguments(self, parser): """Add command line arguments.""" - parser.add_argument('--skipEppSave', default=True, action=argparse.BooleanOptionalAction) + parser.add_argument("--skipEppSave", default=True, action=argparse.BooleanOptionalAction) def handle(self, **options): """Extracts CSV files from a zip archive and imports them into the respective tables""" @@ -74,19 +74,20 @@ class Command(BaseCommand): self.clean_table(table_name) # Define the directory and the pattern for csv filenames - tmp_dir = 'tmp' - pattern = os.path.join(tmp_dir, f'{table_name}_*.csv') + tmp_dir = "tmp" + #pattern = os.path.join(tmp_dir, f"{table_name}_*.csv") + pattern = f"{table_name}_" resourceclass = getattr(registrar.admin, resourcename) resource_instance = resourceclass() # Find all files that match the pattern - for csv_filename in glob.glob(pattern): + matching_files = [file for file in os.listdir(tmp_dir) if file.startswith(pattern)] + for csv_filename in matching_files: try: with open(csv_filename, "r") as csvfile: dataset = tablib.Dataset().load(csvfile.read(), format="csv") result = resource_instance.import_data(dataset, dry_run=False, skip_epp_save=self.skip_epp_save) - if result.has_errors(): logger.error(f"Errors occurred while importing {csv_filename}:") for row_error in result.row_errors(): diff --git a/src/registrar/tests/test_management_scripts.py b/src/registrar/tests/test_management_scripts.py index 500953f02..006a39231 100644 --- a/src/registrar/tests/test_management_scripts.py +++ b/src/registrar/tests/test_management_scripts.py @@ -7,6 +7,7 @@ from django.utils.module_loading import import_string import logging import pyzipper from registrar.management.commands.clean_tables import Command as CleanTablesCommand +from registrar.management.commands.export_tables import Command as ExportTablesCommand from registrar.models import ( User, Domain, @@ -869,88 +870,77 @@ class TestCleanTables(TestCase): self.logger_mock.error.assert_any_call("Error cleaning table DomainInformation: Some error") + + class TestExportTables(MockEppLib): """Test the export_tables script""" def setUp(self): + self.command = ExportTablesCommand() self.logger_patcher = patch("registrar.management.commands.export_tables.logger") self.logger_mock = self.logger_patcher.start() def tearDown(self): self.logger_patcher.stop() - @patch("registrar.management.commands.export_tables.os.makedirs") - @patch("registrar.management.commands.export_tables.os.path.exists") - @patch("registrar.management.commands.export_tables.os.remove") - @patch("registrar.management.commands.export_tables.pyzipper.AESZipFile") + @patch("os.makedirs") + @patch("os.path.exists") + @patch("os.remove") + @patch("pyzipper.AESZipFile") @patch("registrar.management.commands.export_tables.getattr") - @patch("builtins.open", new_callable=mock_open, read_data=b"mock_csv_data") - @patch("django.utils.translation.trans_real._translations", {}) - @patch("django.utils.translation.trans_real.translation") + @patch("builtins.open", new_callable=mock_open) + @patch("os.listdir") def test_handle( - self, mock_translation, mock_file, mock_getattr, mock_zipfile, mock_remove, mock_path_exists, mock_makedirs + self, mock_listdir, mock_open, mock_getattr, mock_zipfile, mock_remove, mock_path_exists, mock_makedirs ): """test that the handle method properly exports tables""" - with less_console_noise(): - # Mock os.makedirs to do nothing - mock_makedirs.return_value = None + # Mock os.makedirs to do nothing + mock_makedirs.return_value = None - # Mock os.path.exists to always return True - mock_path_exists.return_value = True + # Mock os.path.exists to always return True + mock_path_exists.return_value = True - # Mock the resource class and its export method - mock_resource_class = MagicMock() - mock_dataset = MagicMock() - mock_dataset.csv = b"mock_csv_data" - mock_resource_class().export.return_value = mock_dataset - mock_getattr.return_value = mock_resource_class + # Check that the export_table function was called for each table + table_names = [ + "User", "Contact", "Domain", "DomainRequest", "DomainInformation", "FederalAgency", + "UserDomainRole", "DraftDomain", "Website", "HostIp", "Host", "PublicContact", + ] - # Mock translation function to return a dummy translation object - mock_translation.return_value = MagicMock() + # Mock directory listing + mock_listdir.side_effect = lambda path: [f"{table}_1.csv" for table in table_names] - call_command("export_tables") + # Mock the resource class and its export method + mock_dataset = tablib.Dataset() + mock_dataset.headers = ["header1", "header2"] + mock_dataset.append(["row1_col1", "row1_col2"]) + mock_resource_class = MagicMock() + mock_resource_class().export.return_value = mock_dataset + mock_getattr.return_value = mock_resource_class - # Check that os.makedirs was called once to create the tmp directory - mock_makedirs.assert_called_once_with("tmp", exist_ok=True) + command_instance = ExportTablesCommand() + command_instance.handle() - # Check that the export_table function was called for each table - table_names = [ - "User", - "Contact", - "Domain", - "DomainRequest", - "DomainInformation", - "UserDomainRole", - "DraftDomain", - "Website", - "HostIp", - "Host", - "PublicContact", - ] + # Check that os.makedirs was called once to create the tmp directory + mock_makedirs.assert_called_once_with("tmp", exist_ok=True) - # Check that the CSV file was written - for table_name in table_names: - mock_file().write.assert_any_call(b"mock_csv_data") - # Check that os.path.exists was called - mock_path_exists.assert_any_call(f"tmp/{table_name}.csv") - # Check that os.remove was called - mock_remove.assert_any_call(f"tmp/{table_name}.csv") + # Check that the CSV file was written + for table_name in table_names: + # Check that os.remove was called + mock_remove.assert_any_call(f"{table_name}_1.csv") - # Check that the zipfile was created and files were added - mock_zipfile.assert_called_once_with("tmp/exported_tables.zip", "w", compression=pyzipper.ZIP_DEFLATED) - zipfile_instance = mock_zipfile.return_value.__enter__.return_value - for table_name in table_names: - zipfile_instance.write.assert_any_call(f"tmp/{table_name}.csv", f"{table_name}.csv") + # Check that the zipfile was created and files were added + mock_zipfile.assert_called_once_with("tmp/exported_tables.zip", "w", compression=pyzipper.ZIP_DEFLATED) + zipfile_instance = mock_zipfile.return_value.__enter__.return_value + for table_name in table_names: + zipfile_instance.write.assert_any_call(f"{table_name}_1.csv", f"{table_name}_1.csv") - # Verify logging for added files - for table_name in table_names: - self.logger_mock.info.assert_any_call( - f"Added tmp/{table_name}.csv to zip archive tmp/exported_tables.zip" - ) + # Verify logging for added files + for table_name in table_names: + self.logger_mock.info.assert_any_call(f"Added {table_name}_1.csv to tmp/exported_files.zip") - # Verify logging for removed files - for table_name in table_names: - self.logger_mock.info.assert_any_call(f"Removed temporary file tmp/{table_name}.csv") + # Verify logging for removed files + for table_name in table_names: + self.logger_mock.info.assert_any_call(f"Removed {table_name}_1.csv") @patch("registrar.management.commands.export_tables.getattr") def test_export_table_handles_missing_resource_class(self, mock_getattr): @@ -995,8 +985,10 @@ class TestImportTables(TestCase): @patch("registrar.management.commands.import_tables.logger") @patch("registrar.management.commands.import_tables.getattr") @patch("django.apps.apps.get_model") + @patch("os.listdir") def test_handle( self, + mock_listdir, mock_get_model, mock_getattr, mock_logger, @@ -1019,6 +1011,24 @@ class TestImportTables(TestCase): mock_zipfile_instance = mock_zipfile.return_value.__enter__.return_value mock_zipfile_instance.extractall.return_value = None + # Check that the import_table function was called for each table + table_names = [ + "User", + "Contact", + "Domain", + "DomainRequest", + "DomainInformation", + "UserDomainRole", + "DraftDomain", + "Website", + "HostIp", + "Host", + "PublicContact", + ] + + # Mock directory listing + mock_listdir.side_effect = lambda path: [f"{table}_1.csv" for table in table_names] + # Mock the CSV file content csv_content = b"mock_csv_data" @@ -1054,23 +1064,9 @@ class TestImportTables(TestCase): # Check that extractall was called once to extract the zip file contents mock_zipfile_instance.extractall.assert_called_once_with("tmp") - # Check that the import_table function was called for each table - table_names = [ - "User", - "Contact", - "Domain", - "DomainRequest", - "DomainInformation", - "UserDomainRole", - "DraftDomain", - "Website", - "HostIp", - "Host", - "PublicContact", - ] # Check that os.path.exists was called for each table for table_name in table_names: - mock_path_exists.assert_any_call(f"tmp/{table_name}.csv") + mock_path_exists.assert_any_call(f"{table_name}_1.csv") # Check that clean_tables is called for Contact mock_get_model.assert_any_call("registrar", "Contact") @@ -1079,18 +1075,18 @@ class TestImportTables(TestCase): # Check that logger.info was called for each successful import for table_name in table_names: - mock_logger.info.assert_any_call(f"Successfully imported tmp/{table_name}.csv into {table_name}") + mock_logger.info.assert_any_call(f"Successfully imported {table_name}_1.csv into {table_name}") # Check that logger.error was not called for resource class not found mock_logger.error.assert_not_called() # Check that os.remove was called for each CSV file for table_name in table_names: - mock_remove.assert_any_call(f"tmp/{table_name}.csv") + mock_remove.assert_any_call(f"{table_name}_1.csv") # Check that logger.info was called for each CSV file removal for table_name in table_names: - mock_logger.info.assert_any_call(f"Removed temporary file tmp/{table_name}.csv") + mock_logger.info.assert_any_call(f"Removed temporary file {table_name}_1.csv") @patch("registrar.management.commands.import_tables.logger") @patch("registrar.management.commands.import_tables.os.makedirs") From 38f7ef50dbce9bbfaac2fde9a029e71fb0bb92ea Mon Sep 17 00:00:00 2001 From: David Kennedy Date: Fri, 14 Jun 2024 12:32:17 -0400 Subject: [PATCH 06/10] formatted for code readability --- .../management/commands/export_tables.py | 3 +-- .../management/commands/import_tables.py | 2 -- src/registrar/tests/test_management_scripts.py | 18 +++++++++++++----- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/registrar/management/commands/export_tables.py b/src/registrar/management/commands/export_tables.py index e5c940a40..d9b45ec94 100644 --- a/src/registrar/management/commands/export_tables.py +++ b/src/registrar/management/commands/export_tables.py @@ -1,4 +1,3 @@ -import glob import logging import math import os @@ -92,7 +91,7 @@ class Command(BaseCommand): logger.info(f"Successfully exported {table_name} into {num_files} files.") - except AttributeError as ae: + except AttributeError: logger.error(f"Resource class {resourcename} not found in registrar.admin") except Exception as e: logger.error(f"Failed to export {table_name}: {e}") diff --git a/src/registrar/management/commands/import_tables.py b/src/registrar/management/commands/import_tables.py index abe26830f..7797d8d75 100644 --- a/src/registrar/management/commands/import_tables.py +++ b/src/registrar/management/commands/import_tables.py @@ -1,5 +1,4 @@ import argparse -import glob import logging import os import pyzipper @@ -75,7 +74,6 @@ class Command(BaseCommand): # Define the directory and the pattern for csv filenames tmp_dir = "tmp" - #pattern = os.path.join(tmp_dir, f"{table_name}_*.csv") pattern = f"{table_name}_" resourceclass = getattr(registrar.admin, resourcename) diff --git a/src/registrar/tests/test_management_scripts.py b/src/registrar/tests/test_management_scripts.py index 006a39231..8e3917d40 100644 --- a/src/registrar/tests/test_management_scripts.py +++ b/src/registrar/tests/test_management_scripts.py @@ -870,8 +870,6 @@ class TestCleanTables(TestCase): self.logger_mock.error.assert_any_call("Error cleaning table DomainInformation: Some error") - - class TestExportTables(MockEppLib): """Test the export_tables script""" @@ -902,8 +900,18 @@ class TestExportTables(MockEppLib): # Check that the export_table function was called for each table table_names = [ - "User", "Contact", "Domain", "DomainRequest", "DomainInformation", "FederalAgency", - "UserDomainRole", "DraftDomain", "Website", "HostIp", "Host", "PublicContact", + "User", + "Contact", + "Domain", + "DomainRequest", + "DomainInformation", + "FederalAgency", + "UserDomainRole", + "DraftDomain", + "Website", + "HostIp", + "Host", + "PublicContact", ] # Mock directory listing @@ -1025,7 +1033,7 @@ class TestImportTables(TestCase): "Host", "PublicContact", ] - + # Mock directory listing mock_listdir.side_effect = lambda path: [f"{table}_1.csv" for table in table_names] From 41bfa3991d45ab07e48c9216d6b4a09f5aa3e613 Mon Sep 17 00:00:00 2001 From: David Kennedy Date: Fri, 14 Jun 2024 12:48:20 -0400 Subject: [PATCH 07/10] code cleanup --- src/registrar/admin.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/registrar/admin.py b/src/registrar/admin.py index 46b468eb2..5344d7059 100644 --- a/src/registrar/admin.py +++ b/src/registrar/admin.py @@ -2402,19 +2402,6 @@ class PublicContactResource(resources.ModelResource): **kwargs, ) - # def import_row(self, row, instance_loader, using_transactions=True, dry_run=False, raise_errors=None, **kwargs): - # """Override kwargs skip_epp_save and set to True""" - # kwargs["skip_epp_save"] = True - # super().import_data() - # return super().import_row( - # row, - # instance_loader, - # using_transactions=using_transactions, - # dry_run=dry_run, - # raise_errors=raise_errors, - # **kwargs, - # ) - def save_instance(self, instance, is_create, using_transactions=True, dry_run=False): """Override save_instance setting skip_epp_save to True""" self.before_save_instance(instance, using_transactions, dry_run) From 08b0b592b27fd124f0dd25265b0a79f8de4d92be Mon Sep 17 00:00:00 2001 From: David Kennedy Date: Fri, 14 Jun 2024 15:17:57 -0400 Subject: [PATCH 08/10] fixed paths on export and import scripts --- src/registrar/management/commands/export_tables.py | 4 ++-- src/registrar/management/commands/import_tables.py | 2 +- src/registrar/tests/test_management_scripts.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/registrar/management/commands/export_tables.py b/src/registrar/management/commands/export_tables.py index d9b45ec94..7d7521c1f 100644 --- a/src/registrar/management/commands/export_tables.py +++ b/src/registrar/management/commands/export_tables.py @@ -49,11 +49,11 @@ class Command(BaseCommand): matching_files = [file for file in os.listdir(tmp_dir) if file.startswith(pattern)] for file_path in matching_files: # Add each file to the zip archive - zipf.write(file_path, os.path.basename(file_path)) + zipf.write(f"tmp/{file_path}", os.path.basename(file_path)) logger.info(f"Added {file_path} to {zip_file_path}") # Remove the file after adding to zip - os.remove(file_path) + os.remove(f"tmp/{file_path}") logger.info(f"Removed {file_path}") def export_table(self, table_name): diff --git a/src/registrar/management/commands/import_tables.py b/src/registrar/management/commands/import_tables.py index 7797d8d75..cb78e13bd 100644 --- a/src/registrar/management/commands/import_tables.py +++ b/src/registrar/management/commands/import_tables.py @@ -83,7 +83,7 @@ class Command(BaseCommand): matching_files = [file for file in os.listdir(tmp_dir) if file.startswith(pattern)] for csv_filename in matching_files: try: - with open(csv_filename, "r") as csvfile: + with open(f"tmp/{csv_filename}", "r") as csvfile: dataset = tablib.Dataset().load(csvfile.read(), format="csv") result = resource_instance.import_data(dataset, dry_run=False, skip_epp_save=self.skip_epp_save) if result.has_errors(): diff --git a/src/registrar/tests/test_management_scripts.py b/src/registrar/tests/test_management_scripts.py index 8e3917d40..784fe3b67 100644 --- a/src/registrar/tests/test_management_scripts.py +++ b/src/registrar/tests/test_management_scripts.py @@ -934,13 +934,13 @@ class TestExportTables(MockEppLib): # Check that the CSV file was written for table_name in table_names: # Check that os.remove was called - mock_remove.assert_any_call(f"{table_name}_1.csv") + mock_remove.assert_any_call(f"tmp/{table_name}_1.csv") # Check that the zipfile was created and files were added mock_zipfile.assert_called_once_with("tmp/exported_tables.zip", "w", compression=pyzipper.ZIP_DEFLATED) zipfile_instance = mock_zipfile.return_value.__enter__.return_value for table_name in table_names: - zipfile_instance.write.assert_any_call(f"{table_name}_1.csv", f"{table_name}_1.csv") + zipfile_instance.write.assert_any_call(f"tmp/{table_name}_1.csv", f"{table_name}_1.csv") # Verify logging for added files for table_name in table_names: From 9c885396138b8aa4bab3a1bdd26b7c9fd09cfb26 Mon Sep 17 00:00:00 2001 From: David Kennedy Date: Tue, 18 Jun 2024 08:17:08 -0400 Subject: [PATCH 09/10] updated export_tables to use Paginator --- .../management/commands/export_tables.py | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/registrar/management/commands/export_tables.py b/src/registrar/management/commands/export_tables.py index 7d7521c1f..dddf2cada 100644 --- a/src/registrar/management/commands/export_tables.py +++ b/src/registrar/management/commands/export_tables.py @@ -1,5 +1,5 @@ +from django.core.paginator import Paginator import logging -import math import os import pyzipper import tablib @@ -56,8 +56,9 @@ class Command(BaseCommand): os.remove(f"tmp/{file_path}") logger.info(f"Removed {file_path}") + def export_table(self, table_name): - """Export a given table to a csv file in the tmp directory""" + """Export a given table to csv files in the tmp directory""" resourcename = f"{table_name}Resource" try: resourceclass = getattr(registrar.admin, resourcename) @@ -67,25 +68,25 @@ class Command(BaseCommand): # Determine the number of rows per file rows_per_file = 10000 - total_rows = len(dataset) - # Calculate the number of files needed - num_files = math.ceil(total_rows / rows_per_file) + # Use Paginator to handle splitting the dataset + paginator = Paginator(dataset.dict, rows_per_file) + num_files = paginator.num_pages logger.info(f"splitting {table_name} into {num_files} files") - # Split the dataset and export each chunk to a separate file - for i in range(num_files): - start_row = i * rows_per_file - end_row = start_row + rows_per_file + # Export each page to a separate file + for page_num in paginator.page_range: + page = paginator.page(page_num) # Create a new dataset for the chunk chunk = tablib.Dataset(headers=dataset.headers) - for row in dataset[start_row:end_row]: + for row_dict in page.object_list: + row = [row_dict[header] for header in dataset.headers] chunk.append(row) # Export the chunk to a new file - filename = f"tmp/{table_name}_{i + 1}.csv" + filename = f"tmp/{table_name}_{page_num}.csv" with open(filename, "w") as f: f.write(chunk.export("csv")) From 29fa4b26ae3c989c12b6e3f80e31fee2c72bfaaf Mon Sep 17 00:00:00 2001 From: David Kennedy Date: Tue, 18 Jun 2024 08:21:18 -0400 Subject: [PATCH 10/10] formatted for linter --- src/registrar/management/commands/export_tables.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/registrar/management/commands/export_tables.py b/src/registrar/management/commands/export_tables.py index dddf2cada..e934c9fad 100644 --- a/src/registrar/management/commands/export_tables.py +++ b/src/registrar/management/commands/export_tables.py @@ -56,7 +56,6 @@ class Command(BaseCommand): os.remove(f"tmp/{file_path}") logger.info(f"Removed {file_path}") - def export_table(self, table_name): """Export a given table to csv files in the tmp directory""" resourcename = f"{table_name}Resource"