Add a scrap command to backfill Spec11 threats (#897)

This parses through all pre-existing Spec11 files in GCS (starting at
2019-01-01 which is basically when the new format started) and maps them
to the new Spec11ThreatMatch objects.

Because the old format stored domain names only and the new format stores
names + repo IDs, we need to retrieve the DomainBase objects from the
point in time of the scan (failing if they don't exist). Because the
same domains appear multiple times (we estimate a total of 100k+ entries
but only 1-2k unique domains) we cache the DomainBase objects that we
retrieve from Datastore.
This commit is contained in:
gbrodman 2020-12-15 16:18:27 -05:00 committed by GitHub
parent 090eeaa618
commit 1db0c5b018
11 changed files with 4652 additions and 4402 deletions

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -78,3 +78,4 @@ V77__fixes_for_replay.sql
V78__add_history_id_for_redemption_history_entry.sql
V79__drop_foreign_keys_on_pollmessage.sql
V80__defer_bill_event_key.sql
V81__drop_spec11_fkeys.sql

View file

@ -0,0 +1,18 @@
-- Copyright 2020 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Some objects haven't been fully populated in SQL yet, so don't depend on them.
ALTER TABLE "Spec11ThreatMatch" DROP CONSTRAINT "fk_spec11_threat_match_domain_repo_id";
ALTER TABLE "Spec11ThreatMatch" DROP CONSTRAINT "fk_spec11_threat_match_registrar_id";
ALTER TABLE "Spec11ThreatMatch" DROP CONSTRAINT "fk_spec11_threat_match_tld";

View file

@ -2251,30 +2251,6 @@ ALTER TABLE ONLY public."RegistrarPoc"
ADD CONSTRAINT fk_registrar_poc_registrar_id FOREIGN KEY (registrar_id) REFERENCES public."Registrar"(registrar_id);
--
-- Name: Spec11ThreatMatch fk_spec11_threat_match_domain_repo_id; Type: FK CONSTRAINT; Schema: public; Owner: -
--
ALTER TABLE ONLY public."Spec11ThreatMatch"
ADD CONSTRAINT fk_spec11_threat_match_domain_repo_id FOREIGN KEY (domain_repo_id) REFERENCES public."Domain"(repo_id);
--
-- Name: Spec11ThreatMatch fk_spec11_threat_match_registrar_id; Type: FK CONSTRAINT; Schema: public; Owner: -
--
ALTER TABLE ONLY public."Spec11ThreatMatch"
ADD CONSTRAINT fk_spec11_threat_match_registrar_id FOREIGN KEY (registrar_id) REFERENCES public."Registrar"(registrar_id);
--
-- Name: Spec11ThreatMatch fk_spec11_threat_match_tld; Type: FK CONSTRAINT; Schema: public; Owner: -
--
ALTER TABLE ONLY public."Spec11ThreatMatch"
ADD CONSTRAINT fk_spec11_threat_match_tld FOREIGN KEY (tld) REFERENCES public."Tld"(tld_name);
--
-- Name: DomainHistoryHost fka9woh3hu8gx5x0vly6bai327n; Type: FK CONSTRAINT; Schema: public; Owner: -
--