Merge pull request #2226 from internetee/replace-validation-events-indexes-to-concurrency

Replace validation events indexes to concurrency
This commit is contained in:
Timo Võhmar 2021-11-29 10:42:56 +02:00 committed by GitHub
commit 6c4c5ab43b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 139 additions and 18 deletions

View file

@ -0,0 +1,7 @@
# Adds a single-column index on validation_events.validation_eventable_id.
# Note: despite "Type" in the class name, the indexed column is the id column.
class AddTypeIndexToValidationEvent < ActiveRecord::Migration[6.1]
  # A concurrent index build cannot run inside the migration's DDL transaction.
  disable_ddl_transaction!

  def change
    add_index :validation_events, :validation_eventable_id, algorithm: :concurrently
  end
end

View file

@ -0,0 +1,7 @@
# Adds an index on contacts.email, built concurrently.
class AddConcIndexToContact < ActiveRecord::Migration[6.1]
  # A concurrent index build cannot run inside the migration's DDL transaction.
  disable_ddl_transaction!

  def change
    add_index :contacts, :email, algorithm: :concurrently
  end
end

View file

@ -0,0 +1,5 @@
# Adds a GIN index on validation_events.event_data.
#
# The index is built concurrently, like the other validation_events/contacts
# index migrations in this change set, so that writes to the table are not
# blocked while the index is being created.
class AddIndexToJsonValidationEvent < ActiveRecord::Migration[6.1]
  # A concurrent index build cannot run inside the migration's DDL transaction.
  disable_ddl_transaction!

  def change
    add_index :validation_events, :event_data, using: :gin, algorithm: :concurrently
  end
end

View file

@ -51,6 +51,20 @@ CREATE EXTENSION IF NOT EXISTS hstore WITH SCHEMA public;
COMMENT ON EXTENSION hstore IS 'data type for storing sets of (key, value) pairs';
--
-- Name: pg_stat_statements; Type: EXTENSION; Schema: -; Owner: -
--
CREATE EXTENSION IF NOT EXISTS pg_stat_statements WITH SCHEMA public;
--
-- Name: EXTENSION pg_stat_statements; Type: COMMENT; Schema: -; Owner: -
--
COMMENT ON EXTENSION pg_stat_statements IS 'track execution statistics of all SQL statements executed';
--
-- Name: pgcrypto; Type: EXTENSION; Schema: -; Owner: -
--
@ -2296,6 +2310,39 @@ CREATE SEQUENCE public.pghero_query_stats_id_seq
ALTER SEQUENCE public.pghero_query_stats_id_seq OWNED BY public.pghero_query_stats.id;
--
-- Name: pghero_space_stats; Type: TABLE; Schema: public; Owner: -
--
-- Table definition: a NOT NULL bigint id, text columns database/schema/relation,
-- a bigint size and a captured_at timestamp (without time zone).
-- The id default and primary-key constraint are attached by separate statements in this dump.
CREATE TABLE public.pghero_space_stats (
id bigint NOT NULL,
database text,
schema text,
relation text,
size bigint,
captured_at timestamp without time zone
);
--
-- Name: pghero_space_stats_id_seq; Type: SEQUENCE; Schema: public; Owner: -
--
-- Sequence for pghero_space_stats.id: starts at 1, increments by 1,
-- no explicit min/max bounds, caches one value at a time.
CREATE SEQUENCE public.pghero_space_stats_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
--
-- Name: pghero_space_stats_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
--
ALTER SEQUENCE public.pghero_space_stats_id_seq OWNED BY public.pghero_space_stats.id;
--
-- Name: prices; Type: TABLE; Schema: public; Owner: -
--
@ -3204,6 +3251,13 @@ ALTER TABLE ONLY public.payment_orders ALTER COLUMN id SET DEFAULT nextval('publ
ALTER TABLE ONLY public.pghero_query_stats ALTER COLUMN id SET DEFAULT nextval('public.pghero_query_stats_id_seq'::regclass);
--
-- Name: pghero_space_stats id; Type: DEFAULT; Schema: public; Owner: -
--
ALTER TABLE ONLY public.pghero_space_stats ALTER COLUMN id SET DEFAULT nextval('public.pghero_space_stats_id_seq'::regclass);
--
-- Name: prices id; Type: DEFAULT; Schema: public; Owner: -
--
@ -3735,6 +3789,14 @@ ALTER TABLE ONLY public.pghero_query_stats
ADD CONSTRAINT pghero_query_stats_pkey PRIMARY KEY (id);
--
-- Name: pghero_space_stats pghero_space_stats_pkey; Type: CONSTRAINT; Schema: public; Owner: -
--
ALTER TABLE ONLY public.pghero_space_stats
ADD CONSTRAINT pghero_space_stats_pkey PRIMARY KEY (id);
--
-- Name: prices prices_pkey; Type: CONSTRAINT; Schema: public; Owner: -
--
@ -4029,6 +4091,13 @@ CREATE INDEX index_contact_requests_on_whois_record_id ON public.contact_request
CREATE INDEX index_contacts_on_code ON public.contacts USING btree (code);
--
-- Name: index_contacts_on_email; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX index_contacts_on_email ON public.contacts USING btree (email);
--
-- Name: index_contacts_on_registrar_id; Type: INDEX; Schema: public; Owner: -
--
@ -4505,6 +4574,13 @@ CREATE INDEX index_payment_orders_on_invoice_id ON public.payment_orders USING b
CREATE INDEX index_pghero_query_stats_on_database_and_captured_at ON public.pghero_query_stats USING btree (database, captured_at);
--
-- Name: index_pghero_space_stats_on_database_and_captured_at; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX index_pghero_space_stats_on_database_and_captured_at ON public.pghero_space_stats USING btree (database, captured_at);
--
-- Name: index_prices_on_zone_id; Type: INDEX; Schema: public; Owner: -
--
@ -4554,6 +4630,13 @@ CREATE INDEX index_users_on_identity_code ON public.users USING btree (identity_
CREATE INDEX index_users_on_registrar_id ON public.users USING btree (registrar_id);
--
-- Name: index_validation_events_on_event_data; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX index_validation_events_on_event_data ON public.validation_events USING gin (event_data);
--
-- Name: index_validation_events_on_validation_eventable; Type: INDEX; Schema: public; Owner: -
--
@ -4561,6 +4644,13 @@ CREATE INDEX index_users_on_registrar_id ON public.users USING btree (registrar_
CREATE INDEX index_validation_events_on_validation_eventable ON public.validation_events USING btree (validation_eventable_type, validation_eventable_id);
--
-- Name: index_validation_events_on_validation_eventable_id; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX index_validation_events_on_validation_eventable_id ON public.validation_events USING btree (validation_eventable_id);
--
-- Name: index_versions_on_item_type_and_item_id; Type: INDEX; Schema: public; Owner: -
--
@ -5295,6 +5385,10 @@ INSERT INTO "schema_migrations" (version) VALUES
('20211028122103'),
('20211028125245'),
('20211029082225'),
('20211124071418');
('20211124071418'),
('20211124084308'),
('20211125181033'),
('20211125184334'),
('20211126085139');

View file

@ -27,7 +27,7 @@ namespace :verify_email do
VerifyEmailsJob.set(wait_until: spam_protect_timeout(options)).perform_later(
contact: contact,
check_level: check_level(options)
)
) if filter_check_level(contact)
end
end
end
@ -49,10 +49,6 @@ def logger
@logger ||= ActiveSupport::TaggedLogging.new(Syslog::Logger.new('registry'))
end
# Set the time after which a validation is considered obsolete.
# Take all contact records that have successfully passed verification and fall within the deadline.
# Look for contacts that have not been verified or whose verification is out of date.
def prepare_contacts(options)
if options[:domain_name].present?
contacts_by_domain(options[:domain_name])
@ -65,23 +61,32 @@ def prepare_contacts(options)
end
end
# Decides whether +contact+ should be (re)queued for email verification,
# based on its most recently created validation event.
#
# Returns true when:
#   * the contact has no validation events at all;
#   * the latest event succeeded but is older than ValidationEvent::VALIDATION_PERIOD;
#   * the latest event failed at a level other than 'regex'/'smtp' and
#     check_mx_contact_validation(contact) is falsy.
# Returns false in every other case.
def filter_check_level(contact)
  events = contact.validation_events
  return true unless events.exists?

  latest = events.order(created_at: :asc).last
  return true if latest.successful? && latest.created_at < (Time.zone.now - ValidationEvent::VALIDATION_PERIOD)
  return false unless latest.failed?
  # A failure at the regex or smtp level is never re-queued.
  return false if %w[regex smtp].include?(latest.event_data['check_level'])

  !check_mx_contact_validation(contact)
end
def failed_contacts
failed_contacts = []
failed_validations_ids = ValidationEvent.failed.distinct.pluck(:validation_eventable_id)
contacts = Contact.where(id: failed_validations_ids).includes(:validation_events)
contacts.find_each(batch_size: 10_000) do |contact|
data = contact.validation_events.order(created_at: :asc).last
if data.failed?
next if data.event_data['check_level'] == 'regex'
next if data.event_data['check_level'] == 'smtp'
next if check_mx_contact_validation(contact)
failed_contacts << contact.id
end
failed_contacts << contact.id if filter_check_level(contact)
end
failed_contacts.uniq
@ -89,6 +94,9 @@ end
# Reports whether the contact's latest ValidationEvent::MX_CHECK mx-scoped
# validation events (ordered by created_at) have all failed.
# Returns false when fewer than MX_CHECK such events exist.
def check_mx_contact_validation(contact)
  required = ValidationEvent::MX_CHECK
  recent_mx_events = contact.validation_events.mx.order(created_at: :asc).last(required)

  recent_mx_events.size >= required && recent_mx_events.all?(&:failed?)
end