From 2d08f4bb09bec73bfae62b198c5c6a41983c9442 Mon Sep 17 00:00:00 2001 From: Stas Date: Thu, 30 Jun 2016 11:15:56 +0300 Subject: [PATCH 01/20] 119627029-modifications_for_model_and rake (cherry picked from commit 7ef8ad5) --- app/models/legal_document.rb | 30 ++++++---- .../20160629114503_add_hash_to_legal_doc.rb | 5 ++ lib/tasks/legal_doc.rake | 60 +++++++++++++++++++ 3 files changed, 85 insertions(+), 10 deletions(-) create mode 100644 db/migrate/20160629114503_add_hash_to_legal_doc.rb create mode 100644 lib/tasks/legal_doc.rake diff --git a/app/models/legal_document.rb b/app/models/legal_document.rb index b4bf3c96b..ad15ba754 100644 --- a/app/models/legal_document.rb +++ b/app/models/legal_document.rb @@ -32,18 +32,28 @@ class LegalDocument < ActiveRecord::Base def save_to_filesystem - loop do - rand = SecureRandom.random_number.to_s.last(4) - next if rand.to_i == 0 || rand.length < 4 - dir = "#{ENV['legal_documents_dir']}/#{Time.zone.now.strftime('%Y/%m/%d')}" - FileUtils.mkdir_p(dir, mode: 0775) - self.path = "#{dir}/#{Time.zone.now.to_formatted_s(:number)}_#{rand}.#{document_type}" - break unless File.file?(path) + digest = Digest::SHA1.new + ld = LegalDocument.where(checksum: digest.update(Base64.decode64(body))) + + if !ld + loop do + rand = SecureRandom.random_number.to_s.last(4) + next if rand.to_i == 0 || rand.length < 4 + + dir = "#{ENV['legal_documents_dir']}/#{Time.zone.now.strftime('%Y/%m/%d')}" + FileUtils.mkdir_p(dir, mode: 0775) + self.path = "#{dir}/#{Time.zone.now.to_formatted_s(:number)}_#{rand}.#{document_type}" + break unless File.file?(path) + end + + File.open(path, 'wb') { |f| f.write(Base64.decode64(body)) } unless Rails.env.test? + self.path = path + else + + self.path = ld.first.path + end - - File.open(path, 'wb') { |f| f.write(Base64.decode64(body)) } unless Rails.env.test? - self.path = path end def add_creator diff --git a/db/migrate/20160629114503_add_hash_to_legal_doc.rb b/db/migrate/20160629114503_add_hash_to_legal_doc.rb new file mode 100644 index 000000000..c79a5d13f --- /dev/null +++ b/db/migrate/20160629114503_add_hash_to_legal_doc.rb @@ -0,0 +1,5 @@ +class AddHashToLegalDoc < ActiveRecord::Migration + def change + add_column :legal_documents, :checksum, :text + end +end diff --git a/lib/tasks/legal_doc.rake b/lib/tasks/legal_doc.rake new file mode 100644 index 000000000..d1c8f003b --- /dev/null +++ b/lib/tasks/legal_doc.rake @@ -0,0 +1,60 @@ +namespace :legal_doc do + + desc 'Legal documents duplicates fix' + task all: :environment do + Rake::Task['legal_doc:generate_hash'].invoke + Rake::Task['legal_doc:remove_dublicates'].invoke + end + + desc 'Generate hash' + task generate_hash: :environment do + + start = Time.zone.now.to_f + puts '-----> Generating unique hash for legal documents' + count = 0 + + LegalDocument.find_each do |x| + + if File.exist?(x.path) && x.body_hash.blank? + digest = Digest::SHA1.new + digest.update File.binread(x.path) + x.checksum = digest.hexdigest + x.save + count += 1 + end + + end + puts "-----> Hash generated for #{count} rows in #{(Time.zone.now.to_f - start).round(2)} seconds" + end + + desc 'Remove duplicates' + task remove_dublicates: :environment do + + start = Time.zone.now.to_f + puts '-----> Removing legal documents duplicates' + count = 0 + modified = Array.new + + LegalDocument.find_each do |x| + if File.exist?(x.path) + + LegalDocument.where(checksum: x.checksum) do |y| + + if x.id != y.id && !modified.include?(x.id) + + File.delete(y.path) if File.exist?(y.path) + y.path = x.path + y.save + modified.push(y.id) + count += 1 + + end + end + end + end + puts "-----> Duplicates fixed for #{count} rows in #{(Time.zone.now.to_f - start).round(2)} seconds" + + end + +end + From 9b6bd22be33ee9d3782991e63c2675ab4670a106 Mon Sep 17 00:00:00 2001 From: Vladimir Krylov Date: Thu, 30 Jun 2016 11:30:45 +0300 Subject: [PATCH 02/20] Story#119627029 - less base64 decodings (cherry picked from commit 16ff42a) --- app/models/legal_document.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/models/legal_document.rb b/app/models/legal_document.rb index ad15ba754..7a5aa0d4d 100644 --- a/app/models/legal_document.rb +++ b/app/models/legal_document.rb @@ -32,9 +32,9 @@ class LegalDocument < ActiveRecord::Base def save_to_filesystem - digest = Digest::SHA1.new - ld = LegalDocument.where(checksum: digest.update(Base64.decode64(body))) + binary = Base64.decode64(body) + ld = LegalDocument.where(checksum: digest.update(binary)) if !ld loop do @@ -47,7 +47,7 @@ class LegalDocument < ActiveRecord::Base break unless File.file?(path) end - File.open(path, 'wb') { |f| f.write(Base64.decode64(body)) } unless Rails.env.test? + File.open(path, 'wb') { |f| f.write(binary) } unless Rails.env.test? self.path = path else From 71547d91a3c6c1d952500e398e4fa679a5a6f8fb Mon Sep 17 00:00:00 2001 From: Vladimir Krylov Date: Thu, 30 Jun 2016 11:33:21 +0300 Subject: [PATCH 03/20] Story#119627029 - more efficient legal doc search by checksum (cherry picked from commit 952d15d) --- app/models/legal_document.rb | 9 ++++----- db/migrate/20160629114503_add_hash_to_legal_doc.rb | 3 ++- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/app/models/legal_document.rb b/app/models/legal_document.rb index 7a5aa0d4d..8a8b3ba94 100644 --- a/app/models/legal_document.rb +++ b/app/models/legal_document.rb @@ -34,9 +34,9 @@ class LegalDocument < ActiveRecord::Base def save_to_filesystem digest = Digest::SHA1.new binary = Base64.decode64(body) - ld = LegalDocument.where(checksum: digest.update(binary)) + ld = LegalDocument.find_by(checksum: digest.update(binary)) - if !ld + if ld.nil? loop do rand = SecureRandom.random_number.to_s.last(4) next if rand.to_i == 0 || rand.length < 4 @@ -49,10 +49,9 @@ class LegalDocument < ActiveRecord::Base File.open(path, 'wb') { |f| f.write(binary) } unless Rails.env.test? self.path = path + else - - self.path = ld.first.path - + self.path = ld.path end end diff --git a/db/migrate/20160629114503_add_hash_to_legal_doc.rb b/db/migrate/20160629114503_add_hash_to_legal_doc.rb index c79a5d13f..8ea2f182d 100644 --- a/db/migrate/20160629114503_add_hash_to_legal_doc.rb +++ b/db/migrate/20160629114503_add_hash_to_legal_doc.rb @@ -1,5 +1,6 @@ class AddHashToLegalDoc < ActiveRecord::Migration def change - add_column :legal_documents, :checksum, :text + add_column :legal_documents, :checksum, :string + add_index :legal_documents, :checksum end end From 2e4e07503fc6a0f62417276f69ab0047df1f110d Mon Sep 17 00:00:00 2001 From: Vladimir Krylov Date: Thu, 30 Jun 2016 11:48:18 +0300 Subject: [PATCH 04/20] Story#119627029 - more efficient update of checksums of existing records (cherry picked from commit 8127181) --- lib/tasks/legal_doc.rake | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/lib/tasks/legal_doc.rake b/lib/tasks/legal_doc.rake index d1c8f003b..e2a00dfaa 100644 --- a/lib/tasks/legal_doc.rake +++ b/lib/tasks/legal_doc.rake @@ -8,17 +8,15 @@ namespace :legal_doc do desc 'Generate hash' task generate_hash: :environment do - start = Time.zone.now.to_f puts '-----> Generating unique hash for legal documents' count = 0 - LegalDocument.find_each do |x| - - if File.exist?(x.path) && x.body_hash.blank? + LegalDocument.where(checksum: [nil, ""]).find_each do |x| + if File.exist?(x.path) digest = Digest::SHA1.new digest.update File.binread(x.path) - x.checksum = digest.hexdigest + x.checksum = digest.to_s x.save count += 1 end From ca74eaeca17b76bea91883c11daacfdf46312085 Mon Sep 17 00:00:00 2001 From: Vladimir Krylov Date: Thu, 30 Jun 2016 11:57:16 +0300 Subject: [PATCH 05/20] Story#119627029 - extract checksum calculator (cherry picked from commit c90c985) --- app/models/legal_document.rb | 6 ++++++ lib/tasks/legal_doc.rake | 6 ++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/app/models/legal_document.rb b/app/models/legal_document.rb index 8a8b3ba94..90d83729b 100644 --- a/app/models/legal_document.rb +++ b/app/models/legal_document.rb @@ -55,6 +55,12 @@ class LegalDocument < ActiveRecord::Base end end + def calc_checksum + digest = Digest::SHA1.new + digest.update File.binread(path) + digest.to_s + end + def add_creator self.creator_str = ::PaperTrail.whodunnit true diff --git a/lib/tasks/legal_doc.rake b/lib/tasks/legal_doc.rake index e2a00dfaa..de8bfa784 100644 --- a/lib/tasks/legal_doc.rake +++ b/lib/tasks/legal_doc.rake @@ -14,9 +14,7 @@ namespace :legal_doc do LegalDocument.where(checksum: [nil, ""]).find_each do |x| if File.exist?(x.path) - digest = Digest::SHA1.new - digest.update File.binread(x.path) - x.checksum = digest.to_s + x.checksum = x.calc_checksum x.save count += 1 end @@ -33,7 +31,7 @@ namespace :legal_doc do count = 0 modified = Array.new - LegalDocument.find_each do |x| + LegalDocument.where.not(checksum: [nil, ""]).find_each do |x| if File.exist?(x.path) LegalDocument.where(checksum: x.checksum) do |y| From 1487a260d104d4d166f9b10dd7303d26bb6bd0a5 Mon Sep 17 00:00:00 2001 From: Vladimir Krylov Date: Thu, 30 Jun 2016 12:17:16 +0300 Subject: [PATCH 06/20] Story#119627029 - update algorithm how we merge existing legal doc files (cherry picked from commit 77de3ad) --- lib/tasks/legal_doc.rake | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/lib/tasks/legal_doc.rake b/lib/tasks/legal_doc.rake index de8bfa784..4597b1186 100644 --- a/lib/tasks/legal_doc.rake +++ b/lib/tasks/legal_doc.rake @@ -32,19 +32,16 @@ namespace :legal_doc do modified = Array.new LegalDocument.where.not(checksum: [nil, ""]).find_each do |x| - if File.exist?(x.path) + next if modified.include?(x.checksum) + next if !File.exist?(x.path) + modified.push(x.checksum) - LegalDocument.where(checksum: x.checksum) do |y| - if x.id != y.id && !modified.include?(x.id) - - File.delete(y.path) if File.exist?(y.path) - y.path = x.path - y.save - modified.push(y.id) - count += 1 - - end + LegalDocument.where(checksum: x.checksum).where.not(id: x.id) do |y| + unless modified.include?(x.id) + File.delete(y.path) if File.exist?(y.path) + y.update(path: x.path) + count += 1 end end end From 5ed949efc6f53816460d42761e0458cb27e2aa58 Mon Sep 17 00:00:00 2001 From: Stas Date: Fri, 8 Jul 2016 17:58:15 +0300 Subject: [PATCH 07/20] 119627029-task_moved_into_cronjob (cherry picked from commit 2f1b255) --- app/models/domain_cron.rb | 5 +++++ app/models/legal_document.rb | 17 +++++------------ lib/tasks/legal_doc.rake | 4 ++-- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/app/models/domain_cron.rb b/app/models/domain_cron.rb index 549e3b1da..db3a8bf9b 100644 --- a/app/models/domain_cron.rb +++ b/app/models/domain_cron.rb @@ -132,4 +132,9 @@ class DomainCron ) end + def self.delete_legal_doc_duplicates + Rake::Task['legal_doc:remove_duplicates'].reenable + Rake::Task['legal_doc:remove_duplicates'].invoke + end + end diff --git a/app/models/legal_document.rb b/app/models/legal_document.rb index 90d83729b..5b72110df 100644 --- a/app/models/legal_document.rb +++ b/app/models/legal_document.rb @@ -32,27 +32,20 @@ class LegalDocument < ActiveRecord::Base def save_to_filesystem - digest = Digest::SHA1.new binary = Base64.decode64(body) - ld = LegalDocument.find_by(checksum: digest.update(binary)) - if ld.nil? - loop do + loop do rand = SecureRandom.random_number.to_s.last(4) next if rand.to_i == 0 || rand.length < 4 - dir = "#{ENV['legal_documents_dir']}/#{Time.zone.now.strftime('%Y/%m/%d')}" FileUtils.mkdir_p(dir, mode: 0775) self.path = "#{dir}/#{Time.zone.now.to_formatted_s(:number)}_#{rand}.#{document_type}" break unless File.file?(path) - end - - File.open(path, 'wb') { |f| f.write(binary) } unless Rails.env.test? - self.path = path - - else - self.path = ld.path end + + File.open(path, 'wb') { |f| f.write(binary) } unless Rails.env.test? + self.path = path + end def calc_checksum diff --git a/lib/tasks/legal_doc.rake b/lib/tasks/legal_doc.rake index 4597b1186..82eba07d1 100644 --- a/lib/tasks/legal_doc.rake +++ b/lib/tasks/legal_doc.rake @@ -3,7 +3,7 @@ namespace :legal_doc do desc 'Legal documents duplicates fix' task all: :environment do Rake::Task['legal_doc:generate_hash'].invoke - Rake::Task['legal_doc:remove_dublicates'].invoke + Rake::Task['legal_doc:remove_duplicates'].invoke end desc 'Generate hash' @@ -24,7 +24,7 @@ namespace :legal_doc do end desc 'Remove duplicates' - task remove_dublicates: :environment do + task remove_duplicates: :environment do start = Time.zone.now.to_f puts '-----> Removing legal documents duplicates' From cd81a3d3560f2d5f59630dd5bb03e24fd3178a8c Mon Sep 17 00:00:00 2001 From: Vladimir Krylov Date: Mon, 1 Aug 2016 16:01:52 +0300 Subject: [PATCH 08/20] Story#119627029 - save checksum of legal doc to DB (cherry picked from commit dd3f19c) --- app/models/legal_document.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/models/legal_document.rb b/app/models/legal_document.rb index 5b72110df..84fe84335 100644 --- a/app/models/legal_document.rb +++ b/app/models/legal_document.rb @@ -33,6 +33,7 @@ class LegalDocument < ActiveRecord::Base def save_to_filesystem binary = Base64.decode64(body) + digest = Digest::SHA1.new.update(binary).to_s loop do rand = SecureRandom.random_number.to_s.last(4) @@ -45,7 +46,7 @@ class LegalDocument < ActiveRecord::Base File.open(path, 'wb') { |f| f.write(binary) } unless Rails.env.test? self.path = path - + self.checksum = digest end def calc_checksum From 9799e5c877f3c4130cd5624316cb2bbe414c08bc Mon Sep 17 00:00:00 2001 From: Vladimir Krylov Date: Mon, 8 Aug 2016 13:11:01 +0300 Subject: [PATCH 09/20] Story#119627029 - uniq legal docs by new logic (cherry picked from commit a671ac4) --- lib/tasks/legal_doc.rake | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/lib/tasks/legal_doc.rake b/lib/tasks/legal_doc.rake index 82eba07d1..f886980de 100644 --- a/lib/tasks/legal_doc.rake +++ b/lib/tasks/legal_doc.rake @@ -23,6 +23,9 @@ namespace :legal_doc do puts "-----> Hash generated for #{count} rows in #{(Time.zone.now.to_f - start).round(2)} seconds" end + + # Starting point is Domain legal docs + # then inside it checking the same domains and connected contacts desc 'Remove duplicates' task remove_duplicates: :environment do @@ -31,18 +34,33 @@ namespace :legal_doc do count = 0 modified = Array.new - LegalDocument.where.not(checksum: [nil, ""]).find_each do |x| - next if modified.include?(x.checksum) - next if !File.exist?(x.path) - modified.push(x.checksum) + LegalDocument.where(documentable_type: "Domain").where.not(checksum: [nil, ""]).find_each do |orig_legal| + next if modified.include?(orig_legal.checksum) + next if !File.exist?(orig_legal.path) + modified.push(orig_legal.checksum) - LegalDocument.where(checksum: x.checksum).where.not(id: x.id) do |y| - unless modified.include?(x.id) - File.delete(y.path) if File.exist?(y.path) - y.update(path: x.path) - count += 1 - end + LegalDocument.where(documentable_type: "Domain", documentable_id: orig_legal.documentable_id). + where(checksum: orig_legal.checksum). + where.not(id: orig_legal.id).each do |new_legal| + unless modified.include?(orig_legal.id) + File.delete(new_legal.path) if File.exist?(new_legal.path) + new_legal.update(path: orig_legal.path) + count += 1 + end + end + + contact_ids = DomainVersion.where(item_id: orig_legal.documentable_id).distinct. + pluck("object->>'registrar_id'", "object->'registrant_id'", "object_changes->'registrar_id'", + "object_changes->'registrant_id'", "children->'tech_contacts'", "children->'admin_contacts'").flatten.uniq + contact_ids = contact_ids.map{|id| id.is_a?(Hash) ? id["id"] : id}.compact.uniq + LegalDocument.where(documentable_type: "Contact", documentable_id: contact_ids). + where(checksum: orig_legal.checksum).each do |new_legal| + unless modified.include?(orig_legal.id) + File.delete(new_legal.path) if File.exist?(new_legal.path) + new_legal.update(path: orig_legal.path) + count += 1 + end end end puts "-----> Duplicates fixed for #{count} rows in #{(Time.zone.now.to_f - start).round(2)} seconds" From 22f8633f87995ffc3070ed462a00fa4441e2558f Mon Sep 17 00:00:00 2001 From: Vladimir Krylov Date: Thu, 18 Aug 2016 14:54:31 +0300 Subject: [PATCH 10/20] Stroy#119627029 - typo in accessor (cherry picked from commit 5b946ce) --- lib/tasks/legal_doc.rake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/tasks/legal_doc.rake b/lib/tasks/legal_doc.rake index f886980de..6820e783a 100644 --- a/lib/tasks/legal_doc.rake +++ b/lib/tasks/legal_doc.rake @@ -51,8 +51,8 @@ namespace :legal_doc do end contact_ids = DomainVersion.where(item_id: orig_legal.documentable_id).distinct. - pluck("object->>'registrar_id'", "object->'registrant_id'", "object_changes->'registrar_id'", - "object_changes->'registrant_id'", "children->'tech_contacts'", "children->'admin_contacts'").flatten.uniq + pluck("object->>'registrar_id'", "object->>'registrant_id'", "object_changes->>'registrar_id'", + "object_changes->>'registrant_id'", "children->>'tech_contacts'", "children->>'admin_contacts'").flatten.uniq contact_ids = contact_ids.map{|id| id.is_a?(Hash) ? id["id"] : id}.compact.uniq LegalDocument.where(documentable_type: "Contact", documentable_id: contact_ids). where(checksum: orig_legal.checksum).each do |new_legal| From f639ae7eaac5a5a79d0120f870efe7ce7747f240 Mon Sep 17 00:00:00 2001 From: Vladimir Krylov Date: Thu, 18 Aug 2016 22:57:26 +0300 Subject: [PATCH 11/20] Stroy#119627029 - do not load legal doc if file is the same (cherry picked from commit 55d111a) --- lib/tasks/legal_doc.rake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/tasks/legal_doc.rake b/lib/tasks/legal_doc.rake index 6820e783a..26a648ef2 100644 --- a/lib/tasks/legal_doc.rake +++ b/lib/tasks/legal_doc.rake @@ -42,7 +42,7 @@ namespace :legal_doc do LegalDocument.where(documentable_type: "Domain", documentable_id: orig_legal.documentable_id). where(checksum: orig_legal.checksum). - where.not(id: orig_legal.id).each do |new_legal| + where.not(id: orig_legal.id).where.not(path: orig_legal.path).each do |new_legal| unless modified.include?(orig_legal.id) File.delete(new_legal.path) if File.exist?(new_legal.path) new_legal.update(path: orig_legal.path) @@ -55,7 +55,7 @@ namespace :legal_doc do "object_changes->>'registrant_id'", "children->>'tech_contacts'", "children->>'admin_contacts'").flatten.uniq contact_ids = contact_ids.map{|id| id.is_a?(Hash) ? id["id"] : id}.compact.uniq LegalDocument.where(documentable_type: "Contact", documentable_id: contact_ids). - where(checksum: orig_legal.checksum).each do |new_legal| + where(checksum: orig_legal.checksum).where.not(path: orig_legal.path).each do |new_legal| unless modified.include?(orig_legal.id) File.delete(new_legal.path) if File.exist?(new_legal.path) new_legal.update(path: orig_legal.path) From 443913f18a7e060a2f48b3091dc80bd1792b0ad8 Mon Sep 17 00:00:00 2001 From: Vladimir Krylov Date: Thu, 18 Aug 2016 22:58:18 +0300 Subject: [PATCH 12/20] Stroy#119627029 - in some situations we may have array, so setting to not so deep array (cherry picked from commit 7ce5b20) --- lib/tasks/legal_doc.rake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/tasks/legal_doc.rake b/lib/tasks/legal_doc.rake index 26a648ef2..8e737215a 100644 --- a/lib/tasks/legal_doc.rake +++ b/lib/tasks/legal_doc.rake @@ -53,7 +53,7 @@ namespace :legal_doc do contact_ids = DomainVersion.where(item_id: orig_legal.documentable_id).distinct. pluck("object->>'registrar_id'", "object->>'registrant_id'", "object_changes->>'registrar_id'", "object_changes->>'registrant_id'", "children->>'tech_contacts'", "children->>'admin_contacts'").flatten.uniq - contact_ids = contact_ids.map{|id| id.is_a?(Hash) ? id["id"] : id}.compact.uniq + contact_ids = contact_ids.map{|id| id.is_a?(Hash) ? id["id"] : id}.flatten.compact.uniq LegalDocument.where(documentable_type: "Contact", documentable_id: contact_ids). where(checksum: orig_legal.checksum).where.not(path: orig_legal.path).each do |new_legal| unless modified.include?(orig_legal.id) From 472288e69191750e1cc18efb62e66deb902ea002 Mon Sep 17 00:00:00 2001 From: Vladimir Krylov Date: Thu, 1 Sep 2016 15:24:07 +0300 Subject: [PATCH 13/20] Story#119627029 show in log files which files are updated (cherry picked from commit 0016b54) --- lib/tasks/legal_doc.rake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/tasks/legal_doc.rake b/lib/tasks/legal_doc.rake index 8e737215a..504a0f941 100644 --- a/lib/tasks/legal_doc.rake +++ b/lib/tasks/legal_doc.rake @@ -47,6 +47,7 @@ namespace :legal_doc do File.delete(new_legal.path) if File.exist?(new_legal.path) new_legal.update(path: orig_legal.path) count += 1 + puts "File #{new_legal.path} has been removed by Domain #{new_legal.documentable_id}. Document id: #{new_legal.id}" end end @@ -60,6 +61,7 @@ namespace :legal_doc do File.delete(new_legal.path) if File.exist?(new_legal.path) new_legal.update(path: orig_legal.path) count += 1 + puts "File #{new_legal.path} has been removed by Contact #{new_legal.documentable_id}. Document id: #{new_legal.id}" end end end From 0dc711f7d251950c981e1c270eb73bd5d385dc8f Mon Sep 17 00:00:00 2001 From: Vladimir Krylov Date: Mon, 3 Oct 2016 13:50:22 +0300 Subject: [PATCH 14/20] Story#119627029 test that legal doc will be uniq within same domain (cherry picked from commit 9297167) --- app/models/legal_document.rb | 44 ++++++++- db/schema-read-only.rb | 43 ++++++++- db/structure.sql | 137 +++++++++++++++++++++------- lib/tasks/legal_doc.rake | 40 +------- spec/models/legal_documents_spec.rb | 39 ++++++++ 5 files changed, 225 insertions(+), 78 deletions(-) create mode 100644 spec/models/legal_documents_spec.rb diff --git a/app/models/legal_document.rb b/app/models/legal_document.rb index 84fe84335..f6418dfc2 100644 --- a/app/models/legal_document.rb +++ b/app/models/legal_document.rb @@ -1,4 +1,5 @@ class LegalDocument < ActiveRecord::Base + cattr_accessor :explicitly_write_file include EppErrors MIN_BODY_SIZE = (1.37 * 3.kilobytes).ceil @@ -44,7 +45,7 @@ class LegalDocument < ActiveRecord::Base break unless File.file?(path) end - File.open(path, 'wb') { |f| f.write(binary) } unless Rails.env.test? + File.open(path, 'wb') { |f| f.write(binary) } if !Rails.env.test? || self.class.explicitly_write_file self.path = path self.checksum = digest end @@ -59,4 +60,45 @@ class LegalDocument < ActiveRecord::Base self.creator_str = ::PaperTrail.whodunnit true end + + + def self.remove_duplicates + start = Time.zone.now.to_f + puts '-----> Removing legal documents duplicates' + count = 0 + modified = Array.new + + LegalDocument.where(documentable_type: "Domain").where.not(checksum: [nil, ""]).find_each do |orig_legal| + next if modified.include?(orig_legal.checksum) + next if !File.exist?(orig_legal.path) + modified.push(orig_legal.checksum) + + LegalDocument.where(documentable_type: "Domain", documentable_id: orig_legal.documentable_id). + where(checksum: orig_legal.checksum). + where.not(id: orig_legal.id).where.not(path: orig_legal.path).each do |new_legal| + unless modified.include?(orig_legal.id) + File.delete(new_legal.path) if File.exist?(new_legal.path) + new_legal.update(path: orig_legal.path) + count += 1 + puts "File #{new_legal.path} has been removed by Domain #{new_legal.documentable_id}. Document id: #{new_legal.id}" + end + end + + contact_ids = DomainVersion.where(item_id: orig_legal.documentable_id).distinct. + pluck("object->>'registrar_id'", "object->>'registrant_id'", "object_changes->>'registrar_id'", + "object_changes->>'registrant_id'", "children->>'tech_contacts'", "children->>'admin_contacts'").flatten.uniq + contact_ids = contact_ids.map{|id| id.is_a?(Hash) ? id["id"] : id}.flatten.compact.uniq + LegalDocument.where(documentable_type: "Contact", documentable_id: contact_ids). + where(checksum: orig_legal.checksum).where.not(path: orig_legal.path).each do |new_legal| + unless modified.include?(orig_legal.id) + File.delete(new_legal.path) if File.exist?(new_legal.path) + new_legal.update(path: orig_legal.path) + count += 1 + puts "File #{new_legal.path} has been removed by Contact #{new_legal.documentable_id}. Document id: #{new_legal.id}" + end + end + end + puts "-----> Duplicates fixed for #{count} rows in #{(Time.zone.now.to_f - start).round(2)} seconds" + + end end diff --git a/db/schema-read-only.rb b/db/schema-read-only.rb index 2d410461c..c633d4b60 100644 --- a/db/schema-read-only.rb +++ b/db/schema-read-only.rb @@ -11,12 +11,12 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20160304125933) do +ActiveRecord::Schema.define(version: 20160629114503) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" - enable_extension "hstore" enable_extension "btree_gist" + enable_extension "hstore" create_table "account_activities", force: :cascade do |t| t.integer "account_id" @@ -214,11 +214,13 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.string "country_code" t.string "state" t.integer "legacy_id" - t.string "statuses", array: true + t.string "statuses", default: [], array: true t.hstore "status_notes" t.integer "legacy_history_id" t.integer "copy_from_id" t.datetime "ident_updated_at" + t.integer "upid" + t.datetime "up_date" end add_index "contacts", ["code"], name: "index_contacts_on_code", using: :btree @@ -261,6 +263,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at", null: false t.datetime "updated_at", null: false t.string "invoice_number" + t.text "request" end add_index "directos", ["item_type", "item_id"], name: "index_directos_on_item_type_and_item_id", using: :btree @@ -360,6 +363,8 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.boolean "reserved", default: false t.hstore "status_notes" t.string "statuses_backup", default: [], array: true + t.integer "upid" + t.datetime "up_date" end add_index "domains", ["delete_at"], name: "index_domains_on_delete_at", using: :btree @@ -474,8 +479,10 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "creator_str" t.string "path" + t.string "checksum" end + add_index "legal_documents", ["checksum"], name: "index_legal_documents_on_checksum", using: :btree add_index "legal_documents", ["documentable_type", "documentable_id"], name: "index_legal_documents_on_documentable_type_and_documentable_id", using: :btree create_table "log_account_activities", force: :cascade do |t| @@ -488,6 +495,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end add_index "log_account_activities", ["item_type", "item_id"], name: "index_log_account_activities_on_item_type_and_item_id", using: :btree @@ -503,6 +511,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end add_index "log_accounts", ["item_type", "item_id"], name: "index_log_accounts_on_item_type_and_item_id", using: :btree @@ -518,6 +527,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end add_index "log_addresses", ["item_type", "item_id"], name: "index_log_addresses_on_item_type_and_item_id", using: :btree @@ -533,6 +543,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end add_index "log_api_users", ["item_type", "item_id"], name: "index_log_api_users_on_item_type_and_item_id", using: :btree @@ -548,6 +559,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end add_index "log_bank_statements", ["item_type", "item_id"], name: "index_log_bank_statements_on_item_type_and_item_id", using: :btree @@ -563,6 +575,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end add_index "log_bank_transactions", ["item_type", "item_id"], name: "index_log_bank_transactions_on_item_type_and_item_id", using: :btree @@ -578,6 +591,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end add_index "log_blocked_domains", ["item_type", "item_id"], name: "index_log_blocked_domains_on_item_type_and_item_id", using: :btree @@ -593,6 +607,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end add_index "log_certificates", ["item_type", "item_id"], name: "index_log_certificates_on_item_type_and_item_id", using: :btree @@ -608,6 +623,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end add_index "log_contact_statuses", ["item_type", "item_id"], name: "index_log_contact_statuses_on_item_type_and_item_id", using: :btree @@ -624,6 +640,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.string "session" t.json "children" t.datetime "ident_updated_at" + t.string "uuid" end add_index "log_contacts", ["item_type", "item_id"], name: "index_log_contacts_on_item_type_and_item_id", using: :btree @@ -639,6 +656,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end add_index "log_countries", ["item_type", "item_id"], name: "index_log_countries_on_item_type_and_item_id", using: :btree @@ -654,6 +672,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end add_index "log_dnskeys", ["item_type", "item_id"], name: "index_log_dnskeys_on_item_type_and_item_id", using: :btree @@ -669,6 +688,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end add_index "log_domain_contacts", ["item_type", "item_id"], name: "index_log_domain_contacts_on_item_type_and_item_id", using: :btree @@ -684,6 +704,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end add_index "log_domain_statuses", ["item_type", "item_id"], name: "index_log_domain_statuses_on_item_type_and_item_id", using: :btree @@ -699,6 +720,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end add_index "log_domain_transfers", ["item_type", "item_id"], name: "index_log_domain_transfers_on_item_type_and_item_id", using: :btree @@ -717,6 +739,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.text "admin_contact_ids", default: [], array: true t.string "session" t.json "children" + t.string "uuid" end add_index "log_domains", ["item_type", "item_id"], name: "index_log_domains_on_item_type_and_item_id", using: :btree @@ -732,6 +755,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end add_index "log_invoice_items", ["item_type", "item_id"], name: "index_log_invoice_items_on_item_type_and_item_id", using: :btree @@ -747,6 +771,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end add_index "log_invoices", ["item_type", "item_id"], name: "index_log_invoices_on_item_type_and_item_id", using: :btree @@ -762,6 +787,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end add_index "log_keyrelays", ["item_type", "item_id"], name: "index_log_keyrelays_on_item_type_and_item_id", using: :btree @@ -777,6 +803,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end add_index "log_messages", ["item_type", "item_id"], name: "index_log_messages_on_item_type_and_item_id", using: :btree @@ -792,6 +819,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end add_index "log_nameservers", ["item_type", "item_id"], name: "index_log_nameservers_on_item_type_and_item_id", using: :btree @@ -806,6 +834,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.json "object_changes" t.datetime "created_at" t.string "session" + t.string "uuid" end create_table "log_registrars", force: :cascade do |t| @@ -818,6 +847,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end add_index "log_registrars", ["item_type", "item_id"], name: "index_log_registrars_on_item_type_and_item_id", using: :btree @@ -833,6 +863,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end add_index "log_reserved_domains", ["item_type", "item_id"], name: "index_log_reserved_domains_on_item_type_and_item_id", using: :btree @@ -848,6 +879,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end add_index "log_settings", ["item_type", "item_id"], name: "index_log_settings_on_item_type_and_item_id", using: :btree @@ -863,6 +895,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end add_index "log_users", ["item_type", "item_id"], name: "index_log_users_on_item_type_and_item_id", using: :btree @@ -878,6 +911,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end create_table "log_zonefile_settings", force: :cascade do |t| @@ -890,6 +924,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.datetime "created_at" t.string "session" t.json "children" + t.string "uuid" end add_index "log_zonefile_settings", ["item_type", "item_id"], name: "index_log_zonefile_settings_on_item_type_and_item_id", using: :btree @@ -931,6 +966,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.string "creator_str" t.string "updator_str" t.integer "legacy_domain_id" + t.string "hostname_puny" end add_index "nameservers", ["domain_id"], name: "index_nameservers_on_domain_id", using: :btree @@ -1015,6 +1051,7 @@ ActiveRecord::Schema.define(version: 20160304125933) do t.boolean "vat" t.integer "legacy_id" t.string "reference_no" + t.boolean "test_registrar", default: false end add_index "registrars", ["code"], name: "index_registrars_on_code", using: :btree diff --git a/db/structure.sql b/db/structure.sql index 78228b693..3ce24e478 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -744,11 +744,13 @@ CREATE TABLE contacts ( country_code character varying, state character varying, legacy_id integer, - statuses character varying[], + statuses character varying[] DEFAULT '{}'::character varying[], status_notes hstore, legacy_history_id integer, copy_from_id integer, - ident_updated_at timestamp without time zone + ident_updated_at timestamp without time zone, + upid integer, + up_date timestamp without time zone ); @@ -887,7 +889,8 @@ CREATE TABLE directos ( response json, created_at timestamp without time zone NOT NULL, updated_at timestamp without time zone NOT NULL, - invoice_number character varying + invoice_number character varying, + request text ); @@ -1098,7 +1101,9 @@ CREATE TABLE domains ( statuses character varying[], reserved boolean DEFAULT false, status_notes hstore, - statuses_backup character varying[] DEFAULT '{}'::character varying[] + statuses_backup character varying[] DEFAULT '{}'::character varying[], + upid integer, + up_date timestamp without time zone ); @@ -1316,7 +1321,8 @@ CREATE TABLE legal_documents ( documentable_type character varying, created_at timestamp without time zone, creator_str character varying, - path character varying + path character varying, + checksum character varying ); @@ -1353,7 +1359,8 @@ CREATE TABLE log_account_activities ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -1390,7 +1397,8 @@ CREATE TABLE log_accounts ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -1427,7 +1435,8 @@ CREATE TABLE log_addresses ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -1464,7 +1473,8 @@ CREATE TABLE log_api_users ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -1501,7 +1511,8 @@ CREATE TABLE log_bank_statements ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -1538,7 +1549,8 @@ CREATE TABLE log_bank_transactions ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -1575,7 +1587,8 @@ CREATE TABLE log_blocked_domains ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -1612,7 +1625,8 @@ CREATE TABLE log_certificates ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -1649,7 +1663,8 @@ CREATE TABLE log_contact_statuses ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -1687,7 +1702,8 @@ CREATE TABLE log_contacts ( created_at timestamp without time zone, session character varying, children json, - ident_updated_at timestamp without time zone + ident_updated_at timestamp without time zone, + uuid character varying ); @@ -1724,7 +1740,8 @@ CREATE TABLE log_countries ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -1761,7 +1778,8 @@ CREATE TABLE log_dnskeys ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -1798,7 +1816,8 @@ CREATE TABLE log_domain_contacts ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -1835,7 +1854,8 @@ CREATE TABLE log_domain_statuses ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -1872,7 +1892,8 @@ CREATE TABLE log_domain_transfers ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -1912,7 +1933,8 @@ CREATE TABLE log_domains ( tech_contact_ids text[] DEFAULT '{}'::text[], admin_contact_ids text[] DEFAULT '{}'::text[], session character varying, - children json + children json, + uuid character varying ); @@ -1949,7 +1971,8 @@ CREATE TABLE log_invoice_items ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -1986,7 +2009,8 @@ CREATE TABLE log_invoices ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -2023,7 +2047,8 @@ CREATE TABLE log_keyrelays ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -2060,7 +2085,8 @@ CREATE TABLE log_messages ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -2097,7 +2123,8 @@ CREATE TABLE log_nameservers ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -2133,7 +2160,8 @@ CREATE TABLE log_pricelists ( object json, object_changes json, created_at timestamp without time zone, - session character varying + session character varying, + uuid character varying ); @@ -2170,7 +2198,8 @@ CREATE TABLE log_registrars ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -2207,7 +2236,8 @@ CREATE TABLE log_reserved_domains ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -2244,7 +2274,8 @@ CREATE TABLE log_settings ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -2281,7 +2312,8 @@ CREATE TABLE log_users ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -2318,7 +2350,8 @@ CREATE TABLE log_white_ips ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -2355,7 +2388,8 @@ CREATE TABLE log_zonefile_settings ( object_changes json, created_at timestamp without time zone, session character varying, - children json + children json, + uuid character varying ); @@ -2466,7 +2500,8 @@ CREATE TABLE nameservers ( domain_id integer, creator_str character varying, updator_str character varying, - legacy_domain_id integer + legacy_domain_id integer, + hostname_puny character varying ); @@ -2673,7 +2708,8 @@ CREATE TABLE registrars ( directo_handle character varying, vat boolean, legacy_id integer, - reference_no character varying + reference_no character varying, + test_registrar boolean DEFAULT false ); @@ -4274,6 +4310,13 @@ CREATE INDEX index_keyrelays_on_domain_id ON keyrelays USING btree (domain_id); CREATE INDEX index_keyrelays_on_requester_id ON keyrelays USING btree (requester_id); +-- +-- Name: index_legal_documents_on_checksum; Type: INDEX; Schema: public; Owner: -; Tablespace: +-- + +CREATE INDEX index_legal_documents_on_checksum ON legal_documents USING btree (checksum); + + -- -- Name: index_legal_documents_on_documentable_type_and_documentable_id; Type: INDEX; Schema: public; Owner: -; Tablespace: -- @@ -5214,5 +5257,29 @@ INSERT INTO schema_migrations (version) VALUES ('20160118092454'); INSERT INTO schema_migrations (version) VALUES ('20160218102355'); +INSERT INTO schema_migrations (version) VALUES ('20160225113801'); + +INSERT INTO schema_migrations (version) VALUES ('20160225113812'); + +INSERT INTO schema_migrations (version) VALUES ('20160226132045'); + +INSERT INTO schema_migrations (version) VALUES ('20160226132056'); + INSERT INTO schema_migrations (version) VALUES ('20160304125933'); +INSERT INTO schema_migrations (version) VALUES ('20160311085957'); + +INSERT INTO schema_migrations (version) VALUES ('20160405131315'); + +INSERT INTO schema_migrations (version) VALUES ('20160411140719'); + +INSERT INTO schema_migrations (version) VALUES ('20160414110443'); + +INSERT INTO schema_migrations (version) VALUES ('20160421074023'); + +INSERT INTO schema_migrations (version) VALUES ('20160429114732'); + +INSERT INTO schema_migrations (version) VALUES ('20160527110738'); + +INSERT INTO schema_migrations (version) VALUES ('20160629114503'); + diff --git a/lib/tasks/legal_doc.rake b/lib/tasks/legal_doc.rake index 504a0f941..ad7df0fd2 100644 --- a/lib/tasks/legal_doc.rake +++ b/lib/tasks/legal_doc.rake @@ -28,45 +28,7 @@ namespace :legal_doc do # then inside it checking the same domains and connected contacts desc 'Remove duplicates' task remove_duplicates: :environment do - - start = Time.zone.now.to_f - puts '-----> Removing legal documents duplicates' - count = 0 - modified = Array.new - - LegalDocument.where(documentable_type: "Domain").where.not(checksum: [nil, ""]).find_each do |orig_legal| - next if modified.include?(orig_legal.checksum) - next if !File.exist?(orig_legal.path) - modified.push(orig_legal.checksum) - - - LegalDocument.where(documentable_type: "Domain", documentable_id: orig_legal.documentable_id). - where(checksum: orig_legal.checksum). - where.not(id: orig_legal.id).where.not(path: orig_legal.path).each do |new_legal| - unless modified.include?(orig_legal.id) - File.delete(new_legal.path) if File.exist?(new_legal.path) - new_legal.update(path: orig_legal.path) - count += 1 - puts "File #{new_legal.path} has been removed by Domain #{new_legal.documentable_id}. Document id: #{new_legal.id}" - end - end - - contact_ids = DomainVersion.where(item_id: orig_legal.documentable_id).distinct. - pluck("object->>'registrar_id'", "object->>'registrant_id'", "object_changes->>'registrar_id'", - "object_changes->>'registrant_id'", "children->>'tech_contacts'", "children->>'admin_contacts'").flatten.uniq - contact_ids = contact_ids.map{|id| id.is_a?(Hash) ? id["id"] : id}.flatten.compact.uniq - LegalDocument.where(documentable_type: "Contact", documentable_id: contact_ids). - where(checksum: orig_legal.checksum).where.not(path: orig_legal.path).each do |new_legal| - unless modified.include?(orig_legal.id) - File.delete(new_legal.path) if File.exist?(new_legal.path) - new_legal.update(path: orig_legal.path) - count += 1 - puts "File #{new_legal.path} has been removed by Contact #{new_legal.documentable_id}. Document id: #{new_legal.id}" - end - end - end - puts "-----> Duplicates fixed for #{count} rows in #{(Time.zone.now.to_f - start).round(2)} seconds" - + LegalDocument.remove_duplicates end end diff --git a/spec/models/legal_documents_spec.rb b/spec/models/legal_documents_spec.rb new file mode 100644 index 000000000..4ec71313b --- /dev/null +++ b/spec/models/legal_documents_spec.rb @@ -0,0 +1,39 @@ +require 'rails_helper' + +describe LegalDocument do + context 'tasks' do + it 'make files uniq' do + Fabricate(:zonefile_setting, origin: 'ee') + Fabricate(:zonefile_setting, origin: 'pri.ee') + Fabricate(:zonefile_setting, origin: 'med.ee') + Fabricate(:zonefile_setting, origin: 'fie.ee') + Fabricate(:zonefile_setting, origin: 'com.ee') + LegalDocument.explicitly_write_file = true + + domain = Fabricate(:domain) + original = domain.legal_documents.create!(body: Base64.encode64('S' * 4.kilobytes)) + copy = domain.legal_documents.create!(body: Base64.encode64('S' * 4.kilobytes)) + skipping_as_different = domain.legal_documents.create!(body: Base64.encode64('D' * 4.kilobytes)) + skipping_as_no_checksum = domain.legal_documents.create!(checksum: nil, body: Base64.encode64('S' * 4.kilobytes)) + skipping_as_no_checksum2 = domain.legal_documents.create!(checksum: "", body: Base64.encode64('S' * 4.kilobytes)) + + skipping_as_no_checksum.update_columns(checksum: nil) + skipping_as_no_checksum2.update_columns(checksum: "") + skipping_as_no_checksum.reload + skipping_as_no_checksum2.reload + skipping_as_no_checksum.path.should_not == skipping_as_no_checksum2.path + + skipping_as_no_checksum.checksum.should == nil + skipping_as_no_checksum2.checksum.should == "" + original.checksum.should == copy.checksum + original.checksum.should_not == skipping_as_different.checksum + + LegalDocument.remove_duplicates + skipping_as_no_checksum.path.should_not be(skipping_as_no_checksum2.path) + original.path.should_not be(skipping_as_different.path) + original.path.should == copy.path + + end + end + +end \ No newline at end of file From 2c3c762da313d5cd3920711ecc62726e9600f6ab Mon Sep 17 00:00:00 2001 From: Vladimir Krylov Date: Mon, 3 Oct 2016 13:54:02 +0300 Subject: [PATCH 15/20] Story#119627029 test that legal doc will be uniq within same domain (check different domains) (cherry picked from commit 96349b1) --- spec/models/legal_documents_spec.rb | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/spec/models/legal_documents_spec.rb b/spec/models/legal_documents_spec.rb index 4ec71313b..ac0379176 100644 --- a/spec/models/legal_documents_spec.rb +++ b/spec/models/legal_documents_spec.rb @@ -10,12 +10,15 @@ describe LegalDocument do Fabricate(:zonefile_setting, origin: 'com.ee') LegalDocument.explicitly_write_file = true - domain = Fabricate(:domain) - original = domain.legal_documents.create!(body: Base64.encode64('S' * 4.kilobytes)) - copy = domain.legal_documents.create!(body: Base64.encode64('S' * 4.kilobytes)) - skipping_as_different = domain.legal_documents.create!(body: Base64.encode64('D' * 4.kilobytes)) - skipping_as_no_checksum = domain.legal_documents.create!(checksum: nil, body: Base64.encode64('S' * 4.kilobytes)) - skipping_as_no_checksum2 = domain.legal_documents.create!(checksum: "", body: Base64.encode64('S' * 4.kilobytes)) + domain = Fabricate(:domain) + domain2 = Fabricate(:domain) + domains = [] + domains << original = domain.legal_documents.create!(body: Base64.encode64('S' * 4.kilobytes)) + domains << copy = domain.legal_documents.create!(body: Base64.encode64('S' * 4.kilobytes)) + domains << skipping_as_different_domain = domain2.legal_documents.create!(body: Base64.encode64('S' * 4.kilobytes)) + domains << skipping_as_different = domain.legal_documents.create!(body: Base64.encode64('D' * 4.kilobytes)) + domains << skipping_as_no_checksum = domain.legal_documents.create!(checksum: nil, body: Base64.encode64('S' * 4.kilobytes)) + domains << skipping_as_no_checksum2 = domain.legal_documents.create!(checksum: "", body: Base64.encode64('S' * 4.kilobytes)) skipping_as_no_checksum.update_columns(checksum: nil) skipping_as_no_checksum2.update_columns(checksum: "") @@ -29,8 +32,11 @@ describe LegalDocument do original.checksum.should_not == skipping_as_different.checksum LegalDocument.remove_duplicates + domains.each(&:reload) + skipping_as_no_checksum.path.should_not be(skipping_as_no_checksum2.path) - original.path.should_not be(skipping_as_different.path) + original.path.should_not == skipping_as_different.path + original.path.should_not == skipping_as_different_domain.path original.path.should == copy.path end From b8bc341bee219fe9e0eeaee817a566a02698a4dd Mon Sep 17 00:00:00 2001 From: Vladimir Krylov Date: Mon, 3 Oct 2016 14:01:21 +0300 Subject: [PATCH 16/20] Story#119627029 registrar is not part of contacts (cherry picked from commit d390045) --- app/models/legal_document.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/models/legal_document.rb b/app/models/legal_document.rb index f6418dfc2..13fbdf049 100644 --- a/app/models/legal_document.rb +++ b/app/models/legal_document.rb @@ -85,8 +85,8 @@ class LegalDocument < ActiveRecord::Base end contact_ids = DomainVersion.where(item_id: orig_legal.documentable_id).distinct. - pluck("object->>'registrar_id'", "object->>'registrant_id'", "object_changes->>'registrar_id'", - "object_changes->>'registrant_id'", "children->>'tech_contacts'", "children->>'admin_contacts'").flatten.uniq + pluck("object->>'registrant_id'", "object_changes->>'registrant_id'", + "children->>'tech_contacts'", "children->>'admin_contacts'").flatten.uniq contact_ids = contact_ids.map{|id| id.is_a?(Hash) ? id["id"] : id}.flatten.compact.uniq LegalDocument.where(documentable_type: "Contact", documentable_id: contact_ids). where(checksum: orig_legal.checksum).where.not(path: orig_legal.path).each do |new_legal| From be75b79c85551fa97bd356178e7df85518a84088 Mon Sep 17 00:00:00 2001 From: Vladimir Krylov Date: Mon, 3 Oct 2016 14:51:18 +0300 Subject: [PATCH 17/20] Story#119627029 process history ids even if they are strings (cherry picked from commit 5cc603d) --- app/models/legal_document.rb | 12 +++++++++++- spec/models/legal_documents_spec.rb | 17 ++++++++++++++++- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/app/models/legal_document.rb b/app/models/legal_document.rb index 13fbdf049..e8139519a 100644 --- a/app/models/legal_document.rb +++ b/app/models/legal_document.rb @@ -86,8 +86,18 @@ class LegalDocument < ActiveRecord::Base contact_ids = DomainVersion.where(item_id: orig_legal.documentable_id).distinct. pluck("object->>'registrant_id'", "object_changes->>'registrant_id'", + "children->>'tech_contacts'", "children->>'admin_contacts'", "children->>'tech_contacts'", "children->>'admin_contacts'").flatten.uniq - contact_ids = contact_ids.map{|id| id.is_a?(Hash) ? id["id"] : id}.flatten.compact.uniq + contact_ids = contact_ids.map{|id| + case id + when Hash + id["id"] + when String + JSON.parse(id) rescue id.to_i + else + id + end + }.flatten.compact.uniq LegalDocument.where(documentable_type: "Contact", documentable_id: contact_ids). where(checksum: orig_legal.checksum).where.not(path: orig_legal.path).each do |new_legal| unless modified.include?(orig_legal.id) diff --git a/spec/models/legal_documents_spec.rb b/spec/models/legal_documents_spec.rb index ac0379176..46922f869 100644 --- a/spec/models/legal_documents_spec.rb +++ b/spec/models/legal_documents_spec.rb @@ -9,6 +9,7 @@ describe LegalDocument do Fabricate(:zonefile_setting, origin: 'fie.ee') Fabricate(:zonefile_setting, origin: 'com.ee') LegalDocument.explicitly_write_file = true + PaperTrail.enabled = true domain = Fabricate(:domain) domain2 = Fabricate(:domain) @@ -19,6 +20,15 @@ describe LegalDocument do domains << skipping_as_different = domain.legal_documents.create!(body: Base64.encode64('D' * 4.kilobytes)) domains << skipping_as_no_checksum = domain.legal_documents.create!(checksum: nil, body: Base64.encode64('S' * 4.kilobytes)) domains << skipping_as_no_checksum2 = domain.legal_documents.create!(checksum: "", body: Base64.encode64('S' * 4.kilobytes)) + domains << registrant_copy = domain.registrant.legal_documents.create!(body: Base64.encode64('S' * 4.kilobytes)) + domains << registrant_skipping_as_different = domain.registrant.legal_documents.create!(body: Base64.encode64('Q' * 4.kilobytes)) + domains << tech_copy = domain.tech_contacts.first.legal_documents.create!(body: Base64.encode64('S' * 4.kilobytes)) + domains << tech_skipping_as_different = domain.tech_contacts.first.legal_documents.create!(body: Base64.encode64('W' * 4.kilobytes)) + domains << admin_copy = domain.admin_contacts.first.legal_documents.create!(body: Base64.encode64('S' * 4.kilobytes)) + domains << admin_skipping_as_different = domain.admin_contacts.first.legal_documents.create!(body: Base64.encode64('E' * 4.kilobytes)) + # writing nesting to history + domain.update(updated_at: Time.now) + domain2.update(updated_at: Time.now) skipping_as_no_checksum.update_columns(checksum: nil) skipping_as_no_checksum2.update_columns(checksum: "") @@ -37,8 +47,13 @@ describe LegalDocument do skipping_as_no_checksum.path.should_not be(skipping_as_no_checksum2.path) original.path.should_not == skipping_as_different.path original.path.should_not == skipping_as_different_domain.path + original.path.should_not == registrant_skipping_as_different.path + original.path.should_not == tech_skipping_as_different.path + original.path.should_not == admin_skipping_as_different.path original.path.should == copy.path - + original.path.should == registrant_copy.path + original.path.should == tech_copy.path + original.path.should == admin_copy.path end end From c35ed2f3c557f80c38c7f76a3340642a8b4133c8 Mon Sep 17 00:00:00 2001 From: Vladimir Krylov Date: Wed, 5 Oct 2016 14:53:19 +0300 Subject: [PATCH 18/20] Story#119627029 check shared contact (cherry picked from commit 9f73211) --- app/models/legal_document.rb | 2 +- spec/models/legal_documents_spec.rb | 39 ++++++++++++++++++----------- 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/app/models/legal_document.rb b/app/models/legal_document.rb index e8139519a..f6e15733e 100644 --- a/app/models/legal_document.rb +++ b/app/models/legal_document.rb @@ -87,7 +87,7 @@ class LegalDocument < ActiveRecord::Base contact_ids = DomainVersion.where(item_id: orig_legal.documentable_id).distinct. pluck("object->>'registrant_id'", "object_changes->>'registrant_id'", "children->>'tech_contacts'", "children->>'admin_contacts'", - "children->>'tech_contacts'", "children->>'admin_contacts'").flatten.uniq + "tech_contact_ids", "admin_contact_ids").flatten.uniq contact_ids = contact_ids.map{|id| case id when Hash diff --git a/spec/models/legal_documents_spec.rb b/spec/models/legal_documents_spec.rb index 46922f869..e411c923d 100644 --- a/spec/models/legal_documents_spec.rb +++ b/spec/models/legal_documents_spec.rb @@ -13,22 +13,27 @@ describe LegalDocument do domain = Fabricate(:domain) domain2 = Fabricate(:domain) - domains = [] - domains << original = domain.legal_documents.create!(body: Base64.encode64('S' * 4.kilobytes)) - domains << copy = domain.legal_documents.create!(body: Base64.encode64('S' * 4.kilobytes)) - domains << skipping_as_different_domain = domain2.legal_documents.create!(body: Base64.encode64('S' * 4.kilobytes)) - domains << skipping_as_different = domain.legal_documents.create!(body: Base64.encode64('D' * 4.kilobytes)) - domains << skipping_as_no_checksum = domain.legal_documents.create!(checksum: nil, body: Base64.encode64('S' * 4.kilobytes)) - domains << skipping_as_no_checksum2 = domain.legal_documents.create!(checksum: "", body: Base64.encode64('S' * 4.kilobytes)) - domains << registrant_copy = domain.registrant.legal_documents.create!(body: Base64.encode64('S' * 4.kilobytes)) - domains << registrant_skipping_as_different = domain.registrant.legal_documents.create!(body: Base64.encode64('Q' * 4.kilobytes)) - domains << tech_copy = domain.tech_contacts.first.legal_documents.create!(body: Base64.encode64('S' * 4.kilobytes)) - domains << tech_skipping_as_different = domain.tech_contacts.first.legal_documents.create!(body: Base64.encode64('W' * 4.kilobytes)) - domains << admin_copy = domain.admin_contacts.first.legal_documents.create!(body: Base64.encode64('S' * 4.kilobytes)) - domains << admin_skipping_as_different = domain.admin_contacts.first.legal_documents.create!(body: Base64.encode64('E' * 4.kilobytes)) + legals = [] + legals << original = domain.legal_documents.create!(body: Base64.encode64('S' * 4.kilobytes)) + legals << copy = domain.legal_documents.create!(body: Base64.encode64('S' * 4.kilobytes)) + legals << skipping_as_different_domain = domain2.legal_documents.create!(body: Base64.encode64('S' * 4.kilobytes)) + legals << skipping_as_different = domain.legal_documents.create!(body: Base64.encode64('D' * 4.kilobytes)) + legals << skipping_as_no_checksum = domain.legal_documents.create!(checksum: nil, body: Base64.encode64('S' * 4.kilobytes)) + legals << skipping_as_no_checksum2 = domain.legal_documents.create!(checksum: "", body: Base64.encode64('S' * 4.kilobytes)) + legals << registrant_copy = domain.registrant.legal_documents.create!(body: Base64.encode64('S' * 4.kilobytes)) + legals << registrant_skipping_as_different = domain.registrant.legal_documents.create!(body: Base64.encode64('Q' * 4.kilobytes)) + legals << tech_copy = domain.tech_contacts.first.legal_documents.create!(body: Base64.encode64('S' * 4.kilobytes)) + legals << tech_skipping_as_different = domain.tech_contacts.first.legal_documents.create!(body: Base64.encode64('W' * 4.kilobytes)) + legals << admin_copy = domain.admin_contacts.first.legal_documents.create!(body: Base64.encode64('S' * 4.kilobytes)) + legals << admin_skipping_as_different = domain.admin_contacts.first.legal_documents.create!(body: Base64.encode64('E' * 4.kilobytes)) + legals << new_second_tech_contact = domain2.tech_contacts.first.legal_documents.create!(body: Base64.encode64('S' * 4.kilobytes)) + domain.tech_contacts << domain2.tech_contacts.first + + # writing nesting to history domain.update(updated_at: Time.now) domain2.update(updated_at: Time.now) + domain.reload skipping_as_no_checksum.update_columns(checksum: nil) skipping_as_no_checksum2.update_columns(checksum: "") @@ -40,9 +45,12 @@ describe LegalDocument do skipping_as_no_checksum2.checksum.should == "" original.checksum.should == copy.checksum original.checksum.should_not == skipping_as_different.checksum + domain.tech_contacts.count.should == 2 LegalDocument.remove_duplicates - domains.each(&:reload) + LegalDocument.remove_duplicates + LegalDocument.remove_duplicates + legals.each(&:reload) skipping_as_no_checksum.path.should_not be(skipping_as_no_checksum2.path) original.path.should_not == skipping_as_different.path @@ -54,6 +62,9 @@ describe LegalDocument do original.path.should == registrant_copy.path original.path.should == tech_copy.path original.path.should == admin_copy.path + + original.path.should == new_second_tech_contact.path + skipping_as_different_domain.path.should_not == new_second_tech_contact.path end end From 5eb456a2107ea55bbb092324147f53d8136500aa Mon Sep 17 00:00:00 2001 From: Vladimir Krylov Date: Wed, 14 Dec 2016 16:26:49 +0200 Subject: [PATCH 19/20] removing migration number from structure as its not done in reality --- db/structure.sql | 2 -- 1 file changed, 2 deletions(-) diff --git a/db/structure.sql b/db/structure.sql index 3ce24e478..30e55f139 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -5275,8 +5275,6 @@ INSERT INTO schema_migrations (version) VALUES ('20160411140719'); INSERT INTO schema_migrations (version) VALUES ('20160414110443'); -INSERT INTO schema_migrations (version) VALUES ('20160421074023'); - INSERT INTO schema_migrations (version) VALUES ('20160429114732'); INSERT INTO schema_migrations (version) VALUES ('20160527110738'); From 01a352e437b58d536d8707889decb5f847472928 Mon Sep 17 00:00:00 2001 From: Vladimir Krylov Date: Wed, 14 Dec 2016 16:40:04 +0200 Subject: [PATCH 20/20] no need to save to filesystem if no body --- app/models/legal_document.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/legal_document.rb b/app/models/legal_document.rb index f6e15733e..0c08958ae 100644 --- a/app/models/legal_document.rb +++ b/app/models/legal_document.rb @@ -17,7 +17,7 @@ class LegalDocument < ActiveRecord::Base validate :val_body_length, if: ->(file){ file.path.blank? && !Rails.env.staging?} before_create :add_creator - before_save :save_to_filesystem + before_save :save_to_filesystem, if: :body def epp_code_map {