Story#119627029 - uniq legal docs by new logic

This commit is contained in:
Vladimir Krylov 2016-08-08 13:11:01 +03:00
parent dd3f19c5e1
commit a671ac4696

View file

@ -23,6 +23,9 @@ namespace :legal_doc do
puts "-----> Hash generated for #{count} rows in #{(Time.zone.now.to_f - start).round(2)} seconds" puts "-----> Hash generated for #{count} rows in #{(Time.zone.now.to_f - start).round(2)} seconds"
end end
# Starting point is Domain legal docs
# then inside it checking the same domains and connected contacts
desc 'Remove duplicates' desc 'Remove duplicates'
task remove_duplicates: :environment do task remove_duplicates: :environment do
@ -31,18 +34,33 @@ namespace :legal_doc do
count = 0 count = 0
modified = Array.new modified = Array.new
LegalDocument.where.not(checksum: [nil, ""]).find_each do |x| LegalDocument.where(documentable_type: "Domain").where.not(checksum: [nil, ""]).find_each do |orig_legal|
next if modified.include?(x.checksum) next if modified.include?(orig_legal.checksum)
next if !File.exist?(x.path) next if !File.exist?(orig_legal.path)
modified.push(x.checksum) modified.push(orig_legal.checksum)
LegalDocument.where(checksum: x.checksum).where.not(id: x.id) do |y| LegalDocument.where(documentable_type: "Domain", documentable_id: orig_legal.documentable_id).
unless modified.include?(x.id) where(checksum: orig_legal.checksum).
File.delete(y.path) if File.exist?(y.path) where.not(id: orig_legal.id).each do |new_legal|
y.update(path: x.path) unless modified.include?(orig_legal.id)
count += 1 File.delete(new_legal.path) if File.exist?(new_legal.path)
end new_legal.update(path: orig_legal.path)
count += 1
end
end
contact_ids = DomainVersion.where(item_id: orig_legal.documentable_id).distinct.
pluck("object->>'registrar_id'", "object->'registrant_id'", "object_changes->'registrar_id'",
"object_changes->'registrant_id'", "children->'tech_contacts'", "children->'admin_contacts'").flatten.uniq
contact_ids = contact_ids.map{|id| id.is_a?(Hash) ? id["id"] : id}.compact.uniq
LegalDocument.where(documentable_type: "Contact", documentable_id: contact_ids).
where(checksum: orig_legal.checksum).each do |new_legal|
unless modified.include?(orig_legal.id)
File.delete(new_legal.path) if File.exist?(new_legal.path)
new_legal.update(path: orig_legal.path)
count += 1
end
end end
end end
puts "-----> Duplicates fixed for #{count} rows in #{(Time.zone.now.to_f - start).round(2)} seconds" puts "-----> Duplicates fixed for #{count} rows in #{(Time.zone.now.to_f - start).round(2)} seconds"