# --- app/models/legal_document.rb (excerpt) ---------------------------------
class LegalDocument < ActiveRecord::Base
  # Stores the Base64-decoded +body+ on disk and points +path+ at the file.
  #
  # The SHA1 hex digest of the decoded content is saved in +checksum+. When
  # another LegalDocument already carries the same checksum and has a path,
  # that path is reused and no new file is written.
  #
  # Returns the value assigned to +path+.
  def save_to_filesystem
    content = Base64.decode64(body)
    # Hex string: the same representation the legal_doc:generate_hash task
    # stores, so both code paths compare like with like.
    self.checksum = Digest::SHA1.hexdigest(content)

    # `where` returns a relation, which is truthy even when it matches nothing,
    # so fetch the first row and test that for nil instead.
    existing = LegalDocument.where(checksum: checksum).where('path IS NOT NULL').first
    return self.path = existing.path if existing

    loop do
      # One clock read per attempt so directory and file name always agree.
      now = Time.zone.now
      # Four digits, never "0000".
      suffix = format('%04d', SecureRandom.random_number(9_999) + 1)

      dir = "#{ENV['legal_documents_dir']}/#{now.strftime('%Y/%m/%d')}"
      FileUtils.mkdir_p(dir, mode: 0o775)
      self.path = "#{dir}/#{now.to_formatted_s(:number)}_#{suffix}.#{document_type}"
      break unless File.file?(path)
    end

    File.open(path, 'wb') { |f| f.write(content) } unless Rails.env.test?
    path
  end

  # (add_creator and the rest of the class lie outside this excerpt)
end

# --- db/migrate/20160629114503_add_hash_to_legal_doc.rb ----------------------
# Adds the +checksum+ column that holds the SHA1 hex digest of a legal
# document's file content.
class AddHashToLegalDoc < ActiveRecord::Migration
  def change
    add_column :legal_documents, :checksum, :text
  end
end

# --- lib/tasks/legal_doc.rake ------------------------------------------------
namespace :legal_doc do
  desc 'Legal documents duplicates fix'
  task all: :environment do
    Rake::Task['legal_doc:generate_hash'].invoke
    Rake::Task['legal_doc:remove_dublicates'].invoke
  end

  desc 'Generate hash'
  task generate_hash: :environment do
    start = Time.zone.now.to_f
    puts '-----> Generating unique hash for legal documents'
    count = 0

    LegalDocument.find_each do |doc|
      # Only rows that still lack a checksum and whose file is on disk.
      next unless doc.checksum.blank?
      next unless doc.path.present? && File.exist?(doc.path)

      doc.checksum = Digest::SHA1.hexdigest(File.binread(doc.path))
      count += 1 if doc.save
    end

    puts "-----> Hash generated for #{count} rows in #{(Time.zone.now.to_f - start).round(2)} seconds"
  end

  # NOTE: the task name keeps its historical spelling ("dublicates") because
  # it is what `legal_doc:all` and any external callers invoke.
  desc 'Remove duplicates'
  task remove_dublicates: :environment do
    start = Time.zone.now.to_f
    puts '-----> Removing legal documents duplicates'
    count = 0
    repointed = {} # ids already redirected to another document's file

    LegalDocument.find_each do |keeper|
      # A document that was itself redirected must not become a keeper, and a
      # blank checksum would match every un-hashed row.
      next if repointed.key?(keeper.id) || keeper.checksum.blank?
      next unless keeper.path.present? && File.exist?(keeper.path)

      # `where` ignores a block; the relation has to be iterated explicitly.
      LegalDocument.where(checksum: keeper.checksum).find_each do |dup|
        next if dup.id == keeper.id
        # Already sharing the keeper's file: deleting it would destroy the
        # copy that is being kept.
        next if dup.path == keeper.path

        stale_path = dup.path
        dup.path = keeper.path
        next unless dup.save

        # Remove the redundant copy only after the record points elsewhere.
        File.delete(stale_path) if stale_path.present? && File.exist?(stale_path)
        repointed[dup.id] = true
        count += 1
      end
    end

    puts "-----> Duplicates fixed for #{count} rows in #{(Time.zone.now.to_f - start).round(2)} seconds"
  end
end