require 'csv'
require 'fileutils'
require 'json'
require 'net/http'
require 'open-uri'
require 'optparse'
require 'uri'
require 'zip'
require 'rake_option_parser_boilerplate'

namespace :company_status do
  # Example invocation:
  #
  #   bundle exec rake company_status:check_all -- \
  #     --open_data_file_path=tmp/ettevotja_rekvisiidid__lihtandmed.csv \
  #     --missing_companies_output_path=tmp/missing_companies_in_business_registry.csv \
  #     --deleted_companies_output_path=tmp/deleted_companies_from_business_registry.csv \
  #     --download_path=https://avaandmed.ariregister.rik.ee/sites/default/files/avaandmed/ettevotja_rekvisiidid__lihtandmed.csv.zip \
  #     --soft_delete_enable=false --sleep_time=4 --registrants_only=true

  # Status code the task treats as "deleted from the registry"; it is also the
  # fallback status for contacts that are absent from the open data file.
  DELETED_FROM_REGISTRY_STATUS = 'K'.freeze
  # Directory (with trailing slash) that holds the downloaded archive and the generated CSV reports.
  DESTINATION = (Rails.root.join('tmp').to_s + '/').freeze
  # Column names read from the open data CSV.
  COMPANY_STATUS = 'ettevotja_staatus'.freeze
  BUSINESS_REGISTRY_CODE = 'ariregistri_kood'.freeze

  desc 'Get Estonian companies status from Business Registry.'
  task :check_all => :environment do
    options = initialize_rake_task

    open_data_file_path = options[:open_data_file_path]
    missing_companies_in_business_registry_path = options[:missing_companies_output_path]
    deleted_companies_from_business_registry_path = options[:deleted_companies_output_path]
    download_path = options[:download_path]
    soft_delete_enable = options[:soft_delete_enable]
    are_registrants_only = options[:registrants_only]
    sleep_time = options[:sleep_time]

    # One path is used for cleanup, download and unzip so the stale archive is
    # really removed and the fresh one ends up under DESTINATION.
    downloaded_filename = File.basename(URI(download_path).path)
    zip_path = File.join(DESTINATION, downloaded_filename)

    puts "SOFT DELETE ENABLE: #{soft_delete_enable}"

    puts "*** Run 1 step. Downloading fresh open data file. ***"
    remove_old_file(zip_path)
    download_open_data_file(download_path, zip_path)
    unzip_file(zip_path, DESTINATION)

    puts "*** Run 2 step. I am collecting data from open business registry sources. ***"
    company_data = collect_company_data(open_data_file_path)

    puts '*** Run 3 step. I process companies, update their information, and sort them into ' \
         'different files based on whether the companies are missing or removed from the business registry ***'

    whitelisted_companies = ENV['whitelist_companies'].present? ? JSON.parse(ENV['whitelist_companies']) : []

    contacts_query = Contact.where(ident_type: 'org', ident_country_code: 'EE')
    contacts_query = contacts_query.joins(:registrant_domains).distinct if are_registrants_only

    unique_contacts = contacts_query.to_a.uniq(&:code)

    unique_contacts.each do |contact|
      next if whitelisted_companies.include?(contact.ident)

      # A contact that is not present in the open data file is treated as deleted.
      status = company_data.fetch(contact.ident, {}).fetch(COMPANY_STATUS, DELETED_FROM_REGISTRY_STATUS)
      update_company_status(contact: contact, status: status)

      puts "Company: #{contact.name} with ident: #{contact.ident} and ID: #{contact.id} has status: #{status}"
      puts "Contact domain: #{contact.registrant_domains.pluck(:name)}"

      next unless status == DELETED_FROM_REGISTRY_STATUS

      sort_companies_to_files(
        contact: contact,
        missing_companies_in_business_registry_path: missing_companies_in_business_registry_path,
        deleted_companies_from_business_registry_path: deleted_companies_from_business_registry_path,
        soft_delete_enable: soft_delete_enable,
        sleep_time: sleep_time
      )
    end

    puts '*** Done ***'
  end

  private

  # Builds the default option set and lets command-line switches override it.
  #
  # @return [Hash] whatever RakeOptionParserBoilerplate.process_args returns
  #   (the task reads the same symbol keys as the defaults below from it)
  def initialize_rake_task
    options = {
      open_data_file_path: "#{DESTINATION}ettevotja_rekvisiidid__lihtandmed.csv",
      missing_companies_output_path: "#{DESTINATION}missing_companies_in_business_registry.csv",
      deleted_companies_output_path: "#{DESTINATION}deleted_companies_from_business_registry.csv",
      download_path: 'https://avaandmed.ariregister.rik.ee/sites/default/files/avaandmed/ettevotja_rekvisiidid__lihtandmed.csv.zip',
      soft_delete_enable: false,
      registrants_only: false,
      sleep_time: 2,
    }
    banner = 'Usage: rake company_status:check_all -- [options]'

    RakeOptionParserBoilerplate.process_args(options: options, banner: banner, hash: companies_opts_hash)
  end

  # Switch definitions handed to the option parser, keyed by option name.
  #
  # @return [Hash{Symbol => Array}]
  def companies_opts_hash
    {
      open_data_file_path: ['-o [OPEN_DATA_FILE_PATH]', '--open_data_file_path [OPEN_DATA_FILE_PATH]', String],
      missing_companies_output_path: ['-m [MISSING_COMPANIES_OUTPUT_PATH]',
                                      '--missing_companies_output_path [MISSING_COMPANIES_OUTPUT_PATH]', String],
      # No short switch here: this option used to declare '-s', which sleep_time
      # also declares further down, so the short form never reached this option.
      deleted_companies_output_path: ['--deleted_companies_output_path [DELETED_COMPANIES_OUTPUT_PATH]', String],
      download_path: ['-d [DOWNLOAD_PATH]', '--download_path [DOWNLOAD_PATH]', String],
      soft_delete_enable: ['-e [SOFT_DELETE_ENABLE]', '--soft_delete_enable [SOFT_DELETE_ENABLE]', FalseClass],
      registrants_only: ['-r', '--registrants_only [REGISTRANTS_ONLY]', FalseClass],
      sleep_time: ['-s', '--sleep_time [SLEEP_TIME]', Integer],
    }
  end

  # Deletes the file at the given path when it exists; does nothing otherwise.
  #
  # @param output_file_path [String]
  def remove_old_file(output_file_path)
    FileUtils.rm(output_file_path) if File.exist?(output_file_path)
  end

  # Extracts every entry of the zip archive into the destination directory.
  #
  # @param filename [String] path of the zip archive
  # @param destination [String] directory that receives the extracted entries
  def unzip_file(filename, destination)
    Zip::File.open(filename) do |zip_file|
      zip_file.each do |entry|
        # NOTE(review): the block returning true is presumably rubyzip's
        # "overwrite existing file" callback - confirm against the installed version.
        entry.extract(File.join(destination, entry.name)) { true }
      end
    end

    puts "Archive invoke to #{destination}"
  end

  # Reads the semicolon-separated open data CSV into a lookup table.
  #
  # @param open_data_file_path [String] path of the CSV file
  # @return [Hash{String => CSV::Row}] rows keyed by their BUSINESS_REGISTRY_CODE column;
  #   when a code occurs more than once the last row wins
  def collect_company_data(open_data_file_path)
    csv_options = { headers: true, col_sep: ';', quote_char: '"', liberal_parsing: true }

    CSV.foreach(open_data_file_path, **csv_options).each_with_object({}) do |row, company_data|
      company_data[row[BUSINESS_REGISTRY_CODE]] = row
    end
  end

  # Downloads the URL with a single GET request and writes the body to filename.
  # Any status other than 200 is reported on stdout and nothing is written.
  #
  # @param url [String] address of the open data archive
  # @param filename [String] path the response body is written to
  def download_open_data_file(url, filename)
    uri = URI(url)

    Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
      response = http.request(Net::HTTP::Get.new(uri))

      if response.code == '200'
        File.open(filename, 'wb') { |file| file.write(response.body) }
        # Reported only after the body was actually written.
        puts "File saved as #{filename}"
      else
        puts "Failed to download file: #{response.code} #{response.message}"
      end
    end
  end

  # Stores the given status on the contact together with the time of the check.
  #
  # @param contact [Contact]
  # @param status [String]
  def update_company_status(contact:, status:)
    contact.update(company_register_status: status, checked_company_at: Time.zone.now)
  end

  # Appends the contact to the "missing in business registry" CSV report.
  #
  # @param contact [Contact]
  # @param path [String] path of the report file
  def put_company_to_missing_file(contact:, path:)
    write_to_csv_file(csv_file_path: path,
                      headers: ["ID", "Code", "Ident", "Name", "Contact Type"],
                      attrs: [contact.id, contact.code, contact.ident, contact.name, determine_contact_type(contact)])
  end

  # Asks the contact for its company details and files it into the "missing"
  # report (empty response) or the "deleted" report (response status equals
  # DELETED_FROM_REGISTRY_STATUS). Domains are soft deleted in both cases when
  # soft_delete_enable is truthy. Companies with any other status are left alone.
  #
  # @param contact [Contact]
  # @param missing_companies_in_business_registry_path [String]
  # @param deleted_companies_from_business_registry_path [String]
  # @param soft_delete_enable [Boolean]
  # @param sleep_time [Integer] seconds to pause before the lookup
  def sort_companies_to_files(contact:, missing_companies_in_business_registry_path:,
                              deleted_companies_from_business_registry_path:, soft_delete_enable:, sleep_time:)
    puts "Sleeping for #{sleep_time} seconds"
    sleep sleep_time

    # NOTE(review): return_company_details presumably queries the business
    # registry service, which is why each call is preceded by a pause - confirm.
    resp = contact.return_company_details

    if resp.empty?
      put_company_to_missing_file(contact: contact, path: missing_companies_in_business_registry_path)
      puts "Company: #{contact.name} with ident: #{contact.ident} and ID: #{contact.id} is missing in registry, company id: #{contact.id}"
      soft_delete_company(contact) if soft_delete_enable
    else
      company = resp.first
      status = company.status.upcase

      if status == DELETED_FROM_REGISTRY_STATUS
        # The entry is only looked up for deleted companies, so a company with
        # another status cannot fail here on data that is never reported.
        latest_entry = company.kandeliik.last.last
        headers = ["ID", "Code", "Ident", "Name", "Status", "Kandeliik Type", "Kandeliik Tekstina", "kande_kpv",
                   "Contact Type"]
        attrs = [contact.id, contact.code, contact.ident, contact.name, status, latest_entry.kandeliik,
                 latest_entry.kandeliik_tekstina, latest_entry.kande_kpv, determine_contact_type(contact)]

        write_to_csv_file(csv_file_path: deleted_companies_from_business_registry_path, headers: headers,
                          attrs: attrs)
        puts "Company: #{contact.name} with ident: #{contact.ident} and ID: #{contact.id} has status #{status}, company id: #{contact.id}"
        soft_delete_company(contact) if soft_delete_enable
      end
    end
  end

  # Describes the roles the contact plays: 'Registrant' when it has registrant
  # domains, plus the distinct types of its domain contacts, or 'Unknown' when
  # neither applies.
  #
  # @param contact [Contact]
  # @return [String] role names joined with ', '
  def determine_contact_type(contact)
    roles = []
    roles << 'Registrant' if contact.registrant_domains.any?
    roles += contact.domain_contacts.pluck(:type).uniq if contact.domain_contacts.any?
    roles << 'Unknown' if roles.empty?
    roles.join(', ')
  end

  # Schedules a soft force delete for every domain the contact is registrant of.
  #
  # @param contact [Contact]
  def soft_delete_company(contact)
    contact.registrant_domains.each do |domain|
      # next if domain.force_delete_scheduled?

      domain.schedule_force_delete(type: :soft, notify_by_email: true, reason: 'invalid_company',
                                   email: contact.email)
      puts "Soft delete process initiated for company: #{contact.name} with ID: #{contact.id} domain: #{domain.name}"
    end
  end

  # Appends one row to the CSV file, writing the header row first when the file
  # does not exist yet. Errors raised while writing are reported on stdout and
  # not re-raised.
  #
  # @param csv_file_path [String]
  # @param headers [Array<String>]
  # @param attrs [Array] values of the row to append
  def write_to_csv_file(csv_file_path:, headers:, attrs:)
    write_headers = !File.exist?(csv_file_path)

    begin
      CSV.open(csv_file_path, "ab", write_headers: write_headers, headers: headers) do |csv|
        csv << attrs
      end
      puts "Successfully wrote to CSV: #{csv_file_path}"
    rescue => e
      puts "Error writing to CSV: #{e.message}"
    end
  end
end