diff --git a/Dockerfile b/Dockerfile
index bbfac955e..5c27b4f0b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -62,6 +62,16 @@ RUN apt-get install -y --no-install-recommends > /dev/null \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
+# Install Python packages for wordcloud generation.
+# The script imports `load_dotenv` from the `dotenv` module, which is shipped by
+# the PyPI distribution `python-dotenv` (not by the distribution named `dotenv`).
+# --no-cache-dir is used on every pip call so no wheel cache ends up in the layer.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    python3-pip \
+    python3-setuptools \
+    python3-dev \
+    && pip3 install --no-cache-dir --upgrade pip setuptools wheel \
+    && pip3 install --no-cache-dir numpy Pillow matplotlib wordcloud openai python-dotenv \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
RUN apt-get autoremove -y && apt-get clean
ENV CHROME_VERSION="128.0.6613.137"
@@ -95,7 +105,6 @@ ENV PATH="/opt/chrome-linux64:${PATH}"
RUN ln -s /lib/ld-linux.so.2 /lib/ld-linux.so.2 || true
-# Обертка для wkhtmltopdf с xvfb
RUN echo '#!/bin/bash\nxvfb-run -a --server-args="-screen 0, 1024x768x24" /usr/bin/wkhtmltopdf "$@"' > /usr/local/bin/wkhtmltopdf \
&& chmod +x /usr/local/bin/wkhtmltopdf
diff --git a/Gemfile b/Gemfile
index 6f04e08d8..5bae52e19 100644
--- a/Gemfile
+++ b/Gemfile
@@ -113,3 +113,6 @@ gem 'net-ftp'
# https://stackoverflow.com/questions/79360526/uninitialized-constant-activesupportloggerthreadsafelevellogger-nameerror
gem 'concurrent-ruby', '1.3.4'
+
+# gives you access to stdin, stdout, and stderr when running other programs
+gem 'open3'
\ No newline at end of file
diff --git a/Gemfile.lock b/Gemfile.lock
index aaa2d9a94..9c7de730f 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -394,6 +394,7 @@ GEM
omniauth-rails_csrf_protection (0.1.2)
actionpack (>= 4.2)
omniauth (>= 1.3.1)
+ open3 (0.2.1)
openid_connect (1.4.2)
activemodel
attr_required (>= 1.0.0)
@@ -636,6 +637,7 @@ DEPENDENCIES
nokogiri (~> 1.16.0)
omniauth-rails_csrf_protection
omniauth-tara!
+ open3
openssl
paper_trail (~> 14.0)
pdfkit
diff --git a/app/controllers/admin/tools/wordcloud_controller.rb b/app/controllers/admin/tools/wordcloud_controller.rb
new file mode 100644
index 000000000..30d78a6d3
--- /dev/null
+++ b/app/controllers/admin/tools/wordcloud_controller.rb
@@ -0,0 +1,175 @@
+# frozen_string_literal: true
+
+module Admin
+ module Tools
+ # Controller for the admin wordcloud generator tool that creates visual representations
+ # of the most common words used in domain names
+ class WordcloudController < BaseController # rubocop:disable Metrics/ClassLength
+ WORDCLOUD_DIR = Rails.root.join('public', 'wordcloud')
+ WORDCLOUD_IMAGE_PATH = WORDCLOUD_DIR.join('wordcloud.png')
+ WORDCLOUD_CONFIG_PATH = WORDCLOUD_DIR.join('config.json')
+ TOP_WORDS_PATH = WORDCLOUD_DIR.join('top_words.txt')
+
+ before_action :authorize_admin
+ before_action :clear_cache, only: :create
+ before_action :ensure_wordcloud_dir, only: :create
+
+ def index
+ # Load configuration
+ @config = load_wordcloud_config
+
+ # Setup wordcloud data if image exists
+ if File.exist?(WORDCLOUD_IMAGE_PATH)
+ setup_wordcloud_data
+ else
+ @wordcloud_url = nil
+ end
+ end
+
+ def create
+ # Validate domains file
+ if params[:domains_file].present?
+ domains_file_path = process_uploaded_file(params[:domains_file])
+ return redirect_to admin_tools_wordcloud_path if domains_file_path.nil?
+ else
+ flash[:alert] = I18n.t('admin.tools.wordcloud_no_file')
+ return redirect_to admin_tools_wordcloud_path
+ end
+
+ # Collect and save configuration
+ config = build_config_from_params
+ File.write(WORDCLOUD_CONFIG_PATH, config.to_json)
+
+ # Start the background job
+ GenerateWordCloudJob.perform_later(domains_file_path.to_s, current_admin_user.id, config)
+ redirect_to progress_admin_tools_wordcloud_path
+
+ rescue StandardError => e
+ logger.error "Error starting wordcloud generation: #{e.message}"
+ flash[:alert] = "#{I18n.t('admin.tools.wordcloud_error')}: #{e.message}"
+ redirect_to admin_tools_wordcloud_path
+ end
+
+ # GET /admin/tools/wordcloud/progress
+ def progress
+ @progress_key = "wordcloud_progress:#{current_admin_user.id}"
+ @progress_data = Rails.cache.fetch(@progress_key) || { status: 'not_started', progress: 0 }
+ end
+
+ # GET /admin/tools/wordcloud/status
+ def status
+ progress_key = "wordcloud_progress:#{current_admin_user.id}"
+ progress_data = Rails.cache.fetch(progress_key) || { status: 'not_started', progress: 0 }
+
+ render json: progress_data
+ end
+
+ private
+
+ def ensure_wordcloud_dir
+ FileUtils.mkdir_p(WORDCLOUD_DIR) unless Dir.exist?(WORDCLOUD_DIR)
+ end
+
+ def process_uploaded_file(uploaded_file)
+ # Create a persistent copy of the uploaded file
+ persistent_file_path = Rails.root.join('tmp', "domains_#{Time.now.to_i}.csv")
+
+ # Copy the file content to a persistent location
+ FileUtils.cp(uploaded_file.tempfile.path, persistent_file_path)
+
+ # Validate file has content
+ if File.size(persistent_file_path).zero?
+ File.delete(persistent_file_path)
+ flash[:alert] = I18n.t('admin.tools.wordcloud_empty_file')
+ return nil
+ end
+
+ persistent_file_path
+ end
+
+ def build_config_from_params
+ # Base configuration
+ config = {
+ width: params[:width].presence || 800,
+ height: params[:height].presence || 800,
+ max_words: params[:max_words].presence || 500,
+ background_color: params[:background_color].presence || 'white',
+ min_word_length: params[:min_word_length].presence || 2,
+ include_numbers: params[:include_numbers] == '1',
+ batch_size: params[:batch_size].presence || 500,
+ additional_prompt: params[:additional_prompt].presence || nil
+ }
+
+ # Process additional stopwords
+ if params[:additional_stopwords].present?
+ stopwords = params[:additional_stopwords].downcase.split(/[\s,]+/).reject(&:empty?)
+ config[:additional_stopwords] = stopwords if stopwords.any?
+ end
+
+ # Process special terms
+ if params[:special_terms].present?
+ special_terms = params[:special_terms].split(/[\s,]+/).reject(&:empty?)
+ config[:special_terms] = special_terms if special_terms.any?
+ end
+
+ config
+ end
+
+ def load_wordcloud_config
+ if File.exist?(WORDCLOUD_CONFIG_PATH)
+ begin
+ JSON.parse(File.read(WORDCLOUD_CONFIG_PATH))
+ rescue JSON::ParserError
+ default_wordcloud_config
+ end
+ else
+ default_wordcloud_config
+ end
+ end
+
+ def setup_wordcloud_data
+ # Add timestamp to prevent caching
+ @wordcloud_url = "/wordcloud/wordcloud.png?t=#{File.mtime(WORDCLOUD_IMAGE_PATH).to_i}"
+
+ # Get the file's modification time and convert to application timezone
+ @wordcloud_generated_at = File.mtime(WORDCLOUD_IMAGE_PATH).in_time_zone(Time.zone)
+
+ # Load top words
+ load_top_words
+ end
+
+ def load_top_words
+ return unless File.exist?(TOP_WORDS_PATH)
+
+ @top_words = []
+ File.readlines(TOP_WORDS_PATH).each do |line|
+ if line =~ /^\d+\.\s+(\w+):\s+(\d+)$/
+ @top_words << [$1, $2.to_i]
+ end
+ end
+ end
+
+ def default_wordcloud_config
+ {
+ 'width' => 800,
+ 'height' => 800,
+ 'max_words' => 500,
+ 'background_color' => 'white',
+ 'additional_stopwords' => [],
+ 'include_numbers' => true,
+ 'min_word_length' => 2,
+ 'special_terms' => ['e-', 'i-', '2-', '3-', '4-', '.com', 'tr.ee', 'ai', 'web'],
+ 'batch_size' => 500
+ }
+ end
+
+ def authorize_admin
+ authorize! :access, :tools
+ end
+
+ def clear_cache
+ Rails.cache.delete("wordcloud_progress:#{current_admin_user.id}")
+ end
+ end
+ end
+end
diff --git a/app/controllers/admin/tools_controller.rb b/app/controllers/admin/tools_controller.rb
new file mode 100644
index 000000000..afc2891ac
--- /dev/null
+++ b/app/controllers/admin/tools_controller.rb
@@ -0,0 +1,14 @@
+module Admin
+ class ToolsController < BaseController
+ before_action :authorize_admin
+
+ # GET /admin/tools
+ def index; end
+
+ private
+
+ def authorize_admin
+ authorize! :access, :tools
+ end
+ end
+end
diff --git a/app/jobs/generate_word_cloud_job.rb b/app/jobs/generate_word_cloud_job.rb
new file mode 100644
index 000000000..618d63404
--- /dev/null
+++ b/app/jobs/generate_word_cloud_job.rb
@@ -0,0 +1,119 @@
+# Use Open3 to capture output in real-time
+require 'open3'
+
+# Background job that generates a wordcloud image from domain names
+# using an external Python script with progress tracking
+class GenerateWordCloudJob < ApplicationJob
+ def perform(domains_file_path, user_id, config = {})
+
+ Rails.logger.info("Generating wordcloud for #{domains_file_path}")
+
+ @domains_file_path = domains_file_path
+ @user_id = user_id
+ @config = config
+ @progress_key = "wordcloud_progress:#{user_id}"
+ @wordcloud_dir = Rails.root.join('public', 'wordcloud')
+ @config_file_path = nil
+
+ initialize_progress
+
+ begin
+ setup_environment
+ run_wordcloud_script
+ rescue StandardError => e
+ handle_error(e)
+ ensure
+ cleanup
+ end
+ end
+
+ private
+
+ def initialize_progress
+ Rails.cache.write(@progress_key, { status: 'processing', progress: 0 })
+ end
+
+ def setup_environment
+ # Ensure the wordcloud directory exists
+ FileUtils.mkdir_p(@wordcloud_dir) unless Dir.exist?(@wordcloud_dir)
+
+ # Create a config file for the Python script
+ @config_file_path = Rails.root.join(@wordcloud_dir, "wordcloud_config_#{Time.now.to_i}.json")
+ File.write(@config_file_path, @config.to_json)
+
+ # Setup Python script
+ @script_path = Rails.root.join('lib', 'wordcloud', 'generate_wordcloud.py')
+ FileUtils.chmod('+x', @script_path) unless File.executable?(@script_path)
+ end
+
+ def run_wordcloud_script
+ python_executable = ENV.fetch('PYTHON_EXECUTABLE', 'python3')
+ env = { 'PYTHONIOENCODING' => 'utf-8', 'PYTHONUNBUFFERED' => '1' }
+
+ Open3.popen2e(env, python_executable, @script_path.to_s, @domains_file_path,
+ @wordcloud_dir.to_s, @config_file_path.to_s) do |stdin, stdout_err, wait_thr|
+ stdin.close
+ process_script_output(stdout_err, wait_thr)
+ end
+ end
+
+ def process_script_output(stdout_err, wait_thr)
+ # Process output line by line
+ while line = stdout_err.gets
+ update_progress_from_output(line)
+ Rails.logger.info("WordCloud: #{line.strip}")
+ end
+
+ # Process exit status
+ handle_exit_status(wait_thr.value)
+ end
+
+ def update_progress_from_output(line)
+ case line
+ when %r{Processing batch (\d+)/(\d+)}
+ current, total = $1.to_i, $2.to_i
+ progress = ((current.to_f / total) * 80).round
+ update_progress(progress)
+ when /Total estimated cost/
+ update_progress(80)
+ when /Generating word cloud/
+ update_progress(90)
+ end
+ end
+
+ def update_progress(value, status: 'processing')
+ Rails.cache.write(@progress_key, { status: status, progress: value })
+ end
+
+ def handle_exit_status(exit_status)
+ if exit_status.success?
+ update_progress(100, status: 'completed')
+ else
+ Rails.cache.write(
+ @progress_key,
+ {
+ status: 'failed',
+ progress: 0,
+ error: "Process failed with status #{exit_status.exitstatus}"
+ }
+ )
+ end
+ end
+
+ def handle_error(exception)
+ Rails.logger.error("Error in WordCloud job: #{exception.message}")
+ Rails.logger.error(exception.backtrace.join("\n"))
+ Rails.cache.write(
+ @progress_key,
+ {
+ status: 'failed',
+ progress: 0,
+ error: exception.message
+ }
+ )
+ end
+
+ def cleanup
+ File.delete(@config_file_path) if @config_file_path && File.exist?(@config_file_path)
+ end
+end
diff --git a/app/models/ability.rb b/app/models/ability.rb
index ba6971dcf..eeec470f2 100644
--- a/app/models/ability.rb
+++ b/app/models/ability.rb
@@ -121,6 +121,7 @@ class Ability
can :destroy, :pending
can :create, :zonefile
can :access, :settings_menu
+ can :access, :tools
can :manage, :mass_actions
can :manage, BouncedMailAddress
end
diff --git a/app/views/admin/base/_menu.haml b/app/views/admin/base/_menu.haml
index f1e855742..73b4daf77 100644
--- a/app/views/admin/base/_menu.haml
+++ b/app/views/admin/base/_menu.haml
@@ -6,6 +6,8 @@
%li= link_to t(:contacts), admin_contacts_path
- if can? :show, Registrar
%li= link_to t(:registrars), admin_registrars_path
+ - if can?(:access, :tools)
+ %li= link_to t(:tools), admin_tools_path
- if can?(:access, :settings_menu)
%li.dropdown
%a.dropdown-toggle{"data-toggle" => "dropdown", href: "#"}
diff --git a/app/views/admin/tools/index.html.erb b/app/views/admin/tools/index.html.erb
new file mode 100644
index 000000000..1b111132a
--- /dev/null
+++ b/app/views/admin/tools/index.html.erb
@@ -0,0 +1,23 @@
+<%= render "shared/title", name: t('admin.tools.title') %>
+
+
+
+
+
+
<%= t('admin.tools.available_tools') %>
+
+
+
+
+
+
<%= t('admin.tools.wordcloud_generator') %>
+
<%= t('admin.tools.wordcloud_generator_description') %>
+ <%= link_to t('admin.tools.generate_wordcloud'), admin_tools_wordcloud_path, class: 'btn btn-primary' %>
+
+
+
+
+
+
+
+
diff --git a/app/views/admin/tools/wordcloud/_form.html.erb b/app/views/admin/tools/wordcloud/_form.html.erb
new file mode 100644
index 000000000..ea56f726d
--- /dev/null
+++ b/app/views/admin/tools/wordcloud/_form.html.erb
@@ -0,0 +1,114 @@
+<%= form_tag admin_tools_wordcloud_path, method: :post, multipart: true do %>
+
+
+
+
+
+
+
+
+
+ <%= label_tag :width, t('admin.tools.wordcloud.width') %>
+ <%= number_field_tag :width, @config['width'], min: 400, max: 2000, step: 100, class: 'form-control' %>
+
+
+
+
+ <%= label_tag :height, t('admin.tools.wordcloud.height') %>
+ <%= number_field_tag :height, @config['height'], min: 400, max: 2000, step: 100, class: 'form-control' %>
+
+
+
+
+ <%= label_tag :max_words, t('admin.tools.wordcloud.max_words') %>
+ <%= number_field_tag :max_words, @config['max_words'], min: 100, max: 1000, step: 50, class: 'form-control' %>
+
+
+
+
+
+
+
+ <%= label_tag :batch_size, t('admin.tools.wordcloud.batch_size') %>
+ <%= number_field_tag :batch_size, @config['batch_size'], min: 100, max: 1000, step: 50, class: 'form-control' %>
+ <%= t('admin.tools.wordcloud.batch_size_help') %>
+
+
+
+
+ <%= label_tag :background_color, t('admin.tools.wordcloud.background') %>
+ <%= select_tag :background_color,
+ options_for_select([
+ ['White', 'white'],
+ ['Black', 'black'],
+ ['Transparent', 'transparent'],
+ ['Light Gray', '#f0f0f0']
+ ], @config['background_color']),
+ class: 'form-control' %>
+
+
+
+
+
+
+
+ <%= label_tag :min_word_length, t('admin.tools.wordcloud.min_word_length') %>
+ <%= number_field_tag :min_word_length, @config['min_word_length'], min: 1, max: 5, class: 'form-control' %>
+
+
+
+
+
+
+ <%= label_tag :special_terms, t('admin.tools.wordcloud.special_terms') %>
+ <%= text_field_tag :special_terms, @config['special_terms'].is_a?(Array) ? @config['special_terms'].join(', ') : '',
+ class: 'form-control',
+ placeholder: t('admin.tools.wordcloud.special_terms_placeholder') %>
+ <%= t('admin.tools.wordcloud.special_terms_help') %>
+
+
+
+ <%= label_tag :additional_stopwords, t('admin.tools.wordcloud.additional_stopwords') %>
+ <%= text_area_tag :additional_stopwords, @config['additional_stopwords'].is_a?(Array) ? @config['additional_stopwords'].join(', ') : '',
+ rows: 3,
+ placeholder: t('admin.tools.wordcloud.stopwords_placeholder'),
+ class: 'form-control' %>
+ <%= t('admin.tools.wordcloud.stopwords_help') %>
+
+
+
+ <%= label_tag :additional_prompt, t('admin.tools.wordcloud.additional_prompt') %>
+ <%= text_area_tag :additional_prompt, @config['additional_prompt'], class: "form-control", rows: 3,
+ placeholder: t('admin.tools.wordcloud.additional_prompt_placeholder') %>
+ <%= t('admin.tools.wordcloud.additional_prompt_help') %>
+
+
+
+
+
+ <%= submit_tag t('admin.tools.generate_wordcloud'), class: 'btn btn-primary btn-lg mt-3' %>
+<% end %>
\ No newline at end of file
diff --git a/app/views/admin/tools/wordcloud/index.html.erb b/app/views/admin/tools/wordcloud/index.html.erb
new file mode 100644
index 000000000..083c1b324
--- /dev/null
+++ b/app/views/admin/tools/wordcloud/index.html.erb
@@ -0,0 +1,92 @@
+<% content_for :actions do %>
+ <%= link_to t('back'), admin_tools_path, class: 'btn btn-default' %>
+<% end %>
+<%= render "shared/title", name: t('admin.tools.wordcloud.title') %>
+
+
+
+
+
+
+
+
+
+
<%= t('admin.tools.wordcloud.title') %>
+
+
+ <% if @wordcloud_url %>
+
+ <%= link_to @wordcloud_url, target: "_blank", title: t('admin.tools.wordcloud.view_full_size') do %>
+ <%= image_tag @wordcloud_url, class: 'img-responsive', alt: t('admin.tools.wordcloud.title') %>
+
+ <%= t('admin.tools.wordcloud.click_to_enlarge') %>
+
+ <% end %>
+
+ <% if @wordcloud_generated_at %>
+
+ <%= t('admin.tools.wordcloud.generated_at', time: l(@wordcloud_generated_at, format: :long)) %>
+
+ <% end %>
+
+ <% end %>
+
+
<%= t('admin.tools.wordcloud.instructions') %>
+
+ <%= render 'admin/tools/wordcloud/form' %>
+
+
+
+
+
+
+
+
<%= t('admin.tools.wordcloud.top_words') %>
+
+
+ <% if @top_words && @top_words.any? %>
+
+ <% @top_words.each do |word, count| %>
+ - <%= word %>: <%= count %>
+ <% end %>
+
+ <% else %>
+
<%= t('admin.tools.wordcloud.top_words_empty') %>
+ <% end %>
+
+
+
+
+
+
\ No newline at end of file
diff --git a/app/views/admin/tools/wordcloud/progress.html.erb b/app/views/admin/tools/wordcloud/progress.html.erb
new file mode 100644
index 000000000..2849b1794
--- /dev/null
+++ b/app/views/admin/tools/wordcloud/progress.html.erb
@@ -0,0 +1,93 @@
+
+
+
+
WordCloud Generation Progress
+
+
+
+
+
+ <%= @progress_data[:progress] %>%
+
+
+
+
+ <% case @progress_data[:status] %>
+ <% when 'not_started' %>
+
Waiting to start processing...
+ <% when 'processing' %>
+
Processing in progress...
+ <% when 'completed' %>
+
+ WordCloud generation completed!
+ <%= link_to "View WordCloud", admin_tools_wordcloud_path, class: "btn btn-primary" %>
+
+ <% when 'failed' %>
+
+ Error: <%= @progress_data[:error] || "Unknown error occurred" %>
+
+ <% end %>
+
+
+
+ <%= link_to "Back to Tools", admin_tools_path, class: "btn btn-secondary" %>
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/config/environments/production.rb b/config/environments/production.rb
index 2825185d0..e3f6591d6 100644
--- a/config/environments/production.rb
+++ b/config/environments/production.rb
@@ -54,6 +54,10 @@ Rails.application.configure do
# Use a different cache store in production.
# config.cache_store = :mem_cache_store
+ config.cache_store = :redis_cache_store, { # Redis-backed Rails.cache; the wordcloud controller and job exchange progress through it
+ url: "#{ENV.fetch('REDIS_URL', 'redis://localhost:6379')}/1", # "/1" is appended to REDIS_URL — NOTE(review): confirm REDIS_URL never already ends in a database path
+ expires_in: 300.seconds # store-level default, so presumably every entry written without its own expires_in (not only wordcloud progress) expires after 5 minutes — confirm intended
+ }
# Use a real queuing backend for Active Job (and separate queues per environment)
config.active_job.queue_adapter = :sidekiq
diff --git a/config/initializers/sidekiq.rb b/config/initializers/sidekiq.rb
index b2d067a2f..33392bcb8 100644
--- a/config/initializers/sidekiq.rb
+++ b/config/initializers/sidekiq.rb
@@ -2,7 +2,7 @@ require 'sidekiq/web' # Require at the top of the initializer
Sidekiq.configure_server do |config|
config.logger.level = Logger::INFO
-
+
# Custom job logging format
Sidekiq.logger.formatter = proc do |severity, datetime, progname, msg|
thread_id = Thread.current.object_id.to_s(36)
diff --git a/config/locales/admin/tools.en.yml b/config/locales/admin/tools.en.yml
new file mode 100644
index 000000000..073be1b6c
--- /dev/null
+++ b/config/locales/admin/tools.en.yml
@@ -0,0 +1,42 @@
+en:
+ admin:
+ tools:
+ title: "Administrative Tools"
+ available_tools: "Available Tools"
+ wordcloud_generator: "Domain Name Wordcloud Generator"
+ wordcloud_generator_description: "Generate a visual wordcloud from domain names in the registry"
+ generate_wordcloud: "Generate Wordcloud"
+ wordcloud:
+ title: "Domain Name Wordcloud"
+ success: "Wordcloud generated successfully"
+ error: "Error generating wordcloud"
+ processing: "Processing domain names. This may take a few minutes..."
+ instructions: "Generate a visual representation of the most common words in domain names. Click the button below to create the wordcloud."
+ top_words: "Top Words"
+ top_words_empty: "Generate a wordcloud to see the most frequent words."
+ click_to_enlarge: "Click to enlarge"
+ view_full_size: "View full size wordcloud image"
+ use_custom_domains: "Use custom domain list"
+ custom_file_description: "Upload a CSV file with one domain name per line"
+ file_upload_error: "Error processing uploaded file"
+ file_optional: "If no file is uploaded, all active domains in the registry will be used"
+ generated_at: "Generated at %{time}"
+ no_file: "No domain names found"
+ width: "Width"
+ height: "Height"
+ max_words: "Max Words"
+ background: "Background"
+ additional_stopwords: "Stopwords"
+ stopwords_placeholder: "Enter additional stopwords, one per line"
+ stopwords_help: "Stopwords are words that will not be included in the wordcloud"
+ advanced_options: "Advanced Options"
+ min_word_length: "Min Word Length"
+ include_numbers: "Include Numbers"
+ special_terms: "Special Terms"
+ special_terms_placeholder: "e.g., e-, i-, .com, ai, web"
+ special_terms_help: "These terms will be preserved in the word cloud even if they would normally be filtered out"
+ batch_size: "Batch Size"
+ batch_size_help: "Number of domains to process in each API call."
+ additional_prompt: "Additional Prompt Text"
+ additional_prompt_placeholder: "Add any additional instructions for the word cloud generation here..."
+ additional_prompt_help: "Optional text that will be used as additional context during word cloud generation."
diff --git a/config/locales/en.yml b/config/locales/en.yml
index fe98336f3..82e131a22 100644
--- a/config/locales/en.yml
+++ b/config/locales/en.yml
@@ -229,6 +229,7 @@ en:
valid_from: 'Valid from'
general: 'General'
contacts: 'Contacts'
+ tools: 'Tools'
identity_code: 'Identity code'
nameservers: 'Nameservers'
hostname: 'Hostname'
diff --git a/config/routes.rb b/config/routes.rb
index 50e72511b..aca21c4f2 100644
--- a/config/routes.rb
+++ b/config/routes.rb
@@ -250,6 +250,17 @@ Rails.application.routes.draw do
end
# post 'admi/upload_spreadsheet', to: 'customers#upload_spreadsheet', as: :customers_upload_spreadsheet
+ resources :tools, only: %i[index] # tools landing page; the menu links to it via admin_tools_path
+
+ namespace :tools do
+ resource :wordcloud, controller: 'wordcloud', only: %i[create] do # singular resource; only `create` comes from the resource itself
+ collection do
+ get '', to: 'wordcloud#index' # generator form plus the latest image
+ get 'progress', to: 'wordcloud#progress', as: :progress # HTML progress page (progress_admin_tools_wordcloud_path)
+ get 'status', to: 'wordcloud#status', as: :status # progress entry rendered as JSON
+ end
+ end
+ end
resources :bank_statements do
resources :bank_transactions
diff --git a/lib/wordcloud/fonts/Pacifico-Regular.ttf b/lib/wordcloud/fonts/Pacifico-Regular.ttf
new file mode 100644
index 000000000..e7def95d3
Binary files /dev/null and b/lib/wordcloud/fonts/Pacifico-Regular.ttf differ
diff --git a/lib/wordcloud/generate_wordcloud.py b/lib/wordcloud/generate_wordcloud.py
new file mode 100644
index 000000000..e9eaf1ea2
--- /dev/null
+++ b/lib/wordcloud/generate_wordcloud.py
@@ -0,0 +1,351 @@
+#!/usr/bin/env python3
+
+import os
+import re
+import sys
+import json
+import random
+import asyncio
+import numpy as np
+from PIL import Image
+from os import path
+from wordcloud import WordCloud, STOPWORDS
+from openai import AsyncOpenAI
+import matplotlib.pyplot as plt
+# import pandas as pd
+from dotenv import load_dotenv
+load_dotenv()
+
+BATCH_SIZE = int(os.environ.get("OPENAI_BATCH_SIZE", "20"))
+
+def load_system_prompt():
+ """Loads system prompt from system_prompt.md file"""
+ prompt_file = path.join(path.dirname(__file__), 'system_prompt.md')
+
+ if not path.exists(prompt_file):
+ raise FileNotFoundError(f"System prompt not found at {prompt_file}. Please create the file.")
+
+ with open(prompt_file, 'r', encoding='utf-8') as f:
+ system_prompt = f.read()
+
+ return system_prompt
+
+d = path.dirname(__file__) if "__file__" in locals() else os.getcwd()
+
+output_dir = sys.argv[2] if len(sys.argv) > 2 else d
+
+try:
+ SYSTEM_PROMPT = load_system_prompt()
+ print("System prompt successfully loaded from file.")
+except FileNotFoundError as e:
+ print(f"Error: {e}")
+ sys.exit(1)
+
+# Load configuration if provided
+config = {}
+if len(sys.argv) > 3 and sys.argv[3]:
+ config_file = sys.argv[3]
+ if path.exists(config_file):
+ with open(config_file, 'r') as f:
+ config = json.load(f)
+ print(f"Loaded configuration: {config}")
+
+# Check if domains file path is provided and exists
+if len(sys.argv) > 1 and sys.argv[1]:
+ domains_file = sys.argv[1]
+ if not path.exists(domains_file):
+ print(f"Error: Provided domains file {domains_file} not found")
+ sys.exit(1)
+else:
+ print(f"Error: Domains file not found")
+ sys.exit(1)
+
+# Read domain names from the file
+with open(domains_file, 'r', encoding='utf-8') as f:
+ domain_names = [line.strip().lower() for line in f if line.strip()]
+
+if not domain_names:
+ print("Error: No domain names found in the provided file")
+ sys.exit(1)
+
+
+# Function to extract words using OpenAI API asynchronously
+async def extract_words_with_openai(domain_names, batch_size=BATCH_SIZE):
+ filtered_domains = []
+
+ # Filter out domains that are only numbers
+ for domain in domain_names:
+ domain_core = domain.lower().replace('www.', '')
+ main_part = domain_core.split('.')[0]
+ if not main_part.isdigit():
+ filtered_domains.append(domain)
+
+
+ # Get API key from environment variable
+ api_key = os.environ.get("OPENAI_API_KEY")
+ if not api_key:
+ raise ValueError("OpenAI API key not found. Set the OPENAI_API_KEY environment variable.")
+
+ # Initialize AsyncOpenAI client
+ client = AsyncOpenAI(api_key=api_key)
+
+ # Get model and temperature from environment variables
+ model = os.environ.get("OPENAI_MODEL", "gpt-4o-2024-11-20")
+ temperature = float(os.environ.get("OPENAI_TEMPERATURE", "0"))
+ max_tokens = int(os.environ.get("OPENAI_MAX_TOKENS", "16000"))
+
+ # Process domains in batches
+ all_words = []
+ total_prompt_tokens = 0
+ total_completion_tokens = 0
+ total_cost = 0
+
+ # Calculate number of batches
+ num_batches = (len(filtered_domains) + batch_size - 1) // batch_size
+
+ # Create semaphore to limit concurrent requests
+ semaphore = asyncio.Semaphore(10) # Limit to 10 concurrent requests
+
+ async def process_batch(batch_idx):
+ async with semaphore:
+ start_idx = batch_idx * batch_size
+ end_idx = min(start_idx + batch_size, len(filtered_domains))
+ batch = filtered_domains[start_idx:end_idx]
+
+ print(f"Processing batch {batch_idx + 1}/{num_batches} ({len(batch)} domains)...")
+ sys.stdout.flush()
+
+ # Prepare the prompt with domain names and special terms
+ domains_text = "\n".join(batch)
+ prompt = f"List of domain names: {domains_text}"
+
+ # Make the API call
+ try:
+ print(f"Using model: {model} with temperature: {temperature}")
+ response = await client.chat.completions.create(
+ model=model,
+ messages=[
+ {"role": "system", "content": SYSTEM_PROMPT},
+ {"role": "user", "content": prompt}
+ ],
+ response_format={
+ "type": "json_schema",
+ "json_schema": {
+ "name": "domain_analysis_results",
+ "strict": True,
+ "schema": {
+ "type": "object",
+ "properties": {
+ "results": {
+ "type": "array",
+ "description": "A list of analysis results for the provided domains.",
+ "items": {
+ "type": "object",
+ "properties": {
+ "Language": {
+ "type": "string",
+ "description": "The language identified in the domain name."
+ },
+ "is_splitted": {
+ "type": "string",
+ "description": "Indicates whether the domain name is split into recognizable words."
+ },
+ "reasoning": {
+ "type": "string",
+ "description": "Explanation of the reasoning behind the language and word identification."
+ },
+ "words": {
+ "type": "array",
+ "description": "The words identified in the domain name.",
+ "items": {
+ "type": "string"
+ }
+ }
+ },
+ "required": [
+ "Language",
+ "is_splitted",
+ "reasoning",
+ "words"
+ ],
+ "additionalProperties": False
+ }
+ }
+ },
+ "required": [
+ "results"
+ ],
+ "additionalProperties": False
+ }
+ }
+ },
+ temperature=temperature,
+ max_tokens=max_tokens,
+ )
+
+ # Track token usage
+ prompt_tokens = response.usage.prompt_tokens
+ completion_tokens = response.usage.completion_tokens
+ total_tokens = response.usage.total_tokens
+
+ nonlocal total_prompt_tokens, total_completion_tokens
+ total_prompt_tokens += prompt_tokens
+ total_completion_tokens += completion_tokens
+
+ print(f"Token usage - Prompt: {prompt_tokens}, Completion: {completion_tokens}, Total: {total_tokens}")
+
+ # Calculate cost (approximate, based on current pricing)
+ if "gpt-4.1" in model:
+ prompt_cost = (prompt_tokens / 1000000) * 2.00 # $2.00 per 1M tokens for GPT-4.1 input
+ completion_cost = (completion_tokens / 1000000) * 8.00 # $8.00 per 1M tokens for GPT-4.1 output
+ else:
+ prompt_cost = 0
+ completion_cost = 0
+
+ batch_cost = prompt_cost + completion_cost
+ nonlocal total_cost
+ total_cost += batch_cost
+ print(f"Estimated batch cost: ${batch_cost:.6f}")
+
+ # Extract the words from the response
+ response_json = json.loads(response.choices[0].message.content)
+ batch_words = []
+ for result in response_json['results']:
+ if result['Language'] == 'Ignore':
+ continue
+ batch_words.extend(result['words'])
+
+ print(f"Extracted {len(batch_words)} words from this batch")
+ return batch_words
+
+ except Exception as e:
+ print(f"Error calling OpenAI API for batch: {e}")
+ return []
+
+ # Create tasks for each batch
+ tasks = []
+ for batch_idx in range(num_batches):
+ tasks.append(process_batch(batch_idx))
+
+ # Run all tasks concurrently and wait for results
+ batch_results = await asyncio.gather(*tasks)
+
+ # Combine all words from all batches
+ for batch_words in batch_results:
+ all_words.extend(batch_words)
+
+ print(f"Total token usage - Prompt: {total_prompt_tokens}, Completion: {total_completion_tokens}")
+ print(f"Total estimated cost: ${total_cost:.6f}")
+
+ return all_words
+
+# Replace the synchronous call with an async function
+async def main():
+ # Process domain names using OpenAI
+ print("Extracting words from domain names using OpenAI...")
+ extracted_words = await extract_words_with_openai(domain_names)
+ print(f"Extracted {len(extracted_words)} words")
+
+ # Join the extracted words for the word cloud
+ processed_text = ' '.join(extracted_words)
+
+ def custom_color_func(word, font_size, position, orientation, random_state=None,
+ **kwargs):
+ return "hsl(215, 100%%, %d%%)" % random.randint(15, 80)
+
+ mask = np.array(Image.open(path.join(d, 'mask.png')))
+
+ # Get configuration values with defaults
+ width = int(config.get('width', 800))
+ height = int(config.get('height', 800))
+ max_words = int(config.get('max_words', 500))
+ background_color = config.get('background_color', 'white')
+ min_word_length = int(config.get('min_word_length', 2))
+ include_numbers = config.get('include_numbers', True)
+
+ # Handle transparent background
+ if background_color == 'transparent':
+ background_color = None
+
+ # Get additional stopwords
+ additional_stopwords = config.get('additional_stopwords', [])
+
+ stopwords = set(STOPWORDS)
+ stopwords = {
+ 'ja', 'ning', 'et', 'kui', 'aga', 'ka', 'ei', 'see', 'on', 'ole',
+ 'oma', 'seda', 'siis', 'või', 'mis', 'nii', 'veel', 'kes', 'üle',
+ 'välja', 'olema', 'kus', 'nagu', 'kuid', 'selle', 'pole', 'ära',
+ 'vaid', 'sest', 'juba', 'meie', 'mida', 'need', 'olid', 'minu',
+ 'tema', 'pärast', 'mingi', 'palju', 'kõik', 'seal', 'olen', 'oled',
+ 'oli', 'olnud', 'ongi', 'poolt', 'meil', 'teda', 'just', 'kuna',
+ 'läbi', 'küll',
+ 'the', 'and', 'a', 'to', 'of', 'in', 'is', 'that', 'it', 'for',
+ 'with', 'as', 'be', 'on', 'not', 'this', 'but', 'by', 'from', 'are',
+ 'or', 'an', 'at', 'was', 'have', 'has', 'had', 'were', 'will', 'would',
+ 'should', 'can', 'could', 'may', 'might', 'must', 'do', 'does', 'did',
+ 'doing', 'done', 'their', 'they', 'them', 'there', 'these', 'those',
+ 'which', 'who', 'whom', 'whose', 'what', 'when', 'where', 'why', 'how'
+ }
+
+ stopwords.update(stopwords)
+ stopwords.update(additional_stopwords)
+
+ font_path = path.join(d, 'fonts', 'Pacifico-Regular.ttf')
+ # Alternative: use a system font
+ # font_path = fm.findfont(fm.FontProperties(family='Arial'))
+
+ print("Generating word cloud...")
+ wc = WordCloud(width=width, height=height,
+ mask=mask,
+ stopwords=stopwords,
+ background_color=background_color,
+ max_words=max_words,
+ include_numbers=include_numbers,
+ collocations=False,
+ min_word_length=min_word_length,
+ regexp=r"[A-Za-zÕÄÖÜõäöü0-9][\w\-'ÕÄÖÜõäöü]*(?