Rake task to dump a list of screenshot URLs for future ML research

This commit is contained in:
Kyle Drake 2020-02-01 21:17:47 -08:00
parent 225bf84946
commit a9e49d7cdd

View file

@ -582,5 +582,18 @@ task :generate_sitemap => [:environment] do
gz.write %{</sitemapindex>}
end
# Writes one text file per SiteFile classifier containing the screenshot URL
# (one per line) of every matching site's index.html:
#
#   ./files/screenshot-urls-<classifier>.txt
#
# The nil classifier is included as well; its file name has an empty
# classifier part (./files/screenshot-urls-.txt). Existing files are
# truncated and rewritten on every run.
desc 'ml_screenshots_list_dump'
task :ml_screenshots_list_dump => [:environment] do
  # Loop-invariant: every URL is generated for the first listed resolution.
  resolution = Site::SCREENSHOT_RESOLUTIONS.first

  ['phishing', 'spam', 'ham', nil].each do |classifier|
    File.open("./files/screenshot-urls-#{classifier}.txt", 'w') do |fp|
      SiteFile.where(classifier: classifier, path: 'index.html').each do |site_file|
        begin
          site = site_file.site
          # A site file without a site cannot produce a URL; skip it
          # explicitly instead of relying on a NoMethodError from nil.
          next if site.nil?

          fp.write "#{site.screenshot_url('index.html', resolution)}\n"
        rescue NoMethodError => e
          # Keep the dump best-effort (one bad record must not abort the
          # whole run), but surface the failure instead of hiding it.
          warn "ml_screenshots_list_dump: skipped a site file (#{e.message})"
        end
      end
    end
  end
end
end