From e183ac32f6708780fc528f4aeedcd644f04ef8f6 Mon Sep 17 00:00:00 2001
From: Kyle Drake
Date: Tue, 2 Jul 2019 13:46:39 -0700
Subject: [PATCH] dumper task for ml image classification work

---
 Rakefile | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/Rakefile b/Rakefile
index c1926403..a8e0e62e 100644
--- a/Rakefile
+++ b/Rakefile
@@ -465,3 +465,25 @@ task :dedupe_site_blocks => [:environment] do
     duped_block.destroy
   end
 end
+
+# Writes the index.html screenshot URL (first entry of Site::SCREENSHOT_RESOLUTIONS)
+# of every matching site file to ./files/screenshot-urls-<classifier>.txt,
+# one output file per classifier. The nil entry produces ./files/screenshot-urls-.txt.
+desc 'ml_screenshots_list_dump'
+task :ml_screenshots_list_dump => [:environment] do
+  ['phishing', 'spam', 'ham', nil].each do |classifier|
+    File.open("./files/screenshot-urls-#{classifier}.txt", 'w') do |fp|
+      SiteFile.where(classifier: classifier).where(path: 'index.html').each do |site_file|
+        begin
+          site = site_file.site
+          # A nil site would raise NoMethodError below; skip it quietly.
+          next if site.nil?
+          fp.puts site.screenshot_url('index.html', Site::SCREENSHOT_RESOLUTIONS.first)
+        rescue NoMethodError => e
+          # Stay best-effort, but report the skipped row instead of hiding it.
+          warn "ml_screenshots_list_dump: skipped a site file: #{e.message}"
+        end
+      end
+    end
+  end
+end