From 0487e0c95b9487e728a633781ae281948f14e98f Mon Sep 17 00:00:00 2001
From: Kyle Drake
Date: Wed, 30 Dec 2015 17:52:47 -0600
Subject: [PATCH] spam training script

---
 Rakefile | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/Rakefile b/Rakefile
index 5cfc0cc9..9f9da7cc 100644
--- a/Rakefile
+++ b/Rakefile
@@ -302,3 +302,27 @@ task :train_classifier => [:environment] do
     end
   end
 end
+
+# Reads ./spam.txt, passes each listed file to Site#train as :spam, and bans the owning site.
+# Every line is expected to look like "<username>/<path of the file within the site>".
+desc 'train_spam'
+task :train_spam => [:environment] do
+  File.foreach('./spam.txt') do |line|
+    # Skip blank or malformed lines instead of crashing on a nil match; chomp also drops a CRLF's "\r".
+    match = line.chomp.match(/\A([a-zA-Z0-9_-]+)\/(.+)\z/)
+    next unless match
+
+    username, site_file_path = match.captures
+    site = Site[username: username]
+    next unless site
+
+    # Only act when the listed path is recorded among the site's files.
+    site_file = site.site_files_dataset.where(path: site_file_path).first
+    next unless site_file
+
+    site.train site_file_path, :spam
+    site.ban!
+    # NOTE(review): nothing is deleted in this task itself; presumably ban! removes the files - confirm.
+    puts "Deleted #{site_file_path}, banned #{site.username}"
+  end
+end