From c42196e16d5feb6c40e492e8ddd7765dd139ad56 Mon Sep 17 00:00:00 2001 From: Kyle Drake Date: Fri, 1 Jan 2016 01:52:21 -0600 Subject: [PATCH] updates to classifier primer --- Rakefile | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Rakefile b/Rakefile index 9f9da7cc..49f89cfe 100644 --- a/Rakefile +++ b/Rakefile @@ -292,13 +292,16 @@ task :update_screenshots => [:environment] do end =end -desc 'train_classifier' -task :train_classifier => [:environment] do +desc 'prime_classifier' +task :prime_classifier => [:environment] do Site.select(:id, :username).where(is_banned: false, is_deleted: false).all.each do |site| + next if site.site_files_dataset.where(classifier: 'spam').count > 0 html_files = site.site_files_dataset.where(path: /\.html$/).all - html_files.each do |file| - site.train html_files.path + html_files.each do |html_file| + print "training #{site.username}/#{html_file.path}..." + site.train html_file.path + print "done.\n" end end end