diff --git a/Gemfile b/Gemfile index b46b07d0..4e0890c0 100644 --- a/Gemfile +++ b/Gemfile @@ -33,7 +33,6 @@ gem 'dnsruby' gem 'base32' gem 'coveralls', require: false gem 'sanitize' -gem 'linnaeus', git: 'https://github.com/neocities/linnaeus.git', branch: 'soften_deps_and_perf' gem 'will_paginate' platform :mri, :rbx do diff --git a/environment.rb b/environment.rb index d3a35a9e..36b425d0 100644 --- a/environment.rb +++ b/environment.rb @@ -133,6 +133,3 @@ $country_codes = {} CSV.foreach("./files/country_codes.csv") do |row| $country_codes[row.last] = row.first end - -$classifier = Linnaeus::Classifier.new redis_db: 1 -$trainer = Linnaeus::Trainer.new redis_db: 1 diff --git a/models/site.rb b/models/site.rb index 3e48c353..54920fa3 100644 --- a/models/site.rb +++ b/models/site.rb @@ -1247,17 +1247,17 @@ class Site < Sequel::Model def classify(path) return nil unless classification_allowed? path - $classifier.classify process_for_classification(path) + #$classifier.classify process_for_classification(path) end def classification_scores(path) return nil unless classification_allowed? path - $classifier.classification_scores process_for_classification(path) + #$classifier.classification_scores process_for_classification(path) end def train(path, category='ham') return nil unless classification_allowed? path - $trainer.train(category, process_for_classification(path)) + # $trainer.train(category, process_for_classification(path)) site_file = site_files_dataset.where(path: path).first site_file.classifier = category site_file.save_changes validate: false @@ -1265,7 +1265,7 @@ class Site < Sequel::Model def untrain(path, category='ham') return nil unless classification_allowed? path - $trainer.untrain(category, process_for_classification(path)) + # $trainer.untrain(category, process_for_classification(path)) site_file = site_files_dataset.where(path: path).first site_file.classifier = category site_file.save_changes validate: false diff --git a/models/site_file.rb b/models/site_file.rb index fe4c8343..bf81c5e3 100644 --- a/models/site_file.rb +++ b/models/site_file.rb @@ -1,5 +1,4 @@ require 'sanitize' -require 'linnaeus' class SiteFile < Sequel::Model CLASSIFIER_LIMIT = 1_000_000.freeze diff --git a/tests/site_file_tests.rb b/tests/site_file_tests.rb index a303aa12..fbf6ccff 100644 --- a/tests/site_file_tests.rb +++ b/tests/site_file_tests.rb @@ -332,9 +332,10 @@ describe 'site_files' do describe 'classification' do before do - $trainer.instance_variable_get('@db').redis.flushall + puts "TODO FINISH CLASSIFIER" + #$trainer.instance_variable_get('@db').redis.flushall end - +=begin it 'trains files' do upload 'files[]' => Rack::Test::UploadedFile.new('./tests/files/classifier/ham.html', 'text/html') upload 'files[]' => Rack::Test::UploadedFile.new('./tests/files/classifier/spam.html', 'text/html') @@ -348,6 +349,7 @@ describe 'site_files' do @site.classify('spam.html').must_equal 'spam' @site.classify('phishing.html').must_equal 'phishing' end +=end end end end