123 lines
4.8 KiB
C#
123 lines
4.8 KiB
C#
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.Text;
|
|
using NUnit.Framework;
|
|
|
|
namespace ScrewTurn.Wiki.SearchEngine.Tests {
|
|
|
|
[TestFixture]
|
|
public class ToolsTests : TestsBase {
|
|
|
|
[Test]
|
|
public void RemoveDiacriticsAndPunctuation() {
|
|
string testPhrase = "Wow, thìs thing sèems really cool!";
|
|
string testWord = "Wòrd";
|
|
|
|
Assert.AreEqual("wow this thing seems really cool", Tools.RemoveDiacriticsAndPunctuation(testPhrase, false), "Wrong normalized phrase");
|
|
Assert.AreEqual("word", Tools.RemoveDiacriticsAndPunctuation(testWord, true), "Wrong normalized word");
|
|
}
|
|
|
|
[Test]
|
|
public void IsSplitChar() {
|
|
foreach(char c in ",.;:-\"'!?^=()<>\\|/[]{}«»*°§%&#@~©®±") {
|
|
Assert.IsTrue(Tools.IsSplitChar(c), "Char is a split char");
|
|
}
|
|
foreach(char c in "abcdefghijklmnopqrstuvwxyz0123456789òçàùèéì€$£") {
|
|
Assert.IsFalse(Tools.IsSplitChar(c), "Char is not a split char");
|
|
}
|
|
}
|
|
|
|
[Test]
|
|
public void SkipSplitChars() {
|
|
Assert.AreEqual(0, Tools.SkipSplitChars(0, "hello"));
|
|
Assert.AreEqual(1, Tools.SkipSplitChars(0, " hello"));
|
|
Assert.AreEqual(7, Tools.SkipSplitChars(6, "Hello! How are you?"));
|
|
}
|
|
|
|
[Test]
|
|
public void Tokenize() {
|
|
string input = "Hello, there!";
|
|
WordInfo[] expectedOutput = new WordInfo[] { new WordInfo("Hello", 0, 0, WordLocation.Content), new WordInfo("there", 7, 1, WordLocation.Content) };
|
|
|
|
WordInfo[] output = Tools.Tokenize(input, WordLocation.Content);
|
|
|
|
Assert.AreEqual(expectedOutput.Length, output.Length, "Wrong output length");
|
|
|
|
for(int i = 0; i < output.Length; i++) {
|
|
Assert.AreEqual(expectedOutput[i].Text, output[i].Text, "Wrong word text at index " + i.ToString());
|
|
Assert.AreEqual(expectedOutput[i].FirstCharIndex, output[i].FirstCharIndex, "Wrong first char index at " + i.ToString());
|
|
Assert.AreEqual(expectedOutput[i].WordIndex, output[i].WordIndex, "Wrong word index at " + i.ToString());
|
|
}
|
|
}
|
|
|
|
[Test]
|
|
public void Tokenize_OneWord() {
|
|
string input = "todo";
|
|
WordInfo[] expectedOutput = new WordInfo[] { new WordInfo("todo", 0, 0, WordLocation.Content) };
|
|
|
|
WordInfo[] output = Tools.Tokenize(input, WordLocation.Content);
|
|
|
|
Assert.AreEqual(expectedOutput.Length, output.Length, "Wrong output length");
|
|
|
|
for(int i = 0; i < output.Length; i++) {
|
|
Assert.AreEqual(expectedOutput[i].Text, output[i].Text, "Wrong word text at index " + i.ToString());
|
|
Assert.AreEqual(expectedOutput[i].FirstCharIndex, output[i].FirstCharIndex, "Wrong first char index at " + i.ToString());
|
|
Assert.AreEqual(expectedOutput[i].WordIndex, output[i].WordIndex, "Wrong word index at " + i.ToString());
|
|
}
|
|
}
|
|
|
|
[Test]
|
|
public void Tokenize_OneWordWithSplitChar() {
|
|
string input = "todo.";
|
|
WordInfo[] expectedOutput = new WordInfo[] { new WordInfo("todo", 0, 0, WordLocation.Content) };
|
|
|
|
WordInfo[] output = Tools.Tokenize(input, WordLocation.Content);
|
|
|
|
Assert.AreEqual(expectedOutput.Length, output.Length, "Wrong output length");
|
|
|
|
for(int i = 0; i < output.Length; i++) {
|
|
Assert.AreEqual(expectedOutput[i].Text, output[i].Text, "Wrong word text at index " + i.ToString());
|
|
Assert.AreEqual(expectedOutput[i].FirstCharIndex, output[i].FirstCharIndex, "Wrong first char index at " + i.ToString());
|
|
Assert.AreEqual(expectedOutput[i].WordIndex, output[i].WordIndex, "Wrong word index at " + i.ToString());
|
|
}
|
|
}
|
|
|
|
[Test]
|
|
[ExpectedException(typeof(ArgumentNullException))]
|
|
public void Tokenize_NullText() {
|
|
Tools.Tokenize(null, WordLocation.Content);
|
|
}
|
|
|
|
[Test]
|
|
public void RemoveStopWords() {
|
|
WordInfo[] input = new WordInfo[] { new WordInfo("I", 0, 0, WordLocation.Content), new WordInfo("like", 7, 1, WordLocation.Content),
|
|
new WordInfo("the", 15, 2, WordLocation.Content), new WordInfo("cookies", 22, 3, WordLocation.Content) };
|
|
WordInfo[] expectedOutput = new WordInfo[] { new WordInfo("I", 0, 0, WordLocation.Content), new WordInfo("like", 7, 1, WordLocation.Content),
|
|
new WordInfo("cookies", 22, 3, WordLocation.Content) };
|
|
|
|
WordInfo[] output = Tools.RemoveStopWords(input, new string[] { "the", "in", "of" });
|
|
|
|
Assert.AreEqual(expectedOutput.Length, output.Length, "Wrong output length");
|
|
|
|
for(int i = 0; i < output.Length; i++) {
|
|
Assert.AreEqual(expectedOutput[i].Text, output[i].Text, "Wrong word text at index " + i.ToString());
|
|
Assert.AreEqual(expectedOutput[i].FirstCharIndex, output[i].FirstCharIndex, "Wrong word position at index " + i.ToString());
|
|
}
|
|
}
|
|
|
|
[Test]
|
|
[ExpectedException(typeof(ArgumentNullException))]
|
|
public void RemoveStopWords_NullInputWords() {
|
|
Tools.RemoveStopWords(null, new string[0]);
|
|
}
|
|
|
|
[Test]
|
|
[ExpectedException(typeof(ArgumentNullException))]
|
|
public void RemoveStopWords_NullStopWords() {
|
|
Tools.RemoveStopWords(new WordInfo[0], null);
|
|
}
|
|
|
|
}
|
|
|
|
}
|