Fixed help files.
This commit is contained in:
commit
b8f912cc79
1543 changed files with 395123 additions and 0 deletions
123
SearchEngine-Tests/ToolsTests.cs
Normal file
123
SearchEngine-Tests/ToolsTests.cs
Normal file
|
@@ -0,0 +1,123 @@
|
|||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
using NUnit.Framework;
|
||||
|
||||
namespace ScrewTurn.Wiki.SearchEngine.Tests {

	/// <summary>
	/// Unit tests for the static helpers exposed by <c>Tools</c>: diacritics/punctuation
	/// removal, split-char detection and skipping, tokenization and stop-word removal.
	/// </summary>
	[TestFixture]
	public class ToolsTests : TestsBase {

		/// <summary>
		/// Checks that a phrase and a single word are returned lowercase, with accented
		/// letters replaced by their plain counterparts and punctuation removed.
		/// </summary>
		[Test]
		public void RemoveDiacriticsAndPunctuation() {
			string testPhrase = "Wow, thìs thing sèems really cool!";
			string testWord = "Wòrd";

			Assert.AreEqual("wow this thing seems really cool", Tools.RemoveDiacriticsAndPunctuation(testPhrase, false), "Wrong normalized phrase");
			Assert.AreEqual("word", Tools.RemoveDiacriticsAndPunctuation(testWord, true), "Wrong normalized word");
		}

		/// <summary>
		/// Checks that punctuation and symbol characters are reported as split chars, while
		/// letters (accented ones included), digits and currency signs are not.
		/// </summary>
		[Test]
		public void IsSplitChar() {
			// The failure message names the offending character, so a broken case can be
			// identified without stepping through the loop.
			foreach(char c in ",.;:-\"'!?^=()<>\\|/[]{}«»*°§%&#@~©®±") {
				Assert.IsTrue(Tools.IsSplitChar(c), "Char '" + c + "' should be a split char");
			}
			foreach(char c in "abcdefghijklmnopqrstuvwxyz0123456789òçàùèéì€$£") {
				Assert.IsFalse(Tools.IsSplitChar(c), "Char '" + c + "' should not be a split char");
			}
		}

		/// <summary>
		/// Checks the index returned by <c>SkipSplitChars</c> when the start position is on a
		/// regular character, on a leading space, and on a space in the middle of a sentence.
		/// </summary>
		[Test]
		public void SkipSplitChars() {
			Assert.AreEqual(0, Tools.SkipSplitChars(0, "hello"));
			Assert.AreEqual(1, Tools.SkipSplitChars(0, " hello"));
			Assert.AreEqual(7, Tools.SkipSplitChars(6, "Hello! How are you?"));
		}

		/// <summary>
		/// Checks that a two-word phrase with punctuation is split into two words carrying
		/// the expected text, first char index and word index.
		/// </summary>
		[Test]
		public void Tokenize() {
			AssertTokenization("Hello, there!",
				new WordInfo("Hello", 0, 0, WordLocation.Content),
				new WordInfo("there", 7, 1, WordLocation.Content));
		}

		/// <summary>
		/// Checks that an input made of a single word produces exactly one word.
		/// </summary>
		[Test]
		public void Tokenize_OneWord() {
			AssertTokenization("todo",
				new WordInfo("todo", 0, 0, WordLocation.Content));
		}

		/// <summary>
		/// Checks that a trailing split char is not included in the single resulting word.
		/// </summary>
		[Test]
		public void Tokenize_OneWordWithSplitChar() {
			AssertTokenization("todo.",
				new WordInfo("todo", 0, 0, WordLocation.Content));
		}

		/// <summary>
		/// Checks that tokenizing a <c>null</c> text raises <see cref="ArgumentNullException" />.
		/// </summary>
		[Test]
		[ExpectedException(typeof(ArgumentNullException))]
		public void Tokenize_NullText() {
			Tools.Tokenize(null, WordLocation.Content);
		}

		/// <summary>
		/// Checks that words listed as stop words are dropped, while the remaining words keep
		/// their text and first char index.
		/// </summary>
		[Test]
		public void RemoveStopWords() {
			WordInfo[] input = new WordInfo[] {
				new WordInfo("I", 0, 0, WordLocation.Content),
				new WordInfo("like", 7, 1, WordLocation.Content),
				new WordInfo("the", 15, 2, WordLocation.Content),
				new WordInfo("cookies", 22, 3, WordLocation.Content)
			};
			WordInfo[] expectedOutput = new WordInfo[] {
				new WordInfo("I", 0, 0, WordLocation.Content),
				new WordInfo("like", 7, 1, WordLocation.Content),
				new WordInfo("cookies", 22, 3, WordLocation.Content)
			};

			WordInfo[] output = Tools.RemoveStopWords(input, new string[] { "the", "in", "of" });

			Assert.AreEqual(expectedOutput.Length, output.Length, "Wrong output length");

			for(int i = 0; i < output.Length; i++) {
				Assert.AreEqual(expectedOutput[i].Text, output[i].Text, "Wrong word text at index " + i.ToString());
				Assert.AreEqual(expectedOutput[i].FirstCharIndex, output[i].FirstCharIndex, "Wrong word position at index " + i.ToString());
			}
		}

		/// <summary>
		/// Checks that a <c>null</c> input word array raises <see cref="ArgumentNullException" />.
		/// </summary>
		[Test]
		[ExpectedException(typeof(ArgumentNullException))]
		public void RemoveStopWords_NullInputWords() {
			Tools.RemoveStopWords(null, new string[0]);
		}

		/// <summary>
		/// Checks that a <c>null</c> stop word array raises <see cref="ArgumentNullException" />.
		/// </summary>
		[Test]
		[ExpectedException(typeof(ArgumentNullException))]
		public void RemoveStopWords_NullStopWords() {
			Tools.RemoveStopWords(new WordInfo[0], null);
		}

		/// <summary>
		/// Tokenizes <paramref name="input" /> as content and asserts that the result matches
		/// <paramref name="expectedOutput" /> word by word (text, first char index, word index).
		/// </summary>
		/// <param name="input">The text passed to <c>Tools.Tokenize</c>.</param>
		/// <param name="expectedOutput">The words expected back, in order.</param>
		private static void AssertTokenization(string input, params WordInfo[] expectedOutput) {
			WordInfo[] output = Tools.Tokenize(input, WordLocation.Content);

			// The length is verified first, so the loop below never indexes past expectedOutput.
			Assert.AreEqual(expectedOutput.Length, output.Length, "Wrong output length");

			for(int i = 0; i < output.Length; i++) {
				Assert.AreEqual(expectedOutput[i].Text, output[i].Text, "Wrong word text at index " + i.ToString());
				Assert.AreEqual(expectedOutput[i].FirstCharIndex, output[i].FirstCharIndex, "Wrong first char index at " + i.ToString());
				Assert.AreEqual(expectedOutput[i].WordIndex, output[i].WordIndex, "Wrong word index at " + i.ToString());
			}
		}

	}

}
|
Loading…
Add table
Add a link
Reference in a new issue