using System;
using System.Collections.Generic;
using System.Text;
using System.Globalization;
namespace ScrewTurn.Wiki.SearchEngine {
/// <summary>
/// Represents a word in a document.
/// </summary>
/// <remarks>All instance and static members are thread-safe.</remarks>
public class Word {

	/// <summary>
	/// The word text, lowercase.
	/// </summary>
	protected string text;

	/// <summary>
	/// The occurrences.
	/// </summary>
	protected OccurrenceDictionary occurrences;

	/// <summary>
	/// The word unique ID.
	/// </summary>
	protected uint id;

	// Private gate guarding 'id'. A dedicated object is used instead of lock(this) because
	// RemoveOccurrences reads ID while holding the occurrences lock: if ID locked on the
	// instance, a caller doing lock(word) { word.AddOccurrence(...); } could deadlock with it.
	private readonly object idLock = new object();

	/// <summary>
	/// Initializes a new instance of the <see cref="Word"/> class
	/// with a new, empty occurrence dictionary.
	/// </summary>
	/// <param name="id">The word ID.</param>
	/// <param name="text">The text of the word (lowercase).</param>
	/// <exception cref="ArgumentNullException">If <paramref name="text"/> is <c>null</c>.</exception>
	/// <exception cref="ArgumentException">If <paramref name="text"/> is empty.</exception>
	public Word(uint id, string text)
		: this(id, text, new OccurrenceDictionary(10)) { }

	/// <summary>
	/// Initializes a new instance of the <see cref="Word"/> class.
	/// </summary>
	/// <param name="id">The word ID.</param>
	/// <param name="text">The text of the word (lowercase).</param>
	/// <param name="occurrences">The occurrences.</param>
	/// <exception cref="ArgumentNullException">If <paramref name="text"/> or <paramref name="occurrences"/> are <c>null</c>.</exception>
	/// <exception cref="ArgumentException">If <paramref name="text"/> is empty.</exception>
	public Word(uint id, string text, OccurrenceDictionary occurrences) {
		if(text == null) throw new ArgumentNullException("text");
		if(text.Length == 0) throw new ArgumentException("Text must contain at least one character", "text");
		if(occurrences == null) throw new ArgumentNullException("occurrences");

		// The stored text is the normalized form returned by the helper, not the raw argument.
		this.text = Tools.RemoveDiacriticsAndPunctuation(text, true);
		// NOTE(review): the result of the normalization is not validated; the guard below is disabled.
		//if(this.text.Length == 0) throw new InvalidOperationException();

		this.id = id;
		this.occurrences = occurrences;
	}

	/// <summary>
	/// Gets or sets the unique ID of the word.
	/// </summary>
	public uint ID {
		get {
			lock(idLock) {
				return id;
			}
		}
		set {
			lock(idLock) {
				id = value;
			}
		}
	}

	/// <summary>
	/// Gets the text of the word (lowercase).
	/// </summary>
	public string Text {
		get {
			// Read-only: no need to lock
			return text;
		}
	}

	/// <summary>
	/// Gets the occurrences.
	/// </summary>
	public OccurrenceDictionary Occurrences {
		get {
			// The lock only covers handing out the reference; it does not protect
			// whatever the caller does with the returned dictionary afterwards.
			lock(occurrences) {
				return occurrences;
			}
		}
	}

	/// <summary>
	/// Gets the total occurrences.
	/// </summary>
	/// <remarks>Computing the result is O(n), where n is the number of
	/// documents the word occurs in at least one time.</remarks>
	public int TotalOccurrences {
		get {
			int count = 0;
			lock(occurrences) {
				// Sum the number of stored positions of every document
				foreach(KeyValuePair<IDocument, SortedBasicWordInfoSet> pair in occurrences) {
					count += pair.Value.Count;
				}
			}
			return count;
		}
	}

	/// <summary>
	/// Stores an occurrence.
	/// </summary>
	/// <param name="document">The document the occurrence is referred to.</param>
	/// <param name="firstCharIndex">The index of the first character of the word in the document.</param>
	/// <param name="wordIndex">The index of the word in the document.</param>
	/// <param name="location">The location of the word.</param>
	/// <remarks>Adding an occurrence is O(n), where n is the number of occurrences
	/// of the word already stored for the same document. If there were no occurrences previously stored,
	/// the operation is O(1).
	/// <paramref name="firstCharIndex"/> and <paramref name="wordIndex"/> are unsigned, so they
	/// cannot be negative and need no range validation.</remarks>
	/// <exception cref="ArgumentNullException">If <paramref name="document"/> is <c>null</c>.</exception>
	public void AddOccurrence(IDocument document, ushort firstCharIndex, ushort wordIndex, WordLocation location) {
		if(document == null) throw new ArgumentNullException("document");

		lock(occurrences) {
			if(occurrences.ContainsKey(document)) {
				// Existing document
				occurrences[document].Add(new BasicWordInfo(firstCharIndex, wordIndex, location));
			}
			else {
				// New document
				SortedBasicWordInfoSet set = new SortedBasicWordInfoSet();
				set.Add(new BasicWordInfo(firstCharIndex, wordIndex, location));
				occurrences.Add(document, set);
			}
		}
	}

	/// <summary>
	/// Removes all the occurrences for a document.
	/// </summary>
	/// <param name="document">The document to remove the occurrences of.</param>
	/// <returns>The dumped word mappings for the removed word occurrences
	/// (an empty list if the document has no stored occurrences).</returns>
	/// <remarks>Removing the occurrences for the document is O(1).</remarks>
	/// <exception cref="ArgumentNullException">If <paramref name="document"/> is <c>null</c>.</exception>
	public List<DumpedWordMapping> RemoveOccurrences(IDocument document) {
		if(document == null) throw new ArgumentNullException("document");

		lock(occurrences) {
			if(occurrences.ContainsKey(document)) return occurrences.RemoveExtended(document, ID);
			else return new List<DumpedWordMapping>();
		}
	}

	/// <summary>
	/// Adds a bulk of occurrences of the word in a document, removing all the old positions, if any.
	/// </summary>
	/// <param name="document">The document.</param>
	/// <param name="positions">The positions.</param>
	/// <remarks>If <paramref name="positions"/> is empty, the effect of the invocation of the method is equal to
	/// that of <see cref="RemoveOccurrences"/> with the same document.
	/// Bulk-adding the occurrences is O(1).</remarks>
	/// <exception cref="ArgumentNullException">If <paramref name="document"/> or <paramref name="positions"/> are <c>null</c>.</exception>
	public void BulkAddOccurrences(IDocument document, SortedBasicWordInfoSet positions) {
		if(document == null) throw new ArgumentNullException("document");
		if(positions == null) throw new ArgumentNullException("positions");

		lock(occurrences) {
			if(positions.Count == 0) {
				// Empty set: behave exactly like RemoveOccurrences, which also means that
				// no empty entry is created for a document that is not stored yet.
				// The lock is re-entrant, so the nested lock in RemoveOccurrences is safe.
				RemoveOccurrences(document);
			}
			else if(occurrences.ContainsKey(document)) {
				// Replace the old positions
				occurrences[document] = positions;
			}
			else occurrences.Add(document, positions);
		}
	}

	/// <summary>
	/// Gets a string representation of the current instance.
	/// </summary>
	/// <returns>The string representation, in the form <c>text [xN]</c>
	/// where N is <see cref="TotalOccurrences"/>.</returns>
	public override string ToString() {
		return string.Format("{0} [x{1}]", text, TotalOccurrences);
	}

}
}