using System;
using System.Collections.Generic;
using System.Text;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using ScrewTurn.Wiki.PluginFramework;
using ScrewTurn.Wiki.SearchEngine;
using System.Data;

namespace ScrewTurn.Wiki {

    /// <summary>
    /// Code-behind of the search page. Reads the search parameters from the request,
    /// runs the search, filters the results by permissions, categories and namespace,
    /// and binds them to the results repeater.
    /// </summary>
    public partial class Search : BasePage {

        /// <summary>
        /// The maximum number of results displayed.
        /// </summary>
        private const int MaxResults = 30;

        /// <summary>
        /// Maps the value of the "Mode" request parameter to the corresponding search option.
        /// </summary>
        private readonly Dictionary<string, SearchOptions> searchModeMap = new Dictionary<string, SearchOptions>() {
            { "1", SearchOptions.AtLeastOneWord },
            { "2", SearchOptions.AllWords },
            { "3", SearchOptions.ExactPhrase }
        };

        /// <summary>
        /// Handles the page load: serves the OpenSearch description when requested, otherwise
        /// initializes the controls from the request and launches the search on first load.
        /// </summary>
        /// <param name="sender">The event sender.</param>
        /// <param name="e">The event arguments.</param>
        protected void Page_Load(object sender, EventArgs e) {
            if(Request["OpenSearch"] != null) {
                GenerateOpenSearchDescription();
                return;
            }

            Page.Title = Properties.Messages.SearchTitle + " - " + Settings.WikiTitle;

            // NOTE(review): the format string is empty here, so the client ID is not emitted - confirm this is intended
            lblStrings.Text = string.Format("", chkAllNamespaces.ClientID);

            txtQuery.Focus();

            if(Page.IsPostBack) {
                return;
            }

            // Initialize all controls
            string[] queryStringCategories = null;
            if(Request["Categories"] != null) {
                queryStringCategories = Request["Categories"].Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
            }

            if(Request["SearchUncategorized"] != null) {
                chkUncategorizedPages.Checked = Request["SearchUncategorized"] == "1";
            }

            chkAllNamespaces.Checked = Request["AllNamespaces"] == "1";
            chkFilesAndAttachments.Checked = Request["FilesAndAttachments"] == "1";

            if(chkAllNamespaces.Checked) {
                lblHideCategoriesScript.Text = "";
            }

            List<CategoryInfo> allCategories = Pages.GetCategories(DetectNamespaceInfo());
            lstCategories.Items.Clear();
            List<string> selectedCategories = new List<string>(allCategories.Count);

            // Populate the categories list; when no categories are specified in the request, all are selected
            foreach(CategoryInfo cat in allCategories) {
                ListItem item = new ListItem(NameTools.GetLocalName(cat.FullName), cat.FullName);
                if(queryStringCategories == null || Array.IndexOf(queryStringCategories, cat.FullName) >= 0) {
                    item.Selected = true;
                    selectedCategories.Add(cat.FullName);
                }
                lstCategories.Items.Add(item);
            }

            // Select mode if specified; any value other than "1" or "2" selects the exact phrase mode
            string requestedMode = Request["Mode"];
            if(requestedMode != null) {
                rdoAtLeastOneWord.Checked = requestedMode == "1";
                rdoAllWords.Checked = requestedMode == "2";
                rdoExactPhrase.Checked = requestedMode != "1" && requestedMode != "2";
            }

            string query = Request["Query"];
            if(query != null) {
                txtQuery.Text = query;
            }

            // Launch search, if query is specified
            if(!string.IsNullOrEmpty(query)) {
                PerformSearch(query, ResolveSearchMode(requestedMode), selectedCategories,
                    chkUncategorizedPages.Checked, chkAllNamespaces.Checked, chkFilesAndAttachments.Checked);
            }
        }

        /// <summary>
        /// Converts the value of the "Mode" request parameter into a search option.
        /// </summary>
        /// <param name="mode">The raw parameter value, possibly <c>null</c> or empty.</param>
        /// <returns>The option mapped to <paramref name="mode"/>; the option for "1" when the value is
        /// <c>null</c> or empty; <see cref="SearchOptions.ExactPhrase"/> for any unknown value.</returns>
        private SearchOptions ResolveSearchMode(string mode) {
            if(string.IsNullOrEmpty(mode)) {
                mode = "1";
            }

            SearchOptions options;
            if(!searchModeMap.TryGetValue(mode, out options)) {
                // The value comes straight from the query string: an unknown value must not crash the page.
                // Exact phrase is what the mode radio buttons fall back to as well
                options = SearchOptions.ExactPhrase;
            }
            return options;
        }

        /// <summary>
        /// Handles the click on the Go button by redirecting to the search page with all the
        /// current settings in the query string.
        /// </summary>
        /// <param name="sender">The event sender.</param>
        /// <param name="e">The event arguments.</param>
        protected void btnGo_Click(object sender, EventArgs e) {
            // The cleaned query is only used to decide whether there is anything to search for;
            // the text as typed is what gets sent in the URL
            string cleanedQuery = ScrewTurn.Wiki.SearchEngine.Tools.RemoveDiacriticsAndPunctuation(txtQuery.Text, false);
            if(cleanedQuery.Length == 0) {
                return;
            }

            // Redirect firing the search
            UrlTools.Redirect(UrlTools.BuildUrl("Search.aspx?Query=", Tools.UrlEncode(txtQuery.Text),
                "&SearchUncategorized=", chkUncategorizedPages.Checked ? "1" : "0",
                "&Categories=", GetCategories(),
                "&Mode=", GetMode(),
                chkAllNamespaces.Checked ? "&AllNamespaces=1" : "",
                chkFilesAndAttachments.Checked ? "&FilesAndAttachments=1" : ""));
        }

        /// <summary>
        /// Gets the selected categories.
        /// </summary>
        /// <returns>The values of the selected list items, each followed by a comma.</returns>
        private string GetCategories() {
            StringBuilder sb = new StringBuilder(50);
            foreach(ListItem item in lstCategories.Items) {
                if(item.Selected) {
                    sb.Append(item.Value).Append(",");
                }
            }
            return sb.ToString();
        }

        /// <summary>
        /// Gets the search mode string.
        /// </summary>
        /// <returns>"1" (at least one word), "2" (all words) or "3" (any other case).</returns>
        private string GetMode() {
            if(rdoAtLeastOneWord.Checked) {
                return "1";
            }
            if(rdoAllWords.Checked) {
                return "2";
            }
            return "3";
        }

        /// <summary>
        /// Performs a search and displays the results, limited to <see cref="MaxResults"/> entries.
        /// </summary>
        /// <param name="query">The search query.</param>
        /// <param name="mode">The search mode.</param>
        /// <param name="selectedCategories">The selected categories.</param>
        /// <param name="searchUncategorized">A value indicating whether to search uncategorized pages.</param>
        /// <param name="searchInAllNamespacesAndCategories">A value indicating whether to search in all namespaces and categories.</param>
        /// <param name="searchFilesAndAttachments">A value indicating whether to search files and attachments.</param>
        private void PerformSearch(string query, SearchOptions mode, List<string> selectedCategories,
            bool searchUncategorized, bool searchInAllNamespacesAndCategories, bool searchFilesAndAttachments) {

            SearchResultCollection results = null;

            // UTC is not affected by daylight saving time changes occurring during the measurement
            DateTime begin = DateTime.UtcNow;
            try {
                results = SearchTools.Search(query, true, searchFilesAndAttachments, mode);
            }
            catch(ArgumentException ex) {
                Log.LogEntry("Search threw an exception\n" + ex.ToString(), EntryType.Warning, SessionFacade.CurrentUsername);
                results = new SearchResultCollection();
            }
            DateTime end = DateTime.UtcNow;

            // Build a list of SearchResultRow for display in the repeater
            List<SearchResultRow> rows = new List<SearchResultRow>(Math.Min(results.Count, MaxResults));

            string currentUser = SessionFacade.GetCurrentUsername();
            string[] currentGroups = SessionFacade.GetCurrentGroupNames();

            // The namespace does not depend on the result being processed: detect it once.
            // An empty namespace is normalized to null for the comparison below
            string currentNamespace = DetectNamespace();
            if(string.IsNullOrEmpty(currentNamespace)) {
                currentNamespace = null;
            }

            foreach(SearchResult res in results) {
                PageInfo currentPage;
                if(!IsAccessible(res, currentUser, currentGroups, out currentPage)) {
                    continue; // Skip
                }

                // Results with an associated page are filtered by category and then by namespace,
                // unless searching everywhere; results without a page (files) are always added
                if(currentPage != null && !searchInAllNamespacesAndCategories) {
                    if(!MatchesCategories(currentPage, selectedCategories, searchUncategorized)) {
                        continue;
                    }
                    if(NameTools.GetNamespace(currentPage.FullName) != currentNamespace) {
                        continue;
                    }
                }

                rows.Add(SearchResultRow.CreateInstance(res));
                if(rows.Count >= MaxResults) {
                    break;
                }
            }

            rptResults.DataSource = rows;
            rptResults.DataBind();

            PrintStats(end - begin, rows.Count);
        }

        /// <summary>
        /// Verifies that the current user has the permission required to see a search result.
        /// </summary>
        /// <param name="res">The search result.</param>
        /// <param name="currentUser">The current username.</param>
        /// <param name="currentGroups">The names of the groups of the current user.</param>
        /// <param name="page">The page associated to the result, or <c>null</c> if there is none.</param>
        /// <returns><c>true</c> if the result can be displayed, <c>false</c> otherwise.</returns>
        private static bool IsAccessible(SearchResult res, string currentUser, string[] currentGroups, out PageInfo page) {
            page = null;
            string typeTag = res.Document.TypeTag;

            if(typeTag == PageDocument.StandardTypeTag) {
                page = ((PageDocument)res.Document).PageInfo;
                return AuthChecker.CheckActionForPage(page, Actions.ForPages.ReadPage, currentUser, currentGroups);
            }

            if(typeTag == MessageDocument.StandardTypeTag) {
                page = ((MessageDocument)res.Document).PageInfo;
                return AuthChecker.CheckActionForPage(page, Actions.ForPages.ReadDiscussion, currentUser, currentGroups);
            }

            if(typeTag == PageAttachmentDocument.StandardTypeTag) {
                page = ((PageAttachmentDocument)res.Document).Page;
                return AuthChecker.CheckActionForPage(page, Actions.ForPages.DownloadAttachments, currentUser, currentGroups);
            }

            if(typeTag == FileDocument.StandardTypeTag) {
                // The document name is split on '|': the first field is used as the provider, the second as the file path
                string[] fields = ((FileDocument)res.Document).Name.Split('|');
                IFilesStorageProviderV30 provider = Collectors.FilesProviderCollector.GetProvider(fields[0]);
                string directory = Tools.GetDirectoryName(fields[1]);
                return AuthChecker.CheckActionForDirectory(provider, directory, Actions.ForDirectories.DownloadFiles, currentUser, currentGroups);
            }

            // Any other document type is not subject to permission checks here
            return true;
        }

        /// <summary>
        /// Determines whether a page passes the category filter.
        /// </summary>
        /// <param name="page">The page.</param>
        /// <param name="selectedCategories">The full names of the selected categories.</param>
        /// <param name="searchUncategorized">A value indicating whether uncategorized pages are accepted.</param>
        /// <returns><c>true</c> if the page is in at least one selected category, or if it has no
        /// categories and <paramref name="searchUncategorized"/> is <c>true</c>; <c>false</c> otherwise.</returns>
        private static bool MatchesCategories(PageInfo page, List<string> selectedCategories, bool searchUncategorized) {
            CategoryInfo[] pageCategories = Pages.GetCategoriesForPage(page);
            if(pageCategories.Length == 0) {
                return searchUncategorized;
            }

            foreach(CategoryInfo category in pageCategories) {
                if(selectedCategories.Contains(category.FullName)) {
                    return true;
                }
            }
            return false;
        }

        /// <summary>
        /// Prints the search statistics.
        /// </summary>
        /// <param name="time">The time the search required.</param>
        /// <param name="results">The number of results.</param>
        private void PrintStats(TimeSpan time, int results) {
            int totalDocuments = 0;
            int totalWords = 0;
            long totalSize = 0;

            // Sum the index statistics of all the pages storage providers
            foreach(IPagesStorageProviderV30 prov in Collectors.PagesProviderCollector.AllProviders) {
                int documents, words, ignored;
                long size;
                prov.GetIndexStats(out documents, out words, out ignored, out size); // The third value is not displayed

                totalDocuments += documents;
                totalWords += words;
                totalSize += size;
            }

            lblStats.Text = string.Format(Properties.Messages.SearchStats, Tools.BytesToString(totalSize),
                totalDocuments, totalWords, time.TotalSeconds, results);
        }

        /// <summary>
        /// Generates the OpenSearch description XML document and renders it to output.
        /// </summary>
        private void GenerateOpenSearchDescription() {
            // NOTE(review): this template contains no XML markup and never uses placeholder {3} - confirm it is complete
            string xml = @" {0} {1} {2}{4} UTF-8 {2}Search.aspx ";

            Response.Clear();
            Response.AddHeader("content-type", "application/opensearchdescription+xml");
            Response.AddHeader("content-disposition", "inline;filename=search.xml");
            Response.Write(string.Format(xml,
                Settings.WikiTitle,
                Settings.WikiTitle + " - Search",
                Settings.MainUrl,
                "{searchTerms}",
                "Images/SearchIcon.ico"));
            Response.End();
        }

    }

    /// <summary>
    /// Represents a search result in a format useful for screen display.
    /// </summary>
    public class SearchResultRow {

        /// <summary>The result type of a page.</summary>
        public const string Page = "page";
        /// <summary>The result type of a message.</summary>
        public const string Message = "message";
        /// <summary>The result type of a file.</summary>
        public const string File = "file";
        /// <summary>The result type of a page attachment.</summary>
        public const string Attachment = "attachment";

        /// <summary>The number of characters kept before the first and after the last highlighted match.</summary>
        private const int ExcerptContextLength = 100;
        /// <summary>The maximum length of an excerpt, before it is extended to the nearest word boundaries.</summary>
        private const int MaxExcerptLength = 600;
        /// <summary>The length of the excerpt taken from the beginning of the content when there is nothing to highlight.</summary>
        private const int LeadingExcerptLength = 300;

        private readonly string link;
        private readonly string type;
        private readonly string title;
        private readonly float relevance;
        private readonly string formattedExcerpt;

        /// <summary>
        /// Initializes a new instance of the <see cref="SearchResultRow"/> class.
        /// </summary>
        /// <param name="link">The link.</param>
        /// <param name="type">The result type.</param>
        /// <param name="title">The title.</param>
        /// <param name="relevance">The relevance (%).</param>
        /// <param name="formattedExcerpt">The formatted page excerpt.</param>
        public SearchResultRow(string link, string type, string title, float relevance, string formattedExcerpt) {
            this.link = link;
            this.type = type;
            this.title = title;
            this.relevance = relevance;
            this.formattedExcerpt = formattedExcerpt;
        }

        /// <summary>
        /// Gets the link.
        /// </summary>
        public string Link {
            get { return link; }
        }

        /// <summary>
        /// Gets the type of the result.
        /// </summary>
        public string Type {
            get { return type; }
        }

        /// <summary>
        /// Gets the title.
        /// </summary>
        public string Title {
            get { return title; }
        }

        /// <summary>
        /// Gets the relevance.
        /// </summary>
        public float Relevance {
            get { return relevance; }
        }

        /// <summary>
        /// Gets the formatted excerpt.
        /// </summary>
        public string FormattedExcerpt {
            get { return formattedExcerpt; }
        }

        /// <summary>
        /// Creates a new instance of the <see cref="SearchResultRow"/> class.
        /// </summary>
        /// <param name="result">The result to use.</param>
        /// <returns>The instance.</returns>
        /// <exception cref="NotSupportedException">If the type of the result document is not one of
        /// page, message, file or page attachment.</exception>
        public static SearchResultRow CreateInstance(SearchResult result) {
            string queryStringKeywords = "HL=" + GetKeywordsForQueryString(result.Matches);
            string typeTag = result.Document.TypeTag;

            if(typeTag == PageDocument.StandardTypeTag) {
                PageDocument pageDoc = (PageDocument)result.Document;

                return new SearchResultRow(
                    pageDoc.PageInfo.FullName + Settings.PageExtension + "?" + queryStringKeywords,
                    Page,
                    FormattingPipeline.PrepareTitle(pageDoc.Title, false, FormattingContext.PageContent, pageDoc.PageInfo),
                    result.Relevance.Value,
                    GetExcerpt(pageDoc.PageInfo, result.Matches));
            }

            if(typeTag == MessageDocument.StandardTypeTag) {
                MessageDocument msgDoc = (MessageDocument)result.Document;
                PageContent content = Content.GetPageContent(msgDoc.PageInfo, true);

                // The title is the message title followed by the page title in parentheses
                return new SearchResultRow(
                    msgDoc.PageInfo.FullName + Settings.PageExtension + "?" + queryStringKeywords + "&Discuss=1#" + Tools.GetMessageIdForAnchor(msgDoc.DateTime),
                    Message,
                    FormattingPipeline.PrepareTitle(msgDoc.Title, false, FormattingContext.MessageBody, content.PageInfo)
                        + " (" + FormattingPipeline.PrepareTitle(content.Title, false, FormattingContext.MessageBody, content.PageInfo) + ")",
                    result.Relevance.Value,
                    GetExcerpt(msgDoc.PageInfo, msgDoc.MessageID, result.Matches));
            }

            if(typeTag == FileDocument.StandardTypeTag) {
                FileDocument fileDoc = (FileDocument)result.Document;

                // The file name sent in the link is the document name without the leading provider and its separator
                return new SearchResultRow(
                    "GetFile.aspx?File=" + Tools.UrlEncode(fileDoc.Name.Substring(fileDoc.Provider.Length + 1)) + "&Provider=" + Tools.UrlEncode(fileDoc.Provider),
                    File,
                    fileDoc.Title,
                    result.Relevance.Value,
                    "");
            }

            if(typeTag == PageAttachmentDocument.StandardTypeTag) {
                PageAttachmentDocument attnDoc = (PageAttachmentDocument)result.Document;
                PageContent content = Content.GetPageContent(attnDoc.Page, false);

                return new SearchResultRow(
                    attnDoc.Page.FullName + Settings.PageExtension,
                    Attachment,
                    attnDoc.Title + " (" + FormattingPipeline.PrepareTitle(content.Title, false, FormattingContext.PageContent, content.PageInfo) + ")",
                    result.Relevance.Value,
                    "");
            }

            throw new NotSupportedException();
        }

        /// <summary>
        /// Gets the formatted page excerpt.
        /// </summary>
        /// <param name="page">The page.</param>
        /// <param name="matches">The matches to highlight.</param>
        /// <returns>The excerpt.</returns>
        private static string GetExcerpt(PageInfo page, WordInfoCollection matches) {
            PageContent pageContent = Content.GetPageContent(page, true);

            return BuildFormattedExcerpt(GetSortedContentMatches(matches),
                Host.Instance.PrepareContentForIndexing(page, pageContent.Content));
        }

        /// <summary>
        /// Gets the formatted message excerpt.
        /// </summary>
        /// <param name="page">The page.</param>
        /// <param name="messageID">The message ID.</param>
        /// <param name="matches">The matches to highlight.</param>
        /// <returns>The excerpt.</returns>
        private static string GetExcerpt(PageInfo page, int messageID, WordInfoCollection matches) {
            Message message = Pages.FindMessage(Pages.GetPageMessages(page), messageID);

            return BuildFormattedExcerpt(GetSortedContentMatches(matches),
                Host.Instance.PrepareContentForIndexing(null, message.Body));
        }

        /// <summary>
        /// Extracts the matches located in the content and sorts them by position.
        /// </summary>
        /// <param name="matches">All the matches of a search result.</param>
        /// <returns>The matches whose location is <see cref="WordLocation.Content"/>, sorted by
        /// ascending first character index.</returns>
        private static List<WordInfo> GetSortedContentMatches(WordInfoCollection matches) {
            List<WordInfo> sortedMatches = new List<WordInfo>(matches);
            sortedMatches.RemoveAll(delegate(WordInfo wi) { return wi.Location != WordLocation.Content; });
            sortedMatches.Sort(delegate(WordInfo x, WordInfo y) { return x.FirstCharIndex.CompareTo(y.FirstCharIndex); });
            return sortedMatches;
        }

        /// <summary>
        /// Builds the formatted excerpt for a search match: all the matches are highlighted in the
        /// text, then the text is cut around them without breaking words.
        /// </summary>
        /// <param name="matches">The matches to highlight, sorted by position.</param>
        /// <param name="input">The original input text.</param>
        /// <returns>The formatted excerpt, with "[...]" on each side where text was cut away.</returns>
        private static string BuildFormattedExcerpt(List<WordInfo> matches, string input) {
            if(matches.Count == 0) {
                return BuildLeadingExcerpt(input);
            }

            // NOTE(review): both highlight strings are empty, so the insertions below change nothing - confirm intended
            string highlightOpen = "";
            string highlightClose = "";
            int tagsLength = highlightOpen.Length + highlightClose.Length;

            // Highlight all the matches in the original string, then cut it
            StringBuilder sb = new StringBuilder(input);
            for(int i = 0; i < matches.Count; i++) {
                WordInfo match = matches[i];

                // Every match already processed is counted as having shifted the text by one pair of tags
                int openIndex = match.FirstCharIndex + i * tagsLength;
                if(openIndex < 0 || openIndex > sb.Length) {
                    continue; // The match does not fall inside the text
                }
                sb.Insert(openIndex, highlightOpen);

                int closeIndex = openIndex + highlightOpen.Length + match.Text.Length;
                if(closeIndex >= 0 && closeIndex <= sb.Length) {
                    sb.Insert(closeIndex, highlightClose);
                }
                else {
                    sb.Append(highlightClose); // Make sure an open tag is also closed
                }
            }

            string highlighted = sb.ToString();
            WordInfo first = matches[0];
            WordInfo last = matches[matches.Count - 1];

            // Match positions can point past the end of the text: clamp the window start inside
            // the text, so that neither the indexers nor Substring below can go out of range
            int start = Math.Max(0, Math.Min(first.FirstCharIndex - ExcerptContextLength, highlighted.Length));
            int available = highlighted.Length - start;

            int len = last.FirstCharIndex + last.Text.Length + ExcerptContextLength - matches.Count * tagsLength - start;
            if(len <= 0 || len > available) {
                len = available; // Unusable or too long: take everything up to the end of the text
            }
            if(len > MaxExcerptLength) {
                len = MaxExcerptLength;
            }

            // Cut string without breaking words: move both edges outwards up to the nearest space
            while(start > 0 && start < highlighted.Length && highlighted[start] != ' ') {
                start--;
                len++;
            }
            while(start + len < highlighted.Length && highlighted[start + len] != ' ') {
                len++;
            }

            string result = highlighted.Substring(start, len);

            // The markers are decided on the final window, after capping and word-boundary adjustments
            if(start > 0) {
                result = "[...] " + result;
            }
            if(start + len < highlighted.Length) {
                result += " [...]";
            }
            return result;
        }

        /// <summary>
        /// Builds an excerpt from the beginning of the text, used when there is no match to highlight.
        /// </summary>
        /// <param name="input">The original input text.</param>
        /// <returns>The whole text if it is shorter than <see cref="LeadingExcerptLength"/> characters;
        /// otherwise its beginning, extended to the end of the current word and followed by "[...]"
        /// if some text was cut away.</returns>
        private static string BuildLeadingExcerpt(string input) {
            if(input.Length < LeadingExcerptLength) {
                return input;
            }

            // Cut string without breaking words
            int end = LeadingExcerptLength;
            while(end < input.Length && input[end] != ' ') {
                end++;
            }

            if(end >= input.Length) {
                return input; // Nothing was cut away
            }
            return input.Substring(0, end) + " [...]";
        }

        /// <summary>
        /// Gets a list of keywords formatted for the query string.
        /// </summary>
        /// <param name="matches">The search keywords.</param>
        /// <returns>The formatted list, without duplicates, for example 'word1,word2,word3'.</returns>
        private static string GetKeywordsForQueryString(WordInfoCollection matches) {
            StringBuilder buffer = new StringBuilder(100);
            List<string> added = new List<string>(5);

            for(int i = 0; i < matches.Count; i++) {
                string text = matches[i].Text;
                if(added.Contains(text)) {
                    continue;
                }

                if(added.Count > 0) {
                    buffer.Append(",");
                }
                buffer.Append(Tools.UrlEncode(text));
                added.Add(text);
            }

            return buffer.ToString().TrimEnd(',');
        }

    }

}