using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Text.RegularExpressions; using System.Web; namespace ScrewTurn.Wiki { /// /// Implements reverse formatting methods (HTML->WikiMarkup). /// public static class ReverseFormatter { private static readonly Regex BoldRegex = new Regex(@"()((.|\n|\r)*?)()", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex ItalicRegex = new Regex(@"()((.|\n|\r)*?)()", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex UnderlineRegex = new Regex(@"()((.|\n|\r)*?)()", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex StrikeRegex = new Regex(@"()((.|\n|\r)*?)()", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex H1Regex = new Regex(@"(

)((.|\n|\r)*?)(

)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex H2Regex = new Regex(@"(

)((.|\n|\r)*?)(

)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex H3Regex = new Regex(@"(

)((.|\n|\r)*?)(

)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex H4Regex = new Regex(@"(

)((.|\n|\r)*?)(

)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex PageLinkRegex = new Regex(@"()((.|\n|\r)*?)()", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex UnknownLinkRegex = new Regex(@"()((.|\n|\r)*?)()", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex FileLinkRegex = new Regex(@"()((.|\n|\r)+?)()", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex AttachmentLinkRegex = new Regex(@"()((.|\n|\r)+?)()", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex SystemLinkRegex = new Regex(@"((.|\n|\r)*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex ExternalLinkRegex = new Regex(@"()((.|\n|\r)*?)()", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex InternalLinkRegex = new Regex(@"((.|\n|\r)*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex AnchorLinkRegex = new Regex(@"((.|\n|\r)*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex EmailLinkRegex = new Regex(@"()((.|\n|\r)*?)()", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex AnchorRegex = new Regex(@"(.*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex ImageLeftRightRegex = new Regex(@"(
|
)()?()?(

((.)*?)

)?(|
)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex ImageInlineRegex = new Regex(@"()?()?", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex HRRegex = new Regex(@"

\s*

", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex BoxRegex = new Regex(@"
((.|\n|\r)*?)
", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex CodeRegex = new Regex(@"((.|\n|\r)*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex PreRegex = new Regex(@"
((.|\n|\r)*?)
", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex SingleBR = new Regex(@"(?)
(?!
)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); private static readonly Regex SingleNewLine = new Regex(@"(?(.*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); // Title=1 - Href=2 - Target=3 - Content=4 --- Href=http://www.server.com/Spaced%20Page.ashx private static readonly Regex UnknownLinkRegexIE = new Regex(@"(.*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); // Title=1 - ProviderGlobal=3 - Provider=4 - Page=6 - File=7 - Target=8 - Content=9 private static readonly Regex FileOrAttachmentLinkRegexIE = new Regex(@"(.*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); // Title=1 - Href=2 - Target=3 - Content=4 --- Href=http://www.server.com/Register.aspx private static readonly Regex SystemLinkRegexIE = new Regex(@"(.*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); // Title=1 - Href=2 - Target=3 - Content=4 private static readonly Regex ExternalLinkRegexIE = new Regex(@"(.*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); // Title=1 - Href=2 - Target=3 - Content=4 private static readonly Regex InternalLinkRegexIE = new Regex(@"(.*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); // AnchorLinkRegexIE would be equal to InternalLinkRegex - no need for it // Title=1 - Href=2 - Target=3 - Content=4 private static readonly Regex EmailLinkRegexIE = new Regex(@"", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); // DivClass=1 - A=2 - ATitle=3 - AHref=4 - ATarget=5 - ImageAlt=6 - ImageSrc=7 - P=9 - PContent=10 --- Href/Src=http://www.server.com/Blah.ashx/GetFile.aspx... private static readonly Regex ImageLeftRightRegexIE = new Regex(@"
()?\""?(.*?)\""?()?(\r\n

(.*?)

)?
", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); // A=1 - ATitle=2 - AHref=3 - ATarget=4 - ImageAlt=5 - ImageSrc=6 - P=8 - PContent=9 --- Href/Src=http://www.server.com/Blah.ashx/GetFile.aspx... private static readonly Regex ImageAutoRegexIE = new Regex(@"\r\n\r\n\r\n
()?\""?(.*?)\""?()?(\r\n

(.*?)

)?
", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); // A=1 - ATitle=2 - AHref=3 - ATarget=4 - ImageAlt=5 - ImageSrc=6 --- Href/Src=http://www.server.com/Blah.ashx/GetFile.aspx... private static readonly Regex ImageInlineRegexIE = new Regex(@"()?\""?(.*?)\""?()?", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); /// /// Reverse formats HTML content into WikiMarkup. /// /// The input HTML. /// The corresponding WikiMarkup. public static string ReverseFormat(string html) { Match match = null; StringBuilder buffer = new StringBuilder(html); if(!html.EndsWith("\r\n")) buffer.Append("\r\n"); buffer.Replace("
", "
"); buffer.Replace("
", "
"); buffer.Replace("", ""); buffer.Replace("", ""); buffer.Replace("", ""); buffer.Replace("", ""); buffer.Replace("", ""); buffer.Replace("", ""); buffer.Replace("", ""); buffer.Replace("", ""); buffer.Replace("&", "&"); // Escape square brackets, otherwise they're interpreted as links buffer.Replace("[", "["); buffer.Replace("]", "]"); // Temporarily replace
in
 tags
			match = PreRegex.Match(buffer.ToString());
			while(match.Success) {
				Match subMatch = SingleBR.Match(match.Value);
				while(subMatch.Success) {
					buffer.Remove(match.Index + subMatch.Index, subMatch.Length);
					buffer.Insert(match.Index + subMatch.Index, "");
					subMatch = SingleBR.Match(match.Value, subMatch.Index + 1);
				}
				match = PreRegex.Match(buffer.ToString(), match.Index + 1);
			}
			buffer.Replace("", "\r\n");

			// Code
			match = CodeRegex.Match(buffer.ToString());
			while(match.Success) {
				buffer.Remove(match.Index, match.Length);
				buffer.Insert(match.Index, "{{" + match.Value.Substring(6, match.Length - 13) + "}}");
				match = CodeRegex.Match(buffer.ToString(), match.Index + 1);
			}

			// Pre
			// Unescape square brackets
			match = PreRegex.Match(buffer.ToString());
			while(match.Success) {
				buffer.Remove(match.Index, match.Length);
				buffer.Insert(match.Index, "@@" +
					match.Value.Substring(5, match.Length - 11).Replace("&", "&").Replace("[", "[").Replace("]", "]") +
					"@@");
				match = PreRegex.Match(buffer.ToString(), match.Index + 1);
			}

			// Bold
			match = BoldRegex.Match(buffer.ToString());
			while(match.Success) {
				buffer.Remove(match.Index, match.Length);
				buffer.Insert(match.Index, "'''" + match.Groups[2].Value + "'''");
				match = BoldRegex.Match(buffer.ToString(), match.Index + 1);
			}

			// Italic
			match = ItalicRegex.Match(buffer.ToString());
			while(match.Success) {
				buffer.Remove(match.Index, match.Length);
				buffer.Insert(match.Index, "''" + match.Groups[2].Value + "''");
				match = ItalicRegex.Match(buffer.ToString(), match.Index + 1);
			}

			// Underline
			match = UnderlineRegex.Match(buffer.ToString());
			while(match.Success) {
				buffer.Remove(match.Index, match.Length);
				buffer.Insert(match.Index, "__" + match.Groups[2].Value + "__");
				match = UnderlineRegex.Match(buffer.ToString(), match.Index + 1);
			}

			// Strike
			match = StrikeRegex.Match(buffer.ToString());
			while(match.Success) {
				buffer.Remove(match.Index, match.Length);
				buffer.Insert(match.Index, "--" + match.Groups[2].Value + "--");
				match = StrikeRegex.Match(buffer.ToString(), match.Index + 1);
			}

			// Horizontal Ruler
			match = HRRegex.Match(buffer.ToString());
			while(match.Success) {
				buffer.Remove(match.Index, match.Length);
				buffer.Insert(match.Index, "----");
				match = HRRegex.Match(buffer.ToString(), match.Index + 1);
			}

			// H1
			match = H1Regex.Match(buffer.ToString());
			while(match.Success) {
				char c = buffer[match.Index + match.Length];
				bool addNewLine = false;
				if(buffer[match.Index + match.Length] != '\n') addNewLine = true;
				buffer.Remove(match.Index, match.Length);
				if(addNewLine) buffer.Insert(match.Index, "==" + match.Groups[2].Value + "==\n");
				else buffer.Insert(match.Index, "==" + match.Groups[2].Value + "==");
				match = H1Regex.Match(buffer.ToString(), match.Index + 1);
			}

			// H2
			match = H2Regex.Match(buffer.ToString());
			while(match.Success) {
				bool addNewLine = false;
				if(buffer[match.Index + match.Length] != '\n') addNewLine = true;
				buffer.Remove(match.Index, match.Length);
				if(addNewLine) buffer.Insert(match.Index, "===" + match.Groups[2].Value + "===\n");
				else buffer.Insert(match.Index, "===" + match.Groups[2].Value + "===");
				match = H2Regex.Match(buffer.ToString(), match.Index + 1);
			}

			// H3
			match = H3Regex.Match(buffer.ToString());
			while(match.Success) {
				bool addNewLine = false;
				if(buffer[match.Index + match.Length] != '\n') addNewLine = true;
				buffer.Remove(match.Index, match.Length);
				if(addNewLine) buffer.Insert(match.Index, "====" + match.Groups[2].Value + "====\n");
				else buffer.Insert(match.Index, "====" + match.Groups[2].Value + "====");
				match = H3Regex.Match(buffer.ToString(), match.Index + 1);
			}

			// H4
			match = H4Regex.Match(buffer.ToString());
			while(match.Success) {
				bool addNewLine = false;
				if(buffer[match.Index + match.Length] != '\n') addNewLine = true;
				buffer.Remove(match.Index, match.Length);
				if(addNewLine) buffer.Insert(match.Index, "=====" + match.Groups[2].Value + "=====\n");
				else buffer.Insert(match.Index, "=====" + match.Groups[2].Value + "=====");
				match = H4Regex.Match(buffer.ToString(), match.Index + 1);
			}

			// Lists
			buffer.Replace("
    ", "
      "); buffer.Replace("
    ", "
"); buffer.Replace("
    ", "
      "); buffer.Replace("
    ", "
"); buffer.Replace("
  • ", "
  • "); buffer.Replace("
  • ", ""); ProcessLists(buffer); // Page Link match = PageLinkRegex.Match(buffer.ToString()); while(match.Success) { buffer.Remove(match.Index, match.Length); string insertion = "["; if(match.Groups[2].Value == @"target=""_blank"" ") insertion += "^"; string decoded = UrlDecode(match.Groups[3].Value); insertion += (decoded.StartsWith(" ") ? "++" : "") + decoded.Trim(); if(match.Groups[6].Value != decoded) insertion += "|" + match.Groups[6].Value; insertion += "]"; buffer.Insert(match.Index, insertion); match = PageLinkRegex.Match(buffer.ToString(), match.Index + 1); } // Page Link IE match = PageLinkRegexIE.Match(buffer.ToString()); while(match.Success) { buffer.Remove(match.Index, match.Length); string insertion = "["; if(match.Groups[3].Value == " target=_blank") insertion += "^"; string page = match.Groups[2].Value.Substring(match.Groups[2].Value.LastIndexOf("/") + 1); page = page.Substring(0, page.Length - 5); // Remove .ashx page = UrlDecode(page); insertion += page; if(match.Groups[4].Value != page) insertion += "|" + match.Groups[4].Value; insertion += "]"; buffer.Insert(match.Index, insertion); match = PageLinkRegexIE.Match(buffer.ToString(), match.Index + 1); } // Unknown Link match = UnknownLinkRegex.Match(buffer.ToString()); while(match.Success) { buffer.Remove(match.Index, match.Length); string insertion = "["; if(match.Groups[2].Value == @"target=""_blank"" ") insertion += "^"; string decoded = UrlDecode(match.Groups[3].Value); insertion += decoded; if(match.Groups[6].Value != decoded) insertion += "|" + match.Groups[6].Value; insertion += "]"; buffer.Insert(match.Index, insertion); match = UnknownLinkRegex.Match(buffer.ToString(), match.Index + 1); } // Unknown Link IE match = UnknownLinkRegexIE.Match(buffer.ToString()); while(match.Success) { buffer.Remove(match.Index, match.Length); string insertion = "["; if(match.Groups[3].Value == " target=_blank") insertion += "^"; string page = 
match.Groups[2].Value.Substring(match.Groups[2].Value.LastIndexOf("/") + 1); page = page.Substring(0, page.Length - 5); // Remove .ashx page = UrlDecode(page); insertion += page; if(match.Groups[4].Value != page) insertion += "|" + match.Groups[4].Value; insertion += "]"; buffer.Insert(match.Index, insertion); match = UnknownLinkRegexIE.Match(buffer.ToString(), match.Index + 1); } // File Link match = FileLinkRegex.Match(buffer.ToString()); while(match.Success) { buffer.Remove(match.Index, match.Length); string insertion = "["; if(match.Groups[2].Value == @"target=""_blank"" ") insertion += "^"; if(match.Groups[3].Value != "") insertion += "{UP:" + match.Groups[4].Value + "}" + UrlDecode(match.Groups[6].Value); else insertion += "{UP}" + UrlDecode(match.Groups[6].Value); if(!match.Groups[10].Value.StartsWith("GetFile.aspx") && !match.Groups[10].Value.StartsWith("{UP")) insertion += "|" + match.Groups[10]; insertion += "]"; buffer.Insert(match.Index, insertion); match = FileLinkRegex.Match(buffer.ToString(), match.Index + 1); } // File Link IE match = FileOrAttachmentLinkRegexIE.Match(buffer.ToString()); while(match.Success) { buffer.Remove(match.Index, match.Length); string insertion = "["; if(match.Groups[8].Value == " target=_blank") insertion += "^"; if(match.Groups[3].Value != "") insertion += "{UP:" + match.Groups[4].Value; else insertion += "{UP"; if(match.Groups[6].Value != "") insertion += "(" + UrlDecode(match.Groups[6].Value) + ")"; insertion += "}"; insertion += UrlDecode(match.Groups[7].Value); if(!match.Groups[9].Value.StartsWith("GetFile.aspx") && !match.Groups[9].Value.StartsWith("{UP")) insertion += "|" + match.Groups[9].Value; insertion += "]"; buffer.Insert(match.Index, insertion); match = FileOrAttachmentLinkRegexIE.Match(buffer.ToString(), match.Index + 1); } // Attachment Link match = AttachmentLinkRegex.Match(buffer.ToString()); while(match.Success) { buffer.Remove(match.Index, match.Length); string insertion = "["; if(match.Groups[2].Value == 
@"target=""_blank"" ") insertion += "^"; // if the provider is not present "{UP" is added without ":providername" insertion += match.Groups[4].Value == "" ? "{UP" : "{UP:" + match.Groups[4].Value; insertion += "(" + UrlDecode(match.Groups[6].Value) + ")}" + UrlDecode(match.Groups[8].Value); if(!match.Groups[12].Value.StartsWith("GetFile.aspx") && !match.Groups[12].Value.StartsWith("{UP")) insertion += "|" + match.Groups[12]; insertion += "]"; buffer.Insert(match.Index, insertion); match = AttachmentLinkRegex.Match(buffer.ToString(), match.Index + 1); } // External Link match = ExternalLinkRegex.Match(buffer.ToString()); while(match.Success) { buffer.Remove(match.Index, match.Length); string insertion = "["; //if(match.Groups[6].Value == @"target=""_blank""") insertion += "^"; string url = match.Groups[2].Value; if(url.StartsWith(Settings.MainUrl)) url = url.Substring(Settings.MainUrl.Length); insertion += url; if(match.Groups[7].Value != match.Groups[2].Value && match.Groups[7].Value + "/" != match.Groups[2].Value) insertion += "|" + match.Groups[7].Value; insertion += "]"; buffer.Insert(match.Index, insertion); match = ExternalLinkRegex.Match(buffer.ToString(), match.Index + 1); } // External Link IE match = ExternalLinkRegexIE.Match(buffer.ToString()); while(match.Success) { buffer.Remove(match.Index, match.Length); string insertion = "["; string url = match.Groups[2].Value; if(url.StartsWith(Settings.MainUrl)) url = url.Substring(Settings.MainUrl.Length); insertion += url; if(match.Groups[4].Value != match.Groups[2].Value.TrimEnd('/')) insertion += "|" + match.Groups[4].Value; insertion += "]"; buffer.Insert(match.Index, insertion); match = ExternalLinkRegexIE.Match(buffer.ToString(), match.Index + 1); } // Internal Link match = InternalLinkRegex.Match(buffer.ToString()); while(match.Success) { buffer.Remove(match.Index, match.Length); string insertion = "["; if(match.Groups[1].Value == @"target=""_blank""") insertion += "^"; string url = match.Groups[2].Value; 
if(url.StartsWith(Settings.MainUrl)) url = url.Substring(Settings.MainUrl.Length); insertion += url; string decoded = UrlDecode(match.Groups[6].Value); if(match.Groups[2].Value != decoded) insertion += "|" + decoded; insertion += "]"; buffer.Insert(match.Index, insertion); match = InternalLinkRegex.Match(buffer.ToString(), match.Index + 1); } // Internal Link IE match = InternalLinkRegexIE.Match(buffer.ToString()); while(match.Success) { buffer.Remove(match.Index, match.Length); string insertion = "["; if(match.Groups[3].Value == " target=_blank") insertion += "^"; string url = match.Groups[2].Value; if(url.StartsWith(Settings.MainUrl)) url = url.Substring(Settings.MainUrl.Length); insertion += url; string decoded = UrlDecode(match.Groups[4].Value); if(decoded != match.Groups[2].Value) insertion += "|" + decoded; insertion += "]"; buffer.Insert(match.Index, insertion); match = InternalLinkRegexIE.Match(buffer.ToString(), match.Index + 1); } // Anchor Link match = AnchorLinkRegex.Match(buffer.ToString()); while(match.Success) { buffer.Remove(match.Index, match.Length); string insertion = "["; if(match.Groups[3].Value != "") insertion += "^"; insertion += "#"; insertion += match.Groups[1].Value; string val = match.Groups[6].Value.ToLowerInvariant().Replace(" ", ""); if(val != "") insertion += "|" + val; insertion += "]"; buffer.Insert(match.Index, insertion); match = AnchorLinkRegex.Match(buffer.ToString(), match.Index + 1); } // System Link (.aspx) match = SystemLinkRegex.Match(buffer.ToString()); while(match.Success) { buffer.Remove(match.Index, match.Length); string insertion = "["; if(match.Groups[1].Value == @"target=""_blank""") insertion += "^"; insertion += match.Groups[2].Value; string decoded = UrlDecode(match.Groups[6].Value); if(match.Groups[2].Value != decoded) insertion += "|" + decoded; insertion += "]"; buffer.Insert(match.Index, insertion); match = SystemLinkRegex.Match(buffer.ToString(), match.Index + 1); } // System Link IE match = 
SystemLinkRegexIE.Match(buffer.ToString()); while(match.Success) { buffer.Remove(match.Index, match.Length); string insertion = "["; if(match.Groups[3].Value == " target=_blank") insertion += "^"; string url = match.Groups[2].Value.Substring(match.Groups[2].Value.LastIndexOf("/") + 1); insertion += url; string decoded = UrlDecode(match.Groups[4].Value); if(decoded != url) insertion += "|" + decoded; insertion += "]"; buffer.Insert(match.Index, insertion); match = SystemLinkRegexIE.Match(buffer.ToString(), match.Index + 1); } // Email Link match = EmailLinkRegex.Match(buffer.ToString()); while(match.Success) { buffer.Remove(match.Index, match.Length); string insertion = "["; if(match.Groups[2].Value == @"target=""_blank"" ") insertion += "^"; insertion += match.Groups[3].Value; string decoded = UrlDecode(match.Groups[6].Value); if(decoded != match.Groups[3].Value) insertion += "|" + decoded; insertion += "]"; buffer.Insert(match.Index, insertion); match = EmailLinkRegex.Match(buffer.ToString(), match.Index + 1); } // Email Link IE match = EmailLinkRegexIE.Match(buffer.ToString()); while(match.Success) { buffer.Remove(match.Index, match.Length); string insertion = "["; insertion += match.Groups[2].Value.Substring(7); // Remove mailto: string decoded = UrlDecode(match.Groups[4].Value); if(decoded != match.Groups[2].Value.Substring(7)) insertion += "|" + decoded; insertion += "]"; buffer.Insert(match.Index, insertion); match = EmailLinkRegexIE.Match(buffer.ToString(), match.Index + 1); } // Anchor match = AnchorRegex.Match(buffer.ToString()); while(match.Success) { buffer.Remove(match.Index, match.Length); buffer.Insert(match.Index, "[anchor|#" + match.Groups[1].Value + "]"); match = AnchorRegex.Match(buffer.ToString(), match.Index + 1); } // Image Left/Right/Auto match = ImageLeftRightRegex.Match(buffer.ToString()); while(match.Success) { buffer.Remove(match.Index, match.Length); string insertion = "["; if(match.Groups[1].Value.StartsWith("", ""); buffer.Replace("

    ", "
    "); buffer.Replace("

    ", ""); buffer.Replace("

    ", "
    "); //
    //sb.Replace("

    ", "\r\n\r\n"); //sb.Replace("\r\n
    ", "\r\n\r\n"); //sb.Replace("
    ", "{BR}\r\n"); buffer.Replace("
    ", "\r\n"); // Fix line breaks in IE buffer.Replace("\r\n\r\n\r\n=====", "\r\n\r\n====="); buffer.Replace("\r\n\r\n\r\n====", "\r\n\r\n===="); buffer.Replace("\r\n\r\n\r\n===", "\r\n\r\n==="); buffer.Replace("\r\n\r\n\r\n==", "\r\n\r\n=="); buffer.Replace("\r\n\r\n\r\n----", "\r\n\r\n----"); buffer.Replace("\r\n\r\n\r\n* ", "\r\n\r\n* "); buffer.Replace("\r\n\r\n\r\n# ", "\r\n\r\n# "); match = SingleNewLine.Match(buffer.ToString()); while(match.Success) { buffer.Remove(match.Index, match.Length); buffer.Insert(match.Index, "{BR}"); match = SingleNewLine.Match(buffer.ToString(), match.Index); } buffer.Replace("<", "<"); buffer.Replace(">", ">"); string result = buffer.ToString(); return result.TrimEnd('\r', '\n'); } /// /// Processes unordered and ordered lists. /// /// The string builder buffer. private static void ProcessLists(StringBuilder sb) { string temp = null; int ulIndex = -1; int olIndex = -1; int lastIndex = 0; do { temp = sb.ToString().ToLowerInvariant(); ulIndex = temp.IndexOf("
      ", lastIndex); olIndex = temp.IndexOf("
        ", lastIndex); if(ulIndex != -1 || olIndex != -1) { // 1. Find tag pairs // 2. Extract block and remove it from SB // 3. Process block and generate WikiMarkup output // 4. Insert new markup in SB at original position if(ulIndex != -1 && (ulIndex < olIndex || olIndex == -1)) { // Find a UL block int openIndex, closeIndex; if(FindTagsPair(sb, "
          ", "
        ", lastIndex, out openIndex, out closeIndex)) { string section = sb.ToString().Substring(openIndex, closeIndex - openIndex + 5); sb.Remove(openIndex, closeIndex - openIndex + 5); string result = ProcessList(false, section); sb.Insert(openIndex, result); // Skip processed data lastIndex = openIndex + result.Length; } else lastIndex += 4; continue; } if(olIndex != -1 && (olIndex < ulIndex || ulIndex == -1)) { // Find a OL block int openIndex, closeIndex; if(FindTagsPair(sb, "
          ", "
        ", lastIndex, out openIndex, out closeIndex)) { string section = sb.ToString().Substring(openIndex, closeIndex - openIndex + 5); sb.Remove(openIndex, closeIndex - openIndex + 5); string result = ProcessList(true, section); sb.Insert(openIndex, result); // Skip processed data lastIndex = openIndex + result.Length; } else lastIndex += 4; continue; } } } while(ulIndex != -1 || olIndex != -1); } /// /// Processes an unordered or ordered list. /// /// true for an ordered list, false for an unordered list. /// The input HTML. /// The output WikiMarkup. private static string ProcessList(bool ordered, string html) { HtmlList list = BuildListTree(ordered, html); string wikiMarkup = BuildListWikiMarkup(list, ""); return wikiMarkup.TrimEnd('\r', '\n'); } /// /// Builds the WikiMarkup for a list. /// /// The root list. /// The previous bullets, used at upper levels. /// The WikiMarkup. private static string BuildListWikiMarkup(HtmlList list, string previousBullets) { previousBullets = previousBullets + (list.Type == HtmlListType.Ordered ? "#" : "*"); StringBuilder sb = new StringBuilder(500); foreach(HtmlListElement elem in list.Elements) { sb.Append(previousBullets); sb.Append(" "); sb.Append(elem.Text); sb.Append("\r\n"); if(elem.SubList != null) { sb.Append(BuildListWikiMarkup(elem.SubList, previousBullets)); } } // Remove empty lines in the middle of the list string raw = sb.ToString().Replace("\r", ""); string[] lines = raw.Split(new char[] { '\n' }, StringSplitOptions.RemoveEmptyEntries); return string.Join("\r\n", lines) + (raw.EndsWith("\r\n") || raw.EndsWith("\n") ? "\r\n" : ""); } /// /// Builds a list tree. /// /// true for an ordered list. /// The input HTML. /// The list tree. private static HtmlList BuildListTree(bool ordered, string html) { string[] tags = new string[] { "
          ", "
            ", "
          • ", "
          • ", "
          ", "
        " }; // IE seems to add new-lines after some elements // \r\n are never added by the Formatter, so it is safe to remove all them html = html.Replace("\r", ""); html = html.Replace("\n", ""); int index = 0; int lastOpenListItemIndex = 0; int stringFound; HtmlList root = new HtmlList(ordered ? HtmlListType.Ordered : HtmlListType.Unordered); HtmlList currentList = root; do { index = FirstIndexOfAny(html, index, out stringFound, tags); if(index != -1) { switch(stringFound) { case 0: //
          // Unless at the beginning, start a new sub-list if(index != 0) { // Set text of current element (sub-lists are added into the previous item) if(lastOpenListItemIndex != -1) { string text = html.Substring(lastOpenListItemIndex + 4, index - (lastOpenListItemIndex + 4)); currentList.Elements[currentList.Elements.Count - 1].Text = text; } currentList.Elements[currentList.Elements.Count - 1].SubList = new HtmlList(HtmlListType.Ordered); currentList = currentList.Elements[currentList.Elements.Count - 1].SubList; } break; case 1: //
            // Unless at the beginning, start a new sub-list if(index != 0) { // Set text of current element (sub-lists are added into the previous item) if(lastOpenListItemIndex != -1) { string text = html.Substring(lastOpenListItemIndex + 4, index - (lastOpenListItemIndex + 4)); currentList.Elements[currentList.Elements.Count - 1].Text = text; } currentList.Elements[currentList.Elements.Count - 1].SubList = new HtmlList(HtmlListType.Unordered); currentList = currentList.Elements[currentList.Elements.Count - 1].SubList; } break; case 2: //
          • lastOpenListItemIndex = index; currentList.Elements.Add(new HtmlListElement()); break; case 3: //
          • // If lastOpenListItemIndex != -1 (i.e. there are no sub-lists) extract item text and set it to the last list element // Otherwise, navigate upwards to parent list (if any) if(lastOpenListItemIndex != -1) { string text = html.Substring(lastOpenListItemIndex + 4, index - (lastOpenListItemIndex + 4)); currentList.Elements[currentList.Elements.Count - 1].Text = text; } else { currentList = FindAnchestor(root, currentList); } break; case 4: //
          // Close last open list (nothing to do) lastOpenListItemIndex = -1; break; case 5: //
        // Close last open list (nothing to do) lastOpenListItemIndex = -1; break; default: throw new NotSupportedException(); } index++; } } while(index != -1); return root; } /// /// Finds the anchestor of a list in a tree. /// /// The root of the tree. /// The current element. /// The anchestor of current. private static HtmlList FindAnchestor(HtmlList root, HtmlList current) { foreach(HtmlListElement elem in root.Elements) { if(elem.SubList == current) return root; else if(elem.SubList != null) { HtmlList temp = FindAnchestor(elem.SubList, current); if(temp != null) return temp; } } //return root; return null; } /// /// Finds the index of the first string. /// /// The input string. /// The start index. /// The index (in strings) of the string found. /// The strings to search for. /// The index of the string found in input. private static int FirstIndexOfAny(string input, int startIndex, out int stringFound, params string[] strings) { if(startIndex > input.Length) { stringFound = -1; return -1; } int[] indices = new int[strings.Length]; for(int i = 0; i < strings.Length; i++) { indices[i] = input.IndexOf(strings[i], startIndex); } bool nothingFound = true; int min = int.MaxValue; stringFound = -1; for(int i = 0; i < indices.Length; i++) { if(indices[i] != -1 && indices[i] < min) { nothingFound = false; min = indices[i]; stringFound = i; } } if(nothingFound) return -1; else return min; } /// /// Finds the position of a matched tag pair. /// /// The string builder buffer. /// The open tag. /// The close tag. /// The start index. /// The open index. /// The (matched/balanced) close index. /// true if a tag pair is found, false otherwise. 
/// <summary>
/// Finds the position of a matched (balanced) tag pair.
/// </summary>
/// <param name="sb">The string builder buffer.</param>
/// <param name="openTag">The open tag.</param>
/// <param name="closeTag">The close tag.</param>
/// <param name="startIndex">The index in <paramref name="sb"/> at which the search starts.</param>
/// <param name="openIndex">The index of the first open tag, or <c>-1</c> if no pair is found.</param>
/// <param name="closeIndex">The index of the close tag that balances it, or <c>-1</c> if no pair is found.</param>
/// <returns><c>true</c> if a tag pair is found, <c>false</c> otherwise.</returns>
private static bool FindTagsPair(StringBuilder sb, string openTag, string closeTag, int startIndex, out int openIndex, out int closeIndex) {
	openIndex = -1;
	closeIndex = -1;

	if(startIndex >= sb.Length) return false;

	string text = sb.ToString();

	// Collect the positions of every open and close tag from startIndex onwards
	List<int> openIndexes = CollectTagIndexes(text, openTag, startIndex);
	if(openIndexes.Count == 0) return false;

	List<int> closeIndexes = CollectTagIndexes(text, closeTag, startIndex);
	if(closeIndexes.Count == 0) return false;

	// Condition needed for further processing: the remaining text must be balanced overall
	if(openIndexes.Count != closeIndexes.Count) return false;

	// Both lists are already sorted ascending, so they can be walked in document
	// order with two cursors instead of building and sorting a merged list.
	int depth = 0;
	int firstOpenIndex = -1;
	int openCursor = 0;
	int closeCursor = 0;

	while(openCursor < openIndexes.Count || closeCursor < closeIndexes.Count) {
		bool nextIsOpen = closeCursor >= closeIndexes.Count ||
			(openCursor < openIndexes.Count && openIndexes[openCursor] <= closeIndexes[closeCursor]);

		if(nextIsOpen) {
			if(firstOpenIndex == -1) firstOpenIndex = openIndexes[openCursor];
			depth++;
			openCursor++;
		}
		else {
			int currentClose = closeIndexes[closeCursor];
			closeCursor++;

			// A close tag that precedes every open tag cannot belong to a pair: ignore it.
			// (Counting it would let the depth return to zero on an OPEN tag and report
			// a bogus pair whose close index equals its open index.)
			if(firstOpenIndex == -1) continue;

			depth--;
			if(depth == 0) {
				// Shortest closed tree found
				openIndex = firstOpenIndex;
				closeIndex = currentClose;
				return true;
			}
		}
	}

	return false;
}

/// <summary>
/// Collects the indexes of all the occurrences of a tag, in ascending order.
/// </summary>
/// <param name="text">The text to search.</param>
/// <param name="tag">The tag to look for (ordinal, case-sensitive match).</param>
/// <param name="startIndex">The index at which the search starts.</param>
/// <returns>The indexes found (empty if the tag does not occur).</returns>
private static List<int> CollectTagIndexes(string text, string tag, int startIndex) {
	List<int> indexes = new List<int>(10);
	int found = text.IndexOf(tag, startIndex, StringComparison.Ordinal);
	while(found != -1) {
		indexes.Add(found);
		found = text.IndexOf(tag, found + 1, StringComparison.Ordinal);
	}
	return indexes;
}

/// <summary>
/// Prepares a link URL.
/// </summary>
/// <param name="rawUrl">The raw URL, as generated by the formatter.</param>
/// <returns>The prepared link URL, suitable for formatting.</returns>
private static string PrepareLink(string rawUrl) {
	// Turns a formatter-generated link URL back into its WikiMarkup form:
	// the URL is decoded, the current request's main URL prefix is removed,
	// the ".ashx" page extension is dropped and GetFile.aspx URLs become "{UP...}" references.
	rawUrl = StripCurrentMainUrl(UrlDecode(rawUrl));

	// Plain page link: remove the trailing ".ashx" (5 characters)
	if(rawUrl.EndsWith(".ashx", StringComparison.OrdinalIgnoreCase)) return rawUrl.Substring(0, rawUrl.Length - 5);

	// Page link with anchor: remove ".ashx" but keep the "#..." part
	int extensionIndex = rawUrl.IndexOf(".ashx#", StringComparison.OrdinalIgnoreCase);
	if(extensionIndex != -1) return rawUrl.Remove(extensionIndex, 5);

	// Look for File and Provider parameter (v2 and v3)
	if(rawUrl.StartsWith("GetFile.aspx", StringComparison.Ordinal)) return BuildUploadReference(rawUrl);

	return rawUrl;
}

/// <summary>
/// Prepares an image URL.
/// </summary>
/// <param name="rawUrl">The raw URL, as generated by the formatter.</param>
/// <returns>The prepared image URL, suitable for formatting.</returns>
private static string PrepareImageUrl(string rawUrl) {
	rawUrl = StripCurrentMainUrl(UrlDecode(rawUrl));

	// Look for File and Provider parameter (v2 and v3)
	if(rawUrl.StartsWith("GetFile.aspx", StringComparison.Ordinal)) return BuildUploadReference(rawUrl);

	return rawUrl;
}

/// <summary>
/// Removes the current request's main URL from the beginning of a URL, if present.
/// </summary>
/// <param name="url">The (already decoded) URL.</param>
/// <returns>The URL without the main URL prefix, or the input unchanged.</returns>
private static string StripCurrentMainUrl(string url) {
	string mainUrl = GetCurrentRequestMainUrl();
	// Ordinal comparison: URLs are identifiers, not linguistic text, and a culture-aware
	// match could succeed on a prefix whose length differs from mainUrl.Length.
	if(url.StartsWith(mainUrl, StringComparison.OrdinalIgnoreCase)) return url.Substring(mainUrl.Length);
	return url;
}

/// <summary>
/// Builds the <c>{UP[:provider][(page)]}file</c> reference for a GetFile.aspx URL.
/// </summary>
/// <param name="rawUrl">The GetFile.aspx URL, including its query string.</param>
/// <returns>The upload reference.</returns>
private static string BuildUploadReference(string rawUrl) {
	string provider, page, file;
	GetProviderAndFileAndPage(rawUrl, out provider, out page, out file);

	return "{UP" +
		(provider != null ? ":" + provider : "") +
		(page != null ? "(" + page + ")" : "") +
		"}" + file;
}

/// <summary>
/// Gets the current request main URL, such as http://www.server.com/Wiki/.
/// </summary>
/// <returns>The URL.</returns>
private static string GetCurrentRequestMainUrl() {
	string url = HttpContext.Current.Request.Url.GetLeftPart(UriPartial.Path);
	if(!url.EndsWith("/", StringComparison.Ordinal)) {
		// Cut everything after the last slash, keeping the slash itself
		int index = url.LastIndexOf('/');
		if(index != -1) url = url.Substring(0, index + 1);
	}
	return url;
}

/// <summary>
/// Gets the provider and file of a link or URL.
/// </summary>
/// <param name="rawUrl">The raw URL, in the format <b>...?Provider=PROVIDER[&amp;IsPageAttachment=1&amp;Page=PAGE]&amp;File=FILE</b>.</param>
/// <param name="provider">The provider, or <c>null</c>.</param>
/// <param name="page">The page (for attachments), or <c>null</c>.</param>
/// <param name="file">The file.</param>
private static void GetProviderAndFileAndPage(string rawUrl, out string provider, out string page, out string file) {
	const string ProviderKey = "Provider=";
	const string FileKey = "File=";
	const string PageKey = "Page=";

	provider = null;
	page = null;
	file = null;

	// Keep only the query string (the whole input when there is no '?').
	// Ampersands may still be HTML-encoded when the URL was taken from markup,
	// so they are normalized before splitting the parameters.
	string query = rawUrl.Substring(rawUrl.IndexOf('?') + 1).Replace("&amp;", "&");

	foreach(string chunk in query.Split('&')) {
		if(chunk.StartsWith(ProviderKey, StringComparison.Ordinal)) {
			provider = chunk.Substring(ProviderKey.Length);
		}
		else if(chunk.StartsWith(FileKey, StringComparison.Ordinal)) {
			file = chunk.Substring(FileKey.Length);
		}
		else if(chunk.StartsWith(PageKey, StringComparison.Ordinal)) {
			page = chunk.Substring(PageKey.Length);
		}
	}
}

/// <summary>
/// Decodes a URL-encoded string.
/// </summary>
/// <param name="input">The input encoded string.</param>
/// <returns>The decoded string.</returns>
/// <remarks>A single decoding pass is performed. It seems that in some cases URL encoding
/// occurs multiple times, one on the server and one on the client; a second decoding pass
/// is not applied here.</remarks>
private static string UrlDecode(string input) {
	return Tools.UrlDecode(input);
}

} // class ReverseFormatter

/// <summary>
/// Represents an open or close tag.
/// </summary>
public class Tag {

	/// <summary>
	/// Gets or sets the tag type.
	/// </summary>
	public TagType Type { get; set; }

	/// <summary>
	/// Gets or sets the tag index.
	/// </summary>
	public int Index { get; set; }

}

/// <summary>
/// Lists tag types.
/// </summary>
public enum TagType {
	/// <summary>
	/// An open tag.
	/// </summary>
	Open,
	/// <summary>
	/// A close tag.
	/// </summary>
	Close
}

/// <summary>
/// Represents a HTML list.
/// </summary>
public class HtmlList {

	/// <summary>
	/// Initializes a new instance of the <see cref="HtmlList"/> class.
	/// </summary>
	/// <param name="type">The list type.</param>
	public HtmlList(HtmlListType type) {
		Type = type;
		Elements = new List<HtmlListElement>(10);
	}

	/// <summary>
	/// Gets or sets the list type.
	/// </summary>
	public HtmlListType Type { get; set; }

	/// <summary>
	/// Gets or sets the list elements.
	/// </summary>
	public List<HtmlListElement> Elements { get; set; }

}

/// <summary>
/// Represents a HTML list element.
/// </summary>
public class HtmlListElement {

	/// <summary>
	/// Gets or sets the text.
	/// </summary>
	public string Text { get; set; }

	/// <summary>
	/// Gets or sets the sub-list.
	/// </summary>
	public HtmlList SubList { get; set; }

}

/// <summary>
/// Lists HTML list types.
/// </summary>
public enum HtmlListType {
	/// <summary>
	/// An ordered list.
	/// </summary>
	Ordered,
	/// <summary>
	/// An unordered list.
	/// </summary>
	Unordered
}

} // namespace ScrewTurn.Wiki