using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Xml;
using System.IO;
namespace ScrewTurn.Wiki {
///
/// Implements reverse formatting methods (HTML->WikiMarkup).
///
public static class ReverseFormatter {
// Pre-compiled patterns used by the regex-based ReverseFormatOld conversion.
// Used by ReverseFormatOld to find the div wrappers added by WebKit; each match is replaced with "\r\n" followed by capture group 2.
private static readonly Regex WebkitDivRegex = new Regex(@"(
|
)()? ()?(((.)*?) )?(| |
)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
private static readonly Regex ImageInlineRegex = new Regex(@"(
)?
()?", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
// Horizontal ruler; ReverseFormatOld replaces each match with "----".
private static readonly Regex HRRegex = new Regex(@"
\s*
", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
private static readonly Regex BoxRegex = new Regex(@"
((.|\n|\r)*?)
", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
// Code section; ReverseFormatOld rewrites each match as "{{" + inner text + "}}".
private static readonly Regex CodeRegex = new Regex(@"
((.|\n|\r)*?)
", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
// Preformatted section; ReverseFormatOld rewrites each match as "@@" + inner text + "@@".
private static readonly Regex PreRegex = new Regex(@"
((.|\n|\r)*?)
", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
// Searched for inside each PreRegex match by ReverseFormatOld.
private static readonly Regex SingleBR = new Regex(@"(?)
(?!
)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
// ReverseFormatOld replaces each match with "{BR}".
private static readonly Regex SingleNewLine = new Regex(@"(?(.*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
// Title=1 - Href=2 - Target=3 - Content=4 --- Href=http://www.server.com/Spaced%20Page.ashx
private static readonly Regex UnknownLinkRegexIE = new Regex(@"
(.*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
// Title=1 - ProviderGlobal=3 - Provider=4 - Page=6 - File=7 - Target=8 - Content=9
private static readonly Regex FileOrAttachmentLinkRegexIE = new Regex(@"
(.*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
// Title=1 - Href=2 - Target=3 - Content=4 --- Href=http://www.server.com/Register.aspx
private static readonly Regex SystemLinkRegexIE = new Regex(@"
(.*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
// Title=1 - Href=2 - Target=3 - Content=4
private static readonly Regex ExternalLinkRegexIE = new Regex(@"
(.*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
// Title=1 - Href=2 - Target=3 - Content=4
private static readonly Regex InternalLinkRegexIE = new Regex(@"
(.*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
// AnchorLinkRegexIE would be equal to InternalLinkRegex - no need for it
// Title=1 - Href=2 - Target=3 - Content=4
private static readonly Regex EmailLinkRegexIE = new Regex(@"
(.*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
// DivClass=1 - A=2 - ATitle=3 - AHref=4 - ATarget=5 - ImageAlt=6 - ImageSrc=7 - P=9 - PContent=10 --- Href/Src=http://www.server.com/Blah.ashx/GetFile.aspx...
private static readonly Regex ImageLeftRightRegexIE = new Regex(@"
(
)?
()?(\r\n
(.*?)
)?
", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
// A=1 - ATitle=2 - AHref=3 - ATarget=4 - ImageAlt=5 - ImageSrc=6 - P=8 - PContent=9 --- Href/Src=http://www.server.com/Blah.ashx/GetFile.aspx...
private static readonly Regex ImageAutoRegexIE = new Regex(@"
\r\n\r\n\r\n()? ()?(\r\n(.*?) )? |
", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
// A=1 - ATitle=2 - AHref=3 - ATarget=4 - ImageAlt=5 - ImageSrc=6 --- Href/Src=http://www.server.com/Blah.ashx/GetFile.aspx...
private static readonly Regex ImageInlineRegexIE = new Regex(@"(
)?
()?", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
// Cleared at the end of ReverseFormat.
// NOTE(review): no code visible in this section of the file adds to this list - confirm it is still needed.
private static List
listText= new List();
//private static string result = "";
/// <summary>
/// Builds the WikiMarkup image reference for an image node from its <c>src</c> attribute.
/// </summary>
/// <param name="node">The image node. May be <c>null</c> or a node without attributes (e.g. text).</param>
/// <returns>
/// <c>{UP(x)}y</c> when the <c>src</c> value splits on '=' into at least three segments, where
/// <c>x</c> is the second segment up to its first '&amp;' and <c>y</c> is the third segment;
/// the unchanged <c>src</c> value when it has fewer segments; an empty string when there is no <c>src</c>.
/// </returns>
private static string processImage(XmlNode node) {
    // Callers pass node.LastChild, which can be null or a text/whitespace node whose
    // attribute collection is null.
    if(node == null || node.Attributes == null) return "";

    XmlAttribute src = node.Attributes["src"];
    if(src == null) return "";

    string[] path = src.Value.Split('=');

    // Presumably the expected shape is "...?Page=<page>&File=<file>" (TODO confirm against the formatter).
    // Anything with fewer '=' separators (e.g. a plain external URL) cannot be decomposed,
    // so it is emitted as-is instead of indexing past the end of the array.
    if(path.Length < 3) return src.Value;

    return "{" + "UP(" + path[1].Split('&')[0] + ")}" + path[2];
}
/// <summary>
/// Processes the children of an image container and assembles the inner part of an image tag.
/// </summary>
/// <param name="nodes">The child nodes of the container.</param>
/// <returns>The description segments, followed by the image references, followed by the link segment.</returns>
private static string processChildImage(XmlNodeList nodes) {
    string description = "";
    string picture = "";
    string linkSegment = "";

    foreach(XmlNode child in nodes) {
        switch(child.Name.ToLowerInvariant()) {
            case "img":
                picture += processImage(child);
                break;

            case "p":
                // The paragraph content is wrapped in pipes.
                description += "|" + processChild(child.ChildNodes) + "|";
                break;

            case "a": {
                string href = "";
                string marker = "";
                foreach(XmlAttribute attr in child.Attributes) {
                    // Any attribute whose value is "_blank" adds the "^" marker.
                    if(attr.Value == "_blank") marker += "^";
                    if(attr.Name == "href") href += attr.Value;
                }
                // The image is taken from the last child of the link.
                picture += processImage(child.LastChild);
                // Assigned, not appended: only the last link encountered is kept.
                linkSegment = "|" + marker + href;
                break;
            }
        }
    }

    return description + picture + linkSegment;
}
/// <summary>
/// Converts a list of HTML nodes, and recursively their descendants, into WikiMarkup.
/// </summary>
/// <param name="nodes">The nodes to convert.</param>
/// <returns>
/// The markup produced for every node, concatenated in document order. Text nodes are copied
/// unchanged and elements without a dedicated rule are emitted as their outer XML.
/// </returns>
private static string processChild(XmlNodeList nodes) {
    string result = "";
    foreach(XmlNode node in nodes) {
        if(node.NodeType == XmlNodeType.Text) {
            result += node.Value;
            continue;
        }

        switch(node.Name.ToLowerInvariant()) {
            case "b":
            case "strong":
                result += "'''" + processChild(node.ChildNodes) + "'''";
                break;
            case "s":
                result += "--" + processChild(node.ChildNodes) + "--";
                break;
            case "em":
            case "i":
                result += "''" + processChild(node.ChildNodes) + "''";
                break;
            case "u":
                result += "__" + processChild(node.ChildNodes) + "__";
                break;
            case "h1":
                result += "==" + processChild(node.ChildNodes) + "==";
                break;
            case "h2":
                result += "===" + processChild(node.ChildNodes) + "===";
                break;
            case "h3":
                result += "====" + processChild(node.ChildNodes) + "====";
                break;
            case "h4":
                result += "=====" + processChild(node.ChildNodes) + "=====";
                break;
            case "pre":
                result += "(((" + processChild(node.ChildNodes) + ")))";
                break;
            case "code":
                result += "@@" + processChild(node.ChildNodes) + "@@";
                break;
            // NOTE(review): "hr /", "\t" and "\n" contain characters that are not legal in an
            // XML name, so those labels look unreachable; they are kept to leave the dispatch untouched.
            case "hr":
            case "hr /":
                result += "----" + processChild(node.ChildNodes);
                break;
            case "\t":
                result += ":" + processChild(node.ChildNodes);
                break;
            case "éé":
                result += "~~~~" + processChild(node.ChildNodes);
                break;
            case "span":
                // Emits the content once for every attribute whose value is "italic";
                // a span without such an attribute produces nothing.
                foreach(XmlAttribute attName in node.Attributes) {
                    if(attName.Value == "italic")
                        result += "''" + processChild(node.ChildNodes) + "''";
                }
                break;
            case "\n":
            case "br":
                result += "{br}" + processChild(node.ChildNodes);
                break;
            case "ol":
            case "ul":
                result += "{br}" + processChild(node.ChildNodes) + "{br}";
                break;
            case "table":
            case "tbody":
            case "tr":
            case "td":
                // Table structure is dropped; only the cell content is kept.
                result += processChild(node.ChildNodes);
                break;
            case "li":
                // The bullet depends on the direct parent; items under any other parent are dropped.
                if(node.ParentNode.Name.ToLowerInvariant() == "ol")
                    result += "# " + processChild(node.ChildNodes) + "{br}";
                else if(node.ParentNode.Name.ToLowerInvariant() == "ul")
                    result += "* " + processChild(node.ChildNodes) + "{br}";
                break;
            case "sup":
                // Kept as inline HTML (like unknown elements in the default branch) so the
                // superscript formatting is not silently lost.
                result += "<sup>" + processChild(node.ChildNodes) + "</sup>";
                break;
            case "sub":
                // Same treatment as "sup".
                result += "<sub>" + processChild(node.ChildNodes) + "</sub>";
                break;
            case "p":
                // Paragraphs produce no output here; image descriptions are handled by
                // processChildImage when their containing div is processed.
                break;
            case "div":
                if(node.Attributes.Count != 0) {
                    foreach(XmlAttribute attName in node.Attributes) {
                        switch(attName.Value) {
                            case "box":
                                result += "(((" + processChild(node.ChildNodes) + "))){br}";
                                break;
                            case "imageleft":
                            case "imageright":
                            case "imageauto":
                                // The attribute value doubles as the image tag name, so each
                                // alignment keeps its own tag.
                                result += "[" + attName.Value + processChildImage(node.ChildNodes) + "]{br}";
                                break;
                        }
                    }
                }
                else
                    result += processChild(node.ChildNodes) + "{br}";
                break;
            case "img":
                // Stand-alone images produce no output here.
                break;
            case "a": {
                string link = "";
                string target = "";
                string title = "";
                bool anchor = false;
                if(node.Attributes.Count != 0) {
                    foreach(XmlAttribute attName in node.Attributes) {
                        if(attName.Name != "id") {
                            if(attName.Value == "_blank")
                                target += "^";
                            if(attName.Name == "href")
                                link += attName.Value;
                            if(attName.Name == "title")
                                title += attName.Value;
                        }
                        else {
                            // An id attribute marks a named anchor; the remaining attributes are ignored.
                            anchor = true;
                            result += "[anchor|#" + attName.Value.ToLowerInvariant() + "]" + processChild(node.ChildNodes);
                            break;
                        }
                    }
                    if(!anchor)
                        result += "[" + target + link + "|" + title + "]" + processChild(node.ChildNodes);
                }
                break;
            }
            default:
                // No rule for this node: pass its markup through unchanged.
                result += node.OuterXml;
                break;
        }
    }
    return result;
}
/// <summary>
/// Loads HTML text into an XML document by reading it through an SGML reader.
/// </summary>
/// <param name="reader">The reader supplying the HTML text.</param>
/// <returns>The loaded <see cref="XmlDocument"/>.</returns>
private static XmlDocument FromHTML(TextReader reader) {
    // SGML reader set up for the HTML doctype, reporting all whitespace and folding case to lower.
    Sgml.SgmlReader htmlReader = new Sgml.SgmlReader {
        DocType = "HTML",
        WhitespaceHandling = WhitespaceHandling.All,
        CaseFolding = Sgml.CaseFolding.ToLower,
        InputStream = reader
    };

    // Whitespace is preserved and no external resources are resolved while loading.
    XmlDocument document = new XmlDocument {
        PreserveWhitespace = true,
        XmlResolver = null
    };
    document.Load(htmlReader);
    return document;
}
/// <summary>
/// Reverse formats HTML content into WikiMarkup.
/// </summary>
/// <param name="html">The input HTML.</param>
/// <returns>
/// The corresponding WikiMarkup, or an empty string when <paramref name="html"/> is
/// <c>null</c> or empty.
/// </returns>
public static string ReverseFormat(string html) {
    // Nothing to convert.
    if(string.IsNullOrEmpty(html)) return "";

    XmlDocument document;
    using(StringReader reader = new StringReader(html)) {
        // The document is fully loaded before the reader is disposed.
        document = FromHTML(reader);
    }

    // Conversion starts from the children of the document's first node.
    XmlNode first = document.FirstChild;
    string text = first != null ? processChild(first.ChildNodes) : "";

    listText.Clear();
    return text;
}
/// <summary>
/// Reverse formats HTML content into WikiMarkup.
/// </summary>
/// <param name="html">The input HTML.</param>
/// <returns>The corresponding WikiMarkup.</returns>
public static string ReverseFormatOld(string html) {
Match match = null;
StringBuilder buffer = new StringBuilder(html);
if(!html.EndsWith("\r\n")) buffer.Append("\r\n");
buffer.Replace("
", "
");
buffer.Replace("
", "
");
buffer.Replace("", "");
buffer.Replace("", "");
buffer.Replace("", "");
buffer.Replace("", "");
buffer.Replace("", "");
buffer.Replace("", "");
buffer.Replace("", "");
buffer.Replace("", "");
buffer.Replace("", "
");
buffer.Replace("
", "");
buffer.Replace("&", "&");
// Escape square brackets, otherwise they're interpreted as links
buffer.Replace("[", "[");
buffer.Replace("]", "]");
// #469: IE seems to randomly add this stuff
buffer.Replace("
\r\n", "
");
buffer.Replace("", "");
buffer.Replace("
", "");
// Temporarily replace
in tags
match = PreRegex.Match(buffer.ToString());
while(match.Success) {
Match subMatch = SingleBR.Match(match.Value);
while(subMatch.Success) {
buffer.Remove(match.Index + subMatch.Index, subMatch.Length);
buffer.Insert(match.Index + subMatch.Index, "");
subMatch = SingleBR.Match(match.Value, subMatch.Index + 1);
}
match = PreRegex.Match(buffer.ToString(), match.Index + 1);
}
buffer.Replace("", "\r\n");
// Code
match = CodeRegex.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
buffer.Insert(match.Index, "{{" + match.Value.Substring(6, match.Length - 13) + "}}");
match = CodeRegex.Match(buffer.ToString(), match.Index + 1);
}
// Pre
// Unescape square brackets
match = PreRegex.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
buffer.Insert(match.Index, "@@" +
match.Value.Substring(5, match.Length - 11).Replace("&", "&").Replace("[", "[").Replace("]", "]") +
"@@");
match = PreRegex.Match(buffer.ToString(), match.Index + 1);
}
// WebkitDivRegex
// Remove all div added by webkit and replace them with \r\n.
match = WebkitDivRegex.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
buffer.Insert(match.Index, "\r\n" + match.Groups[2].Value);
match = WebkitDivRegex.Match(buffer.ToString(), match.Index + 1);
}
// Bold
match = BoldRegex.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
buffer.Insert(match.Index, "'''" + match.Groups[2].Value + "'''");
match = BoldRegex.Match(buffer.ToString(), match.Index + 1);
}
// Italic
match = ItalicRegex.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
buffer.Insert(match.Index, "''" + match.Groups[2].Value + "''");
match = ItalicRegex.Match(buffer.ToString(), match.Index + 1);
}
// Underline
match = UnderlineRegex.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
buffer.Insert(match.Index, "__" + match.Groups[2].Value + "__");
match = UnderlineRegex.Match(buffer.ToString(), match.Index + 1);
}
// Strike
match = StrikeRegex.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
buffer.Insert(match.Index, "--" + match.Groups[2].Value + "--");
match = StrikeRegex.Match(buffer.ToString(), match.Index + 1);
}
// Horizontal Ruler
match = HRRegex.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
buffer.Insert(match.Index, "----");
match = HRRegex.Match(buffer.ToString(), match.Index + 1);
}
// H1
match = H1Regex.Match(buffer.ToString());
while(match.Success) {
char c = buffer[match.Index + match.Length];
bool addNewLine = false;
if(buffer[match.Index + match.Length] != '\n') addNewLine = true;
buffer.Remove(match.Index, match.Length);
if(addNewLine) buffer.Insert(match.Index, "==" + match.Groups[2].Value + "==\n");
else buffer.Insert(match.Index, "==" + match.Groups[2].Value + "==");
match = H1Regex.Match(buffer.ToString(), match.Index + 1);
}
// H2
match = H2Regex.Match(buffer.ToString());
while(match.Success) {
bool addNewLine = false;
if(buffer[match.Index + match.Length] != '\n') addNewLine = true;
buffer.Remove(match.Index, match.Length);
if(addNewLine) buffer.Insert(match.Index, "===" + match.Groups[2].Value + "===\n");
else buffer.Insert(match.Index, "===" + match.Groups[2].Value + "===");
match = H2Regex.Match(buffer.ToString(), match.Index + 1);
}
// H3
match = H3Regex.Match(buffer.ToString());
while(match.Success) {
bool addNewLine = false;
if(buffer[match.Index + match.Length] != '\n') addNewLine = true;
buffer.Remove(match.Index, match.Length);
if(addNewLine) buffer.Insert(match.Index, "====" + match.Groups[2].Value + "====\n");
else buffer.Insert(match.Index, "====" + match.Groups[2].Value + "====");
match = H3Regex.Match(buffer.ToString(), match.Index + 1);
}
// H4
match = H4Regex.Match(buffer.ToString());
while(match.Success) {
bool addNewLine = false;
if(buffer[match.Index + match.Length] != '\n') addNewLine = true;
buffer.Remove(match.Index, match.Length);
if(addNewLine) buffer.Insert(match.Index, "=====" + match.Groups[2].Value + "=====\n");
else buffer.Insert(match.Index, "=====" + match.Groups[2].Value + "=====");
match = H4Regex.Match(buffer.ToString(), match.Index + 1);
}
// Lists
buffer.Replace("
");
buffer.Replace("", "");
buffer.Replace("
", "
");
buffer.Replace("", "");
buffer.Replace("", "");
ProcessLists(buffer);
// Page Link
match = PageLinkRegex.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
string insertion = "[";
if(match.Groups[2].Value == @"target=""_blank"" ") insertion += "^";
string decoded = UrlDecode(match.Groups[3].Value);
insertion += (decoded.StartsWith(" ") ? "++" : "") + decoded.Trim();
if(match.Groups[6].Value != decoded) insertion += "|" + match.Groups[6].Value;
insertion += "]";
buffer.Insert(match.Index, insertion);
match = PageLinkRegex.Match(buffer.ToString(), match.Index + 1);
}
// Page Link IE
match = PageLinkRegexIE.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
string insertion = "[";
if(match.Groups[3].Value == " target=_blank") insertion += "^";
string page = match.Groups[2].Value.Substring(match.Groups[2].Value.LastIndexOf("/") + 1);
page = page.Substring(0, page.Length - 5); // Remove .ashx
page = UrlDecode(page);
insertion += page;
if(match.Groups[4].Value != page) insertion += "|" + match.Groups[4].Value;
insertion += "]";
buffer.Insert(match.Index, insertion);
match = PageLinkRegexIE.Match(buffer.ToString(), match.Index + 1);
}
// Unknown Link
match = UnknownLinkRegex.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
string insertion = "[";
if(match.Groups[2].Value == @"target=""_blank"" ") insertion += "^";
string decoded = UrlDecode(match.Groups[3].Value);
insertion += decoded;
if(match.Groups[6].Value != decoded) insertion += "|" + match.Groups[6].Value;
insertion += "]";
buffer.Insert(match.Index, insertion);
match = UnknownLinkRegex.Match(buffer.ToString(), match.Index + 1);
}
// Unknown Link IE
match = UnknownLinkRegexIE.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
string insertion = "[";
if(match.Groups[3].Value == " target=_blank") insertion += "^";
string page = match.Groups[2].Value.Substring(match.Groups[2].Value.LastIndexOf("/") + 1);
page = page.Substring(0, page.Length - 5); // Remove .ashx
page = UrlDecode(page);
insertion += page;
if(match.Groups[4].Value != page) insertion += "|" + match.Groups[4].Value;
insertion += "]";
buffer.Insert(match.Index, insertion);
match = UnknownLinkRegexIE.Match(buffer.ToString(), match.Index + 1);
}
// File Link
match = FileLinkRegex.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
string insertion = "[";
if(match.Groups[2].Value == @"target=""_blank"" ") insertion += "^";
if(match.Groups[3].Value != "") insertion += "{UP:" + match.Groups[4].Value + "}" + UrlDecode(match.Groups[6].Value);
else insertion += "{UP}" + UrlDecode(match.Groups[6].Value);
if(!match.Groups[10].Value.StartsWith("GetFile.aspx") && !match.Groups[10].Value.StartsWith("{UP")) insertion += "|" + match.Groups[10];
insertion += "]";
buffer.Insert(match.Index, insertion);
match = FileLinkRegex.Match(buffer.ToString(), match.Index + 1);
}
// File Link IE
match = FileOrAttachmentLinkRegexIE.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
string insertion = "[";
if(match.Groups[8].Value == " target=_blank") insertion += "^";
if(match.Groups[3].Value != "") insertion += "{UP:" + match.Groups[4].Value;
else insertion += "{UP";
if(match.Groups[6].Value != "") insertion += "(" + UrlDecode(match.Groups[6].Value) + ")";
insertion += "}";
insertion += UrlDecode(match.Groups[7].Value);
if(!match.Groups[9].Value.StartsWith("GetFile.aspx") && !match.Groups[9].Value.StartsWith("{UP")) insertion += "|" + match.Groups[9].Value;
insertion += "]";
buffer.Insert(match.Index, insertion);
match = FileOrAttachmentLinkRegexIE.Match(buffer.ToString(), match.Index + 1);
}
// Attachment Link
match = AttachmentLinkRegex.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
string insertion = "[";
if(match.Groups[2].Value == @"target=""_blank"" ") insertion += "^";
// if the provider is not present "{UP" is added without ":providername"
insertion += match.Groups[4].Value == "" ? "{UP" : "{UP:" + match.Groups[4].Value;
insertion += "(" + UrlDecode(match.Groups[6].Value) + ")}" + UrlDecode(match.Groups[8].Value);
if(!match.Groups[12].Value.StartsWith("GetFile.aspx") && !match.Groups[12].Value.StartsWith("{UP")) insertion += "|" + match.Groups[12];
insertion += "]";
buffer.Insert(match.Index, insertion);
match = AttachmentLinkRegex.Match(buffer.ToString(), match.Index + 1);
}
// External Link
match = ExternalLinkRegex.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
string insertion = "[";
//if(match.Groups[6].Value == @"target=""_blank""") insertion += "^";
string url = match.Groups[2].Value;
if(url.StartsWith(Settings.MainUrl)) url = url.Substring(Settings.MainUrl.Length);
insertion += url;
if(match.Groups[7].Value != match.Groups[2].Value && match.Groups[7].Value + "/" != match.Groups[2].Value) insertion += "|" + match.Groups[7].Value;
insertion += "]";
buffer.Insert(match.Index, insertion);
match = ExternalLinkRegex.Match(buffer.ToString(), match.Index + 1);
}
// External Link IE
match = ExternalLinkRegexIE.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
string insertion = "[";
string url = match.Groups[2].Value;
if(url.StartsWith(Settings.MainUrl)) url = url.Substring(Settings.MainUrl.Length);
insertion += url;
if(match.Groups[4].Value != match.Groups[2].Value.TrimEnd('/')) insertion += "|" + match.Groups[4].Value;
insertion += "]";
buffer.Insert(match.Index, insertion);
match = ExternalLinkRegexIE.Match(buffer.ToString(), match.Index + 1);
}
// Internal Link
match = InternalLinkRegex.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
string insertion = "[";
if(match.Groups[1].Value == @"target=""_blank""") insertion += "^";
string url = match.Groups[2].Value;
if(url.StartsWith(Settings.MainUrl)) url = url.Substring(Settings.MainUrl.Length);
insertion += url;
string decoded = UrlDecode(match.Groups[6].Value);
if(match.Groups[2].Value != decoded) insertion += "|" + decoded;
insertion += "]";
buffer.Insert(match.Index, insertion);
match = InternalLinkRegex.Match(buffer.ToString(), match.Index + 1);
}
// Internal Link IE
match = InternalLinkRegexIE.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
string insertion = "[";
if(match.Groups[3].Value == " target=_blank") insertion += "^";
string url = match.Groups[2].Value;
if(url.StartsWith(Settings.MainUrl)) url = url.Substring(Settings.MainUrl.Length);
insertion += url;
string decoded = UrlDecode(match.Groups[4].Value);
if(decoded != match.Groups[2].Value) insertion += "|" + decoded;
insertion += "]";
buffer.Insert(match.Index, insertion);
match = InternalLinkRegexIE.Match(buffer.ToString(), match.Index + 1);
}
// Anchor Link
match = AnchorLinkRegex.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
string insertion = "[";
if(match.Groups[3].Value != "") insertion += "^";
insertion += "#";
insertion += match.Groups[1].Value;
string val = match.Groups[6].Value.ToLowerInvariant().Replace(" ", "");
if(val != "") insertion += "|" + val;
insertion += "]";
buffer.Insert(match.Index, insertion);
match = AnchorLinkRegex.Match(buffer.ToString(), match.Index + 1);
}
// System Link (.aspx)
match = SystemLinkRegex.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
string insertion = "[";
if(match.Groups[1].Value == @"target=""_blank""") insertion += "^";
insertion += match.Groups[2].Value;
string decoded = UrlDecode(match.Groups[6].Value);
if(match.Groups[2].Value != decoded) insertion += "|" + decoded;
insertion += "]";
buffer.Insert(match.Index, insertion);
match = SystemLinkRegex.Match(buffer.ToString(), match.Index + 1);
}
// System Link IE
match = SystemLinkRegexIE.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
string insertion = "[";
if(match.Groups[3].Value == " target=_blank") insertion += "^";
string url = match.Groups[2].Value.Substring(match.Groups[2].Value.LastIndexOf("/") + 1);
insertion += url;
string decoded = UrlDecode(match.Groups[4].Value);
if(decoded != url) insertion += "|" + decoded;
insertion += "]";
buffer.Insert(match.Index, insertion);
match = SystemLinkRegexIE.Match(buffer.ToString(), match.Index + 1);
}
// Email Link
match = EmailLinkRegex.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
string insertion = "[";
if(match.Groups[2].Value == @"target=""_blank"" ") insertion += "^";
insertion += match.Groups[3].Value;
string decoded = UrlDecode(match.Groups[6].Value);
if(decoded != match.Groups[3].Value) insertion += "|" + decoded;
insertion += "]";
buffer.Insert(match.Index, insertion);
match = EmailLinkRegex.Match(buffer.ToString(), match.Index + 1);
}
// Email Link IE
match = EmailLinkRegexIE.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
string insertion = "[";
insertion += match.Groups[2].Value.Substring(7); // Remove mailto:
string decoded = UrlDecode(match.Groups[4].Value);
if(decoded != match.Groups[2].Value.Substring(7)) insertion += "|" + decoded;
insertion += "]";
buffer.Insert(match.Index, insertion);
match = EmailLinkRegexIE.Match(buffer.ToString(), match.Index + 1);
}
// Anchor
match = AnchorRegex.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
buffer.Insert(match.Index, "[anchor|#" + match.Groups[1].Value + "]");
match = AnchorRegex.Match(buffer.ToString(), match.Index + 1);
}
// Image Left/Right/Auto
match = ImageLeftRightRegex.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
string insertion = "[";
if(match.Groups[1].Value.StartsWith("
buffer.Replace("
", "\r\n");
// Fix line breaks in IE
buffer.Replace("\r\n\r\n\r\n=====", "\r\n\r\n=====");
buffer.Replace("\r\n\r\n\r\n====", "\r\n\r\n====");
buffer.Replace("\r\n\r\n\r\n===", "\r\n\r\n===");
buffer.Replace("\r\n\r\n\r\n==", "\r\n\r\n==");
buffer.Replace("\r\n\r\n\r\n----", "\r\n\r\n----");
buffer.Replace("\r\n\r\n\r\n* ", "\r\n\r\n* ");
buffer.Replace("\r\n\r\n\r\n# ", "\r\n\r\n# ");
match = SingleNewLine.Match(buffer.ToString());
while(match.Success) {
buffer.Remove(match.Index, match.Length);
buffer.Insert(match.Index, "{BR}");
match = SingleNewLine.Match(buffer.ToString(), match.Index);
}
buffer.Replace("<", "<");
buffer.Replace(">", ">");
string result = buffer.ToString();
return result.TrimEnd('\r', '\n');
}
/// <summary>
/// Processes unordered and ordered lists, replacing each list block in the buffer with its WikiMarkup.
/// </summary>
/// <param name="sb">The string builder buffer, modified in place.</param>
private static void ProcessLists(StringBuilder sb) {
string temp = null;
int ulIndex = -1;
int olIndex = -1;
// Position from which the next search starts; everything before it has already been handled.
int lastIndex = 0;
do {
// The search for list starts runs on a lower-cased snapshot of the buffer.
temp = sb.ToString().ToLowerInvariant();
ulIndex = temp.IndexOf("", lastIndex);
olIndex = temp.IndexOf("", lastIndex);
if(ulIndex != -1 || olIndex != -1) {
// 1. Find tag pairs
// 2. Extract block and remove it from SB
// 3. Process block and generate WikiMarkup output
// 4. Insert new markup in SB at original position
// Handle whichever list kind starts first (or the only one found).
if(ulIndex != -1 && (ulIndex < olIndex || olIndex == -1)) {
// Find a UL block
int openIndex, closeIndex;
if(FindTagsPair(sb, "", lastIndex, out openIndex, out closeIndex)) {
// NOTE(review): "+ 5" presumably covers the length of the closing tag that starts at closeIndex - confirm.
string section = sb.ToString().Substring(openIndex, closeIndex - openIndex + 5);
sb.Remove(openIndex, closeIndex - openIndex + 5);
string result = ProcessList(false, section);
sb.Insert(openIndex, result);
// Skip processed data
lastIndex = openIndex + result.Length;
}
// No pair found: advance by 4 characters (presumably the opening tag length - confirm) and search again.
else lastIndex += 4;
continue;
}
if(olIndex != -1 && (olIndex < ulIndex || ulIndex == -1)) {
// Find a OL block
int openIndex, closeIndex;
if(FindTagsPair(sb, "", "
", lastIndex, out openIndex, out closeIndex)) {
string section = sb.ToString().Substring(openIndex, closeIndex - openIndex + 5);
sb.Remove(openIndex, closeIndex - openIndex + 5);
string result = ProcessList(true, section);
sb.Insert(openIndex, result);
// Skip processed data
lastIndex = openIndex + result.Length;
}
// Same fallback as in the UL branch.
else lastIndex += 4;
continue;
}
}
// Stop once neither kind of list start is found after lastIndex.
} while(ulIndex != -1 || olIndex != -1);
}
/// <summary>
/// Processes an unordered or ordered list.
/// </summary>
/// <param name="ordered"><c>true</c> for an ordered list, <c>false</c> for an unordered list.</param>
/// <param name="html">The input HTML.</param>
/// <returns>The output WikiMarkup, without trailing carriage returns or line feeds.</returns>
private static string ProcessList(bool ordered, string html) {
    // Parse the HTML into a list tree, render it starting with no bullet prefix,
    // then strip the trailing line break characters.
    return BuildListWikiMarkup(BuildListTree(ordered, html), "").TrimEnd('\r', '\n');
}
/// <summary>
/// Builds the WikiMarkup for a list, recursing into sub-lists.
/// </summary>
/// <param name="list">The root list.</param>
/// <param name="previousBullets">The previous bullets, used at upper levels.</param>
/// <returns>The WikiMarkup, one line per list element, with empty lines removed.</returns>
private static string BuildListWikiMarkup(HtmlList list, string previousBullets) {
    // Each nesting level appends its own bullet character to the inherited prefix.
    string bullets = previousBullets + (list.Type == HtmlListType.Ordered ? "#" : "*");

    StringBuilder output = new StringBuilder(500);
    foreach(HtmlListElement item in list.Elements) {
        output.Append(bullets).Append(" ").Append(item.Text).Append("\r\n");
        if(item.SubList == null) continue;
        output.Append(BuildListWikiMarkup(item.SubList, bullets));
    }

    // Remove empty lines in the middle of the list
    string flattened = output.ToString().Replace("\r", "");
    string[] nonEmptyLines = flattened.Split(new char[] { '\n' }, StringSplitOptions.RemoveEmptyEntries);
    string joined = string.Join("\r\n", nonEmptyLines);

    // Put back a single trailing line break when the raw text ended with one.
    if(flattened.EndsWith("\r\n") || flattened.EndsWith("\n")) {
        joined += "\r\n";
    }
    return joined;
}
/// <summary>
/// Builds a list tree by scanning the HTML for list tags from left to right.
/// </summary>
/// <param name="ordered"><c>true</c> for an ordered list.</param>
/// <param name="html">The input HTML.</param>
/// <returns>The list tree.</returns>
private static HtmlList BuildListTree(bool ordered, string html) {
// Tags searched for; the position of a tag in this array is the value switched on below.
string[] tags = new string[] { "", "", "
" };
// IE seems to add new-lines after some elements
// \r\n are never added by the Formatter, so it is safe to remove all them
html = html.Replace("\r", "");
html = html.Replace("\n", "");
int index = 0;
// Position of the most recent list-item opening tag, or -1 once a list has been closed after it.
int lastOpenListItemIndex = 0;
int stringFound;
HtmlList root = new HtmlList(ordered ? HtmlListType.Ordered : HtmlListType.Unordered);
HtmlList currentList = root;
do {
index = FirstIndexOfAny(html, index, out stringFound, tags);
if(index != -1) {
switch(stringFound) {
case 0: // Presumably the ordered-list opening tag (an Ordered sub-list is created) - confirm against the tags array
// Unless at the beginning, start a new sub-list
if(index != 0) {
// Set text of current element (sub-lists are added into the previous item)
if(lastOpenListItemIndex != -1) {
// "+ 4" presumably skips the 4-character list-item opening tag - confirm
string text = html.Substring(lastOpenListItemIndex + 4, index - (lastOpenListItemIndex + 4));
currentList.Elements[currentList.Elements.Count - 1].Text = text;
}
currentList.Elements[currentList.Elements.Count - 1].SubList = new HtmlList(HtmlListType.Ordered);
currentList = currentList.Elements[currentList.Elements.Count - 1].SubList;
}
break;
case 1: // Presumably the unordered-list opening tag (an Unordered sub-list is created) - confirm against the tags array
// Unless at the beginning, start a new sub-list
if(index != 0) {
// Set text of current element (sub-lists are added into the previous item)
if(lastOpenListItemIndex != -1) {
string text = html.Substring(lastOpenListItemIndex + 4, index - (lastOpenListItemIndex + 4));
currentList.Elements[currentList.Elements.Count - 1].Text = text;
}
currentList.Elements[currentList.Elements.Count - 1].SubList = new HtmlList(HtmlListType.Unordered);
currentList = currentList.Elements[currentList.Elements.Count - 1].SubList;
}
break;
case 2: // Presumably the list-item opening tag: remember its position and add a new element
lastOpenListItemIndex = index;
currentList.Elements.Add(new HtmlListElement());
break;
case 3: // Presumably the list-item closing tag
// If lastOpenListItemIndex != -1 (i.e. there are no sub-lists) extract item text and set it to the last list element
// Otherwise, navigate upwards to parent list (if any)
if(lastOpenListItemIndex != -1) {
string text = html.Substring(lastOpenListItemIndex + 4, index - (lastOpenListItemIndex + 4));
currentList.Elements[currentList.Elements.Count - 1].Text = text;
}
else {
// NOTE(review): FindAnchestor returns null when no parent is found, which would leave currentList null - confirm this cannot happen for well-formed input.
currentList = FindAnchestor(root, currentList);
}
break;
case 4: // Presumably a list closing tag
// Close last open list (nothing to do)
lastOpenListItemIndex = -1;
break;
case 5: // Presumably the other list closing tag
// Close last open list (nothing to do)
lastOpenListItemIndex = -1;
break;
default:
throw new NotSupportedException();
}
// Move past the tag just handled so the next search finds the following one.
index++;
}
} while(index != -1);
return root;
}
/// <summary>
/// Finds the ancestor of a list in a tree, i.e. the list that directly contains it as a sub-list.
/// </summary>
/// <param name="root">The root of the tree.</param>
/// <param name="current">The current element.</param>
/// <returns>The ancestor of <paramref name="current"/>, or <c>null</c> when it is not found under <paramref name="root"/>.</returns>
private static HtmlList FindAnchestor(HtmlList root, HtmlList current) {
    foreach(HtmlListElement item in root.Elements) {
        HtmlList child = item.SubList;

        // Direct hit: root owns the list being looked for.
        if(child == current) return root;

        // Leaf item: nothing to descend into.
        if(child == null) continue;

        // Depth-first search in the sub-list.
        HtmlList found = FindAnchestor(child, current);
        if(found != null) return found;
    }
    return null;
}
/// <summary>
/// Finds the earliest occurrence, at or after a start position, of any of the given strings.
/// </summary>
/// <param name="input">The input string.</param>
/// <param name="startIndex">The index in <paramref name="input"/> at which the search starts.</param>
/// <param name="stringFound">The index (in <paramref name="strings"/>) of the string found,
/// or <c>-1</c> when nothing was found.</param>
/// <param name="strings">The strings to search for.</param>
/// <returns>The index in <paramref name="input"/> of the earliest match, or <c>-1</c> when none of
/// the strings occurs.</returns>
/// <remarks>The search is ordinal (exact character match): the tokens are markup, not linguistic text.
/// When several strings match at the same position, the one listed first wins.</remarks>
private static int FirstIndexOfAny(string input, int startIndex, out int stringFound, params string[] strings) {
    stringFound = -1;

    // Starting past the end of the input can never match
    if(startIndex > input.Length) return -1;

    int earliest = -1;
    for(int i = 0; i < strings.Length; i++) {
        int position = input.IndexOf(strings[i], startIndex, StringComparison.Ordinal);

        // Strict comparison keeps the first-listed string on ties
        if(position != -1 && (earliest == -1 || position < earliest)) {
            earliest = position;
            stringFound = i;
        }
    }

    return earliest;
}
/// <summary>
/// Finds the position of a matched (balanced) tag pair.
/// </summary>
/// <param name="sb">The string builder buffer.</param>
/// <param name="openTag">The open tag.</param>
/// <param name="closeTag">The close tag.</param>
/// <param name="startIndex">The index in the buffer at which the search starts.</param>
/// <param name="openIndex">The index of the first open tag found, or <c>-1</c>.</param>
/// <param name="closeIndex">The index of the close tag that balances <paramref name="openIndex"/>, or <c>-1</c>.</param>
/// <returns><c>true</c> if a tag pair is found, <c>false</c> otherwise.</returns>
/// <remarks>
/// The search is ordinal and case-sensitive. A pair is only reported when the number of open and
/// close tags from <paramref name="startIndex"/> onwards is the same. Close tags that appear before
/// any open tag are ignored, so they can never be reported as the end of a pair.
/// </remarks>
private static bool FindTagsPair(StringBuilder sb, string openTag, string closeTag, int startIndex, out int openIndex, out int closeIndex) {
    openIndex = -1;
    closeIndex = -1;

    if(startIndex >= sb.Length) return false;

    string text = sb.ToString();

    // Positions of all open tags, in ascending order
    List<int> openIndexes = new List<int>(10);
    for(int i = text.IndexOf(openTag, startIndex, StringComparison.Ordinal); i != -1; i = text.IndexOf(openTag, i + 1, StringComparison.Ordinal)) {
        openIndexes.Add(i);
    }
    if(openIndexes.Count == 0) return false;

    // Positions of all close tags, in ascending order
    List<int> closeIndexes = new List<int>(10);
    for(int i = text.IndexOf(closeTag, startIndex, StringComparison.Ordinal); i != -1; i = text.IndexOf(closeTag, i + 1, StringComparison.Ordinal)) {
        closeIndexes.Add(i);
    }

    // Condition needed for further processing (also covers "no close tag at all")
    if(closeIndexes.Count != openIndexes.Count) return false;

    // Both lists are already sorted: walk them in document order with two cursors
    int depth = 0;
    int firstOpenIndex = -1;
    int o = 0;
    int c = 0;
    while(o < openIndexes.Count || c < closeIndexes.Count) {
        bool nextIsOpen = c >= closeIndexes.Count || (o < openIndexes.Count && openIndexes[o] <= closeIndexes[c]);

        if(nextIsOpen) {
            if(firstOpenIndex == -1) firstOpenIndex = openIndexes[o];
            depth++;
            o++;
            continue;
        }

        int closePosition = closeIndexes[c];
        c++;

        // A close tag with nothing open is a stray one: it cannot balance anything
        if(depth == 0) continue;

        depth--;
        if(depth == 0) {
            // Shortest closed tree starting at the first open tag
            openIndex = firstOpenIndex;
            closeIndex = closePosition;
            return true;
        }
    }

    return false;
}
/// <summary>
/// Prepares a link URL.
/// </summary>
/// <param name="rawUrl">The raw URL, as generated by the formatter.</param>
/// <returns>The prepared link URL, suitable for formatting.</returns>
/// <remarks>
/// The URL is decoded, the current request's main URL is stripped from its start (ignoring case),
/// then a trailing <c>.ashx</c> (or an <c>.ashx</c> directly followed by <c>#</c>) is removed.
/// URLs starting with <c>GetFile.aspx</c> are converted to a <c>{UP[:provider][(page)]}file</c> token.
/// Anything else is returned as-is.
/// </remarks>
private static string PrepareLink(string rawUrl) {
    rawUrl = UrlDecode(rawUrl);

    // Make the URL relative to the wiki root; ordinal comparison avoids lower-casing both strings
    string mainUrl = GetCurrentRequestMainUrl();
    if(rawUrl.StartsWith(mainUrl, StringComparison.OrdinalIgnoreCase)) rawUrl = rawUrl.Substring(mainUrl.Length);

    // Page link: drop the ".ashx" extension (5 characters)
    if(rawUrl.EndsWith(".ashx", StringComparison.OrdinalIgnoreCase)) return rawUrl.Substring(0, rawUrl.Length - 5);

    // Page link with anchor: drop ".ashx" but keep "#anchor"
    int extensionIndex = rawUrl.IndexOf(".ashx#", StringComparison.OrdinalIgnoreCase);
    if(extensionIndex != -1) return rawUrl.Remove(extensionIndex, 5);

    if(rawUrl.StartsWith("GetFile.aspx", StringComparison.Ordinal)) {
        // Look for File and Provider parameter (v2 and v3)
        string provider, page, file;
        GetProviderAndFileAndPage(rawUrl, out provider, out page, out file);

        return "{UP"
            + (provider != null ? ":" + provider : "")
            + (page != null ? "(" + page + ")" : "")
            + "}" + file;
    }

    return rawUrl;
}
/// <summary>
/// Prepares an image URL.
/// </summary>
/// <param name="rawUrl">The raw URL, as generated by the formatter.</param>
/// <returns>The prepared image URL, suitable for formatting.</returns>
/// <remarks>
/// The URL is decoded and the current request's main URL is stripped from its start (ignoring case).
/// URLs starting with <c>GetFile.aspx</c> are converted to a <c>{UP[:provider][(page)]}file</c> token;
/// anything else is returned as-is.
/// </remarks>
private static string PrepareImageUrl(string rawUrl) {
    rawUrl = UrlDecode(rawUrl);

    // Make the URL relative to the wiki root; ordinal comparison avoids lower-casing both strings
    string mainUrl = GetCurrentRequestMainUrl();
    if(rawUrl.StartsWith(mainUrl, StringComparison.OrdinalIgnoreCase)) rawUrl = rawUrl.Substring(mainUrl.Length);

    if(!rawUrl.StartsWith("GetFile.aspx", StringComparison.Ordinal)) return rawUrl;

    // Look for File and Provider parameter (v2 and v3)
    string provider, page, file;
    GetProviderAndFileAndPage(rawUrl, out provider, out page, out file);

    return "{UP"
        + (provider != null ? ":" + provider : "")
        + (page != null ? "(" + page + ")" : "")
        + "}" + file;
}
/// <summary>
/// Gets the current request main URL, such as http://www.server.com/Wiki/.
/// </summary>
/// <returns>The path part of the current request URL, cut after its last slash.</returns>
/// <remarks>Reads <c>HttpContext.Current</c>. The URL is first passed through <c>FixHost()</c>,
/// a helper defined elsewhere (presumably normalizing the host name; verify against its definition).</remarks>
private static string GetCurrentRequestMainUrl() {
    string path = HttpContext.Current.Request.Url.FixHost().GetLeftPart(UriPartial.Path);

    // Already a directory-style URL
    if(path.EndsWith("/")) return path;

    // Drop the trailing page name, keeping the slash itself
    int lastSlash = path.LastIndexOf("/");
    return lastSlash == -1 ? path : path.Substring(0, lastSlash + 1);
}
/// <summary>
/// Gets the provider, page and file of a link or URL.
/// </summary>
/// <param name="rawUrl">The raw URL, in the format
/// <c>...?Provider=PROVIDER[&amp;IsPageAttachment=1&amp;Page=PAGE]&amp;File=FILE</c>.
/// Parameters may be separated by a plain or an HTML-encoded ampersand.</param>
/// <param name="provider">The provider, or <c>null</c>.</param>
/// <param name="page">The page (for attachments), or <c>null</c>.</param>
/// <param name="file">The file, or <c>null</c> when the URL has no <c>File</c> parameter.</param>
/// <remarks>Parameter names are matched case-sensitively. When a parameter occurs more than once,
/// the last occurrence wins. If the URL contains no <c>?</c>, the whole string is treated as the query.</remarks>
private static void GetProviderAndFileAndPage(string rawUrl, out string provider, out string page, out string file) {
    provider = null;
    page = null;
    file = null;

    // Keep only the query string and normalize HTML-encoded separators, so that
    // "a=1&amp;b=2" is split exactly like "a=1&b=2"
    string query = rawUrl.Substring(rawUrl.IndexOf('?') + 1).Replace("&amp;", "&");

    foreach(string chunk in query.Split('&')) {
        if(chunk.StartsWith("Provider=", StringComparison.Ordinal)) {
            provider = chunk.Substring("Provider=".Length);
        }
        else if(chunk.StartsWith("File=", StringComparison.Ordinal)) {
            file = chunk.Substring("File=".Length);
        }
        else if(chunk.StartsWith("Page=", StringComparison.Ordinal)) {
            page = chunk.Substring("Page=".Length);
        }
    }
}
/// <summary>
/// Decodes a URL-encoded string.
/// </summary>
/// <param name="input">The input encoded string.</param>
/// <returns>The decoded string, as returned by <c>Tools.UrlDecode</c>.</returns>
/// <remarks>It seems that in some cases URL encoding occurs multiple times,
/// one on the server and one on the client. This method performs a single call to
/// <c>Tools.UrlDecode</c>; whether multiply-encoded input is fully decoded depends on that
/// helper, which is defined elsewhere.</remarks>
private static string UrlDecode(string input) {
    string decoded = Tools.UrlDecode(input);
    return decoded;
}
}
/// <summary>
/// Represents an open or close tag, identified by its kind and its position.
/// </summary>
public class Tag {
/// <summary>
/// Gets or sets the tag type (open or close).
/// </summary>
public TagType Type { get; set; }
/// <summary>
/// Gets or sets the tag index, i.e. the character position at which the tag was found.
/// </summary>
public int Index { get; set; }
}
/// <summary>
/// Lists tag types.
/// </summary>
public enum TagType {

    /// <summary>
    /// An open tag.
    /// </summary>
    Open = 0,

    /// <summary>
    /// A close tag.
    /// </summary>
    Close = 1

}
/// <summary>
/// Represents a HTML list.
/// </summary>
public class HtmlList {

    /// <summary>
    /// Initializes a new instance of the <see cref="HtmlList"/> class with an empty element collection.
    /// </summary>
    /// <param name="type">The list type.</param>
    public HtmlList(HtmlListType type) {
        Type = type;
        Elements = new List<HtmlListElement>(10);
    }

    /// <summary>
    /// Gets or sets the list type.
    /// </summary>
    public HtmlListType Type { get; set; }

    /// <summary>
    /// Gets or sets the list elements, in document order.
    /// </summary>
    public List<HtmlListElement> Elements { get; set; }

}
/// <summary>
/// Represents a HTML list element: its text and, optionally, a list nested inside it.
/// </summary>
public class HtmlListElement {
/// <summary>
/// Gets or sets the text of the element.
/// </summary>
public string Text { get; set; }
/// <summary>
/// Gets or sets the sub-list nested in this element, or <c>null</c> when none has been assigned.
/// </summary>
public HtmlList SubList { get; set; }
}
/// <summary>
/// Lists HTML list types.
/// </summary>
public enum HtmlListType {

    /// <summary>
    /// An ordered list.
    /// </summary>
    Ordered = 0,

    /// <summary>
    /// An unordered list.
    /// </summary>
    Unordered = 1

}
}