diff --git a/Core-Tests/ReverseFormatterTests.cs b/Core-Tests/ReverseFormatterTests.cs
index 4aaeb7b..4acc7a9 100644
--- a/Core-Tests/ReverseFormatterTests.cs
+++ b/Core-Tests/ReverseFormatterTests.cs
@@ -24,24 +24,24 @@ namespace ScrewTurn.Wiki.Tests {
[TestCase("text", "text")]
[TestCase("
|
)()? ()?(((.)*?) )?(| |
)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
- private static readonly Regex ImageInlineRegex = new Regex(@"(
)?
()?", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
- private static readonly Regex HRRegex = new Regex(@"
\s*
", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
- private static readonly Regex BoxRegex = new Regex(@"
((.|\n|\r)*?)
", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
- private static readonly Regex CodeRegex = new Regex(@"
((.|\n|\r)*?)
", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
- private static readonly Regex PreRegex = new Regex(@"
((.|\n|\r)*?)
", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
- private static readonly Regex SingleBR = new Regex(@"(?)
(?!
)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
- private static readonly Regex SingleNewLine = new Regex(@"(?(.*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
-
- // Title=1 - Href=2 - Target=3 - Content=4 --- Href=http://www.server.com/Spaced%20Page.ashx
- private static readonly Regex UnknownLinkRegexIE = new Regex(@"
(.*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
-
- // Title=1 - ProviderGlobal=3 - Provider=4 - Page=6 - File=7 - Target=8 - Content=9
- private static readonly Regex FileOrAttachmentLinkRegexIE = new Regex(@"
(.*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
-
- // Title=1 - Href=2 - Target=3 - Content=4 --- Href=http://www.server.com/Register.aspx
- private static readonly Regex SystemLinkRegexIE = new Regex(@"
(.*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
-
- // Title=1 - Href=2 - Target=3 - Content=4
- private static readonly Regex ExternalLinkRegexIE = new Regex(@"
(.*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
-
- // Title=1 - Href=2 - Target=3 - Content=4
- private static readonly Regex InternalLinkRegexIE = new Regex(@"
(.*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
-
- // AnchorLinkRegexIE would be equal to InternalLinkRegex - no need for it
-
- // Title=1 - Href=2 - Target=3 - Content=4
- private static readonly Regex EmailLinkRegexIE = new Regex(@"
(.*?)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
-
- // DivClass=1 - A=2 - ATitle=3 - AHref=4 - ATarget=5 - ImageAlt=6 - ImageSrc=7 - P=9 - PContent=10 --- Href/Src=http://www.server.com/Blah.ashx/GetFile.aspx...
- private static readonly Regex ImageLeftRightRegexIE = new Regex(@"
(
)?
()?(\r\n
(.*?)
)?
", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
-
- // A=1 - ATitle=2 - AHref=3 - ATarget=4 - ImageAlt=5 - ImageSrc=6 - P=8 - PContent=9 --- Href/Src=http://www.server.com/Blah.ashx/GetFile.aspx...
- private static readonly Regex ImageAutoRegexIE = new Regex(@"
\r\n\r\n\r\n()? ()?(\r\n(.*?) )? |
", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
-
- // A=1 - ATitle=2 - AHref=3 - ATarget=4 - ImageAlt=5 - ImageSrc=6 --- Href/Src=http://www.server.com/Blah.ashx/GetFile.aspx...
- private static readonly Regex ImageInlineRegexIE = new Regex(@"(
)?
()?", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
-
-
- private static List
listText= new List();
- //private static string result = "";
-
///
- /// Processes the list.
+ /// Processes ordered or unordered lists and sublists.
///
/// The nodes.
/// The marker.
- ///
+ /// Valid WikiMarkUp text for the lists
private static string processList(XmlNodeList nodes, string marker) {
string result = "";
string ul = "*";
string ol = "#";
foreach(XmlNode node in nodes) {
- if(node.Name.ToString() == "li"){
+ if(node.Name.ToString() == "li") {
foreach(XmlNode child in node.ChildNodes) {
- switch(child.Name.ToString()){
+ switch(child.Name.ToString()) {
case "ol":
result += processList(child.ChildNodes, marker + ol);
break;
@@ -115,16 +51,15 @@ namespace ScrewTurn.Wiki {
///
/// Processes the image.
///
- /// The node.
- ///
+ /// The node containing the file name of the image.
+ /// The correct path for WikiMarkup and image
private static string processImage(XmlNode node) {
string result = "";
if(node.Attributes.Count != 0) {
foreach(XmlAttribute attName in node.Attributes) {
if(attName.Name.ToString() == "src") {
string[] path = attName.Value.ToString().Split('=');
- //result += "|" + processChild(node.ChildNodes);
- result += "{"+"UP("+ path[1].Split('&')[0] +")}" + path[2];
+ result += "{" + "UP(" + path[1].Split('&')[0] + ")}" + path[2];
}
}
}
@@ -134,13 +69,13 @@ namespace ScrewTurn.Wiki {
///
/// Processes the child Image.
///
- /// The nodes.
- ///
+ /// Nodelist from an image.
+ /// The correct WikiMarkup for the images
private static string processChildImage(XmlNodeList nodes) {
- string image ="";
- string p ="";
+ string image = "";
+ string p = "";
string url = "";
- string result ="";
+ string result = "";
foreach(XmlNode node in nodes) {
if(node.Name.ToLowerInvariant() == "img")
image += processImage(node);
@@ -164,15 +99,15 @@ namespace ScrewTurn.Wiki {
url = "|" + target + link;
}
}
- result = p+image+ url;
+ result = p + image + url;
return result;
}
///
/// Processes the child.
///
- /// The nodes.
- ///
+ /// An XmlNodeList.
+ /// The corresponding WikiMarkup text
private static string processChild(XmlNodeList nodes) {
string result = "";
foreach(XmlNode node in nodes) {
@@ -243,7 +178,7 @@ namespace ScrewTurn.Wiki {
break;
case "\n":
case "br":
- result += ("{br}" + processChild(node.ChildNodes));
+ result += ("\r\n" + processChild(node.ChildNodes));
break;
case "table":
string image = "";
@@ -251,16 +186,16 @@ namespace ScrewTurn.Wiki {
foreach(XmlAttribute attName in node.Attributes) {
if(attName.Value.ToString() == "imageauto") {
isImage = true;
- image += "[imageauto|" + processChild(node.ChildNodes) + "]{br}";
+ image += "[imageauto|" + processChild(node.ChildNodes) + "]\r\n";
}
}
- if (isImage){
+ if(isImage) {
result += image;
break;
}
else result += processChild(node.ChildNodes);
- break;
- case "tbody":
+ break;
+ case "tbody":
result += processChild(node.ChildNodes);
break;
case "tr":
@@ -270,10 +205,10 @@ namespace ScrewTurn.Wiki {
result += processChild(node.ChildNodes);
break;
case "ol":
- result += processList(node.ChildNodes, "#") + "{br}";
+ result += processList(node.ChildNodes, "#") + "\r\n";
break;
case "ul":
- result += processList(node.ChildNodes, "*") + "{br}";
+ result += processList(node.ChildNodes, "*") + "\r\n";
break;
case "li":
result += processChild(node.ChildNodes);
@@ -297,20 +232,20 @@ namespace ScrewTurn.Wiki {
if(node.Attributes.Count != 0) {
XmlAttributeCollection attribute = node.Attributes;
foreach(XmlAttribute attName in attribute) {
- if (attName.Value.ToString() == "box"){
- result += "(((" + processChild(node.ChildNodes) + "))){br}";
+ if(attName.Value.ToString() == "box") {
+ result += "(((" + processChild(node.ChildNodes) + ")))\r\n";
}
if(attName.Value.ToString() == "imageleft") {
- result += "[imageleft" + processChildImage(node.ChildNodes) + "]{br}";
+ result += "[imageleft" + processChildImage(node.ChildNodes) + "]\r\n";
}
if(attName.Value.ToString() == "imageright")
- result += "[imageright" + processChildImage(node.ChildNodes) + "]{br}";
+ result += "[imageright" + processChildImage(node.ChildNodes) + "]\r\n";
if(attName.Value.ToString() == "imageauto")
- result += "[imageauto" + processChildImage(node.ChildNodes) + "]{br}";
+ result += "[imageauto" + processChildImage(node.ChildNodes) + "]\r\n";
}
}
else
- result += (processChild(node.ChildNodes) + "{br}");
+ result += (processChild(node.ChildNodes) + "\r\n");
break;
case "img":
@@ -320,7 +255,7 @@ namespace ScrewTurn.Wiki {
if(node.ParentNode != null)
if(node.ParentNode.Name.ToLowerInvariant().ToString() == "a")
isLink = true;
- if(node.Attributes.Count != 0){
+ if(node.Attributes.Count != 0) {
foreach(XmlAttribute attName in node.Attributes) {
if(attName.Name.ToString() == "alt")
description = attName.Value.ToString();
@@ -329,18 +264,18 @@ namespace ScrewTurn.Wiki {
}
}
if((!hasClass) && (!isLink))
- result += "[image|" + description + "|" + processImage(node) + "]{br}";
+ result += "[image|" + description + "|" + processImage(node) + "]\r\n";
else if((!hasClass) && (isLink))
result += "[image|" + description + "|" + processImage(node);
else
- result += description+"|"+processImage(node);
+ result += description + "|" + processImage(node);
break;
case "a":
bool isTable = false;
- string link="";
- string target="";
- string title="";
+ string link = "";
+ string target = "";
+ string title = "";
bool childImg = false;
if(node.FirstChild != null) {
if(node.FirstChild.Name.ToLowerInvariant() == "img")
@@ -370,15 +305,13 @@ namespace ScrewTurn.Wiki {
if(title != link)
result += "[" + target + link + "|" + processChild(node.ChildNodes) + "]";
else
- result += "[" + target + link + "|" + "]" + processChild(node.ChildNodes);
+ result += "[" + target + link + "|" + processChild(node.ChildNodes) + "]";
if((!anchor) && (isTable))
result += processChild(node.ChildNodes) + "|" + target + link;
if((!anchor) && (childImg) && (!isTable))
- result += processChild(node.ChildNodes) + "|" + target + link +"]{br}";
+ result += processChild(node.ChildNodes) + "|" + target + link + "]\r\n";
}
- //}
- //else processChild(node.ChildNodes);
- break;
+ break;
default:
result += (node.OuterXml);
@@ -393,9 +326,9 @@ namespace ScrewTurn.Wiki {
/// Froms the HTML.
///
/// The reader.
- ///
+ /// valid XML Document
private static XmlDocument FromHTML(TextReader reader) {
-
+
// setup SgmlReader
Sgml.SgmlReader sgmlReader = new Sgml.SgmlReader();
sgmlReader.DocType = "HTML";
@@ -421,1097 +354,7 @@ namespace ScrewTurn.Wiki {
StringReader strReader = new StringReader(html);
XmlDocument x = FromHTML((TextReader)strReader);
- string text = processChild(x.FirstChild.ChildNodes);
- //StringBuilder t = new StringBuilder(html);
- //result = "";
- listText.Clear();
- return text;
+ return processChild(x.FirstChild.ChildNodes);
}
-
-
- ///
- /// Reverse formats HTML content into WikiMarkup.
- ///
- /// The input HTML.
- /// The corresponding WikiMarkup.
- public static string ReverseFormatOld(string html) {
-
- Match match = null;
- StringBuilder buffer = new StringBuilder(html);
- if(!html.EndsWith("\r\n")) buffer.Append("\r\n");
-
- buffer.Replace("
", "
");
- buffer.Replace("
", "
");
-
- buffer.Replace("", "");
- buffer.Replace("", "");
- buffer.Replace("", "");
- buffer.Replace("", "");
- buffer.Replace("", "");
- buffer.Replace("", "");
- buffer.Replace("", "");
- buffer.Replace("", "");
- buffer.Replace("", "
");
- buffer.Replace("
", "");
-
- buffer.Replace("&", "&");
-
- // Escape square brackets, otherwise they're interpreted as links
- buffer.Replace("[", "[");
- buffer.Replace("]", "]");
-
- // #469: IE seems to randomly add this stuff
- buffer.Replace("
\r\n", "
");
-
- buffer.Replace("", "");
- buffer.Replace("
", "");
-
- // Temporarily replace
in tags
- match = PreRegex.Match(buffer.ToString());
- while(match.Success) {
- Match subMatch = SingleBR.Match(match.Value);
- while(subMatch.Success) {
- buffer.Remove(match.Index + subMatch.Index, subMatch.Length);
- buffer.Insert(match.Index + subMatch.Index, "");
- subMatch = SingleBR.Match(match.Value, subMatch.Index + 1);
- }
- match = PreRegex.Match(buffer.ToString(), match.Index + 1);
- }
- buffer.Replace("", "\r\n");
-
- // Code
- match = CodeRegex.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- buffer.Insert(match.Index, "{{" + match.Value.Substring(6, match.Length - 13) + "}}");
- match = CodeRegex.Match(buffer.ToString(), match.Index + 1);
- }
-
- // Pre
- // Unescape square brackets
- match = PreRegex.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- buffer.Insert(match.Index, "@@" +
- match.Value.Substring(5, match.Length - 11).Replace("&", "&").Replace("[", "[").Replace("]", "]") +
- "@@");
- match = PreRegex.Match(buffer.ToString(), match.Index + 1);
- }
-
- // WebkitDivRegex
- // Remove all div added by webkit and replace them with \r\n.
- match = WebkitDivRegex.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- buffer.Insert(match.Index, "\r\n" + match.Groups[2].Value);
- match = WebkitDivRegex.Match(buffer.ToString(), match.Index + 1);
- }
-
- // Bold
- match = BoldRegex.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- buffer.Insert(match.Index, "'''" + match.Groups[2].Value + "'''");
- match = BoldRegex.Match(buffer.ToString(), match.Index + 1);
- }
-
- // Italic
- match = ItalicRegex.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- buffer.Insert(match.Index, "''" + match.Groups[2].Value + "''");
- match = ItalicRegex.Match(buffer.ToString(), match.Index + 1);
- }
-
- // Underline
- match = UnderlineRegex.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- buffer.Insert(match.Index, "__" + match.Groups[2].Value + "__");
- match = UnderlineRegex.Match(buffer.ToString(), match.Index + 1);
- }
-
- // Strike
- match = StrikeRegex.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- buffer.Insert(match.Index, "--" + match.Groups[2].Value + "--");
- match = StrikeRegex.Match(buffer.ToString(), match.Index + 1);
- }
-
- // Horizontal Ruler
- match = HRRegex.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- buffer.Insert(match.Index, "----");
- match = HRRegex.Match(buffer.ToString(), match.Index + 1);
- }
-
- // H1
- match = H1Regex.Match(buffer.ToString());
- while(match.Success) {
- char c = buffer[match.Index + match.Length];
- bool addNewLine = false;
- if(buffer[match.Index + match.Length] != '\n') addNewLine = true;
- buffer.Remove(match.Index, match.Length);
- if(addNewLine) buffer.Insert(match.Index, "==" + match.Groups[2].Value + "==\n");
- else buffer.Insert(match.Index, "==" + match.Groups[2].Value + "==");
- match = H1Regex.Match(buffer.ToString(), match.Index + 1);
- }
-
- // H2
- match = H2Regex.Match(buffer.ToString());
- while(match.Success) {
- bool addNewLine = false;
- if(buffer[match.Index + match.Length] != '\n') addNewLine = true;
- buffer.Remove(match.Index, match.Length);
- if(addNewLine) buffer.Insert(match.Index, "===" + match.Groups[2].Value + "===\n");
- else buffer.Insert(match.Index, "===" + match.Groups[2].Value + "===");
- match = H2Regex.Match(buffer.ToString(), match.Index + 1);
- }
-
- // H3
- match = H3Regex.Match(buffer.ToString());
- while(match.Success) {
- bool addNewLine = false;
- if(buffer[match.Index + match.Length] != '\n') addNewLine = true;
- buffer.Remove(match.Index, match.Length);
- if(addNewLine) buffer.Insert(match.Index, "====" + match.Groups[2].Value + "====\n");
- else buffer.Insert(match.Index, "====" + match.Groups[2].Value + "====");
- match = H3Regex.Match(buffer.ToString(), match.Index + 1);
- }
-
- // H4
- match = H4Regex.Match(buffer.ToString());
- while(match.Success) {
- bool addNewLine = false;
- if(buffer[match.Index + match.Length] != '\n') addNewLine = true;
- buffer.Remove(match.Index, match.Length);
- if(addNewLine) buffer.Insert(match.Index, "=====" + match.Groups[2].Value + "=====\n");
- else buffer.Insert(match.Index, "=====" + match.Groups[2].Value + "=====");
- match = H4Regex.Match(buffer.ToString(), match.Index + 1);
- }
-
- // Lists
- buffer.Replace("
");
- buffer.Replace("", "");
- buffer.Replace("
", "
");
- buffer.Replace("", "");
- buffer.Replace("", "");
- ProcessLists(buffer);
-
- // Page Link
- match = PageLinkRegex.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- string insertion = "[";
- if(match.Groups[2].Value == @"target=""_blank"" ") insertion += "^";
- string decoded = UrlDecode(match.Groups[3].Value);
- insertion += (decoded.StartsWith(" ") ? "++" : "") + decoded.Trim();
- if(match.Groups[6].Value != decoded) insertion += "|" + match.Groups[6].Value;
- insertion += "]";
- buffer.Insert(match.Index, insertion);
- match = PageLinkRegex.Match(buffer.ToString(), match.Index + 1);
- }
-
- // Page Link IE
- match = PageLinkRegexIE.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- string insertion = "[";
- if(match.Groups[3].Value == " target=_blank") insertion += "^";
- string page = match.Groups[2].Value.Substring(match.Groups[2].Value.LastIndexOf("/") + 1);
- page = page.Substring(0, page.Length - 5); // Remove .ashx
- page = UrlDecode(page);
- insertion += page;
- if(match.Groups[4].Value != page) insertion += "|" + match.Groups[4].Value;
- insertion += "]";
- buffer.Insert(match.Index, insertion);
- match = PageLinkRegexIE.Match(buffer.ToString(), match.Index + 1);
- }
-
- // Unknown Link
- match = UnknownLinkRegex.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- string insertion = "[";
- if(match.Groups[2].Value == @"target=""_blank"" ") insertion += "^";
- string decoded = UrlDecode(match.Groups[3].Value);
- insertion += decoded;
- if(match.Groups[6].Value != decoded) insertion += "|" + match.Groups[6].Value;
- insertion += "]";
- buffer.Insert(match.Index, insertion);
- match = UnknownLinkRegex.Match(buffer.ToString(), match.Index + 1);
- }
-
- // Unknown Link IE
- match = UnknownLinkRegexIE.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- string insertion = "[";
- if(match.Groups[3].Value == " target=_blank") insertion += "^";
- string page = match.Groups[2].Value.Substring(match.Groups[2].Value.LastIndexOf("/") + 1);
- page = page.Substring(0, page.Length - 5); // Remove .ashx
- page = UrlDecode(page);
- insertion += page;
- if(match.Groups[4].Value != page) insertion += "|" + match.Groups[4].Value;
- insertion += "]";
- buffer.Insert(match.Index, insertion);
- match = UnknownLinkRegexIE.Match(buffer.ToString(), match.Index + 1);
- }
-
- // File Link
- match = FileLinkRegex.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- string insertion = "[";
- if(match.Groups[2].Value == @"target=""_blank"" ") insertion += "^";
- if(match.Groups[3].Value != "") insertion += "{UP:" + match.Groups[4].Value + "}" + UrlDecode(match.Groups[6].Value);
- else insertion += "{UP}" + UrlDecode(match.Groups[6].Value);
- if(!match.Groups[10].Value.StartsWith("GetFile.aspx") && !match.Groups[10].Value.StartsWith("{UP")) insertion += "|" + match.Groups[10];
- insertion += "]";
- buffer.Insert(match.Index, insertion);
- match = FileLinkRegex.Match(buffer.ToString(), match.Index + 1);
- }
-
- // File Link IE
- match = FileOrAttachmentLinkRegexIE.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- string insertion = "[";
- if(match.Groups[8].Value == " target=_blank") insertion += "^";
- if(match.Groups[3].Value != "") insertion += "{UP:" + match.Groups[4].Value;
- else insertion += "{UP";
- if(match.Groups[6].Value != "") insertion += "(" + UrlDecode(match.Groups[6].Value) + ")";
- insertion += "}";
- insertion += UrlDecode(match.Groups[7].Value);
- if(!match.Groups[9].Value.StartsWith("GetFile.aspx") && !match.Groups[9].Value.StartsWith("{UP")) insertion += "|" + match.Groups[9].Value;
- insertion += "]";
- buffer.Insert(match.Index, insertion);
- match = FileOrAttachmentLinkRegexIE.Match(buffer.ToString(), match.Index + 1);
- }
-
- // Attachment Link
- match = AttachmentLinkRegex.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- string insertion = "[";
- if(match.Groups[2].Value == @"target=""_blank"" ") insertion += "^";
- // if the provider is not present "{UP" is added without ":providername"
- insertion += match.Groups[4].Value == "" ? "{UP" : "{UP:" + match.Groups[4].Value;
- insertion += "(" + UrlDecode(match.Groups[6].Value) + ")}" + UrlDecode(match.Groups[8].Value);
- if(!match.Groups[12].Value.StartsWith("GetFile.aspx") && !match.Groups[12].Value.StartsWith("{UP")) insertion += "|" + match.Groups[12];
- insertion += "]";
- buffer.Insert(match.Index, insertion);
- match = AttachmentLinkRegex.Match(buffer.ToString(), match.Index + 1);
- }
-
- // External Link
- match = ExternalLinkRegex.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- string insertion = "[";
- //if(match.Groups[6].Value == @"target=""_blank""") insertion += "^";
- string url = match.Groups[2].Value;
- if(url.StartsWith(Settings.MainUrl)) url = url.Substring(Settings.MainUrl.Length);
- insertion += url;
- if(match.Groups[7].Value != match.Groups[2].Value && match.Groups[7].Value + "/" != match.Groups[2].Value) insertion += "|" + match.Groups[7].Value;
- insertion += "]";
- buffer.Insert(match.Index, insertion);
- match = ExternalLinkRegex.Match(buffer.ToString(), match.Index + 1);
- }
-
- // External Link IE
- match = ExternalLinkRegexIE.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- string insertion = "[";
- string url = match.Groups[2].Value;
- if(url.StartsWith(Settings.MainUrl)) url = url.Substring(Settings.MainUrl.Length);
- insertion += url;
- if(match.Groups[4].Value != match.Groups[2].Value.TrimEnd('/')) insertion += "|" + match.Groups[4].Value;
- insertion += "]";
- buffer.Insert(match.Index, insertion);
- match = ExternalLinkRegexIE.Match(buffer.ToString(), match.Index + 1);
- }
-
- // Internal Link
- match = InternalLinkRegex.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- string insertion = "[";
- if(match.Groups[1].Value == @"target=""_blank""") insertion += "^";
- string url = match.Groups[2].Value;
- if(url.StartsWith(Settings.MainUrl)) url = url.Substring(Settings.MainUrl.Length);
- insertion += url;
- string decoded = UrlDecode(match.Groups[6].Value);
- if(match.Groups[2].Value != decoded) insertion += "|" + decoded;
- insertion += "]";
- buffer.Insert(match.Index, insertion);
- match = InternalLinkRegex.Match(buffer.ToString(), match.Index + 1);
- }
-
- // Internal Link IE
- match = InternalLinkRegexIE.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- string insertion = "[";
- if(match.Groups[3].Value == " target=_blank") insertion += "^";
- string url = match.Groups[2].Value;
- if(url.StartsWith(Settings.MainUrl)) url = url.Substring(Settings.MainUrl.Length);
- insertion += url;
- string decoded = UrlDecode(match.Groups[4].Value);
- if(decoded != match.Groups[2].Value) insertion += "|" + decoded;
- insertion += "]";
- buffer.Insert(match.Index, insertion);
- match = InternalLinkRegexIE.Match(buffer.ToString(), match.Index + 1);
- }
-
- // Anchor Link
- match = AnchorLinkRegex.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- string insertion = "[";
- if(match.Groups[3].Value != "") insertion += "^";
- insertion += "#";
- insertion += match.Groups[1].Value;
- string val = match.Groups[6].Value.ToLowerInvariant().Replace(" ", "");
- if(val != "") insertion += "|" + val;
- insertion += "]";
- buffer.Insert(match.Index, insertion);
- match = AnchorLinkRegex.Match(buffer.ToString(), match.Index + 1);
- }
-
- // System Link (.aspx)
- match = SystemLinkRegex.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- string insertion = "[";
- if(match.Groups[1].Value == @"target=""_blank""") insertion += "^";
- insertion += match.Groups[2].Value;
- string decoded = UrlDecode(match.Groups[6].Value);
- if(match.Groups[2].Value != decoded) insertion += "|" + decoded;
- insertion += "]";
- buffer.Insert(match.Index, insertion);
- match = SystemLinkRegex.Match(buffer.ToString(), match.Index + 1);
- }
-
- // System Link IE
- match = SystemLinkRegexIE.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- string insertion = "[";
- if(match.Groups[3].Value == " target=_blank") insertion += "^";
- string url = match.Groups[2].Value.Substring(match.Groups[2].Value.LastIndexOf("/") + 1);
- insertion += url;
- string decoded = UrlDecode(match.Groups[4].Value);
- if(decoded != url) insertion += "|" + decoded;
- insertion += "]";
- buffer.Insert(match.Index, insertion);
- match = SystemLinkRegexIE.Match(buffer.ToString(), match.Index + 1);
- }
-
- // Email Link
- match = EmailLinkRegex.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- string insertion = "[";
- if(match.Groups[2].Value == @"target=""_blank"" ") insertion += "^";
- insertion += match.Groups[3].Value;
- string decoded = UrlDecode(match.Groups[6].Value);
- if(decoded != match.Groups[3].Value) insertion += "|" + decoded;
- insertion += "]";
- buffer.Insert(match.Index, insertion);
- match = EmailLinkRegex.Match(buffer.ToString(), match.Index + 1);
- }
-
- // Email Link IE
- match = EmailLinkRegexIE.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- string insertion = "[";
- insertion += match.Groups[2].Value.Substring(7); // Remove mailto:
- string decoded = UrlDecode(match.Groups[4].Value);
- if(decoded != match.Groups[2].Value.Substring(7)) insertion += "|" + decoded;
- insertion += "]";
- buffer.Insert(match.Index, insertion);
- match = EmailLinkRegexIE.Match(buffer.ToString(), match.Index + 1);
- }
-
- // Anchor
- match = AnchorRegex.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- buffer.Insert(match.Index, "[anchor|#" + match.Groups[1].Value + "]");
- match = AnchorRegex.Match(buffer.ToString(), match.Index + 1);
- }
-
- // Image Left/Right/Auto
- match = ImageLeftRightRegex.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- string insertion = "[";
- if(match.Groups[1].Value.StartsWith("
- buffer.Replace("
", "\r\n");
-
- // Fix line breaks in IE
- buffer.Replace("\r\n\r\n\r\n=====", "\r\n\r\n=====");
- buffer.Replace("\r\n\r\n\r\n====", "\r\n\r\n====");
- buffer.Replace("\r\n\r\n\r\n===", "\r\n\r\n===");
- buffer.Replace("\r\n\r\n\r\n==", "\r\n\r\n==");
- buffer.Replace("\r\n\r\n\r\n----", "\r\n\r\n----");
- buffer.Replace("\r\n\r\n\r\n* ", "\r\n\r\n* ");
- buffer.Replace("\r\n\r\n\r\n# ", "\r\n\r\n# ");
-
- match = SingleNewLine.Match(buffer.ToString());
- while(match.Success) {
- buffer.Remove(match.Index, match.Length);
- buffer.Insert(match.Index, "{BR}");
- match = SingleNewLine.Match(buffer.ToString(), match.Index);
- }
-
- buffer.Replace("<", "<");
- buffer.Replace(">", ">");
-
- string result = buffer.ToString();
-
- return result.TrimEnd('\r', '\n');
- }
-
- ///
- /// Processes unordered and ordered lists.
- ///
- /// The string builder buffer.
- private static void ProcessLists(StringBuilder sb) {
- string temp = null;
-
- int ulIndex = -1;
- int olIndex = -1;
-
- int lastIndex = 0;
-
- do {
- temp = sb.ToString().ToLowerInvariant();
-
- ulIndex = temp.IndexOf("", lastIndex);
- olIndex = temp.IndexOf("", lastIndex);
-
- if(ulIndex != -1 || olIndex != -1) {
- // 1. Find tag pairs
- // 2. Extract block and remove it from SB
- // 3. Process block and generate WikiMarkup output
- // 4. Insert new markup in SB at original position
-
- if(ulIndex != -1 && (ulIndex < olIndex || olIndex == -1)) {
- // Find a UL block
- int openIndex, closeIndex;
-
- if(FindTagsPair(sb, "", lastIndex, out openIndex, out closeIndex)) {
- string section = sb.ToString().Substring(openIndex, closeIndex - openIndex + 5);
- sb.Remove(openIndex, closeIndex - openIndex + 5);
-
- string result = ProcessList(false, section);
-
- sb.Insert(openIndex, result);
-
- // Skip processed data
- lastIndex = openIndex + result.Length;
- }
- else lastIndex += 4;
-
- continue;
- }
-
- if(olIndex != -1 && (olIndex < ulIndex || ulIndex == -1)) {
- // Find a OL block
- int openIndex, closeIndex;
-
- if(FindTagsPair(sb, "", "
", lastIndex, out openIndex, out closeIndex)) {
- string section = sb.ToString().Substring(openIndex, closeIndex - openIndex + 5);
- sb.Remove(openIndex, closeIndex - openIndex + 5);
-
- string result = ProcessList(true, section);
-
- sb.Insert(openIndex, result);
-
- // Skip processed data
- lastIndex = openIndex + result.Length;
- }
- else lastIndex += 4;
-
- continue;
- }
- }
-
- } while(ulIndex != -1 || olIndex != -1);
- }
-
- ///
- /// Processes an unordered or ordered list.
- ///
- /// true for an ordered list, false for an unordered list.
- /// The input HTML.
- /// The output WikiMarkup.
- private static string ProcessList(bool ordered, string html) {
- HtmlList list = BuildListTree(ordered, html);
-
- string wikiMarkup = BuildListWikiMarkup(list, "");
-
- return wikiMarkup.TrimEnd('\r', '\n');
- }
-
- ///
- /// Builds the WikiMarkup for a list.
- ///
- /// The root list.
- /// The previous bullets, used at upper levels.
- /// The WikiMarkup.
- private static string BuildListWikiMarkup(HtmlList list, string previousBullets) {
- previousBullets = previousBullets + (list.Type == HtmlListType.Ordered ? "#" : "*");
-
- StringBuilder sb = new StringBuilder(500);
-
- foreach(HtmlListElement elem in list.Elements) {
- sb.Append(previousBullets);
- sb.Append(" ");
- sb.Append(elem.Text);
- sb.Append("\r\n");
-
- if(elem.SubList != null) {
- sb.Append(BuildListWikiMarkup(elem.SubList, previousBullets));
- }
- }
-
- // Remove empty lines in the middle of the list
- string raw = sb.ToString().Replace("\r", "");
- string[] lines = raw.Split(new char[] { '\n' }, StringSplitOptions.RemoveEmptyEntries);
-
- return
- string.Join("\r\n", lines) +
- (raw.EndsWith("\r\n") || raw.EndsWith("\n") ? "\r\n" : "");
- }
-
- ///
- /// Builds a list tree.
- ///
- /// true for an ordered list.
- /// The input HTML.
- /// The list tree.
- private static HtmlList BuildListTree(bool ordered, string html) {
- string[] tags = new string[] { "", "", "
" };
-
- // IE seems to add new-lines after some elements
- // \r\n are never added by the Formatter, so it is safe to remove all them
- html = html.Replace("\r", "");
- html = html.Replace("\n", "");
-
- int index = 0;
- int lastOpenListItemIndex = 0;
- int stringFound;
-
- HtmlList root = new HtmlList(ordered ? HtmlListType.Ordered : HtmlListType.Unordered);
- HtmlList currentList = root;
-
- do {
- index = FirstIndexOfAny(html, index, out stringFound, tags);
-
- if(index != -1) {
- switch(stringFound) {
- case 0: //
- // Unless at the beginning, start a new sub-list
- if(index != 0) {
- // Set text of current element (sub-lists are added into the previous item)
- if(lastOpenListItemIndex != -1) {
- string text = html.Substring(lastOpenListItemIndex + 4, index - (lastOpenListItemIndex + 4));
- currentList.Elements[currentList.Elements.Count - 1].Text = text;
- }
- currentList.Elements[currentList.Elements.Count - 1].SubList = new HtmlList(HtmlListType.Ordered);
- currentList = currentList.Elements[currentList.Elements.Count - 1].SubList;
- }
- break;
- case 1: //
- // Unless at the beginning, start a new sub-list
- if(index != 0) {
- // Set text of current element (sub-lists are added into the previous item)
- if(lastOpenListItemIndex != -1) {
- string text = html.Substring(lastOpenListItemIndex + 4, index - (lastOpenListItemIndex + 4));
- currentList.Elements[currentList.Elements.Count - 1].Text = text;
- }
- currentList.Elements[currentList.Elements.Count - 1].SubList = new HtmlList(HtmlListType.Unordered);
- currentList = currentList.Elements[currentList.Elements.Count - 1].SubList;
- }
- break;
- case 2: // -
- lastOpenListItemIndex = index;
- currentList.Elements.Add(new HtmlListElement());
- break;
- case 3: //
- // If lastOpenListItemIndex != -1 (i.e. there are no sub-lists) extract item text and set it to the last list element
- // Otherwise, navigate upwards to parent list (if any)
- if(lastOpenListItemIndex != -1) {
- string text = html.Substring(lastOpenListItemIndex + 4, index - (lastOpenListItemIndex + 4));
- currentList.Elements[currentList.Elements.Count - 1].Text = text;
- }
- else {
- currentList = FindAnchestor(root, currentList);
- }
- break;
- case 4: //
- // Close last open list (nothing to do)
- lastOpenListItemIndex = -1;
- break;
- case 5: //
- // Close last open list (nothing to do)
- lastOpenListItemIndex = -1;
- break;
- default:
- throw new NotSupportedException();
- }
-
- index++;
- }
- } while(index != -1);
-
- return root;
- }
-
- ///
- /// Finds the anchestor of a list in a tree.
- ///
- /// The root of the tree.
- /// The current element.
- /// The anchestor of current.
- private static HtmlList FindAnchestor(HtmlList root, HtmlList current) {
- foreach(HtmlListElement elem in root.Elements) {
- if(elem.SubList == current) return root;
- else if(elem.SubList != null) {
- HtmlList temp = FindAnchestor(elem.SubList, current);
- if(temp != null) return temp;
- }
- }
- //return root;
- return null;
- }
-
- ///
- /// Finds the index of the first string.
- ///
- /// The input string.
- /// The start index.
- /// The index (in strings) of the string found.
- /// The strings to search for.
- /// The index of the string found in input.
- private static int FirstIndexOfAny(string input, int startIndex, out int stringFound, params string[] strings) {
- if(startIndex > input.Length) {
- stringFound = -1;
- return -1;
- }
-
- int[] indices = new int[strings.Length];
-
- for(int i = 0; i < strings.Length; i++) {
- indices[i] = input.IndexOf(strings[i], startIndex);
- }
-
- bool nothingFound = true;
- int min = int.MaxValue;
- stringFound = -1;
- for(int i = 0; i < indices.Length; i++) {
- if(indices[i] != -1 && indices[i] < min) {
- nothingFound = false;
- min = indices[i];
- stringFound = i;
- }
- }
-
- if(nothingFound) return -1;
- else return min;
- }
-
- ///
- /// Finds the position of a matched tag pair.
- ///
- /// The string builder buffer.
- /// The open tag.
- /// The close tag.
- /// The start index.
- /// The open index.
- /// The (matched/balanced) close index.
- /// true if a tag pair is found, false otherwise.
- private static bool FindTagsPair(StringBuilder sb, string openTag, string closeTag, int startIndex, out int openIndex, out int closeIndex) {
- // Find indexes for all open and close tags
- // Identify the smallest tag tree
-
- string text = sb.ToString();
-
- List openIndexes = new List(10);
- List closeIndexes = new List(10);
-
- if(startIndex >= sb.Length) {
- openIndex = -1;
- closeIndex = -1;
- return false;
- }
-
- int currentOpenIndex = startIndex - 1;
- int currentCloseIndex = startIndex - 1;
-
- do {
- currentOpenIndex = text.IndexOf(openTag, currentOpenIndex + 1);
- if(currentOpenIndex != -1) openIndexes.Add(currentOpenIndex);
- } while(currentOpenIndex != -1);
-
- // Optimization
- if(openIndexes.Count == 0) {
- openIndex = -1;
- closeIndex = -1;
- return false;
- }
-
- do {
- currentCloseIndex = text.IndexOf(closeTag, currentCloseIndex + 1);
- if(currentCloseIndex != -1) closeIndexes.Add(currentCloseIndex);
- } while(currentCloseIndex != -1);
-
- // Optimization
- if(closeIndexes.Count == 0) {
- openIndex = -1;
- closeIndex = -1;
- return false;
- }
-
- // Condition needed for further processing
- if(openIndexes.Count != closeIndexes.Count) {
- openIndex = -1;
- closeIndex = -1;
- return false;
- }
-
- // Build a sorted list of tags
- List tags = new List(openIndexes.Count * 2);
- foreach(int index in openIndexes) {
- tags.Add(new Tag() { Type = TagType.Open, Index = index });
- }
- foreach(int index in closeIndexes) {
- tags.Add(new Tag() { Type = TagType.Close, Index = index });
- }
- tags.Sort((x, y) => { return x.Index.CompareTo(y.Index); });
-
- // Find shortest closed tree
- int openCount = 0;
- int firstOpenIndex = -1;
- foreach(Tag tag in tags) {
- if(tag.Type == TagType.Open) {
- openCount++;
- if(firstOpenIndex == -1) firstOpenIndex = tag.Index;
- }
- else openCount--;
-
- if(openCount == 0) {
- openIndex = firstOpenIndex;
- closeIndex = tag.Index;
- return true;
- }
- }
-
- openIndex = -1;
- closeIndex = -1;
- return false;
- }
-
- ///
- /// Prepares a link URL.
- ///
- /// The raw URL, as generated by the formatter.
- /// The prepared link URL, suitable for formatting.
- private static string PrepareLink(string rawUrl) {
- rawUrl = UrlDecode(rawUrl);
- string mainUrl = GetCurrentRequestMainUrl().ToLowerInvariant();
- if(rawUrl.ToLowerInvariant().StartsWith(mainUrl)) rawUrl = rawUrl.Substring(mainUrl.Length);
-
- if(rawUrl.ToLowerInvariant().EndsWith(".ashx")) return rawUrl.Substring(0, rawUrl.Length - 5);
-
- int extensionIndex = rawUrl.ToLowerInvariant().IndexOf(".ashx#");
- if(extensionIndex != -1) {
- return rawUrl.Remove(extensionIndex, 5);
- }
-
- if(rawUrl.StartsWith("GetFile.aspx")) {
- // Look for File and Provider parameter (v2 and v3)
-
- string provider, page, file;
- GetProviderAndFileAndPage(rawUrl, out provider, out page, out file);
-
- if(provider == null && page == null) return "{UP}" + file;
- else if(page != null) {
- return "{UP" + (provider != null ? ":" + provider : "") + "(" + page + ")}" + file;
- }
- else {
- return "{UP" + (provider != null ? ":" + provider : "") + "}" + file;
- }
- }
-
- return rawUrl;
- }
-
- ///
- /// Prepares an image URL.
- ///
- /// The raw URL, as generated by the formatter.
- /// The prepared image URL, suitable for formatting.
- private static string PrepareImageUrl(string rawUrl) {
- rawUrl = UrlDecode(rawUrl);
- string mainUrl = GetCurrentRequestMainUrl().ToLowerInvariant();
- if(rawUrl.ToLowerInvariant().StartsWith(mainUrl)) rawUrl = rawUrl.Substring(mainUrl.Length);
-
- if(rawUrl.StartsWith("GetFile.aspx")) {
- // Look for File and Provider parameter (v2 and v3)
-
- string provider, page, file;
- GetProviderAndFileAndPage(rawUrl, out provider, out page, out file);
-
- if(provider == null) return "{UP" + (page != null ? "(" + page + ")" : "") + "}" + file;
- else return "{UP:" + provider + (page != null ? "(" + page + ")" : "") + "}" + file;
- }
- else return rawUrl;
- }
-
- ///
- /// Gets the current request main URL, such as http://www.server.com/Wiki/.
- ///
- /// The URL.
- private static string GetCurrentRequestMainUrl() {
- string url = HttpContext.Current.Request.Url.FixHost().GetLeftPart(UriPartial.Path);
- if(!url.EndsWith("/")) {
- int index = url.LastIndexOf("/");
- if(index != -1) url = url.Substring(0, index + 1);
- }
- return url;
- }
-
- ///
- /// Gets the provider and file of a link or URL.
- ///
- /// The raw URL, in the format ...?Provider=PROVIDER[&IsPageAttachment=1&Page=PAGE]&File=FILE.
- /// The provider, or null.
- /// The page (for attachments), or null.
- /// The file.
- private static void GetProviderAndFileAndPage(string rawUrl, out string provider, out string page, out string file) {
- rawUrl = rawUrl.Substring(rawUrl.IndexOf("?") + 1).Replace("&", "&");
-
- string[] chunks = rawUrl.Split('&');
-
- provider = null;
- page = null;
- file = null;
-
- foreach(string chunk in chunks) {
- if(chunk.StartsWith("Provider=")) {
- provider = chunk.Substring(9);
- }
- if(chunk.StartsWith("File=")) {
- file = chunk.Substring(5);
- }
- if(chunk.StartsWith("Page=")) {
- page = chunk.Substring(5);
- }
- }
- }
-
- ///
- /// Decodes a URL-encoded string, even if it was encoded multiple times.
- ///
- /// The input encoded string.
- /// The decoded string.
- /// It seems that in some cases URL encoding occurs multiple times,
- /// one on the server and one on the client.
- private static string UrlDecode(string input) {
- return Tools.UrlDecode(input);
- //return Tools.UrlDecode(Tools.UrlDecode(input));
- }
-
}
-
- ///
- /// Represents an open or close tag.
- ///
- public class Tag {
-
- ///
- /// Gets or sets the tag type.
- ///
- public TagType Type { get; set; }
-
- ///
- /// Gets or sets the tag index.
- ///
- public int Index { get; set; }
-
- }
-
- ///
- /// Lists tag types.
- ///
- public enum TagType {
- ///
- /// An open tag.
- ///
- Open,
- ///
- /// A close tag.
- ///
- Close
- }
-
- ///
- /// Represents a HTML list.
- ///
- public class HtmlList {
-
- ///
- /// Initializes a new instance of the class.
- ///
- /// The list type.
- public HtmlList(HtmlListType type) {
- Type = type;
- Elements = new List(10);
- }
-
- ///
- /// Gets or sets the list type.
- ///
- public HtmlListType Type { get; set; }
-
- ///
- /// Gets or sets the list elements.
- ///
- public List Elements { get; set; }
-
- }
-
- ///
- /// Represents a HTML list element.
- ///
- public class HtmlListElement {
-
- ///
- /// Gets or sets the text.
- ///
- public string Text { get; set; }
-
- ///
- /// Gets or sets the sub-list.
- ///
- public HtmlList SubList { get; set; }
-
- }
-
- ///
- /// Lists HTML list types.
- ///
- public enum HtmlListType {
- ///
- /// An ordered list.
- ///
- Ordered,
- ///
- /// An unordered list.
- ///
- Unordered
- }
-
-}
+}
\ No newline at end of file