using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Xml;
using System.IO;

namespace ScrewTurn.Wiki {

    /// <summary>
    /// Implements reverse formatting methods (HTML->WikiMarkup).
    /// </summary>
    public static class ReverseFormatter {

        /// <summary>
        /// Processes ordered or unordered lists and their sublists.
        /// </summary>
        /// <param name="nodes">The child nodes of the list element; only <c>li</c> nodes are considered.</param>
        /// <param name="marker">The marker prefix accumulated so far (e.g. "*", "#", "*#").</param>
        /// <returns>The WikiMarkup text for the list.</returns>
        private static string processList(XmlNodeList nodes, string marker) {
            const string ul = "*";
            const string ol = "#";

            StringBuilder result = new StringBuilder();

            foreach(XmlNode node in nodes) {
                if(node.Name != "li") {
                    continue;
                }

                // NOTE(review): every non-list child of an <li> produces its own marker line,
                // so an item with mixed inline content is split across several lines - confirm
                // whether that is intended before changing it.
                foreach(XmlNode child in node.ChildNodes) {
                    switch(child.Name) {
                        case "ol":
                            // Nested list: extend the marker by one level.
                            result.Append(processList(child.ChildNodes, marker + ol));
                            break;
                        case "ul":
                            result.Append(processList(child.ChildNodes, marker + ul));
                            break;
                        default:
                            // The child is re-parsed from its own markup and converted as a standalone fragment.
                            using(StringReader fragmentReader = new StringReader(child.OuterXml)) {
                                XmlDocument fragment = FromHTML(fragmentReader);
                                result.Append(marker).Append(" ").Append(processChild(fragment.ChildNodes)).Append("\r\n");
                            }
                            break;
                    }
                }
            }

            return result.ToString();
        }

        /// <summary>
        /// Builds the WikiMarkup path of an image from the <c>src</c> attribute of a node.
        /// </summary>
        /// <param name="node">The node carrying the <c>src</c> attribute; may be <c>null</c>.</param>
        /// <returns>
        /// <c>{UP(x)}y</c> where <c>x</c> is the text between the first '=' and the following '&amp;'
        /// and <c>y</c> is the text after the second '='; the raw <c>src</c> value when it contains
        /// fewer than two '='; an empty string when there is no usable node or no <c>src</c>.
        /// </returns>
        private static string processImage(XmlNode node) {
            // Callers may hand over a missing child or a non-element node (whose Attributes is null).
            if(node == null || node.Attributes == null) {
                return "";
            }

            StringBuilder result = new StringBuilder();

            foreach(XmlAttribute attribute in node.Attributes) {
                if(attribute.Name != "src") {
                    continue;
                }

                string[] path = attribute.Value.Split('=');
                if(path.Length >= 3) {
                    result.Append("{UP(").Append(path[1].Split('&')[0]).Append(")}").Append(path[2]);
                }
                else {
                    // The URL does not have two query-string values: keep it untouched instead of failing.
                    result.Append(attribute.Value);
                }
            }

            return result.ToString();
        }

        /// <summary>
        /// Processes the children of an image container (<c>imageleft</c>, <c>imageright</c>, <c>imageauto</c>).
        /// </summary>
        /// <param name="nodes">The child nodes of the image container.</param>
        /// <returns>The description, image path and link parts, in that order, concatenated.</returns>
        private static string processChildImage(XmlNodeList nodes) {
            string image = "";
            string description = "";
            string url = "";

            foreach(XmlNode node in nodes) {
                switch(node.Name.ToLowerInvariant()) {
                    case "img":
                        image += processImage(node);
                        break;
                    case "p":
                        description += "|" + processChild(node.ChildNodes) + "|";
                        break;
                    case "a":
                        string link = "";
                        string target = "";
                        foreach(XmlAttribute attribute in node.Attributes) {
                            if(attribute.Value == "_blank") {
                                target += "^";
                            }
                            if(attribute.Name == "href") {
                                link += attribute.Value;
                            }
                        }
                        // The image path is read from the last child of the link.
                        image += processImage(node.LastChild);
                        url = "|" + target + link;
                        break;
                }
            }

            return description + image + url;
        }

        /// <summary>
        /// Converts the children of a node and surrounds the result with the given text.
        /// </summary>
        /// <param name="prefix">The text placed before the converted children.</param>
        /// <param name="node">The node whose children are converted.</param>
        /// <param name="suffix">The text placed after the converted children.</param>
        /// <returns>The wrapped WikiMarkup.</returns>
        private static string wrapChildren(string prefix, XmlNode node, string suffix) {
            return prefix + processChild(node.ChildNodes) + suffix;
        }

        /// <summary>
        /// Converts a <c>span</c>: italic markup when an attribute value is "italic", plain content otherwise.
        /// </summary>
        /// <param name="node">The <c>span</c> element.</param>
        /// <returns>The corresponding WikiMarkup.</returns>
        private static string processSpan(XmlNode node) {
            string content = processChild(node.ChildNodes);
            StringBuilder result = new StringBuilder();
            bool italic = false;

            foreach(XmlAttribute attribute in node.Attributes) {
                if(attribute.Value == "italic") {
                    italic = true;
                    result.Append("''").Append(content).Append("''");
                }
            }

            // Unrecognized spans keep their content rather than being dropped.
            return italic ? result.ToString() : content;
        }

        /// <summary>
        /// Converts a <c>table</c>: an <c>imageauto</c> block when an attribute value says so, plain content otherwise.
        /// </summary>
        /// <param name="node">The <c>table</c> element.</param>
        /// <returns>The corresponding WikiMarkup.</returns>
        private static string processTable(XmlNode node) {
            StringBuilder image = new StringBuilder();
            bool isImage = false;

            foreach(XmlAttribute attribute in node.Attributes) {
                if(attribute.Value == "imageauto") {
                    isImage = true;
                    image.Append("[imageauto|").Append(processChild(node.ChildNodes)).Append("]\r\n");
                }
            }

            return isImage ? image.ToString() : processChild(node.ChildNodes);
        }

        /// <summary>
        /// Converts a <c>p</c>: nothing for an <c>imagedescription</c> paragraph, content plus line break otherwise.
        /// </summary>
        /// <param name="node">The <c>p</c> element.</param>
        /// <returns>The corresponding WikiMarkup.</returns>
        private static string processParagraph(XmlNode node) {
            foreach(XmlAttribute attribute in node.Attributes) {
                if(attribute.Value == "imagedescription") {
                    return "";
                }
            }

            // Paragraphs carrying other attributes keep their content rather than being dropped.
            return processChild(node.ChildNodes) + "{BR}\r\n";
        }

        /// <summary>
        /// Converts a <c>div</c>: box or image container when an attribute value says so, plain content otherwise.
        /// </summary>
        /// <param name="node">The <c>div</c> element.</param>
        /// <returns>The corresponding WikiMarkup.</returns>
        private static string processDiv(XmlNode node) {
            StringBuilder result = new StringBuilder();
            bool recognized = false;

            foreach(XmlAttribute attribute in node.Attributes) {
                switch(attribute.Value) {
                    case "box":
                        result.Append("(((").Append(processChild(node.ChildNodes)).Append(")))\r\n");
                        recognized = true;
                        break;
                    case "imageleft":
                    case "imageright":
                    case "imageauto":
                        // The attribute value is also the WikiMarkup keyword.
                        result.Append("[").Append(attribute.Value).Append(processChildImage(node.ChildNodes)).Append("]\r\n");
                        recognized = true;
                        break;
                }
            }

            if(!recognized) {
                // Plain or unrecognized div: keep the content, followed by a line break.
                result.Append(processChild(node.ChildNodes)).Append("\r\n");
            }

            return result.ToString();
        }

        /// <summary>
        /// Converts an <c>img</c> element met outside of an image container.
        /// </summary>
        /// <param name="node">The <c>img</c> element.</param>
        /// <returns>The corresponding WikiMarkup.</returns>
        private static string processImageTag(XmlNode node) {
            string description = "";
            bool hasClass = false;
            bool isLink = node.ParentNode != null && node.ParentNode.Name.ToLowerInvariant() == "a";

            foreach(XmlAttribute attribute in node.Attributes) {
                if(attribute.Name == "alt") {
                    description = attribute.Value;
                }
                if(attribute.Name == "class") {
                    hasClass = true;
                }
            }

            if(hasClass) {
                // No brackets are written here for an image that has a class attribute.
                return description + "|" + processImage(node);
            }
            if(isLink) {
                // The enclosing <a> appends the link and the closing bracket.
                return "[image|" + description + "|" + processImage(node);
            }
            return "[image|" + description + "|" + processImage(node) + "]\r\n";
        }

        /// <summary>
        /// Converts an <c>a</c> element into an anchor, a link, or the trailing part of a linked image.
        /// </summary>
        /// <param name="node">The <c>a</c> element.</param>
        /// <returns>The corresponding WikiMarkup.</returns>
        private static string processAnchor(XmlNode node) {
            string content = processChild(node.ChildNodes);

            if(node.Attributes.Count == 0) {
                // Nothing to link to: keep the content rather than dropping it.
                return content;
            }

            bool isTable = node.ParentNode != null && node.ParentNode.Name.ToLowerInvariant() == "td";
            bool childImg = node.FirstChild != null && node.FirstChild.Name.ToLowerInvariant() == "img";
            string link = "";
            string target = "";

            foreach(XmlAttribute attribute in node.Attributes) {
                if(attribute.Name == "id") {
                    // An id makes this a named anchor; any other attribute is ignored.
                    return "[anchor|#" + attribute.Value.ToLowerInvariant() + "]" + content;
                }
                if(attribute.Value == "_blank") {
                    target += "^";
                }
                if(attribute.Name == "href") {
                    link += attribute.Value;
                }
            }

            if(isTable) {
                return content + "|" + target + link;
            }
            if(childImg) {
                // The child <img> already wrote the opening "[image|...": close it here.
                return content + "|" + target + link + "]\r\n";
            }
            return "[" + target + link + "|" + content + "]";
        }

        /// <summary>
        /// Converts a list of nodes (and, recursively, their children) into WikiMarkup.
        /// </summary>
        /// <param name="nodes">The nodes to convert.</param>
        /// <returns>The corresponding WikiMarkup text; unknown nodes are emitted as their original markup.</returns>
        private static string processChild(XmlNodeList nodes) {
            StringBuilder result = new StringBuilder();

            foreach(XmlNode node in nodes) {
                if(node.NodeType == XmlNodeType.Text) {
                    result.Append(node.Value);
                    continue;
                }
                if(node.NodeType == XmlNodeType.Whitespace) {
                    continue;
                }

                switch(node.Name.ToLowerInvariant()) {
                    case "html":
                    case "tbody":
                    case "tr":
                    case "td":
                    case "li":
                    // NOTE(review): sup/sub only emit their content, no markup is written for them - confirm this is intended.
                    case "sup":
                    case "sub":
                        result.Append(processChild(node.ChildNodes));
                        break;
                    case "b":
                    case "strong":
                        result.Append(wrapChildren("'''", node, "'''"));
                        break;
                    case "s":
                        result.Append(wrapChildren("--", node, "--"));
                        break;
                    case "em":
                    case "i":
                        result.Append(wrapChildren("''", node, "''"));
                        break;
                    case "u":
                        result.Append(wrapChildren("__", node, "__"));
                        break;
                    case "h1":
                        result.Append(wrapChildren("==", node, "=="));
                        break;
                    case "h2":
                        result.Append(wrapChildren("===", node, "==="));
                        break;
                    case "h3":
                        result.Append(wrapChildren("====", node, "===="));
                        break;
                    case "h4":
                        result.Append(wrapChildren("=====", node, "====="));
                        break;
                    case "pre":
                        // Preformatted text is taken verbatim, without converting inner markup.
                        result.Append("@@").Append(node.InnerText).Append("@@");
                        break;
                    case "code":
                        result.Append(wrapChildren("{{", node, "}}"));
                        break;
                    // NOTE(review): "hr /" and "\t" contain whitespace and so presumably never match
                    // an element name; they are kept as they were - confirm before removing.
                    case "hr":
                    case "hr /":
                        result.Append(wrapChildren("----", node, ""));
                        break;
                    case "\t":
                        result.Append(wrapChildren(":", node, ""));
                        break;
                    case "éé":
                        result.Append(wrapChildren("~~~~", node, ""));
                        break;
                    case "span":
                        result.Append(processSpan(node));
                        break;
                    case "br":
                        result.Append(wrapChildren("{BR}\r\n", node, ""));
                        break;
                    case "table":
                        result.Append(processTable(node));
                        break;
                    case "ol":
                        result.Append(processList(node.ChildNodes, "#")).Append("\r\n");
                        break;
                    case "ul":
                        result.Append(processList(node.ChildNodes, "*")).Append("\r\n");
                        break;
                    case "p":
                        result.Append(processParagraph(node));
                        break;
                    case "div":
                        result.Append(processDiv(node));
                        break;
                    case "img":
                        result.Append(processImageTag(node));
                        break;
                    case "a":
                        result.Append(processAnchor(node));
                        break;
                    default:
                        // Unknown markup is passed through unchanged.
                        result.Append(node.OuterXml);
                        break;
                }
            }

            return result.ToString();
        }

        /// <summary>
        /// Loads HTML into an XML document by reading it through an SGML reader.
        /// </summary>
        /// <param name="reader">The reader providing the HTML text.</param>
        /// <returns>The loaded XML document, with whitespace preserved and element names folded to lower case.</returns>
        private static XmlDocument FromHTML(TextReader reader) {
            // setup SgmlReader
            Sgml.SgmlReader sgmlReader = new Sgml.SgmlReader();
            sgmlReader.DocType = "HTML";
            sgmlReader.WhitespaceHandling = WhitespaceHandling.All;
            sgmlReader.CaseFolding = Sgml.CaseFolding.ToLower;
            sgmlReader.InputStream = reader;

            // create document
            XmlDocument doc = new XmlDocument();
            doc.PreserveWhitespace = true;
            // No resolver: external resources referenced by the input are not resolved.
            doc.XmlResolver = null;
            doc.Load(sgmlReader);
            return doc;
        }

        /// <summary>
        /// Reverse formats HTML content into WikiMarkup.
        /// </summary>
        /// <param name="html">The input HTML.</param>
        /// <returns>The corresponding WikiMarkup, or an empty string when there is nothing to convert.</returns>
        public static string ReverseFormat(string html) {
            if(string.IsNullOrEmpty(html)) {
                return "";
            }

            using(StringReader htmlReader = new StringReader(html)) {
                XmlDocument document = FromHTML(htmlReader);
                if(document.FirstChild == null) {
                    return "";
                }
                return processChild(document.FirstChild.ChildNodes);
            }
        }
    }
}