using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Xml;
using System.IO;

namespace ScrewTurn.Wiki {

    /// <summary>
    /// Implements reverse formatting methods (HTML->WikiMarkup).
    /// </summary>
    public static class ReverseFormatter {

        /// <summary>
        /// Converts the <c>li</c> items of an HTML list into WikiMarkup list lines.
        /// </summary>
        /// <param name="nodes">The child nodes of the list element.</param>
        /// <param name="marker">The marker prefix for the current nesting level; one <c>#</c> or <c>*</c> is appended per nested <c>ol</c>/<c>ul</c>.</param>
        /// <returns>One line per item; the lines of nested lists follow their parent item.</returns>
        private static string ProcessList(XmlNodeList nodes, string marker) {
            StringBuilder result = new StringBuilder();

            foreach(XmlNode node in nodes) {
                if(node.Name != "li") {
                    continue;
                }

                // Item text: every child except nested lists. Each child is re-parsed from its
                // own markup and converted on its own.
                StringBuilder text = new StringBuilder();
                foreach(XmlNode child in node.ChildNodes) {
                    if(child.Name != "ol" && child.Name != "ul") {
                        using(StringReader reader = new StringReader(child.OuterXml)) {
                            XmlDocument fragment = FromHTML(reader);
                            text.Append(ProcessChild(fragment.ChildNodes));
                        }
                    }
                }

                result.Append(marker).Append(' ').Append(text.ToString());

                // Ordinal check of the last character (the builder is never empty here because
                // the separator space was just appended).
                if(result[result.Length - 1] != '\n') {
                    result.Append('\n');
                }

                // Nested lists are emitted after the item line, one marker level deeper.
                foreach(XmlNode child in node.ChildNodes) {
                    if(child.Name == "ol") {
                        result.Append(ProcessList(child.ChildNodes, marker + "#"));
                    }
                    if(child.Name == "ul") {
                        result.Append(ProcessList(child.ChildNodes, marker + "*"));
                    }
                }
            }

            return result.ToString();
        }

        /// <summary>
        /// Builds the WikiMarkup file reference for an image node from its <c>src</c> attribute.
        /// </summary>
        /// <param name="node">The image node; may be <c>null</c> or a non-element node.</param>
        /// <returns>
        /// <c>{UP(x)}y</c> when the <c>src</c> value splits on <c>=</c> into more than two parts
        /// (x is the second part up to its first <c>&amp;</c>, y is the third part), otherwise
        /// <c>{UP}</c> followed by the last part. Empty when there is no <c>src</c> attribute.
        /// </returns>
        private static string ProcessImage(XmlNode node) {
            // XmlNode.Attributes is null for non-element nodes (e.g. text); callers may also
            // pass a missing child.
            if(node == null || node.Attributes == null) {
                return "";
            }

            StringBuilder result = new StringBuilder();
            foreach(XmlAttribute attribute in node.Attributes) {
                if(attribute.Name != "src") {
                    continue;
                }

                string[] path = attribute.Value.Split('=');
                if(path.Length > 2) {
                    result.Append("{UP(").Append(path[1].Split('&')[0]).Append(")}").Append(path[2]);
                }
                else {
                    result.Append("{UP}").Append(path[path.Length - 1]);
                }
            }

            return result.ToString();
        }

        /// <summary>
        /// Rewrites a <c>GetFile.aspx?File=...</c> link into its <c>{UP}</c> form.
        /// </summary>
        /// <param name="link">The link (href) value.</param>
        /// <returns>
        /// <c>{UP}</c> followed by everything after the first <c>=</c> when the text before the
        /// first <c>=</c> is exactly <c>GetFile.aspx?File</c>; otherwise the link unchanged.
        /// </returns>
        private static string ProcessLink(string link) {
            string[] parts = link.Split('=');
            if(parts[0] != "GetFile.aspx?File") {
                return link;
            }

            // No '=' at all: the single part itself is what follows the prefix.
            if(parts.Length == 1) {
                return "{UP}" + parts[0];
            }

            return "{UP}" + string.Join("=", parts, 1, parts.Length - 1);
        }

        /// <summary>
        /// Builds the tail of an <c>[image...]</c> tag from the children of an image <c>div</c>.
        /// </summary>
        /// <param name="nodes">The child nodes of the <c>div</c>.</param>
        /// <returns>
        /// The description (<c>|text|</c> per <c>p</c> child, or <c>||</c> when there is none),
        /// followed by the image reference and, when an <c>a</c> child exists,
        /// <c>|</c> plus the link target.
        /// </returns>
        private static string ProcessChildImage(XmlNodeList nodes) {
            StringBuilder image = new StringBuilder();
            StringBuilder description = new StringBuilder();
            string url = "";
            bool hasDescription = false;

            foreach(XmlNode node in nodes) {
                string name = node.Name.ToLowerInvariant();

                if(name == "img") {
                    image.Append(ProcessImage(node));
                }
                else if(name == "p") {
                    hasDescription = true;
                    description.Append('|').Append(ProcessChild(node.ChildNodes)).Append('|');
                }
                else if(name == "a") {
                    string link = "";
                    string target = "";
                    foreach(XmlAttribute attribute in node.Attributes) {
                        if(attribute.Value == "_blank") {
                            target += "^";
                        }
                        if(attribute.Name == "href") {
                            link += attribute.Value;
                        }
                    }

                    // The linked image is taken from the anchor's last child; ProcessImage
                    // tolerates a missing or non-element child.
                    image.Append(ProcessImage(node.LastChild));
                    url = "|" + target + ProcessLink(link);
                }
            }

            return (hasDescription ? description.ToString() : "||") + image.ToString() + url;
        }

        /// <summary>
        /// Builds the content of an <c>[imageauto|...]</c> tag from the nodes of an image table.
        /// </summary>
        /// <param name="nodes">The nodes to process (table children, rows, cells or cell content).</param>
        /// <returns>The description text followed by <c>|image</c> and, for linked images, <c>|link</c>.</returns>
        private static string ProcessTableImage(XmlNodeList nodes) {
            StringBuilder result = new StringBuilder();

            foreach(XmlNode node in nodes) {
                switch(node.Name.ToLowerInvariant()) {
                    case "tbody":
                    case "tr":
                        result.Append(ProcessTableImage(node.ChildNodes));
                        break;

                    case "td": {
                        XmlNode first = node.FirstChild;
                        if(first == null) {
                            // Empty cell: nothing to convert.
                            break;
                        }

                        // The description comes from a trailing <p>; the image part is produced
                        // only when the cell starts with <img> or <a>.
                        XmlNode last = node.LastChild;
                        if(last.Name.ToLowerInvariant() == "p") {
                            result.Append(last.InnerText);
                        }

                        string firstName = first.Name.ToLowerInvariant();
                        if(firstName == "img" || firstName == "a") {
                            result.Append(ProcessTableImage(node.ChildNodes));
                        }
                        break;
                    }

                    case "img":
                        result.Append('|').Append(ProcessImage(node));
                        break;

                    case "a": {
                        // An anchor without attributes contributes nothing.
                        if(node.Attributes.Count == 0) {
                            break;
                        }

                        string link = "";
                        string target = "";
                        foreach(XmlAttribute attribute in node.Attributes) {
                            if(attribute.Name == "id") {
                                continue;
                            }
                            if(attribute.Value == "_blank") {
                                target += "^";
                            }
                            if(attribute.Name == "href") {
                                link += attribute.Value;
                            }
                        }

                        result.Append(ProcessTableImage(node.ChildNodes))
                              .Append('|')
                              .Append(target)
                              .Append(ProcessLink(link));
                        break;
                    }
                }
            }

            return result.ToString();
        }

        /// <summary>
        /// Converts the content of an HTML table into WikiMarkup table rows and cells.
        /// </summary>
        /// <param name="nodes">The nodes to process (table children, rows or cells).</param>
        /// <returns>The WikiMarkup lines for captions, header cells, rows and cells.</returns>
        private static string ProcessTable(XmlNodeList nodes) {
            StringBuilder result = new StringBuilder();

            foreach(XmlNode node in nodes) {
                switch(node.Name.ToLowerInvariant()) {
                    case "thead":
                    case "tbody":
                        result.Append(ProcessTable(node.ChildNodes));
                        break;

                    case "th":
                        result.Append("! ").Append(ProcessChild(node.ChildNodes)).Append('\n');
                        break;

                    case "caption":
                        result.Append("|+ ").Append(ProcessChild(node.ChildNodes)).Append('\n');
                        break;

                    case "tr":
                        // Only the style attribute of a row is carried over.
                        result.Append("|- ");
                        foreach(XmlAttribute attribute in node.Attributes) {
                            if(attribute.Name.ToLowerInvariant() == "style") {
                                result.Append("style=\"").Append(attribute.Value).Append("\" ");
                            }
                        }
                        result.Append('\n').Append(ProcessTable(node.ChildNodes));
                        break;

                    case "td":
                        result.Append("| ");
                        if(node.Attributes.Count != 0) {
                            // Every cell attribute is carried over, followed by the " | " separator.
                            foreach(XmlAttribute attribute in node.Attributes) {
                                result.Append(' ').Append(attribute.Name).Append("=\"").Append(attribute.Value).Append("\" ");
                            }
                            result.Append(" | ");
                        }
                        result.Append(ProcessChild(node.ChildNodes)).Append('\n');
                        break;
                }
            }

            return result.ToString();
        }

        /// <summary>
        /// Converts a list of HTML nodes into WikiMarkup, recursing into child nodes.
        /// </summary>
        /// <param name="nodes">The nodes to convert.</param>
        /// <returns>
        /// The WikiMarkup text. Text nodes are copied, whitespace nodes are skipped and
        /// unrecognized nodes are emitted as their original markup.
        /// </returns>
        private static string ProcessChild(XmlNodeList nodes) {
            StringBuilder result = new StringBuilder();

            foreach(XmlNode node in nodes) {
                if(node.NodeType == XmlNodeType.Text) {
                    result.Append(node.Value);
                    continue;
                }
                if(node.NodeType == XmlNodeType.Whitespace) {
                    continue;
                }

                switch(node.Name.ToLowerInvariant()) {
                    case "html":
                        result.Append(ProcessChild(node.ChildNodes));
                        break;

                    case "b":
                    case "strong":
                        result.Append("'''").Append(ProcessChild(node.ChildNodes)).Append("'''");
                        break;

                    case "strike":
                    case "s":
                        result.Append("--").Append(ProcessChild(node.ChildNodes)).Append("--");
                        break;

                    case "em":
                    case "i":
                        result.Append("''").Append(ProcessChild(node.ChildNodes)).Append("''");
                        break;

                    case "u":
                        result.Append("__").Append(ProcessChild(node.ChildNodes)).Append("__");
                        break;

                    case "h1":
                        // An empty h1, or one starting with a whitespace node, becomes "----".
                        if(!node.HasChildNodes) {
                            result.Append("----\n");
                        }
                        else if(node.FirstChild.NodeType == XmlNodeType.Whitespace) {
                            result.Append("----\n").Append(ProcessChild(node.ChildNodes));
                        }
                        else {
                            result.Append("==").Append(ProcessChild(node.ChildNodes)).Append("==\n");
                        }
                        break;

                    case "h2":
                        result.Append("===").Append(ProcessChild(node.ChildNodes)).Append("===\n");
                        break;

                    case "h3":
                        result.Append("====").Append(ProcessChild(node.ChildNodes)).Append("====\n");
                        break;

                    case "h4":
                        result.Append("=====").Append(ProcessChild(node.ChildNodes)).Append("=====\n");
                        break;

                    case "pre":
                        // Preformatted content is taken verbatim (no recursive conversion).
                        result.Append("@@").Append(node.InnerText).Append("@@");
                        break;

                    case "code":
                        result.Append("{{").Append(ProcessChild(node.ChildNodes)).Append("}}");
                        break;

                    case "hr":
                    // NOTE(review): an element name cannot contain a space, so this label looks
                    // unreachable; kept as in the original.
                    case "hr /":
                        result.Append("\n== ==\n").Append(ProcessChild(node.ChildNodes));
                        break;

                    case "span":
                        // NOTE(review): a span without an attribute valued "italic" contributes
                        // nothing, i.e. its content is dropped. Kept as in the original.
                        foreach(XmlAttribute attribute in node.Attributes) {
                            if(attribute.Value == "italic") {
                                result.Append("''").Append(ProcessChild(node.ChildNodes)).Append("''");
                            }
                        }
                        break;

                    case "br":
                        result.Append('\n').Append(ProcessChild(node.ChildNodes));
                        break;

                    case "table":
                        result.Append(ProcessTableElement(node));
                        break;

                    case "ol":
                        result.Append(ProcessListElement(node, "#"));
                        break;

                    case "ul":
                        result.Append(ProcessListElement(node, "*"));
                        break;

                    case "sup":
                    case "sub":
                        // NOTE(review): the original wrapped the content in two empty strings;
                        // possibly markup that was lost at some point. Confirm the intended output.
                        result.Append(ProcessChild(node.ChildNodes));
                        break;

                    case "p":
                        // NOTE(review): a paragraph with any attribute contributes nothing (the
                        // original only tested for "imagedescription" and appended an empty
                        // string). Kept as in the original.
                        if(node.Attributes.Count == 0) {
                            result.Append(ProcessChild(node.ChildNodes)).Append("{BR}\n");
                        }
                        break;

                    case "div":
                        result.Append(ProcessDivElement(node));
                        break;

                    case "img":
                        result.Append(ProcessImgElement(node));
                        break;

                    case "a":
                        result.Append(ProcessAnchorElement(node));
                        break;

                    default:
                        result.Append(node.OuterXml);
                        break;
                }
            }

            return result.ToString();
        }

        /// <summary>
        /// Converts a <c>table</c> element: an auto-image table or a regular WikiMarkup table.
        /// </summary>
        /// <param name="node">The <c>table</c> element.</param>
        /// <returns>One <c>[imageauto|...]</c> tag per attribute valued <c>imageauto</c>, or a <c>{| ... |}</c> table.</returns>
        private static string ProcessTableElement(XmlNode node) {
            bool isImage = false;
            StringBuilder image = new StringBuilder();
            StringBuilder tableStyle = new StringBuilder();

            foreach(XmlAttribute attribute in node.Attributes) {
                if(attribute.Value == "imageauto") {
                    isImage = true;
                    image.Append("[imageauto|").Append(ProcessTableImage(node.ChildNodes)).Append("]\n");
                }
                else {
                    tableStyle.Append(attribute.Name).Append("=\"").Append(attribute.Value).Append("\" ");
                }
            }

            if(isImage) {
                return image.ToString();
            }
            return "{| " + tableStyle.ToString() + "\n" + ProcessTable(node.ChildNodes) + "|}\n";
        }

        /// <summary>
        /// Converts an <c>ol</c>/<c>ul</c> element; lists directly inside a <c>td</c> are kept as markup.
        /// </summary>
        /// <param name="node">The list element.</param>
        /// <param name="marker">The top-level item marker (<c>#</c> or <c>*</c>).</param>
        /// <returns>The WikiMarkup list, or the element's original markup when its parent is a <c>td</c>.</returns>
        private static string ProcessListElement(XmlNode node, string marker) {
            if(node.ParentNode != null && node.ParentNode.Name.ToLowerInvariant() == "td") {
                return node.OuterXml;
            }
            return ProcessList(node.ChildNodes, marker);
        }

        /// <summary>
        /// Converts a <c>div</c> element according to its attribute values
        /// (<c>box</c>, <c>imageleft</c>, <c>imageright</c>, <c>image</c>, <c>indent</c>).
        /// </summary>
        /// <param name="node">The <c>div</c> element.</param>
        /// <returns>The WikiMarkup for every recognized attribute value, or the content plus a newline when the element has no attributes.</returns>
        private static string ProcessDivElement(XmlNode node) {
            if(node.Attributes.Count == 0) {
                return ProcessChild(node.ChildNodes) + "\n";
            }

            // NOTE(review): a div whose attributes match none of the values below contributes
            // nothing, i.e. its content is dropped. Kept as in the original.
            StringBuilder result = new StringBuilder();
            foreach(XmlAttribute attribute in node.Attributes) {
                switch(attribute.Value) {
                    case "box":
                        result.Append("(((").Append(ProcessChild(node.ChildNodes)).Append(")))\n");
                        break;
                    case "imageleft":
                    case "imageright":
                    case "image":
                        // The tag name equals the attribute value.
                        result.Append('[').Append(attribute.Value).Append(ProcessChildImage(node.ChildNodes)).Append("]\n");
                        break;
                    case "indent":
                        result.Append(": ").Append(ProcessChild(node.ChildNodes)).Append('\n');
                        break;
                }
            }
            return result.ToString();
        }

        /// <summary>
        /// Converts an <c>img</c> element into (part of) an <c>[image|...]</c> tag.
        /// </summary>
        /// <param name="node">The <c>img</c> element.</param>
        /// <returns>
        /// <c>description|image</c> when the element has a <c>class</c> attribute; otherwise
        /// <c>[image|description|image</c>, closed with <c>]</c> and a newline unless the parent
        /// is an <c>a</c> element (the anchor conversion appends the link and closes the tag).
        /// </returns>
        private static string ProcessImgElement(XmlNode node) {
            string description = "";
            bool hasClass = false;
            bool isLink = node.ParentNode != null && node.ParentNode.Name.ToLowerInvariant() == "a";

            foreach(XmlAttribute attribute in node.Attributes) {
                if(attribute.Name == "alt") {
                    description = attribute.Value;
                }
                if(attribute.Name == "class") {
                    hasClass = true;
                }
            }

            string image = ProcessImage(node);
            if(hasClass) {
                return description + "|" + image;
            }
            if(isLink) {
                return "[image|" + description + "|" + image;
            }
            return "[image|" + description + "|" + image + "]\n";
        }

        /// <summary>
        /// Converts an <c>a</c> element into a WikiMarkup link, named anchor or linked-image tail.
        /// </summary>
        /// <param name="node">The <c>a</c> element.</param>
        /// <returns>The WikiMarkup for the anchor; empty when the element has no attributes.</returns>
        private static string ProcessAnchorElement(XmlNode node) {
            // NOTE(review): an anchor without attributes contributes nothing, i.e. its content
            // is dropped. Kept as in the original.
            if(node.Attributes.Count == 0) {
                return "";
            }

            bool childImg = node.FirstChild != null && node.FirstChild.Name.ToLowerInvariant() == "img";
            bool isTable = node.ParentNode != null && node.ParentNode.Name.ToLowerInvariant() == "td";
            bool isInternalLink = false;
            bool isUnknownLink = false;
            string link = "";
            string target = "";

            foreach(XmlAttribute attribute in node.Attributes) {
                if(attribute.Name == "id") {
                    // Named anchor: emitted as soon as the id attribute is met; nothing else
                    // is produced for this element.
                    return "[anchor|#" + attribute.Value.ToLowerInvariant() + "]" + ProcessChild(node.ChildNodes);
                }

                if(attribute.Value == "_blank") {
                    target += "^";
                }
                if(attribute.Name == "href") {
                    link += attribute.Value;
                }
                if(attribute.Value == "systemlink") {
                    isInternalLink = true;
                }
                if(attribute.Value.ToLowerInvariant() == "unknownlink") {
                    isUnknownLink = true;
                }
            }

            if(isInternalLink) {
                // Internal links become "c:" plus the text between the first and second '='.
                // When the href has no '=', fall back to regular link handling instead of
                // indexing past the end of the array.
                string[] parts = link.Split('=');
                link = parts.Length > 1 ? "c:" + parts[1] : ProcessLink(link);
            }
            else {
                link = ProcessLink(link);
            }

            if(!childImg) {
                // Plain link. Unknown links are only emitted inside table cells.
                if(isTable || !isUnknownLink) {
                    return "[" + target + link + "|" + ProcessChild(node.ChildNodes) + "]";
                }
                return "";
            }

            if(!isTable) {
                // Linked image: the img child opens the tag, the link closes it.
                return ProcessChild(node.ChildNodes) + "|" + target + link + "]\n";
            }
            return "";
        }

        /// <summary>
        /// Loads HTML into an <see cref="XmlDocument" /> through an SgmlReader.
        /// </summary>
        /// <param name="reader">The reader supplying the HTML text.</param>
        /// <returns>The loaded document, with whitespace preserved.</returns>
        private static XmlDocument FromHTML(TextReader reader) {
            // setup SgmlReader
            Sgml.SgmlReader sgmlReader = new Sgml.SgmlReader();
            sgmlReader.DocType = "HTML";
            sgmlReader.WhitespaceHandling = WhitespaceHandling.All;
            sgmlReader.CaseFolding = Sgml.CaseFolding.ToLower;
            sgmlReader.InputStream = reader;

            // create document
            XmlDocument doc = new XmlDocument();
            doc.PreserveWhitespace = true;
            doc.XmlResolver = null;
            doc.Load(sgmlReader);
            return doc;
        }

        /// <summary>
        /// Reverse formats HTML content into WikiMarkup.
        /// </summary>
        /// <param name="html">The input HTML.</param>
        /// <returns>The corresponding WikiMarkup; empty when the input is null, empty or yields no nodes.</returns>
        public static string ReverseFormat(string html) {
            if(string.IsNullOrEmpty(html)) {
                return "";
            }

            using(StringReader reader = new StringReader(html)) {
                XmlDocument document = FromHTML(reader);
                if(document == null || document.FirstChild == null) {
                    return "";
                }
                return ProcessChild(document.FirstChild.ChildNodes);
            }
        }
    }
}