1*055d4590SKeyi Gui// Copyright (C) 2006 Google Inc. 2*055d4590SKeyi Gui// 3*055d4590SKeyi Gui// Licensed under the Apache License, Version 2.0 (the "License"); 4*055d4590SKeyi Gui// you may not use this file except in compliance with the License. 5*055d4590SKeyi Gui// You may obtain a copy of the License at 6*055d4590SKeyi Gui// 7*055d4590SKeyi Gui// http://www.apache.org/licenses/LICENSE-2.0 8*055d4590SKeyi Gui// 9*055d4590SKeyi Gui// Unless required by applicable law or agreed to in writing, software 10*055d4590SKeyi Gui// distributed under the License is distributed on an "AS IS" BASIS, 11*055d4590SKeyi Gui// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12*055d4590SKeyi Gui// See the License for the specific language governing permissions and 13*055d4590SKeyi Gui// limitations under the License. 14*055d4590SKeyi Gui 15*055d4590SKeyi Gui 16*055d4590SKeyi Gui/** 17*055d4590SKeyi Gui * @fileoverview 18*055d4590SKeyi Gui * some functions for browser-side pretty printing of code contained in html. 19*055d4590SKeyi Gui * 20*055d4590SKeyi Gui * The lexer should work on a number of languages including C and friends, 21*055d4590SKeyi Gui * Java, Python, Bash, SQL, HTML, XML, CSS, Javascript, and Makefiles. 22*055d4590SKeyi Gui * It works passably on Ruby, PHP and Awk and a decent subset of Perl, but, 23*055d4590SKeyi Gui * because of commenting conventions, doesn't work on Smalltalk, Lisp-like, or 24*055d4590SKeyi Gui * CAML-like languages. 25*055d4590SKeyi Gui * 26*055d4590SKeyi Gui * If there's a language not mentioned here, then I don't know it, and don't 27*055d4590SKeyi Gui * know whether it works. If it has a C-like, Bash-like, or XML-like syntax 28*055d4590SKeyi Gui * then it should work passably. 
29*055d4590SKeyi Gui * 30*055d4590SKeyi Gui * Usage: 31*055d4590SKeyi Gui * 1) include this source file in an html page via 32*055d4590SKeyi Gui * <script type="text/javascript" src="/path/to/prettify.js"></script> 33*055d4590SKeyi Gui * 2) define style rules. See the example page for examples. 34*055d4590SKeyi Gui * 3) mark the <pre> and <code> tags in your source with class=prettyprint. 35*055d4590SKeyi Gui * You can also use the (html deprecated) <xmp> tag, but the pretty printer 36*055d4590SKeyi Gui * needs to do more substantial DOM manipulations to support that, so some 37*055d4590SKeyi Gui * css styles may not be preserved. 38*055d4590SKeyi Gui * That's it. I wanted to keep the API as simple as possible, so there's no 39*055d4590SKeyi Gui * need to specify which language the code is in. 40*055d4590SKeyi Gui * 41*055d4590SKeyi Gui * Change log: 42*055d4590SKeyi Gui * cbeust, 2006/08/22 43*055d4590SKeyi Gui * Java annotations (start with "@") are now captured as literals ("lit") 44*055d4590SKeyi Gui */ 45*055d4590SKeyi Gui 46*055d4590SKeyi Gui// JSLint declarations 47*055d4590SKeyi Gui/*global console, document, navigator, setTimeout, window */ 48*055d4590SKeyi Gui 49*055d4590SKeyi Gui/** 50*055d4590SKeyi Gui * Split {@code prettyPrint} into multiple timeouts so as not to interfere with 51*055d4590SKeyi Gui * UI events. 52*055d4590SKeyi Gui * If set to {@code false}, {@code prettyPrint()} is synchronous. 53*055d4590SKeyi Gui */ 54*055d4590SKeyi Guivar PR_SHOULD_USE_CONTINUATION = true; 55*055d4590SKeyi Gui 56*055d4590SKeyi Gui/** the number of characters between tab columns */ 57*055d4590SKeyi Guivar PR_TAB_WIDTH = 8; 58*055d4590SKeyi Gui 59*055d4590SKeyi Gui/** Walks the DOM returning a properly escaped version of innerHTML. 60*055d4590SKeyi Gui * @param {Node} node 61*055d4590SKeyi Gui * @param {Array.<string>} out output buffer that receives chunks of HTML. 
62*055d4590SKeyi Gui */ 63*055d4590SKeyi Guivar PR_normalizedHtml; 64*055d4590SKeyi Gui 65*055d4590SKeyi Gui/** Contains functions for creating and registering new language handlers. 66*055d4590SKeyi Gui * @type {Object} 67*055d4590SKeyi Gui */ 68*055d4590SKeyi Guivar PR; 69*055d4590SKeyi Gui 70*055d4590SKeyi Gui/** Pretty print a chunk of code. 71*055d4590SKeyi Gui * 72*055d4590SKeyi Gui * @param {string} sourceCodeHtml code as html 73*055d4590SKeyi Gui * @return {string} code as html, but prettier 74*055d4590SKeyi Gui */ 75*055d4590SKeyi Guivar prettyPrintOne; 76*055d4590SKeyi Gui/** find all the < pre > and < code > tags in the DOM with class=prettyprint 77*055d4590SKeyi Gui * and prettify them. 78*055d4590SKeyi Gui * @param {Function} opt_whenDone if specified, called when the last entry 79*055d4590SKeyi Gui * has been finished. 80*055d4590SKeyi Gui */ 81*055d4590SKeyi Guivar prettyPrint; 82*055d4590SKeyi Gui 83*055d4590SKeyi Gui/** browser detection. @extern */ 84*055d4590SKeyi Guifunction _pr_isIE6() { 85*055d4590SKeyi Gui var isIE6 = navigator && navigator.userAgent && 86*055d4590SKeyi Gui /\bMSIE 6\./.test(navigator.userAgent); 87*055d4590SKeyi Gui _pr_isIE6 = function () { return isIE6; }; 88*055d4590SKeyi Gui return isIE6; 89*055d4590SKeyi Gui} 90*055d4590SKeyi Gui 91*055d4590SKeyi Gui 92*055d4590SKeyi Gui(function () { 93*055d4590SKeyi Gui /** Splits input on space and returns an Object mapping each non-empty part to 94*055d4590SKeyi Gui * true. 95*055d4590SKeyi Gui */ 96*055d4590SKeyi Gui function wordSet(words) { 97*055d4590SKeyi Gui words = words.split(/ /g); 98*055d4590SKeyi Gui var set = {}; 99*055d4590SKeyi Gui for (var i = words.length; --i >= 0;) { 100*055d4590SKeyi Gui var w = words[i]; 101*055d4590SKeyi Gui if (w) { set[w] = null; } 102*055d4590SKeyi Gui } 103*055d4590SKeyi Gui return set; 104*055d4590SKeyi Gui } 105*055d4590SKeyi Gui 106*055d4590SKeyi Gui // Keyword lists for various languages. 
// Each list below is a space-separated AND space-terminated string, so that
// lists can be composed by plain concatenation without gluing two words
// together.
var FLOW_CONTROL_KEYWORDS =
    "break continue do else for if return while ";
var C_KEYWORDS = FLOW_CONTROL_KEYWORDS + "auto case char const default " +
    "double enum extern float goto int long register short signed sizeof " +
    "static struct switch typedef union unsigned void volatile ";
var COMMON_KEYWORDS = C_KEYWORDS + "catch class delete false import " +
    "new operator private protected public this throw true try ";
var CPP_KEYWORDS = COMMON_KEYWORDS + "alignof align_union asm axiom bool " +
    "concept concept_map const_cast constexpr decltype " +
    "dynamic_cast explicit export friend inline late_check " +
    "mutable namespace nullptr reinterpret_cast static_assert static_cast " +
    "template typeid typename typeof using virtual wchar_t where ";
// "abstract" and "interface" are reserved words in Java; they are listed here
// so that Java sources get them highlighted as keywords.
var JAVA_KEYWORDS = COMMON_KEYWORDS +
    "abstract boolean byte extends final finally implements import " +
    "instanceof interface null native package strictfp super synchronized " +
    "throws transient ";
// The C# list is built on top of the Java list.
var CSHARP_KEYWORDS = JAVA_KEYWORDS +
    "as base by checked decimal delegate descending event " +
    "fixed foreach from group implicit in interface internal into is lock " +
    "object out override orderby params readonly ref sbyte sealed " +
    "stackalloc string select uint ulong unchecked unsafe ushort var ";
var JSCRIPT_KEYWORDS = COMMON_KEYWORDS +
    "debugger eval export function get null set undefined var with " +
    "Infinity NaN ";
var PERL_KEYWORDS = "caller delete die do dump elsif eval exit foreach for " +
    "goto if import last local my next no our print package redo require " +
    "sub undef unless until use wantarray while BEGIN END ";
var PYTHON_KEYWORDS = FLOW_CONTROL_KEYWORDS + "and as assert class def del " +
    "elif except exec finally from global import in is lambda " +
    "nonlocal not or pass print raise try with yield " +
    "False True None ";
var RUBY_KEYWORDS = FLOW_CONTROL_KEYWORDS + "alias and begin case class def" +
    " defined elsif end ensure false in module next nil not or redo rescue " +
    "retry self super then true undef unless until when yield BEGIN END ";
var SH_KEYWORDS = FLOW_CONTROL_KEYWORDS + "case done elif esac eval fi " +
    "function in local set then until ";
/** The union of every per-language list above (duplicates are harmless). */
var ALL_KEYWORDS = (
    CPP_KEYWORDS + CSHARP_KEYWORDS + JSCRIPT_KEYWORDS + PERL_KEYWORDS +
    PYTHON_KEYWORDS + RUBY_KEYWORDS + SH_KEYWORDS);

// token style names.  correspond to css classes
/** token style for a string literal */
var PR_STRING = 'str';
/** token style for a keyword */
var PR_KEYWORD = 'kwd';
/** token style for a comment */
var PR_COMMENT = 'com';
/** token style for a type */
var PR_TYPE = 'typ';
/** token style for a literal value.  e.g. 1, null, true. */
var PR_LITERAL = 'lit';
/** token style for a punctuation string. */
var PR_PUNCTUATION = 'pun';
/** token style for plain text. */
var PR_PLAIN = 'pln';

/** token style for an sgml tag. */
var PR_TAG = 'tag';
/** token style for a markup declaration such as a DOCTYPE. */
var PR_DECLARATION = 'dec';
/** token style for embedded source. */
var PR_SOURCE = 'src';
/** token style for an sgml attribute name. */
var PR_ATTRIB_NAME = 'atn';
/** token style for an sgml attribute value. */
var PR_ATTRIB_VALUE = 'atv';

/**
 * A class that indicates a section of markup that is not code, e.g. to allow
 * embedding of line numbers within code listings.
 */
var PR_NOCODE = 'nocode';

/**
 * True if the given one-character string is an ASCII letter (a-z or A-Z).
 * Digits, underscore, the empty string and non-ASCII letters all yield false.
 * @param {string} ch a single character.
 * @return {boolean}
 */
function isWordChar(ch) {
  return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
}
/** Splice one array into another.
 * Like the python <code>
 * container[containerPosition:containerPosition + countReplaced] = inserted
 * </code>
 *
 * The elements of {@code inserted} are only read; the array itself is never
 * modified, so it may be frozen or even be the same array as
 * {@code container}.
 *
 * @param {Array} inserted the elements to insert.
 * @param {Array} container modified in place
 * @param {Number} containerPosition index in container at which to splice.
 * @param {Number} countReplaced number of container elements to remove
 *     before inserting.  A missing or falsy value means "remove nothing".
 */
function spliceArrayInto(
    inserted, container, containerPosition, countReplaced) {
  // Build the argument list for Array.prototype.splice in a fresh array
  // instead of temporarily prepending the two numbers to the caller's array.
  var spliceArgs = [containerPosition, countReplaced || 0].concat(inserted);
  container.splice.apply(container, spliceArgs);
}
/** A set of tokens that can precede a regular expression literal in
 * javascript.
 * http://www.mozilla.org/js/language/js20/rationale/syntax.html has the full
 * list, but I've removed ones that might be problematic when seen in
 * languages that don't support regular expression literals.
 *
 * <p>Specifically, I've removed any keywords that can't precede a regexp
 * literal in a syntactically legal javascript program, and I've removed the
 * "in" keyword since it's not a keyword in many languages, and might be used
 * as a count of inches.
 *
 * <p>The resulting pattern is meant to be tested against the text that
 * precedes a candidate regexp literal: it matches when that text ends (modulo
 * trailing whitespace) in one of the preceders, or is empty.
 * @private
 */
var REGEXP_PRECEDER_PATTERN = function () {
  var preceders = [
      "!", "!=", "!==", "#", "%", "%=", "&", "&&", "&&=",
      "&=", "(", "*", "*=", /* "+", */ "+=", ",", /* "-", */ "-=",
      "->", /*".", "..", "...", handled below */ "/", "/=", ":", "::", ";",
      "<", "<<", "<<=", "<=", "=", "==", "===", ">",
      ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[",
      "^", "^=", "^^", "^^=", "{", "|", "|=", "||",
      "||=", "~" /* handles =~ and !~ */,
      "break", "case", "continue", "delete",
      "do", "else", "finally", "instanceof",
      "return", "throw", "try", "typeof"
      ];
  // Preceders that start with an ASCII letter are keywords and need a leading
  // word boundary; everything else is punctuation and needs escaping.
  var startsWithLetter = /^[a-zA-Z]/;
  var pattern = '(?:' +
      '(?:(?:^|[^0-9.])\\.{1,3})|' +  // a dot that's not part of a number
      '(?:(?:^|[^\\+])\\+)|' +  // allow + but not ++
      '(?:(?:^|[^\\-])-)';  // allow - but not --
  for (var i = 0; i < preceders.length; ++i) {
    var preceder = preceders[i];
    if (startsWithLetter.test(preceder)) {
      pattern += '|\\b' + preceder;
    } else {
      // Backslash-escape every character except the few that are never
      // special in a regular expression.
      pattern += '|' + preceder.replace(/([^=<>:&])/g, '\\$1');
    }
  }
  pattern += '|^)\\s*$';  // matches at end, and matches empty string
  return new RegExp(pattern);
  // CAVEAT: this does not properly handle the case where a regular
  // expression immediately follows another since a regular expression may
  // have flags for case-sensitivity and the like.  Having regexp tokens
  // adjacent is not valid in any language I'm aware of, so I'm punting.
  // TODO: maybe style special characters inside a regexp as punctuation.
}();

// Define regexps here so that the interpreter doesn't have to create an
// object each time the function containing them is called.
// The language spec requires a new object created even if you don't access
// the $1 members.
var pr_amp = /&/g;
var pr_lt = /</g;
var pr_gt = />/g;
var pr_quot = /\"/g;
/** like textToHtml but escapes double quotes to be attribute safe.
 * @param {string} str plain text.
 * @return {string} html that is safe inside a double-quoted attribute value.
 */
function attribToHtml(str) {
  // The ampersand has to be escaped first so that the ampersands introduced
  // by the later replacements are not escaped a second time.
  return str.replace(pr_amp, '&amp;')
      .replace(pr_lt, '&lt;')
      .replace(pr_gt, '&gt;')
      .replace(pr_quot, '&quot;');
}

/** escapes html special characters to html.
 * @param {string} str plain text.
 * @return {string} html in which &, < and > are replaced by entities.
 */
function textToHtml(str) {
  return str.replace(pr_amp, '&amp;')
      .replace(pr_lt, '&lt;')
      .replace(pr_gt, '&gt;');
}


// Patterns for the named entities that are turned back into plain text.
var pr_ltEnt = /&lt;/g;
var pr_gtEnt = /&gt;/g;
var pr_aposEnt = /&apos;/g;
var pr_quotEnt = /&quot;/g;
var pr_ampEnt = /&amp;/g;
var pr_nbspEnt = /&nbsp;/g;
/** unescapes html to plain text. */
// Text for the named entities that htmlToText understands.
var pr_namedEntityText = {
  lt: '<', gt: '>', apos: "'", quot: '"', amp: '&', nbsp: ' '
};
var pr_decimalDigits = /^[0-9]+$/;
var pr_hexDigits = /^[0-9a-fA-F]+$/;

/**
 * Decodes the text between the '&' and the ';' of a single entity.
 * @param {string} body e.g. 'lt', '#65' or '#x41'.
 * @return {?string} the decoded text, or null if body is not an entity that
 *     is understood, in which case the input should be left as it is.
 * @private
 */
function pr_decodeEntityBody(body) {
  if (body.charAt(0) !== '#') {
    return pr_namedEntityText.hasOwnProperty(body)
        ? pr_namedEntityText[body] : null;
  }
  var marker = body.charAt(1);
  var isHex = marker === 'x' || marker === 'X';
  var digits = body.substring(isHex ? 2 : 1);
  if (!(isHex ? pr_hexDigits : pr_decimalDigits).test(digits)) { return null; }
  var codePoint = parseInt(digits, isHex ? 16 : 10);
  if (codePoint > 0x10FFFF) { return null; }  // not a Unicode code point
  if (codePoint > 0xFFFF) {
    // String.fromCharCode works on UTF-16 code units, so a supplementary
    // code point has to be emitted as a surrogate pair.
    codePoint -= 0x10000;
    return String.fromCharCode(
        0xD800 + (codePoint >> 10), 0xDC00 + (codePoint & 0x3FF));
  }
  return String.fromCharCode(codePoint);
}

/**
 * Replaces numeric entities (decimal &#NN; and hexadecimal &#xHH;) and the
 * named entities lt, gt, apos, quot, amp and nbsp with the text they stand
 * for.  Anything else that starts with '&' is copied through unchanged.
 *
 * The input is decoded in a single left-to-right pass, so text produced by
 * decoding one entity is never decoded again: '&amp;lt;' yields '&lt;'.
 *
 * @param {string} html
 * @return {string} plain text
 */
function htmlToText(html) {
  var pos = html.indexOf('&');
  if (pos < 0) { return html; }
  // We can't use functional substitution since that doesn't work in older
  // versions of Safari, so the entities are located with indexOf instead.
  var out = [];
  var copiedUpTo = 0;  // everything before this index is already in out
  for (; pos >= 0; pos = html.indexOf('&', pos + 1)) {
    var end = html.indexOf(';', pos + 1);
    if (end < 0) { break; }  // no terminator left, so no entity left
    var decoded = pr_decodeEntityBody(html.substring(pos + 1, end));
    if (decoded !== null) {
      out.push(html.substring(copiedUpTo, pos), decoded);
      copiedUpTo = end + 1;
      pos = end;  // resume the search after the entity just decoded
    }
  }
  out.push(html.substring(copiedUpTo));
  return out.join('');
}
/** is the given node's innerHTML normally unescaped? */
function isRawContent(node) {
  return 'XMP' === node.tagName;
}

// Elements that never have an end tag.  An end tag is only written for one of
// these if the DOM node nevertheless has children.
var pr_voidElementName =
    /^(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/;

/**
 * Appends an html serialization of the given DOM node to out.
 * Element and attribute names are lower-cased, attribute values and text are
 * escaped, and only attributes flagged as {@code specified} are written.
 * Node types other than element (1), attribute (2), text (3) and cdata (4)
 * produce no output.
 * @param {Node} node
 * @param {Array.<string>} out output buffer that receives chunks of HTML.
 */
function normalizedHtml(node, out) {
  switch (node.nodeType) {
    case 1:  // an element
      var name = node.tagName.toLowerCase();
      out.push('<', name);
      for (var i = 0; i < node.attributes.length; ++i) {
        var attr = node.attributes[i];
        if (!attr.specified) { continue; }
        out.push(' ');
        normalizedHtml(attr, out);
      }
      out.push('>');
      for (var child = node.firstChild; child; child = child.nextSibling) {
        normalizedHtml(child, out);
      }
      if (node.firstChild || !pr_voidElementName.test(name)) {
        out.push('<\/', name, '>');
      }
      break;
    case 2:  // an attribute
      out.push(node.name.toLowerCase(), '="', attribToHtml(node.value), '"');
      break;
    case 3: case 4:  // text
      out.push(textToHtml(node.nodeValue));
      break;
  }
}

// null until the first call of getInnerHtml, then true or false depending on
// whether the browser's innerHTML escapes text properly.
var PR_innerHtmlWorks = null;
/**
 * Returns the content of the given node as properly escaped html.
 * @param {Node} node
 * @return {string}
 */
function getInnerHtml(node) {
  // inner html is hopelessly broken in Safari 2.0.4 when the content is
  // an html description of well formed XML and the containing tag is a PRE
  // tag, so we detect that case and emulate innerHTML.
  if (null === PR_innerHtmlWorks) {
    var testNode = document.createElement('PRE');
    testNode.appendChild(
        document.createTextNode('<!DOCTYPE foo PUBLIC "foo bar">\n<foo />'));
    // A working innerHTML escapes the text, so no literal '<' may remain.
    PR_innerHtmlWorks = !/</.test(testNode.innerHTML);
  }

  if (PR_innerHtmlWorks) {
    var content = node.innerHTML;
    // XMP tags contain unescaped entities so require special handling.
    if (isRawContent(node)) {
      content = textToHtml(content);
    }
    return content;
  }

  var out = [];
  for (var child = node.firstChild; child; child = child.nextSibling) {
    normalizedHtml(child, out);
  }
  return out.join('');
}

/** returns a function that expand tabs to spaces.  This function can be fed
 * successive chunks of text, and will maintain its own internal state to
 * keep track of how tabs are expanded.
 * @param {number} tabWidth the number of characters between tab columns.
 * @return {function (string) : string} a function that takes
 *   plain text and return the text with tabs expanded.
 * @private
 */
function makeTabExpander(tabWidth) {
  // Padding is cut from this 16 character string; wider gaps are produced by
  // taking several pieces.
  var SPACES = '                ';
  var charInLine = 0;

  return function (plainText) {
    // walk over each character looking for tabs and newlines.
    // On tabs, expand them.  On newlines, reset charInLine.
    // Otherwise increment charInLine
    var out = null;  // only allocated once a tab has been seen
    var pos = 0;  // start of the text that has not been copied to out yet
    for (var i = 0, n = plainText.length; i < n; ++i) {
      var ch = plainText.charAt(i);

      switch (ch) {
        case '\t':
          if (!out) { out = []; }
          out.push(plainText.substring(pos, i));
          // calculate how much space we need in front of this part
          // nSpaces is the amount of padding -- the number of spaces needed
          // to move us to the next column, where columns occur at factors of
          // tabWidth.
          var nSpaces = tabWidth - (charInLine % tabWidth);
          charInLine += nSpaces;
          for (; nSpaces >= 0; nSpaces -= SPACES.length) {
            out.push(SPACES.substring(0, nSpaces));
          }
          pos = i + 1;
          break;
        case '\n':
          charInLine = 0;
          break;
        default:
          ++charInLine;
      }
    }
    if (!out) { return plainText; }
    out.push(plainText.substring(pos));
    return out.join('');
  };
}

// The below pattern matches one of the following
// (1) /[^<]+/ : A run of characters other than '<'
// (2) /<!--.*?-->/: an HTML comment
// (3) /<!\[CDATA\[.*?\]\]>/: a cdata section
// (4) /<\/?[a-zA-Z][^>]*>/ : A probably tag that should not be highlighted
// (5) /</ : A '<' that does not begin a larger chunk.
//     A lone '<' like that is treated as plain text, the same as (1).
var pr_chunkPattern =
    /(?:[^<]+|<!--[\s\S]*?-->|<!\[CDATA\[([\s\S]*?)\]\]>|<\/?[a-zA-Z][^>]*>|<)/g;
var pr_commentPrefix = /^<!--/;
// The '!' is part of the prefix: a cdata section starts with '<![CDATA['.
var pr_cdataPrefix = /^<!\[CDATA\[/;
var pr_brPrefix = /^<br\b/i;
var pr_tagNameRe = /^<(\/?)([a-zA-Z]+)/;

/** split markup into chunks of html tags (style null) and
 * plain text (style {@link #PR_PLAIN}), converting tags which are
 * significant for tokenization (<br>) into their textual equivalent.
 *
 * @param {string} s html where whitespace is considered significant.
 * @return {Object} source code and extracted tags.  The {@code source}
 *     property is the plain text of the code.  The {@code tags} property is a
 *     flat array of alternating positions and markup: each piece of markup
 *     was found at the preceding position of {@code source}.
 * @private
 */
function extractTags(s) {
  // since the pattern has the 'g' modifier, match() returns the list of all
  // chunks (whole matches only, the capturing group plays no role here),
  // which we then classify one by one.
  var matches = s.match(pr_chunkPattern);
  var sourceBuf = [];
  var sourceBufLen = 0;
  var extractedTags = [];
  if (matches) {
    for (var i = 0, n = matches.length; i < n; ++i) {
      var match = matches[i];
      if (match.length > 1 && match.charAt(0) === '<') {
        // html comments are dropped altogether
        if (pr_commentPrefix.test(match)) { continue; }
        if (pr_cdataPrefix.test(match)) {
          // strip CDATA prefix and suffix.  Don't unescape since it's CDATA
          // '<![CDATA[' is 9 characters long and ']]>' is 3.
          sourceBuf.push(match.substring(9, match.length - 3));
          sourceBufLen += match.length - 12;
        } else if (pr_brPrefix.test(match)) {
          // <br> tags are lexically significant so convert them to text.
          // This is undone later.
          sourceBuf.push('\n');
          ++sourceBufLen;
        } else {
          if (match.indexOf(PR_NOCODE) >= 0 && isNoCodeTag(match)) {
            // A <span class="nocode"> will start a section that should be
            // ignored.  Continue walking the list until we see a matching end
            // tag.
            var name = match.match(pr_tagNameRe)[2];
            var depth = 1;
            end_tag_loop:
            for (var j = i + 1; j < n; ++j) {
              var name2 = matches[j].match(pr_tagNameRe);
              if (name2 && name2[2] === name) {
                if (name2[1] === '/') {
                  if (--depth === 0) { break end_tag_loop; }
                } else {
                  ++depth;
                }
              }
            }
            if (j < n) {
              // The whole section, from start tag to end tag, is kept as a
              // single piece of markup.
              extractedTags.push(
                  sourceBufLen, matches.slice(i, j + 1).join(''));
              i = j;
            } else {  // Ignore unclosed sections.
              extractedTags.push(sourceBufLen, match);
            }
          } else {
            extractedTags.push(sourceBufLen, match);
          }
        }
      } else {
        var literalText = htmlToText(match);
        sourceBuf.push(literalText);
        sourceBufLen += literalText.length;
      }
    }
  }
  return { source: sourceBuf.join(''), tags: extractedTags };
}

/** True if the given tag contains a class attribute with the nocode class. */
function isNoCodeTag(tag) {
  return !!tag
      // First canonicalize the representation of attributes
      .replace(/\s(\w+)\s*=\s*(?:\"([^\"]*)\"|'([^\']*)'|(\S+))/g,
               ' $1="$2$3$4"')
      // Then look for the attribute we want.
      .match(/[cC][lL][aA][sS][sS]=\"[^\"]*\bnocode\b/);
}
/** Given triples of [style, pattern, context] returns a lexing function,
 * The lexing function interprets the patterns to find token boundaries and
 * returns a decoration list of the form
 * [index_0, style_0, index_1, style_1, ..., index_n, style_n]
 * where index_n is an index into the sourceCode, and style_n is a style
 * constant like PR_PLAIN.  index_n-1 <= index_n, and style_n-1 applies to
 * all characters in sourceCode[index_n-1:index_n].
 *
 * The stylePatterns is a list whose elements have the form
 * [style : string, pattern : RegExp, context : RegExp, shortcut : string].
 *
 * Style is a style constant like PR_PLAIN.
 *
 * Pattern must only match prefixes, and if it matches a prefix and context
 * is null or matches the last non-comment token parsed, then that match is
 * considered a token with the same style.
 *
 * Context is applied to the last non-whitespace, non-comment token
 * recognized.
 *
 * Shortcut is an optional string of characters, any of which, if the first
 * character, guarantee that this pattern and only this pattern matches.
 * Should a pattern nevertheless fail to match (or match nothing) at one of
 * its shortcut characters, the fallthrough patterns are tried instead.
 *
 * @param {Array} shortcutStylePatterns patterns that always start with
 *   a known character.  Must have a shortcut string.
 * @param {Array} fallthroughStylePatterns patterns that will be tried in
 *   order if the shortcut ones fail.  May have shortcuts.
 *
 * @return {function (string, number?) : Array.<number|string>} a
 *   function that takes source code and returns a list of decorations.
 */
function createSimpleLexer(shortcutStylePatterns,
                           fallthroughStylePatterns) {
  // Maps a first character to the pattern that handles it.
  var shortcuts = {};
  (function () {
    var allPatterns = shortcutStylePatterns.concat(fallthroughStylePatterns);
    // Walk backwards so that, when two patterns claim the same character,
    // the one listed first wins.
    for (var i = allPatterns.length; --i >= 0;) {
      var patternParts = allPatterns[i];
      var shortcutChars = patternParts[3];
      if (shortcutChars) {
        for (var c = shortcutChars.length; --c >= 0;) {
          shortcuts[shortcutChars.charAt(c)] = patternParts;
        }
      }
    }
  })();

  var nPatterns = fallthroughStylePatterns.length;
  var notWs = /\S/;

  return function (sourceCode, opt_basePos) {
    opt_basePos = opt_basePos || 0;
    var decorations = [opt_basePos, PR_PLAIN];
    var lastToken = '';
    var pos = 0;  // index into sourceCode
    var tail = sourceCode;

    while (tail.length) {
      var style;
      var token = null;
      var match;

      var patternParts = shortcuts[tail.charAt(0)];
      if (patternParts) {
        match = tail.match(patternParts[1]);
        // Guard against a pattern that breaks the shortcut contract: using a
        // null match would throw, and an empty token would never advance.
        if (match && match[0]) {
          token = match[0];
          style = patternParts[0];
        }
      }

      if (!token) {
        for (var i = 0; i < nPatterns; ++i) {
          patternParts = fallthroughStylePatterns[i];
          var contextPattern = patternParts[2];
          if (contextPattern && !contextPattern.test(lastToken)) {
            // rule can't be used
            continue;
          }
          match = tail.match(patternParts[1]);
          if (match) {
            token = match[0];
            style = patternParts[0];
            break;
          }
        }

        if (!token) {  // make sure that we make progress
          style = PR_PLAIN;
          token = tail.substring(0, 1);
        }
      }

      decorations.push(opt_basePos + pos, style);
      pos += token.length;
      tail = tail.substring(token.length);
      // Only tokens that are neither comments nor pure whitespace are
      // remembered as context for the next token.
      if (style !== PR_COMMENT && notWs.test(token)) { lastToken = token; }
    }
    return decorations;
  };
}

/** Lexer for the top level constructs of markup. */
var PR_MARKUP_LEXER = createSimpleLexer([], [
    [PR_PLAIN,       /^[^<]+/, null],
    [PR_DECLARATION, /^<!\w[^>]*(?:>|$)/, null],
    [PR_COMMENT,     /^<!--[\s\S]*?(?:-->|$)/, null],
    [PR_SOURCE,      /^<\?[\s\S]*?(?:\?>|$)/, null],
    [PR_SOURCE,      /^<%[\s\S]*?(?:%>|$)/, null],
    [PR_SOURCE,
     // Tags whose content is not escaped, and which contain source code.
     /^<(script|style|xmp)\b[^>]*>[\s\S]*?<\/\1\b[^>]*>/i, null],
    [PR_TAG,         /^<\/?\w[^<>]*>/, null]
    ]);
// Splits any of the source|style|xmp entries above into a start tag,
// source content, and end tag.
var PR_SOURCE_CHUNK_PARTS = /^(<[^>]*>)([\s\S]*)(<\/[^>]*>)$/;
/** split markup on tags, comments, application directives, and other top
  * level constructs.  Tags are returned as a single token - attributes are
  * not yet broken out.
  * @param {string} source markup as plain text.
  * @return {Array.<number|string>} a decoration list.
  * @private
  */
function tokenizeMarkup(source) {
  var decorations = PR_MARKUP_LEXER(source);
  for (var i = 0; i < decorations.length; i += 2) {
    if (decorations[i + 1] === PR_SOURCE) {
      var start, end;
      start = decorations[i];
      end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
      // Split out start and end script tags as actual tags, and leave the
      // body with style PR_SOURCE.
      var sourceChunk = source.substring(start, end);
      var match = sourceChunk.match(PR_SOURCE_CHUNK_PARTS);
      if (match) {
        decorations.splice(
            i, 2,
            start, PR_TAG,  // the open chunk
            start + match[1].length, PR_SOURCE,
            start + match[1].length + (match[2] || '').length, PR_TAG);
      }
    }
  }
  return decorations;
}

// Lexer applied to the text of a single tag to pick out the tag name,
// attribute names, attribute values and punctuation.
var PR_TAG_LEXER = createSimpleLexer([
    [PR_ATTRIB_VALUE, /^\'[^\']*(?:\'|$)/, null, "'"],
    [PR_ATTRIB_VALUE, /^\"[^\"]*(?:\"|$)/, null, '"'],
    [PR_PUNCTUATION,  /^[<>\/=]+/, null, '<>/=']
    ], [
    [PR_TAG,          /^[\w:\-]+/, /^</],
    [PR_ATTRIB_VALUE, /^[\w\-]+/, /^=/],
    [PR_ATTRIB_NAME,  /^[\w:\-]+/, null],
    [PR_PLAIN,        /^\s+/, null, ' \t\r\n']
    ]);
/** split tags attributes and their values out from the tag name, and
  * recursively lex source chunks.
  * @param {string} source markup as plain text.
  * @param {Array.<number|string>} decorations list to refine; modified in
  *   place and returned.
  * @return {Array.<number|string>} the refined decoration list.
  * @private
  */
function splitTagAttributes(source, decorations) {
  for (var i = 0; i < decorations.length; i += 2) {
    var style = decorations[i + 1];
    if (style === PR_TAG) {
      var start, end;
      start = decorations[i];
      end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
      var chunk = source.substring(start, end);
      var subDecorations = PR_TAG_LEXER(chunk, start);
      spliceArrayInto(subDecorations, decorations, i, 2);
      // Skip over the entries that were just spliced in.
      i += subDecorations.length - 2;
    }
  }
  return decorations;
}

/** returns a function that produces a list of decorations from source text.
  *
  * This code treats ", ', and ` as string delimiters, and \ as a string
  * escape.  It does not recognize perl's qq() style strings.
  * It has no special handling for double delimiter escapes as in basic, or
  * the tripled delimiters used in python, but should work on those regardless
  * although in those cases a single string literal may be broken up into
  * multiple adjacent string literals.
  *
  * It recognizes C, C++, and shell style comments.
  *
  * @param {Object} options a set of optional parameters.  The properties
  *   read are keywords, tripleQuotedStrings, multiLineStrings, hashComments,
  *   cStyleComments and regexLiterals.
  * @return {function (string) : Array.<string|number>} a
  *     decorator that takes sourceCode as plain text and that returns a
  *     decoration list
  */
function sourceDecorator(options) {
  var shortcutStylePatterns = [], fallthroughStylePatterns = [];
  if (options.tripleQuotedStrings) {
    // '''multi-line-string''', 'single-line-string', and double-quoted
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/,
         null, '\'"']);
  } else if (options.multiLineStrings) {
    // 'multi-line-string', "multi-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/,
         null, '\'"`']);
  } else {
    // 'single-line-string', "single-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,
         /^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/,
         null, '"\'']);
  }
  fallthroughStylePatterns.push(
      [PR_PLAIN,   /^(?:[^\'\"\`\/\#]+)/, null, ' \r\n']);
  if (options.hashComments) {
    shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
  }
  if (options.cStyleComments) {
    fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);
    fallthroughStylePatterns.push(
        [PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]);
  }
  if (options.regexLiterals) {
    var REGEX_LITERAL = (
        // A regular expression literal starts with a slash that is
        // not followed by * or / so that it is not confused with
        // comments.
        '^/(?=[^/*])'
        // and then contains any number of raw characters,
        + '(?:[^/\\x5B\\x5C]'
        // escape sequences (\x5C),
        +    '|\\x5C[\\s\\S]'
        // or non-nesting character sets (\x5B\x5D);
        +    '|\\x5B(?:[^\\x5C\\x5D]|\\x5C[\\s\\S])*(?:\\x5D|$))+'
        // finally closed by a /.
        + '(?:/|$)');
    fallthroughStylePatterns.push(
        [PR_STRING, new RegExp(REGEX_LITERAL), REGEXP_PRECEDER_PATTERN]);
  }

  var keywords = wordSet(options.keywords);

  // Drop the reference so the returned closure does not keep options alive.
  options = null;

  /** splits the given string into comment, string, and "other" tokens.
    * @param {string} sourceCode as plain text
    * @return {Array.<number|string>} a decoration list.
    * @private
    */
  var splitStringAndCommentTokens = createSimpleLexer(
      shortcutStylePatterns, fallthroughStylePatterns);

  var styleLiteralIdentifierPuncRecognizer = createSimpleLexer([], [
      [PR_PLAIN,       /^\s+/, null, ' \r\n'],
      // TODO(mikesamuel): recognize non-latin letters and numerals in idents
      [PR_PLAIN,       /^[a-z_$@][a-z_$@0-9]*/i, null],
      // A hex number
      [PR_LITERAL,     /^0x[a-f0-9]+[a-z]/i, null],
      // An octal or decimal number, possibly in scientific notation
      [PR_LITERAL,
       /^(?:\d(?:_\d+)*\d*(?:\.\d*)?|\.\d+)(?:e[+\-]?\d+)?[a-z]*/i,
       null, '123456789'],
      [PR_PUNCTUATION, /^[^\s\w\.$@]+/, null]
      // Fallback will handle decimal points not adjacent to a digit
      ]);

  /** splits plain text tokens into more specific tokens, and then tries to
    * recognize keywords, and types.
    * @param {string} source the full source text.
    * @param {Array.<number|string>} decorations list to refine; modified in
    *   place and returned.
    * @return {Array.<number|string>} the refined decoration list.
    * @private
    */
  function splitNonStringNonCommentTokens(source, decorations) {
    for (var i = 0; i < decorations.length; i += 2) {
      var style = decorations[i + 1];
      if (style === PR_PLAIN) {
        var start, end, chunk, subDecs;
        start = decorations[i];
        end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
        chunk = source.substring(start, end);
        // Positions in subDecs are absolute because start is passed as the
        // base position.
        subDecs = styleLiteralIdentifierPuncRecognizer(chunk, start);
        for (var j = 0, m = subDecs.length; j < m; j += 2) {
          var subStyle = subDecs[j + 1];
          if (subStyle === PR_PLAIN) {
            var subStart = subDecs[j];
            // The last sub-token runs to the end of the chunk.  Use the
            // absolute end rather than chunk.length, since subStart is an
            // absolute index into source.
            var subEnd = j + 2 < m ? subDecs[j + 2] : end;
            var token = source.substring(subStart, subEnd);
            if (token === '.') {
              subDecs[j + 1] = PR_PUNCTUATION;
            } else if (Object.prototype.hasOwnProperty.call(keywords, token)) {
              // Own-property check so that identifiers such as "constructor"
              // or "toString" are not mistaken for keywords.
              subDecs[j + 1] = PR_KEYWORD;
            } else if (/^@?[A-Z][A-Z$]*[a-z][A-Za-z$]*$/.test(token)) {
              // classify types and annotations using Java's style conventions
              subDecs[j + 1] = token.charAt(0) === '@' ? PR_LITERAL : PR_TYPE;
            }
          }
        }
        spliceArrayInto(subDecs, decorations, i, 2);
        // Skip over the entries that were just spliced in.
        i += subDecs.length - 2;
      }
    }
    return decorations;
  }

  return function (sourceCode) {
    // Split into strings, comments, and other.
    // We do this because strings and comments are easily recognizable and can
    // contain stuff that looks like other tokens, so we want to mark those
    // early so we don't recurse into them.
    var decorations = splitStringAndCommentTokens(sourceCode);

    // Split non comment|string tokens on whitespace and word boundaries
    decorations = splitNonStringNonCommentTokens(sourceCode, decorations);

    return decorations;
  };
}

// The decorator used when no language-specific handler applies.
var decorateSource = sourceDecorator({
      keywords: ALL_KEYWORDS,
      hashComments: true,
      cStyleComments: true,
      multiLineStrings: true,
      regexLiterals: true
    });

/** identify regions of markup that are really source code, and recursively
  * lex them.
  * @param {string} source markup as plain text.
  * @param {Array.<number|string>} decorations list to refine; modified in
  *   place and returned.
  * @return {Array.<number|string>} the refined decoration list.
  * @private
  */
function splitSourceNodes(source, decorations) {
  for (var i = 0; i < decorations.length; i += 2) {
    var style = decorations[i + 1];
    if (style === PR_SOURCE) {
      // Recurse using the non-markup lexer
      var start, end;
      start = decorations[i];
      end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
      var subDecorations = decorateSource(source.substring(start, end));
      // decorateSource returns positions relative to the substring, so shift
      // them back into source coordinates.
      for (var j = 0, m = subDecorations.length; j < m; j += 2) {
        subDecorations[j] += start;
      }
      spliceArrayInto(subDecorations, decorations, i, 2);
      i += subDecorations.length - 2;
    }
  }
  return decorations;
}

/** identify attribute values that really contain source code and recursively
  * lex them.
  * @param {string} source markup as plain text.
  * @param {Array.<number|string>} decorations list to refine; modified in
  *   place and returned.
  * @return {Array.<number|string>} the refined decoration list.
  * @private
  */
function splitSourceAttributes(source, decorations) {
  var nextValueIsSource = false;
  for (var i = 0; i < decorations.length; i += 2) {
    var style = decorations[i + 1];
    var start, end;
    if (style === PR_ATTRIB_NAME) {
      start = decorations[i];
      end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
      // on* event handlers and the style attribute contain source code.
      nextValueIsSource = /^on|^style$/i.test(source.substring(start, end));
    } else if (style === PR_ATTRIB_VALUE) {
      if (nextValueIsSource) {
        start = decorations[i];
        end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
        var attribValue = source.substring(start, end);
        var attribLen = attribValue.length;
        var quoted =
            (attribLen >= 2 && /^[\"\']/.test(attribValue) &&
             attribValue.charAt(0) === attribValue.charAt(attribLen - 1));

        var attribSource;
        var attribSourceStart;
        var attribSourceEnd;
        if (quoted) {
          // Lex only what is between the quotes; the quotes themselves stay
          // styled as attribute value.
          attribSourceStart = start + 1;
          attribSourceEnd = end - 1;
          attribSource = attribValue.substring(1, attribLen - 1);
        } else {
          // No delimiters to strip: the whole value is source.
          attribSourceStart = start;
          attribSourceEnd = end;
          attribSource = attribValue;
        }

        var attribSourceDecorations = decorateSource(attribSource);
        for (var j = 0, m = attribSourceDecorations.length; j < m; j += 2) {
          attribSourceDecorations[j] += attribSourceStart;
        }

        if (quoted) {
          // Keep the existing entry for the opening quote, insert the source
          // decorations after it, and restore the attribute value style for
          // the closing quote.
          attribSourceDecorations.push(attribSourceEnd, PR_ATTRIB_VALUE);
          spliceArrayInto(attribSourceDecorations, decorations, i + 2, 0);
        } else {
          spliceArrayInto(attribSourceDecorations, decorations, i, 2);
        }
      }
      nextValueIsSource = false;
    }
  }
  return decorations;
}

/** returns a decoration list given a string of markup.
  *
  * This code recognizes a number of constructs.
  * <!-- ... --> comment
  * <!\w ... >   declaration
  * <\w ... >    tag
  * </\w ... >   tag
  * <?...?>      embedded source
  * <%...%>      embedded source
  * &[#\w]...;   entity
  *
  * It does not recognize %foo; doctype entities.
  *
  * It will recurse into any <style>, <script>, and on* attributes using
  * decorateSource.
  *
  * @param {string} sourceCode markup as plain text.
  * @return {Array.<number|string>} a decoration list.
  */
function decorateMarkup(sourceCode) {
  // This function works as follows:
  // 1) Start by splitting the markup into text and tag chunks
  //    Input:  string s
  //    Output: List<PR_Token> where style in (PR_PLAIN, null)
  // 2) Then split the text chunks further into comments, declarations,
  //    tags, etc.
  //    After each split, consider whether the token is the start of an
  //    embedded source section, i.e. is an open <script> tag.  If it is, find
  //    the corresponding close token, and don't bother to lex in between.
  //    Input:  List<string>
  //    Output: List<PR_Token> with style in
  //            (PR_TAG, PR_PLAIN, PR_SOURCE, null)
  // 3) Finally go over each tag token and split out attribute names and
  //    values.
  //    Input:  List<PR_Token>
  //    Output: List<PR_Token> where style in
  //            (PR_TAG, PR_PLAIN, PR_SOURCE, NAME, VALUE, null)
  var decorations = tokenizeMarkup(sourceCode);
  decorations = splitTagAttributes(sourceCode, decorations);
  decorations = splitSourceNodes(sourceCode, decorations);
  decorations = splitSourceAttributes(sourceCode, decorations);
  return decorations;
}

/**
  * @param {string} sourceText plain text
  * @param {Array.<number|string>} extractedTags chunks of raw html preceded
  *   by their position in sourceText in order.
  * @param {Array.<number|string>} decorations style classes preceded by their
  *   position in sourceText in order.
  * @return {string} html
  * @private
  */
function recombineTagsAndDecorations(sourceText, extractedTags, decorations) {
  var html = [];
  // index past the last char in sourceText written to html
  var outputIdx = 0;

  var openDecoration = null;
  var currentDecoration = null;
  var tagPos = 0;  // index into extractedTags
  var decPos = 0;  // index into decorations
  var tabExpander = makeTabExpander(PR_TAB_WIDTH);

  var adjacentSpaceRe = /([\r\n ]) /g;
  var startOrSpaceRe = /(^| ) /gm;
  var newlineRe = /\r\n?|\n/g;
  var trailingSpaceRe = /[ \r\n]$/;
  var lastWasSpace = true;  // the last text chunk emitted ended with a space.

  // A helper function that is responsible for opening sections of decoration
  // and outputing properly escaped chunks of source
  function emitTextUpTo(sourceIdx) {
    if (sourceIdx > outputIdx) {
      if (openDecoration && openDecoration !== currentDecoration) {
        // Close the current decoration
        html.push('</span>');
        openDecoration = null;
      }
      if (!openDecoration && currentDecoration) {
        openDecoration = currentDecoration;
        html.push('<span class="', openDecoration, '">');
      }
      // This interacts badly with some wikis which introduces paragraph tags
      // into pre blocks for some strange reason.
      // It's necessary for IE though which seems to lose the preformattedness
      // of <pre> tags when their innerHTML is assigned.
      // http://stud3.tuwien.ac.at/~e0226430/innerHtmlQuirk.html
      // and it serves to undo the conversion of <br>s to newlines done in
      // chunkify.
      // The second space of each adjacent pair becomes a non-breaking space
      // so that runs of spaces survive HTML whitespace collapsing.
      var htmlChunk = textToHtml(
          tabExpander(sourceText.substring(outputIdx, sourceIdx)))
          .replace(lastWasSpace
                   ? startOrSpaceRe
                   : adjacentSpaceRe, '$1&nbsp;');
      // Keep track of whether we need to escape space at the beginning of the
      // next chunk.
      lastWasSpace = trailingSpaceRe.test(htmlChunk);
      html.push(htmlChunk.replace(newlineRe, '<br />'));
      outputIdx = sourceIdx;
    }
  }

  while (true) {
    // Determine if we're going to consume a tag this time around.  Otherwise
    // we consume a decoration or exit.
    var outputTag;
    if (tagPos < extractedTags.length) {
      if (decPos < decorations.length) {
        // Pick one giving preference to extractedTags since we shouldn't open
        // a new style that we're going to have to immediately close in order
        // to output a tag.
        outputTag = extractedTags[tagPos] <= decorations[decPos];
      } else {
        outputTag = true;
      }
    } else {
      outputTag = false;
    }
    // Consume either a decoration or a tag or exit.
    if (outputTag) {
      emitTextUpTo(extractedTags[tagPos]);
      if (openDecoration) {
        // Close the current decoration
        html.push('</span>');
        openDecoration = null;
      }
      html.push(extractedTags[tagPos + 1]);
      tagPos += 2;
    } else if (decPos < decorations.length) {
      emitTextUpTo(decorations[decPos]);
      currentDecoration = decorations[decPos + 1];
      decPos += 2;
    } else {
      break;
    }
  }
  emitTextUpTo(sourceText.length);
  if (openDecoration) {
    html.push('</span>');
  }

  return html.join('');
}

/** Maps language-specific file extensions to handlers. */
var langHandlerRegistry = {};
/** Register a language handler for the given file extensions.
  * An extension that already has a handler is left unchanged; the attempt is
  * logged when a console is available.
  * @param {function (string) : Array.<number|string>} handler
  *     a function from source code to a list of decorations.
  * @param {Array.<string>} fileExtensions
  */
function registerLangHandler(handler, fileExtensions) {
  for (var i = fileExtensions.length; --i >= 0;) {
    var ext = fileExtensions[i];
    if (!langHandlerRegistry.hasOwnProperty(ext)) {
      langHandlerRegistry[ext] = handler;
    } else if ('console' in window) {
      console.log('cannot override language handler %s', ext);
    }
  }
}
registerLangHandler(decorateSource, ['default-code']);
registerLangHandler(decorateMarkup,
                    ['default-markup', 'html', 'htm', 'xhtml', 'xml', 'xsl']);
registerLangHandler(sourceDecorator({
        keywords: CPP_KEYWORDS,
        hashComments: true,
        cStyleComments: true
      }), ['c', 'cc', 'cpp', 'cxx', 'cyc']);
registerLangHandler(sourceDecorator({
        keywords: CSHARP_KEYWORDS,
        hashComments: true,
        cStyleComments: true
      }), ['cs']);
registerLangHandler(sourceDecorator({
        keywords: JAVA_KEYWORDS,
        cStyleComments: true
      }), ['java']);
registerLangHandler(sourceDecorator({
        keywords: SH_KEYWORDS,
        hashComments: true,
        multiLineStrings: true
      }), ['bsh', 'csh', 'sh']);
registerLangHandler(sourceDecorator({
        keywords: PYTHON_KEYWORDS,
        hashComments: true,
        multiLineStrings: true,
        tripleQuotedStrings: true
      }), ['cv', 'py']);
registerLangHandler(sourceDecorator({
        keywords: PERL_KEYWORDS,
        hashComments: true,
        multiLineStrings: true,
        regexLiterals: true
      }), ['perl', 'pl', 'pm']);
registerLangHandler(sourceDecorator({
        keywords: RUBY_KEYWORDS,
        hashComments: true,
        multiLineStrings: true,
        regexLiterals: true
      }), ['rb']);
registerLangHandler(sourceDecorator({
        keywords: JSCRIPT_KEYWORDS,
        cStyleComments: true,
        regexLiterals: true
      }), ['js']);

/** Pretty prints a chunk of HTML-escaped source code.
  * If anything throws while prettifying, the error is logged when a console
  * is available and the input is returned unchanged.
  * @param {string} sourceCodeHtml code as html.
  * @param {string} opt_langExtension optional key into the language handler
  *   registry.  When it is missing or unregistered a default handler is
  *   chosen by inspecting the source.
  * @return {string} code as html with styling spans added, or
  *   sourceCodeHtml itself on failure.
  */
function prettyPrintOne(sourceCodeHtml, opt_langExtension) {
  try {
    // Extract tags, and convert the source code to plain text.
    var sourceAndExtractedTags = extractTags(sourceCodeHtml);
    /** Plain text. @type {string} */
    var source = sourceAndExtractedTags.source;

    /** Even entries are positions in source in ascending order.  Odd entries
      * are tags that were extracted at that position.
      * @type {Array.<number|string>}
      */
    var extractedTags = sourceAndExtractedTags.tags;

    // Pick a lexer and apply it.
    if (!langHandlerRegistry.hasOwnProperty(opt_langExtension)) {
      // Treat it as markup if the first non whitespace character is a <.
      opt_langExtension =
          /^\s*</.test(source) ? 'default-markup' : 'default-code';
    }

    /** Even entries are positions in source in ascending order.  Odd entries
      * are style markers (e.g., PR_COMMENT) that run from that position until
      * the end.
      * @type {Array.<number|string>}
      */
    var decorations = langHandlerRegistry[opt_langExtension].call({}, source);

    // Integrate the decorations and tags back into the source code to produce
    // a decorated html string.
    return recombineTagsAndDecorations(source, extractedTags, decorations);
  } catch (e) {
    if ('console' in window) {
      console.log(e);
      console.trace();
    }
    return sourceCodeHtml;
  }
}

/** Finds every <pre>, <code> and <xmp> element whose class contains
  * "prettyprint" and rewrites its content with styled markup.
  * When PR_SHOULD_USE_CONTINUATION is true the work is split across timeouts.
  * @param {Function} opt_whenDone optional callback invoked once every
  *   element has been processed.
  */
function prettyPrint(opt_whenDone) {
  var isIE6 = _pr_isIE6();

  // fetch a list of nodes to rewrite
  var codeSegments = [
      document.getElementsByTagName('pre'),
      document.getElementsByTagName('code'),
      document.getElementsByTagName('xmp') ];
  var elements = [];
  for (var i = 0; i < codeSegments.length; ++i) {
    for (var j = 0; j < codeSegments[i].length; ++j) {
      elements.push(codeSegments[i][j]);
    }
  }
  codeSegments = null;

  // tagName is upper-case in HTML documents, so match case-insensitively.
  var codeTagRe = /^(?:pre|code|xmp)$/i;

  // the loop is broken into a series of continuations to make sure that we
  // don't make the browser unresponsive when rewriting a large page.
  var k = 0;

  function doWork() {
    var endTime = (PR_SHOULD_USE_CONTINUATION ?
                   new Date().getTime() + 250 /* ms */ :
                   Infinity);
    for (; k < elements.length && new Date().getTime() < endTime; k++) {
      var cs = elements[k];
      if (cs.className && cs.className.indexOf('prettyprint') >= 0) {
        // If the classes includes a language extensions, use it.
        // Language extensions can be specified like
        //     <pre class="prettyprint lang-cpp">
        // the language extension "cpp" is used to find a language handler as
        // passed to registerLangHandler.
        var langExtension = cs.className.match(/\blang-(\w+)\b/);
        if (langExtension) { langExtension = langExtension[1]; }

        // make sure this is not nested in an already prettified element
        var nested = false;
        for (var p = cs.parentNode; p; p = p.parentNode) {
          if (codeTagRe.test(p.tagName) &&
              p.className && p.className.indexOf('prettyprint') >= 0) {
            nested = true;
            break;
          }
        }
        if (!nested) {
          // fetch the content as a snippet of properly escaped HTML.
          // Firefox adds newlines at the end.
          var content = getInnerHtml(cs);
          content = content.replace(/(?:\r\n?|\n)$/, '');

          // do the pretty printing
          var newContent = prettyPrintOne(content, langExtension);

          // push the prettified html back into the tag.
          if (!isRawContent(cs)) {
            // just replace the old html with the new
            cs.innerHTML = newContent;
          } else {
            // we need to change the tag to a <pre> since <xmp>s do not allow
            // embedded tags such as the span tags used to attach styles to
            // sections of source code.
            var pre = document.createElement('PRE');
            for (var i = 0; i < cs.attributes.length; ++i) {
              var a = cs.attributes[i];
              if (a.specified) {
                var aname = a.name.toLowerCase();
                if (aname === 'class') {
                  pre.className = a.value;  // For IE 6
                } else {
                  pre.setAttribute(a.name, a.value);
                }
              }
            }
            pre.innerHTML = newContent;

            // remove the old
            cs.parentNode.replaceChild(pre, cs);
            cs = pre;
          }

          // Replace <br>s with line-feeds so that copying and pasting works
          // on IE 6.
          // Doing this on other browsers breaks lots of stuff since \r\n is
          // treated as two newlines on Firefox, and doing this also slows
          // down rendering.
          if (isIE6 && cs.tagName === 'PRE') {
            var lineBreaks = cs.getElementsByTagName('br');
            for (var j = lineBreaks.length; --j >= 0;) {
              var lineBreak = lineBreaks[j];
              lineBreak.parentNode.replaceChild(
                  document.createTextNode('\r\n'), lineBreak);
            }
          }
        }
      }
    }
    if (k < elements.length) {
      // finish up in a continuation
      setTimeout(doWork, 250);
    } else if (opt_whenDone) {
      opt_whenDone();
    }
  }

  doWork();
}

// Public API, exported under quoted names.
window['PR_normalizedHtml'] = normalizedHtml;
window['prettyPrintOne'] = prettyPrintOne;
window['prettyPrint'] = prettyPrint;
window['PR'] = {
      'createSimpleLexer': createSimpleLexer,
      'registerLangHandler': registerLangHandler,
      'sourceDecorator': sourceDecorator,
      'PR_ATTRIB_NAME': PR_ATTRIB_NAME,
      'PR_ATTRIB_VALUE': PR_ATTRIB_VALUE,
      'PR_COMMENT': PR_COMMENT,
      'PR_DECLARATION': PR_DECLARATION,
      'PR_KEYWORD': PR_KEYWORD,
      'PR_LITERAL': PR_LITERAL,
      'PR_NOCODE': PR_NOCODE,
      'PR_PLAIN': PR_PLAIN,
      'PR_PUNCTUATION': PR_PUNCTUATION,
      'PR_SOURCE': PR_SOURCE,
      'PR_STRING': PR_STRING,
      'PR_TAG': PR_TAG,
      'PR_TYPE': PR_TYPE
    };
1280*055d4590SKeyi Gui})(); 1281