// Copyright 2005 Google Inc.
// All Rights Reserved.
//
// [email protected]

// Usage:
// 1) include this source file in an html page via
//    <script type=text/javascript src=prettify.js></script>
// 2) define style rules.  See the example page for examples.
// 3) mark the <pre> and <code> tags in your source with class=prettyprint.
//    You can also use the (html deprecated) <xmp> tag, but the pretty printer
//    needs to do more substantial DOM manipulations to support that, so some
//    css styles may not be preserved.

// Change log:
// cbeust, 2006/08/22
//   Java annotations (start with "@") are now captured as literals ("lit")
//

/** set of keywords, used as a map from keyword text to true.
  * Created without a prototype where the runtime allows it, so that a lookup
  * such as PR_keywords['constructor'] or PR_keywords['toString'] does not
  * find a property inherited from Object.prototype.
  */
var PR_keywords = Object.create ? Object.create(null) : new Object();
/** initialize the keyword list for our target languages. */
(function () {
  // NOTE: the words "declaration," and "directive" used to follow "using" in
  // this list.  They are prose, not C++ keywords, and have been removed.
  var CPP_KEYWORDS = (
    "bool break case catch char class const const_cast continue default " +
    "delete deprecated dllexport dllimport do double dynamic_cast else enum " +
    "explicit extern false float for friend goto if inline int long mutable " +
    "naked namespace new noinline noreturn nothrow novtable operator private " +
    "property protected public register reinterpret_cast return selectany " +
    "short signed sizeof static static_cast struct switch template this " +
    "thread throw true try typedef typeid typename union unsigned using " +
    "uuid virtual void volatile while typeof");
  var JAVA_KEYWORDS = (
    "abstract default goto package synchronized boolean do if private this " +
    "break double implements protected throw byte else import public throws " +
    "case enum instanceof return transient catch extends int short try char " +
    "final interface static void class finally long strictfp volatile const " +
    "float native super while continue for new switch");
  var PYTHON_KEYWORDS = (
    "and assert break class continue def del elif else except exec finally " +
    "for from global if import in is lambda not or pass print raise return " +
    "try while yield");
  var JSCRIPT_KEYWORDS = (
    "abstract boolean break byte case catch char class const continue " +
    "debugger default delete do double else enum export extends false final " +
    "finally float for function goto if implements import in instanceof int " +
    "interface long native new null package private protected public return " +
    "short static super switch synchronized this throw throws transient " +
    "true try typeof var void volatile while with NaN Infinity");
  var PERL_KEYWORDS = (
    "foreach require sub unless until use elsif BEGIN END");
  var SH_KEYWORDS = (
    "if then do else fi end");
  var KEYWORDS = [CPP_KEYWORDS, JAVA_KEYWORDS, PYTHON_KEYWORDS,
                  JSCRIPT_KEYWORDS, PERL_KEYWORDS, SH_KEYWORDS];
  for (var k = 0; k < KEYWORDS.length; k++) {
    var kw = KEYWORDS[k].split(' ');
    for (var i = 0; i < kw.length; i++) {
      // split(' ') yields an empty string for doubled spaces; skip those.
      if (kw[i]) { PR_keywords[kw[i]] = true; }
    }
  }
}).call(this);

// token style names.  correspond to css classes
/** token style for a string literal */
var PR_STRING = 'str';
/** token style for a keyword */
var PR_KEYWORD = 'kwd';
/** token style for a comment */
var PR_COMMENT = 'com';
/** token style for a type */
var PR_TYPE = 'typ';
/** token style for a literal value.  e.g. 1, null, true. */
var PR_LITERAL = 'lit';
/** token style for a punctuation string. */
var PR_PUNCTUATION = 'pun';
/** token style for plain text. */
var PR_PLAIN = 'pln';

/** token style for an sgml tag. */
var PR_TAG = 'tag';
/** token style for a markup declaration such as a DOCTYPE. */
var PR_DECLARATION = 'dec';
/** token style for embedded source. */
var PR_SOURCE = 'src';
/** token style for an sgml attribute name. */
var PR_ATTRIB_NAME = 'atn';
/** token style for an sgml attribute value. */
var PR_ATTRIB_VALUE = 'atv';

/** the position of the end of a token, paired with the token's style.
  * A division of a string into n tokens can be represented as a series of
  * token ends, as long as runs of whitespace warrant their own token.
  * @constructor
  * @param end a number, the index just past the last character of the token.
  * @param style a token style string, or null.  Must not be undefined.
  * @throws Error if style is undefined or end is not a number.
  * @private
  */
function PR_TokenEnd(end, style) {
  if (undefined === style) { throw new Error('BAD'); }
  if ('number' != typeof(end)) { throw new Error('BAD'); }
  this.end = end;
  this.style = style;
}

/** a debugging representation: the end position, followed by ":style" when
  * the style is truthy.
  */
PR_TokenEnd.prototype.toString = function () {
  return '[PR_TokenEnd ' + this.end +
    (this.style ? ':' + this.style : '') + ']';
};
/** a chunk of text with a style.  These are used to represent both the output
  * from the lexing functions as well as intermediate results.
  * @constructor
  * @param token the token text
  * @param style a token style string, or null for a styleless token, such as
  *     an embedded html tag.  Must not be undefined.
  * @throws Error if style is undefined.
  * @private
  */
function PR_Token(token, style) {
  if (undefined === style) { throw new Error('BAD'); }
  this.token = token;
  this.style = style;
}

/** a debugging representation: the token text, followed by ":style" when the
  * style is truthy.
  */
PR_Token.prototype.toString = function () {
  return '[PR_Token ' + this.token + (this.style ? ':' + this.style : '') + ']';
};
/** a helper class that decodes common html entities used to escape source and
  * markup punctuation characters in html.
  * After a call to decode, ch holds the decoded character and next holds the
  * index of the first character after it.
  * @constructor
  * @private
  */
function PR_DecodeHelper() {
  this.next = 0;
  this.ch = '\0';
}

/** decodes the character, or the &lt; &gt; &quot; &apos; &amp; entity, that
  * starts at index i of s.
  * @param s the string to read from.
  * @param i the index to decode at.
  * @return the decoded character.  Also stored in this.ch, with the index of
  *     the following character stored in this.next.  Anything that is not one
  *     of the five recognized entities is returned as the single character
  *     at i.
  */
PR_DecodeHelper.prototype.decode = function (s, i) {
  var next = i + 1;
  var ch = s.charAt(i);
  if ('&' == ch) {
    var semi = s.indexOf(';', next);
    // The longest recognized names (quot, apos) are 4 characters long, so the
    // semicolon may be at most 4 past next.  This was a strict "<", which made
    // the quot and apos branches below unreachable.
    if (semi >= 0 && semi <= next + 4) {
      var entityName = s.substring(next, semi).toLowerCase();
      next = semi + 1;
      if ('lt' == entityName) {
        ch = '<';
      } else if ('gt' == entityName) {
        ch = '>';
      } else if ('quot' == entityName) {
        ch = '"';
      } else if ('apos' == entityName) {
        ch = '\'';
      } else if ('amp' == entityName) {
        ch = '&';
      } else {
        // not an entity we know: consume only the ampersand.
        next = i + 1;
      }
    }
  }
  this.next = next;
  this.ch = ch;
  return this.ch;
};


// some string utilities

/** true iff ch is an ASCII letter. */
function PR_isWordChar(ch) {
  return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
}

/** true iff ch is a letter or one of _ $ @. */
function PR_isIdentifierStart(ch) {
  return PR_isWordChar(ch) || ch == '_' || ch == '$' || ch == '@';
}

/** true iff ch is an identifier start character or a digit. */
function PR_isIdentifierPart(ch) {
  return PR_isIdentifierStart(ch) || PR_isDigitChar(ch);
}

/** true iff ch is a tab, space, carriage return, or newline. */
function PR_isSpaceChar(ch) {
  return "\t \r\n".indexOf(ch) >= 0;
}

/** true iff ch is an ASCII decimal digit. */
function PR_isDigitChar(ch) {
  return ch >= '0' && ch <= '9';
}

/** returns s without its leading and trailing space characters, as defined
  * by PR_isSpaceChar.
  */
function PR_trim(s) {
  var i = 0, j = s.length - 1;
  while (i <= j && PR_isSpaceChar(s.charAt(i))) { ++i; }
  while (j > i && PR_isSpaceChar(s.charAt(j))) { --j; }
  return s.substring(i, j + 1);
}

/** true iff s begins with prefix. */
function PR_startsWith(s, prefix) {
  return s.length >= prefix.length && prefix == s.substring(0, prefix.length);
}

/** true iff s ends with suffix. */
function PR_endsWith(s, suffix) {
  return s.length >= suffix.length &&
         suffix == s.substring(s.length - suffix.length, s.length);
}
/** true iff prefix matches the first prefix.length characters in
  * chars[0:len].
  * @param chars an Array of single character strings.
  * @param len the number of valid entries at the front of chars.
  * @param prefix the String to look for.
  * @private
  */
function PR_prefixMatch(chars, len, prefix) {
  if (len < prefix.length) { return false; }
  for (var i = 0, n = prefix.length; i < n; ++i) {
    if (prefix.charAt(i) != chars[i]) { return false; }
  }
  return true;
}

/** used to convert html special characters embedded in XMP tags into html.
  * The ampersand is escaped first so that the ampersands introduced by the
  * other two replacements are not escaped a second time.
  * @param str plain text.
  * @return str with & < > replaced by the corresponding html entities.
  */
function PR_textToHtml(str) {
  return str.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
}
/** split markup into chunks of html tags (style null) and
  * plain text (style {@link #PR_PLAIN}).
  *
  * @param s a String of html.
  * @return an Array of PR_Tokens of style PR_PLAIN and null.
  * @private
  */
function PR_chunkify(s) {
  var chunks = [];
  // 0: in text, 1: just after '<', 2: just after '</', 3: inside a tag
  var state = 0;
  var start = 0;  // start of the text that has not been emitted yet
  var pos = -1;   // index of the '<' that opened the candidate tag
  for (var i = 0, n = s.length; i < n; ++i) {
    var ch = s.charAt(i);
    switch (state) {
      case 0:
        if ('<' == ch) { state = 1; }
        break;
      case 1:
        pos = i - 1;
        if ('/' == ch) { state = 2; }
        else if (PR_isWordChar(ch)) { state = 3; }
        else if ('<' == ch) { state = 1; }
        else { state = 0; }
        break;
      case 2:
        if (PR_isWordChar(ch)) { state = 3; }
        else if ('<' == ch) { state = 1; }
        else { state = 0; }
        break;
      case 3:
        if ('>' == ch) {
          if (pos > start) {
            chunks.push(new PR_Token(s.substring(start, pos), PR_PLAIN));
          }
          chunks.push(new PR_Token(s.substring(pos, i + 1), null));
          start = i + 1;
          pos = -1;
          state = 0;
        }
        break;
    }
  }
  if (s.length > start) {
    chunks.push(new PR_Token(s.substring(start, s.length), PR_PLAIN));
  }
  return chunks;
}

/** splits chunks around entities.
  * Chunks whose style is not PR_PLAIN are passed through untouched.  In a
  * plain chunk, each run from '&' followed by '#' or a letter up to the next
  * ';' becomes its own token with style null.
  *
  * @param chunks an Array of PR_Tokens.
  * @return an Array of PR_Tokens.
  * @private
  */
function PR_splitEntities(chunks) {
  var chunksOut = [];
  for (var ci = 0, nc = chunks.length; ci < nc; ++ci) {
    var chunk = chunks[ci];
    if (PR_PLAIN != chunk.style) {
      chunksOut.push(chunk);
      continue;
    }
    var s = chunk.token;
    // The scanner state is reset for every chunk.  Letting it carry over
    // meant that a chunk ending inside an unterminated "&name" made the next
    // chunk use a start index that belonged to the previous chunk's text.
    var state = 0;  // 0: in text, 1: just after '&', 2: inside an entity
    var pos = 0;    // start of the text that has not been emitted yet
    var start = 0;  // index of the '&' of the entity being scanned
    for (var i = 0; i < s.length; ++i) {
      var ch = s.charAt(i);
      switch (state) {
        case 0:
          if ('&' == ch) { state = 1; }
          break;
        case 1:
          if ('#' == ch || PR_isWordChar(ch)) {
            start = i - 1;
            state = 2;
          } else {
            state = 0;
          }
          break;
        case 2:
          if (';' == ch) {
            if (start > pos) {
              chunksOut.push(
                  new PR_Token(s.substring(pos, start), chunk.style));
            }
            chunksOut.push(new PR_Token(s.substring(start, i + 1), null));
            pos = i + 1;
            state = 0;
          }
          break;
      }
    }
    if (s.length > pos) {
      // reuse the input chunk when nothing was split off of it.
      chunksOut.push(pos ?
                     new PR_Token(s.substring(pos, s.length), chunk.style) :
                     chunk);
    }
  }
  return chunksOut;
}

/** walk the tokenEnds list and the chunk list in parallel to generate a list
  * of split tokens.
  * Text taken from a chunk whose style is null keeps the null style; all
  * other text takes the style of the token end that covers it.
  *
  * @param chunks an Array of PR_Tokens.
  * @param tokenEnds an Array of PR_TokenEnds whose end positions index into
  *     the concatenation of the chunk texts.
  * @return an Array of PR_Tokens.
  * @private
  */
function PR_splitChunks(chunks, tokenEnds) {
  var tokens = [];  // the output

  var ci = 0;  // index into chunks
  // position of beginning of amount written so far in absolute space.
  var posAbs = 0;
  // position of amount written so far in chunk space
  var posChunk = 0;

  // current chunk
  var chunk = new PR_Token('', null);

  for (var ei = 0, ne = tokenEnds.length; ei < ne; ++ei) {
    var tokenEnd = tokenEnds[ei];
    var end = tokenEnd.end;

    var tokLen = end - posAbs;
    var remainingInChunk = chunk.token.length - posChunk;
    while (remainingInChunk <= tokLen) {
      if (remainingInChunk > 0) {
        tokens.push(
            new PR_Token(chunk.token.substring(posChunk, chunk.token.length),
                         null == chunk.style ? null : tokenEnd.style));
      }
      posAbs += remainingInChunk;
      posChunk = 0;
      if (ci >= chunks.length) {
        // Out of input.  Without this exit the loop never terminated when
        // the current chunk was empty (e.g. for an empty chunk list), since
        // 0 <= tokLen stayed true forever.
        chunk = new PR_Token('', null);
        tokLen = 0;
        break;
      }
      chunk = chunks[ci++];

      tokLen = end - posAbs;
      remainingInChunk = chunk.token.length - posChunk;
    }

    if (tokLen > 0) {
      tokens.push(
          new PR_Token(chunk.token.substring(posChunk, posChunk + tokLen),
                       tokenEnd.style));
      posAbs += tokLen;
      posChunk += tokLen;
    }
  }

  return tokens;
}

/** splits markup tokens into declarations, tags, and source chunks.
  *
  * @param chunks an Array of PR_Tokens.  Only chunks of style PR_PLAIN are
  *     scanned; the others only advance the absolute position.
  * @return an Array of PR_TokenEnds.
  * @private
  */
function PR_splitMarkup(chunks) {
  // A state machine to split out declarations, tags, etc.
  // This state machine deals with absolute space in the text, indexed by k,
  // and position in the current chunk, indexed by i, to generate a list of
  // the ends of tokens.
  // Absolute space is calculated by considering the chunks as appended into
  // one big string, as they were before being split.

  // Known failure cases
  // Server side scripting sections such as <?...?> in attributes.
  // i.e. <span class="<? foo ?>">
  // Handling this would require a stack, and we don't use PHP.

  // The output: a list of PR_TokenEnd instances.  Each recognized token
  // contributes two: one ending the plain text before it, one ending it.
  var tokenEnds = [];

  // FSM state variable
  //  0: text            1: after '<'        2: after '<!'
  //  3: in declaration  4-6: in comment     7: after '</'
  //  8: in tag          9-10: in <? ... ?>  11-12: in <% ... %>
  var state = 0;
  var k = 0;  // position in absolute space of the start of the current chunk
  var tokenStart = -1;  // the start of the current token

  // Try to find a closing tag for any open <style> or <script> tags
  // We can't do this at a later stage because then the following case
  // would fail:
  // <script>document.writeln('<!--');</script>

  // We use tokenChars[:tokenCharsI] to accumulate the tag name so that we
  // can check whether to enter into a no scripting section when the tag ends.
  var tokenChars = new Array(12);
  var tokenCharsI = 0;
  // if non null, the tag prefix that we need to see to break out.
  var endScriptTag = null;
  var decodeHelper = new PR_DecodeHelper();

  for (var ci = 0, nc = chunks.length; ci < nc; ++ci) {
    var chunk = chunks[ci];
    if (PR_PLAIN != chunk.style) {
      k += chunk.token.length;
      continue;
    }

    var s = chunk.token;

    for (var i = 0, n = s.length; i < n; /* i = next at bottom */) {
      decodeHelper.decode(s, i);
      var ch = decodeHelper.ch;
      var next = decodeHelper.next;

      var tokenStyle = null;
      switch (state) {
        case 0:
          if ('<' == ch) { state = 1; }
          break;
        case 1:
          tokenCharsI = 0;
          if ('/' == ch) {  // only consider close tags if we're in script/style
            state = 7;
          } else if (null == endScriptTag) {
            if ('!' == ch) {
              state = 2;
            } else if (PR_isWordChar(ch)) {
              state = 8;
            } else if ('?' == ch) {
              state = 9;
            } else if ('%' == ch) {
              state = 11;
            } else if ('<' != ch) {
              state = 0;
            }
          } else if ('<' != ch) {
            state = 0;
          }
          break;
        case 2:
          if ('-' == ch) {
            state = 4;
          } else if (PR_isWordChar(ch)) {
            state = 3;
          } else if ('<' == ch) {
            state = 1;
          } else {
            state = 0;
          }
          break;
        case 3:
          if ('>' == ch) {
            state = 0;
            tokenStyle = PR_DECLARATION;
          }
          break;
        case 4:
          if ('-' == ch) { state = 5; }
          break;
        case 5:
          if ('-' == ch) { state = 6; }
          break;
        case 6:
          if ('>' == ch) {
            state = 0;
            tokenStyle = PR_COMMENT;
          } else if ('-' == ch) {
            state = 6;
          } else {
            state = 4;
          }
          break;
        case 7:
          if (PR_isWordChar(ch)) {
            state = 8;
          } else if ('<' == ch) {
            state = 1;
          } else {
            state = 0;
          }
          break;
        case 8:
          if ('>' == ch) {
            state = 0;
            tokenStyle = PR_TAG;
          }
          break;
        case 9:
          if ('?' == ch) { state = 10; }
          break;
        case 10:
          if ('>' == ch) {
            state = 0;
            tokenStyle = PR_SOURCE;
          } else if ('?' != ch) {
            state = 9;
          }
          break;
        case 11:
          if ('%' == ch) { state = 12; }
          break;
        case 12:
          if ('>' == ch) {
            state = 0;
            tokenStyle = PR_SOURCE;
          } else if ('%' != ch) {
            state = 11;
          }
          break;
      }

      if (tokenCharsI < tokenChars.length) {
        tokenChars[tokenCharsI++] = ch.toLowerCase();
      }
      if (1 == state) { tokenStart = k + i; }
      i = next;
      if (tokenStyle != null) {
        if (endScriptTag) {
          if (PR_prefixMatch(tokenChars, tokenCharsI, endScriptTag)) {
            endScriptTag = null;
          }
        } else {
          if (PR_prefixMatch(tokenChars, tokenCharsI, 'script')) {
            endScriptTag = '/script';
          } else if (PR_prefixMatch(tokenChars, tokenCharsI, 'style')) {
            endScriptTag = '/style';
          } else if (PR_prefixMatch(tokenChars, tokenCharsI, 'xmp')) {
            endScriptTag = '/xmp';
          }
        }
        // disallow the tag if endScriptTag is set and this was not an open
        // tag.
        if (endScriptTag && tokenCharsI && '/' == tokenChars[0]) {
          tokenStyle = null;
        }
        if (null != tokenStyle) {
          tokenEnds.push(new PR_TokenEnd(tokenStart, PR_PLAIN));
          tokenEnds.push(new PR_TokenEnd(k + next, tokenStyle));
        }
      }
    }
    k += chunk.token.length;
  }
  tokenEnds.push(new PR_TokenEnd(k, PR_PLAIN));

  return tokenEnds;
}
/** splits the given string into comment, string, and "other" tokens.
  * Strings are delimited by a double quote, single quote, or backquote, with
  * backslash escaping the next character.  Comments are // or # to the end
  * of the line, or slash-star to star-slash.
  *
  * @param chunks an Array of PR_Tokens.  Only chunks of style PR_PLAIN are
  *     scanned; the scanner state carries over from one chunk to the next.
  * @return an array of PR_Tokens with style in
  *     (PR_STRING, PR_COMMENT, PR_PLAIN, null)
  *
  * @private
  */
function PR_splitStringAndCommentTokens(chunks) {
  // a state machine to split out comments, strings, and other stuff
  var tokenEnds = [];  // positions of ends of tokens in absolute space
  // FSM state variable
  //  0: other text        1: in string       2: after backslash in string
  //  3: after one '/'     4: in line comment
  //  5: in block comment  6: after '*' in block comment
  var state = 0;
  var delim = -1;  // string delimiter
  var k = 0;  // absolute position of beginning of current chunk
  for (var ci = 0, nc = chunks.length; ci < nc; ++ci) {
    var chunk = chunks[ci];
    var s = chunk.token;
    if (PR_PLAIN == chunk.style) {
      for (var i = 0, n = s.length; i < n; ++i) {
        var ch = s.charAt(i);
        if (0 == state) {
          if (ch == '"' || ch == '\'' || ch == '`') {
            tokenEnds.push(new PR_TokenEnd(k + i, PR_PLAIN));
            state = 1;
            delim = ch;
          } else if (ch == '/') {
            state = 3;
          } else if (ch == '#') {
            tokenEnds.push(new PR_TokenEnd(k + i, PR_PLAIN));
            state = 4;
          }
        } else if (1 == state) {
          if (ch == delim) {
            state = 0;
            tokenEnds.push(new PR_TokenEnd(k + i + 1, PR_STRING));
          } else if (ch == '\\') {
            state = 2;
          }
        } else if (2 == state) {
          state = 1;
        } else if (3 == state) {
          if (ch == '/') {
            state = 4;
            tokenEnds.push(new PR_TokenEnd(k + i - 1, PR_PLAIN));
          } else if (ch == '*') {
            state = 5;
            tokenEnds.push(new PR_TokenEnd(k + i - 1, PR_PLAIN));
          } else {
            state = 0;
            // back up one so that the loop increment lands on the same i
            // and ch is reconsidered in state 0 as the start of a new token.
            --i;
          }
        } else if (4 == state) {
          if (ch == '\r' || ch == '\n') {
            state = 0;
            tokenEnds.push(new PR_TokenEnd(k + i, PR_COMMENT));
          }
        } else if (5 == state) {
          if (ch == '*') {
            state = 6;
          }
        } else if (6 == state) {
          if (ch == '/') {
            state = 0;
            tokenEnds.push(new PR_TokenEnd(k + i + 1, PR_COMMENT));
          } else if (ch != '*') {
            state = 5;
          }
        }
      }
    }
    k += s.length;
  }
  // A token ends at the end.  A line comment needs no terminator other than
  // the end of the input, so one that is still open here is a comment; it
  // used to be emitted as plain text when the input lacked a final newline.
  tokenEnds.push(new PR_TokenEnd(k, 4 == state ? PR_COMMENT : PR_PLAIN));

  return PR_splitChunks(chunks, tokenEnds);
}
/**
 * Picks the style for one token produced by
 * PR_splitNonStringNonCommentToken, based on its first character and text.
 *
 * @param t a non-empty token string.
 * @return one of PR_KEYWORD, PR_LITERAL, PR_TYPE, PR_PUNCTUATION, PR_PLAIN.
 * @private
 */
function PR_styleForPlainToken(t) {
  var ch0 = t.charAt(0);
  if (PR_isIdentifierStart(ch0)) {
    // Own-property check: PR_keywords is a plain Object, so a bare lookup
    // would also match inherited members such as toString or constructor.
    if (Object.prototype.hasOwnProperty.call(PR_keywords, t) &&
        PR_keywords[t]) {
      return PR_KEYWORD;
    }
    if (ch0 == '@') {
      return PR_LITERAL;
    }
    // Treat any word that starts with an uppercase character and contains at
    // least one lowercase character as a type, and any word that ends with
    // _t (the C convention, e.g. size_t) regardless of its first character.
    var isType = false;
    if (ch0 >= 'A' && ch0 <= 'Z') {
      for (var j = 1; j < t.length; j++) {
        var ch1 = t.charAt(j);
        if (ch1 >= 'a' && ch1 <= 'z') {
          isType = true;
          break;
        }
      }
    }
    if (!isType && t.length > 2 && t.substring(t.length - 2) == '_t') {
      isType = true;
    }
    return isType ? PR_TYPE : PR_PLAIN;
  }
  if (PR_isDigitChar(ch0)) {
    return PR_LITERAL;
  }
  if (!PR_isSpaceChar(ch0)) {
    return PR_PUNCTUATION;
  }
  return PR_PLAIN;
}

/**
 * used by lexSource to split a non string, non comment token into
 * whitespace, identifiers, number literals and punctuation, appending a
 * PR_Token for each run to outlist.
 *
 * States: 0 whitespace, 1 identifier, 2 number literal, 3 punctuation.
 * Numeric literals such as 0x7f, 300UL and 100_000 stay in one token, but
 * floating point values like 0.1 and 3e-6 are split into several tokens.
 *
 * @param s the text to split.
 * @param outlist array that receives the resulting PR_Token objects.
 * @private
 */
function PR_splitNonStringNonCommentToken(s, outlist) {
  var pos = 0;    // start of the run that has not been emitted yet
  var state = 0;
  for (var i = 0; i <= s.length; i++) {
    var ch = s.charAt(i);
    // the next state.
    // -1 ends the current run and re-dispatches ch from state 0 without
    // consuming it; -2 ends the current run at the end of the input.
    var nstate = state;

    if (i == s.length) {
      nstate = -2;
    } else {
      switch (state) {
        case 0:  // whitespace state
          if (PR_isIdentifierStart(ch)) {
            nstate = 1;
          } else if (PR_isDigitChar(ch)) {
            nstate = 2;
          } else if (!PR_isSpaceChar(ch)) {
            nstate = 3;
          }
          if (nstate && pos < i) {
            // leaving whitespace: emit the pending blanks as plain text
            outlist.push(new PR_Token(s.substring(pos, i), PR_PLAIN));
            pos = i;
          }
          break;
        case 1:  // identifier state
          if (!PR_isIdentifierPart(ch)) {
            nstate = -1;
          }
          break;
        case 2:  // number literal state
          if (!(PR_isDigitChar(ch) || PR_isWordChar(ch) || ch == '_')) {
            nstate = -1;
          }
          break;
        case 3:  // punctuation state
          if (PR_isIdentifierStart(ch) || PR_isDigitChar(ch) ||
              PR_isSpaceChar(ch)) {
            nstate = -1;
          }
          break;
      }
    }

    if (nstate != state) {
      if (nstate < 0) {
        if (i > pos) {
          var t = s.substring(pos, i);
          pos = i;
          outlist.push(new PR_Token(t, PR_styleForPlainToken(t)));
        }

        state = 0;
        if (nstate == -1) {
          // don't increment.  This allows us to use state 0 to redispatch
          // based on the current character.
          i--;
          continue;
        }
      }
      state = nstate;
    }
  }
}
/**
 * split a group of chunks of markup: computes the token boundaries with
 * PR_splitMarkup and applies them with PR_splitChunks.  A missing or empty
 * chunk list is returned untouched.
 * @private
 */
function PR_tokenizeMarkup(chunks) {
  var nothingToDo = !chunks || !chunks.length;
  if (nothingToDo) { return chunks; }

  return PR_splitChunks(chunks, PR_splitMarkup(chunks));
}

/**
 * split tags attributes and their values out from the tag name.
 *
 * Only PR_TAG tokens are scanned; every other token is passed through, and
 * one with a non-empty style also resets the scanner.  Scanner modes:
 *   0 - before '<'            4 - blanks after an attribute name
 *   1 - inside the tag name   5 - after '=', before the value
 *   2 - blanks between parts  6 - inside a quoted value
 *   3 - inside an attr name   7 - inside an unquoted value
 * The mode is kept from one PR_TAG token to the next.
 *
 * @param tokens list of PR_Token.
 * @return a new list in which tag tokens are split into PR_TAG,
 *     PR_ATTRIB_NAME and PR_ATTRIB_VALUE pieces.
 * @private
 */
function PR_splitTagAttributes(tokens) {
  var result = [];
  var mode = 0;
  var currentStyle = PR_TAG;
  var quote = null;  // attribute delimiter for quoted value state.
  var decoder = new PR_DecodeHelper();

  for (var ti = 0; ti < tokens.length; ++ti) {
    var token = tokens[ti];

    if (PR_TAG != token.style) {
      if (token.style) {
        mode = 0;
        currentStyle = PR_TAG;
      }
      result.push(token);
      continue;
    }

    var text = token.token;
    var emitted = 0;  // everything before this offset is already in result
    var at = 0;
    while (at < text.length) {
      decoder.decode(text, at);
      var c = decoder.ch;
      var after = decoder.next;

      var cut = null;            // null or position of end of piece to emit.
      var styleAfterCut = null;  // null or next value of currentStyle

      if (c == '>') {
        if (PR_TAG != currentStyle) {
          cut = at;
          styleAfterCut = PR_TAG;
        }
      } else if (0 == mode) {
        if ('<' == c) { mode = 1; }
      } else if (1 == mode) {
        if (PR_isSpaceChar(c)) { mode = 2; }
      } else if (2 == mode) {
        if (!PR_isSpaceChar(c)) {
          styleAfterCut = PR_ATTRIB_NAME;
          cut = at;
          mode = 3;
        }
      } else if (3 == mode) {
        if ('=' == c) {
          cut = at;
          styleAfterCut = PR_TAG;
          mode = 5;
        } else if (PR_isSpaceChar(c)) {
          cut = at;
          styleAfterCut = PR_TAG;
          mode = 4;
        }
      } else if (4 == mode) {
        if ('=' == c) {
          mode = 5;
        } else if (!PR_isSpaceChar(c)) {
          cut = at;
          styleAfterCut = PR_ATTRIB_NAME;
          mode = 3;
        }
      } else if (5 == mode) {
        if ('"' == c || '\'' == c) {
          cut = at;
          styleAfterCut = PR_ATTRIB_VALUE;
          mode = 6;
          quote = c;
        } else if (!PR_isSpaceChar(c)) {
          cut = at;
          styleAfterCut = PR_ATTRIB_VALUE;
          mode = 7;
        }
      } else if (6 == mode) {
        if (c == quote) {
          // the closing quote belongs to the value
          cut = after;
          styleAfterCut = PR_TAG;
          mode = 2;
        }
      } else if (7 == mode) {
        if (PR_isSpaceChar(c)) {
          cut = at;
          styleAfterCut = PR_TAG;
          mode = 2;
        }
      }

      // NOTE(review): this is a truthiness test, so a cut at offset 0 is
      // ignored and currentStyle is left unchanged.  That looks accidental,
      // but the mode is never reset on '>', so a following tag that starts
      // in mode 2 relies on it; change both together or neither.
      if (cut) {
        if (cut > emitted) {
          result.push(
              new PR_Token(text.substring(emitted, cut), currentStyle));
          emitted = cut;
        }
        currentStyle = styleAfterCut;
      }
      at = after;
    }

    if (text.length > emitted) {
      result.push(
          new PR_Token(text.substring(emitted, text.length), currentStyle));
    }
  }
  return result;
}
/**
 * identify regions of markup that are really source code, and recursively
 * lex them with PR_lexSource.
 *
 * Source regions are either PR_SOURCE tokens that start with <% or <?, or
 * the content following an opening script, style or xmp tag up to the next
 * tag that starts with </.  The lexed tokens are wrapped in unstyled
 * <span class=embsrc> ... </span> tokens.
 *
 * @param tokens list of PR_Token; not modified.
 * @return a new list of PR_Token.
 * @private
 */
function PR_splitSourceNodes(tokens) {
  var tokensOut = [];
  // when we see a <script> tag, store '/' here so that we know to end the
  // source processing.  For <% and <? it holds '%' or '?' instead.
  var endScriptTag = null;
  var decodeHelper = new PR_DecodeHelper();

  var sourceChunks = null;  // chunks of the source region being collected

  // lex the collected source chunks and append them, wrapped, to tokensOut.
  function flushSource() {
    if (!sourceChunks) { return; }
    var sourceTokens = PR_lexSource(sourceChunks);
    tokensOut.push(new PR_Token('<span class=embsrc>', null));
    for (var si = 0, ns = sourceTokens.length; si < ns; ++si) {
      tokensOut.push(sourceTokens[si]);
    }
    tokensOut.push(new PR_Token('</span>', null));
    sourceChunks = null;
  }

  for (var ci = 0, nc = tokens.length; ci < nc; ++ci) {
    var tok = tokens[ci];
    if (null == tok.style) {
      // Unstyled tokens go to the output (the original appended them to the
      // input list, where they were never seen again).  While a source
      // region is being collected they join it so that order is kept.
      (sourceChunks || tokensOut).push(tok);
      continue;
    }

    var s = tok.token;

    if (null == endScriptTag) {
      if (PR_SOURCE == tok.style) {
        // split off any starting and trailing <?, <%
        if ('<' == decodeHelper.decode(s, 0)) {
          decodeHelper.decode(s, decodeHelper.next);
          if ('%' == decodeHelper.ch || '?' == decodeHelper.ch) {
            endScriptTag = decodeHelper.ch;
            tokensOut.push(new PR_Token(s.substring(0, decodeHelper.next),
                                        PR_TAG));
            s = s.substring(decodeHelper.next, s.length);
          }
        }
      } else if (PR_TAG == tok.style) {
        if ('<' == decodeHelper.decode(s, 0) &&
            '/' != s.charAt(decodeHelper.next)) {
          var tagContent = s.substring(decodeHelper.next).toLowerCase();
          // FIXME(msamuel): this does not mirror exactly the code in
          // in PR_splitMarkup that defers splitting tags inside script and
          // style blocks.
          if (PR_startsWith(tagContent, 'script') ||
              PR_startsWith(tagContent, 'style') ||
              PR_startsWith(tagContent, 'xmp')) {
            endScriptTag = '/';
          }
        }
      }
    }

    if (null != endScriptTag) {
      var endTok = null;
      if (PR_SOURCE == tok.style) {
        if (endScriptTag == '%' || endScriptTag == '?') {
          var pos = s.lastIndexOf(endScriptTag);
          if (pos >= 0 && '>' == decodeHelper.decode(s, pos + 1) &&
              s.length == decodeHelper.next) {
            endTok = new PR_Token(s.substring(pos, s.length), PR_TAG);
            s = s.substring(0, pos);
          }
        }
        if (null == sourceChunks) { sourceChunks = []; }
        sourceChunks.push(new PR_Token(s, PR_PLAIN));
      } else if (PR_PLAIN == tok.style) {
        if (null == sourceChunks) { sourceChunks = []; }
        sourceChunks.push(tok);
      } else if (PR_TAG == tok.style) {
        // if it starts with </ then it must be the end tag.
        if ('<' == decodeHelper.decode(tok.token, 0) &&
            tok.token.length > decodeHelper.next &&
            '/' == decodeHelper.decode(tok.token, decodeHelper.next)) {
          endTok = tok;
        } else {
          tokensOut.push(tok);
        }
      } else {
        if (sourceChunks) {
          sourceChunks.push(tok);
        } else {
          // push remaining tag and attribute tokens from the opening tag
          tokensOut.push(tok);
        }
      }
      if (endTok) {
        flushSource();
        tokensOut.push(endTok);
        endScriptTag = null;
      }
    } else {
      tokensOut.push(tok);
    }
  }

  // a source region that is never closed is still emitted, not discarded
  flushSource();
  return tokensOut;
}

/**
 * splits the quotes from an attribute value.
 *   ['"foo"'] -> ['"', 'foo', '"']
 *
 * The opening quote is taken from the first PR_PLAIN token and the closing
 * quote from the last one; the closing quote is only split off when it
 * matches the opening one and is the final character of that token.
 *
 * @param tokens list of PR_Token; not modified.
 * @return tokens itself when there is no PR_PLAIN token or no opening
 *     quote, otherwise a new list.
 * @private
 */
function PR_splitAttributeQuotes(tokens) {
  var firstPlain = null, lastPlain = null;
  for (var i = 0; i < tokens.length; ++i) {
    if (PR_PLAIN == tokens[i].style) {
      firstPlain = i;
      break;
    }
  }
  for (var i = tokens.length; --i >= 0;) {
    if (PR_PLAIN == tokens[i].style) {
      lastPlain = i;
      break;
    }
  }
  if (null == firstPlain) { return tokens; }

  var decodeHelper = new PR_DecodeHelper();
  var fs = tokens[firstPlain].token;
  var fc = decodeHelper.decode(fs, 0);
  if ('"' != fc && '\'' != fc) {
    return tokens;
  }
  var fpos = decodeHelper.next;  // first position after the opening quote

  var ls = tokens[lastPlain].token;
  // the closing quote is the last character, or the last entity if any
  var lpos = ls.lastIndexOf('&');
  if (lpos < 0) { lpos = ls.length - 1; }
  var lc = decodeHelper.decode(ls, lpos);
  if (lc != fc || decodeHelper.next != ls.length ||
      (lastPlain == firstPlain && lpos < fpos)) {
    // no closing quote; the last clause keeps a lone quote character from
    // being counted as both the opening and the closing quote.
    lc = null;
    lpos = ls.length;
  }

  var tokensOut = [];
  for (var i = 0; i < firstPlain; ++i) {
    tokensOut.push(tokens[i]);
  }
  tokensOut.push(new PR_Token(fs.substring(0, fpos), PR_ATTRIB_VALUE));
  if (lastPlain == firstPlain) {
    tokensOut.push(new PR_Token(fs.substring(fpos, lpos), PR_PLAIN));
  } else {
    tokensOut.push(new PR_Token(fs.substring(fpos, fs.length), PR_PLAIN));
    for (var i = firstPlain + 1; i < lastPlain; ++i) {
      tokensOut.push(tokens[i]);
    }
    if (lc) {
      tokensOut.push(new PR_Token(ls.substring(0, lpos), PR_PLAIN));
    } else {
      tokensOut.push(tokens[lastPlain]);
    }
  }
  if (lc) {
    // NOTE(review): the closing quote is emitted as PR_PLAIN while the
    // opening quote is PR_ATTRIB_VALUE; kept as is, confirm it is intended.
    tokensOut.push(new PR_Token(ls.substring(lpos, ls.length), PR_PLAIN));
  }
  for (var i = lastPlain + 1; i < tokens.length; ++i) {
    tokensOut.push(tokens[i]);
  }
  return tokensOut;
}
/**
 * identify attribute values that really contain source code and recursively
 * lex them.
 *
 * An attribute counts as source when its name starts with "on" or is
 * "style" (case-insensitive).  Its value tokens are collected, stripped of
 * their quotes by PR_splitAttributeQuotes, lexed by PR_lexSource and emitted
 * between unstyled <span class=embsrc> ... </span> tokens.
 *
 * @param tokens list of PR_Token as produced by the earlier markup passes.
 * @return a new list of PR_Token.
 * @private
 */
function PR_splitSourceAttributes(tokens) {
  var tokensOut = [];

  var sourceChunks = null;  // value tokens of the source attribute, if any
  var inSource = false;     // true between the '=' and the next tag token
  var name = '';            // attribute name accumulated so far

  // lex the collected value chunks and append them, wrapped, to tokensOut.
  function flushSource() {
    if (!sourceChunks) { return; }
    tokensOut.push(new PR_Token('<span class=embsrc>', null));
    var sourceTokens =
      PR_lexSource(PR_splitAttributeQuotes(sourceChunks));
    for (var si = 0, ns = sourceTokens.length; si < ns; ++si) {
      tokensOut.push(sourceTokens[si]);
    }
    tokensOut.push(new PR_Token('</span>', null));
    sourceChunks = null;
  }

  for (var ci = 0, nc = tokens.length; ci < nc; ++ci) {
    var tok = tokens[ci];
    var outList = tokensOut;
    if (PR_TAG == tok.style) {
      if (inSource) {
        // a tag token after the value ends the source attribute
        inSource = false;
        name = '';
        flushSource();
      } else if (name && tok.token.indexOf('=') >= 0) {
        var nameLower = name.toLowerCase();
        if (PR_startsWith(nameLower, 'on') || 'style' == nameLower) {
          inSource = true;
        }
      } else {
        name = '';
      }
    } else if (PR_ATTRIB_NAME == tok.style) {
      name += tok.token;
    } else if (PR_ATTRIB_VALUE == tok.style) {
      if (inSource) {
        if (null == sourceChunks) { sourceChunks = []; }
        outList = sourceChunks;
        // the lexer only looks inside PR_PLAIN chunks
        tok = new PR_Token(tok.token, PR_PLAIN);
      }
    } else {
      if (sourceChunks) {
        outList = sourceChunks;
      }
    }
    outList.push(tok);
  }

  // if the input ends inside a source attribute, emit what was collected
  // instead of silently dropping it.
  flushSource();
  return tokensOut;
}

/**
 * returns a list of PR_Token objects given chunks of source code.
 *
 * The string and comment scanner looks at raw characters, so a literal "
 * starts a string but an escaped &quot; does not.
 *
 * This code treats ", ', and ` as string delimiters, and \ as a string escape.
 * It does not recognize double delimiter escapes, or perl's qq() style
 * strings.
 *
 * It recognizes C, C++, and shell style comments.
 *
 * @param chunks PR_Tokens with style in (null, PR_PLAIN)
 * @return list of PR_Token
 */
function PR_lexSource(chunks) {
  // first separate strings and comments from everything else, then split
  // entities out so that we know to treat them as single units.
  var tokensIn = PR_splitEntities(PR_splitStringAndCommentTokens(chunks));

  // split non comment|string tokens on whitespace and word boundaries
  var tokensOut = [];
  for (var i = 0; i < tokensIn.length; ++i) {
    var tok = tokensIn[i];
    if (PR_PLAIN == tok.style) {
      PR_splitNonStringNonCommentToken(tok.token, tokensOut);
    } else {
      tokensOut.push(tok);
    }
  }

  return tokensOut;
}
/**
 * returns a list of PR_Token objects given chunks of markup.
 *
 * The work is done by four passes, each consuming the previous pass's
 * output:
 *   1) PR_tokenizeMarkup splits the chunks into text, tag and source tokens.
 *   2) PR_splitTagAttributes splits attribute names and values out of tags.
 *   3) PR_splitSourceNodes lexes embedded source regions.
 *   4) PR_splitSourceAttributes lexes attribute values that hold source.
 *
 * @param chunks list of PR_Token
 * @return list of PR_Token
 */
function PR_lexMarkup(chunks) {
  var tokenized = PR_tokenizeMarkup(chunks);
  var withAttributes = PR_splitTagAttributes(tokenized);
  var withSourceNodes = PR_splitSourceNodes(withAttributes);
  return PR_splitSourceAttributes(withSourceNodes);
}
/**
 * classify the string as either source or markup and lex appropriately.
 *
 * The string is treated as markup when the first PR_PLAIN chunk, trimmed,
 * starts with '<' and the last PR_PLAIN chunk, trimmed, ends with '>'.
 *
 * @param s the code, as passed to PR_chunkify
 * @return list of PR_Token
 */
function PR_lexOne(s) {
  var chunks = PR_chunkify(s);

  // locate the first plain chunk, if there is one
  var firstPlain = -1;
  for (var i = 0; i < chunks.length && firstPlain < 0; ++i) {
    if (PR_PLAIN == chunks[i].style) {
      firstPlain = i;
    }
  }

  var isMarkup = false;
  if (firstPlain >= 0 &&
      PR_startsWith(PR_trim(chunks[firstPlain].token), '<')) {
    // a plain chunk exists, so this backwards scan always stops
    var lastPlain = chunks.length - 1;
    while (PR_PLAIN != chunks[lastPlain].style) {
      --lastPlain;
    }
    isMarkup = PR_endsWith(PR_trim(chunks[lastPlain].token), '>');
  }

  if (isMarkup) {
    return PR_lexMarkup(chunks);
  }
  return PR_lexSource(chunks);
}
/**
 * pretty print a chunk of code.
 *
 * Each run of tokens with the same non-null style is wrapped in a
 * <span class=STYLE> element; tokens with a null style are copied through
 * unchanged.  If lexing throws, the input is returned unmodified.
 *
 * @param s code as html
 * @return code as html, but prettier
 */
function prettyPrintOne(s) {
  try {
    var tokens = PR_lexOne(s);
    var out = '';
    var lastStyle = null;
    for (var i = 0; i < tokens.length; i++) {
      var t = tokens[i];
      if (t.style != lastStyle) {
        if (lastStyle != null) {
          out += '</span>';
        }
        if (t.style != null) {
          out += '<span class=' + t.style + '>';
        }
        lastStyle = t.style;
      }
      var html = t.token;
      if (null != t.style) {
        // This interacts badly with the wiki which introduces paragraph tags
        // into pre blocks for some strange reason.
        // It's necessary for IE though which seems to lose the preformattedness
        // of <pre> tags when their innerHTML is assigned.
        // Line breaks become <br>, and each pair of spaces becomes a
        // non-breaking space plus a space so that runs of blanks survive.
        html = html.replace(/(?:\r\n?)|\n/g, '<br>')
                   .replace(/ {2}/g, '&nbsp; ');
      }
      out += html;
    }
    if (lastStyle != null) {
      out += '</span>';
    }
    return out;
  } catch (e) {
    // deliberate best effort: a lexer failure leaves the code as it was.
    //alert(e.stack);  // DISABLE in production
    return s;
  }
}

/**
 * find all the <pre>, <code> and <xmp> tags in the DOM with
 * class=prettyprint and prettify them.
 *
 * The work is done in slices of about 250ms, rescheduled with setTimeout,
 * so that a large page does not make the browser unresponsive.
 */
function prettyPrint() {
  // fetch a list of nodes to rewrite
  var codeSegments = [
      document.getElementsByTagName('pre'),
      document.getElementsByTagName('code'),
      document.getElementsByTagName('xmp') ];
  var elements = [];
  for (var i = 0; i < codeSegments.length; ++i) {
    for (var j = 0; j < codeSegments[i].length; ++j) {
      elements.push(codeSegments[i][j]);
    }
  }
  codeSegments = null;

  // tagName is upper case in HTML documents and keeps the source case in
  // XML ones, so tag names are always compared in upper case.
  function upperTagName(node) {
    return node.tagName ? String(node.tagName).toUpperCase() : '';
  }

  // true if node is a pre, code or xmp element marked with prettyprint.
  function isPrettyPrintContainer(node) {
    var tag = upperTagName(node);
    return (tag == 'PRE' || tag == 'CODE' || tag == 'XMP') &&
        !!node.className && node.className.indexOf('prettyprint') >= 0;
  }

  // the loop is broken into a series of continuations to make sure that we
  // don't make the browser unresponsive when rewriting a large page.
  var k = 0;

  function doWork() {
    var endTime = new Date().getTime() + 250;
    for (; k < elements.length && new Date().getTime() < endTime; k++) {
      var cs = elements[k];
      if (cs.className && cs.className.indexOf('prettyprint') >= 0) {

        // make sure this is not nested in an already prettified element
        var nested = false;
        for (var p = cs.parentNode; p != null; p = p.parentNode) {
          if (isPrettyPrintContainer(p)) {
            nested = true;
            break;
          }
        }
        if (!nested) {
          // XMP tags contain unescaped entities so require special handling.
          var isRawContent = 'XMP' == upperTagName(cs);

          // fetch the content as a snippet of properly escaped HTML
          var content = cs.innerHTML;
          if (isRawContent) {
            content = PR_textToHtml(content);
          }

          // do the pretty printing
          var newContent = prettyPrintOne(content);

          // push the prettified html back into the tag.
          if (!isRawContent) {
            // just replace the old html with the new
            cs.innerHTML = newContent;
          } else {
            // we need to change the tag to a <pre> since <xmp>s do not allow
            // embedded tags such as the span tags used to attach styles to
            // sections of source code.
            var pre = document.createElement('PRE');
            for (var i = 0; i < cs.attributes.length; ++i) {
              var a = cs.attributes[i];
              if (a.specified) {
                pre.setAttribute(a.name, a.value);
              }
            }
            pre.innerHTML = newContent;
            // remove the old
            cs.parentNode.replaceChild(pre, cs);
          }
        }
      }
    }
    if (k < elements.length) {
      // finish up in a continuation
      setTimeout(doWork, 250);
    }
  }

  doWork();
}