xref: /aosp_15_r20/external/testng/doc/prettify.js (revision 83a54b2f2026052c38867f4cde56111edcd60b56)
1*83a54b2fSSadaf Ebrahimi// Copyright 2005 Google Inc.
2*83a54b2fSSadaf Ebrahimi// All Rights Reserved.
3*83a54b2fSSadaf Ebrahimi//
4*83a54b2fSSadaf Ebrahimi// [email protected]
5*83a54b2fSSadaf Ebrahimi
6*83a54b2fSSadaf Ebrahimi// Usage:
7*83a54b2fSSadaf Ebrahimi// 1) include this source file in an html page via
8*83a54b2fSSadaf Ebrahimi// <script type=text/javascript src=prettify.js></script>
9*83a54b2fSSadaf Ebrahimi// 2) define style rules.  See the example page for examples.
10*83a54b2fSSadaf Ebrahimi// 3) mark the <pre> and <code> tags in your source with class=prettyprint.
11*83a54b2fSSadaf Ebrahimi//    You can also use the (html deprecated) <xmp> tag, but the pretty printer
12*83a54b2fSSadaf Ebrahimi//    needs to do more substantial DOM manipulations to support that, so some
13*83a54b2fSSadaf Ebrahimi//    css styles may not be preserved.
14*83a54b2fSSadaf Ebrahimi
15*83a54b2fSSadaf Ebrahimi// Change log:
16*83a54b2fSSadaf Ebrahimi// cbeust, 2006/08/22
17*83a54b2fSSadaf Ebrahimi//   Java annotations (start with "@") are now captured as literals ("lit")
18*83a54b2fSSadaf Ebrahimi//
19*83a54b2fSSadaf Ebrahimi
/** the set of reserved words across all target languages.  Each keyword is
  * stored as a property name whose value is true.
  * NOTE: this is a plain object, so names inherited from Object.prototype
  * (e.g. "toString") also read as truthy on a bare property lookup.
  */
var PR_keywords = {};
/** initialize the keyword list for our target languages. */
(function () {
  // The list previously ended "... using declaration, using directive uuid",
  // prose pasted from reference documentation; it registered the bogus
  // keywords "declaration," and "directive".  Only "using" is a keyword.
  var CPP_KEYWORDS = (
    "bool break case catch char class const const_cast continue default " +
    "delete deprecated dllexport dllimport do double dynamic_cast else enum " +
    "explicit extern false float for friend goto if inline int long mutable " +
    "naked namespace new noinline noreturn nothrow novtable operator private " +
    "property protected public register reinterpret_cast return selectany " +
    "short signed sizeof static static_cast struct switch template this " +
    "thread throw true try typedef typeid typename union unsigned using " +
    "uuid virtual void volatile while typeof");
  var JAVA_KEYWORDS = (
    "abstract default goto package synchronized boolean do if private this " +
    "break double implements protected throw byte else import public throws " +
    "case enum instanceof return transient catch extends int short try char " +
    "final interface static void class finally long strictfp volatile const " +
    "float native super while continue for new switch");
  var PYTHON_KEYWORDS = (
    "and assert break class continue def del elif else except exec finally " +
    "for from global if import in is lambda not or pass print raise return " +
    "try while yield");
  var JSCRIPT_KEYWORDS = (
    "abstract boolean break byte case catch char class const continue " +
    "debugger default delete do double else enum export extends false final " +
    "finally float for function goto if implements import in instanceof int " +
    "interface long native new null package private protected public return " +
    "short static super switch synchronized this throw throws transient " +
    "true try typeof var void volatile while with NaN Infinity");
  var PERL_KEYWORDS = (
    "foreach require sub unless until use elsif BEGIN END");
  var SH_KEYWORDS = (
    "if then do else fi end");
  var KEYWORDS = [CPP_KEYWORDS, JAVA_KEYWORDS, PYTHON_KEYWORDS,
                  JSCRIPT_KEYWORDS, PERL_KEYWORDS, SH_KEYWORDS];
  for (var k = 0; k < KEYWORDS.length; k++) {
    var kw = KEYWORDS[k].split(' ');
    for (var i = 0; i < kw.length; i++) {
      // skip empty strings produced by consecutive spaces
      if (kw[i]) { PR_keywords[kw[i]] = true; }
    }
  }
}).call(this);
62*83a54b2fSSadaf Ebrahimi
// token style names.  correspond to css classes
/** token style for a string literal */
var PR_STRING = 'str';
/** token style for a keyword */
var PR_KEYWORD = 'kwd';
/** token style for a comment */
var PR_COMMENT = 'com';
/** token style for a type */
var PR_TYPE = 'typ';
/** token style for a literal value.  e.g. 1, null, true. */
var PR_LITERAL = 'lit';
/** token style for a punctuation string. */
var PR_PUNCTUATION = 'pun';
/** token style for plain text, i.e. text not given any other style. */
var PR_PLAIN = 'pln';

/** token style for an sgml tag. */
var PR_TAG = 'tag';
/** token style for a markup declaration such as a DOCTYPE. */
var PR_DECLARATION = 'dec';
/** token style for embedded source. */
var PR_SOURCE = 'src';
/** token style for an sgml attribute name. */
var PR_ATTRIB_NAME = 'atn';
/** token style for an sgml attribute value. */
var PR_ATTRIB_VALUE = 'atv';
89*83a54b2fSSadaf Ebrahimi
/** the position of the end of a token.  A division of a string into
  * n tokens can be represented as a series of n - 1 token ends, as long as
  * runs of whitespace warrant their own token.
  * @constructor
  * @param end a number: the index just past the token's last character.
  * @param style the style of the token that ends here.  May be null, but
  *   must not be undefined.
  * @throws Error if style is undefined or end is not a number.
  * @private
  */
function PR_TokenEnd(end, style) {
  if (undefined === style) { throw new Error('BAD'); }
  if ('number' !== typeof end) { throw new Error('BAD'); }
  this.end = end;
  this.style = style;
}
/** debugging form: "[PR_TokenEnd <end>:<style>]"; the ":<style>" part is
  * omitted when style is falsy.
  */
PR_TokenEnd.prototype.toString = function () {
  return '[PR_TokenEnd ' + this.end +
    (this.style ? ':' + this.style : '') + ']';
};
105*83a54b2fSSadaf Ebrahimi
106*83a54b2fSSadaf Ebrahimi
/** a chunk of text with a style.  These are used to represent both the output
  * from the lexing functions as well as intermediate results.
  * @constructor
  * @param token the token text
  * @param style one of the token styles defined in designdoc-template, or null
  *   for a styleless token, such as an embedded html tag.  Must not be
  *   undefined.
  * @throws Error if style is undefined.
  * @private
  */
function PR_Token(token, style) {
  if (undefined === style) { throw new Error('BAD'); }
  this.token = token;
  this.style = style;
}

/** debugging form: "[PR_Token <text>:<style>]"; the ":<style>" part is
  * omitted when style is falsy.
  */
PR_Token.prototype.toString = function () {
  return '[PR_Token ' + this.token + (this.style ? ':' + this.style : '') + ']';
};
124*83a54b2fSSadaf Ebrahimi
125*83a54b2fSSadaf Ebrahimi
/** a helper class that decodes common html entities used to escape source and
  * markup punctuation characters in html.
  * After a call to decode, the ch field holds the decoded character and the
  * next field holds the index at which to resume scanning.
  * @constructor
  * @private
  */
function PR_DecodeHelper() {
  this.next = 0;
  this.ch = '\0';
}

/** decodes the character or entity that starts at index i of s.
  * Recognizes &lt; &gt; &quot; &apos; and &amp; (case-insensitively); any
  * other text, including unrecognized entities, is consumed one character at
  * a time.
  * @param s the string being scanned.
  * @param i the index in s at which to decode.
  * @return the decoded character, also stored in this.ch.  this.next is set
  *   to the index following the consumed character or entity.
  */
PR_DecodeHelper.prototype.decode = function (s, i) {
  var next = i + 1;
  var ch = s.charAt(i);
  if ('&' === ch) {
    var semi = s.indexOf(';', next);
    // The longest recognized names ("quot", "apos") are 4 characters, so the
    // ';' may sit as far out as next + 4.  The bound used to be exclusive,
    // which made the quot and apos cases unreachable.
    if (semi >= 0 && semi <= next + 4) {
      var entityName = s.substring(next, semi).toLowerCase();
      next = semi + 1;
      if ('lt' === entityName) {
        ch = '<';
      } else if ('gt' === entityName) {
        ch = '>';
      } else if ('quot' === entityName) {
        ch = '"';
      } else if ('apos' === entityName) {
        ch = '\'';
      } else if ('amp' === entityName) {
        ch = '&';
      } else {
        // not an entity we know: consume only the '&' itself
        next = i + 1;
      }
    }
  }
  this.next = next;
  this.ch = ch;
  return this.ch;
};
163*83a54b2fSSadaf Ebrahimi
164*83a54b2fSSadaf Ebrahimi
165*83a54b2fSSadaf Ebrahimi// some string utilities
/** true iff ch is an ASCII letter, a-z or A-Z. */
function PR_isWordChar(ch) {
  return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
}
169*83a54b2fSSadaf Ebrahimi
/** true iff ch may begin an identifier: an ASCII letter, '_', '$', or '@'
  * (the latter so that Java annotations lex as a single token).
  */
function PR_isIdentifierStart(ch) {
  return PR_isWordChar(ch) || ch === '_' || ch === '$' || ch === '@';
}
173*83a54b2fSSadaf Ebrahimi
/** true iff ch may appear inside an identifier: anything accepted by
  * PR_isIdentifierStart, or a decimal digit.
  */
function PR_isIdentifierPart(ch) {
  return PR_isIdentifierStart(ch) || PR_isDigitChar(ch);
}
177*83a54b2fSSadaf Ebrahimi
/** true iff ch is a tab, space, carriage return, or newline.
  * NOTE: the test is a substring search, so the empty string (what charAt
  * returns past the end of a string) is also reported as a space.
  */
function PR_isSpaceChar(ch) {
  return "\t \r\n".indexOf(ch) >= 0;
}
181*83a54b2fSSadaf Ebrahimi
/** true iff ch is an ASCII decimal digit, 0-9. */
function PR_isDigitChar(ch) {
  return ch >= '0' && ch <= '9';
}
185*83a54b2fSSadaf Ebrahimi
/** returns s with leading and trailing characters accepted by PR_isSpaceChar
  * removed.  A string consisting only of such characters yields ''.
  */
function PR_trim(s) {
  var i = 0, j = s.length - 1;
  // i stops at the first non-space character, or at j + 1 if there is none
  while (i <= j && PR_isSpaceChar(s.charAt(i))) { ++i; }
  // j stops at the last non-space character
  while (j > i && PR_isSpaceChar(s.charAt(j))) { --j; }
  return s.substring(i, j + 1);
}
192*83a54b2fSSadaf Ebrahimi
/** true iff the string s begins with the string prefix. */
function PR_startsWith(s, prefix) {
  return s.length >= prefix.length &&
         prefix === s.substring(0, prefix.length);
}
196*83a54b2fSSadaf Ebrahimi
/** true iff the string s ends with the string suffix. */
function PR_endsWith(s, suffix) {
  return s.length >= suffix.length &&
         suffix === s.substring(s.length - suffix.length, s.length);
}
201*83a54b2fSSadaf Ebrahimi
/** true iff prefix matches the first prefix.length characters in
  * chars[0:len].
  * @param chars an array of single-character strings.
  * @param len the number of valid entries at the start of chars.
  * @param prefix the string to look for.
  * @private
  */
function PR_prefixMatch(chars, len, prefix) {
  if (len < prefix.length) { return false; }
  for (var i = 0, n = prefix.length; i < n; ++i) {
    if (prefix.charAt(i) !== chars[i]) { return false; }
  }
  return true;
}
212*83a54b2fSSadaf Ebrahimi
/** used to convert html special characters embedded in XMP tags into html.
  * Escapes '&', '<' and '>' as entities.  '&' is replaced first so that the
  * entities introduced for '<' and '>' are not escaped a second time.
  */
function PR_textToHtml(str) {
  return str.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
}
217*83a54b2fSSadaf Ebrahimi
218*83a54b2fSSadaf Ebrahimi
/** split markup into chunks of html tags (style null) and
  * plain text (style {@link #PR_PLAIN}).
  *
  * A tag is recognized as '<', an optional '/', an ASCII letter, and then
  * everything up to and including the next '>'.  Anything else, including
  * text after a '<' that does not start such a tag, is left as plain text.
  *
  * @param s a String of html.
  * @return an Array of PR_Tokens of style PR_PLAIN and null.
  * @private
  */
function PR_chunkify(s) {
  var chunks = [];
  // FSM state: 0 = in text, 1 = just saw '<', 2 = just saw '</',
  // 3 = inside a tag, waiting for '>'.
  var state = 0;
  var start = 0;  // start of the text not yet emitted
  var pos = -1;   // index of the '<' that opened the candidate tag
  for (var i = 0, n = s.length; i < n; ++i) {
    var ch = s.charAt(i);
    switch (state) {
      case 0:
        if ('<' === ch) { state = 1; }
        break;
      case 1:
        pos = i - 1;
        if ('/' === ch) { state = 2; }
        else if (PR_isWordChar(ch)) { state = 3; }
        else if ('<' === ch) { state = 1; }
        else { state = 0; }
        break;
      case 2:
        if (PR_isWordChar(ch)) { state = 3; }
        else if ('<' === ch) { state = 1; }
        else { state = 0; }
        break;
      case 3:
        if ('>' === ch) {
          // flush any plain text that preceded the tag, then the tag itself
          if (pos > start) {
            chunks.push(new PR_Token(s.substring(start, pos), PR_PLAIN));
          }
          chunks.push(new PR_Token(s.substring(pos, i + 1), null));
          start = i + 1;
          pos = -1;
          state = 0;
        }
        break;
    }
  }
  // whatever is left, including an unterminated tag, is plain text
  if (s.length > start) {
    chunks.push(new PR_Token(s.substring(start, s.length), PR_PLAIN));
  }
  return chunks;
}
267*83a54b2fSSadaf Ebrahimi
/** splits chunks around entities.
  * Each PR_PLAIN chunk is divided so that every run of the form '&', then
  * '#' or an ASCII letter, then anything up to and including the next ';'
  * becomes its own chunk of style null.  Chunks of any other style are
  * passed through untouched, as is a plain chunk that contains no entity.
  *
  * @param chunks an Array of PR_Tokens.
  * @return a new Array of PR_Tokens whose text, concatenated, equals the
  *   concatenated text of the input.
  * @private
  */
function PR_splitEntities(chunks) {
  var chunksOut = [];
  for (var ci = 0, nc = chunks.length; ci < nc; ++ci) {
    var chunk = chunks[ci];
    if (PR_PLAIN !== chunk.style) {
      chunksOut.push(chunk);
      continue;
    }
    var s = chunk.token;
    // The scanner state is per chunk.  It used to leak from one chunk to the
    // next, so an unterminated "&name" at the end of a chunk made a ';' in a
    // later chunk slice with a stale start index and emit duplicated text.
    // FSM state: 0 = in text, 1 = just saw '&', 2 = inside an entity.
    var state = 0;
    var pos = 0;     // start of the text of s not yet emitted
    var start = -1;  // index of the '&' that opened the current entity
    for (var i = 0; i < s.length; ++i) {
      var ch = s.charAt(i);
      switch (state) {
        case 0:
          if ('&' === ch) { state = 1; }
          break;
        case 1:
          if ('#' === ch || PR_isWordChar(ch)) {
            start = i - 1;
            state = 2;
          } else {
            state = 0;
          }
          break;
        case 2:
          if (';' === ch) {
            if (start > pos) {
              chunksOut.push(
                  new PR_Token(s.substring(pos, start), chunk.style));
            }
            chunksOut.push(new PR_Token(s.substring(start, i + 1), null));
            pos = i + 1;
            state = 0;
          }
          break;
      }
    }
    if (s.length > pos) {
      // reuse the original chunk object when nothing was split off
      chunksOut.push(pos ?
                     new PR_Token(s.substring(pos, s.length), chunk.style) :
                     chunk);
    }
  }
  return chunksOut;
}
318*83a54b2fSSadaf Ebrahimi
/** walk the tokenEnds list and the chunk list in parallel to generate a list
  * of split tokens.
  * The text of the chunks is re-cut at every position in tokenEnds.  A piece
  * cut from a chunk whose style is null keeps style null; any other piece
  * takes the style of the token end that closes it.
  *
  * @param chunks an Array of PR_Tokens.
  * @param tokenEnds an Array of PR_TokenEnds whose end fields index into the
  *   concatenated text of chunks.
  * @return an Array of PR_Tokens.
  * @private
  */
function PR_splitChunks(chunks, tokenEnds) {
  var tokens = [];  // the output

  var ci = 0;  // index into chunks
  // position of beginning of amount written so far in absolute space.
  var posAbs = 0;
  // position of amount written so far in chunk space
  var posChunk = 0;

  // current chunk.  Starts as an empty sentinel so that the first pass
  // through the loop below fetches chunks[0].
  var chunk = new PR_Token('', null);

  for (var ei = 0, ne = tokenEnds.length; ei < ne; ++ei) {
    var tokenEnd = tokenEnds[ei];
    var end = tokenEnd.end;

    var tokLen = end - posAbs;
    var remainingInChunk = chunk.token.length - posChunk;
    // the token reaches to or past the end of the current chunk: emit the
    // rest of the chunk and move on to the next one.
    while (remainingInChunk <= tokLen) {
      if (remainingInChunk > 0) {
        tokens.push(
            new PR_Token(chunk.token.substring(posChunk, chunk.token.length),
                         null == chunk.style ? null : tokenEnd.style));
      }
      posAbs += remainingInChunk;
      posChunk = 0;
      // All chunk text has been emitted, so nothing more can be produced.
      // Without this exit the loop re-read the last chunk from its start,
      // and never terminated when that chunk was empty (e.g. chunks = []).
      if (ci >= chunks.length) { return tokens; }
      chunk = chunks[ci++];

      tokLen = end - posAbs;
      remainingInChunk = chunk.token.length - posChunk;
    }

    // the token ends strictly inside the current chunk
    if (tokLen) {
      tokens.push(
          new PR_Token(chunk.token.substring(posChunk, posChunk + tokLen),
                       tokenEnd.style));
      posAbs += tokLen;
      posChunk += tokLen;
    }
  }

  return tokens;
}
366*83a54b2fSSadaf Ebrahimi
/** splits markup tokens into declarations, tags, and source chunks.
  *
  * @param chunks an Array of PR_Tokens.  Only chunks of style PR_PLAIN are
  *   scanned; the others contribute just their length to absolute positions.
  * @return an Array of PR_TokenEnds in absolute space.  Each recognized
  *   construct contributes a PR_PLAIN end at its start followed by an end of
  *   style PR_DECLARATION, PR_COMMENT, PR_TAG or PR_SOURCE at its close; a
  *   final PR_PLAIN end marks the total length.
  * @private
  */
function PR_splitMarkup(chunks) {
  // A state machine to split out declarations, tags, etc.
  // This state machine deals with absolute space in the text, indexed by k,
  // and position in the current chunk, indexed by i and tokenStart, to
  // generate a list of the ends of tokens.
  // Absolute space is calculated by considering the chunks as appended into
  // one big string, as they were before being split.

  // Known failure cases
  // Server side scripting sections such as <?...?> in attributes.
  // i.e. <span class="<? foo ?>">
  // Handling this would require a stack, and we don't use PHP.

  // The output: a list of PR_TokenEnd instances
  var tokenEnds = [];

  // FSM state variable:
  //   0      in text
  //   1      just saw '<'
  //   2      just saw '<!'
  //   3      in a declaration, waiting for '>'
  //   4,5,6  in a '<!-' comment, tracking '-' characters before '>'
  //   7      just saw '</'
  //   8      in a tag, waiting for '>'
  //   9,10   in a '<?' section; 10 means the last character was '?'
  //   11,12  in a '<%' section; 12 means the last character was '%'
  var state = 0;
  var k = 0;  // position in absolute space of the start of the current chunk
  var tokenStart = -1;  // the start of the current token

  // Try to find a closing tag for any open <style> or <script> tags
  // We can't do this at a later stage because then the following case
  // would fail:
  // <script>document.writeln('<!--');</script>

  // We use tokenChars[:tokenCharsI] to accumulate the tag name so that we
  // can check whether to enter into a no scripting section when the tag ends.
  // The array length (12) caps how many characters are recorded.
  var tokenChars = new Array(12);
  var tokenCharsI = 0;
  // if non null, the tag prefix that we need to see to break out.
  var endScriptTag = null;
  var decodeHelper = new PR_DecodeHelper();

  for (var ci = 0, nc = chunks.length; ci < nc; ++ci) {
    var chunk = chunks[ci];
    if (PR_PLAIN != chunk.style) {
      k += chunk.token.length;
      continue;
    }

    var s = chunk.token;

    for (var i = 0, n = s.length; i < n; /* i = next at bottom */) {
      // decode entities so that an escaped character such as &lt; is seen
      // as '<'
      decodeHelper.decode(s, i);
      var ch = decodeHelper.ch;
      var next = decodeHelper.next;

      var tokenStyle = null;
      switch (state) {
        case 0:
          if ('<' == ch) { state = 1; }
          break;
        case 1:
          tokenCharsI = 0;
          if ('/' == ch) {  // only consider close tags if we're in script/style
            state = 7;
          } else if (null == endScriptTag) {
            if ('!' == ch) {
              state = 2;
            } else if (PR_isWordChar(ch)) {
              state = 8;
            } else if ('?' == ch) {
              state = 9;
            } else if ('%' == ch) {
              state = 11;
            } else if ('<' != ch) {
              state = 0;
            }
          } else if ('<' != ch) {
            state = 0;
          }
          break;
        case 2:
          if ('-' == ch) {
            state = 4;
          } else if (PR_isWordChar(ch)) {
            state = 3;
          } else if ('<' == ch) {
            state = 1;
          } else {
            state = 0;
          }
          break;
        case 3:
          if ('>' == ch) {
            state = 0;
            tokenStyle = PR_DECLARATION;
          }
          break;
        case 4:
          if ('-' == ch) { state = 5; }
          break;
        case 5:
          if ('-' == ch) { state = 6; }
          break;
        case 6:
          if ('>' == ch) {
            state = 0;
            tokenStyle = PR_COMMENT;
          } else if ('-' == ch) {
            state = 6;
          } else {
            state = 4;
          }
          break;
        case 7:
          if (PR_isWordChar(ch)) {
            state = 8;
          } else if ('<' == ch) {
            state = 1;
          } else {
            state = 0;
          }
          break;
        case 8:
          if ('>' == ch) {
            state = 0;
            tokenStyle = PR_TAG;
          }
          break;
        case 9:
          if ('?' == ch) { state = 10; }
          break;
        case 10:
          if ('>' == ch) {
            state = 0;
            tokenStyle = PR_SOURCE;
          } else if ('?' != ch) {
            state = 9;
          }
          break;
        case 11:
          if ('%' == ch) { state = 12; }
          break;
        case 12:
          if ('>' == ch) {
            state = 0;
            tokenStyle = PR_SOURCE;
          } else if ('%' != ch) {
            state = 11;
          }
          break;
      }

      // record the lower-cased character; case 1 resets the index, so the
      // buffer holds the text that follows the most recent '<'
      if (tokenCharsI < tokenChars.length) {
        tokenChars[tokenCharsI++] = ch.toLowerCase();
      }
      // still in state 1 means this character was a '<': remember where the
      // candidate token starts
      if (1 == state) { tokenStart = k + i; }
      i = next;
      if (tokenStyle != null) {
        if (endScriptTag) {
          if (PR_prefixMatch(tokenChars, tokenCharsI, endScriptTag)) {
            endScriptTag = null;
          }
        } else {
          if (PR_prefixMatch(tokenChars, tokenCharsI, 'script')) {
            endScriptTag = '/script';
          } else if (PR_prefixMatch(tokenChars, tokenCharsI, 'style')) {
            endScriptTag = '/style';
          } else if (PR_prefixMatch(tokenChars, tokenCharsI, 'xmp')) {
            endScriptTag = '/xmp';
          }
        }
        // disallow the tag if endScriptTag is set and this was not an open
        // tag.
        if (endScriptTag && tokenCharsI && '/' == tokenChars[0]) {
          tokenStyle = null;
        }
        if (null != tokenStyle) {
          tokenEnds.push(new PR_TokenEnd(tokenStart, PR_PLAIN));
          tokenEnds.push(new PR_TokenEnd(k + next, tokenStyle));
        }
      }
    }
    k += chunk.token.length;
  }
  tokenEnds.push(new PR_TokenEnd(k, PR_PLAIN));

  return tokenEnds;
}
553*83a54b2fSSadaf Ebrahimi
/** splits the given string into comment, string, and "other" tokens.
  *
  * The scanner is a small state machine that only inspects PR_PLAIN chunks;
  * chunks with any other style are skipped but still advance the absolute
  * position.  The states are:
  *   0 - plain text
  *   1 - inside a string delimited by ", ', or `
  *   2 - just after a backslash inside a string
  *   3 - just after a single '/'
  *   4 - inside a line comment (started by // or #)
  *   5 - inside a block comment
  *   6 - inside a block comment, just after a '*'
  *
  * A string or comment that is still open when the input ends keeps its
  * style, so a trailing line comment without a newline is a PR_COMMENT.
  *
  * @param chunks an array of tokens; only those styled PR_PLAIN are scanned.
  * @return an array of PR_Tokens with style in
  *   (PR_STRING, PR_COMMENT, PR_PLAIN, null)
  *   The result array may contain spurious zero length tokens.  Ignore them.
  *
  * @private
  */
function PR_splitStringAndCommentTokens(chunks) {
  // a state machine to split out comments, strings, and other stuff
  var tokenEnds = new Array();  // positions of ends of tokens in absolute space
  var state = 0;  // FSM state variable
  var delim = -1;  // string delimiter
  var k = 0;  // absolute position of beginning of current chunk
  for (var ci = 0, nc = chunks.length; ci < nc; ++ci) {
    var chunk = chunks[ci];
    var s = chunk.token;
    if (PR_PLAIN == chunk.style) {
      for (var i = 0, n = s.length; i < n; ++i) {
        var ch = s.charAt(i);
        if (0 == state) {
          if (ch == '"' || ch == '\'' || ch == '`') {
            // everything before the opening quote is plain
            tokenEnds.push(new PR_TokenEnd(k + i, PR_PLAIN));
            state = 1;
            delim = ch;
          } else if (ch == '/') {
            state = 3;
          } else if (ch == '#') {
            tokenEnds.push(new PR_TokenEnd(k + i, PR_PLAIN));
            state = 4;
          }
        } else if (1 == state) {
          if (ch == delim) {
            state = 0;
            // the string token includes its closing delimiter
            tokenEnds.push(new PR_TokenEnd(k + i + 1, PR_STRING));
          } else if (ch == '\\') {
            state = 2;
          }
        } else if (2 == state) {
          // the escaped character is consumed whatever it is
          state = 1;
        } else if (3 == state) {
          if (ch == '/') {
            state = 4;
            // the plain run ends just before the first '/'
            tokenEnds.push(new PR_TokenEnd(k + i - 1, PR_PLAIN));
          } else if (ch == '*') {
            state = 5;
            tokenEnds.push(new PR_TokenEnd(k + i - 1, PR_PLAIN));
          } else {
            state = 0;
            // next loop will reenter state 0 with the same value of i, so
            // ch will be reconsidered as start of new token.
            --i;
          }
        } else if (4 == state) {
          if (ch == '\r' || ch == '\n') {
            state = 0;
            // the line break itself is not part of the comment
            tokenEnds.push(new PR_TokenEnd(k + i, PR_COMMENT));
          }
        } else if (5 == state) {
          if (ch == '*') {
            state = 6;
          }
        } else if (6 == state) {
          if (ch == '/') {
            state = 0;
            tokenEnds.push(new PR_TokenEnd(k + i + 1, PR_COMMENT));
          } else if (ch != '*') {
            state = 5;
          }
        }
      }
    }
    k += s.length;
  }

  // A token always ends at the end of the input.  Give it the style of the
  // construct that is still open, which can legally happen for a line
  // comment on the last line.
  var endStyle;
  switch (state) {
    case 1: case 2:
      endStyle = PR_STRING;
      break;
    case 4: case 5: case 6:
      endStyle = PR_COMMENT;
      break;
    default:
      endStyle = PR_PLAIN;
      break;
  }
  tokenEnds.push(new PR_TokenEnd(k, endStyle));

  return PR_splitChunks(chunks, tokenEnds);
}
631*83a54b2fSSadaf Ebrahimi
/** used by lexSource to split a non string, non comment token.
  *
  * Walks s with a small state machine and appends one PR_Token per run to
  * outlist.  The states are:
  *   0 - whitespace
  *   1 - identifier
  *   2 - number literal
  *   3 - punctuation
  * Identifiers are styled PR_KEYWORD when listed in PR_keywords, PR_LITERAL
  * when they start with '@', PR_TYPE when they look like a type name, and
  * PR_PLAIN otherwise.
  *
  * @param s the text to split.
  * @param outlist the array that receives the resulting PR_Tokens.
  * @private
  */
function PR_splitNonStringNonCommentToken(s, outlist) {
  var pos = 0;  // start of the token currently being accumulated
  var state = 0;
  for (var i = 0; i <= s.length; i++) {
    var ch = s.charAt(i);
    // the next state.
    // if set to -1 then it will cause a reentry to state 0 without consuming
    // another character.
    var nstate = state;

    if (i == s.length) {
      // nstate will not be equal to state, so it will append the token
      nstate = -2;
    } else {
      switch (state) {
      case 0:  // whitespace state
        if (PR_isIdentifierStart(ch)) {
          nstate = 1;
        } else if (PR_isDigitChar(ch)) {
          nstate = 2;
        } else if (!PR_isSpaceChar(ch)) {
          nstate = 3;
        }
        // leaving whitespace: emit the pending whitespace run as plain text
        if (nstate && pos < i) {
          var t = s.substring(pos, i);
          outlist.push(new PR_Token(t, PR_PLAIN));
          pos = i;
        }
        break;
      case 1:  // identifier state
        if (!PR_isIdentifierPart(ch)) {
          nstate = -1;
        }
        break;
      case 2:  // number literal state
        // handle numeric literals like
        // 0x7f 300UL 100_000

        // this does not treat floating point values as a single literal
        //   0.1 and 3e-6
        // are each split into multiple tokens
        if (!(PR_isDigitChar(ch) || PR_isWordChar(ch) || ch == '_')) {
          nstate = -1;
        }
        break;
      case 3:  // punctuation state
        if (PR_isIdentifierStart(ch) || PR_isDigitChar(ch) ||
            PR_isSpaceChar(ch)) {
          nstate = -1;
        }
        break;
      }
    }

    if (nstate != state) {
      if (nstate < 0) {
        if (i > pos) {
          var t = s.substring(pos, i);
          var ch0 = t.charAt(0);
          var style;
          if (PR_isIdentifierStart(ch0)) {
            // PR_keywords is a plain object, so only own properties count;
            // otherwise inherited names such as toString or constructor
            // would be styled as keywords.
            if (Object.prototype.hasOwnProperty.call(PR_keywords, t) &&
                PR_keywords[t]) {
              style = PR_KEYWORD;
            }
            else if (ch0 == '@') {
              style = PR_LITERAL;
            } else {
              // Treat any word that starts with an uppercase character and
              // contains at least one lowercase character as a type, or
              // ends with _t.
              // This works perfectly for Java, pretty well for C++, and
              // passably for Python.  The _t catches C structs.
              var isType = false;
              if (ch0 >= 'A' && ch0 <= 'Z') {
                for (var j = 1; j < t.length; j++) {
                  var ch1 = t.charAt(j);
                  if (ch1 >= 'a' && ch1 <= 'z') {
                    isType = true;
                    break;
                  }
                }
              }
              // The suffix test is independent of the case of the first
              // letter, so that names like size_t are recognized.
              if (!isType && t.length >= 2 &&
                  t.substring(t.length - 2) == '_t') {
                isType = true;
              }
              style = isType ? PR_TYPE : PR_PLAIN;
            }
          } else if (PR_isDigitChar(ch0)) {
            style = PR_LITERAL;
          } else if (!PR_isSpaceChar(ch0)) {
            style = PR_PUNCTUATION;
          } else {
            style = PR_PLAIN;
          }
          pos = i;
          outlist.push(new PR_Token(t, style));
        }

        state = 0;
        if (nstate == -1) {
          // don't increment.  This allows us to use state 0 to redispatch based
          // on the current character.
          i--;
          continue;
        }
      }
      state = nstate;
    }
  }
}
746*83a54b2fSSadaf Ebrahimi
/** tokenize a group of markup chunks.
  *
  * Computes the token boundaries with PR_splitMarkup and then cuts the
  * chunks at those boundaries with PR_splitChunks.
  *
  * @param chunks the chunks to split; a missing or empty list is handed back
  *   unchanged.
  * @private
  */
function PR_tokenizeMarkup(chunks) {
  // nothing to split
  if (!chunks || !chunks.length) {
    return chunks;
  }

  return PR_splitChunks(chunks, PR_splitMarkup(chunks));
}
756*83a54b2fSSadaf Ebrahimi
/** split tags attributes and their values out from the tag name, and
  * recursively lex source chunks.
  *
  * Only PR_TAG tokens are split; every other token is copied through, and a
  * token with a non-null style resets the state machine.  The states are:
  *   0 - before the '<' of a tag
  *   1 - in the tag name
  *   2 - in whitespace, waiting for an attribute name
  *   3 - in an attribute name
  *   4 - in whitespace after an attribute name
  *   5 - after '=', waiting for the value
  *   6 - in a quoted attribute value
  *   7 - in an unquoted attribute value
  *
  * @param tokens an array of PR_Tokens.
  * @return a new array in which tag tokens are split into PR_TAG,
  *   PR_ATTRIB_NAME and PR_ATTRIB_VALUE tokens.
  * @private
  */
function PR_splitTagAttributes(tokens) {
  var tokensOut = new Array();
  var state = 0;
  var stateStyle = PR_TAG;
  var delim = null;  // attribute delimiter for quoted value state.
  var decodeHelper = new PR_DecodeHelper();
  for (var ci = 0; ci < tokens.length; ++ci) {
    var tok = tokens[ci];
    if (PR_TAG == tok.style) {
      var s = tok.token;
      var start = 0;  // start of the piece that has not been emitted yet
      for (var i = 0; i < s.length; /* i = next at bottom */) {
        decodeHelper.decode(s, i);
        var ch = decodeHelper.ch;
        var next = decodeHelper.next;

        var emitEnd = null;  // null or position of end of chunk to emit.
        var nextStyle = null;  // null or next value of stateStyle
        if (ch == '>') {
          if (PR_TAG != stateStyle) {
            emitEnd = i;
            nextStyle = PR_TAG;
          }
        } else {
          switch (state) {
            case 0:
              if ('<' == ch) { state = 1; }
              break;
            case 1:
              if (PR_isSpaceChar(ch)) { state = 2; }
              break;
            case 2:
              if (!PR_isSpaceChar(ch)) {
                nextStyle = PR_ATTRIB_NAME;
                emitEnd = i;
                state = 3;
              }
              break;
            case 3:
              if ('=' == ch) {
                emitEnd = i;
                nextStyle = PR_TAG;
                state = 5;
              } else if (PR_isSpaceChar(ch)) {
                emitEnd = i;
                nextStyle = PR_TAG;
                state = 4;
              }
              break;
            case 4:
              if ('=' == ch) {
                state = 5;
              } else if (!PR_isSpaceChar(ch)) {
                emitEnd = i;
                nextStyle = PR_ATTRIB_NAME;
                state = 3;
              }
              break;
            case 5:
              if ('"' == ch || '\'' == ch) {
                emitEnd = i;
                nextStyle = PR_ATTRIB_VALUE;
                state = 6;
                delim = ch;
              } else if (!PR_isSpaceChar(ch)) {
                emitEnd = i;
                nextStyle = PR_ATTRIB_VALUE;
                state = 7;
              }
              break;
            case 6:
              if (ch == delim) {
                // the closing quote belongs to the value
                emitEnd = next;
                nextStyle = PR_TAG;
                state = 2;
              }
              break;
            case 7:
              if (PR_isSpaceChar(ch)) {
                emitEnd = i;
                nextStyle = PR_TAG;
                state = 2;
              }
              break;
          }
        }
        // Compare against null rather than testing truthiness: a boundary at
        // position 0 (a transition on the first character of a tag token
        // that continues an earlier one) must still switch the style.
        if (null != emitEnd) {
          if (emitEnd > start) {
            tokensOut.push(
                new PR_Token(s.substring(start, emitEnd), stateStyle));
            start = emitEnd;
          }
          stateStyle = nextStyle;
        }
        i = next;
      }
      // whatever is left of the tag token keeps the current style
      if (s.length > start) {
        tokensOut.push(new PR_Token(s.substring(start, s.length), stateStyle));
      }
    } else {
      // a styled non-tag token ends any tag in progress; null styled tokens
      // leave the state untouched
      if (tok.style) {
        state = 0;
        stateStyle = PR_TAG;
      }
      tokensOut.push(tok);
    }
  }
  return tokensOut;
}
870*83a54b2fSSadaf Ebrahimi
/** identify regions of markup that are really source code, and recursively
  * lex them.
  *
  * Source regions are <%...%> and <?...?> sections (PR_SOURCE tokens) and
  * the content between a script, style, or xmp open tag and the next end
  * tag.  The lexed source is emitted between null styled
  * '<span class=embsrc>' and '</span>' tokens.
  *
  * Null styled tokens are kept in document order: they are added to the
  * source being collected if there is one, and to the output otherwise.
  * Source that is still being collected when the input ends is lexed and
  * emitted instead of being dropped.
  *
  * @param tokens an array of PR_Tokens; it is not modified.
  * @return a new array of PR_Tokens.
  * @private
  */
function PR_splitSourceNodes(tokens) {
  var tokensOut = new Array();
  // when we see a <script> tag, store '/' here so that we know to end the
  // source processing.  For <% and <? sections this holds '%' or '?'.
  var endScriptTag = null;
  var decodeHelper = new PR_DecodeHelper();

  var sourceChunks = null;

  // lexes the source chunks collected so far, if any, and appends the result
  // to tokensOut wrapped in an embsrc span.
  function flushSourceChunks() {
    if (!sourceChunks) { return; }
    var sourceTokens = PR_lexSource(sourceChunks);
    tokensOut.push(new PR_Token('<span class=embsrc>', null));
    for (var si = 0, ns = sourceTokens.length; si < ns; ++si) {
      tokensOut.push(sourceTokens[si]);
    }
    tokensOut.push(new PR_Token('</span>', null));
    sourceChunks = null;
  }

  for (var ci = 0, nc = tokens.length; ci < nc; ++ci) {
    var tok = tokens[ci];
    if (null == tok.style) {
      // keep null styled tokens in place relative to their neighbours
      if (sourceChunks) {
        sourceChunks.push(tok);
      } else {
        tokensOut.push(tok);
      }
      continue;
    }

    var s = tok.token;

    if (null == endScriptTag) {
      if (PR_SOURCE == tok.style) {
        // split off any starting and trailing <?, <%
        if ('<' == decodeHelper.decode(s, 0)) {
          decodeHelper.decode(s, decodeHelper.next);
          if ('%' == decodeHelper.ch || '?' == decodeHelper.ch) {
            endScriptTag = decodeHelper.ch;
            tokensOut.push(new PR_Token(s.substring(0, decodeHelper.next),
                                        PR_TAG));
            s = s.substring(decodeHelper.next, s.length);
          }
        }
      } else if (PR_TAG == tok.style) {
        if ('<' == decodeHelper.decode(s, 0) &&
            '/' != s.charAt(decodeHelper.next)) {
          var tagContent = s.substring(decodeHelper.next).toLowerCase();
          // FIXME(msamuel): this does not mirror exactly the code in
          // in PR_splitMarkup that defers splitting tags inside script and
          // style blocks.
          if (PR_startsWith(tagContent, 'script') ||
              PR_startsWith(tagContent, 'style') ||
              PR_startsWith(tagContent, 'xmp')) {
            endScriptTag = '/';
          }
        }
      }
    }

    if (null != endScriptTag) {
      var endTok = null;
      if (PR_SOURCE == tok.style) {
        if (endScriptTag == '%' || endScriptTag == '?') {
          // split a closing %> or ?> off the end of the source
          var pos = s.lastIndexOf(endScriptTag);
          if (pos >= 0 && '>' == decodeHelper.decode(s, pos + 1) &&
              s.length == decodeHelper.next) {
            endTok = new PR_Token(s.substring(pos, s.length), PR_TAG);
            s = s.substring(0, pos);
          }
        }
        if (null == sourceChunks) { sourceChunks = new Array(); }
        sourceChunks.push(new PR_Token(s, PR_PLAIN));
      } else if (PR_PLAIN == tok.style) {
        if (null == sourceChunks) { sourceChunks = new Array(); }
        sourceChunks.push(tok);
      } else if (PR_TAG == tok.style) {
        // if it starts with </ then it must be the end tag.
        if ('<' == decodeHelper.decode(tok.token, 0) &&
            tok.token.length > decodeHelper.next &&
            '/' == decodeHelper.decode(tok.token, decodeHelper.next)) {
          endTok = tok;
        } else {
          tokensOut.push(tok);
        }
      } else {
        if (sourceChunks) {
          sourceChunks.push(tok);
        } else {
          // push remaining tag and attribute tokens from the opening tag
          tokensOut.push(tok);
        }
      }
      if (endTok) {
        flushSourceChunks();
        tokensOut.push(endTok);
        endScriptTag = null;
      }
    } else {
      tokensOut.push(tok);
    }
  }

  // the input ended before the end tag was seen: do not lose the source
  flushSourceChunks();

  return tokensOut;
}
973*83a54b2fSSadaf Ebrahimi
/** splits the quotes from an attribute value.
  * ['"foo"'] -> ['"', 'foo', '"']
  *
  * The opening quote is looked for at the start of the first PR_PLAIN token
  * and the matching closing quote at the end of the last PR_PLAIN token.
  * The quotes are emitted as PR_ATTRIB_VALUE tokens and the text between
  * them stays PR_PLAIN.  A missing closing quote is tolerated.  If there is
  * no PR_PLAIN token, or the first one does not start with a quote, the
  * input array is returned as is.
  *
  * @param tokens an array of PR_Tokens; it is not modified.
  * @return tokens itself, or a new array with the quotes split out.
  * @private
  */
function PR_splitAttributeQuotes(tokens) {
  var firstPlain = null, lastPlain = null;
  for (var i = 0; i < tokens.length; ++i) {
    if (PR_PLAIN == tokens[i].style) {
      firstPlain = i;
      break;
    }
  }
  for (var i = tokens.length; --i >= 0;) {
    if (PR_PLAIN == tokens[i].style) {
      lastPlain = i;
      break;
    }
  }
  if (null == firstPlain) { return tokens; }

  var decodeHelper = new PR_DecodeHelper();
  var fs = tokens[firstPlain].token;
  var fc = decodeHelper.decode(fs, 0);
  if ('"' != fc && '\'' != fc) {
    return tokens;
  }
  var fpos = decodeHelper.next;  // end of the opening quote in fs

  var ls = tokens[lastPlain].token;
  // When the value is a single token the closing quote must come after the
  // opening one, so that a lone quote is not counted twice.
  var minClose = (lastPlain == firstPlain) ? fpos : 0;
  var lc = null;  // the closing quote, or null if there is none
  // First try the last character as a literal quote ...
  var lpos = ls.length - 1;
  if (lpos >= minClose && fc == decodeHelper.decode(ls, lpos) &&
      decodeHelper.next == ls.length) {
    lc = fc;
  } else {
    // ... then try a quote written as an entity at the very end.
    lpos = ls.lastIndexOf('&');
    if (lpos >= minClose && fc == decodeHelper.decode(ls, lpos) &&
        decodeHelper.next == ls.length) {
      lc = fc;
    } else {
      lpos = ls.length;
    }
  }

  var tokensOut = new Array();
  for (var i = 0; i < firstPlain; ++i) {
    tokensOut.push(tokens[i]);
  }
  tokensOut.push(new PR_Token(fs.substring(0, fpos), PR_ATTRIB_VALUE));
  if (lastPlain == firstPlain) {
    tokensOut.push(new PR_Token(fs.substring(fpos, lpos), PR_PLAIN));
  } else {
    tokensOut.push(new PR_Token(fs.substring(fpos, fs.length), PR_PLAIN));
    for (var i = firstPlain + 1; i < lastPlain; ++i) {
      tokensOut.push(tokens[i]);
    }
    if (lc) {
      tokensOut.push(new PR_Token(ls.substring(0, lpos), PR_PLAIN));
    } else {
      tokensOut.push(tokens[lastPlain]);
    }
  }
  if (lc) {
    // styled like the opening quote so that it is not lexed as source
    tokensOut.push(
        new PR_Token(ls.substring(lpos, ls.length), PR_ATTRIB_VALUE));
  }
  for (var i = lastPlain + 1; i < tokens.length; ++i) {
    tokensOut.push(tokens[i]);
  }
  return tokensOut;
}
1037*83a54b2fSSadaf Ebrahimi
/** identify attribute values that really contain source code and recursively
  * lex them.
  *
  * The value of an attribute whose name starts with "on", or is "style", is
  * collected, stripped of its quotes by PR_splitAttributeQuotes, lexed by
  * PR_lexSource, and emitted between null styled '<span class=embsrc>' and
  * '</span>' tokens.  A value that is still being collected when the input
  * ends (a truncated tag) is emitted as well instead of being dropped.
  *
  * @param tokens an array of PR_Tokens.
  * @return a new array of PR_Tokens.
  * @private
  */
function PR_splitSourceAttributes(tokens) {
  var tokensOut = new Array();

  var sourceChunks = null;  // pieces of the source value being collected
  var inSource = false;  // true between the '=' and the end of the value
  var name = '';  // name of the attribute seen most recently

  // lexes the value collected so far, if any, and appends the result to
  // tokensOut wrapped in an embsrc span.
  function flushSourceChunks() {
    if (!sourceChunks) { return; }
    tokensOut.push(new PR_Token('<span class=embsrc>', null));
    var sourceTokens =
      PR_lexSource(PR_splitAttributeQuotes(sourceChunks));
    for (var si = 0, ns = sourceTokens.length; si < ns; ++si) {
      tokensOut.push(sourceTokens[si]);
    }
    tokensOut.push(new PR_Token('</span>', null));
    sourceChunks = null;
  }

  for (var ci = 0, nc = tokens.length; ci < nc; ++ci) {
    var tok = tokens[ci];
    var outList = tokensOut;
    if (PR_TAG == tok.style) {
      if (inSource) {
        // a tag token after the value ends the source attribute
        inSource = false;
        name = '';
        flushSourceChunks();
      } else if (name && tok.token.indexOf('=') >= 0) {
        var nameLower = name.toLowerCase();
        if (PR_startsWith(nameLower, 'on') || 'style' == nameLower) {
          inSource = true;
        }
      } else {
        name = '';
      }
    } else if (PR_ATTRIB_NAME == tok.style) {
      name += tok.token;
    } else if (PR_ATTRIB_VALUE == tok.style) {
      if (inSource) {
        if (null == sourceChunks) { sourceChunks = new Array(); }
        outList = sourceChunks;
        // restyled as plain so that it gets lexed as source
        tok = new PR_Token(tok.token, PR_PLAIN);
      }
    } else {
      if (sourceChunks) {
        outList = sourceChunks;
      }
    }
    outList.push(tok);
  }

  // the input ended inside a source attribute: do not lose its value
  flushSourceChunks();

  return tokensOut;
}
1091*83a54b2fSSadaf Ebrahimi
/** lexes chunks of source code into a list of PR_Token objects.
  *
  * This code assumes that < tokens are html escaped, but " are not.
  * It will do a reasonable job with <, but will not recognize an &quot;
  * as starting a string.
  *
  * ", ', and ` are treated as string delimiters and \ as the string escape.
  * Double delimiter escapes and perl's qq() style strings are not
  * recognized.
  *
  * C, C++, and shell style comments are recognized.
  *
  * @param chunks PR_Tokens with style in (null, PR_PLAIN)
  */
function PR_lexSource(chunks) {
  // first pull out strings and comments, then split entities out so that
  // they are treated as single units.
  var pieces = PR_splitEntities(PR_splitStringAndCommentTokens(chunks));

  // whatever is still plain gets split on whitespace and word boundaries;
  // every other piece is passed through as it is.
  var lexed = [];
  for (var idx = 0; idx < pieces.length; ++idx) {
    var piece = pieces[idx];
    if (PR_PLAIN == piece.style) {
      PR_splitNonStringNonCommentToken(piece.token, lexed);
    } else {
      lexed.push(piece);
    }
  }

  return lexed;
}
1129*83a54b2fSSadaf Ebrahimi
/** lexes chunks of markup into a list of PR_Token objects.
  *
  * The input is expected to have < html escaped, while " is left as is.
  * A reasonable job is done with <, but an &quot; is not recognized as the
  * start of a string.
  *
  * The following constructs are recognized:
  * <!-- ... --> comment
  * <!\w ... >   declaration
  * <\w ... >    tag
  * </\w ... >   tag
  * <?...?>      embedded source
  * &[#\w]...;   entity
  *
  * %foo; entities are not recognized.
  *
  * Any <style>, <script>, and on* attributes are recursed into using
  * PR_lexSource.
  *
  * @param chunks the chunk list to lex; handed unchanged to PR_tokenizeMarkup
  * @return the token list produced by the last pass
  */
function PR_lexMarkup(chunks) {
  // The token list flows through four passes, innermost call first.  Each
  // pass consumes the list returned by the previous one:
  //   1) PR_tokenizeMarkup        markup chunks -> tokens
  //   2) PR_splitTagAttributes    tag tokens -> attribute names and values
  //   3) PR_splitSourceNodes      embedded source sections
  //   4) PR_splitSourceAttributes attributes that contain source
  // NOTE(review): the one line summaries above are inferred from the pass
  // names and the header comment; confirm against the pass implementations.
  return PR_splitSourceAttributes(
      PR_splitSourceNodes(
          PR_splitTagAttributes(
              PR_tokenizeMarkup(chunks))));
}
1171*83a54b2fSSadaf Ebrahimi
/** classify the string as either source or markup and lex appropriately.
  *
  * The string is treated as markup only when the first PR_PLAIN chunk, once
  * trimmed, starts with an escaped < (&lt;) and the last PR_PLAIN chunk, once
  * trimmed, ends with an escaped > (&gt;).  Anything else, including input
  * with no PR_PLAIN chunk at all, is lexed as source.
  *
  * @param s the string to lex; passed to PR_chunkify
  * @return the result of PR_lexMarkup or PR_lexSource on the chunks
  */
function PR_lexOne(s) {
  var chunks = PR_chunkify(s);

  // find the first plain chunk, if there is one.
  var first = 0;
  while (first < chunks.length && PR_PLAIN != chunks[first].style) {
    ++first;
  }

  var looksLikeMarkup = false;
  if (first < chunks.length &&
      PR_startsWith(PR_trim(chunks[first].token), '&lt;')) {
    // walk back from the end to the last plain chunk.  At least one plain
    // chunk exists (the one at `first`), so this always stops in bounds.
    var last = chunks.length - 1;
    while (PR_PLAIN != chunks[last].style) {
      --last;
    }
    looksLikeMarkup = PR_endsWith(PR_trim(chunks[last].token), '&gt;');
  }

  if (looksLikeMarkup) {
    return PR_lexMarkup(chunks);
  }
  return PR_lexSource(chunks);
}
1193*83a54b2fSSadaf Ebrahimi
/** pretty print a chunk of code.
  *
  * Consecutive tokens that share a style are wrapped in a single
  * <span class=STYLE> element; tokens without a style are emitted as is.
  * If anything throws while lexing or rendering, the input is returned
  * unchanged.
  *
  * @param s code as html
  * @return code as html, but prettier
  */
function prettyPrintOne(s) {
  try {
    var tokenList = PR_lexOne(s);
    var markup = '';
    // style of the span that is currently open, or null when none is open.
    var openStyle = null;

    for (var idx = 0; idx < tokenList.length; idx++) {
      var current = tokenList[idx];
      var style = current.style;

      // on a style change, close the running span and open the next one.
      if (style != openStyle) {
        if (openStyle != null) {
          markup += '</span>';
        }
        if (style != null) {
          markup += '<span class=' + style + '>';
        }
        openStyle = style;
      }

      if (null != style) {
        // Turning newlines into <br> and space pairs into &nbsp; interacts
        // badly with the wiki, which introduces paragraph tags into pre
        // blocks for some strange reason.  IE needs it though, since it
        // seems to lose the preformattedness of <pre> tags when their
        // innerHTML is assigned.
        markup += current.token
            .replace(/(?:\r\n?)|\n/g, '<br>')
            .replace(/  /g, '&nbsp; ');
      } else {
        markup += current.token;
      }
    }

    // close whatever span is still open after the last token.
    if (openStyle != null) {
      markup += '</span>';
    }
    return markup;
  } catch (e) {
    //alert(e.stack);  // DISABLE in production
    // best effort: fall back to the unprettified input.
    return s;
  }
}
1234*83a54b2fSSadaf Ebrahimi
/** find all the <pre>, <code> and <xmp> tags in the DOM with
  * class=prettyprint and prettify them.
  *
  * Elements nested inside another prettyprint <pre>, <code> or <xmp> are
  * skipped, since their content is rewritten along with the enclosing
  * element.  Tag names are compared case-insensitively: HTML documents
  * report them in upper case and XML documents in their original case, so
  * a lower-case-only comparison never matches in an HTML page.
  *
  * <xmp> elements are replaced by a <pre> carrying the same attributes,
  * because <xmp> cannot contain the span tags used for styling.
  *
  * The work is done in slices of roughly 250ms, rescheduled with setTimeout,
  * so a large page does not make the browser unresponsive.
  */
function prettyPrint() {
  /** lower-cased tag name of a node, or '' for nodes that have none. */
  function tagOf(node) {
    return node.tagName ? String(node.tagName).toLowerCase() : '';
  }

  /** true iff the node's class attribute mentions prettyprint. */
  function hasPrettyPrintClass(node) {
    return !!node.className && node.className.indexOf('prettyprint') >= 0;
  }

  /** true iff some ancestor of the node is itself a prettyprint container. */
  function isNestedInPrettified(node) {
    for (var p = node.parentNode; p != null; p = p.parentNode) {
      var tag = tagOf(p);
      if ((tag === 'pre' || tag === 'code' || tag === 'xmp') &&
          hasPrettyPrintClass(p)) {
        return true;
      }
    }
    return false;
  }

  // fetch a list of nodes to rewrite.  The collections are copied into a
  // plain array up front because the rewrite below replaces <xmp> nodes,
  // which changes what the document would report for these tag names.
  var tagNames = ['pre', 'code', 'xmp'];
  var elements = [];
  for (var i = 0; i < tagNames.length; ++i) {
    var found = document.getElementsByTagName(tagNames[i]);
    for (var j = 0; j < found.length; ++j) {
      elements.push(found[j]);
    }
  }

  // the loop is broken into a series of continuations to make sure that we
  // don't make the browser unresponsive when rewriting a large page.
  var k = 0;

  function doWork() {
    var endTime = new Date().getTime() + 250;
    for (; k < elements.length && new Date().getTime() < endTime; k++) {
      var cs = elements[k];
      if (!hasPrettyPrintClass(cs) || isNestedInPrettified(cs)) {
        continue;
      }

      // XMP tags contain unescaped entities so require special handling.
      var isRawContent = 'xmp' === tagOf(cs);

      // fetch the content as a snippet of properly escaped HTML
      var content = cs.innerHTML;
      if (isRawContent) {
        content = PR_textToHtml(content);
      }

      // do the pretty printing
      var newContent = prettyPrintOne(content);

      // push the prettified html back into the tag.
      if (!isRawContent) {
        // just replace the old html with the new
        cs.innerHTML = newContent;
      } else {
        // we need to change the tag to a <pre> since <xmp>s do not allow
        // embedded tags such as the span tags used to attach styles to
        // sections of source code.
        var pre = document.createElement('PRE');
        for (var a = 0; a < cs.attributes.length; ++a) {
          var attr = cs.attributes[a];
          if (attr.specified) {
            pre.setAttribute(attr.name, attr.value);
          }
        }
        pre.innerHTML = newContent;
        // remove the old
        cs.parentNode.replaceChild(pre, cs);
      }
    }
    if (k < elements.length) {
      // finish up in a continuation
      setTimeout(doWork, 250);
    }
  }

  doWork();
}
1315