Importation initiale du code de CKEditor 3.6.1.
[ckeditor.git] / _source / core / htmlparser.js
1 /*
2 Copyright (c) 2003-2011, CKSource - Frederico Knabben. All rights reserved.
3 For licensing, see LICENSE.html or http://ckeditor.com/license
4 */
5
6 /**
7 * Creates a {@link CKEDITOR.htmlParser} class instance.
8 * @class Provides an "event like" system to parse strings of HTML data.
9 * @example
10 * var parser = new CKEDITOR.htmlParser();
11 * parser.onTagOpen = function( tagName, attributes, selfClosing )
12 * {
13 * alert( tagName );
14 * };
15 * parser.parse( '<p>Some <b>text</b>.</p>' );
16 */
CKEDITOR.htmlParser = function()
{
	this._ =
	{
		// Tokenizer used by parse(). Each match is one closing tag, comment
		// or opening tag; the capture groups are:
		//   1 : tag name of a closing tag.
		//   2 : text of a comment.
		//   3 : tag name of an opening tag.
		//   4 : raw attributes part of an opening tag.
		// The regex is global ('g'), so it carries "lastIndex" state between
		// exec() calls; parse() resets that state on every run.
		htmlPartsRegex : new RegExp( '<(?:(?:\\/([^>]+)>)|(?:!--([\\S|\\s]*?)-->)|(?:([^\\s>]+)\\s*((?:(?:"[^"]*")|(?:\'[^\']*\')|[^"\'>])*)\\/?>))', 'g' )
	};
};

(function()
{
	// Matches a single attribute inside the attributes part of an opening tag.
	// Groups: 1 = name, 2 = double-quoted value, 3 = single-quoted value,
	// 4 = unquoted value. A name with no value matches with groups 2-4 unset.
	var attribsRegex = /([\w\-:.]+)(?:(?:\s*=\s*(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s>]+)))|(?=\s|$))/g,
		// Attributes that, when found with no value, receive their own name as value.
		emptyAttribs = {checked:1,compact:1,declare:1,defer:1,disabled:1,ismap:1,multiple:1,nohref:1,noresize:1,noshade:1,nowrap:1,readonly:1,selected:1},
		hasOwn = Object.prototype.hasOwnProperty;

	// Converts the raw attributes part of an opening tag into a
	// { lowercasedName : value } object.
	function parseAttributes( attribsPart )
	{
		var attribs = {},
			attribMatch;

		if ( !attribsPart )
			return attribs;

		// The regex is shared and global; always start scanning from the beginning.
		attribsRegex.lastIndex = 0;

		while ( ( attribMatch = attribsRegex.exec( attribsPart ) ) )
		{
			var attName = attribMatch[ 1 ].toLowerCase(),
				attValue = attribMatch[ 2 ] || attribMatch[ 3 ] || attribMatch[ 4 ] || '';

			// Own-property check, so names inherited from Object.prototype
			// (e.g. "constructor") are not mistaken for empty attributes.
			if ( !attValue && hasOwn.call( emptyAttribs, attName ) )
				attribs[ attName ] = attName;
			else
				attribs[ attName ] = attValue;
		}

		return attribs;
	}

	CKEDITOR.htmlParser.prototype =
	{
		/**
		 * Function to be fired when a tag opener is found. This function
		 * should be overridden when using this class.
		 * @param {String} tagName The tag name. The name is guaranteed to be
		 *		lowercased.
		 * @param {Object} attributes An object containing all tag attributes. Each
		 *		property in this object represents an attribute name (lowercased)
		 *		and its value is the attribute value.
		 * @param {Boolean} selfClosing true if the tag closes itself, false if the
		 *		tag doesn't.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onTagOpen = function( tagName, attributes, selfClosing )
		 *	 {
		 *		 alert( tagName );  // e.g. "b"
		 *	 };
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		onTagOpen : function() {},

		/**
		 * Function to be fired when a tag closer is found. This function
		 * should be overridden when using this class.
		 * @param {String} tagName The tag name. The name is guaranteed to be
		 *		lowercased.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onTagClose = function( tagName )
		 *	 {
		 *		 alert( tagName );  // e.g. "b"
		 *	 };
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		onTagClose : function() {},

		/**
		 * Function to be fired when text is found. This function
		 * should be overridden when using this class.
		 * @param {String} text The text found.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onText = function( text )
		 *	 {
		 *		 alert( text );  // e.g. "Hello"
		 *	 };
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		onText : function() {},

		/**
		 * Function to be fired when a CDATA section is found, i.e. the raw
		 * content collected between an opening tag listed in
		 * CKEDITOR.dtd.$cdata and the next closing tag listed there. This
		 * function should be overridden when using this class.
		 * @param {String} cdata The CDATA that has been found.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onCDATA = function( cdata )
		 *	 {
		 *		 alert( cdata );  // e.g. "var hello;"
		 *	 };
		 * parser.parse( "&lt;script&gt;var hello;&lt;/script&gt;" );
		 */
		onCDATA : function() {},

		/**
		 * Function to be fired when a comment is found. This function
		 * should be overridden when using this class.
		 * @param {String} comment The comment text (an empty string for
		 *		an empty comment).
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onComment = function( comment )
		 *	 {
		 *		 alert( comment );  // e.g. " Example "
		 *	 };
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		onComment : function() {},

		/**
		 * Parses text, looking for HTML tokens, like tag openers or closers,
		 * or comments. This function fires the onTagOpen, onTagClose, onText,
		 * onCDATA and onComment functions during its execution.
		 *
		 * The tokenizer state lives on the instance, so calling parse() again
		 * on the same instance from inside one of the handlers is not supported.
		 * @param {String} html The HTML to be parsed.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * // The onTagOpen, onTagClose, onText and onComment should be overridden
		 * // at this point.
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		parse : function( html )
		{
			var partsRegex = this._.htmlPartsRegex,
				parts,
				tagName,
				nextIndex = 0,
				cdata;		// The collected data inside a CDATA section.

			// A previous run aborted by an exception thrown from a handler
			// leaves a stale offset in the global regex. Always start clean.
			partsRegex.lastIndex = 0;

			while ( ( parts = partsRegex.exec( html ) ) )
			{
				// Anything between the previous token and this one is plain
				// text (or raw CDATA content while inside a CDATA element).
				var tagIndex = parts.index;
				if ( tagIndex > nextIndex )
				{
					var text = html.substring( nextIndex, tagIndex );

					if ( cdata )
						cdata.push( text );
					else
						this.onText( text );
				}

				nextIndex = partsRegex.lastIndex;

				/*
				 "parts" is an array with the following items:
					0 : The entire match for opening/closing tags and comments.
					1 : Group filled with the tag name for closing tags.
					2 : Group filled with the comment text.
					3 : Group filled with the tag name for opening tags.
					4 : Group filled with the attributes part of opening tags.
				 */

				// Closing tag
				if ( ( tagName = parts[ 1 ] ) )
				{
					// Group 1 takes everything up to ">", so "</b >" comes
					// with trailing whitespace that is not part of the name.
					tagName = tagName.toLowerCase().replace( /\s+$/, '' );

					if ( cdata && CKEDITOR.dtd.$cdata[ tagName ] )
					{
						// Send the CDATA data.
						this.onCDATA( cdata.join('') );
						cdata = null;
					}

					// While still inside CDATA, fall through so the closer is
					// stored as raw content below.
					if ( !cdata )
					{
						this.onTagClose( tagName );
						continue;
					}
				}

				// If CDATA is enabled, just save the raw match.
				if ( cdata )
				{
					cdata.push( parts[ 0 ] );
					continue;
				}

				// Opening tag
				if ( ( tagName = parts[ 3 ] ) )
				{
					tagName = tagName.toLowerCase();

					// There are some tag names that can break things, so let's
					// simply ignore them when parsing. (#5224)
					if ( /="/.test( tagName ) )
						continue;

					var attribsPart = parts[ 4 ],
						selfClosing = !!( attribsPart && attribsPart.charAt( attribsPart.length - 1 ) == '/' );

					// For "<br/>" (no space before the slash) the slash is
					// captured as part of the tag name instead of the
					// attributes part. Move it where it belongs.
					var bareName = tagName.replace( /\/+$/, '' );
					if ( bareName && bareName != tagName )
					{
						tagName = bareName;
						selfClosing = true;
					}

					this.onTagOpen( tagName, parseAttributes( attribsPart ), selfClosing );

					// Open CDATA mode when finding the appropriate tags.
					if ( !cdata && CKEDITOR.dtd.$cdata[ tagName ] )
						cdata = [];

					continue;
				}

				// Comment. Test for presence rather than truthiness, so that
				// an empty comment ("<!---->") is reported as well.
				if ( parts[ 2 ] !== undefined )
					this.onComment( parts[ 2 ] );
			}

			// Whatever follows the last token is reported as text.
			if ( html.length > nextIndex )
				this.onText( html.substring( nextIndex, html.length ) );
		}
	};
})();