root/trunk/libs/HTMLPurifier.php

Revision 243, 8.7 kB (checked in by azza-bazoo, 6 months ago)

[libraries] update - HTMLPurifier 2.1.4

Line 
1 <?php
2
3 /*!
4  * @mainpage
5  *
6  * HTML Purifier is an HTML filter that will take an arbitrary snippet of
7  * HTML and rigorously test, validate and filter it into a version that
8  * is safe for output onto webpages. It achieves this by:
9  *
10  *  -# Lexing (parsing into tokens) the document,
11  *  -# Executing various strategies on the tokens:
12  *      -# Removing all elements not in the whitelist,
13  *      -# Making the tokens well-formed,
14  *      -# Fixing the nesting of the nodes, and
15  *      -# Validating attributes of the nodes; and
16  *  -# Generating HTML from the purified tokens.
17  *
18  * However, most users will only need to interface with the HTMLPurifier
19  * class, so this massive amount of infrastructure is usually concealed.
20  * If you plan on working with the internals, be sure to include
21  * HTMLPurifier_ConfigSchema and HTMLPurifier_Config.
22  */
23
24 /*
25     HTML Purifier 2.1.4 - Standards Compliant HTML Filtering
26     Copyright (C) 2006-2007 Edward Z. Yang
27
28     This library is free software; you can redistribute it and/or
29     modify it under the terms of the GNU Lesser General Public
30     License as published by the Free Software Foundation; either
31     version 2.1 of the License, or (at your option) any later version.
32
33     This library is distributed in the hope that it will be useful,
34     but WITHOUT ANY WARRANTY; without even the implied warranty of
35     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
36     Lesser General Public License for more details.
37
38     You should have received a copy of the GNU Lesser General Public
39     License along with this library; if not, write to the Free Software
40     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
41  */
42
// Constants are slow, but we make one exception: HTMLPURIFIER_PREFIX holds
// the directory that contains this file.
define('HTMLPURIFIER_PREFIX', dirname(__FILE__));

// Every class has an undocumented dependency on these; they must be included!
require_once 'HTMLPurifier/ConfigSchema.php'; // fatal errors if not included
require_once 'HTMLPurifier/Config.php';
require_once 'HTMLPurifier/Context.php';

require_once 'HTMLPurifier/Lexer.php';
require_once 'HTMLPurifier/Generator.php';
require_once 'HTMLPurifier/Strategy/Core.php';
require_once 'HTMLPurifier/Encoder.php';

require_once 'HTMLPurifier/ErrorCollector.php';
require_once 'HTMLPurifier/LanguageFactory.php';

// Registers the Core.CollectErrors directive (bool, default false), which
// purify() reads to decide whether to set up the locale and error collector.
HTMLPurifier_ConfigSchema::define(
    'Core', 'CollectErrors', false, 'bool', '
Whether or not to collect errors found while filtering the document. This
is a useful way to give feedback to your users. <strong>Warning:</strong>
Currently this feature is very patchy and experimental, with lots of
possible error messages not yet implemented. It will not cause any problems,
but it may not help your users either. This directive has been available
since 2.0.0.
');
68
/**
 * Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
 *
 * @note There are several points in which configuration can be specified
 *       for HTML Purifier.  The precedence of these (from lowest to
 *       highest) is as follows:
 *          -# Instance: new HTMLPurifier($config)
 *          -# Invocation: purify($html, $config)
 *       These configurations are entirely independent of each other and
 *       are *not* merged.
 *
 * @todo We need an easier way to inject strategies, it'll probably end
 *       up getting done through config though.
 */
class HTMLPurifier
{
    
    /**
     * Version string of this HTML Purifier release.
     */
    var $version = '2.1.4';
    
    /**
     * Instance-level configuration, as returned by
     * HTMLPurifier_Config::create() in the constructor. Used by purify()
     * whenever no per-call configuration is passed.
     */
    var $config;
    
    /**
     * Filters registered through addFilter(), in registration order.
     */
    var $filters = array();
    
    /**
     * HTMLPurifier_Strategy_Core and HTMLPurifier_Generator instances
     * created by the constructor and reused by every purify() call.
     */
    var $strategy, $generator;
    
    /**
     * Resultant HTMLPurifier_Context of last run purification. Is an array
     * of contexts if the last called method was purifyArray().
     * @public
     */
    var $context;
    
    /**
     * Initializes the purifier.
     *
     * Declared as __construct() because PHP 8 no longer treats a method
     * named after its class as a constructor; without it, the members set
     * here would stay uninitialized on PHP 8. It is declared ahead of the
     * legacy HTMLPurifier() method on purpose.
     *
     * @param $config Optional HTMLPurifier_Config object for all instances of
     *                the purifier, if omitted, a default configuration is
     *                supplied (which can be overridden on a per-use basis).
     *                The parameter can also be any type that
     *                HTMLPurifier_Config::create() supports.
     */
    function __construct($config = null) {
        
        $this->config = HTMLPurifier_Config::create($config);
        
        $this->strategy     = new HTMLPurifier_Strategy_Core();
        $this->generator    = new HTMLPurifier_Generator();
        
    }
    
    /**
     * Legacy (PHP 4-style) constructor, kept so that PHP 4 and explicit
     * callers of this method keep working. It only delegates to
     * __construct().
     * @param $config See __construct().
     */
    function HTMLPurifier($config = null) {
        $this->__construct($config);
    }
    
    /**
     * Adds a filter to process the output. First come first serve:
     * purify() runs preFilter() in registration order and postFilter()
     * in the reverse order.
     * @param $filter HTMLPurifier_Filter object
     */
    function addFilter($filter) {
        $this->filters[] = $filter;
    }
    
    /**
     * Filters an HTML snippet/document to be XSS-free and standards-compliant.
     *
     * As a side effect, the context built for this run is stored in
     * $this->context.
     *
     * @param $html String of HTML to purify
     * @param $config HTMLPurifier_Config object for this operation, if omitted,
     *                defaults to the config object specified during this
     *                object's construction. The parameter can also be any type
     *                that HTMLPurifier_Config::create() supports.
     * @return Purified HTML
     */
    function purify($html, $config = null) {
        
        // a per-call configuration replaces the instance configuration
        // outright; the two are never merged
        $config = $config ? HTMLPurifier_Config::create($config) : $this->config;
        
        // implementation is partially environment dependent, partially
        // configuration dependent
        $lexer = HTMLPurifier_Lexer::create($config);
        
        $context = new HTMLPurifier_Context();
        
        // our friendly neighborhood generator, all primed with configuration too!
        $this->generator->generateFromTokens(array(), $config, $context);
        $context->register('Generator', $this->generator);
        
        // set up global context variables
        if ($config->get('Core', 'CollectErrors')) {
            // may get moved out if other facilities use it
            $language_factory = HTMLPurifier_LanguageFactory::instance();
            $language = $language_factory->create($config, $context);
            $context->register('Locale', $language);
            
            $error_collector = new HTMLPurifier_ErrorCollector($context);
            $context->register('ErrorCollector', $error_collector);
        }
        
        // setup id_accumulator context, necessary due to the fact that
        // AttrValidator can be called from many places
        $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
        $context->register('IDAccumulator', $id_accumulator);
        
        $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
        
        // pre-filters run in registration order
        $filter_count = count($this->filters);
        for ($i = 0; $i < $filter_count; $i++) {
            $html = $this->filters[$i]->preFilter($html, $config, $context);
        }
        
        // un-purified HTML -> list of un-purified tokens
        $tokens = $lexer->tokenizeHTML($html, $config, $context);
        
        // list of un-purified tokens -> list of purified tokens
        $tokens = $this->strategy->execute($tokens, $config, $context);
        
        // list of purified tokens -> purified HTML
        $html = $this->generator->generateFromTokens($tokens, $config, $context);
        
        // post-filters run in the reverse of registration order
        for ($i = $filter_count - 1; $i >= 0; $i--) {
            $html = $this->filters[$i]->postFilter($html, $config, $context);
        }
        
        $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
        $this->context =& $context;
        return $html;
    }
    
    /**
     * Filters an array of HTML snippets
     *
     * Keys of the input array are preserved in the result. After the call,
     * $this->context holds an array of the per-snippet contexts under the
     * same keys.
     *
     * @param $array_of_html Array of HTML strings to purify
     * @param $config Optional HTMLPurifier_Config object for this operation.
     *                See HTMLPurifier::purify() for more details.
     * @return Array of purified HTML
     */
    function purifyArray($array_of_html, $config = null) {
        $context_array = array();
        foreach ($array_of_html as $key => $html) {
            $array_of_html[$key] = $this->purify($html, $config);
            // purify() has just stored this snippet's context on the instance
            $context_array[$key] = $this->context;
        }
        $this->context = $context_array;
        return $array_of_html;
    }
    
    /**
     * Singleton for enforcing just one HTML Purifier in your system
     *
     * The shared object is (re)built when none exists yet or whenever a
     * truthy $prototype is passed: an HTMLPurifier prototype becomes the
     * shared object itself, any other truthy prototype is handed to the
     * constructor as configuration.
     *
     * NOTE(review): declared without the `static` keyword, in line with the
     * file's PHP 4-compatible syntax. PHP 8 raises an Error when a
     * non-static method is called statically, so HTMLPurifier::instance()
     * only works there from an object context -- confirm the target PHP
     * versions before relying on static calls.
     *
     * @param $prototype Optional prototype HTMLPurifier instance to
     *                   overload singleton with.
     * @return Reference to the shared HTMLPurifier object
     */
    function &instance($prototype = null) {
        static $htmlpurifier;
        if (!$htmlpurifier || $prototype) {
            if (is_a($prototype, 'HTMLPurifier')) {
                $htmlpurifier = $prototype;
            } elseif ($prototype) {
                $htmlpurifier = new HTMLPurifier($prototype);
            } else {
                $htmlpurifier = new HTMLPurifier();
            }
        }
        return $htmlpurifier;
    }
    
    /**
     * Alias of instance().
     * @param $prototype See instance().
     * @return Reference to the shared HTMLPurifier object
     */
    function &getInstance($prototype = null) {
        return HTMLPurifier::instance($prototype);
    }
    
}
235
236
Note: See TracBrowser for help on using the browser.