'[removed]',
'document.write' => '[removed]',
'.parentNode' => '[removed]',
'.innerHTML' => '[removed]',
'window.location' => '[removed]',
'-moz-binding' => '[removed]',
'' => '-->',
' '<![CDATA['
);
private $never_allowed_regex = array(
"javascript\s*:" => '[removed]',
"expression\s*(\(|&\#40;)" => '[removed]',
"vbscript\s*:" => '[removed]',
"Redirect\s+302" => '[removed]'
);
public function __construct($charset)
{
$this->charset = $charset;
}
public function _clean_input_data($str)
{
if (is_array($str))
{
$new_array = array();
foreach ($str as $key => $val)
{
$new_array[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
}
return $new_array;
}
// We strip slashes if magic quotes is on to keep things consistent
if (get_magic_quotes_gpc())
{
$str = stripslashes($str);
}
// Should we filter the input data?
if ($this->use_xss_clean === TRUE)
{
$str = $this->xss_clean($str);
}
// Standardize newlines
if (strpos($str, "\r") !== FALSE)
{
$str = str_replace(array("\r\n", "\r"), "\n", $str);
}
return $str;
}
private function _clean_input_keys($str)
{
if (!preg_match("/^[a-z0-9:_\/-]+$/i", $str))
{
exit('Disallowed Key Characters.');
}
return $str;
}
public function filename_security($str)
{
$bad = array(
"../",
"./",
"",
"<",
">",
"'",
'"',
'&',
'$',
'#',
'{',
'}',
'[',
']',
'=',
';',
'?',
"%20",
"%22",
"%3c", // <
"%253c", // <
"%3e", // >
"%0e", // >
"%28", // (
"%29", // )
"%2528", // (
"%26", // &
"%24", // $
"%3f", // ?
"%3b", // ;
"%3d" // =
);
return stripslashes(str_replace($bad, '', $str));
}
public function xss_clean($str, $is_image = false)
{
/*
* Is the string an array?
*
*/
if (is_array($str))
{
while (list($key) = each($str))
{
$str[$key] = $this->xss_clean($str[$key]);
}
return $str;
}
/*
* Remove Invisible Characters
*/
$str = $this->_remove_invisible_characters($str);
/*
* Protect GET variables in URLs
*/
// 901119URL5918AMP18930PROTECT8198
$str = preg_replace('|\&([a-z\_0-9]+)\=([a-z\_0-9]+)|i', $this->xss_hash()."\\1=\\2", $str);
/*
* Validate standard character entities
*
* Add a semicolon if missing. We do this to enable
* the conversion of entities to ASCII later.
*
*/
$str = preg_replace('#(&\#?[0-9a-z]{2,})([\x00-\x20])*;?#i', "\\1;\\2", $str);
/*
* Validate UTF16 two byte encoding (x00)
*
* Just as above, adds a semicolon if missing.
*
*/
$str = preg_replace('#(&\#x?)([0-9A-F]+);?#i',"\\1\\2;",$str);
/*
* Un-Protect GET variables in URLs
*/
$str = str_replace($this->xss_hash(), '&', $str);
/*
* URL Decode
*
* Just in case stuff like this is submitted:
*
* Google
*
* Note: Use rawurldecode() so it does not remove plus signs
*
*/
$str = rawurldecode($str);
/*
* Convert character entities to ASCII
*
* This permits our tests below to work reliably.
* We only convert entities that are within tags since
* these are the ones that will pose security problems.
*
*/
$str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);
$str = preg_replace_callback("/<\w+.*?(?=>|<|$)/si", array($this, '_html_entity_decode_callback'), $str);
/*
* Remove Invisible Characters Again!
*/
$str = $this->_remove_invisible_characters($str);
/*
* Convert all tabs to spaces
*
* This prevents strings like this: ja vascript
* NOTE: we deal with spaces between characters later.
* NOTE: preg_replace was found to be amazingly slow here on large blocks of data,
* so we use str_replace.
*
*/
if (strpos($str, "\t") !== FALSE)
{
$str = str_replace("\t", ' ', $str);
}
/*
* Capture converted string for later comparison
*/
$converted_string = $str;
/*
* Not Allowed Under Any Conditions
*/
foreach ($this->never_allowed_str as $key => $val)
{
$str = str_replace($key, $val, $str);
}
foreach ($this->never_allowed_regex as $key => $val)
{
$str = preg_replace("#".$key."#i", $val, $str);
}
/*
* Makes PHP tags safe
*
* Note: XML tags are inadvertently replaced too:
*
* '), array('<?php', '<?PHP', '<?', '?>'), $str);
}
/*
* Compact any exploded words
*
* This corrects words like: j a v a s c r i p t
* These words are compacted back to their correct state.
*
*/
$words = array('javascript', 'expression', 'vbscript', 'script', 'applet', 'alert', 'document', 'write', 'cookie', 'window');
foreach ($words as $word)
{
$temp = '';
for ($i = 0, $wordlen = strlen($word); $i < $wordlen; $i++)
{
$temp .= substr($word, $i, 1)."\s*";
}
// We only want to do this when it is followed by a non-word character
// That way valid stuff like "dealer to" does not become "dealerto"
$str = preg_replace_callback('#('.substr($temp, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
}
/*
* Remove disallowed Javascript in links or img tags
* We used to do some version comparisons and use of stripos for PHP5, but it is dog slow compared
* to these simplified non-capturing preg_match(), especially if the pattern exists in the string
*/
do
{
$original = $str;
if (preg_match("/]*?)(>|$)#si", array($this, '_js_link_removal'), $str);
}
if (preg_match("/
]*?)(\s?/?>|$)#si", array($this, '_js_img_removal'), $str);
}
if (preg_match("/script/i", $str) OR preg_match("/xss/i", $str))
{
$str = preg_replace("#<(/*)(script|xss)(.*?)\>#si", '[removed]', $str);
}
}
while($original != $str);
unset($original);
/*
* Remove JavaScript Event Handlers
*
* Note: This code is a little blunt. It removes
* the event handler and anything up to the closing >,
* but it's unlikely to be a problem.
*
*/
$event_handlers = array('[^a-z_\-]on\w*','xmlns');
if ($is_image === TRUE)
{
/*
* Adobe Photoshop puts XML metadata into JFIF images, including namespacing,
* so we have to allow this for images. -Paul
*/
unset($event_handlers[array_search('xmlns', $event_handlers)]);
}
$str = preg_replace("#<([^><]+?)(".implode('|', $event_handlers).")(\s*=\s*[^><]*)([><]*)#i", "<\\1\\4", $str);
/*
* Sanitize naughty HTML elements
*
* If a tag containing any of the words in the list
* below is found, the tag gets converted to entities.
*
* So this: