Changeset 1861 in ExiteCMS for trunk/includes/core_functions.php


Ignore:
Timestamp:
10/18/08 01:01:39 (4 years ago)
Author:
hverton
Message:

replaced the XSS prevention code by the code used in the v8 core

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/includes/core_functions.php

    r1845 r1861  
    3535unset($_loadtime); 
    3636 
     37// prevent possible XSS attacks via $_GET (uses pre v8 xss cleaning code). 
     38$_GET = xss_clean($_GET); 
     39 
    3740// if register_globals is turned off, extract super globals (php 4.2.0+) 
    3841// TODO - WANWIZARD - 20070701 - NEED TO GET RID OF THIS !!! 
     
    4043    if ((isset($_POST) == true) && (is_array($_POST) == true)) extract($_POST, EXTR_OVERWRITE); 
    4144    if ((isset($_GET) == true) && (is_array($_GET) == true)) extract($_GET, EXTR_OVERWRITE); 
    42 } else { 
    43     // if not, unset all globals created by register_globals! 
    44     $rg = array_keys($_REQUEST); 
    45     foreach($rg as $var) { 
    46         if ($_REQUEST[$var] === $$var) { 
    47 //          unset($$var); 
    48         } 
    49     } 
    50 } 
    51  
    52 // prevent any possible XSS attacks via $_GET. 
    53 foreach ($_GET as $check_url) { 
    54     // deal with array's in GET parameters 
    55     if (is_array($check_url)) { 
    56         foreach ($check_url as $url_parts) { 
    57             if ((eregi("<[^>]*script*\"?[^>]*>", $url_parts)) || (eregi("<[^>]*object*\"?[^>]*>", $url_parts)) || 
    58                     (eregi("<[^>]*iframe*\"?[^>]*>", $url_parts)) || (eregi("<[^>]*applet*\"?[^>]*>", $url_parts)) || 
    59                     (eregi("<[^>]*meta*\"?[^>]*>", $url_parts)) || (eregi("<[^>]*style*\"?[^>]*>", $url_parts)) || 
    60                     (eregi("<[^>]*form*\"?[^>]*>", $url_parts)) || (eregi("\([^>]*\"?[^)]*\)", $url_parts))) { 
    61                 die (); 
    62             } 
    63         } 
    64     } else { 
    65         if ((eregi("<[^>]*script*\"?[^>]*>", $check_url)) || (eregi("<[^>]*object*\"?[^>]*>", $check_url)) || 
    66                 (eregi("<[^>]*iframe*\"?[^>]*>", $check_url)) || (eregi("<[^>]*applet*\"?[^>]*>", $check_url)) || 
    67                 (eregi("<[^>]*meta*\"?[^>]*>", $check_url)) || (eregi("<[^>]*style*\"?[^>]*>", $check_url)) || 
    68                 (eregi("<[^>]*form*\"?[^>]*>", $check_url)) || (eregi("\([^>]*\"?[^)]*\)", $check_url))) { 
    69             die (); 
    70         } 
    71     } 
    72 } 
    73 unset($check_url); 
     45} 
    7446 
    7547// disable the standard PHP include path (empty's not accepted?) 
     
    775747} 
    776748 
    777  
    /**
     * Replacement for die(): stops the script after emitting the given text
     * wrapped in a small, centered <div>.
     *
     * @param string $text message to show; it is inserted into the markup as-is
     */
    function terminate($text) {
        $html = "<div style='font-family:Verdana,Sans-serif;font-size:11px;text-align:center;'>".$text."</div>";
        exit($html);
    }
     753 
     754/*---------------------------------------------------+ 
     755| XSS prevention functions, borrowed from the v8 code| 
     756+---------------------------------------------------*/ 
     757 
     758// check variables for xss attacks, and clean them 
     759function xss_clean($str) { 
     760 
     761    static $never_allowed_str, $never_allowed_regex; 
     762     
     763    // never allowed, string replacement 
     764    if (!isset($never_allowed_str)) { 
     765        $never_allowed_str = array( 
     766            'document.cookie'   => '[removed]', 
     767            'document.write'    => '[removed]', 
     768            '.parentNode'       => '[removed]', 
     769            '.innerHTML'        => '[removed]', 
     770            'window.location'   => '[removed]', 
     771            '-moz-binding'      => '[removed]', 
     772            '<!--'              => '&lt;!--', 
     773            '-->'               => '--&gt;', 
     774            '<![CDATA['         => '&lt;![CDATA[' 
     775        ); 
     776    } 
     777     
     778    // never allowed, regex replacement 
     779    if (!isset($never_allowed_regex)) { 
     780        $never_allowed_regex = array( 
     781            "javascript\s*:"    => '[removed]', 
     782            "expression\s*\("   => '[removed]', // CSS and IE 
     783            "Redirect\s+302"    => '[removed]' 
     784        ); 
     785    } 
     786     
     787    // is the argument passed an array? 
     788    if (is_array($str)) { 
     789        while (list($key) = each($str)) { 
     790            $str[$key] = xss_clean($str[$key]); 
     791        } 
     792        return $str; 
     793    } 
     794 
     795    // Remove Invisible Characters 
     796    $str = _remove_invisible_characters($str); 
     797 
     798    // Protect GET variables in URLs 
     799    $str = preg_replace('|\&([a-z\_0-9]+)\=([a-z\_0-9]+)|i', xss_hash()."\\1=\\2", $str); 
     800 
     801    // Validate standard character entities 
     802    // Add a semicolon if missing.  We do this to enable the conversion of entities to ASCII later. 
     803    $str = preg_replace('#(&\#?[0-9a-z]+)[\x00-\x20]*;?#i', "\\1;", $str); 
     804 
     805    // Validate UTF16 two byte encoding (x00). Just as above, adds a semicolon if missing. 
     806    $str = preg_replace('#(&\#x?)([0-9A-F]+);?#i',"\\1\\2;",$str); 
     807 
     808    // Un-Protect GET variables in URLs 
     809    $str = str_replace(xss_hash(), '&', $str); 
     810 
     811    // URL Decode 
     812    // Just in case stuff like this is submitted: 
     813    // <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a> 
     814    $str = rawurldecode($str); 
     815     
     816    // Convert character entities to ASCII  
     817    // This permits our tests below to work reliably. 
     818    // We only convert entities that are within tags since 
     819    // these are the ones that will pose security problems. 
     820    $str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", '_convert_attribute', $str); 
     821    $str = preg_replace_callback("/<\w+.*?(?=>|<|$)/si", '_html_entity_decode_callback', $str); 
     822 
     823    // Remove Invisible Characters Again! 
     824    $str = _remove_invisible_characters($str); 
     825         
     826    // Convert all tabs to spaces 
     827    // This prevents strings like this: ja  vascript 
     828    // NOTE: we deal with spaces between characters later. 
     829    // NOTE: preg_replace was found to be amazingly slow here on large blocks of data, 
     830    //       so we use str_replace. 
     831    if (strpos($str, "\t") !== false) { 
     832        $str = str_replace("\t", ' ', $str); 
     833    } 
     834         
     835    // Capture converted string for later comparison 
     836    $converted_string = $str; 
     837         
     838    // Not Allowed Under Any Conditions 
     839    foreach ($never_allowed_str as $key => $val) { 
     840        $str = str_replace($key, $val, $str);    
     841    } 
     842    foreach ($never_allowed_regex as $key => $val) { 
     843        $str = preg_replace("#".$key."#i", $val, $str);    
     844    } 
     845 
     846    // Makes PHP tags safe 
     847    // Note: XML tags are inadvertently replaced too: 
     848    // <?xml 
     849    // But it doesn't seem to pose a problem. 
     850    $str = str_replace(array('<?php', '<?PHP', '<?', '?'.'>'),  array('&lt;?php', '&lt;?PHP', '&lt;?', '?&gt;'), $str); 
     851         
     852    // Compact any exploded words 
     853    // This corrects words like:  j a v a s c r i p t 
     854    // These words are compacted back to their correct state. 
     855    $words = array('javascript', 'expression', 'vbscript', 'script', 'applet', 'alert', 'document', 'write', 'cookie', 'window'); 
     856    foreach ($words as $word) { 
     857        $temp = ''; 
     858        for ($i = 0, $wordlen = strlen($word); $i < $wordlen; $i++) { 
     859            $temp .= substr($word, $i, 1)."\s*"; 
     860        } 
     861        // We only want to do this when it is followed by a non-word character 
     862        // That way valid stuff like "dealer to" does not become "dealerto" 
     863        $str = preg_replace_callback('#('.substr($temp, 0, -3).')(\W)#is', '_compact_exploded_words', $str); 
     864    } 
     865         
     866    // Remove disallowed Javascript in links or img tags 
     867    // We used to do some version comparisons and use of stripos for PHP5, but it is dog slow compared 
     868    // to these simplified non-capturing preg_match(), especially if the pattern exists in the string 
     869    do { 
     870        $original = $str; 
     871        if (preg_match("/<a/i", $str)) { 
     872            $str = preg_replace_callback("#<a\s*([^>]*?)(>|$)#si", '_js_link_removal', $str); 
     873        } 
     874        if (preg_match("/<img/i", $str)) { 
     875            $str = preg_replace_callback("#<img\s*([^>]*?)(>|$)#si", '_js_img_removal', $str); 
     876        } 
     877        if (preg_match("/script/i", $str) OR preg_match("/xss/i", $str)) { 
     878            $str = preg_replace("#<(/*)(script|xss)(.*?)\>#si", '[removed]', $str); 
     879        } 
     880    } 
     881    while($original != $str); 
     882 
     883    unset($original); 
     884 
     885    // Remove JavaScript Event Handlers 
     886    // Note: This code is a little blunt.  It removes 
     887    // the event handler and anything up to the closing >, 
     888    // but it's unlikely to be a problem. 
     889    $event_handlers = array('on\w*','xmlns'); 
     890    $str = preg_replace("#<([^><]+)(".implode('|', $event_handlers).")(\s*=\s*[^><]*)([><]*)#i", "<\\1\\4", $str); 
     891         
     892    // Sanitize naughty HTML elements 
     893    // If a tag containing any of the words in the list 
     894    // below is found, the tag gets converted to entities. 
     895    // So this: <blink> Becomes: &lt;blink&gt; 
     896    $naughty = 'alert|applet|audio|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|layer|link|meta|object|plaintext|style|script|textarea|title|video|xml|xss'; 
     897    $str = preg_replace_callback('#<(/*\s*)('.$naughty.')([^><]*)([><]*)#is', '_sanitize_naughty_html', $str); 
     898 
     899    // Sanitize naughty scripting elements 
     900    // Similar to above, only instead of looking for 
     901    // tags it looks for PHP and JavaScript commands 
     902    // that are disallowed.  Rather than removing the 
     903    // code, it simply converts the parenthesis to entities 
     904    // rendering the code un-executable. 
     905    // For example: eval('some code') Becomes: eval&#40;'some code'&#41; 
     906    $str = preg_replace('#(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2&#40;\\3&#41;", $str); 
     907                     
     908    // Final clean up 
     909    // This adds a bit of extra precaution in case 
     910    // something got through the above filters 
     911    foreach ($never_allowed_str as $key => $val) { 
     912        $str = str_replace($key, $val, $str);    
     913    } 
     914    foreach ($never_allowed_regex as $key => $val) { 
     915        $str = preg_replace("#".$key."#i", $val, $str); 
     916    } 
     917 
     918    // return the result 
     919    return $str; 
     920} 
     921 
     922// Random Hash for protecting URLs 
     923function xss_hash() { 
     924 
     925    static $xss_hash; 
     926     
     927    if (!isset($xss_hash)) { 
     928        if (phpversion() >= 4.2) { 
     929            mt_srand(); 
     930        } else { 
     931            mt_srand(hexdec(substr(md5(microtime()), -8)) & 0x7fffffff); 
     932        } 
     933        $xss_hash = md5(time() + mt_rand(0, 1999999999)); 
     934    } 
     935 
     936    return $xss_hash; 
     937} 
     938 
     939// Remove Invisible Characters 
     940function _remove_invisible_characters($str) { 
     941 
     942    static $non_displayables; 
     943         
     944    if (!isset($non_displayables)) { 
     945        // every control character except newline (10), carriage return (13), and horizontal tab (09), 
     946        // both as a URL encoded character (::shakes fist at IE and WebKit::), and the actual character 
     947        $non_displayables = array( 
     948            '/%0[0-8]/', '/[\x00-\x08]/',           // 00-08 
     949            '/%11/', '/\x0b/', '/%12/', '/\x0c/',   // 11, 12 
     950            '/%1[4-9]/', '/%2[0-9]/', '/%3[0-1]/',  // url encoded 14-31 
     951            '/[\x0e-\x1f]/');                       // 14-31 
     952    } 
     953    do { 
     954        $cleaned = $str; 
     955        $str = preg_replace($non_displayables, '', $str); 
     956    } 
     957    while ($cleaned != $str); 
     958 
     959    return $str; 
     960} 
     961 
     962// Compact Exploded Words 
     963function _compact_exploded_words($matches) { 
     964 
     965    return preg_replace('/\s+/s', '', $matches[1]).$matches[2]; 
     966} 
     967 
     968// Sanitize Naughty HTML 
     969function _sanitize_naughty_html($matches) { 
     970 
     971    // encode opening brace 
     972    $str = '&lt;'.$matches[1].$matches[2].$matches[3]; 
     973 
     974    // encode captured opening or closing brace to prevent recursive vectors 
     975    $str .= str_replace(array('>', '<'), array('&gt;', '&lt;'), $matches[4]); 
     976 
     977    return $str; 
     978} 
     979 
     980// JS Link Removal 
     981function _js_link_removal($match) { 
     982    $attributes = _filter_attributes(str_replace(array('<', '>'), '', $match[1])); 
     983    return str_replace($match[1], preg_replace("#href=.*?(alert\(|alert&\#40;|javascript\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes), $match[0]); 
     984} 
     985 
     986// JS Image Removal 
     987function _js_img_removal($match) { 
     988    $attributes = _filter_attributes(str_replace(array('<', '>'), '', $match[1])); 
     989    return str_replace($match[1], preg_replace("#src=.*?(alert\(|alert&\#40;|javascript\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes), $match[0]); 
     990} 
     991 
     992// Attribute Conversion 
     993function _convert_attribute($match) { 
     994    return str_replace(array('>', '<'), array('&gt;', '&lt;'), $match[0]); 
     995} 
     996 
     997// HTML Entity Decode Callback 
     998function _html_entity_decode_callback($match) { 
     999     
     1000    global $settings; 
     1001    return html_entity_decode($match[0], strtoupper($settings['charset'])); 
     1002} 
     1003 
     1004//  Filters tag attributes for consistency and safety 
     1005function _filter_attributes($str) { 
     1006 
     1007    $out = ''; 
     1008    if (preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $matches)) { 
     1009        foreach ($matches[0] as $match) { 
     1010            $out .= "{$match}"; 
     1011        }            
     1012    } 
     1013    return $out; 
     1014} 
    7821015?> 
Note: See TracChangeset for help on using the changeset viewer.