Changeset 1861 in ExiteCMS for trunk/includes/core_functions.php
- Timestamp:
- 10/18/08 01:01:39 (4 years ago)
- File:
-
- 1 edited
-
trunk/includes/core_functions.php (modified) (3 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/includes/core_functions.php
r1845 r1861 35 35 unset($_loadtime); 36 36 37 // prevent possible XSS attacks via $_GET (uses pre v8 xss cleaning code). 38 $_GET = xss_clean($_GET); 39 37 40 // if register_globals is turned off, extract super globals (php 4.2.0+) 38 41 // TODO - WANWIZARD - 20070701 - NEED TO GET RID OF THIS !!! … … 40 43 if ((isset($_POST) == true) && (is_array($_POST) == true)) extract($_POST, EXTR_OVERWRITE); 41 44 if ((isset($_GET) == true) && (is_array($_GET) == true)) extract($_GET, EXTR_OVERWRITE); 42 } else { 43 // if not, unset all globals created by register_globals! 44 $rg = array_keys($_REQUEST); 45 foreach($rg as $var) { 46 if ($_REQUEST[$var] === $$var) { 47 // unset($$var); 48 } 49 } 50 } 51 52 // prevent any possible XSS attacks via $_GET. 53 foreach ($_GET as $check_url) { 54 // deal with array's in GET parameters 55 if (is_array($check_url)) { 56 foreach ($check_url as $url_parts) { 57 if ((eregi("<[^>]*script*\"?[^>]*>", $url_parts)) || (eregi("<[^>]*object*\"?[^>]*>", $url_parts)) || 58 (eregi("<[^>]*iframe*\"?[^>]*>", $url_parts)) || (eregi("<[^>]*applet*\"?[^>]*>", $url_parts)) || 59 (eregi("<[^>]*meta*\"?[^>]*>", $url_parts)) || (eregi("<[^>]*style*\"?[^>]*>", $url_parts)) || 60 (eregi("<[^>]*form*\"?[^>]*>", $url_parts)) || (eregi("\([^>]*\"?[^)]*\)", $url_parts))) { 61 die (); 62 } 63 } 64 } else { 65 if ((eregi("<[^>]*script*\"?[^>]*>", $check_url)) || (eregi("<[^>]*object*\"?[^>]*>", $check_url)) || 66 (eregi("<[^>]*iframe*\"?[^>]*>", $check_url)) || (eregi("<[^>]*applet*\"?[^>]*>", $check_url)) || 67 (eregi("<[^>]*meta*\"?[^>]*>", $check_url)) || (eregi("<[^>]*style*\"?[^>]*>", $check_url)) || 68 (eregi("<[^>]*form*\"?[^>]*>", $check_url)) || (eregi("\([^>]*\"?[^)]*\)", $check_url))) { 69 die (); 70 } 71 } 72 } 73 unset($check_url); 45 } 74 46 75 47 // disable the standard PHP include path (empty's not accepted?) 
// replacement for die()
// Stops script execution after printing $text inside a small, centred, styled <div>.
// NOTE: $text is written to the page as-is; callers must not pass untrusted input.
function terminate($text) {
	die("<div style='font-family:Verdana,Sans-serif;font-size:11px;text-align:center;'>$text</div>");
}

/*---------------------------------------------------+
| XSS prevention functions, borrowed from the v8 code|
+---------------------------------------------------*/

// check variables for xss attacks, and clean them
//
// $str    : a string, or an (optionally nested) array of strings. Arrays are
//           cleaned recursively, keys are preserved.
// returns : the cleaned string (or array).
//
// NOTE: when the input contains an HTML tag, _html_entity_decode_callback()
// is invoked, which reads the global $settings['charset'].
function xss_clean($str) {

	static $never_allowed_str, $never_allowed_regex;

	// is the argument passed an array? then clean every element
	// (foreach instead of each(): each() no longer exists in PHP 8)
	if (is_array($str)) {
		foreach (array_keys($str) as $key) {
			$str[$key] = xss_clean($str[$key]);
		}
		return $str;
	}

	// never allowed, string replacement
	if (!isset($never_allowed_str)) {
		$never_allowed_str = array(
			'document.cookie'	=> '[removed]',
			'document.write'	=> '[removed]',
			'.parentNode'		=> '[removed]',
			'.innerHTML'		=> '[removed]',
			'window.location'	=> '[removed]',
			'-moz-binding'		=> '[removed]',
			// comment and CDATA openers/closers are neutralised by encoding their brackets
			'<!--'				=> '&lt;!--',
			'-->'				=> '--&gt;',
			'<![CDATA['			=> '&lt;![CDATA['
		);
	}

	// never allowed, regex replacement
	if (!isset($never_allowed_regex)) {
		$never_allowed_regex = array(
			"javascript\s*:"	=> '[removed]',
			"expression\s*\("	=> '[removed]', // CSS and IE
			"Redirect\s+302"	=> '[removed]'
		);
	}

	// Remove Invisible Characters
	$str = _remove_invisible_characters($str);

	// Protect GET variables in URLs
	// (the "&" of "&name=value" is temporarily swapped for a random hash, so
	// the entity validation below does not mistake it for a character entity)
	$str = preg_replace('|\&([a-z\_0-9]+)\=([a-z\_0-9]+)|i', xss_hash()."\\1=\\2", $str);

	// Validate standard character entities
	// Add a semicolon if missing. We do this to enable the conversion of entities to ASCII later.
	$str = preg_replace('#(&\#?[0-9a-z]+)[\x00-\x20]*;?#i', "\\1;", $str);

	// Validate UTF16 two byte encoding (x00). Just as above, adds a semicolon if missing.
	$str = preg_replace('#(&\#x?)([0-9A-F]+);?#i',"\\1\\2;",$str);

	// Un-Protect GET variables in URLs
	$str = str_replace(xss_hash(), '&', $str);

	// URL Decode
	// Just in case stuff like this is submitted:
	// <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
	$str = rawurldecode($str);

	// Convert character entities to ASCII
	// This permits our tests below to work reliably.
	// We only convert entities that are within tags since
	// these are the ones that will pose security problems.
	$str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", '_convert_attribute', $str);
	$str = preg_replace_callback("/<\w+.*?(?=>|<|$)/si", '_html_entity_decode_callback', $str);

	// Remove Invisible Characters Again!
	$str = _remove_invisible_characters($str);

	// Convert all tabs to spaces
	// This prevents strings like this: ja<tab>vascript
	// NOTE: we deal with spaces between characters later.
	// NOTE: preg_replace was found to be amazingly slow here on large blocks of data,
	// so we use str_replace.
	if (strpos($str, "\t") !== false) {
		$str = str_replace("\t", ' ', $str);
	}

	// Not Allowed Under Any Conditions
	foreach ($never_allowed_str as $key => $val) {
		$str = str_replace($key, $val, $str);
	}
	foreach ($never_allowed_regex as $key => $val) {
		$str = preg_replace("#".$key."#i", $val, $str);
	}

	// Makes PHP tags safe
	// Note: XML tags are inadvertently replaced too:
	// <?xml
	// But it doesn't seem to pose a problem.
	$str = str_replace(array('<?php', '<?PHP', '<?', '?'.'>'), array('&lt;?php', '&lt;?PHP', '&lt;?', '?&gt;'), $str);

	// Compact any exploded words
	// This corrects words like: j a v a s c r i p t
	// These words are compacted back to their correct state.
	$words = array('javascript', 'expression', 'vbscript', 'script', 'applet', 'alert', 'document', 'write', 'cookie', 'window');
	foreach ($words as $word) {
		// build "j\s*a\s*v\s*a..." : every letter followed by optional whitespace
		$temp = '';
		for ($i = 0, $wordlen = strlen($word); $i < $wordlen; $i++) {
			$temp .= substr($word, $i, 1)."\s*";
		}
		// We only want to do this when it is followed by a non-word character
		// That way valid stuff like "dealer to" does not become "dealerto"
		// (substr(..., 0, -3) drops the "\s*" that trails the last letter)
		$str = preg_replace_callback('#('.substr($temp, 0, -3).')(\W)#is', '_compact_exploded_words', $str);
	}

	// Remove disallowed Javascript in links or img tags
	// We used to do some version comparisons and use of stripos for PHP5, but it is dog slow compared
	// to these simplified non-capturing preg_match(), especially if the pattern exists in the string
	// The loop repeats until a pass makes no further change. The comparison is
	// strict, so two different numeric-looking strings are never seen as equal.
	do {
		$original = $str;
		if (preg_match("/<a/i", $str)) {
			$str = preg_replace_callback("#<a\s*([^>]*?)(>|$)#si", '_js_link_removal', $str);
		}
		if (preg_match("/<img/i", $str)) {
			$str = preg_replace_callback("#<img\s*([^>]*?)(>|$)#si", '_js_img_removal', $str);
		}
		if (preg_match("/script/i", $str) || preg_match("/xss/i", $str)) {
			$str = preg_replace("#<(/*)(script|xss)(.*?)\>#si", '[removed]', $str);
		}
	}
	while ($original !== $str);

	unset($original);

	// Remove JavaScript Event Handlers
	// Note: This code is a little blunt. It removes
	// the event handler and anything up to the closing >,
	// but it's unlikely to be a problem.
	$event_handlers = array('on\w*','xmlns');
	$str = preg_replace("#<([^><]+)(".implode('|', $event_handlers).")(\s*=\s*[^><]*)([><]*)#i", "<\\1\\4", $str);

	// Sanitize naughty HTML elements
	// If a tag containing any of the words in the list
	// below is found, the tag gets converted to entities.
	// So this: <blink> Becomes: &lt;blink&gt;
	$naughty = 'alert|applet|audio|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|layer|link|meta|object|plaintext|style|script|textarea|title|video|xml|xss';
	$str = preg_replace_callback('#<(/*\s*)('.$naughty.')([^><]*)([><]*)#is', '_sanitize_naughty_html', $str);

	// Sanitize naughty scripting elements
	// Similar to above, only instead of looking for
	// tags it looks for PHP and JavaScript commands
	// that are disallowed. Rather than removing the
	// code, it simply converts the parenthesis to entities
	// rendering the code un-executable.
	// For example: eval('some code') Becomes: eval&#40;'some code'&#41;
	$str = preg_replace('#(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2&#40;\\3&#41;", $str);

	// Final clean up
	// This adds a bit of extra precaution in case
	// something got through the above filters
	foreach ($never_allowed_str as $key => $val) {
		$str = str_replace($key, $val, $str);
	}
	foreach ($never_allowed_regex as $key => $val) {
		$str = preg_replace("#".$key."#i", $val, $str);
	}

	// return the result
	return $str;
}

// Random Hash for protecting URLs
// Returns a 32 character md5 string that is generated once and then reused
// for every later call within the same request.
function xss_hash() {

	static $xss_hash;

	if (!isset($xss_hash)) {
		// version_compare() instead of comparing the version string to a float
		if (version_compare(PHP_VERSION, '4.2.0', '>=')) {
			mt_srand();
		} else {
			mt_srand(hexdec(substr(md5(microtime()), -8)) & 0x7fffffff);
		}
		$xss_hash = md5((string) (time() + mt_rand(0, 1999999999)));
	}

	return $xss_hash;
}

// Remove Invisible Characters
// Strips every control character except newline (10), carriage return (13)
// and horizontal tab (09), both as the actual character and in its URL
// encoded (%XX, hexadecimal) form. Printable characters such as %20 (space)
// or %28 "(" are left alone.
function _remove_invisible_characters($str) {

	static $non_displayables;

	if (!isset($non_displayables)) {
		// URL encoding is hexadecimal: decimal 11, 12, 14, 15 are %0B, %0C, %0E, %0F
		// and decimal 16-31 are %10-%1F
		$non_displayables = array(
			'/%0[0-8bcef]/i',			// url encoded 00-08, 11, 12, 14, 15
			'/%1[0-9a-f]/i',			// url encoded 16-31
			'/[\x00-\x08]/',			// 00-08
			'/\x0b/', '/\x0c/',			// 11, 12
			'/[\x0e-\x1f]/');			// 14-31
	}

	// repeat until stable, so sequences that only appear after a removal
	// (e.g. "%%0000") are caught as well
	do {
		$cleaned = $str;
		$str = preg_replace($non_displayables, '', $str);
	}
	while ($cleaned !== $str);

	return $str;
}

// Compact Exploded Words
// preg_replace_callback() helper for xss_clean(): $matches[1] is the exploded
// word (whitespace is removed from it), $matches[2] the non-word character
// that followed it (kept as-is).
function _compact_exploded_words($matches) {

	return preg_replace('/\s+/s', '', $matches[1]).$matches[2];
}

// Sanitize Naughty HTML
// preg_replace_callback() helper for xss_clean(): rebuilds the matched tag
// with its angle brackets converted to entities, so it is shown, not parsed.
function _sanitize_naughty_html($matches) {

	// encode opening brace
	$str = '&lt;'.$matches[1].$matches[2].$matches[3];

	// encode captured opening or closing brace to prevent recursive vectors
	$str .= str_replace(array('>', '<'), array('&gt;', '&lt;'), $matches[4]);

	return $str;
}

// JS Link Removal
// preg_replace_callback() helper for xss_clean(): $match[0] is a complete
// <a ...> tag, $match[1] its attribute text. The attributes are filtered and
// any href leading up to a scripting keyword is cut out.
function _js_link_removal($match) {
	$attributes = _filter_attributes(str_replace(array('<', '>'), '', $match[1]));
	return str_replace($match[1], preg_replace("#href=.*?(alert\(|alert&\#40;|javascript\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes), $match[0]);
}

// JS Image Removal
// Same as _js_link_removal(), but for the src attribute of <img ...> tags.
function _js_img_removal($match) {
	$attributes = _filter_attributes(str_replace(array('<', '>'), '', $match[1]));
	return str_replace($match[1], preg_replace("#src=.*?(alert\(|alert&\#40;|javascript\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes), $match[0]);
}

// Attribute Conversion
// preg_replace_callback() helper for xss_clean(): encodes angle brackets that
// appear inside a quoted attribute value.
function _convert_attribute($match) {
	return str_replace(array('>', '<'), array('&gt;', '&lt;'), $match[0]);
}

// HTML Entity Decode Callback
// preg_replace_callback() helper for xss_clean(): decodes the character
// entities inside a tag, using the charset from the global $settings array.
function _html_entity_decode_callback($match) {

	global $settings;

	// the second parameter of html_entity_decode() is the quote style,
	// the charset is the third one
	return html_entity_decode($match[0], ENT_COMPAT, strtoupper($settings['charset']));
}

// Filters tag attributes for consistency and safety
// Keeps only the well-formed, quoted attributes of a tag.
//
// $str    : the attribute part of a tag (the text between the tag name and ">")
// returns : every name="value" / name='value' pair found, concatenated in their
//           original order and including the whitespace in front of each one;
//           anything else (e.g. unquoted attributes) is dropped. An empty
//           string is returned when no quoted attribute is present.
function _filter_attributes($str) {

	$found = array();

	// nothing usable in there? then nothing is kept
	if (!preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $found)) {
		return '';
	}

	// glue the complete matches back together
	return implode('', $found[0]);
}
Note: See TracChangeset
for help on using the changeset viewer.
