• R/O
  • HTTP
  • SSH
  • HTTPS

提交

标签
No Tags

Frequently used words (click to add to your profile)

java c++ android linux c# windows objective-c cocoa 誰得 qt python php ruby game gui bathyscaphe c 計画中(planning stage) 翻訳 omegat framework twitter dom test vb.net directx ゲームエンジン btron arduino previewer

Commit MetaInfo

修订版: 1cca23a1aa055699937396aa3f773124b9d3fc79 (tree)
时间: 2007-08-26 23:27:02
作者: henoheno <henoheno>
Committer: henoheno

Log Message

$Id: spam_pickup.php,v 1.57 2007/08/26 14:22:16 henoheno Exp $
spam_uri_pickup_preprocess(): not to rawurldecode(), not to decode '%20'
spam_uri_pickup_preprocess(): Added some

更改概述

差异

--- a/lib/spam_pickup.php
+++ b/lib/spam_pickup.php
@@ -1,5 +1,5 @@
11 <?php
2-// $Id: spam_pickup.php,v 1.2 2007-08-18 14:47:13 henoheno Exp $
2+// $Id: spam_pickup.php,v 1.3 2007-08-26 14:27:02 henoheno Exp $
33 // Copyright (C) 2006-2007 PukiWiki Developers Team
44 // License: GPL v2 or (at your option) any later version
55 //
@@ -23,7 +23,7 @@ function uri_pickup($string = '')
2323 preg_match_all(
2424 // scheme://userinfo@host:port/path/or/pathinfo/maybefile.and?query=string#fragment
2525 // Refer RFC3986 (Regex below is not strict)
26- '#(\b[a-z][a-z0-9.+-]{1,8}):[/\\\]+' . // 1: Scheme
26+ '#(\b[a-z][a-z0-9.+-]{1,8}):[/\\\]+' . // 1: Scheme
2727 '(?:' .
2828 '([^\s<>"\'\[\]/\#?@]*)' . // 2: Userinfo (Username)
2929 '@)?' .
@@ -31,7 +31,7 @@ function uri_pickup($string = '')
3131 // 3: Host
3232 '\[[0-9a-f:.]+\]' . '|' . // IPv6([colon-hex and dot]): RFC2732
3333 '(?:[0-9]{1,3}\.){3}[0-9]{1,3}' . '|' . // IPv4(dot-decimal): 001.22.3.44
34- '[a-z0-9_-][a-z0-9_.-]+[a-z0-9_-]' . // hostname(FQDN) : foo.example.org
34+ '[a-z0-9_-][a-z0-9_.-]+[a-z0-9_-]' . // hostname(FQDN) : foo.example.org
3535 ')' .
3636 '(?::([0-9]*))?' . // 4: Port
3737 '((?:/+[^\s<>"\'\[\]/\#]+)*/+)?' . // 5: Directory path or path-info
@@ -115,7 +115,6 @@ function uri_pickup_implode($uri = array())
115115 return implode('', $tmp);
116116 }
117117
118-
119118 // ---------------------
120119 // URI normalization
121120
@@ -656,7 +655,7 @@ function _preg_replace_callback_domain_exposure($matches = array())
656655 return $result;
657656 }
658657
659-// Preprocess: rawurldecode() and adding space(s) and something
658+// Preprocess: minor-rawurldecode() and adding space(s) and something
660659 // to detect/count some URIs _if possible_
661660 // NOTE: It's maybe danger to var_dump(result). [e.g. 'javascript:']
662661 // [OK] http://victim.example.org/?site:nasty.example.org
@@ -667,7 +666,20 @@ function spam_uri_pickup_preprocess($string = '', $method = array())
667666 {
668667 if (! is_string($string)) return '';
669668
670- $string = spam_uri_removing_hocus_pocus(rawurldecode($string), $method);
669+ // rawurldecode(), just to catch encoded 'http://path/to/file', not to change '%20' to ' '
670+ $string = strtr(
671+ $string,
672+ array(
673+ '%3A' => ':',
674+ '%3a' => ':',
675+ '%2F' => '/',
676+ '%2f' => '/',
677+ '%5C' => '\\',
678+ '%5c' => '\\',
679+ )
680+ );
681+
682+ $string = spam_uri_removing_hocus_pocus($string, $method);
671683 //var_dump(htmlspecialchars($string));
672684
673685 // Domain exposure (simple)
@@ -676,15 +688,16 @@ function spam_uri_pickup_preprocess($string = '', $method = array())
676688 $string = preg_replace(
677689 '#h?ttp://' .
678690 '(' .
679- 'ime\.nu/' . '|' . // 2ch.net
680- 'ime\.st/' . '|' . // 2ch.net
691+ 'ime\.(?:nu|st)/' . '|' . // 2ch.net
681692 'link\.toolbot\.com/' . '|' .
682693 'urlx\.org/' . '|' .
683694 'big5.51job.com/gate/big5/' . '|' .
684695 'big5.china.com/gate/big5/' . '|' .
696+ 'big5.shippingchina.com:8080/' . '|' .
685697 'big5.xinhuanet.com/gate/big5/' . '|' .
686698 'bhomiyo.com/en.xliterate/' . '|' .
687- 'google.com/translate_c\?u=(?:http://)?' .
699+ 'google.com/translate_c\?u=(?:http://)?' . '|' .
700+ 'web.archive.org/web/2[^/]*/(?:http://)?' .
688701 ')' .
689702 '([a-z0-9.%_-]+\.[a-z0-9.%_-]+)' . // nasty.example.org
690703 '#i',
@@ -711,7 +724,7 @@ function spam_uri_pickup_preprocess($string = '', $method = array())
711724 ')' .
712725 '/' .
713726 '([a-z0-9?=&.%_/\'\\\+-]+)' . // 3:path/?query=foo+bar+
714- '\bsite:([a-z0-9.%_-]+\.[a-z0-9.%_-]+)' . // 4:site:nasty.example.com
727+ '(?:\b|%20)site:([a-z0-9.%_-]+\.[a-z0-9.%_-]+)' . // 4:site:nasty.example.com
715728 '()' . // 5:Preserve or remove?
716729 '#i',
717730 ),