• R/O
  • HTTP
  • SSH
  • HTTPS

提交

标签
No Tags

Frequently used words (click to add to your profile)

java c++ android linux c# windows objective-c cocoa 誰得 qt python php ruby game gui bathyscaphe c 計画中(planning stage) 翻訳 omegat framework twitter dom test vb.net directx ゲームエンジン btron arduino previewer

Commit MetaInfo

修订版: ba2d3f43ff57f97907be9b5fdccef11aa6e30fda (tree)
时间: 2007-09-16 01:16:12
作者: henoheno <henoheno>
Committer: henoheno

Log Message

$Id: spam_pickup.php,v 1.60 2007/09/15 15:55:29 henoheno Exp $
* spam_uri_removing_hocus_pocus(): Remove/Replace quoted-spaces within tags
* [img][email]
* technorati.com/blogs/

更改概述

差异

--- a/lib/spam_pickup.php
+++ b/lib/spam_pickup.php
@@ -1,5 +1,5 @@
11 <?php
2-// $Id: spam_pickup.php,v 1.3 2007-08-26 14:27:02 henoheno Exp $
2+// $Id: spam_pickup.php,v 1.4 2007-09-15 16:16:12 henoheno Exp $
33 // Copyright (C) 2006-2007 PukiWiki Developers Team
44 // License: GPL v2 or (at your option) any later version
55 //
@@ -547,7 +547,7 @@ function area_pickup($string = '', $method = array())
547547 // [OK] [link]http://nasty.example.com/[/link]
548548 // [OK] [url=http://nasty.example.com]visit http://nasty.example.com/[/url]
549549 // [OK] [link http://nasty.example.com/]buy something[/link]
550- $regex = '#\[(url|link)\b[^\]]*\].*?\[/\1\b[^\]]*(\])#is';
550+ $regex = '#\[(url|link|img|email)\b[^\]]*\].*?\[/\1\b[^\]]*(\])#is';
551551 if (isset($method['area_bbcode'])) {
552552 $areas = array();
553553 $count = isset($method['asap']) ?
@@ -605,9 +605,12 @@ function area_measure($areas, & $array, $belief = -1, $a_key = 'area', $o_key =
605605 // ---------------------
606606 // Spam-uri pickup
607607
608-// Preprocess: Removing uninterest part for URI detection
608+// Preprocess: Removing/Modifying uninterest part for URI detection
609609 function spam_uri_removing_hocus_pocus($binary = '', $method = array())
610610 {
611+ $from = $to = array();
612+
613+ // Remove sequential spaces and too short lines
611614 $length = 4 ; // 'http'(1) and '://'(2) and 'fqdn'(1)
612615 if (is_array($method)) {
613616 // '<a'(2) or 'href='(5) or '>'(1) or '</a>'(4)
@@ -616,14 +619,17 @@ function spam_uri_removing_hocus_pocus($binary = '', $method = array())
616619 isset($method['area_bbcode']) || isset($method['uri_bbcode']))
617620 $length = 1; // Seems not effective
618621 }
619-
620- // Removing sequential spaces and too short lines
621622 $binary = strings($binary, $length, TRUE, FALSE); // Multibyte NOT needed
622623
624+ // Remove/Replace quoted-spaces within tags
625+ $from[] = '#(<\w+ [^<>]*?\w ?= ?")([^"<>]*? [^"<>]*)("[^<>]*?>)#ie';
626+ $to[] = "'$1' . str_replace(' ' , '%20' , trim('$2')) . '$3'";
627+
623628 // Remove words (has no '<>[]:') between spaces
624- $binary = preg_replace('/[ \t][\w.,()\ \t]+[ \t]/', ' ', $binary);
629+ $from[] = '/[ \t][\w.,()\ \t]+[ \t]/';
630+ $to[] = ' ';
625631
626- return $binary;
632+ return preg_replace($from, $to, $binary);
627633 }
628634
629635 // Preprocess: Domain exposure callback (See spam_uri_pickup_preprocess())
@@ -680,7 +686,6 @@ function spam_uri_pickup_preprocess($string = '', $method = array())
680686 );
681687
682688 $string = spam_uri_removing_hocus_pocus($string, $method);
683- //var_dump(htmlspecialchars($string));
684689
685690 // Domain exposure (simple)
686691 // http://victim.example.org/nasty.example.org/path#frag
@@ -697,7 +702,8 @@ function spam_uri_pickup_preprocess($string = '', $method = array())
697702 'big5.xinhuanet.com/gate/big5/' . '|' .
698703 'bhomiyo.com/en.xliterate/' . '|' .
699704 'google.com/translate_c\?u=(?:http://)?' . '|' .
700- 'web.archive.org/web/2[^/]*/(?:http://)?' .
705+ 'web.archive.org/web/2[^/]*/(?:http://)?' . '|' .
706+ 'technorati.com/blogs/' .
701707 ')' .
702708 '([a-z0-9.%_-]+\.[a-z0-9.%_-]+)' . // nasty.example.org
703709 '#i',