修订版 | 24d41c08b07aad809d778766c26d41217759727a (tree) |
---|---|
时间 | 2017-09-20 00:35:36 |
作者 | umorigu <umorigu@gmai...> |
Committer | umorigu |
Improve author line related proc / Speed up get_source raw
@@ -15,9 +15,16 @@ define('PKWK_MAXSHOW_CACHE', 'recent.dat'); | ||
15 | 15 | // AutoLink |
16 | 16 | define('PKWK_AUTOLINK_REGEX_CACHE', 'autolink.dat'); |
17 | 17 | |
18 | -// Get source(wiki text) data of the page | |
19 | -// Returns FALSE if error occurerd | |
20 | -function get_source($page = NULL, $lock = TRUE, $join = FALSE) | |
18 | +/** | |
19 | + * Get source(wiki text) data of the page | |
20 | + * | |
21 | + * @param $page page name | |
22 | + * @param $lock lock | |
23 | + * @param $join true: return string, false: return array of string | |
24 | + * @param $raw true: return file content as-is | |
25 | + * @return FALSE if error occurred | |
26 | + */ | |
27 | +function get_source($page = NULL, $lock = TRUE, $join = FALSE, $raw = FALSE) | |
21 | 28 | { |
22 | 29 | //$result = NULL; // File is not found |
23 | 30 | $result = $join ? '' : array(); |
@@ -44,6 +51,9 @@ function get_source($page = NULL, $lock = TRUE, $join = FALSE) | ||
44 | 51 | } else { |
45 | 52 | $result = fread($fp, $size); |
46 | 53 | if ($result !== FALSE) { |
54 | + if ($raw) { | |
55 | + return $result; | |
56 | + } | |
47 | 57 | // Removing Carriage-Return |
48 | 58 | $result = str_replace("\r", '', $result); |
49 | 59 | } |
@@ -204,16 +214,54 @@ function remove_author_info($wikitext) | ||
204 | 214 | return preg_replace('/^\s*#author\([^\n]*(\n|$)/m', '', $wikitext); |
205 | 215 | } |
206 | 216 | |
207 | -function remove_author_lines($lines) | |
217 | +/** | |
218 | + * Remove author line from wikitext | |
219 | + */ | |
220 | +function remove_author_header($wikitext) | |
208 | 221 | { |
209 | - $author_head = '#author('; | |
210 | - $len = strlen($author_head); | |
211 | - for ($i = 0; $i < 5; $i++) { | |
212 | - if (substr($lines[$i], 0, $len) === $author_head) { | |
213 | - unset($lines[$i]); | |
222 | + $start = 0; | |
223 | + while (($pos = strpos($wikitext, "\n", $start)) != false) { | |
224 | + $line = substr($wikitext, $start, $pos); | |
225 | + $m = null; | |
226 | + if (preg_match('/^#author\(/', $line, $m)) { | |
227 | + // Found #author line; remove this line only | |
228 | + if ($start === 0) { | |
229 | + return substr($wikitext, $pos + 1); | |
230 | + } else { | |
231 | + return substr($wikitext, 0, $start - 1) . | |
232 | + substr($wikitext, $pos + 1); | |
233 | + } | |
234 | + } else if (preg_match('/^#freeze(\W|$)/', $line, $m)) { | |
235 | + // Found #freeze still in header | |
236 | + } else { | |
237 | + // other line, #author not found | |
238 | + return $wikitext; | |
239 | + } | |
240 | + $start = $pos + 1; | |
241 | + } | |
242 | + return $wikitext; | |
243 | +} | |
244 | + | |
245 | +/** | |
246 | + * Get author info from wikitext | |
247 | + */ | |
248 | +function get_author_info($wikitext) | |
249 | +{ | |
250 | + $start = 0; | |
251 | + while (($pos = strpos($wikitext, "\n", $start)) != false) { | |
252 | + $line = substr($wikitext, $start, $pos); | |
253 | + $m = null; | |
254 | + if (preg_match('/^#author\(/', $line, $m)) { | |
255 | + return $line; | |
256 | + } else if (preg_match('/^#freeze(\W|$)/', $line, $m)) { | |
257 | + // Found #freeze still in header | |
258 | + } else { | |
259 | + // other line, #author not found | |
260 | + return false; | |
214 | 261 | } |
262 | + $start = $pos + 1; | |
215 | 263 | } |
216 | - return $lines; | |
264 | + return false; | |
217 | 265 | } |
218 | 266 | |
219 | 267 | function get_date_atom($timestamp) |
@@ -346,8 +346,8 @@ function do_search($word, $type = 'AND', $non_format = FALSE, $base = '') | ||
346 | 346 | |
347 | 347 | // Search for page contents |
348 | 348 | foreach ($keys as $key) { |
349 | - $lines = remove_author_lines(get_source($page, TRUE, FALSE)); | |
350 | - $b_match = preg_match($key, join('', $lines)); | |
349 | + $body = get_source($page, TRUE, TRUE, TRUE); | |
350 | + $b_match = preg_match($key, remove_author_header($body)); | |
351 | 351 | if ($b_type xor $b_match) break; // OR |
352 | 352 | } |
353 | 353 | if ($b_match) continue; |
@@ -266,8 +266,8 @@ function links_do_search_page($word) | ||
266 | 266 | $b_match = FALSE; |
267 | 267 | // Search for page contents |
268 | 268 | foreach ($keys as $key) { |
269 | - $lines = remove_author_lines(get_source($page, TRUE, FALSE)); | |
270 | - $b_match = preg_match($key, join('', $lines)); | |
269 | + $body = get_source($page, TRUE, TRUE, TRUE); | |
270 | + $b_match = preg_match($key, remove_author_header($body)); | |
271 | 271 | if (! $b_match) break; // OR |
272 | 272 | } |
273 | 273 | if ($b_match) continue; |
@@ -106,10 +106,8 @@ function plugin_search2_do_search($query_text, $base, $start_index) | ||
106 | 106 | $saved_scan_start_index = $scan_page_index; |
107 | 107 | } |
108 | 108 | // Search for page name and contents |
109 | - $raw_lines = get_source($page, TRUE, FALSE); | |
110 | - $lines = remove_author_lines($raw_lines); | |
111 | - $body = join('', $raw_lines); | |
112 | - $target = $page . '\n' . join('', $lines); | |
109 | + $body = get_source($page, TRUE, TRUE, TRUE); | |
110 | + $target = $page . "\n" . remove_author_header($body); | |
113 | 111 | foreach ($keys as $key) { |
114 | 112 | $b_match = preg_match($key, $target); |
115 | 113 | if ($b_type_and xor $b_match) break; // OR |
@@ -123,7 +121,7 @@ function plugin_search2_do_search($query_text, $base, $start_index) | ||
123 | 121 | 'pagename_only' => 1); |
124 | 122 | } else { |
125 | 123 | $found_pages[] = array('name' => (string)$page, |
126 | - 'url' => get_page_uri($page), 'body' => (string)$body); | |
124 | + 'url' => get_page_uri($page), 'body' => (string)$body); | |
127 | 125 | } |
128 | 126 | } |
129 | 127 | $last_read_page_name = $page; |