svnno****@sourc*****
svnno****@sourc*****
Sun Mar 2 03:51:37 JST 2008
Revision: 3431 http://svn.sourceforge.jp/cgi-bin/viewcvs.cgi?root=kazehakase&view=rev&rev=3431 Author: pal_gene Date: 2008-03-02 03:51:36 +0900 (Sun, 02 Mar 2008) Log Message: ----------- * Generalize the search expression entered from the toolbar * Convert zenkaku (full-width) white space (using UTF-8 normalization and composition) * Support the 'site:' expression. Usage: * Show only the google.com URIs from history that contain the word "kazehakase": history-search:kazehakase site:google.com * Exclude google.com URIs from the history entries that contain the word "kazehakase": history-search:kazehakase -site:google.com Modified Paths: -------------- kazehakase/trunk/module/search/kz-hyper-estraier-search.c kazehakase/trunk/module/search/kz-search-common.h Modified: kazehakase/trunk/module/search/kz-hyper-estraier-search.c =================================================================== --- kazehakase/trunk/module/search/kz-hyper-estraier-search.c 2008-03-01 10:12:11 UTC (rev 3430) +++ kazehakase/trunk/module/search/kz-hyper-estraier-search.c 2008-03-01 18:51:36 UTC (rev 3431) @@ -258,15 +258,16 @@ G_OBJECT_CLASS(parent_class)->dispose(object); } + static gchar * create_search_result_html (KzSearch *search, const gchar *text) { ESTCOND *cond; CBLIST *highlights; int *results, n_results, i; - gchar *except_word, *tmp; + gchar *except_word, *tmp, *utf8; gchar **texts; - GString *html, *phrase, *desc_str; + GString *html, *phrase, *attr_uri_phrase, *desc_str; gint num_summary = 128, max_results = 20, half_of_summary; KzHyperEstraierSearch *he_search; @@ -279,54 +280,88 @@ cond = est_cond_new(); highlights = cblistopen(); - tmp = g_utf8_casefold(text, -1); - texts = g_strsplit(tmp, " ", -1); + /* convert from kz_conf except keyword */ + phrase = g_string_new(text); + except_word = KZ_CONF_GET_STR("History", "except_keyword"); + if (except_word && *except_word) + { + texts = g_strsplit(except_word, ",", -1); + g_free(except_word); + tmp = g_strjoinv(" -", texts); + g_strfreev(texts); + g_string_append(phrase, " -"); + 
g_string_append(phrase, tmp); + g_free(tmp); + } + utf8 = g_locale_to_utf8(phrase->str, -1, NULL, NULL, NULL); + g_string_free(phrase, TRUE); + tmp = g_utf8_normalize(utf8, -1, G_NORMALIZE_ALL_COMPOSE); + g_free(utf8); + utf8 = g_utf8_strdown(tmp, -1); g_free(tmp); + + /* start word split */ + texts = g_strsplit(utf8, " ", -1); + g_free(utf8); phrase = g_string_sized_new(0); for (i = 0; texts[i]; i++) { - if(*texts[i] == '-') + tmp = texts[i]; + guint flag = 0; + gint cond_i; + if(!tmp || !*tmp || g_unichar_isspace(*tmp)) continue; + /* extract supported condition [ALL KZ_HISTORY_SEARCH CODE] */ + for(cond_i = 0; cond_i < KZ_SEARCH_FLAG_SIZE; cond_i++) { - g_string_append(phrase, " " ESTOPDIFF " "); - g_string_append(phrase, texts[i]+1); + if(g_str_has_prefix(tmp, KZ_SEARCH_FLAGS[cond_i].exp)) + { + flag |= KZ_SEARCH_FLAGS[cond_i].mask; + tmp += strlen(KZ_SEARCH_FLAGS[cond_i].exp); + } } - else{ - cblistpush(highlights, texts[i], -1); - g_string_append(phrase, " " ESTOPISECT " "); - g_string_append(phrase, texts[i]); + + /* write out for search engine expression [ENGINE SPECIFIC CODE] */ + switch(flag & KZ_SEARCH_FLAG_GROUP_OPTION) { + case KZ_SEARCH_FLAG_MASK_SITE: + attr_uri_phrase = g_string_sized_new(0); + g_string_printf(attr_uri_phrase, + "%s %s %s", + ESTDATTRURI, + (flag & KZ_SEARCH_FLAG_MASK_NOT)?"!ISTRINC":"ISTRINC", + tmp); + est_cond_add_attr(cond, attr_uri_phrase->str); + g_string_free(attr_uri_phrase, TRUE); + break; + default: + if(flag & KZ_SEARCH_FLAG_MASK_NOT) + { + g_string_append(phrase, " " ESTOPDIFF " "); + } + else + { + g_string_append(phrase, " " ESTOPISECT " "); + cblistpush(highlights, tmp, -1); + } + g_string_append(phrase, tmp); } } g_strfreev(texts); - except_word = KZ_CONF_GET_STR("History", "except_keyword"); - if (except_word && *except_word) - { - tmp = g_utf8_casefold(except_word, -1); - g_free(except_word); - texts = g_strsplit(tmp, ",", -1); - g_free(tmp); - i = 0; - while (texts[i]) - { - g_string_append(phrase, " " ESTOPDIFF " 
"); - g_string_append(phrase, texts[i]); - i++; - } - g_strfreev(texts); - } - est_cond_set_phrase(cond, phrase->str); g_string_free(phrase, TRUE); + /* start actual search */ KZ_CONF_GET("History", "num_summary", num_summary, INT); KZ_CONF_GET("History", "max_results", max_results, INT); half_of_summary = num_summary / 2; results = est_mtdb_search(he_search->db, cond, &n_results, NULL); - desc_str = g_string_sized_new(num_summary*2 - 1); /* almost typical in num_summary*2 */ - html = g_string_sized_new((num_summary*2+512)*max_results); /* typical in num_summary*2*max_results */ + /* almost in typical num_summary*2 */ + desc_str = g_string_sized_new(num_summary*2 - 1); + /* in typical (num_summary*2+html_tags)*max_results */ + html = g_string_sized_new((num_summary*2+512)*max_results); g_string_append(html, DTD"\n"); g_string_append(html, "<html>\n"); Modified: kazehakase/trunk/module/search/kz-search-common.h =================================================================== --- kazehakase/trunk/module/search/kz-search-common.h 2008-03-01 10:12:11 UTC (rev 3430) +++ kazehakase/trunk/module/search/kz-search-common.h 2008-03-01 18:51:36 UTC (rev 3431) @@ -21,6 +21,8 @@ #ifndef __KZ_SEARCH_COMMON_H__ #define __KZ_SEARCH_COMMON_H__ +#include <glib.h> + G_BEGIN_DECLS #define DTD "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">" @@ -47,4 +49,30 @@ G_END_DECLS + +typedef struct { + unsigned int mask; + char *exp; +} KZ_SEARCH_FLAG; + +#define KZ_SEARCH_FLAG_GROUP_OPERATOR 0x0000000f +#define KZ_SEARCH_FLAG_MASK_NOT 0x00000001 +#define KZ_SEARCH_FLAG_GROUP_OPTION 0x000000f0 /* exclusive */ +#define KZ_SEARCH_FLAG_MASK_SITE 0x00000010 + +//NOTE: synchronize FLAG_NUMBER and FLAGS order +enum KZ_SEARCH_FLAG_NUMBER { + KZ_SEARCH_FLAG_NOT, + KZ_SEARCH_FLAG_SITE +}; + +KZ_SEARCH_FLAG KZ_SEARCH_FLAGS[] = { + { KZ_SEARCH_FLAG_MASK_NOT, "-"}, + { KZ_SEARCH_FLAG_MASK_SITE, "site:"} +}; + +#define KZ_SEARCH_FLAG_SIZE 
(sizeof(KZ_SEARCH_FLAGS)/sizeof(KZ_SEARCH_FLAG)) + + + #endif /* __KZ_SEARCH_COMMON_H__ */