[Groonga-commit] groonga/groonga [master] Add grn_tokenizer_have_delimiter()


Kouhei Sutou null+****@clear*****
Fri Nov 9 13:56:02 JST 2012


Kouhei Sutou	2012-11-09 13:56:02 +0900 (Fri, 09 Nov 2012)

  New Revision: ba518d1e20a3bf85b596b717885604b6a80e1121
  https://github.com/groonga/groonga/commit/ba518d1e20a3bf85b596b717885604b6a80e1121

  Log:
    Add grn_tokenizer_have_delimiter()

  Modified files:
    include/groonga/tokenizer.h
    lib/tokenizer.c
    test/unit/core/test-tokenizer.c

  Modified: include/groonga/tokenizer.h (+10 -0)
===================================================================
--- include/groonga/tokenizer.h    2012-11-09 13:38:10 +0900 (0d0a899)
+++ include/groonga/tokenizer.h    2012-11-09 13:56:02 +0900 (37f9147)
@@ -54,6 +54,16 @@ grn_bool grn_tokenizer_is_delimiter(grn_ctx *ctx,
                                     grn_encoding encoding);
 
 /*
+  grn_tokenizer_have_delimiter() returns whether the string specified
+  by `str_ptr' and `str_length' contains the special delimiter
+  character or not.
+ */
+grn_bool grn_tokenizer_have_delimiter(grn_ctx *ctx,
+                                      const char *str_ptr,
+                                      unsigned int str_length,
+                                      grn_encoding encoding);
+
+/*
   grn_tokenizer_query is a structure for storing a query. See the following
   functions.
  */
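
  Below is a minimal usage sketch, not part of the commit: it assumes a
  standalone C program linked against libgroonga with this commit
  applied; the file name and output format are made up for
  illustration. The delimiter byte sequence (U+FFFE, 0xEF 0xBF 0xBE in
  UTF-8) is taken from the test data further down.

    /* have_delimiter_example.c: illustrative only, not from the commit */
    #include <stdio.h>
    #include <string.h>

    #include <groonga.h>
    #include <groonga/tokenizer.h>

    int
    main(void)
    {
      grn_ctx ctx;
      grn_encoding encoding = GRN_ENC_UTF8;
      /* "a" U+FFFE "b": the middle character is the special delimiter */
      const char *with_delimiter = "a\xef\xbf\xbe" "b";
      const char *without_delimiter = "ab";

      grn_init();
      grn_ctx_init(&ctx, 0);
      GRN_CTX_SET_ENCODING(&ctx, encoding);

      printf("with delimiter:    %d\n",
             grn_tokenizer_have_delimiter(&ctx, with_delimiter,
                                          strlen(with_delimiter), encoding));
      printf("without delimiter: %d\n",
             grn_tokenizer_have_delimiter(&ctx, without_delimiter,
                                          strlen(without_delimiter), encoding));

      grn_ctx_fin(&ctx);
      grn_fin();
      return 0;
    }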

  Modified: lib/tokenizer.c (+25 -0)
===================================================================
--- lib/tokenizer.c    2012-11-09 13:38:10 +0900 (061b7f6)
+++ lib/tokenizer.c    2012-11-09 13:56:02 +0900 (1b9085e)
@@ -100,6 +100,31 @@ grn_tokenizer_is_delimiter(grn_ctx *ctx, const char *str_ptr,
     binary_string[2] == 0xBE;
 }
 
+grn_bool
+grn_tokenizer_have_delimiter(grn_ctx *ctx, const char *str_ptr,
+                             unsigned int str_length, grn_encoding encoding)
+{
+  int char_length;
+  const char *current = str_ptr;
+  const char *end = str_ptr + str_length;
+
+  if (encoding != GRN_ENC_UTF8) {
+    return GRN_FALSE;
+  }
+
+  if (str_length == 0) {
+    return GRN_FALSE;
+  }
+
+  while ((char_length = grn_charlen_(ctx, current, end, encoding)) > 0) {
+    if (grn_tokenizer_is_delimiter(ctx, current, char_length, encoding)) {
+      return GRN_TRUE;
+    }
+    current += char_length;
+  }
+  return GRN_FALSE;
+}
+
 grn_tokenizer_query *
 grn_tokenizer_query_create(grn_ctx *ctx, int num_args, grn_obj **args)
 {
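
  For reference (not part of the commit), the "special delimiter
  character" that grn_tokenizer_is_delimiter() looks for in UTF-8 text
  is U+FFFE, encoded as the three bytes 0xEF 0xBF 0xBE; the helper name
  below is hypothetical and only restates that check:

    #include <groonga.h>

    /* Illustrative sketch: returns a true value when the single
       character at str_ptr (str_length bytes long) is U+FFFE in UTF-8. */
    static grn_bool
    is_ufffe_in_utf8(const char *str_ptr, unsigned int str_length)
    {
      const unsigned char *bytes = (const unsigned char *)str_ptr;
      return str_length == 3 &&
             bytes[0] == 0xEF &&
             bytes[1] == 0xBF &&
             bytes[2] == 0xBE;
    }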

  Modified: test/unit/core/test-tokenizer.c (+38 -0)
===================================================================
--- test/unit/core/test-tokenizer.c    2012-11-09 13:38:10 +0900 (433ec5c)
+++ test/unit/core/test-tokenizer.c    2012-11-09 13:56:02 +0900 (85da507)
@@ -25,6 +25,8 @@
 
 void data_is_delimiter(void);
 void test_is_delimiter(gconstpointer data);
+void data_have_delimiter(void);
+void test_have_delimiter(gconstpointer data);
 
 static grn_ctx context;
 static grn_obj *db;
@@ -85,3 +87,39 @@ test_is_delimiter(gconstpointer data)
                                                 encoding));
   }
 }
+
+void
+data_have_delimiter(void)
+{
+#define ADD_DATUM(label, expected, input)                               \
+  gcut_add_datum(label,                                                 \
+                 "expected", G_TYPE_BOOLEAN, expected,                  \
+                 "input",    G_TYPE_STRING,  input,                     \
+                 NULL)
+
+#define UFFFE_IN_UTF8 "\xef\xbf\xbe"
+
+  ADD_DATUM("have",     GRN_TRUE,  "a" UFFFE_IN_UTF8 "b");
+  ADD_DATUM("not have", GRN_FALSE, "ab");
+
+#undef UFFFE_IN_UTF8
+
+#undef ADD_DATUM
+}
+
+void
+test_have_delimiter(gconstpointer data)
+{
+  const gchar *input;
+  grn_encoding encoding = GRN_ENC_UTF8;
+
+  GRN_CTX_SET_ENCODING(&context, encoding);
+  input = gcut_data_get_string(data, "input");
+  if (gcut_data_get_boolean(data, "expected")) {
+    cut_assert_true(grn_tokenizer_have_delimiter(&context, input, strlen(input),
+                                                 encoding));
+  } else {
+    cut_assert_false(grn_tokenizer_have_delimiter(&context, input, strlen(input),
+                                                  encoding));
+  }
+}


