[Groonga-commit] groonga/groonga at 7d11c2f [master] tokenize: add valid tokenizer check

Back to archive index

Kouhei Sutou null+****@clear*****
Thu Jun 5 23:34:56 JST 2014


Kouhei Sutou	2014-06-05 23:34:56 +0900 (Thu, 05 Jun 2014)

  New Revision: 7d11c2f6960f244ed0ed5c0408dd80beed668fa2
  https://github.com/groonga/groonga/commit/7d11c2f6960f244ed0ed5c0408dd80beed668fa2

  Message:
    tokenize: add valid tokenizer check

  Added files:
    test/command/suite/tokenize/invalid/tokenizer/invalid.expected
    test/command/suite/tokenize/invalid/tokenizer/invalid.test
    test/command/suite/tokenize/invalid/tokenizer/nonexistent.expected
    test/command/suite/tokenize/invalid/tokenizer/nonexistent.test
  Removed files:
    test/command/suite/tokenize/invalid/tokenizer/unknown.expected
    test/command/suite/tokenize/invalid/tokenizer/unknown.test
  Modified files:
    lib/proc.c

  Modified: lib/proc.c (+28 -1)
===================================================================
--- lib/proc.c    2014-06-05 23:30:25 +0900 (c4fb1ea)
+++ lib/proc.c    2014-06-05 23:34:56 +0900 (54adab5)
@@ -2958,6 +2958,20 @@ is_normalizer(grn_ctx *ctx, grn_obj *object)
   return GRN_TRUE;
 }
 
+static grn_bool
+is_tokenizer(grn_ctx *ctx, grn_obj *object)
+{
+  if (object->header.type != GRN_PROC) {
+    return GRN_FALSE;
+  }
+
+  if (grn_proc_get_type(ctx, object) != GRN_PROC_TOKENIZER) {
+    return GRN_FALSE;
+  }
+
+  return GRN_TRUE;
+}
+
 static const char *
 char_type_name(grn_char_type type)
 {
@@ -3190,12 +3204,25 @@ create_lexicon_for_tokenize(grn_ctx *ctx,
                           GRN_TEXT_LEN(tokenizer_name));
   if (!tokenizer) {
     ERR(GRN_INVALID_ARGUMENT,
-        "[tokenize] unknown tokenizer: <%.*s>",
+        "[tokenize] nonexistent tokenizer: <%.*s>",
         (int)GRN_TEXT_LEN(tokenizer_name),
         GRN_TEXT_VALUE(tokenizer_name));
     return NULL;
   }
 
+  if (!is_tokenizer(ctx, tokenizer)) {
+    grn_obj inspected;
+    GRN_TEXT_INIT(&inspected, 0);
+    grn_inspect(ctx, &inspected, tokenizer);
+    ERR(GRN_INVALID_ARGUMENT,
+        "[tokenize] not tokenizer: %.*s",
+        (int)GRN_TEXT_LEN(&inspected),
+        GRN_TEXT_VALUE(&inspected));
+    GRN_OBJ_FIN(ctx, &inspected);
+    grn_obj_unlink(ctx, tokenizer);
+    return NULL;
+  }
+
   if (GRN_TEXT_LEN(normalizer_name) > 0) {
     normalizer = grn_ctx_get(ctx,
                              GRN_TEXT_VALUE(normalizer_name),

  Added: test/command/suite/tokenize/invalid/tokenizer/invalid.expected (+15 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenize/invalid/tokenizer/invalid.expected    2014-06-05 23:34:56 +0900 (23fe362)
@@ -0,0 +1,15 @@
+tokenize NormalizerAuto "aBcDe 123"
+[
+  [
+    [
+      -22,
+      0.0,
+      0.0
+    ],
+    "[tokenize] not tokenizer: #<proc:normalizer NormalizerAuto arguments:[$1]>"
+  ],
+  [
+
+  ]
+]
+#|e| [tokenize] not tokenizer: #<proc:normalizer NormalizerAuto arguments:[$1]>

  Added: test/command/suite/tokenize/invalid/tokenizer/invalid.test (+1 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenize/invalid/tokenizer/invalid.test    2014-06-05 23:34:56 +0900 (cbba396)
@@ -0,0 +1 @@
+tokenize NormalizerAuto "aBcDe 123"

  Added: test/command/suite/tokenize/invalid/tokenizer/nonexistent.expected (+3 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenize/invalid/tokenizer/nonexistent.expected    2014-06-05 23:34:56 +0900 (ee16c98)
@@ -0,0 +1,3 @@
+tokenize TokenNonexistent "aBcDe 123"
+[[[-22,0.0,0.0],"[tokenize] nonexistent tokenizer: <TokenNonexistent>"],[]]
+#|e| [tokenize] nonexistent tokenizer: <TokenNonexistent>

  Added: test/command/suite/tokenize/invalid/tokenizer/nonexistent.test (+1 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenize/invalid/tokenizer/nonexistent.test    2014-06-05 23:34:56 +0900 (19a692f)
@@ -0,0 +1 @@
+tokenize TokenNonexistent "aBcDe 123"

  Deleted: test/command/suite/tokenize/invalid/tokenizer/unknown.expected (+0 -3) 100644
===================================================================
--- test/command/suite/tokenize/invalid/tokenizer/unknown.expected    2014-06-05 23:30:25 +0900 (edd2634)
+++ /dev/null
@@ -1,3 +0,0 @@
-tokenize TokenUnknown "aBcDe 123"
-[[[-22,0.0,0.0],"[tokenize] unknown tokenizer: <TokenUnknown>"],[]]
-#|e| [tokenize] unknown tokenizer: <TokenUnknown>

  Deleted: test/command/suite/tokenize/invalid/tokenizer/unknown.test (+0 -1) 100644
===================================================================
--- test/command/suite/tokenize/invalid/tokenizer/unknown.test    2014-06-05 23:30:25 +0900 (40dd10c)
+++ /dev/null
@@ -1 +0,0 @@
-tokenize TokenUnknown "aBcDe 123"
-------------- next part --------------
An HTML attachment was scrubbed...
Download



More information about the Groonga-commit mailing list
Back to archive index