[Groonga-commit] groonga/groonga at d0873c7 [master] tokenize: add valid normalizer check

Kouhei Sutou null+****@clear*****
Thu Jun 5 23:30:25 JST 2014


Kouhei Sutou	2014-06-05 23:30:25 +0900 (Thu, 05 Jun 2014)

  New Revision: d0873c7056b5a98d704569bbf82daaa5af1865bc
  https://github.com/groonga/groonga/commit/d0873c7056b5a98d704569bbf82daaa5af1865bc

  Message:
    tokenize: add valid normalizer check

  Added files:
    test/command/suite/tokenize/invalid/normalizer/invalid.expected
    test/command/suite/tokenize/invalid/normalizer/invalid.test
    test/command/suite/tokenize/invalid/normalizer/nonexistent.expected
    test/command/suite/tokenize/invalid/normalizer/nonexistent.test
  Removed files:
    test/command/suite/tokenize/invalid/normalizer/unknown.expected
    test/command/suite/tokenize/invalid/normalizer/unknown.test
  Modified files:
    lib/proc.c

  Modified: lib/proc.c (+14 -1)
===================================================================
--- lib/proc.c    2014-06-05 23:25:14 +0900 (3f86c7f)
+++ lib/proc.c    2014-06-05 23:30:25 +0900 (c4fb1ea)
@@ -3203,11 +3203,24 @@ create_lexicon_for_tokenize(grn_ctx *ctx,
     if (!normalizer) {
       grn_obj_unlink(ctx, tokenizer);
       ERR(GRN_INVALID_ARGUMENT,
-          "[tokenize] unknown normalizer: <%.*s>",
+          "[tokenize] nonexistent normalizer: <%.*s>",
           (int)GRN_TEXT_LEN(normalizer_name),
           GRN_TEXT_VALUE(normalizer_name));
       return NULL;
     }
+
+    if (!is_normalizer(ctx, normalizer)) {
+      grn_obj inspected;
+      grn_obj_unlink(ctx, tokenizer);
+      GRN_TEXT_INIT(&inspected, 0);
+      grn_inspect(ctx, &inspected, normalizer);
+      ERR(GRN_INVALID_ARGUMENT,
+          "[tokenize] not normalizer: %.*s",
+          (int)GRN_TEXT_LEN(&inspected),
+          GRN_TEXT_VALUE(&inspected));
+      GRN_OBJ_FIN(ctx, &inspected);
+      return NULL;
+    }
   }
 
   lexicon = grn_hash_create(ctx, NULL, GRN_TABLE_MAX_KEY_SIZE, 0,
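
  The guard added above calls is_normalizer(), a helper that already exists
  in lib/proc.c but is not part of this hunk. Below is a minimal sketch of
  what such a check could look like, assuming Groonga's internal grn_proc
  struct and the GRN_PROC_NORMALIZER proc type; the helper name comes from
  the diff, but this body is an illustration, not the committed
  implementation:

    /* Hypothetical sketch: relies on lib/proc.c's existing includes
       (e.g. db.h for the grn_proc struct definition). ctx is unused here
       but kept so the signature matches the call site in the diff. */
    static grn_bool
    is_normalizer(grn_ctx *ctx, grn_obj *object)
    {
      /* A normalizer is registered as a proc object... */
      if (object->header.type != GRN_PROC) {
        return GRN_FALSE;
      }
      /* ...whose proc type is GRN_PROC_NORMALIZER. A tokenizer such as
         TokenDelimit has GRN_PROC_TOKENIZER instead, so it is rejected
         here and reported via the "[tokenize] not normalizer" error
         above. */
      if (((grn_proc *)object)->type != GRN_PROC_NORMALIZER) {
        return GRN_FALSE;
      }
      return GRN_TRUE;
    }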

  Added: test/command/suite/tokenize/invalid/normalizer/invalid.expected (+15 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenize/invalid/normalizer/invalid.expected    2014-06-05 23:30:25 +0900 (e14ae9c)
@@ -0,0 +1,15 @@
+tokenize TokenBigram "aBcDe 123" TokenDelimit
+[
+  [
+    [
+      -22,
+      0.0,
+      0.0
+    ],
+    "[tokenize] not normalizer: #<proc:tokenizer TokenDelimit arguments:[$1, $2, $3]>"
+  ],
+  [
+
+  ]
+]
+#|e| [tokenize] not normalizer: #<proc:tokenizer TokenDelimit arguments:[$1, $2, $3]>

  Added: test/command/suite/tokenize/invalid/normalizer/invalid.test (+1 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenize/invalid/normalizer/invalid.test    2014-06-05 23:30:25 +0900 (103a497)
@@ -0,0 +1 @@
+tokenize TokenBigram "aBcDe 123" TokenDelimit

  Added: test/command/suite/tokenize/invalid/normalizer/nonexistent.expected (+15 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenize/invalid/normalizer/nonexistent.expected    2014-06-05 23:30:25 +0900 (7e8f06b)
@@ -0,0 +1,15 @@
+tokenize TokenBigram "aBcDe 123" NormalizerNonexistent
+[
+  [
+    [
+      -22,
+      0.0,
+      0.0
+    ],
+    "[tokenize] nonexistent normalizer: <NormalizerNonexistent>"
+  ],
+  [
+
+  ]
+]
+#|e| [tokenize] nonexistent normalizer: <NormalizerNonexistent>

  Added: test/command/suite/tokenize/invalid/normalizer/nonexistent.test (+1 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenize/invalid/normalizer/nonexistent.test    2014-06-05 23:30:25 +0900 (81a3129)
@@ -0,0 +1 @@
+tokenize TokenBigram "aBcDe 123" NormalizerNonexistent

  Deleted: test/command/suite/tokenize/invalid/normalizer/unknown.expected (+0 -3) 100644
===================================================================
--- test/command/suite/tokenize/invalid/normalizer/unknown.expected    2014-06-05 23:25:14 +0900 (b4b558f)
+++ /dev/null
@@ -1,3 +0,0 @@
-tokenize TokenBigram "aBcDe 123" NormalizerUnknown
-[[[-22,0.0,0.0],"[tokenize] unknown normalizer: <NormalizerUnknown>"],[]]
-#|e| [tokenize] unknown normalizer: <NormalizerUnknown>

  Deleted: test/command/suite/tokenize/invalid/normalizer/unknown.test (+0 -1) 100644
===================================================================
--- test/command/suite/tokenize/invalid/normalizer/unknown.test    2014-06-05 23:25:14 +0900 (29b9af2)
+++ /dev/null
@@ -1 +0,0 @@
-tokenize TokenBigram "aBcDe 123" NormalizerUnknown