[Groonga-commit] groonga/groonga [master] Add "normalize" command

Back to archive index

Kouhei Sutou null+****@clear*****
Wed Jan 23 18:33:56 JST 2013


Kouhei Sutou	2013-01-23 18:33:56 +0900 (Wed, 23 Jan 2013)

  New Revision: 3756d3cb667ed6bf66804f70849ef4879d4ceacb
  https://github.com/groonga/groonga/commit/3756d3cb667ed6bf66804f70849ef4879d4ceacb

  Log:
    Add "normalize" command
    
    It normalizes the specified string by the specified normalizer:
    
      normalize NormalizerAuto "aBcDe 123"
      [[0,0.0,0.0],"abcde 123"]
    
    It is useful to confirm normalizer behavior because it doesn't require
    creating a table.
    
    TODO:
      * Document it
      * Add tests for non-ASCII characters
      * Support flags

  Added files:
    test/command/suite/normalize/ascii.expected
    test/command/suite/normalize/ascii.test
  Modified files:
    lib/proc.c

  Modified: lib/proc.c (+57 -0)
===================================================================
--- lib/proc.c    2013-01-23 16:25:04 +0900 (c9fcc19)
+++ lib/proc.c    2013-01-23 18:33:56 +0900 (4d656cf)
@@ -2745,6 +2745,59 @@ proc_truncate(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
 }
 
 static grn_obj *
+proc_normalize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
+{ /* "normalize" command body: outputs VAR(1) normalized by the normalizer named in VAR(0). */
+  grn_obj *normalizer_name;
+  grn_obj *string;
+  /* VAR(0) and VAR(1) are declared as "normalizer" and "string" where the command is defined. */
+  normalizer_name = VAR(0);
+  string = VAR(1);
+  if (GRN_TEXT_LEN(normalizer_name) == 0) {
+    ERR(GRN_INVALID_ARGUMENT, "normalizer name is missing");
+    GRN_OUTPUT_CSTR("");
+    return NULL;
+  }
+  /* The name is non-empty: resolve it, normalize the input, then output the result. */
+  {
+    grn_obj *normalizer;
+    grn_obj *grn_string;
+    int flags = 0; /* TODO */
+    /* Look the normalizer up by name; a failed lookup reports an error and outputs "". */
+    normalizer = grn_ctx_get(ctx,
+                             GRN_TEXT_VALUE(normalizer_name),
+                             GRN_TEXT_LEN(normalizer_name));
+    if (!normalizer) {
+      ERR(GRN_INVALID_ARGUMENT,
+          "unknown normalizer: <%.*s>",
+          (int)GRN_TEXT_LEN(normalizer_name),
+          GRN_TEXT_VALUE(normalizer_name));
+      GRN_OUTPUT_CSTR("");
+      return NULL;
+    }
+    /* Open a string object over the input text; the normalizer is unlinked right after. */
+    grn_string = grn_string_open(ctx,
+                                 GRN_TEXT_VALUE(string), GRN_TEXT_LEN(string),
+                                 normalizer, flags);
+    grn_obj_unlink(ctx, normalizer);
+    /* NOTE(review): grn_string is used below without a NULL check -- confirm the callees tolerate NULL. */
+    {
+      const char *normalized;
+      unsigned int normalized_length_in_bytes;
+      /* Only the text and its byte length are requested; the last out-parameter is NULL. */
+      grn_string_get_normalized(ctx, grn_string,
+                                &normalized,
+                                &normalized_length_in_bytes,
+                                NULL);
+      GRN_OUTPUT_STR(normalized, normalized_length_in_bytes);
+    }
+    /* The normalized text has been output, so the string object is unlinked. */
+    grn_obj_unlink(ctx, grn_string);
+  }
+  /* NULL is returned on the success path as well as on both error paths. */
+  return NULL;
+}
+
+static grn_obj *
 func_rand(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
 {
   int val;
@@ -3584,6 +3637,10 @@ grn_db_init_builtin_query(grn_ctx *ctx)
   DEF_VAR(vars[0], "table");
   DEF_COMMAND("truncate", proc_truncate, 1, vars);
 
+  DEF_VAR(vars[0], "normalizer");
+  DEF_VAR(vars[1], "string");
+  DEF_COMMAND("normalize", proc_normalize, 2, vars);
+
   DEF_VAR(vars[0], "seed");
   grn_proc_create(ctx, "rand", -1, GRN_PROC_FUNCTION, func_rand,
                   NULL, NULL, 0, vars);

  Added: test/command/suite/normalize/ascii.expected (+2 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalize/ascii.expected    2013-01-23 18:33:56 +0900 (948c499)
@@ -0,0 +1,2 @@
+normalize NormalizerAuto "aBcDe 123"
+[[0,0.0,0.0],"abcde 123"]

  Added: test/command/suite/normalize/ascii.test (+1 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalize/ascii.test    2013-01-23 18:33:56 +0900 (45f868b)
@@ -0,0 +1 @@
+normalize NormalizerAuto "aBcDe 123"
-------------- next part --------------
HTMLの添付ファイルを保管しました...
下载 



More information about the Groonga-commit mailing list
Back to archive index