[Groonga-commit] groonga/groonga at fb7c06a [master] grn_ts: support matching operators

Back to archive index

Susumu Yata null+****@clear*****
Wed Jan 6 16:14:32 JST 2016


Susumu Yata	2016-01-06 16:14:32 +0900 (Wed, 06 Jan 2016)

  New Revision: fb7c06a52970e3fd41001e4a77954df7cf696da1
  https://github.com/groonga/groonga/commit/fb7c06a52970e3fd41001e4a77954df7cf696da1

  Message:
    grn_ts: support matching operators
    
    GitHub: #451

  Modified files:
    lib/ts/ts_expr_node.c
    lib/ts/ts_expr_parser.c
    lib/ts/ts_op.c
    lib/ts/ts_op.h

  Modified: lib/ts/ts_expr_node.c (+192 -0)
===================================================================
--- lib/ts/ts_expr_node.c    2016-01-05 10:40:00 +0900 (b243f54)
+++ lib/ts/ts_expr_node.c    2016-01-06 16:14:32 +0900 (b4f5b94)
@@ -1115,6 +1115,60 @@ grn_ts_op_modulus_float_float(grn_ctx *ctx, grn_ts_float lhs, grn_ts_float rhs,
   return GRN_SUCCESS;
 }
 
+static grn_ts_bool
+grn_ts_op_match(grn_ts_text lhs, grn_ts_text rhs)
+{
+  const char *lhs_ptr, *lhs_ptr_last;
+  if (lhs.size < rhs.size) {
+    return GRN_FALSE;
+  }
+  lhs_ptr_last = lhs.ptr + lhs.size - rhs.size;
+  for (lhs_ptr = lhs.ptr; lhs_ptr <= lhs_ptr_last; lhs_ptr++) {
+    size_t i;
+    for (i = 0; i < rhs.size; i++) {
+      if (lhs_ptr[i] != rhs.ptr[i]) {
+        break;
+      }
+    }
+    if (i == rhs.size) {
+      return GRN_TRUE;
+    }
+  }
+  return GRN_FALSE;
+}
+
+static grn_ts_bool
+grn_ts_op_prefix_match(grn_ts_text lhs, grn_ts_text rhs)
+{
+  size_t i;
+  if (lhs.size < rhs.size) {
+    return GRN_FALSE;
+  }
+  for (i = 0; i < rhs.size; i++) {
+    if (lhs.ptr[i] != rhs.ptr[i]) {
+      return GRN_FALSE;
+    }
+  }
+  return GRN_TRUE;
+}
+
+static grn_ts_bool
+grn_ts_op_suffix_match(grn_ts_text lhs, grn_ts_text rhs)
+{
+  size_t i;
+  const char *lhs_ptr;
+  if (lhs.size < rhs.size) {
+    return GRN_FALSE;
+  }
+  lhs_ptr = lhs.ptr + lhs.size - rhs.size;
+  for (i = 0; i < rhs.size; i++) {
+    if (lhs_ptr[i] != rhs.ptr[i]) {
+      return GRN_FALSE;
+    }
+  }
+  return GRN_TRUE;
+}
+
 /*-------------------------------------------------------------
  * Groonga objects.
  */
@@ -3337,6 +3391,19 @@ grn_ts_expr_op_node_check_args(grn_ctx *ctx, grn_ts_expr_op_node *node)
         }
       }
     }
+    case GRN_TS_OP_MATCH:
+    case GRN_TS_OP_PREFIX_MATCH:
+    case GRN_TS_OP_SUFFIX_MATCH: {
+      if ((node->args[0]->data_kind != GRN_TS_TEXT) ||
+          (node->args[1]->data_kind != GRN_TS_TEXT)) {
+        GRN_TS_ERR_RETURN(GRN_INVALID_ARGUMENT, "invalid data kind: %d, %d",
+                          node->args[0]->data_kind,
+                          node->args[1]->data_kind);
+      }
+      node->data_kind = GRN_TS_BOOL;
+      node->data_type = GRN_DB_BOOL;
+      return GRN_SUCCESS;
+    }
     default: {
       GRN_TS_ERR_RETURN(GRN_INVALID_ARGUMENT, "invalid operator: %d",
                         node->op_type);
@@ -4185,6 +4252,51 @@ grn_ts_op_modulus_evaluate(grn_ctx *ctx, grn_ts_expr_op_node *node,
 }
 #undef GRN_TS_OP_ARITH_EVALUATE_CASE
 
+#define GRN_TS_OP_MATCH_EVALUATE(type)\
+  size_t i;\
+  grn_rc rc;\
+  grn_ts_bool *out_ptr = (grn_ts_bool *)out;\
+  grn_ts_text *buf_ptrs[2];\
+  for (i = 0; i < 2; i++) {\
+    rc = grn_ts_expr_node_evaluate_to_buf(ctx, node->args[i], in, n_in,\
+                                          &node->bufs[i]);\
+    if (rc != GRN_SUCCESS) {\
+      return rc;\
+    }\
+  }\
+  buf_ptrs[0] = (grn_ts_text *)node->bufs[0].ptr;\
+  buf_ptrs[1] = (grn_ts_text *)node->bufs[1].ptr;\
+  for (i = 0; i < n_in; i++) {\
+    out_ptr[i] = grn_ts_op_ ## type(buf_ptrs[0][i], buf_ptrs[1][i]);\
+  }\
+  return GRN_SUCCESS;\
+/* grn_ts_op_match_evaluate() evaluates an operator. */
+static grn_rc
+grn_ts_op_match_evaluate(grn_ctx *ctx, grn_ts_expr_op_node *node,
+                         const grn_ts_record *in, size_t n_in, void *out)
+{
+  GRN_TS_OP_MATCH_EVALUATE(match)
+}
+
+/* grn_ts_op_prefix_match_evaluate() evaluates an operator. */
+static grn_rc
+grn_ts_op_prefix_match_evaluate(grn_ctx *ctx, grn_ts_expr_op_node *node,
+                                const grn_ts_record *in, size_t n_in,
+                                void *out)
+{
+  GRN_TS_OP_MATCH_EVALUATE(prefix_match)
+}
+
+/* grn_ts_op_suffix_match_evaluate() evaluates an operator. */
+static grn_rc
+grn_ts_op_suffix_match_evaluate(grn_ctx *ctx, grn_ts_expr_op_node *node,
+                                const grn_ts_record *in, size_t n_in,
+                                void *out)
+{
+  GRN_TS_OP_MATCH_EVALUATE(suffix_match)
+}
+#undef GRN_TS_OP_MATCH_EVALUATE
+
 /* grn_ts_expr_op_node_evaluate() evaluates an operator. */
 static grn_rc
 grn_ts_expr_op_node_evaluate(grn_ctx *ctx, grn_ts_expr_op_node *node,
@@ -4272,6 +4384,15 @@ grn_ts_expr_op_node_evaluate(grn_ctx *ctx, grn_ts_expr_op_node *node,
     case GRN_TS_OP_MODULUS: {
       return grn_ts_op_modulus_evaluate(ctx, node, in, n_in, out);
     }
+    case GRN_TS_OP_MATCH: {
+      return grn_ts_op_match_evaluate(ctx, node, in, n_in, out);
+    }
+    case GRN_TS_OP_PREFIX_MATCH: {
+      return grn_ts_op_prefix_match_evaluate(ctx, node, in, n_in, out);
+    }
+    case GRN_TS_OP_SUFFIX_MATCH: {
+      return grn_ts_op_suffix_match_evaluate(ctx, node, in, n_in, out);
+    }
     // TODO: Add operators.
     default: {
       GRN_TS_ERR_RETURN(GRN_OPERATION_NOT_SUPPORTED,
@@ -4611,6 +4732,68 @@ grn_ts_op_greater_equal_filter(grn_ctx *ctx, grn_ts_expr_op_node *node,
 #undef GRN_TS_OP_CMP_FILTER_VECTOR_CASE
 #undef GRN_TS_OP_CMP_FILTER_CASE
 
+#define GRN_TS_OP_MATCH_FILTER_CASE(type, KIND, kind)\
+  case GRN_TS_ ## KIND: {\
+    grn_ts_ ## kind *buf_ptrs[] = {\
+      (grn_ts_ ## kind *)node->bufs[0].ptr,\
+      (grn_ts_ ## kind *)node->bufs[1].ptr\
+    };\
+    for (i = 0; i < n_in; i++) {\
+      if (grn_ts_op_ ## type ## _ ## kind(buf_ptrs[0][i], buf_ptrs[1][i])) {\
+        out[count++] = in[i];\
+      }\
+    }\
+    *n_out = count;\
+    return GRN_SUCCESS;\
+  }
+
+#define GRN_TS_OP_MATCH_FILTER(type)\
+  size_t i, count = 0;\
+  grn_ts_text *buf_ptrs[2];\
+  for (i = 0; i < 2; i++) {\
+    grn_rc rc = grn_ts_expr_node_evaluate_to_buf(ctx, node->args[i], in, n_in,\
+                                                 &node->bufs[i]);\
+    if (rc != GRN_SUCCESS) {\
+      return rc;\
+    }\
+  }\
+  buf_ptrs[0] = (grn_ts_text *)node->bufs[0].ptr;\
+  buf_ptrs[1] = (grn_ts_text *)node->bufs[1].ptr;\
+  for (i = 0; i < n_in; i++) {\
+    if (grn_ts_op_ ## type(buf_ptrs[0][i], buf_ptrs[1][i])) {\
+      out[count++] = in[i];\
+    }\
+  }\
+  *n_out = count;\
+  return GRN_SUCCESS;\
+/* grn_ts_op_match_filter() filters records. */
+static grn_rc
+grn_ts_op_match_filter(grn_ctx *ctx, grn_ts_expr_op_node *node,
+                       const grn_ts_record *in, size_t n_in,
+                       grn_ts_record *out, size_t *n_out)
+{
+  GRN_TS_OP_MATCH_FILTER(match)
+}
+
+/* grn_ts_op_prefix_match_filter() filters records. */
+static grn_rc
+grn_ts_op_prefix_match_filter(grn_ctx *ctx, grn_ts_expr_op_node *node,
+                              const grn_ts_record *in, size_t n_in,
+                              grn_ts_record *out, size_t *n_out)
+{
+  GRN_TS_OP_MATCH_FILTER(prefix_match)
+}
+
+/* grn_ts_op_suffix_match_filter() filters records. */
+static grn_rc
+grn_ts_op_suffix_match_filter(grn_ctx *ctx, grn_ts_expr_op_node *node,
+                              const grn_ts_record *in, size_t n_in,
+                              grn_ts_record *out, size_t *n_out)
+{
+  GRN_TS_OP_MATCH_FILTER(suffix_match)
+}
+#undef GRN_TS_OP_MATCH_FILTER
+
 /* grn_ts_expr_op_node_filter() filters records. */
 static grn_rc
 grn_ts_expr_op_node_filter(grn_ctx *ctx, grn_ts_expr_op_node *node,
@@ -4660,6 +4843,15 @@ grn_ts_expr_op_node_filter(grn_ctx *ctx, grn_ts_expr_op_node *node,
     case GRN_TS_OP_GREATER_EQUAL: {
       return grn_ts_op_greater_equal_filter(ctx, node, in, n_in, out, n_out);
     }
+    case GRN_TS_OP_MATCH: {
+      return grn_ts_op_match_filter(ctx, node, in, n_in, out, n_out);
+    }
+    case GRN_TS_OP_PREFIX_MATCH: {
+      return grn_ts_op_prefix_match_filter(ctx, node, in, n_in, out, n_out);
+    }
+    case GRN_TS_OP_SUFFIX_MATCH: {
+      return grn_ts_op_suffix_match_filter(ctx, node, in, n_in, out, n_out);
+    }
     // TODO: Add operators.
     default: {
       GRN_TS_ERR_RETURN(GRN_OPERATION_NOT_SUPPORTED,

  Modified: lib/ts/ts_expr_parser.c (+14 -0)
===================================================================
--- lib/ts/ts_expr_parser.c    2016-01-05 10:40:00 +0900 (14c3ef2)
+++ lib/ts/ts_expr_parser.c    2016-01-06 16:14:32 +0900 (1a4e61b)
@@ -717,6 +717,20 @@ grn_ts_expr_parser_tokenize_op(grn_ctx *ctx, grn_ts_expr_parser *parser,
     GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE('/', DIVISION)
     GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE('%', MODULUS)
 #undef GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE
+    case '@': {
+      if ((str.size >= 2) && (str.ptr[1] == '^')) {
+        token_str.size = 2;
+        op_type = GRN_TS_OP_PREFIX_MATCH;
+      } else if ((str.size >= 2) && (str.ptr[1] == '$')) {
+        token_str.size = 2;
+        op_type = GRN_TS_OP_SUFFIX_MATCH;
+      } else {
+        token_str.size = 1;
+        op_type = GRN_TS_OP_MATCH;
+      }
+      rc = grn_ts_expr_op_token_open(ctx, token_str, op_type, &new_token);
+      break;
+    }
     default: {
       GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "invalid character: \"%.*s\"",
                         (int)str.size, str.ptr);

  Modified: lib/ts/ts_op.c (+11 -3)
===================================================================
--- lib/ts/ts_op.c    2016-01-05 10:40:00 +0900 (4a91d19)
+++ lib/ts/ts_op.c    2016-01-06 16:14:32 +0900 (cc4e3d1)
@@ -50,7 +50,10 @@ grn_ts_op_get_n_args(grn_ts_op_type op_type)
     case GRN_TS_OP_MINUS:                  /* X - Y   */
     case GRN_TS_OP_MULTIPLICATION:         /* X * Y   */
     case GRN_TS_OP_DIVISION:               /* X / Y   */
-    case GRN_TS_OP_MODULUS: {              /* X % Y   */
+    case GRN_TS_OP_MODULUS:                /* X % Y   */
+    case GRN_TS_OP_MATCH:                  /* X @ Y   */
+    case GRN_TS_OP_PREFIX_MATCH:           /* X @^ Y  */
+    case GRN_TS_OP_SUFFIX_MATCH: {         /* X @$ Y  */
       return 2;
     }
     default: {
@@ -67,11 +70,11 @@ grn_ts_op_get_precedence(grn_ts_op_type op_type)
     case GRN_TS_OP_BITWISE_NOT:
     case GRN_TS_OP_POSITIVE:
     case GRN_TS_OP_NEGATIVE: {
-      return 14;
+      return 15;
     }
     case GRN_TS_OP_FLOAT:
     case GRN_TS_OP_TIME: {
-      return 15;
+      return 16;
     }
     case GRN_TS_OP_LOGICAL_AND: {
       return 5;
@@ -116,6 +119,11 @@ grn_ts_op_get_precedence(grn_ts_op_type op_type)
     case GRN_TS_OP_MODULUS: {
       return 13;
     }
+    case GRN_TS_OP_MATCH:
+    case GRN_TS_OP_PREFIX_MATCH:
+    case GRN_TS_OP_SUFFIX_MATCH: {
+      return 14;
+    }
     default: {
       return 0;
     }

  Modified: lib/ts/ts_op.h (+4 -1)
===================================================================
--- lib/ts/ts_op.h    2016-01-05 10:40:00 +0900 (43fd737)
+++ lib/ts/ts_op.h    2016-01-06 16:14:32 +0900 (baec9da)
@@ -64,7 +64,10 @@ typedef enum {
   GRN_TS_OP_MINUS,                  /* X - Y   */
   GRN_TS_OP_MULTIPLICATION,         /* X * Y   */
   GRN_TS_OP_DIVISION,               /* X / Y   */
-  GRN_TS_OP_MODULUS                 /* X % Y   */
+  GRN_TS_OP_MODULUS,                /* X % Y   */
+  GRN_TS_OP_MATCH,                  /* X @ Y   */
+  GRN_TS_OP_PREFIX_MATCH,           /* X @^ Y  */
+  GRN_TS_OP_SUFFIX_MATCH            /* X @$ Y  */
 } grn_ts_op_type;
 
 /* Operator precedence. */
-------------- next part --------------
An HTML attachment was scrubbed...
下载 



More information about the Groonga-commit mailing list
Back to archive index