Kouhei Sutou
null+****@clear*****
Sat Nov 10 00:25:51 JST 2012
Kouhei Sutou 2012-11-10 00:25:51 +0900 (Sat, 10 Nov 2012)

New Revision: 9977a0c5645dfe4a25901564f068c243338980d6
https://github.com/groonga/groonga/commit/9977a0c5645dfe4a25901564f068c243338980d6

Log:
  Support nested index search

  This change supports the following query:

    select Articles --match_columns comment.content --query KEYWORD

  With the following schema:

    table_create Comments TABLE_HASH_KEY UInt32
    column_create Comments content COLUMN_SCALAR ShortText

    table_create Articles TABLE_NO_KEY
    column_create Articles comment COLUMN_SCALAR Comments

    table_create Lexicon TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram
    column_create Lexicon comments_content COLUMN_INDEX|WITH_POSITION Comments content

    column_create Comments article COLUMN_INDEX Articles comment

  The select command performs the search in the following steps:

    1. Search Comments.content for KEYWORD using the
       Lexicon.comments_content index to get (a), the matching record IDs
       of Comments.
    2. Search Articles for (a) using the Comments.article index to get
       the matching record IDs of Articles.

Added files: test/command/suite/select/index/nested/scalar.expected test/command/suite/select/index/nested/scalar.test test/command/suite/select/index/nested/vector.expected test/command/suite/select/index/nested/vector.test test/command/suite/select/index/nested/weight.expected test/command/suite/select/index/nested/weight.test test/command/suite/select/index/nested/with_top_level_index.expected test/command/suite/select/index/nested/with_top_level_index.test Modified files: lib/db.c lib/expr.c Modified: lib/db.c (+125 -7) =================================================================== --- lib/db.c 2012-11-10 00:16:39 +0900 (a35bba0) +++ lib/db.c 2012-11-10 00:25:51 +0900 (0456060) @@ -2832,13 +2832,107 @@ grn_table_next(grn_ctx *ctx, grn_obj *table, grn_id id) GRN_API_RETURN(r); } +static inline grn_rc +grn_obj_search_accessor(grn_ctx *ctx, grn_obj *obj, grn_obj *query, + grn_obj *res, grn_operator op, grn_search_optarg *optarg) +{ + grn_rc rc = GRN_SUCCESS; + grn_accessor *accessor = (grn_accessor *)obj; + grn_accessor *a; + grn_obj accessor_stack; + int i, n_accessors; + grn_obj *current_res = NULL; + + GRN_PTR_INIT(&accessor_stack, GRN_OBJ_VECTOR, GRN_ID_NIL); + for (a = accessor; a; a = a->next) { + GRN_PTR_PUT(ctx, &accessor_stack, a); + } + + n_accessors = GRN_BULK_VSIZE(&accessor_stack) / sizeof(grn_obj *); + for (i = n_accessors; i > 0; i--) { + grn_accessor *a = (grn_accessor *)GRN_PTR_VALUE_AT(&accessor_stack, i - 1); + grn_obj *index; + grn_operator index_op = GRN_OP_MATCH; + if (optarg && optarg->mode != GRN_OP_EXACT) { + index_op = optarg->mode; + } + if (grn_column_index(ctx, a->obj, index_op, &index, 1, NULL) == 0) { + rc = GRN_INVALID_ARGUMENT; + break; + } + + if (i == n_accessors) { + if (i == 1) { + current_res = res; + } else { + grn_obj *range = grn_ctx_at(ctx, DB_OBJ(index)->range); + current_res = grn_table_create(ctx, NULL, 0, NULL, + GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC, + range, + NULL); + rc = ctx->rc; + grn_obj_unlink(ctx, 
range); + if (!current_res) { + break; + } + } + rc = grn_obj_search(ctx, index, query, current_res, op, optarg); + if (rc != GRN_SUCCESS) { + break; + } + } else { + grn_id *tid; + grn_obj *next_res; + grn_operator next_op; + grn_search_optarg next_optarg = *optarg; + if (i == 1) { + next_res = res; + next_op = op; + } else { + grn_obj *range = grn_ctx_at(ctx, DB_OBJ(index)->range); + next_res = grn_table_create(ctx, NULL, 0, NULL, + GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC, + range, NULL); + rc = ctx->rc; + grn_obj_unlink(ctx, range); + if (!next_res) { + grn_obj_unlink(ctx, current_res); + break; + } + next_op = GRN_OP_OR; + } + next_optarg.mode = GRN_OP_EXACT; + GRN_HASH_EACH(ctx, (grn_hash *)current_res, id, &tid, NULL, NULL, { + rc = grn_ii_sel(ctx, (grn_ii *)index, (const char *)tid, sizeof(grn_id), + (grn_hash *)next_res, next_op, &next_optarg); + if (rc != GRN_SUCCESS) { + break; + } + }); + grn_obj_unlink(ctx, current_res); + if (rc != GRN_SUCCESS) { + if (res != next_res) { + grn_obj_unlink(ctx, next_res); + } + break; + } + current_res = next_res; + } + } + + GRN_OBJ_FIN(ctx, &accessor_stack); + return rc; +} + grn_rc grn_obj_search(grn_ctx *ctx, grn_obj *obj, grn_obj *query, grn_obj *res, grn_operator op, grn_search_optarg *optarg) { grn_rc rc = GRN_INVALID_ARGUMENT; GRN_API_ENTER; - if (GRN_DB_OBJP(obj)) { + if (GRN_ACCESSORP(obj)) { + rc = grn_obj_search_accessor(ctx, obj, query, res, op, optarg); + } else if (GRN_DB_OBJP(obj)) { switch (obj->header.type) { case GRN_TABLE_PAT_KEY : case GRN_TABLE_DAT_KEY : @@ -8415,18 +8509,42 @@ grn_column_index(grn_ctx *ctx, grn_obj *obj, grn_operator op, case GRN_OP_SIMILAR : { grn_accessor *a = (grn_accessor *)obj; - if (a->action == GRN_ACCESSOR_GET_KEY) { + while (a) { + grn_bool found = GRN_FALSE; + grn_hook_entry entry = -1; + + switch (a->action) { + case GRN_ACCESSOR_GET_KEY : + entry = GRN_HOOK_INSERT; + break; + case GRN_ACCESSOR_GET_COLUMN_VALUE : + entry = GRN_HOOK_SET; + break; + default : + break; + } + 
obj = a->obj; - for (hooks = DB_OBJ(obj)->hooks[GRN_HOOK_INSERT]; hooks; hooks = hooks->next) { + for (hooks = DB_OBJ(obj)->hooks[entry]; hooks; hooks = hooks->next) { default_set_value_hook_data *data = (void *)NEXT_ADDR(hooks); grn_obj *target = grn_ctx_at(ctx, data->target); if (target->header.type != GRN_COLUMN_INDEX) { continue; } - if (section) { *section = (MULTI_COLUMN_INDEXP(target)) ? data->section : 0; } - if (n < buf_size) { - *ip++ = target; + found = GRN_TRUE; + if (!a->next) { + if (section) { + *section = (MULTI_COLUMN_INDEXP(target)) ? data->section : 0; + } + if (n < buf_size) { + *ip++ = target; + } + n++; } - n++; } + + if (!found) { + break; + } + a = a->next; } } break; Modified: lib/expr.c (+6 -1) =================================================================== --- lib/expr.c 2012-11-10 00:16:39 +0900 (a071ebd) +++ lib/expr.c 2012-11-10 00:25:51 +0900 (f372073) @@ -3756,8 +3756,13 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n, case GRN_ACCESSOR : case GRN_ACCESSOR_VIEW : if (grn_column_index(ctx, ec->value, c->op, &index, 1, &sid)) { + int32_t weight = get_weight(ctx, ec); si->flags |= SCAN_ACCESSOR; - scan_info_put_index(ctx, si, index, sid, get_weight(ctx, ec)); + if (((grn_accessor *)ec->value)->next) { + scan_info_put_index(ctx, si, ec->value, sid, weight); + } else { + scan_info_put_index(ctx, si, index, sid, weight); + } } break; case GRN_COLUMN_FIX_SIZE : Added: test/command/suite/select/index/nested/scalar.expected (+72 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/index/nested/scalar.expected 2012-11-10 00:25:51 +0900 (70f0c38) @@ -0,0 +1,72 @@ +table_create Comments TABLE_HASH_KEY UInt32 +[[0,0.0,0.0],true] +column_create Comments content COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +table_create Articles TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Articles content COLUMN_SCALAR Text +[[0,0.0,0.0],true] +column_create Articles comment 
COLUMN_SCALAR Comments +[[0,0.0,0.0],true] +table_create Lexicon TABLE_PAT_KEY|KEY_NORMALIZE ShortText --default_tokenizer TokenBigram +[[0,0.0,0.0],true] +column_create Lexicon articles_content COLUMN_INDEX|WITH_POSITION Articles content +[[0,0.0,0.0],true] +column_create Lexicon comments_content COLUMN_INDEX|WITH_POSITION Comments content +[[0,0.0,0.0],true] +column_create Comments article COLUMN_INDEX Articles comment +[[0,0.0,0.0],true] +load --table Comments +[ +{"_key": 1, "content": "I'm using groonga too!"}, +{"_key": 2, "content": "I'm using groonga and mroonga!"}, +{"_key": 3, "content": "I'm using mroonga too!"} +] +[[0,0.0,0.0],3] +load --table Articles +[ +{"content": "Groonga is fast!", "comment": 1}, +{"content": "Groonga is useful!"}, +{"content": "Mroonga is fast!", "comments": 3} +] +[[0,0.0,0.0],3] +#|e| invalid column('comments') +select Articles --match_columns comment.content --query groonga --output_columns "_id, _score, *" +[ + [ + 0, + 0.0, + 0.0 + ], + [ + [ + [ + 1 + ], + [ + [ + "_id", + "UInt32" + ], + [ + "_score", + "Int32" + ], + [ + "comment", + "Comments" + ], + [ + "content", + "Text" + ] + ], + [ + 1, + 1, + 1, + "Groonga is fast!" 
+ ] + ] + ] +] Added: test/command/suite/select/index/nested/scalar.test (+32 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/index/nested/scalar.test 2012-11-10 00:25:51 +0900 (c609826) @@ -0,0 +1,32 @@ +table_create Comments TABLE_HASH_KEY UInt32 +column_create Comments content COLUMN_SCALAR ShortText + +table_create Articles TABLE_NO_KEY +column_create Articles content COLUMN_SCALAR Text +column_create Articles comment COLUMN_SCALAR Comments + +table_create Lexicon TABLE_PAT_KEY|KEY_NORMALIZE ShortText \ + --default_tokenizer TokenBigram +column_create Lexicon articles_content COLUMN_INDEX|WITH_POSITION \ + Articles content +column_create Lexicon comments_content COLUMN_INDEX|WITH_POSITION \ + Comments content + +column_create Comments article COLUMN_INDEX Articles comment + +load --table Comments +[ +{"_key": 1, "content": "I'm using groonga too!"}, +{"_key": 2, "content": "I'm using groonga and mroonga!"}, +{"_key": 3, "content": "I'm using mroonga too!"} +] + +load --table Articles +[ +{"content": "Groonga is fast!", "comment": 1}, +{"content": "Groonga is useful!"}, +{"content": "Mroonga is fast!", "comments": 3} +] + +select Articles --match_columns comment.content --query groonga \ + --output_columns "_id, _score, *" Added: test/command/suite/select/index/nested/vector.expected (+82 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/index/nested/vector.expected 2012-11-10 00:25:51 +0900 (270d71c) @@ -0,0 +1,82 @@ +table_create Comments TABLE_HASH_KEY UInt32 +[[0,0.0,0.0],true] +column_create Comments content COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +table_create Articles TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Articles content COLUMN_SCALAR Text +[[0,0.0,0.0],true] +column_create Articles comments COLUMN_VECTOR Comments +[[0,0.0,0.0],true] +table_create Lexicon TABLE_PAT_KEY|KEY_NORMALIZE ShortText 
--default_tokenizer TokenBigram +[[0,0.0,0.0],true] +column_create Lexicon articles_content COLUMN_INDEX|WITH_POSITION Articles content +[[0,0.0,0.0],true] +column_create Lexicon comments_content COLUMN_INDEX|WITH_POSITION Comments content +[[0,0.0,0.0],true] +column_create Comments article COLUMN_INDEX Articles comments +[[0,0.0,0.0],true] +load --table Comments +[ +{"_key": 1, "content": "I'm using groonga too!"}, +{"_key": 2, "content": "I'm using groonga and mroonga!"}, +{"_key": 3, "content": "I'm using mroonga too!"} +] +[[0,0.0,0.0],3] +load --table Articles +[ +{"content": "Groonga is fast!", "comments": [1, 3]}, +{"content": "Groonga is useful!"}, +{"content": "Mroonga is fast!", "comments": [2]} +] +[[0,0.0,0.0],3] +select Articles --match_columns comments.content --query groonga --output_columns "_id, _score, *" +[ + [ + 0, + 0.0, + 0.0 + ], + [ + [ + [ + 2 + ], + [ + [ + "_id", + "UInt32" + ], + [ + "_score", + "Int32" + ], + [ + "comments", + "Comments" + ], + [ + "content", + "Text" + ] + ], + [ + 1, + 1, + [ + 1, + 3 + ], + "Groonga is fast!" + ], + [ + 3, + 1, + [ + 2 + ], + "Mroonga is fast!" 
+ ] + ] + ] +] Added: test/command/suite/select/index/nested/vector.test (+32 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/index/nested/vector.test 2012-11-10 00:25:51 +0900 (4d7b7d3) @@ -0,0 +1,32 @@ +table_create Comments TABLE_HASH_KEY UInt32 +column_create Comments content COLUMN_SCALAR ShortText + +table_create Articles TABLE_NO_KEY +column_create Articles content COLUMN_SCALAR Text +column_create Articles comments COLUMN_VECTOR Comments + +table_create Lexicon TABLE_PAT_KEY|KEY_NORMALIZE ShortText \ + --default_tokenizer TokenBigram +column_create Lexicon articles_content COLUMN_INDEX|WITH_POSITION \ + Articles content +column_create Lexicon comments_content COLUMN_INDEX|WITH_POSITION \ + Comments content + +column_create Comments article COLUMN_INDEX Articles comments + +load --table Comments +[ +{"_key": 1, "content": "I'm using groonga too!"}, +{"_key": 2, "content": "I'm using groonga and mroonga!"}, +{"_key": 3, "content": "I'm using mroonga too!"} +] + +load --table Articles +[ +{"content": "Groonga is fast!", "comments": [1, 3]}, +{"content": "Groonga is useful!"}, +{"content": "Mroonga is fast!", "comments": [2]} +] + +select Articles --match_columns comments.content --query groonga \ + --output_columns "_id, _score, *" Added: test/command/suite/select/index/nested/weight.expected (+71 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/index/nested/weight.expected 2012-11-10 00:25:51 +0900 (8f52c74) @@ -0,0 +1,71 @@ +table_create Comments TABLE_HASH_KEY UInt32 +[[0,0.0,0.0],true] +column_create Comments content COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +table_create Articles TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Articles content COLUMN_SCALAR Text +[[0,0.0,0.0],true] +column_create Articles comment COLUMN_SCALAR Comments +[[0,0.0,0.0],true] +table_create Lexicon TABLE_PAT_KEY|KEY_NORMALIZE 
ShortText --default_tokenizer TokenBigram +[[0,0.0,0.0],true] +column_create Lexicon articles_content COLUMN_INDEX|WITH_POSITION Articles content +[[0,0.0,0.0],true] +column_create Lexicon comments_content COLUMN_INDEX|WITH_POSITION Comments content +[[0,0.0,0.0],true] +column_create Comments article COLUMN_INDEX Articles comment +[[0,0.0,0.0],true] +load --table Comments +[ +{"_key": 1, "content": "I'm using groonga too!"}, +{"_key": 2, "content": "I'm using groonga and mroonga!"}, +{"_key": 3, "content": "I'm using mroonga too!"} +] +[[0,0.0,0.0],3] +load --table Articles +[ +{"content": "Groonga is fast!", "comment": 1}, +{"content": "Groonga is useful!"}, +{"content": "Mroonga is fast!", "comment": 3} +] +[[0,0.0,0.0],3] +select Articles --match_columns 'comment.content * 10' --query groonga --output_columns "_id, _score, *" +[ + [ + 0, + 0.0, + 0.0 + ], + [ + [ + [ + 1 + ], + [ + [ + "_id", + "UInt32" + ], + [ + "_score", + "Int32" + ], + [ + "comment", + "Comments" + ], + [ + "content", + "Text" + ] + ], + [ + 1, + 10, + 1, + "Groonga is fast!" 
+ ] + ] + ] +] Added: test/command/suite/select/index/nested/weight.test (+33 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/index/nested/weight.test 2012-11-10 00:25:51 +0900 (cf87271) @@ -0,0 +1,33 @@ +table_create Comments TABLE_HASH_KEY UInt32 +column_create Comments content COLUMN_SCALAR ShortText + +table_create Articles TABLE_NO_KEY +column_create Articles content COLUMN_SCALAR Text +column_create Articles comment COLUMN_SCALAR Comments + +table_create Lexicon TABLE_PAT_KEY|KEY_NORMALIZE ShortText \ + --default_tokenizer TokenBigram +column_create Lexicon articles_content COLUMN_INDEX|WITH_POSITION \ + Articles content +column_create Lexicon comments_content COLUMN_INDEX|WITH_POSITION \ + Comments content + +column_create Comments article COLUMN_INDEX Articles comment + +load --table Comments +[ +{"_key": 1, "content": "I'm using groonga too!"}, +{"_key": 2, "content": "I'm using groonga and mroonga!"}, +{"_key": 3, "content": "I'm using mroonga too!"} +] + +load --table Articles +[ +{"content": "Groonga is fast!", "comment": 1}, +{"content": "Groonga is useful!"}, +{"content": "Mroonga is fast!", "comment": 3} +] + +select Articles \ + --match_columns 'comment.content * 10' --query groonga \ + --output_columns "_id, _score, *" Added: test/command/suite/select/index/nested/with_top_level_index.expected (+77 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/index/nested/with_top_level_index.expected 2012-11-10 00:25:51 +0900 (330e165) @@ -0,0 +1,77 @@ +table_create Comments TABLE_HASH_KEY UInt32 +[[0,0.0,0.0],true] +column_create Comments content COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +table_create Articles TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Articles content COLUMN_SCALAR Text +[[0,0.0,0.0],true] +column_create Articles comment COLUMN_SCALAR Comments +[[0,0.0,0.0],true] +table_create Lexicon 
TABLE_PAT_KEY|KEY_NORMALIZE ShortText --default_tokenizer TokenBigram +[[0,0.0,0.0],true] +column_create Lexicon articles_content COLUMN_INDEX|WITH_POSITION Articles content +[[0,0.0,0.0],true] +column_create Lexicon comments_content COLUMN_INDEX|WITH_POSITION Comments content +[[0,0.0,0.0],true] +column_create Comments article COLUMN_INDEX Articles comment +[[0,0.0,0.0],true] +load --table Comments +[ +{"_key": 1, "content": "I'm using groonga too!"}, +{"_key": 2, "content": "I'm using groonga and mroonga!"}, +{"_key": 3, "content": "I'm using mroonga too!"} +] +[[0,0.0,0.0],3] +load --table Articles +[ +{"content": "Groonga is fast!", "comment": 1}, +{"content": "Groonga is useful!"}, +{"content": "Mroonga is fast!", "comment": 3} +] +[[0,0.0,0.0],3] +select Articles --match_columns 'content || comment.content' --query groonga --output_columns "_id, _score, *" +[ + [ + 0, + 0.0, + 0.0 + ], + [ + [ + [ + 2 + ], + [ + [ + "_id", + "UInt32" + ], + [ + "_score", + "Int32" + ], + [ + "comment", + "Comments" + ], + [ + "content", + "Text" + ] + ], + [ + 1, + 2, + 1, + "Groonga is fast!" + ], + [ + 2, + 1, + 0, + "Groonga is useful!" 
+ ] + ] + ] +] Added: test/command/suite/select/index/nested/with_top_level_index.test (+33 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/index/nested/with_top_level_index.test 2012-11-10 00:25:51 +0900 (16886ac) @@ -0,0 +1,33 @@ +table_create Comments TABLE_HASH_KEY UInt32 +column_create Comments content COLUMN_SCALAR ShortText + +table_create Articles TABLE_NO_KEY +column_create Articles content COLUMN_SCALAR Text +column_create Articles comment COLUMN_SCALAR Comments + +table_create Lexicon TABLE_PAT_KEY|KEY_NORMALIZE ShortText \ + --default_tokenizer TokenBigram +column_create Lexicon articles_content COLUMN_INDEX|WITH_POSITION \ + Articles content +column_create Lexicon comments_content COLUMN_INDEX|WITH_POSITION \ + Comments content + +column_create Comments article COLUMN_INDEX Articles comment + +load --table Comments +[ +{"_key": 1, "content": "I'm using groonga too!"}, +{"_key": 2, "content": "I'm using groonga and mroonga!"}, +{"_key": 3, "content": "I'm using mroonga too!"} +] + +load --table Articles +[ +{"content": "Groonga is fast!", "comment": 1}, +{"content": "Groonga is useful!"}, +{"content": "Mroonga is fast!", "comment": 3} +] + +select Articles \ + --match_columns 'content || comment.content' --query groonga \ + --output_columns "_id, _score, *"
-------------- next part --------------
[An HTML attachment was scrubbed by the list archive; the original notice text was mis-encoded.]