Kouhei Sutou  2018-11-12 14:32:17 +0900 (Mon, 12 Nov 2018)

  Revision: 50d3319a8d201d22fa60d1f54240402ead6c7aa3
  https://github.com/groonga/groonga/commit/50d3319a8d201d22fa60d1f54240402ead6c7aa3

  Message:
    tokenize table_tokenize: use "force_prefix_search"

    "force_prefix" is kept for backward compatibility.

  Modified files:
    lib/proc/proc_tokenize.c
    test/command/suite/select/env/overlap_token_skip/long.expected
    test/command/suite/select/env/overlap_token_skip/non_overlap.expected
    test/command/suite/select/env/overlap_token_skip/one.expected
    test/command/suite/select/env/overlap_token_skip/short.expected
    test/command/suite/select/env/overlap_token_skip/skip.expected
    test/command/suite/table_tokenize/flags.expected
    test/command/suite/table_tokenize/index_column.expected
    test/command/suite/table_tokenize/mode_add.expected
    test/command/suite/table_tokenize/mode_get.expected
    test/command/suite/table_tokenize/with_normalizer.expected
    test/command/suite/table_tokenize/with_token_filters.expected
    test/command/suite/token_filters/nfkc100/unify_kana.expected
    test/command/suite/tokenize/empty_token.expected
    test/command/suite/tokenize/flags.expected
    test/command/suite/tokenize/mode_add.expected
    test/command/suite/tokenize/mode_get.expected
    test/command/suite/tokenize/no_normalizer.expected
    test/command/suite/tokenize/with_normalizer.expected
    test/command/suite/tokenize/with_token_filters.expected
    test/command/suite/tokenize/with_token_filters_options.expected
    test/command/suite/tokenizers/bigram/env/remove_blank_disable/include_blank.expected
    test/command/suite/tokenizers/bigram/force_prefix/multiple_tokens/matured.expected
    test/command/suite/tokenizers/bigram/force_prefix/multiple_tokens/unmatured.expected
    test/command/suite/tokenizers/bigram/force_prefix/single_token/matured.expected
    test/command/suite/tokenizers/bigram/force_prefix/single_token/unmatured.expected
    test/command/suite/tokenizers/bigram/long_token/first.expected
    test/command/suite/tokenizers/bigram/long_token/last.expected
    test/command/suite/tokenizers/bigram/long_token/middle.expected
    test/command/suite/tokenizers/delimit/valid/continuous_spaces.expected
    test/command/suite/tokenizers/delimit/valid/multiple_tokens.expected
    test/command/suite/tokenizers/mecab/chunk/comma.expected
    test/command/suite/tokenizers/mecab/chunk/exclamation_mark.expected
    test/command/suite/tokenizers/mecab/chunk/fullwidth_exclamation_mark.expected
    test/command/suite/tokenizers/mecab/chunk/fullwidth_question_mark.expected
    test/command/suite/tokenizers/mecab/chunk/ideographic_comma.expected
    test/command/suite/tokenizers/mecab/chunk/ideographic_full_stop.expected
    test/command/suite/tokenizers/mecab/chunk/ideographic_space.expected
    test/command/suite/tokenizers/mecab/chunk/multiple_delimiters_in_one_chunk.expected
    test/command/suite/tokenizers/mecab/chunk/period.expected
    test/command/suite/tokenizers/mecab/chunk/question_mark.expected
    test/command/suite/tokenizers/mecab/chunk/space.expected
    test/command/suite/tokenizers/mecab/chunk/threshold.expected
    test/command/suite/tokenizers/mecab/full_width_space/first.expected
    test/command/suite/tokenizers/mecab/full_width_space/last.expected
    test/command/suite/tokenizers/mecab/multiple_tokens.expected
    test/command/suite/tokenizers/mecab/one_token.expected
    test/command/suite/tokenizers/mecab/options/chunk_size_threshold.expected
    test/command/suite/tokenizers/mecab/options/include_class.expected
    test/command/suite/tokenizers/mecab/options/include_form.expected
    test/command/suite/tokenizers/mecab/options/include_reading.expected
    test/command/suite/tokenizers/mecab/options/target_class/negative.expected
    test/command/suite/tokenizers/mecab/options/target_class/one.expected
    test/command/suite/tokenizers/mecab/options/target_class/positive.expected
    test/command/suite/tokenizers/mecab/options/target_class/subclass0.expected
    test/command/suite/tokenizers/mecab/options/target_class/subclass1.expected
    test/command/suite/tokenizers/mecab/options/target_class/subclass2.expected
    test/command/suite/tokenizers/mecab/options/use_reading_add.expected
    test/command/suite/tokenizers/mecab/options/use_reading_get.expected
    test/command/suite/tokenizers/ngram/loose_blank/add.expected
    test/command/suite/tokenizers/ngram/loose_blank/get.expected
    test/command/suite/tokenizers/ngram/loose_symbol/add.expected
    test/command/suite/tokenizers/ngram/loose_symbol/get.expected
    test/command/suite/tokenizers/ngram/loose_symbol/get_all_symbols.expected
    test/command/suite/tokenizers/ngram/n.expected
    test/command/suite/tokenizers/ngram/remove_blank.expected
    test/command/suite/tokenizers/ngram/report_source_location/expand_katakana.expected
    test/command/suite/tokenizers/ngram/report_source_location/expand_no_overlap.expected
    test/command/suite/tokenizers/ngram/report_source_location/hiragana.expected
    test/command/suite/tokenizers/ngram/report_source_location/include_removed_source_location.expected
    test/command/suite/tokenizers/ngram/report_source_location/loose_and_unify.expected
    test/command/suite/tokenizers/ngram/report_source_location/loose_symbol.expected
    test/command/suite/tokenizers/ngram/report_source_location/loose_symbol_kabu.expected
    test/command/suite/tokenizers/ngram/report_source_location/loose_symbol_non_number.expected
    test/command/suite/tokenizers/ngram/unify_alphabet.expected
    test/command/suite/tokenizers/ngram/unify_digit.expected
    test/command/suite/tokenizers/ngram/unify_symbol.expected
    test/command/suite/tokenizers/regexp/add/four.expected
    test/command/suite/tokenizers/regexp/add/normalizer/blank.expected
    test/command/suite/tokenizers/regexp/add/one.expected
    test/command/suite/tokenizers/regexp/add/three.expected
    test/command/suite/tokenizers/regexp/add/two.expected
    test/command/suite/tokenizers/regexp/get/begin/one.expected
    test/command/suite/tokenizers/regexp/get/begin/three.expected
    test/command/suite/tokenizers/regexp/get/begin/two.expected
    test/command/suite/tokenizers/regexp/get/begin_end/one.expected
    test/command/suite/tokenizers/regexp/get/end/four.expected
    test/command/suite/tokenizers/regexp/get/end/one.expected
    test/command/suite/tokenizers/regexp/get/end/three.expected
    test/command/suite/tokenizers/regexp/get/end/two.expected
    test/command/suite/tokenizers/regexp/get/long.expected
    test/command/suite/tokenizers/regexp/get/normalizer/blank/less_after.expected
    test/command/suite/tokenizers/regexp/get/normalizer/blank/less_before.expected
    test/command/suite/tokenizers/trigram/force_prefix/multiple_tokens/matured.expected
    test/command/suite/tokenizers/trigram/force_prefix/multiple_tokens/unmatured_one_character.expected
    test/command/suite/tokenizers/trigram/force_prefix/multiple_tokens/unmatured_two_characters.expected
    test/command/suite/tokenizers/trigram/force_prefix/single_token/matured.expected
    test/command/suite/tokenizers/trigram/force_prefix/single_token/unmatured_one_character.expected
    test/command/suite/tokenizers/trigram/force_prefix/single_token/unmatured_two_characters.expected
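With this change, each token in the tokenize/table_tokenize command output carries both keys with the same value: the new "force_prefix_search" and, kept for backward compatibility, the old "force_prefix". For example, the updated expectation in test/command/suite/tokenizers/bigram/force_prefix/single_token/unmatured.expected (see its diff below) is:

tokenize TokenBigram "だ" NormalizerAuto --mode GET
[
  [
    0,
    0.0,
    0.0
  ],
  [
    {
      "value": "だ",
      "position": 0,
      "force_prefix": true,
      "force_prefix_search": true
    }
  ]
]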
Modified: lib/proc/proc_tokenize.c (+9 -4)
===================================================================
--- lib/proc/proc_tokenize.c    2018-11-12 14:14:04 +0900 (6d7535016)
+++ lib/proc/proc_tokenize.c    2018-11-12 14:32:17 +0900 (8ee1c7110)
@@ -64,7 +64,7 @@ parse_tokenize_flags(grn_ctx *ctx, grn_raw_string *flags_raw)
 typedef struct {
   grn_id id;
   int32_t position;
-  grn_bool force_prefix;
+  grn_bool force_prefix_search;
   uint64_t source_offset;
   uint32_t source_length;
   uint32_t source_first_character_length;
@@ -106,7 +106,7 @@ output_tokens(grn_ctx *ctx,
   grn_bool have_metadata = GRN_FALSE;
 
   n_tokens = GRN_BULK_VSIZE(tokens) / sizeof(tokenize_token);
-  n_elements = 3;
+  n_elements = 4;
   if (index_column) {
     n_elements++;
     GRN_UINT32_INIT(&estimated_size, 0);
@@ -146,8 +146,12 @@ output_tokens(grn_ctx *ctx,
     grn_ctx_output_cstr(ctx, "position");
     grn_ctx_output_int32(ctx, token->position);
 
+    /* For backward compatibility. */
     grn_ctx_output_cstr(ctx, "force_prefix");
-    grn_ctx_output_bool(ctx, token->force_prefix);
+    grn_ctx_output_bool(ctx, token->force_prefix_search);
+
+    grn_ctx_output_cstr(ctx, "force_prefix_search");
+    grn_ctx_output_bool(ctx, token->force_prefix_search);
 
     if (index_column) {
       GRN_BULK_REWIND(&estimated_size);
@@ -250,7 +254,8 @@ tokenize(grn_ctx *ctx,
     current_token = ((tokenize_token *)(GRN_BULK_CURR(tokens))) - 1;
     current_token->id = token_id;
     current_token->position = grn_token_get_position(ctx, token);
-    current_token->force_prefix = grn_token_get_force_prefix_search(ctx, token);
+    current_token->force_prefix_search =
+      grn_token_get_force_prefix_search(ctx, token);
     current_token->source_offset = grn_token_get_source_offset(ctx, token);
     current_token->source_length = grn_token_get_source_length(ctx, token);
     current_token->source_first_character_length =
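For clients that parse this command output, a possible migration path is to prefer "force_prefix_search" and fall back to "force_prefix" when talking to an older Groonga. Below is a minimal client-side sketch of that fallback; it is not part of this commit, and it assumes the third-party cJSON library (not bundled with Groonga; any JSON parser would do):

/*
 * Hypothetical client-side helper, not part of this commit: prefer the
 * new "force_prefix_search" key and fall back to the deprecated
 * "force_prefix" key when talking to an older Groonga.
 * Assumes the third-party cJSON library.
 */
#include <stdbool.h>
#include <stdio.h>

#include <cjson/cJSON.h>

static bool
token_force_prefix_search(const cJSON *token)
{
  const cJSON *flag =
    cJSON_GetObjectItemCaseSensitive(token, "force_prefix_search");
  if (!cJSON_IsBool(flag)) {
    /* Older Groonga emits only the deprecated key. */
    flag = cJSON_GetObjectItemCaseSensitive(token, "force_prefix");
  }
  /* cJSON_IsTrue() returns false for NULL or non-boolean items. */
  return cJSON_IsTrue(flag);
}

int
main(void)
{
  /* Sample token object taken from the updated test expectations. */
  const char *json =
    "{\"value\": \"だ\", \"position\": 0,"
    " \"force_prefix\": true, \"force_prefix_search\": true}";
  cJSON *token = cJSON_Parse(json);
  if (!token) {
    return 1;
  }
  printf("force_prefix_search: %s\n",
         token_force_prefix_search(token) ? "true" : "false");
  cJSON_Delete(token);
  return 0;
}

Reading the new key first means the deprecated key can eventually be dropped from the output without breaking such clients.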
Modified: test/command/suite/select/env/overlap_token_skip/long.expected (+26 -0)
===================================================================
--- test/command/suite/select/env/overlap_token_skip/long.expected    2018-11-12 14:14:04 +0900 (70bef1ccd)
+++ test/command/suite/select/env/overlap_token_skip/long.expected    2018-11-12 14:32:17 +0900 (6737227bf)
@@ -23,156 +23,182 @@ table_tokenize Terms "This is very very long sentence." --index_column index
       "value": "th",
       "position": 0,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 1
     },
     {
       "value": "hi",
       "position": 1,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 1
     },
     {
       "value": "is",
       "position": 2,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 3
     },
     {
       "value": "si",
       "position": 3,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 1
     },
     {
       "value": "is",
       "position": 4,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 3
     },
     {
       "value": "sv",
       "position": 5,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 1
     },
     {
       "value": "ve",
       "position": 6,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 3
     },
     {
       "value": "er",
       "position": 7,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 3
     },
     {
       "value": "ry",
       "position": 8,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 3
     },
     {
       "value": "yv",
       "position": 9,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 1
     },
     {
       "value": "ve",
       "position": 10,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 3
     },
     {
       "value": "er",
       "position": 11,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 3
     },
     {
       "value": "ry",
       "position": 12,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 3
     },
     {
       "value": "yl",
       "position": 13,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 1
     },
     {
       "value": "lo",
       "position": 14,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 1
     },
     {
       "value": "on",
       "position": 15,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 1
     },
     {
       "value": "ng",
       "position": 16,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 1
     },
     {
       "value": "gs",
       "position": 17,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 1
     },
     {
       "value": "se",
       "position": 18,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 1
     },
     {
       "value": "en",
       "position": 19,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 3
     },
     {
       "value": "nt",
       "position": 20,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 1
     },
     {
       "value": "te",
       "position": 21,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 1
     },
     {
       "value": "en",
       "position": 22,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 3
     },
     {
       "value": "nc",
       "position": 23,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 1
     },
     {
       "value": "ce",
       "position": 24,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 1
     },
     {
       "value": "e.",
       "position": 25,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 1
     }
   ]
Modified: test/command/suite/select/env/overlap_token_skip/non_overlap.expected (+2 -0)
===================================================================
--- test/command/suite/select/env/overlap_token_skip/non_overlap.expected    2018-11-12 14:14:04 +0900 (1871a8da3)
+++ test/command/suite/select/env/overlap_token_skip/non_overlap.expected    2018-11-12 14:32:17 +0900 (8680e81af)
@@ -23,12 +23,14 @@ table_tokenize Terms "Hong Kong" --index_column index
       "value": "hong",
       "position": 0,
       "force_prefix": false,
+      "force_prefix_search": false,
       "estimated_size": 1
     },
     {
       "value": "kong",
       "position": 1,
       "force_prefix": false,
+      "force_prefix_search": false,
"estimated_size": 1 } ] Modified: test/command/suite/select/env/overlap_token_skip/one.expected (+1 -0) =================================================================== --- test/command/suite/select/env/overlap_token_skip/one.expected 2018-11-12 14:14:04 +0900 (57d0b9b21) +++ test/command/suite/select/env/overlap_token_skip/one.expected 2018-11-12 14:32:17 +0900 (f0f710f14) @@ -23,6 +23,7 @@ table_tokenize Terms "HongKong" --index_column index "value": "hongkong", "position": 0, "force_prefix": false, + "force_prefix_search": false, "estimated_size": 1 } ] Modified: test/command/suite/select/env/overlap_token_skip/short.expected (+7 -0) =================================================================== --- test/command/suite/select/env/overlap_token_skip/short.expected 2018-11-12 14:14:04 +0900 (043a5ffc8) +++ test/command/suite/select/env/overlap_token_skip/short.expected 2018-11-12 14:32:17 +0900 (c0960af4a) @@ -23,42 +23,49 @@ table_tokenize Terms "HongKong" --index_column index "value": "ho", "position": 0, "force_prefix": false, + "force_prefix_search": false, "estimated_size": 1 }, { "value": "on", "position": 1, "force_prefix": false, + "force_prefix_search": false, "estimated_size": 3 }, { "value": "ng", "position": 2, "force_prefix": false, + "force_prefix_search": false, "estimated_size": 3 }, { "value": "gk", "position": 3, "force_prefix": false, + "force_prefix_search": false, "estimated_size": 1 }, { "value": "ko", "position": 4, "force_prefix": false, + "force_prefix_search": false, "estimated_size": 1 }, { "value": "on", "position": 5, "force_prefix": false, + "force_prefix_search": false, "estimated_size": 3 }, { "value": "ng", "position": 6, "force_prefix": false, + "force_prefix_search": false, "estimated_size": 3 } ] Modified: test/command/suite/select/env/overlap_token_skip/skip.expected (+2 -0) =================================================================== --- test/command/suite/select/env/overlap_token_skip/skip.expected 2018-11-12 14:14:04 +0900 (4ea9d8b6d) +++ test/command/suite/select/env/overlap_token_skip/skip.expected 2018-11-12 14:32:17 +0900 (904f5c3fd) @@ -33,12 +33,14 @@ table_tokenize Terms "This is a pen" --index_column index "value": "this", "position": 0, "force_prefix": false, + "force_prefix_search": false, "estimated_size": 1 }, { "value": "pen", "position": 3, "force_prefix": false, + "force_prefix_search": false, "estimated_size": 1 } ] Modified: test/command/suite/table_tokenize/flags.expected (+6 -3) =================================================================== --- test/command/suite/table_tokenize/flags.expected 2018-11-12 14:14:04 +0900 (0ff651d0a) +++ test/command/suite/table_tokenize/flags.expected 2018-11-12 14:32:17 +0900 (4a45746c1) @@ -11,17 +11,20 @@ table_tokenize Terms "aB { "value": "ab", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "cde 1", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "23", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/table_tokenize/index_column.expected (+5 -0) =================================================================== --- test/command/suite/table_tokenize/index_column.expected 2018-11-12 14:14:04 +0900 (4ea10853a) +++ test/command/suite/table_tokenize/index_column.expected 2018-11-12 14:32:17 +0900 (541685382) @@ -25,30 +25,35 @@ table_tokenize Terms "a ruby bindings of Groonga" 
--mode GET --index_column inde "value": "a", "position": 0, "force_prefix": false, + "force_prefix_search": false, "estimated_size": 5 }, { "value": "ruby", "position": 1, "force_prefix": false, + "force_prefix_search": false, "estimated_size": 1 }, { "value": "bindings", "position": 2, "force_prefix": false, + "force_prefix_search": false, "estimated_size": 1 }, { "value": "of", "position": 3, "force_prefix": false, + "force_prefix_search": false, "estimated_size": 1 }, { "value": "groonga", "position": 4, "force_prefix": false, + "force_prefix_search": false, "estimated_size": 5 } ] Modified: test/command/suite/table_tokenize/mode_add.expected (+10 -5) =================================================================== --- test/command/suite/table_tokenize/mode_add.expected 2018-11-12 14:14:04 +0900 (7bfed8921) +++ test/command/suite/table_tokenize/mode_add.expected 2018-11-12 14:32:17 +0900 (da5ddbe48) @@ -11,27 +11,32 @@ table_tokenize Terms "あいabアイ" --mode ADD { "value": "あい", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "い", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ab", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "アイ", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "イ", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/table_tokenize/mode_get.expected (+4 -2) =================================================================== --- test/command/suite/table_tokenize/mode_get.expected 2018-11-12 14:14:04 +0900 (066115781) +++ test/command/suite/table_tokenize/mode_get.expected 2018-11-12 14:32:17 +0900 (b43733244) @@ -21,12 +21,14 @@ table_tokenize Terms "あいabアイ" --mode GET { "value": "あい", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ab", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/table_tokenize/with_normalizer.expected (+4 -2) =================================================================== --- test/command/suite/table_tokenize/with_normalizer.expected 2018-11-12 14:14:04 +0900 (f620d89f3) +++ test/command/suite/table_tokenize/with_normalizer.expected 2018-11-12 14:32:17 +0900 (e1f70ea4e) @@ -11,12 +11,14 @@ table_tokenize Terms "aBcDe 123" --mode ADD { "value": "abcde", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "123", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/table_tokenize/with_token_filters.expected (+18 -9) =================================================================== --- test/command/suite/table_tokenize/with_token_filters.expected 2018-11-12 14:14:04 +0900 (de7cc4a5e) +++ test/command/suite/table_tokenize/with_token_filters.expected 2018-11-12 14:32:17 +0900 (804e65e7f) @@ -15,27 +15,32 @@ table_tokenize Terms "Hello and Good-bye" --mode ADD { "value": "hello", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "and", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "good", "position": 2, - "force_prefix": false + 
"force_prefix": false, + "force_prefix_search": false }, { "value": "-", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "bye", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] @@ -55,22 +60,26 @@ table_tokenize Terms "Hello and Good-bye" { "value": "hello", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "good", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "-", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "bye", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/token_filters/nfkc100/unify_kana.expected (+14 -7) =================================================================== --- test/command/suite/token_filters/nfkc100/unify_kana.expected 2018-11-12 14:14:04 +0900 (c5e651881) +++ test/command/suite/token_filters/nfkc100/unify_kana.expected 2018-11-12 14:32:17 +0900 (b03845d05) @@ -9,37 +9,44 @@ tokenize 'TokenMecab("use_reading", true)' "私は林檎を食べます。" { "value": "わたし", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "は", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "りんご", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "を", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "たべ", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ます", "position": 5, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "。", "position": 6, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenize/empty_token.expected (+4 -2) =================================================================== --- test/command/suite/tokenize/empty_token.expected 2018-11-12 14:14:04 +0900 (1515547d0) +++ test/command/suite/tokenize/empty_token.expected 2018-11-12 14:32:17 +0900 (f36375d03) @@ -9,12 +9,14 @@ tokenize TokenDelimit "aB { "value": "ab", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "c", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenize/flags.expected (+6 -3) =================================================================== --- test/command/suite/tokenize/flags.expected 2018-11-12 14:14:04 +0900 (b89f27373) +++ test/command/suite/tokenize/flags.expected 2018-11-12 14:32:17 +0900 (ae744f8a4) @@ -9,17 +9,20 @@ tokenize TokenDelimit "aB { "value": "ab", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "cde 1", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "23", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenize/mode_add.expected (+10 -5) =================================================================== --- test/command/suite/tokenize/mode_add.expected 2018-11-12 14:14:04 +0900 
(a4343d919) +++ test/command/suite/tokenize/mode_add.expected 2018-11-12 14:32:17 +0900 (76ab165b7) @@ -9,27 +9,32 @@ tokenize TokenBigram "あいabアイ" NormalizerAuto NONE ADD { "value": "あい", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "い", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ab", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "アイ", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "イ", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenize/mode_get.expected (+6 -3) =================================================================== --- test/command/suite/tokenize/mode_get.expected 2018-11-12 14:14:04 +0900 (86109b50b) +++ test/command/suite/tokenize/mode_get.expected 2018-11-12 14:32:17 +0900 (329a1e5ad) @@ -9,17 +9,20 @@ tokenize TokenBigram "あいabアイ" NormalizerAuto NONE GET { "value": "あい", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ab", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "アイ", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenize/no_normalizer.expected (+18 -9) =================================================================== --- test/command/suite/tokenize/no_normalizer.expected 2018-11-12 14:14:04 +0900 (1854ae399) +++ test/command/suite/tokenize/no_normalizer.expected 2018-11-12 14:32:17 +0900 (1f62bf32a) @@ -9,47 +9,56 @@ tokenize TokenBigram "aBcDe 123" { "value": "aB", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "Bc", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "cD", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "De", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "e ", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": " 1", "position": 5, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "12", "position": 6, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "23", "position": 7, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "3", "position": 8, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenize/with_normalizer.expected (+4 -2) =================================================================== --- test/command/suite/tokenize/with_normalizer.expected 2018-11-12 14:14:04 +0900 (4321c17bf) +++ test/command/suite/tokenize/with_normalizer.expected 2018-11-12 14:32:17 +0900 (93560745f) @@ -9,12 +9,14 @@ tokenize TokenBigram "aBcDe 123" NormalizerAuto { "value": "abcde", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "123", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: 
test/command/suite/tokenize/with_token_filters.expected (+6 -3) =================================================================== --- test/command/suite/tokenize/with_token_filters.expected 2018-11-12 14:14:04 +0900 (5bc37e1b5) +++ test/command/suite/tokenize/with_token_filters.expected 2018-11-12 14:32:17 +0900 (28b1b26e9) @@ -11,17 +11,20 @@ tokenize TokenBigram "I developed Groonga" NormalizerAuto --token_filters TokenF { "value": "i", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "develop", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "groonga", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenize/with_token_filters_options.expected (+15 -1) =================================================================== --- test/command/suite/tokenize/with_token_filters_options.expected 2018-11-12 14:14:04 +0900 (6d429ff94) +++ test/command/suite/tokenize/with_token_filters_options.expected 2018-11-12 14:32:17 +0900 (df3f01083) @@ -1,4 +1,18 @@ plugin_register token_filters/stem [[0,0.0,0.0],true] tokenize TokenBigram "maintenait" NormalizerAuto --token_filters 'TokenFilterStem("algorithm", "french")' -[[0,0.0,0.0],[{"value":"mainten","position":0,"force_prefix":false}]] +[ + [ + 0, + 0.0, + 0.0 + ], + [ + { + "value": "mainten", + "position": 0, + "force_prefix": false, + "force_prefix_search": false + } + ] +] Modified: test/command/suite/tokenizers/bigram/env/remove_blank_disable/include_blank.expected (+16 -8) =================================================================== --- test/command/suite/tokenizers/bigram/env/remove_blank_disable/include_blank.expected 2018-11-12 14:14:04 +0900 (6a7e3db06) +++ test/command/suite/tokenizers/bigram/env/remove_blank_disable/include_blank.expected 2018-11-12 14:32:17 +0900 (23d8cbc78) @@ -9,42 +9,50 @@ tokenize TokenBigramSplitSymbolAlpha "Hong Kong" NormalizerAuto --mode GET { "value": "ho", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "on", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ng", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "g ", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": " k", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ko", "position": 5, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "on", "position": 6, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ng", "position": 7, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/bigram/force_prefix/multiple_tokens/matured.expected (+4 -2) =================================================================== --- test/command/suite/tokenizers/bigram/force_prefix/multiple_tokens/matured.expected 2018-11-12 14:14:04 +0900 (aa9a2230d) +++ test/command/suite/tokenizers/bigram/force_prefix/multiple_tokens/matured.expected 2018-11-12 14:32:17 +0900 (29b5a38e7) @@ -9,12 +9,14 @@ tokenize TokenBigram "ABCだよ" NormalizerAuto --mode GET { "value": "abc", "position": 0, - "force_prefix": false + "force_prefix": 
false, + "force_prefix_search": false }, { "value": "だよ", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/bigram/force_prefix/multiple_tokens/unmatured.expected (+4 -2) =================================================================== --- test/command/suite/tokenizers/bigram/force_prefix/multiple_tokens/unmatured.expected 2018-11-12 14:14:04 +0900 (23f8a25dc) +++ test/command/suite/tokenizers/bigram/force_prefix/multiple_tokens/unmatured.expected 2018-11-12 14:32:17 +0900 (5216b5226) @@ -9,12 +9,14 @@ tokenize TokenBigram "ABCだ" NormalizerAuto --mode GET { "value": "abc", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "だ", "position": 1, - "force_prefix": true + "force_prefix": true, + "force_prefix_search": true } ] ] Modified: test/command/suite/tokenizers/bigram/force_prefix/single_token/matured.expected (+15 -1) =================================================================== --- test/command/suite/tokenizers/bigram/force_prefix/single_token/matured.expected 2018-11-12 14:14:04 +0900 (a3114660b) +++ test/command/suite/tokenizers/bigram/force_prefix/single_token/matured.expected 2018-11-12 14:32:17 +0900 (96e948157) @@ -1,2 +1,16 @@ tokenize TokenBigram "だよ" NormalizerAuto --mode GET -[[0,0.0,0.0],[{"value":"だよ","position":0,"force_prefix":false}]] +[ + [ + 0, + 0.0, + 0.0 + ], + [ + { + "value": "だよ", + "position": 0, + "force_prefix": false, + "force_prefix_search": false + } + ] +] Modified: test/command/suite/tokenizers/bigram/force_prefix/single_token/unmatured.expected (+15 -1) =================================================================== --- test/command/suite/tokenizers/bigram/force_prefix/single_token/unmatured.expected 2018-11-12 14:14:04 +0900 (4867dda85) +++ test/command/suite/tokenizers/bigram/force_prefix/single_token/unmatured.expected 2018-11-12 14:32:17 +0900 (3dfb697fb) @@ -1,2 +1,16 @@ tokenize TokenBigram "だ" NormalizerAuto --mode GET -[[0,0.0,0.0],[{"value":"だ","position":0,"force_prefix":true}]] +[ + [ + 0, + 0.0, + 0.0 + ], + [ + { + "value": "だ", + "position": 0, + "force_prefix": true, + "force_prefix_search": true + } + ] +] Modified: test/command/suite/tokenizers/bigram/long_token/first.expected (+8 -4) =================================================================== --- test/command/suite/tokenizers/bigram/long_token/first.expected 2018-11-12 14:14:04 +0900 (52d036f64) +++ test/command/suite/tokenizers/bigram/long_token/first.expected 2018-11-12 14:32:17 +0900 (ad5dc446d) @@ -9,22 +9,26 @@ tokenize TokenBigram "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX { "value": ":", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "4097", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "byte", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "string", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/bigram/long_token/last.expected (+8 -4) =================================================================== --- test/command/suite/tokenizers/bigram/long_token/last.expected 2018-11-12 14:14:04 +0900 (287426b73) +++ test/command/suite/tokenizers/bigram/long_token/last.expected 2018-11-12 14:32:17 +0900 (3d5b68f0c) @@ -9,22 +9,26 @@ 
tokenize TokenBigram "4097byte string: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX { "value": "4097", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "byte", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "string", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": ":", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/bigram/long_token/middle.expected (+12 -6) =================================================================== --- test/command/suite/tokenizers/bigram/long_token/middle.expected 2018-11-12 14:14:04 +0900 (5a09fcca2) +++ test/command/suite/tokenizers/bigram/long_token/middle.expected 2018-11-12 14:32:17 +0900 (01d1bc026) @@ -9,32 +9,38 @@ tokenize TokenBigram "4097byte string: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX { "value": "4097", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "byte", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "string", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": ":", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "after", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "string", "position": 5, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/delimit/valid/continuous_spaces.expected (+4 -2) =================================================================== --- test/command/suite/tokenizers/delimit/valid/continuous_spaces.expected 2018-11-12 14:14:04 +0900 (31f8e264e) +++ test/command/suite/tokenizers/delimit/valid/continuous_spaces.expected 2018-11-12 14:32:17 +0900 (3c53f608a) @@ -9,12 +9,14 @@ tokenize TokenDelimit "A B" { "value": "A", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "B", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/delimit/valid/multiple_tokens.expected (+6 -3) =================================================================== --- test/command/suite/tokenizers/delimit/valid/multiple_tokens.expected 2018-11-12 14:14:04 +0900 (6c3e8bc48) +++ test/command/suite/tokenizers/delimit/valid/multiple_tokens.expected 2018-11-12 14:32:17 +0900 (2699bcd00) @@ -9,17 +9,20 @@ tokenize TokenDelimit "a b c" { "value": "a", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "b", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "c", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/mecab/chunk/comma.expected (+10 -5) =================================================================== --- test/command/suite/tokenizers/mecab/chunk/comma.expected 2018-11-12 14:14:04 +0900 (bb6066b2b) +++ test/command/suite/tokenizers/mecab/chunk/comma.expected 2018-11-12 14:32:17 +0900 (9e64c7fef) @@ -9,27 +9,32 @@ tokenize TokenMecab '日本のエンジン,エンジン' { "value": "日本", 
"position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "の", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": ",", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/mecab/chunk/exclamation_mark.expected (+10 -5) =================================================================== --- test/command/suite/tokenizers/mecab/chunk/exclamation_mark.expected 2018-11-12 14:14:04 +0900 (86b9b3c9f) +++ test/command/suite/tokenizers/mecab/chunk/exclamation_mark.expected 2018-11-12 14:32:17 +0900 (e8593cb52) @@ -9,27 +9,32 @@ tokenize TokenMecab '日本のエンジン!エンジン' { "value": "日本", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "の", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "!", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/mecab/chunk/fullwidth_exclamation_mark.expected (+10 -5) =================================================================== --- test/command/suite/tokenizers/mecab/chunk/fullwidth_exclamation_mark.expected 2018-11-12 14:14:04 +0900 (4d1a1ca20) +++ test/command/suite/tokenizers/mecab/chunk/fullwidth_exclamation_mark.expected 2018-11-12 14:32:17 +0900 (c74284a55) @@ -9,27 +9,32 @@ tokenize TokenMecab '日本のエンジン!エンジン' { "value": "日本", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "の", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "!", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/mecab/chunk/fullwidth_question_mark.expected (+10 -5) =================================================================== --- test/command/suite/tokenizers/mecab/chunk/fullwidth_question_mark.expected 2018-11-12 14:14:04 +0900 (b0a5539d4) +++ test/command/suite/tokenizers/mecab/chunk/fullwidth_question_mark.expected 2018-11-12 14:32:17 +0900 (13f36dd15) @@ -9,27 +9,32 @@ tokenize TokenMecab '日本のエンジン?エンジン' { "value": "日本", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "の", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "?", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 4, - 
"force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/mecab/chunk/ideographic_comma.expected (+10 -5) =================================================================== --- test/command/suite/tokenizers/mecab/chunk/ideographic_comma.expected 2018-11-12 14:14:04 +0900 (042dd924b) +++ test/command/suite/tokenizers/mecab/chunk/ideographic_comma.expected 2018-11-12 14:32:17 +0900 (f17231f22) @@ -9,27 +9,32 @@ tokenize TokenMecab '日本のエンジン、エンジン' { "value": "日本", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "の", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "、", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/mecab/chunk/ideographic_full_stop.expected (+10 -5) =================================================================== --- test/command/suite/tokenizers/mecab/chunk/ideographic_full_stop.expected 2018-11-12 14:14:04 +0900 (19d1b7e28) +++ test/command/suite/tokenizers/mecab/chunk/ideographic_full_stop.expected 2018-11-12 14:32:17 +0900 (7cb004d8e) @@ -9,27 +9,32 @@ tokenize TokenMecab '日本のエンジン。エンジン' { "value": "日本", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "の", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "。", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/mecab/chunk/ideographic_space.expected (+8 -4) =================================================================== --- test/command/suite/tokenizers/mecab/chunk/ideographic_space.expected 2018-11-12 14:14:04 +0900 (7f30518d6) +++ test/command/suite/tokenizers/mecab/chunk/ideographic_space.expected 2018-11-12 14:32:17 +0900 (7ceaa45f0) @@ -9,22 +9,26 @@ tokenize TokenMecab '日本のエンジン エンジン' { "value": "日本", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "の", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/mecab/chunk/multiple_delimiters_in_one_chunk.expected (+10 -5) =================================================================== --- test/command/suite/tokenizers/mecab/chunk/multiple_delimiters_in_one_chunk.expected 2018-11-12 14:14:04 +0900 (f41d1a6f9) +++ test/command/suite/tokenizers/mecab/chunk/multiple_delimiters_in_one_chunk.expected 2018-11-12 14:32:17 +0900 (3e813dcfb) @@ -9,27 +9,32 @@ tokenize TokenMecab '日本。エンジン。エンジン' { "value": "日本", "position": 0, - "force_prefix": false + 
"force_prefix": false, + "force_prefix_search": false }, { "value": "。", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "。", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/mecab/chunk/period.expected (+10 -5) =================================================================== --- test/command/suite/tokenizers/mecab/chunk/period.expected 2018-11-12 14:14:04 +0900 (33d9762a5) +++ test/command/suite/tokenizers/mecab/chunk/period.expected 2018-11-12 14:32:17 +0900 (2ad09236b) @@ -9,27 +9,32 @@ tokenize TokenMecab '日本のエンジン.エンジン' { "value": "日本", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "の", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": ".", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/mecab/chunk/question_mark.expected (+10 -5) =================================================================== --- test/command/suite/tokenizers/mecab/chunk/question_mark.expected 2018-11-12 14:14:04 +0900 (42f129807) +++ test/command/suite/tokenizers/mecab/chunk/question_mark.expected 2018-11-12 14:32:17 +0900 (23742b958) @@ -9,27 +9,32 @@ tokenize TokenMecab '日本のエンジン?エンジン' { "value": "日本", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "の", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "?", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/mecab/chunk/space.expected (+8 -4) =================================================================== --- test/command/suite/tokenizers/mecab/chunk/space.expected 2018-11-12 14:14:04 +0900 (bf112e723) +++ test/command/suite/tokenizers/mecab/chunk/space.expected 2018-11-12 14:32:17 +0900 (85fdcc269) @@ -9,22 +9,26 @@ tokenize TokenMecab '日本のエンジン エンジン' { "value": "日本", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "の", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/mecab/chunk/threshold.expected (+12 -6) =================================================================== --- 
test/command/suite/tokenizers/mecab/chunk/threshold.expected 2018-11-12 14:14:04 +0900 (391f7d0a3) +++ test/command/suite/tokenizers/mecab/chunk/threshold.expected 2018-11-12 14:32:17 +0900 (62dcfb7fc) @@ -9,32 +9,38 @@ tokenize TokenMecab '日本のエンジンとエンジン' { "value": "日本", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "の", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "と", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エン", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ジン", "position": 5, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/mecab/full_width_space/first.expected (+15 -1) =================================================================== --- test/command/suite/tokenizers/mecab/full_width_space/first.expected 2018-11-12 14:14:04 +0900 (15d04d9c5) +++ test/command/suite/tokenizers/mecab/full_width_space/first.expected 2018-11-12 14:32:17 +0900 (bc326757f) @@ -1,2 +1,16 @@ tokenize TokenMecab ' 日本' -[[0,0.0,0.0],[{"value":"日本","position":0,"force_prefix":false}]] +[ + [ + 0, + 0.0, + 0.0 + ], + [ + { + "value": "日本", + "position": 0, + "force_prefix": false, + "force_prefix_search": false + } + ] +] Modified: test/command/suite/tokenizers/mecab/full_width_space/last.expected (+15 -1) =================================================================== --- test/command/suite/tokenizers/mecab/full_width_space/last.expected 2018-11-12 14:14:04 +0900 (b57566604) +++ test/command/suite/tokenizers/mecab/full_width_space/last.expected 2018-11-12 14:32:17 +0900 (8e7c68b1c) @@ -1,2 +1,16 @@ tokenize TokenMecab '日本 ' -[[0,0.0,0.0],[{"value":"日本","position":0,"force_prefix":false}]] +[ + [ + 0, + 0.0, + 0.0 + ], + [ + { + "value": "日本", + "position": 0, + "force_prefix": false, + "force_prefix_search": false + } + ] +] Modified: test/command/suite/tokenizers/mecab/multiple_tokens.expected (+6 -3) =================================================================== --- test/command/suite/tokenizers/mecab/multiple_tokens.expected 2018-11-12 14:14:04 +0900 (57d2370bd) +++ test/command/suite/tokenizers/mecab/multiple_tokens.expected 2018-11-12 14:32:17 +0900 (43343fb79) @@ -9,17 +9,20 @@ tokenize TokenMecab '日本のエンジン' { "value": "日本", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "の", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/mecab/one_token.expected (+15 -1) =================================================================== --- test/command/suite/tokenizers/mecab/one_token.expected 2018-11-12 14:14:04 +0900 (b274b18cd) +++ test/command/suite/tokenizers/mecab/one_token.expected 2018-11-12 14:32:17 +0900 (75ff4eb0a) @@ -1,2 +1,16 @@ tokenize TokenMecab '日本' -[[0,0.0,0.0],[{"value":"日本","position":0,"force_prefix":false}]] +[ + [ + 0, + 0.0, + 0.0 + ], + [ + { + "value": "日本", + "position": 0, + "force_prefix": false, + "force_prefix_search": false + } + ] +] Modified: 
test/command/suite/tokenizers/mecab/options/chunk_size_threshold.expected (+12 -6) =================================================================== --- test/command/suite/tokenizers/mecab/options/chunk_size_threshold.expected 2018-11-12 14:14:04 +0900 (03d7061e9) +++ test/command/suite/tokenizers/mecab/options/chunk_size_threshold.expected 2018-11-12 14:32:17 +0900 (5f460f516) @@ -9,32 +9,38 @@ tokenize 'TokenMecab("chunked_tokenize", true, "chunk_size_threshold", 30)' { "value": "日本", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "の", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エンジン", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "と", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "エン", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ジン", "position": 5, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/mecab/options/include_class.expected (+5 -0) =================================================================== --- test/command/suite/tokenizers/mecab/options/include_class.expected 2018-11-12 14:14:04 +0900 (fd48876f6) +++ test/command/suite/tokenizers/mecab/options/include_class.expected 2018-11-12 14:32:17 +0900 (4acf520fd) @@ -10,6 +10,7 @@ tokenize 'TokenMecab("include_class", true)' 'これはペンです。' "value": "これ", "position": 0, "force_prefix": false, + "force_prefix_search": false, "metadata": { "class": "名詞", "subclass0": "代名詞", @@ -20,6 +21,7 @@ tokenize 'TokenMecab("include_class", true)' 'これはペンです。' "value": "は", "position": 1, "force_prefix": false, + "force_prefix_search": false, "metadata": { "class": "助詞", "subclass0": "係助詞" @@ -29,6 +31,7 @@ tokenize 'TokenMecab("include_class", true)' 'これはペンです。' "value": "ペン", "position": 2, "force_prefix": false, + "force_prefix_search": false, "metadata": { "class": "名詞", "subclass0": "一般" @@ -38,6 +41,7 @@ tokenize 'TokenMecab("include_class", true)' 'これはペンです。' "value": "です", "position": 3, "force_prefix": false, + "force_prefix_search": false, "metadata": { "class": "助動詞" } @@ -46,6 +50,7 @@ tokenize 'TokenMecab("include_class", true)' 'これはペンです。' "value": "。", "position": 4, "force_prefix": false, + "force_prefix_search": false, "metadata": { "class": "記号", "subclass0": "句点" Modified: test/command/suite/tokenizers/mecab/options/include_form.expected (+3 -0) =================================================================== --- test/command/suite/tokenizers/mecab/options/include_form.expected 2018-11-12 14:14:04 +0900 (36432399d) +++ test/command/suite/tokenizers/mecab/options/include_form.expected 2018-11-12 14:32:17 +0900 (9475ad72b) @@ -10,6 +10,7 @@ tokenize 'TokenMecab("include_form", true)' '行きました' "value": "行き", "position": 0, "force_prefix": false, + "force_prefix_search": false, "metadata": { "inflected_type": "五段・カ行促音便", "inflected_form": "連用形", @@ -20,6 +21,7 @@ tokenize 'TokenMecab("include_form", true)' '行きました' "value": "まし", "position": 1, "force_prefix": false, + "force_prefix_search": false, "metadata": { "inflected_type": "特殊・マス", "inflected_form": "連用形", @@ -30,6 +32,7 @@ tokenize 'TokenMecab("include_form", true)' '行きました' "value": "た", "position": 2, "force_prefix": false, + "force_prefix_search": false, "metadata": { "inflected_type": 
"特殊・タ", "inflected_form": "基本形", Modified: test/command/suite/tokenizers/mecab/options/include_reading.expected (+5 -0) =================================================================== --- test/command/suite/tokenizers/mecab/options/include_reading.expected 2018-11-12 14:14:04 +0900 (000bb9359) +++ test/command/suite/tokenizers/mecab/options/include_reading.expected 2018-11-12 14:32:17 +0900 (fa2633403) @@ -10,6 +10,7 @@ tokenize 'TokenMecab("include_reading", true)' '焼き肉と焼肉とyakinik "value": "焼き肉", "position": 0, "force_prefix": false, + "force_prefix_search": false, "metadata": { "reading": "ヤキニク" } @@ -18,6 +19,7 @@ tokenize 'TokenMecab("include_reading", true)' '焼き肉と焼肉とyakinik "value": "と", "position": 1, "force_prefix": false, + "force_prefix_search": false, "metadata": { "reading": "ト" } @@ -26,6 +28,7 @@ tokenize 'TokenMecab("include_reading", true)' '焼き肉と焼肉とyakinik "value": "焼肉", "position": 2, "force_prefix": false, + "force_prefix_search": false, "metadata": { "reading": "ヤキニク" } @@ -34,6 +37,7 @@ tokenize 'TokenMecab("include_reading", true)' '焼き肉と焼肉とyakinik "value": "と", "position": 3, "force_prefix": false, + "force_prefix_search": false, "metadata": { "reading": "ト" } @@ -42,6 +46,7 @@ tokenize 'TokenMecab("include_reading", true)' '焼き肉と焼肉とyakinik "value": "yakiniku", "position": 4, "force_prefix": false, + "force_prefix_search": false, "metadata": { } } Modified: test/command/suite/tokenizers/mecab/options/target_class/negative.expected (+5 -0) =================================================================== --- test/command/suite/tokenizers/mecab/options/target_class/negative.expected 2018-11-12 14:14:04 +0900 (11783706a) +++ test/command/suite/tokenizers/mecab/options/target_class/negative.expected 2018-11-12 14:32:17 +0900 (bf362caba) @@ -10,6 +10,7 @@ tokenize 'TokenMecab("include_class", true, "target_class", "- "value": "私", "position": 0, "force_prefix": false, + "force_prefix_search": false, "metadata": { "class": "名詞", "subclass0": "代名詞", @@ -20,6 +21,7 @@ tokenize 'TokenMecab("include_class", true, "target_class", "- "value": "名前", "position": 1, "force_prefix": false, + "force_prefix_search": false, "metadata": { "class": "名詞", "subclass0": "一般" @@ -29,6 +31,7 @@ tokenize 'TokenMecab("include_class", true, "target_class", "- "value": "山田", "position": 2, "force_prefix": false, + "force_prefix_search": false, "metadata": { "class": "名詞", "subclass0": "固有名詞", @@ -40,6 +43,7 @@ tokenize 'TokenMecab("include_class", true, "target_class", "- "value": "です", "position": 3, "force_prefix": false, + "force_prefix_search": false, "metadata": { "class": "助動詞" } @@ -48,6 +52,7 @@ tokenize 'TokenMecab("include_class", true, "target_class", "- "value": "。", "position": 4, "force_prefix": false, + "force_prefix_search": false, "metadata": { "class": "記号", "subclass0": "句点" Modified: test/command/suite/tokenizers/mecab/options/target_class/one.expected (+3 -0) =================================================================== --- test/command/suite/tokenizers/mecab/options/target_class/one.expected 2018-11-12 14:14:04 +0900 (1f91d20a2) +++ test/command/suite/tokenizers/mecab/options/target_class/one.expected 2018-11-12 14:32:17 +0900 (dd3633bc8) @@ -10,6 +10,7 @@ tokenize 'TokenMecab("include_class", true, "target_class", "名 "value": "私", "position": 0, "force_prefix": false, + "force_prefix_search": false, "metadata": { "class": "名詞", "subclass0": "代名詞", @@ -20,6 +21,7 @@ tokenize 'TokenMecab("include_class", true, "target_class", "名 "value": "名前", "position": 1, 
"force_prefix": false, + "force_prefix_search": false, "metadata": { "class": "名詞", "subclass0": "一般" @@ -29,6 +31,7 @@ tokenize 'TokenMecab("include_class", true, "target_class", "名 "value": "山田", "position": 2, "force_prefix": false, + "force_prefix_search": false, "metadata": { "class": "名詞", "subclass0": "固有名詞", Modified: test/command/suite/tokenizers/mecab/options/target_class/positive.expected (+3 -0) =================================================================== --- test/command/suite/tokenizers/mecab/options/target_class/positive.expected 2018-11-12 14:14:04 +0900 (1b8fe774a) +++ test/command/suite/tokenizers/mecab/options/target_class/positive.expected 2018-11-12 14:32:17 +0900 (d636182de) @@ -10,6 +10,7 @@ tokenize 'TokenMecab("include_class", true, "target_class", "+ "value": "私", "position": 0, "force_prefix": false, + "force_prefix_search": false, "metadata": { "class": "名詞", "subclass0": "代名詞", @@ -20,6 +21,7 @@ tokenize 'TokenMecab("include_class", true, "target_class", "+ "value": "名前", "position": 1, "force_prefix": false, + "force_prefix_search": false, "metadata": { "class": "名詞", "subclass0": "一般" @@ -29,6 +31,7 @@ tokenize 'TokenMecab("include_class", true, "target_class", "+ "value": "山田", "position": 2, "force_prefix": false, + "force_prefix_search": false, "metadata": { "class": "名詞", "subclass0": "固有名詞", Modified: test/command/suite/tokenizers/mecab/options/target_class/subclass0.expected (+1 -0) =================================================================== --- test/command/suite/tokenizers/mecab/options/target_class/subclass0.expected 2018-11-12 14:14:04 +0900 (d7d17422c) +++ test/command/suite/tokenizers/mecab/options/target_class/subclass0.expected 2018-11-12 14:32:17 +0900 (7249eb28a) @@ -10,6 +10,7 @@ tokenize 'TokenMecab("include_class", true, "target_class", "名 "value": "私", "position": 0, "force_prefix": false, + "force_prefix_search": false, "metadata": { "class": "名詞", "subclass0": "代名詞", Modified: test/command/suite/tokenizers/mecab/options/target_class/subclass1.expected (+1 -0) =================================================================== --- test/command/suite/tokenizers/mecab/options/target_class/subclass1.expected 2018-11-12 14:14:04 +0900 (cb7f95ac5) +++ test/command/suite/tokenizers/mecab/options/target_class/subclass1.expected 2018-11-12 14:32:17 +0900 (b2e246b6b) @@ -10,6 +10,7 @@ tokenize 'TokenMecab("include_class", true, "target_class", "名 "value": "山田", "position": 0, "force_prefix": false, + "force_prefix_search": false, "metadata": { "class": "名詞", "subclass0": "固有名詞", Modified: test/command/suite/tokenizers/mecab/options/target_class/subclass2.expected (+1 -0) =================================================================== --- test/command/suite/tokenizers/mecab/options/target_class/subclass2.expected 2018-11-12 14:14:04 +0900 (6afa6127a) +++ test/command/suite/tokenizers/mecab/options/target_class/subclass2.expected 2018-11-12 14:32:17 +0900 (fe3480219) @@ -10,6 +10,7 @@ tokenize 'TokenMecab("include_class", true, "target_class", "名 "value": "山田", "position": 0, "force_prefix": false, + "force_prefix_search": false, "metadata": { "class": "名詞", "subclass0": "固有名詞", Modified: test/command/suite/tokenizers/mecab/options/use_reading_add.expected (+10 -5) =================================================================== --- test/command/suite/tokenizers/mecab/options/use_reading_add.expected 2018-11-12 14:14:04 +0900 (00f119724) +++ test/command/suite/tokenizers/mecab/options/use_reading_add.expected 2018-11-12 
14:32:17 +0900 (326357177) @@ -9,27 +9,32 @@ tokenize 'TokenMecab("use_reading", true)' '焼き肉と焼肉とyakiniku' { "value": "ヤキニク", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ト", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ヤキニク", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ト", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "yakiniku", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/mecab/options/use_reading_get.expected (+10 -5) =================================================================== --- test/command/suite/tokenizers/mecab/options/use_reading_get.expected 2018-11-12 14:14:04 +0900 (d4ba3d0fa) +++ test/command/suite/tokenizers/mecab/options/use_reading_get.expected 2018-11-12 14:32:17 +0900 (a65cfa7bc) @@ -9,27 +9,32 @@ tokenize 'TokenMecab("use_reading", true)' '焼き肉と焼肉とyakiniku' { "value": "ヤキニク", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ト", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ヤキニク", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ト", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "yakiniku", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/ngram/loose_blank/add.expected (+10 -5) =================================================================== --- test/command/suite/tokenizers/ngram/loose_blank/add.expected 2018-11-12 14:14:04 +0900 (e3e668ae7) +++ test/command/suite/tokenizers/ngram/loose_blank/add.expected 2018-11-12 14:32:17 +0900 (26d143577) @@ -9,27 +9,32 @@ tokenize 'TokenNgram("loose_blank", true)' "090 1234 5678" NormalizerAuto { "value": "090", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "1234", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "5678", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "09012345678", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/ngram/loose_blank/get.expected (+15 -1) =================================================================== --- test/command/suite/tokenizers/ngram/loose_blank/get.expected 2018-11-12 14:14:04 +0900 (069d4e464) +++ test/command/suite/tokenizers/ngram/loose_blank/get.expected 2018-11-12 14:32:17 +0900 (71e7f68c3) @@ -1,2 +1,16 @@ tokenize 'TokenNgram("loose_blank", true)' "090 1234 5678" NormalizerAuto --mode GET -[[0,0.0,0.0],[{"value":"09012345678","position":0,"force_prefix":false}]] +[ + [ + 0, + 0.0, + 0.0 + ], + [ + { + "value": "09012345678", + "position": 0, + "force_prefix": false, + "force_prefix_search": false + } + ] +] Modified: test/command/suite/tokenizers/ngram/loose_symbol/add.expected (+14 -7) 
=================================================================== --- test/command/suite/tokenizers/ngram/loose_symbol/add.expected 2018-11-12 14:14:04 +0900 (a8b574720) +++ test/command/suite/tokenizers/ngram/loose_symbol/add.expected 2018-11-12 14:32:17 +0900 (2bebafb57) @@ -9,37 +9,44 @@ tokenize 'TokenNgram("loose_symbol", true)' "090-1234-5678" NormalizerAuto { "value": "090", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "-", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "1234", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "-", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "5678", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "", "position": 5, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "09012345678", "position": 6, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/ngram/loose_symbol/get.expected (+15 -1) =================================================================== --- test/command/suite/tokenizers/ngram/loose_symbol/get.expected 2018-11-12 14:14:04 +0900 (601e8b3cb) +++ test/command/suite/tokenizers/ngram/loose_symbol/get.expected 2018-11-12 14:32:17 +0900 (566c7046a) @@ -1,2 +1,16 @@ tokenize 'TokenNgram("loose_symbol", true)' "090-1234-5678" NormalizerAuto --mode GET -[[0,0.0,0.0],[{"value":"09012345678","position":0,"force_prefix":false}]] +[ + [ + 0, + 0.0, + 0.0 + ], + [ + { + "value": "09012345678", + "position": 0, + "force_prefix": false, + "force_prefix_search": false + } + ] +] Modified: test/command/suite/tokenizers/ngram/loose_symbol/get_all_symbols.expected (+6 -3) =================================================================== --- test/command/suite/tokenizers/ngram/loose_symbol/get_all_symbols.expected 2018-11-12 14:14:04 +0900 (d7c2352a9) +++ test/command/suite/tokenizers/ngram/loose_symbol/get_all_symbols.expected 2018-11-12 14:32:17 +0900 (86a3a0aec) @@ -9,17 +9,20 @@ tokenize 'TokenNgram("loose_symbol", true)' "? ? ?" 
NormalizerAuto --mod { "value": "?", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "?", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "?", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/ngram/n.expected (+14 -7) =================================================================== --- test/command/suite/tokenizers/ngram/n.expected 2018-11-12 14:14:04 +0900 (b4af32bdb) +++ test/command/suite/tokenizers/ngram/n.expected 2018-11-12 14:32:17 +0900 (bbdebc33d) @@ -9,37 +9,44 @@ tokenize 'TokenNgram("n", 3)' "abcdefg" { "value": "abc", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "bcd", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "cde", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "def", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "efg", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "fg", "position": 5, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "g", "position": 6, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/ngram/remove_blank.expected (+14 -7) =================================================================== --- test/command/suite/tokenizers/ngram/remove_blank.expected 2018-11-12 14:14:04 +0900 (237816cf9) +++ test/command/suite/tokenizers/ngram/remove_blank.expected 2018-11-12 14:32:17 +0900 (f5cbc972c) @@ -9,37 +9,44 @@ tokenize 'TokenNgram("remove_blank", false)' " a b c " NormalizerAuto { "value": " ", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "a", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": " ", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "b", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": " ", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "c", "position": 5, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": " ", "position": 6, - "force_prefix": true + "force_prefix": true, + "force_prefix_search": true } ] ] Modified: test/command/suite/tokenizers/ngram/report_source_location/expand_katakana.expected (+9 -0) =================================================================== --- test/command/suite/tokenizers/ngram/report_source_location/expand_katakana.expected 2018-11-12 14:14:04 +0900 (0674f5ce8) +++ test/command/suite/tokenizers/ngram/report_source_location/expand_katakana.expected 2018-11-12 14:32:17 +0900 (8860fb573) @@ -10,6 +10,7 @@ tokenize 'TokenNgram("report_source_location", true)' "アイ㌕エオ" 'N "value": "アイ", "position": 0, "force_prefix": false, + "force_prefix_search": false, "source_offset": 0, "source_length": 6, "source_first_character_length": 3 @@ -18,6 +19,7 @@ tokenize 'TokenNgram("report_source_location", true)' "アイ㌕エオ" 'N "value": "イキ", "position": 
1, "force_prefix": false, + "force_prefix_search": false, "source_offset": 3, "source_length": 6, "source_first_character_length": 3 @@ -26,6 +28,7 @@ tokenize 'TokenNgram("report_source_location", true)' "アイ㌕エオ" 'N "value": "キロ", "position": 2, "force_prefix": false, + "force_prefix_search": false, "source_offset": 6, "source_length": 3, "source_first_character_length": 3 @@ -34,6 +37,7 @@ tokenize 'TokenNgram("report_source_location", true)' "アイ㌕エオ" 'N "value": "ログ", "position": 3, "force_prefix": false, + "force_prefix_search": false, "source_offset": 6, "source_length": 3, "source_first_character_length": 3 @@ -42,6 +46,7 @@ tokenize 'TokenNgram("report_source_location", true)' "アイ㌕エオ" 'N "value": "グラ", "position": 4, "force_prefix": false, + "force_prefix_search": false, "source_offset": 6, "source_length": 3, "source_first_character_length": 3 @@ -50,6 +55,7 @@ tokenize 'TokenNgram("report_source_location", true)' "アイ㌕エオ" 'N "value": "ラム", "position": 5, "force_prefix": false, + "force_prefix_search": false, "source_offset": 6, "source_length": 3, "source_first_character_length": 3 @@ -58,6 +64,7 @@ tokenize 'TokenNgram("report_source_location", true)' "アイ㌕エオ" 'N "value": "ムエ", "position": 6, "force_prefix": false, + "force_prefix_search": false, "source_offset": 6, "source_length": 6, "source_first_character_length": 3 @@ -66,6 +73,7 @@ tokenize 'TokenNgram("report_source_location", true)' "アイ㌕エオ" 'N "value": "エオ", "position": 7, "force_prefix": false, + "force_prefix_search": false, "source_offset": 9, "source_length": 6, "source_first_character_length": 3 @@ -74,6 +82,7 @@ tokenize 'TokenNgram("report_source_location", true)' "アイ㌕エオ" 'N "value": "オ", "position": 8, "force_prefix": false, + "force_prefix_search": false, "source_offset": 12, "source_length": 3, "source_first_character_length": 3 Modified: test/command/suite/tokenizers/ngram/report_source_location/expand_no_overlap.expected (+4 -0) =================================================================== --- test/command/suite/tokenizers/ngram/report_source_location/expand_no_overlap.expected 2018-11-12 14:14:04 +0900 (02dd5427b) +++ test/command/suite/tokenizers/ngram/report_source_location/expand_no_overlap.expected 2018-11-12 14:32:17 +0900 (b7d07375d) @@ -10,6 +10,7 @@ tokenize 'TokenNgram("report_source_location", true)' "A㌔Z" 'Normali "value": "a", "position": 0, "force_prefix": false, + "force_prefix_search": false, "source_offset": 0, "source_length": 3, "source_first_character_length": 3 @@ -18,6 +19,7 @@ tokenize 'TokenNgram("report_source_location", true)' "A㌔Z" 'Normali "value": "キロ", "position": 1, "force_prefix": false, + "force_prefix_search": false, "source_offset": 3, "source_length": 3, "source_first_character_length": 3 @@ -26,6 +28,7 @@ tokenize 'TokenNgram("report_source_location", true)' "A㌔Z" 'Normali "value": "ロ", "position": 2, "force_prefix": false, + "force_prefix_search": false, "source_offset": 3, "source_length": 3, "source_first_character_length": 3 @@ -34,6 +37,7 @@ tokenize 'TokenNgram("report_source_location", true)' "A㌔Z" 'Normali "value": "z", "position": 3, "force_prefix": false, + "force_prefix_search": false, "source_offset": 6, "source_length": 3, "source_first_character_length": 3 Modified: test/command/suite/tokenizers/ngram/report_source_location/hiragana.expected (+5 -0) =================================================================== --- test/command/suite/tokenizers/ngram/report_source_location/hiragana.expected 2018-11-12 14:14:04 +0900 (33074e0de) +++ 
test/command/suite/tokenizers/ngram/report_source_location/hiragana.expected 2018-11-12 14:32:17 +0900 (d5e96322f) @@ -10,6 +10,7 @@ tokenize 'TokenNgram("report_source_location", true)' "あいうえお" 'N "value": "あい", "position": 0, "force_prefix": false, + "force_prefix_search": false, "source_offset": 0, "source_length": 6, "source_first_character_length": 3 @@ -18,6 +19,7 @@ tokenize 'TokenNgram("report_source_location", true)' "あいうえお" 'N "value": "いう", "position": 1, "force_prefix": false, + "force_prefix_search": false, "source_offset": 3, "source_length": 6, "source_first_character_length": 3 @@ -26,6 +28,7 @@ tokenize 'TokenNgram("report_source_location", true)' "あいうえお" 'N "value": "うえ", "position": 2, "force_prefix": false, + "force_prefix_search": false, "source_offset": 6, "source_length": 6, "source_first_character_length": 3 @@ -34,6 +37,7 @@ tokenize 'TokenNgram("report_source_location", true)' "あいうえお" 'N "value": "えお", "position": 3, "force_prefix": false, + "force_prefix_search": false, "source_offset": 9, "source_length": 6, "source_first_character_length": 3 @@ -42,6 +46,7 @@ tokenize 'TokenNgram("report_source_location", true)' "あいうえお" 'N "value": "お", "position": 4, "force_prefix": false, + "force_prefix_search": false, "source_offset": 12, "source_length": 3, "source_first_character_length": 3 Modified: test/command/suite/tokenizers/ngram/report_source_location/include_removed_source_location.expected (+9 -0) =================================================================== --- test/command/suite/tokenizers/ngram/report_source_location/include_removed_source_location.expected 2018-11-12 14:14:04 +0900 (ad2d85f6d) +++ test/command/suite/tokenizers/ngram/report_source_location/include_removed_source_location.expected 2018-11-12 14:32:17 +0900 (bb8f04321) @@ -10,6 +10,7 @@ tokenize 'TokenNgram("report_source_location", true, "include_re "value": "090", "position": 0, "force_prefix": false, + "force_prefix_search": false, "source_offset": 0, "source_length": 9, "source_first_character_length": 3 @@ -18,6 +19,7 @@ tokenize 'TokenNgram("report_source_location", true, "include_re "value": "(", "position": 1, "force_prefix": false, + "force_prefix_search": false, "source_offset": 9, "source_length": 3, "source_first_character_length": 3 @@ -26,6 +28,7 @@ tokenize 'TokenNgram("report_source_location", true, "include_re "value": "1234", "position": 2, "force_prefix": false, + "force_prefix_search": false, "source_offset": 12, "source_length": 8, "source_first_character_length": 3 @@ -34,6 +37,7 @@ tokenize 'TokenNgram("report_source_location", true, "include_re "value": ")", "position": 3, "force_prefix": false, + "force_prefix_search": false, "source_offset": 20, "source_length": 3, "source_first_character_length": 3 @@ -42,6 +46,7 @@ tokenize 'TokenNgram("report_source_location", true, "include_re "value": "56", "position": 4, "force_prefix": false, + "force_prefix_search": false, "source_offset": 23, "source_length": 4, "source_first_character_length": 3 @@ -50,6 +55,7 @@ tokenize 'TokenNgram("report_source_location", true, "include_re "value": "−", "position": 5, "force_prefix": false, + "force_prefix_search": false, "source_offset": 27, "source_length": 3, "source_first_character_length": 3 @@ -58,6 +64,7 @@ tokenize 'TokenNgram("report_source_location", true, "include_re "value": "78", "position": 6, "force_prefix": false, + "force_prefix_search": false, "source_offset": 30, "source_length": 6, "source_first_character_length": 3 @@ -66,6 +73,7 @@ tokenize 
'TokenNgram("report_source_location", true, "include_re "value": "", "position": 7, "force_prefix": false, + "force_prefix_search": false, "source_offset": 36, "source_length": 0, "source_first_character_length": 0 @@ -74,6 +82,7 @@ tokenize 'TokenNgram("report_source_location", true, "include_re "value": "09012345678", "position": 8, "force_prefix": false, + "force_prefix_search": false, "source_offset": 0, "source_length": 36, "source_first_character_length": 3 Modified: test/command/suite/tokenizers/ngram/report_source_location/loose_and_unify.expected (+14 -0) =================================================================== --- test/command/suite/tokenizers/ngram/report_source_location/loose_and_unify.expected 2018-11-12 14:14:04 +0900 (496eefd29) +++ test/command/suite/tokenizers/ngram/report_source_location/loose_and_unify.expected 2018-11-12 14:32:17 +0900 (93538cad8) @@ -10,6 +10,7 @@ tokenize 'TokenNgram("loose_symbol", true, "include_removed_sour "value": "[", "position": 0, "force_prefix": false, + "force_prefix_search": false, "source_offset": 0, "source_length": 1, "source_first_character_length": 1 @@ -18,6 +19,7 @@ tokenize 'TokenNgram("loose_symbol", true, "include_removed_sour "value": "クリ", "position": 1, "force_prefix": false, + "force_prefix_search": false, "source_offset": 1, "source_length": 6, "source_first_character_length": 3 @@ -26,6 +28,7 @@ tokenize 'TokenNgram("loose_symbol", true, "include_removed_sour "value": "リア", "position": 2, "force_prefix": false, + "force_prefix_search": false, "source_offset": 4, "source_length": 6, "source_first_character_length": 3 @@ -34,6 +37,7 @@ tokenize 'TokenNgram("loose_symbol", true, "include_removed_sour "value": "アコ", "position": 3, "force_prefix": false, + "force_prefix_search": false, "source_offset": 7, "source_length": 6, "source_first_character_length": 3 @@ -42,6 +46,7 @@ tokenize 'TokenNgram("loose_symbol", true, "include_removed_sour "value": "コ", "position": 4, "force_prefix": false, + "force_prefix_search": false, "source_offset": 10, "source_length": 3, "source_first_character_length": 3 @@ -50,6 +55,7 @@ tokenize 'TokenNgram("loose_symbol", true, "include_removed_sour "value": "-", "position": 5, "force_prefix": false, + "force_prefix_search": false, "source_offset": 13, "source_length": 3, "source_first_character_length": 3 @@ -58,6 +64,7 @@ tokenize 'TokenNgram("loose_symbol", true, "include_removed_sour "value": "ド", "position": 6, "force_prefix": false, + "force_prefix_search": false, "source_offset": 16, "source_length": 3, "source_first_character_length": 3 @@ -66,6 +73,7 @@ tokenize 'TokenNgram("loose_symbol", true, "include_removed_sour "value": "]", "position": 7, "force_prefix": false, + "force_prefix_search": false, "source_offset": 19, "source_length": 1, "source_first_character_length": 1 @@ -74,6 +82,7 @@ tokenize 'TokenNgram("loose_symbol", true, "include_removed_sour "value": "", "position": 8, "force_prefix": false, + "force_prefix_search": false, "source_offset": 20, "source_length": 0, "source_first_character_length": 0 @@ -82,6 +91,7 @@ tokenize 'TokenNgram("loose_symbol", true, "include_removed_sour "value": "クリ", "position": 9, "force_prefix": false, + "force_prefix_search": false, "source_offset": 1, "source_length": 6, "source_first_character_length": 3 @@ -90,6 +100,7 @@ tokenize 'TokenNgram("loose_symbol", true, "include_removed_sour "value": "リア", "position": 10, "force_prefix": false, + "force_prefix_search": false, "source_offset": 4, "source_length": 6, 
"source_first_character_length": 3 @@ -98,6 +109,7 @@ tokenize 'TokenNgram("loose_symbol", true, "include_removed_sour "value": "アコ", "position": 11, "force_prefix": false, + "force_prefix_search": false, "source_offset": 7, "source_length": 9, "source_first_character_length": 3 @@ -106,6 +118,7 @@ tokenize 'TokenNgram("loose_symbol", true, "include_removed_sour "value": "コド", "position": 12, "force_prefix": false, + "force_prefix_search": false, "source_offset": 10, "source_length": 9, "source_first_character_length": 3 @@ -114,6 +127,7 @@ tokenize 'TokenNgram("loose_symbol", true, "include_removed_sour "value": "ド", "position": 13, "force_prefix": false, + "force_prefix_search": false, "source_offset": 16, "source_length": 3, "source_first_character_length": 3 Modified: test/command/suite/tokenizers/ngram/report_source_location/loose_symbol.expected (+9 -0) =================================================================== --- test/command/suite/tokenizers/ngram/report_source_location/loose_symbol.expected 2018-11-12 14:14:04 +0900 (d7ecd9fa7) +++ test/command/suite/tokenizers/ngram/report_source_location/loose_symbol.expected 2018-11-12 14:32:17 +0900 (01ebfe793) @@ -10,6 +10,7 @@ tokenize 'TokenNgram("report_source_location", true, "loose_symbol", true)' "value": "090", "position": 0, "force_prefix": false, + "force_prefix_search": false, "source_offset": 0, "source_length": 9, "source_first_character_length": 3 @@ -18,6 +19,7 @@ tokenize 'TokenNgram("report_source_location", true, "loose_symbol", true)' "value": "(", "position": 1, "force_prefix": false, + "force_prefix_search": false, "source_offset": 9, "source_length": 3, "source_first_character_length": 3 @@ -26,6 +28,7 @@ tokenize 'TokenNgram("report_source_location", true, "loose_symbol", true)' "value": "1234", "position": 2, "force_prefix": false, + "force_prefix_search": false, "source_offset": 12, "source_length": 8, "source_first_character_length": 3 @@ -34,6 +37,7 @@ tokenize 'TokenNgram("report_source_location", true, "loose_symbol", true)' "value": ")", "position": 3, "force_prefix": false, + "force_prefix_search": false, "source_offset": 20, "source_length": 3, "source_first_character_length": 3 @@ -42,6 +46,7 @@ tokenize 'TokenNgram("report_source_location", true, "loose_symbol", true)' "value": "56", "position": 4, "force_prefix": false, + "force_prefix_search": false, "source_offset": 23, "source_length": 4, "source_first_character_length": 3 @@ -50,6 +55,7 @@ tokenize 'TokenNgram("report_source_location", true, "loose_symbol", true)' "value": "−", "position": 5, "force_prefix": false, + "force_prefix_search": false, "source_offset": 27, "source_length": 3, "source_first_character_length": 3 @@ -58,6 +64,7 @@ tokenize 'TokenNgram("report_source_location", true, "loose_symbol", true)' "value": "78", "position": 6, "force_prefix": false, + "force_prefix_search": false, "source_offset": 30, "source_length": 6, "source_first_character_length": 3 @@ -66,6 +73,7 @@ tokenize 'TokenNgram("report_source_location", true, "loose_symbol", true)' "value": "", "position": 7, "force_prefix": false, + "force_prefix_search": false, "source_offset": 36, "source_length": 0, "source_first_character_length": 0 @@ -74,6 +82,7 @@ tokenize 'TokenNgram("report_source_location", true, "loose_symbol", true)' "value": "09012345678", "position": 8, "force_prefix": false, + "force_prefix_search": false, "source_offset": 0, "source_length": 36, "source_first_character_length": 3 Modified: 
test/command/suite/tokenizers/ngram/report_source_location/loose_symbol_kabu.expected (+18 -0) =================================================================== --- test/command/suite/tokenizers/ngram/report_source_location/loose_symbol_kabu.expected 2018-11-12 14:14:04 +0900 (d8add60e0) +++ test/command/suite/tokenizers/ngram/report_source_location/loose_symbol_kabu.expected 2018-11-12 14:32:17 +0900 (04047aa26) @@ -10,6 +10,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "ここ", "position": 0, "force_prefix": false, + "force_prefix_search": false, "source_offset": 0, "source_length": 6, "source_first_character_length": 3 @@ -18,6 +19,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "こは", "position": 1, "force_prefix": false, + "force_prefix_search": false, "source_offset": 3, "source_length": 6, "source_first_character_length": 3 @@ -26,6 +28,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "は", "position": 2, "force_prefix": false, + "force_prefix_search": false, "source_offset": 6, "source_length": 3, "source_first_character_length": 3 @@ -34,6 +37,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "(", "position": 3, "force_prefix": false, + "force_prefix_search": false, "source_offset": 9, "source_length": 0, "source_first_character_length": 3 @@ -42,6 +46,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "株", "position": 4, "force_prefix": false, + "force_prefix_search": false, "source_offset": 9, "source_length": 0, "source_first_character_length": 3 @@ -50,6 +55,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": ")", "position": 5, "force_prefix": false, + "force_prefix_search": false, "source_offset": 9, "source_length": 3, "source_first_character_length": 3 @@ -58,6 +64,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "グル", "position": 6, "force_prefix": false, + "force_prefix_search": false, "source_offset": 12, "source_length": 6, "source_first_character_length": 3 @@ -66,6 +73,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "ルン", "position": 7, "force_prefix": false, + "force_prefix_search": false, "source_offset": 15, "source_length": 6, "source_first_character_length": 3 @@ -74,6 +82,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "ンガ", "position": 8, "force_prefix": false, + "force_prefix_search": false, "source_offset": 18, "source_length": 6, "source_first_character_length": 3 @@ -82,6 +91,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "", "position": 9, "force_prefix": false, + "force_prefix_search": false, "source_offset": 24, "source_length": 0, "source_first_character_length": 0 @@ -90,6 +100,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "ここ", "position": 10, "force_prefix": false, + "force_prefix_search": false, "source_offset": 0, "source_length": 6, "source_first_character_length": 3 @@ -98,6 +109,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "こは", "position": 11, "force_prefix": false, + "force_prefix_search": false, "source_offset": 3, "source_length": 6, "source_first_character_length": 3 @@ -106,6 +118,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "は株", "position": 12, "force_prefix": false, + "force_prefix_search": false, "source_offset": 6, "source_length": 6, 
"source_first_character_length": 3 @@ -114,6 +127,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "株グ", "position": 13, "force_prefix": false, + "force_prefix_search": false, "source_offset": 9, "source_length": 6, "source_first_character_length": 3 @@ -122,6 +136,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "グル", "position": 14, "force_prefix": false, + "force_prefix_search": false, "source_offset": 12, "source_length": 6, "source_first_character_length": 3 @@ -130,6 +145,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "ルン", "position": 15, "force_prefix": false, + "force_prefix_search": false, "source_offset": 15, "source_length": 6, "source_first_character_length": 3 @@ -138,6 +154,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "ンガ", "position": 16, "force_prefix": false, + "force_prefix_search": false, "source_offset": 18, "source_length": 6, "source_first_character_length": 3 @@ -146,6 +163,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "ガ", "position": 17, "force_prefix": false, + "force_prefix_search": false, "source_offset": 21, "source_length": 3, "source_first_character_length": 3 Modified: test/command/suite/tokenizers/ngram/report_source_location/loose_symbol_non_number.expected (+13 -0) =================================================================== --- test/command/suite/tokenizers/ngram/report_source_location/loose_symbol_non_number.expected 2018-11-12 14:14:04 +0900 (231e19e69) +++ test/command/suite/tokenizers/ngram/report_source_location/loose_symbol_non_number.expected 2018-11-12 14:32:17 +0900 (557dcb099) @@ -10,6 +10,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "(", "position": 0, "force_prefix": false, + "force_prefix_search": false, "source_offset": 0, "source_length": 1, "source_first_character_length": 1 @@ -18,6 +19,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "あい", "position": 1, "force_prefix": false, + "force_prefix_search": false, "source_offset": 1, "source_length": 6, "source_first_character_length": 3 @@ -26,6 +28,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "いう", "position": 2, "force_prefix": false, + "force_prefix_search": false, "source_offset": 4, "source_length": 6, "source_first_character_length": 3 @@ -34,6 +37,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "うえ", "position": 3, "force_prefix": false, + "force_prefix_search": false, "source_offset": 7, "source_length": 6, "source_first_character_length": 3 @@ -42,6 +46,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "えお", "position": 4, "force_prefix": false, + "force_prefix_search": false, "source_offset": 10, "source_length": 6, "source_first_character_length": 3 @@ -50,6 +55,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "お", "position": 5, "force_prefix": false, + "force_prefix_search": false, "source_offset": 13, "source_length": 3, "source_first_character_length": 3 @@ -58,6 +64,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": ")", "position": 6, "force_prefix": false, + "force_prefix_search": false, "source_offset": 16, "source_length": 1, "source_first_character_length": 1 @@ -66,6 +73,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "", "position": 7, "force_prefix": false, + 
"force_prefix_search": false, "source_offset": 17, "source_length": 0, "source_first_character_length": 0 @@ -74,6 +82,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "あい", "position": 8, "force_prefix": false, + "force_prefix_search": false, "source_offset": 1, "source_length": 6, "source_first_character_length": 4 @@ -82,6 +91,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "いう", "position": 9, "force_prefix": false, + "force_prefix_search": false, "source_offset": 4, "source_length": 6, "source_first_character_length": 3 @@ -90,6 +100,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "うえ", "position": 10, "force_prefix": false, + "force_prefix_search": false, "source_offset": 7, "source_length": 6, "source_first_character_length": 3 @@ -98,6 +109,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "えお", "position": 11, "force_prefix": false, + "force_prefix_search": false, "source_offset": 10, "source_length": 6, "source_first_character_length": 3 @@ -106,6 +118,7 @@ tokenize 'TokenNgram("loose_symbol", true, "report_source_locati "value": "お", "position": 12, "force_prefix": false, + "force_prefix_search": false, "source_offset": 13, "source_length": 3, "source_first_character_length": 3 Modified: test/command/suite/tokenizers/ngram/unify_alphabet.expected (+10 -5) =================================================================== --- test/command/suite/tokenizers/ngram/unify_alphabet.expected 2018-11-12 14:14:04 +0900 (1e6d748eb) +++ test/command/suite/tokenizers/ngram/unify_alphabet.expected 2018-11-12 14:32:17 +0900 (de183ca98) @@ -9,27 +9,32 @@ tokenize 'TokenNgram("unify_alphabet", false)' "abcde" NormalizerAuto { "value": "ab", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "bc", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "cd", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "de", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "e", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/ngram/unify_digit.expected (+12 -6) =================================================================== --- test/command/suite/tokenizers/ngram/unify_digit.expected 2018-11-12 14:14:04 +0900 (02889dc3f) +++ test/command/suite/tokenizers/ngram/unify_digit.expected 2018-11-12 14:32:17 +0900 (7adcd8147) @@ -9,32 +9,38 @@ tokenize 'TokenNgram("unify_digit", false)' "012345" NormalizerAuto { "value": "01", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "12", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "23", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "34", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "45", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "5", "position": 5, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/ngram/unify_symbol.expected (+12 -6) 
=================================================================== --- test/command/suite/tokenizers/ngram/unify_symbol.expected 2018-11-12 14:14:04 +0900 (460e19461) +++ test/command/suite/tokenizers/ngram/unify_symbol.expected 2018-11-12 14:32:17 +0900 (0e592733e) @@ -9,32 +9,38 @@ tokenize 'TokenNgram("unify_symbol", false)' "___---" NormalizerAuto { "value": "__", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "__", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "_-", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "--", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "--", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "-", "position": 5, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/regexp/add/four.expected (+12 -6) =================================================================== --- test/command/suite/tokenizers/regexp/add/four.expected 2018-11-12 14:14:04 +0900 (46919e5d7) +++ test/command/suite/tokenizers/regexp/add/four.expected 2018-11-12 14:32:17 +0900 (512361210) @@ -9,32 +9,38 @@ tokenize TokenRegexp "abcd" { "value": "", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ab", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "bc", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "cd", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "d", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "", "position": 5, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/regexp/add/normalizer/blank.expected (+20 -10) =================================================================== --- test/command/suite/tokenizers/regexp/add/normalizer/blank.expected 2018-11-12 14:14:04 +0900 (ed32ecfae) +++ test/command/suite/tokenizers/regexp/add/normalizer/blank.expected 2018-11-12 14:32:17 +0900 (4ef05b7df) @@ -11,52 +11,62 @@ table_tokenize Lexicon "abcd\nefgh" --mode ADD { "value": "", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ab", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "bc", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "cd", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "d", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ef", "position": 6, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "fg", "position": 7, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "gh", "position": 8, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "h", "position": 9, - "force_prefix": false + "force_prefix": false, 
+ "force_prefix_search": false }, { "value": "", "position": 10, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/regexp/add/one.expected (+6 -3) =================================================================== --- test/command/suite/tokenizers/regexp/add/one.expected 2018-11-12 14:14:04 +0900 (14c79681c) +++ test/command/suite/tokenizers/regexp/add/one.expected 2018-11-12 14:32:17 +0900 (89d9b6b87) @@ -9,17 +9,20 @@ tokenize TokenRegexp "x" { "value": "", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "x", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/regexp/add/three.expected (+10 -5) =================================================================== --- test/command/suite/tokenizers/regexp/add/three.expected 2018-11-12 14:14:04 +0900 (8659548d5) +++ test/command/suite/tokenizers/regexp/add/three.expected 2018-11-12 14:32:17 +0900 (c78202041) @@ -9,27 +9,32 @@ tokenize TokenRegexp "xyz" { "value": "", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "xy", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "yz", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "z", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/regexp/add/two.expected (+8 -4) =================================================================== --- test/command/suite/tokenizers/regexp/add/two.expected 2018-11-12 14:14:04 +0900 (ccd6712c1) +++ test/command/suite/tokenizers/regexp/add/two.expected 2018-11-12 14:32:17 +0900 (429f319fe) @@ -9,22 +9,26 @@ tokenize TokenRegexp "xy" { "value": "", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "xy", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "y", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/regexp/get/begin/one.expected (+10 -5) =================================================================== --- test/command/suite/tokenizers/regexp/get/begin/one.expected 2018-11-12 14:14:04 +0900 (b7d07e788) +++ test/command/suite/tokenizers/regexp/get/begin/one.expected 2018-11-12 14:32:17 +0900 (88f546470) @@ -11,17 +11,20 @@ table_tokenize Lexicon "x" --mode ADD { "value": "", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "x", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] @@ -36,12 +39,14 @@ table_tokenize Lexicon "x" --mode GET { "value": "", "position": 0, - "force_prefix": false + "force_prefix": false, + 
"force_prefix_search": false }, { "value": "x", "position": 1, - "force_prefix": true + "force_prefix": true, + "force_prefix_search": true } ] ] Modified: test/command/suite/tokenizers/regexp/get/begin/three.expected (+16 -8) =================================================================== --- test/command/suite/tokenizers/regexp/get/begin/three.expected 2018-11-12 14:14:04 +0900 (e7997a80c) +++ test/command/suite/tokenizers/regexp/get/begin/three.expected 2018-11-12 14:32:17 +0900 (29d77a6bc) @@ -11,27 +11,32 @@ table_tokenize Lexicon "xyz" --mode ADD { "value": "", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "xy", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "yz", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "z", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] @@ -46,17 +51,20 @@ table_tokenize Lexicon "xyz" --mode GET { "value": "", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "xy", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "yz", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/regexp/get/begin/two.expected (+12 -6) =================================================================== --- test/command/suite/tokenizers/regexp/get/begin/two.expected 2018-11-12 14:14:04 +0900 (337ac43f4) +++ test/command/suite/tokenizers/regexp/get/begin/two.expected 2018-11-12 14:32:17 +0900 (c707ab143) @@ -11,22 +11,26 @@ table_tokenize Lexicon "xy" --mode ADD { "value": "", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "xy", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "y", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] @@ -41,12 +45,14 @@ table_tokenize Lexicon "xy" --mode GET { "value": "", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "xy", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/regexp/get/begin_end/one.expected (+12 -6) =================================================================== --- test/command/suite/tokenizers/regexp/get/begin_end/one.expected 2018-11-12 14:14:04 +0900 (6fa5b6e23) +++ test/command/suite/tokenizers/regexp/get/begin_end/one.expected 2018-11-12 14:32:17 +0900 (4b1909ba8) @@ -11,17 +11,20 @@ table_tokenize Lexicon "x" --mode ADD { "value": "", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "x", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] @@ -36,17 +39,20 @@ table_tokenize Lexicon "x" --mode GET { "value": "", "position": 0, - 
"force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "x", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/regexp/get/end/four.expected (+18 -9) =================================================================== --- test/command/suite/tokenizers/regexp/get/end/four.expected 2018-11-12 14:14:04 +0900 (498667356) +++ test/command/suite/tokenizers/regexp/get/end/four.expected 2018-11-12 14:32:17 +0900 (9e44245ae) @@ -11,32 +11,38 @@ table_tokenize Lexicon "abcd" --mode ADD { "value": "", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ab", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "bc", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "cd", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "d", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "", "position": 5, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] @@ -51,17 +57,20 @@ table_tokenize Lexicon "abcd" --mode GET { "value": "ab", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "cd", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/regexp/get/end/one.expected (+10 -5) =================================================================== --- test/command/suite/tokenizers/regexp/get/end/one.expected 2018-11-12 14:14:04 +0900 (b747a68ac) +++ test/command/suite/tokenizers/regexp/get/end/one.expected 2018-11-12 14:32:17 +0900 (84f2b5843) @@ -11,17 +11,20 @@ table_tokenize Lexicon "x" --mode ADD { "value": "", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "x", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] @@ -36,12 +39,14 @@ table_tokenize Lexicon "x" --mode GET { "value": "x", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/regexp/get/end/three.expected (+16 -8) =================================================================== --- test/command/suite/tokenizers/regexp/get/end/three.expected 2018-11-12 14:14:04 +0900 (72b7c05bf) +++ test/command/suite/tokenizers/regexp/get/end/three.expected 2018-11-12 14:32:17 +0900 (c73400339) @@ -11,27 +11,32 @@ table_tokenize Lexicon "xyz" --mode ADD { "value": "", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "xy", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "yz", "position": 2, - "force_prefix": false + 
"force_prefix": false, + "force_prefix_search": false }, { "value": "z", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] @@ -46,17 +51,20 @@ table_tokenize Lexicon "xyz" --mode GET { "value": "xy", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "yz", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/regexp/get/end/two.expected (+12 -6) =================================================================== --- test/command/suite/tokenizers/regexp/get/end/two.expected 2018-11-12 14:14:04 +0900 (750691423) +++ test/command/suite/tokenizers/regexp/get/end/two.expected 2018-11-12 14:32:17 +0900 (8af1fc4d9) @@ -11,22 +11,26 @@ table_tokenize Lexicon "xy" --mode ADD { "value": "", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "xy", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "y", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] @@ -41,12 +45,14 @@ table_tokenize Lexicon "xy" --mode GET { "value": "xy", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/regexp/get/long.expected (+38 -19) =================================================================== --- test/command/suite/tokenizers/regexp/get/long.expected 2018-11-12 14:14:04 +0900 (8a29a34d5) +++ test/command/suite/tokenizers/regexp/get/long.expected 2018-11-12 14:32:17 +0900 (700fc5fa6) @@ -11,67 +11,80 @@ table_tokenize Lexicon "abcdefghijk" --mode ADD { "value": "", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ab", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "bc", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "cd", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "de", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ef", "position": 5, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "fg", "position": 6, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "gh", "position": 7, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "hi", "position": 8, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ij", "position": 9, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "jk", "position": 10, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "k", "position": 11, - 
"force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "", "position": 12, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] @@ -86,32 +99,38 @@ table_tokenize Lexicon "abcdefghijk" --mode GET { "value": "ab", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "cd", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ef", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "gh", "position": 6, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ij", "position": 8, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "jk", "position": 9, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/regexp/get/normalizer/blank/less_after.expected (+18 -9) =================================================================== --- test/command/suite/tokenizers/regexp/get/normalizer/blank/less_after.expected 2018-11-12 14:14:04 +0900 (7b4e43f1d) +++ test/command/suite/tokenizers/regexp/get/normalizer/blank/less_after.expected 2018-11-12 14:32:17 +0900 (a8ac4cd6b) @@ -11,32 +11,38 @@ table_tokenize Lexicon "abc\nd" --mode ADD { "value": "", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ab", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "bc", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "c", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "d", "position": 5, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "", "position": 6, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] @@ -51,17 +57,20 @@ table_tokenize Lexicon "abc\nd" --mode GET { "value": "ab", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "bc", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "d", "position": 3, - "force_prefix": true + "force_prefix": true, + "force_prefix_search": true } ] ] Modified: test/command/suite/tokenizers/regexp/get/normalizer/blank/less_before.expected (+18 -9) =================================================================== --- test/command/suite/tokenizers/regexp/get/normalizer/blank/less_before.expected 2018-11-12 14:14:04 +0900 (e0304557d) +++ test/command/suite/tokenizers/regexp/get/normalizer/blank/less_before.expected 2018-11-12 14:32:17 +0900 (9cd5a0df8) @@ -11,32 +11,38 @@ table_tokenize Lexicon "a\ndef" --mode ADD { "value": "", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "a", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "de", "position": 3, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ef", "position": 4, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "f", "position": 5, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": 
false }, { "value": "", "position": 6, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] @@ -51,17 +57,20 @@ table_tokenize Lexicon "a\ndef" --mode GET { "value": "a", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "de", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "ef", "position": 2, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/trigram/force_prefix/multiple_tokens/matured.expected (+4 -2) =================================================================== --- test/command/suite/tokenizers/trigram/force_prefix/multiple_tokens/matured.expected 2018-11-12 14:14:04 +0900 (143bd316c) +++ test/command/suite/tokenizers/trigram/force_prefix/multiple_tokens/matured.expected 2018-11-12 14:32:17 +0900 (56797a768) @@ -9,12 +9,14 @@ tokenize TokenTrigram "ABCだよね" NormalizerAuto --mode GET { "value": "abc", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "だよね", "position": 1, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false } ] ] Modified: test/command/suite/tokenizers/trigram/force_prefix/multiple_tokens/unmatured_one_character.expected (+4 -2) =================================================================== --- test/command/suite/tokenizers/trigram/force_prefix/multiple_tokens/unmatured_one_character.expected 2018-11-12 14:14:04 +0900 (ec3e13e3f) +++ test/command/suite/tokenizers/trigram/force_prefix/multiple_tokens/unmatured_one_character.expected 2018-11-12 14:32:17 +0900 (d8dc0398f) @@ -9,12 +9,14 @@ tokenize TokenTrigram "ABCだ" NormalizerAuto --mode GET { "value": "abc", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "だ", "position": 1, - "force_prefix": true + "force_prefix": true, + "force_prefix_search": true } ] ] Modified: test/command/suite/tokenizers/trigram/force_prefix/multiple_tokens/unmatured_two_characters.expected (+4 -2) =================================================================== --- test/command/suite/tokenizers/trigram/force_prefix/multiple_tokens/unmatured_two_characters.expected 2018-11-12 14:14:04 +0900 (60b38c2a7) +++ test/command/suite/tokenizers/trigram/force_prefix/multiple_tokens/unmatured_two_characters.expected 2018-11-12 14:32:17 +0900 (fe5fb9848) @@ -9,12 +9,14 @@ tokenize TokenTrigram "ABCだよ" NormalizerAuto --mode GET { "value": "abc", "position": 0, - "force_prefix": false + "force_prefix": false, + "force_prefix_search": false }, { "value": "だよ", "position": 1, - "force_prefix": true + "force_prefix": true, + "force_prefix_search": true } ] ] Modified: test/command/suite/tokenizers/trigram/force_prefix/single_token/matured.expected (+15 -1) =================================================================== --- test/command/suite/tokenizers/trigram/force_prefix/single_token/matured.expected 2018-11-12 14:14:04 +0900 (2b6bd9a58) +++ test/command/suite/tokenizers/trigram/force_prefix/single_token/matured.expected 2018-11-12 14:32:17 +0900 (43c1f526a) @@ -1,2 +1,16 @@ tokenize TokenTrigram "だよね" NormalizerAuto --mode GET -[[0,0.0,0.0],[{"value":"だよね","position":0,"force_prefix":false}]] +[ + [ + 0, + 0.0, + 0.0 + ], + [ + { + "value": "だよね", + "position": 0, + "force_prefix": false, + "force_prefix_search": false + } + ] +] Modified: 
test/command/suite/tokenizers/trigram/force_prefix/single_token/unmatured_one_character.expected (+15 -1) =================================================================== --- test/command/suite/tokenizers/trigram/force_prefix/single_token/unmatured_one_character.expected 2018-11-12 14:14:04 +0900 (0db9eb1fa) +++ test/command/suite/tokenizers/trigram/force_prefix/single_token/unmatured_one_character.expected 2018-11-12 14:32:17 +0900 (0c5519aae) @@ -1,2 +1,16 @@ tokenize TokenTrigram "だ" NormalizerAuto --mode GET -[[0,0.0,0.0],[{"value":"だ","position":0,"force_prefix":true}]] +[ + [ + 0, + 0.0, + 0.0 + ], + [ + { + "value": "だ", + "position": 0, + "force_prefix": true, + "force_prefix_search": true + } + ] +] Modified: test/command/suite/tokenizers/trigram/force_prefix/single_token/unmatured_two_characters.expected (+15 -1) =================================================================== --- test/command/suite/tokenizers/trigram/force_prefix/single_token/unmatured_two_characters.expected 2018-11-12 14:14:04 +0900 (413dfe06c) +++ test/command/suite/tokenizers/trigram/force_prefix/single_token/unmatured_two_characters.expected 2018-11-12 14:32:17 +0900 (b577eda5e) @@ -1,2 +1,16 @@ tokenize TokenTrigram "だよ" NormalizerAuto --mode GET -[[0,0.0,0.0],[{"value":"だよ","position":0,"force_prefix":true}]] +[ + [ + 0, + 0.0, + 0.0 + ], + [ + { + "value": "だよ", + "position": 0, + "force_prefix": true, + "force_prefix_search": true + } + ] +]
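For reference, the net effect visible in every expected file above is that each token map gains a "force_prefix_search" member that always carries the same value as "force_prefix", and the old member is still emitted so existing consumers do not break. Below is a minimal sketch of such an output routine, assuming Groonga's GRN_OUTPUT_* JSON output macros; the output_token() helper and the tokenize_token struct are hypothetical names for illustration, not the verbatim patch to lib/proc/proc_tokenize.c:

  /* Sketch only -- not the verbatim patch.  Emits one token map with
   * both the old and the new key. */
  static void
  output_token(grn_ctx *ctx, tokenize_token *token)
  {
    GRN_OUTPUT_MAP_OPEN("TOKEN", 4);  /* one more member than before */
    GRN_OUTPUT_CSTR("value");
    GRN_OUTPUT_STR(GRN_TEXT_VALUE(&(token->value)),
                   GRN_TEXT_LEN(&(token->value)));
    GRN_OUTPUT_CSTR("position");
    GRN_OUTPUT_INT32(token->position);
    /* The old key keeps the same value as the new key, so clients
     * that still read "force_prefix" continue to work unchanged. */
    GRN_OUTPUT_CSTR("force_prefix");
    GRN_OUTPUT_BOOL(token->force_prefix);
    GRN_OUTPUT_CSTR("force_prefix_search");
    GRN_OUTPUT_BOOL(token->force_prefix);
    GRN_OUTPUT_MAP_CLOSE();
  }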