Kouhei Sutou 2018-11-07 15:59:23 +0900 (Wed, 07 Nov 2018) Revision: 17085d8cad78105891089c27dcc9665aaf6e4387 https://github.com/groonga/groonga/commit/17085d8cad78105891089c27dcc9665aaf6e4387 Message: NormalizeNFKC100: support unify_to_romaji from katakana Added files: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/a.expected test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/a.test test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ba.expected test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ba.test test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/da.expected test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/da.test test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/etc.expected test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/etc.test test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ga.expected test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ga.test test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ha.expected test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ha.test test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ka.expected test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ka.test test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ma.expected test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ma.test test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/n.expected test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/n.test test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/na.expected test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/na.test test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/pa.expected test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/pa.test test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ra.expected test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ra.test test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/sa.test test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ta.test test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/wa.expected test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/wa.test test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ya.expected test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ya.test test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/za.expected test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/za.test Copied files: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/sa.expected (from test/command/suite/normalizers/nfkc100/unify_to_romaji/mixed.expected) test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ta.expected (from test/command/suite/normalizers/nfkc100/unify_to_romaji/mixed.expected) Modified files: lib/romaji.c test/command/suite/normalizers/nfkc100/unify_to_romaji/mixed.expected Modified: lib/romaji.c (+7 -6) =================================================================== --- lib/romaji.c 2018-11-07 15:25:55 +0900 (75e79deca) +++ lib/romaji.c 2018-11-07 15:59:23 +0900 (5270bf725) @@ -62,7 +62,7 @@ grn_romaji_convert_hepburn(grn_ctx *ctx, next[2] == 0xa5 || /* U+30E5 KATAKANA LETTER SMALL YU */ next[2] == 0xa7)) { /* U+30E7 KATAKANA LETTER SMALL YO */ next_small_y = GRN_TRUE; - next_small_yayuyo = auo[(next[2] - 3) % 5]; + next_small_yayuyo = aiueo[(next[2] - 3) % 5]; } else if (next[0] == 0xe3 && ((next[1] == 0x81 && (next[2] == 0xb0 || /* U+3070 HIRAGANA LETTER BA */ @@ -244,8 +244,8 @@ grn_romaji_convert_hepburn(grn_ctx *ctx, if (0x80 <= next[2] && next[2] <= 0x89) { /* U+30C0 KATAKANA LETTER DA .. * U+30C9 KATAKANA LETTER DO */ - const char *tdtjxtztdtd = "tdtjxtztdtd"; - next_consonant = tdtjxtztdtd[next[2] - 0x80]; + const char *dtjxtztdtd = "dtjxtztdtd"; + next_consonant = dtjxtztdtd[next[2] - 0x80]; } else if (0x8a <= next[2] && next[2] <= 0x8e) { /* U+30CA KATAKANA LETTER NA .. * U+30CE KATAKANA LETTER NO */ @@ -557,7 +557,7 @@ grn_romaji_convert_hepburn(grn_ctx *ctx, /* U+30B7 KATAKANA LETTER SI */ buffer[(*n_bytes)++] = 's'; buffer[(*n_bytes)++] = 'h'; - } else if (current[2] == 0x98) { + } else if (current[2] == 0xb8) { /* U+30B8 KATAKANA LETTER ZI */ buffer[(*n_bytes)++] = 'j'; } else { @@ -602,6 +602,7 @@ grn_romaji_convert_hepburn(grn_ctx *ctx, /* U+30C2 KATAKANA LETTER DI */ buffer[(*n_bytes)++] = 'j'; } else if (current[2] == 0x83) { + /* U+30C3 KATAKANA LETTER SMALL TU */ buffer[(*n_bytes)++] = 'x'; buffer[(*n_bytes)++] = 't'; buffer[(*n_bytes)++] = 's'; @@ -610,8 +611,8 @@ grn_romaji_convert_hepburn(grn_ctx *ctx, buffer[(*n_bytes)++] = 't'; buffer[(*n_bytes)++] = 's'; } else { - const char *td_____tdtd = "td_____tdtd"; - buffer[(*n_bytes)++] = td_____tdtd[current[2] - 0x80]; + const char *d____ztdtd = "d____ztdtd"; + buffer[(*n_bytes)++] = d____ztdtd[current[2] - 0x80]; } buffer[(*n_bytes)++] = aiiuuueeoo[current[2] - 0x80]; } Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/a.expected (+62 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/a.expected 2018-11-07 15:59:23 +0900 (29b479993) @@ -0,0 +1,62 @@ +normalize 'NormalizerNFKC100("unify_to_romaji", true, "report_source_offset", true)' "アァイィウゥエェオォ" WITH_CHECKS|WITH_TYPES +[ + [ + 0, + 0.0, + 0.0 + ], + { + "normalized": "axaixiuxuexeoxo", + "types": [ + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha" + ], + "checks": [ + 3, + 3, + -1, + 3, + 3, + -1, + 3, + 3, + -1, + 3, + 3, + -1, + 3, + 3, + -1 + ], + "offsets": [ + 0, + 3, + 3, + 6, + 9, + 9, + 12, + 15, + 15, + 18, + 21, + 21, + 24, + 27, + 27 + ] + } +] Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/a.test (+5 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/a.test 2018-11-07 15:59:23 +0900 (92a1152d5) @@ -0,0 +1,5 @@ +normalize \ + 'NormalizerNFKC100("unify_to_romaji", true, \ + "report_source_offset", true)' \ + "アァイィウゥエェオォ" \ + WITH_CHECKS|WITH_TYPES Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ba.expected (+74 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ba.expected 2018-11-07 15:59:23 +0900 (37e040a74) @@ -0,0 +1,74 @@ +normalize 'NormalizerNFKC100("unify_to_romaji", true, "report_source_offset", true)' "バビブベボビャビュビョ" WITH_CHECKS|WITH_TYPES +[ + [ + 0, + 0.0, + 0.0 + ], + { + "normalized": "babibubebobyabyubyo", + "types": [ + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha" + ], + "checks": [ + 3, + -1, + 3, + -1, + 3, + -1, + 3, + -1, + 3, + -1, + 6, + -1, + -1, + 6, + -1, + -1, + 6, + -1, + -1 + ], + "offsets": [ + 0, + 0, + 3, + 3, + 6, + 6, + 9, + 9, + 12, + 12, + 15, + 15, + 15, + 21, + 21, + 21, + 27, + 27, + 27 + ] + } +] Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ba.test (+5 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ba.test 2018-11-07 15:59:23 +0900 (02ea757eb) @@ -0,0 +1,5 @@ +normalize \ + 'NormalizerNFKC100("unify_to_romaji", true, \ + "report_source_offset", true)' \ + "バビブベボビャビュビョ" \ + WITH_CHECKS|WITH_TYPES Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/da.expected (+65 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/da.expected 2018-11-07 15:59:23 +0900 (530f43242) @@ -0,0 +1,65 @@ +normalize 'NormalizerNFKC100("unify_to_romaji", true, "report_source_offset", true)' "ダヂヅデドヂャヂュヂョ" WITH_CHECKS|WITH_TYPES +[ + [ + 0, + 0.0, + 0.0 + ], + { + "normalized": "dajizudedojajujo", + "types": [ + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha" + ], + "checks": [ + 3, + -1, + 3, + -1, + 3, + -1, + 3, + -1, + 3, + -1, + 6, + -1, + 6, + -1, + 6, + -1 + ], + "offsets": [ + 0, + 0, + 3, + 3, + 6, + 6, + 9, + 9, + 12, + 12, + 15, + 15, + 21, + 21, + 27, + 27 + ] + } +] Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/da.test (+5 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/da.test 2018-11-07 15:59:23 +0900 (0120d3600) @@ -0,0 +1,5 @@ +normalize \ + 'NormalizerNFKC100("unify_to_romaji", true, \ + "report_source_offset", true)' \ + "ダヂヅデドヂャヂュヂョ" \ + WITH_CHECKS|WITH_TYPES Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/etc.expected (+41 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/etc.expected 2018-11-07 15:59:23 +0900 (959573377) @@ -0,0 +1,41 @@ +normalize 'NormalizerNFKC100("unify_to_romaji", true, "report_source_offset", true)' "ヴゕゖ" WITH_CHECKS|WITH_TYPES +[ + [ + 0, + 0.0, + 0.0 + ], + { + "normalized": "vuxkaxke", + "types": [ + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha" + ], + "checks": [ + 3, + -1, + 3, + -1, + -1, + 3, + -1, + -1 + ], + "offsets": [ + 0, + 0, + 3, + 3, + 3, + 6, + 6, + 6 + ] + } +] Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/etc.test (+5 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/etc.test 2018-11-07 15:59:23 +0900 (61196e28b) @@ -0,0 +1,5 @@ +normalize \ + 'NormalizerNFKC100("unify_to_romaji", true, \ + "report_source_offset", true)' \ + "ヴゕゖ" \ + WITH_CHECKS|WITH_TYPES Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ga.expected (+74 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ga.expected 2018-11-07 15:59:23 +0900 (59cce624d) @@ -0,0 +1,74 @@ +normalize 'NormalizerNFKC100("unify_to_romaji", true, "report_source_offset", true)' "ガギグゲゴギャギュギョ" WITH_CHECKS|WITH_TYPES +[ + [ + 0, + 0.0, + 0.0 + ], + { + "normalized": "gagigugegogyagyugyo", + "types": [ + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha" + ], + "checks": [ + 3, + -1, + 3, + -1, + 3, + -1, + 3, + -1, + 3, + -1, + 6, + -1, + -1, + 6, + -1, + -1, + 6, + -1, + -1 + ], + "offsets": [ + 0, + 0, + 3, + 3, + 6, + 6, + 9, + 9, + 12, + 12, + 15, + 15, + 15, + 21, + 21, + 21, + 27, + 27, + 27 + ] + } +] Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ga.test (+5 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ga.test 2018-11-07 15:59:23 +0900 (97da668a0) @@ -0,0 +1,5 @@ +normalize \ + 'NormalizerNFKC100("unify_to_romaji", true, \ + "report_source_offset", true)' \ + "ガギグゲゴギャギュギョ" \ + WITH_CHECKS|WITH_TYPES Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ha.expected (+74 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ha.expected 2018-11-07 15:59:23 +0900 (e4d38086f) @@ -0,0 +1,74 @@ +normalize 'NormalizerNFKC100("unify_to_romaji", true, "report_source_offset", true)' "ハヒフヘホヒャヒュヒョ" WITH_CHECKS|WITH_TYPES +[ + [ + 0, + 0.0, + 0.0 + ], + { + "normalized": "hahihuhehohyahyuhyo", + "types": [ + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha" + ], + "checks": [ + 3, + -1, + 3, + -1, + 3, + -1, + 3, + -1, + 3, + -1, + 6, + -1, + -1, + 6, + -1, + -1, + 6, + -1, + -1 + ], + "offsets": [ + 0, + 0, + 3, + 3, + 6, + 6, + 9, + 9, + 12, + 12, + 15, + 15, + 15, + 21, + 21, + 21, + 27, + 27, + 27 + ] + } +] Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ha.test (+5 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ha.test 2018-11-07 15:59:23 +0900 (635e0b896) @@ -0,0 +1,5 @@ +normalize \ + 'NormalizerNFKC100("unify_to_romaji", true, \ + "report_source_offset", true)' \ + "ハヒフヘホヒャヒュヒョ" \ + WITH_CHECKS|WITH_TYPES Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ka.expected (+74 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ka.expected 2018-11-07 15:59:23 +0900 (121fbd658) @@ -0,0 +1,74 @@ +normalize 'NormalizerNFKC100("unify_to_romaji", true, "report_source_offset", true)' "カキクケコキャキュキョ" WITH_CHECKS|WITH_TYPES +[ + [ + 0, + 0.0, + 0.0 + ], + { + "normalized": "kakikukekokyakyukyo", + "types": [ + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha" + ], + "checks": [ + 3, + -1, + 3, + -1, + 3, + -1, + 3, + -1, + 3, + -1, + 6, + -1, + -1, + 6, + -1, + -1, + 6, + -1, + -1 + ], + "offsets": [ + 0, + 0, + 3, + 3, + 6, + 6, + 9, + 9, + 12, + 12, + 15, + 15, + 15, + 21, + 21, + 21, + 27, + 27, + 27 + ] + } +] Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ka.test (+5 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ka.test 2018-11-07 15:59:23 +0900 (6d90372a0) @@ -0,0 +1,5 @@ +normalize \ + 'NormalizerNFKC100("unify_to_romaji", true, \ + "report_source_offset", true)' \ + "カキクケコキャキュキョ" \ + WITH_CHECKS|WITH_TYPES Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ma.expected (+74 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ma.expected 2018-11-07 15:59:23 +0900 (60915283b) @@ -0,0 +1,74 @@ +normalize 'NormalizerNFKC100("unify_to_romaji", true, "report_source_offset", true)' "マミムメモミャミュミョ" WITH_CHECKS|WITH_TYPES +[ + [ + 0, + 0.0, + 0.0 + ], + { + "normalized": "mamimumemomyamyumyo", + "types": [ + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha" + ], + "checks": [ + 3, + -1, + 3, + -1, + 3, + -1, + 3, + -1, + 3, + -1, + 6, + -1, + -1, + 6, + -1, + -1, + 6, + -1, + -1 + ], + "offsets": [ + 0, + 0, + 3, + 3, + 6, + 6, + 9, + 9, + 12, + 12, + 15, + 15, + 15, + 21, + 21, + 21, + 27, + 27, + 27 + ] + } +] Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ma.test (+5 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ma.test 2018-11-07 15:59:23 +0900 (65747c562) @@ -0,0 +1,5 @@ +normalize \ + 'NormalizerNFKC100("unify_to_romaji", true, \ + "report_source_offset", true)' \ + "マミムメモミャミュミョ" \ + WITH_CHECKS|WITH_TYPES Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/n.expected (+65 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/n.expected 2018-11-07 15:59:23 +0900 (582b98bb2) @@ -0,0 +1,65 @@ +normalize 'NormalizerNFKC100("unify_to_romaji", true, "report_source_offset", true)' "ンパンバンマンアンヤ" WITH_CHECKS|WITH_TYPES +[ + [ + 0, + 0.0, + 0.0 + ], + { + "normalized": "mpambamman-an-ya", + "types": [ + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "symbol", + "alpha", + "alpha", + "symbol", + "alpha", + "alpha" + ], + "checks": [ + 3, + 3, + -1, + 3, + 3, + -1, + 3, + 3, + -1, + 3, + -1, + 3, + 3, + -1, + 3, + -1 + ], + "offsets": [ + 0, + 3, + 3, + 6, + 9, + 9, + 12, + 15, + 15, + 18, + 18, + 21, + 24, + 24, + 27, + 27 + ] + } +] Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/n.test (+5 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/n.test 2018-11-07 15:59:23 +0900 (6ab4df257) @@ -0,0 +1,5 @@ +normalize \ + 'NormalizerNFKC100("unify_to_romaji", true, \ + "report_source_offset", true)' \ + "ンパンバンマンアンヤ" \ + WITH_CHECKS|WITH_TYPES Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/na.expected (+74 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/na.expected 2018-11-07 15:59:23 +0900 (086a22cbe) @@ -0,0 +1,74 @@ +normalize 'NormalizerNFKC100("unify_to_romaji", true, "report_source_offset", true)' "ナニヌネノニャニュニョ" WITH_CHECKS|WITH_TYPES +[ + [ + 0, + 0.0, + 0.0 + ], + { + "normalized": "naninunenonyanyunyo", + "types": [ + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha" + ], + "checks": [ + 3, + -1, + 3, + -1, + 3, + -1, + 3, + -1, + 3, + -1, + 6, + -1, + -1, + 6, + -1, + -1, + 6, + -1, + -1 + ], + "offsets": [ + 0, + 0, + 3, + 3, + 6, + 6, + 9, + 9, + 12, + 12, + 15, + 15, + 15, + 21, + 21, + 21, + 27, + 27, + 27 + ] + } +] Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/na.test (+5 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/na.test 2018-11-07 15:59:23 +0900 (a1bf5b6e3) @@ -0,0 +1,5 @@ +normalize \ + 'NormalizerNFKC100("unify_to_romaji", true, \ + "report_source_offset", true)' \ + "ナニヌネノニャニュニョ" \ + WITH_CHECKS|WITH_TYPES Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/pa.expected (+74 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/pa.expected 2018-11-07 15:59:23 +0900 (3b01567e7) @@ -0,0 +1,74 @@ +normalize 'NormalizerNFKC100("unify_to_romaji", true, "report_source_offset", true)' "パピプペポピャピュピョ" WITH_CHECKS|WITH_TYPES +[ + [ + 0, + 0.0, + 0.0 + ], + { + "normalized": "papipupepopyapyupyo", + "types": [ + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha" + ], + "checks": [ + 3, + -1, + 3, + -1, + 3, + -1, + 3, + -1, + 3, + -1, + 6, + -1, + -1, + 6, + -1, + -1, + 6, + -1, + -1 + ], + "offsets": [ + 0, + 0, + 3, + 3, + 6, + 6, + 9, + 9, + 12, + 12, + 15, + 15, + 15, + 21, + 21, + 21, + 27, + 27, + 27 + ] + } +] Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/pa.test (+5 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/pa.test 2018-11-07 15:59:23 +0900 (8542c3bd9) @@ -0,0 +1,5 @@ +normalize \ + 'NormalizerNFKC100("unify_to_romaji", true, \ + "report_source_offset", true)' \ + "パピプペポピャピュピョ" \ + WITH_CHECKS|WITH_TYPES Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ra.expected (+74 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ra.expected 2018-11-07 15:59:23 +0900 (895e66df8) @@ -0,0 +1,74 @@ +normalize 'NormalizerNFKC100("unify_to_romaji", true, "report_source_offset", true)' "ラリルレロリャリュリョ" WITH_CHECKS|WITH_TYPES +[ + [ + 0, + 0.0, + 0.0 + ], + { + "normalized": "rarirureroryaryuryo", + "types": [ + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha" + ], + "checks": [ + 3, + -1, + 3, + -1, + 3, + -1, + 3, + -1, + 3, + -1, + 6, + -1, + -1, + 6, + -1, + -1, + 6, + -1, + -1 + ], + "offsets": [ + 0, + 0, + 3, + 3, + 6, + 6, + 9, + 9, + 12, + 12, + 15, + 15, + 15, + 21, + 21, + 21, + 27, + 27, + 27 + ] + } +] Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ra.test (+5 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ra.test 2018-11-07 15:59:23 +0900 (1529d78b2) @@ -0,0 +1,5 @@ +normalize \ + 'NormalizerNFKC100("unify_to_romaji", true, \ + "report_source_offset", true)' \ + "ラリルレロリャリュリョ" \ + WITH_CHECKS|WITH_TYPES Copied: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/sa.expected (+14 -55) 50% =================================================================== --- test/command/suite/normalizers/nfkc100/unify_to_romaji/mixed.expected 2018-11-07 15:25:55 +0900 (67969a6ef) +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/sa.expected 2018-11-07 15:59:23 +0900 (2970e7a02) @@ -1,4 +1,4 @@ -normalize 'NormalizerNFKC100("unify_to_romaji", true, "report_source_offset", true)' "あイウェおざジたチなニぱピまミヽヾ漢字" WITH_CHECKS|WITH_TYPES +normalize 'NormalizerNFKC100("unify_to_romaji", true, "report_source_offset", true)' "サシスセソシャシュショ" WITH_CHECKS|WITH_TYPES [ [ 0, @@ -6,7 +6,7 @@ normalize 'NormalizerNFKC100("unify_to_romaji", true, "re 0.0 ], { - "normalized": "aiuxeozazitachinanipapimamiヽヾ漢字", + "normalized": "sashisusesoshashusho", "types": [ "alpha", "alpha", @@ -27,92 +27,51 @@ normalize 'NormalizerNFKC100("unify_to_romaji", true, "re "alpha", "alpha", "alpha", - "alpha", - "alpha", - "alpha", - "alpha", - "alpha", - "alpha", - "alpha", - "alpha", - "katakana", - "katakana", - "kanji", - "kanji" + "alpha" ], "checks": [ 3, - 3, - 3, - 3, -1, 3, - 3, -1, - 3, -1, 3, -1, 3, -1, - -1, 3, -1, - 3, + 6, -1, - 3, -1, - 3, + 6, -1, - 3, -1, - 3, + 6, -1, - 3, + -1 + ], + "offsets": [ 0, 0, 3, - 0, - 0, 3, - 0, - 0, - 3, - 0, - 0 - ], - "offsets": [ - 0, 3, 6, + 6, 9, 9, 12, + 12, 15, 15, - 18, - 18, + 15, + 21, 21, 21, - 24, - 24, - 24, 27, 27, - 30, - 30, - 33, - 33, - 36, - 36, - 39, - 39, - 42, - 42, - 45, - 48, - 51, - 54 + 27 ] } ] Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/sa.test (+5 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/sa.test 2018-11-07 15:59:23 +0900 (edeb01007) @@ -0,0 +1,5 @@ +normalize \ + 'NormalizerNFKC100("unify_to_romaji", true, \ + "report_source_offset", true)' \ + "サシスセソシャシュショ" \ + WITH_CHECKS|WITH_TYPES Copied: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ta.expected (+15 -50) 55% =================================================================== --- test/command/suite/normalizers/nfkc100/unify_to_romaji/mixed.expected 2018-11-07 15:25:55 +0900 (67969a6ef) +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ta.expected 2018-11-07 15:59:23 +0900 (b4e0a58a5) @@ -1,4 +1,4 @@ -normalize 'NormalizerNFKC100("unify_to_romaji", true, "report_source_offset", true)' "あイウェおざジたチなニぱピまミヽヾ漢字" WITH_CHECKS|WITH_TYPES +normalize 'NormalizerNFKC100("unify_to_romaji", true, "report_source_offset", true)' "タチツテトッチャチュチョ" WITH_CHECKS|WITH_TYPES [ [ 0, @@ -6,7 +6,7 @@ normalize 'NormalizerNFKC100("unify_to_romaji", true, "re 0.0 ], { - "normalized": "aiuxeozazitachinanipapimamiヽヾ漢字", + "normalized": "tachitsutetotchachucho", "types": [ "alpha", "alpha", @@ -29,29 +29,13 @@ normalize 'NormalizerNFKC100("unify_to_romaji", true, "re "alpha", "alpha", "alpha", - "alpha", - "alpha", - "alpha", - "alpha", - "alpha", - "alpha", - "katakana", - "katakana", - "kanji", - "kanji" + "alpha" ], "checks": [ 3, - 3, - 3, - 3, -1, 3, - 3, -1, - 3, - -1, - 3, -1, 3, -1, @@ -61,58 +45,39 @@ normalize 'NormalizerNFKC100("unify_to_romaji", true, "re 3, -1, 3, + 6, -1, - 3, -1, - 3, + 6, -1, - 3, -1, - 3, - 0, - 0, - 3, + 6, + -1, + -1 + ], + "offsets": [ 0, 0, 3, - 0, - 0, 3, - 0, - 0 - ], - "offsets": [ - 0, 3, 6, + 6, + 6, 9, 9, 12, + 12, 15, - 15, 18, 18, - 21, - 21, + 18, 24, 24, 24, - 27, - 27, 30, 30, - 33, - 33, - 36, - 36, - 39, - 39, - 42, - 42, - 45, - 48, - 51, - 54 + 30 ] } ] Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ta.test (+5 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ta.test 2018-11-07 15:59:23 +0900 (44b9c8f1d) @@ -0,0 +1,5 @@ +normalize \ + 'NormalizerNFKC100("unify_to_romaji", true, \ + "report_source_offset", true)' \ + "タチツテトッチャチュチョ" \ + WITH_CHECKS|WITH_TYPES Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/wa.expected (+53 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/wa.expected 2018-11-07 15:59:23 +0900 (cfa6b9a80) @@ -0,0 +1,53 @@ +normalize 'NormalizerNFKC100("unify_to_romaji", true, "report_source_offset", true)' "ヮワヰヱヲン" WITH_CHECKS|WITH_TYPES +[ + [ + 0, + 0.0, + 0.0 + ], + { + "normalized": "xwawawiwewon", + "types": [ + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha" + ], + "checks": [ + 3, + -1, + -1, + 3, + -1, + 3, + -1, + 3, + -1, + 3, + -1, + 3 + ], + "offsets": [ + 0, + 0, + 0, + 3, + 3, + 6, + 6, + 9, + 9, + 12, + 12, + 15 + ] + } +] Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/wa.test (+5 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/wa.test 2018-11-07 15:59:23 +0900 (9bfab8337) @@ -0,0 +1,5 @@ +normalize \ + 'NormalizerNFKC100("unify_to_romaji", true, \ + "report_source_offset", true)' \ + "ヮワヰヱヲン" \ + WITH_CHECKS|WITH_TYPES Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ya.expected (+62 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ya.expected 2018-11-07 15:59:23 +0900 (811449dc2) @@ -0,0 +1,62 @@ +normalize 'NormalizerNFKC100("unify_to_romaji", true, "report_source_offset", true)' "ヤユヨャュョ" WITH_CHECKS|WITH_TYPES +[ + [ + 0, + 0.0, + 0.0 + ], + { + "normalized": "yayuyoxyaxyuxyo", + "types": [ + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha" + ], + "checks": [ + 3, + -1, + 3, + -1, + 3, + -1, + 3, + -1, + -1, + 3, + -1, + -1, + 3, + -1, + -1 + ], + "offsets": [ + 0, + 0, + 3, + 3, + 6, + 6, + 9, + 9, + 9, + 12, + 12, + 12, + 15, + 15, + 15 + ] + } +] Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ya.test (+5 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ya.test 2018-11-07 15:59:23 +0900 (d9d065cba) @@ -0,0 +1,5 @@ +normalize \ + 'NormalizerNFKC100("unify_to_romaji", true, \ + "report_source_offset", true)' \ + "ヤユヨャュョ" \ + WITH_CHECKS|WITH_TYPES Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/za.expected (+65 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/za.expected 2018-11-07 15:59:23 +0900 (ae1c4bc8a) @@ -0,0 +1,65 @@ +normalize 'NormalizerNFKC100("unify_to_romaji", true, "report_source_offset", true)' "ザジズゼゾジャジュジョ" WITH_CHECKS|WITH_TYPES +[ + [ + 0, + 0.0, + 0.0 + ], + { + "normalized": "zajizuzezojajujo", + "types": [ + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha", + "alpha" + ], + "checks": [ + 3, + -1, + 3, + -1, + 3, + -1, + 3, + -1, + 3, + -1, + 6, + -1, + 6, + -1, + 6, + -1 + ], + "offsets": [ + 0, + 0, + 3, + 3, + 6, + 6, + 9, + 9, + 12, + 12, + 15, + 15, + 21, + 21, + 27, + 27 + ] + } +] Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/za.test (+5 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/za.test 2018-11-07 15:59:23 +0900 (2143bdba8) @@ -0,0 +1,5 @@ +normalize \ + 'NormalizerNFKC100("unify_to_romaji", true, \ + "report_source_offset", true)' \ + "ザジズゼゾジャジュジョ" \ + WITH_CHECKS|WITH_TYPES Modified: test/command/suite/normalizers/nfkc100/unify_to_romaji/mixed.expected (+1 -1) =================================================================== --- test/command/suite/normalizers/nfkc100/unify_to_romaji/mixed.expected 2018-11-07 15:25:55 +0900 (67969a6ef) +++ test/command/suite/normalizers/nfkc100/unify_to_romaji/mixed.expected 2018-11-07 15:59:23 +0900 (8fdba3329) @@ -6,7 +6,7 @@ normalize 'NormalizerNFKC100("unify_to_romaji", true, "re 0.0 ], { - "normalized": "aiuxeozazitachinanipapimamiヽヾ漢字", + "normalized": "aiuxeozajitachinanipapimamiヽヾ漢字", "types": [ "alpha", "alpha", -------------- next part -------------- An HTML attachment was scrubbed... URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20181107/da8e7199/attachment-0001.html>