t-suw****@users*****
t-suw****@users*****
2007年 9月 8日 (土) 22:35:18 JST
// Index: AquaSKK/src/context/SKKRomanKanaConverter.cpp
// diff -u /dev/null AquaSKK/src/context/SKKRomanKanaConverter.cpp:1.1.2.1
// --- /dev/null Sat Sep 8 22:35:18 2007
// +++ AquaSKK/src/context/SKKRomanKanaConverter.cpp Sat Sep 8 22:35:18 2007
// @@ -0,0 +1,240 @@
/* -*- C++ -*-
  MacOS X implementation of the SKK input method.

  Copyright (C) 2007 Tomotaka SUWA <t.suw****@mac*****>

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/

#include <cstddef>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>

// ======================================================================
// Utilities
// ======================================================================

// Replaces every escape sequence in `str` with the character it stands for,
// modifying `str` in place.
//
// Recognised sequences: "&comma;" -> ",", "&space;" -> " ", "&sharp;" -> "#".
//
// NOTE(review): the archived copy of this patch showed the first and third
// keys as "," and the U+266F sharp sign, which looks like HTML entity decoding
// applied by the archive (the "&space;" key survived untouched). They are
// restored to their entity spelling here - confirm against the repository.
void unescape_string(std::string& str) {
    static const struct {
        const char* from;
        const char* to;
    } escape[] = {
        { "&comma;", "," },
        { "&space;", " " },
        { "&sharp;", "#" },
    };

    for(std::size_t i = 0; i < sizeof(escape) / sizeof(escape[0]); ++ i) {
        const std::string target(escape[i].from);
        const std::string replacement(escape[i].to);

        // The position must be size_type: a narrower unsigned never compares
        // equal to npos on LP64 targets, which made replace() throw.
        std::string::size_type pos = 0;
        while((pos = str.find(target, pos)) != std::string::npos) {
            str.replace(pos, target.length(), replacement);
            // Resume after the inserted text so a replacement that contains
            // its own target can never be matched again.
            pos += replacement.length();
        }
    }
}

// ======================================================================
// SKKRomanKanaConverter::Node interface
// ======================================================================
+class SKKRomanKanaConverter::Node { + std::string hirakana_; + std::string katakana_; + std::string jisx0201kana_; + std::string next_; + + std::map<char, SKKRomanKanaConverter::Node> children_; + +public: + Node() {} + Node(const std::string& hirakana, const std::string& katakana, + const std::string& jisx0201kana, const std::string& next) + : hirakana_(hirakana), katakana_(katakana), jisx0201kana_(jisx0201kana), next_(next) {} + + // åæå + void Clear() { + children_.clear(); + } + + // ãã¼ã追å + void Add(const std::string& str, const Node& node, int depth = 0) { + // æ«ç«¯ãï¼ + if(str.size() - 1 == depth) { + children_[str[depth]] = node; + } else { + children_[str[depth]].Add(str, node, depth + 1); // å帰追å + } + } + + // ãã¼ãæ¤ç´¢ + const Node* Traverse(const std::string& str, int& match_length, int depth = 0) { + // [1] ãã¼ã¿ä¸è¶³(ex. "k" ã "ch" ãªã©) + if(depth == str.size()) { + match_length = 0; + return 0; + } + + // ä¸è´ï¼ + if(children_.find(str[depth]) != children_.end()) { + Node& leaf = children_[str[depth]]; + + // æ«ç«¯ã§ãªããªãå帰æ¤ç´¢ + if(!leaf.children_.empty()) { + return leaf.Traverse(str, match_length, depth + 1); + } + + // [2] å®å ¨ä¸è´ + match_length = depth + 1; + return &leaf; + } + + // [3] é¨åä¸è´(ex. 
"kb" ã "chm" ãªã©) + if(0 < depth) { + match_length = depth; + + // ç¯ãã¤èã§ããããnãã®ãããªå ´åã«ã¯ãä¸è´ã¨ãã¦æ±ã + if(!hirakana_.empty()) { + return this; + } + + return 0; + } + + // [4] å ¨ãä¸è´ããªãã£ã + match_length = -1; + return 0; + } + + // ãã¼ãæåååå¾ + const std::string& KanaString(SKK::InputMode mode) const { + switch(mode) { + case SKK::Hirakana: + return hirakana_; + + case SKK::Katakana: + return katakana_; + + case SKK::Jisx0201Kana: + return jisx0201kana_; + + default: + std::cerr << "SKKRomanKanaConverter::Node::KanaString(): invalid mode [" << mode << "]" << std::endl; + break; + } + } + + // 次ç¶æ æåååå¾ + const std::string& NextState() const { + return next_; + } +}; + +// ====================================================================== +// SKKRomanKanaConverter ã¤ã³ã¿ãã§ã¼ã¹ +// ====================================================================== +SKKRomanKanaConverter& SKKRomanKanaConverter::theInstance() { + static Node root; + static SKKRomanKanaConverter obj(root); + return obj; +} + +// Node ã®ã¤ã³ã¿ãã§ã¼ã¹ãé ãããã®è¦ããå®è£ +SKKRomanKanaConverter::SKKRomanKanaConverter(Node& node) : root_(node) { +} + +void SKKRomanKanaConverter::Initialize(const std::string& path) { + std::ifstream rule(path.c_str()); + std::string str; + + if(!rule) { + std::cerr << "SKKRomanKanaConverter::Initialize(): can't open file [" << path << "]" << std::endl; + return; + } + + // ã¯ãªã¢ + root_.Clear(); + + while(std::getline(rule, str)) { + if(str.empty() || str[0] == '#') continue; + + // EUC-JP â UTF-8 å¤æ + std::string utf8; + jconv::convert_eucj_to_utf8(str, utf8); + + // å ¨ã¦ã® ',' ã空ç½ã«ç½®æãã¦å解ãã + std::replace(utf8.begin(), utf8.end(), ',', ' '); + std::istringstream buf(utf8); + + // å¤æã«ã¼ã«ãèªã + std::string label, hirakana, katakana, jisx0201kana, next; + if(buf >> label >> hirakana >> katakana >> jisx0201kana) { + // ãªãã·ã§ã³ã®æ¬¡ç¶æ ãèªã + buf >> next; + + // ã¨ã¹ã±ã¼ãæåãå ã«æ»ã + unescape_string(label); + unescape_string(hirakana); + 
unescape_string(katakana); + unescape_string(jisx0201kana); + unescape_string(next); + + // 追å + root_.Add(label, Node(hirakana, katakana, jisx0201kana, next)); + } else { + // ä¸æ£ãªå½¢å¼ + std::cerr << "SKKRomanKanaConverter::Initialize(): invalid rule [" << utf8 << "]" << std::endl; + } + } +} + +bool SKKRomanKanaConverter::Execute(SKK::InputMode mode, const std::string& in, std::string& out, std::string& next) { + bool converted = false; + std::string str(in); + + out.clear(); + next.clear(); + + while(!str.empty()) { + int match_length; + const Node* node = root_.Traverse(str, match_length); + + // å¤æã§ããï¼ + if(node) { + out += node->KanaString(mode); + next = node->NextState(); + converted = true; + } else { + converted = false; + } + + // é¨åçã«ä¸è´ãã¦ããããã¼ã¿ä¸è¶³ã®ãããã以ä¸å¦çã§ããªã + if(!match_length) { + next = str; + return false; + } + + // æåã®ä¸æåãä¸è´ããªãå ´åãåºåã«ã³ãã¼ãã¦æ¬¡ã®æåã調ã¹ã + if(match_length < 0) { + out += str[0]; + match_length = 1; + } + + // ä¸è´ããé¨åãåãåã£ã¦æ¬¡ã®æåã調ã¹ã + str = str.substr(match_length); + } + + return converted; +} Index: AquaSKK/src/context/SKKRomanKanaConverter.h diff -u /dev/null AquaSKK/src/context/SKKRomanKanaConverter.h:1.1.2.1 --- /dev/null Sat Sep 8 22:35:18 2007 +++ AquaSKK/src/context/SKKRomanKanaConverter.h Sat Sep 8 22:35:18 2007 @@ -0,0 +1,53 @@ +/* -*- C++ -*- + MacOS X implementation of the SKK input method. + + Copyright (C) 2007 Tomotaka SUWA <t.suw****@mac*****> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef INC__SKKRomanKanaConverter__ +#define INC__SKKRomanKanaConverter__ + +#include <string> +#include "SKK.h" + +class SKKRomanKanaConverter { + class Node; + Node& root_; + + SKKRomanKanaConverter(); + SKKRomanKanaConverter(Node& node); + SKKRomanKanaConverter(const SKKRomanKanaConverter&); + +public: + static SKKRomanKanaConverter& theInstance(); + + void Initialize(const std::string& path); + + // Romaji-to-kana conversion + // + // Arguments: + // in=romaji input string + // out=result string (may be set even when nothing was converted) + // next=next-state string + // + // Return value: + // true=converted, false=not converted + // + bool Execute(SKK::InputMode mode, const std::string& in, std::string& out, std::string& next); +}; + +#endif Index: AquaSKK/src/context/kana-rule-list diff -u /dev/null AquaSKK/src/context/kana-rule-list:1.1.2.1 --- /dev/null Sat Sep 8 22:35:18 2007 +++ AquaSKK/src/context/kana-rule-list Sat Sep 8 22:35:18 2007 @@ -0,0 +1,276 @@ +# $Id: kana-rule-list,v 1.1.2.1 2007/09/08 13:35:18 t-suwa Exp $ + +# このファイルは改行コードがLF、文字エンコーディングがEUC-JPでなければなりません。 +# 五つ目の項目は次状態です。無ければ項目自体を省略します。 +# エントリの順序には気を付けて下さい。例えば「ba」というエントリに出会うと、 +# AquaSKKはまず「b」という枝を探しますが、ここではまだ存在しないので +# 「b」という空の枝を作成します。この後に「b」というエントリが現れると +# そのエントリ「b」は既に定義されているのでどうなるか分かりません。 + +# 最初の項目の&comma;は半角カンマに置換されます。 + +a,あ,ア,ア + +bb,っ,ッ,ッ,b +ba,ば,バ,バ +bi,び,ビ,ビ +bu,ぶ,ブ,ブ +be,べ,ベ,ベ +bo,ぼ,ボ,ボ +bya,びゃ,ビャ,ビャ +byi,びぃ,ビィ,ビィ +byu,びゅ,ビュ,ビュ +bye,びぇ,ビェ,ビェ +byo,びょ,ビョ,ビョ + +cc,っ,ッ,ッ,c +cha,ちゃ,チャ,チャ +chi,ち,チ,チ +chu,ちゅ,チュ,チュ +che,ちぇ,チェ,チェ +cho,ちょ,チョ,チョ +cya,ちゃ,チャ,チャ +cyi,ちぃ,チィ,チィ +cyu,ちゅ,チュ,チュ +cye,ちぇ,チェ,チェ +cyo,ちょ,チョ,チョ + +dd,っ,ッ,ッ,d +da,だ,ダ,ダ +di,ぢ,ヂ,ヂ +du,づ,ヅ,ヅ +de,で,デ,デ +do,ど,ド,ド +dha,でゃ,デャ,デャ +dhi,でぃ,ディ,ディ +dhu,でゅ,デュ,デュ +dhe,でぇ,デェ,デェ +dho,でょ,デョ,デョ +dya,ぢゃ,ヂャ,ヂャ +dyi,ぢぃ,ヂィ,ヂィ +dyu,ぢゅ,ヂュ,ヂュ +dye,ぢぇ,ヂェ,ヂェ +dyo,ぢょ,ヂョ,ヂョ + +e,え,エ,エ + +ff,っ,ッ,ッ,f +fa,ふぁ,ファ,ファ 
+fi,ふぃ,フィ,フィ +fu,ふ,フ,フ +fe,ふぇ,フェ,フェ +fo,ふぉ,フォ,フォ +fya,ふゃ,フャ,フャ +fyi,ふぃ,フィ,フィ +fyu,ふゅ,フュ,フュ +fye,ふぇ,フェ,フェ +fyo,ふょ,フョ,フョ + +gg,っ,ッ,ッ,g +ga,が,ガ,ガ +gi,ぎ,ギ,ギ +gu,ぐ,グ,グ +ge,げ,ゲ,ゲ +go,ご,ゴ,ゴ +gya,ぎゃ,ギャ,ギャ +gyi,ぎぃ,ギィ,ギィ +gyu,ぎゅ,ギュ,ギュ +gye,ぎぇ,ギェ,ギェ +gyo,ぎょ,ギョ,ギョ + +hh,っ,ッ,ッ,h +ha,は,ハ,ハ +hi,ひ,ヒ,ヒ +hu,ふ,フ,フ +he,へ,ヘ,ヘ +ho,ほ,ホ,ホ +hya,ひゃ,ヒャ,ヒャ +hyi,ひぃ,ヒィ,ヒィ +hyu,ひゅ,ヒュ,ヒュ +hye,ひぇ,ヒェ,ヒェ +hyo,ひょ,ヒョ,ヒョ + +i,い,イ,イ + +jj,っ,ッ,ッ,j +ja,じゃ,ジャ,ジャ +ji,じ,ジ,ジ +ju,じゅ,ジュ,ジュ +je,じぇ,ジェ,ジェ +jo,じょ,ジョ,ジョ +jya,じゃ,ジャ,ジャ +jyi,じぃ,ジィ,ジィ +jyu,じゅ,ジュ,ジュ +jye,じぇ,ジェ,ジェ +jyo,じょ,ジョ,ジョ + +kk,っ,ッ,ッ,k +ka,か,カ,カ +ki,き,キ,キ +ku,く,ク,ク +ke,け,ケ,ケ +ko,こ,コ,コ +kya,きゃ,キャ,キャ +kyi,きぃ,キィ,キィ +kyu,きゅ,キュ,キュ +kye,きぇ,キェ,キェ +kyo,きょ,キョ,キョ + +mm,っ,ッ,ッ,m +ma,ま,マ,マ +mi,み,ミ,ミ +mu,む,ム,ム +me,め,メ,メ +mo,も,モ,モ +mya,みゃ,ミャ,ミャ +myi,みぃ,ミィ,ミィ +myu,みゅ,ミュ,ミュ +mye,みぇ,ミェ,ミェ +myo,みょ,ミョ,ミョ + +n,ん,ン,ン +n',ん,ン,ン +nn,ん,ン,ン +na,な,ナ,ナ +ni,に,ニ,ニ +nu,ぬ,ヌ,ヌ +ne,ね,ネ,ネ +no,の,ノ,ノ +nya,にゃ,ニャ,ニャ +nyi,にぃ,ニィ,ニィ +nyu,にゅ,ニュ,ニュ +nye,にぇ,ニェ,ニェ +nyo,にょ,ニョ,ニョ + +o,お,オ,オ + +pp,っ,ッ,ッ,p +pa,ぱ,パ,パ +pi,ぴ,ピ,ピ +pu,ぷ,プ,プ +pe,ぺ,ペ,ペ +po,ぽ,ポ,ポ +pya,ぴゃ,ピャ,ピャ +pyi,ぴぃ,ピィ,ピィ +pyu,ぴゅ,ピュ,ピュ +pye,ぴぇ,ピェ,ピェ +pyo,ぴょ,ピョ,ピョ + +rr,っ,ッ,ッ,r +ra,ら,ラ,ラ +ri,り,リ,リ +ru,る,ル,ル +re,れ,レ,レ +ro,ろ,ロ,ロ +rya,りゃ,リャ,リャ +ryi,りぃ,リィ,リィ +ryu,りゅ,リュ,リュ +rye,りぇ,リェ,リェ +ryo,りょ,リョ,リョ + +ss,っ,ッ,ッ,s +sa,さ,サ,サ +si,し,シ,シ +su,す,ス,ス +se,せ,セ,セ +so,そ,ソ,ソ +sha,しゃ,シャ,シャ +shi,し,シ,シ +shu,しゅ,シュ,シュ +she,しぇ,シェ,シェ +sho,しょ,ショ,ショ +sya,しゃ,シャ,シャ +syi,しぃ,シィ,シィ +syu,しゅ,シュ,シュ +sye,しぇ,シェ,シェ +syo,しょ,ショ,ショ + +tt,っ,ッ,ッ,t +ta,た,タ,タ +ti,ち,チ,チ +tu,つ,ツ,ツ +te,て,テ,テ +to,と,ト,ト +tha,てぁ,テァ,テァ +thi,てぃ,ティ,ティ +thu,てゅ,テュ,テュ +the,てぇ,テェ,テェ +tho,てょ,テョ,テョ +tsu,つ,ツ,ツ +tya,ちゃ,チャ,チャ +tyi,ちぃ,チィ,チィ +tyu,ちゅ,チュ,チュ +tye,ちぇ,チェ,チェ +tyo,ちょ,チョ,チョ + +u,う,ウ,ウ + +vv,っ,ッ,ッ,v +va,う゛ぁ,ヴァ,ヴァ +vi,う゛ぃ,ヴィ,ヴィ +vu,う゛,ヴ,ヴ +ve,う゛ぇ,ヴェ,ヴェ +vo,う゛ぉ,ヴォ,ヴォ + +ww,っ,ッ,ッ,w +wa,わ,ワ,ワ +wi,うぃ,ウィ,ウィ +wu,う,ウ,ウ +we,うぇ,ウェ,ウェ +wo,を,ヲ,ヲ + +xx,っ,ッ,ッ,x +xa,ぁ,ァ,ァ +xi,ぃ,ィ,ィ +xu,ぅ,ゥ,ゥ +xe,ぇ,ェ,ェ +xo,ぉ,ォ,ォ +xka,ヵ,ヵ,カ +xke,ヶ,ヶ,ケ +xtsu,っ,ッ,ッ +xtu,っ,ッ,ッ +xu,ぅ,ゥ,ゥ +xwa,ゎ,ヮ,ワ +xwe,ゑ,ヱ,エ +xwi,ゐ,ヰ,イ +xya,ゃ,ャ,ャ +xyo,ょ,ョ,ョ 
+xyu,ゅ,ュ,ュ + +yy,っ,ッ,ッ,y +ya,や,ヤ,ヤ +yi,い,イ,イ +yu,ゆ,ユ,ユ +ye,いぇ,イェ,イェ +yo,よ,ヨ,ヨ + +zz,っ,ッ,ッ,z +za,ざ,ザ,ザ +zi,じ,ジ,ジ +zu,ず,ズ,ズ +ze,ぜ,ゼ,ゼ +zo,ぞ,ゾ,ゾ +zya,じゃ,ジャ,ジャ +zyi,じぃ,ジィ,ジィ +zyu,じゅ,ジュ,ジュ +zye,じぇ,ジェ,ジェ +zyo,じょ,ジョ,ジョ +z&comma;,‥,‥,‥ +z-,〜,〜,〜 +z.,…,…,… +z/,・,・,・ +z[,『,『,『 +z],』,』,』 +zh,←,←,← +zj,↓,↓,↓ +zk,↑,↑,↑ +zl,→,→,→ + +-,ー,ー,ー +:,:,:,: +;,;,;,; +[,「,「,「 +],」,」,」 + +.,。,。,。 +&comma;,、,、,、