[Ttssh2-commit] [7462] unicode関連をcodeconv.cに集めた

Back to archive index
scmno****@osdn***** scmno****@osdn*****
2019年 3月 10日 (日) 02:32:42 JST


Revision: 7462
          http://sourceforge.jp/projects/ttssh2/scm/svn/commits/7462
Author:   zmatsuo
Date:     2019-03-10 02:32:42 +0900 (Sun, 10 Mar 2019)
Log Message:
-----------
unicode関連をcodeconv.cに集めた
CP932へ/からの変換を関数に分離(UTF32ToCP932(), CP932ToUTF32())
unicodeからDEC特殊文字変換を関数に分離(UTF32ToDecSp())
SJIS2UTF8() (内部コードからUTF-8へ出力)を language.c から ttcmn.c に移動
_WideCharToMultiByte(), _MultiByteToWideChar() の変換した文字数の戻り値修正
UTF32ToMBCP()追加
UTF32_CP932()追加
MBCPToUTF32()追加
WideCharToUTF8(), WideCharToCP932() の仕様を変更
変換テーブルとWindows APIをつかったコード変換の優先順位を変更できるようにした

Modified Paths:
--------------
    trunk/teraterm/common/codeconv.cpp
    trunk/teraterm/common/codeconv.h
    trunk/teraterm/teraterm/CMakeLists.txt
    trunk/teraterm/teraterm/vtterm.c
    trunk/teraterm/ttpcmn/language.c
    trunk/teraterm/ttpcmn/language.h
    trunk/teraterm/ttpcmn/ttcmn.c

-------------- next part --------------
Modified: trunk/teraterm/common/codeconv.cpp
===================================================================
--- trunk/teraterm/common/codeconv.cpp	2019-03-05 16:22:05 UTC (rev 7461)
+++ trunk/teraterm/common/codeconv.cpp	2019-03-09 17:32:42 UTC (rev 7462)
@@ -26,14 +26,21 @@
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+/* unicode関連の文字コード変換 */
+
 #include <windows.h>
 #include <string.h>
+#include <assert.h>
 #include <crtdbg.h>
 #if (defined(_MSC_VER) && (_MSC_VER >= 1600)) || !defined(_MSC_VER)
 #include <stdint.h>
 #endif
+#include "codemap.h"
 #include "codeconv.h"
 
+// cp932変換時、Windows API より Tera Term の変換テーブルを優先する
+//#define PRIORITY_CP932_TABLE
+
 #if defined(_MSC_VER) && (_MSC_VER < 1600)
 typedef unsigned char	uint8_t;
 typedef unsigned short  uint16_t;
@@ -47,12 +54,158 @@
 #define _wcsdup(s)    _wcsdup_dbg((s), _NORMAL_BLOCK, __FILE__, __LINE__)
 #endif
 
+/*
+ *	見つからない場合は 0 を返す
+ */
+static unsigned short _ConvertUnicode(unsigned short code, const codemap_t *table, int tmax)
+{
+	int low, mid, high;
+	unsigned short result;
+
+	low = 0;
+	high = tmax - 1;
+	result = 0; // convert error
+
+	// binary search
+	while (low < high) {
+		mid = (low + high) / 2;
+		if (table[mid].from_code < code) {
+			low = mid + 1;
+		} else {
+			high = mid;
+		}
+	}
+
+	if (table[low].from_code == code) {
+		result = table[low].to_code;
+	}
+
+	return (result);
+}
+
+static int IsHighSurrogate(wchar_t u16)
+{
+	return 0xd800 <= u16 && u16 < 0xdc00;
+}
+
+static int IsLowSurrogate(wchar_t u16)
+{
+	return 0xdc00 <= u16 && u16 < 0xe000;
+}
+
 /**
- * UTF-32 \x82\xA9\x82\xE7 UTF-8 \x82֕ϊ\xB7\x82\xB7\x82\xE9
+ *	1\x95\xB6\x8E\x9A\x82Ƃ\xB5\x82Ĉ\xB5\x82\xA4\x82\xBD\x82߂ɁA\x95K\x97v\x82ȃL\x83\x83\x83\x89\x83N\x83^\x90\x94\x82𓾂\xE9
+ *	@retval	0	\x95\xB6\x8E\x9A\x82Ƃ\xB5\x82Ĉ\xB5\x82\xA6\x82Ȃ\xA2(\x95\xB6\x8E\x9A\x83R\x81[\x83h\x82\xAA\x82\xA8\x82\xA9\x82\xB5\x82\xA2)
+ *	@retval	1	1\x83L\x83\x83\x83\x89\x83N\x83^\x82\xC51\x95\xB6\x8E\x9A\x82Ƃ\xB5\x82Ĉ\xB5\x82\xA6\x82\xE9
+ *	@retval	2	2\x83L\x83\x83\x83\x89\x83N\x83^\x82\xC51\x95\xB6\x8E\x9A\x82Ƃ\xB5\x82Ĉ\xB5\x82\xA6\x82\xE9
+ */
+#if 0
+static size_t UTF16GetCharCount(const wchar_t *wstr_ptr, size_t wstr_len)
+{
+	wchar_t u16;
+	assert(wstr_ptr != NULL);
+	if (wstr_len == 0) {
+		return 0;
+	}
+	u16 = *wstr_ptr++;
+	if (IsHighSurrogate(u16)) {
+		if (wstr_len >= 2) {
+			const wchar_t u16_lo = *wstr_ptr++;
+			if (IsLowSurrogate(u16_lo)) {
+				return 2;
+			} else {
+				return 0;
+			}
+		} else {
+			return 0;
+		}
+	} else if (IsLowSurrogate(u16)) {
+		return 0;
+	}
+	return 1;
+}
+#endif
+
+/**
+ *	code page の multi byte 文字を UTF-32へ変換する
+ *	@param KCode	マルチバイトの文字コード(0x0000-0xffff)
+ *	@param CodePage	マルチバイトのコードページ
+ *	@retval			unicode(UTF-32文字コード)
+ */
+unsigned int MBCPToUTF32(unsigned short KCode, int CodePage)
+{
+	unsigned int c;
+
+	if (CodePage == 932) {
+		c = CP932ToUTF32(KCode);
+	} else {
+		char buf[3];
+		wchar_t wchar;
+		int ret;
+		int len = 0;
+		if (KCode < 0x100) {
+			buf[0] = KCode & 0xff;
+			len = 1;
+		} else {
+			buf[0] = KCode >> 8;
+			buf[1] = KCode & 0xff;
+			len = 2;
+		}
+		ret = MultiByteToWideChar(CodePage, MB_ERR_INVALID_CHARS, buf, len, &wchar, 1);
+		if (ret <= 0) {
+			c = 0;
+		} else {
+			c = (unsigned int)wchar;
+		}
+	}
+	return c;
+}
+
+/**
+ *	wchar_t\x95\xB6\x8E\x9A\x97񂩂\xE7unicode(UTF-32)\x82\xF01\x95\xB6\x8E\x9A\x8E\xE6\x82\xE8\x8Fo\x82\xB7
+ *	@retval	0	\x95\xB6\x8E\x9A\x82Ƃ\xB5\x82Ĉ\xB5\x82\xA6\x82Ȃ\xA2(\x95\xB6\x8E\x9A\x83R\x81[\x83h\x82\xAA\x82\xA8\x82\xA9\x82\xB5\x82\xA2)
+ *	@retval	1	1\x83L\x83\x83\x83\x89\x83N\x83^\x82\xC51\x95\xB6\x8E\x9A\x82Ƃ\xB5\x82Ĉ\xB5\x82\xA6\x82\xE9
+ *	@retval	2	2\x83L\x83\x83\x83\x89\x83N\x83^\x82\xC51\x95\xB6\x8E\x9A\x82Ƃ\xB5\x82Ĉ\xB5\x82\xA6\x82\xE9
+ */
+size_t UTF16ToUTF32(const wchar_t *wstr_ptr, size_t wstr_len, unsigned int *u32)
+{
+	assert(wstr_ptr != NULL);
+	if (wstr_len == 0) {
+		*u32 = 0;
+		return 0;
+	}
+	const wchar_t u16 = *wstr_ptr++;
+	// \x83T\x83\x8D\x83Q\x81[\x83g high?
+	if (IsHighSurrogate(u16)) {
+		if (wstr_len >= 2) {
+			const wchar_t u16_lo = *wstr_ptr++;
+			if (IsLowSurrogate(u16_lo)) {
+				// \x83T\x83\x8D\x83Q\x81[\x83g\x83y\x83A \x83f\x83R\x81[\x83h
+				*u32 = 0x10000 + (u16 - 0xd800) * 0x400 + (u16_lo - 0xdc00);
+				return 2;
+			} else {
+				*u32 = 0;
+				return 0;
+			}
+		} else {
+			*u32 = 0;
+			return 0;
+		}
+	} else if (IsLowSurrogate(u16)) {
+		*u32 = 0;
+		return 0;
+	} else {
+		*u32 = u16;
+		return 1;
+	}
+}
+
+/**
+ * UTF-32\x95\xB6\x8E\x9A \x82\xA9\x82\xE7 UTF-8 \x82֕ϊ\xB7\x82\xB7\x82\xE9
  * @param[in]		u32		\x95ϊ\xB7\x82\xB7\x82\xE9UTF-32
  * @param[in,out]	u8_ptr	\x95ϊ\xB7\x8C\xE3UTF-8\x95\xB6\x8E\x9A\x97\xF1\x8Fo\x97͐\xE6(NULL\x82̂Ƃ\xAB\x8Fo\x97͂\xB5\x82Ȃ\xA2)
  * @param[in]		u8_len	UTF-8\x8Fo\x97͐敶\x8E\x9A\x90\x94(\x83o\x83b\x83t\x83@\x92\xB7,byte\x90\x94)
- * @retval			\x8Eg\x97p\x82\xB5\x82\xBDutf8\x95\xB6\x8E\x9A\x90\x94(byte\x90\x94\x81j
+ * @retval			\x8Fo\x97͂\xB5\x82\xBDutf8\x95\xB6\x8E\x9A\x90\x94(byte\x90\x94\x81j
  *					0=\x83G\x83\x89\x81[
  */
 size_t UTF32ToUTF8(uint32_t u32, char *u8_ptr_, size_t u8_len)
@@ -113,7 +266,7 @@
  * @param[in]	u8_len	UTF-8\x95\xB6\x8E\x9A\x97񒷂\xB3
  * @param[out]	u32		\x95ϊ\xB7\x82\xB5\x82\xBDUTF-32\x95\xB6\x8E\x9A
  * @retval		\x8Eg\x97p\x82\xB5\x82\xBDUTF-8\x95\xB6\x8E\x9A\x90\x94(byte\x90\x94\x81j
- *				0=\x83G\x83\x89\x81[
+ *				0=\x83G\x83\x89\x81[(\x95ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD)
  */
 size_t UTF8ToUTF32(const char *u8_ptr_, size_t u8_len, uint32_t *u32_)
 {
@@ -191,33 +344,286 @@
 	return u8_in;
 }
 
-// WideCharToMultiByte\x82\xCCUTF8\x93\xC1\x89\xBB\x94\xC5
-int WideCharToUTF8(const wchar_t *wstr_ptr, int wstr_len, char *u8_ptr, int u8_len)
+/**
+ * UTF-32 \x82\xA9\x82\xE7 UTF-16 \x82֕ϊ\xB7\x82\xB7\x82\xE9
+ * @param[in]		u32			\x95ϊ\xB7\x82\xB7\x82\xE9UTF-32
+ * @param[in,out]	wstr_ptr	\x95ϊ\xB7\x8C\xE3UTF-16\x95\xB6\x8E\x9A\x97\xF1\x8Fo\x97͐\xE6(NULL\x82̂Ƃ\xAB\x8Fo\x97͂\xB5\x82Ȃ\xA2)
+ * @param[in]		wstr_len	UTF-16\x8Fo\x97͐敶\x8E\x9A\x90\x94(\x95\xB6\x8E\x9A\x90\x94,sizeof(wchar_t)*wstr_len bytes)
+ * @retval			\x8Fo\x97͂\xB5\x82\xBDUTF-16\x95\xB6\x8E\x9A\x90\x94(sizeof(wchar_t)\x94{\x82\xB7\x82\xE9\x82\xC6byte\x90\x94)
+ *					0=\x83G\x83\x89\x81[(\x95ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD)
+ */
+size_t UTF32ToUTF16(uint32_t u32, wchar_t *wstr_ptr, size_t wstr_len)
 {
-	int u8_out_sum = 0;
-	if (u8_ptr == NULL) {
-		u8_len = 4;
+	size_t u16_out;
+	if (u32 < 0x10000) {
+		if (wstr_len >= 1) {
+			if (wstr_ptr != NULL) {
+				*wstr_ptr++ = (uint16_t)u32;
+			}
+			u16_out = 1;
+		} else {
+			u16_out = 0;
+		}
+	} else if (u32 <= 0x10ffff) {
+		if (wstr_len >= 2) {
+			if (wstr_ptr != NULL) {
+				// \x83T\x83\x8D\x83Q\x81[\x83g \x83G\x83\x93\x83R\x81[\x83h
+				*wstr_ptr++ = uint16_t((u32 - 0x10000) / 0x400) + 0xd800;
+				*wstr_ptr++ = uint16_t((u32 - 0x10000) % 0x400) + 0xdc00;
+			}
+			u16_out = 2;
+		} else {
+			u16_out = 0;
+		}
 	} else {
-		if (u8_len == 0) {
-			return 0;
+		u16_out = 0;
+	}
+	return u16_out;
+}
+
+/**
+ * UTF-32\x95\xB6\x8E\x9A\x82\xF0CP932\x95\xB6\x8E\x9A(Shift_JIS) 1\x95\xB6\x8E\x9A\x82֕ϊ\xB7\x82\xB7\x82\xE9
+ * @retval		\x8Eg\x97p\x82\xB5\x82\xBDCP932\x95\xB6\x8E\x9A
+ *				0=\x83G\x83\x89\x81[(\x95ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD)
+ */
+unsigned short UTF32_CP932(unsigned int u32)
+{
+#include "../teraterm/uni2sjis.map"		// mapUnicodeToSJIS[]
+	char mbstr[2];
+	unsigned short mb;
+	DWORD mblen;
+	wchar_t u16_str[2];
+	size_t u16_len;
+
+	if (u32 < 0x80) {
+		return (unsigned short)u32;
+	}
+
+#if defined(PRIORITY_CP932_TABLE)
+	if (u32 < 0x10000) {
+		wchar_t u16 = (wchar_t)u32;
+		// Tera Term\x82̕ϊ\xB7\x83e\x81[\x83u\x83\x8B\x82\xC5 Unicode -> Shift_JIS\x82֕ϊ\xB7
+		mb = _ConvertUnicode(u16, mapUnicodeToSJIS, _countof(mapUnicodeToSJIS));
+		if (mb != 0) {
+			// \x95ϊ\xB7\x82ł\xAB\x82\xBD
+			return mb;
 		}
 	}
-	if (wstr_len < 0) {
+#endif
+	u16_len = UTF32ToUTF16(u32, u16_str, 2);
+	if (u16_len == 0) {
+		return 0;
+	}
+	mblen = WideCharToMultiByte(932, 0, u16_str, (int)u16_len, mbstr, 2, NULL, NULL);
+	switch (mblen) {
+	case 0:
+	case 1:
+	default:
+		if (mblen == 0 || mbstr[0] == '?') {
+			goto next_convert;
+		} else {
+			mb = (unsigned char)mbstr[0];
+			return mb;
+		}
+	case 2:
+		if (mbstr[0] == '?' && mbstr[1] == '?') {
+			// 2byte\x8Fo\x97\xCD && "??" \x82̏ꍇ\x82͕ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD
+			goto next_convert;
+		}
+		mb = (((unsigned char)mbstr[0]) << 8) | (unsigned char)mbstr[1];
+		return mb;
+	}
+
+next_convert:
+#if !defined(PRIORITY_CP932_TABLE)
+	if (u32 < 0x10000) {
+		wchar_t u16 = (wchar_t)u32;
+		// Tera Term\x82̕ϊ\xB7\x83e\x81[\x83u\x83\x8B\x82\xC5 Unicode -> Shift_JIS\x82֕ϊ\xB7
+		mb = _ConvertUnicode(u16, mapUnicodeToSJIS, _countof(mapUnicodeToSJIS));
+		if (mb != 0) {
+			// \x95ϊ\xB7\x82ł\xAB\x82\xBD
+			return mb;
+		}
+	}
+#endif
+	return 0;
+}
+
+/**
+ * CP932\x95\xB6\x8E\x9A(Shift_JIS) 1\x95\xB6\x8E\x9A\x82\xA9\x82\xE7UTF-32\x82֕ϊ\xB7\x82\xB7\x82\xE9
+ * @param[in]		cp932		CP932\x95\xB6\x8E\x9A
+ * @retval			変換したUTF-32文字
+ *					0=\x83G\x83\x89\x81[(\x95ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD)
+ */
+unsigned int CP932ToUTF32(unsigned short cp932)
+{
+#include "../ttpcmn/sjis2uni.map"		// mapSJISToUnicode[]
+	wchar_t wchar;
+	int ret;
+	unsigned int u32;
+	unsigned char buf[2];
+	int len = 0;
+
+#if defined(PRIORITY_CP932_TABLE)
+	u32 = _ConvertUnicode(cp932, mapSJISToUnicode, sizeof(mapSJISToUnicode)/sizeof(mapSJISToUnicode[0]));
+	if (u32 != 0) {
+		return u32;
+	}
+#endif
+	if (cp932 < 0x100) {
+		buf[0] = cp932 & 0xff;
+		len = 1;
+	} else {
+		buf[0] = cp932 >> 8;
+		buf[1] = cp932 & 0xff;
+		len = 2;
+	}
+	ret = MultiByteToWideChar(932, MB_ERR_INVALID_CHARS, (char *)buf, len, &wchar, 1);
+	if (ret <= 0) {
+		// MultiByteToWideChar()\x82\xAA\x95ϊ\xB7\x8E\xB8\x94s
+#if !defined(PRIORITY_CP932_TABLE)
+		u32 = _ConvertUnicode(cp932, mapSJISToUnicode, sizeof(mapSJISToUnicode)/sizeof(mapSJISToUnicode[0]));
+		// \x83e\x81[\x83u\x83\x8B\x82ɂ\xE0\x82Ȃ\xA9\x82\xC1\x82\xBD\x8Fꍇ c = 0(\x95ϊ\xB7\x8E\xB8\x94s\x8E\x9E)
+#else
+		u32 = 0;
+#endif
+	} else {
+		u32 = (unsigned int)wchar;
+	}
+
+	return u32;
+}
+
+/**
+ * Unicode\x82\xA9\x82\xE7DEC\x93\xC1\x8Eꕶ\x8E\x9A\x82֕ϊ\xB7
+ * @param	u32			UTF-32\x95\xB6\x8E\x9A\x83R\x81[\x83h
+ * @return	\x89\xBA\x88\xCA8bit	DEC\x93\xC1\x8Eꕶ\x8E\x9A\x83R\x81[\x83h
+ *			\x8F\xE3\x88\xCA8bit	\x95\xB6\x8E\x9A\x83R\x81[\x83h\x8E\xED\x95\xCA (1,2,4)
+ *						file://../../doc/ja/html/setup/teraterm-term.html \x8EQ\x8F\xC6
+ *			0			\x95ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD
+ */
+unsigned short UTF32ToDecSp(unsigned int u32)
+{
+#include "../teraterm/unisym2decsp.map"		// mapUnicodeSymbolToDecSp[]
+	unsigned short cset;
+	if (u32 > 0x10000) {
+		cset = 0;
+	} else {
+		const unsigned short u16 = (unsigned short)u32;
+		cset = _ConvertUnicode(u16, mapUnicodeSymbolToDecSp, _countof(mapUnicodeSymbolToDecSp));
+	}
+	return cset;
+}
+
+/**
+ * UTF-32 \x82\xA9\x82\xE7 CP932 \x82֕ϊ\xB7\x82\xB7\x82\xE9
+ * @param[in]		u32			\x95ϊ\xB7\x82\xB7\x82\xE9UTF-32
+ * @param[in,out]	mb_ptr		\x95ϊ\xB7\x8C\xE3CP932\x95\xB6\x8E\x9A\x97\xF1\x8Fo\x97͐\xE6(NULL\x82̂Ƃ\xAB\x8Fo\x97͂\xB5\x82Ȃ\xA2)
+ * @param[in]		mb_len		CP932出力先文字数(バッファ長,byte数)
+ * @retval			\x8Fo\x97͂\xB5\x82\xBDCP932\x95\xB6\x8E\x9A\x90\x94(byte\x90\x94)
+ *					0=\x83G\x83\x89\x81[(\x95ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD)
+ */
+size_t UTF32ToCP932(uint32_t u32, char *mb_ptr, size_t mb_len)
+{
+	size_t cp932_out;
+	const uint16_t cp932 = UTF32_CP932(u32);
+	if (cp932 == 0 && u32 != 0) {
+		return 0;
+	}
+	if (mb_ptr == NULL) {
+		mb_len = 2;
+	}
+	if (cp932 < 0x100) {
+		if (mb_len >= 1) {
+			if (mb_ptr != NULL) {
+				*mb_ptr = cp932 & 0xff;
+			}
+			cp932_out = 1;
+		} else {
+			cp932_out = 0;
+		}
+	} else {
+		if (mb_len >= 2) {
+			if (mb_ptr != NULL) {
+				mb_ptr[0] = (cp932 >> 8) & 0xff;
+				mb_ptr[1] = cp932 & 0xff;
+			}
+			cp932_out = 2;
+		} else {
+			cp932_out = 0;
+		}
+	}
+	return cp932_out;
+}
+
+size_t UTF32ToMBCP(unsigned int u32, int code_page, char *mb_ptr, size_t mb_len)
+{
+	wchar_t u16_str[2];
+	size_t u16_len;
+	u16_len = UTF32ToUTF16(u32, u16_str, 2);
+	if (u16_len == 0) {
+		return 0;
+	}
+	mb_len = WideCharToMultiByte(code_page, 0, u16_str, u16_len, mb_ptr, mb_len, NULL, NULL);
+	if (mb_len == 1 && mb_ptr[0] == '?' && u32 != '?') {
+		// \x95ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD\x82Ƃ\xAB\x81A\x96߂\xE8\x92l=1, \x95\xB6\x8E\x9A[0]='?' \x82\xF0\x95Ԃ\xB5\x82Ă\xAD\x82\xE9
+		mb_len = 0;
+	}
+	return mb_len;
+}
+
+/**
+ *	wchar_t(UTF-16)\x95\xB6\x8E\x9A\x97\xF1\x82\xF0\x83}\x83\x8B\x83`\x83o\x83C\x83g\x82ɕϊ\xB7\x82\xB7\x82\xE9
+ *
+ *	@param[in]		*wstr_ptr	wchar_t\x95\xB6\x8E\x9A\x97\xF1
+ *	@param[in,out]	*wstr_len	wchar_t\x95\xB6\x8E\x9A\x97\xF1\x92\xB7
+ *								NULL\x82܂\xBD\x82\xCD0\x82̂Ƃ\xAB\x8E\xA9\x93\xAE\x81AL'\0'\x82Ń^\x81[\x83~\x83l\x81[\x83g\x82\xB7\x82邱\x82\xC6)
+ *								NULL\x88ȊO\x82̂Ƃ\xAB\x93\xFC\x97͂\xB5\x82\xBD\x95\xB6\x8E\x9A\x90\x94\x82\xF0\x95Ԃ\xB7
+ *	@param[in]		*mb_ptr		\x95ϊ\xB7\x82\xB5\x82\xBD\x95\xB6\x8E\x9A\x97\xF1\x82\xF0\x8E\xFB\x94[\x82\xB7\x82\xE9\x83|\x83C\x83\x93\x83^
+ *								(NULL\x82̂Ƃ\xAB\x95ϊ\xB7\x82\xB9\x82\xB8\x82ɕ\xB6\x8E\x9A\x90\x94\x82\xF0\x83J\x83E\x83\x93\x83g\x82\xB7\x82\xE9)
+ *	@param[in,out]	*mb_len		\x95ϊ\xB7\x82\xB5\x82\xBD\x95\xB6\x8E\x9A\x97\xF1\x82\xF0\x8E\xFB\x94[\x82ł\xAB\x82\xE9\x83T\x83C\x83Y,byte\x90\x94,
+ *								mb_ptr\x82\xAANULL\x82̂Ƃ\xAB\x8Fo\x97͉”\\x83T\x83C\x83Y\x82͕s\x97v
+ *								\x95ϊ\xB7\x82\xB5\x82\xBD\x83}\x83\x8B\x83`\x83o\x83C\x83g\x95\xB6\x8E\x9A\x97\xF1\x82̒\xB7\x82\xB3\x82\xF0\x95Ԃ\xB7
+ *								L'\0'\x82\xF0\x95ϊ\xB7\x82\xB5\x82\xBD\x82\xE7'\0'\x82\xE0\x8A܂\xDE
+ *								mb_ptr\x82\xAANULL\x82̂Ƃ\xAB\x82ł\xE0\x92\xB7\x82\xB3\x82͕Ԃ\xB7
+ *	@param[in]		UTF32ToMB	UTF32\x82\xF0\x83}\x83\x8B\x83`\x83o\x83C\x83g\x82ɕϊ\xB7\x82\xB7\x82\xE9\x8A֐\x94\x82ւ̃|\x83C\x83\x93\x83^
+ */
+static void WideCharToMB(const wchar_t *wstr_ptr, size_t *wstr_len_,
+						 char *mb_ptr, size_t *mb_len_,
+						 size_t (*UTF32ToMB)(uint32_t u32, char *mb_ptr, size_t mb_len))
+{
+	size_t wstr_len;
+	size_t mb_len;
+	size_t mb_out_sum = 0;
+	size_t wstr_in = 0;
+
+	assert(wstr_ptr != NULL);
+	if (mb_ptr == NULL) {
+		// \x95ϊ\xB7\x95\xB6\x8E\x9A\x97\xF1\x82\xF0\x8F\x91\x82\xAB\x8Fo\x82\xB3\x82Ȃ\xA2
+		mb_len = 4;		// 1\x95\xB6\x8E\x9A4byte\x82ɂ͎\xFB\x82܂\xE9\x82͂\xB8
+	} else {
+		mb_len = *mb_len_;
+	}
+	if (wstr_len_ == NULL || *wstr_len_ == 0) {
 		wstr_len = (int)wcslen(wstr_ptr) + 1;
+	} else {
+		wstr_len = *wstr_len_;
 	}
 
-	while(u8_len > 0 && wstr_len > 0) {
+	while(mb_len > 0 && wstr_len > 0) {
 		const wchar_t u16 = *wstr_ptr++;
 		uint32_t u32 = u16;
-		size_t u8_out;
+		size_t mb_out;
 		wstr_len--;
+		wstr_in++;
 		// \x83T\x83\x8D\x83Q\x81[\x83g high?
-		if (0xd800 <= u16 && u16 < 0xdc00) {
+		if (IsHighSurrogate(u16)) {
 			if (wstr_len >= 1) {
 				const wchar_t u16_lo = *wstr_ptr++;
 				wstr_len--;
+				wstr_in++;
 				// \x83T\x83\x8D\x83Q\x81[\x83g low?
-				if (0xdc00 <= u16_lo && u16_lo < 0xe000) {
+				if (IsLowSurrogate(u16_lo)) {
 					// \x83T\x83\x8D\x83Q\x81[\x83g\x83y\x83A \x83f\x83R\x81[\x83h
 					u32 = 0x10000 + (u16 - 0xd800) * 0x400 + (u16_lo - 0xdc00);
 				} else {
@@ -224,34 +630,75 @@
 					goto unknown_code;
 				}
 			} else {
-			unknown_code:
-				if (u8_ptr != NULL) {
-					*u8_ptr++ = '?';
-				}
-				u8_out = 1;
-				goto loop_next;
+				goto unknown_code;
 			}
 		}
-		u8_out = UTF32ToUTF8(u32, u8_ptr, u8_len);
-		if (u8_out == 0) {
-			goto unknown_code;
+		mb_out = UTF32ToMB(u32, mb_ptr, mb_len);
+		if (mb_out == 0) {
+		unknown_code:
+			if (mb_ptr != NULL) {
+				// \x95ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD\x8Fꍇ
+				*mb_ptr++ = '?';
+			}
+			mb_out = 1;
 		}
-	loop_next:
-		u8_out_sum += u8_out;
-		if (u8_ptr != NULL) {
-			u8_ptr += u8_out;
-			u8_len -= u8_out;
+		mb_out_sum += mb_out;
+		if (mb_ptr != NULL) {
+			mb_ptr += mb_out;
+			mb_len -= mb_out;
 		}
 	}
-	return u8_out_sum;
+
+	if (wstr_len_ != NULL) {
+		*wstr_len_ = wstr_in;
+	}
+	*mb_len_ = mb_out_sum;
 }
 
+// WideCharToMultiByte\x82\xCCUTF8\x93\xC1\x89\xBB\x94\xC5
+void WideCharToUTF8(const wchar_t *wstr_ptr, size_t *wstr_len, char *u8_ptr, size_t *u8_len)
+{
+	WideCharToMB(wstr_ptr, wstr_len, u8_ptr, u8_len, UTF32ToUTF8);
+}
+
+void WideCharToCP932(const wchar_t *wstr_ptr, size_t *wstr_len, char *cp932_ptr, size_t *cp932_len)
+{
+	WideCharToMB(wstr_ptr, wstr_len,
+				 cp932_ptr, cp932_len,
+				 UTF32ToCP932);
+}
+
+void WideCharToMBCP(const wchar_t *wstr_ptr, size_t *wstr_len, char *mb_ptr, size_t *mb_len,
+				  int code_page)
+{
+	size_t (*utf32_to_mb)(uint32_t u32, char *mb_ptr, size_t mb_len);
+	switch (code_page) {
+	case CP_UTF8:
+		utf32_to_mb = UTF32ToUTF8;
+		break;
+	case 932:
+		utf32_to_mb = UTF32ToCP932;
+		break;
+	default:
+		*mb_len = 0;
+		return;
+	}
+
+	WideCharToMB(wstr_ptr, wstr_len,
+				 mb_ptr, mb_len,
+				 utf32_to_mb);
+}
+
 // MultiByteToWideChar\x82\xCCUTF8\x93\xC1\x89\xBB\x94\xC5
-int UTF8ToWideChar(const char *u8_ptr, int u8_len, wchar_t *wstr_ptr, int wstr_len)
+int UTF8ToWideChar(const char *u8_ptr, int u8_len_, wchar_t *wstr_ptr, int wstr_len_)
 {
+	size_t u8_len;
+	size_t wstr_len = wstr_len_;
 	size_t u16_out_sum = 0;
-	if (u8_len < 0) {
+	if (u8_len_ < 0) {
 		u8_len = strlen(u8_ptr) + 1;
+	} else {
+		u8_len = u8_len_;
 	}
 	if (wstr_ptr == NULL) {
 		wstr_len = 1;
@@ -295,16 +742,18 @@
 		}
 		u16_out_sum += u16_out;
 	}
-	return u16_out_sum;
+	return (int)u16_out_sum;
 }
 
 /**
  *	wchar_t\x95\xB6\x8E\x9A\x97\xF1\x82\xF0\x83}\x83\x8B\x83`\x83o\x83C\x83g\x95\xB6\x8E\x9A\x97\xF1\x82֕ϊ\xB7
  *	@param[in]	*wstr_ptr	wchar_t\x95\xB6\x8E\x9A\x97\xF1
- *	@param[in]	wstr_len	wchar_t\x95\xB6\x8E\x9A\x97\xF1\x92\xB7(0\x82̂Ƃ\xAB\x8E\xA9\x93\xAE)
+ *	@param[in]	wstr_len	wchar_t\x95\xB6\x8E\x9A\x97\xF1\x92\xB7(0\x82̂Ƃ\xAB\x8E\xA9\x93\xAE\x81A\x8E\xA9\x93\xAE\x82̂Ƃ\xAB\x82\xCDL'\0'\x82Ń^\x81[\x83~\x83l\x81[\x83g\x82\xB7\x82邱\x82\xC6)
  *	@param[in]	code_page	\x95ϊ\xB7\x90\xE6\x83R\x81[\x83h\x83y\x81[\x83W
- *	@param[out]	*mb_len_	mb\x95\xB6\x8E\x9A\x97\xF1\x92\xB7(NULL\x82̂Ƃ\xAB\x93\xE0\x95\x94\x83G\x83\x89\x81[)
+ *	@param[out]	*mb_len_	\x95ϊ\xB7\x82\xB5\x82\xBD\x95\xB6\x8E\x9A\x97\xF1\x92\xB7,byte\x90\x94,L'\0'\x82\xF0\x95ϊ\xB7\x82\xB5\x82\xBD\x82\xE7'\0'\x82\xE0\x8A܂\xDE
+ *							(NULL\x82̂Ƃ\xAB\x95\xB6\x8E\x9A\x97񒷂\xF0\x95Ԃ\xB3\x82Ȃ\xA2)
  *	@retval		mb\x95\xB6\x8E\x9A\x97\xF1\x82ւ̃|\x83C\x83\x93\x83^(NULL\x82̎\x9E\x95ϊ\xB7\x83G\x83\x89\x81[)
+ *				\x8Eg\x97p\x8C\xE3 free() \x82\xB7\x82邱\x82\xC6
  */
 char *_WideCharToMultiByte(const wchar_t *wstr_ptr, size_t wstr_len, int code_page, size_t *mb_len_)
 {
@@ -317,9 +766,11 @@
 		wstr_len = wcslen(wstr_ptr) + 1;
 	}
     int len;
-	if (code_page == CP_UTF8) {
-		len = WideCharToUTF8(wstr_ptr, (DWORD)wstr_len,
-							 NULL, 0);
+	if (code_page == CP_UTF8 || code_page == 932) {
+		size_t wl = wstr_len;
+		size_t ml;
+		WideCharToMBCP(wstr_ptr, &wl, NULL, &ml, code_page);
+		len = ml;
 	} else {
 		len = ::WideCharToMultiByte(code_page, flags,
 									wstr_ptr, (DWORD)wstr_len,
@@ -333,9 +784,11 @@
 	if (mb_ptr == NULL) {
 		return NULL;
 	}
-	if (code_page == CP_UTF8) {
-		len = WideCharToUTF8(wstr_ptr, (DWORD)wstr_len,
-							 mb_ptr, len);
+	if (code_page == CP_UTF8 || code_page == 932) {
+		size_t wl = wstr_len;
+		size_t ml = len;
+		WideCharToMBCP(wstr_ptr, &wl, mb_ptr, &ml, code_page);
+		len = ml;
 	} else {
 		len = ::WideCharToMultiByte(code_page, flags,
 									wstr_ptr, (DWORD)wstr_len,
@@ -347,7 +800,8 @@
 		return NULL;
 	}
 	if (mb_len_ != NULL) {
-		*mb_len_ = len - 1;
+		// \x95ϊ\xB7\x82\xB5\x82\xBD\x95\xB6\x8E\x9A\x97\xF1\x90\x94(byte\x90\x94)\x82\xF0\x95Ԃ\xB7
+		*mb_len_ = len;
 	}
     return mb_ptr;
 }
@@ -355,10 +809,12 @@
 /**
  *	\x83}\x83\x8B\x83`\x83o\x83C\x83g\x95\xB6\x8E\x9A\x97\xF1\x82\xF0wchar_t\x95\xB6\x8E\x9A\x97\xF1\x82֕ϊ\xB7
  *	@param[in]	*str_ptr	mb(char)\x95\xB6\x8E\x9A\x97\xF1
- *	@param[in]	str_len		mb(char)\x95\xB6\x8E\x9A\x97\xF1\x92\xB7(0\x82̂Ƃ\xAB\x8E\xA9\x93\xAE)
+ *	@param[in]	str_len		mb(char)\x95\xB6\x8E\x9A\x97\xF1\x92\xB7(0\x82̂Ƃ\xAB\x8E\xA9\x93\xAE\x81A\x8E\xA9\x93\xAE\x82̂Ƃ\xAB\x82\xCD'\0'\x82Ń^\x81[\x83~\x83l\x81[\x83g\x82\xB7\x82邱\x82\xC6)
  *	@param[in]	code_page	\x95ϊ\xB7\x8C\xB3\x83R\x81[\x83h\x83y\x81[\x83W
- *	@param[out]	*w_len_		wchar_t\x95\xB6\x8E\x9A\x97\xF1\x92\xB7
- *	@retval		mb\x95\xB6\x8E\x9A\x97\xF1\x82ւ̃|\x83C\x83\x93\x83^(NULL\x82̎\x9E\x95ϊ\xB7\x83G\x83\x89\x81[)
+ *	@param[out]	*w_len_		wchar_t\x95\xB6\x8E\x9A\x97\xF1\x92\xB7,wchar_t\x90\x94,'\0'\x82\xF0\x95ϊ\xB7\x82\xB5\x82\xBD\x82\xE7L'\0'\x82\xE0\x8A܂\xDE
+ *							(NULL\x82̂Ƃ\xAB\x95\xB6\x8E\x9A\x97񒷂\xF0\x95Ԃ\xB3\x82Ȃ\xA2)
+ *	@retval		wchar_t\x95\xB6\x8E\x9A\x97\xF1\x82ւ̃|\x83C\x83\x93\x83^(NULL\x82̎\x9E\x95ϊ\xB7\x83G\x83\x89\x81[)
+ *				\x8Eg\x97p\x8C\xE3 free() \x82\xB7\x82邱\x82\xC6
  */
 wchar_t *_MultiByteToWideChar(const char *str_ptr, size_t str_len, int code_page, size_t *w_len_)
 {
@@ -402,7 +858,8 @@
 		return NULL;
 	}
 	if (w_len_ != NULL) {
-		*w_len_ = len - 1;
+		// \x95ϊ\xB7\x82\xB5\x82\xBD\x95\xB6\x8E\x9A\x97\xF1\x90\x94(wchar_t\x90\x94)\x82\xF0\x95Ԃ\xB7
+		*w_len_ = len;
 	}
 	return wstr_ptr;
 }

Modified: trunk/teraterm/common/codeconv.h
===================================================================
--- trunk/teraterm/common/codeconv.h	2019-03-05 16:22:05 UTC (rev 7461)
+++ trunk/teraterm/common/codeconv.h	2019-03-09 17:32:42 UTC (rev 7462)
@@ -35,12 +35,24 @@
 #endif
 
 
-// 1char
+// simple code convert
+unsigned int CP932ToUTF32(unsigned short cp932);
+unsigned short UTF32ToDecSp(unsigned int u32);
+unsigned int MBCPToUTF32(unsigned short KCode, int CodePage);
+unsigned short UTF32_CP932(unsigned int u32);
+
+// 1char ToUTF32
+size_t UTF8ToUTF32(const char *u8_ptr_, size_t u8_len, unsigned int *u32_);
+size_t UTF16ToUTF32(const wchar_t *wstr_ptr, size_t wstr_len, unsigned int *u32);
+// 1char UTF32To
+size_t UTF32ToUTF16(unsigned int u32, wchar_t *wstr_ptr, size_t wstr_len);
 size_t UTF32ToUTF8(unsigned int u32, char *u8_ptr, size_t u8_len);
-size_t UTF8ToUTF32(const char *u8_ptr_, size_t u8_len, unsigned int *u32_);
+size_t UTF32ToCP932(unsigned int u32, char *mb_ptr, size_t mb_len);
+size_t UTF32ToMBCP(unsigned int u32, int code_page, char *mb_ptr, size_t mb_len);
 
 // MultiByteToWideChar() wrappers
-int WideCharToUTF8(const wchar_t *wstr_ptr, int wstr_len, char *u8_ptr, int u8_len);
+void WideCharToUTF8(const wchar_t *wstr_ptr, size_t *wstr_len, char *u8_ptr, size_t *u8_len);
+void WideCharToCP932(const wchar_t *wstr_ptr, size_t *wstr_len, char *cp932_ptr, size_t *cp932_len);
 int UTF8ToWideChar(const char *u8_ptr, int u8_len, wchar_t *wstr_ptr, int wstr_len);
 
 // API wrappers

Modified: trunk/teraterm/teraterm/CMakeLists.txt
===================================================================
--- trunk/teraterm/teraterm/CMakeLists.txt	2019-03-05 16:22:05 UTC (rev 7461)
+++ trunk/teraterm/teraterm/CMakeLists.txt	2019-03-09 17:32:42 UTC (rev 7462)
@@ -31,7 +31,12 @@
   ../common/i18n.h
   ../common/dllutil.cpp
   ../common/dllutil.h
-  ../ttpcmn/language.h
+  ../common/codeconv.h
+  ../common/codeconv.cpp
+  #
+  ../teraterm/unisym2decsp.map
+  ../teraterm/uni2sjis.map
+  ../ttpcmn/sjis2uni.map
   )
 
 source_group(
@@ -102,8 +107,6 @@
   WSAAsyncGetAddrInfo.c
   WSAAsyncGetAddrInfo.h
   #
-  uni2sjis.map
-  unisym2decsp.map
   uni_combining.map
   #
   teraterm.manifest

Modified: trunk/teraterm/teraterm/vtterm.c
===================================================================
--- trunk/teraterm/teraterm/vtterm.c	2019-03-05 16:22:05 UTC (rev 7461)
+++ trunk/teraterm/teraterm/vtterm.c	2019-03-09 17:32:42 UTC (rev 7462)
@@ -52,7 +52,8 @@
 #include "telnet.h"
 #include "ttime.h"
 #include "clipboar.h"
-#include "../ttpcmn/language.h"
+#include "codeconv.h"
+#include "codeconv.h"
 
 #include "vtterm.h"
 
@@ -5408,13 +5409,6 @@
 }
 
 //
-// UTF-8
-//
-#include "uni2sjis.map"
-#include "unisym2decsp.map"
-
-
-//
 // Unicode Combining Character Support
 //
 #include "uni_combining.map"
@@ -5467,17 +5461,22 @@
 	return (index);
 }
 
-// unicode(UTF-16,wchar_t)\x82\xF0\x83o\x83b\x83t\x83@\x82֏\x91\x82\xAB\x8D\x9E\x82\xDE
+// unicode(UTF-32,wchar_t)\x82\xF0\x83o\x83b\x83t\x83@\x82֏\x91\x82\xAB\x8D\x9E\x82\xDE
 static void UnicodeToCP932(unsigned int code)
 {
-	wchar_t wchar = (wchar_t)code;
+	wchar_t wchar;
 	int ret;
 	char mbchar[2];
 	unsigned short cset;
 
+	if (code >= 0x10000) {
+		goto unknown;
+	}
+	wchar = (wchar_t)code;
+
 	// Unicode\x82\xA9\x82\xE7DEC\x93\xC1\x8Eꕶ\x8E\x9A\x82ւ̃}\x83b\x83s\x83\x93\x83O
 	if (ts.UnicodeDecSpMapping) {
-		cset = ConvertUnicode(wchar, mapUnicodeSymbolToDecSp, MAPSIZE(mapUnicodeSymbolToDecSp));
+		cset = UTF32ToDecSp(wchar);
 		if (((cset >> 8) & ts.UnicodeDecSpMapping) != 0) {
 			PutDecSp(cset & 0xff);
 			return;
@@ -5485,7 +5484,24 @@
 	}
 
 	// Unicode -> \x93\xE0\x95\x94\x83R\x81[\x83h(ts.CodePage)\x82֕ϊ\xB7\x82\xB5\x82ďo\x97\xCD
-	ret = WideCharToMultiByte(ts.CodePage, 0, &wchar, 1, mbchar, 2, NULL, NULL);
+	if (ts.CodePage == 932) {
+		ret = (int)UTF16ToCP932(&wchar, 1, &cset);
+		if (ret == 0) {
+			// \x95ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD
+			;
+		} else if (cset < 0x100) {
+			// 1byte\x95\xB6\x8E\x9A
+			mbchar[0] = (char)cset;
+			ret = 1;
+		} else {
+			// 2byte\x95\xB6\x8E\x9A
+			mbchar[0] = (char)(cset >> 8);
+			mbchar[1] = (char)(cset & 0xff);
+			ret = 2;
+		}
+	} else {
+		ret = WideCharToMultiByte(ts.CodePage, 0, &wchar, 1, mbchar, 2, NULL, NULL);
+	}
 	if (ret == 1 && mbchar[0] == '?' && code != '?') {
 		// \x95ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD\x82Ƃ\xAB\x81Aret=1, '?' \x82\xF0\x95Ԃ\xB5\x82Ă\xAD\x82\xE9
 		ret = 0;
@@ -5492,17 +5508,7 @@
 	}
 	switch (ret) {
 	case 0:
-		if (ts.CodePage == 932) {
-			// CP932
-			// U+301C\x82Ȃǂ͕ϊ\xB7\x82ł\xAB\x82Ȃ\xA2\x81BUnicode -> Shift_JIS\x82֕ϊ\xB7\x82\xB5\x82Ă݂\xE9\x81B
-			cset = ConvertUnicode(code, mapUnicodeToSJIS, MAPSIZE(mapUnicodeToSJIS));
-			if (cset != 0) {
-				Kanji = cset & 0xff00;
-				PutKanji(cset & 0x00ff);
-				return;
-			}
-		}
-
+	unknown:
 		PutChar('?');
 		if (ts.UnknownUnicodeCharaAsWide) {
 			PutChar('?');

Modified: trunk/teraterm/ttpcmn/language.c
===================================================================
--- trunk/teraterm/ttpcmn/language.c	2019-03-05 16:22:05 UTC (rev 7461)
+++ trunk/teraterm/ttpcmn/language.c	2019-03-09 17:32:42 UTC (rev 7462)
@@ -35,9 +35,8 @@
 #include <locale.h>
 
 #include "language.h"
-#include "codeconv.h"
-#include "sjis2uni.map"
 
+// export\x82\xB3\x82\xEA\x82Ă\xA2\x82\xE9
 unsigned short ConvertUnicode(unsigned short code, const codemap_t *table, int tmax)
 {
 	int low, mid, high;
@@ -64,69 +63,6 @@
 	return (result);
 }
 
-// \x93\xE0\x95\x94\x83R\x81[\x83h(CodePage)\x82\xF0UTF8\x82֕ϊ\xB7\x82\xB7\x82\xE9
-unsigned int PASCAL SJIS2UTF8(WORD KCode, int *byte, int CodePage)
-{
-	wchar_t wchar;
-	int ret;
-	unsigned int code;
-	unsigned int c, c1, c2, c3;
-	unsigned char buf[3];
-	unsigned char KCode_h;
-	int len = 0;
-
-	// \x93\xE0\x95\x94\x83R\x81[\x83h(CodePage)\x82\xA9\x82\xE7UTF-16LE\x82֕ϊ\xB7\x82\xB7\x82\xE9
-	KCode_h = (unsigned char)(KCode >> 8);
-	if (KCode_h != 0) {
-		buf[len++] = KCode_h;
-	}
-	buf[len++] = KCode & 0xff;
-	ret = MultiByteToWideChar(CodePage, MB_ERR_INVALID_CHARS, buf, len, &wchar, 1);
-	if (ret <= 0) {
-		// \x95ϊ\xB7\x8E\xB8\x94s
-		unsigned short cset = 0;
-		if (CodePage == 932) {
-			// CP932
-			cset = ConvertUnicode(KCode, mapSJISToUnicode, sizeof(mapSJISToUnicode)/sizeof(mapSJISToUnicode[0]));
-		}
-		if (cset == 0) {
-			c = 0xfffd; // U+FFFD: Replacement Character
-		} else {
-			c = cset;
-		}
-	} else {
-		c = (unsigned int)wchar;
-	}
-
-	// UTF-16LE\x82\xA9\x82\xE7UTF-8\x82֕ϊ\xB7\x82\xB7\x82\xE9
-	if (c <= 0x0000007f) {
-		// 0x00000000 <= c <= 0x0000007f
-		code = (c & 0xff);
-		*byte = 1;
-
-	} else if (c <= 0x000007ff) {
-		// 0x00000080 <= c <= 0x000007ff
-		c1 = ((c >> 6) & 0x1f) | 0xc0;
-		c2 = (c & 0x3f) | 0x80;
-		code = (c1 << 8) | c2;
-		*byte = 2;
-
-	} else if (c <= 0x0000ffff) {
-		// 0x00000800 <= c <= 0x0000ffff
-		c1 = ((c >> 12) & 0xf) | 0xe0;
-		c2 = ((c >> 6) & 0x3f) | 0x80;
-		c3 = ((c) & 0x3f) | 0x80;
-		code = (c1 << 16) | (c2 << 8) | c3;
-		*byte = 3;
-	} else {
-		code = KCode;
-		*byte = 2;
-	}
-
-	return (code);
-}
-
-
 // Japanese SJIS -> JIS
 WORD PASCAL SJIS2JIS(WORD KCode)
 {

Modified: trunk/teraterm/ttpcmn/language.h
===================================================================
--- trunk/teraterm/ttpcmn/language.h	2019-03-05 16:22:05 UTC (rev 7461)
+++ trunk/teraterm/ttpcmn/language.h	2019-03-09 17:32:42 UTC (rev 7462)
@@ -35,13 +35,11 @@
 #endif
 
 /* proto types */
-unsigned int PASCAL SJIS2UTF8(WORD KCode, int *byte, int CodePage);
 WORD PASCAL SJIS2JIS(WORD KCode);
 WORD PASCAL SJIS2EUC(WORD KCode);
 WORD PASCAL JIS2SJIS(WORD KCode);
 BYTE PASCAL RussConv(int cin, int cout, BYTE b);
 void PASCAL RussConvStr(int cin, int cout, PCHAR Str, int count);
-unsigned short ConvertUnicode(unsigned short code, const codemap_t *table, int tmax);
 
 #ifdef __cplusplus
 }

Modified: trunk/teraterm/ttpcmn/ttcmn.c
===================================================================
--- trunk/teraterm/ttpcmn/ttcmn.c	2019-03-05 16:22:05 UTC (rev 7461)
+++ trunk/teraterm/ttpcmn/ttcmn.c	2019-03-09 17:32:42 UTC (rev 7462)
@@ -1582,25 +1582,52 @@
 	return i;
 }
 
+// \x93\xE0\x95\x94\x83R\x81[\x83h(CodePage)\x82\xF0UTF-32(UTF-16LE)\x82֕ϊ\xB7\x82\xB7\x82\xE9
+static unsigned int SJIS2UTF32(WORD KCode, int CodePage)
+{
+	unsigned int c;
+
+	// \x93\xE0\x95\x94\x83R\x81[\x83h(CodePage)\x82\xA9\x82\xE7UTF-16LE\x82֕ϊ\xB7\x82\xB7\x82\xE9
+	if (CodePage == 932) {
+		c = CP932ToUTF32(KCode);
+	} else {
+		unsigned char buf[3];
+		wchar_t wchar;
+		int ret;
+		int len = 0;
+		if (KCode < 0x100) {
+			buf[0] = KCode & 0xff;
+			len = 1;
+		} else {
+			buf[0] = KCode >> 8;
+			buf[1] = KCode & 0xff;
+			len = 2;
+		}
+		ret = MultiByteToWideChar(CodePage, MB_ERR_INVALID_CHARS, buf, len, &wchar, 1);
+		if (ret <= 0) {
+			c = 0;
+		} else {
+			c = (unsigned int)wchar;
+		}
+	}
+	if (c <= 0) {
+		// \x95ϊ\xB7\x8E\xB8\x94s
+		c = 0xfffd; // U+FFFD: Replacement Character
+	}
+
+	return c;
+}
+
+// \x93\xE0\x95\x94\x83R\x81[\x83h(CodePage)\x82\xF0UTF-8\x82֏o\x97͂\xB7\x82\xE9
 static int OutputTextUTF8(WORD K, char *TempStr, PComVar cv)
 {
+	int CodePage = *cv->CodePage;
 	unsigned int code;
 	int outlen;
-	int TempLen = 0;
 
-	code = SJIS2UTF8(K, &outlen, *cv->CodePage);
-	switch (outlen) {
-	  case 4:
-		TempStr[TempLen++] = (code >> 24) & 0xff;
-	  case 3:
-		TempStr[TempLen++] = (code >> 16) & 0xff;
-	  case 2:
-		TempStr[TempLen++] = (code >> 8) & 0xff;
-	  case 1:
-		TempStr[TempLen++] = code & 0xff;
-	}
-
-	return TempLen;
+	code = SJIS2UTF32(K, CodePage);
+	outlen = UTF32ToUTF8(code, TempStr, 4);
+	return outlen;
 }
 
 //


Ttssh2-commit メーリングリストの案内
Back to archive index