[Ttssh2-commit] [7469] codeconv入れ替え

Back to archive index
scmno****@osdn***** scmno****@osdn*****
2019年 3月 10日 (日) 23:32:33 JST


Revision: 7469
          http://sourceforge.jp/projects/ttssh2/scm/svn/commits/7469
Author:   zmatsuo
Date:     2019-03-10 23:32:33 +0900 (Sun, 10 Mar 2019)
Log Message:
-----------
codeconv入れ替え

# Conflicts:
#	teraterm/common/codeconv.cpp
#	teraterm/common/codeconv.h

Modified Paths:
--------------
    branches/cmake/teraterm/common/codeconv.cpp
    branches/cmake/teraterm/common/codeconv.h

-------------- next part --------------
Modified: branches/cmake/teraterm/common/codeconv.cpp
===================================================================
--- branches/cmake/teraterm/common/codeconv.cpp	2019-03-10 14:32:02 UTC (rev 7468)
+++ branches/cmake/teraterm/common/codeconv.cpp	2019-03-10 14:32:33 UTC (rev 7469)
@@ -26,14 +26,21 @@
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+/* unicode関連の文字コード変換 */
+
 #include <windows.h>
 #include <string.h>
+#include <assert.h>
 #include <crtdbg.h>
 #if (defined(_MSC_VER) && (_MSC_VER >= 1600)) || !defined(_MSC_VER)
 #include <stdint.h>
 #endif
+#include "codemap.h"
 #include "codeconv.h"
 
+// cp932変換時、Windows API より Tera Term の変換テーブルを優先する
+//#define PRIORITY_CP932_TABLE
+
 #if defined(_MSC_VER) && (_MSC_VER < 1600)
 typedef unsigned char	uint8_t;
 typedef unsigned short  uint16_t;
@@ -47,64 +54,215 @@
 #define _wcsdup(s)    _wcsdup_dbg((s), _NORMAL_BLOCK, __FILE__, __LINE__)
 #endif
 
+/*
+ *	見つからない場合は 0 を返す
+ */
+static unsigned short _ConvertUnicode(unsigned short code, const codemap_t *table, int tmax)
+{
+	int low, mid, high;
+	unsigned short result;
+
+	low = 0;
+	high = tmax - 1;
+	result = 0; // convert error
+
+	// binary search
+	while (low < high) {
+		mid = (low + high) / 2;
+		if (table[mid].from_code < code) {
+			low = mid + 1;
+		} else {
+			high = mid;
+		}
+	}
+
+	if (table[low].from_code == code) {
+		result = table[low].to_code;
+	}
+
+	return (result);
+}
+
+static int IsHighSurrogate(wchar_t u16)
+{
+	return 0xd800 <= u16 && u16 < 0xdc00;
+}
+
+static int IsLowSurrogate(wchar_t u16)
+{
+	return 0xdc00 <= u16 && u16 < 0xe000;
+}
+
 /**
- * UTF-32 \x82\xA9\x82\xE7 UTF-8 \x82֕ϊ\xB7\x82\xB7\x82\xE9
- * @param[in]		u32		\x95ϊ\xB7\x82\xB7\x82\xE9UTF-32
- * @param[in,out]	u8_ptr	\x95ϊ\xB7\x8C\xE3UTF-8\x95\xB6\x8E\x9A\x97\xF1\x8Fo\x97͐\xE6(NULL\x82̂Ƃ\xAB\x8Fo\x97͂\xB5\x82Ȃ\xA2)
- * @param[in]		u8_len	UTF-8\x8Fo\x97͐敶\x8E\x9A\x90\x94(\x83o\x83b\x83t\x83@\x92\xB7,byte\x90\x94)
- * @retval			\x8Eg\x97p\x82\xB5\x82\xBDutf8\x95\xB6\x8E\x9A\x90\x94(byte\x90\x94\x81j
- *					0=\x83G\x83\x89\x81[
+ * CP932文字(Shift_JIS) 1文字からUTF-32へ変換する
+ * @param[in]		cp932		CP932文字
+ * @retval			変換したUTF-32文字
+ *					0=エラー(変換できなかった)
  */
-size_t UTF32ToUTF8(uint32_t u32, char *u8_ptr_, size_t u8_len)
+unsigned int CP932ToUTF32(unsigned short cp932)
 {
-	size_t out_len = 0;
-	uint8_t *u8_ptr = (uint8_t *)u8_ptr_;
-	if (u8_ptr != NULL) {
-		u8_len = 4;
+#include "../ttpcmn/sjis2uni.map"		// mapSJISToUnicode[]
+	wchar_t wchar;
+	int ret;
+	unsigned int u32;
+	unsigned char buf[2];
+	int len = 0;
+
+#if defined(PRIORITY_CP932_TABLE)
+	u32 = _ConvertUnicode(cp932, mapSJISToUnicode, sizeof(mapSJISToUnicode)/sizeof(mapSJISToUnicode[0]));
+	if (u32 != 0) {
+		return u32;
 	}
+#endif
+	if (cp932 < 0x100) {
+		buf[0] = cp932 & 0xff;
+		len = 1;
+	} else {
+		buf[0] = cp932 >> 8;
+		buf[1] = cp932 & 0xff;
+		len = 2;
+	}
+	ret = MultiByteToWideChar(932, MB_ERR_INVALID_CHARS, (char *)buf, len, &wchar, 1);
+	if (ret <= 0) {
+		// MultiByteToWideChar()\x82\xAA\x95ϊ\xB7\x8E\xB8\x94s
+#if !defined(PRIORITY_CP932_TABLE)
+		u32 = _ConvertUnicode(cp932, mapSJISToUnicode, sizeof(mapSJISToUnicode)/sizeof(mapSJISToUnicode[0]));
+		// \x83e\x81[\x83u\x83\x8B\x82ɂ\xE0\x82Ȃ\xA9\x82\xC1\x82\xBD\x8Fꍇ c = 0(\x95ϊ\xB7\x8E\xB8\x94s\x8E\x9E)
+#else
+		u32 = 0;
+#endif
+	} else {
+		u32 = (unsigned int)wchar;
+	}
 
-	if (u32 <= 0x0000007f) {
-		// 0x00000000 <= u32 <= 0x0000007f
-		if (u8_len >= 1) {
-			if (u8_ptr != NULL) {
-				u8_ptr[0] = (uint8_t)u32;
-			}
-			out_len = 1;
+	return u32;
+}
+
+/**
+ * UnicodeからDEC特殊文字へ変換
+ * @param	u32			UTF-32文字コード
+ * @return	下位8bit	DEC特殊文字コード
+ *			上位8bit	文字コード種別 (1,2,4)
+ *						file://../../doc/ja/html/setup/teraterm-term.html 参照
+ *			0			変換できなかった
+ */
+unsigned short UTF32ToDecSp(unsigned int u32)
+{
+#include "../teraterm/unisym2decsp.map"		// mapUnicodeSymbolToDecSp[]
+	unsigned short cset;
+	if (u32 > 0x10000) {
+		cset = 0;
+	} else {
+		const unsigned short u16 = (unsigned short)u32;
+		cset = _ConvertUnicode(u16, mapUnicodeSymbolToDecSp, _countof(mapUnicodeSymbolToDecSp));
+	}
+	return cset;
+}
+
+/**
+ *	code page の multi byte 文字を UTF-32へ変換する
+ *	@param mb_code		マルチバイトの文字コード(0x0000-0xffff)
+ *	@param code_page	マルチバイトのコードページ
+ *	@retval				unicode(UTF-32文字コード)
+ */
+unsigned int MBCP_UTF32(unsigned short mb_code, int code_page)
+{
+	unsigned int c;
+
+	if (code_page == CP_ACP) {
+		code_page = (int)GetACP();
+	}
+	if (code_page == 932) {
+		c = CP932ToUTF32(mb_code);
+	} else {
+		char buf[2];
+		wchar_t wchar;
+		int ret;
+		int len = 0;
+		if (mb_code < 0x100) {
+			buf[0] = mb_code & 0xff;
+			len = 1;
+		} else {
+			buf[0] = mb_code >> 8;
+			buf[1] = mb_code & 0xff;
+			len = 2;
 		}
-	} else if (u32 <= 0x000007ff) {
-		// 0x00000080 <= u32 <= 0x000007ff
-		if (u8_len >= 2) {
-			if (u8_ptr != NULL) {
-				u8_ptr[0] = ((u32 >> 6) & 0x1f) | 0xc0;
-				u8_ptr[1] = (u32 & 0x3f) | 0x80;
-			}
-			out_len = 2;
+		ret = MultiByteToWideChar(code_page, MB_ERR_INVALID_CHARS, buf, len, &wchar, 1);
+		if (ret <= 0) {
+			c = 0;
+		} else {
+			c = (unsigned int)wchar;
 		}
-	} else if (u32 <= 0x0000ffff) {
-		// 0x00000800 <= u32 <= 0x0000ffff
-		if (u8_len >= 3) {
-			if (u8_ptr != NULL) {
-				u8_ptr[0] = ((u32 >> 12) & 0xf) | 0xe0;
-				u8_ptr[1] = ((u32 >> 6) & 0x3f) | 0x80;
-				u8_ptr[2] = (u32 & 0x3f) | 0x80;
-			}
-			out_len = 3;
+	}
+	return c;
+}
+
+/**
+ * UTF-32文字をCP932文字(Shift_JIS) 1文字へ変換する
+ * @retval		使用したCP932文字
+ *				0=エラー(変換できなかった)
+ */
+unsigned short UTF32_CP932(unsigned int u32)
+{
+#include "../teraterm/uni2sjis.map"		// mapUnicodeToSJIS[]
+	char mbstr[2];
+	unsigned short mb;
+	DWORD mblen;
+	wchar_t u16_str[2];
+	size_t u16_len;
+
+	if (u32 < 0x80) {
+		return (unsigned short)u32;
+	}
+
+#if defined(PRIORITY_CP932_TABLE)
+	if (u32 < 0x10000) {
+		wchar_t u16 = (wchar_t)u32;
+		// Tera Term\x82̕ϊ\xB7\x83e\x81[\x83u\x83\x8B\x82\xC5 Unicode -> Shift_JIS\x82֕ϊ\xB7
+		mb = _ConvertUnicode(u16, mapUnicodeToSJIS, _countof(mapUnicodeToSJIS));
+		if (mb != 0) {
+			// \x95ϊ\xB7\x82ł\xAB\x82\xBD
+			return mb;
 		}
-	} else if (u32 <= 0x0010ffff) {
-		// 0x00010000 <= u32 <= 0x0010ffff
-		if (u8_len >= 4) {
-			if (u8_ptr != NULL) {
-				u8_ptr[0] = ((uint8_t)(u32 >> 18)) | 0xf0;
-				u8_ptr[1] = ((u32 >> 12) & 0x3f) | 0x80;
-				u8_ptr[2] = ((u32 >> 6) & 0x3f) | 0x80;
-				u8_ptr[3] = (u32 & 0x3f) | 0x80;
-			}
-			out_len = 4;
+	}
+#endif
+	u16_len = UTF32ToUTF16(u32, u16_str, 2);
+	if (u16_len == 0) {
+		return 0;
+	}
+	mblen = WideCharToMultiByte(932, 0, u16_str, (int)u16_len, mbstr, 2, NULL, NULL);
+	switch (mblen) {
+	case 0:
+	case 1:
+	default:
+		if (mblen == 0 || mbstr[0] == '?') {
+			goto next_convert;
+		} else {
+			mb = (unsigned char)mbstr[0];
+			return mb;
 		}
-	} else {
-		out_len = 0;
+	case 2:
+		if (mbstr[0] == '?' && mbstr[1] == '?') {
+			// 2byte\x8Fo\x97\xCD && "??" \x82̏ꍇ\x82͕ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD
+			goto next_convert;
+		}
+		mb = (((unsigned char)mbstr[0]) << 8) | (unsigned char)mbstr[1];
+		return mb;
 	}
-	return out_len;
+
+next_convert:
+#if !defined(PRIORITY_CP932_TABLE)
+	if (u32 < 0x10000) {
+		wchar_t u16 = (wchar_t)u32;
+		// Tera Term\x82̕ϊ\xB7\x83e\x81[\x83u\x83\x8B\x82\xC5 Unicode -> Shift_JIS\x82֕ϊ\xB7
+		mb = _ConvertUnicode(u16, mapUnicodeToSJIS, _countof(mapUnicodeToSJIS));
+		if (mb != 0) {
+			// \x95ϊ\xB7\x82ł\xAB\x82\xBD
+			return mb;
+		}
+	}
+#endif
+	return 0;
 }
 
 /**
@@ -113,7 +271,7 @@
  * @param[in]	u8_len	UTF-8\x95\xB6\x8E\x9A\x97񒷂\xB3
  * @param[out]	u32		\x95ϊ\xB7\x82\xB5\x82\xBDUTF-32\x95\xB6\x8E\x9A
  * @retval		\x8Eg\x97p\x82\xB5\x82\xBDUTF-8\x95\xB6\x8E\x9A\x90\x94(byte\x90\x94\x81j
- *				0=\x83G\x83\x89\x81[
+ *				0=\x83G\x83\x89\x81[(\x95ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD)
  */
 size_t UTF8ToUTF32(const char *u8_ptr_, size_t u8_len, uint32_t *u32_)
 {
@@ -191,33 +349,312 @@
 	return u8_in;
 }
 
-// WideCharToMultiByte\x82\xCCUTF8\x93\xC1\x89\xBB\x94\xC5
-int WideCharToUTF8(const wchar_t *wstr_ptr, int wstr_len, char *u8_ptr, int u8_len)
+/**
+ *	wchar_t文字列からunicode(UTF-32)を1文字取り出す
+ *	@retval	0	文字として扱えない(文字コードがおかしい)
+ *	@retval	1	1キャラクタで1文字として扱える
+ *	@retval	2	2キャラクタで1文字として扱える
+ */
+size_t UTF16ToUTF32(const wchar_t *wstr_ptr, size_t wstr_len, unsigned int *u32)
 {
-	int u8_out_sum = 0;
+	assert(wstr_ptr != NULL);
+	if (wstr_len == 0) {
+		*u32 = 0;
+		return 0;
+	}
+	const wchar_t u16 = *wstr_ptr++;
+	// \x83T\x83\x8D\x83Q\x81[\x83g high?
+	if (IsHighSurrogate(u16)) {
+		if (wstr_len >= 2) {
+			const wchar_t u16_lo = *wstr_ptr++;
+			if (IsLowSurrogate(u16_lo)) {
+				// \x83T\x83\x8D\x83Q\x81[\x83g\x83y\x83A \x83f\x83R\x81[\x83h
+				*u32 = 0x10000 + (u16 - 0xd800) * 0x400 + (u16_lo - 0xdc00);
+				return 2;
+			} else {
+				*u32 = 0;
+				return 0;
+			}
+		} else {
+			*u32 = 0;
+			return 0;
+		}
+	} else if (IsLowSurrogate(u16)) {
+		*u32 = 0;
+		return 0;
+	} else {
+		*u32 = u16;
+		return 1;
+	}
+}
+
+/**
+ *	マルチバイト文字(code_page) からunicode(UTF-32)を1文字取り出す
+ *	@retval	0	文字として扱えない(文字コードがおかしい)
+ *	@retval	1	1キャラクタで1文字として扱える
+ *	@retval	2	2キャラクタで1文字として扱える
+ */
+size_t MBCPToUTF32(const char *mb_ptr, size_t mb_len, int code_page, unsigned int *u32)
+{
+	size_t input_len;
+	wchar_t u16_str[2];
+	size_t u16_len;
+
+	assert(mb_ptr != NULL);
+	if (mb_len == 0) {
+		*u32 = 0;
+		return 0;
+	}
+	if (code_page == CP_ACP) {
+		code_page = (int)GetACP();
+	}
+
+	input_len = 1;
+	while(1) {
+		u16_len = ::MultiByteToWideChar(code_page, MB_ERR_INVALID_CHARS,
+										mb_ptr, (int)input_len,
+										u16_str, 2);
+		if (u16_len != 0) {
+			size_t r = UTF16ToUTF32(u16_str, u16_len, u32);
+			assert(r != 0);
+			if (r == 0) {
+				// \x82Ȃ\xA2\x82͂\xB8
+				return 0;
+			} else {
+				return input_len;
+			}
+		}
+
+		input_len++;
+		if (input_len > mb_len) {
+			*u32 = 0;
+			return 0;
+		}
+	}
+}
+
+/**
+ * UTF-32文字 から UTF-8 へ変換する
+ * @param[in]		u32		変換するUTF-32
+ * @param[in,out]	u8_ptr	変換後UTF-8文字列出力先(NULLのとき出力しない)
+ * @param[in]		u8_len	UTF-8出力先文字数(バッファ長,byte数)
+ * @retval			出力したutf8文字数(byte数）
+ *					0=エラー
+ */
+size_t UTF32ToUTF8(uint32_t u32, char *u8_ptr_, size_t u8_len)
+{
+	size_t out_len = 0;
+	uint8_t *u8_ptr = (uint8_t *)u8_ptr_;
 	if (u8_ptr == NULL) {
 		u8_len = 4;
+	}
+
+	if (u32 <= 0x0000007f) {
+		// 0x00000000 <= u32 <= 0x0000007f
+		if (u8_len >= 1) {
+			if (u8_ptr != NULL) {
+				u8_ptr[0] = (uint8_t)u32;
+			}
+			out_len = 1;
+		}
+	} else if (u32 <= 0x000007ff) {
+		// 0x00000080 <= u32 <= 0x000007ff
+		if (u8_len >= 2) {
+			if (u8_ptr != NULL) {
+				u8_ptr[0] = ((u32 >> 6) & 0x1f) | 0xc0;
+				u8_ptr[1] = (u32 & 0x3f) | 0x80;
+			}
+			out_len = 2;
+		}
+	} else if (u32 <= 0x0000ffff) {
+		// 0x00000800 <= u32 <= 0x0000ffff
+		if (u8_len >= 3) {
+			if (u8_ptr != NULL) {
+				u8_ptr[0] = ((u32 >> 12) & 0xf) | 0xe0;
+				u8_ptr[1] = ((u32 >> 6) & 0x3f) | 0x80;
+				u8_ptr[2] = (u32 & 0x3f) | 0x80;
+			}
+			out_len = 3;
+		}
+	} else if (u32 <= 0x0010ffff) {
+		// 0x00010000 <= u32 <= 0x0010ffff
+		if (u8_len >= 4) {
+			if (u8_ptr != NULL) {
+				u8_ptr[0] = ((uint8_t)(u32 >> 18)) | 0xf0;
+				u8_ptr[1] = ((u32 >> 12) & 0x3f) | 0x80;
+				u8_ptr[2] = ((u32 >> 6) & 0x3f) | 0x80;
+				u8_ptr[3] = (u32 & 0x3f) | 0x80;
+			}
+			out_len = 4;
+		}
 	} else {
-		if (u8_len == 0) {
+		out_len = 0;
+	}
+	return out_len;
+}
+
+/**
+ * UTF-32 から UTF-16 へ変換する
+ * @param[in]		u32			変換するUTF-32
+ * @param[in,out]	wstr_ptr	変換後UTF-16文字列出力先(NULLのとき出力しない)
+ * @param[in]		wstr_len	UTF-16出力先文字数(文字数,sizeof(wchar_t)*wstr_len bytes)
+ * @retval			出力したUTF-16文字数(sizeof(wchar_t)倍するとbyte数)
+ *					0=エラー(変換できなかった)
+ */
+size_t UTF32ToUTF16(uint32_t u32, wchar_t *wstr_ptr, size_t wstr_len)
+{
+	size_t u16_out;
+	if (u32 < 0x10000) {
+		if (wstr_len >= 1) {
+			if (wstr_ptr != NULL) {
+				*wstr_ptr++ = (uint16_t)u32;
+			}
+			u16_out = 1;
+		} else {
+			u16_out = 0;
+		}
+	} else if (u32 <= 0x10ffff) {
+		if (wstr_len >= 2) {
+			if (wstr_ptr != NULL) {
+				// \x83T\x83\x8D\x83Q\x81[\x83g \x83G\x83\x93\x83R\x81[\x83h
+				*wstr_ptr++ = uint16_t((u32 - 0x10000) / 0x400) + 0xd800;
+				*wstr_ptr++ = uint16_t((u32 - 0x10000) % 0x400) + 0xdc00;
+			}
+			u16_out = 2;
+		} else {
+			u16_out = 0;
+		}
+	} else {
+		u16_out = 0;
+	}
+	return u16_out;
+}
+
+/**
+ * UTF-32 から CP932 へ変換する
+ * @param[in]		u32			変換するUTF-32
+ * @param[in,out]	mb_ptr		変換後CP932文字列出力先(NULLのとき出力しない)
+ * @param[in]		mb_len		CP932出力先文字数(文字数,sizeof(wchar_t)*wstr_len bytes)
+ * @retval			出力したCP932文字数(byte数)
+ *					0=エラー(変換できなかった)
+ */
+size_t UTF32ToCP932(uint32_t u32, char *mb_ptr, size_t mb_len)
+{
+	size_t cp932_out;
+	const uint16_t cp932 = UTF32_CP932(u32);
+	if (cp932 == 0 && u32 != 0) {
+		return 0;
+	}
+	if (mb_ptr == NULL) {
+		mb_len = 2;
+	}
+	if (cp932 < 0x100) {
+		if (mb_len >= 1) {
+			if (mb_ptr != NULL) {
+				*mb_ptr = cp932 & 0xff;
+			}
+			cp932_out = 1;
+		} else {
+			cp932_out = 0;
+		}
+	} else {
+		if (mb_len >= 2) {
+			if (mb_ptr != NULL) {
+				mb_ptr[0] = (cp932 >> 8) & 0xff;
+				mb_ptr[1] = cp932 & 0xff;
+			}
+			cp932_out = 2;
+		} else {
+			cp932_out = 0;
+		}
+	}
+	return cp932_out;
+}
+
+/**
+ * UTF-32 から MultiByte文字(code_page) へ変換する
+ * @param[in]		u32			変換元UTF-32
+ * @param[in]		code_page	変換先codepage
+ * @param[in,out]	mb_ptr		変換先文字列出力先(NULLのとき出力しない)
+ * @param[in]		mb_len		CP932出力先文字数(文字数,sizeof(wchar_t)*wstr_len bytes)
+ * @retval			出力したCP932文字数(byte数)
+ *					0=エラー(変換できなかった)
+ */
+size_t UTF32ToMBCP(unsigned int u32, int code_page, char *mb_ptr, size_t mb_len)
+{
+	if (code_page == CP_ACP) {
+		code_page = (int)GetACP();
+	}
+	if (code_page == 932) {
+		return UTF32ToCP932(u32, mb_ptr, mb_len);
+	} else {
+		wchar_t u16_str[2];
+		size_t u16_len;
+		u16_len = UTF32ToUTF16(u32, u16_str, 2);
+		if (u16_len == 0) {
 			return 0;
 		}
+		mb_len = WideCharToMultiByte(code_page, 0, u16_str, u16_len, mb_ptr, mb_len, NULL, NULL);
+		if (mb_ptr != NULL && u32 != '?' && mb_len == 1 && mb_ptr[0] == '?') {
+			// \x95ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD\x82Ƃ\xAB\x81A\x96߂\xE8\x92l=1, \x95\xB6\x8E\x9A[0]='?' \x82\xF0\x95Ԃ\xB5\x82Ă\xAD\x82\xE9
+			mb_len = 0;
+		}
+		return mb_len;
 	}
-	if (wstr_len < 0) {
+}
+
+/**
+ *	wchar_t(UTF-16)\x95\xB6\x8E\x9A\x97\xF1\x82\xF0\x83}\x83\x8B\x83`\x83o\x83C\x83g\x95\xB6\x8E\x9A\x97\xF1\x82ɕϊ\xB7\x82\xB7\x82\xE9
+ *
+ *	@param[in]		*wstr_ptr	wchar_t\x95\xB6\x8E\x9A\x97\xF1
+ *	@param[in,out]	*wstr_len	wchar_t\x95\xB6\x8E\x9A\x97\xF1\x92\xB7
+ *								NULL\x82܂\xBD\x82\xCD0\x82̂Ƃ\xAB\x8E\xA9\x93\xAE\x81AL'\0'\x82Ń^\x81[\x83~\x83l\x81[\x83g\x82\xB7\x82邱\x82\xC6)
+ *								NULL\x88ȊO\x82̂Ƃ\xAB\x93\xFC\x97͂\xB5\x82\xBD\x95\xB6\x8E\x9A\x90\x94\x82\xF0\x95Ԃ\xB7
+ *	@param[in]		*mb_ptr		\x95ϊ\xB7\x82\xB5\x82\xBD\x95\xB6\x8E\x9A\x97\xF1\x82\xF0\x8E\xFB\x94[\x82\xB7\x82\xE9\x83|\x83C\x83\x93\x83^
+ *								(NULL\x82̂Ƃ\xAB\x95ϊ\xB7\x82\xB9\x82\xB8\x82ɕ\xB6\x8E\x9A\x90\x94\x82\xF0\x83J\x83E\x83\x93\x83g\x82\xB7\x82\xE9)
+ *	@param[in,out]	*mb_len		\x95ϊ\xB7\x82\xB5\x82\xBD\x95\xB6\x8E\x9A\x97\xF1\x82\xF0\x8E\xFB\x94[\x82ł\xAB\x82\xE9\x83T\x83C\x83Y,byte\x90\x94,
+ *								mb_ptr\x82\xAANULL\x82̂Ƃ\xAB\x8Fo\x97͉”\\x83T\x83C\x83Y\x82͕s\x97v
+ *								\x95ϊ\xB7\x82\xB5\x82\xBD\x83}\x83\x8B\x83`\x83o\x83C\x83g\x95\xB6\x8E\x9A\x97\xF1\x82̒\xB7\x82\xB3\x82\xF0\x95Ԃ\xB7
+ *								L'\0'\x82\xF0\x95ϊ\xB7\x82\xB5\x82\xBD\x82\xE7'\0'\x82\xE0\x8A܂\xDE
+ *								mb_ptr\x82\xAANULL\x82̂Ƃ\xAB\x82ł\xE0\x92\xB7\x82\xB3\x82͕Ԃ\xB7
+ *	@param[in]		UTF32ToMB	UTF32\x82\xF0\x83}\x83\x8B\x83`\x83o\x83C\x83g\x82ɕϊ\xB7\x82\xB7\x82\xE9\x8A֐\x94\x82ւ̃|\x83C\x83\x93\x83^
+ */
+static void WideCharToMB(const wchar_t *wstr_ptr, size_t *wstr_len_,
+						 char *mb_ptr, size_t *mb_len_,
+						 size_t (*UTF32ToMB)(uint32_t u32, char *mb_ptr, size_t mb_len))
+{
+	size_t wstr_len;
+	size_t mb_len;
+	size_t mb_out_sum = 0;
+	size_t wstr_in = 0;
+
+	assert(wstr_ptr != NULL);
+	if (mb_ptr == NULL) {
+		// \x95ϊ\xB7\x95\xB6\x8E\x9A\x97\xF1\x82\xF0\x8F\x91\x82\xAB\x8Fo\x82\xB3\x82Ȃ\xA2
+		mb_len = 4;		// 1\x95\xB6\x8E\x9A4byte\x82ɂ͎\xFB\x82܂\xE9\x82͂\xB8
+	} else {
+		mb_len = *mb_len_;
+	}
+	if (wstr_len_ == NULL || *wstr_len_ == 0) {
 		wstr_len = (int)wcslen(wstr_ptr) + 1;
+	} else {
+		wstr_len = *wstr_len_;
 	}
 
-	while(u8_len > 0 && wstr_len > 0) {
+	while(mb_len > 0 && wstr_len > 0) {
 		const wchar_t u16 = *wstr_ptr++;
 		uint32_t u32 = u16;
-		size_t u8_out;
+		size_t mb_out;
 		wstr_len--;
+		wstr_in++;
 		// \x83T\x83\x8D\x83Q\x81[\x83g high?
-		if (0xd800 <= u16 && u16 < 0xdc00) {
+		if (IsHighSurrogate(u16)) {
 			if (wstr_len >= 1) {
 				const wchar_t u16_lo = *wstr_ptr++;
 				wstr_len--;
+				wstr_in++;
 				// \x83T\x83\x8D\x83Q\x81[\x83g low?
-				if (0xdc00 <= u16_lo && u16_lo < 0xe000) {
+				if (IsLowSurrogate(u16_lo)) {
 					// \x83T\x83\x8D\x83Q\x81[\x83g\x83y\x83A \x83f\x83R\x81[\x83h
 					u32 = 0x10000 + (u16 - 0xd800) * 0x400 + (u16_lo - 0xdc00);
 				} else {
@@ -224,34 +661,76 @@
 					goto unknown_code;
 				}
 			} else {
-			unknown_code:
-				if (u8_ptr != NULL) {
-					*u8_ptr++ = '?';
-				}
-				u8_out = 1;
-				goto loop_next;
+				goto unknown_code;
 			}
 		}
-		u8_out = UTF32ToUTF8(u32, u8_ptr, u8_len);
-		if (u8_out == 0) {
-			goto unknown_code;
+		mb_out = UTF32ToMB(u32, mb_ptr, mb_len);
+		if (mb_out == 0) {
+		unknown_code:
+			if (mb_ptr != NULL) {
+				// \x95ϊ\xB7\x82ł\xAB\x82Ȃ\xA9\x82\xC1\x82\xBD\x8Fꍇ
+				*mb_ptr++ = '?';
+			}
+			mb_out = 1;
 		}
-	loop_next:
-		u8_out_sum += u8_out;
-		if (u8_ptr != NULL) {
-			u8_ptr += u8_out;
-			u8_len -= u8_out;
+		mb_out_sum += mb_out;
+		if (mb_ptr != NULL) {
+			mb_ptr += mb_out;
+			mb_len -= mb_out;
 		}
 	}
-	return u8_out_sum;
+
+	if (wstr_len_ != NULL) {
+		*wstr_len_ = wstr_in;
+	}
+	*mb_len_ = mb_out_sum;
 }
 
+// WideCharToMultiByte\x82\xCCUTF8\x93\xC1\x89\xBB\x94\xC5
+void WideCharToUTF8(const wchar_t *wstr_ptr, size_t *wstr_len, char *u8_ptr, size_t *u8_len)
+{
+	WideCharToMB(wstr_ptr, wstr_len, u8_ptr, u8_len, UTF32ToUTF8);
+}
+
+void WideCharToCP932(const wchar_t *wstr_ptr, size_t *wstr_len, char *cp932_ptr, size_t *cp932_len)
+{
+	WideCharToMB(wstr_ptr, wstr_len, cp932_ptr, cp932_len, UTF32ToCP932);
+}
+
+void WideCharToMBCP(const wchar_t *wstr_ptr, size_t *wstr_len, char *mb_ptr, size_t *mb_len,
+					int code_page)
+{
+	size_t (*utf32_to_mb)(uint32_t u32, char *mb_ptr, size_t mb_len);
+	if (code_page == CP_ACP) {
+		code_page = (int)GetACP();
+	}
+	switch (code_page) {
+	case CP_UTF8:
+		utf32_to_mb = UTF32ToUTF8;
+		break;
+	case 932:
+		utf32_to_mb = UTF32ToCP932;
+		break;
+	default:
+		*mb_len = 0;
+		return;
+	}
+
+	WideCharToMB(wstr_ptr, wstr_len,
+				 mb_ptr, mb_len,
+				 utf32_to_mb);
+}
+
 // MultiByteToWideChar\x82\xCCUTF8\x93\xC1\x89\xBB\x94\xC5
-int UTF8ToWideChar(const char *u8_ptr, int u8_len, wchar_t *wstr_ptr, int wstr_len)
+int UTF8ToWideChar(const char *u8_ptr, int u8_len_, wchar_t *wstr_ptr, int wstr_len_)
 {
+	size_t u8_len;
+	size_t wstr_len = wstr_len_;
 	size_t u16_out_sum = 0;
-	if (u8_len < 0) {
+	if (u8_len_ < 0) {
 		u8_len = strlen(u8_ptr) + 1;
+	} else {
+		u8_len = u8_len_;
 	}
 	if (wstr_ptr == NULL) {
 		wstr_len = 1;
@@ -295,21 +774,26 @@
 		}
 		u16_out_sum += u16_out;
 	}
-	return u16_out_sum;
+	return (int)u16_out_sum;
 }
 
 /**
  *	wchar_t\x95\xB6\x8E\x9A\x97\xF1\x82\xF0\x83}\x83\x8B\x83`\x83o\x83C\x83g\x95\xB6\x8E\x9A\x97\xF1\x82֕ϊ\xB7
  *	@param[in]	*wstr_ptr	wchar_t\x95\xB6\x8E\x9A\x97\xF1
- *	@param[in]	wstr_len	wchar_t\x95\xB6\x8E\x9A\x97\xF1\x92\xB7(0\x82̂Ƃ\xAB\x8E\xA9\x93\xAE)
+ *	@param[in]	wstr_len	wchar_t\x95\xB6\x8E\x9A\x97\xF1\x92\xB7(0\x82̂Ƃ\xAB\x8E\xA9\x93\xAE\x81A\x8E\xA9\x93\xAE\x82̂Ƃ\xAB\x82\xCDL'\0'\x82Ń^\x81[\x83~\x83l\x81[\x83g\x82\xB7\x82邱\x82\xC6)
  *	@param[in]	code_page	\x95ϊ\xB7\x90\xE6\x83R\x81[\x83h\x83y\x81[\x83W
- *	@param[out]	*mb_len_	mb\x95\xB6\x8E\x9A\x97\xF1\x92\xB7(NULL\x82̂Ƃ\xAB\x93\xE0\x95\x94\x83G\x83\x89\x81[)
+ *	@param[out]	*mb_len_	\x95ϊ\xB7\x82\xB5\x82\xBD\x95\xB6\x8E\x9A\x97\xF1\x92\xB7,byte\x90\x94,L'\0'\x82\xF0\x95ϊ\xB7\x82\xB5\x82\xBD\x82\xE7'\0'\x82\xE0\x8A܂\xDE
+ *							(NULL\x82̂Ƃ\xAB\x95\xB6\x8E\x9A\x97񒷂\xF0\x95Ԃ\xB3\x82Ȃ\xA2)
  *	@retval		mb\x95\xB6\x8E\x9A\x97\xF1\x82ւ̃|\x83C\x83\x93\x83^(NULL\x82̎\x9E\x95ϊ\xB7\x83G\x83\x89\x81[)
+ *				\x8Eg\x97p\x8C\xE3 free() \x82\xB7\x82邱\x82\xC6
  */
 char *_WideCharToMultiByte(const wchar_t *wstr_ptr, size_t wstr_len, int code_page, size_t *mb_len_)
 {
 	const DWORD flags = 0;
 	char *mb_ptr;
+	if (code_page == CP_ACP) {
+		code_page = (int)GetACP();
+	}
 	if (mb_len_ != NULL) {
 		*mb_len_ = 0;
 	}
@@ -317,9 +801,11 @@
 		wstr_len = wcslen(wstr_ptr) + 1;
 	}
     int len;
-	if (code_page == CP_UTF8) {
-		len = WideCharToUTF8(wstr_ptr, (DWORD)wstr_len,
-							 NULL, 0);
+	if (code_page == CP_UTF8 || code_page == 932) {
+		size_t wl = wstr_len;
+		size_t ml;
+		WideCharToMBCP(wstr_ptr, &wl, NULL, &ml, code_page);
+		len = ml;
 	} else {
 		len = ::WideCharToMultiByte(code_page, flags,
 									wstr_ptr, (DWORD)wstr_len,
@@ -333,9 +819,11 @@
 	if (mb_ptr == NULL) {
 		return NULL;
 	}
-	if (code_page == CP_UTF8) {
-		len = WideCharToUTF8(wstr_ptr, (DWORD)wstr_len,
-							 mb_ptr, len);
+	if (code_page == CP_UTF8 || code_page == 932) {
+		size_t wl = wstr_len;
+		size_t ml = len;
+		WideCharToMBCP(wstr_ptr, &wl, mb_ptr, &ml, code_page);
+		len = ml;
 	} else {
 		len = ::WideCharToMultiByte(code_page, flags,
 									wstr_ptr, (DWORD)wstr_len,
@@ -347,7 +835,8 @@
 		return NULL;
 	}
 	if (mb_len_ != NULL) {
-		*mb_len_ = len - 1;
+		// \x95ϊ\xB7\x82\xB5\x82\xBD\x95\xB6\x8E\x9A\x97\xF1\x90\x94(byte\x90\x94)\x82\xF0\x95Ԃ\xB7
+		*mb_len_ = len;
 	}
     return mb_ptr;
 }
@@ -355,14 +844,19 @@
 /**
  *	\x83}\x83\x8B\x83`\x83o\x83C\x83g\x95\xB6\x8E\x9A\x97\xF1\x82\xF0wchar_t\x95\xB6\x8E\x9A\x97\xF1\x82֕ϊ\xB7
  *	@param[in]	*str_ptr	mb(char)\x95\xB6\x8E\x9A\x97\xF1
- *	@param[in]	str_len		mb(char)\x95\xB6\x8E\x9A\x97\xF1\x92\xB7(0\x82̂Ƃ\xAB\x8E\xA9\x93\xAE)
+ *	@param[in]	str_len		mb(char)\x95\xB6\x8E\x9A\x97\xF1\x92\xB7(0\x82̂Ƃ\xAB\x8E\xA9\x93\xAE\x81A\x8E\xA9\x93\xAE\x82̂Ƃ\xAB\x82\xCD'\0'\x82Ń^\x81[\x83~\x83l\x81[\x83g\x82\xB7\x82邱\x82\xC6)
  *	@param[in]	code_page	\x95ϊ\xB7\x8C\xB3\x83R\x81[\x83h\x83y\x81[\x83W
- *	@param[out]	*w_len_		wchar_t\x95\xB6\x8E\x9A\x97\xF1\x92\xB7
- *	@retval		mb\x95\xB6\x8E\x9A\x97\xF1\x82ւ̃|\x83C\x83\x93\x83^(NULL\x82̎\x9E\x95ϊ\xB7\x83G\x83\x89\x81[)
+ *	@param[out]	*w_len_		wchar_t\x95\xB6\x8E\x9A\x97\xF1\x92\xB7,wchar_t\x90\x94,'\0'\x82\xF0\x95ϊ\xB7\x82\xB5\x82\xBD\x82\xE7L'\0'\x82\xE0\x8A܂\xDE
+ *							(NULL\x82̂Ƃ\xAB\x95\xB6\x8E\x9A\x97񒷂\xF0\x95Ԃ\xB3\x82Ȃ\xA2)
+ *	@retval		wchar_t\x95\xB6\x8E\x9A\x97\xF1\x82ւ̃|\x83C\x83\x93\x83^(NULL\x82̎\x9E\x95ϊ\xB7\x83G\x83\x89\x81[)
+ *				\x8Eg\x97p\x8C\xE3 free() \x82\xB7\x82邱\x82\xC6
  */
 wchar_t *_MultiByteToWideChar(const char *str_ptr, size_t str_len, int code_page, size_t *w_len_)
 {
 	DWORD flags = MB_ERR_INVALID_CHARS;
+	if (code_page == CP_ACP) {
+		code_page = (int)GetACP();
+	}
 	if (code_page == CP_UTF8) {
 		// CP_UTF8 When this is set, dwFlags must be zero.
 		flags = 0;
@@ -402,7 +896,8 @@
 		return NULL;
 	}
 	if (w_len_ != NULL) {
-		*w_len_ = len - 1;
+		// \x95ϊ\xB7\x82\xB5\x82\xBD\x95\xB6\x8E\x9A\x97\xF1\x90\x94(wchar_t\x90\x94)\x82\xF0\x95Ԃ\xB7
+		*w_len_ = len;
 	}
 	return wstr_ptr;
 }

Modified: branches/cmake/teraterm/common/codeconv.h
===================================================================
--- branches/cmake/teraterm/common/codeconv.h	2019-03-10 14:32:02 UTC (rev 7468)
+++ branches/cmake/teraterm/common/codeconv.h	2019-03-10 14:32:33 UTC (rev 7469)
@@ -34,13 +34,26 @@
 extern "C" {
 #endif
 
+// simple code convert
+unsigned int CP932ToUTF32(unsigned short cp932);
+unsigned short UTF32ToDecSp(unsigned int u32);
+unsigned int MBCP_UTF32(unsigned short mb_code, int code_page);
+unsigned short UTF32_CP932(unsigned int u32);
 
-// 1char
-size_t UTF32ToUTF8(unsigned int u32, char *u8_ptr, size_t u8_len);
+// 1char ToUTF32
 size_t UTF8ToUTF32(const char *u8_ptr_, size_t u8_len, unsigned int *u32_);
+size_t UTF16ToUTF32(const wchar_t *wstr_ptr, size_t wstr_len, unsigned int *u32);
+size_t MBCPToUTF32(const char *mb_ptr, size_t mb_len, int code_page, unsigned int *u32);
 
+// 1char UTF32To
+size_t UTF32ToUTF16(unsigned int u32, wchar_t *wstr_ptr, size_t wstr_len);
+size_t UTF32ToUTF8(unsigned int u32, char *u8_ptr, size_t u8_len);
+size_t UTF32ToCP932(unsigned int u32, char *mb_ptr, size_t mb_len);
+size_t UTF32ToMBCP(unsigned int u32, int code_page, char *mb_ptr, size_t mb_len);
+
 // MultiByteToWideChar() wrappers
-int WideCharToUTF8(const wchar_t *wstr_ptr, int wstr_len, char *u8_ptr, int u8_len);
+void WideCharToUTF8(const wchar_t *wstr_ptr, size_t *wstr_len, char *u8_ptr, size_t *u8_len);
+void WideCharToCP932(const wchar_t *wstr_ptr, size_t *wstr_len, char *cp932_ptr, size_t *cp932_len);
 int UTF8ToWideChar(const char *u8_ptr, int u8_len, wchar_t *wstr_ptr, int wstr_len);
 
 // API wrappers


Ttssh2-commit メーリングリストの案内
Back to archive index