Made handling of nul terminators in CUnicode more sane: the Windows clipboard converters now add or strip the terminator explicitly.

2002-07-24 17:22:01 +00:00 · 2002-07-24 17:22:01 +00:00 · 5fe7763d37
parent 6fc6805a06
commit 5fe7763d37
4 changed files with 45 additions and 57 deletions
--- a/base/CUnicode.cpp
+++ b/base/CUnicode.cpp
@ -82,9 +82,8 @@ CUnicode::UTF8ToUCS2(const CString& src, bool* errors)
 	// default to success
 	resetError(errors);
-	// get size of input string and reserve some space in output.
+	// get size of input string and reserve some space in output
-	// include UTF8's nul terminator.
+	UInt32 n = src.size();
 	UInt32 n = src.size() + 1;
 	CString dst;
 	dst.reserve(2 * n);
@ -112,9 +111,8 @@ CUnicode::UTF8ToUCS4(const CString& src, bool* errors)
 	// default to success
 	resetError(errors);
-	// get size of input string and reserve some space in output.
+	// get size of input string and reserve some space in output
-	// include UTF8's nul terminator.
+	UInt32 n = src.size();
 	UInt32 n = src.size() + 1;
 	CString dst;
 	dst.reserve(4 * n);
@ -137,9 +135,8 @@ CUnicode::UTF8ToUTF16(const CString& src, bool* errors)
 	// default to success
 	resetError(errors);
-	// get size of input string and reserve some space in output.
+	// get size of input string and reserve some space in output
-	// include UTF8's nul terminator.
+	UInt32 n = src.size();
 	UInt32 n = src.size() + 1;
 	CString dst;
 	dst.reserve(2 * n);
@ -176,9 +173,8 @@ CUnicode::UTF8ToUTF32(const CString& src, bool* errors)
 	// default to success
 	resetError(errors);
-	// get size of input string and reserve some space in output.
+	// get size of input string and reserve some space in output
-	// include UTF8's nul terminator.
+	UInt32 n = src.size();
 	UInt32 n = src.size() + 1;
 	CString dst;
 	dst.reserve(4 * n);
@ -211,12 +207,13 @@ CUnicode::UTF8ToText(const CString& src, bool* errors)
 	// get length of multibyte string
 	char mbc[MB_LEN_MAX];
 	size_t mblen;
 	mbstate_t state;
 	memset(&state, 0, sizeof(state));
 	size_t len = 0;
 	UInt32 n   = size;
 	for (const wchar_t* scan = tmp; n > 0; ++scan, --n) {
-		size_t mblen = wcrtomb(mbc, *scan, &state);
+		mblen = wcrtomb(mbc, *scan, &state);
 		if (mblen == -1) {
 			// unconvertable character
 			setError(errors);
@ -227,21 +224,21 @@ CUnicode::UTF8ToText(const CString& src, bool* errors)
 		}
 	}
-	// check if state is in initial state.  if not then count the
+	// handle nul terminator
-	// bytes for returning it to the initial state.
+	mblen = wcrtomb(mbc, L'\0', &state);
-	if (mbsinit(&state) == 0) {
+	if (mblen != -1) {
-		len += wcrtomb(mbc, L'\0', &state) - 1;
+		len += mblen - 1;
 	}
 	assert(mbsinit(&state) != 0);
 	// allocate multibyte string
-	char* mbs = new char[len + 1];
+	char* mbs = new char[len];
 	// convert to multibyte
 	char* dst = mbs;
 	n         = size;
 	for (const wchar_t* scan = tmp; n > 0; ++scan, --n) {
-		size_t mblen = wcrtomb(dst, *scan, &state);
+		mblen = wcrtomb(dst, *scan, &state);
 		if (mblen == -1) {
 			// unconvertable character
 			*dst++ = '?';
@ -250,7 +247,11 @@ CUnicode::UTF8ToText(const CString& src, bool* errors)
 			dst   += mblen;
 		}
 	}
-	*dst++ = '\0';
+	mblen = wcrtomb(dst, L'\0', &state);
 	if (mblen != -1) {
 		// don't include nul terminator
 		dst += mblen - 1;
 	}
 	CString text(mbs, dst - mbs);
 	// clean up
@ -311,7 +312,7 @@ CUnicode::textToUTF8(const CString& src, bool* errors)
 	resetError(errors);
 	// get length of multibyte string
-	UInt32 n   = src.size();
+	UInt32 n   = src.size() + 1;
 	size_t len = 0;
 	mbstate_t state;
 	memset(&state, 0, sizeof(state));
@ -399,9 +400,7 @@ CUnicode::textToUTF8(const CString& src, bool* errors)
 wchar_t*
 CUnicode::UTF8ToWideChar(const CString& src, UInt32& size, bool* errors)
 {
-	// convert to platform's wide character encoding.
+	// convert to platform's wide character encoding
-	// note -- this must include a wide nul character (independent of
-	// the CString's nul character).
 #if WINDOWS_LIKE
 	CString tmp = UTF8ToUTF16(src, errors);
 	size = tmp.size() >> 1;
@ -442,11 +441,6 @@ CUnicode::doUCS2ToUTF8(const UInt8* data, UInt32 n, bool* errors)
 		toUTF8(dst, c, errors);
 	}
 	// remove extra trailing nul
 	if (dst.size() > 0 && dst[dst.size() - 1] == '\0') {
 		dst.resize(dst.size() - 1);
 	}
 	return dst;
 }
@ -463,11 +457,6 @@ CUnicode::doUCS4ToUTF8(const UInt8* data, UInt32 n, bool* errors)
 		toUTF8(dst, c, errors);
 	}
 	// remove extra trailing nul
 	if (dst.size() > 0 && dst[dst.size() - 1] == '\0') {
 		dst.resize(dst.size() - 1);
 	}
 	return dst;
 }
@ -510,11 +499,6 @@ CUnicode::doUTF16ToUTF8(const UInt8* data, UInt32 n, bool* errors)
 		}
 	}
 	// remove extra trailing nul
 	if (dst.size() > 0 && dst[dst.size() - 1] == '\0') {
 		dst.resize(dst.size() - 1);
 	}
 	return dst;
 }
@ -535,11 +519,6 @@ CUnicode::doUTF32ToUTF8(const UInt8* data, UInt32 n, bool* errors)
 		toUTF8(dst, c, errors);
 	}
 	// remove extra trailing nul
 	if (dst.size() > 0 && dst[dst.size() - 1] == '\0') {
 		dst.resize(dst.size() - 1);
 	}
 	return dst;
 }
--- a/base/CUnicode.h
+++ b/base/CUnicode.h
@ -15,9 +15,7 @@ public:
 	// not NULL then it gets true if any characters could not be
 	// encoded in the target encoding and false otherwise.  note
 	// that decoding errors do not set errors to error.  UTF8ToText()
-	// converts to the current locale's (multibyte) encoding.  all of
+	// converts to the current locale's (multibyte) encoding.
-	// these methods include the nul terminator in the returned
-	// string (independent of the CString's own nul terminator).
 	static CString		UTF8ToUCS2(const CString&, bool* errors = NULL);
 	static CString		UTF8ToUCS4(const CString&, bool* errors = NULL);
 	static CString		UTF8ToUTF16(const CString&, bool* errors = NULL);
@ -27,9 +25,7 @@ public:
 	// convert from some encoding to UTF-8.  if errors is not NULL
 	// then it gets true if any characters could not be decoded and
 	// false otherwise.  textToUTF8() converts from the current
-	// locale's (multibyte) encoding.  all of these methods strip
+	// locale's (multibyte) encoding.
-	// a terminating nul so the returned UTF-8 string uses the
-	// CString's own nul terminator for termination.
 	static CString		UCS2ToUTF8(const CString&, bool* errors = NULL);
 	static CString		UCS4ToUTF8(const CString&, bool* errors = NULL);
 	static CString		UTF16ToUTF8(const CString&, bool* errors = NULL);
@ -37,9 +33,10 @@ public:
 	static CString		textToUTF8(const CString&, bool* errors = NULL);
 private:
-	// convert UTF8 to nul terminated wchar_t string (using whatever
+	// convert UTF8 to wchar_t string (using whatever encoding is native
-	// encoding is native to the platform).  caller must delete[]
+	// to the platform).  caller must delete[] the returned string.  the
-	// the returned string.
+	// string is *not* nul terminated;  the length (in characters) is
+	// returned in size.
 	static wchar_t*		UTF8ToWideChar(const CString&,
 							UInt32& size, bool* errors);
--- a/platform/CMSWindowsClipboardTextConverter.cpp
+++ b/platform/CMSWindowsClipboardTextConverter.cpp
@ -24,11 +24,17 @@ CMSWindowsClipboardTextConverter::getWin32Format() const
 CString
 CMSWindowsClipboardTextConverter::doFromIClipboard(const CString& data) const
 {
-	return CUnicode::UTF8ToText(data);
+	// convert to locale text; UTF8ToText() omits the nul terminator, so append it
 	return CUnicode::UTF8ToText(data) += '\0';
 }
 CString
 CMSWindowsClipboardTextConverter::doToIClipboard(const CString& data) const
 {
-	return CUnicode::textToUTF8(data);
+	// convert locale text to UTF-8, then drop one trailing nul if the result ends with one
 	CString dst = CUnicode::textToUTF8(data);
 	if (dst.size() > 0 && dst[dst.size() - 1] == '\0') {
 		dst.erase(dst.size() - 1);
 	}
 	return dst;
 }
--- a/platform/CMSWindowsClipboardUTF16Converter.cpp
+++ b/platform/CMSWindowsClipboardUTF16Converter.cpp
@ -24,11 +24,17 @@ CMSWindowsClipboardUTF16Converter::getWin32Format() const
 CString
 CMSWindowsClipboardUTF16Converter::doFromIClipboard(const CString& data) const
 {
-	return CUnicode::UTF8ToUTF16(data);
+	// convert to UTF-16 and append a wide nul terminator (sizeof(wchar_t) zero bytes)
 	return CUnicode::UTF8ToUTF16(data).append(sizeof(wchar_t), 0);
 }
 CString
 CMSWindowsClipboardUTF16Converter::doToIClipboard(const CString& data) const
 {
-	return CUnicode::UTF16ToUTF8(data);
+	// convert UTF-16 to UTF-8, then drop one trailing nul byte if the result ends with one
 	CString dst = CUnicode::UTF16ToUTF8(data);
 	if (dst.size() > 0 && dst[dst.size() - 1] == '\0') {
 		dst.erase(dst.size() - 1);
 	}
 	return dst;
 }